From a155f39ede080a67e80361676f692f6088d9fd53 Mon Sep 17 00:00:00 2001
From: Christian Gick
Date: Tue, 10 Mar 2026 13:29:21 +0200
Subject: [PATCH] feat: instant "Einen Moment" filler when look_at_screen is invoked

Plays immediate spoken feedback so the user knows the bot is processing
their screen share / camera before the vision API responds.

Co-Authored-By: Claude Opus 4.6
---
 voice.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/voice.py b/voice.py
index 3223868..2592da5 100644
--- a/voice.py
+++ b/voice.py
@@ -1136,6 +1136,7 @@ class VoiceSession:
         # Vision tool — capture video frame and analyze with vision model
         _video_track_ref = self  # reference to VoiceSession for video track access
         _lk_room_ref = self.lk_room
+        _session_ref = self.session  # for say() in tools
 
         @function_tool
         async def look_at_screen(question: str) -> str:
@@ -1151,6 +1152,12 @@ class VoiceSession:
             if not video_track:
                 return ("Kein Video verfuegbar. Der Nutzer muss seine Kamera oder "
                         "Bildschirmfreigabe aktivieren bevor ich etwas sehen kann.")
+            # Instant filler so user knows bot is looking
+            try:
+                await _session_ref.say("Einen Moment, ich schaue mir das an.",
+                                       allow_interruptions=True, add_to_chat_ctx=False)
+            except Exception:
+                pass
             try:
                 # Capture single frame from video track
                 stream = rtc.VideoStream(video_track)