feat: instant "Einen Moment" filler when look_at_screen is invoked

Plays immediate spoken feedback so the user knows the bot is processing their screen share or camera feed before the vision API responds.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-10 13:29:21 +02:00
parent 5521819358
commit a155f39ede
1 changed files with 7 additions and 0 deletions
--- a/voice.py
+++ b/voice.py
@@ -1136,6 +1136,7 @@ class VoiceSession:
            # Vision tool — capture video frame and analyze with vision model
            _video_track_ref = self  # reference to VoiceSession for video track access
            _lk_room_ref = self.lk_room
+            _session_ref = self.session  # for say() in tools

            @function_tool
            async def look_at_screen(question: str) -> str:
@@ -1151,6 +1152,12 @@ class VoiceSession:
                if not video_track:
                    return ("Kein Video verfuegbar. Der Nutzer muss seine Kamera oder "
                            "Bildschirmfreigabe aktivieren bevor ich etwas sehen kann.")
+                # Instant filler so user knows bot is looking
+                try:
+                    await _session_ref.say("Einen Moment, ich schaue mir das an.",
+                                           allow_interruptions=True, add_to_chat_ctx=False)
+                except Exception:
+                    pass
                try:
                    # Capture single frame from video track
                    stream = rtc.VideoStream(video_track)