feat: instant "Einen Moment" filler when look_at_screen is invoked

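Speaks a short filler phrase before the video frame is captured, so the
user knows the bot is looking at their screen share / camera while the
vision API request is still pending. The filler is interruptible, is not
added to the chat context, and any error raised by say() is ignored.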

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-03-10 13:29:21 +02:00
parent 5521819358
commit a155f39ede

View File

@@ -1136,6 +1136,7 @@ class VoiceSession:
# Vision tool — capture video frame and analyze with vision model # Vision tool — capture video frame and analyze with vision model
_video_track_ref = self # reference to VoiceSession for video track access _video_track_ref = self # reference to VoiceSession for video track access
_lk_room_ref = self.lk_room _lk_room_ref = self.lk_room
_session_ref = self.session # for say() in tools
@function_tool @function_tool
async def look_at_screen(question: str) -> str: async def look_at_screen(question: str) -> str:
@@ -1151,6 +1152,12 @@ class VoiceSession:
if not video_track: if not video_track:
return ("Kein Video verfuegbar. Der Nutzer muss seine Kamera oder " return ("Kein Video verfuegbar. Der Nutzer muss seine Kamera oder "
"Bildschirmfreigabe aktivieren bevor ich etwas sehen kann.") "Bildschirmfreigabe aktivieren bevor ich etwas sehen kann.")
# Instant filler so user knows bot is looking
try:
await _session_ref.say("Einen Moment, ich schaue mir das an.",
allow_interruptions=True, add_to_chat_ctx=False)
except Exception:
pass
try: try:
# Capture single frame from video track # Capture single frame from video track
stream = rtc.VideoStream(video_track) stream = rtc.VideoStream(video_track)