fix(MAT-56): Prevent bot silence from STT noise leak + LLM timeout

Three fixes for the bot going silent after ~10 messages:

1. STT artifact handler now returns early — previously detected noise
   leaks ("Vielen Dank.", etc.) but still appended them to transcript,
   inflating context until LLM timed out after 4 retries.

2. Context truncation — caps LLM chat context at 40 items and internal
   transcript at 80 entries to prevent unbounded growth in long sessions.

3. LLM timeout recovery — watchdog detects when agent has been silent
   for >60s despite user activity, sends a recovery reply asking user
   to repeat their question instead of staying permanently silent.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-27 07:58:11 +02:00
parent b19300d3ce
commit 3bf9229ae4

View File

@@ -108,11 +108,15 @@ def _is_stt_artifact(text: str) -> bool:
     return False
 
+_MAX_CHAT_CTX_ITEMS = 40  # Keep last N items in LLM context to prevent unbounded growth
+
 class _NoiseFilterAgent(Agent):
     """Agent that suppresses ElevenLabs STT artifacts before LLM sees them.
 
     Uses on_user_turn_completed() which runs after VAD+STT, so no backpressure
     risk to the audio pipeline. Raises StopResponse to silently discard noise.
+    Also truncates chat context to prevent unbounded growth in long sessions.
     """
 
     async def on_user_turn_completed(
@@ -126,6 +130,15 @@ class _NoiseFilterAgent(Agent):
             turn_ctx.items.pop()
             raise StopResponse()
 
+        # Truncate context: keep system/instructions + last N items
+        if len(turn_ctx.items) > _MAX_CHAT_CTX_ITEMS + 5:
+            # Preserve first item (system prompt) and trim middle
+            keep_start = 1  # system prompt
+            keep_end = _MAX_CHAT_CTX_ITEMS
+            old_len = len(turn_ctx.items)
+            turn_ctx.items[:] = turn_ctx.items[:keep_start] + turn_ctx.items[-keep_end:]
+            logger.info("CONTEXT_TRUNCATED: %d -> %d items", old_len, len(turn_ctx.items))
 _vad = None
 
 def _get_vad():
@@ -756,11 +769,15 @@ class VoiceSession:
             text = ev.transcript or ""
             if text and _is_stt_artifact(text):
                 logger.warning("NOISE_LEAK: artifact reached STT: %s", text)
-            else:
+                return  # Do NOT add artifacts to transcript — they inflate context
+            if text:
                 logger.info("USER_SPEECH: %s", text)
             if ev.transcript:
                 _last_user_speech.append(ev.transcript)
                 self._transcript.append({"role": "user", "text": ev.transcript})
+                # Cap transcript to prevent unbounded memory growth
+                if len(self._transcript) > _MAX_CHAT_CTX_ITEMS * 2:
+                    self._transcript[:] = self._transcript[-_MAX_CHAT_CTX_ITEMS:]
         @self.session.on("conversation_item_added")
         def _on_conversation_item(ev):
@@ -1047,13 +1064,43 @@ class VoiceSession:
         except asyncio.TimeoutError:
             logger.error("Greeting timed out")
 
-        # VAD watchdog: log diagnostic and attempt E2EE key recovery if stuck
+        # VAD watchdog: log diagnostic, attempt E2EE key recovery, and recover from LLM failures
         import time as _time
+
+        _last_agent_speech_time = _time.monotonic()
+        _llm_recovery_attempted = False
+
+        @self.session.on("conversation_item_added")
+        def _track_agent_speech(ev):
+            nonlocal _last_agent_speech_time, _llm_recovery_attempted
+            role = getattr(ev.item, "role", "?")
+            if role == "assistant":
+                _last_agent_speech_time = _time.monotonic()
+                _llm_recovery_attempted = False  # reset on successful speech
+
         while True:
             await asyncio.sleep(10)
+
+            # LLM timeout recovery: if user has been speaking but agent
+            # hasn't responded in >60s, the LLM pipeline is likely stuck
+            sc = _vad_state_log.get("speaking_count", 0)
+            agent_silent_secs = _time.monotonic() - _last_agent_speech_time
+            if sc > 0 and agent_silent_secs > 60 and not _llm_recovery_attempted:
+                _llm_recovery_attempted = True
+                logger.warning(
+                    "LLM_RECOVERY: agent silent for %.0fs after %d user turns "
+                    "— sending recovery reply", agent_silent_secs, sc)
+                try:
+                    await asyncio.wait_for(
+                        self.session.generate_reply(
+                            instructions="Entschuldigung, ich hatte kurz ein technisches Problem. "
+                            "Kannst du deine letzte Frage bitte wiederholen?"),
+                        timeout=30.0)
+                    logger.info("LLM_RECOVERY: recovery reply sent successfully")
+                except Exception as exc:
+                    logger.error("LLM_RECOVERY: recovery reply failed: %s", exc)
             away_since = _vad_state_log.get("away_since")
             if away_since and (_time.monotonic() - away_since) > 30:
-                sc = _vad_state_log.get("speaking_count", 0)
                 e2ee_ok = any(
                     str(getattr(p, '_e2ee_state', '')) == 'OK'
                     for p in self.lk_room.remote_participants.values()