fix(stt): filter ElevenLabs noise annotations before LLM
scribe_v2_realtime annotates background audio as *Störgeräusche*, *Fernsehgeräusche*, etc. Override stt_node to drop these annotations so the LLM only receives actual speech transcripts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
26
voice.py
26
voice.py
@@ -55,6 +55,30 @@ def _build_voice_prompt() -> str:
|
|||||||
datetime=now.strftime("%A, %d. %B %Y %H:%M %Z")
|
datetime=now.strftime("%A, %d. %B %Y %H:%M %Z")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ElevenLabs scribe_v2_realtime annotates non-speech audio as *Geräusch* etc.
|
||||||
|
# Filter these out so the LLM never sees them.
|
||||||
|
_NOISE_ANNOTATION_RE = re.compile(r'^\*[^*]+\*$')
|
||||||
|
|
||||||
|
class _VoiceAgent(Agent):
    """Agent whose STT stage drops transcripts that are only a noise annotation."""

    async def stt_node(self, audio, model_settings):
        """Yield items from the default STT node, skipping noise annotations.

        Delegates to ``Agent.default.stt_node`` and re-yields what it
        produces. An item is skipped (and logged at debug level) when its
        stripped text matches ``_NOISE_ANNOTATION_RE``:

        * for ``SpeechEvent`` items, the text of the first alternative;
        * for plain ``str`` items, the string itself.

        Every other item is passed through unchanged. If the default node
        returns ``None``, this generator ends without yielding anything.
        """
        from livekit.agents import stt as lk_stt

        upstream = Agent.default.stt_node(self, audio, model_settings)
        # The default node may hand back a coroutine; resolve it first.
        if asyncio.iscoroutine(upstream):
            upstream = await upstream
        if upstream is None:
            return

        async for item in upstream:
            # Work out which text (if any) should be checked for this item.
            if isinstance(item, lk_stt.SpeechEvent):
                candidates = getattr(item, 'alternatives', None)
                if not candidates:
                    # Nothing to inspect: forward as-is.
                    yield item
                    continue
                transcript = candidates[0].text
            elif isinstance(item, str):
                transcript = item
            else:
                yield item
                continue

            if _NOISE_ANNOTATION_RE.match(transcript.strip()):
                logger.debug("STT noise filtered: %s", transcript)
                continue

            yield item
|
||||||
|
|
||||||
|
|
||||||
_vad = None
|
_vad = None
|
||||||
def _get_vad():
|
def _get_vad():
|
||||||
global _vad
|
global _vad
|
||||||
@@ -611,7 +635,7 @@ class VoiceSession:
|
|||||||
logger.info("SEARCH_RESULT: %s", result[:200])
|
logger.info("SEARCH_RESULT: %s", result[:200])
|
||||||
return result
|
return result
|
||||||
|
|
||||||
agent = Agent(
|
agent = _VoiceAgent(
|
||||||
instructions=_build_voice_prompt() + memory_section,
|
instructions=_build_voice_prompt() + memory_section,
|
||||||
tools=[search_web],
|
tools=[search_web],
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user