fix: Use scribe_v2_realtime model for ElevenLabs STT (streaming mode)

scribe_v1 (REST) sets streaming=False, which is incompatible with livekit-agents 1.4 AgentSession.
scribe_v2_realtime uses WebSocket streaming (confirmed working with Starter plan).
Removes the separate _stt_session aiohttp client; the attribute itself stays (set to None) for cleanup compatibility.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-22 17:24:16 +02:00
parent 045e5831a6
commit 4012950197

View File

@@ -231,7 +231,7 @@ class VoiceSession:
self.session = None
self._task = None
self._http_session = None
self._stt_session = None
self._stt_session = None # kept for cleanup compat
self._caller_key: bytes | None = None
self._caller_identity: str | None = None
self._caller_all_keys: dict = {} # {index: bytes} — all caller keys by index
@@ -555,10 +555,9 @@ class VoiceSession:
# Voice pipeline — George (British male, multilingual DE/EN)
self._http_session = aiohttp.ClientSession()
self._stt_session = aiohttp.ClientSession() # separate session avoids WS/HTTP conflicts
voice_id = os.environ.get("ELEVENLABS_VOICE_ID", DEFAULT_VOICE_ID)
self.session = AgentSession(
stt=elevenlabs.STT(api_key=ELEVENLABS_KEY, http_session=self._stt_session),
stt=elevenlabs.STT(api_key=ELEVENLABS_KEY, model_id="scribe_v2_realtime"),
llm=lk_openai.LLM(base_url=LITELLM_URL, api_key=LITELLM_KEY, model=self.model),
tts=elevenlabs.TTS(voice_id=voice_id, model="eleven_multilingual_v2",
api_key=ELEVENLABS_KEY, http_session=self._http_session),