fix(voice): full E2EE bidirectional audio pipeline working

- bot.py: track active callers per room; only stop the session when the
  last caller leaves (fixes premature cancellation when the Playwright
  browser hangs up while the real app is still in the call)

- voice.py: pre-compute 8 HMAC-ratcheted keys from Element Call's (EC's)
  base key so decryption works immediately, without waiting ~30s for
  Matrix to deliver EC's key-rotation event (root cause of user→bot silence)

- voice.py: fix set_key() argument order to (identity, key, index) at all
  call sites — it was (identity, index, key), causing a TypeError

- voice.py: add audio frame monitor (AUDIO_FLOW) and mute/unmute event
  handlers for diagnostics

- voice.py: update event names for livekit-agents 1.4.2: user_state_changed,
  user_input_transcribed, conversation_item_added

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-22 15:17:35 +02:00
parent c379064f80
commit 2b8744de6e
2 changed files with 109 additions and 26 deletions

16
bot.py
View File

@@ -266,6 +266,7 @@ class Bot:
self.lkapi = None
self.voice_sessions: dict[str, VoiceSession] = {}
self.active_calls = set() # rooms where we've sent call member event
self.active_callers: dict[str, set[str]] = {} # room_id → set of caller user IDs
self.rag = DocumentRAG(WILDFILES_BASE_URL, WILDFILES_ORG)
self.memory = MemoryClient(MEMORY_SERVICE_URL)
self.llm = AsyncOpenAI(base_url=LITELLM_URL, api_key=LITELLM_KEY) if LITELLM_URL else None
@@ -401,6 +402,7 @@ class Bot:
logger.info("Call detected in %s from %s, joining...", room_id, event.sender)
self.active_calls.add(room_id)
self.active_callers.setdefault(room_id, set()).add(event.sender)
# Get the foci_preferred from the caller's event
content = event.source["content"]
@@ -478,10 +480,20 @@ class Bot:
self.voice_sessions.pop(room_id, None)
else:
# Empty content = someone left the call, check if anyone is still calling
# Empty content = someone left the call
room_id = room.room_id
if room_id in self.active_calls:
# Stop voice session
# Remove this caller from active set
callers = self.active_callers.get(room_id, set())
callers.discard(event.sender)
if callers:
logger.info("Participant %s left %s but %d other(s) still in call — keeping session",
event.sender, room_id, len(callers))
return
# No callers left — stop voice session
logger.info("Last caller %s left %s — stopping session", event.sender, room_id)
self.active_callers.pop(room_id, None)
vs = self.voice_sessions.pop(room_id, None)
if vs:
try: