From 85f8df5690df93868ec49650e82accb2b64e2411 Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Fri, 20 Feb 2026 17:21:51 +0200 Subject: [PATCH] fix: VoiceSession cleanup on call leave + CXXABI compat + proactive E2EE key read - Stop VoiceSession when call leave event received - Copy libstdc++ from rust build stage to fix CXXABI_1.3.15 mismatch - Read caller encryption key from room state before starting VoiceSession Co-Authored-By: Claude Opus 4.6 --- Dockerfile | 3 +++ bot.py | 67 +++++++++++++++++++++++++++++++++++------------------- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3b82b1e..356162f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -49,6 +49,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt +# Copy libstdc++ from build stage to match CXXABI version used by patched FFI +COPY --from=rust-build /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 + # Overwrite installed FFI binary with patched version (HKDF + key_ring_size support) COPY --from=rust-build /build/livekit-rust-sdks/target/release/liblivekit_ffi.so /patched/ ENV LIVEKIT_LIB_PATH=/patched/liblivekit_ffi.so diff --git a/bot.py b/bot.py index b5852cf..876807d 100644 --- a/bot.py +++ b/bot.py @@ -35,6 +35,7 @@ from nio import ( ) from nio.crypto.attachments import decrypt_attachment from livekit import api +from voice import VoiceSession BOT_DEVICE_ID = "AIBOT" CALL_MEMBER_TYPE = "org.matrix.msc3401.call.member" @@ -263,7 +264,7 @@ class Bot: config=config, ) self.lkapi = None - self.dispatched_rooms = set() + self.voice_sessions: dict[str, VoiceSession] = {} self.active_calls = set() # rooms where we've sent call member event self.rag = DocumentRAG(WILDFILES_BASE_URL, WILDFILES_ORG) self.memory = MemoryClient(MEMORY_SERVICE_URL) @@ -369,6 +370,21 @@ class Bot: await self._route_verification(room, event) return + # Forward encryption key events to active voice sessions + if event.type == ENCRYPTION_KEYS_TYPE: + room_id = room.room_id + vs = self.voice_sessions.get(room_id) + if vs: + content = event.source.get("content", {}) + device_id = content.get("device_id", "") + for k in content.get("keys", []): + if "key" in k and "index" in k: + key_b64 = k["key"] + key_b64 += "=" * (-len(key_b64) % 4) + key_bytes = base64.urlsafe_b64decode(key_b64) + vs.on_encryption_key(event.sender, device_id, key_bytes, k["index"]) + return + if event.type != CALL_MEMBER_TYPE: return if event.sender == BOT_USER: @@ -397,31 +413,27 @@ class Bot: lk_room_name = base64.b64encode(lk_room_hash).decode().rstrip("=") logger.info("LiveKit room name: %s (hashed from %s)", lk_room_name, room_id) - if room_id not in self.dispatched_rooms: + if room_id not in self.voice_sessions: try: - # Collect E2EE encryption keys from room state - e2ee_key = await self._get_call_encryption_key(room_id, event.sender) - dispatch_metadata = "" - if e2ee_key: - # Generate agent's own key and publish it - agent_key = os.urandom(32) - await self._publish_encryption_key(room_id, agent_key) - dispatch_metadata = json.dumps({ - "e2ee_key": base64.b64encode(agent_key).decode(), - }) - logger.info("E2EE key prepared for agent dispatch") - - await self.lkapi.agent_dispatch.create_dispatch( - api.CreateAgentDispatchRequest( - agent_name=AGENT_NAME, - room=lk_room_name, - metadata=dispatch_metadata, - ) + model = self.room_models.get(room_id, DEFAULT_MODEL) + vs = VoiceSession( + nio_client=self.client, + room_id=room_id, + device_id=BOT_DEVICE_ID, + lk_url=LK_URL, + model=model, ) - self.dispatched_rooms.add(room_id) - logger.info("Agent dispatched to LiveKit room %s (e2ee=%s)", lk_room_name, bool(e2ee_key)) + # Read existing encryption keys from room state before starting + caller_key = await self._get_call_encryption_key(room_id, event.sender) + if caller_key: + vs.on_encryption_key(event.sender, "", caller_key, 0) + + await vs.start() + self.voice_sessions[room_id] = vs + logger.info("Voice session started for room %s (e2ee_key=%s)", + room_id, "yes" if caller_key else "no") except Exception: - logger.exception("Dispatch failed for %s", lk_room_name) + logger.exception("Voice session start failed for %s", room_id) # Send our own call member state event call_content = { @@ -451,6 +463,15 @@ class Bot: # Empty content = someone left the call, check if anyone is still calling room_id = room.room_id if room_id in self.active_calls: + # Stop voice session + vs = self.voice_sessions.pop(room_id, None) + if vs: + try: + await vs.stop() + logger.info("Voice session stopped for %s", room_id) + except Exception: + logger.exception("Failed to stop voice session for %s", room_id) + # Leave the call too self.active_calls.discard(room_id) state_key = f"_{BOT_USER}_{BOT_DEVICE_ID}_m.call"