fix: VoiceSession cleanup on call leave + CXXABI compat + proactive E2EE key read

- Stop the VoiceSession when a call leave event is received
- Copy libstdc++ from the Rust build stage to fix the CXXABI_1.3.15 mismatch
- Read the caller's encryption key from room state before starting the VoiceSession

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-20 17:21:51 +02:00
parent e5e8b56482
commit 85f8df5690
2 changed files with 47 additions and 23 deletions

View File

@@ -49,6 +49,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
COPY requirements.txt . COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements.txt
# Copy libstdc++ from build stage to match CXXABI version used by patched FFI
COPY --from=rust-build /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/lib/x86_64-linux-gnu/libstdc++.so.6
# Overwrite installed FFI binary with patched version (HKDF + key_ring_size support) # Overwrite installed FFI binary with patched version (HKDF + key_ring_size support)
COPY --from=rust-build /build/livekit-rust-sdks/target/release/liblivekit_ffi.so /patched/ COPY --from=rust-build /build/livekit-rust-sdks/target/release/liblivekit_ffi.so /patched/
ENV LIVEKIT_LIB_PATH=/patched/liblivekit_ffi.so ENV LIVEKIT_LIB_PATH=/patched/liblivekit_ffi.so

67
bot.py
View File

@@ -35,6 +35,7 @@ from nio import (
) )
from nio.crypto.attachments import decrypt_attachment from nio.crypto.attachments import decrypt_attachment
from livekit import api from livekit import api
from voice import VoiceSession
BOT_DEVICE_ID = "AIBOT" BOT_DEVICE_ID = "AIBOT"
CALL_MEMBER_TYPE = "org.matrix.msc3401.call.member" CALL_MEMBER_TYPE = "org.matrix.msc3401.call.member"
@@ -263,7 +264,7 @@ class Bot:
config=config, config=config,
) )
self.lkapi = None self.lkapi = None
self.dispatched_rooms = set() self.voice_sessions: dict[str, VoiceSession] = {}
self.active_calls = set() # rooms where we've sent call member event self.active_calls = set() # rooms where we've sent call member event
self.rag = DocumentRAG(WILDFILES_BASE_URL, WILDFILES_ORG) self.rag = DocumentRAG(WILDFILES_BASE_URL, WILDFILES_ORG)
self.memory = MemoryClient(MEMORY_SERVICE_URL) self.memory = MemoryClient(MEMORY_SERVICE_URL)
@@ -369,6 +370,21 @@ class Bot:
await self._route_verification(room, event) await self._route_verification(room, event)
return return
# Forward encryption key events to active voice sessions
if event.type == ENCRYPTION_KEYS_TYPE:
room_id = room.room_id
vs = self.voice_sessions.get(room_id)
if vs:
content = event.source.get("content", {})
device_id = content.get("device_id", "")
for k in content.get("keys", []):
if "key" in k and "index" in k:
key_b64 = k["key"]
key_b64 += "=" * (-len(key_b64) % 4)
key_bytes = base64.urlsafe_b64decode(key_b64)
vs.on_encryption_key(event.sender, device_id, key_bytes, k["index"])
return
if event.type != CALL_MEMBER_TYPE: if event.type != CALL_MEMBER_TYPE:
return return
if event.sender == BOT_USER: if event.sender == BOT_USER:
@@ -397,31 +413,27 @@ class Bot:
lk_room_name = base64.b64encode(lk_room_hash).decode().rstrip("=") lk_room_name = base64.b64encode(lk_room_hash).decode().rstrip("=")
logger.info("LiveKit room name: %s (hashed from %s)", lk_room_name, room_id) logger.info("LiveKit room name: %s (hashed from %s)", lk_room_name, room_id)
if room_id not in self.dispatched_rooms: if room_id not in self.voice_sessions:
try: try:
# Collect E2EE encryption keys from room state model = self.room_models.get(room_id, DEFAULT_MODEL)
e2ee_key = await self._get_call_encryption_key(room_id, event.sender) vs = VoiceSession(
dispatch_metadata = "" nio_client=self.client,
if e2ee_key: room_id=room_id,
# Generate agent's own key and publish it device_id=BOT_DEVICE_ID,
agent_key = os.urandom(32) lk_url=LK_URL,
await self._publish_encryption_key(room_id, agent_key) model=model,
dispatch_metadata = json.dumps({ )
"e2ee_key": base64.b64encode(agent_key).decode(), # Read existing encryption keys from room state before starting
}) caller_key = await self._get_call_encryption_key(room_id, event.sender)
logger.info("E2EE key prepared for agent dispatch") if caller_key:
vs.on_encryption_key(event.sender, "", caller_key, 0)
await self.lkapi.agent_dispatch.create_dispatch( await vs.start()
api.CreateAgentDispatchRequest( self.voice_sessions[room_id] = vs
agent_name=AGENT_NAME, logger.info("Voice session started for room %s (e2ee_key=%s)",
room=lk_room_name, room_id, "yes" if caller_key else "no")
metadata=dispatch_metadata,
)
)
self.dispatched_rooms.add(room_id)
logger.info("Agent dispatched to LiveKit room %s (e2ee=%s)", lk_room_name, bool(e2ee_key))
except Exception: except Exception:
logger.exception("Dispatch failed for %s", lk_room_name) logger.exception("Voice session start failed for %s", room_id)
# Send our own call member state event # Send our own call member state event
call_content = { call_content = {
@@ -451,6 +463,15 @@ class Bot:
# Empty content = someone left the call, check if anyone is still calling # Empty content = someone left the call, check if anyone is still calling
room_id = room.room_id room_id = room.room_id
if room_id in self.active_calls: if room_id in self.active_calls:
# Stop voice session
vs = self.voice_sessions.pop(room_id, None)
if vs:
try:
await vs.stop()
logger.info("Voice session stopped for %s", room_id)
except Exception:
logger.exception("Failed to stop voice session for %s", room_id)
# Leave the call too # Leave the call too
self.active_calls.discard(room_id) self.active_calls.discard(room_id)
state_key = f"_{BOT_USER}_{BOT_DEVICE_ID}_m.call" state_key = f"_{BOT_USER}_{BOT_DEVICE_ID}_m.call"