From 533847c952076cbd64908feb3efbc858233137b3 Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Sat, 21 Feb 2026 18:50:19 +0200 Subject: [PATCH] fix: Switch E2EE from shared key to per-participant key mode Element Call uses per-participant keys via MatrixKeyProvider.onSetEncryptionKey(), not shared key mode. This was causing silence with E2EE enabled. - Set bot's own key and caller's key separately via e2ee_manager.key_provider.set_key() - Live-update caller key when received after connect - Fallback to set_shared_key if per-participant API unavailable Co-Authored-By: Claude Opus 4.6 --- bot.py | 16 +++++---- voice.py | 103 ++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 89 insertions(+), 30 deletions(-) diff --git a/bot.py b/bot.py index 72d66eb..5d6beda 100644 --- a/bot.py +++ b/bot.py @@ -448,12 +448,13 @@ class Bot: model = self.room_models.get(room_id, DEFAULT_MODEL) caller_device_id = content.get("device_id", "") - # Publish a placeholder key first to trigger Element Call - # to share its key with us. We'll republish the real shared - # key once we receive the caller's key. + # Generate bot's E2EE key and publish it so Element Call + # can decrypt our audio. This also triggers Element Call + # to share its key with us. import secrets - placeholder_key = secrets.token_bytes(16) - await self._publish_encryption_key(room_id, placeholder_key) + bot_key = secrets.token_bytes(16) + # Publish bot's key early so Element Call can decrypt our audio + await self._publish_encryption_key(room_id, bot_key) vs = VoiceSession( nio_client=self.client, @@ -461,8 +462,9 @@ class Bot: device_id=BOT_DEVICE_ID, lk_url=LK_URL, model=model, - publish_key_cb=lambda key: asyncio.ensure_future( - self._publish_encryption_key(room_id, key)), + publish_key_cb=lambda key, rid=room_id: asyncio.ensure_future( + self._publish_encryption_key(rid, key)), + bot_key=bot_key, ) # Check timeline for caller's key diff --git a/voice.py b/voice.py index e571889..d767a90 100644 --- a/voice.py +++ b/voice.py @@ -87,7 +87,7 @@ def _build_e2ee_options(shared_key: bytes) -> rtc.E2EEOptions: class VoiceSession: def __init__(self, nio_client, room_id, device_id, lk_url, model="claude-sonnet", - publish_key_cb=None): + publish_key_cb=None, bot_key: bytes | None = None): self.nio_client = nio_client self.room_id = room_id self.device_id = device_id @@ -97,20 +97,39 @@ class VoiceSession: self.session = None self._task = None self._http_session = None - self._e2ee_key: bytes | None = None + self._caller_key: bytes | None = None + self._caller_identity: str | None = None # "sender:device_id" format + self._bot_key: bytes = bot_key or os.urandom(16) self._publish_key_cb = publish_key_cb def on_encryption_key(self, sender, device_id, key, index): - """Receive E2EE key from Element Call participant.""" - if key and not self._e2ee_key: - self._e2ee_key = key - logger.info("E2EE key received from %s:%s (index=%d, %d bytes)", - sender, device_id, index, len(key)) + """Receive E2EE key from Element Call participant. + + If the room is already connected, immediately set the key on the + key provider so we can decrypt the caller's audio. + """ + if not key: + return + identity = _make_lk_identity(sender, device_id) + self._caller_key = key + self._caller_identity = identity + logger.info("E2EE key received from %s:%s (identity=%s, index=%d, %d bytes)", + sender, device_id, identity, index, len(key)) + + # If already connected, set key on the key provider immediately + if self.lk_room: + try: + kp = self.lk_room.e2ee_manager.key_provider + kp.set_key(identity, key, key_index=index) + logger.info("Live-updated caller E2EE key for %s", identity) + except Exception: + logger.warning("Could not live-update caller E2EE key", exc_info=True) async def _fetch_encryption_key_http(self) -> bytes | None: """Fetch encryption key from room timeline (NOT state) via Matrix HTTP API. Element Call distributes encryption keys as timeline events, not state. + Also sets self._caller_identity from the event sender + device_id. """ import httpx homeserver = str(self.nio_client.homeserver) @@ -134,14 +153,16 @@ class VoiceSession: if sender == user_id: continue # skip our own key content = evt.get("content", {}) - logger.info("Found encryption_keys timeline event: sender=%s content=%s", - sender, content) + caller_device = content.get("device_id", "") + logger.info("Found encryption_keys timeline event: sender=%s device=%s", + sender, caller_device) + if caller_device: + self._caller_identity = _make_lk_identity(sender, caller_device) for k in content.get("keys", []): key_b64 = k.get("key", "") if key_b64: key_b64 += "=" * (-len(key_b64) % 4) - import base64 as b64 - return b64.urlsafe_b64decode(key_b64) + return base64.urlsafe_b64decode(key_b64) logger.info("No encryption_keys events in last %d timeline events", len(events)) except Exception as e: logger.warning("HTTP encryption key fetch failed: %s", e) @@ -177,26 +198,25 @@ class VoiceSession: # Check timeline for caller's encryption key caller_key = await self._fetch_encryption_key_http() if caller_key: - self._e2ee_key = caller_key + self._caller_key = caller_key logger.info("Got caller E2EE key via timeline (%d bytes)", len(caller_key)) - if not self._e2ee_key: + if not self._caller_key: # Wait up to 15s for key via sync handler (bot.py forwards # encryption_keys timeline events to on_encryption_key) logger.info("No key in timeline yet, waiting for sync...") for _ in range(150): - if self._e2ee_key: + if self._caller_key: break await asyncio.sleep(0.1) - # E2EE disabled — Element Call key derivation mismatch not yet resolved. - # Audio pipeline confirmed working without E2EE. - if self._e2ee_key: - logger.info("Caller E2EE key available (%d bytes) — E2EE disabled pending fix", - len(self._e2ee_key)) - if self._publish_key_cb: - self._publish_key_cb(self._e2ee_key) - e2ee_opts = None + # Publish bot's own key so caller can decrypt our audio + if self._publish_key_cb: + self._publish_key_cb(self._bot_key) + + # Build E2EE options with empty shared key — we set per-participant + # keys after connect via e2ee_manager.key_provider.set_key() + e2ee_opts = _build_e2ee_options(b"") room_opts = rtc.RoomOptions(e2ee=e2ee_opts) self.lk_room = rtc.Room() @@ -214,9 +234,46 @@ class VoiceSession: logger.info("Track sub: %s %s kind=%s", p.identity, pub.sid, t.kind) await self.lk_room.connect(self.lk_url, jwt, options=room_opts) - logger.info("Connected (E2EE=HKDF), remote=%d", + logger.info("Connected (E2EE=per-participant), remote=%d", len(self.lk_room.remote_participants)) + # Set per-participant E2EE keys via key provider + bot_identity = _make_lk_identity(user_id, self.device_id) + try: + kp = self.lk_room.e2ee_manager.key_provider + + # Set bot's own key (encrypts outgoing audio) + kp.set_key(bot_identity, self._bot_key, key_index=0) + logger.info("Set bot E2EE key for identity=%s (%d bytes)", + bot_identity, len(self._bot_key)) + + # Set caller's key (decrypts incoming audio) + if self._caller_key and self._caller_identity: + kp.set_key(self._caller_identity, self._caller_key, key_index=0) + logger.info("Set caller E2EE key for identity=%s (%d bytes)", + self._caller_identity, len(self._caller_key)) + elif self._caller_key: + # Caller identity not yet known — try to get from remote participants + for p in self.lk_room.remote_participants.values(): + kp.set_key(p.identity, self._caller_key, key_index=0) + logger.info("Set caller E2EE key for identity=%s (%d bytes)", + p.identity, len(self._caller_key)) + break + else: + logger.warning("No caller E2EE key available — caller audio will be silent") + except AttributeError: + logger.warning("e2ee_manager.key_provider not available — " + "falling back to shared key mode") + # Fallback: set shared key after connect if per-participant isn't supported + if self._caller_key: + try: + kp = self.lk_room.e2ee_manager.key_provider + kp.set_shared_key(self._caller_key, key_index=0) + logger.info("Fallback: set shared E2EE key (%d bytes)", + len(self._caller_key)) + except Exception: + logger.exception("Fallback shared key also failed") + # Find the remote participant, wait up to 10s if not yet connected remote_identity = None for p in self.lk_room.remote_participants.values():