From b65d04389b18a9b93cc58ed5400125178d19d9ef Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Sun, 22 Feb 2026 06:41:20 +0200 Subject: [PATCH] fix: Switch E2EE to per-participant keys instead of shared key Element Call uses per-participant keys, not shared key mode. Bot now generates its own key, publishes it, and sets both keys via key_provider.set_key() after connecting. Co-Authored-By: Claude Opus 4.6 --- bot.py | 12 ++++----- voice.py | 76 ++++++++++++++++++++++++++++++++++++++++---------------- 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/bot.py b/bot.py index 72d66eb..f49a1e8 100644 --- a/bot.py +++ b/bot.py @@ -448,12 +448,9 @@ class Bot: model = self.room_models.get(room_id, DEFAULT_MODEL) caller_device_id = content.get("device_id", "") - # Publish a placeholder key first to trigger Element Call - # to share its key with us. We'll republish the real shared - # key once we receive the caller's key. + # Generate bot's own E2EE key (16 bytes like Element Call) import secrets - placeholder_key = secrets.token_bytes(16) - await self._publish_encryption_key(room_id, placeholder_key) + bot_key = secrets.token_bytes(16) vs = VoiceSession( nio_client=self.client, @@ -461,8 +458,9 @@ class Bot: device_id=BOT_DEVICE_ID, lk_url=LK_URL, model=model, - publish_key_cb=lambda key: asyncio.ensure_future( - self._publish_encryption_key(room_id, key)), + bot_key=bot_key, + publish_key_cb=lambda key, rid=room_id: asyncio.ensure_future( + self._publish_encryption_key(rid, key)), ) # Check timeline for caller's key diff --git a/voice.py b/voice.py index fdfaf93..73d5c83 100644 --- a/voice.py +++ b/voice.py @@ -87,7 +87,7 @@ def _build_e2ee_options(shared_key: bytes) -> rtc.E2EEOptions: class VoiceSession: def __init__(self, nio_client, room_id, device_id, lk_url, model="claude-sonnet", - publish_key_cb=None): + publish_key_cb=None, bot_key: bytes | None = None): self.nio_client = nio_client self.room_id = room_id self.device_id = device_id @@ -97,15 +97,26 @@ class VoiceSession: self.session = None self._task = None self._http_session = None - self._e2ee_key: bytes | None = None + self._caller_key: bytes | None = None + self._caller_identity: str | None = None + self._bot_key: bytes = bot_key or os.urandom(16) self._publish_key_cb = publish_key_cb def on_encryption_key(self, sender, device_id, key, index): """Receive E2EE key from Element Call participant.""" - if key and not self._e2ee_key: - self._e2ee_key = key + if key and not self._caller_key: + self._caller_key = key + self._caller_identity = f"{sender}:{device_id}" logger.info("E2EE key received from %s:%s (index=%d, %d bytes)", sender, device_id, index, len(key)) + # Live-update key provider if already connected + if self.lk_room and hasattr(self.lk_room, 'e2ee_manager'): + try: + kp = self.lk_room.e2ee_manager.key_provider + kp.set_key(self._caller_identity, key, index) + logger.info("Live-set caller key for %s", self._caller_identity) + except Exception as e: + logger.warning("Failed to live-set caller key: %s", e) async def _fetch_encryption_key_http(self) -> bytes | None: """Fetch encryption key from room timeline (NOT state) via Matrix HTTP API. @@ -134,14 +145,18 @@ class VoiceSession: if sender == user_id: continue # skip our own key content = evt.get("content", {}) - logger.info("Found encryption_keys timeline event: sender=%s content=%s", - sender, content) + device = content.get("device_id", "") + logger.info("Found encryption_keys timeline event: sender=%s device=%s", + sender, device) for k in content.get("keys", []): key_b64 = k.get("key", "") if key_b64: key_b64 += "=" * (-len(key_b64) % 4) import base64 as b64 - return b64.urlsafe_b64decode(key_b64) + key_bytes = b64.urlsafe_b64decode(key_b64) + if device: + self._caller_identity = f"{sender}:{device}" + return key_bytes logger.info("No encryption_keys events in last %d timeline events", len(events)) except Exception as e: logger.warning("HTTP encryption key fetch failed: %s", e) @@ -172,33 +187,31 @@ class VoiceSession: async def _run(self): try: user_id = self.nio_client.user_id + bot_identity = _make_lk_identity(user_id, self.device_id) jwt = _generate_lk_jwt(self.room_id, user_id, self.device_id) + # Publish bot's own key immediately so Element Call can decrypt us + if self._publish_key_cb: + self._publish_key_cb(self._bot_key) + logger.info("Published bot E2EE key (%d bytes)", len(self._bot_key)) + # Check timeline for caller's encryption key caller_key = await self._fetch_encryption_key_http() if caller_key: - self._e2ee_key = caller_key + self._caller_key = caller_key logger.info("Got caller E2EE key via timeline (%d bytes)", len(caller_key)) - if not self._e2ee_key: - # Wait up to 15s for key via sync handler (bot.py forwards - # encryption_keys timeline events to on_encryption_key) + if not self._caller_key: + # Wait up to 15s for key via sync handler logger.info("No key in timeline yet, waiting for sync...") for _ in range(150): - if self._e2ee_key: + if self._caller_key: break await asyncio.sleep(0.1) - # Use caller's key as shared key for E2EE - if self._e2ee_key: - logger.info("Enabling E2EE with caller's key (%d bytes)", len(self._e2ee_key)) - if self._publish_key_cb: - self._publish_key_cb(self._e2ee_key) - e2ee_opts = _build_e2ee_options(self._e2ee_key) - else: - logger.warning("No E2EE key available, connecting without encryption") - e2ee_opts = None - + # Connect with E2EE enabled using bot's key as initial shared_key + # (required to initialize encryption framework, we override per-participant after) + e2ee_opts = _build_e2ee_options(self._bot_key) room_opts = rtc.RoomOptions(e2ee=e2ee_opts) self.lk_room = rtc.Room() @@ -218,6 +231,13 @@ class VoiceSession: logger.info("Connected (E2EE=HKDF), remote=%d", len(self.lk_room.remote_participants)) + # Set per-participant keys via key provider + kp = self.lk_room.e2ee_manager.key_provider + + # Bot's own key — encrypts outgoing audio + kp.set_key(bot_identity, self._bot_key, 0) + logger.info("Set bot key for %s (%d bytes)", bot_identity, len(self._bot_key)) + # Find the remote participant, wait up to 10s if not yet connected remote_identity = None for p in self.lk_room.remote_participants.values(): @@ -232,6 +252,18 @@ class VoiceSession: break if remote_identity: break + + # Set caller's key — decrypts incoming audio + if self._caller_key: + caller_id = remote_identity or self._caller_identity + if caller_id: + kp.set_key(caller_id, self._caller_key, 0) + logger.info("Set caller key for %s (%d bytes)", caller_id, len(self._caller_key)) + else: + logger.warning("Have caller key but no caller identity") + else: + logger.warning("No caller E2EE key available") + if remote_identity: logger.info("Linking to remote participant: %s", remote_identity)