From 630a0de97070474fe1e6f14f0af179224577fe6f Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Sun, 22 Feb 2026 08:10:27 +0200 Subject: [PATCH] fix(e2ee): revert to per-participant mode with proper rotation handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shared-key mode uses HKDF with empty info, but Element Call JS uses participant identity as HKDF info. Per-participant mode (set_key with identity) matches EC's derivation. Previous per-participant attempt (b65d043) failed because key rotation (index 0→1 when bot joins) wasn't handled. Now on_encryption_key calls set_key(caller_id, key, index) on rotation, so the bot stays in sync. Changes: - _build_e2ee_options(): remove caller_key param, shared_key=b"" (per-participant mode) - _run(): set_key(remote_identity, caller_key, 0) for incoming decryption - on_encryption_key: only set_key() on rotation (no set_shared_key) --- voice.py | 58 +++++++++++++++++++++++--------------------------------- 1 file changed, 24 insertions(+), 34 deletions(-) diff --git a/voice.py b/voice.py index aa123f6..be591f8 100644 --- a/voice.py +++ b/voice.py @@ -69,16 +69,16 @@ def _generate_lk_jwt(room_id, user_id, device_id): KDF_HKDF = 1 -def _build_e2ee_options(caller_key: bytes = b"") -> rtc.E2EEOptions: +def _build_e2ee_options() -> rtc.E2EEOptions: """Build HKDF E2EE options matching Element Call's key derivation. - Pass caller_key as shared_key to initialize in true shared-key mode. - This ensures the Rust FFI decrypts incoming frames using caller's key. - Outgoing encryption is overridden via set_key(bot_identity, bot_key) after connect. + Use per-participant key mode (no shared_key) so the Rust FFI uses the + participant identity as HKDF info — matching Element Call's JS SFrame. + Keys are set post-connect via set_key(participant_identity, key, index). Element Call uses: ratchetWindowSize=16, keyringSize=256, salt="LKFrameEncryptionKey" """ key_opts = rtc.KeyProviderOptions( - shared_key=caller_key, + shared_key=b"", # empty = per-participant mode; keys set via set_key() after connect ratchet_window_size=16, ratchet_salt=b"LKFrameEncryptionKey", failure_tolerance=-1, @@ -116,16 +116,15 @@ class VoiceSession: self._caller_all_keys[index] = key logger.info("E2EE key received from %s:%s (index=%d, %d bytes)", sender, device_id, index, len(key)) - # Live-update both shared key and per-participant key on rotation. + # Live-update per-participant key on rotation (Element Call rotates on bot join). + # Use only set_key() (per-participant mode) — matching EC's HKDF info=identity. if self.lk_room and hasattr(self.lk_room, 'e2ee_manager'): try: kp = self.lk_room.e2ee_manager.key_provider - kp.set_shared_key(key, index) - logger.info("Live-updated shared key[%d] (%d bytes)", index, len(key)) - # Also update per-participant key (belt+suspenders for rotations) caller_id = self._caller_identity or f"{sender}:{device_id}" kp.set_key(caller_id, key, index) - logger.info("Live-updated per-participant key[%d] for %s", index, caller_id) + logger.info("Live-updated per-participant key[%d] for %s (%d bytes)", + index, caller_id, len(key)) except Exception as e: logger.warning("Failed to live-update caller key: %s", e) @@ -228,10 +227,10 @@ class VoiceSession: break await asyncio.sleep(0.1) - # Connect with caller_key as shared_key so Rust FFI decrypts - # incoming audio in true shared-key mode. Outgoing encryption - # is overridden to bot_key via set_key(bot_identity) after connect. - e2ee_opts = _build_e2ee_options(self._caller_key or b"") + # Connect in per-participant mode (empty shared_key) so Rust FFI uses + # identity-based HKDF — matching Element Call's JS SFrame key derivation. + # Keys are set post-connect via set_key(identity, key, index). + e2ee_opts = _build_e2ee_options() room_opts = rtc.RoomOptions(e2ee=e2ee_opts) self.lk_room = rtc.Room() @@ -280,30 +279,21 @@ class VoiceSession: if remote_identity: break - # Caller key was passed as shared_key at connect time — verify it's stored. - if self._caller_key: - logger.info("Caller key active as shared_key (%d bytes, index 0)", len(self._caller_key)) - try: - stored = kp.export_shared_key(0) - if stored == self._caller_key: - logger.info("VERIFIED: shared key[0] matches caller key (%d bytes)", len(stored)) - else: - logger.warning("MISMATCH: stored shared key[0] (%d bytes) != caller key (%d bytes)", - len(stored), len(self._caller_key)) - logger.warning("stored=%s", stored.hex()) - logger.warning("caller=%s", self._caller_key.hex()) - except Exception as e: - logger.warning("Could not export shared key: %s", e) - else: - logger.warning("No caller E2EE key — incoming audio will be silence") - - # Also set caller key as per-participant key (belt+suspenders: both modes) + # Set caller's per-participant key (HKDF info=caller_identity = matching EC JS). if self._caller_key and remote_identity: try: kp.set_key(remote_identity, self._caller_key, 0) - logger.info("Also set per-participant key for %s (%d bytes)", remote_identity, len(self._caller_key)) + logger.info("Set caller key for %s (index=0, %d bytes)", remote_identity, len(self._caller_key)) + # Also set via caller_identity (belt+suspenders if identities differ) + if self._caller_identity and self._caller_identity != remote_identity: + kp.set_key(self._caller_identity, self._caller_key, 0) + logger.info("Also set caller key via identity %s", self._caller_identity) except Exception as e: - logger.warning("Failed to set per-participant caller key: %s", e) + logger.warning("Failed to set caller per-participant key: %s", e) + elif not self._caller_key: + logger.warning("No caller E2EE key — incoming audio will be silence") + elif not remote_identity: + logger.warning("No remote participant found — caller key not set") if remote_identity: logger.info("Linking to remote participant: %s", remote_identity)