From c379064f80978ed47308f595fbab67bf21741caf Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Sun, 22 Feb 2026 14:05:54 +0200 Subject: [PATCH] =?UTF-8?q?fix(voice):=20set=20caller=20key=20in=20on=5Ftr?= =?UTF-8?q?ack=5Fsubscribed=20=E2=80=94=20frame=20cryptor=20must=20exist?= =?UTF-8?q?=20for=20HKDF=20to=20apply?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: C++ set_key() only applies HKDF when impl_->GetKey(pid) returns a valid handler, which requires the frame cryptor for that participant to be initialized. Frame cryptors are created at track subscription time, not at connect time. Calling set_key(caller_identity, key) immediately after connect() skips HKDF derivation (impl_->GetKey returns null) → raw key stored → DEC_FAILED. Fix: move caller key setting to on_track_subscribed where frame cryptor definitely exists. Also update on_encryption_key to use set_key() for key rotation updates. --- voice.py | 120 +++++++++++++++++++++++++++++-------------------------- 1 file changed, 64 insertions(+), 56 deletions(-) diff --git a/voice.py b/voice.py index 6e2ca01..c68eed8 100644 --- a/voice.py +++ b/voice.py @@ -144,18 +144,20 @@ class VoiceSession: self._caller_all_keys[index] = key logger.info("E2EE key received from %s:%s (index=%d, %d bytes)", sender, device_id, index, len(key)) - # Live-update shared_key with pre-derived AES key on rotation. - # set_shared_key() bypasses Rust FFI KDF — we pre-derive with Python HKDF. - # Per-participant set_key() is NOT used for caller (Rust HKDF may differ from JS). - if self.lk_room and hasattr(self.lk_room, 'e2ee_manager'): - try: - kp = self.lk_room.e2ee_manager.key_provider - derived = _hkdf_derive(key) - kp.set_shared_key(derived, index) - logger.info("Live-updated shared_key (pre-derived)[%d] for caller (%d bytes)", - index, len(derived)) - except Exception as e: - logger.warning("Failed to live-update caller shared_key: %s", e) + # Live-update key on rotation — use set_key() which applies HKDF via Rust FFI. + # At this point the track is usually already subscribed so frame cryptor exists. + if self.lk_room and self._caller_identity: + caller_lk_id = self._caller_identity # e.g. "@user:server.eu:DEVICEID" + for p in self.lk_room.remote_participants.values(): + if p.identity == caller_lk_id: + try: + kp = self.lk_room.e2ee_manager.key_provider + ok = kp.set_key(p.identity, index, key) + logger.info("Live-updated set_key[%d] for %s (ok=%s, %d bytes)", + index, p.identity, ok, len(key)) + except Exception as e: + logger.warning("Failed to live-update caller key: %s", e) + break async def _fetch_encryption_key_http(self) -> bytes | None: """Fetch encryption key from room timeline (NOT state) via Matrix HTTP API. @@ -256,28 +258,29 @@ class VoiceSession: break await asyncio.sleep(0.1) - # DIAGNOSTIC: disable E2EE entirely to check if EC sends encrypted or plaintext. - # If VAD triggers → EC sends plaintext (E2EE disabled on EC side). - # If VAD silent → EC sends encrypted (frame format or key issue). - e2ee_opts = None # TODO: re-enable after diagnosis + # E2EE: re-enabled after diagnostic confirmed EC encrypts audio. + # Root cause found: set_key() only applies HKDF if the frame cryptor for that + # participant already exists. Must call set_key() in on_track_subscribed, not at connect time. + key_opts = rtc.KeyProviderOptions( + shared_key=b"", # per-participant mode + ratchet_window_size=16, + ratchet_salt=b"LKFrameEncryptionKey", + failure_tolerance=-1, + key_ring_size=16, + key_derivation_function=KDF_HKDF, # Rust applies HKDF matching EC JS SDK + ) + e2ee_opts = rtc.E2EEOptions( + encryption_type=rtc.EncryptionType.GCM, + key_provider_options=key_opts, + ) room_opts = rtc.RoomOptions(e2ee=e2ee_opts) self.lk_room = rtc.Room() @self.lk_room.on("participant_connected") def on_p(p): logger.info("Participant connected: %s", p.identity) - # Apply any already-received caller keys to the new participant's LK identity. - # This handles the case where key arrives before the participant joins LiveKit. - if self._caller_all_keys: - try: - kp_local = self.lk_room.e2ee_manager.key_provider - # Use shared_key with pre-derived AES key (not set_key which applies Rust HKDF) - for idx, base_k in sorted(self._caller_all_keys.items()): - kp_local.set_shared_key(_hkdf_derive(base_k), idx) - logger.info("on_p: set shared_key (pre-derived) for %d caller key(s)", - len(self._caller_all_keys)) - except Exception as exc: - logger.warning("on_p: failed to set caller shared_key: %s", exc) + # Note: do NOT set keys here — frame cryptor not initialized yet at participant_connected. + # Keys are set in on_track_subscribed where the frame cryptor definitely exists. @self.lk_room.on("track_published") def on_tp(pub, p): @@ -286,6 +289,23 @@ class VoiceSession: @self.lk_room.on("track_subscribed") def on_ts(t, pub, p): logger.info("Track sub: %s %s kind=%s", p.identity, pub.sid, t.kind) + # *** KEY FIX: set_key() with KDF_HKDF only applies HKDF when the frame cryptor + # for this participant already exists. The frame cryptor is created at track + # subscription time. Calling set_key() BEFORE track subscription (at connect) + # skips HKDF derivation → raw key stored → DEC_FAILED. + # Solution: set caller key HERE, after frame cryptor is initialized. + if int(t.kind) == 1 and e2ee_opts is not None: # audio track only + try: + kp_local = self.lk_room.e2ee_manager.key_provider + if self._caller_all_keys: + for idx, base_k in sorted(self._caller_all_keys.items()): + ok = kp_local.set_key(p.identity, idx, base_k) + logger.info("on_ts: set_key[%d] for %s (ok=%s, %d bytes)", + idx, p.identity, ok, len(base_k)) + else: + logger.warning("on_ts: no caller keys available yet — will set on key receipt") + except Exception as exc: + logger.warning("on_ts: set_key failed: %s", exc) _e2ee_state_names = {0:"NEW",1:"OK",2:"ENC_FAILED",3:"DEC_FAILED",4:"MISSING_KEY",5:"RATCHETED",6:"INTERNAL_ERR"} @self.lk_room.on("e2ee_state_changed") @@ -298,27 +318,12 @@ class VoiceSession: logger.info("Connected (E2EE=HKDF), remote=%d", len(self.lk_room.remote_participants)) - # *** FIX: Set keys immediately after connect — BEFORE the rotation wait. - # The caller's track is subscribed during the wait; if no key is set when - # the frame cryptor is first created it enters DEC_FAILED and drops all frames - # even after the key is set later. - kp = None - if e2ee_opts is not None: - kp = self.lk_room.e2ee_manager.key_provider - kp.set_key(bot_identity, self._bot_key, 0) - logger.info("Set bot raw key for %s (%d bytes)", bot_identity, len(self._bot_key)) - # Caller keys: use set_shared_key with pre-derived AES (bypasses Rust HKDF). - # Per-participant set_key is NOT called for caller — Rust HKDF may not match EC's JS HKDF. - if self._caller_all_keys: - for idx, base_k in sorted(self._caller_all_keys.items()): - kp.set_shared_key(_hkdf_derive(base_k), idx) - logger.info("Early-set shared_key (pre-derived) for caller indices %s", - list(self._caller_all_keys.keys())) - elif self._caller_key: - kp.set_shared_key(_hkdf_derive(self._caller_key), 0) - logger.info("Early-set shared_key (pre-derived) caller key[0] (%d bytes)", 16) - else: - logger.info("E2EE disabled (diagnostic mode) — skipping key setup") + # Set bot's own key immediately after connect — local frame cryptor exists at connect time. + # CALLER keys are set in on_track_subscribed (NOT here) because the caller's frame cryptor + # is only created when their track arrives. Calling set_key() before that skips HKDF. + kp = self.lk_room.e2ee_manager.key_provider + ok = kp.set_key(bot_identity, 0, self._bot_key) + logger.info("Set bot key for %s (ok=%s, %d bytes)", bot_identity, ok, len(self._bot_key)) # Element Call rotates its encryption key when bot joins the LiveKit room. # EC sends the new key via Matrix (Megolm-encrypted); nio sync will decrypt it @@ -357,16 +362,19 @@ class VoiceSession: # Set shared_key with pre-derived AES key for caller decryption. # NOT using set_key() for caller — Rust HKDF may produce different result than EC's JS HKDF. # set_shared_key() stores key raw (no KDF applied) — we pre-derive in Python. - if kp is not None and self._caller_all_keys: + # After key rotation wait: if track already subscribed, set rotated key. + # (Usually on_track_subscribed handles this, but if track arrived before rotation, + # the rotated key needs to be set here for the already-subscribed participant.) + if self._caller_all_keys and remote_identity: try: for idx, base_k in sorted(self._caller_all_keys.items()): - derived = _hkdf_derive(base_k) - kp.set_shared_key(derived, idx) - logger.info("Set shared_key (pre-derived)[%d] (%d bytes)", idx, len(derived)) + ok = kp.set_key(remote_identity, idx, base_k) + logger.info("Post-rotation set_key[%d] for %s (ok=%s)", + idx, remote_identity, ok) except Exception as e: - logger.warning("Failed to set caller shared_key: %s", e) - elif e2ee_opts is not None and not self._caller_all_keys: - logger.warning("No caller E2EE keys — incoming audio will be silence") + logger.warning("Post-rotation set_key failed: %s", e) + elif not self._caller_all_keys: + logger.warning("No caller E2EE keys received — incoming audio will be silence") if remote_identity: logger.info("Linking to remote participant: %s", remote_identity)