def _hkdf_derive(
    ikm: bytes,
    *,
    salt: bytes = b"LKFrameEncryptionKey",
    info: bytes = b"\x00" * 128,
    length: int = 16,
) -> bytes:
    """Derive key material from *ikm* with HKDF-SHA256 (RFC 5869).

    With the default arguments this yields the 128-bit AES key that the
    caller side is expected to derive; per the original note in this file
    the defaults mirror livekit-client-sdk-js ``deriveEncryptionKey()``
    (hash=SHA-256, salt="LKFrameEncryptionKey", info=128 zero bytes,
    length=128 bit).  The result is meant to be handed to
    ``set_shared_key()`` as an already-derived key.

    Args:
        ikm: Input keying material (the raw base key).
        salt: HKDF salt.  An empty salt behaves like the RFC default of
            ``HashLen`` zero bytes, because HMAC zero-pads short keys.
        info: HKDF context/application info string.
        length: Number of output bytes, between 1 and 255 * 32 inclusive.

    Returns:
        ``length`` bytes of derived key material.

    Raises:
        ValueError: If ``length`` is outside the range HKDF-SHA256 permits.
    """
    # Function-scope imports keep the helper self-contained; it does not
    # depend on what the enclosing module happens to import.
    import hashlib
    import hmac

    hash_len = hashlib.sha256().digest_size
    if not 1 <= length <= 255 * hash_len:
        raise ValueError(
            f"HKDF-SHA256 output length must be in 1..{255 * hash_len}, got {length}"
        )

    # Extract: PRK = HMAC(salt, IKM)
    prk = hmac.new(salt, ikm, hashlib.sha256).digest()

    # Expand: T(i) = HMAC(PRK, T(i-1) || info || i), concatenated until
    # enough bytes are available.  For the default length of 16 only T(1)
    # is computed, which is exactly what the single-block version did.
    blocks = []
    previous = b""
    produced = 0
    counter = 1
    while produced < length:
        previous = hmac.new(
            prk, previous + info + bytes([counter]), hashlib.sha256
        ).digest()
        blocks.append(previous)
        produced += len(previous)
        counter += 1
    return b"".join(blocks)[:length]
- for p in self.lk_room.remote_participants.values(): - if p.identity != caller_id: - kp.set_key(p.identity, key, index) - logger.info("Live-updated caller raw key[%d] for LK identity %s", - index, p.identity) - # Also update shared_key fallback — FFI may use this for incoming decryption. - try: - kp.set_shared_key(key, index) - logger.info("Live-updated shared_key fallback[%d]", index) - except Exception: - pass + derived = _hkdf_derive(key) + kp.set_shared_key(derived, index) + logger.info("Live-updated shared_key (pre-derived)[%d] for caller (%d bytes)", + index, len(derived)) except Exception as e: - logger.warning("Failed to live-update caller key: %s", e) + logger.warning("Failed to live-update caller shared_key: %s", e) async def _fetch_encryption_key_http(self) -> bytes | None: """Fetch encryption key from room timeline (NOT state) via Matrix HTTP API. @@ -268,12 +271,13 @@ class VoiceSession: if self._caller_all_keys: try: kp_local = self.lk_room.e2ee_manager.key_provider + # Use shared_key with pre-derived AES key (not set_key which applies Rust HKDF) for idx, base_k in sorted(self._caller_all_keys.items()): - kp_local.set_key(p.identity, base_k, idx) - logger.info("on_p: applied %d caller key(s) to %s", - len(self._caller_all_keys), p.identity) + kp_local.set_shared_key(_hkdf_derive(base_k), idx) + logger.info("on_p: set shared_key (pre-derived) for %d caller key(s)", + len(self._caller_all_keys)) except Exception as exc: - logger.warning("on_p: failed to set caller key for %s: %s", p.identity, exc) + logger.warning("on_p: failed to set caller shared_key: %s", exc) @self.lk_room.on("track_published") def on_tp(pub, p): @@ -301,15 +305,16 @@ class VoiceSession: kp = self.lk_room.e2ee_manager.key_provider kp.set_key(bot_identity, self._bot_key, 0) logger.info("Set bot raw key for %s (%d bytes)", bot_identity, len(self._bot_key)) - if self._caller_identity and self._caller_all_keys: + # Caller keys: use set_shared_key with pre-derived AES (bypasses Rust 
HKDF). + # Per-participant set_key is NOT called for caller — Rust HKDF may not match EC's JS HKDF. + if self._caller_all_keys: for idx, base_k in sorted(self._caller_all_keys.items()): - kp.set_key(self._caller_identity, base_k, idx) - logger.info("Early-set caller raw keys %s for %s", - list(self._caller_all_keys.keys()), self._caller_identity) - elif self._caller_key and self._caller_identity: - kp.set_key(self._caller_identity, self._caller_key, 0) - logger.info("Early-set caller raw key[0] for %s (%d bytes)", - self._caller_identity, len(self._caller_key)) + kp.set_shared_key(_hkdf_derive(base_k), idx) + logger.info("Early-set shared_key (pre-derived) for caller indices %s", + list(self._caller_all_keys.keys())) + elif self._caller_key: + kp.set_shared_key(_hkdf_derive(self._caller_key), 0) + logger.info("Early-set shared_key (pre-derived) caller key[0] (%d bytes)", 16) # Element Call rotates its encryption key when bot joins the LiveKit room. # EC sends the new key via Matrix (Megolm-encrypted); nio sync will decrypt it @@ -345,36 +350,19 @@ class VoiceSession: if remote_identity: break - # Set ALL known caller keys — raw base keys, Rust FFI applies HKDF internally. - if self._caller_all_keys and remote_identity: + # Set shared_key with pre-derived AES key for caller decryption. + # NOT using set_key() for caller — Rust HKDF may produce different result than EC's JS HKDF. + # set_shared_key() stores key raw (no KDF applied) — we pre-derive in Python. 
+ if self._caller_all_keys: try: for idx, base_k in sorted(self._caller_all_keys.items()): - kp.set_key(remote_identity, base_k, idx) - logger.info("Set caller raw key[%d] for %s (%d bytes)", - idx, remote_identity, len(base_k)) - # Belt+suspenders: also set via matrix identity if different from LK identity - if self._caller_identity and self._caller_identity != remote_identity: - for idx, base_k in sorted(self._caller_all_keys.items()): - kp.set_key(self._caller_identity, base_k, idx) - logger.info("Also set caller keys via matrix identity %s", self._caller_identity) + derived = _hkdf_derive(base_k) + kp.set_shared_key(derived, idx) + logger.info("Set shared_key (pre-derived)[%d] (%d bytes)", idx, len(derived)) except Exception as e: - logger.warning("Failed to set caller per-participant keys: %s", e) + logger.warning("Failed to set caller shared_key: %s", e) elif not self._caller_all_keys: logger.warning("No caller E2EE keys — incoming audio will be silence") - elif not remote_identity: - logger.warning("No remote participant found — caller keys not set") - - # Fallback: also set shared_key to the most recent caller key. - # In the patched Rust FFI, per-participant decryption may fall back to shared_key - # for incoming audio. This was confirmed working in e3ede3f (Feb 21 19:40 UTC). - if self._caller_key: - try: - max_idx = max(self._caller_all_keys.keys()) if self._caller_all_keys else 0 - kp.set_shared_key(self._caller_key, max_idx) - logger.info("Set shared_key fallback to caller key[%d] (%d bytes)", - max_idx, len(self._caller_key)) - except Exception as e: - logger.warning("Failed to set shared_key fallback: %s", e) if remote_identity: logger.info("Linking to remote participant: %s", remote_identity)