From 6b457a2aef42f6fcb0635d484f40d0f0ea400bb7 Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Sun, 22 Feb 2026 09:09:34 +0200 Subject: [PATCH] fix(voice): use correct HKDF info=128zeros, length=16 matching LiveKit JS SDK LiveKit JS SDK deriveKeys(): info=new ArrayBuffer(128) (128 zero bytes, NOT identity), output=16 bytes AES-128. Previous code used identity as info and 32-byte output - both wrong, caused silence in both directions. Co-Authored-By: Claude Sonnet 4.6 --- voice.py | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/voice.py b/voice.py index 247d422..89d51ff 100644 --- a/voice.py +++ b/voice.py @@ -72,8 +72,14 @@ KDF_NONE = 0 _RATCHET_SALT = b"LKFrameEncryptionKey" -def _hkdf(ikm: bytes, salt: bytes, info: bytes, length: int = 32) -> bytes: - """HKDF-SHA256 (RFC 5869). Pre-derives frame key to bypass Rust FFI's HKDF.""" +def _hkdf(ikm: bytes, salt: bytes, info: bytes, length: int = 16) -> bytes: + """HKDF-SHA256 (RFC 5869). Pre-derives frame key to bypass Rust FFI's HKDF. + + LiveKit JS SDK deriveKeys() uses: + salt = TextEncoder("LKFrameEncryptionKey") + info = new ArrayBuffer(128) # 128 zero bytes — NOT participant identity + length = 128 bits = 16 bytes (AES-128-GCM) + """ import hmac as _hmac, hashlib as _hashlib prk = _hmac.new(salt, ikm, _hashlib.sha256).digest() okm, t = b"", b"" @@ -83,13 +89,17 @@ def _hkdf(ikm: bytes, salt: bytes, info: bytes, length: int = 32) -> bytes: return okm[:length] +# 128 zero bytes — matches LiveKit JS SDK's `info: new ArrayBuffer(128)` in getAlgoOptions() +_HKDF_INFO = b"\x00" * 128 + + def _build_e2ee_options() -> rtc.E2EEOptions: """Build E2EE options — KDF disabled; we pre-derive HKDF keys in Python. - The Rust FFI's KDF_HKDF path for INCOMING decryption may use wrong parameters. - We pre-derive HKDF(base_key, salt="LKFrameEncryptionKey", info=identity) in Python - and pass the derived key with KDF_NONE so the Rust FFI uses it directly. - Element Call uses: ratchetWindowSize=10, keyringSize=256, salt="LKFrameEncryptionKey" + We pre-derive HKDF(base_key, salt="LKFrameEncryptionKey", info=128zeros) in Python + and pass the 16-byte AES-128 key with KDF_NONE so the Rust FFI uses it directly. + LiveKit JS SDK deriveKeys(): salt=ratchetSalt, info=new ArrayBuffer(128), output=128-bit. + Element Call uses: ratchetWindowSize=10, keyringSize=256, ratchetSalt="LKFrameEncryptionKey" """ key_opts = rtc.KeyProviderOptions( shared_key=b"", # empty = per-participant mode @@ -135,7 +145,7 @@ class VoiceSession: try: kp = self.lk_room.e2ee_manager.key_provider caller_id = self._caller_identity or f"{sender}:{device_id}" - derived = _hkdf(key, _RATCHET_SALT, caller_id.encode()) + derived = _hkdf(key, _RATCHET_SALT, _HKDF_INFO) kp.set_key(caller_id, derived, index) logger.info("Live-updated caller frame key[%d] for %s (%d→%d bytes)", index, caller_id, len(key), len(derived)) @@ -291,13 +301,14 @@ class VoiceSession: logger.warning("No key rotation after 10s — using pre-join key[%d]", pre_max_idx) # Set per-participant keys via key provider. - # We pre-derive HKDF(base_key, salt=ratchetSalt, info=identity) in Python - # and pass the derived key with KDF_NONE so the Rust FFI uses it directly. - # This matches Element Call's JS E2EE worker derivation exactly. + # We pre-derive HKDF(base_key, salt=ratchetSalt, info=128zeros) → 16-byte AES-128 key + # and pass it with KDF_NONE so the Rust FFI uses it directly. + # Matches LiveKit JS SDK deriveKeys(): info=new ArrayBuffer(128), output=128-bit. kp = self.lk_room.e2ee_manager.key_provider - # Bot's own key — pre-derive HKDF then set for outgoing encryption - bot_frame_key = _hkdf(self._bot_key, _RATCHET_SALT, bot_identity.encode()) + # Bot's own key — pre-derive HKDF then set for outgoing encryption. + # LiveKit JS SDK: HKDF(base_key, "LKFrameEncryptionKey", 128-zero-info) → 16-byte AES-128 key + bot_frame_key = _hkdf(self._bot_key, _RATCHET_SALT, _HKDF_INFO) kp.set_key(bot_identity, bot_frame_key, 0) logger.info("Set bot frame key for %s (base=%d→derived=%d bytes)", bot_identity, len(self._bot_key), len(bot_frame_key)) @@ -323,14 +334,14 @@ class VoiceSession: if self._caller_all_keys and remote_identity: try: for idx, base_k in sorted(self._caller_all_keys.items()): - derived_k = _hkdf(base_k, _RATCHET_SALT, remote_identity.encode()) + derived_k = _hkdf(base_k, _RATCHET_SALT, _HKDF_INFO) kp.set_key(remote_identity, derived_k, idx) logger.info("Set caller frame key[%d] for %s (base=%d→derived=%d bytes)", idx, remote_identity, len(base_k), len(derived_k)) # Belt+suspenders: also set via matrix identity if different from LK identity if self._caller_identity and self._caller_identity != remote_identity: for idx, base_k in sorted(self._caller_all_keys.items()): - derived_k = _hkdf(base_k, _RATCHET_SALT, self._caller_identity.encode()) + derived_k = _hkdf(base_k, _RATCHET_SALT, _HKDF_INFO) kp.set_key(self._caller_identity, derived_k, idx) logger.info("Also set caller keys via matrix identity %s", self._caller_identity) except Exception as e: