diff --git a/voice.py b/voice.py index 89d51ff..241387b 100644 --- a/voice.py +++ b/voice.py @@ -66,40 +66,17 @@ def _generate_lk_jwt(room_id, user_id, device_id): return token.to_jwt() -KDF_HKDF = 1 -KDF_NONE = 0 - -_RATCHET_SALT = b"LKFrameEncryptionKey" - - -def _hkdf(ikm: bytes, salt: bytes, info: bytes, length: int = 16) -> bytes: - """HKDF-SHA256 (RFC 5869). Pre-derives frame key to bypass Rust FFI's HKDF. - - LiveKit JS SDK deriveKeys() uses: - salt = TextEncoder("LKFrameEncryptionKey") - info = new ArrayBuffer(128) # 128 zero bytes — NOT participant identity - length = 128 bits = 16 bytes (AES-128-GCM) - """ - import hmac as _hmac, hashlib as _hashlib - prk = _hmac.new(salt, ikm, _hashlib.sha256).digest() - okm, t = b"", b"" - for i in range(1, (length + 31) // 32 + 1): - t = _hmac.new(prk, t + info + bytes([i]), _hashlib.sha256).digest() - okm += t - return okm[:length] - - -# 128 zero bytes — matches LiveKit JS SDK's `info: new ArrayBuffer(128)` in getAlgoOptions() -_HKDF_INFO = b"\x00" * 128 +KDF_HKDF = 1 # Rust FFI applies HKDF internally (proto enum value 1) +# NOTE: value 0 in the proto is PBKDF2, NOT raw/none mode — see e2ee_patch.py def _build_e2ee_options() -> rtc.E2EEOptions: - """Build E2EE options — KDF disabled; we pre-derive HKDF keys in Python. + """Build E2EE options — let Rust FFI apply HKDF internally (KDF_HKDF=1). - We pre-derive HKDF(base_key, salt="LKFrameEncryptionKey", info=128zeros) in Python - and pass the 16-byte AES-128 key with KDF_NONE so the Rust FFI uses it directly. - LiveKit JS SDK deriveKeys(): salt=ratchetSalt, info=new ArrayBuffer(128), output=128-bit. + Pass raw base keys from Matrix key exchange events directly to set_key(). + The Rust FFI derives the AES frame key via HKDF(base_key, ratchetSalt, ...) internally. Element Call uses: ratchetWindowSize=10, keyringSize=256, ratchetSalt="LKFrameEncryptionKey" + NOTE: proto value 0 = PBKDF2 (not raw/none) — must use KDF_HKDF=1. """ key_opts = rtc.KeyProviderOptions( shared_key=b"", # empty = per-participant mode @@ -107,7 +84,7 @@ def _build_e2ee_options() -> rtc.E2EEOptions: ratchet_salt=b"LKFrameEncryptionKey", failure_tolerance=-1, key_ring_size=256, - key_derivation_function=KDF_NONE, # we pre-derive; FFI uses key directly + key_derivation_function=KDF_HKDF, # Rust FFI applies HKDF; we pass raw base keys ) return rtc.E2EEOptions(key_provider_options=key_opts) @@ -140,15 +117,14 @@ class VoiceSession: self._caller_all_keys[index] = key logger.info("E2EE key received from %s:%s (index=%d, %d bytes)", sender, device_id, index, len(key)) - # Live-update per-participant key on rotation — pre-derive HKDF matching KDF_NONE mode. + # Live-update per-participant key on rotation — pass raw base key, Rust FFI applies HKDF. if self.lk_room and hasattr(self.lk_room, 'e2ee_manager'): try: kp = self.lk_room.e2ee_manager.key_provider caller_id = self._caller_identity or f"{sender}:{device_id}" - derived = _hkdf(key, _RATCHET_SALT, _HKDF_INFO) - kp.set_key(caller_id, derived, index) - logger.info("Live-updated caller frame key[%d] for %s (%d→%d bytes)", - index, caller_id, len(key), len(derived)) + kp.set_key(caller_id, key, index) + logger.info("Live-updated caller raw key[%d] for %s (%d bytes)", + index, caller_id, len(key)) except Exception as e: logger.warning("Failed to live-update caller key: %s", e) @@ -300,18 +276,13 @@ class VoiceSession: else: logger.warning("No key rotation after 10s — using pre-join key[%d]", pre_max_idx) - # Set per-participant keys via key provider. - # We pre-derive HKDF(base_key, salt=ratchetSalt, info=128zeros) → 16-byte AES-128 key - # and pass it with KDF_NONE so the Rust FFI uses it directly. - # Matches LiveKit JS SDK deriveKeys(): info=new ArrayBuffer(128), output=128-bit. + # Set per-participant keys via key provider — pass raw base keys. + # KDF_HKDF=1: Rust FFI applies HKDF(base_key, ratchetSalt, ...) internally. kp = self.lk_room.e2ee_manager.key_provider - # Bot's own key — pre-derive HKDF then set for outgoing encryption. - # LiveKit JS SDK: HKDF(base_key, "LKFrameEncryptionKey", 128-zero-info) → 16-byte AES-128 key - bot_frame_key = _hkdf(self._bot_key, _RATCHET_SALT, _HKDF_INFO) - kp.set_key(bot_identity, bot_frame_key, 0) - logger.info("Set bot frame key for %s (base=%d→derived=%d bytes)", - bot_identity, len(self._bot_key), len(bot_frame_key)) + # Bot's own key — raw base key, Rust FFI derives AES frame key via HKDF + kp.set_key(bot_identity, self._bot_key, 0) + logger.info("Set bot raw key for %s (%d bytes)", bot_identity, len(self._bot_key)) # Find the remote participant, wait up to 10s if not yet connected remote_identity = None @@ -328,21 +299,17 @@ class VoiceSession: if remote_identity: break - # Set ALL known caller keys — pre-derive HKDF(base_key, ratchetSalt, identity). - # EC encrypts user audio with HKDF(user_base_key, "LKFrameEncryptionKey", user_identity). - # With KDF_NONE, the Rust FFI uses the key directly, so we must pre-derive. + # Set ALL known caller keys — raw base keys, Rust FFI applies HKDF internally. if self._caller_all_keys and remote_identity: try: for idx, base_k in sorted(self._caller_all_keys.items()): - derived_k = _hkdf(base_k, _RATCHET_SALT, _HKDF_INFO) - kp.set_key(remote_identity, derived_k, idx) - logger.info("Set caller frame key[%d] for %s (base=%d→derived=%d bytes)", - idx, remote_identity, len(base_k), len(derived_k)) + kp.set_key(remote_identity, base_k, idx) + logger.info("Set caller raw key[%d] for %s (%d bytes)", + idx, remote_identity, len(base_k)) # Belt+suspenders: also set via matrix identity if different from LK identity if self._caller_identity and self._caller_identity != remote_identity: for idx, base_k in sorted(self._caller_all_keys.items()): - derived_k = _hkdf(base_k, _RATCHET_SALT, _HKDF_INFO) - kp.set_key(self._caller_identity, derived_k, idx) + kp.set_key(self._caller_identity, base_k, idx) logger.info("Also set caller keys via matrix identity %s", self._caller_identity) except Exception as e: logger.warning("Failed to set caller per-participant keys: %s", e)