Review notes (free text ahead of the first "diff --git" header; patch(1) and
"git apply" skip leading text like this).

* This patch was re-split from a copy whose newlines and indentation had been
  collapsed. The line counts of every hunk match its @@ header, but blank
  context lines, the nesting depth inside VoiceSession and the spacing before
  inline "#" comments are reconstructed. Verify against voice.py, or apply
  with "git apply --ignore-whitespace" if the context fails to match.
* NOTE(review): KDF_NONE = 0 assumes the FFI treats value 0 as "no key
  derivation". In the LiveKit FFI e2ee.proto the KeyDerivationFunction value 0
  appears to be PBKDF2 -- TODO confirm. If so, keys pre-derived by _hkdf()
  would be derived a second time by the FFI and frames would not decrypt.
* NOTE(review): ratchet_window_size changes 16 -> 10 together with the
  docstring's statement of Element Call's value; nothing in this patch shows
  where 10 comes from -- confirm against Element Call before merging.
* NOTE(review): _hkdf() returns b"" for length <= 0 and fails with an
  unrelated ValueError from bytes([256]) for length > 255 * 32; fine for the
  32-byte default used here, worth an explicit check if it gets reused.

diff --git a/voice.py b/voice.py
index a54734f..247d422 100644
--- a/voice.py
+++ b/voice.py
@@ -67,23 +67,37 @@ def _generate_lk_jwt(room_id, user_id, device_id):
 
 
 KDF_HKDF = 1
+KDF_NONE = 0
+
+_RATCHET_SALT = b"LKFrameEncryptionKey"
+
+
+def _hkdf(ikm: bytes, salt: bytes, info: bytes, length: int = 32) -> bytes:
+    """HKDF-SHA256 (RFC 5869). Pre-derives frame key to bypass Rust FFI's HKDF."""
+    import hmac as _hmac, hashlib as _hashlib
+    prk = _hmac.new(salt, ikm, _hashlib.sha256).digest()
+    okm, t = b"", b""
+    for i in range(1, (length + 31) // 32 + 1):
+        t = _hmac.new(prk, t + info + bytes([i]), _hashlib.sha256).digest()
+        okm += t
+    return okm[:length]
 
 
 def _build_e2ee_options() -> rtc.E2EEOptions:
-    """Build HKDF E2EE options matching Element Call's key derivation.
+    """Build E2EE options — KDF disabled; we pre-derive HKDF keys in Python.
 
-    Use per-participant key mode (no shared_key) so the Rust FFI uses the
-    participant identity as HKDF info — matching Element Call's JS SFrame.
-    Keys are set post-connect via set_key(participant_identity, key, index).
-    Element Call uses: ratchetWindowSize=16, keyringSize=256, salt="LKFrameEncryptionKey"
+    The Rust FFI's KDF_HKDF path for INCOMING decryption may use wrong parameters.
+    We pre-derive HKDF(base_key, salt="LKFrameEncryptionKey", info=identity) in Python
+    and pass the derived key with KDF_NONE so the Rust FFI uses it directly.
+    Element Call uses: ratchetWindowSize=10, keyringSize=256, salt="LKFrameEncryptionKey"
     """
     key_opts = rtc.KeyProviderOptions(
-        shared_key=b"",  # empty = per-participant mode; keys set via set_key() after connect
-        ratchet_window_size=16,
+        shared_key=b"",  # empty = per-participant mode
+        ratchet_window_size=10,
         ratchet_salt=b"LKFrameEncryptionKey",
         failure_tolerance=-1,
         key_ring_size=256,
-        key_derivation_function=KDF_HKDF,
+        key_derivation_function=KDF_NONE,  # we pre-derive; FFI uses key directly
     )
     return rtc.E2EEOptions(key_provider_options=key_opts)
 
@@ -116,15 +130,15 @@ class VoiceSession:
         self._caller_all_keys[index] = key
         logger.info("E2EE key received from %s:%s (index=%d, %d bytes)",
                     sender, device_id, index, len(key))
-        # Live-update per-participant key on rotation (Element Call rotates on bot join).
-        # Use only set_key() (per-participant mode) — matching EC's HKDF info=identity.
+        # Live-update per-participant key on rotation — pre-derive HKDF matching KDF_NONE mode.
         if self.lk_room and hasattr(self.lk_room, 'e2ee_manager'):
             try:
                 kp = self.lk_room.e2ee_manager.key_provider
                 caller_id = self._caller_identity or f"{sender}:{device_id}"
-                kp.set_key(caller_id, key, index)
-                logger.info("Live-updated per-participant key[%d] for %s (%d bytes)",
-                            index, caller_id, len(key))
+                derived = _hkdf(key, _RATCHET_SALT, caller_id.encode())
+                kp.set_key(caller_id, derived, index)
+                logger.info("Live-updated caller frame key[%d] for %s (%d→%d bytes)",
+                            index, caller_id, len(key), len(derived))
             except Exception as e:
                 logger.warning("Failed to live-update caller key: %s", e)
 
@@ -276,12 +290,17 @@ class VoiceSession:
             else:
                 logger.warning("No key rotation after 10s — using pre-join key[%d]", pre_max_idx)
 
-            # Set per-participant keys via key provider
+            # Set per-participant keys via key provider.
+            # We pre-derive HKDF(base_key, salt=ratchetSalt, info=identity) in Python
+            # and pass the derived key with KDF_NONE so the Rust FFI uses it directly.
+            # This matches Element Call's JS E2EE worker derivation exactly.
             kp = self.lk_room.e2ee_manager.key_provider
 
-            # Bot's own key — encrypts outgoing audio
-            kp.set_key(bot_identity, self._bot_key, 0)
-            logger.info("Set bot key for %s (%d bytes)", bot_identity, len(self._bot_key))
+            # Bot's own key — pre-derive HKDF then set for outgoing encryption
+            bot_frame_key = _hkdf(self._bot_key, _RATCHET_SALT, bot_identity.encode())
+            kp.set_key(bot_identity, bot_frame_key, 0)
+            logger.info("Set bot frame key for %s (base=%d→derived=%d bytes)",
+                        bot_identity, len(self._bot_key), len(bot_frame_key))
 
             # Find the remote participant, wait up to 10s if not yet connected
             remote_identity = None
@@ -298,18 +317,22 @@ class VoiceSession:
                 if remote_identity:
                     break
 
-            # Set ALL known caller keys (per-participant, HKDF info=remote_identity).
-            # EC may have already rotated (index 0→1) by the time bot connects.
+            # Set ALL known caller keys — pre-derive HKDF(base_key, ratchetSalt, identity).
+            # EC encrypts user audio with HKDF(user_base_key, "LKFrameEncryptionKey", user_identity).
+            # With KDF_NONE, the Rust FFI uses the key directly, so we must pre-derive.
             if self._caller_all_keys and remote_identity:
                 try:
-                    for idx, k in sorted(self._caller_all_keys.items()):
-                        kp.set_key(remote_identity, k, idx)
-                        logger.info("Set caller key[%d] for %s (%d bytes)", idx, remote_identity, len(k))
+                    for idx, base_k in sorted(self._caller_all_keys.items()):
+                        derived_k = _hkdf(base_k, _RATCHET_SALT, remote_identity.encode())
+                        kp.set_key(remote_identity, derived_k, idx)
+                        logger.info("Set caller frame key[%d] for %s (base=%d→derived=%d bytes)",
+                                    idx, remote_identity, len(base_k), len(derived_k))
                     # Belt+suspenders: also set via matrix identity if different from LK identity
                     if self._caller_identity and self._caller_identity != remote_identity:
-                        for idx, k in sorted(self._caller_all_keys.items()):
-                            kp.set_key(self._caller_identity, k, idx)
-                        logger.info("Also set all caller keys via identity %s", self._caller_identity)
+                        for idx, base_k in sorted(self._caller_all_keys.items()):
+                            derived_k = _hkdf(base_k, _RATCHET_SALT, self._caller_identity.encode())
+                            kp.set_key(self._caller_identity, derived_k, idx)
+                        logger.info("Also set caller keys via matrix identity %s", self._caller_identity)
                 except Exception as e:
                     logger.warning("Failed to set caller per-participant keys: %s", e)
             elif not self._caller_all_keys: