fix(voice): pre-derive HKDF in Python, use KDF_NONE to bypass Rust FFI HKDF

Rust FFI's KDF_HKDF path for incoming decryption may use wrong parameters.
Pre-derive HKDF(base_key, salt="LKFrameEncryptionKey", info=identity) in Python
and pass derived key with KDF_NONE so Rust FFI uses it directly as frame key.

Matches EC's MatrixKeyProvider: ratchetWindowSize=10, keyringSize=256.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-22 08:47:41 +02:00
parent c330900a3a
commit 4f8bfbe479

View File

@@ -67,23 +67,37 @@ def _generate_lk_jwt(room_id, user_id, device_id):
KDF_HKDF = 1
KDF_NONE = 0
_RATCHET_SALT = b"LKFrameEncryptionKey"
def _hkdf(ikm: bytes, salt: bytes, info: bytes, length: int = 32) -> bytes:
    """Derive ``length`` bytes of key material with HKDF-SHA256 (RFC 5869).

    Pre-derives the frame key in Python so the Rust FFI's own HKDF step can
    be bypassed.

    Args:
        ikm: Input keying material (the base key).
        salt: Salt for the extract step; used as the HMAC key over ``ikm``.
        info: Context bytes mixed into every expand-step block.
        length: Number of output bytes to return, between 0 and
            255 * 32 inclusive (the RFC 5869 limit for SHA-256).

    Returns:
        The first ``length`` bytes of the HKDF output keying material.

    Raises:
        ValueError: If ``length`` is negative or larger than 255 * 32.
    """
    import hashlib as _hashlib
    import hmac as _hmac

    hash_len = _hashlib.sha256().digest_size
    max_length = 255 * hash_len
    # Reject out-of-range lengths up front: a negative length would otherwise
    # silently yield b"", and an oversized one would only fail once the
    # one-byte block counter overflows.
    if not 0 <= length <= max_length:
        raise ValueError(
            f"HKDF-SHA256 output length must be in 0..{max_length}, got {length}"
        )

    # Extract: PRK = HMAC-SHA256(key=salt, msg=ikm).
    prk = _hmac.new(salt, ikm, _hashlib.sha256).digest()

    # Expand: T(i) = HMAC-SHA256(PRK, T(i-1) || info || i), with T(0) empty.
    blocks = []
    previous = b""
    block_count = (length + hash_len - 1) // hash_len
    for counter in range(1, block_count + 1):
        previous = _hmac.new(
            prk, previous + info + bytes([counter]), _hashlib.sha256
        ).digest()
        blocks.append(previous)
    return b"".join(blocks)[:length]
def _build_e2ee_options() -> rtc.E2EEOptions:
"""Build HKDF E2EE options matching Element Call's key derivation.
"""Build E2EE options — KDF disabled; we pre-derive HKDF keys in Python.
Use per-participant key mode (no shared_key) so the Rust FFI uses the
participant identity as HKDF info — matching Element Call's JS SFrame.
Keys are set post-connect via set_key(participant_identity, key, index).
Element Call uses: ratchetWindowSize=16, keyringSize=256, salt="LKFrameEncryptionKey"
The Rust FFI's KDF_HKDF path for INCOMING decryption may use wrong parameters.
We pre-derive HKDF(base_key, salt="LKFrameEncryptionKey", info=identity) in Python
and pass the derived key with KDF_NONE so the Rust FFI uses it directly.
Element Call uses: ratchetWindowSize=10, keyringSize=256, salt="LKFrameEncryptionKey"
"""
key_opts = rtc.KeyProviderOptions(
shared_key=b"", # empty = per-participant mode; keys set via set_key() after connect
ratchet_window_size=16,
shared_key=b"", # empty = per-participant mode
ratchet_window_size=10,
ratchet_salt=b"LKFrameEncryptionKey",
failure_tolerance=-1,
key_ring_size=256,
key_derivation_function=KDF_HKDF,
key_derivation_function=KDF_NONE, # we pre-derive; FFI uses key directly
)
return rtc.E2EEOptions(key_provider_options=key_opts)
@@ -116,15 +130,15 @@ class VoiceSession:
self._caller_all_keys[index] = key
logger.info("E2EE key received from %s:%s (index=%d, %d bytes)",
sender, device_id, index, len(key))
# Live-update per-participant key on rotation (Element Call rotates on bot join).
# Use only set_key() (per-participant mode) — matching EC's HKDF info=identity.
# Live-update per-participant key on rotation — pre-derive HKDF matching KDF_NONE mode.
if self.lk_room and hasattr(self.lk_room, 'e2ee_manager'):
try:
kp = self.lk_room.e2ee_manager.key_provider
caller_id = self._caller_identity or f"{sender}:{device_id}"
kp.set_key(caller_id, key, index)
logger.info("Live-updated per-participant key[%d] for %s (%d bytes)",
index, caller_id, len(key))
derived = _hkdf(key, _RATCHET_SALT, caller_id.encode())
kp.set_key(caller_id, derived, index)
logger.info("Live-updated caller frame key[%d] for %s (%d→%d bytes)",
index, caller_id, len(key), len(derived))
except Exception as e:
logger.warning("Failed to live-update caller key: %s", e)
@@ -276,12 +290,17 @@ class VoiceSession:
else:
logger.warning("No key rotation after 10s — using pre-join key[%d]", pre_max_idx)
# Set per-participant keys via key provider
# Set per-participant keys via key provider.
# We pre-derive HKDF(base_key, salt=ratchetSalt, info=identity) in Python
# and pass the derived key with KDF_NONE so the Rust FFI uses it directly.
# This matches Element Call's JS E2EE worker derivation exactly.
kp = self.lk_room.e2ee_manager.key_provider
# Bot's own key — encrypts outgoing audio
kp.set_key(bot_identity, self._bot_key, 0)
logger.info("Set bot key for %s (%d bytes)", bot_identity, len(self._bot_key))
# Bot's own key — pre-derive HKDF then set for outgoing encryption
bot_frame_key = _hkdf(self._bot_key, _RATCHET_SALT, bot_identity.encode())
kp.set_key(bot_identity, bot_frame_key, 0)
logger.info("Set bot frame key for %s (base=%d→derived=%d bytes)",
bot_identity, len(self._bot_key), len(bot_frame_key))
# Find the remote participant, wait up to 10s if not yet connected
remote_identity = None
@@ -298,18 +317,22 @@ class VoiceSession:
if remote_identity:
break
# Set ALL known caller keys (per-participant, HKDF info=remote_identity).
# EC may have already rotated (index 0→1) by the time bot connects.
# Set ALL known caller keys — pre-derive HKDF(base_key, ratchetSalt, identity).
# EC encrypts user audio with HKDF(user_base_key, "LKFrameEncryptionKey", user_identity).
# With KDF_NONE, the Rust FFI uses the key directly, so we must pre-derive.
if self._caller_all_keys and remote_identity:
try:
for idx, k in sorted(self._caller_all_keys.items()):
kp.set_key(remote_identity, k, idx)
logger.info("Set caller key[%d] for %s (%d bytes)", idx, remote_identity, len(k))
for idx, base_k in sorted(self._caller_all_keys.items()):
derived_k = _hkdf(base_k, _RATCHET_SALT, remote_identity.encode())
kp.set_key(remote_identity, derived_k, idx)
logger.info("Set caller frame key[%d] for %s (base=%d→derived=%d bytes)",
idx, remote_identity, len(base_k), len(derived_k))
# Belt+suspenders: also set via matrix identity if different from LK identity
if self._caller_identity and self._caller_identity != remote_identity:
for idx, k in sorted(self._caller_all_keys.items()):
kp.set_key(self._caller_identity, k, idx)
logger.info("Also set all caller keys via identity %s", self._caller_identity)
for idx, base_k in sorted(self._caller_all_keys.items()):
derived_k = _hkdf(base_k, _RATCHET_SALT, self._caller_identity.encode())
kp.set_key(self._caller_identity, derived_k, idx)
logger.info("Also set caller keys via matrix identity %s", self._caller_identity)
except Exception as e:
logger.warning("Failed to set caller per-participant keys: %s", e)
elif not self._caller_all_keys: