fix(e2ee): revert incorrect HKDF patch, remove pre-ratcheting

The HKDF sed patch in the Dockerfile was wrong — it swapped salt and info
based on an incorrect analysis of minified JS. The original Rust FFI
parameters are correct: salt="LKFrameEncryptionKey", info=[0u8; 128].

Also removed Python-side HMAC pre-ratcheting of keys. Element Call
uses explicit key rotation via Matrix events, not HMAC ratcheting.

Added diagnostic logging that traces the exact key bytes (raw key and
Python-side HKDF output) during E2EE setup. Note: this writes key
material to the logs.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-22 21:44:11 +02:00
parent d30e9f8c83
commit 230c083b7b
2 changed files with 15 additions and 29 deletions

View File

@@ -10,17 +10,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
WORKDIR /build WORKDIR /build
RUN git clone --branch EC-compat-changes --depth 1 --recurse-submodules \ RUN git clone --branch EC-compat-changes --depth 1 --recurse-submodules \
https://github.com/onestacked/livekit-rust-sdks.git https://github.com/onestacked/livekit-rust-sdks.git
WORKDIR /build/livekit-rust-sdks
# Patch HKDF to match Element Call JS SDK parameters.
# EC JS: salt=Uint8Array(8) (8 zero bytes), info=encode("LKFrameEncryptionKey")
# Rust fork: salt=ratchet_salt ("LKFrameEncryptionKey"), info=[0u8;128]
# Fix: use 8 zero bytes as HKDF salt, use the callback's salt param as HKDF info.
RUN find . -name '*.rs' -path '*/e2ee*' -exec grep -l 'hkdf.*expand' {} \; | head -1 | \
xargs -I{} sh -c 'echo "Patching HKDF in: {}"; \
sed -i "s|hkdf::Hkdf::<Sha256>::new(Some(salt), key);|hkdf::Hkdf::<Sha256>::new(Some(\&[0u8; 8]), key);|" "{}" && \
sed -i "s|hkdf.expand(\&\[0u8; 128\], derived_key)|hkdf.expand(salt, derived_key)|" "{}"'
WORKDIR /build/livekit-rust-sdks/livekit-ffi WORKDIR /build/livekit-rust-sdks/livekit-ffi
RUN cargo build --release RUN cargo build --release

View File

@@ -295,18 +295,14 @@ class VoiceSession:
if not self._caller_key: if not self._caller_key:
self._caller_key = key self._caller_key = key
self._caller_identity = f"{sender}:{device_id}" self._caller_identity = f"{sender}:{device_id}"
# Pre-compute ratcheted keys from this base key to cover EC's key rotation. # Store only the real key at its received index (no pre-ratcheting).
# EC rotates (via HMAC ratchet) when new participants join — the rotated key # EC distributes keys explicitly via Matrix — no HMAC ratcheting.
# arrives via Matrix sync with ~30s delay. Setting ratcheted indices 0..N self._caller_all_keys[index] = key
# proactively means decryption works immediately without waiting for Matrix. logger.info("E2EE key received from %s:%s (index=%d, %d bytes, raw=%s)",
ratcheted = _ratchet_keys(key, count=8) sender, device_id, index, len(key), key.hex())
for ridx, rkey in ratcheted.items(): # Diagnostic: compute Python HKDF to compare with what Rust FFI should derive
actual_idx = index + ridx derived = _hkdf_derive(key)
if actual_idx not in self._caller_all_keys: # don't overwrite real received keys logger.info("E2EE_DIAG: Python HKDF(raw_key) = %s (expected AES-128-GCM key)", derived.hex())
self._caller_all_keys[actual_idx] = rkey
self._caller_all_keys[index] = key # always store the real key at its index
logger.info("E2EE key received from %s:%s (index=%d, %d bytes) — pre-computed ratchets [%d..%d]",
sender, device_id, index, len(key), index, index + 7)
# Live-update key on rotation — use set_key() which applies HKDF via Rust FFI. # Live-update key on rotation — use set_key() which applies HKDF via Rust FFI.
# At this point the track is usually already subscribed so frame cryptor exists. # At this point the track is usually already subscribed so frame cryptor exists.
if self.lk_room and self._caller_identity: if self.lk_room and self._caller_identity:
@@ -465,27 +461,28 @@ class VoiceSession:
# Solution: set caller key HERE, after frame cryptor is initialized. # Solution: set caller key HERE, after frame cryptor is initialized.
if int(t.kind) == 1 and e2ee_opts is not None: # audio track only if int(t.kind) == 1 and e2ee_opts is not None: # audio track only
caller_id = p.identity caller_id = p.identity
logger.info("E2EE_DIAG: track_subscribed for %s, have %d caller keys",
caller_id, len(self._caller_all_keys))
try: try:
kp_local = self.lk_room.e2ee_manager.key_provider kp_local = self.lk_room.e2ee_manager.key_provider
if self._caller_all_keys: if self._caller_all_keys:
for idx, base_k in sorted(self._caller_all_keys.items()): for idx, base_k in sorted(self._caller_all_keys.items()):
ok = kp_local.set_key(caller_id, base_k, idx) ok = kp_local.set_key(caller_id, base_k, idx)
logger.info("on_ts: set_key[%d] for %s (ok=%s, %d bytes)", logger.info("on_ts: set_key[%d] for %s (ok=%s, %d bytes, raw=%s)",
idx, caller_id, ok, len(base_k)) idx, caller_id, ok, len(base_k), base_k.hex())
else: else:
logger.warning("on_ts: no caller keys available yet") logger.warning("on_ts: no caller keys available yet")
except Exception as exc: except Exception as exc:
logger.warning("on_ts: set_key failed: %s", exc) logger.warning("on_ts: set_key failed: %s", exc)
# Delayed retry — frame cryptor may not be ready at track_subscribed time # Delayed retry with additional diagnostics
async def _delayed_set_key(pid=caller_id): async def _delayed_set_key(pid=caller_id):
await asyncio.sleep(1.0) await asyncio.sleep(1.5)
try: try:
kp_d = self.lk_room.e2ee_manager.key_provider kp_d = self.lk_room.e2ee_manager.key_provider
if self._caller_all_keys: if self._caller_all_keys:
for idx, base_k in sorted(self._caller_all_keys.items()): for idx, base_k in sorted(self._caller_all_keys.items()):
ok = kp_d.set_key(pid, base_k, idx) ok = kp_d.set_key(pid, base_k, idx)
logger.info("on_ts_delayed: set_key[%d] for %s (ok=%s, %d bytes)", logger.info("on_ts_delayed: set_key[%d] for %s (ok=%s)", idx, pid, ok)
idx, pid, ok, len(base_k))
except Exception as exc: except Exception as exc:
logger.warning("on_ts_delayed: set_key failed: %s", exc) logger.warning("on_ts_delayed: set_key failed: %s", exc)
asyncio.ensure_future(_delayed_set_key()) asyncio.ensure_future(_delayed_set_key())