fix(e2ee): switch to PR #921 Rust FFI branch for native HKDF (MAT-144)

PR #904's callback-based HKDF hack only fired for the first frame cryptor
(audio), leaving video frame cryptors on PBKDF2 and causing DEC_FAILED
oscillation on video tracks.
PR #921 integrates HKDF natively at the WebRTC C++ level, applying uniformly
to all frame cryptors (audio + video).

Also removes the aggressive video re-keying workaround and adds a 5s cooldown
to the DEC_FAILED re-keying handler to prevent tight loops.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-03-10 09:52:32 +02:00
parent 4fc268cdd7
commit f85562ed28
2 changed files with 16 additions and 23 deletions

View File

@@ -1,6 +1,8 @@
# Stage 1: Build patched Rust FFI with HKDF support for Element Call E2EE
# Fork: onestacked/livekit-rust-sdks branch EC-compat-changes
# PR: https://github.com/livekit/rust-sdks/pull/904
# Stage 1: Build patched Rust FFI with native HKDF support for Element Call E2EE
# Fork: onestacked/livekit-rust-sdks branch EC-compat-changes-webrtc-change
# PR: https://github.com/livekit/rust-sdks/pull/921 (proper HKDF at WebRTC C++ level)
# Replaces #904 which used a callback hack that only worked for the first frame cryptor
# (audio), causing DEC_FAILED on video tracks (MAT-144).
# Must use rust:latest (trixie/sid) — bookworm GCC 12 can't compile webrtc C++20 code
FROM rust:latest AS rust-build
RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -8,7 +10,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libva-dev libglib2.0-dev nasm make clang \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /build
RUN git clone --branch EC-compat-changes --depth 1 --recurse-submodules \
RUN git clone --branch EC-compat-changes-webrtc-change --depth 1 --recurse-submodules \
https://github.com/onestacked/livekit-rust-sdks.git
WORKDIR /build/livekit-rust-sdks/livekit-ffi
RUN cargo build --release

View File

@@ -695,23 +695,6 @@ class VoiceSession:
_derive_and_set_key(kp_local, caller_id, base_k, idx)
logger.info("on_ts: set key[%d] for %s (%s track)",
idx, caller_id, track_type)
# MAT-144: Video frame cryptors may not be fully initialized
# when set_key is first called. Schedule aggressive re-keying.
if int(t.kind) == 2:
async def _video_rekey(pid=caller_id):
for delay in (0.3, 0.8, 2.0, 5.0):
await asyncio.sleep(delay)
if not self.lk_room:
break
try:
kp_v = self.lk_room.e2ee_manager.key_provider
for idx, base_k in sorted(self._caller_all_keys.items()):
_derive_and_set_key(kp_v, pid, base_k, idx)
logger.info("video_rekey: re-set %d keys for %s (delay=%.1fs)",
len(self._caller_all_keys), pid, delay)
except Exception as exc:
logger.warning("video_rekey failed: %s", exc)
asyncio.ensure_future(_video_rekey())
else:
logger.warning("on_ts: no caller keys yet — scheduling 0.5s retry")
async def _brief_key_retry(pid=caller_id):
@@ -731,19 +714,27 @@ class VoiceSession:
logger.warning("on_ts: set_key failed: %s", exc)
_e2ee_state_names = {0:"NEW",1:"OK",2:"ENC_FAILED",3:"DEC_FAILED",4:"MISSING_KEY",5:"RATCHETED",6:"INTERNAL_ERR"}
_last_rekey_time = {} # per-participant cooldown for DEC_FAILED re-keying
@self.lk_room.on("e2ee_state_changed")
def on_e2ee_state(participant, state):
state_name = _e2ee_state_names.get(int(state), f"UNKNOWN_{state}")
p_id = participant.identity if participant else "local"
logger.info("E2EE_STATE: participant=%s state=%s", p_id, state_name)
# When remote participant frame cryptor is NEW or MISSING_KEY → set their key
# When remote participant needs key: NEW, MISSING_KEY, or DEC_FAILED (with cooldown)
if participant and p_id != bot_identity and int(state) in (0, 3, 4):
now = time.monotonic()
# DEC_FAILED: only re-key every 5s to avoid tight loops
if int(state) == 3:
last = _last_rekey_time.get(p_id, 0)
if (now - last) < 5.0:
return
_last_rekey_time[p_id] = now
if self._caller_all_keys:
try:
kp_e = self.lk_room.e2ee_manager.key_provider
for idx, base_k in sorted(self._caller_all_keys.items()):
_derive_and_set_key(kp_e, p_id, base_k, idx)
logger.info("e2ee_state: derived+set key[%d] for %s on %s",
logger.info("e2ee_state: set key[%d] for %s on %s",
idx, p_id, state_name)
except Exception as exc:
logger.warning("e2ee_state set_key failed: %s", exc)