feat(e2ee): Add HKDF E2EE support for Element Call compatibility

Element Call uses HKDF-SHA256 + AES-128-GCM for frame encryption,
while the LiveKit Rust SDK defaults to PBKDF2 + AES-256-GCM.

- Multi-stage Dockerfile builds patched Rust FFI from EC-compat fork
- Generates Python protobuf bindings with new fields
- patch_sdk.py modifies installed livekit-rtc for new proto fields
- agent.py passes E2EE options with HKDF to ctx.connect()
- bot.py exchanges encryption keys via Matrix state events
- Separate Dockerfile.bot for bot service (no Rust build needed)

Ref: livekit/rust-sdks#904, livekit/python-sdks#570

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-20 16:28:56 +02:00
parent 578b6bb56f
commit fc3d915939
7 changed files with 309 additions and 8 deletions

View File

@@ -1,8 +1,13 @@
import os
import json
import base64
import logging
from livekit.agents import Agent, AgentSession, AgentServer, JobContext, JobProcess, cli
from livekit.plugins import openai as lk_openai, elevenlabs, silero
import livekit.rtc as rtc
from e2ee_patch import KDF_HKDF
# Module-level logger used throughout the agent.
logger = logging.getLogger("matrix-ai-agent")
# Configure the root logger at DEBUG verbosity.
logging.basicConfig(level=logging.DEBUG)
@@ -27,12 +32,63 @@ def prewarm(proc: JobProcess):
# Register prewarm as the server's setup function.
# NOTE(review): presumably invoked by the framework once per worker process — confirm.
server.setup_fnc = prewarm
def build_e2ee_options(shared_key: bytes) -> rtc.E2EEOptions:
    """Create E2EE options that derive frame keys with HKDF (Element Call compatible).

    The ``key_ring_size`` and ``key_derivation_function`` fields are only
    available on the patched ``KeyProviderOptions`` that patch_sdk.py produces
    during the Docker build.

    Args:
        shared_key: Raw shared key bytes handed to the key provider.

    Returns:
        An ``rtc.E2EEOptions`` wrapping the configured key provider options.
    """
    provider_settings = {
        "shared_key": shared_key,
        "ratchet_window_size": 0,
        "ratchet_salt": b"LKFrameEncryptionKey",
        "failure_tolerance": -1,
        "key_ring_size": 16,
        "key_derivation_function": KDF_HKDF,
    }
    provider_options = rtc.KeyProviderOptions(**provider_settings)
    return rtc.E2EEOptions(key_provider_options=provider_options)
def get_e2ee_key(ctx: JobContext) -> bytes | None:
    """Return the E2EE shared key for this job, or ``None`` if none is configured.

    Lookup order:

    1. The base64 ``e2ee_key`` field of the JSON dispatch metadata
       (``ctx.job.metadata``, set by bot.py).
    2. The ``E2EE_SHARED_KEY`` environment variable (testing fallback).

    Malformed dispatch metadata is logged as a warning and skipped, so the
    environment fallback is still consulted.

    Args:
        ctx: Job context; only ``ctx.job.metadata`` is read.

    Returns:
        The raw key bytes, or ``None`` when neither source provides a key.

    Raises:
        ValueError: If ``E2EE_SHARED_KEY`` is longer than 32 characters but is
            not valid base64 (``binascii.Error`` is a ``ValueError`` subclass).
    """
    # Try dispatch metadata first (set by bot.py).
    metadata_str = getattr(ctx.job, "metadata", None) or ""
    if metadata_str:
        try:
            meta = json.loads(metadata_str)
            if not isinstance(meta, dict):
                # Valid JSON but not an object: there is no "e2ee_key" to read.
                raise TypeError(
                    f"expected a JSON object, got {type(meta).__name__}"
                )
            key_b64 = meta.get("e2ee_key")
            if key_b64:
                key = base64.b64decode(key_b64)
                logger.info("E2EE key from dispatch metadata (%d bytes)", len(key))
                return key
        except (ValueError, TypeError) as e:
            # ValueError covers json.JSONDecodeError and binascii.Error;
            # TypeError covers non-string payloads and non-object metadata.
            logger.warning("Failed to parse dispatch metadata for E2EE key: %s", e)

    # Fallback: environment variable (for testing).
    env_key = os.environ.get("E2EE_SHARED_KEY")
    if env_key:
        # Heuristic: values longer than 32 characters are decoded as base64;
        # anything shorter is used verbatim as UTF-8 bytes.
        # NOTE(review): a base64-encoded 16-byte key is only 24 characters and
        # would therefore be used verbatim — confirm this is intended.
        key = base64.b64decode(env_key) if len(env_key) > 32 else env_key.encode()
        logger.info("E2EE key from environment (%d bytes)", len(key))
        return key

    return None
@server.rtc_session(agent_name=os.environ.get("AGENT_NAME", "matrix-ai"))
async def entrypoint(ctx: JobContext):
logger.info("Job received for room %s", ctx.job.room.name)
# Standard framework connection (handles audio pipeline properly)
await ctx.connect()
# Check for E2EE key
e2ee_key = get_e2ee_key(ctx)
e2ee_opts = None
if e2ee_key:
e2ee_opts = build_e2ee_options(e2ee_key)
logger.info("E2EE enabled with HKDF key derivation")
else:
logger.info("E2EE disabled (no key provided)")
# Connect to room with optional E2EE
await ctx.connect(e2ee=e2ee_opts)
logger.info("Connected to room, local identity: %s", ctx.room.local_participant.identity)
logger.info("Remote participants: %s", list(ctx.room.remote_participants.keys()))
@@ -53,7 +109,6 @@ async def entrypoint(ctx: JobContext):
vad=ctx.proc.userdata["vad"],
)
# Debug: log pipeline events
@session.on("user_speech_committed")
def on_speech(msg):
logger.info("USER_SPEECH_COMMITTED: %s", msg.text_content)