fix: E2EE key timing + verbose logging + shorter greeting

- Reorder: send call member event BEFORE creating VoiceSession
- Store VoiceSession BEFORE start so sync handler can forward keys
- Increase E2EE key wait from 3s to 10s
- Add INFO-level logging for E2EE key lookup; run the fallback room state scan via the HTTP API (httpx) and log call/encryption state events
- Tighten voice system prompt to prevent long rambling greetings

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-21 14:55:52 +02:00
parent 80582860b9
commit 85df4b295f
2 changed files with 62 additions and 42 deletions

86
bot.py
View File

@@ -413,34 +413,8 @@ class Bot:
lk_room_name = base64.b64encode(lk_room_hash).decode().rstrip("=") lk_room_name = base64.b64encode(lk_room_hash).decode().rstrip("=")
logger.info("LiveKit room name: %s (hashed from %s)", lk_room_name, room_id) logger.info("LiveKit room name: %s (hashed from %s)", lk_room_name, room_id)
if room_id not in self.voice_sessions: # Send our own call member state event FIRST so Element Call
try: # sends encryption_keys in response (before we start VoiceSession)
model = self.room_models.get(room_id, DEFAULT_MODEL)
vs = VoiceSession(
nio_client=self.client,
room_id=room_id,
device_id=BOT_DEVICE_ID,
lk_url=LK_URL,
model=model,
)
# Read existing encryption keys from room state before starting
caller_device_id = content.get("device_id", "")
caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id)
if caller_key:
vs.on_encryption_key(event.sender, caller_device_id, caller_key, 0)
await vs.start()
self.voice_sessions[room_id] = vs
logger.info("Voice session started for room %s (e2ee_key=%s)",
room_id, "yes" if caller_key else "no")
# Publish our E2EE key so Element Call sees us as encrypted
if caller_key:
await self._publish_encryption_key(room_id, caller_key)
except Exception:
logger.exception("Voice session start failed for %s", room_id)
# Send our own call member state event
call_content = { call_content = {
"application": "m.call", "application": "m.call",
"call_id": "", "call_id": "",
@@ -464,6 +438,37 @@ class Bot:
except Exception: except Exception:
logger.exception("Failed to send call member event in %s", room_id) logger.exception("Failed to send call member event in %s", room_id)
# Now create VoiceSession — encryption_keys may arrive via sync
# while VoiceSession waits for key (up to 10s)
if room_id not in self.voice_sessions:
try:
model = self.room_models.get(room_id, DEFAULT_MODEL)
caller_device_id = content.get("device_id", "")
vs = VoiceSession(
nio_client=self.client,
room_id=room_id,
device_id=BOT_DEVICE_ID,
lk_url=LK_URL,
model=model,
)
# Try reading encryption key from room state
caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id)
if caller_key:
vs.on_encryption_key(event.sender, caller_device_id, caller_key, 0)
# Store BEFORE start so on_unknown handler can forward keys
self.voice_sessions[room_id] = vs
await vs.start()
logger.info("Voice session started for room %s (e2ee_key=%s)",
room_id, "yes" if caller_key else "no")
# Publish our E2EE key so Element Call sees us as encrypted
if caller_key:
await self._publish_encryption_key(room_id, caller_key)
except Exception:
logger.exception("Voice session start failed for %s", room_id)
self.voice_sessions.pop(room_id, None)
else: else:
# Empty content = someone left the call, check if anyone is still calling # Empty content = someone left the call, check if anyone is still calling
room_id = room.room_id room_id = room.room_id
@@ -1452,18 +1457,29 @@ class Bot:
resp = await self.client.room_get_state_event( resp = await self.client.room_get_state_event(
room_id, ENCRYPTION_KEYS_TYPE, state_key, room_id, ENCRYPTION_KEYS_TYPE, state_key,
) )
logger.info("E2EE key lookup state_key=%s → resp type=%s", state_key, type(resp).__name__)
key = self._extract_e2ee_key(resp, sender, state_key) key = self._extract_e2ee_key(resp, sender, state_key)
if key: if key:
return key return key
except Exception as e: except Exception as e:
logger.debug("No encryption key at state_key=%s: %s", state_key, e) logger.info("E2EE key lookup state_key=%s failed: %s", state_key, e)
# Fallback: scan all room state for any encryption_keys event # Fallback: scan all room state via HTTP API for any encryption_keys event
try: try:
resp = await self.client.room_get_state(room_id) import httpx
if hasattr(resp, "events"): token = self.client.access_token
for evt in resp.events: url = f"{HOMESERVER}/_matrix/client/v3/rooms/{room_id}/state"
if evt.get("type") == ENCRYPTION_KEYS_TYPE and evt.get("sender") != BOT_USER: async with httpx.AsyncClient(timeout=10.0) as http:
resp = await http.get(url, headers={"Authorization": f"Bearer {token}"})
resp.raise_for_status()
events = resp.json()
logger.info("Room state scan: %d events total", len(events))
for evt in events:
evt_type = evt.get("type", "")
if "call" in evt_type or "encryption" in evt_type:
logger.info(" state event: type=%s state_key=%s content_keys=%s",
evt_type, evt.get("state_key", ""), list(evt.get("content", {}).keys())[:5])
if evt_type == ENCRYPTION_KEYS_TYPE and evt.get("sender") != BOT_USER:
content = evt.get("content", {}) content = evt.get("content", {})
keys = content.get("keys", []) keys = content.get("keys", [])
for k in keys: for k in keys:
@@ -1475,7 +1491,7 @@ class Bot:
evt.get("state_key", "?"), len(key)) evt.get("state_key", "?"), len(key))
return key return key
except Exception as e: except Exception as e:
logger.debug("Room state scan for encryption keys failed: %s", e) logger.warning("Room state scan for encryption keys failed: %s", e)
logger.warning("No E2EE encryption key found for %s in %s", sender, room_id) logger.warning("No E2EE encryption key found for %s in %s", sender, room_id)
return None return None

View File

@@ -23,10 +23,14 @@ ELEVENLABS_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
DEFAULT_VOICE_ID = "onwK4e9ZLuTAKqWW03F9" # Daniel - male, free tier DEFAULT_VOICE_ID = "onwK4e9ZLuTAKqWW03F9" # Daniel - male, free tier
VOICE_PROMPT = """Du bist ein hilfreicher Sprachassistent in einem Matrix-Anruf. VOICE_PROMPT = """Du bist ein hilfreicher Sprachassistent in einem Matrix-Anruf.
Regeln:
- Halte Antworten KURZ - 1-3 Saetze maximal STRIKTE Regeln:
- Sei direkt, keine Fuellwoerter - Antworte IMMER auf Deutsch
- Antworte immer auf Deutsch""" - Halte JEDE Antwort auf MAXIMAL 1-2 kurze Saetze
- Sei direkt und praezise, keine Fuellwoerter
- Erfinde NICHTS - keine Geschichten, keine Musik, keine Fantasie
- Beantworte nur was gefragt wird
- Wenn niemand etwas fragt, sage nur kurz Hallo"""
_vad = None _vad = None
def _get_vad(): def _get_vad():
@@ -121,13 +125,13 @@ class VoiceSession:
user_id = self.nio_client.user_id user_id = self.nio_client.user_id
jwt = _generate_lk_jwt(self.room_id, user_id, self.device_id) jwt = _generate_lk_jwt(self.room_id, user_id, self.device_id)
# Wait up to 3s for E2EE encryption key from Element Call # Wait up to 10s for E2EE encryption key from Element Call
for _ in range(30): for _ in range(100):
if self._e2ee_key: if self._e2ee_key:
break break
await asyncio.sleep(0.1) await asyncio.sleep(0.1)
if not self._e2ee_key: if not self._e2ee_key:
logger.warning("No E2EE key received after 3s, connecting without encryption") logger.warning("No E2EE key received after 10s, connecting without encryption")
# Connect with E2EE if key available # Connect with E2EE if key available
e2ee_opts = None e2ee_opts = None