fix: E2EE key timing + verbose logging + shorter greeting
- Reorder: send call member event BEFORE creating VoiceSession
- Store VoiceSession BEFORE start so sync handler can forward keys
- Increase E2EE key wait from 3s to 10s
- Add INFO-level logging for key lookup + room state scan via HTTP API
- Tighten voice system prompt to prevent long rambling greetings

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
86
bot.py
86
bot.py
@@ -413,34 +413,8 @@ class Bot:
|
||||
lk_room_name = base64.b64encode(lk_room_hash).decode().rstrip("=")
|
||||
logger.info("LiveKit room name: %s (hashed from %s)", lk_room_name, room_id)
|
||||
|
||||
if room_id not in self.voice_sessions:
|
||||
try:
|
||||
model = self.room_models.get(room_id, DEFAULT_MODEL)
|
||||
vs = VoiceSession(
|
||||
nio_client=self.client,
|
||||
room_id=room_id,
|
||||
device_id=BOT_DEVICE_ID,
|
||||
lk_url=LK_URL,
|
||||
model=model,
|
||||
)
|
||||
# Read existing encryption keys from room state before starting
|
||||
caller_device_id = content.get("device_id", "")
|
||||
caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id)
|
||||
if caller_key:
|
||||
vs.on_encryption_key(event.sender, caller_device_id, caller_key, 0)
|
||||
|
||||
await vs.start()
|
||||
self.voice_sessions[room_id] = vs
|
||||
logger.info("Voice session started for room %s (e2ee_key=%s)",
|
||||
room_id, "yes" if caller_key else "no")
|
||||
|
||||
# Publish our E2EE key so Element Call sees us as encrypted
|
||||
if caller_key:
|
||||
await self._publish_encryption_key(room_id, caller_key)
|
||||
except Exception:
|
||||
logger.exception("Voice session start failed for %s", room_id)
|
||||
|
||||
# Send our own call member state event
|
||||
# Send our own call member state event FIRST so Element Call
|
||||
# sends encryption_keys in response (before we start VoiceSession)
|
||||
call_content = {
|
||||
"application": "m.call",
|
||||
"call_id": "",
|
||||
@@ -464,6 +438,37 @@ class Bot:
|
||||
except Exception:
|
||||
logger.exception("Failed to send call member event in %s", room_id)
|
||||
|
||||
# Now create VoiceSession — encryption_keys may arrive via sync
|
||||
# while VoiceSession waits for key (up to 10s)
|
||||
if room_id not in self.voice_sessions:
|
||||
try:
|
||||
model = self.room_models.get(room_id, DEFAULT_MODEL)
|
||||
caller_device_id = content.get("device_id", "")
|
||||
vs = VoiceSession(
|
||||
nio_client=self.client,
|
||||
room_id=room_id,
|
||||
device_id=BOT_DEVICE_ID,
|
||||
lk_url=LK_URL,
|
||||
model=model,
|
||||
)
|
||||
# Try reading encryption key from room state
|
||||
caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id)
|
||||
if caller_key:
|
||||
vs.on_encryption_key(event.sender, caller_device_id, caller_key, 0)
|
||||
|
||||
# Store BEFORE start so on_unknown handler can forward keys
|
||||
self.voice_sessions[room_id] = vs
|
||||
await vs.start()
|
||||
logger.info("Voice session started for room %s (e2ee_key=%s)",
|
||||
room_id, "yes" if caller_key else "no")
|
||||
|
||||
# Publish our E2EE key so Element Call sees us as encrypted
|
||||
if caller_key:
|
||||
await self._publish_encryption_key(room_id, caller_key)
|
||||
except Exception:
|
||||
logger.exception("Voice session start failed for %s", room_id)
|
||||
self.voice_sessions.pop(room_id, None)
|
||||
|
||||
else:
|
||||
# Empty content = someone left the call, check if anyone is still calling
|
||||
room_id = room.room_id
|
||||
@@ -1452,18 +1457,29 @@ class Bot:
|
||||
resp = await self.client.room_get_state_event(
|
||||
room_id, ENCRYPTION_KEYS_TYPE, state_key,
|
||||
)
|
||||
logger.info("E2EE key lookup state_key=%s → resp type=%s", state_key, type(resp).__name__)
|
||||
key = self._extract_e2ee_key(resp, sender, state_key)
|
||||
if key:
|
||||
return key
|
||||
except Exception as e:
|
||||
logger.debug("No encryption key at state_key=%s: %s", state_key, e)
|
||||
logger.info("E2EE key lookup state_key=%s failed: %s", state_key, e)
|
||||
|
||||
# Fallback: scan all room state for any encryption_keys event
|
||||
# Fallback: scan all room state via HTTP API for any encryption_keys event
|
||||
try:
|
||||
resp = await self.client.room_get_state(room_id)
|
||||
if hasattr(resp, "events"):
|
||||
for evt in resp.events:
|
||||
if evt.get("type") == ENCRYPTION_KEYS_TYPE and evt.get("sender") != BOT_USER:
|
||||
import httpx
|
||||
token = self.client.access_token
|
||||
url = f"{HOMESERVER}/_matrix/client/v3/rooms/{room_id}/state"
|
||||
async with httpx.AsyncClient(timeout=10.0) as http:
|
||||
resp = await http.get(url, headers={"Authorization": f"Bearer {token}"})
|
||||
resp.raise_for_status()
|
||||
events = resp.json()
|
||||
logger.info("Room state scan: %d events total", len(events))
|
||||
for evt in events:
|
||||
evt_type = evt.get("type", "")
|
||||
if "call" in evt_type or "encryption" in evt_type:
|
||||
logger.info(" state event: type=%s state_key=%s content_keys=%s",
|
||||
evt_type, evt.get("state_key", ""), list(evt.get("content", {}).keys())[:5])
|
||||
if evt_type == ENCRYPTION_KEYS_TYPE and evt.get("sender") != BOT_USER:
|
||||
content = evt.get("content", {})
|
||||
keys = content.get("keys", [])
|
||||
for k in keys:
|
||||
@@ -1475,7 +1491,7 @@ class Bot:
|
||||
evt.get("state_key", "?"), len(key))
|
||||
return key
|
||||
except Exception as e:
|
||||
logger.debug("Room state scan for encryption keys failed: %s", e)
|
||||
logger.warning("Room state scan for encryption keys failed: %s", e)
|
||||
|
||||
logger.warning("No E2EE encryption key found for %s in %s", sender, room_id)
|
||||
return None
|
||||
|
||||
18
voice.py
18
voice.py
@@ -23,10 +23,14 @@ ELEVENLABS_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
|
||||
DEFAULT_VOICE_ID = "onwK4e9ZLuTAKqWW03F9" # Daniel - male, free tier
|
||||
|
||||
VOICE_PROMPT = """Du bist ein hilfreicher Sprachassistent in einem Matrix-Anruf.
|
||||
Regeln:
|
||||
- Halte Antworten KURZ - 1-3 Saetze maximal
|
||||
- Sei direkt, keine Fuellwoerter
|
||||
- Antworte immer auf Deutsch"""
|
||||
|
||||
STRIKTE Regeln:
|
||||
- Antworte IMMER auf Deutsch
|
||||
- Halte JEDE Antwort auf MAXIMAL 1-2 kurze Saetze
|
||||
- Sei direkt und praezise, keine Fuellwoerter
|
||||
- Erfinde NICHTS - keine Geschichten, keine Musik, keine Fantasie
|
||||
- Beantworte nur was gefragt wird
|
||||
- Wenn niemand etwas fragt, sage nur kurz Hallo"""
|
||||
|
||||
_vad = None
|
||||
def _get_vad():
|
||||
@@ -121,13 +125,13 @@ class VoiceSession:
|
||||
user_id = self.nio_client.user_id
|
||||
jwt = _generate_lk_jwt(self.room_id, user_id, self.device_id)
|
||||
|
||||
# Wait up to 3s for E2EE encryption key from Element Call
|
||||
for _ in range(30):
|
||||
# Wait up to 10s for E2EE encryption key from Element Call
|
||||
for _ in range(100):
|
||||
if self._e2ee_key:
|
||||
break
|
||||
await asyncio.sleep(0.1)
|
||||
if not self._e2ee_key:
|
||||
logger.warning("No E2EE key received after 3s, connecting without encryption")
|
||||
logger.warning("No E2EE key received after 10s, connecting without encryption")
|
||||
|
||||
# Connect with E2EE if key available
|
||||
e2ee_opts = None
|
||||
|
||||
Reference in New Issue
Block a user