fix: E2EE key timing + verbose logging + shorter greeting
- Reorder: send call member event BEFORE creating VoiceSession
- Store VoiceSession BEFORE start so sync handler can forward keys
- Increase E2EE key wait from 3s to 10s
- Add INFO-level logging for key lookup + room state scan via HTTP API
- Tighten voice system prompt to prevent long rambling greetings

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
86
bot.py
86
bot.py
@@ -413,34 +413,8 @@ class Bot:
|
|||||||
lk_room_name = base64.b64encode(lk_room_hash).decode().rstrip("=")
|
lk_room_name = base64.b64encode(lk_room_hash).decode().rstrip("=")
|
||||||
logger.info("LiveKit room name: %s (hashed from %s)", lk_room_name, room_id)
|
logger.info("LiveKit room name: %s (hashed from %s)", lk_room_name, room_id)
|
||||||
|
|
||||||
if room_id not in self.voice_sessions:
|
# Send our own call member state event FIRST so Element Call
|
||||||
try:
|
# sends encryption_keys in response (before we start VoiceSession)
|
||||||
model = self.room_models.get(room_id, DEFAULT_MODEL)
|
|
||||||
vs = VoiceSession(
|
|
||||||
nio_client=self.client,
|
|
||||||
room_id=room_id,
|
|
||||||
device_id=BOT_DEVICE_ID,
|
|
||||||
lk_url=LK_URL,
|
|
||||||
model=model,
|
|
||||||
)
|
|
||||||
# Read existing encryption keys from room state before starting
|
|
||||||
caller_device_id = content.get("device_id", "")
|
|
||||||
caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id)
|
|
||||||
if caller_key:
|
|
||||||
vs.on_encryption_key(event.sender, caller_device_id, caller_key, 0)
|
|
||||||
|
|
||||||
await vs.start()
|
|
||||||
self.voice_sessions[room_id] = vs
|
|
||||||
logger.info("Voice session started for room %s (e2ee_key=%s)",
|
|
||||||
room_id, "yes" if caller_key else "no")
|
|
||||||
|
|
||||||
# Publish our E2EE key so Element Call sees us as encrypted
|
|
||||||
if caller_key:
|
|
||||||
await self._publish_encryption_key(room_id, caller_key)
|
|
||||||
except Exception:
|
|
||||||
logger.exception("Voice session start failed for %s", room_id)
|
|
||||||
|
|
||||||
# Send our own call member state event
|
|
||||||
call_content = {
|
call_content = {
|
||||||
"application": "m.call",
|
"application": "m.call",
|
||||||
"call_id": "",
|
"call_id": "",
|
||||||
@@ -464,6 +438,37 @@ class Bot:
|
|||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Failed to send call member event in %s", room_id)
|
logger.exception("Failed to send call member event in %s", room_id)
|
||||||
|
|
||||||
|
# Now create VoiceSession — encryption_keys may arrive via sync
|
||||||
|
# while VoiceSession waits for key (up to 10s)
|
||||||
|
if room_id not in self.voice_sessions:
|
||||||
|
try:
|
||||||
|
model = self.room_models.get(room_id, DEFAULT_MODEL)
|
||||||
|
caller_device_id = content.get("device_id", "")
|
||||||
|
vs = VoiceSession(
|
||||||
|
nio_client=self.client,
|
||||||
|
room_id=room_id,
|
||||||
|
device_id=BOT_DEVICE_ID,
|
||||||
|
lk_url=LK_URL,
|
||||||
|
model=model,
|
||||||
|
)
|
||||||
|
# Try reading encryption key from room state
|
||||||
|
caller_key = await self._get_call_encryption_key(room_id, event.sender, caller_device_id)
|
||||||
|
if caller_key:
|
||||||
|
vs.on_encryption_key(event.sender, caller_device_id, caller_key, 0)
|
||||||
|
|
||||||
|
# Store BEFORE start so on_unknown handler can forward keys
|
||||||
|
self.voice_sessions[room_id] = vs
|
||||||
|
await vs.start()
|
||||||
|
logger.info("Voice session started for room %s (e2ee_key=%s)",
|
||||||
|
room_id, "yes" if caller_key else "no")
|
||||||
|
|
||||||
|
# Publish our E2EE key so Element Call sees us as encrypted
|
||||||
|
if caller_key:
|
||||||
|
await self._publish_encryption_key(room_id, caller_key)
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Voice session start failed for %s", room_id)
|
||||||
|
self.voice_sessions.pop(room_id, None)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Empty content = someone left the call, check if anyone is still calling
|
# Empty content = someone left the call, check if anyone is still calling
|
||||||
room_id = room.room_id
|
room_id = room.room_id
|
||||||
@@ -1452,18 +1457,29 @@ class Bot:
|
|||||||
resp = await self.client.room_get_state_event(
|
resp = await self.client.room_get_state_event(
|
||||||
room_id, ENCRYPTION_KEYS_TYPE, state_key,
|
room_id, ENCRYPTION_KEYS_TYPE, state_key,
|
||||||
)
|
)
|
||||||
|
logger.info("E2EE key lookup state_key=%s → resp type=%s", state_key, type(resp).__name__)
|
||||||
key = self._extract_e2ee_key(resp, sender, state_key)
|
key = self._extract_e2ee_key(resp, sender, state_key)
|
||||||
if key:
|
if key:
|
||||||
return key
|
return key
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("No encryption key at state_key=%s: %s", state_key, e)
|
logger.info("E2EE key lookup state_key=%s failed: %s", state_key, e)
|
||||||
|
|
||||||
# Fallback: scan all room state for any encryption_keys event
|
# Fallback: scan all room state via HTTP API for any encryption_keys event
|
||||||
try:
|
try:
|
||||||
resp = await self.client.room_get_state(room_id)
|
import httpx
|
||||||
if hasattr(resp, "events"):
|
token = self.client.access_token
|
||||||
for evt in resp.events:
|
url = f"{HOMESERVER}/_matrix/client/v3/rooms/{room_id}/state"
|
||||||
if evt.get("type") == ENCRYPTION_KEYS_TYPE and evt.get("sender") != BOT_USER:
|
async with httpx.AsyncClient(timeout=10.0) as http:
|
||||||
|
resp = await http.get(url, headers={"Authorization": f"Bearer {token}"})
|
||||||
|
resp.raise_for_status()
|
||||||
|
events = resp.json()
|
||||||
|
logger.info("Room state scan: %d events total", len(events))
|
||||||
|
for evt in events:
|
||||||
|
evt_type = evt.get("type", "")
|
||||||
|
if "call" in evt_type or "encryption" in evt_type:
|
||||||
|
logger.info(" state event: type=%s state_key=%s content_keys=%s",
|
||||||
|
evt_type, evt.get("state_key", ""), list(evt.get("content", {}).keys())[:5])
|
||||||
|
if evt_type == ENCRYPTION_KEYS_TYPE and evt.get("sender") != BOT_USER:
|
||||||
content = evt.get("content", {})
|
content = evt.get("content", {})
|
||||||
keys = content.get("keys", [])
|
keys = content.get("keys", [])
|
||||||
for k in keys:
|
for k in keys:
|
||||||
@@ -1475,7 +1491,7 @@ class Bot:
|
|||||||
evt.get("state_key", "?"), len(key))
|
evt.get("state_key", "?"), len(key))
|
||||||
return key
|
return key
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Room state scan for encryption keys failed: %s", e)
|
logger.warning("Room state scan for encryption keys failed: %s", e)
|
||||||
|
|
||||||
logger.warning("No E2EE encryption key found for %s in %s", sender, room_id)
|
logger.warning("No E2EE encryption key found for %s in %s", sender, room_id)
|
||||||
return None
|
return None
|
||||||
|
|||||||
18
voice.py
18
voice.py
@@ -23,10 +23,14 @@ ELEVENLABS_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
|
|||||||
DEFAULT_VOICE_ID = "onwK4e9ZLuTAKqWW03F9" # Daniel - male, free tier
|
DEFAULT_VOICE_ID = "onwK4e9ZLuTAKqWW03F9" # Daniel - male, free tier
|
||||||
|
|
||||||
VOICE_PROMPT = """Du bist ein hilfreicher Sprachassistent in einem Matrix-Anruf.
|
VOICE_PROMPT = """Du bist ein hilfreicher Sprachassistent in einem Matrix-Anruf.
|
||||||
Regeln:
|
|
||||||
- Halte Antworten KURZ - 1-3 Saetze maximal
|
STRIKTE Regeln:
|
||||||
- Sei direkt, keine Fuellwoerter
|
- Antworte IMMER auf Deutsch
|
||||||
- Antworte immer auf Deutsch"""
|
- Halte JEDE Antwort auf MAXIMAL 1-2 kurze Saetze
|
||||||
|
- Sei direkt und praezise, keine Fuellwoerter
|
||||||
|
- Erfinde NICHTS - keine Geschichten, keine Musik, keine Fantasie
|
||||||
|
- Beantworte nur was gefragt wird
|
||||||
|
- Wenn niemand etwas fragt, sage nur kurz Hallo"""
|
||||||
|
|
||||||
_vad = None
|
_vad = None
|
||||||
def _get_vad():
|
def _get_vad():
|
||||||
@@ -121,13 +125,13 @@ class VoiceSession:
|
|||||||
user_id = self.nio_client.user_id
|
user_id = self.nio_client.user_id
|
||||||
jwt = _generate_lk_jwt(self.room_id, user_id, self.device_id)
|
jwt = _generate_lk_jwt(self.room_id, user_id, self.device_id)
|
||||||
|
|
||||||
# Wait up to 3s for E2EE encryption key from Element Call
|
# Wait up to 10s for E2EE encryption key from Element Call
|
||||||
for _ in range(30):
|
for _ in range(100):
|
||||||
if self._e2ee_key:
|
if self._e2ee_key:
|
||||||
break
|
break
|
||||||
await asyncio.sleep(0.1)
|
await asyncio.sleep(0.1)
|
||||||
if not self._e2ee_key:
|
if not self._e2ee_key:
|
||||||
logger.warning("No E2EE key received after 3s, connecting without encryption")
|
logger.warning("No E2EE key received after 10s, connecting without encryption")
|
||||||
|
|
||||||
# Connect with E2EE if key available
|
# Connect with E2EE if key available
|
||||||
e2ee_opts = None
|
e2ee_opts = None
|
||||||
|
|||||||
Reference in New Issue
Block a user