revert: Restore voice.py and bot.py to last known working state (9aef846)
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
65
voice.py
65
voice.py
@@ -24,16 +24,12 @@ LITELLM_KEY = os.environ.get("LITELLM_API_KEY", "not-needed")
|
||||
LK_API_KEY = os.environ.get("LIVEKIT_API_KEY", "")
|
||||
LK_API_SECRET = os.environ.get("LIVEKIT_API_SECRET", "")
|
||||
ELEVENLABS_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
|
||||
DEFAULT_VOICE_ID = "JBFqnCBsd6RMkjVDRZzb" # George - warm, captivating, British male
|
||||
DEFAULT_VOICE_ID = "onwK4e9ZLuTAKqWW03F9" # Daniel - male, free tier
|
||||
|
||||
VOICE_PROMPT_TEMPLATE = """Du bist ein hilfreicher Sprachassistent von Agiliton in einem Matrix-Anruf.
|
||||
Du heisst "Agiliton Assistant". Du basierst auf dem Modell {model}.
|
||||
Wenn jemand fragt welches Modell du bist, sei transparent und sage es.
|
||||
|
||||
Aktuelle Zeit: {datetime}
|
||||
VOICE_PROMPT = """Du bist ein hilfreicher Sprachassistent in einem Matrix-Anruf.
|
||||
|
||||
STRIKTE Regeln:
|
||||
- Antworte in der Sprache in der der Nutzer spricht
|
||||
- Antworte IMMER auf Deutsch
|
||||
- Halte JEDE Antwort auf MAXIMAL 1-2 kurze Saetze
|
||||
- Sei direkt und praezise, keine Fuellwoerter
|
||||
- Erfinde NICHTS - keine Geschichten, keine Musik, keine Fantasie
|
||||
@@ -91,7 +87,7 @@ def _build_e2ee_options(shared_key: bytes) -> rtc.E2EEOptions:
|
||||
|
||||
class VoiceSession:
|
||||
def __init__(self, nio_client, room_id, device_id, lk_url, model="claude-sonnet",
|
||||
publish_key_cb=None, bot_key: bytes | None = None):
|
||||
publish_key_cb=None):
|
||||
self.nio_client = nio_client
|
||||
self.room_id = room_id
|
||||
self.device_id = device_id
|
||||
@@ -101,26 +97,20 @@ class VoiceSession:
|
||||
self.session = None
|
||||
self._task = None
|
||||
self._http_session = None
|
||||
self._caller_key: bytes | None = None
|
||||
self._caller_identity: str | None = None # "sender:device_id" format
|
||||
self._bot_key: bytes = bot_key or os.urandom(16)
|
||||
self._e2ee_key: bytes | None = None
|
||||
self._publish_key_cb = publish_key_cb
|
||||
|
||||
def on_encryption_key(self, sender, device_id, key, index):
|
||||
"""Receive E2EE key from Element Call participant."""
|
||||
if not key:
|
||||
return
|
||||
identity = _make_lk_identity(sender, device_id)
|
||||
self._caller_key = key
|
||||
self._caller_identity = identity
|
||||
logger.info("E2EE key received from %s:%s (identity=%s, index=%d, %d bytes)",
|
||||
sender, device_id, identity, index, len(key))
|
||||
if key and not self._e2ee_key:
|
||||
self._e2ee_key = key
|
||||
logger.info("E2EE key received from %s:%s (index=%d, %d bytes)",
|
||||
sender, device_id, index, len(key))
|
||||
|
||||
async def _fetch_encryption_key_http(self) -> bytes | None:
|
||||
"""Fetch encryption key from room timeline (NOT state) via Matrix HTTP API.
|
||||
|
||||
Element Call distributes encryption keys as timeline events, not state.
|
||||
Also sets self._caller_identity from the event sender + device_id.
|
||||
"""
|
||||
import httpx
|
||||
homeserver = str(self.nio_client.homeserver)
|
||||
@@ -144,16 +134,14 @@ class VoiceSession:
|
||||
if sender == user_id:
|
||||
continue # skip our own key
|
||||
content = evt.get("content", {})
|
||||
caller_device = content.get("device_id", "")
|
||||
logger.info("Found encryption_keys timeline event: sender=%s device=%s",
|
||||
sender, caller_device)
|
||||
if caller_device:
|
||||
self._caller_identity = _make_lk_identity(sender, caller_device)
|
||||
logger.info("Found encryption_keys timeline event: sender=%s content=%s",
|
||||
sender, content)
|
||||
for k in content.get("keys", []):
|
||||
key_b64 = k.get("key", "")
|
||||
if key_b64:
|
||||
key_b64 += "=" * (-len(key_b64) % 4)
|
||||
return base64.urlsafe_b64decode(key_b64)
|
||||
import base64 as b64
|
||||
return b64.urlsafe_b64decode(key_b64)
|
||||
logger.info("No encryption_keys events in last %d timeline events", len(events))
|
||||
except Exception as e:
|
||||
logger.warning("HTTP encryption key fetch failed: %s", e)
|
||||
@@ -189,26 +177,25 @@ class VoiceSession:
|
||||
# Check timeline for caller's encryption key
|
||||
caller_key = await self._fetch_encryption_key_http()
|
||||
if caller_key:
|
||||
self._caller_key = caller_key
|
||||
self._e2ee_key = caller_key
|
||||
logger.info("Got caller E2EE key via timeline (%d bytes)", len(caller_key))
|
||||
|
||||
if not self._caller_key:
|
||||
if not self._e2ee_key:
|
||||
# Wait up to 15s for key via sync handler (bot.py forwards
|
||||
# encryption_keys timeline events to on_encryption_key)
|
||||
logger.info("No key in timeline yet, waiting for sync...")
|
||||
for _ in range(150):
|
||||
if self._caller_key:
|
||||
if self._e2ee_key:
|
||||
break
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
# Publish bot key so Element Call sees us as an E2EE participant
|
||||
if self._publish_key_cb:
|
||||
self._publish_key_cb(self._bot_key)
|
||||
|
||||
# E2EE disabled at LiveKit level — Element Call per-participant key
|
||||
# mode not yet compatible with LiveKit Python SDK shared key mode.
|
||||
# Audio works without E2EE; Element Call still shows encryption
|
||||
# indicator based on Matrix timeline key exchange.
|
||||
# E2EE disabled — Element Call key derivation mismatch not yet resolved.
|
||||
# Audio pipeline confirmed working without E2EE.
|
||||
if self._e2ee_key:
|
||||
logger.info("Caller E2EE key available (%d bytes) — E2EE disabled pending fix",
|
||||
len(self._e2ee_key))
|
||||
if self._publish_key_cb:
|
||||
self._publish_key_cb(self._e2ee_key)
|
||||
e2ee_opts = None
|
||||
|
||||
room_opts = rtc.RoomOptions(e2ee=e2ee_opts)
|
||||
@@ -227,7 +214,7 @@ class VoiceSession:
|
||||
logger.info("Track sub: %s %s kind=%s", p.identity, pub.sid, t.kind)
|
||||
|
||||
await self.lk_room.connect(self.lk_url, jwt, options=room_opts)
|
||||
logger.info("Connected (E2EE=shared key), remote=%d",
|
||||
logger.info("Connected (E2EE=HKDF), remote=%d",
|
||||
len(self.lk_room.remote_participants))
|
||||
|
||||
# Find the remote participant, wait up to 10s if not yet connected
|
||||
@@ -267,9 +254,7 @@ class VoiceSession:
|
||||
def _on_agent_speech(msg):
|
||||
logger.info("AGENT_SPEECH: %s", msg.text_content)
|
||||
|
||||
now = datetime.datetime.now(datetime.timezone.utc).strftime("%A, %B %d, %Y %H:%M UTC")
|
||||
prompt = VOICE_PROMPT_TEMPLATE.format(model=self.model, datetime=now)
|
||||
agent = Agent(instructions=prompt)
|
||||
agent = Agent(instructions=VOICE_PROMPT)
|
||||
io_opts = room_io.RoomOptions(
|
||||
participant_identity=remote_identity,
|
||||
) if remote_identity else room_io.RoomOptions()
|
||||
|
||||
Reference in New Issue
Block a user