Files
matrix-ai-agent/bot.py
Christian Gick d5af90c7c7 fix(CF-1170): Fix STT by correcting agent dispatch flow
Three fixes for voice agent not responding to speech:
1. Agent name: add --agent-name matrix-ai to CLI (was empty, dispatch couldnt match)
2. Move dispatch from on_invite to on_unknown call handler (dispatch when call starts, not on room join)
3. Use LiveKit room name from foci_preferred instead of raw Matrix room ID

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 17:21:32 +02:00

238 lines
9.0 KiB
Python

import os
import json
import asyncio
import logging
from nio import (
AsyncClient,
AsyncClientConfig,
LoginResponse,
InviteMemberEvent,
MegolmEvent,
SyncResponse,
UnknownEvent,
KeyVerificationStart,
KeyVerificationCancel,
KeyVerificationKey,
KeyVerificationMac,
ToDeviceError,
)
from livekit import api
BOT_DEVICE_ID = "AIBOT"
CALL_MEMBER_TYPE = "org.matrix.msc3401.call.member"
logger = logging.getLogger("matrix-ai-bot")
HOMESERVER = os.environ["MATRIX_HOMESERVER"]
BOT_USER = os.environ["MATRIX_BOT_USER"]
BOT_PASS = os.environ["MATRIX_BOT_PASSWORD"]
LK_URL = os.environ["LIVEKIT_URL"]
LK_KEY = os.environ["LIVEKIT_API_KEY"]
LK_SECRET = os.environ["LIVEKIT_API_SECRET"]
AGENT_NAME = os.environ.get("AGENT_NAME", "matrix-ai")
STORE_PATH = os.environ.get("CRYPTO_STORE_PATH", "/data/crypto_store")
CREDS_FILE = os.path.join(STORE_PATH, "credentials.json")
class Bot:
def __init__(self):
config = AsyncClientConfig(
max_limit_exceeded=0,
max_timeouts=0,
store_sync_tokens=True,
encryption_enabled=True,
)
self.client = AsyncClient(
HOMESERVER,
BOT_USER,
store_path=STORE_PATH,
config=config,
)
self.lkapi = None
self.dispatched_rooms = set()
self.active_calls = set() # rooms where we've sent call member event
async def start(self):
# Restore existing session or create new one
if os.path.exists(CREDS_FILE):
with open(CREDS_FILE) as f:
creds = json.load(f)
self.client.restore_login(
user_id=creds["user_id"],
device_id=creds["device_id"],
access_token=creds["access_token"],
)
self.client.load_store()
logger.info("Restored session as %s (device %s)", creds["user_id"], creds["device_id"])
else:
resp = await self.client.login(BOT_PASS, device_name="ai-voice-bot")
if not isinstance(resp, LoginResponse):
logger.error("Login failed: %s", resp)
return
# Persist credentials for next restart
with open(CREDS_FILE, "w") as f:
json.dump({
"user_id": resp.user_id,
"device_id": resp.device_id,
"access_token": resp.access_token,
}, f)
logger.info("Logged in as %s (device %s) — credentials saved", resp.user_id, resp.device_id)
if self.client.should_upload_keys:
await self.client.keys_upload()
self.lkapi = api.LiveKitAPI(LK_URL, LK_KEY, LK_SECRET)
self.client.add_event_callback(self.on_invite, InviteMemberEvent)
self.client.add_event_callback(self.on_megolm, MegolmEvent)
self.client.add_event_callback(self.on_unknown, UnknownEvent)
self.client.add_response_callback(self.on_sync, SyncResponse)
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationStart)
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationKey)
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationMac)
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationCancel)
await self.client.sync_forever(timeout=30000, full_state=True)
async def on_invite(self, room, event: InviteMemberEvent):
if event.state_key != BOT_USER:
return
logger.info("Invited to %s, joining room", room.room_id)
await self.client.join(room.room_id)
async def on_sync(self, response: SyncResponse):
"""After each sync, trust all devices in our rooms."""
for user_id in list(self.client.device_store.users):
for device in self.client.device_store.active_user_devices(user_id):
if not device.verified:
self.client.verify_device(device)
logger.info("Auto-trusted device %s of %s", device.device_id, user_id)
async def on_unknown(self, room, event: UnknownEvent):
"""Handle call member state events to join calls."""
if event.type != CALL_MEMBER_TYPE:
return
if event.sender == BOT_USER:
return # ignore our own events
# Non-empty content means someone started/is in a call
if event.source.get("content", {}):
room_id = room.room_id
if room_id in self.active_calls:
return
logger.info("Call detected in %s from %s, joining...", room_id, event.sender)
self.active_calls.add(room_id)
# Get the foci_preferred from the caller's event
content = event.source["content"]
foci = content.get("foci_preferred", [{
"type": "livekit",
"livekit_service_url": f"{HOMESERVER}/livekit-jwt-service",
"livekit_alias": room_id,
}])
# Extract LiveKit room name from foci and dispatch agent
lk_room_name = room_id # fallback
for f in foci:
if f.get("type") == "livekit" and f.get("livekit_alias"):
lk_room_name = f["livekit_alias"]
break
logger.info("LiveKit room name: %s (from foci_preferred)", lk_room_name)
if room_id not in self.dispatched_rooms:
try:
await self.lkapi.agent_dispatch.create_dispatch(
api.CreateAgentDispatchRequest(
agent_name=AGENT_NAME,
room=lk_room_name,
)
)
self.dispatched_rooms.add(room_id)
logger.info("Agent dispatched to LiveKit room %s", lk_room_name)
except Exception:
logger.exception("Dispatch failed for %s", lk_room_name)
# Send our own call member state event
call_content = {
"application": "m.call",
"call_id": "",
"scope": "m.room",
"device_id": BOT_DEVICE_ID,
"expires": 7200000,
"focus_active": {
"type": "livekit",
"focus_selection": "oldest_membership",
},
"foci_preferred": foci,
"m.call.intent": "audio",
}
state_key = f"_{BOT_USER}_{BOT_DEVICE_ID}_m.call"
try:
resp = await self.client.room_put_state(
room_id, CALL_MEMBER_TYPE, call_content, state_key=state_key,
)
logger.info("Sent call member event in %s: %s", room_id, resp)
except Exception:
logger.exception("Failed to send call member event in %s", room_id)
else:
# Empty content = someone left the call, check if anyone is still calling
room_id = room.room_id
if room_id in self.active_calls:
# Leave the call too
self.active_calls.discard(room_id)
state_key = f"_{BOT_USER}_{BOT_DEVICE_ID}_m.call"
try:
await self.client.room_put_state(
room_id, CALL_MEMBER_TYPE, {}, state_key=state_key,
)
logger.info("Left call in %s", room_id)
except Exception:
logger.exception("Failed to leave call in %s", room_id)
async def on_megolm(self, room, event: MegolmEvent):
"""Log undecryptable messages."""
logger.warning(
"Undecryptable event %s in %s from %s",
event.event_id, room.room_id, event.sender,
)
async def on_key_verification(self, event):
"""Auto-accept key verification requests."""
if isinstance(event, KeyVerificationStart):
sas = self.client.key_verifications.get(event.transaction_id)
if sas:
await self.client.accept_key_verification(event.transaction_id)
await self.client.to_device(sas.share_key())
elif isinstance(event, KeyVerificationKey):
sas = self.client.key_verifications.get(event.transaction_id)
if sas:
await self.client.confirm_short_auth_string(event.transaction_id)
elif isinstance(event, KeyVerificationMac):
sas = self.client.key_verifications.get(event.transaction_id)
if sas:
mac = sas.get_mac()
if not isinstance(mac, ToDeviceError):
await self.client.to_device(mac)
async def cleanup(self):
await self.client.close()
if self.lkapi:
await self.lkapi.aclose()
async def main():
os.makedirs(STORE_PATH, exist_ok=True)
bot = Bot()
try:
await bot.start()
finally:
await bot.cleanup()
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
asyncio.run(main())