Three fixes for voice agent not responding to speech: 1. Agent name: add --agent-name matrix-ai to CLI (was empty, dispatch couldnt match) 2. Move dispatch from on_invite to on_unknown call handler (dispatch when call starts, not on room join) 3. Use LiveKit room name from foci_preferred instead of raw Matrix room ID Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
238 lines
9.0 KiB
Python
238 lines
9.0 KiB
Python
import os
|
|
import json
|
|
import asyncio
|
|
import logging
|
|
|
|
from nio import (
|
|
AsyncClient,
|
|
AsyncClientConfig,
|
|
LoginResponse,
|
|
InviteMemberEvent,
|
|
MegolmEvent,
|
|
SyncResponse,
|
|
UnknownEvent,
|
|
KeyVerificationStart,
|
|
KeyVerificationCancel,
|
|
KeyVerificationKey,
|
|
KeyVerificationMac,
|
|
ToDeviceError,
|
|
)
|
|
from livekit import api
|
|
|
|
BOT_DEVICE_ID = "AIBOT"
|
|
CALL_MEMBER_TYPE = "org.matrix.msc3401.call.member"
|
|
|
|
logger = logging.getLogger("matrix-ai-bot")
|
|
|
|
HOMESERVER = os.environ["MATRIX_HOMESERVER"]
|
|
BOT_USER = os.environ["MATRIX_BOT_USER"]
|
|
BOT_PASS = os.environ["MATRIX_BOT_PASSWORD"]
|
|
LK_URL = os.environ["LIVEKIT_URL"]
|
|
LK_KEY = os.environ["LIVEKIT_API_KEY"]
|
|
LK_SECRET = os.environ["LIVEKIT_API_SECRET"]
|
|
AGENT_NAME = os.environ.get("AGENT_NAME", "matrix-ai")
|
|
STORE_PATH = os.environ.get("CRYPTO_STORE_PATH", "/data/crypto_store")
|
|
CREDS_FILE = os.path.join(STORE_PATH, "credentials.json")
|
|
|
|
|
|
class Bot:
|
|
def __init__(self):
|
|
config = AsyncClientConfig(
|
|
max_limit_exceeded=0,
|
|
max_timeouts=0,
|
|
store_sync_tokens=True,
|
|
encryption_enabled=True,
|
|
)
|
|
self.client = AsyncClient(
|
|
HOMESERVER,
|
|
BOT_USER,
|
|
store_path=STORE_PATH,
|
|
config=config,
|
|
)
|
|
self.lkapi = None
|
|
self.dispatched_rooms = set()
|
|
self.active_calls = set() # rooms where we've sent call member event
|
|
|
|
async def start(self):
|
|
# Restore existing session or create new one
|
|
if os.path.exists(CREDS_FILE):
|
|
with open(CREDS_FILE) as f:
|
|
creds = json.load(f)
|
|
self.client.restore_login(
|
|
user_id=creds["user_id"],
|
|
device_id=creds["device_id"],
|
|
access_token=creds["access_token"],
|
|
)
|
|
self.client.load_store()
|
|
logger.info("Restored session as %s (device %s)", creds["user_id"], creds["device_id"])
|
|
else:
|
|
resp = await self.client.login(BOT_PASS, device_name="ai-voice-bot")
|
|
if not isinstance(resp, LoginResponse):
|
|
logger.error("Login failed: %s", resp)
|
|
return
|
|
# Persist credentials for next restart
|
|
with open(CREDS_FILE, "w") as f:
|
|
json.dump({
|
|
"user_id": resp.user_id,
|
|
"device_id": resp.device_id,
|
|
"access_token": resp.access_token,
|
|
}, f)
|
|
logger.info("Logged in as %s (device %s) — credentials saved", resp.user_id, resp.device_id)
|
|
|
|
if self.client.should_upload_keys:
|
|
await self.client.keys_upload()
|
|
|
|
self.lkapi = api.LiveKitAPI(LK_URL, LK_KEY, LK_SECRET)
|
|
self.client.add_event_callback(self.on_invite, InviteMemberEvent)
|
|
self.client.add_event_callback(self.on_megolm, MegolmEvent)
|
|
self.client.add_event_callback(self.on_unknown, UnknownEvent)
|
|
self.client.add_response_callback(self.on_sync, SyncResponse)
|
|
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationStart)
|
|
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationKey)
|
|
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationMac)
|
|
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationCancel)
|
|
|
|
await self.client.sync_forever(timeout=30000, full_state=True)
|
|
|
|
async def on_invite(self, room, event: InviteMemberEvent):
|
|
if event.state_key != BOT_USER:
|
|
return
|
|
logger.info("Invited to %s, joining room", room.room_id)
|
|
await self.client.join(room.room_id)
|
|
|
|
async def on_sync(self, response: SyncResponse):
|
|
"""After each sync, trust all devices in our rooms."""
|
|
for user_id in list(self.client.device_store.users):
|
|
for device in self.client.device_store.active_user_devices(user_id):
|
|
if not device.verified:
|
|
self.client.verify_device(device)
|
|
logger.info("Auto-trusted device %s of %s", device.device_id, user_id)
|
|
|
|
async def on_unknown(self, room, event: UnknownEvent):
|
|
"""Handle call member state events to join calls."""
|
|
if event.type != CALL_MEMBER_TYPE:
|
|
return
|
|
if event.sender == BOT_USER:
|
|
return # ignore our own events
|
|
|
|
# Non-empty content means someone started/is in a call
|
|
if event.source.get("content", {}):
|
|
room_id = room.room_id
|
|
if room_id in self.active_calls:
|
|
return
|
|
|
|
logger.info("Call detected in %s from %s, joining...", room_id, event.sender)
|
|
self.active_calls.add(room_id)
|
|
|
|
# Get the foci_preferred from the caller's event
|
|
content = event.source["content"]
|
|
foci = content.get("foci_preferred", [{
|
|
"type": "livekit",
|
|
"livekit_service_url": f"{HOMESERVER}/livekit-jwt-service",
|
|
"livekit_alias": room_id,
|
|
}])
|
|
|
|
# Extract LiveKit room name from foci and dispatch agent
|
|
lk_room_name = room_id # fallback
|
|
for f in foci:
|
|
if f.get("type") == "livekit" and f.get("livekit_alias"):
|
|
lk_room_name = f["livekit_alias"]
|
|
break
|
|
logger.info("LiveKit room name: %s (from foci_preferred)", lk_room_name)
|
|
|
|
if room_id not in self.dispatched_rooms:
|
|
try:
|
|
await self.lkapi.agent_dispatch.create_dispatch(
|
|
api.CreateAgentDispatchRequest(
|
|
agent_name=AGENT_NAME,
|
|
room=lk_room_name,
|
|
)
|
|
)
|
|
self.dispatched_rooms.add(room_id)
|
|
logger.info("Agent dispatched to LiveKit room %s", lk_room_name)
|
|
except Exception:
|
|
logger.exception("Dispatch failed for %s", lk_room_name)
|
|
|
|
# Send our own call member state event
|
|
call_content = {
|
|
"application": "m.call",
|
|
"call_id": "",
|
|
"scope": "m.room",
|
|
"device_id": BOT_DEVICE_ID,
|
|
"expires": 7200000,
|
|
"focus_active": {
|
|
"type": "livekit",
|
|
"focus_selection": "oldest_membership",
|
|
},
|
|
"foci_preferred": foci,
|
|
"m.call.intent": "audio",
|
|
}
|
|
|
|
state_key = f"_{BOT_USER}_{BOT_DEVICE_ID}_m.call"
|
|
try:
|
|
resp = await self.client.room_put_state(
|
|
room_id, CALL_MEMBER_TYPE, call_content, state_key=state_key,
|
|
)
|
|
logger.info("Sent call member event in %s: %s", room_id, resp)
|
|
except Exception:
|
|
logger.exception("Failed to send call member event in %s", room_id)
|
|
|
|
else:
|
|
# Empty content = someone left the call, check if anyone is still calling
|
|
room_id = room.room_id
|
|
if room_id in self.active_calls:
|
|
# Leave the call too
|
|
self.active_calls.discard(room_id)
|
|
state_key = f"_{BOT_USER}_{BOT_DEVICE_ID}_m.call"
|
|
try:
|
|
await self.client.room_put_state(
|
|
room_id, CALL_MEMBER_TYPE, {}, state_key=state_key,
|
|
)
|
|
logger.info("Left call in %s", room_id)
|
|
except Exception:
|
|
logger.exception("Failed to leave call in %s", room_id)
|
|
|
|
async def on_megolm(self, room, event: MegolmEvent):
|
|
"""Log undecryptable messages."""
|
|
logger.warning(
|
|
"Undecryptable event %s in %s from %s",
|
|
event.event_id, room.room_id, event.sender,
|
|
)
|
|
|
|
async def on_key_verification(self, event):
|
|
"""Auto-accept key verification requests."""
|
|
if isinstance(event, KeyVerificationStart):
|
|
sas = self.client.key_verifications.get(event.transaction_id)
|
|
if sas:
|
|
await self.client.accept_key_verification(event.transaction_id)
|
|
await self.client.to_device(sas.share_key())
|
|
elif isinstance(event, KeyVerificationKey):
|
|
sas = self.client.key_verifications.get(event.transaction_id)
|
|
if sas:
|
|
await self.client.confirm_short_auth_string(event.transaction_id)
|
|
elif isinstance(event, KeyVerificationMac):
|
|
sas = self.client.key_verifications.get(event.transaction_id)
|
|
if sas:
|
|
mac = sas.get_mac()
|
|
if not isinstance(mac, ToDeviceError):
|
|
await self.client.to_device(mac)
|
|
|
|
async def cleanup(self):
|
|
await self.client.close()
|
|
if self.lkapi:
|
|
await self.lkapi.aclose()
|
|
|
|
|
|
async def main():
|
|
os.makedirs(STORE_PATH, exist_ok=True)
|
|
bot = Bot()
|
|
try:
|
|
await bot.start()
|
|
finally:
|
|
await bot.cleanup()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
logging.basicConfig(level=logging.INFO)
|
|
asyncio.run(main())
|