feat: Blinkist-style audio summary bot (MAT-74)
Add an interactive article summary feature: the user pastes a URL → the bot asks for language, duration, and topics → it generates an audio summary via LLM + ElevenLabs TTS → it posts the MP3 inline together with a transcript and follow-up Q&A.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
74
bot.py
74
bot.py
@@ -40,6 +40,7 @@ from nio import (
|
||||
from nio.crypto.attachments import decrypt_attachment
|
||||
from livekit import api
|
||||
from voice import VoiceSession
|
||||
from article_summary import ArticleSummaryHandler
|
||||
|
||||
BOT_DEVICE_ID = "AIBOT"
|
||||
CALL_MEMBER_TYPE = "org.matrix.msc3401.call.member"
|
||||
@@ -77,6 +78,9 @@ BOT_API_KEY = os.environ.get("BOT_API_KEY", "")
|
||||
RAG_ENDPOINT = os.environ.get("RAG_ENDPOINT", "") # Customer-VM RAG service (e.g. http://127.0.0.1:8765)
|
||||
RAG_AUTH_TOKEN = os.environ.get("RAG_AUTH_TOKEN", "") # Bearer token for local RAG
|
||||
BRAVE_API_KEY = os.environ.get("BRAVE_API_KEY", "")
|
||||
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
|
||||
ELEVENLABS_VOICE_ID = os.environ.get("ELEVENLABS_VOICE_ID", "ML23UVoFL5mI6APbRAeR")
|
||||
FIRECRAWL_URL = os.environ.get("FIRECRAWL_URL", "")
|
||||
MAX_TOOL_ITERATIONS = 5
|
||||
|
||||
SYSTEM_PROMPT = """You are a helpful AI assistant in a Matrix chat room.
|
||||
@@ -962,6 +966,17 @@ class Bot:
|
||||
self._sync_token_received = False
|
||||
self._verifications: dict[str, dict] = {} # txn_id -> verification state
|
||||
self._room_document_context: dict[str, list[dict]] = {} # room_id -> [{type, filename, text, timestamp}, ...]
|
||||
# Article summary handler (Blinkist-style audio summaries)
|
||||
if self.llm and ELEVENLABS_API_KEY:
|
||||
self.article_handler = ArticleSummaryHandler(
|
||||
llm_client=self.llm,
|
||||
model=DEFAULT_MODEL,
|
||||
elevenlabs_key=ELEVENLABS_API_KEY,
|
||||
voice_id=ELEVENLABS_VOICE_ID,
|
||||
firecrawl_url=FIRECRAWL_URL or None,
|
||||
)
|
||||
else:
|
||||
self.article_handler = None
|
||||
|
||||
async def _has_documents(self, matrix_user_id: str) -> bool:
|
||||
"""Check if user has documents via local RAG or MatrixHost portal API.
|
||||
@@ -1530,6 +1545,24 @@ class Bot:
|
||||
logger.info("Confluence page %s detected in room %s",
|
||||
confluence_page_id, room.room_id)
|
||||
|
||||
# Check article summary FSM (Blinkist-style audio summaries)
|
||||
if self.article_handler:
|
||||
summary_response = await self.article_handler.handle_message(
|
||||
room.room_id, sender, body
|
||||
)
|
||||
if summary_response is not None:
|
||||
if summary_response == "__GENERATE__":
|
||||
await self.client.room_typing(room.room_id, typing_state=True)
|
||||
try:
|
||||
await self.article_handler.generate_and_post(
|
||||
self, room.room_id, sender
|
||||
)
|
||||
finally:
|
||||
await self.client.room_typing(room.room_id, typing_state=False)
|
||||
elif summary_response:
|
||||
await self._send_text(room.room_id, summary_response)
|
||||
return
|
||||
|
||||
await self.client.room_typing(room.room_id, typing_state=True)
|
||||
try:
|
||||
await self._respond_with_ai(room, body, sender=sender, image_data=image_data)
|
||||
@@ -2331,6 +2364,47 @@ class Bot:
|
||||
content=content,
|
||||
)
|
||||
|
||||
async def _send_audio(self, room_id: str, audio_bytes: bytes, filename: str, duration_seconds: float):
    """Upload an MP3 payload to the homeserver and post it to a room as ``m.audio``.

    Args:
        room_id: Room that receives the audio message (and the apology text
            if the upload fails).
        audio_bytes: Raw audio data; uploaded with the ``audio/mpeg`` content type.
        filename: Name passed to the upload and used as the message ``body``.
        duration_seconds: Clip length in seconds; converted to whole
            milliseconds for the event's ``info.duration`` field.

    Returns:
        None. If the upload does not yield an ``UploadResponse``, the failure
        is logged, a short apology is sent to the room, and no audio event
        is posted.
    """
    from nio import UploadResponse

    payload_size = len(audio_bytes)

    # The upload is always requested with encryption enabled; the second
    # element of the result carries the key material used further down.
    upload_resp, maybe_keys = await self.client.upload(
        data_provider=io.BytesIO(audio_bytes),
        content_type="audio/mpeg",
        filename=filename,
        filesize=payload_size,
        encrypt=True,
    )

    upload_ok = isinstance(upload_resp, UploadResponse)
    if not upload_ok:
        logger.error("Audio upload failed: %s", upload_resp)
        await self._send_text(room_id, "Sorry, I couldn't upload the audio file.")
        return

    audio_info = {
        "mimetype": "audio/mpeg",
        "size": payload_size,
        # Matrix uses milliseconds for media duration.
        "duration": int(duration_seconds * 1000),
    }
    content = {"msgtype": "m.audio", "body": filename, "info": audio_info}

    media_uri = upload_resp.content_uri
    if not maybe_keys:
        # No key material came back: reference the media by plain URL.
        content["url"] = media_uri
    else:
        # Key material present: attach it next to the media URL under "file".
        encrypted_file = {"url": media_uri}
        for field in ("key", "iv", "hashes", "v"):
            encrypted_file[field] = maybe_keys[field]
        content["file"] = encrypted_file

    await self.client.room_send(
        room_id,
        message_type="m.room.message",
        content=content,
    )
|
||||
|
||||
async def _summarize_call(self, transcript: list[dict], room_id: str) -> str:
|
||||
"""Generate a concise summary of a voice call transcript via LLM."""
|
||||
# Format transcript for the LLM
|
||||
|
||||
Reference in New Issue
Block a user