Files
matrix-ai-agent/article_summary/__init__.py
Christian Gick 4ec4054db4 feat: Blinkist-style audio summary bot (MAT-74)
Add interactive article summary feature: user pastes URL → bot asks
language/duration/topics → generates audio summary via LLM + ElevenLabs
TTS → posts MP3 inline with transcript and follow-up Q&A.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 17:39:09 +02:00

334 lines
12 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Blinkist-style article audio summary handler for Matrix bot."""
from __future__ import annotations
import logging
import re
from typing import TYPE_CHECKING
from openai import AsyncOpenAI
from .state import ArticleState, SessionManager
from .extractor import extract_article, detect_topics, is_article_url
from .summarizer import summarize_article
from .tts import generate_audio
if TYPE_CHECKING:
pass # Bot type would cause circular import
# Logger shared by the whole article summary feature.
logger = logging.getLogger("article-summary")

# Finds http/https URLs inside free-form message text. A URL ends at the
# first whitespace character, closing parenthesis, ">", "]" or double quote.
URL_PATTERN = re.compile(r'https?://[^\s\)>\]"]+')

# Replies (English and German) that abort an in-progress summary dialogue.
CANCEL_WORDS = {
    "cancel",
    "stop",
    "abbrechen",
    "abbruch",
    "nevermind",
}

# Each supported language is a (language code, display name) pair.
_ENGLISH = ("en", "English")
_GERMAN = ("de", "German")

# Accepted replies to the language question: menu number, language code,
# or language name.
LANGUAGE_OPTIONS = {
    "1": _ENGLISH,
    "2": _GERMAN,
    "en": _ENGLISH,
    "de": _GERMAN,
    "english": _ENGLISH,
    "german": _GERMAN,
    "deutsch": _GERMAN,
}

# Summary lengths on offer, in minutes, in menu order.
_DURATION_CHOICES = (5, 10, 15)

# Accepted replies to the duration question: either the menu position
# ("1".."3") or the number of minutes itself ("5", "10", "15").
DURATION_OPTIONS = {
    **{
        str(position): minutes
        for position, minutes in enumerate(_DURATION_CHOICES, start=1)
    },
    **{str(minutes): minutes for minutes in _DURATION_CHOICES},
}
class ArticleSummaryHandler:
    """Handles the interactive article summary conversation flow.

    The conversation is a small state machine tracked per (sender, room)
    session: a detected article URL is followed by a language question, a
    duration question and a topic question; the summary is then generated
    and posted, after which follow-up questions about the article are
    answered.
    """

    # Replies that mean "cover every detected topic" (English and German).
    _ALL_TOPICS_WORDS = frozenset({"all", "alle", "everything", "alles"})

    # Whole words that mark a follow-up message as a question (English and
    # German). NOTE: "was" is German for "what" but is also an ordinary
    # English word, so English sentences containing it still match.
    _QUESTION_WORDS = frozenset(
        {"what", "how", "why", "explain", "was", "wie", "warum", "erkläre"}
    )

    def __init__(
        self,
        llm_client: AsyncOpenAI,
        model: str,
        elevenlabs_key: str,
        voice_id: str,
        firecrawl_url: str | None = None,
    ) -> None:
        """Store the service configuration and create the session store.

        Args:
            llm_client: Client used for topic detection, summarization and
                follow-up answers.
            model: Model name passed along with every LLM request.
            elevenlabs_key: API key handed to the TTS step.
            voice_id: Voice identifier handed to the TTS step.
            firecrawl_url: Optional URL forwarded to the article extractor.
        """
        self.llm = llm_client
        self.model = model
        self.elevenlabs_key = elevenlabs_key
        self.voice_id = voice_id
        self.firecrawl_url = firecrawl_url
        self.sessions = SessionManager()

    async def handle_message(
        self, room_id: str, sender: str, body: str
    ) -> str | None:
        """Process a message through the article summary FSM.

        Returns:
            - None: Not handled (pass to normal AI handler).
            - str: Text response to send.
            - "__GENERATE__": Signal to run the full generation pipeline.
        """
        body_lower = body.strip().lower()
        session = self.sessions.get(sender, room_id)

        # Cancel from any active state
        if session.state != ArticleState.IDLE and body_lower in CANCEL_WORDS:
            self.sessions.reset(sender, room_id)
            return "Summary cancelled."

        # Route based on current state. Each state names the step that was
        # completed last, so the handler invoked is the one for the next
        # question in the dialogue.
        if session.state == ArticleState.IDLE:
            return await self._check_for_url(room_id, sender, body)
        if session.state == ArticleState.URL_DETECTED:
            # Waiting for language selection
            return self._on_language(room_id, sender, body_lower)
        if session.state == ArticleState.LANGUAGE:
            # Waiting for duration selection
            return self._on_duration(room_id, sender, body_lower)
        if session.state == ArticleState.DURATION:
            # Waiting for topic selection; the original casing is kept
            # because free-text topics are passed on to the summarizer.
            return self._on_topics(room_id, sender, body)
        if session.state == ArticleState.GENERATING:
            return "Still generating your summary, please wait..."
        if session.state == ArticleState.COMPLETE:
            # Follow-up Q&A about the article
            return await self._on_followup(room_id, sender, body)
        return None

    async def _check_for_url(
        self, room_id: str, sender: str, body: str
    ) -> str | None:
        """Start a summary dialogue if the message contains an article URL.

        Only the first article-like URL is used. Returns the language
        question, or None when there is no article URL or the article
        could not be extracted (so the normal handler deals with it).
        """
        urls = URL_PATTERN.findall(body)
        # Filter to article-like URLs
        article_urls = [u for u in urls if is_article_url(u)]
        if not article_urls:
            return None

        url = article_urls[0]
        session = self.sessions.get(sender, room_id)

        # Extract article content
        logger.info("Extracting article from %s", url)
        article = await extract_article(url, self.firecrawl_url)
        if not article:
            return None  # Could not extract — let normal handler deal with it

        session.url = url
        session.title = article["title"]
        session.content = article["content"]
        word_count = article["word_count"]
        # Reading-time estimate at 200 words per minute, never below 1.
        read_time = max(1, word_count // 200)

        # Detect topics via LLM
        session.detected_topics = await detect_topics(
            article["content"], self.llm, self.model
        )
        session.state = ArticleState.URL_DETECTED
        self.sessions.touch(sender, room_id)

        topics_hint = ""
        if session.detected_topics:
            topics_hint = f"\nTopics: {', '.join(session.detected_topics)}"
        return (
            f"**Found:** {session.title} (~{read_time} min read){topics_hint}\n\n"
            f"Want an audio summary? What language?\n"
            f"1⃣ English\n"
            f"2⃣ German\n\n"
            f"_(or say \"cancel\" to skip)_"
        )

    def _on_language(
        self, room_id: str, sender: str, choice: str
    ) -> str | None:
        """Handle language selection.

        ``choice`` is the stripped, lower-cased reply. An unknown reply
        re-asks the question and leaves the session state unchanged.
        """
        lang = LANGUAGE_OPTIONS.get(choice)
        if not lang:
            return "Please pick a language: **1** for English, **2** for German."

        session = self.sessions.get(sender, room_id)
        session.language = lang[0]
        session.state = ArticleState.LANGUAGE
        self.sessions.touch(sender, room_id)
        return (
            f"Language: **{lang[1]}**. How long should the summary be?\n"
            f"1⃣ 5 min (short)\n"
            f"2⃣ 10 min (standard)\n"
            f"3⃣ 15 min (detailed)"
        )

    def _on_duration(
        self, room_id: str, sender: str, choice: str
    ) -> str | None:
        """Handle duration selection.

        ``choice`` is the stripped, lower-cased reply. An unknown reply
        re-asks the question and leaves the session state unchanged.
        """
        duration = DURATION_OPTIONS.get(choice)
        if not duration:
            return "Please pick: **1** (5 min), **2** (10 min), or **3** (15 min)."

        session = self.sessions.get(sender, room_id)
        session.duration_minutes = duration
        session.state = ArticleState.DURATION
        self.sessions.touch(sender, room_id)

        if session.detected_topics:
            # Number the topics so the "reply with topic numbers" instruction
            # is usable; _on_topics reads the numbers as 1-based indexes
            # into this same list.
            topic_list = "\n".join(
                f"{number}. {topic}"
                for number, topic in enumerate(session.detected_topics, start=1)
            )
            return (
                f"Duration: **{duration} min**. Focus on which topics?\n"
                f"{topic_list}\n\n"
                f"Reply with topic numbers (comma-separated), specific topics, or **all**."
            )
        return (
            f"Duration: **{duration} min**. Any specific topics to focus on?\n"
            f"Reply with topics (comma-separated) or **all** for a general summary."
        )

    @staticmethod
    def _parse_topic_selection(reply: str, detected: list[str]) -> list[str]:
        """Turn a topic reply into an ordered, duplicate-free topic list.

        Commas separate entries. Inside an entry, a decimal number is a
        1-based index into ``detected`` (out-of-range numbers are ignored)
        and consecutive non-numeric words form one free-text topic, so
        "machine learning, 2" yields one phrase plus the second topic.
        """
        chosen: list[str] = []
        for chunk in reply.split(","):
            phrase: list[str] = []
            for token in chunk.split():
                # isdecimal(), unlike isdigit(), only accepts characters
                # that int() can parse (isdigit() also accepts e.g. "²").
                if not token.isdecimal():
                    phrase.append(token)
                    continue
                if phrase:
                    chosen.append(" ".join(phrase))
                    phrase = []
                index = int(token) - 1
                if 0 <= index < len(detected):
                    chosen.append(detected[index])
            if phrase:
                chosen.append(" ".join(phrase))
        # dict.fromkeys drops repeats while keeping first-seen order.
        return list(dict.fromkeys(chosen))

    def _on_topics(
        self, room_id: str, sender: str, body: str
    ) -> str | None:
        """Handle topic selection. Returns __GENERATE__ to trigger pipeline.

        "all" (or a German/English equivalent) and an empty selection both
        fall back to the detected topics.
        """
        session = self.sessions.get(sender, room_id)
        detected = session.detected_topics or []

        if body.strip().lower() in self._ALL_TOPICS_WORDS:
            session.topics = detected
        else:
            session.topics = self._parse_topic_selection(body, detected) or detected

        session.state = ArticleState.GENERATING
        self.sessions.touch(sender, room_id)
        return "__GENERATE__"

    async def generate_and_post(self, bot, room_id: str, sender: str) -> None:
        """Run the full pipeline: summarize → TTS → upload MP3.

        Posts a progress notice, the audio file and a transcript preview to
        the room, then marks the session COMPLETE. Any failure inside the
        pipeline is logged, the session is reset and an apology is posted.
        """
        session = self.sessions.get(sender, room_id)
        topics_str = ", ".join(session.topics) if session.topics else "all topics"
        await bot._send_text(
            room_id,
            f"Generating {session.duration_minutes}-min {session.language.upper()} "
            f"summary of **{session.title}** (focus: {topics_str})...",
        )

        try:
            # Step 1: Summarize
            summary = await summarize_article(
                content=session.content,
                language=session.language,
                duration_minutes=session.duration_minutes,
                topics=session.topics,
                llm_client=self.llm,
                model=self.model,
            )
            session.summary_text = summary

            # Step 2: TTS
            mp3_bytes, duration_secs = await generate_audio(
                text=summary,
                api_key=self.elevenlabs_key,
                voice_id=self.voice_id,
                language=session.language,
            )

            # Step 3: Upload and send audio. The file name is the title
            # stripped of everything but word characters, whitespace and
            # hyphens, capped at 50 characters.
            filename = re.sub(r'[^\w\s-]', '', session.title)[:50].strip()
            filename = f"{filename}.mp3" if filename else "summary.mp3"
            await bot._send_audio(room_id, mp3_bytes, filename, duration_secs)

            # Step 4: Send transcript (first 500 characters only)
            transcript_preview = summary[:500]
            if len(summary) > 500:
                transcript_preview += "..."
            await bot._send_text(
                room_id,
                f"**Summary of:** {session.title}\n\n{transcript_preview}\n\n"
                f"_You can ask follow-up questions about this article._",
            )

            session.state = ArticleState.COMPLETE
            self.sessions.touch(sender, room_id)
        except Exception:
            logger.exception("Article summary pipeline failed for %s", session.url)
            # Reset before notifying: if the notice itself cannot be sent,
            # the session must not be left in the GENERATING state.
            self.sessions.reset(sender, room_id)
            await bot._send_text(
                room_id, "Sorry, I couldn't generate the audio summary. Please try again."
            )

    @classmethod
    def _looks_like_question(cls, body: str) -> bool:
        """Return True if ``body`` contains "?" or a whole question word.

        Matching is done on whole words so that, for example, "show" does
        not count as "how" and "somewhat" does not count as "what".
        """
        if "?" in body:
            return True
        words = re.findall(r"\w+", body.lower())
        return not cls._QUESTION_WORDS.isdisjoint(words)

    async def _on_followup(
        self, room_id: str, sender: str, body: str
    ) -> str | None:
        """Answer follow-up questions about the summarized article.

        A new article URL restarts the dialogue. A message that does not
        look like a question, or a failed LLM call, resets the session and
        returns None so the normal handler takes over.
        """
        session = self.sessions.get(sender, room_id)

        # If user posts a new URL, start fresh
        urls = URL_PATTERN.findall(body)
        if any(is_article_url(u) for u in urls):
            self.sessions.reset(sender, room_id)
            return await self._check_for_url(room_id, sender, body)

        # Check if it looks like a question about the article
        if not self._looks_like_question(body):
            # Not a question — reset and let normal handler take over
            self.sessions.reset(sender, room_id)
            return None

        try:
            resp = await self.llm.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": (
                            "You are answering follow-up questions about an article. "
                            "Use the article content below to answer. Be concise. "
                            "Respond in the same language as the question."
                        ),
                    },
                    {
                        "role": "user",
                        "content": (
                            f"Article: {session.title}\n\n"
                            f"{session.content[:8000]}\n\n"
                            f"Summary: {session.summary_text[:3000]}\n\n"
                            f"Question: {body}"
                        ),
                    },
                ],
                max_tokens=500,
                temperature=0.5,
            )
            # If the message content is None, .strip() raises
            # AttributeError and the handler below takes over.
            return resp.choices[0].message.content.strip()
        except Exception:
            logger.warning("Follow-up Q&A failed", exc_info=True)
            self.sessions.reset(sender, room_id)
            return None