feat: Blinkist-style audio summary bot (MAT-74)

Add an interactive article summary feature: the user pastes a URL → the bot asks for language, duration, and topics → generates an audio summary via LLM + ElevenLabs TTS → posts the MP3 inline with a transcript and follow-up Q&A.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 17:39:09 +02:00
parent 1000891a97
commit 4ec4054db4
6 changed files with 789 additions and 0 deletions
--- a/article_summary/summarizer.py
+++ b/article_summary/summarizer.py
@@ -0,0 +1,68 @@
+"""LLM-powered article summarization with personalization."""
+
+from __future__ import annotations
+
+import logging
+
+from openai import AsyncOpenAI
+
+# Module-level logger registered under a fixed, explicit name.
+logger = logging.getLogger("article-summary.summarizer")
+
+# Narration pace used to convert a target audio duration (in minutes) into a
+# word budget for the generated summary.
+WORDS_PER_MINUTE = 150  # Clear narration pace
+
+
+async def summarize_article(
+    content: str,
+    language: str,
+    duration_minutes: int,
+    topics: list[str],
+    llm_client: AsyncOpenAI,
+    model: str,
+) -> str:
+    """Generate a narrative summary of article content.
+
+    Args:
+        content: Article text. Only the first 12,000 characters are sent to
+            the model; longer input is cut and marked as continuing.
+        language: Target language; "de" selects German, any other value
+            selects English.
+        duration_minutes: Target audio duration (5, 10, or 15). Converted to
+            a word budget at WORDS_PER_MINUTE.
+        topics: Focus topics selected by user. An empty list means
+            "all topics".
+        llm_client: AsyncOpenAI instance (LiteLLM).
+        model: Model name to use.
+
+    Returns:
+        Summary text ready for TTS, with surrounding whitespace stripped.
+
+    Raises:
+        RuntimeError: If the completion has no choices or no text content.
+    """
+    word_target = duration_minutes * WORDS_PER_MINUTE
+    # Token budget for the completion: tokens ≈ 1.3x words, with headroom.
+    max_tokens = word_target * 2
+    lang_name = "German" if language == "de" else "English"
+    topics_str = ", ".join(topics) if topics else "all topics"
+
+    system_prompt = f"""You are a professional audio narrator creating a Blinkist-style summary.
+
+RULES:
+- Write in {lang_name}.
+- Target approximately {word_target} words (for a {duration_minutes}-minute audio).
+- Focus on: {topics_str}.
+- Use a conversational, engaging narrator tone — as if explaining to a curious friend.
+- Structure: brief intro → key insights → practical takeaways → brief conclusion.
+- Use flowing prose, NOT bullet points or lists.
+- Do NOT include any formatting markers, headers, or markdown.
+- Do NOT say "In this article..." — jump straight into the content.
+- Make it sound natural when read aloud."""
+
+    # Cap the prompt size; the trailing marker tells the model the text was cut.
+    if len(content) > 12_000:
+        content = content[:12_000] + "\n\n[Article continues...]"
+
+    resp = await llm_client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": f"Summarize this article:\n\n{content}"},
+        ],
+        max_tokens=max_tokens,
+        temperature=0.7,
+    )
+
+    # The API may return no choices, or a message whose content is None
+    # (e.g. a refusal or filtered output). Fail with a clear error instead of
+    # an opaque IndexError/AttributeError on the .strip() call below.
+    choice = resp.choices[0] if resp.choices else None
+    raw_text = choice.message.content if choice is not None else None
+    if raw_text is None:
+        raise RuntimeError("LLM returned no summary content")
+
+    # finish_reason "length" means the output was cut at max_tokens, so the
+    # narration may stop mid-sentence; surface that in the logs.
+    if choice.finish_reason == "length":
+        logger.warning("Summary truncated at max_tokens=%d", max_tokens)
+
+    summary = raw_text.strip()
+    logger.info("Generated summary: %d words (target: %d)", len(summary.split()), word_target)
+    return summary