feat: Blinkist-style audio summary bot (MAT-74)
Add interactive article summary feature: user pastes URL → bot asks language/duration/topics → generates audio summary via LLM + ElevenLabs TTS → posts MP3 inline with transcript and follow-up Q&A. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
68
article_summary/summarizer.py
Normal file
68
article_summary/summarizer.py
Normal file
@@ -0,0 +1,68 @@
|
||||
"""LLM-powered article summarization with personalization."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
# Module-level logger; summarize_article reports the generated word count
# against the target through it.
logger = logging.getLogger("article-summary.summarizer")
|
||||
|
||||
# Narration speed used to convert a requested audio duration (in minutes)
# into a target word count for the generated summary.
WORDS_PER_MINUTE = 150 # Clear narration pace
|
||||
|
||||
|
||||
async def summarize_article(
    content: str,
    language: str,
    duration_minutes: int,
    topics: list[str],
    llm_client: AsyncOpenAI,
    model: str,
) -> str:
    """Generate a narrative summary of article content.

    Args:
        content: Article text. Only the first 12,000 characters are sent to
            the model; longer input is cut and a continuation marker is
            appended.
        language: Target language code. "de" selects German; any other
            value selects English.
        duration_minutes: Target audio duration in minutes. Must be
            positive; it is multiplied by WORDS_PER_MINUTE to obtain the
            word target.
        topics: Focus topics selected by user. An empty list means
            "all topics".
        llm_client: AsyncOpenAI instance (LiteLLM).
        model: Model name to use.

    Returns:
        Summary text ready for TTS, with surrounding whitespace stripped.

    Raises:
        ValueError: If duration_minutes is not positive or content is blank.
        RuntimeError: If the completion contains no choices or no text.
    """
    # Fail fast on inputs that cannot produce a usable request: a
    # non-positive duration would yield max_tokens <= 0, and blank content
    # gives the model nothing to summarize.
    if duration_minutes <= 0:
        raise ValueError(
            f"duration_minutes must be positive, got {duration_minutes!r}"
        )
    if not content or not content.strip():
        raise ValueError("content must not be empty")

    word_target = duration_minutes * WORDS_PER_MINUTE
    lang_name = "German" if language == "de" else "English"
    topics_str = ", ".join(topics) if topics else "all topics"

    system_prompt = f"""You are a professional audio narrator creating a Blinkist-style summary.

RULES:
- Write in {lang_name}.
- Target approximately {word_target} words (for a {duration_minutes}-minute audio).
- Focus on: {topics_str}.
- Use a conversational, engaging narrator tone — as if explaining to a curious friend.
- Structure: brief intro → key insights → practical takeaways → brief conclusion.
- Use flowing prose, NOT bullet points or lists.
- Do NOT include any formatting markers, headers, or markdown.
- Do NOT say "In this article..." — jump straight into the content.
- Make it sound natural when read aloud."""

    # Truncate very long content so the prompt stays bounded.
    max_chars = 12_000
    if len(content) > max_chars:
        content = content[:max_chars] + "\n\n[Article continues...]"

    resp = await llm_client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Summarize this article:\n\n{content}"},
        ],
        max_tokens=word_target * 2,  # tokens ≈ 1.3x words, with headroom
        temperature=0.7,
    )

    # The choices list may be empty and message.content may be None (e.g.
    # refusals or filtered output); surface that as a clear error instead of
    # an IndexError/AttributeError.
    choices = resp.choices
    text = choices[0].message.content if choices else None
    if text is None:
        raise RuntimeError(
            "LLM returned no summary text (empty choices or null content)"
        )

    summary = text.strip()
    logger.info(
        "Generated summary: %d words (target: %d)",
        len(summary.split()),
        word_target,
    )
    return summary
|
||||
Reference in New Issue
Block a user