fix: increase voice PDF context to 40k chars, add language detection sanity check

- Voice context per-document limit 10k→40k chars for non-image documents (was cutting off at page 6); image text stays capped at 2k chars
- Language detection: reject results of 30 or more chars (the LLM sometimes returns a sentence instead of a language name)
- Voice.py: generalize "PDF" label to "Dokumente"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-23 12:40:13 +02:00
parent 751bfbd164
commit e81aa79396
2 changed files with 3 additions and 2 deletions

3
bot.py
View File

@@ -474,7 +474,7 @@ class Bot:
parts = []
for e in doc_entries:
label = {"pdf": "PDF", "image": "Bild", "text": "Datei"}.get(e["type"], "Dokument")
text = e["text"][:10000] if e["type"] != "image" else e["text"][:2000]
text = e["text"][:40000] if e["type"] != "image" else e["text"][:2000]
parts.append(f"[{label}: {e['filename']}]\n{text}")
document_context = "\n\n".join(parts)
logger.info("Passing %d document context(s) to voice session (%d chars total)",
@@ -786,6 +786,7 @@ class Bot:
logger.info("Translation check: detected=%s, preferred=%s, len=%d", detected_lang, preferred_lang, len(body))
if (
detected_lang != "Unknown"
and len(detected_lang) < 30 # sanity check: language name, not a sentence
and detected_lang.lower() != preferred_lang.lower()
and len(body) > 10 # skip very short messages
):