fix: increase voice PDF context to 40k chars, fix language detection sanity
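- Voice context: per-document limit raised from 10k to 40k chars (text was being cut off at page 6)
- Language detection: reject results of 30 or more chars (the LLM was returning whole sentences instead of a language name)
- voice.py: generalize the document-context label by dropping "PDF", so it covers every document uploaded to the room

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>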
This commit is contained in:
3
bot.py
3
bot.py
@@ -474,7 +474,7 @@ class Bot:
|
|||||||
parts = []
|
parts = []
|
||||||
for e in doc_entries:
|
for e in doc_entries:
|
||||||
label = {"pdf": "PDF", "image": "Bild", "text": "Datei"}.get(e["type"], "Dokument")
|
label = {"pdf": "PDF", "image": "Bild", "text": "Datei"}.get(e["type"], "Dokument")
|
||||||
text = e["text"][:10000] if e["type"] != "image" else e["text"][:2000]
|
text = e["text"][:40000] if e["type"] != "image" else e["text"][:2000]
|
||||||
parts.append(f"[{label}: {e['filename']}]\n{text}")
|
parts.append(f"[{label}: {e['filename']}]\n{text}")
|
||||||
document_context = "\n\n".join(parts)
|
document_context = "\n\n".join(parts)
|
||||||
logger.info("Passing %d document context(s) to voice session (%d chars total)",
|
logger.info("Passing %d document context(s) to voice session (%d chars total)",
|
||||||
@@ -786,6 +786,7 @@ class Bot:
|
|||||||
logger.info("Translation check: detected=%s, preferred=%s, len=%d", detected_lang, preferred_lang, len(body))
|
logger.info("Translation check: detected=%s, preferred=%s, len=%d", detected_lang, preferred_lang, len(body))
|
||||||
if (
|
if (
|
||||||
detected_lang != "Unknown"
|
detected_lang != "Unknown"
|
||||||
|
and len(detected_lang) < 30 # sanity check: language name, not a sentence
|
||||||
and detected_lang.lower() != preferred_lang.lower()
|
and detected_lang.lower() != preferred_lang.lower()
|
||||||
and len(body) > 10 # skip very short messages
|
and len(body) > 10 # skip very short messages
|
||||||
):
|
):
|
||||||
|
|||||||
2
voice.py
2
voice.py
@@ -700,7 +700,7 @@ class VoiceSession:
|
|||||||
|
|
||||||
instructions = _build_voice_prompt(model=self.model, timezone=user_timezone) + memory_section
|
instructions = _build_voice_prompt(model=self.model, timezone=user_timezone) + memory_section
|
||||||
if self._document_context:
|
if self._document_context:
|
||||||
instructions += f"\n\nDokument-Kontext (PDF im Raum hochgeladen):\n{self._document_context}"
|
instructions += f"\n\nDokument-Kontext (im Raum hochgeladen):\n{self._document_context}"
|
||||||
agent = _NoiseFilterAgent(
|
agent = _NoiseFilterAgent(
|
||||||
instructions=instructions,
|
instructions=instructions,
|
||||||
tools=[search_web, set_user_timezone],
|
tools=[search_web, set_user_timezone],
|
||||||
|
|||||||
Reference in New Issue
Block a user