feat: DM translation workflow for forwarded foreign messages

Detect when a direct message is written in a foreign language and offer
an interactive menu: translate it, compose a reply in that language, or
respond normally. Supports WhatsApp messages forwarded via Element.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-19 08:56:49 +02:00
parent 2cf69b30df
commit d6c30abca3
2 changed files with 123 additions and 2 deletions

1
.gitignore vendored
View File

@@ -2,3 +2,4 @@
__pycache__/
*.pyc
.venv/
.claude-session/

124
bot.py
View File

@@ -79,7 +79,8 @@ IMPORTANT RULES — FOLLOW THESE STRICTLY:
- You can see and analyze images that users send. Describe what you see when asked about an image.
- You can read and analyze PDF documents that users send. Summarize content and answer questions about them.
- You can generate images when asked — use the generate_image tool for any image creation, drawing, or illustration requests.
- If user memories are provided, use them to personalize responses. Address users by name if known."""
- If user memories are provided, use them to personalize responses. Address users by name if known.
- When asked to translate, provide ONLY the translation with no explanation."""
IMAGE_GEN_TOOLS = [{
"type": "function",
@@ -106,6 +107,7 @@ HELP_TEXT = """**AI Bot Commands**
- `!ai auto-rename on|off` — Auto-rename room based on conversation topic
- `!ai forget` — Delete all memories the bot has about you
- `!ai memories` — Show what the bot remembers about you
- **Translate**: Forward a message to this DM — bot detects language and offers translation
- **@mention the bot** or start with `!ai` for a regular AI response"""
@@ -216,6 +218,8 @@ class Bot:
self._sync_token_received = False
self._verifications: dict[str, dict] = {} # txn_id -> verification state
self._pending_connects: dict[str, str] = {} # matrix_user_id -> device_code
self._pending_translate: dict[str, dict] = {} # sender -> {text, detected_lang, room_id}
self._pending_reply: dict[str, dict] = {} # sender -> {target_lang}
@staticmethod
def _load_user_keys() -> dict[str, str]:
@@ -490,6 +494,60 @@ class Bot:
logger.debug("Memory extraction failed", exc_info=True)
return existing
async def _detect_language(self, text: str) -> str:
    """Detect the language of ``text`` using a short LLM call.

    Args:
        text: Message text to classify. Only the first 500 characters are
            sent to the model.

    Returns:
        The language name reported by the model with surrounding whitespace,
        quotes, markdown emphasis and punctuation removed (so ``"German."``
        becomes ``"German"``). Returns ``"Unknown"`` when no LLM client is
        configured, ``text`` is blank, the model returns no usable content,
        or the request fails.
    """
    # Nothing to classify: avoid a pointless (and billable) LLM round trip.
    if not self.llm or not text or not text.strip():
        return "Unknown"
    try:
        resp = await self.llm.chat.completions.create(
            model=DEFAULT_MODEL,
            messages=[
                {"role": "system", "content": "What language is this text? Reply with ONLY the language name in English."},
                {"role": "user", "content": text[:500]},
            ],
            max_tokens=10,
        )
        # ``content`` may be None; models also tend to decorate one-word
        # answers ("German.", "**German**"), which would otherwise defeat the
        # caller's equality check against the preferred language.
        raw = resp.choices[0].message.content or ""
        name = raw.strip().strip(" \t\r\n.!?:;\"'`*")
        return name or "Unknown"
    except Exception:
        # Best effort: detection failures must never break message handling.
        logger.debug("Language detection failed", exc_info=True)
        return "Unknown"
async def _translate_text(self, text: str, target_language: str, model: str | None = None) -> str:
"""Translate text to the target language using LLM."""
if not self.llm:
return text
try:
resp = await self.llm.chat.completions.create(
model=model or DEFAULT_MODEL,
messages=[
{"role": "system", "content": f"Translate the following text to {target_language}. Return ONLY the translation."},
{"role": "user", "content": text},
],
max_tokens=1000,
)
return resp.choices[0].message.content.strip()
except Exception:
logger.debug("Translation failed", exc_info=True)
return f"[Translation failed] {text}"
def _get_preferred_language(self, user_id: str) -> str:
    """Return the user's preferred language inferred from stored memories.

    Only memories whose ``fact`` text contains "language", "speaks" or
    "prefers" are considered. Within such a fact, the first known language
    name that appears as a whole word wins.

    Args:
        user_id: Identifier passed to ``self._load_memories``.

    Returns:
        The canonical (capitalized) language name, or ``"English"`` when no
        memory names a known language.
    """
    known_langs = (
        "English", "German", "French", "Spanish", "Italian", "Portuguese",
        "Dutch", "Russian", "Chinese", "Japanese", "Korean", "Arabic",
        "Turkish", "Polish", "Swedish", "Norwegian", "Danish", "Finnish",
        "Greek", "Hebrew", "Hindi", "Thai", "Vietnamese", "Indonesian",
        "Czech", "Romanian", "Hungarian", "Ukrainian", "Croatian", "Serbian",
    )
    canonical = {lang.lower(): lang for lang in known_langs}
    for m in self._load_memories(user_id) or ():
        fact = m["fact"].lower()
        if not ("language" in fact or "speaks" in fact or "prefers" in fact):
            continue
        # Match whole words only, so "thailand" does not count as "thai".
        # Scanning in text order means the language mentioned first wins,
        # rather than whichever comes first in ``known_langs``.
        words = "".join(ch if ch.isalpha() else " " for ch in fact).split()
        for word in words:
            if word in canonical:
                return canonical[word]
    return "English"
async def on_text_message(self, room, event: RoomMessageText):
"""Handle text messages: commands and AI responses."""
if event.sender == BOT_USER:
@@ -529,6 +587,45 @@ class Bot:
await self._send_text(room.room_id, "LLM not configured (LITELLM_BASE_URL not set).")
return
sender = event.sender
# --- DM translation workflow: handle pending reply composition ---
if is_dm and sender in self._pending_reply:
pending = self._pending_reply.pop(sender)
await self.client.room_typing(room.room_id, typing_state=True)
try:
translated = await self._translate_text(body, pending["target_lang"])
await self._send_text(room.room_id, translated)
finally:
await self.client.room_typing(room.room_id, typing_state=False)
return
# --- DM translation workflow: handle menu response ---
if is_dm and sender in self._pending_translate:
pending = self._pending_translate.pop(sender)
choice = body.strip().lower()
preferred_lang = self._get_preferred_language(sender)
if choice in ("1", "1") or choice.startswith("translate"):
await self.client.room_typing(room.room_id, typing_state=True)
try:
translated = await self._translate_text(pending["text"], preferred_lang)
await self._send_text(room.room_id, translated)
finally:
await self.client.room_typing(room.room_id, typing_state=False)
return
elif choice in ("2", "2") or choice.startswith("reply"):
self._pending_reply[sender] = {"target_lang": pending["detected_lang"]}
await self._send_text(
room.room_id,
f"Type your message — I'll translate it to **{pending['detected_lang']}**.",
)
return
# choice "3" or anything else → proceed with normal AI response
# NOTE(review): this falls through to the normal flow below using the current
# message body (the menu answer); the popped pending text is not passed along.
# Check if a recent image was sent in this room (within 60s)
image_data = None
cached = self._recent_images.get(room.room_id)
@@ -538,9 +635,32 @@ class Bot:
image_data = (b64, mime)
del self._recent_images[room.room_id]
# --- DM translation workflow: detect foreign language ---
if is_dm and not body.startswith("!ai") and not image_data:
preferred_lang = self._get_preferred_language(sender)
detected_lang = await self._detect_language(body)
if (
detected_lang != "Unknown"
and detected_lang.lower() != preferred_lang.lower()
and len(body) > 10 # skip very short messages
):
self._pending_translate[sender] = {
"text": body,
"detected_lang": detected_lang,
"room_id": room.room_id,
}
menu = (
f"This looks like **{detected_lang}**. What would you like?\n\n"
f"1⃣ **Translate to {preferred_lang}**\n"
f"2⃣ **Help me reply in {detected_lang}** (type your response, I'll translate)\n"
f"3⃣ **Just respond normally**"
)
await self._send_text(room.room_id, menu)
return
await self.client.room_typing(room.room_id, typing_state=True)
try:
await self._respond_with_ai(room, body, sender=event.sender, image_data=image_data)
await self._respond_with_ai(room, body, sender=sender, image_data=image_data)
finally:
await self.client.room_typing(room.room_id, typing_state=False)