feat: DM translation workflow for forwarded foreign messages

Detect when a DM is written in a foreign language and offer an interactive menu: translate it, compose a reply in that language, or respond normally. Supports WhatsApp messages forwarded via Element.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 08:56:49 +02:00
parent 2cf69b30df
commit d6c30abca3
2 changed files with 123 additions and 2 deletions
--- a/bot.py
+++ b/bot.py
@@ -79,7 +79,8 @@ IMPORTANT RULES — FOLLOW THESE STRICTLY:
 - You can see and analyze images that users send. Describe what you see when asked about an image.
 - You can read and analyze PDF documents that users send. Summarize content and answer questions about them.
 - You can generate images when asked — use the generate_image tool for any image creation, drawing, or illustration requests.
- If user memories are provided, use them to personalize responses. Address users by name if known."""
+- If user memories are provided, use them to personalize responses. Address users by name if known.
+- When asked to translate, provide ONLY the translation with no explanation."""

 IMAGE_GEN_TOOLS = [{
    "type": "function",
@@ -106,6 +107,7 @@ HELP_TEXT = """**AI Bot Commands**
 - `!ai auto-rename on|off` — Auto-rename room based on conversation topic
 - `!ai forget` — Delete all memories the bot has about you
 - `!ai memories` — Show what the bot remembers about you
+- **Translate**: Forward a message to this DM — bot detects language and offers translation
 - **@mention the bot** or start with `!ai` for a regular AI response"""


@@ -216,6 +218,8 @@ class Bot:
        self._sync_token_received = False
        self._verifications: dict[str, dict] = {}  # txn_id -> verification state
        self._pending_connects: dict[str, str] = {}  # matrix_user_id -> device_code
+        self._pending_translate: dict[str, dict] = {}  # sender -> {text, detected_lang, room_id}
+        self._pending_reply: dict[str, dict] = {}  # sender -> {target_lang}

    @staticmethod
    def _load_user_keys() -> dict[str, str]:
@@ -490,6 +494,60 @@ class Bot:
            logger.debug("Memory extraction failed", exc_info=True)
            return existing

+    async def _detect_language(self, text: str) -> str:
+        """Detect the language of ``text`` with a short LLM call.
+
+        Only the first 500 characters are sent to the model, and the answer is
+        capped at 10 tokens.
+
+        Args:
+            text: The message body to classify.
+
+        Returns:
+            The language name reported by the model with surrounding
+            whitespace, quotes, markdown markers and sentence punctuation
+            removed, or ``"Unknown"`` when no LLM is configured, ``text`` is
+            blank, the call fails, or the model returns an empty answer.
+        """
+        # Nothing to classify: avoid an LLM round-trip for blank input.
+        if not self.llm or not text.strip():
+            return "Unknown"
+        try:
+            resp = await self.llm.chat.completions.create(
+                model=DEFAULT_MODEL,
+                messages=[
+                    {"role": "system", "content": "What language is this text? Reply with ONLY the language name in English."},
+                    {"role": "user", "content": text[:500]},
+                ],
+                max_tokens=10,
+            )
+            # Guard against a missing/empty content field, and peel off decoration
+            # such as "German." or "**German**" so a plain string comparison
+            # against the user's preferred language can succeed.
+            answer = (resp.choices[0].message.content or "").strip().strip(".!?\"'`*").strip()
+            # An empty answer must not leak out as "": callers treat any value
+            # other than "Unknown" as a successfully detected language.
+            return answer or "Unknown"
+        except Exception:
+            # Best-effort: detection problems must never break message handling.
+            logger.debug("Language detection failed", exc_info=True)
+            return "Unknown"
+
+    async def _translate_text(self, text: str, target_language: str, model: str | None = None) -> str:
+        """Ask the LLM to render ``text`` in ``target_language``.
+
+        Args:
+            text: The text to translate.
+            target_language: Name of the language to translate into.
+            model: Optional model override; ``DEFAULT_MODEL`` is used when falsy.
+
+        Returns:
+            The stripped model output. When no LLM is configured the input is
+            returned unchanged; when the call fails the input is returned
+            prefixed with ``[Translation failed]``.
+        """
+        if not self.llm:
+            return text
+        try:
+            instruction = f"Translate the following text to {target_language}. Return ONLY the translation."
+            conversation = [
+                {"role": "system", "content": instruction},
+                {"role": "user", "content": text},
+            ]
+            completion = await self.llm.chat.completions.create(
+                model=model or DEFAULT_MODEL,
+                messages=conversation,
+                max_tokens=1000,
+            )
+            translation = completion.choices[0].message.content
+            return translation.strip()
+        except Exception:
+            # Best-effort: hand the original text back, clearly marked as untranslated.
+            logger.debug("Translation failed", exc_info=True)
+            return f"[Translation failed] {text}"
+
+    def _get_preferred_language(self, user_id: str) -> str:
+        """Return the user's preferred language as inferred from stored memories.
+
+        Memories are scanned in the order ``_load_memories`` returns them. A
+        memory is considered only if its ``"fact"`` text contains "language",
+        "speaks" or "prefers"; the first known language name that occurs in it
+        as a whole word wins (checked in ``known_langs`` order).
+
+        Args:
+            user_id: The user whose memories are inspected.
+
+        Returns:
+            The matched language name, or ``"English"`` when nothing matches.
+        """
+        import re  # function-scope import keeps this block self-contained
+
+        memories = self._load_memories(user_id)
+        known_langs = [
+            "English", "German", "French", "Spanish", "Italian", "Portuguese",
+            "Dutch", "Russian", "Chinese", "Japanese", "Korean", "Arabic",
+            "Turkish", "Polish", "Swedish", "Norwegian", "Danish", "Finnish",
+            "Greek", "Hebrew", "Hindi", "Thai", "Vietnamese", "Indonesian",
+            "Czech", "Romanian", "Hungarian", "Ukrainian", "Croatian", "Serbian",
+        ]
+        for m in memories:
+            fact = m["fact"].lower()
+            if "language" in fact or "speaks" in fact or "prefers" in fact:
+                # Compare against whole words only: a raw substring test would
+                # report "Thai" for a fact that merely mentions "Thailand".
+                words = set(re.findall(r"[a-z]+", fact))
+                for lang in known_langs:
+                    if lang.lower() in words:
+                        return lang
+        return "English"
+
    async def on_text_message(self, room, event: RoomMessageText):
        """Handle text messages: commands and AI responses."""
        if event.sender == BOT_USER:
@@ -529,6 +587,45 @@ class Bot:
            await self._send_text(room.room_id, "LLM not configured (LITELLM_BASE_URL not set).")
            return

+        sender = event.sender
+
+        # --- DM translation workflow: handle pending reply composition ---
+        if is_dm and sender in self._pending_reply:
+            pending = self._pending_reply.pop(sender)
+            await self.client.room_typing(room.room_id, typing_state=True)
+            try:
+                translated = await self._translate_text(body, pending["target_lang"])
+                await self._send_text(room.room_id, translated)
+            finally:
+                await self.client.room_typing(room.room_id, typing_state=False)
+            return
+
+        # --- DM translation workflow: handle menu response ---
+        if is_dm and sender in self._pending_translate:
+            pending = self._pending_translate.pop(sender)
+            choice = body.strip().lower()
+            preferred_lang = self._get_preferred_language(sender)
+
+            if choice in ("1", "1️⃣") or choice.startswith("translate"):
+                await self.client.room_typing(room.room_id, typing_state=True)
+                try:
+                    translated = await self._translate_text(pending["text"], preferred_lang)
+                    await self._send_text(room.room_id, translated)
+                finally:
+                    await self.client.room_typing(room.room_id, typing_state=False)
+                return
+
+            elif choice in ("2", "2️⃣") or choice.startswith("reply"):
+                self._pending_reply[sender] = {"target_lang": pending["detected_lang"]}
+                await self._send_text(
+                    room.room_id,
+                    f"Type your message — I'll translate it to **{pending['detected_lang']}**.",
+                )
+                return
+
+            # choice "3" or anything else → proceed with normal AI response
+            # (fall through to normal flow below with original pending text context)
+
        # Check if a recent image was sent in this room (within 60s)
        image_data = None
        cached = self._recent_images.get(room.room_id)
@@ -538,9 +635,32 @@ class Bot:
                image_data = (b64, mime)
                del self._recent_images[room.room_id]

+        # --- DM translation workflow: detect foreign language ---
+        if is_dm and not body.startswith("!ai") and not image_data:
+            preferred_lang = self._get_preferred_language(sender)
+            detected_lang = await self._detect_language(body)
+            if (
+                detected_lang != "Unknown"
+                and detected_lang.lower() != preferred_lang.lower()
+                and len(body) > 10  # skip very short messages
+            ):
+                self._pending_translate[sender] = {
+                    "text": body,
+                    "detected_lang": detected_lang,
+                    "room_id": room.room_id,
+                }
+                menu = (
+                    f"This looks like **{detected_lang}**. What would you like?\n\n"
+                    f"1️⃣ **Translate to {preferred_lang}**\n"
+                    f"2️⃣ **Help me reply in {detected_lang}** (type your response, I'll translate)\n"
+                    f"3️⃣ **Just respond normally**"
+                )
+                await self._send_text(room.room_id, menu)
+                return
+
        await self.client.room_typing(room.room_id, typing_state=True)
        try:
-            await self._respond_with_ai(room, body, sender=event.sender, image_data=image_data)
+            await self._respond_with_ai(room, body, sender=sender, image_data=image_data)
        finally:
            await self.client.room_typing(room.room_id, typing_state=False)