diff --git a/.gitignore b/.gitignore index fc36f7d..810a41c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__/ *.pyc .venv/ +.claude-session/ diff --git a/bot.py b/bot.py index 90749bc..61a3cd2 100644 --- a/bot.py +++ b/bot.py @@ -79,7 +79,8 @@ IMPORTANT RULES — FOLLOW THESE STRICTLY: - You can see and analyze images that users send. Describe what you see when asked about an image. - You can read and analyze PDF documents that users send. Summarize content and answer questions about them. - You can generate images when asked — use the generate_image tool for any image creation, drawing, or illustration requests. -- If user memories are provided, use them to personalize responses. Address users by name if known.""" +- If user memories are provided, use them to personalize responses. Address users by name if known. +- When asked to translate, provide ONLY the translation with no explanation.""" IMAGE_GEN_TOOLS = [{ "type": "function", @@ -106,6 +107,7 @@ HELP_TEXT = """**AI Bot Commands** - `!ai auto-rename on|off` — Auto-rename room based on conversation topic - `!ai forget` — Delete all memories the bot has about you - `!ai memories` — Show what the bot remembers about you +- **Translate**: Forward a message to this DM — bot detects language and offers translation - **@mention the bot** or start with `!ai` for a regular AI response""" @@ -216,6 +218,8 @@ class Bot: self._sync_token_received = False self._verifications: dict[str, dict] = {} # txn_id -> verification state self._pending_connects: dict[str, str] = {} # matrix_user_id -> device_code + self._pending_translate: dict[str, dict] = {} # sender -> {text, detected_lang, room_id} + self._pending_reply: dict[str, dict] = {} # sender -> {target_lang} @staticmethod def _load_user_keys() -> dict[str, str]: @@ -490,6 +494,60 @@ class Bot: logger.debug("Memory extraction failed", exc_info=True) return existing + async def _detect_language(self, text: str) -> str: + """Detect the language of a text using a fast LLM call.""" + if not self.llm: + return "Unknown" + try: + resp = await self.llm.chat.completions.create( + model=DEFAULT_MODEL, + messages=[ + {"role": "system", "content": "What language is this text? Reply with ONLY the language name in English."}, + {"role": "user", "content": text[:500]}, + ], + max_tokens=10, + ) + return resp.choices[0].message.content.strip() + except Exception: + logger.debug("Language detection failed", exc_info=True) + return "Unknown" + + async def _translate_text(self, text: str, target_language: str, model: str | None = None) -> str: + """Translate text to the target language using LLM.""" + if not self.llm: + return text + try: + resp = await self.llm.chat.completions.create( + model=model or DEFAULT_MODEL, + messages=[ + {"role": "system", "content": f"Translate the following text to {target_language}. Return ONLY the translation."}, + {"role": "user", "content": text}, + ], + max_tokens=1000, + ) + return resp.choices[0].message.content.strip() + except Exception: + logger.debug("Translation failed", exc_info=True) + return f"[Translation failed] {text}" + + def _get_preferred_language(self, user_id: str) -> str: + """Extract user's preferred language from memories, default English.""" + memories = self._load_memories(user_id) + known_langs = [ + "English", "German", "French", "Spanish", "Italian", "Portuguese", + "Dutch", "Russian", "Chinese", "Japanese", "Korean", "Arabic", + "Turkish", "Polish", "Swedish", "Norwegian", "Danish", "Finnish", + "Greek", "Hebrew", "Hindi", "Thai", "Vietnamese", "Indonesian", + "Czech", "Romanian", "Hungarian", "Ukrainian", "Croatian", "Serbian", + ] + for m in memories: + fact = m["fact"].lower() + if "language" in fact or "speaks" in fact or "prefers" in fact: + for lang in known_langs: + if lang.lower() in fact: + return lang + return "English" + async def on_text_message(self, room, event: RoomMessageText): """Handle text messages: commands and AI responses.""" if event.sender == BOT_USER: @@ -529,6 +587,45 @@ class Bot: await self._send_text(room.room_id, "LLM not configured (LITELLM_BASE_URL not set).") return + sender = event.sender + + # --- DM translation workflow: handle pending reply composition --- + if is_dm and sender in self._pending_reply: + pending = self._pending_reply.pop(sender) + await self.client.room_typing(room.room_id, typing_state=True) + try: + translated = await self._translate_text(body, pending["target_lang"]) + await self._send_text(room.room_id, translated) + finally: + await self.client.room_typing(room.room_id, typing_state=False) + return + + # --- DM translation workflow: handle menu response --- + if is_dm and sender in self._pending_translate: + pending = self._pending_translate.pop(sender) + choice = body.strip().lower() + preferred_lang = self._get_preferred_language(sender) + + if choice in ("1", "1️⃣") or choice.startswith("translate"): + await self.client.room_typing(room.room_id, typing_state=True) + try: + translated = await self._translate_text(pending["text"], preferred_lang) + await self._send_text(room.room_id, translated) + finally: + await self.client.room_typing(room.room_id, typing_state=False) + return + + elif choice in ("2", "2️⃣") or choice.startswith("reply"): + self._pending_reply[sender] = {"target_lang": pending["detected_lang"]} + await self._send_text( + room.room_id, + f"Type your message — I'll translate it to **{pending['detected_lang']}**.", + ) + return + + # choice "3" or anything else → proceed with normal AI response + # (fall through to normal flow below with original pending text context) + # Check if a recent image was sent in this room (within 60s) image_data = None cached = self._recent_images.get(room.room_id) @@ -538,9 +635,32 @@ class Bot: image_data = (b64, mime) del self._recent_images[room.room_id] + # --- DM translation workflow: detect foreign language --- + if is_dm and not body.startswith("!ai") and not image_data: + preferred_lang = self._get_preferred_language(sender) + detected_lang = await self._detect_language(body) + if ( + detected_lang != "Unknown" + and detected_lang.lower() != preferred_lang.lower() + and len(body) > 10 # skip very short messages + ): + self._pending_translate[sender] = { + "text": body, + "detected_lang": detected_lang, + "room_id": room.room_id, + } + menu = ( + f"This looks like **{detected_lang}**. What would you like?\n\n" + f"1️⃣ **Translate to {preferred_lang}**\n" + f"2️⃣ **Help me reply in {detected_lang}** (type your response, I'll translate)\n" + f"3️⃣ **Just respond normally**" + ) + await self._send_text(room.room_id, menu) + return + await self.client.room_typing(room.room_id, typing_state=True) try: - await self._respond_with_ai(room, body, sender=event.sender, image_data=image_data) + await self._respond_with_ai(room, body, sender=sender, image_data=image_data) finally: await self.client.room_typing(room.room_id, typing_state=False)