diff --git a/bot.py b/bot.py index fd7c0f4..cb09da4 100644 --- a/bot.py +++ b/bot.py @@ -19,6 +19,7 @@ from nio import ( LoginResponse, InviteMemberEvent, MegolmEvent, + RoomEncryptedImage, RoomMessageFile, RoomMessageImage, RoomMessageText, @@ -31,6 +32,7 @@ from nio import ( KeyVerificationMac, ToDeviceError, ) +from nio.crypto.attachments import decrypt_attachment from livekit import api BOT_DEVICE_ID = "AIBOT" @@ -200,6 +202,7 @@ class Bot: self.user_keys: dict[str, str] = self._load_user_keys() # matrix_user_id -> api_key self.room_models: dict[str, str] = {} # room_id -> model name self.auto_rename_rooms: set[str] = set() # rooms with auto-rename enabled + self._recent_images: dict[str, tuple[str, str, float]] = {} # room_id -> (b64, mime, timestamp) self.renamed_rooms: dict[str, float] = {} # room_id -> timestamp of last rename self._loaded_rooms: set[str] = set() # rooms where we've loaded state self._sync_token_received = False @@ -259,6 +262,7 @@ class Bot: self.client.add_event_callback(self.on_unknown, UnknownEvent) self.client.add_event_callback(self.on_text_message, RoomMessageText) self.client.add_event_callback(self.on_image_message, RoomMessageImage) + self.client.add_event_callback(self.on_encrypted_image_message, RoomEncryptedImage) self.client.add_event_callback(self.on_file_message, RoomMessageFile) self.client.add_event_callback(self.on_room_unknown, RoomMessageUnknown) self.client.add_response_callback(self.on_sync, SyncResponse) @@ -435,9 +439,18 @@ class Bot: await self._send_text(room.room_id, "LLM not configured (LITELLM_BASE_URL not set).") return + # Check if a recent image was sent in this room (within 60s) + image_data = None + cached = self._recent_images.get(room.room_id) + if cached: + b64, mime, ts = cached + if time.time() - ts < 60: + image_data = (b64, mime) + del self._recent_images[room.room_id] + await self.client.room_typing(room.room_id, typing_state=True) try: - await self._respond_with_ai(room, body, sender=event.sender) + await self._respond_with_ai(room, body, sender=event.sender, image_data=image_data) finally: await self.client.room_typing(room.room_id, typing_state=False) @@ -490,7 +503,70 @@ class Bot: b64_data = base64.b64encode(img_bytes).decode("utf-8") caption = (event.body or "").strip() - text = caption if caption and caption != "image" else "What's in this image?" + # Treat filenames (contain dots or are very long) as no caption + is_filename = not caption or caption == "image" or "." in caption or len(caption) > 100 + text = "What's in this image?" if is_filename else caption + + # Cache image for follow-up text messages + self._recent_images[room.room_id] = (b64_data, mime_type, time.time()) + + await self.client.room_typing(room.room_id, typing_state=True) + try: + await self._respond_with_ai(room, text, sender=event.sender, image_data=(b64_data, mime_type)) + finally: + await self.client.room_typing(room.room_id, typing_state=False) + + async def on_encrypted_image_message(self, room, event: RoomEncryptedImage): + """Handle encrypted image messages: decrypt, encode, and send to AI.""" + if event.sender == BOT_USER: + return + if not self._sync_token_received: + return + server_ts = event.server_timestamp / 1000 + if time.time() - server_ts > 30: + return + + await self._load_room_settings(room.room_id) + + is_dm = room.member_count == 2 + if not is_dm: + body = (event.body or "").strip() + bot_display = self.client.user_id.split(":")[0].lstrip("@") + mentioned = ( + BOT_USER in body + or f"@{bot_display}" in body.lower() + or bot_display.lower() in body.lower() + ) + if not mentioned: + return + + if not self.llm: + await self._send_text(room.room_id, "LLM not configured (LITELLM_BASE_URL not set).") + return + + mxc_url = event.url + if not mxc_url: + return + try: + resp = await self.client.download(mxc=mxc_url) + if not hasattr(resp, "body"): + logger.warning("Encrypted image download failed for %s", mxc_url) + return + # Decrypt the attachment + img_bytes = decrypt_attachment(resp.body, event.key["k"], event.hashes["sha256"], event.iv) + except Exception: + logger.exception("Failed to download/decrypt encrypted image %s", mxc_url) + return + + mime_type = getattr(event, "mimetype", None) or "image/png" + b64_data = base64.b64encode(img_bytes).decode("utf-8") + + caption = (event.body or "").strip() + is_filename = not caption or caption == "image" or "." in caption or len(caption) > 100 + text = "What's in this image?" if is_filename else caption + + # Cache image for follow-up text messages + self._recent_images[room.room_id] = (b64_data, mime_type, time.time()) await self.client.room_typing(room.room_id, typing_state=True) try: