fix: Handle encrypted images + link text to recent images
- Add a RoomEncryptedImage callback that uses decrypt_attachment to handle images in E2E-encrypted rooms
- Cache the most recent image per room (60s TTL) so that follow-up text messages like "was ist das" get the image context instead of hallucinating
- Treat captions that look like filenames (containing dots, or longer than 100 characters) as no caption, and default to "What's in this image?"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
80
bot.py
80
bot.py
@@ -19,6 +19,7 @@ from nio import (
|
||||
LoginResponse,
|
||||
InviteMemberEvent,
|
||||
MegolmEvent,
|
||||
RoomEncryptedImage,
|
||||
RoomMessageFile,
|
||||
RoomMessageImage,
|
||||
RoomMessageText,
|
||||
@@ -31,6 +32,7 @@ from nio import (
|
||||
KeyVerificationMac,
|
||||
ToDeviceError,
|
||||
)
|
||||
from nio.crypto.attachments import decrypt_attachment
|
||||
from livekit import api
|
||||
|
||||
BOT_DEVICE_ID = "AIBOT"
|
||||
@@ -200,6 +202,7 @@ class Bot:
|
||||
self.user_keys: dict[str, str] = self._load_user_keys() # matrix_user_id -> api_key
|
||||
self.room_models: dict[str, str] = {} # room_id -> model name
|
||||
self.auto_rename_rooms: set[str] = set() # rooms with auto-rename enabled
|
||||
self._recent_images: dict[str, tuple[str, str, float]] = {} # room_id -> (b64, mime, timestamp)
|
||||
self.renamed_rooms: dict[str, float] = {} # room_id -> timestamp of last rename
|
||||
self._loaded_rooms: set[str] = set() # rooms where we've loaded state
|
||||
self._sync_token_received = False
|
||||
@@ -259,6 +262,7 @@ class Bot:
|
||||
self.client.add_event_callback(self.on_unknown, UnknownEvent)
|
||||
self.client.add_event_callback(self.on_text_message, RoomMessageText)
|
||||
self.client.add_event_callback(self.on_image_message, RoomMessageImage)
|
||||
self.client.add_event_callback(self.on_encrypted_image_message, RoomEncryptedImage)
|
||||
self.client.add_event_callback(self.on_file_message, RoomMessageFile)
|
||||
self.client.add_event_callback(self.on_room_unknown, RoomMessageUnknown)
|
||||
self.client.add_response_callback(self.on_sync, SyncResponse)
|
||||
@@ -435,9 +439,18 @@ class Bot:
|
||||
await self._send_text(room.room_id, "LLM not configured (LITELLM_BASE_URL not set).")
|
||||
return
|
||||
|
||||
# Check if a recent image was sent in this room (within 60s)
|
||||
image_data = None
|
||||
cached = self._recent_images.get(room.room_id)
|
||||
if cached:
|
||||
b64, mime, ts = cached
|
||||
if time.time() - ts < 60:
|
||||
image_data = (b64, mime)
|
||||
del self._recent_images[room.room_id]
|
||||
|
||||
await self.client.room_typing(room.room_id, typing_state=True)
|
||||
try:
|
||||
await self._respond_with_ai(room, body, sender=event.sender)
|
||||
await self._respond_with_ai(room, body, sender=event.sender, image_data=image_data)
|
||||
finally:
|
||||
await self.client.room_typing(room.room_id, typing_state=False)
|
||||
|
||||
@@ -490,7 +503,70 @@ class Bot:
|
||||
b64_data = base64.b64encode(img_bytes).decode("utf-8")
|
||||
|
||||
caption = (event.body or "").strip()
|
||||
text = caption if caption and caption != "image" else "What's in this image?"
|
||||
# Treat filenames (contain dots or are very long) as no caption
|
||||
is_filename = not caption or caption == "image" or "." in caption or len(caption) > 100
|
||||
text = "What's in this image?" if is_filename else caption
|
||||
|
||||
# Cache image for follow-up text messages
|
||||
self._recent_images[room.room_id] = (b64_data, mime_type, time.time())
|
||||
|
||||
await self.client.room_typing(room.room_id, typing_state=True)
|
||||
try:
|
||||
await self._respond_with_ai(room, text, sender=event.sender, image_data=(b64_data, mime_type))
|
||||
finally:
|
||||
await self.client.room_typing(room.room_id, typing_state=False)
|
||||
|
||||
async def on_encrypted_image_message(self, room, event: RoomEncryptedImage):
|
||||
"""Handle encrypted image messages: decrypt, encode, and send to AI."""
|
||||
if event.sender == BOT_USER:
|
||||
return
|
||||
if not self._sync_token_received:
|
||||
return
|
||||
server_ts = event.server_timestamp / 1000
|
||||
if time.time() - server_ts > 30:
|
||||
return
|
||||
|
||||
await self._load_room_settings(room.room_id)
|
||||
|
||||
is_dm = room.member_count == 2
|
||||
if not is_dm:
|
||||
body = (event.body or "").strip()
|
||||
bot_display = self.client.user_id.split(":")[0].lstrip("@")
|
||||
mentioned = (
|
||||
BOT_USER in body
|
||||
or f"@{bot_display}" in body.lower()
|
||||
or bot_display.lower() in body.lower()
|
||||
)
|
||||
if not mentioned:
|
||||
return
|
||||
|
||||
if not self.llm:
|
||||
await self._send_text(room.room_id, "LLM not configured (LITELLM_BASE_URL not set).")
|
||||
return
|
||||
|
||||
mxc_url = event.url
|
||||
if not mxc_url:
|
||||
return
|
||||
try:
|
||||
resp = await self.client.download(mxc=mxc_url)
|
||||
if not hasattr(resp, "body"):
|
||||
logger.warning("Encrypted image download failed for %s", mxc_url)
|
||||
return
|
||||
# Decrypt the attachment
|
||||
img_bytes = decrypt_attachment(resp.body, event.key["k"], event.hashes["sha256"], event.iv)
|
||||
except Exception:
|
||||
logger.exception("Failed to download/decrypt encrypted image %s", mxc_url)
|
||||
return
|
||||
|
||||
mime_type = getattr(event, "mimetype", None) or "image/png"
|
||||
b64_data = base64.b64encode(img_bytes).decode("utf-8")
|
||||
|
||||
caption = (event.body or "").strip()
|
||||
is_filename = not caption or caption == "image" or "." in caption or len(caption) > 100
|
||||
text = "What's in this image?" if is_filename else caption
|
||||
|
||||
# Cache image for follow-up text messages
|
||||
self._recent_images[room.room_id] = (b64_data, mime_type, time.time())
|
||||
|
||||
await self.client.room_typing(room.room_id, typing_state=True)
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user