fix: encrypted file handler + summary heading/markup fixes
- Add RoomEncryptedFile handler for PDFs/docs in encrypted rooms
- Tell the summary LLM not to include headings (prevents a duplicate heading)
- Strip <br/> after block elements in _md_to_html

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
97
bot.py
97
bot.py
@@ -21,6 +21,7 @@ from nio import (
|
|||||||
LoginResponse,
|
LoginResponse,
|
||||||
InviteMemberEvent,
|
InviteMemberEvent,
|
||||||
MegolmEvent,
|
MegolmEvent,
|
||||||
|
RoomEncryptedFile,
|
||||||
RoomEncryptedImage,
|
RoomEncryptedImage,
|
||||||
RoomMessageFile,
|
RoomMessageFile,
|
||||||
RoomMessageImage,
|
RoomMessageImage,
|
||||||
@@ -339,6 +340,7 @@ class Bot:
|
|||||||
self.client.add_event_callback(self.on_image_message, RoomMessageImage)
|
self.client.add_event_callback(self.on_image_message, RoomMessageImage)
|
||||||
self.client.add_event_callback(self.on_encrypted_image_message, RoomEncryptedImage)
|
self.client.add_event_callback(self.on_encrypted_image_message, RoomEncryptedImage)
|
||||||
self.client.add_event_callback(self.on_file_message, RoomMessageFile)
|
self.client.add_event_callback(self.on_file_message, RoomMessageFile)
|
||||||
|
self.client.add_event_callback(self.on_encrypted_file_message, RoomEncryptedFile)
|
||||||
self.client.add_event_callback(self.on_room_unknown, RoomMessageUnknown)
|
self.client.add_event_callback(self.on_room_unknown, RoomMessageUnknown)
|
||||||
self.client.add_response_callback(self.on_sync, SyncResponse)
|
self.client.add_response_callback(self.on_sync, SyncResponse)
|
||||||
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationStart)
|
self.client.add_to_device_callback(self.on_key_verification, KeyVerificationStart)
|
||||||
@@ -1041,6 +1043,95 @@ class Bot:
|
|||||||
finally:
|
finally:
|
||||||
await self.client.room_typing(room.room_id, typing_state=False)
|
await self.client.room_typing(room.room_id, typing_state=False)
|
||||||
|
|
||||||
|
async def on_encrypted_file_message(self, room, event: RoomEncryptedFile):
    """Download, decrypt and summarise a file posted in an encrypted room.

    Only files whose name ends in ``.pdf``, ``.docx`` or one of
    ``self._TEXT_EXTENSIONS`` are handled. In rooms whose member count is
    not 2, the event body must also mention the bot. The extracted text is
    stored in ``self._room_document_context`` (at most five entries per
    room) and passed to ``self._respond_with_ai``.

    Args:
        room: The room the event was received in.
        event: The encrypted file event carrying the URL and key material.
    """
    # Ignore the bot's own events and anything seen while
    # ``_sync_token_received`` is still falsy.
    if event.sender == BOT_USER or not self._sync_token_received:
        return

    # server_timestamp is divided by 1000 so it can be compared with
    # time.time(); events older than 30 seconds are dropped.
    sent_at = event.server_timestamp / 1000
    if time.time() - sent_at > 30:
        return

    payload = (event.source or {}).get("content", {})
    filename = payload.get("body", "file")
    _, suffix = os.path.splitext(filename.lower())

    is_pdf = suffix == ".pdf"
    is_docx = suffix == ".docx"
    is_text = suffix in self._TEXT_EXTENSIONS
    if not any((is_pdf, is_docx, is_text)):
        return

    await self._load_room_settings(room.room_id)

    # A two-member room is treated as a direct chat; anywhere else the bot
    # only reacts when the message body refers to it.
    if room.member_count != 2:
        text = (event.body or "").strip()
        localpart = self.client.user_id.split(":")[0].lstrip("@")
        lowered = text.lower()
        addressed = (
            BOT_USER in text
            or f"@{localpart}" in lowered
            or localpart.lower() in lowered
        )
        if not addressed:
            return

    if not self.llm:
        await self._send_text(room.room_id, "LLM not configured (LITELLM_BASE_URL not set).")
        return

    mxc_url = event.url
    if not mxc_url:
        return

    try:
        download = await self.client.download(mxc=mxc_url)
        # A response object without a ``body`` attribute is treated as a
        # failed download.
        if not hasattr(download, "body"):
            logger.warning("Encrypted file download failed for %s", mxc_url)
            return
        plaintext = decrypt_attachment(
            download.body,
            event.key["k"],
            event.hashes["sha256"],
            event.iv,
        )
    except Exception:
        logger.exception("Failed to download/decrypt encrypted file %s", mxc_url)
        return

    # Choose the extractor, the stored document type and the prompt label
    # together. Word documents are stored with type "text".
    if is_pdf:
        extract, doc_type, label = self._extract_pdf_text, "pdf", "PDF"
    elif is_docx:
        extract, doc_type, label = self._extract_docx_text, "text", "Word document"
    else:
        extract, doc_type, label = self._extract_text_file, "text", "file"
    extracted = extract(plaintext)

    if not extracted:
        await self._send_text(room.room_id, f"I couldn't extract any text from that file ({filename}).")
        return

    # Cap the text at 50000 characters and append a truncation marker.
    if len(extracted) > 50000:
        extracted = extracted[:50000] + "\n\n[... truncated, file too long ...]"

    # Record the document for this room, keeping only the five newest.
    room_docs = self._room_document_context.setdefault(room.room_id, [])
    room_docs.append({
        "type": doc_type,
        "filename": filename,
        "text": extracted,
        "timestamp": time.time(),
    })
    overflow = len(room_docs) - 5
    if overflow > 0:
        del room_docs[:overflow]

    user_message = f'The user sent a {label} named "{filename}". Here is the extracted text:\n\n{extracted}\n\nPlease summarize or answer questions about this document.'

    # Show the typing indicator while the reply is generated and always
    # clear it afterwards.
    await self.client.room_typing(room.room_id, typing_state=True)
    try:
        await self._respond_with_ai(room, user_message, sender=event.sender)
    finally:
        await self.client.room_typing(room.room_id, typing_state=False)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_pdf_text(pdf_bytes: bytes) -> str:
|
def _extract_pdf_text(pdf_bytes: bytes) -> str:
|
||||||
"""Extract text from PDF bytes using pymupdf."""
|
"""Extract text from PDF bytes using pymupdf."""
|
||||||
@@ -1503,6 +1594,9 @@ class Bot:
|
|||||||
safe = re.sub(r"^# (.+)$", r"<h1>\1</h1>", safe, flags=re.MULTILINE)
|
safe = re.sub(r"^# (.+)$", r"<h1>\1</h1>", safe, flags=re.MULTILINE)
|
||||||
# Line breaks
|
# Line breaks
|
||||||
safe = safe.replace("\n", "<br/>")
|
safe = safe.replace("\n", "<br/>")
|
||||||
|
# Remove redundant <br/> after block elements
|
||||||
|
safe = re.sub(r"(</h[1-6]>)(<br/>)+", r"\1", safe)
|
||||||
|
safe = re.sub(r"(</pre>)(<br/>)+", r"\1", safe)
|
||||||
return safe
|
return safe
|
||||||
|
|
||||||
async def _generate_and_send_image(self, room_id: str, prompt: str):
|
async def _generate_and_send_image(self, room_id: str, prompt: str):
|
||||||
@@ -1577,7 +1671,8 @@ class Bot:
|
|||||||
{"role": "system", "content": (
|
{"role": "system", "content": (
|
||||||
"Fasse das folgende Anruf-Transkript kurz und praegnant zusammen. "
|
"Fasse das folgende Anruf-Transkript kurz und praegnant zusammen. "
|
||||||
"Nenne die wichtigsten besprochenen Punkte, Entscheidungen und offene Fragen. "
|
"Nenne die wichtigsten besprochenen Punkte, Entscheidungen und offene Fragen. "
|
||||||
"Antworte in der Sprache des Gespraechs. Maximal 5-8 Saetze."
|
"Antworte in der Sprache des Gespraechs. Maximal 5-8 Saetze. "
|
||||||
|
"Keine Ueberschrift, kein Markdown-Heading — beginne direkt mit dem Text."
|
||||||
)},
|
)},
|
||||||
{"role": "user", "content": transcript_text},
|
{"role": "user", "content": transcript_text},
|
||||||
],
|
],
|
||||||
|
|||||||
Reference in New Issue
Block a user