fix: harden Matrix ecosystem — pool recovery, parallel queries, voice persistence

- Memory service: asyncpg pool auto-reconnect on connection loss, IVFFlat lists 10→100
- Bot: parallel RAG/memory/chunk queries (asyncio.gather), parallel tool execution
- Bot: skip memory extraction for trivial messages (<20 chars, no personal facts)
- Bot: persist voice call transcripts as searchable conversation chunks
- RAG: JSON parse safety in AI metadata, embedding_status tracking, fetch timeouts
- Drive sync: token refresh mutex to prevent race conditions, fetch timeouts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 18:10:22 +02:00
parent 9fcdedc4b4
commit 1c8d45c31b
2 changed files with 115 additions and 22 deletions
--- a/bot.py
+++ b/bot.py
@@ -1317,6 +1317,24 @@ class Bot:
                            await self._send_text(room_id, f"**Anruf-Zusammenfassung:**\n\n{summary}")
                        except Exception:
                            logger.exception("Failed to post call summary for %s", room_id)
+                        # Persist voice transcript as conversation chunks in memory service
+                        try:
+                            caller = event.sender
+                            for entry in transcript:
+                                if entry["role"] == "user":
+                                    user_text = entry["text"]
+                                    # Find the next assistant response
+                                    idx = transcript.index(entry)
+                                    ai_text = ""
+                                    if idx + 1 < len(transcript) and transcript[idx + 1]["role"] == "assistant":
+                                        ai_text = transcript[idx + 1]["text"]
+                                    if user_text and ai_text:
+                                        await self._store_conversation_chunk(
+                                            user_text, ai_text, caller, room_id
+                                        )
+                            logger.info("Stored %d voice transcript chunks for %s", len(transcript) // 2, room_id)
+                        except Exception:
+                            logger.warning("Failed to store voice transcript chunks for %s", room_id, exc_info=True)
                        # Extract and post document annotations if a document was discussed
                        if doc_context:
                            try:
@@ -1415,6 +1433,22 @@ class Bot:
            summary=summary, original_ts=time.time(),
        )

+    # Regex for detecting personal facts worth extracting (pronouns, possessives, identity markers)
+    _PERSONAL_FACT_RE = re.compile(
+        r"\b(ich|mein|meine|meinem|i'm|i am|my |mine|we |our |"
+        r"name is|hei[sß]e|wohne|arbeite|lebe|studier|born|live|work|"
+        r"prefer|favorite|hobby|birthday|family|wife|husband|partner|child|dog|cat)\b",
+        re.IGNORECASE,
+    )
+
+    def _is_trivial_message(self, text: str) -> bool:
+        """Return True if the message is too trivial for memory extraction."""
+        if len(text) >= 20:
+            return False
+        if self._PERSONAL_FACT_RE.search(text):
+            return False
+        return True
+
    async def _extract_and_store_memories(self, user_message: str, ai_reply: str,
                                          existing_facts: list[str], model: str,
                                          sender: str, room_id: str):
@@ -1422,6 +1456,11 @@ class Bot:
        if not self.llm:
            return

+        # Skip extraction for trivial messages (saves ~2-3s + 1 LLM call)
+        if self._is_trivial_message(user_message):
+            logger.debug("Skipping memory extraction for trivial message: %s", user_message[:40])
+            return
+
        existing_text = "\n".join(f"- {f}" for f in existing_facts) if existing_facts else "(none)"
        logger.info("Memory extraction: user_msg=%s... (%d existing facts)", user_message[:80], len(existing_facts))

@@ -2221,23 +2260,34 @@ class Bot:
        # Rewrite query using conversation context for better RAG search
        search_query = await self._rewrite_query(user_message, history)

-        # Document context via MatrixHost API
-        doc_results = await self.rag.search(search_query, matrix_user_id=sender) if sender else []
+        # Run RAG search, memory query, and chunk query in parallel (independent)
+        if sender:
+            doc_results_coro = self.rag.search(search_query, matrix_user_id=sender)
+            memories_coro = self.memory.query(sender, user_message, top_k=10)
+            chunks_coro = self.memory.query_chunks(search_query, user_id=sender, room_id=room.room_id, top_k=5)
+            doc_results, memories, chunks = await asyncio.gather(
+                doc_results_coro, memories_coro, chunks_coro,
+                return_exceptions=True,
+            )
+            # Handle exceptions from gather
+            if isinstance(doc_results, BaseException):
+                logger.warning("RAG search failed: %s", doc_results)
+                doc_results = []
+            if isinstance(memories, BaseException):
+                logger.warning("Memory query failed: %s", memories)
+                memories = []
+            if isinstance(chunks, BaseException):
+                logger.warning("Chunk query failed: %s", chunks)
+                chunks = []
+        else:
+            doc_results, memories, chunks = [], [], []
+
        doc_context = self.rag.format_context(doc_results)
        if doc_context:
            logger.info("RAG found %d docs for: %s (original: %s)", len(doc_results), search_query[:50], user_message[:50])
        else:
            logger.info("RAG found 0 docs for: %s (original: %s)", search_query[:50], user_message[:50])
-
-        # Query relevant memories via semantic search
-        memories = await self.memory.query(sender, user_message, top_k=10) if sender else []
        memory_context = self._format_memories(memories)
-
-        # Query relevant conversation chunks (RAG over chat history)
-        if sender:
-            chunks = await self.memory.query_chunks(search_query, user_id=sender, room_id=room.room_id, top_k=5)
-        else:
-            chunks = []
        chunk_context = self._format_chunks(chunks)

        # Include room document context (PDFs, Confluence pages, images uploaded to room)
@@ -2327,19 +2377,20 @@ class Bot:
                    })
                messages.append(assistant_msg)

-                # Execute each tool and append results
-                for tc in choice.message.tool_calls:
+                # Execute tools in parallel when multiple are requested
+                async def _run_tool(tc):
                    try:
                        args = json.loads(tc.function.arguments)
                    except json.JSONDecodeError:
                        args = {}
                    result = await self._execute_tool(tc.function.name, args, sender, room.room_id)
-                    messages.append({
-                        "role": "tool",
-                        "tool_call_id": tc.id,
-                        "content": result,
-                    })
                    logger.info("Tool %s executed (iter %d) for %s", tc.function.name, iteration, sender)
+                    return {"role": "tool", "tool_call_id": tc.id, "content": result}
+
+                tool_results = await asyncio.gather(
+                    *[_run_tool(tc) for tc in choice.message.tool_calls]
+                )
+                messages.extend(tool_results)

            # Tag whether tools were used during the loop
            if iteration > 0: