fix: harden Matrix ecosystem — pool recovery, parallel queries, voice persistence

- Memory service: asyncpg pool auto-reconnect on connection loss, IVFFlat lists 10→100
- Bot: parallel RAG/memory/chunk queries (asyncio.gather), parallel tool execution
- Bot: skip memory extraction for trivial messages (<20 chars, no personal facts)
- Bot: persist voice call transcripts as searchable conversation chunks
- RAG: JSON parse safety in AI metadata, embedding_status tracking, fetch timeouts
- Drive sync: token refresh mutex to prevent race conditions, fetch timeouts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-03-08 18:10:22 +02:00
parent 9fcdedc4b4
commit 1c8d45c31b
2 changed files with 115 additions and 22 deletions

87
bot.py
View File

@@ -1317,6 +1317,24 @@ class Bot:
await self._send_text(room_id, f"**Anruf-Zusammenfassung:**\n\n{summary}")
except Exception:
logger.exception("Failed to post call summary for %s", room_id)
# Persist voice transcript as conversation chunks in memory service
try:
caller = event.sender
for entry in transcript:
if entry["role"] == "user":
user_text = entry["text"]
# Find the next assistant response
idx = transcript.index(entry)
ai_text = ""
if idx + 1 < len(transcript) and transcript[idx + 1]["role"] == "assistant":
ai_text = transcript[idx + 1]["text"]
if user_text and ai_text:
await self._store_conversation_chunk(
user_text, ai_text, caller, room_id
)
logger.info("Stored %d voice transcript chunks for %s", len(transcript) // 2, room_id)
except Exception:
logger.warning("Failed to store voice transcript chunks for %s", room_id, exc_info=True)
# Extract and post document annotations if a document was discussed
if doc_context:
try:
@@ -1415,6 +1433,22 @@ class Bot:
summary=summary, original_ts=time.time(),
)
# Regex for detecting personal facts worth extracting (pronouns, possessives,
# identity markers) in German and English text.
#
# The whole alternation is wrapped in \b ... \b, so every alternative must end
# exactly at a word boundary. Stems that are meant to cover inflected forms
# therefore need an explicit suffix pattern:
#   - mein(?:e[mnrs]?)?  -> mein, meine, meinem, meinen, meiner, meines
#   - hei(?:ß|ss?)e      -> heiße, heisse (ss-spelling), and the legacy "heise"
#   - studier\w*         -> studiere, studierst, studiert, ...
# The single capturing group is kept so any caller reading group(1) still works.
_PERSONAL_FACT_RE = re.compile(
    r"\b(ich|mein(?:e[mnrs]?)?|i'm|i am|my |mine|we |our |"
    r"name is|hei(?:ß|ss?)e|wohne|arbeite|lebe|studier\w*|born|live|work|"
    r"prefer|favorite|hobby|birthday|family|wife|husband|partner|child|dog|cat)\b",
    re.IGNORECASE,
)
def _is_trivial_message(self, text: str) -> bool:
    """Decide whether *text* can be skipped by memory extraction.

    A message counts as trivial only when both conditions hold: it is
    shorter than 20 characters, and ``_PERSONAL_FACT_RE`` finds no match
    in it. The length test runs first, so the regex is never evaluated
    for messages of 20 or more characters.
    """
    is_short = len(text) < 20
    return is_short and self._PERSONAL_FACT_RE.search(text) is None
async def _extract_and_store_memories(self, user_message: str, ai_reply: str,
existing_facts: list[str], model: str,
sender: str, room_id: str):
@@ -1422,6 +1456,11 @@ class Bot:
if not self.llm:
return
# Skip extraction for trivial messages (saves ~2-3s + 1 LLM call)
if self._is_trivial_message(user_message):
logger.debug("Skipping memory extraction for trivial message: %s", user_message[:40])
return
existing_text = "\n".join(f"- {f}" for f in existing_facts) if existing_facts else "(none)"
logger.info("Memory extraction: user_msg=%s... (%d existing facts)", user_message[:80], len(existing_facts))
@@ -2221,23 +2260,34 @@ class Bot:
# Rewrite query using conversation context for better RAG search
search_query = await self._rewrite_query(user_message, history)
# Document context via MatrixHost API
doc_results = await self.rag.search(search_query, matrix_user_id=sender) if sender else []
# Run RAG search, memory query, and chunk query in parallel (independent)
if sender:
doc_results_coro = self.rag.search(search_query, matrix_user_id=sender)
memories_coro = self.memory.query(sender, user_message, top_k=10)
chunks_coro = self.memory.query_chunks(search_query, user_id=sender, room_id=room.room_id, top_k=5)
doc_results, memories, chunks = await asyncio.gather(
doc_results_coro, memories_coro, chunks_coro,
return_exceptions=True,
)
# Handle exceptions from gather
if isinstance(doc_results, BaseException):
logger.warning("RAG search failed: %s", doc_results)
doc_results = []
if isinstance(memories, BaseException):
logger.warning("Memory query failed: %s", memories)
memories = []
if isinstance(chunks, BaseException):
logger.warning("Chunk query failed: %s", chunks)
chunks = []
else:
doc_results, memories, chunks = [], [], []
doc_context = self.rag.format_context(doc_results)
if doc_context:
logger.info("RAG found %d docs for: %s (original: %s)", len(doc_results), search_query[:50], user_message[:50])
else:
logger.info("RAG found 0 docs for: %s (original: %s)", search_query[:50], user_message[:50])
# Query relevant memories via semantic search
memories = await self.memory.query(sender, user_message, top_k=10) if sender else []
memory_context = self._format_memories(memories)
# Query relevant conversation chunks (RAG over chat history)
if sender:
chunks = await self.memory.query_chunks(search_query, user_id=sender, room_id=room.room_id, top_k=5)
else:
chunks = []
chunk_context = self._format_chunks(chunks)
# Include room document context (PDFs, Confluence pages, images uploaded to room)
@@ -2327,19 +2377,20 @@ class Bot:
})
messages.append(assistant_msg)
# Execute each tool and append results
for tc in choice.message.tool_calls:
# Execute tools in parallel when multiple are requested
async def _run_tool(tc):
try:
args = json.loads(tc.function.arguments)
except json.JSONDecodeError:
args = {}
result = await self._execute_tool(tc.function.name, args, sender, room.room_id)
messages.append({
"role": "tool",
"tool_call_id": tc.id,
"content": result,
})
logger.info("Tool %s executed (iter %d) for %s", tc.function.name, iteration, sender)
return {"role": "tool", "tool_call_id": tc.id, "content": result}
tool_results = await asyncio.gather(
*[_run_tool(tc) for tc in choice.message.tool_calls]
)
messages.extend(tool_results)
# Tag whether tools were used during the loop
if iteration > 0: