feat(MAT-13): Add conversation chunk RAG for Matrix chat history

Add semantic search over past conversations alongside existing memory facts. A new conversation_chunks table stores user-assistant exchanges with LLM-generated summaries that are embedded for retrieval. The bot queries chunks on each message and injects relevant past conversations into the system prompt. New exchanges are indexed automatically after each bot response. Memory-service: /chunks/store, /chunks/query, /chunks/bulk-store endpoints. Bot: chunk query + formatting, live indexing via asyncio.gather with memory extraction. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-01 07:48:19 +02:00
parent 6fe9607fb1
commit fb54ac2bea
2 changed files with 277 additions and 8 deletions
--- a/bot.py
+++ b/bot.py
@@ -457,6 +457,39 @@ class MemoryClient:
            logger.warning("Memory list failed", exc_info=True)
            return []

+    async def store_chunk(self, user_id: str, room_id: str, chunk_text: str,
+                          summary: str, source_event_id: str = "", original_ts: float = 0.0):
+        """Best-effort POST of one exchange to /chunks/store; errors are logged, not raised."""
+        if not self.enabled:
+            return
+        payload = {
+            "user_id": user_id, "room_id": room_id,
+            "chunk_text": chunk_text, "summary": summary,
+            "source_event_id": source_event_id, "original_ts": original_ts,
+        }
+        try:
+            async with httpx.AsyncClient(timeout=15.0) as client:
+                resp = await client.post(f"{self.base_url}/chunks/store", json=payload)
+                resp.raise_for_status()  # otherwise a 4xx/5xx reply drops the chunk without a log line
+        except Exception:
+            logger.warning("Chunk store failed", exc_info=True)
+
+    async def query_chunks(self, query: str, user_id: str = "", room_id: str = "",
+                           top_k: int = 5) -> list[dict]:
+        """POST a search to /chunks/query; return its "results" list, or [] when disabled or on error."""
+        if not self.enabled:
+            return []
+        try:
+            endpoint = f"{self.base_url}/chunks/query"
+            body = {"user_id": user_id, "room_id": room_id, "query": query, "top_k": top_k}
+            async with httpx.AsyncClient(timeout=10.0) as http:
+                response = await http.post(endpoint, json=body)
+                response.raise_for_status()
+                return response.json().get("results", [])
+        except Exception:
+            logger.warning("Chunk query failed", exc_info=True)
+            return []
+

 class AtlassianClient:
    """Fetches per-user Atlassian tokens from the portal and calls Atlassian REST APIs."""
@@ -1173,6 +1206,52 @@ class Bot:
        facts = [m["fact"] for m in memories]
        return "You have these memories about this user:\n" + "\n".join(f"- {f}" for f in facts)

+    @staticmethod
+    def _format_chunks(chunks: list[dict]) -> str:
+        """Render retrieved chunks as a system-prompt section (empty string when there are none)."""
+        if not chunks:
+            return ""
+        parts = ["Relevant past conversations:"]
+        for c in chunks:
+            ts = c.get("original_ts") or 0
+            try:  # a malformed timestamp from the service must not abort the reply being built
+                date_str = time.strftime("%Y-%m-%d", time.gmtime(ts)) if ts else "unknown"
+            except (TypeError, ValueError, OverflowError, OSError):
+                date_str = "unknown"
+            summary, text = c.get("summary") or "", c.get("chunk_text") or ""  # JSON null -> ""
+            text = text[:500] + "..." if len(text) > 500 else text  # ~500 chars keeps the prompt small
+            parts.append(f"\n### {summary} ({date_str})\n{text}")
+        return "\n".join(parts)
+
+    async def _store_conversation_chunk(self, user_message: str, ai_reply: str,
+                                          sender: str, room_id: str):
+        """Summarize one user/assistant exchange with the LLM and store it as a chunk for RAG."""
+        if not self.llm or not self.memory.enabled:
+            return
+        chunk_text = f"User: {user_message}\nAssistant: {ai_reply}"
+        try:
+            resp = await self.llm.chat.completions.create(
+                model="claude-haiku",
+                messages=[
+                    {"role": "system", "content": (
+                        "Summarize this conversation exchange in 1-2 sentences for search indexing. "
+                        "Focus on the topic and key information discussed. Be concise. "
+                        "Write the summary in the same language as the conversation."
+                    )},
+                    {"role": "user", "content": chunk_text[:2000]},
+                ],
+                max_tokens=100,
+            )
+            summary = (resp.choices[0].message.content or "").strip()  # content may be None
+        except Exception:
+            logger.warning("Chunk summarization failed, using truncated message", exc_info=True)
+            summary = ""
+        # Never store an empty summary: fall back to the start of the user's message instead.
+        summary = summary or user_message[:200]
+
+        await self.memory.store_chunk(user_id=sender, room_id=room_id, chunk_text=chunk_text,
+                                      summary=summary, original_ts=time.time())
+
    async def _extract_and_store_memories(self, user_message: str, ai_reply: str,
                                          existing_facts: list[str], model: str,
                                          sender: str, room_id: str):
@@ -2092,6 +2171,10 @@ class Bot:
        memories = await self.memory.query(sender, user_message, top_k=10) if sender else []
        memory_context = self._format_memories(memories)

+        # Query relevant conversation chunks (RAG over chat history)
+        chunks = await self.memory.query_chunks(search_query, user_id=sender or "", top_k=5)
+        chunk_context = self._format_chunks(chunks)
+
        # Include room document context (PDFs, Confluence pages, images uploaded to room)
        room_doc_context = ""
        room_docs = [e for e in self._room_document_context.get(room.room_id, [])
@@ -2114,6 +2197,8 @@ class Bot:
        messages = [{"role": "system", "content": SYSTEM_PROMPT}]
        if memory_context:
            messages.append({"role": "system", "content": memory_context})
+        if chunk_context:
+            messages.append({"role": "system", "content": chunk_context})
        if doc_context:
            messages.append({"role": "system", "content": doc_context})
        if room_doc_context:
@@ -2181,20 +2266,25 @@ class Bot:
            if reply:
                await self._send_text(room.room_id, reply)

-            # Extract and store new memories (after reply sent, with timeout)
+            # Extract and store new memories + conversation chunk (after reply sent)
            if sender and reply:
                existing_facts = [m["fact"] for m in memories]
                try:
                    await asyncio.wait_for(
-                        self._extract_and_store_memories(
-                            user_message, reply, existing_facts, model, sender, room.room_id
+                        asyncio.gather(
+                            self._extract_and_store_memories(
+                                user_message, reply, existing_facts, model, sender, room.room_id
+                            ),
+                            self._store_conversation_chunk(
+                                user_message, reply, sender, room.room_id
+                            ),
                        ),
-                        timeout=15.0,
+                        timeout=20.0,
                    )
                except asyncio.TimeoutError:
-                    logger.warning("Memory extraction timed out for %s", sender)
+                    logger.warning("Memory/chunk extraction timed out for %s", sender)
                except Exception:
-                    logger.warning("Memory save failed", exc_info=True)
+                    logger.warning("Memory/chunk save failed", exc_info=True)

            # Auto-rename: only for group rooms with explicit opt-in (not DMs)
            if room.room_id in self.auto_rename_rooms: