feat: Add persistent user memory system

- Extract and store memorable facts (name, language, preferences) per user
- Inject memories into system prompt for personalized responses
- LLM-based extraction after each response, deduplication against existing
- JSON files on Docker volume (/data/memories), capped at 50 per user
- System prompt updated: respond in user's language, use memories
- Commands: !ai memories (view), !ai forget (delete all)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-19 08:19:12 +02:00
parent 420b8a1e73
commit d7e32acfcb
2 changed files with 141 additions and 4 deletions

143
bot.py
View File

@@ -8,6 +8,8 @@ import re
import time
import uuid
import hashlib
import fitz # pymupdf
import httpx
from openai import AsyncOpenAI
@@ -58,9 +60,12 @@ DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "claude-sonnet")
WILDFILES_BASE_URL = os.environ.get("WILDFILES_BASE_URL", "")
WILDFILES_ORG = os.environ.get("WILDFILES_ORG", "")
USER_KEYS_FILE = os.environ.get("USER_KEYS_FILE", "/data/user_keys.json")
MEMORIES_DIR = os.environ.get("MEMORIES_DIR", "/data/memories")
MAX_MEMORIES_PER_USER = 50
SYSTEM_PROMPT = """You are a helpful AI assistant in a Matrix chat room.
Keep answers concise but thorough. Use markdown formatting when helpful.
Always respond in the same language the user writes in. If you have memories about the user's preferred language, use that language consistently.
IMPORTANT RULES — FOLLOW THESE STRICTLY:
- When document context is provided below, use it to answer. Always include any links.
@@ -73,7 +78,8 @@ IMPORTANT RULES — FOLLOW THESE STRICTLY:
- If no relevant documents were found, simply say you don't have information on that topic and ask if you can help with something else. Do NOT speculate about why or suggest the user look elsewhere.
- You can see and analyze images that users send. Describe what you see when asked about an image.
- You can read and analyze PDF documents that users send. Summarize content and answer questions about them.
- You can generate images when asked — use the generate_image tool for any image creation, drawing, or illustration requests."""
- You can generate images when asked — use the generate_image tool for any image creation, drawing, or illustration requests.
- If user memories are provided, use them to personalize responses. Address users by name if known."""
IMAGE_GEN_TOOLS = [{
"type": "function",
@@ -98,6 +104,8 @@ HELP_TEXT = """**AI Bot Commands**
- `!ai wildfiles connect` — Connect your WildFiles account (opens browser approval)
- `!ai wildfiles disconnect` — Disconnect your WildFiles account
- `!ai auto-rename on|off` — Auto-rename room based on conversation topic
- `!ai forget` — Delete all memories the bot has about you
- `!ai memories` — Show what the bot remembers about you
- **@mention the bot** or start with `!ai` for a regular AI response"""
@@ -400,6 +408,88 @@ class Bot:
except Exception:
pass # State event doesn't exist yet
# --- User memory helpers ---
def _memory_path(self, user_id: str) -> str:
    """Return the on-disk JSON path backing *user_id*'s memory store.

    The Matrix user ID is hashed (first 16 hex chars of its SHA-256) so
    the filename is filesystem-safe and does not expose the raw user ID.
    """
    digest = hashlib.sha256(user_id.encode()).hexdigest()
    return os.path.join(MEMORIES_DIR, f"{digest[:16]}.json")
def _load_memories(self, user_id: str) -> list[dict]:
    """Load a user's stored memories.

    Returns a list of {fact, created, source_room} dicts; an absent or
    unparseable store is treated as "no memories yet" rather than an error.
    """
    try:
        with open(self._memory_path(user_id)) as fh:
            data = json.load(fh)
    except (FileNotFoundError, json.JSONDecodeError):
        data = []
    return data
def _save_memories(self, user_id: str, memories: list[dict]):
    """Persist *memories* for a user, keeping only the newest entries.

    The list is capped at MAX_MEMORIES_PER_USER (oldest entries dropped).
    The write is atomic: we dump to a temp file in the same directory and
    os.replace() it over the target, so a crash or OOM-kill mid-write can
    never leave a truncated JSON file that _load_memories would then
    silently discard (losing all of the user's memories).
    """
    os.makedirs(MEMORIES_DIR, exist_ok=True)
    # Keep only the most recent memories
    memories = memories[-MAX_MEMORIES_PER_USER:]
    path = self._memory_path(user_id)
    tmp_path = path + ".tmp"
    with open(tmp_path, "w") as f:
        json.dump(memories, f, indent=2)
    os.replace(tmp_path, path)  # atomic rename within the same directory
def _format_memories(self, memories: list[dict]) -> str:
"""Format memories as a system prompt section."""
if not memories:
return ""
facts = [m["fact"] for m in memories]
return "You have these memories about this user:\n" + "\n".join(f"- {f}" for f in facts)
async def _extract_memories(self, user_message: str, ai_reply: str,
existing: list[dict], model: str,
sender: str, room_id: str) -> list[dict]:
"""Use LLM to extract memorable facts from the conversation, deduplicate with existing."""
if not self.llm:
return existing
existing_facts = [m["fact"] for m in existing]
existing_text = "\n".join(f"- {f}" for f in existing_facts) if existing_facts else "(none)"
try:
resp = await self.llm.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": (
"You extract memorable facts about users from conversations. "
"Return a JSON array of strings — each string is a concise fact worth remembering. "
"Include: name, language preference, location, occupation, interests, preferences, "
"family, pets, projects, important dates, or any personal detail shared. "
"Do NOT include: the current question/topic, temporary info, or things the AI said. "
"Do NOT duplicate existing memories (rephrase or skip if already known). "
"Return [] if nothing new is worth remembering."
)},
{"role": "user", "content": (
f"Existing memories:\n{existing_text}\n\n"
f"User message: {user_message[:500]}\n"
f"AI reply: {ai_reply[:500]}\n\n"
"New facts to remember (JSON array of strings):"
)},
],
max_tokens=300,
)
raw = resp.choices[0].message.content.strip()
# Parse JSON array from response
if raw.startswith("```"):
raw = raw.split("\n", 1)[-1].rsplit("```", 1)[0]
new_facts = json.loads(raw)
if not isinstance(new_facts, list):
return existing
now = time.time()
for fact in new_facts:
if isinstance(fact, str) and fact.strip():
existing.append({"fact": fact.strip(), "created": now, "source_room": room_id})
return existing
except Exception:
logger.debug("Memory extraction failed", exc_info=True)
return existing
async def on_text_message(self, room, event: RoomMessageText):
"""Handle text messages: commands and AI responses."""
if event.sender == BOT_USER:
@@ -722,6 +812,31 @@ class Bot:
status = "enabled" if enabled else "disabled"
await self._send_text(room.room_id, f"Auto-rename **{status}** for this room.")
elif cmd == "forget":
sender = event.sender if event else None
if sender:
path = self._memory_path(sender)
try:
os.remove(path)
except FileNotFoundError:
pass
await self._send_text(room.room_id, "All my memories about you have been deleted.")
else:
await self._send_text(room.room_id, "Could not identify user.")
elif cmd == "memories":
sender = event.sender if event else None
if sender:
memories = self._load_memories(sender)
if memories:
text = f"**I remember {len(memories)} things about you:**\n"
text += "\n".join(f"- {m['fact']}" for m in memories)
else:
text = "I don't have any memories about you yet."
await self._send_text(room.room_id, text)
else:
await self._send_text(room.room_id, "Could not identify user.")
elif cmd.startswith("search "):
query = cmd[7:].strip()
if not query:
@@ -893,8 +1008,14 @@ class Bot:
else:
logger.info("RAG found 0 docs for: %s (original: %s)", search_query[:50], user_message[:50])
# Load user memories
memories = self._load_memories(sender) if sender else []
memory_context = self._format_memories(memories)
# Build conversation context
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
if memory_context:
messages.append({"role": "system", "content": memory_context})
if doc_context:
messages.append({"role": "system", "content": doc_context})
messages.extend(history)
@@ -918,17 +1039,31 @@ class Bot:
tools=IMAGE_GEN_TOOLS if not image_data else None,
)
choice = resp.choices[0]
reply = choice.message.content or ""
if choice.message.tool_calls:
for tc in choice.message.tool_calls:
if tc.function.name == "generate_image":
args = json.loads(tc.function.arguments)
await self._generate_and_send_image(room.room_id, args["prompt"])
if choice.message.content:
await self._send_text(room.room_id, choice.message.content)
if reply:
await self._send_text(room.room_id, reply)
else:
reply = choice.message.content
await self._send_text(room.room_id, reply)
# Extract and save new memories (fire-and-forget, don't block response)
if sender and reply:
try:
updated = await self._extract_memories(
user_message, reply, memories, model, sender, room.room_id
)
if len(updated) > len(memories):
self._save_memories(sender, updated)
logger.info("Saved %d new memories for %s (total: %d)",
len(updated) - len(memories), sender, len(updated))
except Exception:
logger.debug("Memory save failed", exc_info=True)
# Auto-rename: only for group rooms with explicit opt-in (not DMs)
if room.room_id in self.auto_rename_rooms:
last_rename = self.renamed_rooms.get(room.room_id, 0)