fix: Fix memory system persistence and consolidate language prefs

- Replace separate bot-crypto/bot-memories volumes with single bot-data:/data
  volume so files stored directly under /data (e.g. user_keys.json), not just
  the crypto_store and memories subdirectories, persist across restarts
- Remove redundant language_prefs.json infrastructure (constant, load/save,
  dict) — language preference now read from memories (last match wins)
- Add robust JSON extraction in _extract_memories (regex fallback for
  markdown fences, embedded arrays, non-array responses)
- Add info-level logging throughout memory extraction pipeline
- Add asyncio.wait_for timeout (15s) on memory extraction to prevent hangs
- Add !ai memory <fact> command for explicit, reliable memory storage
- Update _get_preferred_language to return last match (most recent wins)
- Update !ai forget to clear in-memory caches (pending translate/reply)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-19 09:49:05 +02:00
parent 2fd5806654
commit b5c33f4701
2 changed files with 50 additions and 46 deletions

88
bot.py
View File

@@ -61,7 +61,6 @@ WILDFILES_BASE_URL = os.environ.get("WILDFILES_BASE_URL", "")
WILDFILES_ORG = os.environ.get("WILDFILES_ORG", "")
USER_KEYS_FILE = os.environ.get("USER_KEYS_FILE", "/data/user_keys.json")
MEMORIES_DIR = os.environ.get("MEMORIES_DIR", "/data/memories")
LANGUAGE_PREFS_FILE = os.environ.get("LANGUAGE_PREFS_FILE", "/data/language_prefs.json")
MAX_MEMORIES_PER_USER = 50
SYSTEM_PROMPT = """You are a helpful AI assistant in a Matrix chat room.
@@ -106,6 +105,7 @@ HELP_TEXT = """**AI Bot Commands**
- `!ai wildfiles connect` — Connect your WildFiles account (opens browser approval)
- `!ai wildfiles disconnect` — Disconnect your WildFiles account
- `!ai auto-rename on|off` — Auto-rename room based on conversation topic
- `!ai memory <fact>` — Explicitly tell the bot to remember something
- `!ai forget` — Delete all memories the bot has about you
- `!ai memories` — Show what the bot remembers about you
- **Translate**: Forward a message to this DM — bot detects language and offers translation
@@ -221,7 +221,6 @@ class Bot:
self._pending_connects: dict[str, str] = {} # matrix_user_id -> device_code
self._pending_translate: dict[str, dict] = {} # sender -> {text, detected_lang, room_id}
self._pending_reply: dict[str, dict] = {} # sender -> {target_lang}
self.language_prefs: dict[str, str] = self._load_language_prefs() # user_id -> language
@staticmethod
def _load_user_keys() -> dict[str, str]:
@@ -241,23 +240,6 @@ class Bot:
except Exception:
logger.exception("Failed to save user keys")
@staticmethod
def _load_language_prefs() -> dict[str, str]:
if os.path.exists(LANGUAGE_PREFS_FILE):
try:
with open(LANGUAGE_PREFS_FILE) as f:
return json.load(f)
except Exception:
logger.warning("Failed to load language prefs, starting fresh")
return {}
def _save_language_prefs(self):
try:
with open(LANGUAGE_PREFS_FILE, "w") as f:
json.dump(self.language_prefs, f)
except Exception:
logger.exception("Failed to save language prefs")
async def start(self):
# Restore existing session or create new one
if os.path.exists(CREDS_FILE):
@@ -473,6 +455,8 @@ class Bot:
existing_facts = [m["fact"] for m in existing]
existing_text = "\n".join(f"- {f}" for f in existing_facts) if existing_facts else "(none)"
logger.info("Memory extraction: user_msg=%s... (%d existing facts)", user_message[:80], len(existing_facts))
try:
resp = await self.llm.chat.completions.create(
model=model,
@@ -496,29 +480,30 @@ class Bot:
max_tokens=300,
)
raw = resp.choices[0].message.content.strip()
# Parse JSON array from response
logger.info("Memory extraction raw response: %s", raw[:200])
# Robust JSON extraction: strip markdown fences, find array
if raw.startswith("```"):
raw = raw.split("\n", 1)[-1].rsplit("```", 1)[0]
raw = re.sub(r"^```\w*\n?", "", raw)
raw = re.sub(r"\n?```$", "", raw)
match = re.search(r"\[.*\]", raw, re.DOTALL)
if match:
raw = match.group(0)
new_facts = json.loads(raw)
if not isinstance(new_facts, list):
logger.warning("Memory extraction returned non-list: %s", type(new_facts))
return existing
logger.info("Memory extraction found %d new facts", len(new_facts))
now = time.time()
for fact in new_facts:
if isinstance(fact, str) and fact.strip():
existing.append({"fact": fact.strip(), "created": now, "source_room": room_id})
# Auto-detect language preference from new facts
fl = fact.lower()
if "language" in fl or "speaks" in fl or "prefers" in fl:
for lang in ["English", "German", "French", "Spanish", "Italian",
"Portuguese", "Dutch", "Russian", "Chinese", "Japanese",
"Korean", "Arabic", "Turkish", "Greek", "Hebrew"]:
if lang.lower() in fl:
self.language_prefs[sender] = lang
self._save_language_prefs()
logger.info("Auto-detected language preference: %s for %s", lang, sender)
break
return existing
except json.JSONDecodeError:
logger.warning("Memory extraction JSON parse failed, raw: %s", raw[:200])
return existing
except Exception:
logger.warning("Memory extraction failed", exc_info=True)
@@ -561,11 +546,7 @@ class Bot:
return f"[Translation failed] {text}"
def _get_preferred_language(self, user_id: str) -> str:
"""Get user's preferred language from explicit prefs, then memories, default English."""
# Check explicit language preference first
if user_id in self.language_prefs:
return self.language_prefs[user_id]
# Fallback: scan memories
"""Get user's preferred language from memories (last match = most recent)."""
memories = self._load_memories(user_id)
known_langs = [
"English", "German", "French", "Spanish", "Italian", "Portuguese",
@@ -574,13 +555,15 @@ class Bot:
"Greek", "Hebrew", "Hindi", "Thai", "Vietnamese", "Indonesian",
"Czech", "Romanian", "Hungarian", "Ukrainian", "Croatian", "Serbian",
]
result = "English"
for m in memories:
fact = m["fact"].lower()
if "language" in fact or "speaks" in fact or "prefers" in fact:
for lang in known_langs:
if lang.lower() in fact:
return lang
return "English"
result = lang
break
return result
async def on_text_message(self, room, event: RoomMessageText):
"""Handle text messages: commands and AI responses."""
@@ -967,6 +950,21 @@ class Bot:
status = "enabled" if enabled else "disabled"
await self._send_text(room.room_id, f"Auto-rename **{status}** for this room.")
elif cmd.startswith("memory "):
fact = cmd[7:].strip()
sender = event.sender if event else None
if not fact:
await self._send_text(room.room_id, "Usage: `!ai memory <fact>`")
return
if sender:
memories = self._load_memories(sender)
memories.append({"fact": fact, "created": time.time(), "source_room": room.room_id})
self._save_memories(sender, memories)
await self._send_text(room.room_id, f"Remembered: {fact}")
logger.info("Explicit memory stored for %s: %s", sender, fact[:80])
else:
await self._send_text(room.room_id, "Could not identify user.")
elif cmd == "forget":
sender = event.sender if event else None
if sender:
@@ -975,6 +973,9 @@ class Bot:
os.remove(path)
except FileNotFoundError:
pass
# Clear any in-memory caches for this user
self._pending_translate.pop(sender, None)
self._pending_reply.pop(sender, None)
await self._send_text(room.room_id, "All my memories about you have been deleted.")
else:
await self._send_text(room.room_id, "Could not identify user.")
@@ -1206,11 +1207,14 @@ class Bot:
else:
await self._send_text(room.room_id, reply)
# Extract and save new memories (fire-and-forget, don't block response)
# Extract and save new memories (after reply sent, with timeout)
if sender and reply:
try:
updated = await self._extract_memories(
updated = await asyncio.wait_for(
self._extract_memories(
user_message, reply, memories, model, sender, room.room_id
),
timeout=15.0,
)
if len(updated) > len(memories):
self._save_memories(sender, updated)
@@ -1218,6 +1222,8 @@ class Bot:
len(updated) - len(memories), sender, len(updated))
else:
logger.info("No new memories extracted for %s", sender)
except asyncio.TimeoutError:
logger.warning("Memory extraction timed out for %s", sender)
except Exception:
logger.warning("Memory save failed", exc_info=True)

View File

@@ -18,9 +18,7 @@ services:
- WILDFILES_BASE_URL
- WILDFILES_ORG
volumes:
- bot-crypto:/data/crypto_store
- bot-memories:/data/memories
- bot-data:/data
volumes:
bot-crypto:
bot-memories:
bot-data: