From a11cafc1d6fca6d3337d3bcddc7142d431bc5c0e Mon Sep 17 00:00:00 2001
From: Christian Gick <service@agiliton.eu>
Date: Mon, 23 Feb 2026 10:40:59 +0200
Subject: [PATCH] feat(memory): store full conversation exchanges instead of
 LLM-extracted facts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace _extract_voice_memories with _store_voice_exchange
- Store raw "User: ... / Assistant: ..." pairs directly
- No LLM call needed — faster, cheaper, no lost context; note that the
  previous duplicate check against existing memories is dropped as well
- Load stored exchanges into the prompt under "Frühere Gespräche mit
  diesem Nutzer", one exchange per entry, separated by "---"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 voice.py | 66 ++++++++++++--------------------------------------------
 1 file changed, 14 insertions(+), 52 deletions(-)

diff --git a/voice.py b/voice.py
index cfd72d6..7f975bf 100644
--- a/voice.py
+++ b/voice.py
@@ -192,59 +192,21 @@ async def _brave_search(query: str, count: int = 5) -> str:
         return f"Search failed: {exc}"
 
 
-async def _extract_voice_memories(user_text: str, agent_text: str,
-                                   user_id: str, room_id: str) -> None:
-    """Extract memorable facts from a voice exchange and store them."""
-    if not LITELLM_URL or not MEMORY_SERVICE_URL:
+async def _store_voice_exchange(user_text: str, agent_text: str,
+                                user_id: str, room_id: str) -> None:
+    """Store the full conversation exchange as memory (no LLM extraction)."""
+    if not MEMORY_SERVICE_URL:
         return
+    exchange = f"User: {user_text}\nAssistant: {agent_text}"
     try:
-        # Fetch existing facts to avoid duplicates
         async with httpx.AsyncClient(timeout=10.0) as client:
-            resp = await client.post(
-                f"{MEMORY_SERVICE_URL}/memories/query",
-                json={"user_id": user_id, "query": "all facts", "top_k": 20},
+            await client.post(
+                f"{MEMORY_SERVICE_URL}/memories/store",
+                json={"user_id": user_id, "fact": exchange, "source_room": room_id},
             )
-            existing = [m["fact"] for m in resp.json().get("results", [])] if resp.is_success else []
-
-        existing_text = "\n".join(f"- {f}" for f in existing) if existing else "(none)"
-        llm = AsyncOpenAI(base_url=LITELLM_URL, api_key=LITELLM_KEY)
-        resp = await llm.chat.completions.create(
-            model="claude-haiku",
-            messages=[
-                {"role": "system", "content": (
-                    "Extract memorable facts about the user from this voice conversation snippet. "
-                    "Return a JSON array of concise strings. Include: name, preferences, location, "
-                    "occupation, interests, family, projects. Skip duplicate or temporary info. "
-                    "Return [] if nothing new."
-                )},
-                {"role": "user", "content": (
-                    f"Existing memories:\n{existing_text}\n\n"
-                    f"User said: {user_text}\nAssistant replied: {agent_text}\n\n"
-                    "New facts (JSON array):"
-                )},
-            ],
-            max_tokens=200,
-        )
-        raw = resp.choices[0].message.content.strip()
-        if raw.startswith("```"):
-            raw = re.sub(r"^```\w*\n?", "", raw)
-            raw = re.sub(r"\n?```$", "", raw)
-        match = re.search(r"\[.*\]", raw, re.DOTALL)
-        if match:
-            raw = match.group(0)
-        new_facts = json.loads(raw)
-        if not isinstance(new_facts, list):
-            return
-        async with httpx.AsyncClient(timeout=10.0) as client:
-            for fact in new_facts:
-                if isinstance(fact, str) and fact.strip():
-                    await client.post(
-                        f"{MEMORY_SERVICE_URL}/memories/store",
-                        json={"user_id": user_id, "fact": fact.strip(), "source_room": room_id},
-                    )
-                    logger.info("Memory stored for %s: %s", user_id, fact[:80])
+            logger.info("Memory stored for %s: %s", user_id, exchange[:120])
     except Exception as exc:
-        logger.warning("Voice memory extraction failed: %s", exc)
+        logger.warning("Voice memory store failed: %s", exc)
 
 
 def _build_e2ee_options() -> rtc.E2EEOptions:
@@ -569,8 +531,8 @@ class VoiceSession:
                 try:
                     mems = await self._memory.query(self._caller_user_id, "voice call", top_k=10)
                     if mems:
-                        memory_section = "\n\nKontext aus früheren Gesprächen mit diesem Nutzer:\n" + \
-                            "\n".join(f"- {m['fact']}" for m in mems)
+                        memory_section = "\n\nFrühere Gespräche mit diesem Nutzer:\n" + \
+                            "\n---\n".join(m['fact'] for m in mems)
                         logger.info("Loaded %d memories for %s", len(mems), self._caller_user_id)
                 except Exception as exc:
                     logger.warning("Memory query failed: %s", exc)
@@ -640,8 +602,8 @@ class VoiceSession:
                         user_text = " ".join(_last_user_speech)
                         _last_user_speech.clear()
                         asyncio.ensure_future(
-                            _extract_voice_memories(user_text, text,
-                                                    self._caller_user_id, self.room_id))
+                            _store_voice_exchange(user_text, text,
+                                                  self._caller_user_id, self.room_id))
 
             # Brave Search tool — lets the agent answer questions about current events
             @function_tool