feat(memory): store full conversation exchanges instead of LLM-extracted facts

- Replace _extract_voice_memories with _store_voice_exchange
- Store raw "User: ... / Assistant: ..." pairs directly
- No LLM call needed — faster, cheaper, no lost context
- Load as "Frühere Gespräche" with full thread context

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 10:40:59 +02:00
parent 150df19be1
commit a11cafc1d6
1 changed file with 14 additions and 52 deletions
--- a/voice.py
+++ b/voice.py
@@ -192,59 +192,21 @@ async def _brave_search(query: str, count: int = 5) -> str:
        return f"Search failed: {exc}"
-async def _extract_voice_memories(user_text: str, agent_text: str,
+async def _store_voice_exchange(user_text: str, agent_text: str,
-                                   user_id: str, room_id: str) -> None:
+                                user_id: str, room_id: str) -> None:
-    """Extract memorable facts from a voice exchange and store them."""
+    """Store the full conversation exchange as memory (no LLM extraction)."""
-    if not LITELLM_URL or not MEMORY_SERVICE_URL:
+    if not MEMORY_SERVICE_URL:
        return
    exchange = f"User: {user_text}\nAssistant: {agent_text}"
    try:
        # Fetch existing facts to avoid duplicates
        async with httpx.AsyncClient(timeout=10.0) as client:
-            resp = await client.post(
+            await client.post(
-                f"{MEMORY_SERVICE_URL}/memories/query",
+                f"{MEMORY_SERVICE_URL}/memories/store",
-                json={"user_id": user_id, "query": "all facts", "top_k": 20},
+                json={"user_id": user_id, "fact": exchange, "source_room": room_id},
            )
-            existing = [m["fact"] for m in resp.json().get("results", [])] if resp.is_success else []
+            logger.info("Memory stored for %s: %s", user_id, exchange[:120])
        existing_text = "\n".join(f"- {f}" for f in existing) if existing else "(none)"
        llm = AsyncOpenAI(base_url=LITELLM_URL, api_key=LITELLM_KEY)
        resp = await llm.chat.completions.create(
            model="claude-haiku",
            messages=[
                {"role": "system", "content": (
                    "Extract memorable facts about the user from this voice conversation snippet. "
                    "Return a JSON array of concise strings. Include: name, preferences, location, "
                    "occupation, interests, family, projects. Skip duplicate or temporary info. "
                    "Return [] if nothing new."
                )},
                {"role": "user", "content": (
                    f"Existing memories:\n{existing_text}\n\n"
                    f"User said: {user_text}\nAssistant replied: {agent_text}\n\n"
                    "New facts (JSON array):"
                )},
            ],
            max_tokens=200,
        )
        raw = resp.choices[0].message.content.strip()
        if raw.startswith("```"):
            raw = re.sub(r"^```\w*\n?", "", raw)
            raw = re.sub(r"\n?```$", "", raw)
        match = re.search(r"\[.*\]", raw, re.DOTALL)
        if match:
            raw = match.group(0)
        new_facts = json.loads(raw)
        if not isinstance(new_facts, list):
            return
        async with httpx.AsyncClient(timeout=10.0) as client:
            for fact in new_facts:
                if isinstance(fact, str) and fact.strip():
                    await client.post(
                        f"{MEMORY_SERVICE_URL}/memories/store",
                        json={"user_id": user_id, "fact": fact.strip(), "source_room": room_id},
                    )
                    logger.info("Memory stored for %s: %s", user_id, fact[:80])
    except Exception as exc:
-        logger.warning("Voice memory extraction failed: %s", exc)
+        logger.warning("Voice memory store failed: %s", exc)
 def _build_e2ee_options() -> rtc.E2EEOptions:
@@ -569,8 +531,8 @@ class VoiceSession:
                try:
                    mems = await self._memory.query(self._caller_user_id, "voice call", top_k=10)
                    if mems:
-                        memory_section = "\n\nKontext aus früheren Gesprächen mit diesem Nutzer:\n" + \
+                        memory_section = "\n\nFrühere Gespräche mit diesem Nutzer:\n" + \
-                            "\n".join(f"- {m['fact']}" for m in mems)
+                            "\n---\n".join(m['fact'] for m in mems)
                        logger.info("Loaded %d memories for %s", len(mems), self._caller_user_id)
                except Exception as exc:
                    logger.warning("Memory query failed: %s", exc)
@@ -640,8 +602,8 @@ class VoiceSession:
                        user_text = " ".join(_last_user_speech)
                        _last_user_speech.clear()
                        asyncio.ensure_future(
-                            _extract_voice_memories(user_text, text,
+                            _store_voice_exchange(user_text, text,
-                                                    self._caller_user_id, self.room_id))
+                                                  self._caller_user_id, self.room_id))
            # Brave Search tool — lets the agent answer questions about current events
            @function_tool