From afce0bd3e5441750494b061a6fb68fa0f9dbe8e0 Mon Sep 17 00:00:00 2001 From: Christian Gick Date: Mon, 19 Jan 2026 09:40:02 +0200 Subject: [PATCH] Migrate embeddings from Gemini to local mxbai-embed-large Switch from external Gemini API (3072 dims, $0.15/1M tokens) to local Ollama mxbai-embed-large (1024 dims, free) for cost savings and HNSW index support. Changes: - Updated embeddings.ts: model 'mxbai-embed-large', default API URL changed to https://api.agiliton.cloud/llm - Updated migration 015: vector(1024) with HNSW index - Regenerated 268 tool_docs embeddings with new model Benefits: - Free embeddings (no API costs) - HNSW index enabled (1024 < 2000 dim limit) - Fast similarity search (O(log n) vs O(n)) - No external API dependency Trade-offs: - ~5-point quality loss on MTEB (64.68 vs ~70 for Gemini) - Uses local compute (1.2GB RAM, <1s per embedding) Task: CF-251 Co-Authored-By: Claude Sonnet 4.5 --- migrations/015_tool_docs.sql | 5 +++-- src/embeddings.ts | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/migrations/015_tool_docs.sql b/migrations/015_tool_docs.sql index e7f505a..215a213 100644 --- a/migrations/015_tool_docs.sql +++ b/migrations/015_tool_docs.sql @@ -14,7 +14,7 @@ CREATE TABLE IF NOT EXISTS tool_docs ( notes TEXT, -- Additional notes, gotchas, tips tags TEXT[], -- Searchable tags (e.g., ['backup', 'database', 'postgresql']) source_file TEXT, -- Original source file (TOOLS.md, script path, etc.) 
- embedding vector(1024), -- Semantic search embedding + embedding vector(1024), -- mxbai-embed-large embedding (1024 dimensions) created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() ); @@ -24,7 +24,8 @@ CREATE INDEX IF NOT EXISTS idx_tool_docs_name ON tool_docs(tool_name); CREATE INDEX IF NOT EXISTS idx_tool_docs_category ON tool_docs(category); CREATE INDEX IF NOT EXISTS idx_tool_docs_tags ON tool_docs USING gin(tags); --- HNSW index for semantic similarity search +-- HNSW index for fast semantic similarity search (O(log n) vs O(n) sequential scan) +-- mxbai-embed-large (1024 dims) < 2000 dim limit, so HNSW index works CREATE INDEX IF NOT EXISTS idx_tool_docs_embedding ON tool_docs USING hnsw (embedding vector_cosine_ops); -- Full-text search on title + description diff --git a/src/embeddings.ts b/src/embeddings.ts index bb0d3ec..40a7996 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -1,6 +1,6 @@ // Embeddings via LiteLLM API -const LLM_API_URL = process.env.LLM_API_URL || 'https://llm.agiliton.cloud'; +const LLM_API_URL = process.env.LLM_API_URL || 'https://api.agiliton.cloud/llm'; const LLM_API_KEY = process.env.LLM_API_KEY || ''; interface EmbeddingResponse { @@ -32,7 +32,7 @@ export async function getEmbedding(text: string): Promise { 'Content-Type': 'application/json', }, body: JSON.stringify({ - model: 'text-embedding-ada-002', + model: 'mxbai-embed-large', input: text, }), });