Migrate embeddings from Gemini to local mxbai-embed-large

Switch from external Gemini API (3072 dims, $0.15/1M tokens) to local
Ollama mxbai-embed-large (1024 dims, free) for cost savings and HNSW
index support.

Changes:
- Updated embeddings.ts: model set to 'mxbai-embed-large'; default LLM_API_URL changed to https://api.agiliton.cloud/llm
- Updated migration 015: vector(1024) with HNSW index
- Regenerated 268 tool_docs embeddings with new model

Benefits:
- Free embeddings (no API costs)
- HNSW index enabled (1024 dims is within pgvector's 2000-dimension index limit)
- Fast similarity search (O(log n) HNSW index lookup vs O(n) sequential scan)
- No external API dependency

Trade-offs:
- Lower embedding quality: MTEB score 64.68 vs ~70 for Gemini (about 5 points)
- Uses local compute (1.2GB RAM, <1s per embedding)

Task: CF-251

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-01-19 09:40:02 +02:00
parent 0aa10d3003
commit afce0bd3e5
2 changed files with 5 additions and 4 deletions

View File

@@ -14,7 +14,7 @@ CREATE TABLE IF NOT EXISTS tool_docs (
notes TEXT, -- Additional notes, gotchas, tips notes TEXT, -- Additional notes, gotchas, tips
tags TEXT[], -- Searchable tags (e.g., ['backup', 'database', 'postgresql']) tags TEXT[], -- Searchable tags (e.g., ['backup', 'database', 'postgresql'])
source_file TEXT, -- Original source file (TOOLS.md, script path, etc.) source_file TEXT, -- Original source file (TOOLS.md, script path, etc.)
embedding vector(1024), -- Semantic search embedding embedding vector(1024), -- mxbai-embed-large embedding (1024 dimensions)
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(), created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW() updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
); );
@@ -24,7 +24,8 @@ CREATE INDEX IF NOT EXISTS idx_tool_docs_name ON tool_docs(tool_name);
CREATE INDEX IF NOT EXISTS idx_tool_docs_category ON tool_docs(category); CREATE INDEX IF NOT EXISTS idx_tool_docs_category ON tool_docs(category);
CREATE INDEX IF NOT EXISTS idx_tool_docs_tags ON tool_docs USING gin(tags); CREATE INDEX IF NOT EXISTS idx_tool_docs_tags ON tool_docs USING gin(tags);
-- HNSW index for semantic similarity search -- HNSW index for fast semantic similarity search (O(log n) vs O(n) sequential scan)
-- mxbai-embed-large (1024 dims) < 2000 dim limit, so HNSW index works
CREATE INDEX IF NOT EXISTS idx_tool_docs_embedding ON tool_docs USING hnsw (embedding vector_cosine_ops); CREATE INDEX IF NOT EXISTS idx_tool_docs_embedding ON tool_docs USING hnsw (embedding vector_cosine_ops);
-- Full-text search on title + description -- Full-text search on title + description

View File

@@ -1,6 +1,6 @@
// Embeddings via LiteLLM API // Embeddings via LiteLLM API
const LLM_API_URL = process.env.LLM_API_URL || 'https://llm.agiliton.cloud'; const LLM_API_URL = process.env.LLM_API_URL || 'https://api.agiliton.cloud/llm';
const LLM_API_KEY = process.env.LLM_API_KEY || ''; const LLM_API_KEY = process.env.LLM_API_KEY || '';
interface EmbeddingResponse { interface EmbeddingResponse {
@@ -32,7 +32,7 @@ export async function getEmbedding(text: string): Promise<number[] | null> {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}, },
body: JSON.stringify({ body: JSON.stringify({
model: 'text-embedding-ada-002', model: 'mxbai-embed-large',
input: text, input: text,
}), }),
}); });