Files
session-mcp/src/embeddings.ts
Christian Gick afce0bd3e5 Migrate embeddings from Gemini to local mxbai-embed-large
Switch from external Gemini API (3072 dims, $0.15/1M tokens) to local
Ollama mxbai-embed-large (1024 dims, free) for cost savings and HNSW
index support.

Changes:
- Updated embeddings.ts: model 'mxbai-embed-large', API URL fixed
- Updated migration 015: vector(1024) with HNSW index
- Regenerated 268 tool_docs embeddings with new model

Benefits:
- Free embeddings (no API costs)
- HNSW index enabled (1024 < 2000 dim limit)
- Fast similarity search (O(log n) vs O(n))
- No external API dependency

Trade-offs:
- ~5-point quality loss on MTEB (64.68 vs ~70 for Gemini)
- Uses local compute (1.2GB RAM, <1s per embedding)

Task: CF-251

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-19 09:40:02 +02:00

59 lines
1.4 KiB
TypeScript

// Embeddings via LiteLLM API
// Base URL of the LiteLLM API; getEmbedding appends `/v1/embeddings` to it.
// `||` (not `??`) means an unset OR empty env var falls back to the default URL.
const LLM_API_URL = process.env.LLM_API_URL || 'https://api.agiliton.cloud/llm';
// Bearer token sent in the Authorization header. Falls back to '' so that a
// missing key is handled at call time (getEmbedding logs and returns null).
const LLM_API_KEY = process.env.LLM_API_KEY || '';
/**
 * Expected shape of the JSON body returned by the `/v1/embeddings` endpoint.
 */
interface EmbeddingResponse {
  /** Embedding results; getEmbedding reads only the first entry. */
  data: {
    /** The embedding vector itself. */
    embedding: number[];
    /** Index reported by the API for this entry. */
    index: number;
  }[];
  /** Model name reported in the response. */
  model: string;
  /** Token counts reported in the response. */
  usage: {
    prompt_tokens: number;
    total_tokens: number;
  };
}
/**
 * Generate an embedding vector for `text` using the LiteLLM API.
 *
 * Sends a POST request to `${LLM_API_URL}/v1/embeddings` asking for the
 * `mxbai-embed-large` model and returns the first vector in the response.
 * This function never throws: every failure is logged via `console.error`
 * and reported to the caller as `null`.
 *
 * @param text - The text to embed.
 * @returns The embedding vector, or `null` when `LLM_API_KEY` is not set,
 *   the request fails, the API answers with a non-OK status, or the response
 *   body does not contain a non-empty array of finite numbers.
 */
export async function getEmbedding(text: string): Promise<number[] | null> {
  if (!LLM_API_KEY) {
    console.error('LLM_API_KEY not set, skipping embedding');
    return null;
  }

  try {
    const response = await fetch(`${LLM_API_URL}/v1/embeddings`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${LLM_API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'mxbai-embed-large',
        input: text,
      }),
    });

    if (!response.ok) {
      console.error('Embedding API error:', response.status, await response.text());
      return null;
    }

    // The cast only names the shape we *expect*; the body is untrusted JSON,
    // so the vector is validated at runtime before it is handed to callers.
    const body = (await response.json()) as Partial<EmbeddingResponse> | null;
    const embedding: unknown = body?.data?.[0]?.embedding;

    if (!isNumericVector(embedding)) {
      console.error('Embedding API returned no usable embedding vector');
      return null;
    }
    return embedding;
  } catch (error) {
    // Covers network failures and bodies that are not valid JSON.
    console.error('Embedding generation failed:', error);
    return null;
  }
}

/** True when `value` is a non-empty array consisting solely of finite numbers. */
function isNumericVector(value: unknown): value is number[] {
  return (
    Array.isArray(value) &&
    value.length > 0 &&
    value.every((v) => typeof v === 'number' && Number.isFinite(v))
  );
}
/**
 * Serialise an embedding as a bracketed, comma-separated list
 * (e.g. `[0.1,0.2,0.3]`) for use as a PostgreSQL vector value.
 *
 * @param embedding - The vector components.
 * @returns The components joined by commas and wrapped in square brackets.
 */
export function formatEmbedding(embedding: number[]): string {
  const components = embedding.join(',');
  return '[' + components + ']';
}