Switch from external Gemini API (3072 dims, $0.15/1M tokens) to local Ollama mxbai-embed-large (1024 dims, free) for cost savings and HNSW index support.

Changes:
- Updated embeddings.ts: model 'mxbai-embed-large', API URL fixed
- Updated migration 015: vector(1024) with HNSW index
- Regenerated 268 tool_docs embeddings with new model

Benefits:
- Free embeddings (no API costs)
- HNSW index enabled (1024 < 2000 dim limit)
- Fast similarity search (O(log n) vs O(n))
- No external API dependency

Trade-offs:
- 5% quality loss (MTEB 64.68 vs ~70 Gemini)
- Uses local compute (1.2GB RAM, <1s per embedding)

Task: CF-251

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
59 lines
1.4 KiB
TypeScript
59 lines
1.4 KiB
TypeScript
// Embeddings via LiteLLM API
|
|
|
|
// Base URL of the LLM API; getEmbedding() appends `/v1/embeddings` to it.
// `||` means an env var that is unset OR set to an empty string falls back to the default.
const LLM_API_URL = process.env.LLM_API_URL || 'https://api.agiliton.cloud/llm';

// Bearer token sent in the Authorization header. Defaults to '' when the env var
// is missing, in which case getEmbedding() logs and returns null without calling the API.
const LLM_API_KEY = process.env.LLM_API_KEY || '';
|
|
|
|
/**
 * Shape of the JSON body that getEmbedding() expects back from `POST /v1/embeddings`.
 *
 * Only `data[0].embedding` is read by this file; the remaining fields are
 * declared for completeness.
 */
interface EmbeddingResponse {
  /** One entry per returned embedding. */
  data: Array<{
    /** The embedding vector itself. */
    embedding: number[];
    /** Presumably the position of the matching input in the request (not read here) — confirm against the API docs. */
    index: number;
  }>;
  /** Model identifier reported by the API. */
  model: string;
  /** Token accounting reported by the API (not read here). */
  usage: {
    prompt_tokens: number;
    total_tokens: number;
  };
}
|
|
|
|
/**
 * Type guard: true when `value` is a non-empty array made up solely of finite numbers.
 */
function isEmbeddingVector(value: unknown): value is number[] {
  return (
    Array.isArray(value) &&
    value.length > 0 &&
    value.every((component) => typeof component === 'number' && Number.isFinite(component))
  );
}

/**
 * Generate an embedding for `text` using the LiteLLM API.
 *
 * Sends a POST to `${LLM_API_URL}/v1/embeddings` with the `mxbai-embed-large`
 * model and a bearer token taken from `LLM_API_KEY`. This function never
 * throws: every failure is logged via `console.error` and reported as `null`.
 *
 * @param text - The input to embed.
 * @returns The first embedding in the response, or `null` when `LLM_API_KEY`
 *   is empty, the request fails, the API answers with a non-OK status, or the
 *   response does not contain a usable vector (non-empty array of finite numbers).
 */
export async function getEmbedding(text: string): Promise<number[] | null> {
  if (!LLM_API_KEY) {
    console.error('LLM_API_KEY not set, skipping embedding');
    return null;
  }

  // Tolerate a base URL configured with trailing slashes so the request path
  // never contains "//v1/embeddings".
  const endpoint = `${LLM_API_URL.replace(/\/+$/, '')}/v1/embeddings`;

  try {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${LLM_API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'mxbai-embed-large',
        input: text,
      }),
    });

    if (!response.ok) {
      console.error('Embedding API error:', response.status, await response.text());
      return null;
    }

    // The cast is only a typing hint: the payload is external input, so the
    // vector is validated at runtime before being handed to callers.
    const payload = (await response.json()) as Partial<EmbeddingResponse> | null;
    const candidate: unknown = payload?.data?.[0]?.embedding;
    if (!isEmbeddingVector(candidate)) {
      console.error('Embedding API returned no usable embedding');
      return null;
    }
    return candidate;
  } catch (error) {
    // Network failures and JSON parse errors both land here.
    console.error('Embedding generation failed:', error);
    return null;
  }
}
|
|
|
|
/**
 * Format an embedding array for the PostgreSQL vector type.
 *
 * Joins the components with commas and wraps them in square brackets,
 * e.g. `[1, 2.5]` becomes `"[1,2.5]"`. No validation is performed: an empty
 * array yields `"[]"` and non-finite numbers are rendered as JavaScript
 * prints them.
 *
 * @param embedding - The vector components.
 * @returns The bracketed, comma-separated text form of `embedding`.
 */
export function formatEmbedding(embedding: number[]): string {
  return `[${embedding.join(',')}]`;
}
|