// Embeddings via LiteLLM API

import { createHash } from 'crypto';

/**
 * Generate SHA-256 content hash for dedup before embedding API call (CF-1314)
 *
 * @param text Content to hash.
 * @returns Hex-encoded SHA-256 digest of `text`.
 */
export function generateContentHash(text: string): string {
  return createHash('sha256').update(text).digest('hex');
}

/**
 * Shape of the JSON body this module expects back from the
 * `/v1/embeddings` endpoint. Only `data[0].embedding` is actually read.
 */
interface EmbeddingResponse {
  data: Array<{
    embedding: number[];
    index: number;
  }>;
  model: string;
  usage: {
    prompt_tokens: number;
    total_tokens: number;
  };
}

/**
 * Generate embedding for text using LiteLLM API
 *
 * Best-effort: every failure path (missing API key, non-2xx response,
 * malformed body, network/parse error) is logged with `console.error` and
 * reported as `null` instead of throwing.
 *
 * @param text Text to embed; sent as the `input` field of the request.
 * @returns The embedding vector, or `null` when it could not be obtained.
 */
export async function getEmbedding(text: string): Promise<number[] | null> {
  // Read env vars at runtime (after dotenv.config() in index.ts).
  // `||` (not `??`) is deliberate: an empty-string variable also falls back.
  const LLM_API_URL = process.env.LLM_API_URL || 'https://api.agiliton.cloud/llm';
  const LLM_API_KEY = process.env.LLM_API_KEY || '';

  if (!LLM_API_KEY) {
    console.error('LLM_API_KEY not set, skipping embedding (check .env file)');
    console.error('LLM_API_URL:', LLM_API_URL);
    return null;
  }

  // Tolerate a base URL configured with trailing slash(es); otherwise the
  // request would go to ".../llm//v1/embeddings".
  const endpoint = `${LLM_API_URL.replace(/\/+$/, '')}/v1/embeddings`;

  try {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${LLM_API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'mxbai-embed-large',
        input: text,
      }),
    });

    if (!response.ok) {
      console.error('Embedding API error:', response.status, await response.text());
      return null;
    }

    // The cast is unchecked, so verify the one field we rely on at runtime.
    const payload = (await response.json()) as EmbeddingResponse;
    const embedding: unknown = payload?.data?.[0]?.embedding;
    if (!Array.isArray(embedding)) {
      console.error('Embedding API returned no embedding in response');
      return null;
    }
    return embedding as number[];
  } catch (error: unknown) {
    console.error('Embedding generation failed:', error);
    return null;
  }
}

/**
 * Format embedding array for PostgreSQL vector type
 *
 * Values are joined verbatim; non-finite numbers (NaN, Infinity) are not
 * validated here and are emitted as-is.
 *
 * @param embedding Vector components.
 * @returns Text literal of the form `[v1,v2,...]` (`[]` for an empty array).
 */
export function formatEmbedding(embedding: number[]): string {
  return `[${embedding.join(',')}]`;
}

/**
 * Reciprocal Rank Fusion — merge two ranked result lists (CF-1315)
 *
 * Each list contributes `1 / (k + rank + 1)` per ID, with `rank` being the
 * zero-based position in that list. An ID repeated inside the same list is
 * counted once, at its first (best) position, so duplicates cannot inflate
 * the fused score. Number and string IDs are distinct keys (`1` !== `'1'`).
 *
 * @param vectorResults IDs ranked by vector similarity (best first)
 * @param keywordResults IDs ranked by ts_rank (best first)
 * @param k RRF parameter (default 60, standard)
 * @returns Merged IDs sorted by RRF score descending; ties keep first-seen
 *          order (vector list first, then keyword list).
 */
export function rrfMerge(
  vectorResults: (number | string)[],
  keywordResults: (number | string)[],
  k: number = 60
): { id: number | string; score: number }[] {
  const scores = new Map<number | string, number>();

  // Adds one ranked list's contribution to the running totals.
  const accumulate = (ranked: (number | string)[]): void => {
    const seen = new Set<number | string>();
    ranked.forEach((id, rank) => {
      if (seen.has(id)) return; // only the best rank within a list counts
      seen.add(id);
      scores.set(id, (scores.get(id) ?? 0) + 1 / (k + rank + 1));
    });
  };

  accumulate(vectorResults);
  accumulate(keywordResults);

  return Array.from(scores, ([id, score]) => ({ id, score }))
    .sort((a, b) => b.score - a.score);
}