Add PostgreSQL full-text search alongside pgvector for exact matches on Jira keys, error messages, and file paths. Merge the two result lists with Reciprocal Rank Fusion. Default mode: hybrid, with graceful degradation to keyword-only search when embeddings are unavailable. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
96 lines
2.6 KiB
TypeScript
96 lines
2.6 KiB
TypeScript
// Embeddings via LiteLLM API, content hashing (CF-1314), and Reciprocal Rank Fusion merging (CF-1315)
|
|
|
|
import { createHash } from 'crypto';
|
|
|
|
/**
 * Compute the SHA-256 digest of `text`, hex-encoded.
 *
 * Used as a dedup key so identical content can be recognised before an
 * embedding API call is made (CF-1314).
 *
 * @param text Content to hash; hashed as UTF-8.
 * @returns The SHA-256 digest as a hexadecimal string.
 */
export function generateContentHash(text: string): string {
  // 'utf8' is the default input encoding for strings; spelled out for clarity.
  return createHash('sha256').update(text, 'utf8').digest('hex');
}
|
|
|
|
/**
 * Shape assumed for the JSON body returned by the `/v1/embeddings` request
 * issued in `getEmbedding`. This is a compile-time description only; nothing
 * in the type enforces it at runtime.
 */
interface EmbeddingResponse {
  /** Embedding entries; `getEmbedding` reads the first entry's `embedding`. */
  data: Array<{
    embedding: number[];
    index: number;
  }>;
  /** Model field of the response (not read in this file). */
  model: string;
  /** Token usage figures of the response (not read in this file). */
  usage: {
    prompt_tokens: number;
    total_tokens: number;
  };
}
|
|
|
|
/**
 * Request an embedding vector for `text` from the LiteLLM `/v1/embeddings`
 * endpoint, using the hard-coded model `mxbai-embed-large`.
 *
 * Configuration is read from `process.env` on every call:
 * - `LLM_API_URL` — base URL (falls back to `https://api.agiliton.cloud/llm`);
 *   trailing slashes are ignored.
 * - `LLM_API_KEY` — bearer token; when missing or empty no request is made.
 *
 * Failures (missing key, non-2xx status, network/parse error, or a response
 * without a usable vector) are logged with `console.error` and reported as
 * `null` rather than thrown, so callers can fall back to non-vector search.
 *
 * @param text Text to embed.
 * @returns The embedding as a non-empty array of finite numbers, or `null`.
 */
export async function getEmbedding(text: string): Promise<number[] | null> {
  // Read env vars at runtime (after dotenv.config() in index.ts)
  const LLM_API_URL = process.env.LLM_API_URL || 'https://api.agiliton.cloud/llm';
  const LLM_API_KEY = process.env.LLM_API_KEY || '';

  if (!LLM_API_KEY) {
    console.error('LLM_API_KEY not set, skipping embedding (check .env file)');
    console.error('LLM_API_URL:', LLM_API_URL);
    return null;
  }

  // A base URL configured as ".../llm/" must not produce ".../llm//v1/embeddings".
  const endpoint = `${LLM_API_URL.replace(/\/+$/, '')}/v1/embeddings`;

  try {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${LLM_API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: 'mxbai-embed-large',
        input: text,
      }),
    });

    if (!response.ok) {
      console.error('Embedding API error:', response.status, await response.text());
      return null;
    }

    // The cast is only a compile-time hint, so verify the vector at runtime:
    // an empty or non-numeric "embedding" would otherwise be handed to callers
    // as if it were valid.
    const payload = (await response.json()) as Partial<EmbeddingResponse> | null;
    const candidate: unknown = payload?.data?.[0]?.embedding;
    const isVector =
      Array.isArray(candidate) &&
      candidate.length > 0 &&
      candidate.every((value) => typeof value === 'number' && Number.isFinite(value));

    if (!isVector) {
      console.error('Embedding API returned no usable embedding');
      return null;
    }

    return candidate as number[];
  } catch (error) {
    console.error('Embedding generation failed:', error);
    return null;
  }
}
|
|
|
|
/**
 * Serialize an embedding as a bracketed, comma-separated list of its values
 * (e.g. `[0.1,0.2,0.3]`) for use as a PostgreSQL `vector` value.
 *
 * The values are written exactly as JavaScript stringifies them; no
 * validation or rounding is applied, and an empty array yields `[]`.
 *
 * @param embedding Vector components; the array is not modified.
 * @returns The bracketed textual form of the vector.
 */
export function formatEmbedding(embedding: readonly number[]): string {
  const components = embedding.join(',');
  return `[${components}]`;
}
|
|
|
|
/**
 * Reciprocal Rank Fusion — merge two ranked result lists (CF-1315).
 *
 * Each list contributes `1 / (k + rank + 1)` to an ID's score, where `rank`
 * is the ID's zero-based position in that list. An ID present in both lists
 * receives the sum of both contributions. If an ID is repeated inside a
 * single list, only its first (best-ranked) occurrence counts, so one list
 * can never contribute more than once to the same ID.
 *
 * IDs are compared as Map keys, so the number `1` and the string `'1'` are
 * different IDs.
 *
 * @param vectorResults IDs ranked by vector similarity (best first)
 * @param keywordResults IDs ranked by ts_rank (best first)
 * @param k RRF parameter (default 60, standard)
 * @returns Merged IDs sorted by RRF score descending; equal scores keep
 *          first-seen order (vector list first, then keyword list).
 */
export function rrfMerge(
  vectorResults: readonly (number | string)[],
  keywordResults: readonly (number | string)[],
  k: number = 60
): { id: number | string; score: number }[] {
  const scores = new Map<number | string, number>();

  for (const ranking of [vectorResults, keywordResults]) {
    // Tracks IDs already credited by this list so duplicates are not double-counted.
    const credited = new Set<number | string>();

    ranking.forEach((id, rank) => {
      if (credited.has(id)) {
        return;
      }
      credited.add(id);
      scores.set(id, (scores.get(id) ?? 0) + 1 / (k + rank + 1));
    });
  }

  return Array.from(scores, ([id, score]) => ({ id, score })).sort(
    (a, b) => b.score - a.score
  );
}
|