feat(CF-1315): Hybrid search with tsvector + RRF

Add PostgreSQL full-text search alongside pgvector for exact matches
on Jira keys, error messages, and file paths. Merge results with
Reciprocal Rank Fusion. Default mode: hybrid, with graceful
degradation to keyword-only when embeddings are unavailable.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-18 08:46:39 +02:00
parent 1f499bd926
commit 4f8996cd82
8 changed files with 434 additions and 183 deletions

View File

@@ -67,3 +67,29 @@ export async function getEmbedding(text: string): Promise<number[] | null> {
export function formatEmbedding(embedding: number[]): string {
  // Serialize as a bracketed, comma-separated list with no spaces, e.g. "[0.1,0.2]".
  // An empty embedding yields "[]".
  const components = embedding.join(',');
  return '[' + components + ']';
}
/**
 * Reciprocal Rank Fusion — merge two ranked result lists (CF-1315).
 *
 * Each list contributes `1 / (k + rank + 1)` to an ID's score, where `rank` is
 * the ID's 0-based position in that list; an ID present in both lists receives
 * the sum of both contributions.
 *
 * An ID repeated within a single list is counted once, at its first (best)
 * position, and later repeats do not consume a rank. Without this, one ranker
 * could inflate a document's score simply by returning it several times.
 *
 * IDs are compared with `Map` key equality, so the number `1` and the string
 * `'1'` are distinct IDs.
 *
 * @param vectorResults IDs ranked by vector similarity (best first)
 * @param keywordResults IDs ranked by ts_rank (best first)
 * @param k RRF parameter (default 60, standard)
 * @returns Merged IDs sorted by RRF score descending; ties keep first-seen
 *          order (vector results before keyword results)
 */
export function rrfMerge(
  vectorResults: (number | string)[],
  keywordResults: (number | string)[],
  k: number = 60
): { id: number | string; score: number }[] {
  const scores = new Map<number | string, number>();

  const accumulate = (ranking: (number | string)[]): void => {
    // A Set iterates in insertion order, so spreading it yields the list with
    // repeats removed and every ID kept at its first (best) position.
    [...new Set(ranking)].forEach((id, rank) => {
      scores.set(id, (scores.get(id) ?? 0) + 1 / (k + rank + 1));
    });
  };

  accumulate(vectorResults);
  accumulate(keywordResults);

  // Array.prototype.sort is stable, so equal scores stay in Map insertion order.
  return Array.from(scores, ([id, score]) => ({ id, score })).sort(
    (a, b) => b.score - a.score
  );
}