feat: Add cross-encoder re-ranking after hybrid search (CF-1317)

Add a rerank() function that calls the LiteLLM /v1/rerank endpoint (Cohere-compatible).
It is plugged into all 3 search functions (sessions, session-docs, archives) after
the RRF merge. Re-ranking is disabled by default and is enabled by setting the
RERANK_ENABLED env var to "true". On API failure, search falls back gracefully to
RRF-only ranking.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-02-19 16:36:24 +02:00
parent 0150575713
commit ef74d7912e
5 changed files with 124 additions and 10 deletions

View File

@@ -8,6 +8,10 @@ POSTGRES_PORT=6432
LLM_API_URL=https://api.agiliton.cloud/llm
LLM_API_KEY=your_llm_api_key_here
# Cross-encoder re-ranking of hybrid search results (CF-1317); set RERANK_ENABLED=true to enable (requires LLM_API_KEY)
RERANK_ENABLED=false
RERANK_MODEL=rerank-v3.5
# Jira Cloud (session tracking)
JIRA_URL=https://agiliton.atlassian.net
JIRA_USERNAME=your_email@agiliton.eu

View File

@@ -68,6 +68,58 @@ export function formatEmbedding(embedding: number[]): string {
return `[${embedding.join(',')}]`;
}
/**
 * One entry of a rerank response (Cohere-compatible shape).
 * `index` is the position of the document in the `documents` array that was
 * sent with the request; `relevance_score` is the score the model assigned.
 */
export interface RerankResult {
  index: number;
  relevance_score: number;
}

/** Upper bound (ms) on a rerank request before it is aborted and treated as a failure. */
const RERANK_TIMEOUT_MS = 10000;

/**
 * Type guard for a single rerank entry: `index` must be an integer that
 * addresses one of the `documentCount` submitted documents and
 * `relevance_score` must be a finite number.
 */
function isValidRerankResult(value: unknown, documentCount: number): value is RerankResult {
  if (typeof value !== 'object' || value === null) return false;
  const { index, relevance_score: score } = value as Record<string, unknown>;
  return (
    typeof index === 'number' &&
    Number.isInteger(index) &&
    index >= 0 &&
    index < documentCount &&
    typeof score === 'number' &&
    Number.isFinite(score)
  );
}

/**
 * Cross-encoder re-ranking via the LiteLLM `/v1/rerank` endpoint (CF-1317).
 * Calls the Cohere-compatible rerank API to reorder candidates by relevance.
 *
 * Configuration is read from the environment on every call:
 * `RERANK_ENABLED` (must be exactly `'true'`), `LLM_API_URL`, `LLM_API_KEY`
 * and `RERANK_MODEL`.
 *
 * @param query     Search query the documents are scored against.
 * @param documents Candidate texts; result `index` values refer to positions in this array.
 * @param topN      Maximum number of results wanted. Defaults to all documents;
 *                  values below 1 are ignored and values above
 *                  `documents.length` are clamped.
 * @returns Results ordered by descending relevance and capped at `topN`, or
 *          `null` when re-ranking is disabled, there is nothing to rank, no API
 *          key is set, or the request fails / times out / returns an empty or
 *          malformed payload. Callers treat `null` as "keep the original order".
 */
export async function rerank(
  query: string,
  documents: string[],
  topN?: number
): Promise<RerankResult[] | null> {
  if (process.env.RERANK_ENABLED !== 'true') return null;
  if (documents.length === 0) return null;

  const apiKey = process.env.LLM_API_KEY || '';
  if (!apiKey) return null;

  // Strip trailing slashes so a configured "…/llm/" does not produce "…/llm//v1/rerank".
  const baseUrl = (process.env.LLM_API_URL || 'https://api.agiliton.cloud/llm').replace(/\/+$/, '');
  const model = process.env.RERANK_MODEL || 'rerank-v3.5';
  const wanted =
    topN !== undefined && topN >= 1
      ? Math.min(Math.floor(topN), documents.length)
      : documents.length;

  // Abort a hung request so search can fall back instead of blocking indefinitely.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), RERANK_TIMEOUT_MS);

  try {
    const response = await fetch(`${baseUrl}/v1/rerank`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model,
        query,
        documents,
        top_n: wanted,
      }),
      signal: controller.signal,
    });

    if (!response.ok) {
      console.error('Rerank API error:', response.status, await response.text());
      return null;
    }

    const data: unknown = await response.json();
    const results = (data as { results?: unknown } | null)?.results;

    // An empty array is truthy; returning it would make callers replace their
    // candidate list with nothing instead of falling back to the RRF order.
    if (!Array.isArray(results) || results.length === 0) {
      console.error('Rerank API returned no results (falling back to RRF order)');
      return null;
    }

    // Callers index their candidate list with `index`, so reject the whole
    // response if any entry is malformed, out of range, or duplicated.
    const seen = new Set<number>();
    for (const entry of results) {
      if (!isValidRerankResult(entry, documents.length) || seen.has(entry.index)) {
        console.error('Rerank API returned malformed results (falling back to RRF order)');
        return null;
      }
      seen.add(entry.index);
    }

    // Enforce the contract locally: best first, never more than requested.
    const ranked = (results as RerankResult[])
      .slice()
      .sort((a, b) => b.relevance_score - a.relevance_score);
    return ranked.slice(0, wanted);
  } catch (error) {
    console.error('Rerank failed (falling back to RRF order):', error);
    return null;
  } finally {
    clearTimeout(timer);
  }
}
/**
* Reciprocal Rank Fusion — merge two ranked result lists (CF-1315)
* @param vectorResults IDs ranked by vector similarity (best first)

View File

@@ -1,7 +1,7 @@
// Project archives operations for database-backed archival
import { query, queryOne, execute } from '../db.js';
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge } from '../embeddings.js';
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';
type ArchiveType = 'session' | 'research' | 'audit' | 'investigation' | 'completed' | 'migration';
@@ -193,10 +193,30 @@ export async function archiveSearch(args: ArchiveSearchArgs): Promise<string> {
let finalIds: number[];
let searchLabel: string;
let rerankScores: Map<number, number> | null = null;
if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
const merged = rrfMerge(vectorIds, keywordIds);
finalIds = merged.slice(0, limit).map(m => m.id as number);
finalIds = merged.map(m => m.id as number);
searchLabel = 'hybrid';
// Cross-encoder re-ranking (CF-1317)
const docs = finalIds.map(id => {
const r = vectorRows.get(id) || keywordRows.get(id);
return (r as any)?.title || '';
});
const reranked = await rerank(searchQuery, docs, limit);
if (reranked) {
rerankScores = new Map();
const reorderedIds = reranked.map(r => {
rerankScores!.set(finalIds[r.index], r.relevance_score);
return finalIds[r.index];
});
finalIds = reorderedIds;
searchLabel = 'hybrid+rerank';
} else {
finalIds = finalIds.slice(0, limit);
}
} else if (vectorIds.length > 0) {
finalIds = vectorIds;
searchLabel = 'vector';
@@ -212,9 +232,12 @@ export async function archiveSearch(args: ArchiveSearchArgs): Promise<string> {
for (const id of finalIds) {
const a = vectorRows.get(id) || keywordRows.get(id);
if (!a) continue;
const sim = vectorRows.has(id) ? ` (${Math.round((vectorRows.get(id)!).similarity * 100)}% match)` : '';
const simParts: string[] = [];
if (vectorRows.has(id)) simParts.push(`${Math.round((vectorRows.get(id)!).similarity * 100)}% match`);
if (rerankScores?.has(id)) simParts.push(`rerank: ${rerankScores.get(id)!.toFixed(2)}`);
const scores = simParts.length > 0 ? ` (${simParts.join(', ')})` : '';
const sizeStr = a.file_size ? ` (${Math.round(a.file_size / 1024)}KB)` : '';
lines.push(`**[${a.archive_type}]** ${a.title}${sim}`);
lines.push(`**[${a.archive_type}]** ${a.title}${scores}`);
lines.push(` Archived: ${a.archived_at}${sizeStr}`);
if (a.original_path) {
lines.push(` Path: ${a.original_path}`);

View File

@@ -2,7 +2,7 @@
// Replaces file-based CLAUDE.md and plan files with database storage
import { query, queryOne, execute } from '../db.js';
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge } from '../embeddings.js';
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';
import { getSessionId } from './session-id.js';
// ============================================================================
@@ -545,7 +545,19 @@ export async function sessionSemanticSearch(args: SessionSemanticSearchArgs): Pr
if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
const merged = rrfMerge(vectorIds, keywordIds);
finalIds = merged.slice(0, limit).map(m => m.id as string);
finalIds = merged.map(m => m.id as string);
// Cross-encoder re-ranking (CF-1317)
const docs = finalIds.map(id => {
const r = vectorRows.get(id) || keywordRows.get(id);
return r?.summary || '';
});
const reranked = await rerank(searchQuery, docs, limit);
if (reranked) {
finalIds = reranked.map(r => finalIds[r.index]);
} else {
finalIds = finalIds.slice(0, limit);
}
} else if (vectorIds.length > 0) {
finalIds = vectorIds;
} else if (keywordIds.length > 0) {

View File

@@ -2,7 +2,7 @@
// Sessions auto-create CF Jira issues and post output on close (CF-762)
import { query, queryOne, execute } from '../db.js';
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge } from '../embeddings.js';
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';
import { createSessionIssue, addComment, transitionToDone, updateIssueDescription } from '../services/jira.js';
interface SessionStartArgs {
@@ -412,10 +412,30 @@ export async function sessionSearch(args: SessionSearchArgs): Promise<string> {
let finalIds: string[];
let searchLabel: string;
let rerankScores: Map<string, number> | null = null;
if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
const merged = rrfMerge(vectorIds, keywordIds);
finalIds = merged.slice(0, limit).map(m => m.id as string);
finalIds = merged.map(m => m.id as string);
searchLabel = 'hybrid';
// Cross-encoder re-ranking (CF-1317)
const docs = finalIds.map(id => {
const r = vectorRows.get(id) || keywordRows.get(id);
return (r as any)?.summary || '';
});
const reranked = await rerank(searchQuery, docs, limit);
if (reranked) {
rerankScores = new Map();
const reorderedIds = reranked.map(r => {
rerankScores!.set(finalIds[r.index], r.relevance_score);
return finalIds[r.index];
});
finalIds = reorderedIds;
searchLabel = 'hybrid+rerank';
} else {
finalIds = finalIds.slice(0, limit);
}
} else if (vectorIds.length > 0) {
finalIds = vectorIds;
searchLabel = 'vector';
@@ -431,10 +451,13 @@ export async function sessionSearch(args: SessionSearchArgs): Promise<string> {
for (const id of finalIds) {
const s = vectorRows.get(id) || keywordRows.get(id);
if (!s) continue;
const sim = vectorRows.has(id) ? ` (${Math.round((vectorRows.get(id)!).similarity * 100)}% match)` : '';
const simParts: string[] = [];
if (vectorRows.has(id)) simParts.push(`${Math.round((vectorRows.get(id)!).similarity * 100)}% match`);
if (rerankScores?.has(id)) simParts.push(`rerank: ${rerankScores.get(id)!.toFixed(2)}`);
const scores = simParts.length > 0 ? ` (${simParts.join(', ')})` : '';
const num = s.session_number ? `#${s.session_number}` : '';
const duration = s.duration_minutes ? `(${s.duration_minutes}m)` : '';
lines.push(`**${s.project} ${num}** ${duration}${sim}`);
lines.push(`**${s.project} ${num}** ${duration}${scores}`);
lines.push(` ${s.summary || 'No summary'}`);
lines.push('');
}