feat: Add cross-encoder re-ranking after hybrid search (CF-1317)
Add rerank() function calling LiteLLM /v1/rerank endpoint (Cohere-compatible). Plugged into all 3 search functions (sessions, session-docs, archives) after RRF merge. Disabled by default via RERANK_ENABLED env var. Graceful fallback to RRF-only ranking on API failure. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,10 @@ POSTGRES_PORT=6432
|
|||||||
LLM_API_URL=https://api.agiliton.cloud/llm
|
LLM_API_URL=https://api.agiliton.cloud/llm
|
||||||
LLM_API_KEY=your_llm_api_key_here
|
LLM_API_KEY=your_llm_api_key_here
|
||||||
|
|
||||||
|
# Cross-encoder re-ranking (CF-1317)
|
||||||
|
RERANK_ENABLED=false
|
||||||
|
RERANK_MODEL=rerank-v3.5
|
||||||
|
|
||||||
# Jira Cloud (session tracking)
|
# Jira Cloud (session tracking)
|
||||||
JIRA_URL=https://agiliton.atlassian.net
|
JIRA_URL=https://agiliton.atlassian.net
|
||||||
JIRA_USERNAME=your_email@agiliton.eu
|
JIRA_USERNAME=your_email@agiliton.eu
|
||||||
|
|||||||
@@ -68,6 +68,58 @@ export function formatEmbedding(embedding: number[]): string {
|
|||||||
return `[${embedding.join(',')}]`;
|
return `[${embedding.join(',')}]`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * One scored entry of a rerank response (CF-1317).
 */
export interface RerankResult {
  /** Zero-based position of the document inside the `documents` array that was sent. */
  index: number;
  /** Relevance score reported by the rerank API for that document. */
  relevance_score: number;
}

/**
 * Validate the JSON body of a rerank response.
 *
 * Callers use `index` to look up entries in their own candidate list, so an
 * index that is out of range, repeated, or not an integer would surface as
 * `undefined` or duplicated rows. Any malformed entry, and an empty or missing
 * `results` list, rejects the whole payload so the caller keeps its original order.
 *
 * @param payload Parsed JSON body of the response (untrusted).
 * @param documentCount Number of documents that were sent for scoring.
 * @returns Sanitised results in the order the API returned them, or null if unusable.
 */
function parseRerankResults(payload: unknown, documentCount: number): RerankResult[] | null {
  if (typeof payload !== 'object' || payload === null) return null;

  const results = (payload as { results?: unknown }).results;
  if (!Array.isArray(results) || results.length === 0) return null;

  const seenIndexes = new Set<number>();
  const parsed: RerankResult[] = [];

  for (const entry of results) {
    if (typeof entry !== 'object' || entry === null) return null;

    const { index, relevance_score } = entry as Record<string, unknown>;
    if (
      typeof index !== 'number' ||
      !Number.isInteger(index) ||
      index < 0 ||
      index >= documentCount ||
      seenIndexes.has(index) ||
      typeof relevance_score !== 'number' ||
      !Number.isFinite(relevance_score)
    ) {
      return null;
    }

    seenIndexes.add(index);
    parsed.push({ index, relevance_score });
  }

  return parsed;
}

/**
 * Cross-encoder re-ranking via the LiteLLM `/v1/rerank` endpoint (CF-1317).
 *
 * Sends the query and candidate documents to a Cohere-compatible rerank API and
 * returns the candidates reordered by relevance. Never throws: every failure
 * path returns null so the caller can fall back to its original (RRF) order.
 *
 * Environment:
 * - `RERANK_ENABLED` must be exactly `'true'`, otherwise reranking is skipped.
 * - `LLM_API_URL` is the base URL (a trailing slash is tolerated).
 * - `LLM_API_KEY` is the bearer token; reranking is skipped when it is empty.
 * - `RERANK_MODEL` is the model name, defaulting to `rerank-v3.5`.
 *
 * @param query Search query to score the documents against.
 * @param documents Candidate texts; result indexes refer to positions in this array.
 * @param topN Maximum number of results wanted; clamped to `documents.length`.
 *   Omitted, zero or negative values request every document.
 * @returns Validated results in API order, or null when reranking is disabled,
 *   not configured, fails, times out, or returns an unusable payload.
 */
export async function rerank(
  query: string,
  documents: string[],
  topN?: number
): Promise<RerankResult[] | null> {
  if (process.env.RERANK_ENABLED !== 'true') return null;
  if (documents.length === 0) return null;

  const LLM_API_URL = process.env.LLM_API_URL || 'https://api.agiliton.cloud/llm';
  const LLM_API_KEY = process.env.LLM_API_KEY || '';
  const model = process.env.RERANK_MODEL || 'rerank-v3.5';

  if (!LLM_API_KEY) return null;

  // Never ask for more results than there are documents.
  const topCount =
    typeof topN === 'number' && topN >= 1
      ? Math.min(Math.floor(topN), documents.length)
      : documents.length;

  // Strip trailing slashes so a base URL like ".../llm/" does not yield "//v1/rerank".
  const endpoint = `${LLM_API_URL.replace(/\/+$/, '')}/v1/rerank`;

  // Reranking is an optional refinement; a stalled endpoint must not stall the search.
  const timeoutMs = 10000;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${LLM_API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model,
        query,
        documents,
        top_n: topCount,
      }),
      signal: controller.signal,
    });

    if (!response.ok) {
      console.error('Rerank API error:', response.status, await response.text());
      return null;
    }

    const payload: unknown = await response.json();
    const results = parseRerankResults(payload, documents.length);
    if (results === null) {
      console.error('Rerank API returned an unusable payload (falling back to RRF order)');
    }
    return results;
  } catch (error: unknown) {
    console.error('Rerank failed (falling back to RRF order):', error);
    return null;
  } finally {
    clearTimeout(timer);
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reciprocal Rank Fusion — merge two ranked result lists (CF-1315)
|
* Reciprocal Rank Fusion — merge two ranked result lists (CF-1315)
|
||||||
* @param vectorResults IDs ranked by vector similarity (best first)
|
* @param vectorResults IDs ranked by vector similarity (best first)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
// Project archives operations for database-backed archival
|
// Project archives operations for database-backed archival
|
||||||
|
|
||||||
import { query, queryOne, execute } from '../db.js';
|
import { query, queryOne, execute } from '../db.js';
|
||||||
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge } from '../embeddings.js';
|
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';
|
||||||
|
|
||||||
type ArchiveType = 'session' | 'research' | 'audit' | 'investigation' | 'completed' | 'migration';
|
type ArchiveType = 'session' | 'research' | 'audit' | 'investigation' | 'completed' | 'migration';
|
||||||
|
|
||||||
@@ -193,10 +193,30 @@ export async function archiveSearch(args: ArchiveSearchArgs): Promise<string> {
|
|||||||
let finalIds: number[];
|
let finalIds: number[];
|
||||||
let searchLabel: string;
|
let searchLabel: string;
|
||||||
|
|
||||||
|
let rerankScores: Map<number, number> | null = null;
|
||||||
|
|
||||||
if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
|
if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
|
||||||
const merged = rrfMerge(vectorIds, keywordIds);
|
const merged = rrfMerge(vectorIds, keywordIds);
|
||||||
finalIds = merged.slice(0, limit).map(m => m.id as number);
|
finalIds = merged.map(m => m.id as number);
|
||||||
searchLabel = 'hybrid';
|
searchLabel = 'hybrid';
|
||||||
|
|
||||||
|
// Cross-encoder re-ranking (CF-1317)
|
||||||
|
const docs = finalIds.map(id => {
|
||||||
|
const r = vectorRows.get(id) || keywordRows.get(id);
|
||||||
|
return (r as any)?.title || '';
|
||||||
|
});
|
||||||
|
const reranked = await rerank(searchQuery, docs, limit);
|
||||||
|
if (reranked) {
|
||||||
|
rerankScores = new Map();
|
||||||
|
const reorderedIds = reranked.map(r => {
|
||||||
|
rerankScores!.set(finalIds[r.index], r.relevance_score);
|
||||||
|
return finalIds[r.index];
|
||||||
|
});
|
||||||
|
finalIds = reorderedIds;
|
||||||
|
searchLabel = 'hybrid+rerank';
|
||||||
|
} else {
|
||||||
|
finalIds = finalIds.slice(0, limit);
|
||||||
|
}
|
||||||
} else if (vectorIds.length > 0) {
|
} else if (vectorIds.length > 0) {
|
||||||
finalIds = vectorIds;
|
finalIds = vectorIds;
|
||||||
searchLabel = 'vector';
|
searchLabel = 'vector';
|
||||||
@@ -212,9 +232,12 @@ export async function archiveSearch(args: ArchiveSearchArgs): Promise<string> {
|
|||||||
for (const id of finalIds) {
|
for (const id of finalIds) {
|
||||||
const a = vectorRows.get(id) || keywordRows.get(id);
|
const a = vectorRows.get(id) || keywordRows.get(id);
|
||||||
if (!a) continue;
|
if (!a) continue;
|
||||||
const sim = vectorRows.has(id) ? ` (${Math.round((vectorRows.get(id)!).similarity * 100)}% match)` : '';
|
const simParts: string[] = [];
|
||||||
|
if (vectorRows.has(id)) simParts.push(`${Math.round((vectorRows.get(id)!).similarity * 100)}% match`);
|
||||||
|
if (rerankScores?.has(id)) simParts.push(`rerank: ${rerankScores.get(id)!.toFixed(2)}`);
|
||||||
|
const scores = simParts.length > 0 ? ` (${simParts.join(', ')})` : '';
|
||||||
const sizeStr = a.file_size ? ` (${Math.round(a.file_size / 1024)}KB)` : '';
|
const sizeStr = a.file_size ? ` (${Math.round(a.file_size / 1024)}KB)` : '';
|
||||||
lines.push(`**[${a.archive_type}]** ${a.title}${sim}`);
|
lines.push(`**[${a.archive_type}]** ${a.title}${scores}`);
|
||||||
lines.push(` Archived: ${a.archived_at}${sizeStr}`);
|
lines.push(` Archived: ${a.archived_at}${sizeStr}`);
|
||||||
if (a.original_path) {
|
if (a.original_path) {
|
||||||
lines.push(` Path: ${a.original_path}`);
|
lines.push(` Path: ${a.original_path}`);
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
// Replaces file-based CLAUDE.md and plan files with database storage
|
// Replaces file-based CLAUDE.md and plan files with database storage
|
||||||
|
|
||||||
import { query, queryOne, execute } from '../db.js';
|
import { query, queryOne, execute } from '../db.js';
|
||||||
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge } from '../embeddings.js';
|
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';
|
||||||
import { getSessionId } from './session-id.js';
|
import { getSessionId } from './session-id.js';
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
@@ -545,7 +545,19 @@ export async function sessionSemanticSearch(args: SessionSemanticSearchArgs): Pr
|
|||||||
|
|
||||||
if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
|
if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
|
||||||
const merged = rrfMerge(vectorIds, keywordIds);
|
const merged = rrfMerge(vectorIds, keywordIds);
|
||||||
finalIds = merged.slice(0, limit).map(m => m.id as string);
|
finalIds = merged.map(m => m.id as string);
|
||||||
|
|
||||||
|
// Cross-encoder re-ranking (CF-1317)
|
||||||
|
const docs = finalIds.map(id => {
|
||||||
|
const r = vectorRows.get(id) || keywordRows.get(id);
|
||||||
|
return r?.summary || '';
|
||||||
|
});
|
||||||
|
const reranked = await rerank(searchQuery, docs, limit);
|
||||||
|
if (reranked) {
|
||||||
|
finalIds = reranked.map(r => finalIds[r.index]);
|
||||||
|
} else {
|
||||||
|
finalIds = finalIds.slice(0, limit);
|
||||||
|
}
|
||||||
} else if (vectorIds.length > 0) {
|
} else if (vectorIds.length > 0) {
|
||||||
finalIds = vectorIds;
|
finalIds = vectorIds;
|
||||||
} else if (keywordIds.length > 0) {
|
} else if (keywordIds.length > 0) {
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
// Sessions auto-create CF Jira issues and post output on close (CF-762)
|
// Sessions auto-create CF Jira issues and post output on close (CF-762)
|
||||||
|
|
||||||
import { query, queryOne, execute } from '../db.js';
|
import { query, queryOne, execute } from '../db.js';
|
||||||
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge } from '../embeddings.js';
|
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';
|
||||||
import { createSessionIssue, addComment, transitionToDone, updateIssueDescription } from '../services/jira.js';
|
import { createSessionIssue, addComment, transitionToDone, updateIssueDescription } from '../services/jira.js';
|
||||||
|
|
||||||
interface SessionStartArgs {
|
interface SessionStartArgs {
|
||||||
@@ -412,10 +412,30 @@ export async function sessionSearch(args: SessionSearchArgs): Promise<string> {
|
|||||||
let finalIds: string[];
|
let finalIds: string[];
|
||||||
let searchLabel: string;
|
let searchLabel: string;
|
||||||
|
|
||||||
|
let rerankScores: Map<string, number> | null = null;
|
||||||
|
|
||||||
if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
|
if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
|
||||||
const merged = rrfMerge(vectorIds, keywordIds);
|
const merged = rrfMerge(vectorIds, keywordIds);
|
||||||
finalIds = merged.slice(0, limit).map(m => m.id as string);
|
finalIds = merged.map(m => m.id as string);
|
||||||
searchLabel = 'hybrid';
|
searchLabel = 'hybrid';
|
||||||
|
|
||||||
|
// Cross-encoder re-ranking (CF-1317)
|
||||||
|
const docs = finalIds.map(id => {
|
||||||
|
const r = vectorRows.get(id) || keywordRows.get(id);
|
||||||
|
return (r as any)?.summary || '';
|
||||||
|
});
|
||||||
|
const reranked = await rerank(searchQuery, docs, limit);
|
||||||
|
if (reranked) {
|
||||||
|
rerankScores = new Map();
|
||||||
|
const reorderedIds = reranked.map(r => {
|
||||||
|
rerankScores!.set(finalIds[r.index], r.relevance_score);
|
||||||
|
return finalIds[r.index];
|
||||||
|
});
|
||||||
|
finalIds = reorderedIds;
|
||||||
|
searchLabel = 'hybrid+rerank';
|
||||||
|
} else {
|
||||||
|
finalIds = finalIds.slice(0, limit);
|
||||||
|
}
|
||||||
} else if (vectorIds.length > 0) {
|
} else if (vectorIds.length > 0) {
|
||||||
finalIds = vectorIds;
|
finalIds = vectorIds;
|
||||||
searchLabel = 'vector';
|
searchLabel = 'vector';
|
||||||
@@ -431,10 +451,13 @@ export async function sessionSearch(args: SessionSearchArgs): Promise<string> {
|
|||||||
for (const id of finalIds) {
|
for (const id of finalIds) {
|
||||||
const s = vectorRows.get(id) || keywordRows.get(id);
|
const s = vectorRows.get(id) || keywordRows.get(id);
|
||||||
if (!s) continue;
|
if (!s) continue;
|
||||||
const sim = vectorRows.has(id) ? ` (${Math.round((vectorRows.get(id)!).similarity * 100)}% match)` : '';
|
const simParts: string[] = [];
|
||||||
|
if (vectorRows.has(id)) simParts.push(`${Math.round((vectorRows.get(id)!).similarity * 100)}% match`);
|
||||||
|
if (rerankScores?.has(id)) simParts.push(`rerank: ${rerankScores.get(id)!.toFixed(2)}`);
|
||||||
|
const scores = simParts.length > 0 ? ` (${simParts.join(', ')})` : '';
|
||||||
const num = s.session_number ? `#${s.session_number}` : '';
|
const num = s.session_number ? `#${s.session_number}` : '';
|
||||||
const duration = s.duration_minutes ? `(${s.duration_minutes}m)` : '';
|
const duration = s.duration_minutes ? `(${s.duration_minutes}m)` : '';
|
||||||
lines.push(`**${s.project} ${num}** ${duration}${sim}`);
|
lines.push(`**${s.project} ${num}** ${duration}${scores}`);
|
||||||
lines.push(` ${s.summary || 'No summary'}`);
|
lines.push(` ${s.summary || 'No summary'}`);
|
||||||
lines.push('');
|
lines.push('');
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user