diff --git a/.env.example b/.env.example
index b425d21..7f22924 100644
--- a/.env.example
+++ b/.env.example
@@ -8,6 +8,11 @@ POSTGRES_PORT=6432
 LLM_API_URL=https://api.agiliton.cloud/llm
 LLM_API_KEY=your_llm_api_key_here
 
+# Cross-encoder re-ranking (CF-1317)
+RERANK_ENABLED=false
+RERANK_MODEL=rerank-v3.5
+RERANK_TIMEOUT_MS=10000
+
 # Jira Cloud (session tracking)
 JIRA_URL=https://agiliton.atlassian.net
 JIRA_USERNAME=your_email@agiliton.eu
diff --git a/src/embeddings.ts b/src/embeddings.ts
index ecd86a5..baa6790 100644
--- a/src/embeddings.ts
+++ b/src/embeddings.ts
@@ -68,6 +68,110 @@ export function formatEmbedding(embedding: number[]): string {
   return `[${embedding.join(',')}]`;
 }
 
+/**
+ * Cross-encoder re-ranking via LiteLLM /rerank endpoint (CF-1317)
+ * Calls Cohere-compatible rerank API to reorder candidates by relevance.
+ * Returns null on failure (caller falls back to original order).
+ */
+export interface RerankResult {
+  index: number;
+  relevance_score: number;
+}
+
+/**
+ * Re-rank `documents` against `query` with a cross-encoder.
+ *
+ * Best-effort: when re-ranking is disabled, unconfigured, times out, or the API
+ * answers with an error or a malformed body, null is returned so the caller
+ * keeps its original order.
+ *
+ * @param query Search query the documents are scored against
+ * @param documents Candidate texts; each result's `index` points into this array
+ * @param topN Maximum number of results (default: all documents)
+ * @returns At most `topN` results in API order, each with an in-range, unique
+ *          `index` and a finite `relevance_score`; null if none are usable
+ */
+export async function rerank(
+  query: string,
+  documents: string[],
+  topN?: number
+): Promise<RerankResult[] | null> {
+  if (process.env.RERANK_ENABLED !== 'true') return null;
+  if (documents.length === 0) return null;
+
+  // Strip trailing slashes so a base URL ending in "/" does not produce "//v1/rerank".
+  const baseUrl = (process.env.LLM_API_URL || 'https://api.agiliton.cloud/llm').replace(/\/+$/, '');
+  const apiKey = process.env.LLM_API_KEY || '';
+  const model = process.env.RERANK_MODEL || 'rerank-v3.5';
+
+  if (!apiKey) return null;
+
+  // Ask for 1..documents.length results; a missing, zero, negative or NaN topN
+  // means "all documents".
+  const topCount = typeof topN === 'number' && topN >= 1
+    ? Math.min(Math.floor(topN), documents.length)
+    : documents.length;
+
+  // Bound the request so a stalled rerank endpoint cannot stall the whole search.
+  const configuredTimeout = Number(process.env.RERANK_TIMEOUT_MS);
+  const timeoutMs = Number.isFinite(configuredTimeout) && configuredTimeout > 0
+    ? configuredTimeout
+    : 10000;
+
+  try {
+    const response = await fetch(`${baseUrl}/v1/rerank`, {
+      method: 'POST',
+      headers: {
+        'Authorization': `Bearer ${apiKey}`,
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        model,
+        query,
+        documents,
+        top_n: topCount,
+      }),
+      signal: AbortSignal.timeout(timeoutMs),
+    });
+
+    if (!response.ok) {
+      console.error('Rerank API error:', response.status, await response.text());
+      return null;
+    }
+
+    const data: unknown = await response.json();
+    const rawResults = (data as { results?: unknown } | null)?.results;
+    if (!Array.isArray(rawResults)) {
+      console.error('Rerank API returned no results array');
+      return null;
+    }
+
+    // Callers index their candidate list with `index` and call toFixed() on
+    // `relevance_score`, so skip entries that are out of range, repeated or non-numeric.
+    const seen = new Set<number>();
+    const results: RerankResult[] = [];
+    for (const item of rawResults as Array<Partial<RerankResult> | null>) {
+      const index = item?.index;
+      const score = item?.relevance_score;
+      if (
+        typeof index !== 'number' || !Number.isInteger(index) ||
+        index < 0 || index >= documents.length || seen.has(index) ||
+        typeof score !== 'number' || !Number.isFinite(score)
+      ) {
+        continue;
+      }
+      seen.add(index);
+      results.push({ index, relevance_score: score });
+      if (results.length === topCount) break;
+    }
+
+    return results.length > 0 ? results : null;
+  } catch (error) {
+    console.error('Rerank failed (falling back to RRF order):', error);
+    return null;
+  }
+}
+
 /**
  * Reciprocal Rank Fusion — merge two ranked result lists (CF-1315)
  * @param vectorResults IDs ranked by vector similarity (best first)
diff --git a/src/tools/archives.ts b/src/tools/archives.ts
index d8aabaf..724440b 100644
--- a/src/tools/archives.ts
+++ b/src/tools/archives.ts
@@ -1,7 +1,7 @@
 // Project archives operations for database-backed archival
 
 import { query, queryOne, execute } from '../db.js';
-import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge } from '../embeddings.js';
+import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';
 
 type ArchiveType = 'session' | 'research' | 'audit' | 'investigation' | 'completed' | 'migration';
 
@@ -193,10 +193,30 @@ export async function archiveSearch(args: ArchiveSearchArgs): Promise {
   let finalIds: number[];
   let searchLabel: string;
 
+  let rerankScores: Map<number, number> | null = null;
+
   if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
     const merged = rrfMerge(vectorIds, keywordIds);
-    finalIds = merged.slice(0, limit).map(m => m.id as number);
+    finalIds = merged.map(m => m.id as number);
     searchLabel = 'hybrid';
+
+    // Cross-encoder re-ranking (CF-1317)
+    const docs = finalIds.map(id => {
+      const r = vectorRows.get(id) || keywordRows.get(id);
+      return (r as any)?.title || '';
+    });
+    const reranked = await rerank(searchQuery, docs, limit);
+    if (reranked) {
+      rerankScores = new Map();
+      const reorderedIds = reranked.map(r => {
+        rerankScores!.set(finalIds[r.index], r.relevance_score);
+        return finalIds[r.index];
+      });
+      finalIds = reorderedIds;
+      searchLabel = 'hybrid+rerank';
+    } else {
+      finalIds = finalIds.slice(0, limit);
+    }
   } else if (vectorIds.length > 0) {
     finalIds = vectorIds;
     searchLabel = 'vector';
@@ -212,9 +232,12 @@ export async function archiveSearch(args: ArchiveSearchArgs): Promise {
   for (const id of finalIds) {
     const a = vectorRows.get(id) || keywordRows.get(id);
     if (!a) continue;
-    const sim = vectorRows.has(id) ? ` (${Math.round((vectorRows.get(id)!).similarity * 100)}% match)` : '';
+    const simParts: string[] = [];
+    if (vectorRows.has(id)) simParts.push(`${Math.round((vectorRows.get(id)!).similarity * 100)}% match`);
+    if (rerankScores?.has(id)) simParts.push(`rerank: ${rerankScores.get(id)!.toFixed(2)}`);
+    const scores = simParts.length > 0 ? ` (${simParts.join(', ')})` : '';
     const sizeStr = a.file_size ? ` (${Math.round(a.file_size / 1024)}KB)` : '';
-    lines.push(`**[${a.archive_type}]** ${a.title}${sim}`);
+    lines.push(`**[${a.archive_type}]** ${a.title}${scores}`);
     lines.push(` Archived: ${a.archived_at}${sizeStr}`);
     if (a.original_path) {
       lines.push(` Path: ${a.original_path}`);
diff --git a/src/tools/session-docs.ts b/src/tools/session-docs.ts
index 07a65c0..e8faa70 100644
--- a/src/tools/session-docs.ts
+++ b/src/tools/session-docs.ts
@@ -2,7 +2,7 @@
 // Replaces file-based CLAUDE.md and plan files with database storage
 
 import { query, queryOne, execute } from '../db.js';
-import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge } from '../embeddings.js';
+import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';
 import { getSessionId } from './session-id.js';
 
 // ============================================================================
@@ -545,7 +545,19 @@ export async function sessionSemanticSearch(args: SessionSemanticSearchArgs): Pr
 
   if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
     const merged = rrfMerge(vectorIds, keywordIds);
-    finalIds = merged.slice(0, limit).map(m => m.id as string);
+    finalIds = merged.map(m => m.id as string);
+
+    // Cross-encoder re-ranking (CF-1317)
+    const docs = finalIds.map(id => {
+      const r = vectorRows.get(id) || keywordRows.get(id);
+      return r?.summary || '';
+    });
+    const reranked = await rerank(searchQuery, docs, limit);
+    if (reranked) {
+      finalIds = reranked.map(r => finalIds[r.index]);
+    } else {
+      finalIds = finalIds.slice(0, limit);
+    }
   } else if (vectorIds.length > 0) {
     finalIds = vectorIds;
   } else if (keywordIds.length > 0) {
diff --git a/src/tools/sessions.ts b/src/tools/sessions.ts
index 7d5c0b3..fc29dfb 100644
--- a/src/tools/sessions.ts
+++ b/src/tools/sessions.ts
@@ -2,7 +2,7 @@
 // Sessions auto-create CF Jira issues and post output on close (CF-762)
 
 import { query, queryOne, execute } from '../db.js';
-import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge } from '../embeddings.js';
+import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';
 import { createSessionIssue, addComment, transitionToDone, updateIssueDescription } from '../services/jira.js';
 
 interface SessionStartArgs {
@@ -412,10 +412,30 @@ export async function sessionSearch(args: SessionSearchArgs): Promise {
   let finalIds: string[];
   let searchLabel: string;
 
+  let rerankScores: Map<string, number> | null = null;
+
   if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
     const merged = rrfMerge(vectorIds, keywordIds);
-    finalIds = merged.slice(0, limit).map(m => m.id as string);
+    finalIds = merged.map(m => m.id as string);
     searchLabel = 'hybrid';
+
+    // Cross-encoder re-ranking (CF-1317)
+    const docs = finalIds.map(id => {
+      const r = vectorRows.get(id) || keywordRows.get(id);
+      return (r as any)?.summary || '';
+    });
+    const reranked = await rerank(searchQuery, docs, limit);
+    if (reranked) {
+      rerankScores = new Map();
+      const reorderedIds = reranked.map(r => {
+        rerankScores!.set(finalIds[r.index], r.relevance_score);
+        return finalIds[r.index];
+      });
+      finalIds = reorderedIds;
+      searchLabel = 'hybrid+rerank';
+    } else {
+      finalIds = finalIds.slice(0, limit);
+    }
   } else if (vectorIds.length > 0) {
     finalIds = vectorIds;
     searchLabel = 'vector';
@@ -431,10 +451,13 @@ export async function sessionSearch(args: SessionSearchArgs): Promise {
   for (const id of finalIds) {
     const s = vectorRows.get(id) || keywordRows.get(id);
     if (!s) continue;
-    const sim = vectorRows.has(id) ? ` (${Math.round((vectorRows.get(id)!).similarity * 100)}% match)` : '';
+    const simParts: string[] = [];
+    if (vectorRows.has(id)) simParts.push(`${Math.round((vectorRows.get(id)!).similarity * 100)}% match`);
+    if (rerankScores?.has(id)) simParts.push(`rerank: ${rerankScores.get(id)!.toFixed(2)}`);
+    const scores = simParts.length > 0 ? ` (${simParts.join(', ')})` : '';
     const num = s.session_number ? `#${s.session_number}` : '';
     const duration = s.duration_minutes ? `(${s.duration_minutes}m)` : '';
-    lines.push(`**${s.project} ${num}** ${duration}${sim}`);
+    lines.push(`**${s.project} ${num}** ${duration}${scores}`);
     lines.push(` ${s.summary || 'No summary'}`);
     lines.push('');
   }