Add rerank() function calling LiteLLM /v1/rerank endpoint (Cohere-compatible). Plugged into all 3 search functions (sessions, session-docs, archives) after RRF merge. Disabled by default via RERANK_ENABLED env var. Graceful fallback to RRF-only ranking on API failure. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
339 lines
10 KiB
TypeScript
339 lines
10 KiB
TypeScript
// Project archives operations for database-backed archival
|
|
|
|
import { query, queryOne, execute } from '../db.js';
|
|
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';
|
|
|
|
// Allowed archive categories; used for the archive_type column and tool arguments.
type ArchiveType = 'session' | 'research' | 'audit' | 'investigation' | 'completed' | 'migration';
|
|
|
|
/**
 * Row shape for the project_archives table as selected by the queries in this
 * module. Date columns arrive pre-formatted as 'YYYY-MM-DD' strings because
 * the SELECTs apply to_char(archived_at, 'YYYY-MM-DD').
 */
interface Archive {
  id: number;
  project_key: string;                 // matches projects.key (see verifyProject)
  archive_type: ArchiveType;
  title: string;
  content: string;                     // full archived text
  original_path: string | null;        // pre-archival filesystem path, if any
  file_size: number | null;            // bytes; rendered as KB in output
  archived_at: string;                 // 'YYYY-MM-DD' (formatted in SQL)
  archived_by_session: string | null;
  metadata: Record<string, unknown>;
  created_at: string;
  updated_at: string;
}
|
|
|
|
/**
 * Arguments for archiveAdd. Only the first four fields are required; the rest
 * are stored as NULL (or {} for metadata) when omitted.
 */
interface ArchiveAddArgs {
  project: string;                     // must name an existing project
  archive_type: ArchiveType;
  title: string;
  content: string;
  original_path?: string;
  file_size?: number;                  // bytes
  archived_by_session?: string;
  metadata?: Record<string, unknown>;  // serialized to JSON on insert
}
|
|
|
|
type SearchMode = 'hybrid' | 'vector' | 'keyword';
|
|
|
|
/**
 * Arguments for archiveSearch.
 */
interface ArchiveSearchArgs {
  query: string;               // search text (embedded and/or fed to plainto_tsquery)
  project?: string;            // optional project_key filter
  archive_type?: ArchiveType;  // optional type filter
  limit?: number;              // max results, default 5
  search_mode?: SearchMode;    // default 'hybrid'
}
|
|
|
|
/**
 * Arguments for archiveList (non-semantic listing).
 */
interface ArchiveListArgs {
  project?: string;            // optional project_key filter
  archive_type?: ArchiveType;  // optional type filter
  since?: string;              // lower bound on archived_at (passed straight to SQL)
  limit?: number;              // max rows, default 20
}
|
|
|
|
/**
 * Arguments for archiveGet: the archive's primary key.
 */
interface ArchiveGetArgs {
  id: number;
}
|
|
|
|
/**
|
|
* Verify project exists
|
|
*/
|
|
async function verifyProject(projectKey: string): Promise<boolean> {
|
|
const result = await queryOne<{ key: string }>(
|
|
'SELECT key FROM projects WHERE key = $1',
|
|
[projectKey]
|
|
);
|
|
return !!result;
|
|
}
|
|
|
|
/**
|
|
* Add a new archive entry
|
|
*/
|
|
export async function archiveAdd(args: ArchiveAddArgs): Promise<string> {
|
|
const { project, archive_type, title, content, original_path, file_size, archived_by_session, metadata } = args;
|
|
|
|
// Verify project exists
|
|
const exists = await verifyProject(project);
|
|
if (!exists) {
|
|
return `Error: Project not found: ${project}`;
|
|
}
|
|
|
|
// CF-1314: Hash content for dedup before embedding API call
|
|
const embedText = `${title}. ${content.substring(0, 1000)}`;
|
|
const contentHash = generateContentHash(embedText);
|
|
|
|
const existing = await queryOne<{ id: number }>(
|
|
'SELECT id FROM project_archives WHERE content_hash = $1 AND project_key = $2 LIMIT 1',
|
|
[contentHash, project]
|
|
);
|
|
if (existing) {
|
|
return `Archive already exists (id: ${existing.id}): [${archive_type}] ${title}`;
|
|
}
|
|
|
|
// Generate embedding for semantic search
|
|
const embedding = await getEmbedding(embedText);
|
|
const embeddingValue = embedding ? formatEmbedding(embedding) : null;
|
|
|
|
await execute(
|
|
`INSERT INTO project_archives
|
|
(project_key, archive_type, title, content, original_path, file_size, archived_by_session, metadata, embedding, content_hash)
|
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)`,
|
|
[
|
|
project,
|
|
archive_type,
|
|
title,
|
|
content,
|
|
original_path || null,
|
|
file_size || null,
|
|
archived_by_session || null,
|
|
JSON.stringify(metadata || {}),
|
|
embeddingValue,
|
|
contentHash
|
|
]
|
|
);
|
|
|
|
const sizeStr = file_size ? ` (${Math.round(file_size / 1024)}KB)` : '';
|
|
return `Archived: [${archive_type}] ${title}${sizeStr}`;
|
|
}
|
|
|
|
/**
|
|
* Search archives with hybrid (vector + keyword), vector-only, or keyword-only mode (CF-1315)
|
|
*/
|
|
export async function archiveSearch(args: ArchiveSearchArgs): Promise<string> {
|
|
const { query: searchQuery, project, archive_type, limit = 5, search_mode = 'hybrid' } = args;
|
|
|
|
// Build shared filter clause
|
|
const buildFilter = (startIdx: number) => {
|
|
let where = '';
|
|
const params: unknown[] = [];
|
|
let idx = startIdx;
|
|
if (project) {
|
|
where += ` AND project_key = $${idx++}`;
|
|
params.push(project);
|
|
}
|
|
if (archive_type) {
|
|
where += ` AND archive_type = $${idx++}`;
|
|
params.push(archive_type);
|
|
}
|
|
return { where, params, nextIdx: idx };
|
|
};
|
|
|
|
// Vector search
|
|
let vectorIds: number[] = [];
|
|
let vectorRows: Map<number, Archive & { similarity: number }> = new Map();
|
|
let embeddingFailed = false;
|
|
|
|
if (search_mode !== 'keyword') {
|
|
const embedding = await getEmbedding(searchQuery);
|
|
if (embedding) {
|
|
const embeddingStr = formatEmbedding(embedding);
|
|
const filter = buildFilter(3);
|
|
const params: unknown[] = [embeddingStr, limit, ...filter.params];
|
|
|
|
const rows = await query<Archive & { similarity: number }>(
|
|
`SELECT id, archive_type, title, original_path, file_size,
|
|
to_char(archived_at, 'YYYY-MM-DD') as archived_at,
|
|
1 - (embedding <=> $1) as similarity
|
|
FROM project_archives
|
|
WHERE embedding IS NOT NULL${filter.where}
|
|
ORDER BY embedding <=> $1
|
|
LIMIT $2`,
|
|
params
|
|
);
|
|
vectorIds = rows.map(r => r.id);
|
|
for (const r of rows) vectorRows.set(r.id, r);
|
|
} else {
|
|
embeddingFailed = true;
|
|
if (search_mode === 'vector') {
|
|
return 'Error: Could not generate embedding for vector search';
|
|
}
|
|
}
|
|
}
|
|
|
|
// Keyword search
|
|
let keywordIds: number[] = [];
|
|
let keywordRows: Map<number, Archive & { rank: number }> = new Map();
|
|
|
|
if (search_mode !== 'vector') {
|
|
const filter = buildFilter(3);
|
|
const params: unknown[] = [searchQuery, limit, ...filter.params];
|
|
|
|
const rows = await query<Archive & { rank: number }>(
|
|
`SELECT id, archive_type, title, original_path, file_size,
|
|
to_char(archived_at, 'YYYY-MM-DD') as archived_at,
|
|
ts_rank(search_vector, plainto_tsquery('english', $1)) as rank
|
|
FROM project_archives
|
|
WHERE search_vector @@ plainto_tsquery('english', $1)${filter.where}
|
|
ORDER BY rank DESC
|
|
LIMIT $2`,
|
|
params
|
|
);
|
|
keywordIds = rows.map(r => r.id);
|
|
for (const r of rows) keywordRows.set(r.id, r);
|
|
}
|
|
|
|
// Merge results
|
|
let finalIds: number[];
|
|
let searchLabel: string;
|
|
|
|
let rerankScores: Map<number, number> | null = null;
|
|
|
|
if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
|
|
const merged = rrfMerge(vectorIds, keywordIds);
|
|
finalIds = merged.map(m => m.id as number);
|
|
searchLabel = 'hybrid';
|
|
|
|
// Cross-encoder re-ranking (CF-1317)
|
|
const docs = finalIds.map(id => {
|
|
const r = vectorRows.get(id) || keywordRows.get(id);
|
|
return (r as any)?.title || '';
|
|
});
|
|
const reranked = await rerank(searchQuery, docs, limit);
|
|
if (reranked) {
|
|
rerankScores = new Map();
|
|
const reorderedIds = reranked.map(r => {
|
|
rerankScores!.set(finalIds[r.index], r.relevance_score);
|
|
return finalIds[r.index];
|
|
});
|
|
finalIds = reorderedIds;
|
|
searchLabel = 'hybrid+rerank';
|
|
} else {
|
|
finalIds = finalIds.slice(0, limit);
|
|
}
|
|
} else if (vectorIds.length > 0) {
|
|
finalIds = vectorIds;
|
|
searchLabel = 'vector';
|
|
} else if (keywordIds.length > 0) {
|
|
finalIds = keywordIds;
|
|
searchLabel = embeddingFailed ? 'keyword (embedding unavailable)' : 'keyword';
|
|
} else {
|
|
return 'No relevant archives found';
|
|
}
|
|
|
|
// Format output
|
|
const lines = [`Relevant archives (${searchLabel}):\n`];
|
|
for (const id of finalIds) {
|
|
const a = vectorRows.get(id) || keywordRows.get(id);
|
|
if (!a) continue;
|
|
const simParts: string[] = [];
|
|
if (vectorRows.has(id)) simParts.push(`${Math.round((vectorRows.get(id)!).similarity * 100)}% match`);
|
|
if (rerankScores?.has(id)) simParts.push(`rerank: ${rerankScores.get(id)!.toFixed(2)}`);
|
|
const scores = simParts.length > 0 ? ` (${simParts.join(', ')})` : '';
|
|
const sizeStr = a.file_size ? ` (${Math.round(a.file_size / 1024)}KB)` : '';
|
|
lines.push(`**[${a.archive_type}]** ${a.title}${scores}`);
|
|
lines.push(` Archived: ${a.archived_at}${sizeStr}`);
|
|
if (a.original_path) {
|
|
lines.push(` Path: ${a.original_path}`);
|
|
}
|
|
lines.push('');
|
|
}
|
|
|
|
return lines.join('\n');
|
|
}
|
|
|
|
/**
|
|
* List archives (non-semantic)
|
|
*/
|
|
export async function archiveList(args: ArchiveListArgs): Promise<string> {
|
|
const { project, archive_type, since, limit = 20 } = args;
|
|
|
|
let whereClause = 'WHERE 1=1';
|
|
const params: unknown[] = [];
|
|
let paramIndex = 1;
|
|
|
|
if (project) {
|
|
whereClause += ` AND project_key = $${paramIndex++}`;
|
|
params.push(project);
|
|
}
|
|
if (archive_type) {
|
|
whereClause += ` AND archive_type = $${paramIndex++}`;
|
|
params.push(archive_type);
|
|
}
|
|
if (since) {
|
|
whereClause += ` AND archived_at >= $${paramIndex++}`;
|
|
params.push(since);
|
|
}
|
|
|
|
params.push(limit);
|
|
|
|
const archives = await query<Archive>(
|
|
`SELECT id, archive_type, title, original_path, file_size,
|
|
to_char(archived_at, 'YYYY-MM-DD') as archived_at
|
|
FROM project_archives
|
|
${whereClause}
|
|
ORDER BY archived_at DESC
|
|
LIMIT $${paramIndex}`,
|
|
params
|
|
);
|
|
|
|
if (archives.length === 0) {
|
|
return `No archives found${project ? ` for project ${project}` : ''}`;
|
|
}
|
|
|
|
const lines = [`Archives${project ? ` (${project})` : ''}:\n`];
|
|
for (const a of archives) {
|
|
const sizeStr = a.file_size ? ` (${Math.round(a.file_size / 1024)}KB)` : '';
|
|
lines.push(`• [${a.archive_type}] ${a.title} - ${a.archived_at}${sizeStr}`);
|
|
if (a.original_path) {
|
|
lines.push(` ${a.original_path}`);
|
|
}
|
|
}
|
|
|
|
return lines.join('\n');
|
|
}
|
|
|
|
/**
|
|
* Get specific archive by ID
|
|
*/
|
|
export async function archiveGet(args: ArchiveGetArgs): Promise<string> {
|
|
const archive = await queryOne<Archive>(
|
|
`SELECT id, project_key, archive_type, title, content, original_path, file_size,
|
|
to_char(archived_at, 'YYYY-MM-DD') as archived_at,
|
|
archived_by_session, metadata
|
|
FROM project_archives
|
|
WHERE id = $1`,
|
|
[args.id]
|
|
);
|
|
|
|
if (!archive) {
|
|
return `Archive not found: ${args.id}`;
|
|
}
|
|
|
|
const sizeStr = archive.file_size ? ` (${Math.round(archive.file_size / 1024)}KB)` : '';
|
|
const lines = [
|
|
`# Archive #${archive.id}\n`,
|
|
`**Type:** ${archive.archive_type}`,
|
|
`**Title:** ${archive.title}`,
|
|
`**Archived:** ${archive.archived_at}${sizeStr}`,
|
|
];
|
|
|
|
if (archive.original_path) {
|
|
lines.push(`**Original Path:** ${archive.original_path}`);
|
|
}
|
|
if (archive.archived_by_session) {
|
|
lines.push(`**Session:** ${archive.archived_by_session}`);
|
|
}
|
|
|
|
lines.push('\n---\n');
|
|
lines.push(archive.content);
|
|
|
|
return lines.join('\n');
|
|
}
|