// Project archives operations for database-backed archival

import { query, queryOne, execute } from '../db.js';
import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js';

type ArchiveType = 'session' | 'research' | 'audit' | 'investigation' | 'completed' | 'migration';

/** Shape of a row as read from the `project_archives` table by this module. */
interface Archive {
  id: number;
  project_key: string;
  archive_type: ArchiveType;
  title: string;
  content: string;
  original_path: string | null;
  file_size: number | null;
  archived_at: string;
  archived_by_session: string | null;
  metadata: Record<string, unknown>;
  created_at: string;
  updated_at: string;
}

interface ArchiveAddArgs {
  project: string;
  archive_type: ArchiveType;
  title: string;
  content: string;
  original_path?: string;
  file_size?: number;
  archived_by_session?: string;
  metadata?: Record<string, unknown>;
}

type SearchMode = 'hybrid' | 'vector' | 'keyword';

interface ArchiveSearchArgs {
  query: string;
  project?: string;
  archive_type?: ArchiveType;
  limit?: number;
  search_mode?: SearchMode;
}

interface ArchiveListArgs {
  project?: string;
  archive_type?: ArchiveType;
  since?: string;
  limit?: number;
}

interface ArchiveGetArgs {
  id: number;
}

/** Columns selected by the list and search queries (everything except the body). */
type ArchiveSummary = Pick<
  Archive,
  'id' | 'archive_type' | 'title' | 'original_path' | 'file_size' | 'archived_at'
>;

/** Row returned by the vector query: summary columns plus `1 - distance`. */
type VectorHit = ArchiveSummary & { similarity: number };

/** Row returned by the keyword query: summary columns plus the `ts_rank` score. */
type KeywordHit = ArchiveSummary & { rank: number };

/**
 * Render a byte count as a ` (NKB)` suffix for display.
 *
 * @param fileSize - Size in bytes; may be null/undefined when unknown.
 * @returns The suffix (with a leading space), or `''` when the size is
 *   missing or zero.
 */
function formatSizeSuffix(fileSize: number | null | undefined): string {
  return fileSize ? ` (${Math.round(fileSize / 1024)}KB)` : '';
}

/**
 * Check whether a row with the given key exists in the `projects` table.
 *
 * @param projectKey - Project key to look up.
 * @returns `true` when a matching row was found.
 */
async function verifyProject(projectKey: string): Promise<boolean> {
  const result = await queryOne<{ key: string }>(
    'SELECT key FROM projects WHERE key = $1',
    [projectKey]
  );
  return !!result;
}

/**
 * Add a new archive entry.
 *
 * The project must exist. The title plus the first 1000 characters of the
 * content are hashed, and an existing row with the same hash in the same
 * project short-circuits the insert (CF-1314), so the embedding call is only
 * made for new content.
 *
 * @param args - Archive fields; optional fields are stored as NULL when absent.
 * @returns A human-readable status line (success, duplicate, or error text).
 */
export async function archiveAdd(args: ArchiveAddArgs): Promise<string> {
  const {
    project,
    archive_type,
    title,
    content,
    original_path,
    file_size,
    archived_by_session,
    metadata,
  } = args;

  // Verify project exists
  const exists = await verifyProject(project);
  if (!exists) {
    return `Error: Project not found: ${project}`;
  }

  // CF-1314: Hash content for dedup before embedding API call
  const embedText = `${title}. ${content.substring(0, 1000)}`;
  const contentHash = generateContentHash(embedText);

  const existing = await queryOne<{ id: number }>(
    'SELECT id FROM project_archives WHERE content_hash = $1 AND project_key = $2 LIMIT 1',
    [contentHash, project]
  );
  if (existing) {
    return `Archive already exists (id: ${existing.id}): [${archive_type}] ${title}`;
  }

  // Generate embedding for semantic search; a falsy result is stored as NULL
  // so the row is still inserted and remains reachable by keyword search.
  const embedding = await getEmbedding(embedText);
  const embeddingValue = embedding ? formatEmbedding(embedding) : null;

  await execute(
    `INSERT INTO project_archives
       (project_key, archive_type, title, content, original_path, file_size,
        archived_by_session, metadata, embedding, content_hash)
     VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)`,
    [
      project,
      archive_type,
      title,
      content,
      // `||` is deliberate for the string fields: an empty string is stored as NULL.
      original_path || null,
      // `??` so that a genuine size of 0 bytes is stored as 0, not NULL.
      file_size ?? null,
      archived_by_session || null,
      JSON.stringify(metadata ?? {}),
      embeddingValue,
      contentHash,
    ]
  );

  return `Archived: [${archive_type}] ${title}${formatSizeSuffix(file_size)}`;
}

/**
 * Search archives with hybrid (vector + keyword), vector-only, or
 * keyword-only mode (CF-1315).
 *
 * - `vector`: orders rows by the `<=>` distance between the stored embedding
 *   and the query embedding; returns an error string if no embedding could be
 *   generated.
 * - `keyword`: full-text match on `search_vector`, ordered by `ts_rank`.
 * - `hybrid` (default): runs both; when both return rows the id lists are
 *   combined with `rrfMerge` and then passed to `rerank` (CF-1317). If only
 *   one side returns rows, that side is used on its own.
 *
 * @param args - Query text plus optional project/type filters, result limit
 *   (default 5) and search mode (default `'hybrid'`).
 * @returns Markdown-formatted results, or a message when nothing matched.
 */
export async function archiveSearch(args: ArchiveSearchArgs): Promise<string> {
  const { query: searchQuery, project, archive_type, limit = 5, search_mode = 'hybrid' } = args;

  // Shared filter clause. Both queries bind $1 (query/embedding) and $2
  // (limit), so optional filters start at $3 and the clause can be built once.
  let filterWhere = '';
  const filterParams: unknown[] = [];
  let placeholder = 3;
  if (project) {
    filterWhere += ` AND project_key = $${placeholder++}`;
    filterParams.push(project);
  }
  if (archive_type) {
    filterWhere += ` AND archive_type = $${placeholder++}`;
    filterParams.push(archive_type);
  }

  // Vector search
  let vectorIds: number[] = [];
  const vectorRows = new Map<number, VectorHit>();
  let embeddingFailed = false;

  if (search_mode !== 'keyword') {
    const embedding = await getEmbedding(searchQuery);
    if (embedding) {
      const rows = await query<VectorHit>(
        `SELECT id, archive_type, title, original_path, file_size,
                to_char(archived_at, 'YYYY-MM-DD') as archived_at,
                1 - (embedding <=> $1) as similarity
         FROM project_archives
         WHERE embedding IS NOT NULL${filterWhere}
         ORDER BY embedding <=> $1
         LIMIT $2`,
        [formatEmbedding(embedding), limit, ...filterParams]
      );
      vectorIds = rows.map((row) => row.id);
      for (const row of rows) vectorRows.set(row.id, row);
    } else {
      embeddingFailed = true;
      // Vector-only mode has nothing to fall back to; hybrid continues with keywords.
      if (search_mode === 'vector') {
        return 'Error: Could not generate embedding for vector search';
      }
    }
  }

  // Keyword search
  let keywordIds: number[] = [];
  const keywordRows = new Map<number, KeywordHit>();

  if (search_mode !== 'vector') {
    const rows = await query<KeywordHit>(
      `SELECT id, archive_type, title, original_path, file_size,
              to_char(archived_at, 'YYYY-MM-DD') as archived_at,
              ts_rank(search_vector, plainto_tsquery('english', $1)) as rank
       FROM project_archives
       WHERE search_vector @@ plainto_tsquery('english', $1)${filterWhere}
       ORDER BY rank DESC
       LIMIT $2`,
      [searchQuery, limit, ...filterParams]
    );
    keywordIds = rows.map((row) => row.id);
    for (const row of rows) keywordRows.set(row.id, row);
  }

  /** Look an id up in either result set, preferring the vector row. */
  const lookup = (id: number): ArchiveSummary | undefined =>
    vectorRows.get(id) ?? keywordRows.get(id);

  // Merge results
  let finalIds: number[];
  let searchLabel: string;
  let rerankScores: Map<number, number> | null = null;

  if (search_mode === 'hybrid' && vectorIds.length > 0 && keywordIds.length > 0) {
    const merged = rrfMerge(vectorIds, keywordIds);
    finalIds = merged.map((m) => m.id as number);
    searchLabel = 'hybrid';

    // Cross-encoder re-ranking (CF-1317); only the titles are sent as documents.
    const docs = finalIds.map((id) => lookup(id)?.title || '');
    const reranked = await rerank(searchQuery, docs, limit);

    if (reranked) {
      const scores = new Map<number, number>();
      const reorderedIds: number[] = [];
      for (const hit of reranked) {
        // `hit.index` refers to a position in `docs`/`finalIds`.
        const id = finalIds[hit.index];
        if (id === undefined) continue; // ignore an out-of-range index
        scores.set(id, hit.relevance_score);
        reorderedIds.push(id);
      }
      rerankScores = scores;
      finalIds = reorderedIds;
      searchLabel = 'hybrid+rerank';
    } else {
      // No rerank result: keep the merged order, trimmed to the requested size.
      finalIds = finalIds.slice(0, limit);
    }
  } else if (vectorIds.length > 0) {
    finalIds = vectorIds;
    searchLabel = 'vector';
  } else if (keywordIds.length > 0) {
    finalIds = keywordIds;
    searchLabel = embeddingFailed ? 'keyword (embedding unavailable)' : 'keyword';
  } else {
    return 'No relevant archives found';
  }

  // Format output
  const lines = [`Relevant archives (${searchLabel}):\n`];
  for (const id of finalIds) {
    const a = lookup(id);
    if (!a) continue;

    const simParts: string[] = [];
    const vectorHit = vectorRows.get(id);
    if (vectorHit) {
      simParts.push(`${Math.round(vectorHit.similarity * 100)}% match`);
    }
    const rerankScore = rerankScores?.get(id);
    if (rerankScore !== undefined) {
      simParts.push(`rerank: ${rerankScore.toFixed(2)}`);
    }
    const scores = simParts.length > 0 ? ` (${simParts.join(', ')})` : '';

    lines.push(`**[${a.archive_type}]** ${a.title}${scores}`);
    lines.push(` Archived: ${a.archived_at}${formatSizeSuffix(a.file_size)}`);
    if (a.original_path) {
      lines.push(` Path: ${a.original_path}`);
    }
    lines.push('');
  }

  return lines.join('\n');
}

/**
 * List archives (non-semantic), newest first.
 *
 * @param args - Optional project, archive type and `since` lower bound on
 *   `archived_at`, plus a row limit (default 20).
 * @returns A bulleted text listing, or a "No archives found" message.
 */
export async function archiveList(args: ArchiveListArgs): Promise<string> {
  const { project, archive_type, since, limit = 20 } = args;

  // `WHERE 1=1` lets every optional filter be appended uniformly as ` AND ...`.
  let whereClause = 'WHERE 1=1';
  const params: unknown[] = [];
  let paramIndex = 1;

  if (project) {
    whereClause += ` AND project_key = $${paramIndex++}`;
    params.push(project);
  }
  if (archive_type) {
    whereClause += ` AND archive_type = $${paramIndex++}`;
    params.push(archive_type);
  }
  if (since) {
    whereClause += ` AND archived_at >= $${paramIndex++}`;
    params.push(since);
  }

  // The limit is always the last bound parameter.
  params.push(limit);

  const archives = await query<ArchiveSummary>(
    `SELECT id, archive_type, title, original_path, file_size,
            to_char(archived_at, 'YYYY-MM-DD') as archived_at
     FROM project_archives
     ${whereClause}
     ORDER BY archived_at DESC
     LIMIT $${paramIndex}`,
    params
  );

  if (archives.length === 0) {
    return `No archives found${project ? ` for project ${project}` : ''}`;
  }

  const lines = [`Archives${project ? ` (${project})` : ''}:\n`];
  for (const a of archives) {
    lines.push(
      `• [${a.archive_type}] ${a.title} - ${a.archived_at}${formatSizeSuffix(a.file_size)}`
    );
    if (a.original_path) {
      lines.push(` ${a.original_path}`);
    }
  }

  return lines.join('\n');
}

/**
 * Get a specific archive by ID, including its full content.
 *
 * @param args - The numeric archive id.
 * @returns A Markdown document with a metadata header followed by the archive
 *   content, or a "not found" message.
 */
export async function archiveGet(args: ArchiveGetArgs): Promise<string> {
  const archive = await queryOne<Archive>(
    `SELECT id, project_key, archive_type, title, content, original_path, file_size,
            to_char(archived_at, 'YYYY-MM-DD') as archived_at,
            archived_by_session, metadata
     FROM project_archives
     WHERE id = $1`,
    [args.id]
  );

  if (!archive) {
    return `Archive not found: ${args.id}`;
  }

  const lines = [
    `# Archive #${archive.id}\n`,
    `**Type:** ${archive.archive_type}`,
    `**Title:** ${archive.title}`,
    `**Archived:** ${archive.archived_at}${formatSizeSuffix(archive.file_size)}`,
  ];

  if (archive.original_path) {
    lines.push(`**Original Path:** ${archive.original_path}`);
  }
  if (archive.archived_by_session) {
    lines.push(`**Session:** ${archive.archived_by_session}`);
  }

  lines.push('\n---\n');
  lines.push(archive.content);

  return lines.join('\n');
}