From 1231835e02a31a5a3f0ed8fb5845295eb07eb71d Mon Sep 17 00:00:00 2001
From: Christian Gick
Date: Mon, 19 Jan 2026 11:38:48 +0200
Subject: [PATCH] Add project_archives table and MCP tools (CF-264)

- Created migration 009: project_archives table with semantic search
- Implemented archives.ts: archiveAdd, archiveSearch, archiveList, archiveGet
- Registered archive tools in index.ts and tools/index.ts
- Archive types: session, research, audit, investigation, completed, migration
- Uses project_key (TEXT) FK to projects table
- Tested: archive_add and archive_list working correctly

Replaces filesystem archives with database-backed storage.
Eliminates context pollution from Glob/Grep operations.

Task: CF-264
Session: session_20260119111342_66de546b

Co-Authored-By: Claude Sonnet 4.5
---
 migrations/009_create_project_archives.sql |  33 +++
 src/index.ts                               |  36 +++
 src/tools/archives.ts                      | 264 +++++++++++++++++++++
 src/tools/index.ts                         |  58 +++++
 4 files changed, 391 insertions(+)
 create mode 100644 migrations/009_create_project_archives.sql
 create mode 100644 src/tools/archives.ts

diff --git a/migrations/009_create_project_archives.sql b/migrations/009_create_project_archives.sql
new file mode 100644
index 0000000..43b0da6
--- /dev/null
+++ b/migrations/009_create_project_archives.sql
@@ -0,0 +1,33 @@
+-- Create project_archives table for database-backed archival system
+-- Replaces filesystem archives with searchable, semantic-enabled storage
+
+-- Drop if exists to recreate with correct schema (DESTRUCTIVE: discards any existing archive rows -- acceptable only before first production deploy; do not re-run against live data)
+DROP TABLE IF EXISTS project_archives;
+
+CREATE TABLE project_archives (
+  id SERIAL PRIMARY KEY,
+  project_key TEXT NOT NULL REFERENCES projects(key),
+  archive_type VARCHAR(50) NOT NULL,
+  title VARCHAR(500) NOT NULL,
+  content TEXT NOT NULL,
+  original_path VARCHAR(1000),
+  file_size INTEGER,
+  archived_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+  archived_by_session VARCHAR(100),
+  metadata JSONB DEFAULT '{}',
+  embedding vector(1536),
+  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Indexes for performance (NOTE(review): ivfflat index is built on an empty table, so its lists are untrained -- presumably recall suffers until data is loaded and the index is rebuilt; confirm)
+CREATE INDEX idx_archives_project ON project_archives(project_key);
+CREATE INDEX idx_archives_type ON project_archives(archive_type);
+CREATE INDEX idx_archives_archived_at ON project_archives(archived_at);
+CREATE INDEX idx_archives_embedding ON project_archives USING ivfflat (embedding vector_cosine_ops);
+CREATE INDEX idx_archives_metadata ON project_archives USING gin(metadata);
+
+-- Archive types: session, research, audit, investigation, completed, migration
+COMMENT ON TABLE project_archives IS 'Database-backed archive storage with semantic search';
+COMMENT ON COLUMN project_archives.archive_type IS 'Type: session, research, audit, investigation, completed, migration';
+COMMENT ON COLUMN project_archives.metadata IS 'JSONB for frontmatter, tags, etc';
diff --git a/src/index.ts b/src/index.ts
index 349d77e..961ced1 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -63,6 +63,7 @@ import {
   sessionProductivityAnalytics,
   sessionPatternDetection,
 } from './tools/session-docs.js';
+import { archiveAdd, archiveSearch, archiveList, archiveGet } from './tools/archives.js';
 
 // Create MCP server
 const server = new Server(
@@ -597,6 +598,41 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         );
         break;
 
+      // Archives
+      case 'archive_add':
+        result = await archiveAdd({
+          project: a.project,
+          archive_type: a.archive_type,
+          title: a.title,
+          content: a.content,
+          original_path: a.original_path,
+          file_size: a.file_size,
+          archived_by_session: a.archived_by_session,
+          metadata: a.metadata,
+        });
+        break;
+      case 'archive_search':
+        result = await archiveSearch({
+          query: a.query,
+          project: a.project,
+          archive_type: a.archive_type,
+          limit: a.limit,
+        });
+        break;
+      case 'archive_list':
+        result = await archiveList({
+          project: a.project,
+          archive_type: a.archive_type,
+          since: a.since,
+          limit: a.limit,
+        });
+        break;
+      case 'archive_get':
+        result = await archiveGet({
+          id: a.id,
+        });
+        break;
+
       default:
         throw new Error(`Unknown tool: ${name}`);
     }
diff --git a/src/tools/archives.ts b/src/tools/archives.ts
new file mode 100644
index 0000000..54a18e8
--- /dev/null
+++ b/src/tools/archives.ts
@@ -0,0 +1,264 @@
+// Project archives operations for database-backed archival
+
+import { query, queryOne, execute } from '../db.js';
+import { getEmbedding, formatEmbedding } from '../embeddings.js';
+
+type ArchiveType = 'session' | 'research' | 'audit' | 'investigation' | 'completed' | 'migration';
+
+// Row shape of the project_archives table (see migration 009).
+interface Archive {
+  id: number;
+  project_key: string;
+  archive_type: ArchiveType;
+  title: string;
+  content: string;
+  original_path: string | null;
+  file_size: number | null;
+  archived_at: string;
+  archived_by_session: string | null;
+  metadata: Record<string, unknown>;
+  created_at: string;
+  updated_at: string;
+}
+
+interface ArchiveAddArgs {
+  project: string;
+  archive_type: ArchiveType;
+  title: string;
+  content: string;
+  original_path?: string;
+  file_size?: number;
+  archived_by_session?: string;
+  metadata?: Record<string, unknown>;
+}
+
+interface ArchiveSearchArgs {
+  query: string;
+  project?: string;
+  archive_type?: ArchiveType;
+  limit?: number;
+}
+
+interface ArchiveListArgs {
+  project?: string;
+  archive_type?: ArchiveType;
+  since?: string;
+  limit?: number;
+}
+
+interface ArchiveGetArgs {
+  id: number;
+}
+
+/**
+ * Verify that a project with the given key exists.
+ */
+async function verifyProject(projectKey: string): Promise<boolean> {
+  const result = await queryOne<{ key: string }>(
+    'SELECT key FROM projects WHERE key = $1',
+    [projectKey]
+  );
+  return !!result;
+}
+
+/**
+ * Add a new archive entry. Returns a human-readable status string
+ * (error message if the project key is unknown).
+ */
+export async function archiveAdd(args: ArchiveAddArgs): Promise<string> {
+  const { project, archive_type, title, content, original_path, file_size, archived_by_session, metadata } = args;
+
+  // Verify project exists before inserting (project_key is an FK).
+  const exists = await verifyProject(project);
+  if (!exists) {
+    return `Error: Project not found: ${project}`;
+  }
+
+  // Generate embedding for semantic search; only title + first 1000 chars
+  // of content are embedded to bound embedding-API payload size.
+  const embedText = `${title}. ${content.substring(0, 1000)}`; // Limit content length for embedding
+  const embedding = await getEmbedding(embedText);
+  const embeddingValue = embedding ? formatEmbedding(embedding) : null;
+
+  // Insert with or without the embedding column; a failed embedding call
+  // must not block archival (row is still searchable via archive_list).
+  if (embeddingValue) {
+    await execute(
+      `INSERT INTO project_archives
+       (project_key, archive_type, title, content, original_path, file_size, archived_by_session, metadata, embedding)
+       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`,
+      [
+        project,
+        archive_type,
+        title,
+        content,
+        original_path || null,
+        file_size || null,
+        archived_by_session || null,
+        JSON.stringify(metadata || {}),
+        embeddingValue
+      ]
+    );
+  } else {
+    await execute(
+      `INSERT INTO project_archives
+       (project_key, archive_type, title, content, original_path, file_size, archived_by_session, metadata)
+       VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
+      [
+        project,
+        archive_type,
+        title,
+        content,
+        original_path || null,
+        file_size || null,
+        archived_by_session || null,
+        JSON.stringify(metadata || {})
+      ]
+    );
+  }
+
+  const sizeStr = file_size ?
+    ` (${Math.round(file_size / 1024)}KB)` : '';
+  return `Archived: [${archive_type}] ${title}${sizeStr}`;
+}
+
+/**
+ * Search archives semantically via pgvector cosine distance, with
+ * optional project / archive_type filters. Returns formatted markdown.
+ */
+export async function archiveSearch(args: ArchiveSearchArgs): Promise<string> {
+  const { query: searchQuery, project, archive_type, limit = 5 } = args;
+
+  // Generate embedding for search
+  const embedding = await getEmbedding(searchQuery);
+
+  if (!embedding) {
+    return 'Error: Could not generate embedding for search';
+  }
+
+  const embeddingStr = formatEmbedding(embedding);
+
+  // Build filters positionally: $1 is the embedding, optional filters take
+  // $2..$n in the order they are appended, and the limit is always bound
+  // last as $${paramIndex}. (Previously LIMIT was hard-coded to $2 while
+  // filter values were spliced before it, so supplying a project or type
+  // bound the project string into LIMIT and the limit into the filter.)
+  let whereClause = 'WHERE embedding IS NOT NULL';
+  const params: unknown[] = [embeddingStr];
+  let paramIndex = 2;
+
+  if (project) {
+    whereClause += ` AND project_key = $${paramIndex++}`;
+    params.push(project);
+  }
+  if (archive_type) {
+    whereClause += ` AND archive_type = $${paramIndex++}`;
+    params.push(archive_type);
+  }
+
+  const archives = await query(
+    `SELECT id, archive_type, title, original_path, file_size,
+            to_char(archived_at, 'YYYY-MM-DD') as archived_at,
+            1 - (embedding <=> $1) as similarity
+     FROM project_archives
+     ${whereClause}
+     ORDER BY embedding <=> $1
+     LIMIT $${paramIndex}`,
+    [...params, limit]
+  );
+
+  if (archives.length === 0) {
+    return 'No relevant archives found';
+  }
+
+  const lines = ['Relevant archives:\n'];
+  for (const a of archives) {
+    const sim = Math.round(a.similarity * 100);
+    const sizeStr = a.file_size ? ` (${Math.round(a.file_size / 1024)}KB)` : '';
+    lines.push(`**[${a.archive_type}]** ${a.title} (${sim}% match)`);
+    lines.push(`  Archived: ${a.archived_at}${sizeStr}`);
+    if (a.original_path) {
+      lines.push(`  Path: ${a.original_path}`);
+    }
+    lines.push('');
+  }
+
+  return lines.join('\n');
+}
+
+/**
+ * List archives (non-semantic), newest first, with optional
+ * project / archive_type / since filters.
+ */
+export async function archiveList(args: ArchiveListArgs): Promise<string> {
+  const { project, archive_type, since, limit = 20 } = args;
+
+  let whereClause = 'WHERE 1=1';
+  const params: unknown[] = [];
+  let paramIndex = 1;
+
+  if (project) {
+    whereClause += ` AND project_key = $${paramIndex++}`;
+    params.push(project);
+  }
+  if (archive_type) {
+    whereClause += ` AND archive_type = $${paramIndex++}`;
+    params.push(archive_type);
+  }
+  if (since) {
+    whereClause += ` AND archived_at >= $${paramIndex++}`;
+    params.push(since);
+  }
+
+  // Limit is always the last parameter; paramIndex now points at it.
+  params.push(limit);
+
+  const archives = await query(
+    `SELECT id, archive_type, title, original_path, file_size,
+            to_char(archived_at, 'YYYY-MM-DD') as archived_at
+     FROM project_archives
+     ${whereClause}
+     ORDER BY archived_at DESC
+     LIMIT $${paramIndex}`,
+    params
+  );
+
+  if (archives.length === 0) {
+    return `No archives found${project ? ` for project ${project}` : ''}`;
+  }
+
+  const lines = [`Archives${project ? ` (${project})` : ''}:\n`];
+  for (const a of archives) {
+    const sizeStr = a.file_size ?
+      ` (${Math.round(a.file_size / 1024)}KB)` : '';
+    lines.push(`• [${a.archive_type}] ${a.title} - ${a.archived_at}${sizeStr}`);
+    if (a.original_path) {
+      lines.push(`  ${a.original_path}`);
+    }
+  }
+
+  return lines.join('\n');
+}
+
+/**
+ * Get the full content of a specific archive by ID, rendered as markdown
+ * (header metadata followed by the archived body).
+ */
+export async function archiveGet(args: ArchiveGetArgs): Promise<string> {
+  // Row is a projection of Archive (archived_at pre-formatted; no
+  // embedding/created_at/updated_at columns are read below).
+  const archive = await queryOne<Archive>(
+    `SELECT id, project_key, archive_type, title, content, original_path, file_size,
+            to_char(archived_at, 'YYYY-MM-DD') as archived_at,
+            archived_by_session, metadata
+     FROM project_archives
+     WHERE id = $1`,
+    [args.id]
+  );
+
+  if (!archive) {
+    return `Archive not found: ${args.id}`;
+  }
+
+  const sizeStr = archive.file_size ? ` (${Math.round(archive.file_size / 1024)}KB)` : '';
+  const lines = [
+    `# Archive #${archive.id}\n`,
+    `**Type:** ${archive.archive_type}`,
+    `**Title:** ${archive.title}`,
+    `**Archived:** ${archive.archived_at}${sizeStr}`,
+  ];
+
+  if (archive.original_path) {
+    lines.push(`**Original Path:** ${archive.original_path}`);
+  }
+  if (archive.archived_by_session) {
+    lines.push(`**Session:** ${archive.archived_by_session}`);
+  }
+
+  lines.push('\n---\n');
+  lines.push(archive.content);
+
+  return lines.join('\n');
+}
diff --git a/src/tools/index.ts b/src/tools/index.ts
index 9909d6b..87b837f 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -974,4 +974,62 @@ export const toolDefinitions = [
       },
     },
   },
+
+  // Archive Tools
+  {
+    name: 'archive_add',
+    description: 'Archive content to database with semantic embedding. Replaces filesystem archives.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        project: { type: 'string', description: 'Project key (e.g., CF, VPN)' },
+        archive_type: { type: 'string', enum: ['session', 'research', 'audit', 'investigation', 'completed', 'migration'], description: 'Archive type' },
+        title: { type: 'string', description: 'Archive title' },
+        content: { type: 'string', description: 'Archive content (markdown)' },
+        original_path: { type: 'string', description: 'Original file path (optional)' },
+        file_size: { type: 'number', description: 'File size in bytes (optional)' },
+        archived_by_session: { type: 'string', description: 'Session ID that archived it (optional)' },
+        metadata: { type: 'object', description: 'Additional metadata (optional)' },
+      },
+      required: ['project', 'archive_type', 'title', 'content'],
+    },
+  },
+  {
+    name: 'archive_search',
+    description: 'Search archives using semantic similarity',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        query: { type: 'string', description: 'Search query' },
+        project: { type: 'string', description: 'Filter by project (optional)' },
+        archive_type: { type: 'string', enum: ['session', 'research', 'audit', 'investigation', 'completed', 'migration'], description: 'Filter by archive type (optional)' },
+        limit: { type: 'number', description: 'Max results (default: 5)' },
+      },
+      required: ['query'],
+    },
+  },
+  {
+    name: 'archive_list',
+    description: 'List archives with optional filters',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        project: { type: 'string', description: 'Filter by project (optional)' },
+        archive_type: { type: 'string', enum: ['session', 'research', 'audit', 'investigation', 'completed', 'migration'], description: 'Filter by archive type (optional)' },
+        since: { type: 'string', description: 'Show archives since date (ISO format, optional)' },
+        limit: { type: 'number', description: 'Max results (default: 20)' },
+      },
+    },
+  },
+  {
+    name: 'archive_get',
+    description: 'Get full content of specific archive by ID',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        id: { type: 'number', description: 'Archive ID' },
+      },
+      required: ['id'],
+    },
+  },
 ];