From 1231835e02a31a5a3f0ed8fb5845295eb07eb71d Mon Sep 17 00:00:00 2001
From: Christian Gick
Date: Mon, 19 Jan 2026 11:38:48 +0200
Subject: [PATCH] Add project_archives table and MCP tools (CF-264)

- Created migration 009: project_archives table with semantic search
- Implemented archives.ts: archiveAdd, archiveSearch, archiveList, archiveGet
- Registered archive tools in index.ts and tools/index.ts
- Archive types: session, research, audit, investigation, completed, migration
- Uses project_key (TEXT) FK to projects table
- Tested: archive_add and archive_list working correctly

Replaces filesystem archives with database-backed storage.
Eliminates context pollution from Glob/Grep operations.

Task: CF-264
Session: session_20260119111342_66de546b

Co-Authored-By: Claude Sonnet 4.5
---
 migrations/009_create_project_archives.sql |  33 +++
 src/index.ts                               |  36 +++
 src/tools/archives.ts                      | 264 +++++++++++++++++++++
 src/tools/index.ts                         |  58 +++++
 4 files changed, 391 insertions(+)
 create mode 100644 migrations/009_create_project_archives.sql
 create mode 100644 src/tools/archives.ts

diff --git a/migrations/009_create_project_archives.sql b/migrations/009_create_project_archives.sql
new file mode 100644
index 0000000..43b0da6
--- /dev/null
+++ b/migrations/009_create_project_archives.sql
@@ -0,0 +1,33 @@
+-- Create project_archives table for database-backed archival system
+-- Replaces filesystem archives with searchable, semantic-enabled storage
+
+-- Drop if exists to recreate with correct schema (DESTRUCTIVE: discards any existing archive rows -- acceptable only before first production deploy; do not re-run against live data)
+DROP TABLE IF EXISTS project_archives;
+
+CREATE TABLE project_archives (
+  id SERIAL PRIMARY KEY,
+  project_key TEXT NOT NULL REFERENCES projects(key),
+  archive_type VARCHAR(50) NOT NULL,
+  title VARCHAR(500) NOT NULL,
+  content TEXT NOT NULL,
+  original_path VARCHAR(1000),
+  file_size INTEGER,
+  archived_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+  archived_by_session VARCHAR(100),
+  metadata JSONB DEFAULT '{}',
+  embedding vector(1536),
+  created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+  updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Indexes for performance (NOTE(review): ivfflat index is built on an empty table, so its lists are untrained -- presumably recall suffers until data is loaded and the index is rebuilt; confirm)
+CREATE INDEX idx_archives_project ON project_archives(project_key);
+CREATE INDEX idx_archives_type ON project_archives(archive_type);
+CREATE INDEX idx_archives_archived_at ON project_archives(archived_at);
+CREATE INDEX idx_archives_embedding ON project_archives USING ivfflat (embedding vector_cosine_ops);
+CREATE INDEX idx_archives_metadata ON project_archives USING gin(metadata);
+
+-- Archive types: session, research, audit, investigation, completed, migration
+COMMENT ON TABLE project_archives IS 'Database-backed archive storage with semantic search';
+COMMENT ON COLUMN project_archives.archive_type IS 'Type: session, research, audit, investigation, completed, migration';
+COMMENT ON COLUMN project_archives.metadata IS 'JSONB for frontmatter, tags, etc';
diff --git a/src/index.ts b/src/index.ts
index 349d77e..961ced1 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -63,6 +63,7 @@ import {
   sessionProductivityAnalytics,
   sessionPatternDetection,
 } from './tools/session-docs.js';
+import { archiveAdd, archiveSearch, archiveList, archiveGet } from './tools/archives.js';
 
 // Create MCP server
 const server = new Server(
@@ -597,6 +598,41 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         );
         break;
 
+      // Archives
+      case 'archive_add':
+        result = await archiveAdd({
+          project: a.project,
+          archive_type: a.archive_type,
+          title: a.title,
+          content: a.content,
+          original_path: a.original_path,
+          file_size: a.file_size,
+          archived_by_session: a.archived_by_session,
+          metadata: a.metadata,
+        });
+        break;
+      case 'archive_search':
+        result = await archiveSearch({
+          query: a.query,
+          project: a.project,
+          archive_type: a.archive_type,
+          limit: a.limit,
+        });
+        break;
+      case 'archive_list':
+        result = await archiveList({
+          project: a.project,
+          archive_type: a.archive_type,
+          since: a.since,
+          limit: a.limit,
+        });
+        break;
+      case 'archive_get':
+        result = await archiveGet({
+          id: a.id,
+        });
+        break;
+
       default:
         throw new Error(`Unknown tool: ${name}`);
     }
diff --git a/src/tools/archives.ts b/src/tools/archives.ts
new file mode 100644
index 0000000..54a18e8
--- /dev/null
+++ b/src/tools/archives.ts
@@ -0,0 +1,264 @@
+// Project archives operations for database-backed archival
+
+import { query, queryOne, execute } from '../db.js';
+import { getEmbedding, formatEmbedding } from '../embeddings.js';
+
+type ArchiveType = 'session' | 'research' | 'audit' | 'investigation' | 'completed' | 'migration';
+
+// Row shape of the project_archives table (see migration 009).
+interface Archive {
+  id: number;
+  project_key: string;
+  archive_type: ArchiveType;
+  title: string;
+  content: string;
+  original_path: string | null;
+  file_size: number | null;
+  archived_at: string;
+  archived_by_session: string | null;
+  metadata: Record<string, unknown>;
+  created_at: string;
+  updated_at: string;
+}
+
+interface ArchiveAddArgs {
+  project: string;
+  archive_type: ArchiveType;
+  title: string;
+  content: string;
+  original_path?: string;
+  file_size?: number;
+  archived_by_session?: string;
+  metadata?: Record<string, unknown>;
+}
+
+interface ArchiveSearchArgs {
+  query: string;
+  project?: string;
+  archive_type?: ArchiveType;
+  limit?: number;
+}
+
+interface ArchiveListArgs {
+  project?: string;
+  archive_type?: ArchiveType;
+  since?: string;
+  limit?: number;
+}
+
+interface ArchiveGetArgs {
+  id: number;
+}
+
+/**
+ * Verify that a project with the given key exists.
+ */
+async function verifyProject(projectKey: string): Promise<boolean> {
+  const result = await queryOne<{ key: string }>(
+    'SELECT key FROM projects WHERE key = $1',
+    [projectKey]
+  );
+  return !!result;
+}
+
+/**
+ * Add a new archive entry. Returns a human-readable status string
+ * (error message if the project key is unknown).
+ */
+export async function archiveAdd(args: ArchiveAddArgs): Promise<string> {
+  const { project, archive_type, title, content, original_path, file_size, archived_by_session, metadata } = args;
+
+  // Verify project exists before inserting (project_key is an FK).
+  const exists = await verifyProject(project);
+  if (!exists) {
+    return `Error: Project not found: ${project}`;
+  }
+
+  // Generate embedding for semantic search; only title + first 1000 chars
+  // of content are embedded to bound embedding-API payload size.
+  const embedText = `${title}. ${content.substring(0, 1000)}`; // Limit content length for embedding
+  const embedding = await getEmbedding(embedText);
+  const embeddingValue = embedding ? formatEmbedding(embedding) : null;
+
+  // Insert with or without the embedding column; a failed embedding call
+  // must not block archival (row is still searchable via archive_list).
+  if (embeddingValue) {
+    await execute(
+      `INSERT INTO project_archives
+       (project_key, archive_type, title, content, original_path, file_size, archived_by_session, metadata, embedding)
+       VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`,
+      [
+        project,
+        archive_type,
+        title,
+        content,
+        original_path || null,
+        file_size || null,
+        archived_by_session || null,
+        JSON.stringify(metadata || {}),
+        embeddingValue
+      ]
+    );
+  } else {
+    await execute(
+      `INSERT INTO project_archives
+       (project_key, archive_type, title, content, original_path, file_size, archived_by_session, metadata)
+       VALUES ($1, $2, $3, $4, $5, $6, $7, $8)`,
+      [
+        project,
+        archive_type,
+        title,
+        content,
+        original_path || null,
+        file_size || null,
+        archived_by_session || null,
+        JSON.stringify(metadata || {})
+      ]
+    );
+  }
+
+  const sizeStr = file_size ?
+    ` (${Math.round(file_size / 1024)}KB)` : '';
+  return `Archived: [${archive_type}] ${title}${sizeStr}`;
+}
+
+/**
+ * Search archives semantically via pgvector cosine distance, with
+ * optional project / archive_type filters. Returns formatted markdown.
+ */
+export async function archiveSearch(args: ArchiveSearchArgs): Promise<string> {
+  const { query: searchQuery, project, archive_type, limit = 5 } = args;
+
+  // Generate embedding for search
+  const embedding = await getEmbedding(searchQuery);
+
+  if (!embedding) {
+    return 'Error: Could not generate embedding for search';
+  }
+
+  const embeddingStr = formatEmbedding(embedding);
+
+  // Build filters positionally: $1 is the embedding, optional filters take
+  // $2..$n in the order they are appended, and the limit is always bound
+  // last as $${paramIndex}. (Previously LIMIT was hard-coded to $2 while
+  // filter values were spliced before it, so supplying a project or type
+  // bound the project string into LIMIT and the limit into the filter.)
+  let whereClause = 'WHERE embedding IS NOT NULL';
+  const params: unknown[] = [embeddingStr];
+  let paramIndex = 2;
+
+  if (project) {
+    whereClause += ` AND project_key = $${paramIndex++}`;
+    params.push(project);
+  }
+  if (archive_type) {
+    whereClause += ` AND archive_type = $${paramIndex++}`;
+    params.push(archive_type);
+  }
+
+  const archives = await query(
+    `SELECT id, archive_type, title, original_path, file_size,
+            to_char(archived_at, 'YYYY-MM-DD') as archived_at,
+            1 - (embedding <=> $1) as similarity
+     FROM project_archives
+     ${whereClause}
+     ORDER BY embedding <=> $1
+     LIMIT $${paramIndex}`,
+    [...params, limit]
+  );
+
+  if (archives.length === 0) {
+    return 'No relevant archives found';
+  }
+
+  const lines = ['Relevant archives:\n'];
+  for (const a of archives) {
+    const sim = Math.round(a.similarity * 100);
+    const sizeStr = a.file_size ? ` (${Math.round(a.file_size / 1024)}KB)` : '';
+    lines.push(`**[${a.archive_type}]** ${a.title} (${sim}% match)`);
+    lines.push(`  Archived: ${a.archived_at}${sizeStr}`);
+    if (a.original_path) {
+      lines.push(`  Path: ${a.original_path}`);
+    }
+    lines.push('');
+  }
+
+  return lines.join('\n');
+}
+
+/**
+ * List archives (non-semantic), newest first, with optional
+ * project / archive_type / since filters.
+ */
+export async function archiveList(args: ArchiveListArgs): Promise<string> {
+  const { project, archive_type, since, limit = 20 } = args;
+
+  let whereClause = 'WHERE 1=1';
+  const params: unknown[] = [];
+  let paramIndex = 1;
+
+  if (project) {
+    whereClause += ` AND project_key = $${paramIndex++}`;
+    params.push(project);
+  }
+  if (archive_type) {
+    whereClause += ` AND archive_type = $${paramIndex++}`;
+    params.push(archive_type);
+  }
+  if (since) {
+    whereClause += ` AND archived_at >= $${paramIndex++}`;
+    params.push(since);
+  }
+
+  // Limit is always the last parameter; paramIndex now points at it.
+  params.push(limit);
+
+  const archives = await query(
+    `SELECT id, archive_type, title, original_path, file_size,
+            to_char(archived_at, 'YYYY-MM-DD') as archived_at
+     FROM project_archives
+     ${whereClause}
+     ORDER BY archived_at DESC
+     LIMIT $${paramIndex}`,
+    params
+  );
+
+  if (archives.length === 0) {
+    return `No archives found${project ? ` for project ${project}` : ''}`;
+  }
+
+  const lines = [`Archives${project ? ` (${project})` : ''}:\n`];
+  for (const a of archives) {
+    const sizeStr = a.file_size ?
+      ` (${Math.round(a.file_size / 1024)}KB)` : '';
+    lines.push(`• [${a.archive_type}] ${a.title} - ${a.archived_at}${sizeStr}`);
+    if (a.original_path) {
+      lines.push(`  ${a.original_path}`);
+    }
+  }
+
+  return lines.join('\n');
+}
+
+/**
+ * Get the full content of a specific archive by ID, rendered as markdown
+ * (header metadata followed by the archived body).
+ */
+export async function archiveGet(args: ArchiveGetArgs): Promise<string> {
+  // Row is a projection of Archive (archived_at pre-formatted; no
+  // embedding/created_at/updated_at columns are read below).
+  const archive = await queryOne<Archive>(
+    `SELECT id, project_key, archive_type, title, content, original_path, file_size,
+            to_char(archived_at, 'YYYY-MM-DD') as archived_at,
+            archived_by_session, metadata
+     FROM project_archives
+     WHERE id = $1`,
+    [args.id]
+  );
+
+  if (!archive) {
+    return `Archive not found: ${args.id}`;
+  }
+
+  const sizeStr = archive.file_size ? ` (${Math.round(archive.file_size / 1024)}KB)` : '';
+  const lines = [
+    `# Archive #${archive.id}\n`,
+    `**Type:** ${archive.archive_type}`,
+    `**Title:** ${archive.title}`,
+    `**Archived:** ${archive.archived_at}${sizeStr}`,
+  ];
+
+  if (archive.original_path) {
+    lines.push(`**Original Path:** ${archive.original_path}`);
+  }
+  if (archive.archived_by_session) {
+    lines.push(`**Session:** ${archive.archived_by_session}`);
+  }
+
+  lines.push('\n---\n');
+  lines.push(archive.content);
+
+  return lines.join('\n');
+}
diff --git a/src/tools/index.ts b/src/tools/index.ts
index 9909d6b..87b837f 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -974,4 +974,62 @@ export const toolDefinitions = [
       },
     },
   },
+
+  // Archive Tools
+  {
+    name: 'archive_add',
+    description: 'Archive content to database with semantic embedding. Replaces filesystem archives.',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        project: { type: 'string', description: 'Project key (e.g., CF, VPN)' },
+        archive_type: { type: 'string', enum: ['session', 'research', 'audit', 'investigation', 'completed', 'migration'], description: 'Archive type' },
+        title: { type: 'string', description: 'Archive title' },
+        content: { type: 'string', description: 'Archive content (markdown)' },
+        original_path: { type: 'string', description: 'Original file path (optional)' },
+        file_size: { type: 'number', description: 'File size in bytes (optional)' },
+        archived_by_session: { type: 'string', description: 'Session ID that archived it (optional)' },
+        metadata: { type: 'object', description: 'Additional metadata (optional)' },
+      },
+      required: ['project', 'archive_type', 'title', 'content'],
+    },
+  },
+  {
+    name: 'archive_search',
+    description: 'Search archives using semantic similarity',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        query: { type: 'string', description: 'Search query' },
+        project: { type: 'string', description: 'Filter by project (optional)' },
+        archive_type: { type: 'string', enum: ['session', 'research', 'audit', 'investigation', 'completed', 'migration'], description: 'Filter by archive type (optional)' },
+        limit: { type: 'number', description: 'Max results (default: 5)' },
+      },
+      required: ['query'],
+    },
+  },
+  {
+    name: 'archive_list',
+    description: 'List archives with optional filters',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        project: { type: 'string', description: 'Filter by project (optional)' },
+        archive_type: { type: 'string', enum: ['session', 'research', 'audit', 'investigation', 'completed', 'migration'], description: 'Filter by archive type (optional)' },
+        since: { type: 'string', description: 'Show archives since date (ISO format, optional)' },
+        limit: { type: 'number', description: 'Max results (default: 20)' },
+      },
+    },
+  },
+  {
+    name: 'archive_get',
+    description: 'Get full content of specific archive by ID',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        id: { type: 'number', description: 'Archive ID' },
+      },
+      required: ['id'],
+    },
+  },
 ];