Files
session-mcp/migrate-all-docs-batch.mjs
Christian Gick 25bed341e9 Add migration scripts for archive database migration
Three migration scripts for complete file-to-database migration:
- migrate-all-docs-batch.mjs: Main migration (1,172 files)
- migrate-missed-docs.mjs: Supplementary for hidden dirs (34 files)
- migrate-external-archive.mjs: External archive cleanup (5 files)

Total migrated: 1,211 files (~15MB) to project_archives table
All with semantic embeddings for vector search

Related: CF-267, CF-268

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-01-19 15:01:18 +02:00

481 lines
12 KiB
JavaScript
Executable File

#!/usr/bin/env node
/**
* Comprehensive batch migration of all documentation to task-mcp database
*
* Migrates ~1,293 .md files from Development directory with intelligent archive type detection.
*
* Archive types:
* - session: Session plans, notes, CLAUDE_HISTORY.md
* - investigation: Investigation files
* - completed: planning.md, completed work
* - audit: Files with "audit" in the name
* - research: Documentation, guides, architecture docs (default)
*
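* Excludes:
* - README.md, CHANGELOG.md, LICENSE.md, CONTRIBUTING.md, CODE_OF_CONDUCT.md
* - Templates (*-template.md, any filename containing "template", /templates/)
* - Active tracking (tasks.md, FEATURES.md)
* - Vendor directories (go/pkg/mod, Tools/Github-Ranking, Tools/awesome-*, vendor)
* - Build artifacts (node_modules, .git, build, dist)
* - Already migrated (.migrated-to-mcp/) and backups (.framework-backup/, .claude-backup/)
* - Archived projects (Archived/), editor directories (.idea, .vscode)
* - Hidden directories (any directory whose name starts with ".") are not scanned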
*/
import { readFileSync, readdirSync, renameSync, mkdirSync, existsSync, statSync } from 'fs';
import { join, basename, dirname, relative } from 'path';
import { homedir } from 'os';
import dotenv from 'dotenv';
import { archiveAdd } from './dist/tools/archives.js';
// Load environment variables through dotenv before any other code runs
dotenv.config();
// Configuration
// DEV_DIR: root directory that is scanned recursively (~/Development)
const DEV_DIR = join(homedir(), 'Development');
// DRY_RUN: set by the --dry-run CLI flag; main() only lists what would be migrated
const DRY_RUN = process.argv.includes('--dry-run');
// Standard project files that stay on disk; compared against the exact file name
// (case-sensitive) in shouldExclude()
const STANDARD_FILES = [
'README.md',
'CHANGELOG.md',
'LICENSE.md',
'CONTRIBUTING.md',
'CODE_OF_CONDUCT.md'
];
// Path fragments that exclude a file or directory. Each entry is tested with
// String.prototype.includes() against the full path, so it matches at any depth.
const EXCLUDE_PATTERNS = [
// Build and dependencies
'/node_modules/',
'/.git/',
'/build/',
'/dist/',
'/vendor/',
// Vendor/external tools
'/go/pkg/mod/',
'/Tools/Github-Ranking/',
// No trailing slash: matches every directory whose name starts with "awesome-"
'/Tools/awesome-',
// Already migrated
'/.migrated-to-mcp/',
'/.framework-backup/',
'/.claude-backup/',
// Archived projects
'/Archived/',
// System directories
'/.DS_Store',
'/.idea/',
'/.vscode/'
];
// Active files to keep (not migrate); compared against the exact file name
const ACTIVE_FILES = [
'tasks.md',
'FEATURES.md'
];
/**
 * Decide whether a markdown file must be left out of the migration.
 *
 * A file is skipped when its name is one of the standard or active files,
 * when it looks like a template, or when its path contains one of the
 * excluded path fragments.
 *
 * @param {string} filePath - Full path of the file.
 * @param {string} filename - Base name of the file.
 * @returns {boolean} true when the file must not be migrated.
 */
function shouldExclude(filePath, filename) {
  const staysOnDisk = STANDARD_FILES.includes(filename) || ACTIVE_FILES.includes(filename);
  const looksLikeTemplate =
    filename.endsWith('-template.md') ||
    filename.includes('template') ||
    filePath.includes('/templates/');

  if (staysOnDisk || looksLikeTemplate) {
    return true;
  }

  // Path fragments match anywhere in the full path
  for (const fragment of EXCLUDE_PATTERNS) {
    if (filePath.includes(fragment)) {
      return true;
    }
  }
  return false;
}
/**
 * Classify a file into an archive type from its name and path.
 *
 * Categories are tested in a fixed order and the first match wins:
 * session, investigation, completed, audit, and finally research.
 *
 * @param {string} filePath - Full path of the file.
 * @param {string} filename - Base name of the file.
 * @returns {string} One of 'session', 'investigation', 'completed',
 *   'audit' or 'research'.
 */
function getArchiveType(filePath, filename) {
  const pathLc = filePath.toLowerCase();
  const nameLc = filename.toLowerCase();

  const inSessionDir = filePath.includes('.claude-session/');
  const mentionsSession = nameLc.includes('session');
  const mentionsComplete = nameLc.includes('complete');

  // Session material: plan/notes inside a session directory, history files,
  // or any name that mentions both "session" and "complete"
  const isSession =
    (inSessionDir && (filename === 'plan.md' || filename === 'notes.md')) ||
    filename === 'CLAUDE_HISTORY.md' ||
    filename === 'SESSION_COMPLETE.md' ||
    (mentionsSession && mentionsComplete);
  if (isSession) {
    return 'session';
  }

  // Investigation: named as such, or stored below an investigations directory
  if (nameLc.includes('investigation') || pathLc.includes('/investigations/')) {
    return 'investigation';
  }

  // Completed work: planning.md or a "complete" name that is not session-related
  if (filename === 'planning.md' || (mentionsComplete && !mentionsSession)) {
    return 'completed';
  }

  // Audit files, otherwise the default research documentation
  return nameLc.includes('audit') ? 'audit' : 'research';
}
/**
 * Derive the project key for a file from its location.
 *
 * - Anything below a ClaudeFramework directory belongs to 'CF'.
 * - Below Apps/ or Infrastructure/ the key is generated from the name of
 *   the first sub-directory (the project directory).
 * - Everything else falls back to 'CF'.
 *
 * A file that sits directly inside Apps/ or Infrastructure/ has no project
 * directory; it falls back to 'CF' instead of deriving a key from the
 * file's own name.
 *
 * @param {string} filePath - Full path of the file, '/'-separated.
 * @returns {string} Project key.
 */
function getProjectKey(filePath) {
  if (filePath.includes('/ClaudeFramework/')) {
    return 'CF';
  }

  const segments = filePath.split('/');
  // Apps takes precedence over Infrastructure when a path contains both
  for (const root of ['Apps', 'Infrastructure']) {
    if (!filePath.includes(`/${root}/`)) {
      continue;
    }
    const projectIdx = segments.indexOf(root) + 1;
    // The last segment is the file itself, so it cannot be a project directory
    const hasProjectDir = projectIdx < segments.length - 1;
    return hasProjectDir ? generateProjectKey(segments[projectIdx] || '') : 'CF';
  }

  // Fallback to CF
  return 'CF';
}
/**
 * Generate a short project key from a project (directory) name.
 *
 * Known projects use a fixed key (note that 'VPN' keeps three letters).
 * Otherwise the name is split at capital letters and whitespace: two or
 * more words give the initials of the first two words, a single word gives
 * its first two characters, and a single character is padded with 'X'.
 *
 * @param {string} projectName - Project directory name.
 * @returns {string} Upper-case project key; 'CF' when the name is empty
 *   or consists only of whitespace.
 */
function generateProjectKey(projectName) {
  if (!projectName) return 'CF';

  // A Map instead of a plain object so that names such as "constructor" or
  // "toString" cannot resolve to inherited Object.prototype members
  const specialCases = new Map([
    ['eToroGridbot', 'GB'],
    ['ZorkiOS', 'ZK'],
    ['RealEstate', 'RE'],
    ['AgilitonScripts', 'AS'],
    ['VPN', 'VPN'], // Keep full
    ['mcp-servers', 'MC'],
    ['cloudmemorymcp', 'CM']
  ]);
  const fixedKey = specialCases.get(projectName);
  if (fixedKey !== undefined) {
    return fixedKey;
  }

  // Generate from name
  const words = projectName
    .replace(/([A-Z])/g, ' $1') // Split camelCase
    .trim()
    .toUpperCase()
    .split(/\s+/);
  const firstWord = words[0];

  // Whitespace-only names leave nothing to build a key from
  if (firstWord === '') {
    return 'CF';
  }
  if (words.length >= 2) {
    return firstWord[0] + words[1][0];
  }
  if (firstWord.length >= 2) {
    return firstWord.substring(0, 2);
  }
  return firstWord[0] + 'X';
}
/**
 * Extract a title from markdown content.
 *
 * Returns the text of the first non-empty H1 ("# ...") that is not inside
 * a fenced code block, so shell comments in code samples are not mistaken
 * for the document title. When there is no such heading the title is built
 * from the project key and the file name (extension removed, underscores
 * turned into spaces).
 *
 * @param {string} content - Markdown text of the file.
 * @param {string} filename - Base name of the file, used for the fallback.
 * @param {string} projectKey - Project key, used for the fallback.
 * @returns {string} Title, at most 500 characters long.
 */
function extractTitle(content, filename, projectKey) {
  // A UTF-8 byte order mark would hide an H1 on the very first line
  const text = content.charCodeAt(0) === 0xfeff ? content.slice(1) : content;

  let openFence = null; // '`' or '~' while inside a fenced code block
  for (const line of text.split('\n')) {
    const fence = line.match(/^ {0,3}(`{3,}|~{3,})/);
    if (fence) {
      const marker = fence[1][0];
      if (openFence === null) {
        openFence = marker;
      } else if (openFence === marker) {
        openFence = null;
      }
      continue;
    }
    if (openFence !== null || !line.startsWith('# ')) {
      continue;
    }
    // trim() also removes the "\r" left over from CRLF line endings
    const heading = line.slice(2).trim();
    if (heading) {
      return heading.substring(0, 500);
    }
  }

  // Fallback to project key + filename; only the trailing extension is removed
  const baseName = filename.replace(/\.md$/, '').replace(/_/g, ' ');
  return `${projectKey} - ${baseName}`.substring(0, 500);
}
/**
 * Migrate one markdown file into the archive database.
 *
 * Reads the file, logs what is about to happen, stores the content through
 * archiveAdd and then moves the original into a ".migrated-to-mcp"
 * directory next to it. In dry-run mode only the log output is produced.
 * Errors are caught, logged and reported in the returned object instead of
 * being thrown.
 *
 * @param {string} filePath - Full path of the file to migrate.
 * @returns {Promise<object>} Outcome with `success`, `filename`,
 *   `projectKey`, `archiveType` and `filePath`; on success also `title` and
 *   `fileSize` (plus `dryRun: true` in dry-run mode), on failure `error`.
 */
async function migrateFile(filePath) {
  const filename = basename(filePath);
  const projectKey = getProjectKey(filePath);
  const archiveType = getArchiveType(filePath, filename);

  try {
    const content = readFileSync(filePath, 'utf-8');
    const title = extractTitle(content, filename, projectKey);
    const { size: fileSize } = statSync(filePath);
    const outcome = { success: true, filename, projectKey, title, fileSize, archiveType, filePath };

    const timeOfDay = new Date().toISOString().substring(11, 19);
    const dryRunTag = DRY_RUN ? '[DRY RUN] ' : '';
    console.log(`\n[${timeOfDay}] ${dryRunTag}Migrating: ${relative(DEV_DIR, filePath)}`);
    console.log(` Project: ${projectKey}`);
    console.log(` Title: ${title}`);
    console.log(` Size: ${Math.round(fileSize / 1024)}KB`);
    console.log(` Type: ${archiveType}`);

    if (DRY_RUN) {
      console.log(` → Would migrate to database`);
      return { ...outcome, dryRun: true };
    }

    // Store in the database first; the original is only moved once this succeeded
    await archiveAdd({
      project: projectKey,
      archive_type: archiveType,
      title,
      content,
      original_path: filePath,
      file_size: fileSize
    });
    console.log(` ✓ Migrated to database`);

    // Keep the original as a backup next to its old location
    const backupDir = join(dirname(filePath), '.migrated-to-mcp');
    if (!existsSync(backupDir)) {
      mkdirSync(backupDir, { recursive: true });
    }
    renameSync(filePath, join(backupDir, filename));
    console.log(` ✓ Moved to backup`);

    return outcome;
  } catch (error) {
    console.error(` ✗ Error: ${error.message}`);
    return { success: false, filename, projectKey, archiveType, error: error.message, filePath };
  }
}
/**
 * Collect every .md file below DEV_DIR that is eligible for migration.
 *
 * Hidden directories, node_modules and directories matching one of the
 * EXCLUDE_PATTERNS are not entered. Files are filtered through
 * shouldExclude(). Directories that cannot be read are skipped with a
 * warning.
 *
 * @returns {string[]} Sorted list of full file paths.
 */
function findAllMarkdownFiles() {
  const found = [];

  // The patterns end in "/", so the directory path needs a trailing slash to
  // match the excluded directory itself rather than only its children
  const isExcludedDir = (dirPath) => {
    const withSlash = `${dirPath}/`;
    return EXCLUDE_PATTERNS.some((pattern) => withSlash.includes(pattern));
  };

  function scanDir(dir) {
    if (!existsSync(dir)) return;

    let entries;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch (error) {
      // Best effort: an unreadable directory must not abort the whole scan
      console.warn(`Skipping unreadable directory ${dir}: ${error.message}`);
      return;
    }

    for (const entry of entries) {
      const fullPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        // Skip hidden directories and known excludes
        const isHidden = entry.name.startsWith('.');
        if (!isHidden && entry.name !== 'node_modules' && !isExcludedDir(fullPath)) {
          scanDir(fullPath);
        }
      } else if (entry.isFile() && entry.name.endsWith('.md')) {
        if (!shouldExclude(fullPath, entry.name)) {
          found.push(fullPath);
        }
      }
    }
  }

  scanDir(DEV_DIR);
  return found.sort();
}
/**
 * Run the whole migration: scan, print an overview, migrate and summarise.
 *
 * Prints the number and total size of the files found together with a few
 * sample paths per archive type. In dry-run mode it stops there. Otherwise
 * every file is migrated one after the other, followed by a summary of
 * successes, failures and counts per archive type and per project.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const rule = '='.repeat(80);
  const toMegabytes = (bytes) => Math.round(bytes / 1024 / 1024 * 100) / 100;

  console.log('================================================================================');
  console.log(`Comprehensive Documentation Migration → task-mcp database ${DRY_RUN ? '[DRY RUN]' : ''}`);
  console.log('================================================================================\n');
  console.log('Scanning Development directory for .md files...\n');

  // Find all files
  const filesToMigrate = findAllMarkdownFiles();
  const fileCount = filesToMigrate.length;
  console.log(`Found ${fileCount} documentation files to migrate\n`);
  if (fileCount === 0) {
    console.log('✓ No files to migrate!');
    return;
  }

  // Total size; files that cannot be stat'ed simply do not count
  let totalSize = 0;
  for (const file of filesToMigrate) {
    try {
      totalSize += statSync(file).size;
    } catch {
      // leave this file out of the total
    }
  }
  console.log(`Total size: ${toMegabytes(totalSize)}MB\n`);

  // Sample paths per archive type
  const byType = {};
  for (const file of filesToMigrate) {
    const type = getArchiveType(file, basename(file));
    if (!byType[type]) {
      byType[type] = [];
    }
    byType[type].push(file);
  }
  console.log('Files by archive type:');
  for (const [type, files] of Object.entries(byType)) {
    console.log(`\n ${type}: ${files.length} files`);
    for (const sample of files.slice(0, 3)) {
      console.log(` - ${relative(DEV_DIR, sample)}`);
    }
    if (files.length > 3) {
      console.log(` ... and ${files.length - 3} more`);
    }
  }

  if (DRY_RUN) {
    console.log(`\n${rule}`);
    console.log('DRY RUN MODE - No files will be migrated');
    console.log(rule);
    console.log('\nRemove --dry-run flag to execute migration\n');
    return;
  }

  console.log(`\n${rule}`);
  console.log('Starting migration...');
  console.log(`${rule}\n`);

  // Migrate the files strictly one after the other
  const succeeded = [];
  const failed = [];
  for (const [index, filePath] of filesToMigrate.entries()) {
    const result = await migrateFile(filePath);
    (result.success ? succeeded : failed).push(result);

    // Progress indicator
    const done = index + 1;
    const percent = Math.round((done / fileCount) * 100);
    console.log(` Progress: ${done}/${fileCount} (${percent}%)`);

    // Small delay to avoid overwhelming the database
    await new Promise((resolve) => setTimeout(resolve, 50));
  }

  // Summary
  console.log(`\n${rule}`);
  console.log('Migration Complete');
  console.log(`${rule}\n`);
  console.log(`✓ Successfully migrated: ${succeeded.length} files`);
  console.log(`✗ Failed: ${failed.length} files`);
  if (failed.length > 0) {
    console.log('\nFailed files:');
    for (const failure of failed) {
      console.log(` - ${relative(DEV_DIR, failure.filePath)}: ${failure.error}`);
    }
  }

  // Count successful results per value of `field`, largest count first
  const countBy = (field) => {
    const counts = {};
    for (const entry of succeeded) {
      counts[entry[field]] = (counts[entry[field]] || 0) + 1;
    }
    return Object.entries(counts).sort((a, b) => b[1] - a[1]);
  };

  console.log('\nBy archive type:');
  for (const [type, count] of countBy('archiveType')) {
    console.log(` - ${type}: ${count} files`);
  }
  console.log('\nBy project:');
  for (const [project, count] of countBy('projectKey')) {
    console.log(` - ${project}: ${count} files`);
  }

  console.log('\nBackup location: <project-dir>/.migrated-to-mcp/');
  console.log('Original files can be restored if needed.');

  // Calculate migrated size
  let migratedSize = 0;
  for (const entry of succeeded) {
    migratedSize += entry.fileSize || 0;
  }
  console.log(`\nTotal data migrated: ${toMegabytes(migratedSize)}MB`);
}
// Run migration and turn the outcome into the process exit code
const reportSuccess = () => {
  console.log('\n✓ Migration script completed successfully');
  process.exit(0);
};
const reportFailure = (error) => {
  console.error('\n✗ Migration script failed:', error);
  console.error(error.stack);
  process.exit(1);
};
main().then(reportSuccess).catch(reportFailure);