Three migration scripts for complete file-to-database migration: - migrate-all-docs-batch.mjs: Main migration (1,172 files) - migrate-missed-docs.mjs: Supplementary for hidden dirs (34 files) - migrate-external-archive.mjs: External archive cleanup (5 files) Total migrated: 1,211 files (~15MB) to project_archives table All with semantic embeddings for vector search Related: CF-267, CF-268 Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
481 lines
12 KiB
JavaScript
Executable File
481 lines
12 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
/**
|
|
* Comprehensive batch migration of all documentation to task-mcp database
|
|
*
|
|
* Migrates ~1,293 .md files from Development directory with intelligent archive type detection.
|
|
*
|
|
* Archive types:
|
|
* - session: Session plans, notes, CLAUDE_HISTORY.md
|
|
* - investigation: Investigation files
|
|
* - completed: planning.md, completed work
|
|
* - audit: Files with "audit" in the filename
* - research: Documentation, guides, architecture docs (default)
|
|
*
|
|
* Excludes:
|
|
* - README.md, CHANGELOG.md, LICENSE.md, CONTRIBUTING.md, CODE_OF_CONDUCT.md
|
|
* - Templates (any filename containing "template", /templates/)
|
|
* - Active tracking (tasks.md, FEATURES.md)
|
|
* - Vendor directories (go/pkg/mod, Tools/Github-Ranking)
|
|
* - Build artifacts (node_modules, .git, build, dist)
|
|
* - Already migrated (.migrated-to-mcp/)
|
|
*/
|
|
|
|
import { readFileSync, readdirSync, renameSync, mkdirSync, existsSync, statSync } from 'fs';
|
|
import { join, basename, dirname, relative } from 'path';
|
|
import { homedir } from 'os';
|
|
import dotenv from 'dotenv';
|
|
import { archiveAdd } from './dist/tools/archives.js';
|
|
|
|
// Load environment: run dotenv's config() once at module load, before any
// migration code executes.
// NOTE(review): presumably this supplies settings (e.g. database credentials)
// that the imported archive tool reads from process.env — confirm.
|
|
dotenv.config();
|
|
|
|
// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

// Root of the tree that gets scanned: the "Development" folder in the user's home.
const DEV_DIR = join(homedir(), 'Development');

// True when the script was started with the --dry-run command-line flag.
const DRY_RUN = process.argv.some((arg) => arg === '--dry-run');

// Conventional repository files; matched against the bare filename and never migrated.
const STANDARD_FILES = [
  'README.md',
  'CHANGELOG.md',
  'LICENSE.md',
  'CONTRIBUTING.md',
  'CODE_OF_CONDUCT.md',
];

// Path fragments; any path containing one of these substrings is left alone.
const EXCLUDE_PATTERNS = [
  // Dependencies and build output
  '/node_modules/',
  '/.git/',
  '/build/',
  '/dist/',
  '/vendor/',

  // Third-party / external tooling
  '/go/pkg/mod/',
  '/Tools/Github-Ranking/',
  '/Tools/awesome-',

  // Backups produced by earlier migrations
  '/.migrated-to-mcp/',
  '/.framework-backup/',
  '/.claude-backup/',

  // Archived projects
  '/Archived/',

  // OS and editor metadata
  '/.DS_Store',
  '/.idea/',
  '/.vscode/',
];

// Files that are still actively edited; matched against the bare filename and kept in place.
const ACTIVE_FILES = [
  'tasks.md',
  'FEATURES.md',
];
|
|
|
|
/**
 * Decide whether a markdown file must be left out of the migration.
 *
 * A file is skipped when any of the following holds:
 *   - its name is listed in STANDARD_FILES or ACTIVE_FILES,
 *   - its name contains "template" or its path contains "/templates/",
 *   - its path contains one of the EXCLUDE_PATTERNS substrings.
 *
 * All comparisons are case-sensitive.
 *
 * @param {string} filePath - Full path of the candidate file.
 * @param {string} filename - Base name of the candidate file.
 * @returns {boolean} true when the file should NOT be migrated.
 */
function shouldExclude(filePath, filename) {
  const isKeptByName = STANDARD_FILES.includes(filename) || ACTIVE_FILES.includes(filename);

  // A name ending in "-template.md" necessarily contains "template",
  // so one substring test covers both filename rules.
  const looksLikeTemplate = filename.includes('template') || filePath.includes('/templates/');

  // Evaluated lazily so the pattern list is only walked when the cheaper checks fail.
  const hitsExcludedPath = () => EXCLUDE_PATTERNS.some((fragment) => filePath.includes(fragment));

  return isKeptByName || looksLikeTemplate || hitsExcludedPath();
}
|
|
|
|
/**
 * Classify a file into an archive type from its name and path.
 *
 * Rules are evaluated in this order; the first match wins:
 *   1. 'session'       - plan.md / notes.md under a ".claude-session/" path,
 *                        CLAUDE_HISTORY.md, or a name containing both
 *                        "session" and "complete" (case-insensitive)
 *   2. 'investigation' - name contains "investigation", or the path contains
 *                        "/investigations/" (both case-insensitive)
 *   3. 'completed'     - planning.md, or a name containing "complete"
 *   4. 'audit'         - name contains "audit"
 *   5. 'research'      - everything else
 *
 * @param {string} filePath - Full path of the file.
 * @param {string} filename - Base name of the file.
 * @returns {string} One of 'session', 'investigation', 'completed', 'audit', 'research'.
 */
function getArchiveType(filePath, filename) {
  const lowerPath = filePath.toLowerCase();
  const lowerFile = filename.toLowerCase();
  const nameMentions = (word) => lowerFile.includes(word);

  const inSessionDir = filePath.includes('.claude-session/');
  const isSessionDoc =
    (inSessionDir && (filename === 'plan.md' || filename === 'notes.md')) ||
    filename === 'CLAUDE_HISTORY.md' ||
    // Also covers the exact name SESSION_COMPLETE.md.
    (nameMentions('session') && nameMentions('complete'));
  if (isSessionDoc) {
    return 'session';
  }

  // The "investigation-" prefix is just a special case of the substring match.
  if (nameMentions('investigation') || lowerPath.includes('/investigations/')) {
    return 'investigation';
  }

  // Names containing both "session" and "complete" were returned above,
  // so any remaining "complete" name is completed work.
  if (filename === 'planning.md' || nameMentions('complete')) {
    return 'completed';
  }

  return nameMentions('audit') ? 'audit' : 'research';
}
|
|
|
|
/**
 * Derive the project key for a file from where it lives in the tree.
 *
 *   - any path containing "/ClaudeFramework/"      -> 'CF'
 *   - a path containing "/Apps/"                   -> key generated from the
 *     directory name that follows the first "Apps" segment
 *   - a path containing "/Infrastructure/"         -> key generated from the
 *     directory name that follows the first "Infrastructure" segment
 *   - anything else                                -> 'CF'
 *
 * "Apps" takes precedence over "Infrastructure" when both appear.
 *
 * @param {string} filePath - Full, '/'-separated path of the file.
 * @returns {string} Project key.
 */
function getProjectKey(filePath) {
  if (filePath.includes('/ClaudeFramework/')) {
    return 'CF';
  }

  const segments = filePath.split('/');

  for (const root of ['Apps', 'Infrastructure']) {
    if (!filePath.includes(`/${root}/`)) {
      continue;
    }
    // The segment right after the root directory names the project.
    const projectDir = segments[segments.indexOf(root) + 1];
    return generateProjectKey(projectDir || '');
  }

  return 'CF';
}
|
|
|
|
/**
 * Generate a short project key from a project directory name.
 *
 * Resolution order:
 *   1. Empty / missing / whitespace-only name -> 'CF'.
 *   2. Exact match in the hand-maintained override table (note that 'VPN'
 *      intentionally keeps all three letters).
 *   3. Otherwise the name is split before every capital letter and
 *      upper-cased; the key is the initials of the first two words, or the
 *      first two characters of a single word, or that single character
 *      followed by 'X'.
 *
 * @param {string} projectName - Directory name of the project.
 * @returns {string} Project key (two characters, except for overrides such as 'VPN').
 */
function generateProjectKey(projectName) {
  if (!projectName) return 'CF';

  // A Map (rather than a plain object) so that names such as "constructor"
  // or "toString" cannot resolve to inherited Object.prototype members.
  const specialCases = new Map([
    ['eToroGridbot', 'GB'],
    ['ZorkiOS', 'ZK'],
    ['RealEstate', 'RE'],
    ['AgilitonScripts', 'AS'],
    ['VPN', 'VPN'], // Keep full
    ['mcp-servers', 'MC'],
    ['cloudmemorymcp', 'CM'],
  ]);

  const override = specialCases.get(projectName);
  if (override !== undefined) {
    return override;
  }

  const words = projectName
    .replace(/([A-Z])/g, ' $1') // Split camelCase: insert a space before each capital
    .trim()
    .toUpperCase()
    .split(/\s+/)
    .filter(Boolean); // a whitespace-only name would otherwise leave one empty word

  if (words.length === 0) {
    return 'CF';
  }

  const [first, second] = words;
  if (second !== undefined) {
    return first[0] + second[0];
  }
  if (first.length >= 2) {
    return first.substring(0, 2);
  }
  // Single-character name: pad with 'X' to reach two characters.
  return `${first}X`;
}
|
|
|
|
/**
 * Extract a title for a markdown document.
 *
 * The title is the text of the first non-empty level-1 heading (a line
 * starting with "# "). Lines inside fenced code blocks (``` or ~~~) are
 * ignored so that shell or Python comments are not mistaken for headings.
 * Fence tracking is a simple approximation: a fence opened with one marker
 * character is closed by the next fence line using the same character.
 *
 * When no heading is found the title falls back to
 * "<projectKey> - <filename without the trailing .md, underscores as spaces>".
 *
 * @param {string} content - Full markdown text of the file.
 * @param {string} filename - Base name of the file, used for the fallback.
 * @param {string} projectKey - Project key, used for the fallback.
 * @returns {string} Title, truncated to at most 500 characters.
 */
function extractTitle(content, filename, projectKey) {
  const MAX_TITLE_LENGTH = 500;
  let openFence = null; // '`' or '~' while inside a fenced code block

  for (const line of content.split('\n')) {
    const fence = /^\s*(`{3,}|~{3,})/.exec(line);
    if (fence) {
      const marker = fence[1][0];
      if (openFence === null) {
        openFence = marker;
      } else if (openFence === marker) {
        openFence = null;
      }
      continue;
    }
    if (openFence !== null) {
      continue;
    }

    if (line.startsWith('# ')) {
      // trim() also drops the trailing '\r' of CRLF files.
      const heading = line.slice(2).trim();
      if (heading) {
        return heading.substring(0, MAX_TITLE_LENGTH);
      }
    }
  }

  // Strip only the trailing extension; a plain replace('.md', '') would
  // remove the first ".md" found anywhere in the name.
  const baseName = filename.replace(/\.md$/, '').replace(/_/g, ' ');
  return `${projectKey} - ${baseName}`.substring(0, MAX_TITLE_LENGTH);
}
|
|
|
|
/**
 * Migrate one markdown file into the archive database.
 *
 * Steps: read the file, derive project key / archive type / title, log a
 * short description, submit everything through archiveAdd(), then move the
 * original into a ".migrated-to-mcp" folder next to it.
 *
 * In dry-run mode the description is logged and the function returns before
 * archiveAdd() is called or the file is moved.
 *
 * Errors never propagate: any exception is logged and reported through the
 * returned object.
 *
 * NOTE(review): if moving the file fails after archiveAdd() has succeeded,
 * the file is reported as failed and stays in place, so a later run would
 * submit it again — confirm how archiveAdd treats duplicates.
 *
 * @param {string} filePath - Full path of the file to migrate.
 * @returns {Promise<object>} On success
 *   `{ success: true, filename, projectKey, title, fileSize, archiveType, filePath }`
 *   (plus `dryRun: true` in dry-run mode); on failure
 *   `{ success: false, filename, projectKey, archiveType, error, filePath }`.
 */
async function migrateFile(filePath) {
  const filename = basename(filePath);
  const projectKey = getProjectKey(filePath);
  const archiveType = getArchiveType(filePath, filename);

  try {
    const content = readFileSync(filePath, 'utf-8');
    const title = extractTitle(content, filename, projectKey);
    const { size: fileSize } = statSync(filePath);
    const summary = { success: true, filename, projectKey, title, fileSize, archiveType, filePath };

    // HH:MM:SS portion of the ISO timestamp.
    const clock = new Date().toISOString().substring(11, 19);
    const dryTag = DRY_RUN ? '[DRY RUN] ' : '';
    console.log(`\n[${clock}] ${dryTag}Migrating: ${relative(DEV_DIR, filePath)}`);
    console.log(` Project: ${projectKey}`);
    console.log(` Title: ${title}`);
    console.log(` Size: ${Math.round(fileSize / 1024)}KB`);
    console.log(` Type: ${archiveType}`);

    if (DRY_RUN) {
      console.log(` → Would migrate to database`);
      return { ...summary, dryRun: true };
    }

    // Store the document; the call's return value is not needed here.
    await archiveAdd({
      project: projectKey,
      archive_type: archiveType,
      title,
      content,
      original_path: filePath,
      file_size: fileSize
    });
    console.log(` ✓ Migrated to database`);

    // Keep the original on disk, tucked away beside its old location.
    const backupDir = join(dirname(filePath), '.migrated-to-mcp');
    if (!existsSync(backupDir)) {
      mkdirSync(backupDir, { recursive: true });
    }
    renameSync(filePath, join(backupDir, filename));
    console.log(` ✓ Moved to backup`);

    return summary;
  } catch (error) {
    console.error(` ✗ Error: ${error.message}`);
    return { success: false, filename, projectKey, archiveType, error: error.message, filePath };
  }
}
|
|
|
|
/**
 * Recursively collect every .md file under DEV_DIR that should be migrated.
 *
 * Traversal rules:
 *   - hidden directories (name starts with '.') and node_modules are never entered;
 *   - a directory whose path matches one of EXCLUDE_PATTERNS is pruned, so
 *     its subtree is not walked at all;
 *   - each remaining regular file ending in ".md" is kept unless
 *     shouldExclude() rejects it.
 *
 * Directories that cannot be read are reported with a warning and skipped;
 * the scan continues with the rest of the tree.
 *
 * @returns {string[]} Full paths of the files to migrate, sorted.
 */
function findAllMarkdownFiles() {
  const files = [];

  // The exclude patterns are written as "/name/", while a directory path
  // produced by join() has no trailing slash. Append one before matching,
  // otherwise directories such as ".../build" are never recognised and their
  // whole subtree gets walked for nothing.
  const isExcludedDir = (dirPath) => {
    const withSlash = `${dirPath}/`;
    return EXCLUDE_PATTERNS.some((pattern) => withSlash.includes(pattern));
  };

  function scanDir(dir) {
    if (!existsSync(dir)) return;

    let entries;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch (error) {
      // Best effort: an unreadable directory must not abort the whole scan,
      // but it should not vanish silently either.
      console.warn(` ! Skipping unreadable directory ${dir}: ${error.message}`);
      return;
    }

    for (const entry of entries) {
      const fullPath = join(dir, entry.name);

      if (entry.isDirectory()) {
        if (entry.name.startsWith('.') || entry.name === 'node_modules') continue;
        if (isExcludedDir(fullPath)) continue;
        scanDir(fullPath);
      } else if (entry.isFile() && entry.name.endsWith('.md')) {
        if (!shouldExclude(fullPath, entry.name)) {
          files.push(fullPath);
        }
      }
    }
  }

  scanDir(DEV_DIR);
  return files.sort();
}
|
|
|
|
/**
 * Run the whole migration.
 *
 * 1. Scan for candidate files and print their count, total size and a short
 *    sample per archive type.
 * 2. In dry-run mode stop there.
 * 3. Otherwise migrate the files one at a time (pausing 50 ms between files),
 *    printing progress after each one.
 * 4. Print a summary: success/failure counts, failed files with their error,
 *    counts per archive type and per project, and the amount of data migrated.
 *
 * Per-file failures are collected and reported; they do not reject the
 * returned promise.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const rule = '='.repeat(80);
  const toMegabytes = (bytes) => Math.round(bytes / 1024 / 1024 * 100) / 100;

  // Print "- key: N files" lines, most frequent first.
  const printCounts = (counts) => {
    const ranked = Object.entries(counts).sort((a, b) => b[1] - a[1]);
    for (const [label, count] of ranked) {
      console.log(` - ${label}: ${count} files`);
    }
  };

  console.log('================================================================================');
  console.log(`Comprehensive Documentation Migration → task-mcp database ${DRY_RUN ? '[DRY RUN]' : ''}`);
  console.log('================================================================================\n');

  console.log('Scanning Development directory for .md files...\n');

  const filesToMigrate = findAllMarkdownFiles();
  console.log(`Found ${filesToMigrate.length} documentation files to migrate\n`);

  if (filesToMigrate.length === 0) {
    console.log('✓ No files to migrate!');
    return;
  }

  // Files that cannot be stat'ed simply do not count towards the total.
  let totalSize = 0;
  for (const file of filesToMigrate) {
    try {
      totalSize += statSync(file).size;
    } catch {
      // ignore: size unknown
    }
  }
  console.log(`Total size: ${toMegabytes(totalSize)}MB\n`);

  // Preview: group the candidates by the archive type they will receive.
  const byType = {};
  for (const file of filesToMigrate) {
    const type = getArchiveType(file, basename(file));
    if (!byType[type]) byType[type] = [];
    byType[type].push(file);
  }

  console.log('Files by archive type:');
  for (const [type, files] of Object.entries(byType)) {
    console.log(`\n ${type}: ${files.length} files`);
    for (const sample of files.slice(0, 3)) {
      console.log(` - ${relative(DEV_DIR, sample)}`);
    }
    if (files.length > 3) {
      console.log(` ... and ${files.length - 3} more`);
    }
  }

  if (DRY_RUN) {
    console.log(`\n${rule}`);
    console.log('DRY RUN MODE - No files will be migrated');
    console.log(rule);
    console.log('\nRemove --dry-run flag to execute migration\n');
    return;
  }

  console.log(`\n${rule}`);
  console.log('Starting migration...');
  console.log(`${rule}\n`);

  const results = { success: [], failed: [] };

  for (const [index, filePath] of filesToMigrate.entries()) {
    const result = await migrateFile(filePath);
    const bucket = result.success ? results.success : results.failed;
    bucket.push(result);

    const done = index + 1;
    const progress = Math.round((done / filesToMigrate.length) * 100);
    console.log(` Progress: ${done}/${filesToMigrate.length} (${progress}%)`);

    // Small pause between files so the database is not flooded.
    await new Promise((resolve) => setTimeout(resolve, 50));
  }

  console.log(`\n${rule}`);
  console.log('Migration Complete');
  console.log(`${rule}\n`);
  console.log(`✓ Successfully migrated: ${results.success.length} files`);
  console.log(`✗ Failed: ${results.failed.length} files`);

  if (results.failed.length > 0) {
    console.log('\nFailed files:');
    for (const failure of results.failed) {
      console.log(` - ${relative(DEV_DIR, failure.filePath)}: ${failure.error}`);
    }
  }

  const successByType = {};
  const byProject = {};
  let migratedSize = 0;
  for (const entry of results.success) {
    successByType[entry.archiveType] = (successByType[entry.archiveType] || 0) + 1;
    byProject[entry.projectKey] = (byProject[entry.projectKey] || 0) + 1;
    migratedSize += entry.fileSize || 0;
  }

  console.log('\nBy archive type:');
  printCounts(successByType);

  console.log('\nBy project:');
  printCounts(byProject);

  console.log('\nBackup location: <project-dir>/.migrated-to-mcp/');
  console.log('Original files can be restored if needed.');

  console.log(`\nTotal data migrated: ${toMegabytes(migratedSize)}MB`);
}
|
|
|
|
// Entry point: run the migration, then terminate the process explicitly —
// exit code 0 when main() resolves, 1 when it rejects.
(async () => {
  try {
    await main();
    console.log('\n✓ Migration script completed successfully');
    process.exit(0);
  } catch (error) {
    console.error('\n✗ Migration script failed:', error);
    console.error(error.stack);
    process.exit(1);
  }
})();
|