#!/usr/bin/env node
/**
 * Learning MCP Server — pgvector-backed learnings + anti-patterns
 *
 * Tools:
 *   learning_query        — semantic (vector) search over learnings_embeddings
 *   learning_search_fts   — PostgreSQL full-text search fallback
 *   learning_inject       — get top learnings for task injection (by category/task)
 *   learning_context      — project-scoped learnings (Project + Related + Anti-patterns)
 *   learning_add          — insert a new learning (auto-embeds)
 *   learning_mark_applied — increment applied_count by id
 *
 * Replaces direct psql access used by rag-query, learn-seed, learn-inject, learn-context.
 */
import * as Sentry from '@sentry/node';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import { query, toVector } from './db.js';
import { embed } from './embeddings.js';

if (process.env.SENTRY_DSN) {
  Sentry.init({
    dsn: process.env.SENTRY_DSN,
    environment: process.env.SENTRY_ENVIRONMENT ?? 'production',
    tracesSampleRate: 0.1,
  });
}

// ---------------------------------------------------------------------------
// Data access
// ---------------------------------------------------------------------------

/**
 * Vector similarity search over learnings_embeddings.
 *
 * @param {object} opts
 * @param {string} opts.text - Free-text query; embedded via LiteLLM.
 * @param {number} [opts.limit=5] - Max rows fetched from the DB.
 * @param {string} [opts.category] - Optional exact category filter.
 * @param {number} [opts.minSimilarity=0.3] - Cosine-similarity floor, applied
 *   AFTER the LIMIT — so fewer than `limit` rows may be returned.
 * @param {string} [opts.project] - Optional project scope (matches the project
 *   column or rows with no project).
 * @returns {Promise<object[]>} Rows with a computed `similarity` column.
 */
async function semanticSearch({ text, limit = 5, category, minSimilarity = 0.3, project }) {
  const vec = await embed(text);
  const params = [toVector(vec), limit];
  // Only approved (or unreviewed), embedded, non-deprecated rows are searchable.
  let where = `(review_status = 'approved' OR review_status IS NULL)
    AND embedding IS NOT NULL AND deprecated_at IS NULL`;
  if (category) {
    params.push(category);
    where += ` AND category = $${params.length}`;
  }
  if (project) {
    params.push(project);
    where += ` AND (project = $${params.length} OR project IS NULL)`;
  }
  const sql = `
    SELECT id, learning_id, learning, context, category, project, is_anti_pattern,
           effectiveness_score, applied_count, source_file,
           1 - (embedding <=> $1::vector) AS similarity
    FROM learnings_embeddings
    WHERE ${where}
    ORDER BY embedding <=> $1::vector
    LIMIT $2`;
  const rows = await query(sql, params);
  return rows.filter((r) => r.similarity >= minSimilarity);
}

/**
 * PostgreSQL full-text search fallback (no embeddings required).
 *
 * Tokenizes the query into alphanumeric words of length >= 4 (max 10) and
 * OR-joins them into a tsquery. Returns [] when no usable terms remain.
 *
 * @param {object} opts
 * @param {string} opts.text - Free-text query.
 * @param {number} [opts.limit=10]
 * @param {string} [opts.category] - Optional exact category filter.
 * @returns {Promise<object[]>} Rows ranked by ts_rank, then effectiveness.
 */
async function ftsSearch({ text, limit = 10, category }) {
  // Tokens are restricted to [A-Za-z0-9], so the tsquery string is safe to
  // pass as a parameter to to_tsquery without further escaping.
  const terms = text
    .split(/[^A-Za-z0-9]+/)
    .filter((w) => w.length >= 4)
    .slice(0, 10);
  if (terms.length === 0) return [];
  const tsquery = terms.join(' | ');
  const params = [tsquery, limit];
  let where = '';
  if (category) {
    params.push(category);
    where = `AND category = $${params.length}`;
  }
  const sql = `
    SELECT id, learning_id, learning, category, source_file, effectiveness_score, is_anti_pattern,
           ts_rank(to_tsvector('english', learning || ' ' || COALESCE(context,'')),
                   to_tsquery('english', $1)) AS rank
    FROM learnings_embeddings
    WHERE to_tsvector('english', learning || ' ' || COALESCE(context,'')) @@ to_tsquery('english', $1)
      AND (review_status = 'approved' OR review_status IS NULL)
      AND deprecated_at IS NULL
      ${where}
    ORDER BY rank DESC, effectiveness_score DESC
    LIMIT $2`;
  return await query(sql, params);
}

/**
 * Project-scoped learnings for session-start context.
 *
 * Matches rows whose `project` equals the key or whose `project_tags`
 * contains it (substring match via ILIKE).
 *
 * @param {object} opts
 * @param {string} opts.project - Project key (e.g., CF, LLB).
 * @param {number} [opts.limit=5] - Limit for primary learnings (anti-patterns
 *   are always capped at 5).
 * @returns {Promise<{primary: object[], antiPatterns: object[]}>}
 */
async function getProjectContext({ project, limit = 5 }) {
  const params = [project, limit];
  const primary = await query(
    `SELECT id, learning, category, is_anti_pattern, effectiveness_score, applied_count
     FROM learnings_embeddings
     WHERE (project = $1 OR project_tags ILIKE '%' || $1 || '%')
       AND is_anti_pattern = false
       AND (review_status = 'approved' OR review_status IS NULL)
       AND deprecated_at IS NULL
     ORDER BY effectiveness_score DESC, applied_count DESC
     LIMIT $2`,
    params);
  const antiPatterns = await query(
    `SELECT id, learning, category, effectiveness_score
     FROM learnings_embeddings
     WHERE (project = $1 OR project_tags ILIKE '%' || $1 || '%')
       AND is_anti_pattern = true
       AND (review_status = 'approved' OR review_status IS NULL)
       AND deprecated_at IS NULL
     ORDER BY effectiveness_score DESC
     LIMIT 5`,
    [project]);
  return { primary, antiPatterns };
}

/**
 * Select learnings + anti-patterns to inject into a task context.
 *
 * With a `task`: ONE semantic search over a larger candidate pool, then split
 * into regular learnings and anti-patterns. (Previously this ran two separate
 * searches — embedding the same text twice — did not exclude anti-patterns
 * from `primary`, and filtered `anti` out of only `nAnti` rows, which almost
 * always produced an empty anti-pattern list and duplicates in both lists.)
 *
 * With only a `category`: top rows by effectiveness/applied_count.
 *
 * @param {object} opts
 * @param {string} [opts.category]
 * @param {string} [opts.task]
 * @param {boolean} [opts.compact=false] - Smaller result counts (3/2 vs 5/3).
 * @param {string} [opts.project]
 * @returns {Promise<{primary: object[], antiPatterns: object[]}>}
 */
async function injectForTask({ category, task, compact = false, project }) {
  const nPrimary = compact ? 3 : 5;
  const nAnti = compact ? 2 : 3;
  let primary = [];
  let anti = [];
  if (task) {
    // Over-fetch so that anti-patterns (typically a minority of rows) have a
    // realistic chance of appearing in the candidate pool.
    const pool = await semanticSearch({
      text: task,
      limit: nPrimary + nAnti * 5,
      category,
      project,
      minSimilarity: 0.3,
    });
    primary = pool.filter((r) => !r.is_anti_pattern).slice(0, nPrimary);
    anti = pool.filter((r) => r.is_anti_pattern).slice(0, nAnti);
  } else if (category) {
    primary = await query(
      `SELECT id, learning_id, learning, category, effectiveness_score, applied_count, is_anti_pattern
       FROM learnings_embeddings
       WHERE category = $1 AND is_anti_pattern = false
         AND (review_status = 'approved' OR review_status IS NULL)
         AND deprecated_at IS NULL
       ORDER BY effectiveness_score DESC, applied_count DESC
       LIMIT $2`,
      [category, nPrimary]);
    anti = await query(
      `SELECT id, learning_id, learning, category, effectiveness_score
       FROM learnings_embeddings
       WHERE category = $1 AND is_anti_pattern = true
         AND (review_status = 'approved' OR review_status IS NULL)
         AND deprecated_at IS NULL
       ORDER BY effectiveness_score DESC
       LIMIT $2`,
      [category, nAnti]);
  }
  return { primary, antiPatterns: anti };
}

/**
 * Insert a new learning row, auto-embedding the text.
 *
 * Embedding failure is non-fatal: the row is inserted with a NULL embedding
 * (it will be invisible to semanticSearch, which requires embedding IS NOT
 * NULL, but still reachable via FTS).
 *
 * @param {object} args - See the learning_add tool schema.
 * @returns {Promise<{id: number, learning_id: number}>}
 * @throws {Error} When `learning` is missing.
 */
async function addLearning(args) {
  const {
    learning,
    context = '',
    project = '',
    category = 'GENERAL',
    is_anti_pattern = false,
    effectiveness_score = 0.7,
    source_file = '',
    session_id = '',
    review_status = 'approved',
  } = args;
  if (!learning) throw new Error('learning is required');

  // NOTE(review): MAX(learning_id)+1 is racy under concurrent inserts — two
  // writers can pick the same id. A DB sequence would be the proper fix;
  // confirm whether learning_id has a uniqueness constraint.
  const nextIdRow = await query('SELECT COALESCE(MAX(learning_id), 0) + 1 AS n FROM learnings_embeddings');
  const nextId = nextIdRow[0]?.n ?? 1;

  let vec = null;
  try {
    vec = await embed(learning);
  } catch (e) {
    console.error('embed failed:', e.message);
  }

  const sql = `
    INSERT INTO learnings_embeddings
      (learning_id, learning, context, category, project, is_anti_pattern,
       effectiveness_score, source_file, session_id, review_status, embedding)
    VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11::vector)
    RETURNING id, learning_id`;
  const params = [
    nextId, learning, context, category, project, is_anti_pattern,
    effectiveness_score, source_file, session_id, review_status,
    vec ? toVector(vec) : null,
  ];
  const rows = await query(sql, params);
  return rows[0];
}

/**
 * Record that a learning was applied: bump applied_count and timestamps.
 *
 * @param {number} id - Primary-key id of the learning row.
 * @returns {Promise<{ok: boolean, id: number}>} Always ok:true, even if no
 *   row matched (UPDATE of a missing id is a silent no-op).
 */
async function markApplied(id) {
  await query(
    `UPDATE learnings_embeddings
     SET applied_count = applied_count + 1,
         last_applied = CURRENT_TIMESTAMP,
         updated_at = CURRENT_TIMESTAMP
     WHERE id = $1`,
    [id]);
  return { ok: true, id };
}

// ---------------------------------------------------------------------------
// Formatting
// ---------------------------------------------------------------------------

/**
 * Render result rows as a bulleted text list.
 *
 * Per row: an anti-pattern marker, category, id (falls back to learning_id),
 * and either a similarity percentage (when showScore and the row has one) or
 * an effectiveness score.
 *
 * @param {object[]} rows
 * @param {{showScore?: boolean}} [opts]
 * @returns {string} Multi-line text, or 'No learnings found' when empty.
 */
function formatRows(rows, { showScore = true } = {}) {
  if (!rows?.length) return 'No learnings found';
  return rows.map((r) => {
    const marker = r.is_anti_pattern ? '⚠️ ANTI' : '✓';
    const score = showScore && r.similarity != null
      ? ` (${(r.similarity * 100).toFixed(0)}%)`
      : r.effectiveness_score != null
        ? ` [eff=${Number(r.effectiveness_score).toFixed(2)}]`
        : '';
    return `  ${marker} [${r.category}] #${r.id ?? r.learning_id}${score}: ${r.learning}`;
  }).join('\n');
}

/**
 * Render the learning_context response as markdown.
 *
 * @param {string} project - Project key, echoed into the heading.
 * @param {{primary: object[], antiPatterns: object[]}} ctx
 * @returns {string}
 */
function formatContext(project, { primary, antiPatterns }) {
  const lines = [`## Project Context: ${project}`, ''];
  if (primary.length) {
    lines.push('### Project Learnings');
    lines.push(formatRows(primary, { showScore: false }));
    lines.push('');
  }
  if (antiPatterns.length) {
    lines.push('### Anti-Patterns');
    lines.push(formatRows(antiPatterns, { showScore: false }));
    lines.push('');
  }
  // (A dead `if (ids.length) lines.push('')` that only appended a blank line
  // was removed here.)
  return lines.join('\n');
}

// ---------------------------------------------------------------------------
// MCP server
// ---------------------------------------------------------------------------

const TOOLS = [
  {
    name: 'learning_query',
    description: 'Semantic search over the pgvector learnings DB. Returns learnings most relevant to a natural-language task description.',
    inputSchema: {
      type: 'object',
      properties: {
        query: { type: 'string', description: 'Free-text query / task description' },
        limit: { type: 'integer', default: 5 },
        category: { type: 'string', description: 'Optional category filter (SWIFT, PYTHON, INFRASTRUCTURE, AI, ...)' },
        project: { type: 'string', description: 'Optional project scope (e.g., CF, LLB, WHMCS)' },
        min_similarity: { type: 'number', default: 0.3 },
      },
      required: ['query'],
    },
  },
  {
    name: 'learning_search_fts',
    description: 'PostgreSQL full-text search over learnings (no embeddings needed). Useful when semantic search returns poor matches.',
    inputSchema: {
      type: 'object',
      properties: {
        query: { type: 'string' },
        limit: { type: 'integer', default: 10 },
        category: { type: 'string' },
      },
      required: ['query'],
    },
  },
  {
    name: 'learning_inject',
    description: 'Return top learnings for injection into a task context, scored for relevance. Provide either a task description or a category.',
    inputSchema: {
      type: 'object',
      properties: {
        task: { type: 'string', description: 'Task description (semantic match)' },
        category: { type: 'string', description: 'Category filter' },
        project: { type: 'string' },
        compact: { type: 'boolean', default: false },
      },
    },
  },
  {
    name: 'learning_context',
    description: 'Get all learnings + anti-patterns for a project. Used at session start to surface project-scoped knowledge.',
    inputSchema: {
      type: 'object',
      properties: {
        project: { type: 'string', description: 'Project key (e.g., CF, LLB)' },
        limit: { type: 'integer', default: 5 },
      },
      required: ['project'],
    },
  },
  {
    name: 'learning_add',
    description: 'Insert a new learning into pgvector (auto-embeds via LiteLLM). Use sparingly — prefer learn-from-session for bulk.',
    inputSchema: {
      type: 'object',
      properties: {
        learning: { type: 'string' },
        context: { type: 'string' },
        project: { type: 'string' },
        category: { type: 'string', default: 'GENERAL' },
        is_anti_pattern: { type: 'boolean', default: false },
        effectiveness_score: { type: 'number', default: 0.7 },
        source_file: { type: 'string' },
        session_id: { type: 'string' },
      },
      required: ['learning'],
    },
  },
  {
    name: 'learning_mark_applied',
    description: 'Increment applied_count for a learning id (call when a learning was successfully used).',
    inputSchema: {
      type: 'object',
      properties: { id: { type: 'integer' } },
      required: ['id'],
    },
  },
];

/**
 * Build the MCP server and wire tool handlers.
 *
 * Handlers return MCP content objects; errors are captured to Sentry (when
 * configured), logged, and surfaced as isError responses rather than thrown.
 *
 * @returns {Server}
 */
export function createServer() {
  const server = new Server(
    { name: 'learning-mcp', version: '1.0.0' },
    { capabilities: { tools: {} } }
  );

  server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));

  server.setRequestHandler(CallToolRequestSchema, async (req) => {
    const name = req.params.name;
    const args = req.params.arguments ?? {};
    try {
      switch (name) {
        case 'learning_query': {
          const rows = await semanticSearch({
            text: args.query,
            limit: args.limit ?? 5,
            category: args.category,
            project: args.project,
            minSimilarity: args.min_similarity ?? 0.3,
          });
          return {
            content: [{
              type: 'text',
              text: rows.length
                ? formatRows(rows)
                : 'No learnings match (try lower min_similarity or different phrasing)',
            }],
          };
        }
        case 'learning_search_fts': {
          const rows = await ftsSearch({
            text: args.query,
            limit: args.limit ?? 10,
            category: args.category,
          });
          return { content: [{ type: 'text', text: formatRows(rows) }] };
        }
        case 'learning_inject': {
          const { primary, antiPatterns } = await injectForTask(args);
          const parts = [];
          if (primary.length) {
            parts.push('### Relevant Learnings', formatRows(primary, { showScore: false }));
          }
          if (antiPatterns.length) {
            parts.push('', '### Anti-Patterns', formatRows(antiPatterns, { showScore: false }));
          }
          if (!parts.length) parts.push('No learnings found for this task');
          return { content: [{ type: 'text', text: parts.join('\n') }] };
        }
        case 'learning_context': {
          const ctx = await getProjectContext({ project: args.project, limit: args.limit ?? 5 });
          return { content: [{ type: 'text', text: formatContext(args.project, ctx) }] };
        }
        case 'learning_add': {
          const row = await addLearning(args);
          return { content: [{ type: 'text', text: `Inserted learning id=${row.id} learning_id=${row.learning_id}` }] };
        }
        case 'learning_mark_applied': {
          await markApplied(args.id);
          return { content: [{ type: 'text', text: `Marked applied: id=${args.id}` }] };
        }
        default:
          return { content: [{ type: 'text', text: `Unknown tool: ${name}` }], isError: true };
      }
    } catch (err) {
      Sentry.captureException?.(err);
      console.error(`[learning-mcp] ${name}:`, err);
      return { content: [{ type: 'text', text: `Error: ${err.message}` }], isError: true };
    }
  });

  return server;
}