learning-mcp/src/server.js

#!/usr/bin/env node
/**
 * Learning MCP Server — pgvector-backed learnings + anti-patterns
 *
 * Tools:
 *   learning_query      — semantic (vector) search over learnings_embeddings
 *   learning_search_fts — PostgreSQL full-text search fallback
 *   learning_inject     — get top learnings for task injection (by category/task)
 *   learning_context    — project-scoped learnings (Project + Related + Anti-patterns)
 *   learning_add        — insert a new learning (auto-embeds)
 *   learning_mark_applied — increment applied_count by id
 *
 * Replaces direct psql access used by rag-query, learn-seed, learn-inject, learn-context.
 */
import * as Sentry from '@sentry/node';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';

import { query, toVector } from './db.js';
import { embed } from './embeddings.js';

if (process.env.SENTRY_DSN) {
    Sentry.init({
        dsn: process.env.SENTRY_DSN,
        environment: process.env.SENTRY_ENVIRONMENT ?? 'production',
        tracesSampleRate: 0.1,
    });
}

// ---------------------------------------------------------------------------
// Data access
// ---------------------------------------------------------------------------

async function semanticSearch({ text, limit = 5, category, minSimilarity = 0.3, project }) {
    const vec = await embed(text);
    const params = [toVector(vec), limit];
    let where = `(review_status = 'approved' OR review_status IS NULL)
                 AND embedding IS NOT NULL
                 AND deprecated_at IS NULL`;
    if (category) { params.push(category); where += ` AND category = $${params.length}`; }
    if (project) { params.push(project); where += ` AND (project = $${params.length} OR project IS NULL)`; }
    const sql = `
        SELECT id, learning_id, learning, context, category, project,
               is_anti_pattern, effectiveness_score, applied_count, source_file,
               1 - (embedding <=> $1::vector) AS similarity
        FROM learnings_embeddings
        WHERE ${where}
        ORDER BY embedding <=> $1::vector
        LIMIT $2`;
    const rows = await query(sql, params);
    return rows.filter((r) => r.similarity >= minSimilarity);
}

async function ftsSearch({ text, limit = 10, category }) {
    const terms = text
        .split(/[^A-Za-z0-9]+/)
        .filter((w) => w.length >= 4)
        .slice(0, 10);
    if (terms.length === 0) return [];
    const tsquery = terms.join(' | ');
    const params = [tsquery, limit];
    let where = '';
    if (category) { params.push(category); where = `AND category = $${params.length}`; }
    const sql = `
        SELECT id, learning_id, learning, category, source_file, effectiveness_score,
               is_anti_pattern,
               ts_rank(to_tsvector('english', learning || ' ' || COALESCE(context,'')),
                       to_tsquery('english', $1)) AS rank
        FROM learnings_embeddings
        WHERE to_tsvector('english', learning || ' ' || COALESCE(context,''))
              @@ to_tsquery('english', $1)
          AND (review_status = 'approved' OR review_status IS NULL)
          AND deprecated_at IS NULL
          ${where}
        ORDER BY rank DESC, effectiveness_score DESC
        LIMIT $2`;
    return await query(sql, params);
}

async function getProjectContext({ project, limit = 5 }) {
    const params = [project, limit];
    const primary = await query(
        `SELECT id, learning, category, is_anti_pattern, effectiveness_score, applied_count
         FROM learnings_embeddings
         WHERE (project = $1 OR project_tags ILIKE '%' || $1 || '%')
           AND is_anti_pattern = false
           AND (review_status = 'approved' OR review_status IS NULL)
           AND deprecated_at IS NULL
         ORDER BY effectiveness_score DESC, applied_count DESC
         LIMIT $2`, params);
    const antiPatterns = await query(
        `SELECT id, learning, category, effectiveness_score
         FROM learnings_embeddings
         WHERE (project = $1 OR project_tags ILIKE '%' || $1 || '%')
           AND is_anti_pattern = true
           AND (review_status = 'approved' OR review_status IS NULL)
           AND deprecated_at IS NULL
         ORDER BY effectiveness_score DESC
         LIMIT 5`, [project]);
    return { primary, antiPatterns };
}

async function injectForTask({ category, task, compact = false, project }) {
    const nPrimary = compact ? 3 : 5;
    const nAnti = compact ? 2 : 3;
    let primary = [];
    let anti = [];
    if (task) {
        primary = await semanticSearch({
            text: task, limit: nPrimary, category, project, minSimilarity: 0.3,
        });
        anti = await semanticSearch({
            text: task, limit: nAnti, category, project, minSimilarity: 0.3,
        }).then((rows) => rows.filter((r) => r.is_anti_pattern));
    } else if (category) {
        primary = await query(
            `SELECT id, learning_id, learning, category, effectiveness_score, applied_count, is_anti_pattern
             FROM learnings_embeddings
             WHERE category = $1 AND is_anti_pattern = false
               AND (review_status = 'approved' OR review_status IS NULL)
               AND deprecated_at IS NULL
             ORDER BY effectiveness_score DESC, applied_count DESC LIMIT $2`, [category, nPrimary]);
        anti = await query(
            `SELECT id, learning_id, learning, category, effectiveness_score
             FROM learnings_embeddings
             WHERE category = $1 AND is_anti_pattern = true
               AND (review_status = 'approved' OR review_status IS NULL)
               AND deprecated_at IS NULL
             ORDER BY effectiveness_score DESC LIMIT $2`, [category, nAnti]);
    }
    return { primary, antiPatterns: anti };
}

async function addLearning(args) {
    const {
        learning, context = '', project = '', category = 'GENERAL',
        is_anti_pattern = false, effectiveness_score = 0.7,
        source_file = '', session_id = '', review_status = 'approved',
    } = args;
    if (!learning) throw new Error('learning is required');

    const nextIdRow = await query('SELECT COALESCE(MAX(learning_id), 0) + 1 AS n FROM learnings_embeddings');
    const nextId = nextIdRow[0]?.n ?? 1;

    let vec = null;
    try { vec = await embed(learning); } catch (e) { console.error('embed failed:', e.message); }

    const sql = `
        INSERT INTO learnings_embeddings
          (learning_id, learning, context, category, project, is_anti_pattern,
           effectiveness_score, source_file, session_id, review_status, embedding)
        VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11::vector)
        RETURNING id, learning_id`;
    const params = [
        nextId, learning, context, category, project, is_anti_pattern,
        effectiveness_score, source_file, session_id, review_status,
        vec ? toVector(vec) : null,
    ];
    const rows = await query(sql, params);
    return rows[0];
}

async function markApplied(id) {
    await query(
        `UPDATE learnings_embeddings
         SET applied_count = applied_count + 1,
             last_applied = CURRENT_TIMESTAMP,
             updated_at = CURRENT_TIMESTAMP
         WHERE id = $1`, [id]);
    return { ok: true, id };
}

// ---------------------------------------------------------------------------
// Formatting
// ---------------------------------------------------------------------------

function formatRows(rows, { showScore = true } = {}) {
    if (!rows?.length) return 'No learnings found';
    return rows.map((r) => {
        const marker = r.is_anti_pattern ? '⚠️ ANTI' : '✓';
        const score = showScore && r.similarity != null
            ? ` (${(r.similarity * 100).toFixed(0)}%)`
            : r.effectiveness_score != null ? ` [eff=${Number(r.effectiveness_score).toFixed(2)}]` : '';
        return `  ${marker} [${r.category}] #${r.id ?? r.learning_id}${score}: ${r.learning}`;
    }).join('\n');
}

function formatContext(project, { primary, antiPatterns }) {
    const lines = [`## Project Context: ${project}`, ''];
    if (primary.length) {
        lines.push('### Project Learnings');
        lines.push(formatRows(primary, { showScore: false }));
        lines.push('');
    }
    if (antiPatterns.length) {
        lines.push('### Anti-Patterns');
        lines.push(formatRows(antiPatterns, { showScore: false }));
        lines.push('');
    }
    const ids = [...primary, ...antiPatterns].map((r) => r.id).filter(Boolean);
    if (ids.length) lines.push(`<!-- LEARNING_IDS: ${ids.join(',')} -->`);
    return lines.join('\n');
}

// ---------------------------------------------------------------------------
// MCP server
// ---------------------------------------------------------------------------

const TOOLS = [
    {
        name: 'learning_query',
        description: 'Semantic search over the pgvector learnings DB. Returns learnings most relevant to a natural-language task description.',
        inputSchema: {
            type: 'object',
            properties: {
                query: { type: 'string', description: 'Free-text query / task description' },
                limit: { type: 'integer', default: 5 },
                category: { type: 'string', description: 'Optional category filter (SWIFT, PYTHON, INFRASTRUCTURE, AI, ...)' },
                project: { type: 'string', description: 'Optional project scope (e.g., CF, LLB, WHMCS)' },
                min_similarity: { type: 'number', default: 0.3 },
            },
            required: ['query'],
        },
    },
    {
        name: 'learning_search_fts',
        description: 'PostgreSQL full-text search over learnings (no embeddings needed). Useful when semantic search returns poor matches.',
        inputSchema: {
            type: 'object',
            properties: {
                query: { type: 'string' },
                limit: { type: 'integer', default: 10 },
                category: { type: 'string' },
            },
            required: ['query'],
        },
    },
    {
        name: 'learning_inject',
        description: 'Return top learnings for injection into a task context, scored for relevance. Provide either a task description or a category.',
        inputSchema: {
            type: 'object',
            properties: {
                task: { type: 'string', description: 'Task description (semantic match)' },
                category: { type: 'string', description: 'Category filter' },
                project: { type: 'string' },
                compact: { type: 'boolean', default: false },
            },
        },
    },
    {
        name: 'learning_context',
        description: 'Get all learnings + anti-patterns for a project. Used at session start to surface project-scoped knowledge.',
        inputSchema: {
            type: 'object',
            properties: {
                project: { type: 'string', description: 'Project key (e.g., CF, LLB)' },
                limit: { type: 'integer', default: 5 },
            },
            required: ['project'],
        },
    },
    {
        name: 'learning_add',
        description: 'Insert a new learning into pgvector (auto-embeds via LiteLLM). Use sparingly — prefer learn-from-session for bulk.',
        inputSchema: {
            type: 'object',
            properties: {
                learning: { type: 'string' },
                context: { type: 'string' },
                project: { type: 'string' },
                category: { type: 'string', default: 'GENERAL' },
                is_anti_pattern: { type: 'boolean', default: false },
                effectiveness_score: { type: 'number', default: 0.7 },
                source_file: { type: 'string' },
                session_id: { type: 'string' },
            },
            required: ['learning'],
        },
    },
    {
        name: 'learning_mark_applied',
        description: 'Increment applied_count for a learning id (call when a learning was successfully used).',
        inputSchema: {
            type: 'object',
            properties: { id: { type: 'integer' } },
            required: ['id'],
        },
    },
];

export function createServer() {
    const server = new Server(
        { name: 'learning-mcp', version: '1.0.0' },
        { capabilities: { tools: {} } }
    );

    server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));

    server.setRequestHandler(CallToolRequestSchema, async (req) => {
        const name = req.params.name;
        const args = req.params.arguments ?? {};
        try {
            switch (name) {
                case 'learning_query': {
                    const rows = await semanticSearch({
                        text: args.query,
                        limit: args.limit ?? 5,
                        category: args.category,
                        project: args.project,
                        minSimilarity: args.min_similarity ?? 0.3,
                    });
                    return { content: [{ type: 'text', text: rows.length
                        ? formatRows(rows)
                        : 'No learnings match (try lower min_similarity or different phrasing)' }] };
                }
                case 'learning_search_fts': {
                    const rows = await ftsSearch({
                        text: args.query, limit: args.limit ?? 10, category: args.category,
                    });
                    return { content: [{ type: 'text', text: formatRows(rows) }] };
                }
                case 'learning_inject': {
                    const { primary, antiPatterns } = await injectForTask(args);
                    const parts = [];
                    if (primary.length) {
                        parts.push('### Relevant Learnings', formatRows(primary, { showScore: false }));
                    }
                    if (antiPatterns.length) {
                        parts.push('', '### Anti-Patterns', formatRows(antiPatterns, { showScore: false }));
                    }
                    if (!parts.length) parts.push('No learnings found for this task');
                    return { content: [{ type: 'text', text: parts.join('\n') }] };
                }
                case 'learning_context': {
                    const ctx = await getProjectContext({ project: args.project, limit: args.limit ?? 5 });
                    return { content: [{ type: 'text', text: formatContext(args.project, ctx) }] };
                }
                case 'learning_add': {
                    const row = await addLearning(args);
                    return { content: [{ type: 'text', text: `Inserted learning id=${row.id} learning_id=${row.learning_id}` }] };
                }
                case 'learning_mark_applied': {
                    await markApplied(args.id);
                    return { content: [{ type: 'text', text: `Marked applied: id=${args.id}` }] };
                }
                default:
                    return { content: [{ type: 'text', text: `Unknown tool: ${name}` }], isError: true };
            }
        } catch (err) {
            Sentry.captureException?.(err);
            console.error(`[learning-mcp] ${name}:`, err);
            return { content: [{ type: 'text', text: `Error: ${err.message}` }], isError: true };
        }
    });

    return server;
}