// Source listing metadata (from file browser):
// learning-mcp/src/server.js — 2026-04-13 16:00:11 +03:00
// 359 lines, 15 KiB, JavaScript

#!/usr/bin/env node
/**
* Learning MCP Server — pgvector-backed learnings + anti-patterns
*
* Tools:
* learning_query — semantic (vector) search over learnings_embeddings
* learning_search_fts — PostgreSQL full-text search fallback
* learning_inject — get top learnings for task injection (by category/task)
* learning_context — project-scoped learnings (Project + Related + Anti-patterns)
* learning_add — insert a new learning (auto-embeds)
* learning_mark_applied — increment applied_count by id
*
* Replaces direct psql access used by rag-query, learn-seed, learn-inject, learn-context.
*/
import * as Sentry from '@sentry/node';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import { query, toVector } from './db.js';
import { embed } from './embeddings.js';
// Initialise Sentry error reporting only when a DSN is configured;
// without SENTRY_DSN the server runs with reporting disabled.
const sentryDsn = process.env.SENTRY_DSN;
if (sentryDsn) {
  Sentry.init({
    dsn: sentryDsn,
    environment: process.env.SENTRY_ENVIRONMENT ?? 'production',
    tracesSampleRate: 0.1, // sample 10% of traces to limit overhead
  });
}
// ---------------------------------------------------------------------------
// Data access
// ---------------------------------------------------------------------------
/**
 * Vector (cosine) search over learnings_embeddings.
 *
 * Embeds `text`, runs a kNN query ordered by pgvector cosine distance, and
 * returns rows whose similarity (1 - distance) meets `minSimilarity`.
 * Note: the similarity cut-off is applied AFTER `LIMIT`, so fewer than
 * `limit` rows may be returned.
 *
 * @param {object} opts
 * @param {string} opts.text - Free-text query to embed.
 * @param {number} [opts.limit=5] - Max candidate rows fetched from the DB.
 * @param {string} [opts.category] - Optional exact category filter.
 * @param {number} [opts.minSimilarity=0.3] - Post-query similarity floor.
 * @param {string} [opts.project] - Optional project scope (rows with NULL project always match).
 * @returns {Promise<object[]>} matching rows with a `similarity` column.
 */
async function semanticSearch({ text, limit = 5, category, minSimilarity = 0.3, project }) {
  const queryVec = await embed(text);
  const params = [toVector(queryVec), limit];

  // Only approved (or unreviewed), embedded, non-deprecated rows are searchable.
  const conditions = [
    `(review_status = 'approved' OR review_status IS NULL)`,
    'embedding IS NOT NULL',
    'deprecated_at IS NULL',
  ];
  if (category) {
    params.push(category);
    conditions.push(`category = $${params.length}`);
  }
  if (project) {
    params.push(project);
    conditions.push(`(project = $${params.length} OR project IS NULL)`);
  }

  const sql = `
    SELECT id, learning_id, learning, context, category, project,
           is_anti_pattern, effectiveness_score, applied_count, source_file,
           1 - (embedding <=> $1::vector) AS similarity
    FROM learnings_embeddings
    WHERE ${conditions.join(' AND ')}
    ORDER BY embedding <=> $1::vector
    LIMIT $2`;

  const rows = await query(sql, params);
  return rows.filter((row) => row.similarity >= minSimilarity);
}
/**
 * PostgreSQL full-text fallback search (no embeddings required).
 *
 * Extracts up to ten alphanumeric words of 4+ characters from `text` and
 * ORs them together as a tsquery; results are ranked by ts_rank and then
 * effectiveness_score. Returns [] when no usable terms remain.
 *
 * @param {object} opts
 * @param {string} opts.text - Free-text query.
 * @param {number} [opts.limit=10] - Max rows returned.
 * @param {string} [opts.category] - Optional exact category filter.
 * @returns {Promise<object[]>}
 */
async function ftsSearch({ text, limit = 10, category }) {
  // Restricting terms to [A-Za-z0-9] also keeps tsquery syntax chars out of $1.
  const words = text.split(/[^A-Za-z0-9]+/).filter((word) => word.length >= 4);
  const terms = words.slice(0, 10);
  if (!terms.length) return [];

  const params = [terms.join(' | '), limit];
  let categoryClause = '';
  if (category) {
    params.push(category);
    categoryClause = `AND category = $${params.length}`;
  }

  const sql = `
    SELECT id, learning_id, learning, category, source_file, effectiveness_score,
           is_anti_pattern,
           ts_rank(to_tsvector('english', learning || ' ' || COALESCE(context,'')),
                   to_tsquery('english', $1)) AS rank
    FROM learnings_embeddings
    WHERE to_tsvector('english', learning || ' ' || COALESCE(context,''))
          @@ to_tsquery('english', $1)
      AND (review_status = 'approved' OR review_status IS NULL)
      AND deprecated_at IS NULL
      ${categoryClause}
    ORDER BY rank DESC, effectiveness_score DESC
    LIMIT $2`;
  return await query(sql, params);
}
/**
 * Fetch project-scoped knowledge: top positive learnings plus anti-patterns.
 *
 * A row matches the project when `project` equals the key or `project_tags`
 * contains it (ILIKE substring — NOTE(review): `%`/`_` in the key would widen
 * the match; confirm project keys never contain wildcard chars).
 * `limit` applies only to the positive list; anti-patterns are capped at 5.
 *
 * @param {object} opts
 * @param {string} opts.project - Project key (e.g., CF, LLB).
 * @param {number} [opts.limit=5] - Max positive learnings returned.
 * @returns {Promise<{primary: object[], antiPatterns: object[]}>}
 */
async function getProjectContext({ project, limit = 5 }) {
  const primary = await query(
    `SELECT id, learning, category, is_anti_pattern, effectiveness_score, applied_count
     FROM learnings_embeddings
     WHERE (project = $1 OR project_tags ILIKE '%' || $1 || '%')
       AND is_anti_pattern = false
       AND (review_status = 'approved' OR review_status IS NULL)
       AND deprecated_at IS NULL
     ORDER BY effectiveness_score DESC, applied_count DESC
     LIMIT $2`,
    [project, limit]
  );

  const antiPatterns = await query(
    `SELECT id, learning, category, effectiveness_score
     FROM learnings_embeddings
     WHERE (project = $1 OR project_tags ILIKE '%' || $1 || '%')
       AND is_anti_pattern = true
       AND (review_status = 'approved' OR review_status IS NULL)
       AND deprecated_at IS NULL
     ORDER BY effectiveness_score DESC
     LIMIT 5`,
    [project]
  );

  return { primary, antiPatterns };
}
/**
 * Select learnings to inject into a task context.
 *
 * With a `task` description: one semantic search, partitioned into positive
 * learnings and anti-patterns. With only a `category`: top rows by
 * effectiveness_score. With neither: both lists are empty.
 *
 * @param {object} opts
 * @param {string} [opts.task] - Task description for semantic matching.
 * @param {string} [opts.category] - Category filter / fallback selector.
 * @param {boolean} [opts.compact=false] - Smaller budgets (3+2 instead of 5+3).
 * @param {string} [opts.project] - Optional project scope.
 * @returns {Promise<{primary: object[], antiPatterns: object[]}>}
 */
async function injectForTask({ category, task, compact = false, project }) {
  const nPrimary = compact ? 3 : 5; // positive learnings budget
  const nAnti = compact ? 2 : 3;    // anti-patterns budget
  let primary = [];
  let anti = [];

  if (task) {
    // One embed + one kNN search, then partition by is_anti_pattern.
    // This fixes two defects in the previous version: anti-patterns could
    // leak into `primary` (no filter), and `anti` was filtered out of a
    // result set of only nAnti rows, which almost always left it empty.
    // It also halves the number of embedding calls.
    const rows = await semanticSearch({
      text: task, limit: nPrimary + nAnti, category, project, minSimilarity: 0.3,
    });
    primary = rows.filter((r) => !r.is_anti_pattern).slice(0, nPrimary);
    anti = rows.filter((r) => r.is_anti_pattern).slice(0, nAnti);
  } else if (category) {
    // No task text to embed — fall back to best-scored rows in the category.
    primary = await query(
      `SELECT id, learning_id, learning, category, effectiveness_score, applied_count, is_anti_pattern
       FROM learnings_embeddings
       WHERE category = $1 AND is_anti_pattern = false
         AND (review_status = 'approved' OR review_status IS NULL)
         AND deprecated_at IS NULL
       ORDER BY effectiveness_score DESC, applied_count DESC LIMIT $2`, [category, nPrimary]);
    anti = await query(
      `SELECT id, learning_id, learning, category, effectiveness_score
       FROM learnings_embeddings
       WHERE category = $1 AND is_anti_pattern = true
         AND (review_status = 'approved' OR review_status IS NULL)
         AND deprecated_at IS NULL
       ORDER BY effectiveness_score DESC LIMIT $2`, [category, nAnti]);
  }

  return { primary, antiPatterns: anti };
}
/**
 * Insert one learning row into learnings_embeddings, embedding the text first.
 *
 * @param {object} args
 * @param {string} args.learning - Required learning text (embedded for search).
 * @param {string} [args.context=''] - Extra context stored alongside the learning.
 * @param {string} [args.project=''] - Project key scope.
 * @param {string} [args.category='GENERAL'] - Category label.
 * @param {boolean} [args.is_anti_pattern=false]
 * @param {number} [args.effectiveness_score=0.7]
 * @param {string} [args.source_file='']
 * @param {string} [args.session_id='']
 * @param {string} [args.review_status='approved']
 * @returns {Promise<{id: number, learning_id: number}>} ids of the inserted row.
 * @throws {Error} when `learning` is missing or empty.
 */
async function addLearning(args) {
  const {
    learning, context = '', project = '', category = 'GENERAL',
    is_anti_pattern = false, effectiveness_score = 0.7,
    source_file = '', session_id = '', review_status = 'approved',
  } = args;
  if (!learning) throw new Error('learning is required');
  // NOTE(review): MAX(learning_id)+1 is not atomic — two concurrent inserts can
  // collide on learning_id. Acceptable at low write volume; a sequence/identity
  // column would be the proper fix.
  const nextIdRow = await query('SELECT COALESCE(MAX(learning_id), 0) + 1 AS n FROM learnings_embeddings');
  // presumably learning_id is int4 so `n` arrives as a JS number; if it is int8,
  // node-postgres would return a string — verify against the schema.
  const nextId = nextIdRow[0]?.n ?? 1;
  let vec = null;
  // Best-effort embed: on failure the row is still inserted with a NULL
  // embedding (semanticSearch skips rows where embedding IS NULL).
  try { vec = await embed(learning); } catch (e) { console.error('embed failed:', e.message); }
  const sql = `
    INSERT INTO learnings_embeddings
      (learning_id, learning, context, category, project, is_anti_pattern,
       effectiveness_score, source_file, session_id, review_status, embedding)
    VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11::vector)
    RETURNING id, learning_id`;
  const params = [
    nextId, learning, context, category, project, is_anti_pattern,
    effectiveness_score, source_file, session_id, review_status,
    vec ? toVector(vec) : null,
  ];
  const rows = await query(sql, params);
  return rows[0];
}
/**
 * Record that a learning was used: bump applied_count and touch timestamps.
 * Always resolves `{ ok: true, id }`, even when no row matches `id`.
 *
 * @param {number} id - Primary key of the learning row.
 * @returns {Promise<{ok: boolean, id: number}>}
 */
async function markApplied(id) {
  const sql = `UPDATE learnings_embeddings
    SET applied_count = applied_count + 1,
        last_applied = CURRENT_TIMESTAMP,
        updated_at = CURRENT_TIMESTAMP
    WHERE id = $1`;
  await query(sql, [id]);
  return { ok: true, id };
}
// ---------------------------------------------------------------------------
// Formatting
// ---------------------------------------------------------------------------
/**
 * Render learning rows as a plain-text list, one line per row.
 *
 * Each line: marker (⚠️ ANTI for anti-patterns, ✓ otherwise), [category],
 * #id (falls back to learning_id), then an optional score suffix:
 * similarity percent when `showScore` and similarity is present, otherwise
 * [eff=…] when effectiveness_score is present.
 *
 * @param {object[]|null|undefined} rows - Rows from the learnings queries.
 * @param {{showScore?: boolean}} [opts]
 * @returns {string} formatted list, or 'No learnings found' when empty.
 */
function formatRows(rows, { showScore = true } = {}) {
  if (!rows?.length) return 'No learnings found';
  const lines = [];
  for (const row of rows) {
    const marker = row.is_anti_pattern ? '⚠️ ANTI' : '✓';
    let suffix = '';
    if (showScore && row.similarity != null) {
      suffix = ` (${(row.similarity * 100).toFixed(0)}%)`;
    } else if (row.effectiveness_score != null) {
      suffix = ` [eff=${Number(row.effectiveness_score).toFixed(2)}]`;
    }
    lines.push(` ${marker} [${row.category}] #${row.id ?? row.learning_id}${suffix}: ${row.learning}`);
  }
  return lines.join('\n');
}
/**
 * Build the Markdown block returned by learning_context.
 *
 * Sections are emitted only when non-empty; a trailing HTML comment lists the
 * row ids so callers can later mark them applied.
 *
 * @param {string} project - Project key, echoed in the heading.
 * @param {{primary: object[], antiPatterns: object[]}} ctx
 * @returns {string}
 */
function formatContext(project, { primary, antiPatterns }) {
  const out = [`## Project Context: ${project}`, ''];

  // Append a titled section followed by a blank separator, skipping empties.
  const appendSection = (title, rows) => {
    if (!rows.length) return;
    out.push(title, formatRows(rows, { showScore: false }), '');
  };
  appendSection('### Project Learnings', primary);
  appendSection('### Anti-Patterns', antiPatterns);

  const ids = [];
  for (const row of [...primary, ...antiPatterns]) {
    if (row.id) ids.push(row.id);
  }
  if (ids.length) out.push(`<!-- LEARNING_IDS: ${ids.join(',')} -->`);

  return out.join('\n');
}
// ---------------------------------------------------------------------------
// MCP server
// ---------------------------------------------------------------------------
// Tool catalogue served via tools/list. Each entry pairs a name/description
// with a JSON Schema for its arguments; execution is dispatched by name in
// createServer(). Keep this list in sync with the handlers there.
const TOOLS = [
  // Vector-similarity search (requires rows with embeddings).
  {
    name: 'learning_query',
    description: 'Semantic search over the pgvector learnings DB. Returns learnings most relevant to a natural-language task description.',
    inputSchema: {
      type: 'object',
      properties: {
        query: { type: 'string', description: 'Free-text query / task description' },
        limit: { type: 'integer', default: 5 },
        category: { type: 'string', description: 'Optional category filter (SWIFT, PYTHON, INFRASTRUCTURE, AI, ...)' },
        project: { type: 'string', description: 'Optional project scope (e.g., CF, LLB, WHMCS)' },
        min_similarity: { type: 'number', default: 0.3 },
      },
      required: ['query'],
    },
  },
  // Keyword (tsvector) search — fallback when semantic matching is poor.
  {
    name: 'learning_search_fts',
    description: 'PostgreSQL full-text search over learnings (no embeddings needed). Useful when semantic search returns poor matches.',
    inputSchema: {
      type: 'object',
      properties: {
        query: { type: 'string' },
        limit: { type: 'integer', default: 10 },
        category: { type: 'string' },
      },
      required: ['query'],
    },
  },
  // Curated learnings + anti-patterns for prompt injection; task OR category.
  {
    name: 'learning_inject',
    description: 'Return top learnings for injection into a task context, scored for relevance. Provide either a task description or a category.',
    inputSchema: {
      type: 'object',
      properties: {
        task: { type: 'string', description: 'Task description (semantic match)' },
        category: { type: 'string', description: 'Category filter' },
        project: { type: 'string' },
        compact: { type: 'boolean', default: false },
      },
    },
  },
  // Session-start dump of a project's learnings and anti-patterns.
  {
    name: 'learning_context',
    description: 'Get all learnings + anti-patterns for a project. Used at session start to surface project-scoped knowledge.',
    inputSchema: {
      type: 'object',
      properties: {
        project: { type: 'string', description: 'Project key (e.g., CF, LLB)' },
        limit: { type: 'integer', default: 5 },
      },
      required: ['project'],
    },
  },
  // Single-row insert with automatic embedding.
  {
    name: 'learning_add',
    description: 'Insert a new learning into pgvector (auto-embeds via LiteLLM). Use sparingly — prefer learn-from-session for bulk.',
    inputSchema: {
      type: 'object',
      properties: {
        learning: { type: 'string' },
        context: { type: 'string' },
        project: { type: 'string' },
        category: { type: 'string', default: 'GENERAL' },
        is_anti_pattern: { type: 'boolean', default: false },
        effectiveness_score: { type: 'number', default: 0.7 },
        source_file: { type: 'string' },
        session_id: { type: 'string' },
      },
      required: ['learning'],
    },
  },
  // Usage feedback: bumps applied_count for a learning row.
  {
    name: 'learning_mark_applied',
    description: 'Increment applied_count for a learning id (call when a learning was successfully used).',
    inputSchema: {
      type: 'object',
      properties: { id: { type: 'integer' } },
      required: ['id'],
    },
  },
];
/**
 * Construct the MCP server and register list/call handlers for all tools.
 *
 * Tool calls are dispatched via a name → handler map. Any handler failure is
 * reported to Sentry (when available), logged to stderr, and returned to the
 * client as an isError text result.
 *
 * @returns {Server} configured but not yet connected MCP server.
 */
export function createServer() {
  const server = new Server(
    { name: 'learning-mcp', version: '1.0.0' },
    { capabilities: { tools: {} } }
  );

  // Wrap a plain string as a single-part MCP text result.
  const textResult = (text) => ({ content: [{ type: 'text', text }] });

  // Tool name -> async handler. A Map keeps lookups free of prototype keys.
  const handlers = new Map([
    ['learning_query', async (args) => {
      const rows = await semanticSearch({
        text: args.query,
        limit: args.limit ?? 5,
        category: args.category,
        project: args.project,
        minSimilarity: args.min_similarity ?? 0.3,
      });
      return textResult(rows.length
        ? formatRows(rows)
        : 'No learnings match (try lower min_similarity or different phrasing)');
    }],
    ['learning_search_fts', async (args) => {
      const rows = await ftsSearch({
        text: args.query, limit: args.limit ?? 10, category: args.category,
      });
      return textResult(formatRows(rows));
    }],
    ['learning_inject', async (args) => {
      const { primary, antiPatterns } = await injectForTask(args);
      const parts = [];
      if (primary.length) {
        parts.push('### Relevant Learnings', formatRows(primary, { showScore: false }));
      }
      if (antiPatterns.length) {
        parts.push('', '### Anti-Patterns', formatRows(antiPatterns, { showScore: false }));
      }
      if (!parts.length) parts.push('No learnings found for this task');
      return textResult(parts.join('\n'));
    }],
    ['learning_context', async (args) => {
      const ctx = await getProjectContext({ project: args.project, limit: args.limit ?? 5 });
      return textResult(formatContext(args.project, ctx));
    }],
    ['learning_add', async (args) => {
      const row = await addLearning(args);
      return textResult(`Inserted learning id=${row.id} learning_id=${row.learning_id}`);
    }],
    ['learning_mark_applied', async (args) => {
      await markApplied(args.id);
      return textResult(`Marked applied: id=${args.id}`);
    }],
  ]);

  server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));
  server.setRequestHandler(CallToolRequestSchema, async (req) => {
    const name = req.params.name;
    const args = req.params.arguments ?? {};
    const handler = handlers.get(name);
    if (!handler) {
      return { content: [{ type: 'text', text: `Unknown tool: ${name}` }], isError: true };
    }
    try {
      return await handler(args);
    } catch (err) {
      Sentry.captureException?.(err);
      console.error(`[learning-mcp] ${name}:`, err);
      return { content: [{ type: 'text', text: `Error: ${err.message}` }], isError: true };
    }
  });

  return server;
}