359 lines
15 KiB
JavaScript
359 lines
15 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
|
|
* Learning MCP Server — pgvector-backed learnings + anti-patterns
|
|
*
|
|
* Tools:
|
|
* learning_query — semantic (vector) search over learnings_embeddings
|
|
* learning_search_fts — PostgreSQL full-text search fallback
|
|
* learning_inject — get top learnings for task injection (by category/task)
|
|
* learning_context — project-scoped learnings (Project + Related + Anti-patterns)
|
|
* learning_add — insert a new learning (auto-embeds)
|
|
* learning_mark_applied — increment applied_count by id
|
|
*
|
|
* Replaces direct psql access used by rag-query, learn-seed, learn-inject, learn-context.
|
|
*/
|
|
import * as Sentry from '@sentry/node';
|
|
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
|
|
|
|
import { query, toVector } from './db.js';
|
|
import { embed } from './embeddings.js';
|
|
|
|
// Initialize Sentry error reporting only when a DSN is configured; without
// SENTRY_DSN the server runs with no telemetry (captureException calls below
// are optional-chained, so they no-op safely).
if (process.env.SENTRY_DSN) {
  Sentry.init({
    dsn: process.env.SENTRY_DSN,
    environment: process.env.SENTRY_ENVIRONMENT ?? 'production',
    // Sample 10% of transactions to keep tracing overhead low.
    tracesSampleRate: 0.1,
  });
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Data access
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Semantic (vector) search over learnings_embeddings.
 *
 * Embeds `text` via the embeddings service, then ranks rows by cosine
 * distance (`<=>`) to the query vector. Only approved (or unreviewed),
 * non-deprecated rows with a stored embedding are considered. The similarity
 * floor is applied *after* the DB LIMIT, so fewer than `limit` rows may come
 * back.
 *
 * @param {object} opts
 * @param {string} opts.text - Natural-language query to embed.
 * @param {number} [opts.limit=5] - Max rows fetched from the DB.
 * @param {string} [opts.category] - Optional exact category filter.
 * @param {number} [opts.minSimilarity=0.3] - Post-fetch similarity floor.
 * @param {string} [opts.project] - Optional project scope; global (NULL-project) rows still match.
 * @returns {Promise<object[]>} Rows with a computed `similarity` column in [0, 1].
 */
async function semanticSearch({ text, limit = 5, category, minSimilarity = 0.3, project }) {
  const queryVector = toVector(await embed(text));
  const bindings = [queryVector, limit];
  let where = `(review_status = 'approved' OR review_status IS NULL)
    AND embedding IS NOT NULL
    AND deprecated_at IS NULL`;
  if (category) {
    bindings.push(category);
    where += ` AND category = $${bindings.length}`;
  }
  if (project) {
    bindings.push(project);
    // Project-scoped rows plus global (NULL-project) rows.
    where += ` AND (project = $${bindings.length} OR project IS NULL)`;
  }
  const sql = `
    SELECT id, learning_id, learning, context, category, project,
           is_anti_pattern, effectiveness_score, applied_count, source_file,
           1 - (embedding <=> $1::vector) AS similarity
    FROM learnings_embeddings
    WHERE ${where}
    ORDER BY embedding <=> $1::vector
    LIMIT $2`;
  const hits = await query(sql, bindings);
  return hits.filter((row) => row.similarity >= minSimilarity);
}
|
|
|
|
/**
 * PostgreSQL full-text search over learnings — fallback when semantic search
 * misses (no embeddings required).
 *
 * Tokenizes `text` into alphanumeric words, keeps up to 10 of length >= 4
 * (shorter words are too noisy), and OR-combines them into a tsquery.
 *
 * @param {object} opts
 * @param {string} opts.text - Free-text query.
 * @param {number} [opts.limit=10] - Max rows returned.
 * @param {string} [opts.category] - Optional exact category filter.
 * @returns {Promise<object[]>} Ranked rows; empty array when no usable terms.
 */
async function ftsSearch({ text, limit = 10, category }) {
  const words = text
    .split(/[^A-Za-z0-9]+/)
    .filter((word) => word.length >= 4)
    .slice(0, 10);
  if (words.length === 0) return [];
  const bindings = [words.join(' | '), limit];
  let categoryFilter = '';
  if (category) {
    bindings.push(category);
    categoryFilter = `AND category = $${bindings.length}`;
  }
  const sql = `
    SELECT id, learning_id, learning, category, source_file, effectiveness_score,
           is_anti_pattern,
           ts_rank(to_tsvector('english', learning || ' ' || COALESCE(context,'')),
                   to_tsquery('english', $1)) AS rank
    FROM learnings_embeddings
    WHERE to_tsvector('english', learning || ' ' || COALESCE(context,''))
          @@ to_tsquery('english', $1)
      AND (review_status = 'approved' OR review_status IS NULL)
      AND deprecated_at IS NULL
      ${categoryFilter}
    ORDER BY rank DESC, effectiveness_score DESC
    LIMIT $2`;
  return await query(sql, bindings);
}
|
|
|
|
/**
 * Project-scoped learnings for session start: top positive learnings plus the
 * top 5 anti-patterns for a project.
 *
 * Matches rows whose `project` equals the key, or whose `project_tags`
 * contains it as a substring (ILIKE). NOTE(review): a project key containing
 * '%' or '_' would act as a wildcard in the ILIKE — keys appear to be short
 * codes like CF/LLB, so this is assumed benign; confirm against callers.
 *
 * @param {object} opts
 * @param {string} opts.project - Project key (e.g., CF, LLB).
 * @param {number} [opts.limit=5] - Max positive learnings (anti-patterns are capped at 5).
 * @returns {Promise<{primary: object[], antiPatterns: object[]}>}
 */
async function getProjectContext({ project, limit = 5 }) {
  // The two result sets are independent — fetch them in parallel.
  const [primary, antiPatterns] = await Promise.all([
    query(
      `SELECT id, learning, category, is_anti_pattern, effectiveness_score, applied_count
       FROM learnings_embeddings
       WHERE (project = $1 OR project_tags ILIKE '%' || $1 || '%')
         AND is_anti_pattern = false
         AND (review_status = 'approved' OR review_status IS NULL)
         AND deprecated_at IS NULL
       ORDER BY effectiveness_score DESC, applied_count DESC
       LIMIT $2`, [project, limit]),
    query(
      `SELECT id, learning, category, effectiveness_score
       FROM learnings_embeddings
       WHERE (project = $1 OR project_tags ILIKE '%' || $1 || '%')
         AND is_anti_pattern = true
         AND (review_status = 'approved' OR review_status IS NULL)
         AND deprecated_at IS NULL
       ORDER BY effectiveness_score DESC
       LIMIT 5`, [project]),
  ]);
  return { primary, antiPatterns };
}
|
|
|
|
/**
 * Select learnings to inject into a task prompt.
 *
 * With a `task`, uses semantic search; otherwise with only a `category`,
 * returns the top-scored rows for that category. In both branches `primary`
 * contains only positive learnings and `antiPatterns` only anti-patterns.
 *
 * Bug fixed vs. previous version: the task branch used to run a second
 * semantic search truncated to `nAnti` rows and *then* filter on
 * is_anti_pattern — so anti-patterns were almost never returned, and
 * `primary` could itself contain anti-patterns (inconsistent with the
 * category branch). Now one padded search is partitioned instead.
 *
 * @param {object} opts
 * @param {string} [opts.category] - Category filter / fallback selector.
 * @param {string} [opts.task] - Task description for semantic matching.
 * @param {boolean} [opts.compact=false] - Smaller result counts (3/2 vs 5/3).
 * @param {string} [opts.project] - Optional project scope.
 * @returns {Promise<{primary: object[], antiPatterns: object[]}>}
 */
async function injectForTask({ category, task, compact = false, project }) {
  const nPrimary = compact ? 3 : 5;
  const nAnti = compact ? 2 : 3;
  let primary = [];
  let anti = [];
  if (task) {
    // Fetch a padded candidate set, then partition by is_anti_pattern so the
    // anti-pattern slots aren't lost to a pre-filter LIMIT. The 2x padding
    // gives lower-ranked anti-patterns a chance to surface.
    const hits = await semanticSearch({
      text: task, limit: (nPrimary + nAnti) * 2, category, project, minSimilarity: 0.3,
    });
    primary = hits.filter((r) => !r.is_anti_pattern).slice(0, nPrimary);
    anti = hits.filter((r) => r.is_anti_pattern).slice(0, nAnti);
  } else if (category) {
    // Independent queries — run them in parallel.
    [primary, anti] = await Promise.all([
      query(
        `SELECT id, learning_id, learning, category, effectiveness_score, applied_count, is_anti_pattern
         FROM learnings_embeddings
         WHERE category = $1 AND is_anti_pattern = false
           AND (review_status = 'approved' OR review_status IS NULL)
           AND deprecated_at IS NULL
         ORDER BY effectiveness_score DESC, applied_count DESC LIMIT $2`, [category, nPrimary]),
      query(
        `SELECT id, learning_id, learning, category, effectiveness_score
         FROM learnings_embeddings
         WHERE category = $1 AND is_anti_pattern = true
           AND (review_status = 'approved' OR review_status IS NULL)
           AND deprecated_at IS NULL
         ORDER BY effectiveness_score DESC LIMIT $2`, [category, nAnti]),
    ]);
  }
  return { primary, antiPatterns: anti };
}
|
|
|
|
/**
 * Insert a single new learning row, embedding the text when the embeddings
 * service is reachable (insert proceeds with a NULL vector otherwise).
 *
 * @param {object} args - Tool arguments; only `learning` is required.
 * @returns {Promise<{id: number, learning_id: number}>} Keys of the new row.
 * @throws {Error} When `learning` is missing or empty.
 */
async function addLearning(args) {
  const {
    learning, context = '', project = '', category = 'GENERAL',
    is_anti_pattern = false, effectiveness_score = 0.7,
    source_file = '', session_id = '', review_status = 'approved',
  } = args;
  if (!learning) throw new Error('learning is required');

  // NOTE(review): MAX()+1 is racy under concurrent inserts — a DB sequence
  // would be safer; confirm whether concurrent writers are possible here.
  const idRows = await query('SELECT COALESCE(MAX(learning_id), 0) + 1 AS n FROM learnings_embeddings');
  const learningId = idRows[0]?.n ?? 1;

  // Best-effort embedding: log and continue on failure.
  let embeddingVec = null;
  try {
    embeddingVec = await embed(learning);
  } catch (e) {
    console.error('embed failed:', e.message);
  }

  const insertSql = `
    INSERT INTO learnings_embeddings
      (learning_id, learning, context, category, project, is_anti_pattern,
       effectiveness_score, source_file, session_id, review_status, embedding)
    VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11::vector)
    RETURNING id, learning_id`;
  const inserted = await query(insertSql, [
    learningId, learning, context, category, project, is_anti_pattern,
    effectiveness_score, source_file, session_id, review_status,
    embeddingVec ? toVector(embeddingVec) : null,
  ]);
  return inserted[0];
}
|
|
|
|
/**
 * Record one successful application of a learning: bumps applied_count and
 * refreshes last_applied / updated_at.
 *
 * @param {number} id - Primary key of the learning row.
 * @returns {Promise<{ok: true, id: number}>} Always ok, even when no row matched.
 */
async function markApplied(id) {
  const sql = `UPDATE learnings_embeddings
    SET applied_count = applied_count + 1,
        last_applied = CURRENT_TIMESTAMP,
        updated_at = CURRENT_TIMESTAMP
    WHERE id = $1`;
  await query(sql, [id]);
  return { ok: true, id };
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Formatting
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/**
 * Render learning rows as one text line each.
 *
 * Line shape: ` <marker> [<category>] #<id><score>: <learning>` where marker
 * is '⚠️ ANTI' for anti-patterns and '✓' otherwise. When `showScore` is true
 * and the row carries a `similarity`, the score is a percentage; otherwise a
 * non-null `effectiveness_score` is shown as `[eff=…]`; otherwise omitted.
 *
 * @param {object[]|null|undefined} rows - Rows from the learnings table.
 * @param {object} [opts]
 * @param {boolean} [opts.showScore=true] - Prefer similarity % over eff score.
 * @returns {string} Newline-joined lines, or 'No learnings found'.
 */
function formatRows(rows, { showScore = true } = {}) {
  if (!rows?.length) return 'No learnings found';
  const lines = [];
  for (const row of rows) {
    const marker = row.is_anti_pattern ? '⚠️ ANTI' : '✓';
    let score = '';
    if (showScore && row.similarity != null) {
      score = ` (${(row.similarity * 100).toFixed(0)}%)`;
    } else if (row.effectiveness_score != null) {
      score = ` [eff=${Number(row.effectiveness_score).toFixed(2)}]`;
    }
    lines.push(` ${marker} [${row.category}] #${row.id ?? row.learning_id}${score}: ${row.learning}`);
  }
  return lines.join('\n');
}
|
|
|
|
/**
 * Render a project's context block as markdown: heading, optional
 * "Project Learnings" and "Anti-Patterns" sections, and a trailing HTML
 * comment listing the row ids so callers can later mark them applied.
 *
 * @param {string} project - Project key for the heading.
 * @param {object} ctx
 * @param {object[]} ctx.primary - Positive learnings.
 * @param {object[]} ctx.antiPatterns - Anti-pattern learnings.
 * @returns {string} Markdown text.
 */
function formatContext(project, { primary, antiPatterns }) {
  const out = [`## Project Context: ${project}`, ''];
  const appendSection = (title, rows) => {
    if (!rows.length) return;
    out.push(title, formatRows(rows, { showScore: false }), '');
  };
  appendSection('### Project Learnings', primary);
  appendSection('### Anti-Patterns', antiPatterns);
  const ids = [...primary, ...antiPatterns].map((r) => r.id).filter(Boolean);
  if (ids.length) out.push(`<!-- LEARNING_IDS: ${ids.join(',')} -->`);
  return out.join('\n');
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// MCP server
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Tool manifest advertised via ListTools. Each entry's inputSchema is a JSON
// Schema describing the arguments accepted by the matching CallTool handler
// in createServer().
const TOOLS = [
  // Semantic (vector) search — primary relevance signal.
  {
    name: 'learning_query',
    description: 'Semantic search over the pgvector learnings DB. Returns learnings most relevant to a natural-language task description.',
    inputSchema: {
      type: 'object',
      properties: {
        query: { type: 'string', description: 'Free-text query / task description' },
        limit: { type: 'integer', default: 5 },
        category: { type: 'string', description: 'Optional category filter (SWIFT, PYTHON, INFRASTRUCTURE, AI, ...)' },
        project: { type: 'string', description: 'Optional project scope (e.g., CF, LLB, WHMCS)' },
        min_similarity: { type: 'number', default: 0.3 },
      },
      required: ['query'],
    },
  },
  // Keyword (tsvector) search — fallback when semantic matches are poor.
  {
    name: 'learning_search_fts',
    description: 'PostgreSQL full-text search over learnings (no embeddings needed). Useful when semantic search returns poor matches.',
    inputSchema: {
      type: 'object',
      properties: {
        query: { type: 'string' },
        limit: { type: 'integer', default: 10 },
        category: { type: 'string' },
      },
      required: ['query'],
    },
  },
  // Task injection — no required fields: either `task` or `category` works.
  {
    name: 'learning_inject',
    description: 'Return top learnings for injection into a task context, scored for relevance. Provide either a task description or a category.',
    inputSchema: {
      type: 'object',
      properties: {
        task: { type: 'string', description: 'Task description (semantic match)' },
        category: { type: 'string', description: 'Category filter' },
        project: { type: 'string' },
        compact: { type: 'boolean', default: false },
      },
    },
  },
  // Session-start project context (learnings + anti-patterns).
  {
    name: 'learning_context',
    description: 'Get all learnings + anti-patterns for a project. Used at session start to surface project-scoped knowledge.',
    inputSchema: {
      type: 'object',
      properties: {
        project: { type: 'string', description: 'Project key (e.g., CF, LLB)' },
        limit: { type: 'integer', default: 5 },
      },
      required: ['project'],
    },
  },
  // Single-row insert; embeds synchronously via the embeddings service.
  {
    name: 'learning_add',
    description: 'Insert a new learning into pgvector (auto-embeds via LiteLLM). Use sparingly — prefer learn-from-session for bulk.',
    inputSchema: {
      type: 'object',
      properties: {
        learning: { type: 'string' },
        context: { type: 'string' },
        project: { type: 'string' },
        category: { type: 'string', default: 'GENERAL' },
        is_anti_pattern: { type: 'boolean', default: false },
        effectiveness_score: { type: 'number', default: 0.7 },
        source_file: { type: 'string' },
        session_id: { type: 'string' },
      },
      required: ['learning'],
    },
  },
  // Usage feedback loop: bump applied_count for effectiveness ranking.
  {
    name: 'learning_mark_applied',
    description: 'Increment applied_count for a learning id (call when a learning was successfully used).',
    inputSchema: {
      type: 'object',
      properties: { id: { type: 'integer' } },
      required: ['id'],
    },
  },
];
|
|
|
|
/**
 * Build the MCP server: registers the tool manifest for ListTools and a
 * CallTool dispatcher that routes each tool name to its handler. Handler
 * errors are reported to Sentry (when configured), logged, and returned as
 * isError text responses rather than thrown.
 *
 * @returns {Server} Configured (not yet connected) MCP server instance.
 */
export function createServer() {
  const server = new Server(
    { name: 'learning-mcp', version: '1.0.0' },
    { capabilities: { tools: {} } }
  );

  server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: TOOLS }));

  // Wrap plain text in the MCP content envelope.
  const text = (value) => ({ content: [{ type: 'text', text: value }] });

  // One handler per tool name; each returns an MCP response object.
  const handlers = {
    async learning_query(args) {
      const rows = await semanticSearch({
        text: args.query,
        limit: args.limit ?? 5,
        category: args.category,
        project: args.project,
        minSimilarity: args.min_similarity ?? 0.3,
      });
      return text(rows.length
        ? formatRows(rows)
        : 'No learnings match (try lower min_similarity or different phrasing)');
    },
    async learning_search_fts(args) {
      const rows = await ftsSearch({
        text: args.query, limit: args.limit ?? 10, category: args.category,
      });
      return text(formatRows(rows));
    },
    async learning_inject(args) {
      const { primary, antiPatterns } = await injectForTask(args);
      const parts = [];
      if (primary.length) {
        parts.push('### Relevant Learnings', formatRows(primary, { showScore: false }));
      }
      if (antiPatterns.length) {
        parts.push('', '### Anti-Patterns', formatRows(antiPatterns, { showScore: false }));
      }
      if (!parts.length) parts.push('No learnings found for this task');
      return text(parts.join('\n'));
    },
    async learning_context(args) {
      const ctx = await getProjectContext({ project: args.project, limit: args.limit ?? 5 });
      return text(formatContext(args.project, ctx));
    },
    async learning_add(args) {
      const row = await addLearning(args);
      return text(`Inserted learning id=${row.id} learning_id=${row.learning_id}`);
    },
    async learning_mark_applied(args) {
      await markApplied(args.id);
      return text(`Marked applied: id=${args.id}`);
    },
  };

  server.setRequestHandler(CallToolRequestSchema, async (req) => {
    const name = req.params.name;
    const args = req.params.arguments ?? {};
    // Object.hasOwn guards against prototype keys (e.g. 'toString') being
    // treated as tools.
    if (!Object.hasOwn(handlers, name)) {
      return { content: [{ type: 'text', text: `Unknown tool: ${name}` }], isError: true };
    }
    try {
      return await handlers[name](args);
    } catch (err) {
      Sentry.captureException?.(err);
      console.error(`[learning-mcp] ${name}:`, err);
      return { content: [{ type: 'text', text: `Error: ${err.message}` }], isError: true };
    }
  });

  return server;
}
|