diff --git a/.env.example b/.env.example index 7f22924..79d1ffd 100644 --- a/.env.example +++ b/.env.example @@ -8,6 +8,9 @@ POSTGRES_PORT=6432 LLM_API_URL=https://api.agiliton.cloud/llm LLM_API_KEY=your_llm_api_key_here +# LLM metadata extraction at embedding time (CF-1316) +METADATA_EXTRACTION_MODEL=claude-haiku-4-5-20251001 + # Cross-encoder re-ranking (CF-1317) RERANK_ENABLED=false RERANK_MODEL=rerank-v3.5 diff --git a/migrations/035_extracted_metadata.sql b/migrations/035_extracted_metadata.sql new file mode 100644 index 0000000..35c3d32 --- /dev/null +++ b/migrations/035_extracted_metadata.sql @@ -0,0 +1,7 @@ +-- CF-1316: Add LLM-extracted metadata JSONB column for filtered retrieval +-- Schema: { topics: string[], decisions: string[], blockers: string[], tools_used: string[], projects: string[], issue_keys: string[] } + +ALTER TABLE sessions ADD COLUMN IF NOT EXISTS extracted_metadata JSONB; + +-- GIN index for fast JSONB containment queries (@>) +CREATE INDEX IF NOT EXISTS idx_sessions_extracted_metadata ON sessions USING GIN(extracted_metadata); diff --git a/src/embeddings.ts b/src/embeddings.ts index baa6790..296d714 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -120,6 +120,92 @@ export async function rerank( } } +/** + * Extracted metadata schema (CF-1316) + */ +export interface ExtractedMetadata { + topics: string[]; + decisions: string[]; + blockers: string[]; + tools_used: string[]; + projects: string[]; + issue_keys: string[]; +} + +/** + * Extract structured metadata from session content using a fast LLM (CF-1316) + * Uses first 8,000 chars of content for cost optimization. + * Returns null on failure (non-blocking — don't break embedding pipeline). 
+ */ +export async function extractMetadata(content: string): Promise<ExtractedMetadata | null> { + const LLM_API_URL = process.env.LLM_API_URL || 'https://api.agiliton.cloud/llm'; + const LLM_API_KEY = process.env.LLM_API_KEY || ''; + const model = process.env.METADATA_EXTRACTION_MODEL || 'claude-haiku-4-5-20251001'; + + if (!LLM_API_KEY) return null; + + // Truncate to first 8K chars (cost optimization from Agentic RAG Module 4) + const truncated = content.slice(0, 8000); + + const systemPrompt = `Extract structured metadata from this session content. Return a JSON object with these fields: +- topics: Key technical topics discussed (e.g., "pgvector", "deployment", "authentication"). Max 10. +- decisions: Architecture or design decisions made (e.g., "Use RRF for hybrid search"). Max 5. +- blockers: Issues or blockers encountered (e.g., "Firecrawl connection refused"). Max 5. +- tools_used: Tools or commands used (e.g., "agiliton-deploy", "jira_create_issue"). Max 10. +- projects: Project keys mentioned (e.g., "CF", "BAB", "WF"). Max 5. +- issue_keys: Jira issue keys mentioned (e.g., "CF-1307", "BAB-42"). Max 10. + +Return ONLY valid JSON. 
If a field has no matches, use an empty array [].`; + + try { + const response = await fetch(`${LLM_API_URL}/v1/chat/completions`, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${LLM_API_KEY}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + model, + messages: [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: truncated }, + ], + max_tokens: 1024, + temperature: 0, + }), + }); + + if (!response.ok) { + console.error('Metadata extraction API error:', response.status, await response.text()); + return null; + } + + const data = await response.json() as { + choices: Array<{ message: { content: string } }>; + }; + + const raw = data.choices?.[0]?.message?.content; + if (!raw) return null; + + // Parse JSON from response (handle markdown code blocks) + const jsonStr = raw.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim(); + const parsed = JSON.parse(jsonStr); + + // Validate and normalize + return { + topics: Array.isArray(parsed.topics) ? parsed.topics.slice(0, 10) : [], + decisions: Array.isArray(parsed.decisions) ? parsed.decisions.slice(0, 5) : [], + blockers: Array.isArray(parsed.blockers) ? parsed.blockers.slice(0, 5) : [], + tools_used: Array.isArray(parsed.tools_used) ? parsed.tools_used.slice(0, 10) : [], + projects: Array.isArray(parsed.projects) ? parsed.projects.slice(0, 5) : [], + issue_keys: Array.isArray(parsed.issue_keys) ? 
parsed.issue_keys.slice(0, 10) : [], + }; + } catch (error) { + console.error('Metadata extraction failed:', error); + return null; + } +} + /** * Reciprocal Rank Fusion — merge two ranked result lists (CF-1315) * @param vectorResults IDs ranked by vector similarity (best first) diff --git a/src/index.ts b/src/index.ts index 880895f..2316be6 100644 --- a/src/index.ts +++ b/src/index.ts @@ -425,6 +425,9 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { project: a.project, limit: a.limit, search_mode: a.search_mode, + filter_topics: a.filter_topics, + filter_projects: a.filter_projects, + filter_issue_keys: a.filter_issue_keys, }), null, 2 diff --git a/src/tools/index.ts b/src/tools/index.ts index 9a11db9..43c5a0e 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -615,7 +615,7 @@ export const toolDefinitions = [ }, { name: 'session_semantic_search', - description: 'Search across all session documentation using hybrid (vector + keyword), vector-only, or keyword-only search.', + description: 'Search across all session documentation using hybrid (vector + keyword), vector-only, or keyword-only search. Supports optional metadata filters (topics, projects, issue_keys) — only use filters when the user explicitly mentions a topic/project. When unsure, search without filters.', inputSchema: { type: 'object', properties: { @@ -623,6 +623,9 @@ export const toolDefinitions = [ project: { type: 'string', description: 'Filter by project (optional)' }, limit: { type: 'number', description: 'Max results (default: 10)' }, search_mode: { type: 'string', enum: ['hybrid', 'vector', 'keyword'], description: 'Search mode (default: hybrid)' }, + filter_topics: { type: 'array', items: { type: 'string' }, description: 'Filter by extracted topics (e.g., ["pgvector", "deployment"]). Only use when user explicitly mentions topics.' 
}, + filter_projects: { type: 'array', items: { type: 'string' }, description: 'Filter by extracted project keys (e.g., ["CF", "BAB"]). Only use when user explicitly mentions projects.' }, + filter_issue_keys: { type: 'array', items: { type: 'string' }, description: 'Filter by extracted Jira issue keys (e.g., ["CF-1307"]). Only use when user explicitly mentions issue keys.' }, }, required: ['query'], }, diff --git a/src/tools/session-docs.ts b/src/tools/session-docs.ts index e8faa70..80eae77 100644 --- a/src/tools/session-docs.ts +++ b/src/tools/session-docs.ts @@ -458,6 +458,9 @@ interface SessionSemanticSearchArgs { project?: string; limit?: number; search_mode?: SearchMode; + filter_topics?: string[]; + filter_projects?: string[]; + filter_issue_keys?: string[]; } interface SessionSearchResult { @@ -473,9 +476,9 @@ interface SessionSearchResult { * Semantic search across all session documentation with hybrid/vector/keyword modes (CF-1315) */ export async function sessionSemanticSearch(args: SessionSemanticSearchArgs): Promise { - const { query: searchQuery, project, limit = 10, search_mode = 'hybrid' } = args; + const { query: searchQuery, project, limit = 10, search_mode = 'hybrid', filter_topics, filter_projects, filter_issue_keys } = args; - // Build shared filter clause + // Build shared filter clause (CF-1316: metadata filters via JSONB @> containment) const buildFilter = (startIdx: number) => { let where = ''; const params: unknown[] = []; @@ -484,6 +487,18 @@ export async function sessionSemanticSearch(args: SessionSemanticSearchArgs): Pr where += ` AND s.project = $${idx++}`; params.push(project); } + if (filter_topics && filter_topics.length > 0) { + where += ` AND s.extracted_metadata->'topics' @> $${idx++}::jsonb`; + params.push(JSON.stringify(filter_topics)); + } + if (filter_projects && filter_projects.length > 0) { + where += ` AND s.extracted_metadata->'projects' @> $${idx++}::jsonb`; + params.push(JSON.stringify(filter_projects)); + } + if 
(filter_issue_keys && filter_issue_keys.length > 0) { + where += ` AND s.extracted_metadata->'issue_keys' @> $${idx++}::jsonb`; + params.push(JSON.stringify(filter_issue_keys)); + } return { where, params, nextIdx: idx }; }; diff --git a/src/tools/sessions.ts b/src/tools/sessions.ts index fc29dfb..870fc74 100644 --- a/src/tools/sessions.ts +++ b/src/tools/sessions.ts @@ -2,7 +2,7 @@ // Sessions auto-create CF Jira issues and post output on close (CF-762) import { query, queryOne, execute } from '../db.js'; -import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank } from '../embeddings.js'; +import { getEmbedding, formatEmbedding, generateContentHash, rrfMerge, rerank, extractMetadata } from '../embeddings.js'; import { createSessionIssue, addComment, transitionToDone, updateIssueDescription } from '../services/jira.js'; interface SessionStartArgs { @@ -163,9 +163,13 @@ export async function sessionEnd(args: SessionEndArgs): Promise { // CF-1314: Store content hash alongside embedding const contentHash = generateContentHash(summary); - // Generate embedding for semantic search - const embedding = await getEmbedding(summary); + // Generate embedding + extract metadata in parallel (CF-1316) + const [embedding, metadata] = await Promise.all([ + getEmbedding(summary), + extractMetadata(summary), + ]); const embeddingValue = embedding ? formatEmbedding(embedding) : null; + const metadataValue = metadata ? JSON.stringify(metadata) : null; await execute( `UPDATE sessions @@ -174,9 +178,10 @@ export async function sessionEnd(args: SessionEndArgs): Promise { embedding = $2, status = $3, content_hash = $4, + extracted_metadata = $5::jsonb, updated_at = NOW() - WHERE id = $5`, - [summary, embeddingValue, status, contentHash, session_id] + WHERE id = $6`, + [summary, embeddingValue, status, contentHash, metadataValue, session_id] ); // Get session details