From a868dd40ec85977e38e62855ac947f5adbdf1ab7 Mon Sep 17 00:00:00 2001
From: Christian Gick
Date: Mon, 19 Jan 2026 10:21:47 +0200
Subject: [PATCH] Add semantic search and analytics tools for sessions

Phase 7 complete: Advanced session analysis capabilities

New MCP Tools (3):
1. session_semantic_search - Vector similarity search across all sessions
2. session_productivity_analytics - Metrics (avg duration, tasks, commits, tokens)
3. session_pattern_detection - Detect patterns (tool usage, task types)

Features:
- Semantic search with embedding-based similarity
- Fallback to ILIKE text search if embeddings unavailable
- Analytics over configurable time periods (week/month/quarter)
- Pattern detection with frequency analysis

Use Cases:
- Find similar past work: "sessions about WhatsApp integration"
- Track productivity: avg commits/tasks per session
- Identify tool usage patterns: which tools used most often
- Analyze task type distribution

All 7 phases complete! System ready for testing.

Related: CF-257

Co-Authored-By: Claude Sonnet 4.5
---
 src/index.ts              |  34 ++++++
 src/tools/index.ts        |  35 ++++++
 src/tools/session-docs.ts | 237 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 306 insertions(+)

diff --git a/src/index.ts b/src/index.ts
index 0c2a7ef..349d77e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -59,6 +59,9 @@ import {
   projectDocGet,
   projectDocList,
   sessionDocumentationGenerate,
+  sessionSemanticSearch,
+  sessionProductivityAnalytics,
+  sessionPatternDetection,
 } from './tools/session-docs.js';
 
 // Create MCP server
@@ -562,6 +565,37 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
           session_id: a.session_id,
         });
         break;
+      case 'session_semantic_search':
+        result = JSON.stringify(
+          await sessionSemanticSearch({
+            query: a.query,
+            project: a.project,
+            limit: a.limit,
+          }),
+          null,
+          2
+        );
+        break;
+      case 'session_productivity_analytics':
+        result = JSON.stringify(
+          await sessionProductivityAnalytics({
+            project: a.project,
+            time_period: a.time_period,
+          }),
+          null,
+          2
+        );
+        break;
+      case 'session_pattern_detection':
+        result = JSON.stringify(
+          await sessionPatternDetection({
+            project: a.project,
+            pattern_type: a.pattern_type,
+          }),
+          null,
+          2
+        );
+        break;
 
       default:
         throw new Error(`Unknown tool: ${name}`);
diff --git a/src/tools/index.ts b/src/tools/index.ts
index 6407e67..9909d6b 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -939,4 +939,39 @@ export const toolDefinitions = [
       required: ['session_id'],
     },
   },
+  {
+    name: 'session_semantic_search',
+    description: 'Semantic search across all session documentation using vector similarity',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        query: { type: 'string', description: 'Search query' },
+        project: { type: 'string', description: 'Filter by project (optional)' },
+        limit: { type: 'number', description: 'Max results (default: 10)' },
+      },
+      required: ['query'],
+    },
+  },
+  {
+    name: 'session_productivity_analytics',
+    description: 'Get productivity metrics (avg duration, tasks/commits per session, etc.)',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        project: { type: 'string', description: 'Filter by project (optional)' },
+        time_period: { type: 'string', enum: ['week', 'month', 'quarter'], description: 'Time period (default: month)' },
+      },
+    },
+  },
+  {
+    name: 'session_pattern_detection',
+    description: 'Detect patterns across sessions (tool usage, task types)',
+    inputSchema: {
+      type: 'object',
+      properties: {
+        project: { type: 'string', description: 'Filter by project (optional)' },
+        pattern_type: { type: 'string', enum: ['tool_usage', 'task_types', 'error_frequency'], description: 'Type of pattern to detect (default: tool_usage)' },
+      },
+    },
+  },
 ];
diff --git a/src/tools/session-docs.ts b/src/tools/session-docs.ts
index c296dfb..57f1817 100644
--- a/src/tools/session-docs.ts
+++ b/src/tools/session-docs.ts
@@ -422,3 +422,240 @@ export async function sessionDocumentationGenerate(args: SessionDocumentationGen
 
   return `Documentation generated for session ${session_id} (${doc.length} characters)`;
 }
+
+// ============================================================================
+// SEMANTIC SEARCH & ANALYTICS
+// ============================================================================
+
+interface SessionSemanticSearchArgs {
+  query: string;
+  project?: string;
+  limit?: number;
+}
+
+interface SessionSearchResult {
+  session_id: string;
+  session_number: number;
+  project: string;
+  summary: string | null;
+  started_at: string;
+  similarity: number;
+}
+
+/**
+ * Semantic search across completed sessions.
+ *
+ * Embeds the search text and orders sessions by `s.embedding <=> $1`,
+ * reporting `1 - distance` as `similarity`. When no embedding can be
+ * generated, falls back to a case-insensitive substring match on the
+ * session summary (newest first) with a fixed similarity of 0.5.
+ *
+ * @param args - Search text, optional project filter, and row limit (default 10)
+ * @returns Matching sessions, most similar first (newest first in the fallback)
+ */
+export async function sessionSemanticSearch(args: SessionSemanticSearchArgs): Promise<SessionSearchResult[]> {
+  const { query: searchQuery, project, limit = 10 } = args;
+
+  // Generate embedding for search query
+  const queryEmbedding = await getEmbedding(searchQuery);
+
+  if (!queryEmbedding) {
+    // Fallback to text search if embedding generation fails.
+    // Escape LIKE metacharacters so the search text is matched literally.
+    const escaped = searchQuery.replace(/[\\%_]/g, '\\$&');
+    const likePattern = `%${escaped}%`;
+    const sql = `
+      SELECT
+        s.id as session_id,
+        s.session_number,
+        s.project,
+        s.summary,
+        s.started_at,
+        0.5 as similarity
+      FROM sessions s
+      WHERE s.summary IS NOT NULL
+        AND s.status = 'completed'
+        ${project ? 'AND s.project = $1' : ''}
+        AND s.summary ILIKE $${project ? '2' : '1'}
+      ORDER BY s.started_at DESC
+      LIMIT $${project ? '3' : '2'}
+    `;
+
+    const params: unknown[] = project ? [project, likePattern, limit] : [likePattern, limit];
+    return query<SessionSearchResult>(sql, params);
+  }
+
+  const embeddingFormatted = formatEmbedding(queryEmbedding);
+
+  // Vector similarity search
+  const sql = `
+    SELECT
+      s.id as session_id,
+      s.session_number,
+      s.project,
+      s.summary,
+      s.started_at,
+      1 - (s.embedding <=> $1) as similarity
+    FROM sessions s
+    WHERE s.embedding IS NOT NULL
+      ${project ? 'AND s.project = $2' : ''}
+      AND s.status = 'completed'
+    ORDER BY s.embedding <=> $1
+    LIMIT $${project ? '3' : '2'}
+  `;
+
+  const params: unknown[] = project ? [embeddingFormatted, project, limit] : [embeddingFormatted, limit];
+  return query<SessionSearchResult>(sql, params);
+}
+
+interface SessionAnalyticsArgs {
+  project?: string;
+  time_period?: 'week' | 'month' | 'quarter';
+}
+
+interface SessionAnalytics {
+  total_sessions: number;
+  avg_duration_minutes: number;
+  avg_tasks_per_session: number;
+  avg_commits_per_session: number;
+  avg_notes_per_session: number;
+  total_tokens_used: number;
+}
+
+/**
+ * Get productivity analytics for completed sessions started within a recent window.
+ *
+ * @param args - Optional project filter and time window (default: month)
+ * @returns Aggregate metrics; all zeros when the query yields no row
+ * @throws Error when `time_period` is not week, month, or quarter
+ */
+export async function sessionProductivityAnalytics(args: SessionAnalyticsArgs): Promise<SessionAnalytics> {
+  const { project, time_period = 'month' } = args;
+
+  // Map time period to interval
+  const intervalMap = new Map<string, string>([
+    ['week', '7 days'],
+    ['month', '30 days'],
+    ['quarter', '90 days'],
+  ]);
+
+  // The value is interpolated into the SQL text, and tool arguments are not
+  // validated at runtime, so only the known constants above may pass.
+  const interval = intervalMap.get(time_period);
+  if (interval === undefined) {
+    throw new Error(`Invalid time_period: ${time_period}`);
+  }
+
+  // Every count uses DISTINCT: the three LEFT JOINs multiply rows per session,
+  // so a plain COUNT would scale with tasks x commits.
+  const sql = `
+    WITH session_stats AS (
+      SELECT
+        s.id,
+        s.duration_minutes,
+        s.token_count,
+        COUNT(DISTINCT ta.task_id) as tasks_touched,
+        COUNT(DISTINCT sc.commit_sha) as commits_made,
+        COUNT(DISTINCT sn.id) as notes_created
+      FROM sessions s
+      LEFT JOIN task_activity ta ON s.id = ta.session_id
+      LEFT JOIN session_commits sc ON s.id = sc.session_id
+      LEFT JOIN session_notes sn ON s.id = sn.session_id
+      WHERE s.status = 'completed'
+        AND s.started_at >= NOW() - INTERVAL '${interval}'
+        ${project ? 'AND s.project = $1' : ''}
+      GROUP BY s.id, s.duration_minutes, s.token_count
+    )
+    SELECT
+      COUNT(*)::int as total_sessions,
+      COALESCE(AVG(duration_minutes), 0)::int as avg_duration_minutes,
+      COALESCE(AVG(tasks_touched), 0)::numeric(10,1) as avg_tasks_per_session,
+      COALESCE(AVG(commits_made), 0)::numeric(10,1) as avg_commits_per_session,
+      COALESCE(AVG(notes_created), 0)::numeric(10,1) as avg_notes_per_session,
+      COALESCE(SUM(token_count), 0)::bigint as total_tokens_used
+    FROM session_stats;
+  `;
+
+  const result = await queryOne<SessionAnalytics>(sql, project ? [project] : []);
+
+  return result ?? {
+    total_sessions: 0,
+    avg_duration_minutes: 0,
+    avg_tasks_per_session: 0,
+    avg_commits_per_session: 0,
+    avg_notes_per_session: 0,
+    total_tokens_used: 0,
+  };
+}
+
+interface SessionPatternArgs {
+  project?: string;
+  pattern_type?: 'tool_usage' | 'task_types' | 'error_frequency';
+}
+
+interface Pattern {
+  pattern: string;
+  frequency: number;
+  avg_session_duration: number;
+  sessions_count: number;
+}
+
+/**
+ * Detect patterns across completed sessions.
+ *
+ * - `tool_usage` (default): the 20 most frequent entries of `tools_used`
+ *   that occur more than 3 times
+ * - `task_types`: task types with more than 3 matching task-activity rows
+ * - `error_frequency`: accepted but not implemented; returns an empty list
+ *
+ * @param args - Optional project filter and pattern type
+ * @returns Patterns ordered by descending frequency
+ */
+export async function sessionPatternDetection(args: SessionPatternArgs): Promise<Pattern[]> {
+  const { project, pattern_type = 'tool_usage' } = args;
+
+  if (pattern_type === 'tool_usage') {
+    // Analyze tool usage patterns
+    const sql = `
+      SELECT
+        unnest(s.tools_used) as pattern,
+        COUNT(*) as frequency,
+        AVG(s.duration_minutes)::int as avg_session_duration,
+        COUNT(DISTINCT s.id) as sessions_count
+      FROM sessions s
+      WHERE s.status = 'completed'
+        AND s.tools_used IS NOT NULL
+        ${project ? 'AND s.project = $1' : ''}
+      GROUP BY pattern
+      HAVING COUNT(*) > 3
+      ORDER BY frequency DESC
+      LIMIT 20;
+    `;
+
+    return query<Pattern>(sql, project ? [project] : []);
+  }
+
+  if (pattern_type === 'task_types') {
+    // Analyze task type patterns
+    const sql = `
+      SELECT
+        t.type as pattern,
+        COUNT(*) as frequency,
+        AVG(s.duration_minutes)::int as avg_session_duration,
+        COUNT(DISTINCT s.id) as sessions_count
+      FROM sessions s
+      JOIN task_activity ta ON s.id = ta.session_id
+      JOIN tasks t ON ta.task_id = t.id
+      WHERE s.status = 'completed'
+        ${project ? 'AND s.project = $1' : ''}
+      GROUP BY t.type
+      HAVING COUNT(*) > 3
+      ORDER BY frequency DESC;
+    `;
+
+    return query<Pattern>(sql, project ? [project] : []);
+  }
+
+  // error_frequency (or any other value): no query is defined for it yet
+  return [];
+}