Add semantic search and analytics tools for sessions

Phase 7 complete: Advanced session analysis capabilities

New MCP Tools (3):
1. session_semantic_search - Vector similarity search across all sessions
2. session_productivity_analytics - Metrics (avg duration, tasks, commits, tokens)
3. session_pattern_detection - Detect patterns (tool usage, task types)

Features:
- Semantic search with embedding-based similarity
- Falls back to ILIKE text search if embeddings are unavailable
- Analytics over configurable time periods (week/month/quarter)
- Pattern detection with frequency analysis

Use Cases:
- Find similar past work: "sessions about WhatsApp integration"
- Track productivity: avg commits/tasks per session
- Identify tool usage patterns: which tools are used most often
- Analyze task type distribution

All 7 phases complete! System ready for testing.

Related: CF-257

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Christian Gick
2026-01-19 10:21:47 +02:00
parent 3745a13eaf
commit a868dd40ec
3 changed files with 279 additions and 0 deletions

View File

@@ -939,4 +939,39 @@ export const toolDefinitions = [
required: ['session_id'],
},
},
{
name: 'session_semantic_search',
description: 'Semantic search across all session documentation using vector similarity',
inputSchema: {
type: 'object',
properties: {
query: { type: 'string', description: 'Search query' },
project: { type: 'string', description: 'Filter by project (optional)' },
limit: { type: 'number', description: 'Max results (default: 10)' },
},
required: ['query'],
},
},
{
name: 'session_productivity_analytics',
description: 'Get productivity metrics (avg duration, tasks/commits per session, etc.)',
inputSchema: {
type: 'object',
properties: {
project: { type: 'string', description: 'Filter by project (optional)' },
time_period: { type: 'string', enum: ['week', 'month', 'quarter'], description: 'Time period (default: month)' },
},
},
},
{
name: 'session_pattern_detection',
description: 'Detect patterns across sessions (tool usage, task types)',
inputSchema: {
type: 'object',
properties: {
project: { type: 'string', description: 'Filter by project (optional)' },
pattern_type: { type: 'string', enum: ['tool_usage', 'task_types', 'error_frequency'], description: 'Type of pattern to detect (default: tool_usage)' },
},
},
},
];

View File

@@ -422,3 +422,213 @@ export async function sessionDocumentationGenerate(args: SessionDocumentationGen
return `Documentation generated for session ${session_id} (${doc.length} characters)`;
}
// ============================================================================
// SEMANTIC SEARCH & ANALYTICS
// ============================================================================
/** Arguments accepted by `sessionSemanticSearch`. */
interface SessionSemanticSearchArgs {
/** Free-text search query; embedded for the vector search, or used as an ILIKE substring in the text fallback. */
query: string;
/** Optional filter: when set, only sessions whose `project` equals this value are returned. */
project?: string;
/** Maximum number of rows to return; `sessionSemanticSearch` defaults this to 10. */
limit?: number;
}
/** One row returned by `sessionSemanticSearch`, selected from the `sessions` table. */
interface SessionSearchResult {
/** The session's `id` column (aliased to `session_id` in the query). */
session_id: string;
session_number: number;
project: string;
/** Session summary; may be null in the vector-search path, which does not filter on it. */
summary: string | null;
/** NOTE(review): typed as string; the runtime type depends on how the query helper deserializes the column — confirm. */
started_at: string;
/** `1 - (embedding <=> query embedding)` for vector search; the constant 0.5 for text-fallback rows. */
similarity: number;
}
/**
 * Find completed sessions related to a free-text query.
 *
 * The query text is passed to `getEmbedding`. When that yields an embedding,
 * sessions that have a stored embedding are ranked by the `<=>` distance to it
 * (presumably pgvector cosine distance — confirm against the schema) and
 * `similarity` is reported as `1 - distance`. When it yields nothing, the
 * search falls back to a case-insensitive substring match (`ILIKE`) on
 * `sessions.summary`, ordered newest first, with `similarity` fixed at 0.5.
 *
 * @param args - `query` is the search text; `project` optionally restricts
 *   results to one project; `limit` caps the row count (default 10). The limit
 *   is truncated to an integer and clamped to be non-negative; a non-numeric
 *   limit falls back to 10.
 * @returns Matching sessions, closest first (vector search) or most recent
 *   first (text fallback).
 */
export async function sessionSemanticSearch(args: SessionSemanticSearchArgs): Promise<SessionSearchResult[]> {
  const { query: searchQuery, project } = args;

  // SQL LIMIT only accepts a non-negative integer. Tool input is declared as a
  // plain JSON number, so sanitize it here instead of letting the database
  // reject fractional or negative values.
  const requestedLimit = Number(args.limit ?? 10);
  const rowLimit = Number.isFinite(requestedLimit) ? Math.max(0, Math.trunc(requestedLimit)) : 10;

  const queryEmbedding = await getEmbedding(searchQuery);

  if (!queryEmbedding) {
    // Text fallback. Escape LIKE metacharacters (backslash is the default
    // escape character) so that `%` and `_` typed by the user match literally
    // instead of acting as wildcards.
    const literal = searchQuery.replace(/[\\%_]/g, '\\$&');
    const params: unknown[] = [`%${literal}%`];

    let projectFilter = '';
    if (project) {
      params.push(project);
      projectFilter = `AND s.project = $${params.length}`;
    }
    params.push(rowLimit);

    const sql = `
      SELECT
        s.id as session_id,
        s.session_number,
        s.project,
        s.summary,
        s.started_at,
        0.5 as similarity
      FROM sessions s
      WHERE s.summary IS NOT NULL
        AND s.status = 'completed'
        ${projectFilter}
        AND s.summary ILIKE $1
      ORDER BY s.started_at DESC
      LIMIT $${params.length}
    `;
    return query<SessionSearchResult>(sql, params);
  }

  // Vector similarity search: $1 is always the formatted query embedding.
  const params: unknown[] = [formatEmbedding(queryEmbedding)];

  let projectFilter = '';
  if (project) {
    params.push(project);
    projectFilter = `AND s.project = $${params.length}`;
  }
  params.push(rowLimit);

  const sql = `
    SELECT
      s.id as session_id,
      s.session_number,
      s.project,
      s.summary,
      s.started_at,
      1 - (s.embedding <=> $1) as similarity
    FROM sessions s
    WHERE s.embedding IS NOT NULL
      ${projectFilter}
      AND s.status = 'completed'
    ORDER BY s.embedding <=> $1
    LIMIT $${params.length}
  `;
  return query<SessionSearchResult>(sql, params);
}
/** Arguments accepted by `sessionProductivityAnalytics`. */
interface SessionAnalyticsArgs {
/** Optional filter: when set, only sessions whose `project` equals this value are included. */
project?: string;
/** Look-back window, mapped to 7 / 30 / 90 days; `sessionProductivityAnalytics` defaults this to 'month'. */
time_period?: 'week' | 'month' | 'quarter';
}
/** Aggregate metrics returned by `sessionProductivityAnalytics` over completed sessions in the selected window. */
interface SessionAnalytics {
/** Number of completed sessions in the window. */
total_sessions: number;
/** Average of `sessions.duration_minutes`, cast to an integer; 0 when there are no sessions. */
avg_duration_minutes: number;
/** Average count of distinct `task_activity.task_id` values per session, to one decimal place. */
avg_tasks_per_session: number;
/** Average count of distinct `session_commits.commit_sha` values per session, to one decimal place. */
avg_commits_per_session: number;
/** Average per-session note count derived from `session_notes`, to one decimal place. */
avg_notes_per_session: number;
/** Sum of `sessions.token_count` across the window; 0 when there are no sessions. */
total_tokens_used: number;
}
/**
 * Compute productivity metrics over completed sessions in a recent window.
 *
 * For every completed session started within the window (optionally limited
 * to one project) the query counts distinct tasks touched, distinct commits
 * and distinct notes, then averages those counts and the session duration and
 * sums the token counts.
 *
 * @param args - `project` optionally restricts the sessions considered;
 *   `time_period` selects the look-back window ('week' = 7 days,
 *   'month' = 30 days, 'quarter' = 90 days; default 'month').
 * @returns The aggregated metrics, all as JavaScript numbers; every field is 0
 *   when no row is returned.
 * @throws Error when `time_period` is not one of the supported values.
 */
export async function sessionProductivityAnalytics(args: SessionAnalyticsArgs): Promise<SessionAnalytics> {
  const { project } = args;
  const timePeriod: string = args.time_period ?? 'month';

  // Map time period to interval. The interval text is interpolated into the
  // SQL below, so only own keys of this whitelist are accepted (a plain
  // lookup would also "find" inherited keys such as `constructor`).
  const intervalMap: Record<string, string> = {
    week: '7 days',
    month: '30 days',
    quarter: '90 days',
  };
  if (!Object.prototype.hasOwnProperty.call(intervalMap, timePeriod)) {
    throw new Error(
      `Unsupported time_period "${timePeriod}"; expected one of: ${Object.keys(intervalMap).join(', ')}`,
    );
  }
  const interval = intervalMap[timePeriod];

  // The three LEFT JOINs multiply rows per session (tasks x commits x notes),
  // so every per-session count must be DISTINCT to stay correct.
  const sql = `
    WITH session_stats AS (
      SELECT
        s.id,
        s.duration_minutes,
        s.token_count,
        COUNT(DISTINCT ta.task_id) as tasks_touched,
        COUNT(DISTINCT sc.commit_sha) as commits_made,
        COUNT(DISTINCT sn.id) as notes_created
      FROM sessions s
      LEFT JOIN task_activity ta ON s.id = ta.session_id
      LEFT JOIN session_commits sc ON s.id = sc.session_id
      LEFT JOIN session_notes sn ON s.id = sn.session_id
      WHERE s.status = 'completed'
        AND s.started_at >= NOW() - INTERVAL '${interval}'
        ${project ? 'AND s.project = $1' : ''}
      GROUP BY s.id, s.duration_minutes, s.token_count
    )
    SELECT
      COUNT(*)::int as total_sessions,
      COALESCE(AVG(duration_minutes), 0)::int as avg_duration_minutes,
      COALESCE(AVG(tasks_touched), 0)::numeric(10,1) as avg_tasks_per_session,
      COALESCE(AVG(commits_made), 0)::numeric(10,1) as avg_commits_per_session,
      COALESCE(AVG(notes_created), 0)::numeric(10,1) as avg_notes_per_session,
      COALESCE(SUM(token_count), 0)::bigint as total_tokens_used
    FROM session_stats;
  `;

  const row = await queryOne<SessionAnalytics>(sql, project ? [project] : []);

  // NOTE(review): PostgreSQL drivers commonly deliver `numeric` and `bigint`
  // columns as strings (node-postgres does by default) — confirm for the
  // query helper in use. Coercing here keeps the declared `number` contract
  // either way; values that are already numbers pass through unchanged.
  const toNumber = (value: unknown): number => {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : 0;
  };

  return {
    total_sessions: toNumber(row?.total_sessions),
    avg_duration_minutes: toNumber(row?.avg_duration_minutes),
    avg_tasks_per_session: toNumber(row?.avg_tasks_per_session),
    avg_commits_per_session: toNumber(row?.avg_commits_per_session),
    avg_notes_per_session: toNumber(row?.avg_notes_per_session),
    total_tokens_used: toNumber(row?.total_tokens_used),
  };
}
/** Arguments accepted by `sessionPatternDetection`. */
interface SessionPatternArgs {
/** Optional filter: when set, only sessions whose `project` equals this value are analyzed. */
project?: string;
/**
 * Which analysis to run; `sessionPatternDetection` defaults this to 'tool_usage'.
 * 'error_frequency' has no query behind it and yields an empty list.
 */
pattern_type?: 'tool_usage' | 'task_types' | 'error_frequency';
}
/** One aggregated row returned by `sessionPatternDetection`. */
interface Pattern {
/** Grouping key: an element of `sessions.tools_used` ('tool_usage') or a task's `type` ('task_types'). */
pattern: string;
/** Number of joined rows in the group (`COUNT(*)`). */
frequency: number;
/** Average `sessions.duration_minutes` over the group's rows, cast to an integer. */
avg_session_duration: number;
/** Number of distinct sessions contributing to the group. */
sessions_count: number;
}
/**
 * Detect recurring patterns across completed sessions.
 *
 * - 'tool_usage' (default): expands each session's `tools_used` array and
 *   groups by tool, returning the 20 most frequent entries.
 * - 'task_types': joins sessions to their tasks through `task_activity` and
 *   groups by the task's `type`.
 * - 'error_frequency': accepted by the argument type but has no query behind
 *   it; an empty list is returned.
 *
 * Both queries keep only groups that occur more than 3 times and order them
 * by descending frequency.
 *
 * @param args - `project` optionally restricts the sessions considered;
 *   `pattern_type` selects the analysis.
 * @returns One row per detected pattern, or an empty array for a pattern type
 *   without an implementation.
 */
export async function sessionPatternDetection(args: SessionPatternArgs): Promise<Pattern[]> {
  const { project, pattern_type = 'tool_usage' } = args;
  const projectFilter = project ? 'AND s.project = $1' : '';
  const params: unknown[] = project ? [project] : [];

  if (pattern_type === 'tool_usage') {
    // Counts are cast to int and the average is COALESCEd, matching the
    // analytics query in this file, so the columns satisfy Pattern's `number`
    // fields instead of arriving as bigint or NULL.
    const sql = `
      SELECT
        unnest(s.tools_used) as pattern,
        COUNT(*)::int as frequency,
        COALESCE(AVG(s.duration_minutes), 0)::int as avg_session_duration,
        COUNT(DISTINCT s.id)::int as sessions_count
      FROM sessions s
      WHERE s.status = 'completed'
        AND s.tools_used IS NOT NULL
        ${projectFilter}
      GROUP BY pattern
      HAVING COUNT(*) > 3
      ORDER BY frequency DESC
      LIMIT 20;
    `;
    return query<Pattern>(sql, params);
  }

  if (pattern_type === 'task_types') {
    // One row per task_activity entry, so `frequency` counts activity rows and
    // the duration average is weighted by how many rows each session has.
    const sql = `
      SELECT
        t.type as pattern,
        COUNT(*)::int as frequency,
        COALESCE(AVG(s.duration_minutes), 0)::int as avg_session_duration,
        COUNT(DISTINCT s.id)::int as sessions_count
      FROM sessions s
      JOIN task_activity ta ON s.id = ta.session_id
      JOIN tasks t ON ta.task_id = t.id
      WHERE s.status = 'completed'
        ${projectFilter}
      GROUP BY t.type
      HAVING COUNT(*) > 3
      ORDER BY frequency DESC;
    `;
    return query<Pattern>(sql, params);
  }

  // 'error_frequency' (and any unrecognized value) is not implemented.
  return [];
}