Add database migrations for session context system

Phase 1: Database Schema Implementation
- Migration 010: Sessions table with bulletproof auto-incrementing
  - Unique session IDs across all projects
  - Per-project session numbers (1, 2, 3, ...)
  - Atomic sequence generation (no race conditions)
  - Session-task and session-commit linking
  - Semantic search with pgvector HNSW indexes

- Migration 011: Memories table with enhanced schema
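  - Renamed the existing session_memories table to memories (data preserved in place)
  - Renamed source_session to session_id and added a task_id column, each with a foreign key
  - Renamed times_surfaced/last_surfaced to access_count/last_accessed_at for consistency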
  - HNSW indexing for semantic search

- Migration 012: Builds table for CI/CD tracking
  - Links builds to sessions and versions
  - Tracks build status, timing, and metadata

All migrations tested and verified on agiliton database.

Related: CF-167 (Fix shared session-summary.md file conflict)
This commit is contained in:
Christian Gick
2026-01-17 07:41:03 +02:00
parent 6c8862dcc0
commit 00de7f1299
3 changed files with 274 additions and 0 deletions

170
migrations/010_sessions.sql Normal file
View File

@@ -0,0 +1,170 @@
-- Migration 010: per-project session tracking
-- Creates the sessions table; session_number is filled in by the numbering
-- trigger defined later in this migration.
-- Requires: 001_base_schema.sql (projects table, pgvector extension)

-- sessions: one row per session, with timing, context, metrics and status
CREATE TABLE IF NOT EXISTS sessions (
    id                TEXT PRIMARY KEY,              -- format: "session_{timestamp}_{uuid}"
    project           TEXT REFERENCES projects(key) ON DELETE SET NULL,
    session_number    INTEGER,                       -- per-project counter (e.g. "Session 439")

    -- Timing
    started_at        TIMESTAMPTZ NOT NULL,
    ended_at          TIMESTAMPTZ,
    -- Derived from the two timestamps above; NULL while ended_at is NULL
    duration_minutes  INTEGER GENERATED ALWAYS AS (
        EXTRACT(EPOCH FROM (ended_at - started_at)) / 60
    ) STORED,

    -- Context captured for the session
    working_directory TEXT,
    git_branch        TEXT,
    initial_prompt    TEXT,                          -- first user message
    summary           TEXT,                          -- auto-generated summary

    -- Embedding of the summary, used for similarity search
    embedding         vector(1024),

    -- Usage metrics
    message_count     INTEGER DEFAULT 0,
    token_count       INTEGER DEFAULT 0,
    tools_used        TEXT[],                        -- names of the tools used

    -- Lifecycle
    status            TEXT DEFAULT 'active' CHECK (status IN ('active', 'completed', 'interrupted')),
    created_at        TIMESTAMPTZ DEFAULT NOW(),
    updated_at        TIMESTAMPTZ DEFAULT NOW()
);
-- Lookup indexes for the sessions table.
-- IF NOT EXISTS keeps this migration re-runnable, matching the
-- CREATE TABLE IF NOT EXISTS used for the table itself.
CREATE INDEX IF NOT EXISTS idx_sessions_project ON sessions(project);
CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC);
CREATE INDEX IF NOT EXISTS idx_sessions_status ON sessions(status);
-- NOTE(review): covers the same columns as idx_sessions_project_number below
-- (differing only in sort direction and the partial predicate) - confirm both are needed.
CREATE INDEX IF NOT EXISTS idx_sessions_number ON sessions(project, session_number DESC);

-- HNSW index for semantic similarity search (requires pgvector)
CREATE INDEX IF NOT EXISTS idx_sessions_embedding
    ON sessions USING hnsw (embedding vector_cosine_ops);

-- Unique session number per project (partial index - only when project is set)
CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_project_number
    ON sessions(project, session_number)
    WHERE project IS NOT NULL;
-- Per-project counters backing sessions.session_number.
-- One row per project; next_number holds the value the next session will receive.
CREATE TABLE IF NOT EXISTS session_sequences (
    project      TEXT PRIMARY KEY REFERENCES projects(key) ON DELETE CASCADE,
    -- NOT NULL: with a NULL counter, next_number + 1 evaluates to NULL and
    -- get_next_session_number() would return NULL instead of failing loudly.
    next_number  INTEGER NOT NULL DEFAULT 1,
    last_updated TIMESTAMP WITH TIME ZONE DEFAULT NOW()
);
-- get_next_session_number(p_project): allocate the next session number for a project.
--   p_project  project key; a projects row and a counter row are created if missing
--   returns    the counter value before the increment (1 on a project's first call)
-- The counter is read and advanced by a single UPDATE ... RETURNING statement.
CREATE OR REPLACE FUNCTION get_next_session_number(p_project TEXT)
RETURNS INTEGER
LANGUAGE plpgsql
AS $fn$
DECLARE
    allocated INTEGER;
BEGIN
    -- Make sure the project exists so the counter row's foreign key is satisfied
    INSERT INTO projects (key, name)
    VALUES (p_project, p_project)
    ON CONFLICT (key) DO NOTHING;

    -- Seed the counter at 1 the first time this project is seen
    INSERT INTO session_sequences (project, next_number)
    VALUES (p_project, 1)
    ON CONFLICT (project) DO NOTHING;

    -- Advance the counter; RETURNING sees the updated row, so subtract 1
    -- to obtain the number that was current before this call
    UPDATE session_sequences
       SET next_number  = next_number + 1,
           last_updated = NOW()
     WHERE project = p_project
    RETURNING next_number - 1 INTO allocated;

    RETURN allocated;
END;
$fn$;
-- assign_session_number(): BEFORE INSERT trigger function for sessions.
-- Fills in session_number from get_next_session_number() when the new row has
-- a project but no explicit session_number; all other rows pass through unchanged.
CREATE OR REPLACE FUNCTION assign_session_number()
RETURNS TRIGGER AS $$
BEGIN
    IF NEW.project IS NOT NULL AND NEW.session_number IS NULL THEN
        NEW.session_number := get_next_session_number(NEW.project);
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Drop any previous definition first so re-running the migration does not
-- fail with "trigger already exists".
DROP TRIGGER IF EXISTS trg_assign_session_number ON sessions;

CREATE TRIGGER trg_assign_session_number
    BEFORE INSERT ON sessions
    FOR EACH ROW
    EXECUTE FUNCTION assign_session_number();
-- Link task_activity.session_id to sessions.id

-- Step 1: remove any earlier version of the constraint and make the column
--         nullable so dangling references can be cleared
ALTER TABLE task_activity
    DROP CONSTRAINT IF EXISTS fk_task_activity_session,
    ALTER COLUMN session_id DROP NOT NULL;

-- Step 2: clear session_ids that have no matching sessions row; this must
--         run before the constraint is added or existing data would violate it
UPDATE task_activity AS ta
   SET session_id = NULL
 WHERE ta.session_id IS NOT NULL
   AND NOT EXISTS (
        SELECT 1
        FROM sessions AS s
        WHERE s.id = ta.session_id
   );

-- Step 3: add the foreign key; deleting a session nulls the reference
ALTER TABLE task_activity
    ADD CONSTRAINT fk_task_activity_session
    FOREIGN KEY (session_id)
    REFERENCES sessions(id)
    ON DELETE SET NULL;
-- session_tasks: one row per (session, task) pair found in task_activity,
-- with the task's project/title/status plus first, last and total activity.
-- Activity rows with a NULL session_id are not filtered out; they are grouped
-- under a NULL session_id.
-- GROUP BY already yields exactly one row per group, so DISTINCT is not needed.
CREATE OR REPLACE VIEW session_tasks AS
SELECT
    ta.session_id,
    ta.task_id,
    t.project,
    t.title,
    t.status,
    MIN(ta.created_at) AS first_touched,
    MAX(ta.created_at) AS last_touched,
    COUNT(*)           AS activity_count
FROM task_activity AS ta
INNER JOIN tasks AS t
    ON ta.task_id = t.id
GROUP BY
    ta.session_id,
    ta.task_id,
    t.project,
    t.title,
    t.status;
-- session_commits: links a session to the git commits recorded for it.
-- Rows are removed together with their session (ON DELETE CASCADE), and a
-- given commit_sha can be linked to a session only once.
CREATE TABLE IF NOT EXISTS session_commits (
    id             SERIAL PRIMARY KEY,
    session_id     TEXT NOT NULL REFERENCES sessions(id) ON DELETE CASCADE,
    commit_sha     TEXT NOT NULL,
    repo           TEXT NOT NULL,
    commit_message TEXT,
    committed_at   TIMESTAMP WITH TIME ZONE,
    created_at     TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    UNIQUE (session_id, commit_sha)
);

-- IF NOT EXISTS keeps the index creation re-runnable, like the table above.
-- NOTE(review): the UNIQUE (session_id, commit_sha) constraint already provides
-- an index led by session_id - confirm idx_session_commits_session is still wanted.
CREATE INDEX IF NOT EXISTS idx_session_commits_session ON session_commits(session_id);
CREATE INDEX IF NOT EXISTS idx_session_commits_sha ON session_commits(commit_sha);
-- session_versions: versions reachable from a session through its commits.
-- Path: session_commits -> task_commits (same commit_sha) -> tasks -> versions.
-- Tasks without a matching version are excluded, so versions is inner-joined
-- (equivalent to the former LEFT JOIN ... WHERE v.id IS NOT NULL).
-- DISTINCT collapses the duplicates produced when several commits of one
-- session point at the same task.
CREATE OR REPLACE VIEW session_versions AS
SELECT DISTINCT
    sc.session_id,
    tc.task_id,
    t.version_id,
    v.version,
    v.status AS version_status
FROM session_commits AS sc
INNER JOIN task_commits AS tc
    ON sc.commit_sha = tc.commit_sha
INNER JOIN tasks AS t
    ON tc.task_id = t.id
INNER JOIN versions AS v
    ON t.version_id = v.id;
-- Catalog documentation for the objects created by this migration
COMMENT ON TABLE sessions
    IS 'Session metadata with unique session numbers per project';
COMMENT ON COLUMN sessions.session_number
    IS 'Auto-incrementing number per project (1, 2, 3, ...)';
COMMENT ON COLUMN sessions.embedding
    IS 'Vector embedding of session summary for semantic search';
COMMENT ON TABLE session_sequences
    IS 'Atomic counters for session numbers per project';
COMMENT ON TABLE session_commits
    IS 'Links sessions to git commits';
COMMENT ON FUNCTION get_next_session_number(TEXT)
    IS 'Atomic function to get next session number for a project';

View File

@@ -0,0 +1,58 @@
-- Migration 011: memories table (renamed from session_memories)
-- Renames the existing session_memories table to memories and extends it.
-- Requires: 001_base_schema.sql (pgvector), 010_sessions.sql (sessions table)

-- Step 1: rename the table (skipped when session_memories is absent)
ALTER TABLE IF EXISTS session_memories
    RENAME TO memories;

-- Step 2: optional link from a memory to a task; deleting the task nulls the link
ALTER TABLE memories
    ADD COLUMN IF NOT EXISTS task_id TEXT
        REFERENCES tasks(id) ON DELETE SET NULL;
-- Rename source_session to session_id and link it to sessions(id).

-- PostgreSQL has no RENAME COLUMN IF EXISTS, so guard the rename to keep the
-- migration re-runnable once the column already carries its new name.
DO $$
BEGIN
    IF EXISTS (
        SELECT 1
        FROM pg_attribute
        WHERE attrelid = 'memories'::regclass
          AND attname = 'source_session'
          AND NOT attisdropped
    ) THEN
        ALTER TABLE memories RENAME COLUMN source_session TO session_id;
    END IF;
END;
$$;

-- Remove any earlier version of the constraint, and make sure the column is
-- nullable so orphaned references can be cleared and ON DELETE SET NULL can work.
ALTER TABLE memories
    DROP CONSTRAINT IF EXISTS fk_memories_session;
ALTER TABLE memories
    ALTER COLUMN session_id DROP NOT NULL;

-- Clear session_ids that have no matching sessions row. Without this the
-- ADD CONSTRAINT below fails on pre-existing data; migration 010 applies the
-- same cleanup to task_activity before adding its foreign key.
UPDATE memories AS m
   SET session_id = NULL
 WHERE m.session_id IS NOT NULL
   AND NOT EXISTS (
        SELECT 1
        FROM sessions AS s
        WHERE s.id = m.session_id
   );

-- Add the foreign key; deleting a session nulls the reference
ALTER TABLE memories
    ADD CONSTRAINT fk_memories_session
    FOREIGN KEY (session_id) REFERENCES sessions(id) ON DELETE SET NULL;
-- Align column names with the rest of the schema
ALTER TABLE memories
    RENAME COLUMN times_surfaced TO access_count;
ALTER TABLE memories
    RENAME COLUMN last_surfaced TO last_accessed_at;

-- Store project as TEXT rather than varchar
ALTER TABLE memories
    ALTER COLUMN project SET DATA TYPE TEXT;

-- The embedding column is deliberately left as vector(1536) so existing
-- embeddings are preserved; switching to vector(1024) would be a later change.
-- Lookup indexes on memories (each is created only if missing)
CREATE INDEX IF NOT EXISTS idx_memories_session
    ON memories (session_id);
CREATE INDEX IF NOT EXISTS idx_memories_task
    ON memories (task_id);
CREATE INDEX IF NOT EXISTS idx_memories_project
    ON memories (project);
CREATE INDEX IF NOT EXISTS idx_memories_category
    ON memories (category);
CREATE INDEX IF NOT EXISTS idx_memories_created
    ON memories (created_at DESC);

-- Replace the old embedding index (noted as ivfflat) with an HNSW index
-- using cosine distance
DROP INDEX IF EXISTS idx_session_memories_embedding;
CREATE INDEX IF NOT EXISTS idx_memories_embedding
    ON memories
    USING hnsw (embedding vector_cosine_ops);
-- GIN index for keyword search over title and content combined.
-- Queries must use this exact expression for the index to be considered.
-- NOTE(review): if title or content can be NULL the concatenation is NULL and
-- the row contributes nothing to the index - confirm both columns are NOT NULL.
CREATE INDEX IF NOT EXISTS idx_memories_fts
    ON memories
    USING gin (
        to_tsvector('english', title || ' ' || content)
    );
-- Catalog documentation for the memories table and its columns
COMMENT ON TABLE memories
    IS 'Learnings and patterns discovered during development sessions';
COMMENT ON COLUMN memories.category
    IS 'Type of memory: pattern (reusable solution), fix (bug resolution), preference (user choice), gotcha (trap/pitfall), architecture (design decision)';
COMMENT ON COLUMN memories.context
    IS 'Optional context describing when/where this memory applies';
COMMENT ON COLUMN memories.embedding
    IS 'Vector embedding of title + content for semantic search';
COMMENT ON COLUMN memories.access_count
    IS 'Number of times this memory has been retrieved (for relevance ranking)';

46
migrations/012_builds.sql Normal file
View File

@@ -0,0 +1,46 @@
-- Migration 012: CI/CD build tracking
-- Records builds and ties each one to a session and a version.
-- Requires: 001_base_schema.sql (versions table), 010_sessions.sql (sessions table)

-- builds: one row per build
CREATE TABLE IF NOT EXISTS builds (
    id               SERIAL PRIMARY KEY,
    session_id       TEXT REFERENCES sessions(id) ON DELETE SET NULL,  -- kept (as NULL) when the session is deleted
    version_id       TEXT REFERENCES versions(id) ON DELETE CASCADE,   -- build is removed with its version
    build_number     INTEGER NOT NULL,
    status           TEXT DEFAULT 'pending' CHECK (status IN ('pending', 'running', 'success', 'failed')),

    -- Source and output locations
    git_commit_sha   TEXT,
    git_branch       TEXT,
    build_log_url    TEXT,
    artifacts_url    TEXT,

    -- Timing
    started_at       TIMESTAMPTZ NOT NULL,
    finished_at      TIMESTAMPTZ,
    -- Derived from the two timestamps above; NULL while finished_at is NULL
    duration_seconds INTEGER GENERATED ALWAYS AS (
        EXTRACT(EPOCH FROM (finished_at - started_at))
    ) STORED,

    created_at       TIMESTAMPTZ DEFAULT NOW()
);
-- Lookup indexes for the builds table.
-- IF NOT EXISTS keeps this migration re-runnable, matching the
-- CREATE TABLE IF NOT EXISTS used for the table itself.
CREATE INDEX IF NOT EXISTS idx_builds_session ON builds(session_id);
CREATE INDEX IF NOT EXISTS idx_builds_version ON builds(version_id);
CREATE INDEX IF NOT EXISTS idx_builds_status ON builds(status);
CREATE INDEX IF NOT EXISTS idx_builds_started ON builds(started_at DESC);
CREATE INDEX IF NOT EXISTS idx_builds_commit ON builds(git_commit_sha);

-- One build number per version; rows without a version are not constrained
CREATE UNIQUE INDEX IF NOT EXISTS idx_builds_version_number
    ON builds(version_id, build_number)
    WHERE version_id IS NOT NULL;
-- Catalog documentation for the builds table and its columns
COMMENT ON TABLE builds
    IS 'CI/CD build tracking linked to sessions and versions';
COMMENT ON COLUMN builds.session_id
    IS 'Optional link to session that triggered the build';
COMMENT ON COLUMN builds.version_id
    IS 'Link to version being built';
COMMENT ON COLUMN builds.build_log_url
    IS 'URL to build logs (e.g., GitHub Actions run)';
COMMENT ON COLUMN builds.artifacts_url
    IS 'URL to build artifacts (e.g., app binary, Docker image)';
COMMENT ON COLUMN builds.duration_seconds
    IS 'Auto-calculated build duration in seconds';