StarPunk/starpunk/migrations.py
Phil Skentelbery 8352c3ab7c refactor: Rename SCHEMA_SQL to INITIAL_SCHEMA_SQL
This aligns with ADR-033's migration system redesign. The initial schema
represents the v1.0.0 baseline and should not be modified. All schema
changes after v1.0.0 must go in migration files.

Changes:
- Renamed SCHEMA_SQL → INITIAL_SCHEMA_SQL in database.py
- Updated all references in migrations.py comments
- Added comment: "DO NOT MODIFY - This represents the v1.0.0 schema state"
- No functional changes, purely documentation improvement

Part of v1.1.0 migration system redesign (Phase 2).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-25 09:59:17 -07:00


"""
Database migration runner for StarPunk
Automatically discovers and applies pending migrations on startup.
Migrations are numbered SQL files in the migrations/ directory.
Fresh Database Detection:
- If schema_migrations table is empty AND schema is current
- Marks all migrations as applied (skip execution)
- This handles databases created with current INITIAL_SCHEMA_SQL
Existing Database Behavior:
- Applies only pending migrations
- Migrations already in schema_migrations are skipped
Concurrency Protection:
- Uses database-level locking (BEGIN IMMEDIATE) to prevent race conditions
- Multiple workers can start simultaneously; only one applies migrations
- Other workers wait and verify completion using exponential backoff retry
"""

import logging
import random
import sqlite3
import time
from pathlib import Path


class MigrationError(Exception):
    """Raised when a migration fails to apply"""


def create_migrations_table(conn):
    """
    Create schema_migrations tracking table if it doesn't exist

    Args:
        conn: SQLite connection
    """
    conn.execute("""
        CREATE TABLE IF NOT EXISTS schema_migrations (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            migration_name TEXT UNIQUE NOT NULL,
            applied_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
        )
    """)
    conn.execute("""
        CREATE INDEX IF NOT EXISTS idx_schema_migrations_name
        ON schema_migrations(migration_name)
    """)
    conn.commit()
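

# Illustrative sketch (added for exposition; `_demo_tracking_table` is a
# hypothetical name, not part of the module): CREATE TABLE IF NOT EXISTS
# makes create_migrations_table() safe to call on every startup.
def _demo_tracking_table():
    conn = sqlite3.connect(":memory:")
    create_migrations_table(conn)
    create_migrations_table(conn)  # second call is a harmless no-op
    assert table_exists(conn, "schema_migrations")
    conn.close()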


def is_schema_current(conn):
    """
    Check if database schema is current (matches INITIAL_SCHEMA_SQL + all migrations)

    Uses a heuristic: check for the presence of the latest schema features.

    Checks for:
    - code_verifier column NOT in auth_state (removed in migration 003)
    - authorization_codes table (migration 002 or INITIAL_SCHEMA_SQL >= v1.0.0-rc.1)
    - token_hash column in tokens table (migration 002)
    - Token indexes (migration 002 only, removed from INITIAL_SCHEMA_SQL in v1.0.0-rc.2)

    Args:
        conn: SQLite connection

    Returns:
        bool: True if schema is fully current (all tables, columns, AND indexes exist);
        False if any piece is missing (a legacy database needing migrations)
    """
    try:
        # Check for code_verifier column NOT in auth_state (removed in
        # migration 003). If it still exists, the schema is outdated.
        if column_exists(conn, 'auth_state', 'code_verifier'):
            return False

        # Check for authorization_codes table (added in migration 002)
        if not table_exists(conn, 'authorization_codes'):
            return False

        # Check for token_hash column in tokens table (migration 002)
        if not column_exists(conn, 'tokens', 'token_hash'):
            return False

        # Check for token indexes (created by migration 002 ONLY).
        # These indexes were removed from INITIAL_SCHEMA_SQL in v1.0.0-rc.2
        # to prevent conflicts when migrations run.
        # A database with tables/columns but no indexes means:
        # - INITIAL_SCHEMA_SQL was run (creating tables/columns)
        # - But migration 002 hasn't run yet (no indexes)
        # So it's NOT fully current and needs migrations.
        if not index_exists(conn, 'idx_tokens_hash'):
            return False
        if not index_exists(conn, 'idx_tokens_me'):
            return False
        if not index_exists(conn, 'idx_tokens_expires'):
            return False

        return True
    except sqlite3.OperationalError:
        # Schema check failed - definitely not current
        return False


def table_exists(conn, table_name):
    """
    Check if table exists in database

    Args:
        conn: SQLite connection
        table_name: Name of table to check

    Returns:
        bool: True if table exists
    """
    cursor = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
        (table_name,)
    )
    return cursor.fetchone() is not None


def column_exists(conn, table_name, column_name):
    """
    Check if column exists in table

    Args:
        conn: SQLite connection
        table_name: Name of table
        column_name: Name of column

    Returns:
        bool: True if column exists
    """
    try:
        # PRAGMA statements cannot use bound parameters, so the table name is
        # interpolated directly; callers only pass known, hard-coded table names.
        cursor = conn.execute(f"PRAGMA table_info({table_name})")
        columns = [row[1] for row in cursor.fetchall()]
        return column_name in columns
    except sqlite3.OperationalError:
        return False


def index_exists(conn, index_name):
    """
    Check if index exists in database

    Args:
        conn: SQLite connection
        index_name: Name of index to check

    Returns:
        bool: True if index exists
    """
    cursor = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='index' AND name=?",
        (index_name,)
    )
    return cursor.fetchone() is not None
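

# Illustrative sketch (added for exposition; `_demo_introspection` is a
# hypothetical name, not part of the module): exercising the helpers above
# against a throwaway in-memory database.
def _demo_introspection():
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE tokens (id INTEGER PRIMARY KEY, token_hash TEXT)")
    conn.execute("CREATE INDEX idx_tokens_hash ON tokens(token_hash)")
    assert table_exists(conn, "tokens")
    assert column_exists(conn, "tokens", "token_hash")
    assert index_exists(conn, "idx_tokens_hash")
    # With no authorization_codes table, the schema heuristic reports "not current"
    assert not is_schema_current(conn)
    conn.close()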


def is_migration_needed(conn, migration_name):
    """
    Check if a specific migration is needed based on database state

    This is used for fresh databases where INITIAL_SCHEMA_SQL may have already
    included some migration features. We check the actual database state
    rather than just applying all migrations blindly.

    Args:
        conn: SQLite connection
        migration_name: Migration filename to check

    Returns:
        bool: True if the migration should be applied, False if it already
        arrived via INITIAL_SCHEMA_SQL
    """
    # Migration 001: Adds code_verifier column to auth_state
    if migration_name == "001_add_code_verifier_to_auth_state.sql":
        # Check if the column already exists (it was added to INITIAL_SCHEMA_SQL in v0.8.0)
        return not column_exists(conn, 'auth_state', 'code_verifier')

    # Migration 002: Creates new tokens/authorization_codes tables with indexes
    if migration_name == "002_secure_tokens_and_authorization_codes.sql":
        # This migration drops and recreates the tokens table, so we check if:
        # 1. The new tokens table structure exists (token_hash column)
        # 2. The authorization_codes table exists
        # 3. The indexes exist
        # If tables/columns are missing, this is a truly legacy database - migration needed
        if not table_exists(conn, 'authorization_codes'):
            return True
        if not column_exists(conn, 'tokens', 'token_hash'):
            return True

        # If the tables exist with the correct structure, check the indexes.
        # If indexes are missing but tables exist, this is a fresh database from
        # INITIAL_SCHEMA_SQL that just needs indexes. We CANNOT run the full migration
        # (it would fail trying to CREATE TABLE). Instead, we mark it as not needed
        # and apply the indexes separately.
        has_all_indexes = (
            index_exists(conn, 'idx_tokens_hash') and
            index_exists(conn, 'idx_tokens_me') and
            index_exists(conn, 'idx_tokens_expires') and
            index_exists(conn, 'idx_auth_codes_hash') and
            index_exists(conn, 'idx_auth_codes_expires')
        )
        if not has_all_indexes:
            # Tables exist but indexes are missing - a fresh database from
            # INITIAL_SCHEMA_SQL. We need to create just the indexes, not run
            # the full migration. Return False (don't run the migration) and
            # handle the indexes separately.
            return False

        # All features exist - migration not needed
        return False

    # Migration 003: Removes code_verifier column from auth_state
    if migration_name == "003_remove_code_verifier_from_auth_state.sql":
        # Check if the column still exists (it should be removed)
        return column_exists(conn, 'auth_state', 'code_verifier')

    # Unknown migration - assume it's needed
    return True
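

# Illustrative sketch (added for exposition; `_demo_migration_002_decision`
# is a hypothetical name, not part of the module): a database that already
# has the migration-002 tables and columns but not its indexes reports the
# migration as "not needed" - the runner then creates the indexes separately
# rather than replaying a migration whose CREATE TABLE would fail.
def _demo_migration_002_decision():
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE tokens (id INTEGER PRIMARY KEY, token_hash TEXT)")
    conn.execute("CREATE TABLE authorization_codes (id INTEGER PRIMARY KEY, code_hash TEXT)")
    assert not is_migration_needed(conn, "002_secure_tokens_and_authorization_codes.sql")
    conn.close()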


def get_applied_migrations(conn):
    """
    Get set of already-applied migration names

    Args:
        conn: SQLite connection

    Returns:
        set: Set of migration filenames that have been applied
    """
    cursor = conn.execute(
        "SELECT migration_name FROM schema_migrations ORDER BY id"
    )
    return {row[0] for row in cursor.fetchall()}


def discover_migration_files(migrations_dir):
    """
    Discover all migration files in the migrations directory

    Args:
        migrations_dir: Path to migrations directory

    Returns:
        list: Sorted list of (filename, full_path) tuples
    """
    if not migrations_dir.exists():
        return []

    migration_files = [
        (file_path.name, file_path) for file_path in migrations_dir.glob("*.sql")
    ]
    # Sort by filename (the zero-padded numeric prefix ensures correct order)
    migration_files.sort(key=lambda x: x[0])
    return migration_files
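

# Illustrative sketch (added for exposition; the filenames below are made up):
# the sort above is plain lexicographic, which matches numeric order only
# because the prefixes are zero-padded - an unpadded "10_" would sort
# before "2_".
def _demo_migration_ordering():
    names = ["010_later.sql", "002_middle.sql", "001_first.sql"]
    assert sorted(names) == ["001_first.sql", "002_middle.sql", "010_later.sql"]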


def apply_migration(conn, migration_name, migration_path, logger=None):
    """
    Apply a single migration file

    Args:
        conn: SQLite connection
        migration_name: Filename of migration
        migration_path: Full path to migration file
        logger: Optional logger for output

    Raises:
        MigrationError: If migration fails to apply
    """
    try:
        # Read migration SQL
        migration_sql = migration_path.read_text()

        if logger:
            logger.debug(f"Applying migration: {migration_name}")

        # Execute the migration. Note: under the default transaction control,
        # executescript() commits any pending transaction before running the
        # script, so the explicit BEGIN covers only the bookkeeping INSERT below.
        conn.execute("BEGIN")
        conn.executescript(migration_sql)

        # Record migration as applied
        conn.execute(
            "INSERT INTO schema_migrations (migration_name) VALUES (?)",
            (migration_name,)
        )
        conn.commit()

        if logger:
            logger.info(f"Applied migration: {migration_name}")
    except Exception as e:
        conn.rollback()
        error_msg = f"Migration {migration_name} failed: {e}"
        if logger:
            logger.error(error_msg)
        raise MigrationError(error_msg) from e
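

# Illustrative sketch (added for exposition; `_demo_apply_migration` is a
# hypothetical name, not part of the module): applying a one-statement
# migration file to a temporary database.
def _demo_apply_migration():
    import tempfile

    with tempfile.TemporaryDirectory() as tmp:
        migration = Path(tmp) / "001_create_example.sql"
        migration.write_text("CREATE TABLE example (id INTEGER PRIMARY KEY);")

        conn = sqlite3.connect(str(Path(tmp) / "demo.db"))
        create_migrations_table(conn)
        apply_migration(conn, migration.name, migration)

        assert table_exists(conn, "example")
        assert migration.name in get_applied_migrations(conn)
        conn.close()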


def run_migrations(db_path, logger=None):
    """
    Run all pending database migrations with concurrency protection

    Uses database-level locking (BEGIN IMMEDIATE) to prevent race conditions
    when multiple workers start simultaneously. Only one worker will apply
    migrations; others will wait and verify completion.

    Called automatically during database initialization. Discovers migration
    files, checks which have been applied, and applies any pending migrations
    in order.

    Fresh Database Behavior:
    - If the schema_migrations table is empty AND the schema is current,
      all migrations are marked as applied (execution is skipped)
    - This handles databases created with the current INITIAL_SCHEMA_SQL

    Existing Database Behavior:
    - Applies only pending migrations
    - Migrations already in schema_migrations are skipped

    Concurrency Protection:
    - Uses BEGIN IMMEDIATE for database-level locking
    - Implements exponential backoff retry (10 attempts, up to 120s total)
    - Graduated logging (DEBUG → INFO → WARNING) based on retry count
    - Creates a new connection for each retry attempt

    Args:
        db_path: Path to SQLite database file
        logger: Optional logger for output

    Raises:
        MigrationError: If any migration fails to apply or the lock cannot be acquired
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    # Determine migrations directory.
    # Assumes migrations/ is in the project root, sibling to starpunk/
    migrations_dir = Path(__file__).parent.parent / "migrations"

    if not migrations_dir.exists():
        logger.warning(f"Migrations directory not found: {migrations_dir}")
        return

    # Retry configuration for lock acquisition
    max_retries = 10
    retry_count = 0
    base_delay = 0.1  # 100ms
    start_time = time.time()
    max_total_time = 120  # 2 minutes absolute maximum
    while retry_count < max_retries and (time.time() - start_time) < max_total_time:
        conn = None
        try:
            # Connect with a longer timeout for lock contention:
            # 30s per attempt allows one worker to complete migrations
            conn = sqlite3.connect(db_path, timeout=30.0)

            # Attempt to acquire an exclusive lock for migrations.
            # BEGIN IMMEDIATE acquires a RESERVED lock, preventing other writes
            # but allowing reads. It escalates to EXCLUSIVE during actual writes.
            conn.execute("BEGIN IMMEDIATE")

            try:
                # Ensure migrations tracking table exists
                create_migrations_table(conn)

                # Quick check: have migrations already been applied by another worker?
                cursor = conn.execute("SELECT COUNT(*) FROM schema_migrations")
                migration_count = cursor.fetchone()[0]

                # Discover migration files
                migration_files = discover_migration_files(migrations_dir)

                if not migration_files:
                    conn.commit()
                    logger.info("No migration files found")
                    return

                # If migrations exist and we're not the first worker, verify and exit
                if migration_count > 0:
                    # Check if all migrations are applied
                    applied = get_applied_migrations(conn)
                    pending = [m for m, _ in migration_files if m not in applied]
                    if not pending:
                        conn.commit()
                        logger.debug("All migrations already applied by another worker")
                        return
                    # If there are pending migrations, we continue to apply them
                    logger.info(f"Found {len(pending)} pending migrations to apply")

                # Fresh database detection (original logic preserved)
                if migration_count == 0:
                    if is_schema_current(conn):
                        # Schema is current - mark all migrations as applied
                        for migration_name, _ in migration_files:
                            conn.execute(
                                "INSERT INTO schema_migrations (migration_name) VALUES (?)",
                                (migration_name,)
                            )
                        conn.commit()
                        logger.info(
                            f"Fresh database detected: marked {len(migration_files)} "
                            f"migrations as applied (schema already current)"
                        )
                        return
                    else:
                        logger.info("Fresh database with partial schema: applying needed migrations")

                # Get already-applied migrations
                applied = get_applied_migrations(conn)

                # Apply pending migrations (original logic preserved)
                pending_count = 0
                skipped_count = 0
                for migration_name, migration_path in migration_files:
                    if migration_name not in applied:
                        # Check if the migration is actually needed.
                        # For fresh databases (migration_count == 0), check all migrations.
                        # For migration 002, ALWAYS check (handles partially migrated databases).
                        should_check_needed = (
                            migration_count == 0 or
                            migration_name == "002_secure_tokens_and_authorization_codes.sql"
                        )
                        if should_check_needed and not is_migration_needed(conn, migration_name):
                            # Special handling for migration 002: if the tables exist
                            # but the indexes don't, create just the indexes
                            if migration_name == "002_secure_tokens_and_authorization_codes.sql":
                                # Check if we need to create indexes
                                indexes_to_create = []
                                if not index_exists(conn, 'idx_tokens_hash'):
                                    indexes_to_create.append("CREATE INDEX idx_tokens_hash ON tokens(token_hash)")
                                if not index_exists(conn, 'idx_tokens_me'):
                                    indexes_to_create.append("CREATE INDEX idx_tokens_me ON tokens(me)")
                                if not index_exists(conn, 'idx_tokens_expires'):
                                    indexes_to_create.append("CREATE INDEX idx_tokens_expires ON tokens(expires_at)")
                                if not index_exists(conn, 'idx_auth_codes_hash'):
                                    indexes_to_create.append("CREATE INDEX idx_auth_codes_hash ON authorization_codes(code_hash)")
                                if not index_exists(conn, 'idx_auth_codes_expires'):
                                    indexes_to_create.append("CREATE INDEX idx_auth_codes_expires ON authorization_codes(expires_at)")

                                if indexes_to_create:
                                    for index_sql in indexes_to_create:
                                        conn.execute(index_sql)
                                    logger.info(f"Created {len(indexes_to_create)} missing indexes from migration 002")

                            # Mark as applied without executing the full migration
                            # (INITIAL_SCHEMA_SQL already has the table changes)
                            conn.execute(
                                "INSERT INTO schema_migrations (migration_name) VALUES (?)",
                                (migration_name,)
                            )
                            skipped_count += 1
                            logger.debug(f"Skipped migration {migration_name} (already in INITIAL_SCHEMA_SQL)")
                        else:
                            # Apply the migration
                            try:
                                # Read migration SQL
                                migration_sql = migration_path.read_text()
                                logger.debug(f"Applying migration: {migration_name}")

                                # Execute the migration script. Note: executescript()
                                # commits any pending transaction first, so the script
                                # itself runs outside the BEGIN IMMEDIATE lock window.
                                conn.executescript(migration_sql)

                                # Record migration as applied
                                conn.execute(
                                    "INSERT INTO schema_migrations (migration_name) VALUES (?)",
                                    (migration_name,)
                                )
                                logger.info(f"Applied migration: {migration_name}")
                                pending_count += 1
                            except Exception as e:
                                # Roll back the transaction - handled by the outer exception handler
                                raise MigrationError(f"Migration {migration_name} failed: {e}") from e

                # Commit the transaction (migrations applied via executescript() have
                # already been committed; this finalizes the bookkeeping INSERTs)
                conn.commit()

                # Summary
                total_count = len(migration_files)
                if pending_count > 0 or skipped_count > 0:
                    if skipped_count > 0:
                        logger.info(
                            f"Migrations complete: {pending_count} applied, {skipped_count} skipped "
                            f"(already in INITIAL_SCHEMA_SQL), {total_count} total"
                        )
                    else:
                        logger.info(
                            f"Migrations complete: {pending_count} applied, "
                            f"{total_count} total"
                        )
                else:
                    logger.info(f"All migrations up to date ({total_count} total)")

                return  # Success!

            except MigrationError:
                # Migration error - roll back and re-raise
                try:
                    conn.rollback()
                except Exception as rollback_error:
                    logger.critical(f"FATAL: Rollback failed: {rollback_error}")
                    raise SystemExit(1)
                raise
            except Exception as e:
                # Unexpected error during migration - roll back and wrap
                try:
                    conn.rollback()
                except Exception as rollback_error:
                    logger.critical(f"FATAL: Rollback failed: {rollback_error}")
                    raise SystemExit(1)
                raise MigrationError(f"Migration system error: {e}") from e

        except sqlite3.OperationalError as e:
            if "database is locked" in str(e).lower():
                # Another worker has the lock, retry with exponential backoff
                retry_count += 1
                if retry_count < max_retries:
                    # Exponential backoff with jitter to prevent a thundering herd
                    delay = base_delay * (2 ** retry_count) + random.uniform(0, 0.1)
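                    # With base_delay = 0.1s, the nominal delays before jitter are
                    # 0.2s, 0.4s, 0.8s, ... 51.2s for retries 1-9: roughly 102s of
                    # cumulative waiting, just inside the 120s max_total_time cap.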
                    # Graduated logging based on retry count
                    if retry_count <= 3:
                        # Normal operation - DEBUG level
                        logger.debug(
                            f"Database locked by another worker, retry {retry_count}/{max_retries} "
                            f"in {delay:.2f}s"
                        )
                    elif retry_count <= 7:
                        # Getting concerning - INFO level
                        logger.info(
                            f"Database locked by another worker, retry {retry_count}/{max_retries} "
                            f"in {delay:.2f}s"
                        )
                    else:
                        # Abnormal - WARNING level
                        logger.warning(
                            f"Database locked by another worker, retry {retry_count}/{max_retries} "
                            f"in {delay:.2f}s (approaching max retries)"
                        )

                    time.sleep(delay)
                    continue
                else:
                    # Retries exhausted
                    elapsed = time.time() - start_time
                    raise MigrationError(
                        f"Failed to acquire migration lock after {max_retries} attempts over {elapsed:.1f}s. "
                        f"Possible causes:\n"
                        f"1. Another process is stuck in migration (check logs)\n"
                        f"2. Database file permissions issue\n"
                        f"3. Disk I/O problems\n"
                        f"Action: Restart container with a single worker to diagnose"
                    )
            else:
                # Non-lock related database error
                error_msg = f"Database error during migration: {e}"
                logger.error(error_msg)
                raise MigrationError(error_msg) from e

        except MigrationError:
            # Re-raise migration errors (already logged)
            raise
        except Exception as e:
            # Unexpected error
            error_msg = f"Unexpected error during migration: {e}"
            logger.error(error_msg)
            raise MigrationError(error_msg) from e

        finally:
            if conn:
                try:
                    conn.close()
                except Exception:
                    pass  # Ignore errors during cleanup

    # Should only reach here if the time limit was exceeded
    elapsed = time.time() - start_time
    raise MigrationError(
        f"Migration timeout: Failed to acquire lock within {max_total_time}s limit "
        f"(elapsed: {elapsed:.1f}s, retries: {retry_count})"
    )
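

# Illustrative usage sketch (added for exposition; the path and logging setup
# below are assumptions - in StarPunk this function runs automatically during
# database initialization):
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    run_migrations("data/starpunk.db")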