This aligns with ADR-033's migration system redesign. The initial schema represents the v1.0.0 baseline and should not be modified. All schema changes after v1.0.0 must go in migration files. Changes: - Renamed SCHEMA_SQL → INITIAL_SCHEMA_SQL in database.py - Updated all references in migrations.py comments - Added comment: "DO NOT MODIFY - This represents the v1.0.0 schema state" - No functional changes, purely documentation improvement Part of v1.1.0 migration system redesign (Phase 2). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
601 lines
23 KiB
Python
601 lines
23 KiB
Python
"""
|
|
Database migration runner for StarPunk
|
|
|
|
Automatically discovers and applies pending migrations on startup.
|
|
Migrations are numbered SQL files in the migrations/ directory.
|
|
|
|
Fresh Database Detection:
|
|
- If schema_migrations table is empty AND schema is current
|
|
- Marks all migrations as applied (skip execution)
|
|
- This handles databases created with current INITIAL_SCHEMA_SQL
|
|
|
|
Existing Database Behavior:
|
|
- Applies only pending migrations
|
|
- Migrations already in schema_migrations are skipped
|
|
|
|
Concurrency Protection:
|
|
- Uses database-level locking (BEGIN IMMEDIATE) to prevent race conditions
|
|
- Multiple workers can start simultaneously; only one applies migrations
|
|
- Other workers wait and verify completion using exponential backoff retry
|
|
"""
|
|
|
|
import sqlite3
|
|
from pathlib import Path
|
|
import logging
|
|
import time
|
|
import random
|
|
|
|
|
|
class MigrationError(Exception):
    """Raised when a migration fails to apply"""
|
|
|
|
|
|
def create_migrations_table(conn):
    """
    Ensure the schema_migrations tracking table and its name index exist.

    Both statements use IF NOT EXISTS, so calling this repeatedly is safe.

    Args:
        conn: SQLite connection
    """
    ddl_statements = (
        """
        CREATE TABLE IF NOT EXISTS schema_migrations (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            migration_name TEXT UNIQUE NOT NULL,
            applied_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_schema_migrations_name
        ON schema_migrations(migration_name)
        """,
    )
    for ddl in ddl_statements:
        conn.execute(ddl)
    conn.commit()
|
|
|
|
|
|
def is_schema_current(conn):
    """
    Check if database schema is current (matches INITIAL_SCHEMA_SQL + all migrations).

    Heuristic based on the presence/absence of the latest schema features:
      - auth_state.code_verifier must be GONE (removed by migration 003)
      - authorization_codes table must exist (migration 002 / current baseline)
      - tokens.token_hash column must exist (migration 002)
      - token indexes must exist (created only by migration 002; they were
        removed from INITIAL_SCHEMA_SQL in v1.0.0-rc.2)

    Args:
        conn: SQLite connection

    Returns:
        bool: True only when every feature above is present (fully current);
              False when anything is missing (legacy database needing migrations)
    """
    try:
        # Legacy column still present means migration 003 has not run yet.
        if column_exists(conn, 'auth_state', 'code_verifier'):
            return False

        # Structures introduced by migration 002 (or the current baseline).
        if not table_exists(conn, 'authorization_codes'):
            return False
        if not column_exists(conn, 'tokens', 'token_hash'):
            return False

        # The token indexes come from migration 002 ONLY. Tables without
        # these indexes mean INITIAL_SCHEMA_SQL ran but migration 002 did
        # not — the schema is NOT fully current.
        required_indexes = ('idx_tokens_hash', 'idx_tokens_me', 'idx_tokens_expires')
        return all(index_exists(conn, idx_name) for idx_name in required_indexes)
    except sqlite3.OperationalError:
        # Any failure probing the schema means it is definitely not current.
        return False
|
|
|
|
|
|
def table_exists(conn, table_name):
    """
    Check if table exists in database.

    Args:
        conn: SQLite connection
        table_name: Name of table to check

    Returns:
        bool: True if table exists
    """
    row = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
        (table_name,),
    ).fetchone()
    return row is not None
|
|
|
|
|
|
def column_exists(conn, table_name, column_name):
    """
    Check if column exists in table.

    Args:
        conn: SQLite connection
        table_name: Name of table
        column_name: Name of column

    Returns:
        bool: True if column exists; False if the column or table is missing
    """
    try:
        # PRAGMA cannot take bound parameters, hence the f-string.
        info_rows = conn.execute(f"PRAGMA table_info({table_name})").fetchall()
    except sqlite3.OperationalError:
        # Table does not exist (or is otherwise unreadable).
        return False
    # Column name is the second field of each table_info row.
    return any(row[1] == column_name for row in info_rows)
|
|
|
|
|
|
def index_exists(conn, index_name):
    """
    Check if index exists in database.

    Args:
        conn: SQLite connection
        index_name: Name of index to check

    Returns:
        bool: True if index exists
    """
    row = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='index' AND name=?",
        (index_name,),
    ).fetchone()
    return row is not None
|
|
|
|
|
|
def is_migration_needed(conn, migration_name):
    """
    Check if a specific migration is needed based on actual database state.

    Used for fresh databases where INITIAL_SCHEMA_SQL may already include some
    migration features: we probe the live schema instead of blindly applying
    every migration file.

    Args:
        conn: SQLite connection
        migration_name: Migration filename to check

    Returns:
        bool: True if the migration should be applied; False if its effects
              are already present via INITIAL_SCHEMA_SQL
    """
    # Migration 001: adds code_verifier column to auth_state.
    # Not needed once the column exists (added to INITIAL_SCHEMA_SQL in v0.8.0).
    if migration_name == "001_add_code_verifier_to_auth_state.sql":
        return not column_exists(conn, 'auth_state', 'code_verifier')

    # Migration 002: drops and recreates tokens, adds authorization_codes
    # and a set of indexes.
    if migration_name == "002_secure_tokens_and_authorization_codes.sql":
        # Missing table or column means a truly legacy database — run it.
        is_legacy = (
            not table_exists(conn, 'authorization_codes')
            or not column_exists(conn, 'tokens', 'token_hash')
        )
        if is_legacy:
            return True

        # Tables/columns are in place. Whether or not the five indexes
        # exist, we must NOT run the full migration here: its CREATE TABLE
        # statements would fail against the existing tables. When indexes
        # are missing the caller creates just the indexes separately.
        index_names = (
            'idx_tokens_hash',
            'idx_tokens_me',
            'idx_tokens_expires',
            'idx_auth_codes_hash',
            'idx_auth_codes_expires',
        )
        for idx_name in index_names:
            if not index_exists(conn, idx_name):
                # Fresh database from INITIAL_SCHEMA_SQL missing only indexes.
                return False
        # Everything present — migration not needed.
        return False

    # Migration 003: removes code_verifier from auth_state.
    # Needed only while the column is still there.
    if migration_name == "003_remove_code_verifier_from_auth_state.sql":
        return column_exists(conn, 'auth_state', 'code_verifier')

    # Unknown migration — assume it must be applied.
    return True
|
|
|
|
|
|
def get_applied_migrations(conn):
    """
    Get set of already-applied migration names.

    Args:
        conn: SQLite connection

    Returns:
        set: Migration filenames recorded in schema_migrations
    """
    rows = conn.execute(
        "SELECT migration_name FROM schema_migrations ORDER BY id"
    ).fetchall()
    return {name for (name,) in rows}
|
|
|
|
|
|
def discover_migration_files(migrations_dir):
    """
    Discover all migration files in the migrations directory.

    Args:
        migrations_dir: Path to migrations directory

    Returns:
        list: (filename, full_path) tuples sorted by filename; the numeric
              prefix on each filename guarantees application order
    """
    if not migrations_dir.exists():
        return []

    return sorted(
        ((sql_path.name, sql_path) for sql_path in migrations_dir.glob("*.sql")),
        key=lambda pair: pair[0],
    )
|
|
|
|
|
|
def apply_migration(conn, migration_name, migration_path, logger=None):
    """
    Apply a single migration file and record it in schema_migrations.

    Args:
        conn: SQLite connection
        migration_name: Filename of migration (recorded in schema_migrations)
        migration_path: Full path to migration file
        logger: Optional logger for output

    Raises:
        MigrationError: If the migration fails to apply. The original
            exception is chained (``from e``) so the root cause is preserved
            in the traceback.
    """
    try:
        # Read migration SQL
        migration_sql = migration_path.read_text()

        if logger:
            logger.debug(f"Applying migration: {migration_name}")

        # Execute migration in transaction.
        # NOTE(review): in sqlite3's legacy transaction mode,
        # executescript() implicitly COMMITs any pending transaction before
        # running the script, so this BEGIN may not actually span the script
        # on all Python versions — confirm the intended atomicity guarantee.
        conn.execute("BEGIN")
        conn.executescript(migration_sql)

        # Record migration as applied
        conn.execute(
            "INSERT INTO schema_migrations (migration_name) VALUES (?)",
            (migration_name,)
        )

        conn.commit()

        if logger:
            logger.info(f"Applied migration: {migration_name}")

    except Exception as e:
        conn.rollback()
        error_msg = f"Migration {migration_name} failed: {e}"
        if logger:
            logger.error(error_msg)
        # Chain the underlying exception instead of discarding it; the bare
        # raise previously hid the root-cause traceback.
        raise MigrationError(error_msg) from e
|
|
|
|
|
|
def run_migrations(db_path, logger=None):
    """
    Run all pending database migrations with concurrency protection.

    Uses database-level locking (BEGIN IMMEDIATE) to prevent race conditions
    when multiple workers start simultaneously. Only one worker will apply
    migrations; others will wait and verify completion.

    Called automatically during database initialization. Discovers migration
    files, checks which have been applied, and applies any pending migrations
    in order.

    Fresh Database Behavior:
    - If schema_migrations table is empty AND schema is current
    - Marks all migrations as applied (skip execution)
    - This handles databases created with current INITIAL_SCHEMA_SQL

    Existing Database Behavior:
    - Applies only pending migrations
    - Migrations already in schema_migrations are skipped

    Concurrency Protection:
    - Uses BEGIN IMMEDIATE for database-level locking
    - Implements exponential backoff retry (10 attempts, up to 120s total)
    - Graduated logging (DEBUG -> INFO -> WARNING) based on retry count
    - Creates a new connection for each retry attempt

    Args:
        db_path: Path to SQLite database file
        logger: Optional logger for output

    Raises:
        MigrationError: If any migration fails to apply or the lock cannot
            be acquired within the retry/time budget
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    # Determine migrations directory.
    # Assumes migrations/ is in project root, sibling to starpunk/.
    migrations_dir = Path(__file__).parent.parent / "migrations"

    if not migrations_dir.exists():
        logger.warning(f"Migrations directory not found: {migrations_dir}")
        return

    # Retry configuration for lock acquisition.
    max_retries = 10
    retry_count = 0
    base_delay = 0.1  # 100ms; doubled each retry (exponential backoff)
    start_time = time.time()
    max_total_time = 120  # 2 minutes absolute maximum across all retries

    while retry_count < max_retries and (time.time() - start_time) < max_total_time:
        conn = None
        try:
            # Connect with longer timeout for lock contention.
            # 30s per attempt allows one worker to complete migrations.
            conn = sqlite3.connect(db_path, timeout=30.0)

            # Attempt to acquire exclusive lock for migrations.
            # BEGIN IMMEDIATE acquires RESERVED lock, preventing other writes
            # but allowing reads. Escalates to EXCLUSIVE during actual writes.
            conn.execute("BEGIN IMMEDIATE")

            try:
                # Ensure migrations tracking table exists.
                # NOTE(review): create_migrations_table() calls conn.commit(),
                # which ends the BEGIN IMMEDIATE transaction above; later
                # statements run in a new implicit transaction. Confirm the
                # lock window actually covers the migration work as intended.
                create_migrations_table(conn)

                # Quick check: have migrations already been applied by another worker?
                cursor = conn.execute("SELECT COUNT(*) FROM schema_migrations")
                migration_count = cursor.fetchone()[0]

                # Discover migration files (sorted by numeric filename prefix).
                migration_files = discover_migration_files(migrations_dir)

                if not migration_files:
                    conn.commit()
                    logger.info("No migration files found")
                    return

                # If migrations exist and we're not the first worker, verify and exit.
                if migration_count > 0:
                    # Check if all migrations are applied.
                    applied = get_applied_migrations(conn)
                    pending = [m for m, _ in migration_files if m not in applied]

                    if not pending:
                        conn.commit()
                        logger.debug("All migrations already applied by another worker")
                        return
                    # If there are pending migrations, we continue to apply them.
                    logger.info(f"Found {len(pending)} pending migrations to apply")

                # Fresh database detection (original logic preserved).
                if migration_count == 0:
                    if is_schema_current(conn):
                        # Schema is current - mark all migrations as applied
                        # without executing them.
                        for migration_name, _ in migration_files:
                            conn.execute(
                                "INSERT INTO schema_migrations (migration_name) VALUES (?)",
                                (migration_name,)
                            )
                        conn.commit()
                        logger.info(
                            f"Fresh database detected: marked {len(migration_files)} "
                            f"migrations as applied (schema already current)"
                        )
                        return
                    else:
                        logger.info("Fresh database with partial schema: applying needed migrations")

                # Get already-applied migrations.
                applied = get_applied_migrations(conn)

                # Apply pending migrations (original logic preserved).
                pending_count = 0
                skipped_count = 0
                for migration_name, migration_path in migration_files:
                    if migration_name not in applied:
                        # Check if migration is actually needed:
                        # - for fresh databases (migration_count == 0), check all migrations
                        # - for migration 002, ALWAYS check (handles partially
                        #   migrated databases)
                        should_check_needed = (
                            migration_count == 0 or
                            migration_name == "002_secure_tokens_and_authorization_codes.sql"
                        )

                        if should_check_needed and not is_migration_needed(conn, migration_name):
                            # Special handling for migration 002: if tables exist but
                            # indexes don't, create just the indexes. Running the full
                            # migration would fail on CREATE TABLE against existing tables.
                            if migration_name == "002_secure_tokens_and_authorization_codes.sql":
                                # Check which of the five indexes are missing.
                                indexes_to_create = []
                                if not index_exists(conn, 'idx_tokens_hash'):
                                    indexes_to_create.append("CREATE INDEX idx_tokens_hash ON tokens(token_hash)")
                                if not index_exists(conn, 'idx_tokens_me'):
                                    indexes_to_create.append("CREATE INDEX idx_tokens_me ON tokens(me)")
                                if not index_exists(conn, 'idx_tokens_expires'):
                                    indexes_to_create.append("CREATE INDEX idx_tokens_expires ON tokens(expires_at)")
                                if not index_exists(conn, 'idx_auth_codes_hash'):
                                    indexes_to_create.append("CREATE INDEX idx_auth_codes_hash ON authorization_codes(code_hash)")
                                if not index_exists(conn, 'idx_auth_codes_expires'):
                                    indexes_to_create.append("CREATE INDEX idx_auth_codes_expires ON authorization_codes(expires_at)")

                                if indexes_to_create:
                                    for index_sql in indexes_to_create:
                                        conn.execute(index_sql)
                                    logger.info(f"Created {len(indexes_to_create)} missing indexes from migration 002")

                            # Mark as applied without executing full migration
                            # (INITIAL_SCHEMA_SQL already has the table changes).
                            conn.execute(
                                "INSERT INTO schema_migrations (migration_name) VALUES (?)",
                                (migration_name,)
                            )
                            skipped_count += 1
                            logger.debug(f"Skipped migration {migration_name} (already in INITIAL_SCHEMA_SQL)")
                        else:
                            # Apply the migration (within our transaction).
                            try:
                                # Read migration SQL.
                                migration_sql = migration_path.read_text()

                                logger.debug(f"Applying migration: {migration_name}")

                                # Execute migration (already in transaction).
                                # NOTE(review): executescript() may implicitly commit
                                # a pending transaction in legacy sqlite3 mode, which
                                # would break the "all migrations atomic" intent below
                                # — confirm against the targeted Python version.
                                conn.executescript(migration_sql)

                                # Record migration as applied.
                                conn.execute(
                                    "INSERT INTO schema_migrations (migration_name) VALUES (?)",
                                    (migration_name,)
                                )

                                logger.info(f"Applied migration: {migration_name}")
                                pending_count += 1

                            except Exception as e:
                                # Roll back the transaction - handled by outer
                                # exception handler below.
                                raise MigrationError(f"Migration {migration_name} failed: {e}")

                # Commit all migrations atomically.
                conn.commit()

                # Summary logging.
                total_count = len(migration_files)
                if pending_count > 0 or skipped_count > 0:
                    if skipped_count > 0:
                        logger.info(
                            f"Migrations complete: {pending_count} applied, {skipped_count} skipped "
                            f"(already in INITIAL_SCHEMA_SQL), {total_count} total"
                        )
                    else:
                        logger.info(
                            f"Migrations complete: {pending_count} applied, "
                            f"{total_count} total"
                        )
                else:
                    logger.info(f"All migrations up to date ({total_count} total)")

                return  # Success!

            except MigrationError:
                # Migration error - rollback and re-raise.
                try:
                    conn.rollback()
                except Exception as rollback_error:
                    # Rollback failure leaves the DB in an unknown state;
                    # abort the process rather than continue.
                    logger.critical(f"FATAL: Rollback failed: {rollback_error}")
                    raise SystemExit(1)
                raise

            except Exception as e:
                # Unexpected error during migration - rollback and wrap.
                try:
                    conn.rollback()
                except Exception as rollback_error:
                    logger.critical(f"FATAL: Rollback failed: {rollback_error}")
                    raise SystemExit(1)
                raise MigrationError(f"Migration system error: {e}")

        except sqlite3.OperationalError as e:
            if "database is locked" in str(e).lower():
                # Another worker has the lock, retry with exponential backoff.
                retry_count += 1

                if retry_count < max_retries:
                    # Exponential backoff with jitter to prevent thundering herd.
                    delay = base_delay * (2 ** retry_count) + random.uniform(0, 0.1)

                    # Graduated logging based on retry count.
                    if retry_count <= 3:
                        # Normal operation - DEBUG level.
                        logger.debug(
                            f"Database locked by another worker, retry {retry_count}/{max_retries} "
                            f"in {delay:.2f}s"
                        )
                    elif retry_count <= 7:
                        # Getting concerning - INFO level.
                        logger.info(
                            f"Database locked by another worker, retry {retry_count}/{max_retries} "
                            f"in {delay:.2f}s"
                        )
                    else:
                        # Abnormal - WARNING level.
                        logger.warning(
                            f"Database locked by another worker, retry {retry_count}/{max_retries} "
                            f"in {delay:.2f}s (approaching max retries)"
                        )

                    time.sleep(delay)
                    continue
                else:
                    # Retries exhausted.
                    elapsed = time.time() - start_time
                    raise MigrationError(
                        f"Failed to acquire migration lock after {max_retries} attempts over {elapsed:.1f}s. "
                        f"Possible causes:\n"
                        f"1. Another process is stuck in migration (check logs)\n"
                        f"2. Database file permissions issue\n"
                        f"3. Disk I/O problems\n"
                        f"Action: Restart container with single worker to diagnose"
                    )
            else:
                # Non-lock related database error.
                error_msg = f"Database error during migration: {e}"
                logger.error(error_msg)
                raise MigrationError(error_msg)

        except MigrationError:
            # Re-raise migration errors (already logged).
            raise

        except Exception as e:
            # Unexpected error.
            error_msg = f"Unexpected error during migration: {e}"
            logger.error(error_msg)
            raise MigrationError(error_msg)

        finally:
            if conn:
                try:
                    conn.close()
                except:
                    pass  # Ignore errors during cleanup

    # Should only reach here if time limit exceeded.
    elapsed = time.time() - start_time
    raise MigrationError(
        f"Migration timeout: Failed to acquire lock within {max_total_time}s limit "
        f"(elapsed: {elapsed:.1f}s, retries: {retry_count})"
    )
|