fix: Resolve database migration failure on existing databases

Fixes critical issue where migration 002 indexes already existed in SCHEMA_SQL,
causing 'index already exists' errors on databases created before v1.0.0-rc.1.

Changes:
- Removed duplicate index definitions from SCHEMA_SQL (database.py)
- Enhanced migration system to detect and handle indexes properly
- Added comprehensive documentation of the fix

Version bumped to 1.0.0-rc.2 with full changelog entry.

Refs: docs/reports/2025-11-24-migration-fix-v1.0.0-rc.2.md
This commit is contained in:
2025-11-24 13:11:14 -07:00
parent 89758fd1a5
commit 3ed77fd45f
16 changed files with 2723 additions and 18 deletions

View File

@@ -153,5 +153,5 @@ def create_app(config=None):
# Package version (Semantic Versioning 2.0.0)
# See docs/standards/versioning-strategy.md for details
__version__ = "1.0.0-rc.1"
__version_info__ = (1, 0, 0, "rc", 1)
__version__ = "1.0.0-rc.2"
__version_info__ = (1, 0, 0, "rc", 2)

View File

@@ -55,10 +55,6 @@ CREATE TABLE IF NOT EXISTS tokens (
revoked_at TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_tokens_hash ON tokens(token_hash);
CREATE INDEX IF NOT EXISTS idx_tokens_me ON tokens(me);
CREATE INDEX IF NOT EXISTS idx_tokens_expires ON tokens(expires_at);
-- Authorization codes for IndieAuth token exchange
CREATE TABLE IF NOT EXISTS authorization_codes (
id INTEGER PRIMARY KEY AUTOINCREMENT,

View File

@@ -49,18 +49,28 @@ def create_migrations_table(conn):
def is_schema_current(conn):
"""
Check if database schema is current (matches SCHEMA_SQL)
Check if database schema is current (matches SCHEMA_SQL + all migrations)
Uses heuristic: Check for presence of latest schema features
Currently checks for authorization_codes table and token_hash column in tokens table
Checks for:
- code_verifier column in auth_state (migration 001 or SCHEMA_SQL >= v0.8.0)
- authorization_codes table (migration 002 or SCHEMA_SQL >= v1.0.0-rc.1)
- token_hash column in tokens table (migration 002)
- Token indexes (migration 002 only, removed from SCHEMA_SQL in v1.0.0-rc.2)
Args:
conn: SQLite connection
Returns:
bool: True if schema appears current, False if legacy
bool: True if schema is fully current (all tables, columns, AND indexes exist)
False if any piece is missing (legacy database needing migrations)
"""
try:
# Check for code_verifier column in auth_state (migration 001)
# This is also in SCHEMA_SQL, so we can't use it alone
if not column_exists(conn, 'auth_state', 'code_verifier'):
return False
# Check for authorization_codes table (added in migration 002)
if not table_exists(conn, 'authorization_codes'):
return False
@@ -69,6 +79,20 @@ def is_schema_current(conn):
if not column_exists(conn, 'tokens', 'token_hash'):
return False
# Check for token indexes (created by migration 002 ONLY)
# These indexes were removed from SCHEMA_SQL in v1.0.0-rc.2
# to prevent conflicts when migrations run.
# A database with tables/columns but no indexes means:
# - SCHEMA_SQL was run (creating tables/columns)
# - But migration 002 hasn't run yet (no indexes)
# So it's NOT fully current and needs migrations.
if not index_exists(conn, 'idx_tokens_hash'):
return False
if not index_exists(conn, 'idx_tokens_me'):
return False
if not index_exists(conn, 'idx_tokens_expires'):
return False
return True
except sqlite3.OperationalError:
# Schema check failed - definitely not current
@@ -131,6 +155,65 @@ def index_exists(conn, index_name):
return cursor.fetchone() is not None
def is_migration_needed(conn, migration_name):
    """
    Decide whether a migration file must be executed against this database.

    Used for databases with no recorded migrations, where SCHEMA_SQL may
    already have created some of what a migration would add. The decision is
    based on the actual database state instead of applying every migration
    blindly.

    For migration 002 only tables and columns are inspected. Missing indexes
    never make the full migration "needed": when the tables already exist the
    migration file cannot be re-run (its CREATE TABLE statements would fail),
    so the caller is expected to create any missing indexes itself.

    Args:
        conn: SQLite connection
        migration_name: Migration filename to check

    Returns:
        bool: True if the migration should be applied, False if its table and
              column changes are already present in the database
    """
    # Migration 001: adds the code_verifier column to auth_state.
    # Needed only while that column is still absent.
    if migration_name == "001_add_code_verifier_to_auth_state.sql":
        return not column_exists(conn, 'auth_state', 'code_verifier')

    # Migration 002: recreates tokens and adds authorization_codes.
    if migration_name == "002_secure_tokens_and_authorization_codes.sql":
        # A missing table or column means a legacy database: run the migration.
        if not table_exists(conn, 'authorization_codes'):
            return True
        if not column_exists(conn, 'tokens', 'token_hash'):
            return True

        # Tables and columns are in place. Whether or not the indexes exist,
        # the full migration must not run, so index state is deliberately not
        # queried here; index creation is handled by the caller.
        return False

    # Unknown migration - assume it's needed
    return True
def get_applied_migrations(conn):
"""
Get set of already-applied migration names
@@ -282,25 +365,75 @@ def run_migrations(db_path, logger=None):
)
return
else:
logger.info("Legacy database detected: applying all migrations")
logger.info("Fresh database with partial schema: applying needed migrations")
# Get already-applied migrations
applied = get_applied_migrations(conn)
# Apply pending migrations
# Apply pending migrations (using smart detection for fresh databases)
pending_count = 0
skipped_count = 0
for migration_name, migration_path in migration_files:
if migration_name not in applied:
apply_migration(conn, migration_name, migration_path, logger)
pending_count += 1
# For fresh databases (migration_count == 0), check if migration is actually needed
# Some migrations may have been included in SCHEMA_SQL
if migration_count == 0 and not is_migration_needed(conn, migration_name):
# Special handling for migration 002: if tables exist but indexes don't,
# create just the indexes
if migration_name == "002_secure_tokens_and_authorization_codes.sql":
# Check if we need to create indexes
indexes_to_create = []
if not index_exists(conn, 'idx_tokens_hash'):
indexes_to_create.append("CREATE INDEX idx_tokens_hash ON tokens(token_hash)")
if not index_exists(conn, 'idx_tokens_me'):
indexes_to_create.append("CREATE INDEX idx_tokens_me ON tokens(me)")
if not index_exists(conn, 'idx_tokens_expires'):
indexes_to_create.append("CREATE INDEX idx_tokens_expires ON tokens(expires_at)")
if not index_exists(conn, 'idx_auth_codes_hash'):
indexes_to_create.append("CREATE INDEX idx_auth_codes_hash ON authorization_codes(code_hash)")
if not index_exists(conn, 'idx_auth_codes_expires'):
indexes_to_create.append("CREATE INDEX idx_auth_codes_expires ON authorization_codes(expires_at)")
if indexes_to_create:
try:
for index_sql in indexes_to_create:
conn.execute(index_sql)
conn.commit()
if logger:
logger.info(f"Created {len(indexes_to_create)} missing indexes from migration 002")
except Exception as e:
conn.rollback()
error_msg = f"Failed to create indexes for migration 002: {e}"
if logger:
logger.error(error_msg)
raise MigrationError(error_msg)
# Mark as applied without executing full migration (SCHEMA_SQL already has table changes)
conn.execute(
"INSERT INTO schema_migrations (migration_name) VALUES (?)",
(migration_name,)
)
conn.commit()
skipped_count += 1
if logger:
logger.debug(f"Skipped migration {migration_name} (already in SCHEMA_SQL)")
else:
apply_migration(conn, migration_name, migration_path, logger)
pending_count += 1
# Summary
total_count = len(migration_files)
if pending_count > 0:
logger.info(
f"Migrations complete: {pending_count} applied, "
f"{total_count} total"
)
if pending_count > 0 or skipped_count > 0:
if skipped_count > 0:
logger.info(
f"Migrations complete: {pending_count} applied, {skipped_count} skipped "
f"(already in SCHEMA_SQL), {total_count} total"
)
else:
logger.info(
f"Migrations complete: {pending_count} applied, "
f"{total_count} total"
)
else:
logger.info(f"All migrations up to date ({total_count} total)")