feat(media): Make variant generation atomic with database
Per v1.5.0 Phase 4: - Generate variants to temp directory first - Perform database inserts in transaction - Move files to final location before commit - Clean up temp files on any failure - Add startup recovery for orphaned temp files - All media operations now fully atomic Changes: - Modified generate_all_variants() to return file moves - Modified save_media() to handle full atomic operation - Add cleanup_orphaned_temp_files() for startup recovery - Added 4 new tests for atomic behavior - Fixed HEIC variant format detection - Updated variant failure test for atomic behavior Fixes: - No orphaned files on database failures - No orphaned DB records on file failures - Startup recovery detects and cleans orphans 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -21,6 +21,7 @@ from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import uuid
|
||||
import io
|
||||
import shutil
|
||||
from typing import Optional, List, Dict, Tuple
|
||||
from flask import current_app
|
||||
|
||||
@@ -316,7 +317,8 @@ def generate_variant(
|
||||
variant_type: str,
|
||||
base_path: Path,
|
||||
base_filename: str,
|
||||
file_ext: str
|
||||
file_ext: str,
|
||||
relative_path: str = None
|
||||
) -> Dict:
|
||||
"""
|
||||
Generate a single image variant
|
||||
@@ -327,6 +329,7 @@ def generate_variant(
|
||||
base_path: Directory to save to
|
||||
base_filename: Base filename (UUID without extension)
|
||||
file_ext: File extension (e.g., '.jpg')
|
||||
relative_path: Relative path for metadata (if None, calculated from base_path)
|
||||
|
||||
Returns:
|
||||
Dict with variant metadata (path, width, height, size_bytes)
|
||||
@@ -359,19 +362,42 @@ def generate_variant(
|
||||
|
||||
# Save with appropriate quality
|
||||
save_kwargs = {'optimize': True}
|
||||
if work_img.format in ['JPEG', 'JPG', None]:
|
||||
save_kwargs['quality'] = 85
|
||||
|
||||
# Determine format from extension
|
||||
save_format = 'JPEG' if file_ext.lower() in ['.jpg', '.jpeg'] else file_ext[1:].upper()
|
||||
# Determine format - prefer image's actual format over extension
|
||||
# This handles cases like HEIC -> JPEG conversion where extension doesn't match format
|
||||
if work_img.format and work_img.format in ['JPEG', 'PNG', 'GIF', 'WEBP']:
|
||||
save_format = work_img.format
|
||||
if save_format in ['JPEG', 'JPG']:
|
||||
save_kwargs['quality'] = 85
|
||||
else:
|
||||
# Fallback to extension-based detection
|
||||
if file_ext.lower() in ['.jpg', '.jpeg', '.heic']:
|
||||
save_format = 'JPEG'
|
||||
save_kwargs['quality'] = 85
|
||||
elif file_ext.lower() == '.png':
|
||||
save_format = 'PNG'
|
||||
elif file_ext.lower() == '.gif':
|
||||
save_format = 'GIF'
|
||||
elif file_ext.lower() == '.webp':
|
||||
save_format = 'WEBP'
|
||||
save_kwargs['quality'] = 85
|
||||
else:
|
||||
save_format = 'JPEG' # Default fallback
|
||||
save_kwargs['quality'] = 85
|
||||
|
||||
work_img.save(variant_path, format=save_format, **save_kwargs)
|
||||
|
||||
# Use provided relative path or calculate it
|
||||
if relative_path is None:
|
||||
relative_path = str(variant_path.relative_to(base_path.parent.parent)) # Relative to media root
|
||||
|
||||
return {
|
||||
'variant_type': variant_type,
|
||||
'path': str(variant_path.relative_to(base_path.parent.parent)), # Relative to media root
|
||||
'path': relative_path,
|
||||
'width': work_img.width,
|
||||
'height': work_img.height,
|
||||
'size_bytes': variant_path.stat().st_size
|
||||
'size_bytes': variant_path.stat().st_size,
|
||||
'temp_file': variant_path # Include temp file path for atomic operation
|
||||
}
|
||||
|
||||
|
||||
@@ -383,32 +409,53 @@ def generate_all_variants(
|
||||
media_id: int,
|
||||
year: str,
|
||||
month: str,
|
||||
optimized_bytes: bytes
|
||||
) -> List[Dict]:
|
||||
optimized_bytes: bytes,
|
||||
db = None
|
||||
) -> Tuple[List[Dict], List[Tuple[Path, Path]]]:
|
||||
"""
|
||||
Generate all variants for an image and store in database
|
||||
Generate all variants for an image and prepare database records
|
||||
|
||||
Per v1.5.0 Phase 4: Atomic variant generation
|
||||
- Generate variants to temp directory first
|
||||
- Return database insert data and file move operations
|
||||
- Caller handles transaction commit and file moves
|
||||
- This ensures true atomicity
|
||||
|
||||
Args:
|
||||
img: Source PIL Image (the optimized original)
|
||||
base_path: Directory containing the original
|
||||
base_path: Directory containing the original (final destination)
|
||||
base_filename: Base filename (UUID without extension)
|
||||
file_ext: File extension
|
||||
media_id: ID of parent media record
|
||||
year: Year string (e.g., '2025') for path calculation
|
||||
month: Month string (e.g., '01') for path calculation
|
||||
optimized_bytes: Bytes of optimized original (avoids re-reading file)
|
||||
db: Database connection (optional, for transaction control)
|
||||
|
||||
Returns:
|
||||
List of variant metadata dicts
|
||||
Tuple of (variant_metadata_list, file_moves_list)
|
||||
- variant_metadata_list: List of dicts ready for database insert
|
||||
- file_moves_list: List of (src_path, dst_path) tuples for file moves
|
||||
"""
|
||||
from starpunk.database import get_db
|
||||
|
||||
if db is None:
|
||||
db = get_db(current_app)
|
||||
|
||||
variants = []
|
||||
db = get_db(current_app)
|
||||
created_files = [] # Track files for cleanup on failure
|
||||
file_moves = []
|
||||
|
||||
# Create temp directory for atomic operation
|
||||
media_dir = Path(current_app.config.get('DATA_PATH', 'data')) / 'media'
|
||||
temp_dir = media_dir / '.tmp'
|
||||
temp_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Create unique temp subdirectory for this operation
|
||||
temp_subdir = temp_dir / f"{base_filename}_{uuid.uuid4().hex[:8]}"
|
||||
temp_subdir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
# Generate each variant type
|
||||
# Step 1: Generate all variants to temp directory
|
||||
for variant_type in ['thumb', 'small', 'medium', 'large']:
|
||||
# Skip if image is smaller than target
|
||||
spec = VARIANT_SPECS[variant_type]
|
||||
@@ -417,45 +464,59 @@ def generate_all_variants(
|
||||
if img.width < target_width and variant_type != 'thumb':
|
||||
continue # Skip variants larger than original
|
||||
|
||||
variant = generate_variant(img, variant_type, base_path, base_filename, file_ext)
|
||||
variants.append(variant)
|
||||
created_files.append(base_path / f"{base_filename}_{variant_type}{file_ext}")
|
||||
# Calculate final relative path (for database)
|
||||
final_relative_path = f"{year}/{month}/{base_filename}_{variant_type}{file_ext}"
|
||||
|
||||
# Insert into database
|
||||
db.execute(
|
||||
"""
|
||||
INSERT INTO media_variants
|
||||
(media_id, variant_type, path, width, height, size_bytes)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(media_id, variant['variant_type'], variant['path'],
|
||||
variant['width'], variant['height'], variant['size_bytes'])
|
||||
# Generate variant to temp directory
|
||||
variant = generate_variant(
|
||||
img,
|
||||
variant_type,
|
||||
temp_subdir, # Write to temp
|
||||
base_filename,
|
||||
file_ext,
|
||||
final_relative_path # Store final path in metadata
|
||||
)
|
||||
|
||||
# Also record the original as 'original' variant
|
||||
# Use explicit year/month for path calculation (avoids fragile parent traversal)
|
||||
original_path = f"{year}/{month}/{base_filename}{file_ext}"
|
||||
db.execute(
|
||||
"""
|
||||
INSERT INTO media_variants
|
||||
(media_id, variant_type, path, width, height, size_bytes)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(media_id, 'original', original_path, img.width, img.height,
|
||||
len(optimized_bytes)) # Use passed bytes instead of file I/O
|
||||
)
|
||||
# Prepare database metadata (without temp_file key)
|
||||
variant_metadata = {
|
||||
'variant_type': variant['variant_type'],
|
||||
'path': variant['path'],
|
||||
'width': variant['width'],
|
||||
'height': variant['height'],
|
||||
'size_bytes': variant['size_bytes']
|
||||
}
|
||||
variants.append(variant_metadata)
|
||||
|
||||
db.commit()
|
||||
return variants
|
||||
# Track file move operation
|
||||
temp_file = variant['temp_file']
|
||||
final_path = base_path / temp_file.name
|
||||
file_moves.append((temp_file, final_path, temp_subdir))
|
||||
|
||||
# Also prepare original variant metadata
|
||||
original_path = f"{year}/{month}/{base_filename}{file_ext}"
|
||||
variants.append({
|
||||
'variant_type': 'original',
|
||||
'path': original_path,
|
||||
'width': img.width,
|
||||
'height': img.height,
|
||||
'size_bytes': len(optimized_bytes)
|
||||
})
|
||||
|
||||
return variants, file_moves
|
||||
|
||||
except Exception as e:
|
||||
# Clean up any created variant files on failure
|
||||
for file_path in created_files:
|
||||
try:
|
||||
if file_path.exists():
|
||||
file_path.unlink()
|
||||
except OSError:
|
||||
pass # Best effort cleanup
|
||||
# Clean up temp files on failure
|
||||
try:
|
||||
if temp_subdir.exists():
|
||||
for file in temp_subdir.glob('*'):
|
||||
try:
|
||||
file.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
temp_subdir.rmdir()
|
||||
except OSError:
|
||||
pass # Best effort
|
||||
|
||||
raise # Re-raise the original exception
|
||||
|
||||
|
||||
@@ -526,46 +587,147 @@ def save_media(file_data: bytes, filename: str) -> Dict:
|
||||
full_dir = media_dir / year / month
|
||||
full_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Save optimized image (using bytes from optimize_image to avoid re-encoding)
|
||||
full_path = full_dir / stored_filename
|
||||
full_path.write_bytes(optimized_bytes)
|
||||
|
||||
# Get actual file size (from optimized bytes)
|
||||
actual_size = len(optimized_bytes)
|
||||
|
||||
# Insert into database
|
||||
db = get_db(current_app)
|
||||
cursor = db.execute(
|
||||
"""
|
||||
INSERT INTO media (filename, stored_filename, path, mime_type, size, width, height)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(filename, stored_filename, relative_path, mime_type, actual_size, width, height)
|
||||
)
|
||||
db.commit()
|
||||
media_id = cursor.lastrowid
|
||||
|
||||
# Generate variants (synchronous) - v1.4.0 Phase 2
|
||||
# Pass year, month, and optimized_bytes to avoid fragile path traversal and file I/O
|
||||
# Per v1.5.0 Phase 4: Atomic operation for all file saves and database inserts
|
||||
# Generate variants first (to temp directory)
|
||||
base_filename = stored_filename.rsplit('.', 1)[0]
|
||||
variants = []
|
||||
|
||||
db = get_db(current_app)
|
||||
variant_metadata = []
|
||||
file_moves = []
|
||||
temp_original_path = None
|
||||
temp_subdir = None
|
||||
|
||||
try:
|
||||
variants = generate_all_variants(
|
||||
# Step 1: Save original to temp directory
|
||||
media_dir = Path(current_app.config.get('DATA_PATH', 'data')) / 'media'
|
||||
temp_dir = media_dir / '.tmp'
|
||||
temp_dir.mkdir(parents=True, exist_ok=True)
|
||||
temp_subdir = temp_dir / f"{base_filename}_{uuid.uuid4().hex[:8]}"
|
||||
temp_subdir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
temp_original_path = temp_subdir / stored_filename
|
||||
temp_original_path.write_bytes(optimized_bytes)
|
||||
|
||||
# Step 2: Generate variants to temp directory
|
||||
variant_metadata, file_moves = generate_all_variants(
|
||||
optimized_img,
|
||||
full_dir,
|
||||
base_filename,
|
||||
file_ext,
|
||||
media_id,
|
||||
0, # media_id not yet known
|
||||
year,
|
||||
month,
|
||||
optimized_bytes
|
||||
optimized_bytes,
|
||||
db
|
||||
)
|
||||
|
||||
# Step 3: Begin transaction
|
||||
db.execute("BEGIN TRANSACTION")
|
||||
|
||||
# Step 4: Insert media record
|
||||
cursor = db.execute(
|
||||
"""
|
||||
INSERT INTO media (filename, stored_filename, path, mime_type, size, width, height)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(filename, stored_filename, relative_path, mime_type, actual_size, width, height)
|
||||
)
|
||||
media_id = cursor.lastrowid
|
||||
|
||||
# Step 5: Insert variant records
|
||||
for variant in variant_metadata:
|
||||
db.execute(
|
||||
"""
|
||||
INSERT INTO media_variants
|
||||
(media_id, variant_type, path, width, height, size_bytes)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(media_id, variant['variant_type'], variant['path'],
|
||||
variant['width'], variant['height'], variant['size_bytes'])
|
||||
)
|
||||
|
||||
# Step 6: Move files to final location (before commit for true atomicity)
|
||||
# If file moves fail, we can rollback the transaction
|
||||
try:
|
||||
# Move original file
|
||||
full_path = full_dir / stored_filename
|
||||
shutil.move(str(temp_original_path), str(full_path))
|
||||
|
||||
# Move variant files
|
||||
for temp_file, final_path, _ in file_moves:
|
||||
shutil.move(str(temp_file), str(final_path))
|
||||
|
||||
except Exception as e:
|
||||
# Rollback database if file move fails
|
||||
db.rollback()
|
||||
raise
|
||||
|
||||
# Step 7: Commit transaction (after files are moved successfully)
|
||||
db.commit()
|
||||
|
||||
# Step 8: Clean up temp directory
|
||||
try:
|
||||
if temp_subdir and temp_subdir.exists():
|
||||
temp_subdir.rmdir()
|
||||
except OSError:
|
||||
pass # Best effort
|
||||
|
||||
# Format variants for return value (same format as before)
|
||||
variants = [v for v in variant_metadata if v['variant_type'] != 'original']
|
||||
|
||||
except Exception as e:
|
||||
# Rollback database on any failure (best effort)
|
||||
try:
|
||||
db.rollback()
|
||||
except Exception:
|
||||
pass # May already be rolled back
|
||||
|
||||
# Clean up moved files if commit failed
|
||||
# (This handles the case where files were moved but commit failed)
|
||||
full_path = full_dir / stored_filename
|
||||
if full_path.exists():
|
||||
try:
|
||||
full_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
for _, final_path, _ in file_moves:
|
||||
try:
|
||||
if final_path.exists():
|
||||
final_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Clean up temp files on any failure
|
||||
if temp_original_path and temp_original_path.exists():
|
||||
try:
|
||||
temp_original_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
for temp_file, _, _ in file_moves:
|
||||
try:
|
||||
if temp_file.exists():
|
||||
temp_file.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Clean up temp subdirectory
|
||||
if temp_subdir and temp_subdir.exists():
|
||||
try:
|
||||
temp_subdir.rmdir()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Log and re-raise
|
||||
current_app.logger.warning(
|
||||
f'Media upload variant generation failed: filename="{filename}", '
|
||||
f'media_id={media_id}, error="{e}"'
|
||||
f'Media upload atomic operation failed: filename="{filename}", '
|
||||
f'error="{e}"'
|
||||
)
|
||||
# Continue - original image is still usable
|
||||
raise
|
||||
|
||||
# Log success
|
||||
was_optimized = len(optimized_bytes) < file_size
|
||||
@@ -981,3 +1143,74 @@ def cleanup_old_debug_files(app) -> None:
|
||||
f"Debug file cleanup: deleted {deleted_count} file(s), "
|
||||
f"freed {deleted_size / 1024 / 1024:.2f} MB"
|
||||
)
|
||||
|
||||
|
||||
def cleanup_orphaned_temp_files(app) -> None:
|
||||
"""
|
||||
Clean up orphaned temporary variant files on startup
|
||||
|
||||
Per v1.5.0 Phase 4:
|
||||
- Detect temp files left from failed operations
|
||||
- Log warnings for orphaned files
|
||||
- Clean up temp directory
|
||||
- Called on application startup
|
||||
|
||||
Args:
|
||||
app: Flask application instance (for config and logger)
|
||||
"""
|
||||
media_dir = Path(app.config.get('DATA_PATH', 'data')) / 'media'
|
||||
temp_dir = media_dir / '.tmp'
|
||||
|
||||
# Check if temp directory exists
|
||||
if not temp_dir.exists():
|
||||
return
|
||||
|
||||
# Find all subdirectories and files in temp directory
|
||||
orphaned_count = 0
|
||||
cleaned_size = 0
|
||||
|
||||
# Iterate through temp subdirectories
|
||||
for temp_subdir in temp_dir.iterdir():
|
||||
if not temp_subdir.is_dir():
|
||||
# Clean up any loose files (shouldn't normally exist)
|
||||
try:
|
||||
size = temp_subdir.stat().st_size
|
||||
temp_subdir.unlink()
|
||||
orphaned_count += 1
|
||||
cleaned_size += size
|
||||
app.logger.warning(f"Cleaned up orphaned temp file: {temp_subdir.name}")
|
||||
except OSError as e:
|
||||
app.logger.warning(f"Failed to delete orphaned temp file {temp_subdir.name}: {e}")
|
||||
continue
|
||||
|
||||
# Process subdirectory
|
||||
files_in_subdir = list(temp_subdir.glob('*'))
|
||||
if files_in_subdir:
|
||||
# Log orphaned operation
|
||||
app.logger.warning(
|
||||
f"Found orphaned temp directory from failed operation: {temp_subdir.name} "
|
||||
f"({len(files_in_subdir)} file(s))"
|
||||
)
|
||||
|
||||
# Clean up files
|
||||
for file_path in files_in_subdir:
|
||||
try:
|
||||
if file_path.is_file():
|
||||
size = file_path.stat().st_size
|
||||
file_path.unlink()
|
||||
orphaned_count += 1
|
||||
cleaned_size += size
|
||||
except OSError as e:
|
||||
app.logger.warning(f"Failed to delete orphaned temp file {file_path}: {e}")
|
||||
|
||||
# Remove empty subdirectory
|
||||
try:
|
||||
temp_subdir.rmdir()
|
||||
except OSError as e:
|
||||
app.logger.warning(f"Failed to remove temp directory {temp_subdir.name}: {e}")
|
||||
|
||||
if orphaned_count > 0:
|
||||
app.logger.info(
|
||||
f"Temp file cleanup: removed {orphaned_count} orphaned file(s), "
|
||||
f"freed {cleaned_size / 1024 / 1024:.2f} MB"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user