StarPunk/starpunk/media.py

"""
Media upload and management for StarPunk

Per ADR-057 and ADR-058:
- Social media attachment model (media at top of note)
- Pillow-based image optimization
- 50MB max upload, 10MB max output (v1.4.0)
- Image variants: thumb, small, medium, large (v1.4.0)
- Tiered resize strategy based on input size (v1.4.0)
- 12000x12000 max dimensions (v1.4.2)
- 4 images max per note

Debug file management (v1.5.0 Phase 2):
- Debug file saving disabled by default
- Automatic cleanup of old debug files
- Size limit enforcement
"""

from PIL import Image, ImageOps
from pathlib import Path
from datetime import datetime, timedelta
import uuid
import io
import shutil
from typing import Optional, List, Dict, Tuple
from flask import current_app

# HEIC/HEIF support - import registers with Pillow automatically
try:
    import pillow_heif
    pillow_heif.register_heif_opener()
    HEIC_SUPPORTED = True
except ImportError:
    HEIC_SUPPORTED = False

# Allowed MIME types per Q11
ALLOWED_MIME_TYPES = {
    'image/jpeg': ['.jpg', '.jpeg'],
    'image/png': ['.png'],
    'image/gif': ['.gif'],
    'image/webp': ['.webp']
}

# Limits per Q&A and ADR-058 (updated in v1.4.0)
MAX_FILE_SIZE = 50 * 1024 * 1024  # 50MB (v1.4.0)
MAX_OUTPUT_SIZE = 10 * 1024 * 1024  # 10MB target after optimization (v1.4.0)
MAX_DIMENSION = 12000             # 12000x12000 max input (v1.4.2 - supports modern phone cameras)
RESIZE_DIMENSION = 2048           # Auto-resize to 2048px (default)
MIN_QUALITY = 70                  # Minimum JPEG quality before rejection (v1.4.0)
MIN_DIMENSION = 640               # Minimum dimension before rejection (v1.4.0)
MAX_IMAGES_PER_NOTE = 4

# Variant specifications (v1.4.0 Phase 2)
VARIANT_SPECS = {
    'thumb': {'size': (150, 150), 'crop': True},
    'small': {'width': 320, 'crop': False},
    'medium': {'width': 640, 'crop': False},
    'large': {'width': 1280, 'crop': False},
}


def get_optimization_params(file_size: int) -> Tuple[int, int]:
    """
    Determine optimization parameters based on input file size

    Per v1.4.0 tiered resize strategy:
    - <=10MB: 2048px max, 95% quality
    - 10-25MB: 1600px max, 90% quality
    - 25-50MB: 1280px max, 85% quality

    Args:
        file_size: Original file size in bytes

    Returns:
        Tuple of (max_dimension, quality_percent)
    """
    if file_size <= 10 * 1024 * 1024:  # <=10MB
        return (2048, 95)
    elif file_size <= 25 * 1024 * 1024:  # 10-25MB
        return (1600, 90)
    else:  # 25-50MB
        return (1280, 85)


def validate_image(file_data: bytes, filename: str) -> Tuple[bytes, str, int, int]:
    """
    Validate image file

    Per Q11: Validate MIME type using Pillow
    Per Q6: Reject if >50MB or >4096px (updated v1.4.0)
    Per v1.4.2: Convert HEIC to JPEG (browsers cannot display HEIC)

    Args:
        file_data: Raw file bytes
        filename: Original filename

    Returns:
        Tuple of (file_data, mime_type, width, height)
        Note: file_data may be converted (e.g., HEIC to JPEG)

    Raises:
        ValueError: If file is invalid
    """
    # Check file size first (before loading)
    file_size = len(file_data)
    if file_size > MAX_FILE_SIZE:
        raise ValueError("File too large. Maximum size is 50MB")

    # Try to open with Pillow (validates integrity)
    try:
        img = Image.open(io.BytesIO(file_data))
        img.verify()  # Verify it's a valid image

        # Re-open after verify (verify() closes the file)
        img = Image.open(io.BytesIO(file_data))
    except Exception as e:
        # v1.4.2: If Pillow can't open, try explicitly as HEIC
        # iOS sometimes saves HEIC with .jpeg extension
        if HEIC_SUPPORTED:
            try:
                heif_file = pillow_heif.read_heif(file_data)
                img = Image.frombytes(
                    heif_file.mode,
                    heif_file.size,
                    heif_file.data,
                    "raw",
                )
                # Mark as HEIF so conversion happens below
                img.format = 'HEIF'
            except Exception as heic_error:
                # Log the magic bytes and save file for debugging (if in app context and enabled)
                try:
                    magic = file_data[:12].hex() if len(file_data) >= 12 else file_data.hex()
                    current_app.logger.warning(
                        f'Media upload failed both Pillow and HEIC: filename="{filename}", '
                        f'magic_bytes={magic}, pillow_error="{e}", heic_error="{heic_error}"'
                    )
                    # Save failed file for analysis (v1.5.0: only if enabled)
                    if current_app.config.get('DEBUG_SAVE_FAILED_UPLOADS', False):
                        debug_dir = Path(current_app.config.get('DATA_PATH', 'data')) / 'debug'
                        debug_dir.mkdir(parents=True, exist_ok=True)
                        # Sanitize filename to prevent path traversal (v1.5.0 security fix)
                        safe_filename = "".join(c for c in filename if c.isalnum() or c in "._-")[:50]
                        debug_file = debug_dir / f"failed_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{safe_filename}"
                        debug_file.write_bytes(file_data)
                        current_app.logger.info(f'Saved failed upload for analysis: {debug_file}')
                except RuntimeError:
                    pass  # Outside app context (e.g., tests)
                raise ValueError(f"Invalid or corrupted image: {e}")
        else:
            raise ValueError(f"Invalid or corrupted image: {e}")

    # HEIC/HEIF conversion (v1.4.2)
    # HEIC cannot be displayed in browsers, convert to JPEG
    if img.format in ('HEIF', 'HEIC'):
        if not HEIC_SUPPORTED:
            raise ValueError(
                "HEIC/HEIF images require pillow-heif library. "
                "Please convert to JPEG before uploading."
            )
        # Convert HEIC to JPEG in memory
        output = io.BytesIO()
        # Convert to RGB if needed (HEIC may have alpha channel)
        if img.mode in ('RGBA', 'P'):
            img = img.convert('RGB')
        img.save(output, format='JPEG', quality=95)
        output.seek(0)
        # Re-open as JPEG for further processing
        file_data = output.getvalue()
        img = Image.open(io.BytesIO(file_data))

    # MPO (Multi-Picture Object) conversion (v1.4.2)
    # MPO is used by iPhones for depth/portrait photos - extract primary image as JPEG
    if img.format == 'MPO':
        output = io.BytesIO()
        # Convert to RGB if needed
        if img.mode in ('RGBA', 'P'):
            img = img.convert('RGB')
        img.save(output, format='JPEG', quality=95)
        output.seek(0)
        file_data = output.getvalue()
        img = Image.open(io.BytesIO(file_data))

    # Check format is allowed
    if img.format:
        format_lower = img.format.lower()
        mime_type = f'image/{format_lower}'

        # Special case: JPEG format can be reported as 'jpeg'
        if format_lower == 'jpeg':
            mime_type = 'image/jpeg'

        if mime_type not in ALLOWED_MIME_TYPES:
            # Log the detected format for debugging (v1.4.2)
            try:
                current_app.logger.warning(
                    f'Media upload rejected format: filename="{filename}", '
                    f'detected_format="{img.format}", mime_type="{mime_type}"'
                )
            except RuntimeError:
                pass  # Outside app context
            raise ValueError(f"Invalid image format '{img.format}'. Accepted: JPEG, PNG, GIF, WebP")
    else:
        raise ValueError("Could not determine image format")

    # Check dimensions (v1.4.2: increased to 12000px to support modern phone cameras)
    # Images will be resized by optimize_image() anyway
    width, height = img.size
    if max(width, height) > MAX_DIMENSION:
        raise ValueError(f"Image dimensions too large. Maximum is {MAX_DIMENSION}x{MAX_DIMENSION} pixels")

    # Check for animated GIF (v1.4.0)
    # Animated GIFs cannot be resized, so reject if >10MB
    if img.format == 'GIF':
        try:
            img.seek(1)  # Try to seek to second frame
            # If successful, it's animated
            if file_size > MAX_OUTPUT_SIZE:
                raise ValueError(
                    "Animated GIF too large. Maximum size for animated GIFs is 10MB. "
                    "Consider using a shorter clip or lower resolution."
                )
            img.seek(0)  # Reset to first frame
        except EOFError:
            # Not animated, continue normally
            pass

    return file_data, mime_type, width, height


def optimize_image(image_data: bytes, original_size: int = None) -> Tuple[Image.Image, int, int, bytes]:
    """
    Optimize image for web display with size-aware strategy

    Per v1.4.0:
    - Tiered resize strategy based on input size
    - Iterative quality reduction if needed
    - Target output <=10MB

    Args:
        image_data: Raw image bytes
        original_size: Original file size (for tiered optimization)

    Returns:
        Tuple of (optimized_image, width, height, optimized_bytes)

    Raises:
        ValueError: If image cannot be optimized to target size
    """
    if original_size is None:
        original_size = len(image_data)

    # Get initial optimization parameters based on input size
    max_dim, quality = get_optimization_params(original_size)

    img = Image.open(io.BytesIO(image_data))

    # Save original format before any processing (copy() loses this)
    original_format = img.format

    # Correct EXIF orientation (per ADR-058), except for GIFs
    img = ImageOps.exif_transpose(img) if img.format != 'GIF' else img

    # For animated GIFs, return as-is (already validated in validate_image)
    if img.format == 'GIF' and getattr(img, 'is_animated', False):
        # Already checked size in validate_image, just return original
        return img, img.size[0], img.size[1], image_data

    # Iterative optimization loop
    while True:
        # Create copy for this iteration
        work_img = img.copy()

        # Resize if needed
        if max(work_img.size) > max_dim:
            work_img.thumbnail((max_dim, max_dim), Image.Resampling.LANCZOS)

        # Save to bytes to check size
        output = io.BytesIO()
        # Use original format (copy() loses the format attribute)
        save_format = original_format or 'JPEG'
        save_kwargs = {'optimize': True}

        if save_format in ['JPEG', 'JPG']:
            save_kwargs['quality'] = quality
        elif save_format == 'WEBP':
            save_kwargs['quality'] = quality
        # For GIF and PNG, just use optimize flag

        work_img.save(output, format=save_format, **save_kwargs)
        output_bytes = output.getvalue()

        # Check output size
        if len(output_bytes) <= MAX_OUTPUT_SIZE:
            width, height = work_img.size
            return work_img, width, height, output_bytes

        # Need to reduce further
        if quality > MIN_QUALITY:
            # Reduce quality first
            quality -= 5
        else:
            # Already at min quality, reduce dimensions
            max_dim = int(max_dim * 0.8)
            quality = 85  # Reset quality for new dimension

            # Safety check: minimum dimension
            if max_dim < MIN_DIMENSION:
                raise ValueError(
                    "Image cannot be optimized to target size. "
                    "Please use a smaller or lower-resolution image."
                )


def generate_variant(
    img: Image.Image,
    variant_type: str,
    base_path: Path,
    base_filename: str,
    file_ext: str,
    relative_path: str = None
) -> Dict:
    """
    Generate a single image variant

    Args:
        img: Source PIL Image
        variant_type: One of 'thumb', 'small', 'medium', 'large'
        base_path: Directory to save to
        base_filename: Base filename (UUID without extension)
        file_ext: File extension (e.g., '.jpg')
        relative_path: Relative path for metadata (if None, calculated from base_path)

    Returns:
        Dict with variant metadata (path, width, height, size_bytes)
    """
    spec = VARIANT_SPECS[variant_type]
    work_img = img.copy()

    if spec.get('crop'):
        # Center crop for thumbnails using ImageOps.fit()
        work_img = ImageOps.fit(
            work_img,
            spec['size'],
            method=Image.Resampling.LANCZOS,
            centering=(0.5, 0.5)
        )
    else:
        # Aspect-preserving resize
        target_width = spec['width']
        if work_img.width > target_width:
            ratio = target_width / work_img.width
            new_height = int(work_img.height * ratio)
            work_img = work_img.resize(
                (target_width, new_height),
                Image.Resampling.LANCZOS
            )

    # Generate variant filename
    variant_filename = f"{base_filename}_{variant_type}{file_ext}"
    variant_path = base_path / variant_filename

    # Save with appropriate quality
    save_kwargs = {'optimize': True}

    # Determine format - prefer image's actual format over extension
    # This handles cases like HEIC -> JPEG conversion where extension doesn't match format
    if work_img.format and work_img.format in ['JPEG', 'PNG', 'GIF', 'WEBP']:
        save_format = work_img.format
        if save_format in ['JPEG', 'JPG']:
            save_kwargs['quality'] = 85
    else:
        # Fallback to extension-based detection
        if file_ext.lower() in ['.jpg', '.jpeg', '.heic']:
            save_format = 'JPEG'
            save_kwargs['quality'] = 85
        elif file_ext.lower() == '.png':
            save_format = 'PNG'
        elif file_ext.lower() == '.gif':
            save_format = 'GIF'
        elif file_ext.lower() == '.webp':
            save_format = 'WEBP'
            save_kwargs['quality'] = 85
        else:
            save_format = 'JPEG'  # Default fallback
            save_kwargs['quality'] = 85

    work_img.save(variant_path, format=save_format, **save_kwargs)

    # Use provided relative path or calculate it
    if relative_path is None:
        relative_path = str(variant_path.relative_to(base_path.parent.parent))  # Relative to media root

    return {
        'variant_type': variant_type,
        'path': relative_path,
        'width': work_img.width,
        'height': work_img.height,
        'size_bytes': variant_path.stat().st_size,
        'temp_file': variant_path  # Include temp file path for atomic operation
    }


def generate_all_variants(
    img: Image.Image,
    base_path: Path,
    base_filename: str,
    file_ext: str,
    media_id: int,
    year: str,
    month: str,
    optimized_bytes: bytes,
    db = None
) -> Tuple[List[Dict], List[Tuple[Path, Path]]]:
    """
    Generate all variants for an image and prepare database records

    Per v1.5.0 Phase 4: Atomic variant generation
    - Generate variants to temp directory first
    - Return database insert data and file move operations
    - Caller handles transaction commit and file moves
    - This ensures true atomicity

    Args:
        img: Source PIL Image (the optimized original)
        base_path: Directory containing the original (final destination)
        base_filename: Base filename (UUID without extension)
        file_ext: File extension
        media_id: ID of parent media record
        year: Year string (e.g., '2025') for path calculation
        month: Month string (e.g., '01') for path calculation
        optimized_bytes: Bytes of optimized original (avoids re-reading file)
        db: Database connection (optional, for transaction control)

    Returns:
        Tuple of (variant_metadata_list, file_moves_list)
        - variant_metadata_list: List of dicts ready for database insert
        - file_moves_list: List of (src_path, dst_path) tuples for file moves
    """
    from starpunk.database import get_db

    if db is None:
        db = get_db(current_app)

    variants = []
    file_moves = []

    # Create temp directory for atomic operation
    media_dir = Path(current_app.config.get('DATA_PATH', 'data')) / 'media'
    temp_dir = media_dir / '.tmp'
    temp_dir.mkdir(parents=True, exist_ok=True)

    # Create unique temp subdirectory for this operation
    temp_subdir = temp_dir / f"{base_filename}_{uuid.uuid4().hex[:8]}"
    temp_subdir.mkdir(parents=True, exist_ok=True)

    try:
        # Step 1: Generate all variants to temp directory
        for variant_type in ['thumb', 'small', 'medium', 'large']:
            # Skip if image is smaller than target
            spec = VARIANT_SPECS[variant_type]
            target_width = spec.get('width') or spec['size'][0]

            if img.width < target_width and variant_type != 'thumb':
                continue  # Skip variants larger than original

            # Calculate final relative path (for database)
            final_relative_path = f"{year}/{month}/{base_filename}_{variant_type}{file_ext}"

            # Generate variant to temp directory
            variant = generate_variant(
                img,
                variant_type,
                temp_subdir,  # Write to temp
                base_filename,
                file_ext,
                final_relative_path  # Store final path in metadata
            )

            # Prepare database metadata (without temp_file key)
            variant_metadata = {
                'variant_type': variant['variant_type'],
                'path': variant['path'],
                'width': variant['width'],
                'height': variant['height'],
                'size_bytes': variant['size_bytes']
            }
            variants.append(variant_metadata)

            # Track file move operation
            temp_file = variant['temp_file']
            final_path = base_path / temp_file.name
            file_moves.append((temp_file, final_path, temp_subdir))

        # Also prepare original variant metadata
        original_path = f"{year}/{month}/{base_filename}{file_ext}"
        variants.append({
            'variant_type': 'original',
            'path': original_path,
            'width': img.width,
            'height': img.height,
            'size_bytes': len(optimized_bytes)
        })

        return variants, file_moves

    except Exception as e:
        # Clean up temp files on failure
        try:
            if temp_subdir.exists():
                for file in temp_subdir.glob('*'):
                    try:
                        file.unlink()
                    except OSError:
                        pass
                temp_subdir.rmdir()
        except OSError:
            pass  # Best effort

        raise  # Re-raise the original exception


def save_media(file_data: bytes, filename: str) -> Dict:
    """
    Save uploaded media file

    Per Q5: UUID-based filename to avoid collisions
    Per Q2: Date-organized path: /media/YYYY/MM/uuid.ext
    Per Q6: Validate, optimize, then save
    Per v1.4.0: Size-aware optimization with iterative quality reduction

    Args:
        file_data: Raw file bytes
        filename: Original filename

    Returns:
        Media metadata dict (for database insert)

    Raises:
        ValueError: If validation fails
    """
    from starpunk.database import get_db

    # Capture file size for logging
    file_size = len(file_data)

    try:
        # Validate image (returns 4-tuple with potentially converted bytes)
        try:
            file_data, mime_type, orig_width, orig_height = validate_image(file_data, filename)
        except ValueError as e:
            current_app.logger.warning(
                f'Media upload validation failed: filename="{filename}", '
                f'size={file_size}b, error="{e}"'
            )
            raise

        # Optimize image with size-aware strategy (now returns 4-tuple with bytes)
        try:
            optimized_img, width, height, optimized_bytes = optimize_image(file_data, file_size)
        except ValueError as e:
            current_app.logger.warning(
                f'Media upload optimization failed: filename="{filename}", '
                f'size={file_size}b, error="{e}"'
            )
            raise

        # Generate UUID-based filename (per Q5)
        file_ext = Path(filename).suffix.lower()
        if not file_ext:
            # Determine extension from MIME type
            for mime, exts in ALLOWED_MIME_TYPES.items():
                if mime == mime_type:
                    file_ext = exts[0]
                    break

        stored_filename = f"{uuid.uuid4()}{file_ext}"

        # Create date-based path (per Q2)
        now = datetime.now()
        year = now.strftime('%Y')
        month = now.strftime('%m')
        relative_path = f"{year}/{month}/{stored_filename}"

        # Get media directory from app config
        media_dir = Path(current_app.config.get('DATA_PATH', 'data')) / 'media'
        full_dir = media_dir / year / month
        full_dir.mkdir(parents=True, exist_ok=True)

        # Get actual file size (from optimized bytes)
        actual_size = len(optimized_bytes)

        # Per v1.5.0 Phase 4: Atomic operation for all file saves and database inserts
        # Generate variants first (to temp directory)
        base_filename = stored_filename.rsplit('.', 1)[0]

        db = get_db(current_app)
        variant_metadata = []
        file_moves = []
        temp_original_path = None
        temp_subdir = None

        try:
            # Step 1: Save original to temp directory
            media_dir = Path(current_app.config.get('DATA_PATH', 'data')) / 'media'
            temp_dir = media_dir / '.tmp'
            temp_dir.mkdir(parents=True, exist_ok=True)
            temp_subdir = temp_dir / f"{base_filename}_{uuid.uuid4().hex[:8]}"
            temp_subdir.mkdir(parents=True, exist_ok=True)

            temp_original_path = temp_subdir / stored_filename
            temp_original_path.write_bytes(optimized_bytes)

            # Step 2: Generate variants to temp directory
            variant_metadata, file_moves = generate_all_variants(
                optimized_img,
                full_dir,
                base_filename,
                file_ext,
                0,  # media_id not yet known
                year,
                month,
                optimized_bytes,
                db
            )

            # Step 3: Begin transaction
            db.execute("BEGIN TRANSACTION")

            # Step 4: Insert media record
            cursor = db.execute(
                """
                INSERT INTO media (filename, stored_filename, path, mime_type, size, width, height)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                (filename, stored_filename, relative_path, mime_type, actual_size, width, height)
            )
            media_id = cursor.lastrowid

            # Step 5: Insert variant records
            for variant in variant_metadata:
                db.execute(
                    """
                    INSERT INTO media_variants
                    (media_id, variant_type, path, width, height, size_bytes)
                    VALUES (?, ?, ?, ?, ?, ?)
                    """,
                    (media_id, variant['variant_type'], variant['path'],
                     variant['width'], variant['height'], variant['size_bytes'])
                )

            # Step 6: Move files to final location (before commit for true atomicity)
            # If file moves fail, we can rollback the transaction
            try:
                # Move original file
                full_path = full_dir / stored_filename
                shutil.move(str(temp_original_path), str(full_path))

                # Move variant files
                for temp_file, final_path, _ in file_moves:
                    shutil.move(str(temp_file), str(final_path))

            except Exception as e:
                # Rollback database if file move fails
                db.rollback()
                raise

            # Step 7: Commit transaction (after files are moved successfully)
            db.commit()

            # Step 8: Clean up temp directory
            try:
                if temp_subdir and temp_subdir.exists():
                    temp_subdir.rmdir()
            except OSError:
                pass  # Best effort

            # Format variants for return value (same format as before)
            variants = [v for v in variant_metadata if v['variant_type'] != 'original']

        except Exception as e:
            # Rollback database on any failure (best effort)
            try:
                db.rollback()
            except Exception:
                pass  # May already be rolled back

            # Clean up moved files if commit failed
            # (This handles the case where files were moved but commit failed)
            full_path = full_dir / stored_filename
            if full_path.exists():
                try:
                    full_path.unlink()
                except OSError:
                    pass

            for _, final_path, _ in file_moves:
                try:
                    if final_path.exists():
                        final_path.unlink()
                except OSError:
                    pass

            # Clean up temp files on any failure
            if temp_original_path and temp_original_path.exists():
                try:
                    temp_original_path.unlink()
                except OSError:
                    pass

            for temp_file, _, _ in file_moves:
                try:
                    if temp_file.exists():
                        temp_file.unlink()
                except OSError:
                    pass

            # Clean up temp subdirectory
            if temp_subdir and temp_subdir.exists():
                try:
                    temp_subdir.rmdir()
                except OSError:
                    pass

            # Log and re-raise
            current_app.logger.warning(
                f'Media upload atomic operation failed: filename="{filename}", '
                f'error="{e}"'
            )
            raise

        # Log success
        was_optimized = len(optimized_bytes) < file_size
        current_app.logger.info(
            f'Media upload successful: filename="{filename}", '
            f'stored="{stored_filename}", size={len(optimized_bytes)}b, '
            f'optimized={was_optimized}, variants={len(variants)}'
        )

        return {
            'id': media_id,
            'filename': filename,
            'stored_filename': stored_filename,
            'path': relative_path,
            'mime_type': mime_type,
            'size': actual_size,
            'width': width,
            'height': height,
            'variants': variants
        }

    except ValueError:
        # Already logged at WARNING level in validation/optimization blocks
        raise

    except Exception as e:
        current_app.logger.error(
            f'Media upload failed unexpectedly: filename="{filename}", '
            f'error_type="{type(e).__name__}", error="{e}"'
        )
        raise


def attach_media_to_note(note_id: int, media_ids: List[int], captions: List[str]) -> None:
    """
    Attach media files to note

    Per Q4: Happens after note creation
    Per Q7: Captions are optional per image

    Args:
        note_id: Note to attach to
        media_ids: List of media IDs (max 4)
        captions: List of captions (same length as media_ids)

    Raises:
        ValueError: If more than MAX_IMAGES_PER_NOTE
    """
    from starpunk.database import get_db

    if len(media_ids) > MAX_IMAGES_PER_NOTE:
        raise ValueError(f"Maximum {MAX_IMAGES_PER_NOTE} images per note")

    db = get_db(current_app)

    # Delete existing associations (for edit case)
    db.execute("DELETE FROM note_media WHERE note_id = ?", (note_id,))

    # Insert new associations
    for i, (media_id, caption) in enumerate(zip(media_ids, captions)):
        db.execute(
            """
            INSERT INTO note_media (note_id, media_id, display_order, caption)
            VALUES (?, ?, ?, ?)
            """,
            (note_id, media_id, i, caption or None)
        )

    db.commit()


def get_note_media(note_id: int) -> List[Dict]:
    """
    Get all media attached to a note with variants (v1.4.0)

    Returns list sorted by display_order

    Args:
        note_id: Note ID to get media for

    Returns:
        List of media dicts with metadata (includes 'variants' key if variants exist)
    """
    from starpunk.database import get_db

    db = get_db(current_app)
    rows = db.execute(
        """
        SELECT
            m.id,
            m.filename,
            m.stored_filename,
            m.path,
            m.mime_type,
            m.size,
            m.width,
            m.height,
            nm.caption,
            nm.display_order
        FROM note_media nm
        JOIN media m ON nm.media_id = m.id
        WHERE nm.note_id = ?
        ORDER BY nm.display_order
        """,
        (note_id,)
    ).fetchall()

    media_list = []
    for row in rows:
        media_dict = {
            'id': row[0],
            'filename': row[1],
            'stored_filename': row[2],
            'path': row[3],
            'mime_type': row[4],
            'size': row[5],
            'width': row[6],
            'height': row[7],
            'caption': row[8],
            'display_order': row[9]
        }

        # Fetch variants for this media (v1.4.0 Phase 2)
        variants = db.execute(
            """
            SELECT variant_type, path, width, height, size_bytes
            FROM media_variants
            WHERE media_id = ?
            ORDER BY
                CASE variant_type
                    WHEN 'thumb' THEN 1
                    WHEN 'small' THEN 2
                    WHEN 'medium' THEN 3
                    WHEN 'large' THEN 4
                    WHEN 'original' THEN 5
                END
            """,
            (row[0],)
        ).fetchall()

        # Only add 'variants' key if variants exist (backwards compatibility)
        # Pre-v1.4.0 media won't have variants, and consumers shouldn't
        # expect the key to be present
        if variants:
            media_dict['variants'] = {
                v[0]: {
                    'path': v[1],
                    'width': v[2],
                    'height': v[3],
                    'size_bytes': v[4]
                }
                for v in variants
            }

        media_list.append(media_dict)

    return media_list


def get_media_for_notes(note_ids: List[int]) -> Dict[int, List[Dict]]:
    """
    Batch load media for multiple notes in single query

    Per v1.5.0 Phase 3: Fixes N+1 query pattern in feed generation.
    Loads media and variants for all notes in 2 queries instead of O(n).

    Args:
        note_ids: List of note IDs to load media for

    Returns:
        Dict mapping note_id to list of media dicts (same format as get_note_media)

    Examples:
        >>> result = get_media_for_notes([1, 2, 3])
        >>> result[1]  # Media for note 1
        [{'id': 10, 'filename': 'test.jpg', ...}]
        >>> result[2]  # Media for note 2
        []  # No media
    """
    from starpunk.database import get_db

    if not note_ids:
        return {}

    db = get_db(current_app)

    # Build placeholders for IN clause
    placeholders = ','.join('?' * len(note_ids))

    # Query 1: Get all media for all notes
    media_rows = db.execute(
        f"""
        SELECT
            nm.note_id,
            m.id,
            m.filename,
            m.stored_filename,
            m.path,
            m.mime_type,
            m.size,
            m.width,
            m.height,
            nm.caption,
            nm.display_order
        FROM note_media nm
        JOIN media m ON nm.media_id = m.id
        WHERE nm.note_id IN ({placeholders})
        ORDER BY nm.note_id, nm.display_order
        """,
        note_ids
    ).fetchall()

    # Extract all media IDs for variant query
    media_ids = [row[1] for row in media_rows]

    # Query 2: Get all variants for all media (if any media exists)
    variants_by_media = {}
    if media_ids:
        variant_placeholders = ','.join('?' * len(media_ids))
        variant_rows = db.execute(
            f"""
            SELECT media_id, variant_type, path, width, height, size_bytes
            FROM media_variants
            WHERE media_id IN ({variant_placeholders})
            ORDER BY media_id,
                CASE variant_type
                    WHEN 'thumb' THEN 1
                    WHEN 'small' THEN 2
                    WHEN 'medium' THEN 3
                    WHEN 'large' THEN 4
                    WHEN 'original' THEN 5
                END
            """,
            media_ids
        ).fetchall()

        # Group variants by media_id
        for row in variant_rows:
            media_id = row[0]
            if media_id not in variants_by_media:
                variants_by_media[media_id] = []
            variants_by_media[media_id].append({
                'variant_type': row[1],
                'path': row[2],
                'width': row[3],
                'height': row[4],
                'size_bytes': row[5]
            })

    # Build result dict grouped by note_id
    result = {note_id: [] for note_id in note_ids}

    for row in media_rows:
        note_id = row[0]
        media_id = row[1]

        media_dict = {
            'id': media_id,
            'filename': row[2],
            'stored_filename': row[3],
            'path': row[4],
            'mime_type': row[5],
            'size': row[6],
            'width': row[7],
            'height': row[8],
            'caption': row[9],
            'display_order': row[10]
        }

        # Add variants if they exist for this media
        if media_id in variants_by_media:
            media_dict['variants'] = {
                v['variant_type']: {
                    'path': v['path'],
                    'width': v['width'],
                    'height': v['height'],
                    'size_bytes': v['size_bytes']
                }
                for v in variants_by_media[media_id]
            }

        result[note_id].append(media_dict)

    return result


def delete_media(media_id: int) -> None:
    """
    Delete media file, variants, and database record

    Per Q8: Cleanup orphaned files
    Per v1.4.0: Also cleanup variant files

    Args:
        media_id: Media ID to delete
    """
    from starpunk.database import get_db

    db = get_db(current_app)

    # Get media path before deleting
    row = db.execute("SELECT path FROM media WHERE id = ?", (media_id,)).fetchone()
    if not row:
        return

    media_path = row[0]
    media_dir = Path(current_app.config.get('DATA_PATH', 'data')) / 'media'

    # Get variant paths before deleting (v1.4.0)
    variant_rows = db.execute(
        "SELECT path FROM media_variants WHERE media_id = ?",
        (media_id,)
    ).fetchall()

    # Delete database record (cascade will delete media_variants and note_media entries)
    db.execute("DELETE FROM media WHERE id = ?", (media_id,))
    db.commit()

    # Delete files from disk (best-effort cleanup)
    deleted_count = 0

    # Delete original file
    full_path = media_dir / media_path
    try:
        if full_path.exists():
            full_path.unlink()
            deleted_count += 1
    except OSError as e:
        current_app.logger.warning(f"Failed to delete media file {media_path}: {e}")

    # Delete variant files (v1.4.0)
    for variant_row in variant_rows:
        variant_path = media_dir / variant_row[0]
        try:
            if variant_path.exists():
                variant_path.unlink()
                deleted_count += 1
        except OSError as e:
            current_app.logger.warning(f"Failed to delete variant file {variant_row[0]}: {e}")

    current_app.logger.info(f"Deleted media {media_id}: {deleted_count} file(s) removed from disk")


def cleanup_old_debug_files(app) -> None:
    """
    Clean up old debug files based on age and size limits

    Per v1.5.0 Phase 2:
    - Delete files older than DEBUG_FILE_MAX_AGE_DAYS
    - Delete oldest files if total size exceeds DEBUG_FILE_MAX_SIZE_MB
    - Called on application startup

    Args:
        app: Flask application instance (for config and logger)
    """
    debug_dir = Path(app.config.get('DATA_PATH', 'data')) / 'debug'

    # Check if debug directory exists
    if not debug_dir.exists():
        return

    max_age_days = app.config.get('DEBUG_FILE_MAX_AGE_DAYS', 7)
    max_size_mb = app.config.get('DEBUG_FILE_MAX_SIZE_MB', 100)
    max_size_bytes = max_size_mb * 1024 * 1024

    # Get all debug files with their metadata
    debug_files = []
    for file_path in debug_dir.glob('failed_*'):
        if file_path.is_file():
            stat = file_path.stat()
            debug_files.append({
                'path': file_path,
                'mtime': datetime.fromtimestamp(stat.st_mtime),
                'size': stat.st_size
            })

    if not debug_files:
        return

    # Sort by modification time (oldest first)
    debug_files.sort(key=lambda f: f['mtime'])

    deleted_count = 0
    deleted_size = 0

    # Delete files older than max age
    cutoff_date = datetime.now() - timedelta(days=max_age_days)
    for file_info in debug_files[:]:  # Use slice to iterate over copy
        if file_info['mtime'] < cutoff_date:
            try:
                file_info['path'].unlink()
                deleted_count += 1
                deleted_size += file_info['size']
                debug_files.remove(file_info)
            except OSError as e:
                app.logger.warning(f"Failed to delete old debug file {file_info['path']}: {e}")

    # Check total size and delete oldest files if over limit
    total_size = sum(f['size'] for f in debug_files)
    while total_size > max_size_bytes and debug_files:
        # Delete oldest file
        oldest = debug_files.pop(0)
        try:
            oldest['path'].unlink()
            deleted_count += 1
            deleted_size += oldest['size']
            total_size -= oldest['size']
        except OSError as e:
            app.logger.warning(f"Failed to delete debug file for size limit {oldest['path']}: {e}")

    if deleted_count > 0:
        app.logger.info(
            f"Debug file cleanup: deleted {deleted_count} file(s), "
            f"freed {deleted_size / 1024 / 1024:.2f} MB"
        )


def cleanup_orphaned_temp_files(app) -> None:
    """
    Clean up orphaned temporary variant files on startup

    Per v1.5.0 Phase 4:
    - Detect temp files left from failed operations
    - Log warnings for orphaned files
    - Clean up temp directory
    - Called on application startup

    Args:
        app: Flask application instance (for config and logger)
    """
    media_dir = Path(app.config.get('DATA_PATH', 'data')) / 'media'
    temp_dir = media_dir / '.tmp'

    # Check if temp directory exists
    if not temp_dir.exists():
        return

    # Find all subdirectories and files in temp directory
    orphaned_count = 0
    cleaned_size = 0

    # Iterate through temp subdirectories
    for temp_subdir in temp_dir.iterdir():
        if not temp_subdir.is_dir():
            # Clean up any loose files (shouldn't normally exist)
            try:
                size = temp_subdir.stat().st_size
                temp_subdir.unlink()
                orphaned_count += 1
                cleaned_size += size
                app.logger.warning(f"Cleaned up orphaned temp file: {temp_subdir.name}")
            except OSError as e:
                app.logger.warning(f"Failed to delete orphaned temp file {temp_subdir.name}: {e}")
            continue

        # Process subdirectory
        files_in_subdir = list(temp_subdir.glob('*'))
        if files_in_subdir:
            # Log orphaned operation
            app.logger.warning(
                f"Found orphaned temp directory from failed operation: {temp_subdir.name} "
                f"({len(files_in_subdir)} file(s))"
            )

            # Clean up files
            for file_path in files_in_subdir:
                try:
                    if file_path.is_file():
                        size = file_path.stat().st_size
                        file_path.unlink()
                        orphaned_count += 1
                        cleaned_size += size
                except OSError as e:
                    app.logger.warning(f"Failed to delete orphaned temp file {file_path}: {e}")

        # Remove empty subdirectory
        try:
            temp_subdir.rmdir()
        except OSError as e:
            app.logger.warning(f"Failed to remove temp directory {temp_subdir.name}: {e}")

    if orphaned_count > 0:
        app.logger.info(
            f"Temp file cleanup: removed {orphaned_count} orphaned file(s), "
            f"freed {cleaned_size / 1024 / 1024:.2f} MB"
        )