feat: v1.5.0 Phase 2 - Debug File Management

Implement debug file management system with configuration controls,
automatic cleanup, and security improvements per v1.5.0 Phase 2.

## Changes

### Configuration (config.py)
- Add DEBUG_SAVE_FAILED_UPLOADS (default: false, production-safe)
- Add DEBUG_FILE_MAX_AGE_DAYS (default: 7 days)
- Add DEBUG_FILE_MAX_SIZE_MB (default: 100MB)

### Media Validation (media.py)
- Check config before saving debug files
- Sanitize filenames to prevent path traversal
- Pattern: alphanumeric + "._-", truncated to 50 chars
- Add cleanup_old_debug_files() function
  * Age-based cleanup (delete files older than DEBUG_FILE_MAX_AGE_DAYS)
  * Size-based cleanup (delete oldest files first while total size > DEBUG_FILE_MAX_SIZE_MB)

### Application Startup (__init__.py)
- Run cleanup_old_debug_files() on startup
- Automatic maintenance of debug directory

### Tests (test_debug_file_management.py)
- 15 comprehensive tests
- Config defaults and overrides
- Debug file saving behavior
- Filename sanitization security
- Cleanup age and size limits
- Startup integration

## Security Improvements
- Debug saving disabled by default (production-safe)
- Filename sanitization prevents path traversal
- Automatic cleanup prevents disk exhaustion

## Acceptance Criteria
- [x] Configuration options added
- [x] Debug saving disabled by default
- [x] Filename sanitized before saving
- [x] Cleanup runs on startup
- [x] Old files deleted based on age
- [x] Size limit enforced

All tests pass. Ready for architect review.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-12-17 10:05:42 -07:00
parent 3f1f82a749
commit 1b45a64920
5 changed files with 649 additions and 8 deletions

View File

@@ -9,11 +9,16 @@ Per ADR-057 and ADR-058:
- Tiered resize strategy based on input size (v1.4.0)
- 12000x12000 max dimensions (v1.4.2)
- 4 images max per note
Debug file management (v1.5.0 Phase 2):
- Debug file saving disabled by default
- Automatic cleanup of old debug files
- Size limit enforcement
"""
from PIL import Image, ImageOps
from pathlib import Path
from datetime import datetime
from datetime import datetime, timedelta
import uuid
import io
from typing import Optional, List, Dict, Tuple
@@ -122,19 +127,22 @@ def validate_image(file_data: bytes, filename: str) -> Tuple[bytes, str, int, in
# Mark as HEIF so conversion happens below
img.format = 'HEIF'
except Exception as heic_error:
# Log the magic bytes and save file for debugging (if in app context)
# Log the magic bytes and save file for debugging (if in app context and enabled)
try:
magic = file_data[:12].hex() if len(file_data) >= 12 else file_data.hex()
current_app.logger.warning(
f'Media upload failed both Pillow and HEIC: filename="{filename}", '
f'magic_bytes={magic}, pillow_error="{e}", heic_error="{heic_error}"'
)
# Save failed file for analysis
debug_dir = Path(current_app.config.get('DATA_PATH', 'data')) / 'debug'
debug_dir.mkdir(parents=True, exist_ok=True)
debug_file = debug_dir / f"failed_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{filename}"
debug_file.write_bytes(file_data)
current_app.logger.info(f'Saved failed upload for analysis: {debug_file}')
# Save failed file for analysis (v1.5.0: only if enabled)
if current_app.config.get('DEBUG_SAVE_FAILED_UPLOADS', False):
debug_dir = Path(current_app.config.get('DATA_PATH', 'data')) / 'debug'
debug_dir.mkdir(parents=True, exist_ok=True)
# Sanitize filename to prevent path traversal (v1.5.0 security fix)
safe_filename = "".join(c for c in filename if c.isalnum() or c in "._-")[:50]
debug_file = debug_dir / f"failed_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{safe_filename}"
debug_file.write_bytes(file_data)
current_app.logger.info(f'Saved failed upload for analysis: {debug_file}')
except RuntimeError:
pass # Outside app context (e.g., tests)
raise ValueError(f"Invalid or corrupted image: {e}")
@@ -772,3 +780,77 @@ def delete_media(media_id: int) -> None:
current_app.logger.warning(f"Failed to delete variant file {variant_row[0]}: {e}")
current_app.logger.info(f"Deleted media {media_id}: {deleted_count} file(s) removed from disk")
def cleanup_old_debug_files(app) -> None:
    """
    Clean up old debug files based on age and size limits

    Per v1.5.0 Phase 2:
    - Delete files older than DEBUG_FILE_MAX_AGE_DAYS
    - Delete oldest files if total size exceeds DEBUG_FILE_MAX_SIZE_MB
    - Called on application startup

    Only regular files matching ``failed_*`` directly inside
    ``<DATA_PATH>/debug`` are considered. Files that disappear while the
    cleanup is running (for example because another process is running the
    same cleanup) are treated as already gone rather than as errors, and
    are not counted toward the size limit or the deletion summary.

    Args:
        app: Flask application instance (for config and logger)
    """
    debug_dir = Path(app.config.get('DATA_PATH', 'data')) / 'debug'

    # Nothing to clean if the debug directory does not exist
    if not debug_dir.exists():
        return

    max_age_days = app.config.get('DEBUG_FILE_MAX_AGE_DAYS', 7)
    max_size_mb = app.config.get('DEBUG_FILE_MAX_SIZE_MB', 100)
    max_size_bytes = max_size_mb * 1024 * 1024

    # Collect all debug files with their metadata
    debug_files = []
    for file_path in debug_dir.glob('failed_*'):
        try:
            if not file_path.is_file():
                continue
            stat = file_path.stat()
        except OSError:
            # File vanished (or became unreadable) between listing and stat;
            # skipping it keeps one bad entry from aborting startup.
            continue
        debug_files.append({
            'path': file_path,
            # Naive local time, compared against naive datetime.now() below
            'mtime': datetime.fromtimestamp(stat.st_mtime),
            'size': stat.st_size
        })

    if not debug_files:
        return

    # Sort by modification time (oldest first)
    debug_files.sort(key=lambda f: f['mtime'])

    deleted_count = 0
    deleted_size = 0

    # Phase 1: delete files older than max age. Survivors are collected in a
    # new list (order preserved) instead of removing from the list in-place.
    cutoff_date = datetime.now() - timedelta(days=max_age_days)
    remaining = []
    for file_info in debug_files:
        if file_info['mtime'] >= cutoff_date:
            remaining.append(file_info)
            continue
        try:
            file_info['path'].unlink()
        except FileNotFoundError:
            # Already removed by someone else: no longer on disk, so it must
            # not count toward the size limit and was not freed by us.
            continue
        except OSError as e:
            app.logger.warning(f"Failed to delete old debug file {file_info['path']}: {e}")
            # Still on disk, so it still counts toward the size limit
            remaining.append(file_info)
            continue
        deleted_count += 1
        deleted_size += file_info['size']

    # Phase 2: enforce the total size limit, deleting oldest files first
    total_size = sum(f['size'] for f in remaining)
    for oldest in remaining:
        if total_size <= max_size_bytes:
            break
        try:
            oldest['path'].unlink()
        except FileNotFoundError:
            # Already gone: its bytes are no longer on disk
            total_size -= oldest['size']
            continue
        except OSError as e:
            # Could not free this file; move on to the next-oldest one
            app.logger.warning(f"Failed to delete debug file for size limit {oldest['path']}: {e}")
            continue
        deleted_count += 1
        deleted_size += oldest['size']
        total_size -= oldest['size']

    if deleted_count > 0:
        app.logger.info(
            f"Debug file cleanup: deleted {deleted_count} file(s), "
            f"freed {deleted_size / 1024 / 1024:.2f} MB"
        )