StarPunk/starpunk/__init__.py
Commit 07f351fef7 by Phil Skentelbery
feat(media): Add comprehensive logging for media uploads - v1.4.1
Implements media upload logging per docs/design/v1.4.1/media-logging-design.md

Changes:
- Add logging to save_media() in starpunk/media.py:
  * INFO: Successful uploads with file details
  * WARNING: Validation/optimization/variant failures
  * ERROR: Unexpected system errors
- Remove duplicate logging in Micropub media endpoint
- Add 5 comprehensive logging tests in TestMediaLogging class
- Bump version to 1.4.1
- Update CHANGELOG.md

All media upload operations now logged for debugging and observability.
Validation errors, optimization failures, and variant generation issues
are tracked at appropriate log levels. Original functionality unchanged.

Test results: 28/28 media tests pass, 5 new logging tests pass

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-16 17:22:22 -07:00
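
As a rough illustration of the log-level split described in the commit message, the pattern added to save_media() would look something like the sketch below. The real signature, helper functions, and exception types live in starpunk/media.py, which is not shown on this page, so the names here are assumptions only:

# Hypothetical sketch of the described logging behavior; helper names are invented.
from flask import current_app

def save_media(file_storage):
    try:
        validate_upload(file_storage)  # assumed validation helper
    except ValueError as exc:
        # WARNING: expected, user-correctable failures (validation and similar)
        current_app.logger.warning(f"Media validation failed: {exc}")
        raise
    try:
        saved_path = store_and_optimize(file_storage)  # assumed processing helper
    except OSError as exc:
        # ERROR: unexpected system-level failures
        current_app.logger.error(f"Media upload failed unexpectedly: {exc}")
        raise
    # INFO: successful upload with file details
    current_app.logger.info(f"Media uploaded: {file_storage.filename} -> {saved_path}")
    return saved_path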


"""
StarPunk package initialization
Creates and configures the Flask application
"""
import logging
from logging.handlers import RotatingFileHandler
from pathlib import Path
from flask import Flask, g
import uuid
def configure_logging(app):
"""
Configure application logging with RotatingFileHandler and structured logging
Per ADR-054 and developer Q&A Q3:
- Uses RotatingFileHandler (10MB files, keep 10)
- Supports correlation IDs for request tracking
- Uses Flask's app.logger for all logging
Args:
app: Flask application instance
"""
log_level = app.config.get("LOG_LEVEL", "INFO").upper()
# Set Flask logger level
app.logger.setLevel(getattr(logging, log_level, logging.INFO))
# Configure console handler
console_handler = logging.StreamHandler()
# Configure file handler with rotation (10MB per file, keep 10 files)
log_dir = app.config.get("DATA_PATH", Path("./data")) / "logs"
log_dir.mkdir(parents=True, exist_ok=True)
log_file = log_dir / "starpunk.log"
file_handler = RotatingFileHandler(
log_file,
maxBytes=10 * 1024 * 1024, # 10MB
backupCount=10
)
# Format with correlation ID support
if log_level == "DEBUG":
formatter = logging.Formatter(
"[%(asctime)s] %(levelname)s - %(name)s [%(correlation_id)s]: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
# Warn if DEBUG enabled in production
if not app.debug and app.config.get("ENV") != "development":
app.logger.warning(
"=" * 70
+ "\n"
+ "WARNING: DEBUG logging enabled in production!\n"
+ "This logs detailed HTTP requests/responses.\n"
+ "Sensitive data is redacted, but consider using INFO level.\n"
+ "Set LOG_LEVEL=INFO in production for normal operation.\n"
+ "=" * 70
)
else:
formatter = logging.Formatter(
"[%(asctime)s] %(levelname)s [%(correlation_id)s]: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)
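
    # With the non-DEBUG format above, an emitted record looks roughly like this
    # (timestamp and correlation ID are illustrative):
    #   [2025-12-16 17:22:22] INFO [3f6c2e1a-9a41-4d5f-8c0e-1b2d3f4a5b6c]: Feed cache enabled (max_size=50, ttl=300s)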

    # Add filter to inject correlation ID BEFORE setting formatters
    # This filter must be applied to handlers to work with all loggers
    class CorrelationIdFilter(logging.Filter):
        def filter(self, record):
            # Get correlation ID from Flask's g object, or use fallback
            # Handle case where we're outside of request context
            if not hasattr(record, 'correlation_id'):
                try:
                    from flask import has_request_context

                    if has_request_context():
                        record.correlation_id = getattr(g, 'correlation_id', 'no-request')
                    else:
                        record.correlation_id = 'init'
                except (RuntimeError, AttributeError):
                    record.correlation_id = 'init'
            return True

    correlation_filter = CorrelationIdFilter()

    # Apply filter to handlers (not loggers) to ensure all log records have correlation_id
    console_handler.addFilter(correlation_filter)
    file_handler.addFilter(correlation_filter)

    console_handler.setFormatter(formatter)
    file_handler.setFormatter(formatter)

    # Remove existing handlers and add our configured handlers
    app.logger.handlers.clear()
    app.logger.addHandler(console_handler)
    app.logger.addHandler(file_handler)

    # Also apply filter to root logger for any other loggers
    root_logger = logging.getLogger()
    root_logger.addFilter(correlation_filter)


def add_correlation_id():
    """Generate and store correlation ID for the current request"""
    if not hasattr(g, 'correlation_id'):
        g.correlation_id = str(uuid.uuid4())


def create_app(config=None):
    """
    Application factory for StarPunk

    Args:
        config: Optional configuration dict to override defaults

    Returns:
        Configured Flask application instance
    """
    app = Flask(__name__, static_folder="../static", template_folder="../templates")

    # Load configuration
    from starpunk.config import load_config

    load_config(app, config)

    # Configure logging
    configure_logging(app)

    # Initialize database schema
    from starpunk.database import init_db, init_pool

    init_db(app)

    # Initialize connection pool
    init_pool(app)

    # Setup HTTP metrics middleware (v1.1.2 Phase 1)
    if app.config.get('METRICS_ENABLED', True):
        from starpunk.monitoring import setup_http_metrics

        setup_http_metrics(app)
        app.logger.info("HTTP metrics middleware enabled")

    # Initialize feed cache (v1.1.2 Phase 3)
    if app.config.get('FEED_CACHE_ENABLED', True):
        from starpunk.feeds import configure_cache

        max_size = app.config.get('FEED_CACHE_MAX_SIZE', 50)
        ttl = app.config.get('FEED_CACHE_SECONDS', 300)
        configure_cache(max_size=max_size, ttl=ttl)
        app.logger.info(f"Feed cache enabled (max_size={max_size}, ttl={ttl}s)")

    # Initialize FTS index if needed
    from pathlib import Path
    from starpunk.search import has_fts_table, rebuild_fts_index
    import sqlite3

    db_path = Path(app.config["DATABASE_PATH"])
    data_path = Path(app.config["DATA_PATH"])

    if has_fts_table(db_path):
        # Check if index is empty (fresh migration or first run)
        try:
            conn = sqlite3.connect(db_path)
            count = conn.execute("SELECT COUNT(*) FROM notes_fts").fetchone()[0]
            conn.close()

            if count == 0:
                app.logger.info("FTS index is empty, populating from existing notes...")
                try:
                    rebuild_fts_index(db_path, data_path)
                    app.logger.info("FTS index successfully populated")
                except Exception as e:
                    app.logger.error(f"Failed to populate FTS index: {e}")
        except Exception as e:
            app.logger.debug(f"FTS index check skipped: {e}")

    # Register blueprints
    from starpunk.routes import register_routes

    register_routes(app)

    # Template context processor - Inject author profile (v1.2.0 Phase 2)
    @app.context_processor
    def inject_author():
        """
        Inject author profile into all templates

        Per Q19: Global context processor approach
        Makes author data available in all templates for h-card markup
        """
        from starpunk.author_discovery import get_author_profile

        # Get ADMIN_ME from config (single-user CMS)
        me_url = app.config.get('ADMIN_ME')

        if me_url:
            try:
                author = get_author_profile(me_url)
            except Exception as e:
                app.logger.warning(f"Failed to get author profile in template context: {e}")
                author = None
        else:
            author = None

        return {'author': author}
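
    # In templates the injected value is consumed roughly as shown below (field names
    # depend on what get_author_profile() returns; "name" here is illustrative):
    #   {% if author %}<span class="p-name">{{ author.name }}</span>{% endif %}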

    # Request middleware - Add correlation ID to each request
    @app.before_request
    def before_request():
        """Add correlation ID to request context for tracing"""
        add_correlation_id()

    # Register centralized error handlers
    from starpunk.errors import register_error_handlers

    register_error_handlers(app)

    # Start memory monitor thread (v1.1.2 Phase 1)
    # Per CQ5: Skip in test mode
    if app.config.get('METRICS_ENABLED', True) and not app.config.get('TESTING', False):
        from starpunk.monitoring import MemoryMonitor

        memory_monitor = MemoryMonitor(interval=app.config.get('METRICS_MEMORY_INTERVAL', 30))
        memory_monitor.start()
        app.memory_monitor = memory_monitor
        app.logger.info(f"Memory monitor started (interval={memory_monitor.interval}s)")

        # Register cleanup handler
        @app.teardown_appcontext
        def cleanup_memory_monitor(error=None):
            if hasattr(app, 'memory_monitor') and app.memory_monitor.is_alive():
                app.memory_monitor.stop()

    # Health check endpoint for containers and monitoring
    @app.route("/health")
    def health_check():
        """
        Health check endpoint for containers and monitoring

        Per developer Q&A Q10:
        - Basic mode (/health): Public, no auth, returns 200 OK for load balancers
        - Detailed mode (/health?detailed=true): Requires auth, checks database/disk

        Returns:
            JSON with status and info (varies by mode)

        Response codes:
            200: Application healthy
            401: Unauthorized (detailed mode without auth)
            500: Application unhealthy

        Query parameters:
            detailed: If 'true', perform detailed checks (requires auth)
        """
        from flask import jsonify, request
        import os
        import shutil

        # Check if detailed mode requested
        detailed = request.args.get('detailed', '').lower() == 'true'

        if detailed:
            # Detailed mode requires authentication
            if not g.get('me'):
                return jsonify({"error": "Authentication required for detailed health check"}), 401

            # Perform comprehensive health checks
            checks = {}
            overall_healthy = True

            # Check database connectivity
            try:
                from starpunk.database import get_db

                db = get_db(app)
                db.execute("SELECT 1").fetchone()
                db.close()
                checks['database'] = {'status': 'healthy', 'message': 'Database accessible'}
            except Exception as e:
                checks['database'] = {'status': 'unhealthy', 'error': str(e)}
                overall_healthy = False

            # Check filesystem access
            try:
                data_path = app.config.get("DATA_PATH", "data")
                if not os.path.exists(data_path):
                    raise Exception("Data path not accessible")
                checks['filesystem'] = {'status': 'healthy', 'path': data_path}
            except Exception as e:
                checks['filesystem'] = {'status': 'unhealthy', 'error': str(e)}
                overall_healthy = False

            # Check disk space
            try:
                data_path = app.config.get("DATA_PATH", "data")
                stat = shutil.disk_usage(data_path)
                percent_free = (stat.free / stat.total) * 100
                checks['disk'] = {
                    'status': 'healthy' if percent_free > 10 else 'warning',
                    'total_gb': round(stat.total / (1024**3), 2),
                    'free_gb': round(stat.free / (1024**3), 2),
                    'percent_free': round(percent_free, 2)
                }
                if percent_free <= 5:
                    overall_healthy = False
            except Exception as e:
                checks['disk'] = {'status': 'unhealthy', 'error': str(e)}
                overall_healthy = False

            return jsonify({
                "status": "healthy" if overall_healthy else "unhealthy",
                "version": app.config.get("VERSION", __version__),
                "environment": app.config.get("ENV", "unknown"),
                "checks": checks
            }), 200 if overall_healthy else 500
        else:
            # Basic mode - just return 200 OK (for load balancers)
            # No authentication required, minimal checks
            return jsonify({
                "status": "ok",
                "version": app.config.get("VERSION", __version__)
            }), 200

    return app

# Package version (Semantic Versioning 2.0.0)
# See docs/standards/versioning-strategy.md for details
__version__ = "1.4.1"
__version_info__ = (1, 4, 1)
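
For context, a minimal sketch of how this application factory would typically be driven from a WSGI entry point (the wsgi.py file name and run arguments below are illustrative assumptions, not part of this file):

# wsgi.py (hypothetical entry point)
from starpunk import create_app

app = create_app()

if __name__ == "__main__":
    # Development convenience only; run behind a real WSGI server in production
    app.run(host="127.0.0.1", port=5000)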