StarPunk/starpunk/monitoring/business.py

"""
Business metrics for StarPunk operations

Per v1.1.2 Phase 1:
- Track note operations (create, update, delete)
- Track feed generation and cache hits/misses
- Track content statistics

Per v1.1.2 Phase 3:
- Track feed statistics by format
- Track feed cache hit/miss rates
- Provide feed statistics dashboard

Example usage:
    >>> from starpunk.monitoring.business import track_note_created
    >>> track_note_created(note_id=123, content_length=500)
"""

from typing import Optional, Dict, Any

from starpunk.monitoring.metrics import record_metric, get_metrics_stats


def track_note_created(note_id: int, content_length: int, has_media: bool = False) -> None:
    """
    Record a 'note_created' business event.

    The event is stored through record_metric under the 'render' metric
    type (the type this module uses for business metrics), with the
    content length as the recorded value.

    Args:
        note_id: ID of the note that was created
        content_length: Length of the note content in characters
        has_media: True when the note carries media attachments
    """
    record_metric(
        'render',
        'note_created',
        content_length,
        {
            'note_id': note_id,
            'content_length': content_length,
            'has_media': has_media,
        },
        # Business events are always tracked, so force is requested.
        force=True,
    )


def track_note_updated(note_id: int, content_length: int, fields_changed: Optional[list] = None) -> None:
    """
    Record a 'note_updated' business event.

    The new content length is used as the recorded value. When a
    non-empty list of changed fields is supplied, it is attached to the
    event metadata as a single comma-separated string.

    Args:
        note_id: ID of the note that was updated
        content_length: Length of the note content after the update
        fields_changed: Names of the fields that changed (optional)
    """
    event_details = dict(note_id=note_id, content_length=content_length)

    # None and empty lists are both skipped: nothing worth recording.
    if fields_changed:
        event_details.update(fields_changed=','.join(fields_changed))

    record_metric('render', 'note_updated', content_length, event_details, force=True)


def track_note_deleted(note_id: int) -> None:
    """
    Record a 'note_deleted' business event.

    Args:
        note_id: ID of the note that was deleted
    """
    # A deletion has no meaningful duration, so the recorded value is 0.
    no_duration = 0
    record_metric('render', 'note_deleted', no_duration, {'note_id': note_id}, force=True)


def track_feed_generated(format: str, item_count: int, duration_ms: float, cached: bool = False) -> None:
    """
    Record a feed generation (or cached feed delivery) event.

    The operation name encodes both the format and whether the feed came
    from cache: 'feed_<format>_cached' or 'feed_<format>_generated'.

    Args:
        format: Feed format (rss, atom, json)
        item_count: Number of items contained in the feed
        duration_ms: Time spent producing the feed, used as the recorded value
        cached: True when the feed was served from cache
    """
    suffix = '_cached' if cached else '_generated'
    operation_name = f'feed_{format}{suffix}'

    feed_details = {
        'format': format,
        'item_count': item_count,
        'cached': cached,
    }

    # Feed operations are always tracked, so force is requested.
    record_metric('render', operation_name, duration_ms, feed_details, force=True)


def track_cache_hit(cache_type: str, key: str) -> None:
    """
    Record a cache hit event named '<cache_type>_cache_hit'.

    Args:
        cache_type: Type of cache (feed, etc.)
        key: Cache key that was found
    """
    operation_name = f'{cache_type}_cache_hit'
    hit_details = dict(cache_type=cache_type, key=key)

    # The recorded value is 0: only the occurrence of the hit is tracked.
    record_metric('render', operation_name, 0, hit_details, force=True)


def track_cache_miss(cache_type: str, key: str) -> None:
    """
    Record a cache miss event named '<cache_type>_cache_miss'.

    Args:
        cache_type: Type of cache (feed, etc.)
        key: Cache key that was not found
    """
    operation_name = f'{cache_type}_cache_miss'
    miss_details = dict(cache_type=cache_type, key=key)

    # The recorded value is 0: only the occurrence of the miss is tracked.
    record_metric('render', operation_name, 0, miss_details, force=True)


def get_feed_statistics() -> Dict[str, Any]:
    """
    Get aggregated feed statistics from metrics buffer and feed cache.

    Analyzes the per-operation metrics returned by get_metrics_stats() to
    provide feed-specific statistics including:
    - Total requests by format (RSS, ATOM, JSON)
    - Cache hit/miss counts and hit rate
    - Average feed generation time by format
    - Format popularity (fraction of all feed requests)
    - Feed cache internal statistics (when starpunk.feeds is importable)

    Returns:
        Dictionary with feed statistics:
        {
            'by_format': {
                'rss': {'generated': int, 'cached': int, 'total': int, 'avg_duration_ms': float},
                'atom': {...},
                'json': {...}
            },
            'cache': {
                'hits': int,
                'misses': int,
                'hit_rate': float (0.0-1.0),
                'entries': int,
                'evictions': int
            },
            'total_requests': int,
            'format_percentages': {
                'rss': float,
                'atom': float,
                'json': float
            }
        }

        Every key shown above is always present; 'entries' and 'evictions'
        stay at 0 when the feed cache module cannot be imported.
        'format_percentages' values are fractions of 'total_requests'
        (0.0-1.0), not values scaled to 100.

    Example:
        >>> stats = get_feed_statistics()
        >>> print(f"RSS requests: {stats['by_format']['rss']['total']}")
        >>> print(f"Cache hit rate: {stats['cache']['hit_rate']:.2%}")
    """
    known_formats = ('rss', 'atom', 'json')

    # Initialize the full result structure so every documented key exists
    # even when no metrics or no feed cache are available.
    result: Dict[str, Any] = {
        'by_format': {
            name: {'generated': 0, 'cached': 0, 'total': 0, 'avg_duration_ms': 0.0}
            for name in known_formats
        },
        'cache': {
            'hits': 0,
            'misses': 0,
            'hit_rate': 0.0,
            'entries': 0,
            'evictions': 0,
        },
        'total_requests': 0,
        'format_percentages': {name: 0.0 for name in known_formats},
    }

    by_operation = get_metrics_stats().get('by_operation', {})

    for operation_name, op_stats in by_operation.items():
        count = op_stats.get('count', 0)

        # Cache hit/miss operations also start with 'feed_', so they must be
        # matched by exact name BEFORE the generic prefix check; otherwise
        # they are parsed as format 'cache' and silently dropped.
        if operation_name == 'feed_cache_hit':
            result['cache']['hits'] = count
            continue
        if operation_name == 'feed_cache_miss':
            result['cache']['misses'] = count
            continue

        if not operation_name.startswith('feed_'):
            continue

        # Feed operations are named: feed_rss_generated, feed_rss_cached, etc.
        parts = operation_name.split('_')
        if len(parts) < 3:
            continue

        format_name, operation_type = parts[1], parts[2]
        format_stats = result['by_format'].get(format_name)
        if format_stats is None:
            # Unknown format: not part of the reported breakdown.
            continue

        if operation_type == 'generated':
            format_stats['generated'] = count
            # Average duration is only meaningful for freshly generated feeds.
            format_stats['avg_duration_ms'] = op_stats.get('avg_duration_ms', 0.0)
        elif operation_type == 'cached':
            format_stats['cached'] = count

        format_stats['total'] = format_stats['generated'] + format_stats['cached']

    # Calculate total requests across all formats
    total_requests = sum(fmt['total'] for fmt in result['by_format'].values())
    result['total_requests'] = total_requests

    # Calculate cache hit rate from the metrics-derived counters
    metric_cache_requests = result['cache']['hits'] + result['cache']['misses']
    if metric_cache_requests > 0:
        result['cache']['hit_rate'] = result['cache']['hits'] / metric_cache_requests

    # Calculate format share of all requests (left at 0.0 when there are none)
    if total_requests > 0:
        for format_name, fmt_stats in result['by_format'].items():
            result['format_percentages'][format_name] = fmt_stats['total'] / total_requests

    # Get feed cache statistics if available. The import is local so this
    # module still works when starpunk.feeds cannot be imported.
    try:
        from starpunk.feeds import get_cache
        feed_cache = get_cache()
        cache_stats = feed_cache.get_stats()

        result['cache']['entries'] = cache_stats.get('entries', 0)
        result['cache']['evictions'] = cache_stats.get('evictions', 0)

        # Prefer the cache's own counters over the metrics-derived ones,
        # but only when the cache has actually seen lookups.
        cache_hits = cache_stats.get('hits', 0)
        cache_misses = cache_stats.get('misses', 0)
        cache_lookups = cache_hits + cache_misses
        if cache_lookups > 0:
            result['cache']['hits'] = cache_hits
            result['cache']['misses'] = cache_misses
            # Fall back to computing the rate if the cache does not report
            # one, rather than overwriting it with 0.0.
            result['cache']['hit_rate'] = cache_stats.get(
                'hit_rate', cache_hits / cache_lookups
            )

    except ImportError:
        # Feed cache not available, keep the defaults set above
        pass

    return result