StarPunk/starpunk/feeds/negotiation.py

"""
Content negotiation for feed formats

This module provides simple HTTP content negotiation to determine which feed
format to serve based on the client's Accept header. Follows StarPunk's
philosophy of simplicity over RFC compliance.

Supported formats:
    - RSS 2.0 (application/rss+xml)
    - ATOM 1.0 (application/atom+xml)
    - JSON Feed 1.1 (application/feed+json, application/json)

Example:
    >>> negotiate_feed_format('application/atom+xml', ['rss', 'atom', 'json'])
    'atom'
    >>> negotiate_feed_format('*/*', ['rss', 'atom', 'json'])
    'rss'
"""

from typing import List


# Canonical MIME type served for each supported feed format
MIME_TYPES = dict(
    rss='application/rss+xml',
    atom='application/atom+xml',
    json='application/feed+json',
)

# Lookup from a MIME type found in an Accept header back to the format name.
# Starts as the inverse of MIME_TYPES, then gains the generic JSON alias.
MIME_TO_FORMAT = {mime: name for name, mime in MIME_TYPES.items()}
MIME_TO_FORMAT['application/json'] = 'json'  # Also accept generic JSON


def negotiate_feed_format(accept_header: str, available_formats: List[str]) -> str:
    """
    Pick the feed format that best satisfies an Accept header

    Each available format is scored against the parsed header and the
    highest-scoring one wins. Formats scoring zero are never returned.
    Ties are broken in the fixed order rss, atom, json, so a bare
    wildcard (*/*) yields RSS whenever RSS is available.

    Args:
        accept_header: HTTP Accept header value (e.g., "application/atom+xml, */*;q=0.8")
        available_formats: Formats the caller can serve (e.g., ['rss', 'atom', 'json'])

    Returns:
        Name of the winning format ('rss', 'atom', or 'json')

    Raises:
        ValueError: If none of the available formats is acceptable
            (caller should return 406)

    Examples:
        >>> negotiate_feed_format('application/atom+xml', ['rss', 'atom', 'json'])
        'atom'
        >>> negotiate_feed_format('application/json;q=0.9, */*;q=0.1', ['rss', 'atom', 'json'])
        'json'
        >>> negotiate_feed_format('*/*', ['rss', 'atom', 'json'])
        'rss'
        >>> negotiate_feed_format('text/html', ['rss', 'atom', 'json'])
        Traceback (most recent call last):
        ...
        ValueError: No acceptable format found
    """
    parsed = _parse_accept_header(accept_header)

    # Score every candidate, keeping only those the client accepts at all
    weighted = ((name, _score_format(name, parsed)) for name in available_formats)
    candidates = {name: weight for name, weight in weighted if weight > 0}

    if not candidates:
        raise ValueError("No acceptable format found")

    top = max(candidates.values())

    # Tie-break: walk the fixed preference order and take the first top scorer
    for preferred in ('rss', 'atom', 'json'):
        if candidates.get(preferred) == top:
            return preferred

    # Only reachable if a format outside the preference list ever scores
    return max(candidates, key=candidates.get)


def _parse_accept_header(accept_header: str) -> List[tuple]:
    """
    Parse Accept header into list of (mime_type, quality) tuples

    Simple parser that extracts MIME types and quality factors.
    Does not implement full RFC 7231 - just enough for feed negotiation.

    The quality parameter name is matched case-insensitively and whitespace
    around the "=" is tolerated, so "q=0.5", "Q=0.5" and "q = 0.5" are all
    understood. Only the first quality parameter of a media type is used;
    other parameters are ignored. A quality value that is not a number
    falls back to 1.0, and numeric values are clamped to the 0-1 range.

    Args:
        accept_header: HTTP Accept header value

    Returns:
        List of (mime_type, quality) tuples sorted by quality (highest first).
        MIME types are lower-cased. Entries of equal quality keep the order
        in which they appeared in the header.

    Examples:
        >>> _parse_accept_header('application/json;q=0.9, text/html')
        [('text/html', 1.0), ('application/json', 0.9)]
        >>> _parse_accept_header('application/atom+xml;Q=0.5')
        [('application/atom+xml', 0.5)]
    """
    media_types = []

    # Split on commas to get individual media types
    for part in accept_header.split(','):
        part = part.strip()
        if not part:
            continue

        # Split on semicolon to separate MIME type from parameters
        components = part.split(';')
        mime_type = components[0].strip().lower()

        # Extract quality factor (default to 1.0)
        quality = 1.0
        for param in components[1:]:
            name, separator, value = param.partition('=')
            # Parameter names are case-insensitive; require an actual "="
            if not separator or name.strip().lower() != 'q':
                continue
            try:
                quality = float(value.strip())
            except ValueError:
                # Unparseable weight: treat as if no weight was given
                quality = 1.0
            else:
                # Clamp to 0-1; this also folds 'inf'/'nan' into the range
                quality = max(0.0, min(1.0, quality))
            break

        media_types.append((mime_type, quality))

    # Sort by quality (highest first); the sort is stable, so ties keep
    # their header order
    media_types.sort(key=lambda entry: entry[1], reverse=True)

    return media_types


def _score_format(format_name: str, media_types: List[tuple]) -> float:
    """
    Calculate score for a format based on parsed Accept header

    The most specific media range that matches the format decides the
    score: an exact MIME type beats a type wildcard (e.g. "application/*"),
    which in turn beats the full wildcard ("*/*"). This lets a client
    refuse or down-weight one format explicitly (for example
    "application/rss+xml;q=0, */*") without the wildcard overriding it.
    When several ranges of the same specificity match, the highest quality
    among them is used.

    Args:
        format_name: Format to score ('rss', 'atom', or 'json')
        media_types: List of (mime_type, quality) tuples from Accept header

    Returns:
        Score (0.0 to 1.0), where 0 means no match (or an explicit q=0)

    Examples:
        >>> media_types = [('application/atom+xml', 1.0), ('*/*', 0.8)]
        >>> _score_format('atom', media_types)
        1.0
        >>> _score_format('rss', media_types)
        0.8
        >>> _score_format('rss', [('*/*', 1.0), ('application/rss+xml', 0.0)])
        0.0
    """
    # Get the MIME type for this format; unknown formats never match
    format_mime = MIME_TYPES.get(format_name)
    if not format_mime:
        return 0.0

    # Every MIME type that maps to this format: the primary one plus any
    # alternatives listed in MIME_TO_FORMAT
    acceptable_mimes = {format_mime}
    acceptable_mimes.update(
        mime for mime, fmt in MIME_TO_FORMAT.items() if fmt == format_name
    )
    # Top-level types (the part before "/") used for "type/*" matching
    acceptable_types = {mime.split('/')[0] for mime in acceptable_mimes}

    # Best quality seen at each specificity level; None means "no match"
    exact = None
    by_type = None
    catch_all = None

    for mime_type, quality in media_types:
        if mime_type in acceptable_mimes:
            # Exact match
            exact = quality if exact is None else max(exact, quality)
        elif mime_type == '*/*':
            # Full wildcard
            catch_all = quality if catch_all is None else max(catch_all, quality)
        elif mime_type.endswith('/*') and mime_type.split('/')[0] in acceptable_types:
            # Type wildcard (e.g., "application/*")
            by_type = quality if by_type is None else max(by_type, quality)

    # The most specific level that matched wins, even if its quality is 0
    for matched in (exact, by_type, catch_all):
        if matched is not None:
            return matched

    return 0.0


def get_mime_type(format_name: str) -> str:
    """
    Look up the MIME type registered for a feed format

    Args:
        format_name: Format name ('rss', 'atom', or 'json')

    Returns:
        The MIME type string stored in MIME_TYPES for that format

    Raises:
        ValueError: If format name has no entry in MIME_TYPES

    Examples:
        >>> get_mime_type('rss')
        'application/rss+xml'
        >>> get_mime_type('atom')
        'application/atom+xml'
        >>> get_mime_type('json')
        'application/feed+json'
    """
    resolved = MIME_TYPES.get(format_name)
    if resolved:
        return resolved
    raise ValueError(f"Unknown format: {format_name}")