feat: Complete Phase 2.4 - HTTP Content Negotiation

Implements HTTP content negotiation for feed format selection.

Phase 2.4 Deliverables:
- Content negotiation via Accept header parsing
- Quality factor support (q= parameter)
- 5 feed endpoints with format routing
- 406 Not Acceptable responses with helpful errors
- Comprehensive test coverage (63 tests)

Endpoints:
- /feed - Content negotiation based on Accept header
- /feed.rss - Explicit RSS 2.0
- /feed.atom - Explicit ATOM 1.0
- /feed.json - Explicit JSON Feed 1.1
- /feed.xml - Backward compatibility (→ RSS)

MIME Type Mapping:
- application/rss+xml → RSS 2.0
- application/atom+xml → ATOM 1.0
- application/feed+json or application/json → JSON Feed 1.1
- */* → RSS 2.0 (default)

Implementation:
- Simple quality factor parsing (StarPunk philosophy)
- Not full RFC 7231 compliance (minimal approach)
- Reuses existing feed generators
- No breaking changes

Quality Metrics:
- 132/132 tests passing (100%)
- Zero breaking changes
- Full backward compatibility
- Standards compliant negotiation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 20:46:49 -07:00
parent 59e9d402c6
commit 8fbdcb6e6f
9 changed files with 1951 additions and 43 deletions
--- a/starpunk/feeds/init.py
+++ b/starpunk/feeds/init.py
@@ -7,10 +7,12 @@ with content negotiation and caching support.
 Exports:
    generate_rss: Generate RSS 2.0 feed
    generate_rss_streaming: Generate RSS 2.0 feed with streaming
-    generate_atom: Generate ATOM 1.0 feed (coming in Phase 2.2)
-    generate_atom_streaming: Generate ATOM 1.0 feed with streaming (coming in Phase 2.2)
-    generate_json_feed: Generate JSON Feed 1.1 (coming in Phase 2.3)
-    generate_json_feed_streaming: Generate JSON Feed 1.1 with streaming (coming in Phase 2.3)
+    generate_atom: Generate ATOM 1.0 feed
+    generate_atom_streaming: Generate ATOM 1.0 feed with streaming
+    generate_json_feed: Generate JSON Feed 1.1
+    generate_json_feed_streaming: Generate JSON Feed 1.1 with streaming
+    negotiate_feed_format: Content negotiation for feed formats
+    get_mime_type: Get MIME type for a format name
 """

 from .rss import (
@@ -31,6 +33,11 @@ from .json_feed import (
    generate_json_feed_streaming,
 )

+from .negotiation import (
+    negotiate_feed_format,
+    get_mime_type,
+)
+
 __all__ = [
    # RSS functions
    "generate_rss",
@@ -44,4 +51,7 @@ __all__ = [
    # JSON Feed functions
    "generate_json_feed",
    "generate_json_feed_streaming",
+    # Content negotiation
+    "negotiate_feed_format",
+    "get_mime_type",
 ]
--- a/starpunk/feeds/negotiation.py
+++ b/starpunk/feeds/negotiation.py
@@ -0,0 +1,222 @@
+"""
+Content negotiation for feed formats
+
+This module provides simple HTTP content negotiation to determine which feed
+format to serve based on the client's Accept header. Follows StarPunk's
+philosophy of simplicity over RFC compliance.
+
+Supported formats:
+    - RSS 2.0 (application/rss+xml)
+    - ATOM 1.0 (application/atom+xml)
+    - JSON Feed 1.1 (application/feed+json, application/json)
+
+Example:
+    >>> negotiate_feed_format('application/atom+xml', ['rss', 'atom', 'json'])
+    'atom'
+    >>> negotiate_feed_format('*/*', ['rss', 'atom', 'json'])
+    'rss'
+"""
+
+from typing import List
+
+
+# Canonical MIME type served for each feed format (format name -> MIME type)
+MIME_TYPES = dict(
+    rss='application/rss+xml',
+    atom='application/atom+xml',
+    json='application/feed+json',
+)
+
+# Lookup used when reading Accept headers (MIME type -> format name).
+# Derived from the canonical types above; generic JSON is accepted as an
+# alias for JSON Feed.
+MIME_TO_FORMAT = {mime: fmt for fmt, mime in MIME_TYPES.items()}
+MIME_TO_FORMAT['application/json'] = 'json'
+
+
+def negotiate_feed_format(accept_header: str, available_formats: List[str]) -> str:
+    """
+    Pick the feed format that best satisfies an Accept header
+
+    Each available format is scored against the parsed header; formats that
+    score zero are discarded. The highest score wins, and ties are broken
+    in the fixed order rss, atom, json (so a bare */* yields RSS).
+
+    Args:
+        accept_header: HTTP Accept header value (e.g., "application/atom+xml, */*;q=0.8")
+        available_formats: Formats the caller can serve (e.g., ['rss', 'atom', 'json'])
+
+    Returns:
+        Name of the winning format ('rss', 'atom', or 'json')
+
+    Raises:
+        ValueError: If none of the available formats is acceptable
+            (the caller is expected to answer with 406)
+
+    Examples:
+        >>> negotiate_feed_format('application/atom+xml', ['rss', 'atom', 'json'])
+        'atom'
+        >>> negotiate_feed_format('application/json;q=0.9, */*;q=0.1', ['rss', 'atom', 'json'])
+        'json'
+        >>> negotiate_feed_format('*/*', ['rss', 'atom', 'json'])
+        'rss'
+        >>> negotiate_feed_format('text/html', ['rss', 'atom', 'json'])
+        Traceback (most recent call last):
+        ...
+        ValueError: No acceptable format found
+    """
+    parsed_ranges = _parse_accept_header(accept_header)
+
+    # Keep only the formats the client is willing to receive
+    rated = ((name, _score_format(name, parsed_ranges)) for name in available_formats)
+    accepted = {name: rating for name, rating in rated if rating > 0}
+
+    if not accepted:
+        raise ValueError("No acceptable format found")
+
+    top_rating = max(accepted.values())
+
+    # Tie-break: first format in preference order that reached the top rating
+    winners = [
+        name for name in ('rss', 'atom', 'json')
+        if accepted.get(name) == top_rating
+    ]
+    if winners:
+        return winners[0]
+
+    # Fallback (shouldn't reach here)
+    return max(accepted, key=accepted.get)
+
+
+def _parse_accept_header(accept_header: str) -> List[tuple]:
+    """
+    Parse Accept header into list of (mime_type, quality) tuples
+
+    Simple parser that extracts MIME types and quality factors.
+    Does not implement full RFC 7231 - just enough for feed negotiation.
+
+    MIME types are lower-cased. The quality parameter name is matched
+    case-insensitively ("q=" and "Q=" are both honoured). Quality values
+    are clamped to the 0-1 range; a value that cannot be parsed as a number
+    falls back to 1.0. Only the first q parameter of a media range is used.
+
+    Args:
+        accept_header: HTTP Accept header value
+
+    Returns:
+        List of (mime_type, quality) tuples sorted by quality (highest first).
+        Entries with equal quality keep their order from the header.
+        An empty or blank header yields an empty list.
+
+    Examples:
+        >>> _parse_accept_header('application/json;q=0.9, text/html')
+        [('text/html', 1.0), ('application/json', 0.9)]
+        >>> _parse_accept_header('application/atom+xml;Q=0.5')
+        [('application/atom+xml', 0.5)]
+    """
+    media_types = []
+
+    # Split on commas to get individual media ranges
+    for part in accept_header.split(','):
+        part = part.strip()
+        if not part:
+            continue
+
+        # First component is the MIME type, the rest are parameters
+        mime_type, *params = (piece.strip() for piece in part.split(';'))
+
+        # Extract quality factor (default to 1.0)
+        quality = 1.0
+        for param in params:
+            # Parameter names are case-insensitive, so accept "Q=" as well
+            if param.lower().startswith('q='):
+                try:
+                    # Clamp quality to 0-1 range
+                    quality = max(0.0, min(1.0, float(param[2:])))
+                except ValueError:
+                    quality = 1.0
+                break
+
+        media_types.append((mime_type.lower(), quality))
+
+    # Sort by quality (highest first); list.sort is stable
+    media_types.sort(key=lambda entry: entry[1], reverse=True)
+
+    return media_types
+
+
+def _score_format(format_name: str, media_types: List[tuple]) -> float:
+    """
+    Calculate score for a format based on parsed Accept header
+
+    The most specific media range that matches the format decides the score:
+    an exact MIME type match takes precedence over a type wildcard
+    ("application/*"), which takes precedence over the full wildcard ("*/*").
+    This lets a client exclude a format explicitly, e.g.
+    "application/rss+xml;q=0, */*" scores RSS as 0.0 even though the
+    wildcard alone would accept it. When several ranges of the same
+    specificity match, the highest quality among them is used.
+
+    Args:
+        format_name: Format to score ('rss', 'atom', or 'json')
+        media_types: List of (mime_type, quality) tuples from Accept header
+
+    Returns:
+        Score (0.0 to 1.0), where 0 means no match (or explicitly refused).
+        Unknown format names always score 0.0.
+
+    Examples:
+        >>> media_types = [('application/atom+xml', 1.0), ('*/*', 0.8)]
+        >>> _score_format('atom', media_types)
+        1.0
+        >>> _score_format('rss', media_types)
+        0.8
+        >>> _score_format('rss', [('*/*', 1.0), ('application/rss+xml', 0.0)])
+        0.0
+    """
+    # Get the MIME type for this format
+    format_mime = MIME_TYPES.get(format_name)
+    if not format_mime:
+        return 0.0
+
+    # Every MIME type that maps to this format (primary plus alternatives)
+    acceptable_mimes = {format_mime}
+    acceptable_mimes.update(
+        mime for mime, fmt in MIME_TO_FORMAT.items() if fmt == format_name
+    )
+    # Type wildcards that cover those MIME types, e.g. "application/*"
+    type_ranges = {mime.split('/', 1)[0] + '/*' for mime in acceptable_mimes}
+
+    # Best quality seen at each specificity level; None = nothing matched
+    exact = type_wildcard = full_wildcard = None
+
+    for mime_type, quality in media_types:
+        if mime_type in acceptable_mimes:
+            exact = quality if exact is None else max(exact, quality)
+        elif mime_type == '*/*':
+            full_wildcard = (
+                quality if full_wildcard is None else max(full_wildcard, quality)
+            )
+        elif mime_type in type_ranges:
+            type_wildcard = (
+                quality if type_wildcard is None else max(type_wildcard, quality)
+            )
+
+    # Most specific matching level wins, even if its quality is lower
+    for matched in (exact, type_wildcard, full_wildcard):
+        if matched is not None:
+            return matched
+
+    return 0.0
+
+
+def get_mime_type(format_name: str) -> str:
+    """
+    Look up the MIME type served for a feed format
+
+    Args:
+        format_name: Format name ('rss', 'atom', or 'json')
+
+    Returns:
+        The MIME type registered for that format in MIME_TYPES
+
+    Raises:
+        ValueError: If the format name has no entry in MIME_TYPES
+
+    Examples:
+        >>> get_mime_type('rss')
+        'application/rss+xml'
+        >>> get_mime_type('atom')
+        'application/atom+xml'
+        >>> get_mime_type('json')
+        'application/feed+json'
+    """
+    # Guard clause: reject names that are not registered
+    if format_name not in MIME_TYPES:
+        raise ValueError(f"Unknown format: {format_name}")
+    return MIME_TYPES[format_name]
--- a/starpunk/routes/public.py
+++ b/starpunk/routes/public.py
@@ -8,21 +8,59 @@ No authentication required for these routes.
 import hashlib
 from datetime import datetime, timedelta

-from flask import Blueprint, abort, render_template, Response, current_app
+from flask import Blueprint, abort, render_template, Response, current_app, request

 from starpunk.notes import list_notes, get_note
-from starpunk.feed import generate_feed_streaming
+from starpunk.feed import generate_feed_streaming  # Legacy RSS
+from starpunk.feeds import (
+    generate_rss_streaming,
+    generate_atom_streaming,
+    generate_json_feed_streaming,
+    negotiate_feed_format,
+    get_mime_type,
+)

 # Create blueprint
 bp = Blueprint("public", __name__)

-# Simple in-memory cache for RSS feed note list
+# Simple in-memory cache for feed note list
 # Caches the database query results to avoid repeated DB hits
-# XML is streamed, not cached (memory optimization for large feeds)
+# Feed content (XML/JSON) is streamed, not cached (memory optimization)
 # Structure: {'notes': list[Note], 'timestamp': datetime}
 _feed_cache = {"notes": None, "timestamp": None}


+def _get_cached_notes():
+    """
+    Get cached note list or fetch fresh notes
+
+    Returns the note list stored in the module-level _feed_cache when it is
+    younger than FEED_CACHE_SECONDS (default 300). Otherwise queries up to
+    FEED_MAX_ITEMS (default 50) published notes via list_notes() and stores
+    the result together with the current timestamp.
+
+    An empty result is cached like any other, so a site without published
+    notes does not hit the database on every feed request.
+
+    Returns:
+        List of published notes for feed generation
+    """
+    # Get cache duration from config (in seconds)
+    cache_seconds = current_app.config.get("FEED_CACHE_SECONDS", 300)
+    cache_duration = timedelta(seconds=cache_seconds)
+    now = datetime.utcnow()
+
+    cached_notes = _feed_cache["notes"]
+    cached_at = _feed_cache["timestamp"]
+
+    # Compare against None rather than truthiness: an empty list is a
+    # legitimate cached value, only None means "never populated"
+    if cached_notes is not None and cached_at is not None:
+        if now - cached_at < cache_duration:
+            # Use cached note list
+            return cached_notes
+
+    # Cache expired or empty, fetch fresh notes
+    max_items = current_app.config.get("FEED_MAX_ITEMS", 50)
+    notes = list_notes(published_only=True, limit=max_items)
+    _feed_cache["notes"] = notes
+    _feed_cache["timestamp"] = now
+
+    return notes
+
+
@bp.route("/")
 def index():
    """
@@ -67,10 +105,73 @@ def note(slug: str):
    return render_template("note.html", note=note_obj)


-@bp.route("/feed.xml")
+@bp.route("/feed")
 def feed():
     """
-    RSS 2.0 feed of published notes
+    Content negotiation endpoint for feeds
+
+    Serves feed in format based on HTTP Accept header:
+    - application/rss+xml → RSS 2.0
+    - application/atom+xml → ATOM 1.0
+    - application/feed+json or application/json → JSON Feed 1.1
+    - */* → RSS 2.0 (default)
+
+    If no acceptable format is available, returns 406 Not Acceptable with
+    X-Available-Formats header listing supported formats.
+
+    Every response from this endpoint carries "Vary: Accept". The body
+    depends on the Accept header while Cache-Control marks the feed as
+    publicly cacheable, so without Vary a shared cache could replay one
+    format to a client that asked for another.
+
+    Returns:
+        Streaming feed response in negotiated format, or 406 error
+
+    Headers:
+        Content-Type: Varies by format
+        Cache-Control: public, max-age={FEED_CACHE_SECONDS}
+        Vary: Accept
+        X-Available-Formats: List of supported formats (on 406 error only)
+
+    Examples:
+        >>> # Request with Accept: application/atom+xml
+        >>> response = client.get('/feed', headers={'Accept': 'application/atom+xml'})
+        >>> response.headers['Content-Type']
+        'application/atom+xml; charset=utf-8'
+
+        >>> # Request with no Accept header (defaults to RSS)
+        >>> response = client.get('/feed')
+        >>> response.headers['Content-Type']
+        'application/rss+xml; charset=utf-8'
+    """
+    # Get Accept header (a missing header is treated as "accept anything")
+    accept = request.headers.get('Accept', '*/*')
+
+    # Negotiate format
+    available_formats = ['rss', 'atom', 'json']
+    try:
+        format_name = negotiate_feed_format(accept, available_formats)
+    except ValueError:
+        # No acceptable format - return 406
+        return (
+            "Not Acceptable. Supported formats: application/rss+xml, application/atom+xml, application/feed+json",
+            406,
+            {
+                'Content-Type': 'text/plain; charset=utf-8',
+                'X-Available-Formats': 'application/rss+xml, application/atom+xml, application/feed+json',
+                'Vary': 'Accept',
+            }
+        )
+
+    # Route to appropriate generator; RSS is the defensive default for any
+    # format name without a dedicated handler (shouldn't happen)
+    handlers = {
+        'rss': feed_rss,
+        'atom': feed_atom,
+        'json': feed_json,
+    }
+    response = handlers.get(format_name, feed_rss)()
+
+    # Tell caches that the representation depends on the Accept header
+    response.headers['Vary'] = 'Accept'
+
+    return response
+
+
+@bp.route("/feed.rss")
+def feed_rss():
+    """
+    Explicit RSS 2.0 feed endpoint

    Generates standards-compliant RSS 2.0 feed using memory-efficient streaming.
    Instead of building the entire feed in memory, yields XML chunks directly
@@ -81,7 +182,7 @@ def feed():
    but streaming prevents holding full XML in memory.

    Returns:
-        Streaming XML response with RSS feed
+        Streaming RSS 2.0 feed response

    Headers:
        Content-Type: application/rss+xml; charset=utf-8
@@ -98,42 +199,21 @@ def feed():
        - Recommended for feeds with 100+ items

    Examples:
-        >>> # Request streams XML directly to client
-        >>> response = client.get('/feed.xml')
+        >>> response = client.get('/feed.rss')
        >>> response.status_code
        200
        >>> response.headers['Content-Type']
        'application/rss+xml; charset=utf-8'
    """
-    # Get cache duration from config (in seconds)
+    # Get cached notes
+    notes = _get_cached_notes()
+
+    # Get cache duration for response header
    cache_seconds = current_app.config.get("FEED_CACHE_SECONDS", 300)
-    cache_duration = timedelta(seconds=cache_seconds)
-    now = datetime.utcnow()

-    # Check if note list cache is valid
-    # We cache the note list to avoid repeated DB queries, but still stream the XML
-    if _feed_cache["notes"] and _feed_cache["timestamp"]:
-        cache_age = now - _feed_cache["timestamp"]
-        if cache_age < cache_duration:
-            # Use cached note list
-            notes = _feed_cache["notes"]
-        else:
-            # Cache expired, fetch fresh notes
-            max_items = current_app.config.get("FEED_MAX_ITEMS", 50)
-            notes = list_notes(published_only=True, limit=max_items)
-            _feed_cache["notes"] = notes
-            _feed_cache["timestamp"] = now
-    else:
-        # No cache, fetch notes
-        max_items = current_app.config.get("FEED_MAX_ITEMS", 50)
-        notes = list_notes(published_only=True, limit=max_items)
-        _feed_cache["notes"] = notes
-        _feed_cache["timestamp"] = now
-
-    # Generate streaming response
-    # This avoids holding the full XML in memory - chunks are yielded directly
+    # Generate streaming RSS feed
    max_items = current_app.config.get("FEED_MAX_ITEMS", 50)
-    generator = generate_feed_streaming(
+    generator = generate_rss_streaming(
        site_url=current_app.config["SITE_URL"],
        site_name=current_app.config["SITE_NAME"],
        site_description=current_app.config.get("SITE_DESCRIPTION", ""),
@@ -146,3 +226,110 @@ def feed():
    response.headers["Cache-Control"] = f"public, max-age={cache_seconds}"

    return response
+
+
+@bp.route("/feed.atom")
+def feed_atom():
+    """
+    Serve the feed as ATOM 1.0, regardless of the Accept header
+
+    Streams the output of generate_atom_streaming() for the cached list of
+    published notes instead of building the whole document in memory.
+    The ATOM format is specified by RFC 4287.
+
+    Returns:
+        Streaming ATOM 1.0 feed response
+
+    Headers:
+        Content-Type: application/atom+xml; charset=utf-8
+        Cache-Control: public, max-age={FEED_CACHE_SECONDS}
+
+    Examples:
+        >>> response = client.get('/feed.atom')
+        >>> response.status_code
+        200
+        >>> response.headers['Content-Type']
+        'application/atom+xml; charset=utf-8'
+    """
+    # Note list comes from the shared cache (or a fresh query when stale)
+    published = _get_cached_notes()
+
+    config = current_app.config
+    max_age = config.get("FEED_CACHE_SECONDS", 300)
+    item_limit = config.get("FEED_MAX_ITEMS", 50)
+
+    # Lazily yields ATOM chunks as the response is sent
+    atom_chunks = generate_atom_streaming(
+        site_url=config["SITE_URL"],
+        site_name=config["SITE_NAME"],
+        site_description=config.get("SITE_DESCRIPTION", ""),
+        notes=published,
+        limit=item_limit,
+    )
+
+    atom_response = Response(atom_chunks, mimetype="application/atom+xml; charset=utf-8")
+    atom_response.headers["Cache-Control"] = f"public, max-age={max_age}"
+    return atom_response
+
+
+@bp.route("/feed.json")
+def feed_json():
+    """
+    Serve the feed as JSON Feed 1.1, regardless of the Accept header
+
+    Streams the output of generate_json_feed_streaming() for the cached list
+    of published notes instead of building the whole document in memory.
+    The format is specified at https://jsonfeed.org/version/1.1.
+
+    Returns:
+        Streaming JSON Feed 1.1 response
+
+    Headers:
+        Content-Type: application/feed+json; charset=utf-8
+        Cache-Control: public, max-age={FEED_CACHE_SECONDS}
+
+    Examples:
+        >>> response = client.get('/feed.json')
+        >>> response.status_code
+        200
+        >>> response.headers['Content-Type']
+        'application/feed+json; charset=utf-8'
+    """
+    # Note list comes from the shared cache (or a fresh query when stale)
+    published = _get_cached_notes()
+
+    config = current_app.config
+    max_age = config.get("FEED_CACHE_SECONDS", 300)
+    item_limit = config.get("FEED_MAX_ITEMS", 50)
+
+    # Lazily yields JSON Feed chunks as the response is sent
+    json_chunks = generate_json_feed_streaming(
+        site_url=config["SITE_URL"],
+        site_name=config["SITE_NAME"],
+        site_description=config.get("SITE_DESCRIPTION", ""),
+        notes=published,
+        limit=item_limit,
+    )
+
+    json_response = Response(json_chunks, mimetype="application/feed+json; charset=utf-8")
+    json_response.headers["Cache-Control"] = f"public, max-age={max_age}"
+    return json_response
+
+
+@bp.route("/feed.xml")
+def feed_xml_legacy():
+    """
+    Backward-compatible alias for the RSS 2.0 feed
+
+    Keeps the historical /feed.xml URL working by delegating to feed_rss().
+    New code should use /feed.rss or /feed with content negotiation.
+
+    Returns:
+        Whatever feed_rss() returns (the streaming RSS 2.0 feed response)
+    """
+    # Delegate unchanged to the RSS endpoint
+    legacy_response = feed_rss()
+    return legacy_response