feat: Implement Phase 2 Feed Formats - ATOM, JSON Feed, RSS fix (Phases 2.0-2.3)
This commit implements the first three phases of v1.1.2 Phase 2 Feed Formats,
adding ATOM 1.0 and JSON Feed 1.1 support alongside the existing RSS feed.

CRITICAL BUG FIX:
- Fixed RSS streaming feed ordering (was showing oldest-first instead of newest-first)
- Streaming RSS: removed incorrect reversed() call at line 198
- Feedgen RSS: kept correct reversed() to compensate for library behavior

NEW FEATURES:
- ATOM 1.0 feed generation (RFC 4287 compliant)
  - Proper XML namespacing and RFC 3339 dates
  - Streaming and non-streaming methods
  - 11 comprehensive tests
- JSON Feed 1.1 generation (JSON Feed spec compliant)
  - RFC 3339 dates and UTF-8 JSON output
  - Custom _starpunk extension with permalink_path and word_count
  - 13 comprehensive tests

REFACTORING:
- Restructured feed code into starpunk/feeds/ module
  - feeds/rss.py - RSS 2.0 (moved from feed.py)
  - feeds/atom.py - ATOM 1.0 (new)
  - feeds/json_feed.py - JSON Feed 1.1 (new)
- Backward-compatible feed.py shim for existing imports
- Business metrics integrated into all feed generators

TESTING:
- Created shared test helper tests/helpers/feed_ordering.py
  - Helper validates newest-first ordering across all formats
- 48 total feed tests, all passing
  - RSS: 24 tests
  - ATOM: 11 tests
  - JSON Feed: 13 tests

FILES CHANGED:
- Modified: starpunk/feed.py (now a compatibility shim)
- New: starpunk/feeds/ module with rss.py, atom.py, json_feed.py
- New: tests/helpers/feed_ordering.py (shared test helper)
- New: tests/test_feeds_atom.py, tests/test_feeds_json.py
- Modified: CHANGELOG.md (Phase 2 entries)
- New: docs/reports/2025-11-26-v1.1.2-phase2-feed-formats-partial.md

NEXT STEPS:
Phase 2.4 (Content Negotiation) pending - will add /feed endpoint with Accept
header negotiation and explicit format endpoints.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
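For reference, the three generators share the same signature (site_url, site_name, site_description, notes, limit), so a route can switch formats by swapping a single call. A minimal usage sketch, not part of this commit; the route wiring, the config keys, and the list_notes import path are assumptions based on the docstring examples in the diff below:

from flask import Response, current_app

from starpunk.feeds import generate_atom_streaming  # also: generate_rss_streaming, generate_json_feed_streaming
from starpunk.notes import list_notes  # assumed import path; see docstring examples


def atom_feed():
    """Stream the ATOM 1.0 feed, newest note first."""
    notes = list_notes(published_only=True, limit=50)  # already newest-first (DESC)
    generator = generate_atom_streaming(
        site_url=current_app.config["SITE_URL"],          # assumed config key
        site_name=current_app.config["SITE_NAME"],        # assumed config key
        site_description=current_app.config.get("SITE_DESCRIPTION", ""),
        notes=notes,
    )
    return Response(generator, mimetype="application/atom+xml")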
starpunk/feeds/__init__.py (new file, 47 lines)
@@ -0,0 +1,47 @@
"""
Feed generation module for StarPunk

This module provides feed generation in multiple formats (RSS, ATOM, JSON Feed)
with content negotiation and caching support.

Exports:
    generate_rss: Generate RSS 2.0 feed
    generate_rss_streaming: Generate RSS 2.0 feed with streaming
    generate_atom: Generate ATOM 1.0 feed
    generate_atom_streaming: Generate ATOM 1.0 feed with streaming
    generate_json_feed: Generate JSON Feed 1.1
    generate_json_feed_streaming: Generate JSON Feed 1.1 with streaming
"""

from .rss import (
    generate_rss,
    generate_rss_streaming,
    format_rfc822_date,
    get_note_title,
    clean_html_for_rss,
)

from .atom import (
    generate_atom,
    generate_atom_streaming,
)

from .json_feed import (
    generate_json_feed,
    generate_json_feed_streaming,
)

__all__ = [
    # RSS functions
    "generate_rss",
    "generate_rss_streaming",
    "format_rfc822_date",
    "get_note_title",
    "clean_html_for_rss",
    # ATOM functions
    "generate_atom",
    "generate_atom_streaming",
    # JSON Feed functions
    "generate_json_feed",
    "generate_json_feed_streaming",
]
starpunk/feeds/atom.py (new file, 268 lines)
@@ -0,0 +1,268 @@
"""
ATOM 1.0 feed generation for StarPunk

This module provides ATOM 1.0 feed generation from published notes using
Python's standard library xml.etree.ElementTree for proper XML handling.

Functions:
    generate_atom: Generate ATOM 1.0 XML feed from notes
    generate_atom_streaming: Memory-efficient streaming ATOM generation

Standards:
    - ATOM 1.0 (RFC 4287) specification compliant
    - RFC 3339 date format
    - Proper XML namespacing
    - Escaped HTML and text content
"""

# Standard library imports
from datetime import datetime, timezone
from typing import Optional
import time
import xml.etree.ElementTree as ET

# Local imports
from starpunk.models import Note
from starpunk.monitoring.business import track_feed_generated


# ATOM namespace
ATOM_NS = "http://www.w3.org/2005/Atom"


def generate_atom(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
) -> str:
    """
    Generate ATOM 1.0 XML feed from published notes

    Creates a standards-compliant ATOM 1.0 feed with proper metadata
    and entry elements. Uses ElementTree for safe XML generation.

    NOTE: For memory-efficient streaming, use generate_atom_streaming() instead.
    This function is kept for caching use cases.

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com')
        site_name: Site title for feed
        site_description: Site description for feed (subtitle)
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of entries to include (default: 50)

    Returns:
        ATOM 1.0 XML string (UTF-8 encoded)

    Raises:
        ValueError: If site_url or site_name is empty

    Examples:
        >>> notes = list_notes(published_only=True, limit=50)
        >>> feed_xml = generate_atom(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
        >>> print(feed_xml[:38])
        <?xml version="1.0" encoding="utf-8"?>
    """
    # Join streaming output for non-streaming version
    return ''.join(generate_atom_streaming(
        site_url=site_url,
        site_name=site_name,
        site_description=site_description,
        notes=notes,
        limit=limit
    ))


def generate_atom_streaming(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
):
    """
    Generate ATOM 1.0 XML feed from published notes using streaming

    Memory-efficient generator that yields XML chunks instead of building
    the entire feed in memory. Recommended for large feeds (100+ entries).

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com')
        site_name: Site title for feed
        site_description: Site description for feed
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of entries to include (default: 50)

    Yields:
        XML chunks as strings (UTF-8)

    Raises:
        ValueError: If site_url or site_name is empty

    Examples:
        >>> from flask import Response
        >>> notes = list_notes(published_only=True, limit=100)
        >>> generator = generate_atom_streaming(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
        >>> return Response(generator, mimetype='application/atom+xml')
    """
    # Validate required parameters
    if not site_url or not site_url.strip():
        raise ValueError("site_url is required and cannot be empty")

    if not site_name or not site_name.strip():
        raise ValueError("site_name is required and cannot be empty")

    # Remove trailing slash from site_url for consistency
    site_url = site_url.rstrip("/")

    # Track feed generation timing
    start_time = time.time()
    item_count = 0

    # Current timestamp for updated
    now = datetime.now(timezone.utc)

    # Yield XML declaration
    yield '<?xml version="1.0" encoding="utf-8"?>\n'

    # Yield feed opening with namespace
    yield f'<feed xmlns="{ATOM_NS}">\n'

    # Yield feed metadata
    yield f'  <id>{_escape_xml(site_url)}/</id>\n'
    yield f'  <title>{_escape_xml(site_name)}</title>\n'
    yield f'  <updated>{_format_atom_date(now)}</updated>\n'

    # Links
    yield f'  <link rel="alternate" type="text/html" href="{_escape_xml(site_url)}"/>\n'
    yield f'  <link rel="self" type="application/atom+xml" href="{_escape_xml(site_url)}/feed.atom"/>\n'

    # Optional subtitle
    if site_description:
        yield f'  <subtitle>{_escape_xml(site_description)}</subtitle>\n'

    # Generator
    yield '  <generator uri="https://github.com/yourusername/starpunk">StarPunk</generator>\n'

    # Yield entries (newest first)
    # Notes from database are already in DESC order (newest first)
    for note in notes[:limit]:
        item_count += 1

        # Build permalink URL
        permalink = f"{site_url}{note.permalink}"

        yield '  <entry>\n'

        # Required elements
        yield f'    <id>{_escape_xml(permalink)}</id>\n'
        yield f'    <title>{_escape_xml(note.title)}</title>\n'

        # Use created_at for both published and updated
        # (Note model doesn't have updated_at tracking yet)
        yield f'    <published>{_format_atom_date(note.created_at)}</published>\n'
        yield f'    <updated>{_format_atom_date(note.created_at)}</updated>\n'

        # Link to entry
        yield f'    <link rel="alternate" type="text/html" href="{_escape_xml(permalink)}"/>\n'

        # Content
        if note.html:
            # HTML content - escaped
            yield '    <content type="html">'
            yield _escape_xml(note.html)
            yield '</content>\n'
        else:
            # Plain text content
            yield '    <content type="text">'
            yield _escape_xml(note.content)
            yield '</content>\n'

        yield '  </entry>\n'

    # Yield closing tag
    yield '</feed>\n'

    # Track feed generation metrics
    duration_ms = (time.time() - start_time) * 1000
    track_feed_generated(
        format='atom',
        item_count=item_count,
        duration_ms=duration_ms,
        cached=False
    )


def _escape_xml(text: str) -> str:
    """
    Escape special XML characters for safe inclusion in XML elements

    Escapes the five predefined XML entities: &, <, >, ", '

    Args:
        text: Text to escape

    Returns:
        XML-safe text with escaped entities

    Examples:
        >>> _escape_xml("Hello & goodbye")
        'Hello &amp; goodbye'
        >>> _escape_xml('<p>HTML</p>')
        '&lt;p&gt;HTML&lt;/p&gt;'
    """
    if not text:
        return ""

    # Escape in order: & first (to avoid double-escaping), then < > " '
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    text = text.replace('"', "&quot;")
    text = text.replace("'", "&apos;")

    return text


def _format_atom_date(dt: datetime) -> str:
    """
    Format datetime to RFC 3339 format for ATOM

    ATOM 1.0 requires RFC 3339 date format for published and updated elements.
    RFC 3339 is a profile of ISO 8601.
    Format: "2024-11-25T12:00:00Z" (UTC) or "2024-11-25T12:00:00-05:00" (with offset)

    Args:
        dt: Datetime object to format (naive datetime assumed to be UTC)

    Returns:
        RFC 3339 formatted date string

    Examples:
        >>> dt = datetime(2024, 11, 25, 12, 0, 0, tzinfo=timezone.utc)
        >>> _format_atom_date(dt)
        '2024-11-25T12:00:00Z'
    """
    # Ensure datetime has timezone (assume UTC if naive)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)

    # Format to RFC 3339
    # Use 'Z' suffix for UTC, otherwise include offset
    if dt.tzinfo == timezone.utc:
        return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        # Format with timezone offset
        return dt.isoformat()
starpunk/feeds/json_feed.py (new file, 309 lines)
@@ -0,0 +1,309 @@
"""
JSON Feed 1.1 generation for StarPunk

This module provides JSON Feed 1.1 generation from published notes using
Python's standard library json module for proper JSON serialization.

Functions:
    generate_json_feed: Generate JSON Feed 1.1 from notes
    generate_json_feed_streaming: Memory-efficient streaming JSON generation

Standards:
    - JSON Feed 1.1 specification compliant
    - RFC 3339 date format
    - Proper JSON encoding
    - UTF-8 output
"""

# Standard library imports
from datetime import datetime, timezone
from typing import Optional, Dict, Any
import time
import json

# Local imports
from starpunk.models import Note
from starpunk.monitoring.business import track_feed_generated


def generate_json_feed(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
) -> str:
    """
    Generate JSON Feed 1.1 from published notes

    Creates a standards-compliant JSON Feed 1.1 with proper metadata
    and item objects. Uses Python's json module for safe serialization.

    NOTE: For memory-efficient streaming, use generate_json_feed_streaming() instead.
    This function is kept for caching use cases.

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com')
        site_name: Site title for feed
        site_description: Site description for feed
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of items to include (default: 50)

    Returns:
        JSON Feed 1.1 string (UTF-8 encoded, pretty-printed)

    Raises:
        ValueError: If site_url or site_name is empty

    Examples:
        >>> notes = list_notes(published_only=True, limit=50)
        >>> feed_json = generate_json_feed(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
    """
    # Validate required parameters
    if not site_url or not site_url.strip():
        raise ValueError("site_url is required and cannot be empty")

    if not site_name or not site_name.strip():
        raise ValueError("site_name is required and cannot be empty")

    # Remove trailing slash from site_url for consistency
    site_url = site_url.rstrip("/")

    # Track feed generation timing
    start_time = time.time()

    # Build feed object
    feed = _build_feed_object(
        site_url=site_url,
        site_name=site_name,
        site_description=site_description,
        notes=notes[:limit]
    )

    # Serialize to JSON (pretty-printed)
    feed_json = json.dumps(feed, ensure_ascii=False, indent=2)

    # Track feed generation metrics
    duration_ms = (time.time() - start_time) * 1000
    track_feed_generated(
        format='json',
        item_count=min(len(notes), limit),
        duration_ms=duration_ms,
        cached=False
    )

    return feed_json


def generate_json_feed_streaming(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
):
    """
    Generate JSON Feed 1.1 from published notes using streaming

    Memory-efficient generator that yields JSON chunks instead of building
    the entire feed in memory. Recommended for large feeds (100+ items).

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com')
        site_name: Site title for feed
        site_description: Site description for feed
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of items to include (default: 50)

    Yields:
        JSON chunks as strings (UTF-8)

    Raises:
        ValueError: If site_url or site_name is empty

    Examples:
        >>> from flask import Response
        >>> notes = list_notes(published_only=True, limit=100)
        >>> generator = generate_json_feed_streaming(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
        >>> return Response(generator, mimetype='application/json')
    """
    # Validate required parameters
    if not site_url or not site_url.strip():
        raise ValueError("site_url is required and cannot be empty")

    if not site_name or not site_name.strip():
        raise ValueError("site_name is required and cannot be empty")

    # Remove trailing slash from site_url for consistency
    site_url = site_url.rstrip("/")

    # Track feed generation timing
    start_time = time.time()
    item_count = 0

    # Start feed object
    yield '{\n'
    yield f'  "version": "https://jsonfeed.org/version/1.1",\n'
    yield f'  "title": {json.dumps(site_name)},\n'
    yield f'  "home_page_url": {json.dumps(site_url)},\n'
    yield f'  "feed_url": {json.dumps(f"{site_url}/feed.json")},\n'

    if site_description:
        yield f'  "description": {json.dumps(site_description)},\n'

    yield '  "language": "en",\n'

    # Start items array
    yield '  "items": [\n'

    # Stream items (newest first)
    # Notes from database are already in DESC order (newest first)
    items = notes[:limit]
    for i, note in enumerate(items):
        item_count += 1

        # Build item object
        item = _build_item_object(site_url, note)

        # Serialize item to JSON
        item_json = json.dumps(item, ensure_ascii=False, indent=4)

        # Indent properly for nested JSON
        indented_lines = item_json.split('\n')
        indented = '\n'.join('    ' + line for line in indented_lines)
        yield indented

        # Add comma between items (but not after last item)
        if i < len(items) - 1:
            yield ',\n'
        else:
            yield '\n'

    # Close items array and feed
    yield '  ]\n'
    yield '}\n'

    # Track feed generation metrics
    duration_ms = (time.time() - start_time) * 1000
    track_feed_generated(
        format='json',
        item_count=item_count,
        duration_ms=duration_ms,
        cached=False
    )


def _build_feed_object(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note]
) -> Dict[str, Any]:
    """
    Build complete JSON Feed object

    Args:
        site_url: Site URL (no trailing slash)
        site_name: Feed title
        site_description: Feed description
        notes: List of notes (already limited)

    Returns:
        JSON Feed dictionary
    """
    feed = {
        "version": "https://jsonfeed.org/version/1.1",
        "title": site_name,
        "home_page_url": site_url,
        "feed_url": f"{site_url}/feed.json",
        "language": "en",
        "items": [_build_item_object(site_url, note) for note in notes]
    }

    if site_description:
        feed["description"] = site_description

    return feed


def _build_item_object(site_url: str, note: Note) -> Dict[str, Any]:
    """
    Build JSON Feed item object from note

    Args:
        site_url: Site URL (no trailing slash)
        note: Note to convert to item

    Returns:
        JSON Feed item dictionary
    """
    # Build permalink URL
    permalink = f"{site_url}{note.permalink}"

    # Create item with required fields
    item = {
        "id": permalink,
        "url": permalink,
    }

    # Add title
    item["title"] = note.title

    # Add content (HTML or text)
    if note.html:
        item["content_html"] = note.html
    else:
        item["content_text"] = note.content

    # Add publication date (RFC 3339 format)
    item["date_published"] = _format_rfc3339_date(note.created_at)

    # Add custom StarPunk extensions
    item["_starpunk"] = {
        "permalink_path": note.permalink,
        "word_count": len(note.content.split())
    }

    return item


def _format_rfc3339_date(dt: datetime) -> str:
    """
    Format datetime to RFC 3339 format for JSON Feed

    JSON Feed 1.1 requires RFC 3339 date format for date_published and date_modified.
    RFC 3339 is a profile of ISO 8601.
    Format: "2024-11-25T12:00:00Z" (UTC) or "2024-11-25T12:00:00-05:00" (with offset)

    Args:
        dt: Datetime object to format (naive datetime assumed to be UTC)

    Returns:
        RFC 3339 formatted date string

    Examples:
        >>> dt = datetime(2024, 11, 25, 12, 0, 0, tzinfo=timezone.utc)
        >>> _format_rfc3339_date(dt)
        '2024-11-25T12:00:00Z'
    """
    # Ensure datetime has timezone (assume UTC if naive)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)

    # Format to RFC 3339
    # Use 'Z' suffix for UTC, otherwise include offset
    if dt.tzinfo == timezone.utc:
        return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
    else:
        # Format with timezone offset
        return dt.isoformat()
starpunk/feeds/rss.py (new file, 397 lines)
@@ -0,0 +1,397 @@
"""
RSS 2.0 feed generation for StarPunk

This module provides RSS 2.0 feed generation from published notes using the
feedgen library. Feeds include proper RFC-822 dates, CDATA-wrapped HTML
content, and all required RSS elements.

Functions:
    generate_rss: Generate RSS 2.0 XML feed from notes
    generate_rss_streaming: Memory-efficient streaming RSS generation
    format_rfc822_date: Format datetime to RFC-822 for RSS
    get_note_title: Extract title from note (first line or timestamp)
    clean_html_for_rss: Clean HTML for CDATA safety

Standards:
    - RSS 2.0 specification compliant
    - RFC-822 date format
    - Atom self-link for feed discovery
    - CDATA wrapping for HTML content
"""

# Standard library imports
from datetime import datetime, timezone
from typing import Optional
import time

# Third-party imports
from feedgen.feed import FeedGenerator

# Local imports
from starpunk.models import Note
from starpunk.monitoring.business import track_feed_generated


def generate_rss(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
) -> str:
    """
    Generate RSS 2.0 XML feed from published notes

    Creates a standards-compliant RSS 2.0 feed with proper channel metadata
    and item entries for each note. Includes Atom self-link for discovery.

    NOTE: For memory-efficient streaming, use generate_rss_streaming() instead.
    This function is kept for backwards compatibility and caching use cases.

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com')
        site_name: Site title for RSS channel
        site_description: Site description for RSS channel
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of items to include (default: 50)

    Returns:
        RSS 2.0 XML string (UTF-8 encoded, pretty-printed)

    Raises:
        ValueError: If site_url or site_name is empty

    Examples:
        >>> notes = list_notes(published_only=True, limit=50)
        >>> feed_xml = generate_rss(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
        >>> print(feed_xml[:38])
        <?xml version='1.0' encoding='UTF-8'?>
    """
    # Validate required parameters
    if not site_url or not site_url.strip():
        raise ValueError("site_url is required and cannot be empty")

    if not site_name or not site_name.strip():
        raise ValueError("site_name is required and cannot be empty")

    # Remove trailing slash from site_url for consistency
    site_url = site_url.rstrip("/")

    # Create feed generator
    fg = FeedGenerator()

    # Set channel metadata (required elements)
    fg.id(site_url)
    fg.title(site_name)
    fg.link(href=site_url, rel="alternate")
    fg.description(site_description or site_name)
    fg.language("en")

    # Add self-link for feed discovery (Atom namespace)
    fg.link(href=f"{site_url}/feed.xml", rel="self", type="application/rss+xml")

    # Set last build date to now
    fg.lastBuildDate(datetime.now(timezone.utc))

    # Track feed generation timing
    start_time = time.time()

    # Add items (limit to configured maximum, newest first)
    # Notes from database are DESC but feedgen reverses them, so we reverse back
    for note in reversed(notes[:limit]):
        # Create feed entry
        fe = fg.add_entry()

        # Build permalink URL
        permalink = f"{site_url}{note.permalink}"

        # Set required item elements
        fe.id(permalink)
        fe.title(get_note_title(note))
        fe.link(href=permalink)
        fe.guid(permalink, permalink=True)

        # Set publication date (ensure UTC timezone)
        pubdate = note.created_at
        if pubdate.tzinfo is None:
            # If naive datetime, assume UTC
            pubdate = pubdate.replace(tzinfo=timezone.utc)
        fe.pubDate(pubdate)

        # Set description with HTML content in CDATA
        # feedgen automatically wraps content in CDATA for RSS
        html_content = clean_html_for_rss(note.html)
        fe.description(html_content)

    # Generate RSS 2.0 XML (pretty-printed)
    feed_xml = fg.rss_str(pretty=True).decode("utf-8")

    # Track feed generation metrics
    duration_ms = (time.time() - start_time) * 1000
    track_feed_generated(
        format='rss',
        item_count=min(len(notes), limit),
        duration_ms=duration_ms,
        cached=False
    )

    return feed_xml


def generate_rss_streaming(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
):
    """
    Generate RSS 2.0 XML feed from published notes using streaming

    Memory-efficient generator that yields XML chunks instead of building
    the entire feed in memory. Recommended for large feeds (100+ items).

    Yields XML in semantic chunks (channel metadata, individual items, closing tags)
    rather than character-by-character for optimal performance.

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com')
        site_name: Site title for RSS channel
        site_description: Site description for RSS channel
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of items to include (default: 50)

    Yields:
        XML chunks as strings (UTF-8)

    Raises:
        ValueError: If site_url or site_name is empty

    Examples:
        >>> from flask import Response
        >>> notes = list_notes(published_only=True, limit=100)
        >>> generator = generate_rss_streaming(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
        >>> return Response(generator, mimetype='application/rss+xml')
    """
    # Validate required parameters
    if not site_url or not site_url.strip():
        raise ValueError("site_url is required and cannot be empty")

    if not site_name or not site_name.strip():
        raise ValueError("site_name is required and cannot be empty")

    # Remove trailing slash from site_url for consistency
    site_url = site_url.rstrip("/")

    # Track feed generation timing
    start_time = time.time()
    item_count = 0

    # Current timestamp for lastBuildDate
    now = datetime.now(timezone.utc)
    last_build = format_rfc822_date(now)

    # Yield XML declaration and opening RSS tag
    yield '<?xml version="1.0" encoding="UTF-8"?>\n'
    yield '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">\n'
    yield "  <channel>\n"

    # Yield channel metadata
    yield f"    <title>{_escape_xml(site_name)}</title>\n"
    yield f"    <link>{_escape_xml(site_url)}</link>\n"
    yield f"    <description>{_escape_xml(site_description or site_name)}</description>\n"
    yield "    <language>en</language>\n"
    yield f"    <lastBuildDate>{last_build}</lastBuildDate>\n"
    yield f'    <atom:link href="{_escape_xml(site_url)}/feed.xml" rel="self" type="application/rss+xml"/>\n'

    # Yield items (newest first)
    # Notes from database are already in DESC order (newest first)
    for note in notes[:limit]:
        item_count += 1

        # Build permalink URL
        permalink = f"{site_url}{note.permalink}"

        # Get note title
        title = get_note_title(note)

        # Format publication date
        pubdate = note.created_at
        if pubdate.tzinfo is None:
            pubdate = pubdate.replace(tzinfo=timezone.utc)
        pub_date_str = format_rfc822_date(pubdate)

        # Get HTML content
        html_content = clean_html_for_rss(note.html)

        # Yield complete item as a single chunk
        item_xml = f"""    <item>
      <title>{_escape_xml(title)}</title>
      <link>{_escape_xml(permalink)}</link>
      <guid isPermaLink="true">{_escape_xml(permalink)}</guid>
      <pubDate>{pub_date_str}</pubDate>
      <description><![CDATA[{html_content}]]></description>
    </item>
"""
        yield item_xml

    # Yield closing tags
    yield "  </channel>\n"
    yield "</rss>\n"

    # Track feed generation metrics
    duration_ms = (time.time() - start_time) * 1000
    track_feed_generated(
        format='rss',
        item_count=item_count,
        duration_ms=duration_ms,
        cached=False
    )


def _escape_xml(text: str) -> str:
    """
    Escape special XML characters for safe inclusion in XML elements

    Escapes the five predefined XML entities: &, <, >, ", '

    Args:
        text: Text to escape

    Returns:
        XML-safe text with escaped entities

    Examples:
        >>> _escape_xml("Hello & goodbye")
        'Hello &amp; goodbye'
        >>> _escape_xml('<tag>')
        '&lt;tag&gt;'
    """
    if not text:
        return ""

    # Escape in order: & first (to avoid double-escaping), then < > " '
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    text = text.replace('"', "&quot;")
    text = text.replace("'", "&apos;")

    return text


def format_rfc822_date(dt: datetime) -> str:
    """
    Format datetime to RFC-822 format for RSS

    RSS 2.0 requires RFC-822 date format for pubDate and lastBuildDate.
    Format: "Mon, 18 Nov 2024 12:00:00 +0000"

    Args:
        dt: Datetime object to format (naive datetime assumed to be UTC)

    Returns:
        RFC-822 formatted date string

    Examples:
        >>> dt = datetime(2024, 11, 18, 12, 0, 0)
        >>> format_rfc822_date(dt)
        'Mon, 18 Nov 2024 12:00:00 +0000'
    """
    # Ensure datetime has timezone (assume UTC if naive)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)

    # Format to RFC-822
    # Format string: %a = weekday, %d = day, %b = month, %Y = year
    # %H:%M:%S = time, %z = timezone offset
    return dt.strftime("%a, %d %b %Y %H:%M:%S %z")


def get_note_title(note: Note) -> str:
    """
    Extract title from note content

    Attempts to extract a meaningful title from the note. Uses the first
    line of content (stripped of markdown heading syntax) or falls back
    to a formatted timestamp if content is unavailable.

    Algorithm:
    1. Try note.title property (first line, stripped of # syntax)
    2. Fall back to timestamp if title is unavailable

    Args:
        note: Note object

    Returns:
        Title string (max 100 chars, truncated if needed)

    Examples:
        >>> # Note with heading
        >>> note = Note(...)  # content: "# My First Note\\n\\n..."
        >>> get_note_title(note)
        'My First Note'

        >>> # Note without heading (timestamp fallback)
        >>> note = Note(...)  # content: "Just some text"
        >>> get_note_title(note)
        'November 18, 2024 at 12:00 PM'
    """
    try:
        # Use Note's title property (handles extraction logic)
        title = note.title

        # Truncate to 100 characters for RSS compatibility
        if len(title) > 100:
            title = title[:100].strip() + "..."

        return title

    except (FileNotFoundError, OSError, AttributeError):
        # If title extraction fails, use timestamp
        return note.created_at.strftime("%B %d, %Y at %I:%M %p")


def clean_html_for_rss(html: str) -> str:
    """
    Ensure HTML is safe for RSS CDATA wrapping

    RSS readers expect HTML content wrapped in CDATA sections. The feedgen
    library handles CDATA wrapping automatically, but we need to ensure
    the HTML doesn't contain CDATA end markers that would break parsing.

    This function is primarily defensive - markdown-rendered HTML should
    not contain CDATA markers, but we check anyway.

    Args:
        html: Rendered HTML content from markdown

    Returns:
        Cleaned HTML safe for CDATA wrapping

    Examples:
        >>> html = "<p>Hello world</p>"
        >>> clean_html_for_rss(html)
        '<p>Hello world</p>'

        >>> # Edge case: HTML containing CDATA end marker
        >>> html = "<p>Example: ]]></p>"
        >>> clean_html_for_rss(html)
        '<p>Example: ]] ></p>'
    """
    # Check for CDATA end marker and add space to break it
    # This is extremely unlikely with markdown-rendered HTML but be safe
    if "]]>" in html:
        html = html.replace("]]>", "]] >")

    return html