"""
ATOM 1.0 feed generation for StarPunk
This module provides ATOM 1.0 feed generation from published notes using
Python's standard library xml.etree.ElementTree for proper XML handling.
Functions:
generate_atom: Generate ATOM 1.0 XML feed from notes
generate_atom_streaming: Memory-efficient streaming ATOM generation
Standards:
- ATOM 1.0 (RFC 4287) specification compliant
- RFC 3339 date format
- Proper XML namespacing
- Escaped HTML and text content
"""
# Standard library imports
from datetime import datetime, timezone
from typing import Optional
import time
import xml.etree.ElementTree as ET
# Local imports
from starpunk.models import Note
from starpunk.monitoring.business import track_feed_generated
# ATOM namespace
ATOM_NS = "http://www.w3.org/2005/Atom"
def generate_atom(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
) -> str:
    """
    Build the complete ATOM 1.0 feed as a single string

    Thin, non-streaming front end to generate_atom_streaming(): every chunk
    the streaming generator yields is collected and concatenated, so the
    whole document is held in memory at once. Prefer the streaming variant
    when the feed is sent straight to a client; use this one when the full
    text is needed in one piece (e.g. for caching).

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com')
        site_name: Site title for feed
        site_description: Site description for feed (subtitle)
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of entries to include (default: 50)

    Returns:
        The full feed document as one str

    Raises:
        ValueError: If site_url or site_name is empty (raised by the
            streaming generator as soon as it is consumed here)

    Examples:
        >>> notes = list_notes(published_only=True, limit=50)
        >>> feed_xml = generate_atom(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
    """
    # All arguments are forwarded unchanged; the generator does the real work.
    chunks = generate_atom_streaming(
        site_url=site_url,
        site_name=site_name,
        site_description=site_description,
        notes=notes,
        limit=limit,
    )
    # Exhaust the generator and glue the pieces into one document.
    return ''.join(chunks)
def generate_atom_streaming(
site_url: str,
site_name: str,
site_description: str,
notes: list[Note],
limit: int = 50,
):
"""
Generate ATOM 1.0 XML feed from published notes using streaming
Memory-efficient generator that yields XML chunks instead of building
the entire feed in memory. Recommended for large feeds (100+ entries).
Args:
site_url: Base URL of the site (e.g., 'https://example.com')
site_name: Site title for feed
site_description: Site description for feed
notes: List of Note objects to include (should be published only)
limit: Maximum number of entries to include (default: 50)
Yields:
XML chunks as strings (UTF-8)
Raises:
ValueError: If site_url or site_name is empty
Examples:
>>> from flask import Response
>>> notes = list_notes(published_only=True, limit=100)
>>> generator = generate_atom_streaming(
... site_url='https://example.com',
... site_name='My Blog',
... site_description='My personal notes',
... notes=notes
... )
>>> return Response(generator, mimetype='application/atom+xml')
"""
# Validate required parameters
if not site_url or not site_url.strip():
raise ValueError("site_url is required and cannot be empty")
if not site_name or not site_name.strip():
raise ValueError("site_name is required and cannot be empty")
# Remove trailing slash from site_url for consistency
site_url = site_url.rstrip("/")
# Track feed generation timing
start_time = time.time()
item_count = 0
# Current timestamp for updated
now = datetime.now(timezone.utc)
# Yield XML declaration
yield '\n'
# Yield feed opening with namespace
yield f''
html_content += '
HTML
def _format_atom_date(dt: datetime) -> str:
    """
    Format datetime to RFC 3339 format for ATOM

    ATOM 1.0 requires RFC 3339 date format for published and updated
    elements. RFC 3339 is a profile of ISO 8601.

    Format: "2024-11-25T12:00:00Z" (UTC) or "2024-11-25T12:00:00-05:00"
    (with offset). Output always has whole-second precision; any
    microseconds on the input are dropped in both forms.

    Args:
        dt: Datetime object to format (naive datetime assumed to be UTC)

    Returns:
        RFC 3339 formatted date string

    Examples:
        >>> dt = datetime(2024, 11, 25, 12, 0, 0, tzinfo=timezone.utc)
        >>> _format_atom_date(dt)
        '2024-11-25T12:00:00Z'
    """
    # Ensure datetime has timezone (assume UTC if naive)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)

    # Decide on the offset itself rather than on the tzinfo object: any zone
    # whose offset is zero is UTC for formatting purposes, whichever tzinfo
    # class carries it. timedelta(0) is falsy, and a tzinfo that reports no
    # offset (None) is treated like a naive value, i.e. as UTC.
    if not dt.utcoffset():
        return dt.strftime("%Y-%m-%dT%H:%M:%SZ")

    # Non-UTC: keep the numeric offset, but pin the precision to seconds so
    # this branch matches the 'Z' branch instead of leaking microseconds.
    return dt.isoformat(timespec="seconds")