""" JSON Feed 1.1 generation for StarPunk This module provides JSON Feed 1.1 generation from published notes using Python's standard library json module for proper JSON serialization. Functions: generate_json_feed: Generate JSON Feed 1.1 from notes generate_json_feed_streaming: Memory-efficient streaming JSON generation Standards: - JSON Feed 1.1 specification compliant - RFC 3339 date format - Proper JSON encoding - UTF-8 output """ # Standard library imports from datetime import datetime, timezone from typing import Optional, Dict, Any import time import json # Local imports from starpunk.models import Note from starpunk.monitoring.business import track_feed_generated def generate_json_feed( site_url: str, site_name: str, site_description: str, notes: list[Note], limit: int = 50, ) -> str: """ Generate JSON Feed 1.1 from published notes Creates a standards-compliant JSON Feed 1.1 with proper metadata and item objects. Uses Python's json module for safe serialization. NOTE: For memory-efficient streaming, use generate_json_feed_streaming() instead. This function is kept for caching use cases. Args: site_url: Base URL of the site (e.g., 'https://example.com') site_name: Site title for feed site_description: Site description for feed notes: List of Note objects to include (should be published only) limit: Maximum number of items to include (default: 50) Returns: JSON Feed 1.1 string (UTF-8 encoded, pretty-printed) Raises: ValueError: If site_url or site_name is empty Examples: >>> notes = list_notes(published_only=True, limit=50) >>> feed_json = generate_json_feed( ... site_url='https://example.com', ... site_name='My Blog', ... site_description='My personal notes', ... notes=notes ... ) """ # Validate required parameters if not site_url or not site_url.strip(): raise ValueError("site_url is required and cannot be empty") if not site_name or not site_name.strip(): raise ValueError("site_name is required and cannot be empty") # Remove trailing slash from site_url for consistency site_url = site_url.rstrip("/") # Track feed generation timing start_time = time.time() # Build feed object feed = _build_feed_object( site_url=site_url, site_name=site_name, site_description=site_description, notes=notes[:limit] ) # Serialize to JSON (pretty-printed) feed_json = json.dumps(feed, ensure_ascii=False, indent=2) # Track feed generation metrics duration_ms = (time.time() - start_time) * 1000 track_feed_generated( format='json', item_count=min(len(notes), limit), duration_ms=duration_ms, cached=False ) return feed_json def generate_json_feed_streaming( site_url: str, site_name: str, site_description: str, notes: list[Note], limit: int = 50, ): """ Generate JSON Feed 1.1 from published notes using streaming Memory-efficient generator that yields JSON chunks instead of building the entire feed in memory. Recommended for large feeds (100+ items). Args: site_url: Base URL of the site (e.g., 'https://example.com') site_name: Site title for feed site_description: Site description for feed notes: List of Note objects to include (should be published only) limit: Maximum number of items to include (default: 50) Yields: JSON chunks as strings (UTF-8) Raises: ValueError: If site_url or site_name is empty Examples: >>> from flask import Response >>> notes = list_notes(published_only=True, limit=100) >>> generator = generate_json_feed_streaming( ... site_url='https://example.com', ... site_name='My Blog', ... site_description='My personal notes', ... notes=notes ... ) >>> return Response(generator, mimetype='application/json') """ # Validate required parameters if not site_url or not site_url.strip(): raise ValueError("site_url is required and cannot be empty") if not site_name or not site_name.strip(): raise ValueError("site_name is required and cannot be empty") # Remove trailing slash from site_url for consistency site_url = site_url.rstrip("/") # Track feed generation timing start_time = time.time() item_count = 0 # Start feed object yield '{\n' yield f' "version": "https://jsonfeed.org/version/1.1",\n' yield f' "title": {json.dumps(site_name)},\n' yield f' "home_page_url": {json.dumps(site_url)},\n' yield f' "feed_url": {json.dumps(f"{site_url}/feed.json")},\n' if site_description: yield f' "description": {json.dumps(site_description)},\n' yield ' "language": "en",\n' # Start items array yield ' "items": [\n' # Stream items (newest first) # Notes from database are already in DESC order (newest first) items = notes[:limit] for i, note in enumerate(items): item_count += 1 # Build item object item = _build_item_object(site_url, note) # Serialize item to JSON item_json = json.dumps(item, ensure_ascii=False, indent=4) # Indent properly for nested JSON indented_lines = item_json.split('\n') indented = '\n'.join(' ' + line for line in indented_lines) yield indented # Add comma between items (but not after last item) if i < len(items) - 1: yield ',\n' else: yield '\n' # Close items array and feed yield ' ]\n' yield '}\n' # Track feed generation metrics duration_ms = (time.time() - start_time) * 1000 track_feed_generated( format='json', item_count=item_count, duration_ms=duration_ms, cached=False ) def _build_feed_object( site_url: str, site_name: str, site_description: str, notes: list[Note] ) -> Dict[str, Any]: """ Build complete JSON Feed object Args: site_url: Site URL (no trailing slash) site_name: Feed title site_description: Feed description notes: List of notes (already limited) Returns: JSON Feed dictionary """ feed = { "version": "https://jsonfeed.org/version/1.1", "title": site_name, "home_page_url": site_url, "feed_url": f"{site_url}/feed.json", "language": "en", "items": [_build_item_object(site_url, note) for note in notes] } if site_description: feed["description"] = site_description return feed def _build_item_object(site_url: str, note: Note) -> Dict[str, Any]: """ Build JSON Feed item object from note Args: site_url: Site URL (no trailing slash) note: Note to convert to item Returns: JSON Feed item dictionary """ # Build permalink URL permalink = f"{site_url}{note.permalink}" # Create item with required fields item = { "id": permalink, "url": permalink, } # Add title item["title"] = note.title # Add image field (URL of first/main image) - per JSON Feed 1.1 spec # Per Q7: Field should be absent (not null) when no media if hasattr(note, 'media') and note.media: first_media = note.media[0] item["image"] = f"{site_url}/media/{first_media['path']}" # Add content (HTML or text) # Per Q24: Include media as HTML in content_html if note.html: content_html = "" # Add media at top if present (v1.2.0 Phase 3) if hasattr(note, 'media') and note.media: content_html += '
' for media_item in note.media: media_url = f"{site_url}/media/{media_item['path']}" caption = media_item.get('caption', '') content_html += f'{caption}' content_html += '
' # Add text content below media content_html += note.html item["content_html"] = content_html else: item["content_text"] = note.content # Add publication date (RFC 3339 format) item["date_published"] = _format_rfc3339_date(note.created_at) # Add attachments array (v1.2.0 Phase 3, per Q24 and ADR-057) # JSON Feed 1.1 native support for attachments if hasattr(note, 'media') and note.media: attachments = [] for media_item in note.media: media_url = f"{site_url}/media/{media_item['path']}" attachment = { 'url': media_url, 'mime_type': media_item.get('mime_type', 'image/jpeg'), 'size_in_bytes': media_item.get('size', 0) } # Add title (caption) if present if media_item.get('caption'): attachment['title'] = media_item['caption'] attachments.append(attachment) item["attachments"] = attachments # Add custom StarPunk extensions item["_starpunk"] = { "permalink_path": note.permalink, "word_count": len(note.content.split()) } return item def _format_rfc3339_date(dt: datetime) -> str: """ Format datetime to RFC 3339 format for JSON Feed JSON Feed 1.1 requires RFC 3339 date format for date_published and date_modified. RFC 3339 is a profile of ISO 8601. Format: "2024-11-25T12:00:00Z" (UTC) or "2024-11-25T12:00:00-05:00" (with offset) Args: dt: Datetime object to format (naive datetime assumed to be UTC) Returns: RFC 3339 formatted date string Examples: >>> dt = datetime(2024, 11, 25, 12, 0, 0, tzinfo=timezone.utc) >>> _format_rfc3339_date(dt) '2024-11-25T12:00:00Z' """ # Ensure datetime has timezone (assume UTC if naive) if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) # Format to RFC 3339 # Use 'Z' suffix for UTC, otherwise include offset if dt.tzinfo == timezone.utc: return dt.strftime("%Y-%m-%dT%H:%M:%SZ") else: # Format with timezone offset return dt.isoformat()