Files
StarPunk/starpunk/feeds/json_feed.py
Phil Skentelbery 27501f6381 feat: v1.2.0-rc.2 - Media display fixes and feed enhancements
## Added
- Feed Media Enhancement with Media RSS namespace support
  - RSS enclosure, media:content, media:thumbnail elements
  - JSON Feed image field for first image
- ADR-059: Full feed media standardization roadmap

## Fixed
- Media display on homepage (was only showing on note pages)
- Responsive image sizing with CSS constraints
- Caption display (now alt text only, not visible)
- Logging correlation ID crash in non-request contexts

## Documentation
- Feed media design documents and implementation reports
- Media display fixes design and validation reports
- Updated ROADMAP with v1.3.0/v1.4.0 media plans

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-09 14:58:37 -07:00

349 lines
10 KiB
Python

"""
JSON Feed 1.1 generation for StarPunk
This module provides JSON Feed 1.1 generation from published notes using
Python's standard library json module for proper JSON serialization.
Functions:
generate_json_feed: Generate JSON Feed 1.1 from notes
generate_json_feed_streaming: Memory-efficient streaming JSON generation
Standards:
- JSON Feed 1.1 specification compliant
- RFC 3339 date format
- Proper JSON encoding
- UTF-8 output
"""
# Standard library imports
from datetime import datetime, timezone
from typing import Optional, Dict, Any
import time
import json
# Local imports
from starpunk.models import Note
from starpunk.monitoring.business import track_feed_generated
def generate_json_feed(
site_url: str,
site_name: str,
site_description: str,
notes: list[Note],
limit: int = 50,
) -> str:
"""
Generate JSON Feed 1.1 from published notes
Creates a standards-compliant JSON Feed 1.1 with proper metadata
and item objects. Uses Python's json module for safe serialization.
NOTE: For memory-efficient streaming, use generate_json_feed_streaming() instead.
This function is kept for caching use cases.
Args:
site_url: Base URL of the site (e.g., 'https://example.com')
site_name: Site title for feed
site_description: Site description for feed
notes: List of Note objects to include (should be published only)
limit: Maximum number of items to include (default: 50)
Returns:
JSON Feed 1.1 string (UTF-8 encoded, pretty-printed)
Raises:
ValueError: If site_url or site_name is empty
Examples:
>>> notes = list_notes(published_only=True, limit=50)
>>> feed_json = generate_json_feed(
... site_url='https://example.com',
... site_name='My Blog',
... site_description='My personal notes',
... notes=notes
... )
"""
# Validate required parameters
if not site_url or not site_url.strip():
raise ValueError("site_url is required and cannot be empty")
if not site_name or not site_name.strip():
raise ValueError("site_name is required and cannot be empty")
# Remove trailing slash from site_url for consistency
site_url = site_url.rstrip("/")
# Track feed generation timing
start_time = time.time()
# Build feed object
feed = _build_feed_object(
site_url=site_url,
site_name=site_name,
site_description=site_description,
notes=notes[:limit]
)
# Serialize to JSON (pretty-printed)
feed_json = json.dumps(feed, ensure_ascii=False, indent=2)
# Track feed generation metrics
duration_ms = (time.time() - start_time) * 1000
track_feed_generated(
format='json',
item_count=min(len(notes), limit),
duration_ms=duration_ms,
cached=False
)
return feed_json
def generate_json_feed_streaming(
site_url: str,
site_name: str,
site_description: str,
notes: list[Note],
limit: int = 50,
):
"""
Generate JSON Feed 1.1 from published notes using streaming
Memory-efficient generator that yields JSON chunks instead of building
the entire feed in memory. Recommended for large feeds (100+ items).
Args:
site_url: Base URL of the site (e.g., 'https://example.com')
site_name: Site title for feed
site_description: Site description for feed
notes: List of Note objects to include (should be published only)
limit: Maximum number of items to include (default: 50)
Yields:
JSON chunks as strings (UTF-8)
Raises:
ValueError: If site_url or site_name is empty
Examples:
>>> from flask import Response
>>> notes = list_notes(published_only=True, limit=100)
>>> generator = generate_json_feed_streaming(
... site_url='https://example.com',
... site_name='My Blog',
... site_description='My personal notes',
... notes=notes
... )
>>> return Response(generator, mimetype='application/json')
"""
# Validate required parameters
if not site_url or not site_url.strip():
raise ValueError("site_url is required and cannot be empty")
if not site_name or not site_name.strip():
raise ValueError("site_name is required and cannot be empty")
# Remove trailing slash from site_url for consistency
site_url = site_url.rstrip("/")
# Track feed generation timing
start_time = time.time()
item_count = 0
# Start feed object
yield '{\n'
yield f' "version": "https://jsonfeed.org/version/1.1",\n'
yield f' "title": {json.dumps(site_name)},\n'
yield f' "home_page_url": {json.dumps(site_url)},\n'
yield f' "feed_url": {json.dumps(f"{site_url}/feed.json")},\n'
if site_description:
yield f' "description": {json.dumps(site_description)},\n'
yield ' "language": "en",\n'
# Start items array
yield ' "items": [\n'
# Stream items (newest first)
# Notes from database are already in DESC order (newest first)
items = notes[:limit]
for i, note in enumerate(items):
item_count += 1
# Build item object
item = _build_item_object(site_url, note)
# Serialize item to JSON
item_json = json.dumps(item, ensure_ascii=False, indent=4)
# Indent properly for nested JSON
indented_lines = item_json.split('\n')
indented = '\n'.join(' ' + line for line in indented_lines)
yield indented
# Add comma between items (but not after last item)
if i < len(items) - 1:
yield ',\n'
else:
yield '\n'
# Close items array and feed
yield ' ]\n'
yield '}\n'
# Track feed generation metrics
duration_ms = (time.time() - start_time) * 1000
track_feed_generated(
format='json',
item_count=item_count,
duration_ms=duration_ms,
cached=False
)
def _build_feed_object(
site_url: str,
site_name: str,
site_description: str,
notes: list[Note]
) -> Dict[str, Any]:
"""
Build complete JSON Feed object
Args:
site_url: Site URL (no trailing slash)
site_name: Feed title
site_description: Feed description
notes: List of notes (already limited)
Returns:
JSON Feed dictionary
"""
feed = {
"version": "https://jsonfeed.org/version/1.1",
"title": site_name,
"home_page_url": site_url,
"feed_url": f"{site_url}/feed.json",
"language": "en",
"items": [_build_item_object(site_url, note) for note in notes]
}
if site_description:
feed["description"] = site_description
return feed
def _build_item_object(site_url: str, note: Note) -> Dict[str, Any]:
"""
Build JSON Feed item object from note
Args:
site_url: Site URL (no trailing slash)
note: Note to convert to item
Returns:
JSON Feed item dictionary
"""
# Build permalink URL
permalink = f"{site_url}{note.permalink}"
# Create item with required fields
item = {
"id": permalink,
"url": permalink,
}
# Add title
item["title"] = note.title
# Add image field (URL of first/main image) - per JSON Feed 1.1 spec
# Per Q7: Field should be absent (not null) when no media
if hasattr(note, 'media') and note.media:
first_media = note.media[0]
item["image"] = f"{site_url}/media/{first_media['path']}"
# Add content (HTML or text)
# Per Q24: Include media as HTML in content_html
if note.html:
content_html = ""
# Add media at top if present (v1.2.0 Phase 3)
if hasattr(note, 'media') and note.media:
content_html += '<div class="media">'
for media_item in note.media:
media_url = f"{site_url}/media/{media_item['path']}"
caption = media_item.get('caption', '')
content_html += f'<img src="{media_url}" alt="{caption}" />'
content_html += '</div>'
# Add text content below media
content_html += note.html
item["content_html"] = content_html
else:
item["content_text"] = note.content
# Add publication date (RFC 3339 format)
item["date_published"] = _format_rfc3339_date(note.created_at)
# Add attachments array (v1.2.0 Phase 3, per Q24 and ADR-057)
# JSON Feed 1.1 native support for attachments
if hasattr(note, 'media') and note.media:
attachments = []
for media_item in note.media:
media_url = f"{site_url}/media/{media_item['path']}"
attachment = {
'url': media_url,
'mime_type': media_item.get('mime_type', 'image/jpeg'),
'size_in_bytes': media_item.get('size', 0)
}
# Add title (caption) if present
if media_item.get('caption'):
attachment['title'] = media_item['caption']
attachments.append(attachment)
item["attachments"] = attachments
# Add custom StarPunk extensions
item["_starpunk"] = {
"permalink_path": note.permalink,
"word_count": len(note.content.split())
}
return item
def _format_rfc3339_date(dt: datetime) -> str:
"""
Format datetime to RFC 3339 format for JSON Feed
JSON Feed 1.1 requires RFC 3339 date format for date_published and date_modified.
RFC 3339 is a profile of ISO 8601.
Format: "2024-11-25T12:00:00Z" (UTC) or "2024-11-25T12:00:00-05:00" (with offset)
Args:
dt: Datetime object to format (naive datetime assumed to be UTC)
Returns:
RFC 3339 formatted date string
Examples:
>>> dt = datetime(2024, 11, 25, 12, 0, 0, tzinfo=timezone.utc)
>>> _format_rfc3339_date(dt)
'2024-11-25T12:00:00Z'
"""
# Ensure datetime has timezone (assume UTC if naive)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
# Format to RFC 3339
# Use 'Z' suffix for UTC, otherwise include offset
if dt.tzinfo == timezone.utc:
return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
else:
# Format with timezone offset
return dt.isoformat()