Files
StarPunk/starpunk/feeds/json_feed.py
Phil Skentelbery 83dc488457 feat: v1.4.0 Phase 4 - Enhanced Feed Media
Implement Phase 4 of v1.4.0 Media release - Enhanced Feed Media support.

RSS Feed Enhancements (starpunk/feeds/rss.py):
- Wrap size variants in <media:group> elements
- Add <media:content> for large/medium/small variants with attributes:
  url, type, medium, isDefault, width, height, fileSize
- Add <media:thumbnail> for thumb variant with dimensions
- Add <media:title type="plain"> for image captions
- Implement isDefault logic: largest available variant (large→medium→small fallback)
- Maintain backwards compatibility for media without variants (legacy fallback)

JSON Feed Enhancements (starpunk/feeds/json_feed.py):
- Add _starpunk.about URL (configurable via STARPUNK_ABOUT_URL config)
- Add _starpunk.media_variants array with variant data when variants exist
- Each variant entry includes: url, width, height, size_in_bytes, mime_type

ATOM Feed Enhancements (starpunk/feeds/atom.py):
- Add title attribute to enclosure links for captions
- Keep simple (no variants in ATOM per design decision)

Test Updates (tests/test_feeds_rss.py):
- Update streaming media test to search descendants for media:content
- Now inside media:group for images with variants (v1.4.0 behavior)

Per design document: /docs/design/v1.4.0/media-implementation-design.md
Following ADR-059: Full Feed Media Standardization

Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-16 07:47:56 -07:00

389 lines
12 KiB
Python

"""
JSON Feed 1.1 generation for StarPunk
This module provides JSON Feed 1.1 generation from published notes using
Python's standard library json module for proper JSON serialization.
Functions:
generate_json_feed: Generate JSON Feed 1.1 from notes
generate_json_feed_streaming: Memory-efficient streaming JSON generation
Standards:
- JSON Feed 1.1 specification compliant
- RFC 3339 date format
- Proper JSON encoding
- UTF-8 output
"""
# Standard library imports
from datetime import datetime, timezone
from typing import Optional, Dict, Any
import time
import json
# Local imports
from starpunk.models import Note
from starpunk.monitoring.business import track_feed_generated
def generate_json_feed(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
) -> str:
    """
    Generate JSON Feed 1.1 from published notes

    Builds the complete feed dictionary in memory and serializes it with
    Python's json module (pretty-printed, non-ASCII characters kept as-is).

    NOTE: For memory-efficient streaming, use generate_json_feed_streaming() instead.
    This function is kept for caching use cases.

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com')
        site_name: Site title for feed
        site_description: Site description for feed (omitted from the feed when empty)
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of items to include (default: 50)

    Returns:
        JSON Feed 1.1 document as a pretty-printed string

    Raises:
        ValueError: If site_url or site_name is empty or whitespace-only

    Examples:
        >>> notes = list_notes(published_only=True, limit=50)
        >>> feed_json = generate_json_feed(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
    """
    # Validate required parameters
    if not site_url or not site_url.strip():
        raise ValueError("site_url is required and cannot be empty")
    if not site_name or not site_name.strip():
        raise ValueError("site_name is required and cannot be empty")

    # Remove trailing slash from site_url for consistency
    site_url = site_url.rstrip("/")

    # Track feed generation timing
    start_time = time.time()

    # Slice once so the rendered items and the reported item count can
    # never disagree (min(len(notes), limit) differs from the slice length
    # for e.g. a negative limit).
    items = notes[:limit]

    # Build feed object
    feed = _build_feed_object(
        site_url=site_url,
        site_name=site_name,
        site_description=site_description,
        notes=items,
    )

    # Serialize to JSON (pretty-printed)
    feed_json = json.dumps(feed, ensure_ascii=False, indent=2)

    # Track feed generation metrics
    duration_ms = (time.time() - start_time) * 1000
    track_feed_generated(
        format='json',
        item_count=len(items),
        duration_ms=duration_ms,
        cached=False
    )

    return feed_json
def generate_json_feed_streaming(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
):
    """
    Generate JSON Feed 1.1 from published notes using streaming

    Memory-efficient variant that yields JSON chunks instead of building
    the entire feed in memory. Recommended for large feeds (100+ items).

    Parameters are validated immediately when this function is called (not
    lazily on first iteration), so a bad configuration fails before the
    returned generator is handed to a response object.

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com')
        site_name: Site title for feed
        site_description: Site description for feed (omitted from the feed when empty)
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of items to include (default: 50)

    Returns:
        A generator yielding JSON chunks as strings. Generation metrics are
        recorded only once the generator has been fully consumed.

    Raises:
        ValueError: If site_url or site_name is empty or whitespace-only

    Examples:
        >>> from flask import Response
        >>> notes = list_notes(published_only=True, limit=100)
        >>> generator = generate_json_feed_streaming(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
        >>> return Response(generator, mimetype='application/json')
    """
    # Validate required parameters eagerly. If this function itself contained
    # `yield`, these checks would not run until the first next() call.
    if not site_url or not site_url.strip():
        raise ValueError("site_url is required and cannot be empty")
    if not site_name or not site_name.strip():
        raise ValueError("site_name is required and cannot be empty")

    # Remove trailing slash from site_url for consistency
    site_url = site_url.rstrip("/")

    def _stream_chunks():
        # Timing starts when streaming starts, not when the generator is created
        start_time = time.time()
        item_count = 0

        # Start feed object. ensure_ascii=False matches the item serialization
        # below and the non-streaming generate_json_feed() output.
        yield '{\n'
        yield ' "version": "https://jsonfeed.org/version/1.1",\n'
        yield f' "title": {json.dumps(site_name, ensure_ascii=False)},\n'
        yield f' "home_page_url": {json.dumps(site_url, ensure_ascii=False)},\n'
        yield f' "feed_url": {json.dumps(f"{site_url}/feed.json", ensure_ascii=False)},\n'
        if site_description:
            yield f' "description": {json.dumps(site_description, ensure_ascii=False)},\n'
        yield ' "language": "en",\n'

        # Start items array
        yield ' "items": [\n'

        # Stream items (newest first)
        # Notes from database are already in DESC order (newest first)
        items = notes[:limit]
        last_index = len(items) - 1
        for i, note in enumerate(items):
            item_count += 1

            # Build and serialize one item at a time
            item = _build_item_object(site_url, note)
            item_json = json.dumps(item, ensure_ascii=False, indent=4)

            # Prefix every line so the item is nested under "items"
            yield '\n'.join(' ' + line for line in item_json.split('\n'))

            # Comma between items, but not after the last one
            yield ',\n' if i < last_index else '\n'

        # Close items array and feed
        yield ' ]\n'
        yield '}\n'

        # Track feed generation metrics (reached only after the final chunk)
        duration_ms = (time.time() - start_time) * 1000
        track_feed_generated(
            format='json',
            item_count=item_count,
            duration_ms=duration_ms,
            cached=False
        )

    return _stream_chunks()
def _build_feed_object(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note]
) -> Dict[str, Any]:
    """
    Assemble the top-level JSON Feed 1.1 dictionary

    Args:
        site_url: Site URL (no trailing slash)
        site_name: Feed title
        site_description: Feed description (key is left out when falsy)
        notes: List of notes (already limited)

    Returns:
        JSON Feed dictionary with one item object per note
    """
    # Convert every note into its item representation, preserving order
    rendered_items = []
    for entry in notes:
        rendered_items.append(_build_item_object(site_url, entry))

    # Keyword order here is the key order of the resulting dictionary
    result = dict(
        version="https://jsonfeed.org/version/1.1",
        title=site_name,
        home_page_url=site_url,
        feed_url=f"{site_url}/feed.json",
        language="en",
        items=rendered_items,
    )

    # The description is optional and appended last when provided
    if not site_description:
        return result
    result["description"] = site_description
    return result
def _build_item_object(site_url: str, note: Note) -> Dict[str, Any]:
    """
    Build JSON Feed item object from note

    The item always carries id, url, title, date_published and the custom
    _starpunk extension. Optional keys are omitted (never null/empty) when
    there is nothing to report: image, attachments, tags, and
    _starpunk.media_variants.

    Args:
        site_url: Site URL (no trailing slash)
        note: Note to convert to item

    Returns:
        JSON Feed item dictionary
    """
    # Function-scope imports: `escape` is only needed for the inline media
    # HTML, and current_app is read from the active Flask application.
    from html import escape
    from flask import current_app

    # Build permalink URL
    permalink = f"{site_url}{note.permalink}"

    # Resolve media once; a missing or empty attribute both mean "no media"
    media = getattr(note, 'media', None) or []

    # Create item with required fields
    item: Dict[str, Any] = {
        "id": permalink,
        "url": permalink,
        "title": note.title,
    }

    # Add image field (URL of first/main image) - per JSON Feed 1.1 spec
    # Per Q7: Field should be absent (not null) when no media
    if media:
        item["image"] = f"{site_url}/media/{media[0]['path']}"

    # Add content (HTML or text)
    # Per Q24: Include media as HTML in content_html
    if note.html:
        html_parts = []
        # Add media at top if present (v1.2.0 Phase 3)
        if media:
            html_parts.append('<div class="media">')
            for media_item in media:
                # Escape attribute values so a quote, '<' or '&' in a caption
                # or path cannot break out of the attribute; a None caption
                # becomes an empty alt rather than the text "None".
                src = escape(f"{site_url}/media/{media_item['path']}", quote=True)
                alt = escape(media_item.get('caption') or '', quote=True)
                html_parts.append(f'<img src="{src}" alt="{alt}" />')
            html_parts.append('</div>')
        # Add text content below media
        html_parts.append(note.html)
        item["content_html"] = "".join(html_parts)
    else:
        item["content_text"] = note.content

    # Add publication date (RFC 3339 format)
    item["date_published"] = _format_rfc3339_date(note.created_at)

    # Add attachments array (v1.2.0 Phase 3, per Q24 and ADR-057)
    # JSON Feed 1.1 native support for attachments
    if media:
        attachments = []
        for media_item in media:
            attachment = {
                'url': f"{site_url}/media/{media_item['path']}",
                'mime_type': media_item.get('mime_type', 'image/jpeg'),
                'size_in_bytes': media_item.get('size', 0)
            }
            # Add title (caption) if present
            caption = media_item.get('caption')
            if caption:
                attachment['title'] = caption
            attachments.append(attachment)
        item["attachments"] = attachments

    # Add tags array (v1.3.1)
    # Per spec: array of plain strings (tags, not categories)
    # Omit field when no tags (user decision: no empty array)
    tags = getattr(note, 'tags', None)
    if tags:
        item["tags"] = [tag['display_name'] for tag in tags]

    # Add custom StarPunk extensions (v1.4.0 Phase 4)
    about_url = current_app.config.get(
        "STARPUNK_ABOUT_URL",
        "https://github.com/yourusername/starpunk"
    )
    starpunk_ext: Dict[str, Any] = {
        "permalink_path": note.permalink,
        # Guard against a note without text content
        "word_count": len((note.content or "").split()),
        "about": about_url
    }

    # Add media variants if present
    media_variants = []
    for media_item in media:
        variants = media_item.get('variants', {})
        if not variants:
            continue
        # NOTE(review): variants are assumed to share the parent media's
        # MIME type (only the parent entry is known to carry one) - confirm.
        mime_type = media_item.get('mime_type')
        variant_entries = {}
        for variant_type, variant_data in variants.items():
            entry = {
                "url": f"{site_url}/media/{variant_data['path']}",
                "width": variant_data['width'],
                "height": variant_data['height'],
                "size_in_bytes": variant_data['size_bytes']
            }
            if mime_type:
                entry["mime_type"] = mime_type
            variant_entries[variant_type] = entry
        media_variants.append({
            "caption": media_item.get('caption', ''),
            "variants": variant_entries
        })
    if media_variants:
        starpunk_ext["media_variants"] = media_variants

    item["_starpunk"] = starpunk_ext
    return item
def _format_rfc3339_date(dt: datetime) -> str:
    """
    Format datetime to RFC 3339 format for JSON Feed

    JSON Feed 1.1 requires RFC 3339 date format for date_published and date_modified.
    RFC 3339 is a profile of ISO 8601.

    Format: "2024-11-25T12:00:00Z" (UTC) or "2024-11-25T12:00:00-05:00" (with offset)
    Output is always second precision, regardless of timezone.

    Args:
        dt: Datetime object to format (naive datetime assumed to be UTC)

    Returns:
        RFC 3339 formatted date string

    Examples:
        >>> dt = datetime(2024, 11, 25, 12, 0, 0, tzinfo=timezone.utc)
        >>> _format_rfc3339_date(dt)
        '2024-11-25T12:00:00Z'
    """
    # Ensure datetime has a usable timezone (assume UTC if naive).
    # utcoffset() is None both when tzinfo is missing and when a tzinfo
    # object declines to report an offset; either way the value is naive.
    if dt.utcoffset() is None:
        dt = dt.replace(tzinfo=timezone.utc)

    # Use 'Z' suffix for any zero-offset timezone, not only the
    # timezone.utc singleton (a zero timedelta is falsy).
    if not dt.utcoffset():
        return dt.strftime("%Y-%m-%dT%H:%M:%SZ")

    # Otherwise include the numeric offset; timespec keeps the precision
    # consistent with the UTC branch above.
    return dt.isoformat(timespec="seconds")