## Added

- Feed Media Enhancement with Media RSS namespace support
- RSS enclosure, media:content, media:thumbnail elements
- JSON Feed image field for first image
- ADR-059: Full feed media standardization roadmap

## Fixed

- Media display on homepage (was only showing on note pages)
- Responsive image sizing with CSS constraints
- Caption display (now alt text only, not visible)
- Logging correlation ID crash in non-request contexts

## Documentation

- Feed media design documents and implementation reports
- Media display fixes design and validation reports
- Updated ROADMAP with v1.3.0/v1.4.0 media plans

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
349 lines
10 KiB
Python
349 lines
10 KiB
Python
"""
|
|
JSON Feed 1.1 generation for StarPunk
|
|
|
|
This module provides JSON Feed 1.1 generation from published notes using
|
|
Python's standard library json module for proper JSON serialization.
|
|
|
|
Functions:
|
|
generate_json_feed: Generate JSON Feed 1.1 from notes
|
|
generate_json_feed_streaming: Memory-efficient streaming JSON generation
|
|
|
|
Standards:
|
|
- JSON Feed 1.1 specification compliant
|
|
- RFC 3339 date format
|
|
- Proper JSON encoding
|
|
- UTF-8 output
|
|
"""
|
|
|
|
# Standard library imports
|
|
from datetime import datetime, timezone
|
|
from typing import Optional, Dict, Any
|
|
import time
|
|
import json
|
|
|
|
# Local imports
|
|
from starpunk.models import Note
|
|
from starpunk.monitoring.business import track_feed_generated
|
|
|
|
|
|
def generate_json_feed(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
) -> str:
    """
    Generate JSON Feed 1.1 from published notes

    Builds the whole feed as a dictionary and serializes it in a single
    json.dumps() call, then reports the generation to the feed metrics.

    NOTE: For memory-efficient streaming, use generate_json_feed_streaming() instead.
    This function is kept for caching use cases.

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com').
            Trailing slashes are stripped before any URL is built.
        site_name: Site title for feed
        site_description: Site description for feed
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of items to include (default: 50)

    Returns:
        JSON Feed 1.1 document as a string (pretty-printed with a 2-space
        indent; non-ASCII characters are written as-is, not escaped)

    Raises:
        ValueError: If site_url or site_name is empty or whitespace-only

    Examples:
        >>> notes = list_notes(published_only=True, limit=50)
        >>> feed_json = generate_json_feed(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
    """
    # Validate required parameters
    if not site_url or not site_url.strip():
        raise ValueError("site_url is required and cannot be empty")

    if not site_name or not site_name.strip():
        raise ValueError("site_name is required and cannot be empty")

    # Remove trailing slash from site_url for consistency
    site_url = site_url.rstrip("/")

    # Track feed generation timing. perf_counter() is monotonic, so the
    # measured duration cannot be skewed by wall-clock adjustments.
    start_time = time.perf_counter()

    # Slice once so the metric below reports exactly what was serialized
    items = notes[:limit]

    # Build feed object
    feed = _build_feed_object(
        site_url=site_url,
        site_name=site_name,
        site_description=site_description,
        notes=items,
    )

    # Serialize to JSON (pretty-printed)
    feed_json = json.dumps(feed, ensure_ascii=False, indent=2)

    # Track feed generation metrics
    duration_ms = (time.perf_counter() - start_time) * 1000
    track_feed_generated(
        format='json',
        item_count=len(items),
        duration_ms=duration_ms,
        cached=False,
    )

    return feed_json
|
|
|
|
|
|
def generate_json_feed_streaming(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note],
    limit: int = 50,
):
    """
    Generate JSON Feed 1.1 from published notes using streaming

    Memory-efficient variant that returns a generator of JSON chunks instead
    of building the entire feed in memory. Recommended for large feeds
    (100+ items).

    Parameters are validated when this function is called, not when the
    returned generator is first iterated, so an invalid configuration fails
    before any chunk has been produced.

    Args:
        site_url: Base URL of the site (e.g., 'https://example.com').
            Trailing slashes are stripped before any URL is built.
        site_name: Site title for feed
        site_description: Site description for feed (the "description"
            field is left out when this is empty)
        notes: List of Note objects to include (should be published only)
        limit: Maximum number of items to include (default: 50)

    Returns:
        Generator yielding JSON chunks as strings. Feed generation metrics
        are recorded only once the generator has been fully consumed.

    Raises:
        ValueError: If site_url or site_name is empty or whitespace-only

    Examples:
        >>> from flask import Response
        >>> notes = list_notes(published_only=True, limit=100)
        >>> generator = generate_json_feed_streaming(
        ...     site_url='https://example.com',
        ...     site_name='My Blog',
        ...     site_description='My personal notes',
        ...     notes=notes
        ... )
        >>> return Response(generator, mimetype='application/json')
    """
    # Validate required parameters eagerly. If this function were itself a
    # generator, these checks would not run until the first chunk was requested.
    if not site_url or not site_url.strip():
        raise ValueError("site_url is required and cannot be empty")

    if not site_name or not site_name.strip():
        raise ValueError("site_name is required and cannot be empty")

    # Remove trailing slash from site_url for consistency
    return _stream_json_feed_chunks(
        site_url.rstrip("/"),
        site_name,
        site_description,
        notes[:limit],
    )


def _stream_json_feed_chunks(
    site_url: str,
    site_name: str,
    site_description: str,
    items: list[Note],
):
    """Yield the JSON Feed document chunk by chunk for already-validated input."""
    # Track feed generation timing (starts when the consumer pulls the first
    # chunk). perf_counter() is monotonic, unlike the wall clock.
    start_time = time.perf_counter()

    # ensure_ascii=False matches how the items below are serialized, so the
    # whole stream uses one encoding style for non-ASCII text.
    title_json = json.dumps(site_name, ensure_ascii=False)
    home_json = json.dumps(site_url, ensure_ascii=False)
    feed_url_json = json.dumps(f"{site_url}/feed.json", ensure_ascii=False)

    # Start feed object
    yield '{\n'
    yield ' "version": "https://jsonfeed.org/version/1.1",\n'
    yield f' "title": {title_json},\n'
    yield f' "home_page_url": {home_json},\n'
    yield f' "feed_url": {feed_url_json},\n'

    if site_description:
        description_json = json.dumps(site_description, ensure_ascii=False)
        yield f' "description": {description_json},\n'

    yield ' "language": "en",\n'

    # Start items array
    yield ' "items": [\n'

    # Stream items in the order given (callers pass notes newest first)
    last_index = len(items) - 1
    for index, note in enumerate(items):
        # Build and serialize one item at a time
        item = _build_item_object(site_url, note)
        item_json = json.dumps(item, ensure_ascii=False, indent=4)

        # Shift every line of the item right so it nests inside "items"
        yield '\n'.join(' ' + line for line in item_json.split('\n'))

        # Comma between items, but not after the last one
        yield ',\n' if index < last_index else '\n'

    # Close items array and feed
    yield ' ]\n'
    yield '}\n'

    # Track feed generation metrics (reached only after the final chunk has
    # been consumed, at which point every item has been emitted)
    duration_ms = (time.perf_counter() - start_time) * 1000
    track_feed_generated(
        format='json',
        item_count=len(items),
        duration_ms=duration_ms,
        cached=False,
    )
|
|
|
|
|
|
def _build_feed_object(
    site_url: str,
    site_name: str,
    site_description: str,
    notes: list[Note]
) -> Dict[str, Any]:
    """
    Assemble the top-level JSON Feed dictionary

    Args:
        site_url: Site URL (no trailing slash)
        site_name: Feed title
        site_description: Feed description; the "description" key is only
            added when this is non-empty
        notes: List of notes (already limited)

    Returns:
        JSON Feed dictionary with one item per note, in the order given
    """
    # Convert every note to its item representation first
    entries = []
    for entry in notes:
        entries.append(_build_item_object(site_url, entry))

    # Fill the document key by key (insertion order is the output order)
    document: Dict[str, Any] = {}
    document["version"] = "https://jsonfeed.org/version/1.1"
    document["title"] = site_name
    document["home_page_url"] = site_url
    document["feed_url"] = f"{site_url}/feed.json"
    document["language"] = "en"
    document["items"] = entries

    # The description is optional and goes last when present
    if not site_description:
        return document

    document["description"] = site_description
    return document
|
|
|
|
|
|
def _build_item_object(site_url: str, note: Note) -> Dict[str, Any]:
    """
    Build JSON Feed item object from note

    The item always carries id, url, title, date_published and the custom
    _starpunk extension. Notes with rendered HTML get content_html (with
    any media rendered as <img> tags ahead of the text); otherwise the raw
    content is emitted as content_text. Notes with media additionally get
    an image field (first media item) and an attachments array.

    Args:
        site_url: Site URL (no trailing slash)
        note: Note to convert to item. Its media entries, when present,
            are read as mappings with a 'path' key and optional 'caption',
            'mime_type' and 'size' keys.

    Returns:
        JSON Feed item dictionary
    """
    # Imported locally so this function brings its own dependency into scope
    from html import escape

    # Build permalink URL
    permalink = f"{site_url}{note.permalink}"

    # Create item with required fields, then title
    item: Dict[str, Any] = {
        "id": permalink,
        "url": permalink,
        "title": note.title,
    }

    # Resolve media once; a missing, None or empty media attribute all mean
    # "no media"
    media = getattr(note, 'media', None) or []
    media_urls = [f"{site_url}/media/{media_item['path']}" for media_item in media]

    # Add image field (URL of first/main image) - per JSON Feed 1.1 spec
    # Per Q7: Field should be absent (not null) when no media
    if media:
        item["image"] = media_urls[0]

    # Add content (HTML or text)
    # Per Q24: Include media as HTML in content_html
    if note.html:
        html_parts = []

        # Add media at top if present (v1.2.0 Phase 3)
        if media:
            html_parts.append('<div class="media">')
            for media_url, media_item in zip(media_urls, media):
                # A caption stored as None must not render as alt="None"
                caption = media_item.get('caption') or ''
                # Escape attribute values so quotes or markup in a caption
                # or path cannot break out of the tag.
                # NOTE(review): assumes captions are stored as plain text,
                # not pre-escaped HTML - confirm against the media model.
                html_parts.append(
                    f'<img src="{escape(media_url, quote=True)}" '
                    f'alt="{escape(str(caption), quote=True)}" />'
                )
            html_parts.append('</div>')

        # Add text content below media
        html_parts.append(note.html)
        item["content_html"] = "".join(html_parts)
    else:
        item["content_text"] = note.content

    # Add publication date (RFC 3339 format)
    item["date_published"] = _format_rfc3339_date(note.created_at)

    # Add attachments array (v1.2.0 Phase 3, per Q24 and ADR-057)
    # JSON Feed 1.1 native support for attachments
    if media:
        attachments = []
        for media_url, media_item in zip(media_urls, media):
            attachment = {
                'url': media_url,
                'mime_type': media_item.get('mime_type', 'image/jpeg'),
                'size_in_bytes': media_item.get('size', 0),
            }
            # Add title (caption) if present; JSON serialization handles
            # escaping here, so the raw caption is used
            if media_item.get('caption'):
                attachment['title'] = media_item['caption']

            attachments.append(attachment)

        item["attachments"] = attachments

    # Add custom StarPunk extensions
    item["_starpunk"] = {
        "permalink_path": note.permalink,
        "word_count": len(note.content.split()),
    }

    return item
|
|
|
|
|
|
def _format_rfc3339_date(dt: datetime) -> str:
    """
    Format datetime to RFC 3339 format for JSON Feed

    JSON Feed 1.1 requires RFC 3339 date format for date_published and date_modified.
    RFC 3339 is a profile of ISO 8601.
    Format: "2024-11-25T12:00:00Z" (UTC) or "2024-11-25T12:00:00-05:00" (with offset)

    Output always has whole-second precision; any microseconds are dropped.

    Args:
        dt: Datetime object to format (naive datetime assumed to be UTC)

    Returns:
        RFC 3339 formatted date string. Any datetime whose UTC offset is
        zero gets the 'Z' suffix, whichever tzinfo class provides it.

    Examples:
        >>> dt = datetime(2024, 11, 25, 12, 0, 0, tzinfo=timezone.utc)
        >>> _format_rfc3339_date(dt)
        '2024-11-25T12:00:00Z'
    """
    # Ensure datetime has timezone (assume UTC if naive)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)

    # Decide on the offset, not on the tzinfo object: other tzinfo
    # implementations can also represent UTC without comparing equal to
    # timezone.utc. A zero timedelta is falsy; so is the None returned by a
    # tzinfo that cannot determine its offset, which is then treated like a
    # naive value (assumed UTC) instead of being emitted without any offset.
    if not dt.utcoffset():
        return dt.strftime("%Y-%m-%dT%H:%M:%SZ")

    # Non-zero offset: keep the offset, at the same second precision as above
    return dt.isoformat(timespec="seconds")
|