From b02df151a1619d69ef60174a4426cfe3b40d090e Mon Sep 17 00:00:00 2001 From: Phil Skentelbery Date: Wed, 19 Nov 2025 08:39:29 -0700 Subject: [PATCH 01/12] chore: bump version to 0.6.0 for Phase 5 Phase 5 adds RSS feed generation and production containerization. This is a minor version bump per semantic versioning. Related: docs/decisions/ADR-015-phase-5-implementation-approach.md --- starpunk/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/starpunk/__init__.py b/starpunk/__init__.py index fa63c6d..7e2313f 100644 --- a/starpunk/__init__.py +++ b/starpunk/__init__.py @@ -57,5 +57,5 @@ def create_app(config=None): # Package version (Semantic Versioning 2.0.0) # See docs/standards/versioning-strategy.md for details -__version__ = "0.5.1" -__version_info__ = (0, 5, 1) +__version__ = "0.6.0" +__version_info__ = (0, 6, 0) From 856148209a28d5002284ae13d8aa0480ce2b1995 Mon Sep 17 00:00:00 2001 From: Phil Skentelbery Date: Wed, 19 Nov 2025 08:40:46 -0700 Subject: [PATCH 02/12] feat: add RSS feed generation module Implements RSS 2.0 feed generation using feedgen library. Features: - generate_feed() creates standards-compliant RSS 2.0 XML - RFC-822 date formatting for pubDate elements - Title extraction from note content (first line or timestamp) - CDATA safety for HTML content - Configurable feed item limits Follows ADR-014 RSS implementation strategy. Related: docs/decisions/ADR-014-rss-feed-implementation.md --- starpunk/feed.py | 229 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) create mode 100644 starpunk/feed.py diff --git a/starpunk/feed.py b/starpunk/feed.py new file mode 100644 index 0000000..0fe9d02 --- /dev/null +++ b/starpunk/feed.py @@ -0,0 +1,229 @@ +""" +RSS feed generation for StarPunk + +This module provides RSS 2.0 feed generation from published notes using the +feedgen library. Feeds include proper RFC-822 dates, CDATA-wrapped HTML +content, and all required RSS elements. 
+ +Functions: + generate_feed: Generate RSS 2.0 XML feed from notes + format_rfc822_date: Format datetime to RFC-822 for RSS + get_note_title: Extract title from note (first line or timestamp) + clean_html_for_rss: Clean HTML for CDATA safety + +Standards: + - RSS 2.0 specification compliant + - RFC-822 date format + - Atom self-link for feed discovery + - CDATA wrapping for HTML content +""" + +# Standard library imports +from datetime import datetime, timezone +from typing import Optional + +# Third-party imports +from feedgen.feed import FeedGenerator + +# Local imports +from starpunk.models import Note + + +def generate_feed( + site_url: str, + site_name: str, + site_description: str, + notes: list[Note], + limit: int = 50, +) -> str: + """ + Generate RSS 2.0 XML feed from published notes + + Creates a standards-compliant RSS 2.0 feed with proper channel metadata + and item entries for each note. Includes Atom self-link for discovery. + + Args: + site_url: Base URL of the site (e.g., 'https://example.com') + site_name: Site title for RSS channel + site_description: Site description for RSS channel + notes: List of Note objects to include (should be published only) + limit: Maximum number of items to include (default: 50) + + Returns: + RSS 2.0 XML string (UTF-8 encoded, pretty-printed) + + Raises: + ValueError: If site_url or site_name is empty + + Examples: + >>> notes = list_notes(published_only=True, limit=50) + >>> feed_xml = generate_feed( + ... site_url='https://example.com', + ... site_name='My Blog', + ... site_description='My personal notes', + ... notes=notes + ... 
) + >>> print(feed_xml[:38]) + + """ + # Validate required parameters + if not site_url or not site_url.strip(): + raise ValueError("site_url is required and cannot be empty") + + if not site_name or not site_name.strip(): + raise ValueError("site_name is required and cannot be empty") + + # Remove trailing slash from site_url for consistency + site_url = site_url.rstrip("/") + + # Create feed generator + fg = FeedGenerator() + + # Set channel metadata (required elements) + fg.id(site_url) + fg.title(site_name) + fg.link(href=site_url, rel="alternate") + fg.description(site_description or site_name) + fg.language("en") + + # Add self-link for feed discovery (Atom namespace) + fg.link(href=f"{site_url}/feed.xml", rel="self", type="application/rss+xml") + + # Set last build date to now + fg.lastBuildDate(datetime.now(timezone.utc)) + + # Add items (limit to configured maximum) + for note in notes[:limit]: + # Create feed entry + fe = fg.add_entry() + + # Build permalink URL + permalink = f"{site_url}{note.permalink}" + + # Set required item elements + fe.id(permalink) + fe.title(get_note_title(note)) + fe.link(href=permalink) + fe.guid(permalink, permalink=True) + + # Set publication date (ensure UTC timezone) + pubdate = note.created_at + if pubdate.tzinfo is None: + # If naive datetime, assume UTC + pubdate = pubdate.replace(tzinfo=timezone.utc) + fe.pubDate(pubdate) + + # Set description with HTML content in CDATA + # feedgen automatically wraps content in CDATA for RSS + html_content = clean_html_for_rss(note.html) + fe.description(html_content) + + # Generate RSS 2.0 XML (pretty-printed) + return fg.rss_str(pretty=True).decode("utf-8") + + +def format_rfc822_date(dt: datetime) -> str: + """ + Format datetime to RFC-822 format for RSS + + RSS 2.0 requires RFC-822 date format for pubDate and lastBuildDate. 
+ Format: "Mon, 18 Nov 2024 12:00:00 +0000" + + Args: + dt: Datetime object to format (naive datetime assumed to be UTC) + + Returns: + RFC-822 formatted date string + + Examples: + >>> dt = datetime(2024, 11, 18, 12, 0, 0) + >>> format_rfc822_date(dt) + 'Mon, 18 Nov 2024 12:00:00 +0000' + """ + # Ensure datetime has timezone (assume UTC if naive) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + + # Format to RFC-822 + # Format string: %a = weekday, %d = day, %b = month, %Y = year + # %H:%M:%S = time, %z = timezone offset + return dt.strftime("%a, %d %b %Y %H:%M:%S %z") + + +def get_note_title(note: Note) -> str: + """ + Extract title from note content + + Attempts to extract a meaningful title from the note. Uses the first + line of content (stripped of markdown heading syntax) or falls back + to a formatted timestamp if content is unavailable. + + Algorithm: + 1. Try note.title property (first line, stripped of # syntax) + 2. Fall back to timestamp if title is unavailable + + Args: + note: Note object + + Returns: + Title string (max 100 chars, truncated if needed) + + Examples: + >>> # Note with heading + >>> note = Note(...) # content: "# My First Note\\n\\n..." + >>> get_note_title(note) + 'My First Note' + + >>> # Note without heading (timestamp fallback) + >>> note = Note(...) # content: "Just some text" + >>> get_note_title(note) + 'November 18, 2024 at 12:00 PM' + """ + try: + # Use Note's title property (handles extraction logic) + title = note.title + + # Truncate to 100 characters for RSS compatibility + if len(title) > 100: + title = title[:100].strip() + "..." + + return title + + except (FileNotFoundError, OSError, AttributeError): + # If title extraction fails, use timestamp + return note.created_at.strftime("%B %d, %Y at %I:%M %p") + + +def clean_html_for_rss(html: str) -> str: + """ + Ensure HTML is safe for RSS CDATA wrapping + + RSS readers expect HTML content wrapped in CDATA sections. 
The feedgen + library handles CDATA wrapping automatically, but we need to ensure + the HTML doesn't contain CDATA end markers that would break parsing. + + This function is primarily defensive - markdown-rendered HTML should + not contain CDATA markers, but we check anyway. + + Args: + html: Rendered HTML content from markdown + + Returns: + Cleaned HTML safe for CDATA wrapping + + Examples: + >>> html = "

<p>Hello world</p>

" + >>> clean_html_for_rss(html) + '

<p>Hello world</p>

' + + >>> # Edge case: HTML containing CDATA end marker + >>> html = "

<p>Example: ]]></p>

" + >>> clean_html_for_rss(html) + '

<p>Example: ]] ></p>

' + """ + # Check for CDATA end marker and add space to break it + # This is extremely unlikely with markdown-rendered HTML but be safe + if "]]>" in html: + html = html.replace("]]>", "]] >") + + return html From d420269bc0b4ae96cd99667316ccdf6e901469b3 Mon Sep 17 00:00:00 2001 From: Phil Skentelbery Date: Wed, 19 Nov 2025 08:42:32 -0700 Subject: [PATCH 03/12] feat: add RSS feed endpoint and configuration Implements /feed.xml route with caching and ETag support. Features: - GET /feed.xml returns RSS 2.0 feed of published notes - Server-side caching (5 minutes default, configurable) - ETag generation for conditional requests - Cache-Control headers for client-side caching - Configurable feed item limit (50 default) Configuration: - FEED_MAX_ITEMS: Maximum items in feed (default: 50) - FEED_CACHE_SECONDS: Cache duration in seconds (default: 300) Related: docs/decisions/ADR-014-rss-feed-implementation.md --- starpunk/config.py | 6 ++- starpunk/routes/public.py | 92 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 96 insertions(+), 2 deletions(-) diff --git a/starpunk/config.py b/starpunk/config.py index b8269de..063f1c8 100644 --- a/starpunk/config.py +++ b/starpunk/config.py @@ -62,7 +62,11 @@ def load_config(app, config_override=None): app.config["DEV_ADMIN_ME"] = os.getenv("DEV_ADMIN_ME", "") # Application version - app.config["VERSION"] = os.getenv("VERSION", "0.5.0") + app.config["VERSION"] = os.getenv("VERSION", "0.6.0") + + # RSS feed configuration + app.config["FEED_MAX_ITEMS"] = int(os.getenv("FEED_MAX_ITEMS", "50")) + app.config["FEED_CACHE_SECONDS"] = int(os.getenv("FEED_CACHE_SECONDS", "300")) # Apply overrides if provided if config_override: diff --git a/starpunk/routes/public.py b/starpunk/routes/public.py index 45b001c..d178d71 100644 --- a/starpunk/routes/public.py +++ b/starpunk/routes/public.py @@ -5,13 +5,21 @@ Handles public-facing pages including homepage and note permalinks. No authentication required for these routes. 
""" -from flask import Blueprint, abort, render_template +import hashlib +from datetime import datetime, timedelta + +from flask import Blueprint, abort, render_template, Response, current_app from starpunk.notes import list_notes, get_note +from starpunk.feed import generate_feed # Create blueprint bp = Blueprint("public", __name__) +# Simple in-memory cache for RSS feed +# Structure: {'xml': str, 'timestamp': datetime, 'etag': str} +_feed_cache = {"xml": None, "timestamp": None, "etag": None} + @bp.route("/") def index(): @@ -55,3 +63,85 @@ def note(slug: str): abort(404) return render_template("note.html", note=note_obj) + + +@bp.route("/feed.xml") +def feed(): + """ + RSS 2.0 feed of published notes + + Generates standards-compliant RSS 2.0 feed with server-side caching + and ETag support for conditional requests. Cache duration is + configurable via FEED_CACHE_SECONDS (default: 300 seconds = 5 minutes). + + Returns: + XML response with RSS feed + + Headers: + Content-Type: application/rss+xml; charset=utf-8 + Cache-Control: public, max-age={FEED_CACHE_SECONDS} + ETag: MD5 hash of feed content + + Caching Strategy: + - Server-side: In-memory cache for configured duration + - Client-side: Cache-Control header with max-age + - Conditional: ETag support for efficient updates + + Examples: + >>> # First request: generates and caches feed + >>> response = client.get('/feed.xml') + >>> response.status_code + 200 + >>> response.headers['Content-Type'] + 'application/rss+xml; charset=utf-8' + + >>> # Subsequent requests within cache window: returns cached feed + >>> response = client.get('/feed.xml') + >>> response.headers['ETag'] + 'abc123...' 
+ """ + # Get cache duration from config (in seconds) + cache_seconds = current_app.config.get("FEED_CACHE_SECONDS", 300) + cache_duration = timedelta(seconds=cache_seconds) + now = datetime.utcnow() + + # Check if cache is valid + if _feed_cache["xml"] and _feed_cache["timestamp"]: + cache_age = now - _feed_cache["timestamp"] + if cache_age < cache_duration: + # Cache is still valid, return cached feed + response = Response( + _feed_cache["xml"], mimetype="application/rss+xml; charset=utf-8" + ) + response.headers["Cache-Control"] = f"public, max-age={cache_seconds}" + response.headers["ETag"] = _feed_cache["etag"] + return response + + # Cache expired or empty, generate fresh feed + # Get published notes (limit from config) + max_items = current_app.config.get("FEED_MAX_ITEMS", 50) + notes = list_notes(published_only=True, limit=max_items) + + # Generate RSS feed + feed_xml = generate_feed( + site_url=current_app.config["SITE_URL"], + site_name=current_app.config["SITE_NAME"], + site_description=current_app.config.get("SITE_DESCRIPTION", ""), + notes=notes, + limit=max_items, + ) + + # Calculate ETag (MD5 hash of feed content) + etag = hashlib.md5(feed_xml.encode("utf-8")).hexdigest() + + # Update cache + _feed_cache["xml"] = feed_xml + _feed_cache["timestamp"] = now + _feed_cache["etag"] = etag + + # Return response with appropriate headers + response = Response(feed_xml, mimetype="application/rss+xml; charset=utf-8") + response.headers["Cache-Control"] = f"public, max-age={cache_seconds}" + response.headers["ETag"] = etag + + return response From deb784ad4fe2c7a9aa9ddf08de05275be907d915 Mon Sep 17 00:00:00 2001 From: Phil Skentelbery Date: Wed, 19 Nov 2025 08:43:34 -0700 Subject: [PATCH 04/12] feat: improve RSS feed discovery in templates Updates RSS feed links to use Flask url_for() and config values. 
Changes: - Use url_for('public.feed') for RSS navigation link - Use _external=True for feed discovery link (full URL) - Use config.SITE_NAME in feed title for customization This ensures proper URL generation and makes the site more customizable. --- templates/base.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/base.html b/templates/base.html index 6cc682b..984fb4f 100644 --- a/templates/base.html +++ b/templates/base.html @@ -5,7 +5,7 @@ {% block title %}StarPunk{% endblock %} - + {% block head %}{% endblock %} @@ -19,7 +19,7 @@

StarPunk