From 9a31632e057d7a4fe6880906c783792d8f778a54 Mon Sep 17 00:00:00 2001 From: Phil Skentelbery Date: Wed, 19 Nov 2025 08:48:35 -0700 Subject: [PATCH] test: add comprehensive RSS feed tests Adds unit tests for feed module and integration tests for feed route. test_feed.py: - Feed generation with various note counts - RFC-822 date formatting - Note title extraction - HTML cleaning for CDATA safety - Feed structure validation - Special characters and Unicode handling test_routes_feed.py: - Feed route accessibility and response - Content-Type and cache headers - ETag generation and validation - Server-side caching behavior - Published notes filtering - Feed item limit configuration - Configuration integration All tests follow existing test patterns and use proper fixtures. --- tests/test_feed.py | 432 ++++++++++++++++++++++++++++++++++++++ tests/test_routes_feed.py | 375 +++++++++++++++++++++++++++++++++ 2 files changed, 807 insertions(+) create mode 100644 tests/test_feed.py create mode 100644 tests/test_routes_feed.py diff --git a/tests/test_feed.py b/tests/test_feed.py new file mode 100644 index 0000000..437d520 --- /dev/null +++ b/tests/test_feed.py @@ -0,0 +1,432 @@ +""" +Tests for RSS feed generation module + +Tests cover: +- RSS feed generation with various note counts +- RFC-822 date formatting +- Note title extraction +- HTML cleaning for CDATA +- Feed structure and required elements +- Edge cases (empty feeds, special characters, etc.) 
+""" + +import pytest +from datetime import datetime, timezone +from xml.etree import ElementTree as ET + +from starpunk import create_app +from starpunk.feed import ( + generate_feed, + format_rfc822_date, + get_note_title, + clean_html_for_rss, +) +from starpunk.notes import create_note +from starpunk.models import Note + + +@pytest.fixture +def app(tmp_path): + """Create test application""" + test_data_dir = tmp_path / "data" + test_data_dir.mkdir(parents=True, exist_ok=True) + + test_config = { + "TESTING": True, + "DATABASE_PATH": test_data_dir / "starpunk.db", + "DATA_PATH": test_data_dir, + "NOTES_PATH": test_data_dir / "notes", + "SESSION_SECRET": "test-secret-key", + "ADMIN_ME": "https://test.example.com", + "SITE_URL": "https://example.com", + "SITE_NAME": "Test Blog", + "SITE_DESCRIPTION": "A test blog", + "DEV_MODE": False, + } + app = create_app(config=test_config) + yield app + + +@pytest.fixture +def sample_notes(app): + """Create sample published notes""" + with app.app_context(): + notes = [] + for i in range(5): + note = create_note( + content=f"# Test Note {i}\n\nThis is test content for note {i}.", + published=True, + ) + notes.append(note) + return notes + + +class TestGenerateFeed: + """Test generate_feed() function""" + + def test_generate_feed_basic(self, app, sample_notes): + """Test basic feed generation with notes""" + with app.app_context(): + feed_xml = generate_feed( + site_url="https://example.com", + site_name="Test Blog", + site_description="A test blog", + notes=sample_notes, + ) + + # Should return XML string + assert isinstance(feed_xml, str) + assert feed_xml.startswith("" in description or "bold" in description + assert "" in description or "italic" in description + + +class TestFormatRFC822Date: + """Test format_rfc822_date() function""" + + def test_format_rfc822_date_utc(self): + """Test RFC-822 date formatting with UTC datetime""" + dt = datetime(2024, 11, 18, 12, 0, 0, tzinfo=timezone.utc) + result = format_rfc822_date(dt) 
+ + # Should match RFC-822 format + assert "Mon, 18 Nov 2024" in result + assert "12:00:00" in result + assert "+0000" in result + + def test_format_rfc822_date_naive(self): + """Test RFC-822 formatting with naive datetime (assumes UTC)""" + dt = datetime(2024, 11, 18, 12, 0, 0) # No timezone + result = format_rfc822_date(dt) + + # Should add UTC timezone + assert "Mon, 18 Nov 2024" in result + assert "+0000" in result + + def test_format_rfc822_date_format(self): + """Test RFC-822 date format is correct""" + dt = datetime(2024, 11, 18, 12, 30, 45, tzinfo=timezone.utc) + result = format_rfc822_date(dt) + + # Format: "Mon, 18 Nov 2024 12:30:45 +0000" + # Day name, day, month name, year, time, timezone + parts = result.split() + assert parts[0].rstrip(",") in [ + "Mon", + "Tue", + "Wed", + "Thu", + "Fri", + "Sat", + "Sun", + ] + assert parts[2] in [ + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Aug", + "Sep", + "Oct", + "Nov", + "Dec", + ] + assert len(parts[3]) == 4 # Year is 4 digits + + +class TestGetNoteTitle: + """Test get_note_title() function""" + + def test_get_note_title_with_heading(self, app): + """Test title extraction from note with heading""" + with app.app_context(): + note = create_note(content="# My First Note\n\nContent here.", published=True) + title = get_note_title(note) + assert title == "My First Note" + + def test_get_note_title_without_heading(self, app): + """Test title extraction from note without heading""" + with app.app_context(): + note = create_note(content="Just some content without heading.", published=True) + title = get_note_title(note) + + # Should use first line (Note.title handles this) + assert len(title) > 0 + assert "Just some content" in title + + def test_get_note_title_truncates_long_titles(self, app): + """Test title truncation for long titles""" + with app.app_context(): + long_title = "A" * 150 + note = create_note(content=f"# {long_title}\n\nContent.", published=True) + title = get_note_title(note) 
+ + # Should truncate to reasonable length + assert len(title) <= 103 # 100 chars + "..." + + def test_get_note_title_empty_content(self, app): + """Test title extraction with empty content""" + with app.app_context(): + note = create_note(content="\n\n\n", published=True) + title = get_note_title(note) + + # Should fall back to slug or timestamp + assert len(title) > 0 + + +class TestCleanHTMLForRSS: + """Test clean_html_for_rss() function""" + + def test_clean_html_normal_content(self): + """Test HTML cleaning with normal content""" + html = "

This is bold text.

" + result = clean_html_for_rss(html) + + # Should be unchanged + assert result == html + + def test_clean_html_with_cdata_end_marker(self): + """Test HTML cleaning with CDATA end marker""" + html = "

Example: ]]>

" + result = clean_html_for_rss(html) + + # Should break the CDATA end marker + assert "]]>" not in result + assert "]] >" in result + + def test_clean_html_preserves_other_content(self): + """Test HTML cleaning preserves other content""" + html = "

Normal content with links and emphasis.

" + result = clean_html_for_rss(html) + + # Should be unchanged + assert result == html + + def test_clean_html_empty_string(self): + """Test HTML cleaning with empty string""" + result = clean_html_for_rss("") + assert result == "" + + +class TestFeedIntegration: + """Integration tests for feed generation""" + + def test_feed_with_special_characters(self, app): + """Test feed handles special characters correctly""" + with app.app_context(): + note = create_note( + content="# Test & Special \n\nContent with 'quotes' and \"doubles\".", + published=True, + ) + + feed_xml = generate_feed( + site_url="https://example.com", + site_name="Test Blog", + site_description="A test blog", + notes=[note], + ) + + # Should produce valid XML (no parse errors) + root = ET.fromstring(feed_xml) + assert root is not None + + def test_feed_with_unicode_content(self, app): + """Test feed handles Unicode content correctly""" + with app.app_context(): + note = create_note( + content="# Test Unicode 你好 🚀\n\nContent with émojis and ünicode.", + published=True, + ) + + feed_xml = generate_feed( + site_url="https://example.com", + site_name="Test Blog", + site_description="A test blog", + notes=[note], + ) + + # Should produce valid UTF-8 XML + assert "encoding='UTF-8'" in feed_xml or "encoding=\"UTF-8\"" in feed_xml + root = ET.fromstring(feed_xml) + assert root is not None + + def test_feed_with_multiline_content(self, app): + """Test feed handles multiline note content""" + with app.app_context(): + note = create_note( + content="# Multiline Note\n\nParagraph 1\n\nParagraph 2\n\n- List item 1\n- List item 2", + published=True, + ) + + feed_xml = generate_feed( + site_url="https://example.com", + site_name="Test Blog", + site_description="A test blog", + notes=[note], + ) + + root = ET.fromstring(feed_xml) + channel = root.find("channel") + item = channel.find("item") + description = item.find("description").text + + # Should contain HTML paragraphs + assert description is not None + 
assert len(description) > 0
diff --git a/tests/test_routes_feed.py b/tests/test_routes_feed.py
new file mode 100644
index 0000000..ee65beb
--- /dev/null
+++ b/tests/test_routes_feed.py
@@ -0,0 +1,382 @@
+"""
+Tests for RSS feed route (/feed.xml)
+
+Tests cover:
+- Feed route returns valid XML
+- Correct Content-Type header
+- Caching behavior (server-side and client-side)
+- ETag generation and validation
+- Only published notes included
+- Feed item limit configuration
+- Cache expiration behavior
+"""
+
+import pytest
+import time
+from xml.etree import ElementTree as ET
+
+from starpunk import create_app
+from starpunk.notes import create_note
+
+
+@pytest.fixture
+def app(tmp_path):
+    """Create test application"""
+    test_data_dir = tmp_path / "data"
+    test_data_dir.mkdir(parents=True, exist_ok=True)
+
+    test_config = {
+        "TESTING": True,
+        "DATABASE_PATH": test_data_dir / "starpunk.db",
+        "DATA_PATH": test_data_dir,
+        "NOTES_PATH": test_data_dir / "notes",
+        "SESSION_SECRET": "test-secret-key",
+        "ADMIN_ME": "https://test.example.com",
+        "SITE_URL": "https://example.com",
+        "SITE_NAME": "Test Blog",
+        "SITE_DESCRIPTION": "A test blog",
+        "DEV_MODE": False,
+        "FEED_MAX_ITEMS": 50,
+        "FEED_CACHE_SECONDS": 2,  # Short cache for testing
+    }
+    app = create_app(config=test_config)
+    yield app
+
+
+@pytest.fixture
+def client(app):
+    """Test client for making requests"""
+    return app.test_client()
+
+
+@pytest.fixture
+def sample_notes(app):
+    """Create sample notes (mix of published and drafts)"""
+    with app.app_context():
+        notes = []
+        for i in range(10):
+            note = create_note(
+                content=f"# Test Note {i}\n\nContent for note {i}.",
+                published=(i < 7),  # First 7 published, last 3 drafts
+            )
+            notes.append(note)
+    return notes
+
+
+class TestFeedRoute:
+    """Test /feed.xml route"""
+
+    def test_feed_route_exists(self, client):
+        """Test /feed.xml route exists and returns 200"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+    def test_feed_route_returns_xml(self, client):
+        """Test /feed.xml returns valid XML"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        # Should be valid XML
+        root = ET.fromstring(response.data)
+        assert root.tag == "rss"
+
+    def test_feed_route_content_type(self, client):
+        """Test /feed.xml has correct Content-Type header"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        # Should have RSS content type
+        assert "application/rss+xml" in response.content_type
+        assert "charset=utf-8" in response.content_type.lower()
+
+    def test_feed_route_cache_control_header(self, client, app):
+        """Test /feed.xml has Cache-Control header"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        # Should have Cache-Control header
+        assert "Cache-Control" in response.headers
+        assert "public" in response.headers["Cache-Control"]
+
+        # Should include max-age matching config
+        cache_seconds = app.config.get("FEED_CACHE_SECONDS", 300)
+        assert f"max-age={cache_seconds}" in response.headers["Cache-Control"]
+
+    def test_feed_route_etag_header(self, client):
+        """Test /feed.xml has ETag header"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        # Should have ETag header
+        assert "ETag" in response.headers
+        assert len(response.headers["ETag"]) > 0
+
+
+class TestFeedContent:
+    """Test feed content and structure"""
+
+    def test_feed_only_published_notes(self, client, sample_notes):
+        """Test feed only includes published notes"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        root = ET.fromstring(response.data)
+        channel = root.find("channel")
+        items = channel.findall("item")
+
+        # Should have 7 items (only published notes)
+        assert len(items) == 7
+
+        # Check that draft notes don't appear in feed
+        feed_text = response.data.decode("utf-8")
+        assert "Test Note 0" in feed_text  # Published
+        assert "Test Note 6" in feed_text  # Published
+        assert "Test Note 7" not in feed_text  # Draft
+        assert "Test Note 8" not in feed_text  # Draft
+        assert "Test Note 9" not in feed_text  # Draft
+
+    def test_feed_respects_limit_config(self, client, app):
+        """Test feed respects FEED_MAX_ITEMS configuration"""
+        # Create more notes than limit
+        with app.app_context():
+            for i in range(60):
+                create_note(content=f"Note {i}", published=True)
+
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        root = ET.fromstring(response.data)
+        channel = root.find("channel")
+        items = channel.findall("item")
+
+        # 60 published notes exceed the configured limit (50), so the feed must
+        # hold exactly that many items; "<=" would also accept an empty feed.
+        max_items = app.config.get("FEED_MAX_ITEMS", 50)
+        assert len(items) == max_items
+
+    def test_feed_empty_when_no_notes(self, client):
+        """Test feed with no published notes"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        root = ET.fromstring(response.data)
+        channel = root.find("channel")
+        items = channel.findall("item")
+
+        # Should have no items but still valid feed
+        assert len(items) == 0
+
+        # Channel should still have required elements
+        assert channel.find("title") is not None
+        assert channel.find("link") is not None
+
+    def test_feed_has_required_channel_elements(self, client, app):
+        """Test feed has all required RSS channel elements"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        root = ET.fromstring(response.data)
+        channel = root.find("channel")
+
+        # Check required elements
+        assert channel.find("title").text == app.config["SITE_NAME"]
+        assert channel.find("link").text == app.config["SITE_URL"]
+        assert channel.find("description") is not None
+        assert channel.find("language") is not None
+
+    def test_feed_items_have_required_elements(self, client, sample_notes):
+        """Test feed items have all required RSS item elements"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        root = ET.fromstring(response.data)
+        channel = root.find("channel")
+        items = channel.findall("item")
+
+        # sample_notes publishes notes, so an empty feed is a failure here
+        # rather than a reason to skip the element checks.
+        assert len(items) > 0
+
+        # Check first item has required elements
+        item = items[0]
+        assert item.find("title") is not None
+        assert item.find("link") is not None
+        assert item.find("guid") is not None
+        assert item.find("pubDate") is not None
+        assert item.find("description") is not None
+
+    def test_feed_item_links_are_absolute(self, client, sample_notes, app):
+        """Test feed item links are absolute URLs"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        root = ET.fromstring(response.data)
+        channel = root.find("channel")
+        items = channel.findall("item")
+
+        # sample_notes publishes notes, so an empty feed is a failure here
+        # rather than a reason to skip the link checks.
+        assert len(items) > 0
+
+        link = items[0].find("link").text
+        # Should start with site URL
+        assert link.startswith(app.config["SITE_URL"])
+        # Should be full URL, not relative path
+        assert link.startswith("http")
+
+
+class TestFeedCaching:
+    """Test feed caching behavior"""
+
+    def test_feed_caches_response(self, client, sample_notes):
+        """Test feed caches response on server side"""
+        # First request
+        response1 = client.get("/feed.xml")
+        etag1 = response1.headers.get("ETag")
+
+        # Second request (should be cached)
+        response2 = client.get("/feed.xml")
+        etag2 = response2.headers.get("ETag")
+
+        # ETags should match (same cached content)
+        assert etag1 == etag2
+
+        # Content should be identical
+        assert response1.data == response2.data
+
+    def test_feed_cache_expires(self, client, sample_notes, app):
+        """Test feed cache expires after configured duration"""
+        # First request
+        response1 = client.get("/feed.xml")
+        etag1 = response1.headers.get("ETag")
+
+        # Wait for cache to expire (one second past the configured lifetime)
+        time.sleep(app.config["FEED_CACHE_SECONDS"] + 1)
+
+        # Create new note (changes feed content)
+        with app.app_context():
+            create_note(content="New note after cache expiry", published=True)
+
+        # Second request (cache should be expired and regenerated)
+        response2 = client.get("/feed.xml")
+        etag2 = response2.headers.get("ETag")
+
+        # ETags should be different (content changed)
+        assert etag1 != etag2
+
+    def test_feed_etag_changes_with_content(self, client, app):
+        """Test ETag changes when content changes"""
+        # First request
+        response1 = client.get("/feed.xml")
+        etag1 = response1.headers.get("ETag")
+
+        # Wait for cache expiry (one second past the configured lifetime)
+        time.sleep(app.config["FEED_CACHE_SECONDS"] + 1)
+
+        # Add new note
+        with app.app_context():
+            create_note(content="New note changes ETag", published=True)
+
+        # Second request
+        response2 = client.get("/feed.xml")
+        etag2 = response2.headers.get("ETag")
+
+        # ETags should be different
+        assert etag1 != etag2
+
+    def test_feed_cache_consistent_within_window(self, client, sample_notes):
+        """Test cache returns consistent content within cache window"""
+        # Multiple requests within cache window
+        responses = []
+        for _ in range(5):
+            response = client.get("/feed.xml")
+            responses.append(response)
+
+        # All responses should be identical
+        first_content = responses[0].data
+        first_etag = responses[0].headers.get("ETag")
+
+        for response in responses[1:]:
+            assert response.data == first_content
+            assert response.headers.get("ETag") == first_etag
+
+
+class TestFeedEdgeCases:
+    """Test edge cases for feed route"""
+
+    def test_feed_with_special_characters_in_content(self, client, app):
+        """Test feed handles special characters correctly"""
+        with app.app_context():
+            create_note(
+                content="# Test & Special \n\n'Quotes' and \"doubles\".",
+                published=True,
+            )
+
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        # Should produce valid XML despite special characters
+        root = ET.fromstring(response.data)
+        assert root is not None
+
+    def test_feed_with_unicode_content(self, client, app):
+        """Test feed handles Unicode content"""
+        with app.app_context():
+            create_note(content="# Test Unicode 你好 🚀\n\nEmojis and ümlauts.", published=True)
+
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        # Should handle UTF-8 correctly
+        root = ET.fromstring(response.data)
+        assert root is not None
+
+    def test_feed_with_very_long_note(self, client, app):
+        """Test feed handles very long note content"""
+        with app.app_context():
+            long_content = "# Long Note\n\n" + ("This is a very long paragraph. " * 100)
+            create_note(content=long_content, published=True)
+
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        # A long note must still yield parseable XML (content is not inspected)
+        root = ET.fromstring(response.data)
+        assert root is not None
+
+
+class TestFeedConfiguration:
+    """Test feed configuration options"""
+
+    def test_feed_uses_site_name_from_config(self, client, app):
+        """Test feed uses SITE_NAME from config"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        root = ET.fromstring(response.data)
+        channel = root.find("channel")
+        title = channel.find("title").text
+
+        assert title == app.config["SITE_NAME"]
+
+    def test_feed_uses_site_url_from_config(self, client, app):
+        """Test feed uses SITE_URL from config"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        root = ET.fromstring(response.data)
+        channel = root.find("channel")
+        link = channel.find("link").text
+
+        assert link == app.config["SITE_URL"]
+
+    def test_feed_uses_site_description_from_config(self, client, app):
+        """Test feed uses SITE_DESCRIPTION from config"""
+        response = client.get("/feed.xml")
+        assert response.status_code == 200
+
+        root = ET.fromstring(response.data)
+        channel = root.find("channel")
+        description = channel.find("description").text
+
+        assert description == app.config["SITE_DESCRIPTION"]