test: add comprehensive RSS feed tests

Adds unit tests for the feed module and integration tests for the feed route.

test_feed.py:
- Feed generation with various note counts
- RFC-822 date formatting
- Note title extraction
- HTML cleaning for CDATA safety
- Feed structure validation
- Special characters and Unicode handling

test_routes_feed.py:
- Feed route accessibility and response
- Content-Type and cache headers
- ETag generation and validation
- Server-side caching behavior
- Published notes filtering
- Feed item limit configuration
- Configuration integration

All tests follow existing test patterns and use proper fixtures.
2025-11-19 08:48:35 -07:00
parent deb784ad4f
commit 9a31632e05
2 changed files with 807 additions and 0 deletions
--- a/tests/test_feed.py
+++ b/tests/test_feed.py
@@ -0,0 +1,432 @@
+"""
+Tests for RSS feed generation module
+
+Tests cover:
+- RSS feed generation with various note counts
+- RFC-822 date formatting
+- Note title extraction
+- HTML cleaning for CDATA
+- Feed structure and required elements
+- Edge cases (empty feeds, special characters, etc.)
+"""
+
+import pytest
+from datetime import datetime, timezone
+from xml.etree import ElementTree as ET
+
+from starpunk import create_app
+from starpunk.feed import (
+    generate_feed,
+    format_rfc822_date,
+    get_note_title,
+    clean_html_for_rss,
+)
+from starpunk.notes import create_note
+from starpunk.models import Note
+
+
+@pytest.fixture
+def app(tmp_path):
+    """Yield an application built by create_app() with a test configuration.
+
+    Every storage path in the configuration points inside a ``data``
+    directory created under pytest's ``tmp_path``.
+    """
+    data_root = tmp_path / "data"
+    data_root.mkdir(parents=True, exist_ok=True)
+
+    settings = dict(
+        TESTING=True,
+        DATABASE_PATH=data_root / "starpunk.db",
+        DATA_PATH=data_root,
+        NOTES_PATH=data_root / "notes",
+        SESSION_SECRET="test-secret-key",
+        ADMIN_ME="https://test.example.com",
+        SITE_URL="https://example.com",
+        SITE_NAME="Test Blog",
+        SITE_DESCRIPTION="A test blog",
+        DEV_MODE=False,
+    )
+    yield create_app(config=settings)
+
+
+@pytest.fixture
+def sample_notes(app):
+    """Return five published notes created through create_note().
+
+    Note ``i`` (0 through 4) starts with the heading ``Test Note i`` and is
+    followed by a single line of body text.
+    """
+    with app.app_context():
+        return [
+            create_note(
+                content=f"# Test Note {index}\n\nThis is test content for note {index}.",
+                published=True,
+            )
+            for index in range(5)
+        ]
+
+
+class TestGenerateFeed:
+    """Tests for generate_feed(): channel metadata, items, limits, validation."""
+
+    def test_generate_feed_basic(self, app, sample_notes):
+        """Five notes produce an RSS 2.0 document with matching channel data."""
+        with app.app_context():
+            feed_xml = generate_feed(
+                site_url="https://example.com",
+                site_name="Test Blog",
+                site_description="A test blog",
+                notes=sample_notes,
+            )
+
+            # Should return XML string
+            assert isinstance(feed_xml, str)
+            assert feed_xml.startswith("<?xml")
+
+            # Parse XML to verify structure
+            root = ET.fromstring(feed_xml)
+            assert root.tag == "rss"
+            assert root.get("version") == "2.0"
+
+            # Find channel
+            channel = root.find("channel")
+            assert channel is not None
+
+            # Check required channel elements
+            assert channel.find("title").text == "Test Blog"
+            assert channel.find("link").text == "https://example.com"
+            assert channel.find("description").text == "A test blog"
+
+            # Check items (one per note in the sample_notes fixture)
+            items = channel.findall("item")
+            assert len(items) == 5
+
+    def test_generate_feed_empty(self, app):
+        """An empty note list still yields parseable XML with zero items."""
+        with app.app_context():
+            feed_xml = generate_feed(
+                site_url="https://example.com",
+                site_name="Test Blog",
+                site_description="A test blog",
+                notes=[],
+            )
+
+            # Should still generate valid XML
+            assert isinstance(feed_xml, str)
+            root = ET.fromstring(feed_xml)
+            channel = root.find("channel")
+            items = channel.findall("item")
+            assert len(items) == 0
+
+    def test_generate_feed_respects_limit(self, app, sample_notes):
+        """Passing limit=3 with five notes caps the feed at three items."""
+        with app.app_context():
+            feed_xml = generate_feed(
+                site_url="https://example.com",
+                site_name="Test Blog",
+                site_description="A test blog",
+                notes=sample_notes,
+                limit=3,
+            )
+
+            root = ET.fromstring(feed_xml)
+            channel = root.find("channel")
+            items = channel.findall("item")
+
+            # Should only have 3 items (respecting limit)
+            assert len(items) == 3
+
+    def test_generate_feed_requires_site_url(self):
+        """An empty site_url raises ValueError mentioning site_url."""
+        with pytest.raises(ValueError, match="site_url is required"):
+            generate_feed(
+                site_url="",
+                site_name="Test Blog",
+                site_description="A test blog",
+                notes=[],
+            )
+
+    def test_generate_feed_requires_site_name(self):
+        """An empty site_name raises ValueError mentioning site_name."""
+        with pytest.raises(ValueError, match="site_name is required"):
+            generate_feed(
+                site_url="https://example.com",
+                site_name="",
+                site_description="A test blog",
+                notes=[],
+            )
+
+    def test_generate_feed_strips_trailing_slash(self, app, sample_notes):
+        """A trailing slash on site_url does not leak into item links."""
+        with app.app_context():
+            feed_xml = generate_feed(
+                site_url="https://example.com/",  # Has trailing slash
+                site_name="Test Blog",
+                site_description="A test blog",
+                notes=sample_notes,
+                limit=1,
+            )
+
+            root = ET.fromstring(feed_xml)
+            channel = root.find("channel")
+            items = channel.findall("item")
+            link = items[0].find("link").text
+
+            # Once the scheme separator is removed, any remaining "//" would
+            # mean the base URL and the note path were joined with two slashes
+            assert "//" not in link.replace("https://", "")
+
+    def test_generate_feed_includes_atom_self_link(self, app):
+        """The raw XML mentions the Atom namespace and a rel="self" feed link."""
+        with app.app_context():
+            feed_xml = generate_feed(
+                site_url="https://example.com",
+                site_name="Test Blog",
+                site_description="A test blog",
+                notes=[],
+            )
+
+            # Check for Atom namespace and self-link
+            assert "atom" in feed_xml
+            assert "feed.xml" in feed_xml
+            assert 'rel="self"' in feed_xml
+
+    def test_generate_feed_item_structure(self, app, sample_notes):
+        """A single item carries title, link, guid, pubDate and description."""
+        with app.app_context():
+            feed_xml = generate_feed(
+                site_url="https://example.com",
+                site_name="Test Blog",
+                site_description="A test blog",
+                notes=sample_notes[:1],
+            )
+
+            root = ET.fromstring(feed_xml)
+            channel = root.find("channel")
+            item = channel.find("item")
+
+            # Check required item elements
+            assert item.find("title") is not None
+            assert item.find("link") is not None
+            assert item.find("guid") is not None
+            assert item.find("pubDate") is not None
+            assert item.find("description") is not None
+
+            # Check GUID is permalink
+            guid = item.find("guid")
+            assert guid.get("isPermaLink") == "true"
+
+    def test_generate_feed_html_content(self, app):
+        """The item description holds rendered HTML, not the Markdown source."""
+        with app.app_context():
+            note = create_note(
+                content="# Test\n\nThis is **bold** and *italic*.",
+                published=True,
+            )
+
+            feed_xml = generate_feed(
+                site_url="https://example.com",
+                site_name="Test Blog",
+                site_description="A test blog",
+                notes=[note],
+            )
+
+            root = ET.fromstring(feed_xml)
+            channel = root.find("channel")
+            item = channel.find("item")
+            description = item.find("description").text
+
+            # Match the rendered tags themselves.  The Markdown source already
+            # contains the bare words "bold" and "italic", so accepting those
+            # as an alternative would let the test pass even when the content
+            # was never converted to HTML.
+            assert "<strong>bold</strong>" in description
+            assert "<em>italic</em>" in description
+
+
+class TestFormatRFC822Date:
+    """Checks on the string produced by format_rfc822_date()."""
+
+    def test_format_rfc822_date_utc(self):
+        """An aware UTC datetime yields the expected date, time and offset."""
+        moment = datetime(2024, 11, 18, 12, 0, 0, tzinfo=timezone.utc)
+        formatted = format_rfc822_date(moment)
+
+        # Every fragment of the RFC-822 rendering must appear in the output
+        for fragment in ("Mon, 18 Nov 2024", "12:00:00", "+0000"):
+            assert fragment in formatted
+
+    def test_format_rfc822_date_naive(self):
+        """A naive datetime is rendered with a +0000 offset."""
+        moment = datetime(2024, 11, 18, 12, 0, 0)  # tzinfo deliberately omitted
+        formatted = format_rfc822_date(moment)
+
+        # The +0000 offset shows the naive value was treated as UTC
+        for fragment in ("Mon, 18 Nov 2024", "+0000"):
+            assert fragment in formatted
+
+    def test_format_rfc822_date_format(self):
+        """The whitespace-separated fields follow the RFC-822 layout."""
+        weekdays = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
+        months = (
+            "Jan",
+            "Feb",
+            "Mar",
+            "Apr",
+            "May",
+            "Jun",
+            "Jul",
+            "Aug",
+            "Sep",
+            "Oct",
+            "Nov",
+            "Dec",
+        )
+        moment = datetime(2024, 11, 18, 12, 30, 45, tzinfo=timezone.utc)
+        fields = format_rfc822_date(moment).split()
+
+        # Expected layout: "Mon, 18 Nov 2024 12:30:45 +0000"
+        # fields: day name (with comma), day, month name, year, time, offset
+        assert fields[0].rstrip(",") in weekdays
+        assert fields[2] in months
+        assert len(fields[3]) == 4  # four-digit year
+
+
+class TestGetNoteTitle:
+    """Checks on the title that get_note_title() derives from a note."""
+
+    def test_get_note_title_with_heading(self, app):
+        """A leading Markdown heading becomes the title, minus the '# ' marker."""
+        with app.app_context():
+            created = create_note(content="# My First Note\n\nContent here.", published=True)
+            assert get_note_title(created) == "My First Note"
+
+    def test_get_note_title_without_heading(self, app):
+        """Without a heading, the title is non-empty and drawn from the text."""
+        with app.app_context():
+            created = create_note(content="Just some content without heading.", published=True)
+            extracted = get_note_title(created)
+
+            # The opening words of the note must survive into the title
+            assert len(extracted) > 0
+            assert "Just some content" in extracted
+
+    def test_get_note_title_truncates_long_titles(self, app):
+        """A 150-character heading is cut down to at most 103 characters."""
+        with app.app_context():
+            heading = "A" * 150
+            created = create_note(content=f"# {heading}\n\nContent.", published=True)
+            extracted = get_note_title(created)
+
+            # Upper bound allows 100 characters of title plus a "..." suffix
+            assert len(extracted) <= 103
+
+    def test_get_note_title_empty_content(self, app):
+        """Content made only of newlines still produces a non-empty title."""
+        with app.app_context():
+            created = create_note(content="\n\n\n", published=True)
+            extracted = get_note_title(created)
+
+            # Some fallback title is expected (presumably slug or timestamp;
+            # only non-emptiness is checked here)
+            assert len(extracted) > 0
+
+
+class TestCleanHTMLForRSS:
+    """Checks that clean_html_for_rss() only alters CDATA end markers."""
+
+    def test_clean_html_normal_content(self):
+        """Markup without a CDATA end marker is returned unchanged."""
+        markup = "<p>This is <strong>bold</strong> text.</p>"
+        assert clean_html_for_rss(markup) == markup
+
+    def test_clean_html_with_cdata_end_marker(self):
+        """A literal ']]>' is broken up by inserting a space before '>'."""
+        cleaned = clean_html_for_rss("<p>Example: ]]></p>")
+
+        # The terminator must be gone and its spaced-out form present
+        assert "]]>" not in cleaned
+        assert "]] >" in cleaned
+
+    def test_clean_html_preserves_other_content(self):
+        """Links and emphasis are returned unchanged."""
+        markup = "<p>Normal content with <a href='test'>links</a> and <em>emphasis</em>.</p>"
+        assert clean_html_for_rss(markup) == markup
+
+    def test_clean_html_empty_string(self):
+        """An empty string comes back as an empty string."""
+        assert clean_html_for_rss("") == ""
+
+
+class TestFeedIntegration:
+    """End-to-end checks: create a note, build a feed from it, parse the XML."""
+
+    @staticmethod
+    def _feed_for(note):
+        """Return the feed XML for a single note using the test site values."""
+        return generate_feed(
+            site_url="https://example.com",
+            site_name="Test Blog",
+            site_description="A test blog",
+            notes=[note],
+        )
+
+    def test_feed_with_special_characters(self, app):
+        """XML-significant characters in a note do not break the feed."""
+        with app.app_context():
+            tricky = create_note(
+                content="# Test & Special <Characters>\n\nContent with 'quotes' and \"doubles\".",
+                published=True,
+            )
+
+            # ET.fromstring raises on malformed XML, so parsing is the real check
+            assert ET.fromstring(self._feed_for(tricky)) is not None
+
+    def test_feed_with_unicode_content(self, app):
+        """Non-ASCII text yields a parseable feed that declares UTF-8."""
+        with app.app_context():
+            multilingual = create_note(
+                content="# Test Unicode 你好 🚀\n\nContent with émojis and ünicode.",
+                published=True,
+            )
+            xml_text = self._feed_for(multilingual)
+
+            # The XML declaration may quote the encoding with either quote style
+            declarations = ("encoding='UTF-8'", 'encoding="UTF-8"')
+            assert any(decl in xml_text for decl in declarations)
+            assert ET.fromstring(xml_text) is not None
+
+    def test_feed_with_multiline_content(self, app):
+        """A note with paragraphs and a list yields a non-empty description."""
+        with app.app_context():
+            layered = create_note(
+                content="# Multiline Note\n\nParagraph 1\n\nParagraph 2\n\n- List item 1\n- List item 2",
+                published=True,
+            )
+
+            parsed = ET.fromstring(self._feed_for(layered))
+            body = parsed.find("channel").find("item").find("description").text
+
+            # Only presence and non-emptiness of the description are checked
+            assert body is not None
+            assert len(body) > 0