""" Tests for RSS feed generation module Tests cover: - RSS feed generation with various note counts - RFC-822 date formatting - Note title extraction - HTML cleaning for CDATA - Feed structure and required elements - Edge cases (empty feeds, special characters, etc.) """ import pytest from datetime import datetime, timezone from xml.etree import ElementTree as ET from starpunk import create_app from starpunk.feed import ( generate_feed, format_rfc822_date, get_note_title, clean_html_for_rss, ) from starpunk.notes import create_note from starpunk.models import Note @pytest.fixture def app(tmp_path): """Create test application""" test_data_dir = tmp_path / "data" test_data_dir.mkdir(parents=True, exist_ok=True) test_config = { "TESTING": True, "DATABASE_PATH": test_data_dir / "starpunk.db", "DATA_PATH": test_data_dir, "NOTES_PATH": test_data_dir / "notes", "SESSION_SECRET": "test-secret-key", "ADMIN_ME": "https://test.example.com", "SITE_URL": "https://example.com", "SITE_NAME": "Test Blog", "SITE_DESCRIPTION": "A test blog", "DEV_MODE": False, } app = create_app(config=test_config) yield app @pytest.fixture def sample_notes(app): """Create sample published notes""" with app.app_context(): notes = [] for i in range(5): note = create_note( content=f"# Test Note {i}\n\nThis is test content for note {i}.", published=True, ) notes.append(note) return notes class TestGenerateFeed: """Test generate_feed() function""" def test_generate_feed_basic(self, app, sample_notes): """Test basic feed generation with notes""" with app.app_context(): feed_xml = generate_feed( site_url="https://example.com", site_name="Test Blog", site_description="A test blog", notes=sample_notes, ) # Should return XML string assert isinstance(feed_xml, str) assert feed_xml.startswith(" 0 assert channel.find("description").text == "A test blog" # Check items (should have 5 items) items = channel.findall("item") assert len(items) == 5 def test_generate_feed_empty(self, app): """Test feed generation with no notes""" with app.app_context(): feed_xml = generate_feed( site_url="https://example.com", site_name="Test Blog", site_description="A test blog", notes=[], ) # Should still generate valid XML assert isinstance(feed_xml, str) root = ET.fromstring(feed_xml) channel = root.find("channel") items = channel.findall("item") assert len(items) == 0 def test_generate_feed_respects_limit(self, app, sample_notes): """Test feed respects item limit""" with app.app_context(): feed_xml = generate_feed( site_url="https://example.com", site_name="Test Blog", site_description="A test blog", notes=sample_notes, limit=3, ) root = ET.fromstring(feed_xml) channel = root.find("channel") items = channel.findall("item") # Should only have 3 items (respecting limit) assert len(items) == 3 def test_generate_feed_requires_site_url(self): """Test feed generation requires site_url""" with pytest.raises(ValueError, match="site_url is required"): generate_feed( site_url="", site_name="Test Blog", site_description="A test blog", notes=[], ) def test_generate_feed_requires_site_name(self): """Test feed generation requires site_name""" with pytest.raises(ValueError, match="site_name is required"): generate_feed( site_url="https://example.com", site_name="", site_description="A test blog", notes=[], ) def test_generate_feed_strips_trailing_slash(self, app, sample_notes): """Test feed strips trailing slash from site_url""" with app.app_context(): feed_xml = generate_feed( site_url="https://example.com/", # Has trailing slash site_name="Test Blog", site_description="A test blog", notes=sample_notes, limit=1, ) root = ET.fromstring(feed_xml) channel = root.find("channel") items = channel.findall("item") link = items[0].find("link").text # Link should not have double slash before /note/ assert "//" not in link.replace("https://", "") def test_generate_feed_includes_atom_self_link(self, app): """Test feed includes Atom self-link for discovery""" with app.app_context(): feed_xml = generate_feed( site_url="https://example.com", site_name="Test Blog", site_description="A test blog", notes=[], ) # Check for Atom namespace and self-link assert "atom" in feed_xml assert "feed.xml" in feed_xml assert 'rel="self"' in feed_xml def test_generate_feed_item_structure(self, app, sample_notes): """Test individual feed item has all required elements""" with app.app_context(): feed_xml = generate_feed( site_url="https://example.com", site_name="Test Blog", site_description="A test blog", notes=sample_notes[:1], ) root = ET.fromstring(feed_xml) channel = root.find("channel") item = channel.find("item") # Check required item elements assert item.find("title") is not None assert item.find("link") is not None assert item.find("guid") is not None assert item.find("pubDate") is not None assert item.find("description") is not None # Check GUID is permalink guid = item.find("guid") assert guid.get("isPermaLink") == "true" def test_generate_feed_html_content(self, app): """Test feed includes HTML content in description""" with app.app_context(): note = create_note( content="# Test\n\nThis is **bold** and *italic*.", published=True, ) feed_xml = generate_feed( site_url="https://example.com", site_name="Test Blog", site_description="A test blog", notes=[note], ) root = ET.fromstring(feed_xml) channel = root.find("channel") item = channel.find("item") description = item.find("description").text # Should contain HTML tags assert "" in description or "bold" in description assert "" in description or "italic" in description class TestFormatRFC822Date: """Test format_rfc822_date() function""" def test_format_rfc822_date_utc(self): """Test RFC-822 date formatting with UTC datetime""" dt = datetime(2024, 11, 18, 12, 0, 0, tzinfo=timezone.utc) result = format_rfc822_date(dt) # Should match RFC-822 format assert "Mon, 18 Nov 2024" in result assert "12:00:00" in result assert "+0000" in result def test_format_rfc822_date_naive(self): """Test RFC-822 formatting with naive datetime (assumes UTC)""" dt = datetime(2024, 11, 18, 12, 0, 0) # No timezone result = format_rfc822_date(dt) # Should add UTC timezone assert "Mon, 18 Nov 2024" in result assert "+0000" in result def test_format_rfc822_date_format(self): """Test RFC-822 date format is correct""" dt = datetime(2024, 11, 18, 12, 30, 45, tzinfo=timezone.utc) result = format_rfc822_date(dt) # Format: "Mon, 18 Nov 2024 12:30:45 +0000" # Day name, day, month name, year, time, timezone parts = result.split() assert parts[0].rstrip(",") in [ "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun", ] assert parts[2] in [ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", ] assert len(parts[3]) == 4 # Year is 4 digits class TestGetNoteTitle: """Test get_note_title() function""" def test_get_note_title_with_heading(self, app): """Test title extraction from note with heading""" with app.app_context(): note = create_note(content="# My First Note\n\nContent here.", published=True) title = get_note_title(note) assert title == "My First Note" def test_get_note_title_without_heading(self, app): """Test title extraction from note without heading""" with app.app_context(): note = create_note(content="Just some content without heading.", published=True) title = get_note_title(note) # Should use first line (Note.title handles this) assert len(title) > 0 assert "Just some content" in title def test_get_note_title_truncates_long_titles(self, app): """Test title truncation for long titles""" with app.app_context(): long_title = "A" * 150 note = create_note(content=f"# {long_title}\n\nContent.", published=True) title = get_note_title(note) # Should truncate to reasonable length assert len(title) <= 103 # 100 chars + "..." def test_get_note_title_minimal_content(self, app): """Test title extraction with minimal content""" with app.app_context(): note = create_note(content="x", published=True) title = get_note_title(note) # Should extract something (single character or slug) assert len(title) > 0 class TestCleanHTMLForRSS: """Test clean_html_for_rss() function""" def test_clean_html_normal_content(self): """Test HTML cleaning with normal content""" html = "

This is bold text.

" result = clean_html_for_rss(html) # Should be unchanged assert result == html def test_clean_html_with_cdata_end_marker(self): """Test HTML cleaning with CDATA end marker""" html = "

Example: ]]>

" result = clean_html_for_rss(html) # Should break the CDATA end marker assert "]]>" not in result assert "]] >" in result def test_clean_html_preserves_other_content(self): """Test HTML cleaning preserves other content""" html = "

Normal content with links and emphasis.

" result = clean_html_for_rss(html) # Should be unchanged assert result == html def test_clean_html_empty_string(self): """Test HTML cleaning with empty string""" result = clean_html_for_rss("") assert result == "" class TestFeedIntegration: """Integration tests for feed generation""" def test_feed_with_special_characters(self, app): """Test feed handles special characters correctly""" with app.app_context(): note = create_note( content="# Test & Special \n\nContent with 'quotes' and \"doubles\".", published=True, ) feed_xml = generate_feed( site_url="https://example.com", site_name="Test Blog", site_description="A test blog", notes=[note], ) # Should produce valid XML (no parse errors) root = ET.fromstring(feed_xml) assert root is not None def test_feed_with_unicode_content(self, app): """Test feed handles Unicode content correctly""" with app.app_context(): note = create_note( content="# Test Unicode 你好 🚀\n\nContent with émojis and ünicode.", published=True, ) feed_xml = generate_feed( site_url="https://example.com", site_name="Test Blog", site_description="A test blog", notes=[note], ) # Should produce valid UTF-8 XML assert "encoding='UTF-8'" in feed_xml or "encoding=\"UTF-8\"" in feed_xml root = ET.fromstring(feed_xml) assert root is not None def test_feed_with_multiline_content(self, app): """Test feed handles multiline note content""" with app.app_context(): note = create_note( content="# Multiline Note\n\nParagraph 1\n\nParagraph 2\n\n- List item 1\n- List item 2", published=True, ) feed_xml = generate_feed( site_url="https://example.com", site_name="Test Blog", site_description="A test blog", notes=[note], ) root = ET.fromstring(feed_xml) channel = root.find("channel") item = channel.find("item") description = item.find("description").text # Should contain HTML paragraphs assert description is not None assert len(description) > 0