test: add comprehensive RSS feed tests

Adds unit tests for feed module and integration tests for feed route.

test_feed.py:
- Feed generation with various note counts
- RFC-822 date formatting
- Note title extraction
- HTML cleaning for CDATA safety
- Feed structure validation
- Special characters and Unicode handling

test_routes_feed.py:
- Feed route accessibility and response
- Content-Type and cache headers
- ETag generation and validation
- Server-side caching behavior
- Published notes filtering
- Feed item limit configuration
- Configuration integration

All tests follow existing test patterns and use proper fixtures.
This commit is contained in:
2025-11-19 08:48:35 -07:00
parent deb784ad4f
commit 9a31632e05
2 changed files with 807 additions and 0 deletions

432
tests/test_feed.py Normal file
View File

@@ -0,0 +1,432 @@
"""
Tests for RSS feed generation module
Tests cover:
- RSS feed generation with various note counts
- RFC-822 date formatting
- Note title extraction
- HTML cleaning for CDATA
- Feed structure and required elements
- Edge cases (empty feeds, special characters, etc.)
"""
import pytest
from datetime import datetime, timezone
from xml.etree import ElementTree as ET
from starpunk import create_app
from starpunk.feed import (
generate_feed,
format_rfc822_date,
get_note_title,
clean_html_for_rss,
)
from starpunk.notes import create_note
from starpunk.models import Note
@pytest.fixture
def app(tmp_path):
    """Yield an application built by ``create_app`` against a scratch data directory.

    All path-valued settings live below pytest's ``tmp_path`` so each test
    gets its own database and notes locations.
    """
    data_root = tmp_path / "data"
    data_root.mkdir(parents=True, exist_ok=True)

    settings = dict(
        TESTING=True,
        DATABASE_PATH=data_root / "starpunk.db",
        DATA_PATH=data_root,
        NOTES_PATH=data_root / "notes",
        SESSION_SECRET="test-secret-key",
        ADMIN_ME="https://test.example.com",
        SITE_URL="https://example.com",
        SITE_NAME="Test Blog",
        SITE_DESCRIPTION="A test blog",
        DEV_MODE=False,
    )

    application = create_app(config=settings)
    yield application
@pytest.fixture
def sample_notes(app):
    """Return five published notes, each with a ``# Test Note <n>`` heading."""
    with app.app_context():
        return [
            create_note(
                content=f"# Test Note {index}\n\nThis is test content for note {index}.",
                published=True,
            )
            for index in range(5)
        ]
class TestGenerateFeed:
    """Unit tests for ``generate_feed()``."""

    def test_generate_feed_basic(self, app, sample_notes):
        """A feed built from the sample notes is RSS 2.0 with one item per note."""
        with app.app_context():
            feed_xml = generate_feed(
                site_url="https://example.com",
                site_name="Test Blog",
                site_description="A test blog",
                notes=sample_notes,
            )

            # The generator returns the serialized document as text.
            assert isinstance(feed_xml, str)
            assert feed_xml.startswith("<?xml")

            # Parse it to verify structure rather than matching substrings.
            root = ET.fromstring(feed_xml)
            assert root.tag == "rss"
            assert root.get("version") == "2.0"

            channel = root.find("channel")
            assert channel is not None

            # Channel metadata mirrors the arguments passed in.
            assert channel.find("title").text == "Test Blog"
            assert channel.find("link").text == "https://example.com"
            assert channel.find("description").text == "A test blog"

            # One <item> per sample note (the fixture creates five).
            items = channel.findall("item")
            assert len(items) == 5

    def test_generate_feed_empty(self, app):
        """An empty note list still yields parseable XML with zero items."""
        with app.app_context():
            feed_xml = generate_feed(
                site_url="https://example.com",
                site_name="Test Blog",
                site_description="A test blog",
                notes=[],
            )

            assert isinstance(feed_xml, str)
            root = ET.fromstring(feed_xml)
            channel = root.find("channel")
            items = channel.findall("item")
            assert len(items) == 0

    def test_generate_feed_respects_limit(self, app, sample_notes):
        """``limit`` caps the number of items below the number of notes given."""
        with app.app_context():
            feed_xml = generate_feed(
                site_url="https://example.com",
                site_name="Test Blog",
                site_description="A test blog",
                notes=sample_notes,
                limit=3,
            )

            root = ET.fromstring(feed_xml)
            channel = root.find("channel")
            items = channel.findall("item")
            # Five notes in, limit of three -> exactly three items out.
            assert len(items) == 3

    def test_generate_feed_requires_site_url(self):
        """An empty ``site_url`` raises ``ValueError``."""
        with pytest.raises(ValueError, match="site_url is required"):
            generate_feed(
                site_url="",
                site_name="Test Blog",
                site_description="A test blog",
                notes=[],
            )

    def test_generate_feed_requires_site_name(self):
        """An empty ``site_name`` raises ``ValueError``."""
        with pytest.raises(ValueError, match="site_name is required"):
            generate_feed(
                site_url="https://example.com",
                site_name="",
                site_description="A test blog",
                notes=[],
            )

    def test_generate_feed_strips_trailing_slash(self, app, sample_notes):
        """A trailing slash on ``site_url`` must not produce ``//`` in item links."""
        with app.app_context():
            feed_xml = generate_feed(
                site_url="https://example.com/",  # Has trailing slash
                site_name="Test Blog",
                site_description="A test blog",
                notes=sample_notes,
                limit=1,
            )

            root = ET.fromstring(feed_xml)
            channel = root.find("channel")
            items = channel.findall("item")
            link = items[0].find("link").text
            # Drop the scheme separator first so only path-level "//" is detected.
            assert "//" not in link.replace("https://", "")

    def test_generate_feed_includes_atom_self_link(self, app):
        """The serialized feed mentions the Atom namespace and a self link."""
        with app.app_context():
            feed_xml = generate_feed(
                site_url="https://example.com",
                site_name="Test Blog",
                site_description="A test blog",
                notes=[],
            )

            # Substring checks: Atom namespace, feed filename, rel="self".
            assert "atom" in feed_xml
            assert "feed.xml" in feed_xml
            assert 'rel="self"' in feed_xml

    def test_generate_feed_item_structure(self, app, sample_notes):
        """A single item carries title, link, guid, pubDate and description."""
        with app.app_context():
            feed_xml = generate_feed(
                site_url="https://example.com",
                site_name="Test Blog",
                site_description="A test blog",
                notes=sample_notes[:1],
            )

            root = ET.fromstring(feed_xml)
            channel = root.find("channel")
            item = channel.find("item")

            for tag in ("title", "link", "guid", "pubDate", "description"):
                assert item.find(tag) is not None

            # The GUID is declared to be a permalink.
            guid = item.find("guid")
            assert guid.get("isPermaLink") == "true"

    def test_generate_feed_html_content(self, app):
        """The item description carries rendered HTML, not raw Markdown."""
        with app.app_context():
            note = create_note(
                content="# Test\n\nThis is **bold** and *italic*.",
                published=True,
            )
            feed_xml = generate_feed(
                site_url="https://example.com",
                site_name="Test Blog",
                site_description="A test blog",
                notes=[note],
            )

            root = ET.fromstring(feed_xml)
            channel = root.find("channel")
            item = channel.find("item")
            description = item.find("description").text

            # Check for the tags themselves. The previous
            # `"<strong>" in d or "bold" in d` form also passed on unrendered
            # Markdown, because "**bold**" already contains the word "bold".
            assert "<strong>" in description
            assert "<em>" in description
class TestFormatRFC822Date:
    """Unit tests for ``format_rfc822_date()``."""

    def test_format_rfc822_date_utc(self):
        """A UTC-aware datetime renders with its date, time and ``+0000`` offset."""
        moment = datetime(2024, 11, 18, 12, 0, 0, tzinfo=timezone.utc)
        formatted = format_rfc822_date(moment)

        for fragment in ("Mon, 18 Nov 2024", "12:00:00", "+0000"):
            assert fragment in formatted

    def test_format_rfc822_date_naive(self):
        """A naive datetime is rendered with a ``+0000`` offset."""
        naive_moment = datetime(2024, 11, 18, 12, 0, 0)  # No timezone
        formatted = format_rfc822_date(naive_moment)

        assert "Mon, 18 Nov 2024" in formatted
        assert "+0000" in formatted

    def test_format_rfc822_date_format(self):
        """Whitespace-split fields follow ``Mon, 18 Nov 2024 12:30:45 +0000``."""
        weekday_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
        month_names = (
            "Jan", "Feb", "Mar", "Apr", "May", "Jun",
            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
        )

        moment = datetime(2024, 11, 18, 12, 30, 45, tzinfo=timezone.utc)
        fields = format_rfc822_date(moment).split()

        # Field order: day name (with comma), day, month name, year, time, zone.
        assert fields[0].rstrip(",") in weekday_names
        assert fields[2] in month_names
        assert len(fields[3]) == 4  # Year is 4 digits
class TestGetNoteTitle:
    """Unit tests for ``get_note_title()``."""

    def test_get_note_title_with_heading(self, app):
        """A leading ``#`` heading becomes the title."""
        with app.app_context():
            created = create_note(
                content="# My First Note\n\nContent here.", published=True
            )
            assert get_note_title(created) == "My First Note"

    def test_get_note_title_without_heading(self, app):
        """Without a heading the title is non-empty and drawn from the content."""
        with app.app_context():
            created = create_note(
                content="Just some content without heading.", published=True
            )
            extracted = get_note_title(created)
            assert len(extracted) > 0
            assert "Just some content" in extracted

    def test_get_note_title_truncates_long_titles(self, app):
        """A 150-character heading is cut down to at most 103 characters."""
        with app.app_context():
            oversized_heading = "A" * 150
            created = create_note(
                content=f"# {oversized_heading}\n\nContent.", published=True
            )
            extracted = get_note_title(created)
            # 100 characters of title plus a three-character "..." suffix.
            assert len(extracted) <= 103

    def test_get_note_title_empty_content(self, app):
        """Whitespace-only content still produces a non-empty title."""
        with app.app_context():
            created = create_note(content="\n\n\n", published=True)
            extracted = get_note_title(created)
            assert len(extracted) > 0
class TestCleanHTMLForRSS:
    """Unit tests for ``clean_html_for_rss()``."""

    def test_clean_html_normal_content(self):
        """Markup without a CDATA terminator passes through untouched."""
        markup = "<p>This is <strong>bold</strong> text.</p>"
        assert clean_html_for_rss(markup) == markup

    def test_clean_html_with_cdata_end_marker(self):
        """A ``]]>`` sequence is rewritten to ``]] >``."""
        cleaned = clean_html_for_rss("<p>Example: ]]></p>")
        assert "]]>" not in cleaned
        assert "]] >" in cleaned

    def test_clean_html_preserves_other_content(self):
        """Links and emphasis are left exactly as given."""
        markup = "<p>Normal content with <a href='test'>links</a> and <em>emphasis</em>.</p>"
        assert clean_html_for_rss(markup) == markup

    def test_clean_html_empty_string(self):
        """The empty string maps to the empty string."""
        cleaned = clean_html_for_rss("")
        assert cleaned == ""
class TestFeedIntegration:
    """End-to-end checks: create a note, generate a feed, parse the result."""

    def test_feed_with_special_characters(self, app):
        """XML-significant characters in a note do not break the document."""
        with app.app_context():
            tricky_note = create_note(
                content="# Test & Special <Characters>\n\nContent with 'quotes' and \"doubles\".",
                published=True,
            )
            document = generate_feed(
                site_url="https://example.com",
                site_name="Test Blog",
                site_description="A test blog",
                notes=[tricky_note],
            )

            # Parsing raises if the output is not well-formed.
            parsed = ET.fromstring(document)
            assert parsed is not None

    def test_feed_with_unicode_content(self, app):
        """Non-ASCII note text yields a UTF-8 declared, parseable feed."""
        with app.app_context():
            unicode_note = create_note(
                content="# Test Unicode 你好 🚀\n\nContent with émojis and ünicode.",
                published=True,
            )
            document = generate_feed(
                site_url="https://example.com",
                site_name="Test Blog",
                site_description="A test blog",
                notes=[unicode_note],
            )

            # Accept either quoting style in the XML declaration.
            declarations = ("encoding='UTF-8'", 'encoding="UTF-8"')
            assert any(marker in document for marker in declarations)

            parsed = ET.fromstring(document)
            assert parsed is not None

    def test_feed_with_multiline_content(self, app):
        """Paragraphs and list items produce a non-empty item description."""
        with app.app_context():
            multiline_note = create_note(
                content="# Multiline Note\n\nParagraph 1\n\nParagraph 2\n\n- List item 1\n- List item 2",
                published=True,
            )
            document = generate_feed(
                site_url="https://example.com",
                site_name="Test Blog",
                site_description="A test blog",
                notes=[multiline_note],
            )

            first_item = ET.fromstring(document).find("channel").find("item")
            body = first_item.find("description").text

            assert body is not None
            assert len(body) > 0

375
tests/test_routes_feed.py Normal file
View File

@@ -0,0 +1,375 @@
"""
Tests for RSS feed route (/feed.xml)
Tests cover:
- Feed route returns valid XML
- Correct Content-Type header
- Caching behavior (server-side and client-side)
- ETag generation and validation
- Only published notes included
- Feed item limit configuration
- Cache expiration behavior
"""
import pytest
import time
from xml.etree import ElementTree as ET
from starpunk import create_app
from starpunk.notes import create_note
@pytest.fixture
def app(tmp_path):
    """Yield an application built by ``create_app`` for the feed-route tests.

    Paths live below pytest's ``tmp_path``. ``FEED_CACHE_SECONDS`` is kept at
    two seconds so that the cache-expiry tests do not have to wait long.
    """
    data_root = tmp_path / "data"
    data_root.mkdir(parents=True, exist_ok=True)

    settings = dict(
        TESTING=True,
        DATABASE_PATH=data_root / "starpunk.db",
        DATA_PATH=data_root,
        NOTES_PATH=data_root / "notes",
        SESSION_SECRET="test-secret-key",
        ADMIN_ME="https://test.example.com",
        SITE_URL="https://example.com",
        SITE_NAME="Test Blog",
        SITE_DESCRIPTION="A test blog",
        DEV_MODE=False,
        FEED_MAX_ITEMS=50,
        FEED_CACHE_SECONDS=2,  # Short cache for testing
    )

    application = create_app(config=settings)
    yield application
@pytest.fixture
def client(app):
    """Return the application's test client, used to issue requests."""
    http_client = app.test_client()
    return http_client
@pytest.fixture
def sample_notes(app):
    """Return ten notes: indices 0-6 are published, indices 7-9 are drafts."""
    with app.app_context():
        return [
            create_note(
                content=f"# Test Note {index}\n\nContent for note {index}.",
                published=(index < 7),  # First 7 published, last 3 drafts
            )
            for index in range(10)
        ]
class TestFeedRoute:
    """Status, body and header checks for ``GET /feed.xml``."""

    def test_feed_route_exists(self, client):
        """The route answers with HTTP 200."""
        reply = client.get("/feed.xml")
        assert reply.status_code == 200

    def test_feed_route_returns_xml(self, client):
        """The response body parses as XML with an ``rss`` root element."""
        reply = client.get("/feed.xml")
        assert reply.status_code == 200

        document_root = ET.fromstring(reply.data)
        assert document_root.tag == "rss"

    def test_feed_route_content_type(self, client):
        """Content-Type is ``application/rss+xml`` with a UTF-8 charset."""
        reply = client.get("/feed.xml")
        assert reply.status_code == 200

        media_type = reply.content_type
        assert "application/rss+xml" in media_type
        assert "charset=utf-8" in media_type.lower()

    def test_feed_route_cache_control_header(self, client, app):
        """Cache-Control is public and its max-age matches the configuration."""
        reply = client.get("/feed.xml")
        assert reply.status_code == 200

        assert "Cache-Control" in reply.headers
        directive = reply.headers["Cache-Control"]
        assert "public" in directive

        # max-age should equal FEED_CACHE_SECONDS from the app config.
        configured_seconds = app.config.get("FEED_CACHE_SECONDS", 300)
        assert f"max-age={configured_seconds}" in directive

    def test_feed_route_etag_header(self, client):
        """A non-empty ETag header is present."""
        reply = client.get("/feed.xml")
        assert reply.status_code == 200

        assert "ETag" in reply.headers
        assert len(reply.headers["ETag"]) > 0
class TestFeedContent:
    """Checks on which notes reach ``/feed.xml`` and how items are shaped."""

    def test_feed_only_published_notes(self, client, sample_notes):
        """Only the seven published sample notes appear; the drafts do not."""
        response = client.get("/feed.xml")
        assert response.status_code == 200

        root = ET.fromstring(response.data)
        channel = root.find("channel")
        items = channel.findall("item")

        # The fixture creates 10 notes, of which the first 7 are published.
        assert len(items) == 7

        feed_text = response.data.decode("utf-8")
        assert "Test Note 0" in feed_text  # Published
        assert "Test Note 6" in feed_text  # Published
        assert "Test Note 7" not in feed_text  # Draft
        assert "Test Note 8" not in feed_text  # Draft
        assert "Test Note 9" not in feed_text  # Draft

    def test_feed_respects_limit_config(self, client, app):
        """With more published notes than ``FEED_MAX_ITEMS``, the feed is capped."""
        # Create more notes than the configured limit (60 > 50).
        with app.app_context():
            for i in range(60):
                create_note(content=f"Note {i}", published=True)

        response = client.get("/feed.xml")
        assert response.status_code == 200

        root = ET.fromstring(response.data)
        channel = root.find("channel")
        items = channel.findall("item")

        # More notes exist than the limit, so the feed must be filled to it.
        # Exact equality is required: `<=` would also accept an empty feed.
        max_items = app.config.get("FEED_MAX_ITEMS", 50)
        assert len(items) == max_items

    def test_feed_empty_when_no_notes(self, client):
        """With no notes the feed has zero items but keeps its channel metadata."""
        response = client.get("/feed.xml")
        assert response.status_code == 200

        root = ET.fromstring(response.data)
        channel = root.find("channel")
        items = channel.findall("item")

        assert len(items) == 0
        assert channel.find("title") is not None
        assert channel.find("link") is not None

    def test_feed_has_required_channel_elements(self, client, app):
        """Channel title and link match config; description and language exist."""
        response = client.get("/feed.xml")
        assert response.status_code == 200

        root = ET.fromstring(response.data)
        channel = root.find("channel")

        assert channel.find("title").text == app.config["SITE_NAME"]
        assert channel.find("link").text == app.config["SITE_URL"]
        assert channel.find("description") is not None
        assert channel.find("language") is not None

    def test_feed_items_have_required_elements(self, client, sample_notes):
        """The first item carries title, link, guid, pubDate and description."""
        response = client.get("/feed.xml")
        assert response.status_code == 200

        root = ET.fromstring(response.data)
        channel = root.find("channel")
        items = channel.findall("item")

        # sample_notes publishes notes, so an empty feed is a failure here,
        # not a reason to skip the checks.
        assert len(items) > 0

        item = items[0]
        for tag in ("title", "link", "guid", "pubDate", "description"):
            assert item.find(tag) is not None

    def test_feed_item_links_are_absolute(self, client, sample_notes, app):
        """Item links start with the configured ``SITE_URL``."""
        response = client.get("/feed.xml")
        assert response.status_code == 200

        root = ET.fromstring(response.data)
        channel = root.find("channel")
        items = channel.findall("item")

        # As above: the fixture guarantees published notes, so require items.
        assert len(items) > 0

        link = items[0].find("link").text
        # Should start with the site URL...
        assert link.startswith(app.config["SITE_URL"])
        # ...and therefore be a full URL, not a relative path.
        assert link.startswith("http")
class TestFeedCaching:
    """Caching behaviour of ``/feed.xml`` as observed through body and ETag."""

    def test_feed_caches_response(self, client, sample_notes):
        """Two back-to-back requests return the same ETag and body."""
        response1 = client.get("/feed.xml")
        etag1 = response1.headers.get("ETag")

        response2 = client.get("/feed.xml")
        etag2 = response2.headers.get("ETag")

        # Without this, a missing header would give None == None and the
        # equality check below would pass without testing anything.
        assert etag1 is not None

        assert etag1 == etag2
        assert response1.data == response2.data

    def test_feed_cache_expires(self, client, sample_notes, app):
        """After the cache window passes, new content yields a new ETag."""
        response1 = client.get("/feed.xml")
        etag1 = response1.headers.get("ETag")

        # Wait one second past the configured cache lifetime, so the wait
        # follows FEED_CACHE_SECONDS if the fixture value changes.
        time.sleep(app.config["FEED_CACHE_SECONDS"] + 1)

        # Create a new note (changes the feed content).
        with app.app_context():
            create_note(content="New note after cache expiry", published=True)

        response2 = client.get("/feed.xml")
        etag2 = response2.headers.get("ETag")

        # Both responses must carry an ETag for the inequality to mean anything.
        assert etag1 is not None
        assert etag2 is not None
        assert etag1 != etag2

    def test_feed_etag_changes_with_content(self, client, app):
        """Starting from an empty feed, adding a note changes the ETag."""
        response1 = client.get("/feed.xml")
        etag1 = response1.headers.get("ETag")

        # Let the cached response age out before changing the content.
        time.sleep(app.config["FEED_CACHE_SECONDS"] + 1)

        with app.app_context():
            create_note(content="New note changes ETag", published=True)

        response2 = client.get("/feed.xml")
        etag2 = response2.headers.get("ETag")

        assert etag1 is not None
        assert etag2 is not None
        assert etag1 != etag2

    def test_feed_cache_consistent_within_window(self, client, sample_notes):
        """Five rapid requests all return identical bodies and ETags."""
        responses = [client.get("/feed.xml") for _ in range(5)]

        first_content = responses[0].data
        first_etag = responses[0].headers.get("ETag")

        # Guard against every response lacking the header (None == None).
        assert first_etag is not None

        for response in responses[1:]:
            assert response.data == first_content
            assert response.headers.get("ETag") == first_etag
class TestFeedEdgeCases:
    """Route-level checks that unusual note content still gives parseable XML."""

    def test_feed_with_special_characters_in_content(self, client, app):
        """Ampersands, angle brackets and quotes do not break the feed."""
        with app.app_context():
            create_note(
                content="# Test & Special <Characters>\n\n'Quotes' and \"doubles\".",
                published=True,
            )

        reply = client.get("/feed.xml")
        assert reply.status_code == 200

        # Parsing raises if the body is not well-formed XML.
        parsed = ET.fromstring(reply.data)
        assert parsed is not None

    def test_feed_with_unicode_content(self, client, app):
        """CJK text, emoji and accented letters do not break the feed."""
        with app.app_context():
            create_note(
                content="# Test Unicode 你好 🚀\n\nEmojis and ümlauts.",
                published=True,
            )

        reply = client.get("/feed.xml")
        assert reply.status_code == 200

        parsed = ET.fromstring(reply.data)
        assert parsed is not None

    def test_feed_with_very_long_note(self, client, app):
        """A note made of 100 repeated sentences still yields a parseable feed."""
        sentence = "This is a very long paragraph. "
        with app.app_context():
            lengthy_body = "# Long Note\n\n" + (sentence * 100)
            create_note(content=lengthy_body, published=True)

        reply = client.get("/feed.xml")
        assert reply.status_code == 200

        parsed = ET.fromstring(reply.data)
        assert parsed is not None
class TestFeedConfiguration:
    """Checks that channel metadata is taken from the application config."""

    def test_feed_uses_site_name_from_config(self, client, app):
        """Channel ``<title>`` equals ``SITE_NAME``."""
        reply = client.get("/feed.xml")
        assert reply.status_code == 200

        channel_node = ET.fromstring(reply.data).find("channel")
        assert channel_node.find("title").text == app.config["SITE_NAME"]

    def test_feed_uses_site_url_from_config(self, client, app):
        """Channel ``<link>`` equals ``SITE_URL``."""
        reply = client.get("/feed.xml")
        assert reply.status_code == 200

        channel_node = ET.fromstring(reply.data).find("channel")
        assert channel_node.find("link").text == app.config["SITE_URL"]

    def test_feed_uses_site_description_from_config(self, client, app):
        """Channel ``<description>`` equals ``SITE_DESCRIPTION``."""
        reply = client.get("/feed.xml")
        assert reply.status_code == 200

        channel_node = ET.fromstring(reply.data).find("channel")
        assert channel_node.find("description").text == app.config["SITE_DESCRIPTION"]