Files
StarPunk/tests/test_feed.py
Phil Skentelbery d9df55ae63 fix: RSS feed now shows newest posts first
Fixed bug where feedgen library was reversing the order of feed items.
Database returns notes in DESC order (newest first), but feedgen was
displaying them oldest-first in the RSS XML. Added reversed() wrapper
to maintain correct chronological order in the feed.

Added regression test to verify feed order matches database order.

Bug confirmed by testing:
- Database: [Note 2, Note 1, Note 0] (newest first)
- Old feed: [Note 0, Note 1, Note 2] (oldest first) 
- New feed: [Note 2, Note 1, Note 0] (newest first) 

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-25 09:56:10 -07:00

476 lines
16 KiB
Python

"""
Tests for RSS feed generation module
Tests cover:
- RSS feed generation with various note counts
- RFC-822 date formatting
- Note title extraction
- HTML cleaning for CDATA
- Feed structure and required elements
- Edge cases (empty feeds, special characters, etc.)
"""
import pytest
from datetime import datetime, timezone
from xml.etree import ElementTree as ET
from starpunk import create_app
from starpunk.feed import (
generate_feed,
format_rfc822_date,
get_note_title,
clean_html_for_rss,
)
from starpunk.notes import create_note
from starpunk.models import Note
@pytest.fixture
def app(tmp_path):
    """Yield a test application whose storage lives under pytest's tmp_path.

    The database file and the notes directory are both configured beneath a
    per-test ``data`` directory, so tests do not share state on disk.
    """
    data_dir = tmp_path / "data"
    data_dir.mkdir(parents=True, exist_ok=True)

    application = create_app(
        config={
            "TESTING": True,
            "DATABASE_PATH": data_dir / "starpunk.db",
            "DATA_PATH": data_dir,
            "NOTES_PATH": data_dir / "notes",
            "SESSION_SECRET": "test-secret-key",
            "ADMIN_ME": "https://test.example.com",
            "SITE_URL": "https://example.com",
            "SITE_NAME": "Test Blog",
            "SITE_DESCRIPTION": "A test blog",
            "DEV_MODE": False,
        }
    )
    yield application
@pytest.fixture
def sample_notes(app):
    """Return five published notes, created in index order inside the app context."""
    with app.app_context():
        return [
            create_note(
                content=f"# Test Note {index}\n\nThis is test content for note {index}.",
                published=True,
            )
            for index in range(5)
        ]
class TestGenerateFeed:
    """Checks for generate_feed(): XML shape, item limit, ordering, validation."""

    # Site metadata shared by every test that does not deliberately vary it.
    _SITE = {
        "site_url": "https://example.com",
        "site_name": "Test Blog",
        "site_description": "A test blog",
    }

    @staticmethod
    def _items(feed_xml):
        """Parse *feed_xml* and return the <item> elements of its <channel>."""
        return ET.fromstring(feed_xml).find("channel").findall("item")

    def test_generate_feed_basic(self, app, sample_notes):
        """A feed built from five notes is RSS 2.0 with the expected channel data."""
        with app.app_context():
            feed_xml = generate_feed(**self._SITE, notes=sample_notes)

            # The result is a string carrying an XML declaration.
            assert isinstance(feed_xml, str)
            assert feed_xml.startswith("<?xml")

            # Root element identifies the document as RSS 2.0.
            rss = ET.fromstring(feed_xml)
            assert rss.tag == "rss"
            assert rss.get("version") == "2.0"

            channel = rss.find("channel")
            assert channel is not None

            # Required channel metadata.
            assert channel.find("title").text == "Test Blog"
            # More than one <link> may be present (e.g. a self link), so only
            # require that at least one exists.
            assert len(channel.findall("link")) > 0
            assert channel.find("description").text == "A test blog"

            # One <item> per supplied note.
            assert len(channel.findall("item")) == 5

    def test_generate_feed_empty(self, app):
        """An empty note list still yields parseable XML with zero items."""
        with app.app_context():
            feed_xml = generate_feed(**self._SITE, notes=[])

            assert isinstance(feed_xml, str)
            assert len(self._items(feed_xml)) == 0

    def test_generate_feed_respects_limit(self, app, sample_notes):
        """Passing limit=3 with five notes produces exactly three items."""
        with app.app_context():
            feed_xml = generate_feed(**self._SITE, notes=sample_notes, limit=3)

            assert len(self._items(feed_xml)) == 3

    def test_generate_feed_newest_first(self, app):
        """Feed item order matches the newest-first order returned by list_notes()."""
        import time

        from starpunk.notes import list_notes

        with app.app_context():
            # Create notes oldest-to-newest, pausing briefly between them so
            # their timestamps differ.
            for index in range(3):
                create_note(
                    content=f"# Note {index}\n\nContent {index}.",
                    published=True,
                )
                time.sleep(0.01)

            db_notes = list_notes(published_only=True, limit=10)

            # Precondition: the database hands notes back newest first.
            assert "Note 2" in db_notes[0].title
            assert "Note 0" in db_notes[-1].title

            items = self._items(generate_feed(**self._SITE, notes=db_notes))

            # The feed must keep that order: newest at the top, oldest last.
            newest_title = items[0].find("title").text
            oldest_title = items[-1].find("title").text
            assert "Note 2" in newest_title
            assert "Note 0" in oldest_title

    def test_generate_feed_requires_site_url(self):
        """An empty site_url is rejected with ValueError."""
        with pytest.raises(ValueError, match="site_url is required"):
            generate_feed(
                site_url="",
                site_name="Test Blog",
                site_description="A test blog",
                notes=[],
            )

    def test_generate_feed_requires_site_name(self):
        """An empty site_name is rejected with ValueError."""
        with pytest.raises(ValueError, match="site_name is required"):
            generate_feed(
                site_url="https://example.com",
                site_name="",
                site_description="A test blog",
                notes=[],
            )

    def test_generate_feed_strips_trailing_slash(self, app, sample_notes):
        """A trailing slash on site_url must not produce '//' inside item links."""
        with app.app_context():
            feed_xml = generate_feed(
                site_url="https://example.com/",  # Trailing slash on purpose
                site_name="Test Blog",
                site_description="A test blog",
                notes=sample_notes,
                limit=1,
            )

            item_link = self._items(feed_xml)[0].find("link").text

            # Ignoring the scheme separator, no double slash may remain.
            assert "//" not in item_link.replace("https://", "")

    def test_generate_feed_includes_atom_self_link(self, app):
        """The serialized feed mentions atom, feed.xml and a rel="self" link."""
        with app.app_context():
            feed_xml = generate_feed(**self._SITE, notes=[])

            # Plain substring checks against the raw XML text.
            for marker in ("atom", "feed.xml", 'rel="self"'):
                assert marker in feed_xml

    def test_generate_feed_item_structure(self, app, sample_notes):
        """A single item carries title, link, guid, pubDate and description."""
        with app.app_context():
            feed_xml = generate_feed(**self._SITE, notes=sample_notes[:1])

            item = ET.fromstring(feed_xml).find("channel").find("item")

            for tag in ("title", "link", "guid", "pubDate", "description"):
                assert item.find(tag) is not None

            # The GUID is flagged as a permalink.
            assert item.find("guid").get("isPermaLink") == "true"

    def test_generate_feed_html_content(self, app):
        """The item description reflects the note's bold and italic text."""
        with app.app_context():
            note = create_note(
                content="# Test\n\nThis is **bold** and *italic*.",
                published=True,
            )
            feed_xml = generate_feed(**self._SITE, notes=[note])

            body = ET.fromstring(feed_xml).find("channel").find("item")
            description = body.find("description").text

            # NOTE(review): each check is also satisfied by the bare word alone,
            # so this passes even when no HTML tag is present - confirm that
            # this leniency is intended.
            assert "<strong>" in description or "bold" in description
            assert "<em>" in description or "italic" in description
class TestFormatRFC822Date:
    """Checks for format_rfc822_date()."""

    # Abbreviations accepted in the weekday and month positions.
    _DAY_NAMES = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    _MONTH_NAMES = (
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    )

    def test_format_rfc822_date_utc(self):
        """A UTC-aware datetime renders its date, time and +0000 offset."""
        moment = datetime(2024, 11, 18, 12, 0, 0, tzinfo=timezone.utc)
        formatted = format_rfc822_date(moment)

        assert "Mon, 18 Nov 2024" in formatted
        assert "12:00:00" in formatted
        assert "+0000" in formatted

    def test_format_rfc822_date_naive(self):
        """A naive datetime is rendered with a +0000 offset."""
        moment = datetime(2024, 11, 18, 12, 0, 0)  # tzinfo deliberately omitted
        formatted = format_rfc822_date(moment)

        assert "Mon, 18 Nov 2024" in formatted
        assert "+0000" in formatted

    def test_format_rfc822_date_format(self):
        """Whitespace-separated fields follow 'Mon, 18 Nov 2024 12:30:45 +0000'."""
        moment = datetime(2024, 11, 18, 12, 30, 45, tzinfo=timezone.utc)
        fields = format_rfc822_date(moment).split()

        # Field 0 is the weekday (with trailing comma), 2 the month, 3 the year.
        assert fields[0].rstrip(",") in self._DAY_NAMES
        assert fields[2] in self._MONTH_NAMES
        assert len(fields[3]) == 4  # Four-digit year
class TestGetNoteTitle:
    """Checks for get_note_title()."""

    def test_get_note_title_with_heading(self, app):
        """A leading '# ' heading becomes the title, without the marker."""
        with app.app_context():
            note = create_note(content="# My First Note\n\nContent here.", published=True)

            assert get_note_title(note) == "My First Note"

    def test_get_note_title_without_heading(self, app):
        """Without a heading, the title is non-empty and reflects the opening text."""
        with app.app_context():
            note = create_note(content="Just some content without heading.", published=True)
            extracted = get_note_title(note)

            assert len(extracted) > 0
            assert "Just some content" in extracted

    def test_get_note_title_truncates_long_titles(self, app):
        """A 150-character heading yields a title of at most 103 characters."""
        with app.app_context():
            heading = "A" * 150
            note = create_note(content=f"# {heading}\n\nContent.", published=True)
            extracted = get_note_title(note)

            # Upper bound: 100 characters plus a three-character "..." suffix.
            assert len(extracted) <= 103

    def test_get_note_title_minimal_content(self, app):
        """A one-character note still produces a non-empty title."""
        with app.app_context():
            note = create_note(content="x", published=True)

            assert len(get_note_title(note)) > 0
class TestCleanHTMLForRSS:
    """Checks for clean_html_for_rss()."""

    def test_clean_html_normal_content(self):
        """Markup without a CDATA terminator is returned unchanged."""
        markup = "<p>This is <strong>bold</strong> text.</p>"

        assert clean_html_for_rss(markup) == markup

    def test_clean_html_with_cdata_end_marker(self):
        """A literal ']]>' is rewritten to ']] >' in the output."""
        cleaned = clean_html_for_rss("<p>Example: ]]></p>")

        assert "]]>" not in cleaned
        assert "]] >" in cleaned

    def test_clean_html_preserves_other_content(self):
        """Links and emphasis markup are returned unchanged."""
        markup = "<p>Normal content with <a href='test'>links</a> and <em>emphasis</em>.</p>"

        assert clean_html_for_rss(markup) == markup

    def test_clean_html_empty_string(self):
        """An empty string stays empty."""
        assert clean_html_for_rss("") == ""
class TestFeedIntegration:
    """End-to-end checks: create a note, build a feed from it, parse the XML."""

    @staticmethod
    def _feed_for(note):
        """Build a single-note feed with the standard test site metadata.

        Callers invoke this inside ``app.app_context()``.
        """
        return generate_feed(
            site_url="https://example.com",
            site_name="Test Blog",
            site_description="A test blog",
            notes=[note],
        )

    def test_feed_with_special_characters(self, app):
        """Ampersands, angle brackets and quotes in a note still give parseable XML."""
        with app.app_context():
            note = create_note(
                content="# Test & Special <Characters>\n\nContent with 'quotes' and \"doubles\".",
                published=True,
            )
            feed_xml = self._feed_for(note)

            # ET.fromstring raises on malformed XML, so reaching the assert
            # means the document parsed.
            assert ET.fromstring(feed_xml) is not None

    def test_feed_with_unicode_content(self, app):
        """Non-ASCII note text yields a feed declared as UTF-8 that parses."""
        with app.app_context():
            note = create_note(
                content="# Test Unicode 你好 🚀\n\nContent with émojis and ünicode.",
                published=True,
            )
            feed_xml = self._feed_for(note)

            # Accept either quoting style in the XML declaration.
            declarations = ("encoding='UTF-8'", 'encoding="UTF-8"')
            assert any(declaration in feed_xml for declaration in declarations)
            assert ET.fromstring(feed_xml) is not None

    def test_feed_with_multiline_content(self, app):
        """A note with paragraphs and a list gives a non-empty item description."""
        with app.app_context():
            note = create_note(
                content="# Multiline Note\n\nParagraph 1\n\nParagraph 2\n\n- List item 1\n- List item 2",
                published=True,
            )
            feed_xml = self._feed_for(note)

            item = ET.fromstring(feed_xml).find("channel").find("item")
            description = item.find("description").text

            assert description is not None
            assert len(description) > 0