rss_warhammer-community/tests/test_rss_generator.py
Phil 25086fc01b Add comprehensive RSS scraper implementation with security and testing
- Modular architecture with separate modules for scraping, parsing, security, validation, and caching
- Comprehensive security measures including HTML sanitization, rate limiting, and input validation
- Robust error handling with custom exceptions and retry logic
- HTTP caching with ETags and Last-Modified headers for efficiency
- Pre-compiled regex patterns for improved performance
- Comprehensive test suite with 66 tests covering all major functionality
- Docker support for containerized deployment
- Configuration management with environment variable support
- Working parser that successfully extracts 32 articles from Warhammer Community

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-06 09:15:06 -06:00

162 lines
6.1 KiB
Python

"""Tests for RSS generator module."""
import pytest
import os
import tempfile
from datetime import datetime
import pytz
from unittest.mock import patch, mock_open
from src.rss_scraper.rss_generator import generate_rss_feed, save_rss_feed, save_debug_html
from src.rss_scraper.exceptions import FileOperationError
class TestGenerateRssFeed:
    """Exercise ``generate_rss_feed`` with populated, empty and non-ASCII input."""

    def test_generate_rss_feed_with_articles(self):
        """Two well-formed articles must both show up in the serialized feed."""
        utc = pytz.UTC
        first_article = {
            'title': 'Test Article 1',
            'link': 'https://example.com/article1',
            'date': datetime(2024, 1, 1, tzinfo=utc),
        }
        second_article = {
            'title': 'Test Article 2',
            'link': 'https://example.com/article2',
            'date': datetime(2024, 1, 2, tzinfo=utc),
        }

        payload = generate_rss_feed(
            [first_article, second_article],
            "https://example.com",
        )

        # The generator is expected to hand back encoded bytes, not text.
        assert isinstance(payload, bytes)
        xml_text = payload.decode('utf-8')

        # Titles, links and the XML/RSS preamble must all be present.
        expected_fragments = (
            'Test Article 1',
            'Test Article 2',
            'https://example.com/article1',
            'https://example.com/article2',
            '<?xml version=',
            '<rss version=',
        )
        for fragment in expected_fragments:
            assert fragment in xml_text

    def test_generate_rss_feed_empty_articles(self):
        """An empty article list still yields a feed carrying its metadata."""
        no_articles = []

        payload = generate_rss_feed(no_articles, "https://example.com")

        assert isinstance(payload, bytes)
        xml_text = payload.decode('utf-8')

        # The last fragment is the feed title, i.e. channel-level metadata
        # that must be emitted even when there are no items.
        for fragment in (
            '<?xml version=',
            '<rss version=',
            'Warhammer Community RSS Feed',
        ):
            assert fragment in xml_text

    def test_generate_rss_feed_unicode_content(self):
        """Accented characters in a title survive the UTF-8 round trip."""
        utc = pytz.UTC
        accented_article = {
            'title': 'Tëst Artìclé with Ūnïcödë',
            'link': 'https://example.com/unicode',
            'date': datetime(2024, 1, 1, tzinfo=utc),
        }

        payload = generate_rss_feed([accented_article], "https://example.com")

        assert isinstance(payload, bytes)
        xml_text = payload.decode('utf-8')
        assert 'Tëst Artìclé with Ūnïcödë' in xml_text
class TestSaveRssFeed:
    """Tests for ``save_rss_feed``: persisting feed bytes into an output directory.

    Every test works inside a ``tempfile.TemporaryDirectory`` so nothing is
    ever written outside a throwaway location, regardless of the privileges
    the test run has.
    """

    def test_save_rss_feed_success(self):
        """The returned path exists, has the expected file name and exact bytes."""
        rss_content = b'<?xml version="1.0"?><rss>test</rss>'

        with tempfile.TemporaryDirectory() as temp_dir:
            result_path = save_rss_feed(rss_content, temp_dir)

            assert os.path.exists(result_path)
            assert result_path.endswith('warhammer_rss_feed.xml')

            # Read back in binary mode so the comparison is byte-exact.
            with open(result_path, 'rb') as f:
                saved_content = f.read()
            assert saved_content == rss_content

    def test_save_rss_feed_permission_error(self):
        """A ``PermissionError`` from ``open()`` surfaces as ``FileOperationError``."""
        rss_content = b'<?xml version="1.0"?><rss>test</rss>'

        # Target a real, writable temp directory instead of a hard-coded
        # absolute path: save_rss_feed creates missing directories (see
        # test_save_rss_feed_creates_directory), so a fixed path could be
        # created on the host when running as root, or fail for reasons
        # unrelated to the patched open() when running unprivileged.
        with tempfile.TemporaryDirectory() as temp_dir:
            # The patch is the innermost context so it is lifted before the
            # temporary directory is cleaned up.
            with patch('builtins.open', side_effect=PermissionError("Permission denied")):
                with pytest.raises(FileOperationError):
                    save_rss_feed(rss_content, temp_dir)

    def test_save_rss_feed_creates_directory(self):
        """A missing output directory is created before the feed is written."""
        rss_content = b'<?xml version="1.0"?><rss>test</rss>'

        with tempfile.TemporaryDirectory() as temp_dir:
            new_subdir = os.path.join(temp_dir, "new_subdir")

            result_path = save_rss_feed(rss_content, new_subdir)

            # isdir (not just exists) ensures a directory, not a file, was made.
            assert os.path.isdir(new_subdir)
            assert os.path.exists(result_path)
class TestSaveDebugHtml:
    """Tests for ``save_debug_html``: best-effort dump of fetched HTML to ``page.html``.

    Every test works inside a ``tempfile.TemporaryDirectory`` so nothing is
    ever written outside a throwaway location, regardless of the privileges
    the test run has.
    """

    def test_save_debug_html_success(self):
        """The HTML is written to ``page.html`` inside the output directory."""
        html_content = "<html><body>Test content</body></html>"

        with tempfile.TemporaryDirectory() as temp_dir:
            save_debug_html(html_content, temp_dir)

            html_path = os.path.join(temp_dir, "page.html")
            assert os.path.exists(html_path)

            with open(html_path, 'r', encoding='utf-8') as f:
                saved_content = f.read()

            # Substring check rather than equality: the original author notes
            # that BeautifulSoup prettifies the content before it is saved.
            assert "Test content" in saved_content

    def test_save_debug_html_permission_error(self):
        """A ``PermissionError`` from ``open()`` must not propagate to the caller."""
        html_content = "<html><body>Test content</body></html>"

        # Target a real, writable temp directory instead of a hard-coded
        # absolute path: save_debug_html creates missing directories (see
        # test_save_debug_html_creates_directory), so a fixed path could be
        # created on the host when running as root.
        with tempfile.TemporaryDirectory() as temp_dir:
            # The patch is the innermost context so it is lifted before the
            # temporary directory is cleaned up.
            with patch('builtins.open', side_effect=PermissionError("Permission denied")):
                # The test passes simply by this call returning normally; the
                # original author's note says the failure is only logged as a
                # warning.
                save_debug_html(html_content, temp_dir)

    def test_save_debug_html_malformed_content(self):
        """Unclosed tags are tolerated and a file is still produced."""
        malformed_html = "<html><body>Unclosed tags"

        with tempfile.TemporaryDirectory() as temp_dir:
            save_debug_html(malformed_html, temp_dir)

            html_path = os.path.join(temp_dir, "page.html")
            assert os.path.exists(html_path)

    def test_save_debug_html_creates_directory(self):
        """A missing output directory is created before the HTML is written."""
        html_content = "<html><body>Test content</body></html>"

        with tempfile.TemporaryDirectory() as temp_dir:
            new_subdir = os.path.join(temp_dir, "new_subdir")

            save_debug_html(html_content, new_subdir)

            # isdir (not just exists) ensures a directory, not a file, was made.
            assert os.path.isdir(new_subdir)
            html_path = os.path.join(new_subdir, "page.html")
            assert os.path.exists(html_path)