- Modular architecture with separate modules for scraping, parsing, security, validation, and caching
- Comprehensive security measures including HTML sanitization, rate limiting, and input validation
- Robust error handling with custom exceptions and retry logic
- HTTP caching with ETags and Last-Modified headers for efficiency
- Pre-compiled regex patterns for improved performance
- Comprehensive test suite with 66 tests covering all major functionality
- Docker support for containerized deployment
- Configuration management with environment variable support
- Working parser that successfully extracts 32 articles from Warhammer Community

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
162 lines
6.1 KiB
Python
162 lines
6.1 KiB
Python
"""Tests for RSS generator module."""
|
|
|
|
import pytest
|
|
import os
|
|
import tempfile
|
|
from datetime import datetime
|
|
import pytz
|
|
from unittest.mock import patch, mock_open
|
|
|
|
from src.rss_scraper.rss_generator import generate_rss_feed, save_rss_feed, save_debug_html
|
|
from src.rss_scraper.exceptions import FileOperationError
|
|
|
|
|
|
class TestGenerateRssFeed:
    """Test RSS feed generation functionality.

    Each test calls ``generate_rss_feed(articles, feed_url)`` and inspects
    the returned document after decoding it as UTF-8.
    """

    def test_generate_rss_feed_with_articles(self):
        """Test RSS generation with valid articles.

        Every article title and link must appear in the output, together
        with the XML declaration and the opening ``<rss>`` element.
        """
        timezone = pytz.UTC
        articles = [
            {
                'title': 'Test Article 1',
                'link': 'https://example.com/article1',
                'date': datetime(2024, 1, 1, tzinfo=timezone),
            },
            {
                'title': 'Test Article 2',
                'link': 'https://example.com/article2',
                'date': datetime(2024, 1, 2, tzinfo=timezone),
            },
        ]

        feed_url = "https://example.com"
        rss_content = generate_rss_feed(articles, feed_url)

        # The feed is expected as encoded bytes, not as text.
        assert isinstance(rss_content, bytes)
        rss_str = rss_content.decode('utf-8')
        assert 'Test Article 1' in rss_str
        assert 'Test Article 2' in rss_str
        assert 'https://example.com/article1' in rss_str
        assert 'https://example.com/article2' in rss_str
        assert '<?xml version=' in rss_str
        assert '<rss version=' in rss_str

    def test_generate_rss_feed_empty_articles(self):
        """Test RSS generation with empty articles list.

        An empty list must still produce a document carrying the XML
        declaration, the ``<rss>`` element and the feed title.
        """
        articles = []
        feed_url = "https://example.com"

        rss_content = generate_rss_feed(articles, feed_url)

        assert isinstance(rss_content, bytes)
        rss_str = rss_content.decode('utf-8')
        assert '<?xml version=' in rss_str
        assert '<rss version=' in rss_str
        # Should still contain feed metadata
        assert 'Warhammer Community RSS Feed' in rss_str

    def test_generate_rss_feed_unicode_content(self):
        """Test RSS generation with unicode content.

        A non-ASCII title must be present verbatim once the output is
        decoded as UTF-8.
        """
        timezone = pytz.UTC
        articles = [
            {
                'title': 'Tëst Artìclé with Ūnïcödë',
                'link': 'https://example.com/unicode',
                'date': datetime(2024, 1, 1, tzinfo=timezone),
            },
        ]

        feed_url = "https://example.com"
        rss_content = generate_rss_feed(articles, feed_url)

        assert isinstance(rss_content, bytes)
        rss_str = rss_content.decode('utf-8')
        assert 'Tëst Artìclé with Ūnïcödë' in rss_str
|
|
|
|
|
|
class TestSaveRssFeed:
    """Test RSS feed saving functionality.

    Each test calls ``save_rss_feed(rss_content, output_dir)``; the
    success cases use its return value as the path of the written file.
    """

    def test_save_rss_feed_success(self):
        """Test successful RSS feed saving.

        The returned path must exist, end with ``warhammer_rss_feed.xml``
        and contain exactly the bytes that were passed in.
        """
        rss_content = b'<?xml version="1.0"?><rss>test</rss>'

        with tempfile.TemporaryDirectory() as temp_dir:
            result_path = save_rss_feed(rss_content, temp_dir)

            assert os.path.exists(result_path)
            assert result_path.endswith('warhammer_rss_feed.xml')

            # Read back inside the ``with`` block: the directory and the
            # file are deleted as soon as the context manager exits.
            with open(result_path, 'rb') as f:
                saved_content = f.read()
            assert saved_content == rss_content

    def test_save_rss_feed_permission_error(self):
        """Test RSS feed saving with permission error.

        With ``builtins.open`` patched to raise ``PermissionError``, the
        call must raise ``FileOperationError``.
        """
        rss_content = b'<?xml version="1.0"?><rss>test</rss>'

        # NOTE(review): "/some/path" is a real absolute path. Another test
        # in this class shows save_rss_feed creates a missing directory, so
        # this call may touch the real filesystem before the patched
        # ``open`` is reached - confirm against the implementation.
        with patch('builtins.open', side_effect=PermissionError("Permission denied")):
            with pytest.raises(FileOperationError):
                save_rss_feed(rss_content, "/some/path")

    def test_save_rss_feed_creates_directory(self):
        """Test that RSS feed saving creates directory if needed.

        The target sub-directory does not exist beforehand; afterwards
        both it and the returned file path must exist.
        """
        rss_content = b'<?xml version="1.0"?><rss>test</rss>'

        with tempfile.TemporaryDirectory() as temp_dir:
            new_subdir = os.path.join(temp_dir, "new_subdir")
            result_path = save_rss_feed(rss_content, new_subdir)

            assert os.path.exists(new_subdir)
            assert os.path.exists(result_path)
|
|
|
|
|
|
class TestSaveDebugHtml:
    """Test debug HTML saving functionality.

    Each test calls ``save_debug_html(html_content, output_dir)`` and,
    where a file is expected, looks for ``page.html`` in that directory.
    """

    def test_save_debug_html_success(self):
        """Test successful debug HTML saving.

        ``page.html`` must exist in the target directory and contain the
        body text of the input document.
        """
        html_content = "<html><body>Test content</body></html>"

        with tempfile.TemporaryDirectory() as temp_dir:
            save_debug_html(html_content, temp_dir)

            html_path = os.path.join(temp_dir, "page.html")
            assert os.path.exists(html_path)

            # Read back inside the ``with`` block, before the temporary
            # directory is removed.
            with open(html_path, 'r', encoding='utf-8') as f:
                saved_content = f.read()
            # BeautifulSoup prettifies the content, so only the text is
            # checked rather than an exact match of the markup.
            assert "Test content" in saved_content

    def test_save_debug_html_permission_error(self):
        """Test debug HTML saving with permission error (should not raise).

        The test passes as long as the call returns without raising while
        ``builtins.open`` is patched to raise ``PermissionError``.
        """
        html_content = "<html><body>Test content</body></html>"

        with patch('builtins.open', side_effect=PermissionError("Permission denied")):
            # Should not raise exception, just log warning
            save_debug_html(html_content, "/some/path")

    def test_save_debug_html_malformed_content(self):
        """Test debug HTML saving with malformed HTML content.

        Input with unclosed tags must still result in ``page.html`` being
        written.
        """
        malformed_html = "<html><body>Unclosed tags"

        with tempfile.TemporaryDirectory() as temp_dir:
            # Should handle malformed HTML gracefully
            save_debug_html(malformed_html, temp_dir)

            html_path = os.path.join(temp_dir, "page.html")
            assert os.path.exists(html_path)

    def test_save_debug_html_creates_directory(self):
        """Test that debug HTML saving creates directory if needed.

        The target sub-directory does not exist beforehand; afterwards
        both it and ``page.html`` inside it must exist.
        """
        html_content = "<html><body>Test content</body></html>"

        with tempfile.TemporaryDirectory() as temp_dir:
            new_subdir = os.path.join(temp_dir, "new_subdir")
            save_debug_html(html_content, new_subdir)

            assert os.path.exists(new_subdir)
            html_path = os.path.join(new_subdir, "page.html")
            assert os.path.exists(html_path)