- Modular architecture with separate modules for scraping, parsing, security, validation, and caching
- Comprehensive security measures including HTML sanitization, rate limiting, and input validation
- Robust error handling with custom exceptions and retry logic
- HTTP caching with ETags and Last-Modified headers for efficiency
- Pre-compiled regex patterns for improved performance
- Comprehensive test suite with 66 tests covering all major functionality
- Docker support for containerized deployment
- Configuration management with environment variable support
- Working parser that successfully extracts 32 articles from Warhammer Community

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
116 lines
5.2 KiB
Python
116 lines
5.2 KiB
Python
"""Tests for configuration module."""
|
|
|
|
import pytest
|
|
import os
|
|
from unittest.mock import patch
|
|
|
|
from src.rss_scraper.config import Config
|
|
|
|
|
|
class TestConfig:
    """Test configuration functionality.

    Exercises the constants exposed on ``Config``, the environment-variable
    override path, the ``get_output_dir`` / ``get_allowed_domains`` helpers,
    and each ``ValueError`` raised by ``Config.validate_config``.
    """

    def test_default_values(self):
        """Test that default configuration values are set correctly."""
        assert Config.MAX_SCROLL_ITERATIONS == 5
        assert Config.MAX_CONTENT_SIZE == 10 * 1024 * 1024
        assert Config.MAX_TITLE_LENGTH == 500
        assert Config.SCROLL_DELAY_SECONDS == 2.0
        assert Config.PAGE_TIMEOUT_MS == 120000
        assert Config.DEFAULT_URL == 'https://www.warhammer-community.com/en-gb/'
        assert Config.DEFAULT_OUTPUT_DIR == '.'
        assert Config.RSS_FILENAME == 'warhammer_rss_feed.xml'
        assert Config.DEBUG_HTML_FILENAME == 'page.html'
        assert Config.FEED_TITLE == 'Warhammer Community RSS Feed'
        assert Config.FEED_DESCRIPTION == 'Latest Warhammer Community Articles'

    def test_environment_variable_override(self):
        """Test that environment variables override default values."""
        import importlib

        # Reload the module that actually defines the ``Config`` under test,
        # not whatever top-level module happens to be named ``config``.
        config_module = importlib.import_module(Config.__module__)
        overrides = {
            'MAX_SCROLL_ITERATIONS': '10',
            'MAX_CONTENT_SIZE': '20971520',  # 20MB
            'SCROLL_DELAY_SECONDS': '1.5',
            'DEFAULT_URL': 'https://example.com',
            'RSS_FILENAME': 'custom_feed.xml',
        }
        try:
            with patch.dict(os.environ, overrides):
                # Need to reload the config to pick up environment changes
                reloaded = importlib.reload(config_module)

                assert reloaded.Config.MAX_SCROLL_ITERATIONS == 10
                assert reloaded.Config.MAX_CONTENT_SIZE == 20971520
                assert reloaded.Config.SCROLL_DELAY_SECONDS == 1.5
                assert reloaded.Config.DEFAULT_URL == 'https://example.com'
                assert reloaded.Config.RSS_FILENAME == 'custom_feed.xml'
        finally:
            # patch.dict has restored os.environ by now; reload once more so
            # the overridden values do not leak into tests that run later.
            importlib.reload(config_module)

    def test_get_output_dir_with_override(self):
        """Test get_output_dir method with override."""
        result = Config.get_output_dir('/custom/path')
        assert result == '/custom/path'

    def test_get_output_dir_without_override(self):
        """Test get_output_dir method without override."""
        result = Config.get_output_dir()
        assert result == Config.DEFAULT_OUTPUT_DIR

    def test_get_allowed_domains_default(self):
        """Test get_allowed_domains returns default domains."""
        domains = Config.get_allowed_domains()
        assert 'warhammer-community.com' in domains
        assert 'www.warhammer-community.com' in domains

    def test_get_allowed_domains_from_env(self):
        """Test get_allowed_domains reads from environment variable."""
        with patch.dict(os.environ, {
            'ALLOWED_DOMAINS': 'example.com,test.com,another.com',
        }):
            domains = Config.get_allowed_domains()
            assert domains == ['example.com', 'test.com', 'another.com']

    def test_validate_config_success(self):
        """Test that valid configuration passes validation."""
        # Should not raise any exception
        Config.validate_config()

    def test_validate_config_negative_scroll_iterations(self):
        """Test validation fails for negative scroll iterations."""
        with patch.object(Config, 'MAX_SCROLL_ITERATIONS', -1):
            with pytest.raises(ValueError, match="MAX_SCROLL_ITERATIONS must be non-negative"):
                Config.validate_config()

    def test_validate_config_zero_content_size(self):
        """Test validation fails for zero content size."""
        with patch.object(Config, 'MAX_CONTENT_SIZE', 0):
            with pytest.raises(ValueError, match="MAX_CONTENT_SIZE must be positive"):
                Config.validate_config()

    def test_validate_config_zero_title_length(self):
        """Test validation fails for zero title length."""
        with patch.object(Config, 'MAX_TITLE_LENGTH', 0):
            with pytest.raises(ValueError, match="MAX_TITLE_LENGTH must be positive"):
                Config.validate_config()

    def test_validate_config_negative_scroll_delay(self):
        """Test validation fails for negative scroll delay."""
        with patch.object(Config, 'SCROLL_DELAY_SECONDS', -1.0):
            with pytest.raises(ValueError, match="SCROLL_DELAY_SECONDS must be non-negative"):
                Config.validate_config()

    def test_validate_config_zero_timeout(self):
        """Test validation fails for zero timeout."""
        with patch.object(Config, 'PAGE_TIMEOUT_MS', 0):
            with pytest.raises(ValueError, match="PAGE_TIMEOUT_MS must be positive"):
                Config.validate_config()

    def test_validate_config_invalid_url(self):
        """Test validation fails for invalid default URL."""
        with patch.object(Config, 'DEFAULT_URL', 'not-a-url'):
            with pytest.raises(ValueError, match="DEFAULT_URL must be a valid HTTP/HTTPS URL"):
                Config.validate_config()

    def test_validate_config_empty_domains(self):
        """Test validation fails for empty allowed domains."""
        with patch.object(Config, 'get_allowed_domains', return_value=[]):
            with pytest.raises(ValueError, match="ALLOWED_DOMAINS cannot be empty"):
                Config.validate_config()