- Modular architecture with separate modules for scraping, parsing, security, validation, and caching
- Comprehensive security measures including HTML sanitization, rate limiting, and input validation
- Robust error handling with custom exceptions and retry logic
- HTTP caching with ETags and Last-Modified headers for efficiency
- Pre-compiled regex patterns for improved performance
- Comprehensive test suite with 66 tests covering all major functionality
- Docker support for containerized deployment
- Configuration management with environment variable support
- Working parser that successfully extracts 32 articles from Warhammer Community

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
41 lines
874 B
Python
41 lines
874 B
Python
"""Custom exceptions for the RSS scraper."""
|
|
|
|
|
|
class ScrapingError(Exception):
    """Base exception for scraping-related errors.

    The other exception classes visible in this module derive from this
    one, directly or indirectly, so ``except ScrapingError`` catches any
    of them. It adds no behaviour of its own on top of ``Exception``.
    """
|
|
|
|
|
|
class ValidationError(ScrapingError):
    """Exception raised for validation errors.

    Subclass of ``ScrapingError``; it adds no attributes or behaviour of
    its own.
    """
|
|
|
|
|
|
class NetworkError(ScrapingError):
    """Exception raised for network-related errors.

    Subclass of ``ScrapingError``; it adds no attributes or behaviour of
    its own.
    """
|
|
|
|
|
|
class PageLoadError(NetworkError):
    """Exception raised when page fails to load properly.

    Subclass of ``NetworkError``, so a handler written for
    ``NetworkError`` also catches this exception.
    """
|
|
|
|
|
|
class ContentSizeError(ScrapingError):
    """Exception raised when content exceeds size limits.

    Subclass of ``ScrapingError``; it adds no attributes or behaviour of
    its own.
    """
|
|
|
|
|
|
class ParseError(ScrapingError):
    """Exception raised when HTML parsing fails.

    Subclass of ``ScrapingError``; it adds no attributes or behaviour of
    its own.
    """
|
|
|
|
|
|
class ConfigurationError(ScrapingError):
    """Exception raised for configuration-related errors.

    Subclass of ``ScrapingError``; it adds no attributes or behaviour of
    its own.
    """
|
|
|
|
|
|
class FileOperationError(ScrapingError):
    """Exception raised for file operation errors.

    Subclass of ``ScrapingError``; it adds no attributes or behaviour of
    its own.
    """