"""Tests for main module functionality.""" import pytest import sys import tempfile from unittest.mock import patch, MagicMock from argparse import Namespace from main import parse_arguments, setup_logging, scrape_and_generate_rss from src.rss_scraper.exceptions import ValidationError, NetworkError, ParseError class TestParseArguments: """Test command line argument parsing.""" def test_parse_arguments_defaults(self): """Test parsing with default arguments.""" with patch('sys.argv', ['main.py']): args = parse_arguments() assert args.url == 'https://www.warhammer-community.com/en-gb/' assert args.output_dir is None assert args.max_scroll == 5 assert args.log_level == 'INFO' assert args.log_file == 'scraper.log' def test_parse_arguments_custom_values(self): """Test parsing with custom argument values.""" test_args = [ 'main.py', '--url', 'https://example.com', '--output-dir', '/custom/path', '--max-scroll', '10', '--log-level', 'DEBUG', '--log-file', 'custom.log' ] with patch('sys.argv', test_args): args = parse_arguments() assert args.url == 'https://example.com' assert args.output_dir == '/custom/path' assert args.max_scroll == 10 assert args.log_level == 'DEBUG' assert args.log_file == 'custom.log' def test_parse_arguments_invalid_max_scroll(self): """Test parsing fails with invalid max_scroll value.""" test_args = ['main.py', '--max-scroll', '-1'] with patch('sys.argv', test_args): with pytest.raises(SystemExit): parse_arguments() def test_parse_arguments_relative_output_dir(self): """Test that relative output directory is converted to absolute.""" test_args = ['main.py', '--output-dir', 'relative/path'] with patch('sys.argv', test_args): args = parse_arguments() assert args.output_dir.startswith('/') # Should be absolute path assert args.output_dir.endswith('relative/path') class TestSetupLogging: """Test logging setup functionality.""" def test_setup_logging_info_level(self): """Test logging setup with INFO level.""" with tempfile.NamedTemporaryFile(delete=False) as temp_file: setup_logging('INFO', temp_file.name) import logging logger = logging.getLogger('test') logger.info("Test message") logger.debug("Debug message") # Should not appear # Check that the log file was created and has correct level assert logging.getLogger().level == logging.INFO def test_setup_logging_debug_level(self): """Test logging setup with DEBUG level.""" with tempfile.NamedTemporaryFile(delete=False) as temp_file: setup_logging('DEBUG', temp_file.name) import logging assert logging.getLogger().level == logging.DEBUG def test_setup_logging_clears_existing_handlers(self): """Test that setup_logging clears existing handlers.""" import logging # Add a dummy handler dummy_handler = logging.StreamHandler() logging.getLogger().addHandler(dummy_handler) initial_handler_count = len(logging.getLogger().handlers) with tempfile.NamedTemporaryFile(delete=False) as temp_file: setup_logging('INFO', temp_file.name) # Should have exactly 2 handlers (console + file) assert len(logging.getLogger().handlers) == 2 class TestScrapeAndGenerateRss: """Test main scraping function.""" @patch('main.save_debug_html') @patch('main.save_rss_feed') @patch('main.generate_rss_feed') @patch('main.extract_articles_from_html') @patch('main.load_page_with_retry') @patch('main.validate_url') def test_scrape_and_generate_rss_success( self, mock_validate_url, mock_load_page, mock_extract_articles, mock_generate_rss, mock_save_rss, mock_save_html ): """Test successful RSS scraping and generation.""" # Setup mocks mock_validate_url.return_value = True mock_load_page.return_value = "test" mock_extract_articles.return_value = [ {'title': 'Test', 'link': 'http://example.com', 'date': 'date'} ] mock_generate_rss.return_value = b"feed" mock_save_rss.return_value = "/path/to/feed.xml" url = "https://www.warhammer-community.com/en-gb/" output_dir = "/test/output" # Should not raise any exception scrape_and_generate_rss(url, output_dir) # Verify all functions were called mock_validate_url.assert_called_once_with(url) mock_load_page.assert_called_once_with(url) mock_extract_articles.assert_called_once_with("test", url) mock_generate_rss.assert_called_once() mock_save_rss.assert_called_once() mock_save_html.assert_called_once() @patch('main.validate_url') def test_scrape_and_generate_rss_validation_error(self, mock_validate_url): """Test scraping fails with validation error.""" mock_validate_url.side_effect = ValidationError("Invalid URL") with pytest.raises(ValidationError): scrape_and_generate_rss("invalid-url") @patch('main.load_page_with_retry') @patch('main.validate_url') def test_scrape_and_generate_rss_network_error( self, mock_validate_url, mock_load_page ): """Test scraping fails with network error.""" mock_validate_url.return_value = True mock_load_page.side_effect = NetworkError("Network error") with pytest.raises(NetworkError): scrape_and_generate_rss("https://www.warhammer-community.com/en-gb/") @patch('main.extract_articles_from_html') @patch('main.load_page_with_retry') @patch('main.validate_url') def test_scrape_and_generate_rss_parse_error( self, mock_validate_url, mock_load_page, mock_extract_articles ): """Test scraping fails with parse error.""" mock_validate_url.return_value = True mock_load_page.return_value = "test" mock_extract_articles.side_effect = ParseError("Parse error") with pytest.raises(ParseError): scrape_and_generate_rss("https://www.warhammer-community.com/en-gb/") @patch('main.save_debug_html') @patch('main.save_rss_feed') @patch('main.generate_rss_feed') @patch('main.extract_articles_from_html') @patch('main.load_page_with_retry') @patch('main.validate_url') def test_scrape_and_generate_rss_default_output_dir( self, mock_validate_url, mock_load_page, mock_extract_articles, mock_generate_rss, mock_save_rss, mock_save_html ): """Test scraping uses default output directory when none provided.""" # Setup mocks mock_validate_url.return_value = True mock_load_page.return_value = "test" mock_extract_articles.return_value = [] mock_generate_rss.return_value = b"feed" mock_save_rss.return_value = "/path/to/feed.xml" url = "https://www.warhammer-community.com/en-gb/" # Call without output_dir scrape_and_generate_rss(url) # Verify functions were called (output_dir would be set to default) mock_validate_url.assert_called_once_with(url) mock_save_rss.assert_called_once_with(b"feed", ".") # Default output dir