Phil 25086fc01b Add comprehensive RSS scraper implementation with security and testing
- Modular architecture with separate modules for scraping, parsing, security, validation, and caching
- Comprehensive security measures including HTML sanitization, rate limiting, and input validation
- Robust error handling with custom exceptions and retry logic
- HTTP caching with ETags and Last-Modified headers for efficiency
- Pre-compiled regex patterns for improved performance
- Comprehensive test suite with 66 tests covering all major functionality
- Docker support for containerized deployment
- Configuration management with environment variable support
- Working parser that successfully extracts 32 articles from Warhammer Community

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-06 09:15:06 -06:00

202 lines
7.8 KiB
Python

"""Tests for main module functionality."""
import pytest
import sys
import tempfile
from unittest.mock import patch, MagicMock
from argparse import Namespace
from main import parse_arguments, setup_logging, scrape_and_generate_rss
from src.rss_scraper.exceptions import ValidationError, NetworkError, ParseError
class TestParseArguments:
    """Test command line argument parsing."""

    def test_parse_arguments_defaults(self):
        """Test parsing with default arguments."""
        with patch('sys.argv', ['main.py']):
            args = parse_arguments()
            # Defaults defined by main.parse_arguments.
            assert args.url == 'https://www.warhammer-community.com/en-gb/'
            assert args.output_dir is None
            assert args.max_scroll == 5
            assert args.log_level == 'INFO'
            assert args.log_file == 'scraper.log'

    def test_parse_arguments_custom_values(self):
        """Test parsing with custom argument values."""
        test_args = [
            'main.py',
            '--url', 'https://example.com',
            '--output-dir', '/custom/path',
            '--max-scroll', '10',
            '--log-level', 'DEBUG',
            '--log-file', 'custom.log'
        ]
        with patch('sys.argv', test_args):
            args = parse_arguments()
            assert args.url == 'https://example.com'
            assert args.output_dir == '/custom/path'
            assert args.max_scroll == 10
            assert args.log_level == 'DEBUG'
            assert args.log_file == 'custom.log'

    def test_parse_arguments_invalid_max_scroll(self):
        """Test parsing fails with invalid max_scroll value."""
        test_args = ['main.py', '--max-scroll', '-1']
        with patch('sys.argv', test_args):
            # argparse exits the process on invalid argument values.
            with pytest.raises(SystemExit):
                parse_arguments()

    def test_parse_arguments_relative_output_dir(self):
        """Test that relative output directory is converted to absolute."""
        import os
        test_args = ['main.py', '--output-dir', 'relative/path']
        with patch('sys.argv', test_args):
            args = parse_arguments()
            # os.path.isabs is portable; the previous startswith('/') check
            # only held on POSIX and would fail on Windows drive paths.
            assert os.path.isabs(args.output_dir)
            # Normalize separators so the suffix check is also portable.
            assert args.output_dir.replace(os.sep, '/').endswith('relative/path')
class TestSetupLogging:
    """Test logging setup functionality."""

    @staticmethod
    def _remove_log_file(path):
        """Close root file handlers and delete the temp log file.

        The original tests used NamedTemporaryFile(delete=False) and never
        unlinked the file, leaking one temp file per test run. Handlers are
        closed first so the unlink also works on platforms that forbid
        deleting open files (e.g. Windows).
        """
        import logging
        import os
        root = logging.getLogger()
        for handler in root.handlers[:]:
            if isinstance(handler, logging.FileHandler):
                handler.close()
                root.removeHandler(handler)
        if os.path.exists(path):
            os.unlink(path)

    def test_setup_logging_info_level(self):
        """Test logging setup with INFO level."""
        import logging
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            log_path = temp_file.name
        try:
            setup_logging('INFO', log_path)
            logger = logging.getLogger('test')
            logger.info("Test message")
            logger.debug("Debug message")  # Should not appear at INFO level
            # Root logger should be configured at INFO.
            assert logging.getLogger().level == logging.INFO
        finally:
            self._remove_log_file(log_path)

    def test_setup_logging_debug_level(self):
        """Test logging setup with DEBUG level."""
        import logging
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            log_path = temp_file.name
        try:
            setup_logging('DEBUG', log_path)
            assert logging.getLogger().level == logging.DEBUG
        finally:
            self._remove_log_file(log_path)

    def test_setup_logging_clears_existing_handlers(self):
        """Test that setup_logging clears existing handlers."""
        import logging
        # Add a dummy handler that setup_logging is expected to remove.
        dummy_handler = logging.StreamHandler()
        logging.getLogger().addHandler(dummy_handler)
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            log_path = temp_file.name
        try:
            setup_logging('INFO', log_path)
            # Should have exactly 2 handlers (console + file); the dummy
            # handler must have been cleared.
            assert len(logging.getLogger().handlers) == 2
        finally:
            self._remove_log_file(log_path)
class TestScrapeAndGenerateRss:
    """Test main scraping function."""

    def test_scrape_and_generate_rss_success(self):
        """Test successful RSS scraping and generation."""
        url = "https://www.warhammer-community.com/en-gb/"
        output_dir = "/test/output"
        with patch('main.validate_url') as mock_validate_url, \
             patch('main.load_page_with_retry') as mock_load_page, \
             patch('main.extract_articles_from_html') as mock_extract_articles, \
             patch('main.generate_rss_feed') as mock_generate_rss, \
             patch('main.save_rss_feed') as mock_save_rss, \
             patch('main.save_debug_html') as mock_save_html:
            # Arrange: every stage of the pipeline succeeds.
            mock_validate_url.return_value = True
            mock_load_page.return_value = "<html>test</html>"
            mock_extract_articles.return_value = [
                {'title': 'Test', 'link': 'http://example.com', 'date': 'date'}
            ]
            mock_generate_rss.return_value = b"<rss>feed</rss>"
            mock_save_rss.return_value = "/path/to/feed.xml"

            # Act: should complete without raising.
            scrape_and_generate_rss(url, output_dir)

            # Assert: each stage was invoked exactly once.
            mock_validate_url.assert_called_once_with(url)
            mock_load_page.assert_called_once_with(url)
            mock_extract_articles.assert_called_once_with("<html>test</html>", url)
            mock_generate_rss.assert_called_once()
            mock_save_rss.assert_called_once()
            mock_save_html.assert_called_once()

    def test_scrape_and_generate_rss_validation_error(self):
        """Test scraping fails with validation error."""
        with patch('main.validate_url') as mock_validate_url:
            mock_validate_url.side_effect = ValidationError("Invalid URL")
            with pytest.raises(ValidationError):
                scrape_and_generate_rss("invalid-url")

    def test_scrape_and_generate_rss_network_error(self):
        """Test scraping fails with network error."""
        with patch('main.validate_url') as mock_validate_url, \
             patch('main.load_page_with_retry') as mock_load_page:
            mock_validate_url.return_value = True
            mock_load_page.side_effect = NetworkError("Network error")
            with pytest.raises(NetworkError):
                scrape_and_generate_rss("https://www.warhammer-community.com/en-gb/")

    def test_scrape_and_generate_rss_parse_error(self):
        """Test scraping fails with parse error."""
        with patch('main.validate_url') as mock_validate_url, \
             patch('main.load_page_with_retry') as mock_load_page, \
             patch('main.extract_articles_from_html') as mock_extract_articles:
            mock_validate_url.return_value = True
            mock_load_page.return_value = "<html>test</html>"
            mock_extract_articles.side_effect = ParseError("Parse error")
            with pytest.raises(ParseError):
                scrape_and_generate_rss("https://www.warhammer-community.com/en-gb/")

    def test_scrape_and_generate_rss_default_output_dir(self):
        """Test scraping uses default output directory when none provided."""
        url = "https://www.warhammer-community.com/en-gb/"
        with patch('main.validate_url') as mock_validate_url, \
             patch('main.load_page_with_retry') as mock_load_page, \
             patch('main.extract_articles_from_html') as mock_extract_articles, \
             patch('main.generate_rss_feed') as mock_generate_rss, \
             patch('main.save_rss_feed') as mock_save_rss, \
             patch('main.save_debug_html'):
            # Arrange: pipeline succeeds with an empty article list.
            mock_validate_url.return_value = True
            mock_load_page.return_value = "<html>test</html>"
            mock_extract_articles.return_value = []
            mock_generate_rss.return_value = b"<rss>feed</rss>"
            mock_save_rss.return_value = "/path/to/feed.xml"

            # Act: call without output_dir.
            scrape_and_generate_rss(url)

            # Assert: default output dir "." is passed through to save.
            mock_validate_url.assert_called_once_with(url)
            mock_save_rss.assert_called_once_with(b"<rss>feed</rss>", ".")