"""Tests for validation module."""

import os
import tempfile
from unittest.mock import patch

import pytest

from src.rss_scraper.validation import validate_url, validate_output_path, validate_link
from src.rss_scraper.exceptions import ValidationError, FileOperationError
from src.rss_scraper.config import Config


class TestValidateUrl:
    """Test URL validation functionality."""

    @pytest.mark.parametrize(
        "url",
        [
            "https://www.warhammer-community.com/en-gb/",
            "https://warhammer-community.com/some/path",
        ],
    )
    def test_valid_url(self, url: str) -> None:
        """Test that each well-formed, allowed URL validates to True."""
        assert validate_url(url) is True

    # Explicit ids keep the report readable; the empty string in particular
    # would otherwise produce an empty test id.
    @pytest.mark.parametrize(
        "url",
        [
            pytest.param("not-a-url", id="no-scheme"),
            pytest.param("ftp://example.com", id="ftp-scheme"),
            pytest.param("", id="empty"),
            pytest.param("http://", id="http-no-host"),
            pytest.param("https://", id="https-no-host"),
        ],
    )
    def test_invalid_url_format(self, url: str) -> None:
        """Test that each malformed URL raises ValidationError."""
        with pytest.raises(ValidationError):
            validate_url(url)

    @pytest.mark.parametrize(
        "url",
        [
            "https://malicious-site.com",
            "https://example.com",
            "https://google.com",
        ],
    )
    def test_disallowed_domain(self, url: str) -> None:
        """Test that each URL on a disallowed domain raises ValidationError."""
        with pytest.raises(ValidationError):
            validate_url(url)

    @pytest.mark.parametrize(
        "url",
        [
            "https://WWW.WARHAMMER-COMMUNITY.COM",
            "https://Warhammer-Community.com",
        ],
    )
    def test_case_insensitive_domain(self, url: str) -> None:
        """Test that domain validation ignores letter case."""
        assert validate_url(url) is True


class TestValidateOutputPath:
    """Test output path validation functionality."""

    def test_valid_path_within_base(self) -> None:
        """Test that a path inside the base directory is returned absolute."""
        with tempfile.TemporaryDirectory() as temp_dir:
            test_path = os.path.join(temp_dir, "output.xml")
            result = validate_output_path(test_path, temp_dir)
            assert result == os.path.abspath(test_path)

    def test_path_outside_base_directory(self) -> None:
        """Test that a path outside the base directory raises ValidationError."""
        with tempfile.TemporaryDirectory() as temp_dir:
            outside_path = "/tmp/malicious.xml"
            with pytest.raises(ValidationError):
                validate_output_path(outside_path, temp_dir)

    def test_absolute_path_within_base_directory(self) -> None:
        """Test that absolute paths within base directory are allowed."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # This should work - absolute path within the base directory
            abs_path = os.path.join(temp_dir, "output.xml")
            result = validate_output_path(abs_path, temp_dir)
            assert result == os.path.abspath(abs_path)

    def test_creates_directory_if_not_exists(self) -> None:
        """Test that validation creates the base directory if it is missing."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # new_subdir does not exist yet; it is passed as the base directory.
            new_subdir = os.path.join(temp_dir, "new_subdir")
            test_path = os.path.join(new_subdir, "output.xml")
            result = validate_output_path(test_path, new_subdir)
            assert os.path.exists(new_subdir)
            assert result == os.path.abspath(test_path)

    @pytest.mark.parametrize(
        "path",
        [
            "../../../etc/passwd",
            "subdir/../../../etc/passwd",
            "normal/../../../dangerous.xml",
        ],
    )
    def test_directory_traversal_protection(self, path: str) -> None:
        """Test that directory traversal attacks are blocked."""
        with tempfile.TemporaryDirectory() as temp_dir:
            # The rejection may come from a traversal check or from the
            # outside-base check; in both cases this test requires it to
            # surface as ValidationError.
            with pytest.raises(ValidationError):
                validate_output_path(path, temp_dir)

    def test_permission_error(self) -> None:
        """Test that a PermissionError from os.makedirs raises FileOperationError."""
        denied = PermissionError("Permission denied")
        with patch('os.makedirs', side_effect=denied), pytest.raises(FileOperationError):
            validate_output_path("/some/path/file.xml", "/some/path")


class TestValidateLink:
    """Test link validation functionality."""

    def test_valid_absolute_link(self) -> None:
        """Test that an absolute link on the base domain is returned unchanged."""
        base_url = "https://www.warhammer-community.com"
        valid_link = "https://www.warhammer-community.com/article"
        result = validate_link(valid_link, base_url)
        assert result == valid_link

    def test_valid_relative_link(self) -> None:
        """Test that a root-relative link is resolved against the base URL."""
        base_url = "https://www.warhammer-community.com/en-gb/"
        relative_link = "/article/some-article"
        result = validate_link(relative_link, base_url)
        assert result == "https://www.warhammer-community.com/article/some-article"

    def test_none_link(self) -> None:
        """Test that a None link yields None."""
        base_url = "https://www.warhammer-community.com"
        result = validate_link(None, base_url)
        assert result is None

    def test_empty_link(self) -> None:
        """Test that an empty link yields None."""
        base_url = "https://www.warhammer-community.com"
        result = validate_link("", base_url)
        assert result is None

    def test_invalid_domain_link(self) -> None:
        """Test that a link on a different domain yields None."""
        base_url = "https://www.warhammer-community.com"
        invalid_link = "https://malicious-site.com/article"
        result = validate_link(invalid_link, base_url)
        assert result is None

    @pytest.mark.parametrize(
        "link",
        [
            pytest.param("not-a-url", id="no-scheme"),
            pytest.param("://missing-scheme", id="missing-scheme"),
            pytest.param("https://", id="https-no-host"),
        ],
    )
    def test_malformed_link(self, link: str) -> None:
        """Test that each malformed link yields None."""
        base_url = "https://www.warhammer-community.com"
        result = validate_link(link, base_url)
        assert result is None