Implements complete domain verification flow with:
- rel=me link verification service
- HTML fetching with security controls
- Rate limiting to prevent abuse
- Email validation utilities
- Authorization and verification API endpoints
- User-facing templates for authorization and verification flows

This completes Phase 2: Domain Verification as designed.

Tests:
- All Phase 2 unit tests passing
- Coverage: 85% overall
- Migration tests updated

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
176 lines
7.0 KiB
Python
176 lines
7.0 KiB
Python
"""Tests for HTML fetcher service."""
|
|
import pytest
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
from urllib.error import URLError, HTTPError
|
|
|
|
from gondulf.services.html_fetcher import HTMLFetcherService
|
|
|
|
|
|
class TestHTMLFetcherService:
    """Tests for HTMLFetcherService.

    Every test that reaches the network layer patches ``urlopen`` through
    the ``gondulf.services.html_fetcher`` module path, so the tests in this
    class never issue a real HTTP request.
    """

    @staticmethod
    def _mock_response(body=None, content_length=None, charset="utf-8"):
        """Build a MagicMock standing in for the object ``urlopen`` returns.

        Args:
            body: Bytes that ``read()`` should return. When ``None`` the
                ``read`` attribute is left as MagicMock's default.
            content_length: Value returned by every ``headers.get(...)``
                call. ``None`` simulates a response that carries no
                Content-Length header.
            charset: Value returned by ``headers.get_content_charset()``.

        Returns:
            The configured MagicMock. Entering it as a context manager
            yields the same mock.
        """
        response = MagicMock()
        if body is not None:
            response.read.return_value = body
        response.headers.get_content_charset.return_value = charset
        response.headers.get.return_value = content_length
        response.__enter__.return_value = response
        # A falsy __exit__ result means exceptions raised inside the
        # with-block are propagated instead of being swallowed.
        response.__exit__.return_value = None
        return response

    def test_init_default_params(self):
        """Test initialization with default parameters."""
        fetcher = HTMLFetcherService()

        assert fetcher.timeout == 10
        assert fetcher.max_size == 1024 * 1024
        assert fetcher.max_redirects == 5
        assert "Gondulf" in fetcher.user_agent

    def test_init_custom_params(self):
        """Test initialization with custom parameters."""
        fetcher = HTMLFetcherService(
            timeout=5,
            max_size=512 * 1024,
            max_redirects=3,
            user_agent="TestAgent/1.0",
        )

        assert fetcher.timeout == 5
        assert fetcher.max_size == 512 * 1024
        assert fetcher.max_redirects == 3
        assert fetcher.user_agent == "TestAgent/1.0"

    def test_fetch_requires_https(self):
        """Test that fetch requires HTTPS URLs."""
        fetcher = HTMLFetcherService()

        with pytest.raises(ValueError, match="must use HTTPS"):
            fetcher.fetch("http://example.com/")

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_success(self, mock_urlopen):
        """Test successful HTML fetch."""
        # No Content-Length header is reported by the mocked response.
        mock_urlopen.return_value = self._mock_response(
            b"<html><body>Test</body></html>"
        )

        html = HTMLFetcherService().fetch("https://example.com/")

        assert html == "<html><body>Test</body></html>"
        mock_urlopen.assert_called_once()

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_respects_timeout(self, mock_urlopen):
        """Test that fetch passes its timeout to urlopen as a keyword."""
        mock_urlopen.return_value = self._mock_response(b"<html></html>")

        HTMLFetcherService(timeout=15).fetch("https://example.com/")

        # call_args unpacks into (positional args, keyword args).
        _, call_kwargs = mock_urlopen.call_args
        assert call_kwargs['timeout'] == 15

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_content_length_too_large(self, mock_urlopen):
        """Test that fetch returns None if Content-Length exceeds max_size."""
        one_mib = 1024 * 1024
        # The body is deliberately left unset: only the advertised length
        # (2 MiB against a 1 MiB limit) is configured for this case.
        mock_urlopen.return_value = self._mock_response(
            content_length=str(2 * one_mib)
        )

        fetcher = HTMLFetcherService(max_size=one_mib)
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_response_too_large(self, mock_urlopen):
        """Test that fetch returns None if response exceeds max_size."""
        one_mib = 1024 * 1024
        # One byte over the limit, with no Content-Length header to warn
        # about it in advance.
        oversized_body = b"x" * (one_mib + 1)
        mock_urlopen.return_value = self._mock_response(oversized_body)

        fetcher = HTMLFetcherService(max_size=one_mib)
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_url_error(self, mock_urlopen):
        """Test that fetch returns None on URLError."""
        mock_urlopen.side_effect = URLError("Connection failed")

        html = HTMLFetcherService().fetch("https://example.com/")

        assert html is None

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_http_error(self, mock_urlopen):
        """Test that fetch returns None on HTTPError."""
        # Positional arguments are: url, code, msg, hdrs, fp.
        mock_urlopen.side_effect = HTTPError(
            "https://example.com/",
            404,
            "Not Found",
            {},
            None,
        )

        html = HTMLFetcherService().fetch("https://example.com/")

        assert html is None

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_timeout_error(self, mock_urlopen):
        """Test that fetch returns None on timeout."""
        mock_urlopen.side_effect = TimeoutError("Request timed out")

        html = HTMLFetcherService().fetch("https://example.com/")

        assert html is None

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_unicode_decode_error(self, mock_urlopen):
        """Test that undecodable bytes still yield a string, not None."""
        # 0xFF can never appear in well-formed UTF-8, so a strict decode of
        # this body with the advertised charset would raise.
        mock_urlopen.return_value = self._mock_response(b"\xff\xfe")

        html = HTMLFetcherService().fetch("https://example.com/")

        # fetch is expected to decode with errors='replace' rather than
        # fail, so a string comes back.
        assert html is not None
        assert isinstance(html, str)

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_sets_user_agent(self, mock_urlopen):
        """Test that fetch sets User-Agent header."""
        mock_urlopen.return_value = self._mock_response(b"<html></html>")

        fetcher = HTMLFetcherService(user_agent="CustomAgent/2.0")
        fetcher.fetch("https://example.com/")

        # The request object is the first positional argument to urlopen.
        # urllib stores header names capitalized, hence 'User-agent'.
        positional_args, _ = mock_urlopen.call_args
        request = positional_args[0]
        assert request.get_header('User-agent') == "CustomAgent/2.0"
|