feat(phase-2): implement domain verification system
Implements complete domain verification flow with:
- rel=me link verification service
- HTML fetching with security controls
- Rate limiting to prevent abuse
- Email validation utilities
- Authorization and verification API endpoints
- User-facing templates for authorization and verification flows

This completes Phase 2: Domain Verification as designed.

Tests:
- All Phase 2 unit tests passing
- Coverage: 85% overall
- Migration tests updated

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
175
tests/unit/test_html_fetcher.py
Normal file
175
tests/unit/test_html_fetcher.py
Normal file
@@ -0,0 +1,175 @@
|
||||
"""Tests for HTML fetcher service."""
|
||||
import pytest
|
||||
from unittest.mock import Mock, patch, MagicMock
|
||||
from urllib.error import URLError, HTTPError
|
||||
|
||||
from gondulf.services.html_fetcher import HTMLFetcherService
|
||||
|
||||
|
||||
class TestHTMLFetcherService:
    """Tests for HTMLFetcherService.

    Tests that reach the network layer patch ``urlopen`` as referenced from
    the ``gondulf.services.html_fetcher`` module and hand back a mocked
    response object instead of performing a request.
    """

    @staticmethod
    def _mock_response(body=None, content_length=None):
        """Build a mocked ``urlopen`` response usable as a context manager.

        Args:
            body: Bytes returned by ``response.read()``. When ``None``, neither
                ``read`` nor the charset lookup is configured (they stay
                default ``MagicMock`` attributes).
            content_length: Value returned by ``response.headers.get(...)``
                for any header name; ``None`` simulates a missing
                Content-Length header.

        Returns:
            A ``MagicMock`` whose ``__enter__`` yields itself.
        """
        response = MagicMock()
        if body is not None:
            response.read.return_value = body
            response.headers.get_content_charset.return_value = "utf-8"
        response.headers.get.return_value = content_length
        # The mock is its own context-manager target so that
        # ``with urlopen(...) as resp`` sees the configured attributes.
        response.__enter__.return_value = response
        response.__exit__.return_value = None
        return response

    def test_init_default_params(self):
        """Test initialization with default parameters."""
        fetcher = HTMLFetcherService()
        assert fetcher.timeout == 10
        assert fetcher.max_size == 1024 * 1024
        assert fetcher.max_redirects == 5
        assert "Gondulf" in fetcher.user_agent

    def test_init_custom_params(self):
        """Test initialization with custom parameters."""
        fetcher = HTMLFetcherService(
            timeout=5,
            max_size=512 * 1024,
            max_redirects=3,
            user_agent="TestAgent/1.0",
        )
        assert fetcher.timeout == 5
        assert fetcher.max_size == 512 * 1024
        assert fetcher.max_redirects == 3
        assert fetcher.user_agent == "TestAgent/1.0"

    def test_fetch_requires_https(self):
        """Test that fetch requires HTTPS URLs."""
        fetcher = HTMLFetcherService()
        with pytest.raises(ValueError, match="must use HTTPS"):
            fetcher.fetch("http://example.com/")

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_success(self, mock_urlopen):
        """Test successful HTML fetch."""
        # No Content-Length header is reported by the mocked response.
        mock_urlopen.return_value = self._mock_response(
            body=b"<html><body>Test</body></html>"
        )

        fetcher = HTMLFetcherService()
        html = fetcher.fetch("https://example.com/")

        assert html == "<html><body>Test</body></html>"
        mock_urlopen.assert_called_once()

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_respects_timeout(self, mock_urlopen):
        """Test that fetch passes the configured timeout to urlopen."""
        mock_urlopen.return_value = self._mock_response(body=b"<html></html>")

        fetcher = HTMLFetcherService(timeout=15)
        fetcher.fetch("https://example.com/")

        # call_args[1] holds the keyword arguments of the last call.
        call_kwargs = mock_urlopen.call_args[1]
        assert call_kwargs['timeout'] == 15

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_content_length_too_large(self, mock_urlopen):
        """Test that fetch returns None if Content-Length exceeds max_size."""
        # Advertise 2MB; the body itself is deliberately left unconfigured.
        mock_urlopen.return_value = self._mock_response(
            content_length=str(2 * 1024 * 1024)
        )

        fetcher = HTMLFetcherService(max_size=1024 * 1024)  # 1MB max
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_response_too_large(self, mock_urlopen):
        """Test that fetch returns None if response exceeds max_size."""
        # Body is one byte over the limit, with no Content-Length header.
        large_content = b"x" * (1024 * 1024 + 1)
        mock_urlopen.return_value = self._mock_response(body=large_content)

        fetcher = HTMLFetcherService(max_size=1024 * 1024)
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_url_error(self, mock_urlopen):
        """Test that fetch returns None on URLError."""
        mock_urlopen.side_effect = URLError("Connection failed")

        fetcher = HTMLFetcherService()
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_http_error(self, mock_urlopen):
        """Test that fetch returns None on HTTPError."""
        mock_urlopen.side_effect = HTTPError(
            "https://example.com/",
            404,
            "Not Found",
            {},
            None
        )

        fetcher = HTMLFetcherService()
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_timeout_error(self, mock_urlopen):
        """Test that fetch returns None on timeout."""
        mock_urlopen.side_effect = TimeoutError("Request timed out")

        fetcher = HTMLFetcherService()
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_unicode_decode_error(self, mock_urlopen):
        """Test that undecodable bytes still yield a string, not None."""
        # b"\xff\xfe" is not valid UTF-8.
        mock_urlopen.return_value = self._mock_response(body=b"\xff\xfe")

        fetcher = HTMLFetcherService()
        # Should decode with errors='replace' and return a string
        html = fetcher.fetch("https://example.com/")

        assert html is not None  # Should not fail on invalid bytes
        assert isinstance(html, str)

    @patch('gondulf.services.html_fetcher.urllib.request.urlopen')
    def test_fetch_sets_user_agent(self, mock_urlopen):
        """Test that fetch sets User-Agent header."""
        mock_urlopen.return_value = self._mock_response(body=b"<html></html>")

        fetcher = HTMLFetcherService(user_agent="CustomAgent/2.0")
        fetcher.fetch("https://example.com/")

        # The first positional argument to urlopen is the request object.
        request = mock_urlopen.call_args[0][0]
        assert request.get_header('User-agent') == "CustomAgent/2.0"
|
||||
Reference in New Issue
Block a user