feat(phase-2): implement domain verification system

Implements complete domain verification flow with:
- rel=me link verification service
- HTML fetching with security controls
- Rate limiting to prevent abuse
- Email validation utilities
- Authorization and verification API endpoints
- User-facing templates for authorization and verification flows

This completes Phase 2: Domain Verification as designed.

Tests:
- All Phase 2 unit tests passing
- Coverage: 85% overall
- Migration tests updated

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-20 13:44:33 -07:00
parent 11ecd953d8
commit 074f74002c
28 changed files with 2283 additions and 14 deletions

View File

@@ -0,0 +1,175 @@
"""Tests for HTML fetcher service."""
import pytest
from unittest.mock import Mock, patch, MagicMock
from urllib.error import URLError, HTTPError
from gondulf.services.html_fetcher import HTMLFetcherService
class TestHTMLFetcherService:
    """Tests for HTMLFetcherService.

    Tests that issue a request patch ``urlopen`` at ``URLOPEN_TARGET`` and
    feed it either a canned response (see ``_mock_response``) or an exception.
    """

    # Patch urlopen where the fetcher module looks it up, not in urllib itself.
    URLOPEN_TARGET = 'gondulf.services.html_fetcher.urllib.request.urlopen'

    @staticmethod
    def _mock_response(body=None, content_length=None, charset="utf-8"):
        """Build a MagicMock standing in for the object returned by urlopen.

        Args:
            body: Bytes returned by ``read()``. When ``None``, ``read()`` and
                the charset lookup are left as unconfigured MagicMock
                attributes (used by tests that expect rejection before any
                body is read).
            content_length: Value returned by ``headers.get(...)`` for any
                header name; ``None`` simulates a missing Content-Length.
            charset: Value returned by ``headers.get_content_charset()``;
                only configured when ``body`` is given.

        Returns:
            A MagicMock usable as a context manager that yields itself.
        """
        response = MagicMock()
        if body is not None:
            response.read.return_value = body
            response.headers.get_content_charset.return_value = charset
        response.headers.get.return_value = content_length
        # Support ``with urlopen(...) as response:`` in the code under test.
        response.__enter__.return_value = response
        response.__exit__.return_value = None
        return response

    def test_init_default_params(self):
        """Test initialization with default parameters."""
        fetcher = HTMLFetcherService()

        assert fetcher.timeout == 10
        assert fetcher.max_size == 1024 * 1024
        assert fetcher.max_redirects == 5
        assert "Gondulf" in fetcher.user_agent

    def test_init_custom_params(self):
        """Test initialization with custom parameters."""
        fetcher = HTMLFetcherService(
            timeout=5,
            max_size=512 * 1024,
            max_redirects=3,
            user_agent="TestAgent/1.0",
        )

        assert fetcher.timeout == 5
        assert fetcher.max_size == 512 * 1024
        assert fetcher.max_redirects == 3
        assert fetcher.user_agent == "TestAgent/1.0"

    def test_fetch_requires_https(self):
        """Test that fetch raises ValueError for non-HTTPS URLs."""
        fetcher = HTMLFetcherService()

        with pytest.raises(ValueError, match="must use HTTPS"):
            fetcher.fetch("http://example.com/")

    @patch(URLOPEN_TARGET)
    def test_fetch_success(self, mock_urlopen):
        """Test successful HTML fetch."""
        # No Content-Length header is advertised (content_length defaults to None).
        mock_urlopen.return_value = self._mock_response(
            b"<html><body>Test</body></html>"
        )

        fetcher = HTMLFetcherService()
        html = fetcher.fetch("https://example.com/")

        assert html == "<html><body>Test</body></html>"
        mock_urlopen.assert_called_once()

    @patch(URLOPEN_TARGET)
    def test_fetch_respects_timeout(self, mock_urlopen):
        """Test that fetch passes the configured timeout to urlopen."""
        mock_urlopen.return_value = self._mock_response(b"<html></html>")

        fetcher = HTMLFetcherService(timeout=15)
        fetcher.fetch("https://example.com/")

        # call_args unpacks to (positional args, keyword args).
        _, call_kwargs = mock_urlopen.call_args
        assert call_kwargs['timeout'] == 15

    @patch(URLOPEN_TARGET)
    def test_fetch_content_length_too_large(self, mock_urlopen):
        """Test that fetch returns None if Content-Length exceeds max_size."""
        mock_urlopen.return_value = self._mock_response(
            content_length=str(2 * 1024 * 1024)  # 2MB advertised
        )

        fetcher = HTMLFetcherService(max_size=1024 * 1024)  # 1MB max
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch(URLOPEN_TARGET)
    def test_fetch_response_too_large(self, mock_urlopen):
        """Test that fetch returns None if the body read exceeds max_size."""
        # No Content-Length header, so only the size of the bytes actually
        # read can reveal that the limit was exceeded.
        large_content = b"x" * (1024 * 1024 + 1)  # 1MB + 1 byte
        mock_urlopen.return_value = self._mock_response(large_content)

        fetcher = HTMLFetcherService(max_size=1024 * 1024)
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch(URLOPEN_TARGET)
    def test_fetch_url_error(self, mock_urlopen):
        """Test that fetch returns None on URLError."""
        mock_urlopen.side_effect = URLError("Connection failed")

        fetcher = HTMLFetcherService()
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch(URLOPEN_TARGET)
    def test_fetch_http_error(self, mock_urlopen):
        """Test that fetch returns None on HTTPError."""
        mock_urlopen.side_effect = HTTPError(
            url="https://example.com/",
            code=404,
            msg="Not Found",
            hdrs={},
            fp=None,
        )

        fetcher = HTMLFetcherService()
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch(URLOPEN_TARGET)
    def test_fetch_timeout_error(self, mock_urlopen):
        """Test that fetch returns None on timeout."""
        mock_urlopen.side_effect = TimeoutError("Request timed out")

        fetcher = HTMLFetcherService()
        html = fetcher.fetch("https://example.com/")

        assert html is None

    @patch(URLOPEN_TARGET)
    def test_fetch_unicode_decode_error(self, mock_urlopen):
        """Test that fetch still returns text when the body is not valid UTF-8.

        Undecodable bytes must not turn the fetch into a failure (``None``);
        the expectation is that decoding is lenient (errors='replace').
        """
        mock_urlopen.return_value = self._mock_response(
            b"\xff\xfe"  # Invalid UTF-8
        )

        fetcher = HTMLFetcherService()
        html = fetcher.fetch("https://example.com/")

        assert html is not None  # Should not fail on undecodable bytes
        assert isinstance(html, str)

    @patch(URLOPEN_TARGET)
    def test_fetch_sets_user_agent(self, mock_urlopen):
        """Test that fetch sets User-Agent header."""
        mock_urlopen.return_value = self._mock_response(b"<html></html>")

        fetcher = HTMLFetcherService(user_agent="CustomAgent/2.0")
        fetcher.fetch("https://example.com/")

        # The first positional argument to urlopen is the request object;
        # the header is looked up under the capitalization 'User-agent'.
        call_args, _ = mock_urlopen.call_args
        request = call_args[0]
        assert request.get_header('User-agent') == "CustomAgent/2.0"