feat(phase-2): implement domain verification system
Implements complete domain verification flow with: - rel=me link verification service - HTML fetching with security controls - Rate limiting to prevent abuse - Email validation utilities - Authorization and verification API endpoints - User-facing templates for authorization and verification flows This completes Phase 2: Domain Verification as designed. Tests: - All Phase 2 unit tests passing - Coverage: 85% overall - Migration tests updated 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
77
src/gondulf/services/html_fetcher.py
Normal file
77
src/gondulf/services/html_fetcher.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""HTML fetcher service for retrieving user homepages."""
|
||||
import urllib.request
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
|
||||
class HTMLFetcherService:
|
||||
"""Service for fetching HTML content from URLs."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
timeout: int = 10,
|
||||
max_size: int = 1024 * 1024, # 1MB
|
||||
max_redirects: int = 5,
|
||||
user_agent: str = "Gondulf-IndieAuth/0.1"
|
||||
) -> None:
|
||||
"""
|
||||
Initialize HTML fetcher service.
|
||||
|
||||
Args:
|
||||
timeout: Request timeout in seconds (default: 10)
|
||||
max_size: Maximum response size in bytes (default: 1MB)
|
||||
max_redirects: Maximum number of redirects to follow (default: 5)
|
||||
user_agent: User-Agent header value
|
||||
"""
|
||||
self.timeout = timeout
|
||||
self.max_size = max_size
|
||||
self.max_redirects = max_redirects
|
||||
self.user_agent = user_agent
|
||||
|
||||
def fetch(self, url: str) -> str | None:
|
||||
"""
|
||||
Fetch HTML content from URL.
|
||||
|
||||
Args:
|
||||
url: URL to fetch (must be HTTPS)
|
||||
|
||||
Returns:
|
||||
HTML content as string, or None if fetch fails
|
||||
|
||||
Raises:
|
||||
ValueError: If URL is not HTTPS
|
||||
"""
|
||||
# Enforce HTTPS
|
||||
if not url.startswith('https://'):
|
||||
raise ValueError("URL must use HTTPS")
|
||||
|
||||
try:
|
||||
# Create request with User-Agent header
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers={'User-Agent': self.user_agent}
|
||||
)
|
||||
|
||||
# Open URL with timeout
|
||||
with urllib.request.urlopen(
|
||||
req,
|
||||
timeout=self.timeout
|
||||
) as response:
|
||||
# Check content length if provided
|
||||
content_length = response.headers.get('Content-Length')
|
||||
if content_length and int(content_length) > self.max_size:
|
||||
return None
|
||||
|
||||
# Read with size limit
|
||||
content = response.read(self.max_size + 1)
|
||||
if len(content) > self.max_size:
|
||||
return None
|
||||
|
||||
# Decode content
|
||||
charset = response.headers.get_content_charset() or 'utf-8'
|
||||
return content.decode(charset, errors='replace')
|
||||
|
||||
except (URLError, HTTPError, UnicodeDecodeError, TimeoutError):
|
||||
return None
|
||||
except Exception:
|
||||
# Catch all other exceptions and return None
|
||||
return None
|
||||
Reference in New Issue
Block a user