feat(phase-2): implement domain verification system
Implements the complete domain verification flow with:
- rel=me link verification service
- HTML fetching with security controls
- Rate limiting to prevent abuse
- Email validation utilities
- Authorization and verification API endpoints
- User-facing templates for authorization and verification flows

This completes Phase 2: Domain Verification as designed.

Tests:
- All Phase 2 unit tests passing
- Coverage: 85% overall
- Migration tests updated

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
76
src/gondulf/services/relme_parser.py
Normal file
76
src/gondulf/services/relme_parser.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""rel=me parser service for extracting email addresses from HTML."""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class RelMeParser:
|
||||
"""Service for parsing rel=me links from HTML."""
|
||||
|
||||
def parse_relme_links(self, html: str) -> list[str]:
|
||||
"""
|
||||
Parse HTML for rel=me links.
|
||||
|
||||
Args:
|
||||
html: HTML content to parse
|
||||
|
||||
Returns:
|
||||
List of rel=me link URLs
|
||||
"""
|
||||
try:
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
links = []
|
||||
|
||||
# Find all <a> tags with rel="me" attribute
|
||||
for link in soup.find_all('a', rel='me'):
|
||||
href = link.get('href')
|
||||
if href:
|
||||
links.append(href)
|
||||
|
||||
# Also check for <link> tags with rel="me"
|
||||
for link in soup.find_all('link', rel='me'):
|
||||
href = link.get('href')
|
||||
if href:
|
||||
links.append(href)
|
||||
|
||||
return links
|
||||
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
def extract_mailto_email(self, relme_links: list[str]) -> str | None:
|
||||
"""
|
||||
Extract email address from mailto: links.
|
||||
|
||||
Args:
|
||||
relme_links: List of rel=me link URLs
|
||||
|
||||
Returns:
|
||||
Email address if found, None otherwise
|
||||
"""
|
||||
for link in relme_links:
|
||||
if link.startswith('mailto:'):
|
||||
# Extract email address from mailto: link
|
||||
email = link[7:] # Remove 'mailto:' prefix
|
||||
|
||||
# Strip any query parameters (e.g., ?subject=...)
|
||||
if '?' in email:
|
||||
email = email.split('?')[0]
|
||||
|
||||
# Basic validation
|
||||
if '@' in email and '.' in email:
|
||||
return email.strip()
|
||||
|
||||
return None
|
||||
|
||||
def find_email(self, html: str) -> str | None:
|
||||
"""
|
||||
Find email address from HTML by parsing rel=me links.
|
||||
|
||||
Args:
|
||||
html: HTML content to parse
|
||||
|
||||
Returns:
|
||||
Email address if found, None otherwise
|
||||
"""
|
||||
relme_links = self.parse_relme_links(html)
|
||||
return self.extract_mailto_email(relme_links)
|
||||
Reference in New Issue
Block a user