Implements complete domain verification flow with: - rel=me link verification service - HTML fetching with security controls - Rate limiting to prevent abuse - Email validation utilities - Authorization and verification API endpoints - User-facing templates for authorization and verification flows This completes Phase 2: Domain Verification as designed. Tests: - All Phase 2 unit tests passing - Coverage: 85% overall - Migration tests updated 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
77 lines
2.1 KiB
Python
77 lines
2.1 KiB
Python
"""rel=me parser service for extracting email addresses from HTML."""
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
class RelMeParser:
|
|
"""Service for parsing rel=me links from HTML."""
|
|
|
|
def parse_relme_links(self, html: str) -> list[str]:
|
|
"""
|
|
Parse HTML for rel=me links.
|
|
|
|
Args:
|
|
html: HTML content to parse
|
|
|
|
Returns:
|
|
List of rel=me link URLs
|
|
"""
|
|
try:
|
|
soup = BeautifulSoup(html, 'html.parser')
|
|
links = []
|
|
|
|
# Find all <a> tags with rel="me" attribute
|
|
for link in soup.find_all('a', rel='me'):
|
|
href = link.get('href')
|
|
if href:
|
|
links.append(href)
|
|
|
|
# Also check for <link> tags with rel="me"
|
|
for link in soup.find_all('link', rel='me'):
|
|
href = link.get('href')
|
|
if href:
|
|
links.append(href)
|
|
|
|
return links
|
|
|
|
except Exception:
|
|
return []
|
|
|
|
def extract_mailto_email(self, relme_links: list[str]) -> str | None:
|
|
"""
|
|
Extract email address from mailto: links.
|
|
|
|
Args:
|
|
relme_links: List of rel=me link URLs
|
|
|
|
Returns:
|
|
Email address if found, None otherwise
|
|
"""
|
|
for link in relme_links:
|
|
if link.startswith('mailto:'):
|
|
# Extract email address from mailto: link
|
|
email = link[7:] # Remove 'mailto:' prefix
|
|
|
|
# Strip any query parameters (e.g., ?subject=...)
|
|
if '?' in email:
|
|
email = email.split('?')[0]
|
|
|
|
# Basic validation
|
|
if '@' in email and '.' in email:
|
|
return email.strip()
|
|
|
|
return None
|
|
|
|
def find_email(self, html: str) -> str | None:
|
|
"""
|
|
Find email address from HTML by parsing rel=me links.
|
|
|
|
Args:
|
|
html: HTML content to parse
|
|
|
|
Returns:
|
|
Email address if found, None otherwise
|
|
"""
|
|
relme_links = self.parse_relme_links(html)
|
|
return self.extract_mailto_email(relme_links)
|