feat(phase-2): implement domain verification system

Implements complete domain verification flow with:
- rel=me link verification service
- HTML fetching with security controls
- Rate limiting to prevent abuse
- Email validation utilities
- Authorization and verification API endpoints
- User-facing templates for authorization and verification flows

This completes Phase 2: Domain Verification as designed.

Tests:
- All Phase 2 unit tests passing
- Coverage: 85% overall
- Migration tests updated

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-20 13:44:33 -07:00
parent 11ecd953d8
commit 074f74002c
28 changed files with 2283 additions and 14 deletions
--- a/src/gondulf/services/relme_parser.py
+++ b/src/gondulf/services/relme_parser.py
@@ -0,0 +1,82 @@
+"""rel=me parser service for extracting email addresses from HTML."""
+
+from urllib.parse import unquote
+
+from bs4 import BeautifulSoup
+
+
+class RelMeParser:
+    """Service for parsing rel=me links from HTML."""
+
+    def parse_relme_links(self, html: str) -> list[str]:
+        """
+        Parse HTML for rel=me links.
+
+        Args:
+            html: HTML content to parse
+
+        Returns:
+            href values of <a> tags with rel=me, followed by those of
+            <link> tags with rel=me; an empty list if parsing raises
+        """
+        try:
+            soup = BeautifulSoup(html, 'html.parser')
+            # <a> matches come before <link> matches; tags whose href is
+            # missing or empty are skipped.
+            return [
+                href
+                for tag_name in ('a', 'link')
+                for tag in soup.find_all(tag_name, rel='me')
+                if (href := tag.get('href'))
+            ]
+        except Exception:
+            # Best-effort: a page that cannot be parsed yields no links.
+            return []
+
+    def extract_mailto_email(self, relme_links: list[str]) -> str | None:
+        """
+        Extract email address from mailto: links.
+
+        The scheme is matched case-insensitively, percent-escapes are
+        decoded, and links naming more than one address are skipped.
+
+        Args:
+            relme_links: List of rel=me link URLs
+
+        Returns:
+            First well-formed email address found, None otherwise
+        """
+        for link in relme_links:
+            candidate = link.strip()
+            if candidate[:7].lower() != 'mailto:':
+                continue
+
+            # Cut the query (?subject=...) before decoding so an escaped
+            # '?' stays part of the address.
+            email = unquote(candidate[7:].partition('?')[0]).strip()
+            local_part, _, domain = email.partition('@')
+            labels = domain.split('.')
+
+            if (
+                local_part
+                and email.count('@') == 1
+                and ',' not in email  # comma separates multiple recipients
+                and email.split() == [email]  # no embedded whitespace
+                and len(labels) > 1
+                and all(labels)  # rejects ".com", "a..b" and "a.com."
+            ):
+                return email
+
+        return None
+
+    def find_email(self, html: str) -> str | None:
+        """
+        Find email address from HTML by parsing rel=me links.
+
+        Args:
+            html: HTML content to parse
+
+        Returns:
+            Email address if found, None otherwise
+        """
+        return self.extract_mailto_email(self.parse_relme_links(html))