feat(phase-2): implement domain verification system

Implements complete domain verification flow with:
- rel=me link verification service
- HTML fetching with security controls
- Rate limiting to prevent abuse
- Email validation utilities
- Authorization and verification API endpoints
- User-facing templates for authorization and verification flows

This completes Phase 2: Domain Verification as designed.

Tests:
- All Phase 2 unit tests passing
- Coverage: 85% overall
- Migration tests updated

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-20 13:44:33 -07:00
parent 11ecd953d8
commit 074f74002c
28 changed files with 2283 additions and 14 deletions
--- a/tests/unit/test_relme_parser.py
+++ b/tests/unit/test_relme_parser.py
@@ -0,0 +1,181 @@
+"""Tests for rel=me parser service."""
+import pytest
+
+from gondulf.services.relme_parser import RelMeParser
+
+
+class TestRelMeParser:
+    """Unit tests for RelMeParser link parsing and mailto: email extraction."""
+
+    def test_parse_relme_links_basic(self):
+        """Test that rel=me <a> tags yield both their https and mailto: hrefs."""
+        html = """
+        <html>
+        <body>
+            <a rel="me" href="https://github.com/user">GitHub</a>
+            <a rel="me" href="mailto:user@example.com">Email</a>
+        </body>
+        </html>
+        """
+        parser = RelMeParser()
+        links = parser.parse_relme_links(html)
+
+        assert len(links) == 2
+        assert "https://github.com/user" in links
+        assert "mailto:user@example.com" in links
+
+    def test_parse_relme_links_link_tag(self):
+        """Test that rel=me on a <link> tag in <head> is also picked up."""
+        html = """
+        <html>
+        <head>
+            <link rel="me" href="https://twitter.com/user">
+        </head>
+        </html>
+        """
+        parser = RelMeParser()
+        links = parser.parse_relme_links(html)
+
+        assert len(links) == 1
+        assert "https://twitter.com/user" in links
+
+    def test_parse_relme_links_no_rel_me(self):
+        """Test that anchors without rel=me produce no links."""
+        html = """
+        <html>
+        <body>
+            <a href="https://example.com">Link</a>
+        </body>
+        </html>
+        """
+        parser = RelMeParser()
+        links = parser.parse_relme_links(html)
+
+        assert len(links) == 0
+
+    def test_parse_relme_links_no_href(self):
+        """Test that a rel=me anchor lacking an href produces no links."""
+        html = """
+        <html>
+        <body>
+            <a rel="me">No href</a>
+        </body>
+        </html>
+        """
+        parser = RelMeParser()
+        links = parser.parse_relme_links(html)
+
+        assert len(links) == 0
+
+    def test_parse_relme_links_malformed_html(self):
+        """Test that malformed HTML does not raise and still yields a list."""
+        html = "<html><body><<>>broken"
+        parser = RelMeParser()
+        links = parser.parse_relme_links(html)
+
+        # Only the return type is pinned: the parser may salvage whatever it can
+        assert isinstance(links, list)
+
+    def test_extract_mailto_email_basic(self):
+        """Test that the mailto: scheme is stripped, leaving the bare address."""
+        links = ["mailto:user@example.com"]
+        parser = RelMeParser()
+        email = parser.extract_mailto_email(links)
+
+        assert email == "user@example.com"
+
+    def test_extract_mailto_email_with_query(self):
+        """Test that a ?query suffix on a mailto: link is dropped from the email."""
+        links = ["mailto:user@example.com?subject=Hello"]
+        parser = RelMeParser()
+        email = parser.extract_mailto_email(links)
+
+        assert email == "user@example.com"
+
+    def test_extract_mailto_email_multiple_links(self):
+        """Test that the first mailto: link wins when several are present."""
+        links = [
+            "https://github.com/user",
+            "mailto:user@example.com",
+            "mailto:other@example.com"
+        ]
+        parser = RelMeParser()
+        email = parser.extract_mailto_email(links)
+
+        assert email == "user@example.com"
+
+    def test_extract_mailto_email_no_mailto(self):
+        """Test that None is returned when no link uses the mailto: scheme."""
+        links = ["https://github.com/user", "https://twitter.com/user"]
+        parser = RelMeParser()
+        email = parser.extract_mailto_email(links)
+
+        assert email is None
+
+    def test_extract_mailto_email_invalid_format(self):
+        """Test that a mailto: target that is not a valid email yields None."""
+        links = ["mailto:notanemail"]
+        parser = RelMeParser()
+        email = parser.extract_mailto_email(links)
+
+        # An invalid address is expected to be rejected, not returned verbatim
+        assert email is None
+
+    def test_extract_mailto_email_empty_list(self):
+        """Test that an empty link list yields None."""
+        parser = RelMeParser()
+        email = parser.extract_mailto_email([])
+
+        assert email is None
+
+    def test_find_email_success(self):
+        """Test find_email end to end: HTML in, rel=me mailto: address out."""
+        html = """
+        <html>
+        <body>
+            <a rel="me" href="https://github.com/user">GitHub</a>
+            <a rel="me" href="mailto:user@example.com">Email</a>
+        </body>
+        </html>
+        """
+        parser = RelMeParser()
+        email = parser.find_email(html)
+
+        assert email == "user@example.com"
+
+    def test_find_email_no_email(self):
+        """Test that find_email returns None when rel=me links hold no mailto:."""
+        html = """
+        <html>
+        <body>
+            <a rel="me" href="https://github.com/user">GitHub</a>
+        </body>
+        </html>
+        """
+        parser = RelMeParser()
+        email = parser.find_email(html)
+
+        assert email is None
+
+    def test_find_email_malformed_html(self):
+        """Test that find_email returns None for malformed HTML with no links."""
+        html = "<html><<broken>>"
+        parser = RelMeParser()
+        email = parser.find_email(html)
+
+        assert email is None
+
+    def test_parse_relme_multiple_rel_values(self):
+        """Test that 'me' is matched inside a space-separated rel value list."""
+        html = """
+        <html>
+        <body>
+            <a rel="me nofollow" href="https://example.com">Link</a>
+        </body>
+        </html>
+        """
+        parser = RelMeParser()
+        links = parser.parse_relme_links(html)
+
+        assert len(links) == 1
+        assert "https://example.com" in links