Files
Gondulf/tests/unit/test_relme_parser.py
Phil Skentelbery 074f74002c feat(phase-2): implement domain verification system
Implements complete domain verification flow with:
- rel=me link verification service
- HTML fetching with security controls
- Rate limiting to prevent abuse
- Email validation utilities
- Authorization and verification API endpoints
- User-facing templates for authorization and verification flows

This completes Phase 2: Domain Verification as designed.

Tests:
- All Phase 2 unit tests passing
- Coverage: 85% overall
- Migration tests updated

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-20 13:44:33 -07:00

182 lines
5.3 KiB
Python

"""Tests for rel=me parser service."""
import pytest
from gondulf.services.relme_parser import RelMeParser
class TestRelMeParser:
    """Unit tests for ``RelMeParser``.

    Covers the three entry points exercised here:

    * ``parse_relme_links(html)`` - collect ``href`` values of elements
      whose ``rel`` attribute contains ``me``.
    * ``extract_mailto_email(links)`` - pick an email address out of a list
      of links.
    * ``find_email(html)`` - the two steps above combined.

    HTML fixtures are kept flush-left inside their triple-quoted strings so
    the exact text handed to the parser does not depend on how the test
    methods are indented.
    """

    def test_parse_relme_links_basic(self):
        """Both an https and a mailto rel=me anchor are returned."""
        html = """
<html>
<body>
<a rel="me" href="https://github.com/user">GitHub</a>
<a rel="me" href="mailto:user@example.com">Email</a>
</body>
</html>
"""
        parser = RelMeParser()
        links = parser.parse_relme_links(html)

        assert len(links) == 2
        assert "https://github.com/user" in links
        assert "mailto:user@example.com" in links

    def test_parse_relme_links_link_tag(self):
        """rel=me on a <link> element in <head> is picked up as well."""
        html = """
<html>
<head>
<link rel="me" href="https://twitter.com/user">
</head>
</html>
"""
        parser = RelMeParser()
        links = parser.parse_relme_links(html)

        assert len(links) == 1
        assert "https://twitter.com/user" in links

    def test_parse_relme_links_no_rel_me(self):
        """An anchor without rel=me contributes no links."""
        html = """
<html>
<body>
<a href="https://example.com">Link</a>
</body>
</html>
"""
        parser = RelMeParser()
        links = parser.parse_relme_links(html)

        assert len(links) == 0

    def test_parse_relme_links_no_href(self):
        """A rel=me anchor that has no href is skipped."""
        html = """
<html>
<body>
<a rel="me">No href</a>
</body>
</html>
"""
        parser = RelMeParser()
        links = parser.parse_relme_links(html)

        assert len(links) == 0

    def test_parse_relme_links_malformed_html(self):
        """Malformed HTML must not raise; the result is still a list."""
        html = "<html><body><<>>broken"
        parser = RelMeParser()
        links = parser.parse_relme_links(html)

        # Should not crash, returns what it can parse. Only the type is
        # pinned here; the contents are deliberately left unspecified.
        assert isinstance(links, list)

    def test_extract_mailto_email_basic(self):
        """The address is extracted from a plain mailto: link."""
        links = ["mailto:user@example.com"]
        parser = RelMeParser()
        email = parser.extract_mailto_email(links)

        assert email == "user@example.com"

    def test_extract_mailto_email_with_query(self):
        """Query parameters after the address are not part of the result."""
        links = ["mailto:user@example.com?subject=Hello"]
        parser = RelMeParser()
        email = parser.extract_mailto_email(links)

        assert email == "user@example.com"

    def test_extract_mailto_email_multiple_links(self):
        """With several links, the first mailto: entry wins."""
        links = [
            "https://github.com/user",
            "mailto:user@example.com",
            "mailto:other@example.com",
        ]
        parser = RelMeParser()
        email = parser.extract_mailto_email(links)

        assert email == "user@example.com"

    def test_extract_mailto_email_no_mailto(self):
        """None is returned when no link uses the mailto: scheme."""
        links = ["https://github.com/user", "https://twitter.com/user"]
        parser = RelMeParser()
        email = parser.extract_mailto_email(links)

        assert email is None

    def test_extract_mailto_email_invalid_format(self):
        """A mailto: link whose target is not an email address yields None."""
        links = ["mailto:notanemail"]
        parser = RelMeParser()
        email = parser.extract_mailto_email(links)

        # Should return None for invalid email format
        assert email is None

    def test_extract_mailto_email_empty_list(self):
        """An empty link list yields None."""
        parser = RelMeParser()
        email = parser.extract_mailto_email([])

        assert email is None

    def test_find_email_success(self):
        """find_email returns the mailto address found among rel=me links."""
        html = """
<html>
<body>
<a rel="me" href="https://github.com/user">GitHub</a>
<a rel="me" href="mailto:user@example.com">Email</a>
</body>
</html>
"""
        parser = RelMeParser()
        email = parser.find_email(html)

        assert email == "user@example.com"

    def test_find_email_no_email(self):
        """find_email returns None when no rel=me link is a mailto: link."""
        html = """
<html>
<body>
<a rel="me" href="https://github.com/user">GitHub</a>
</body>
</html>
"""
        parser = RelMeParser()
        email = parser.find_email(html)

        assert email is None

    def test_find_email_malformed_html(self):
        """find_email returns None for malformed HTML containing no links."""
        html = "<html><<broken>>"
        parser = RelMeParser()
        email = parser.find_email(html)

        assert email is None

    def test_parse_relme_multiple_rel_values(self):
        """A rel attribute with several tokens still matches on 'me'."""
        html = """
<html>
<body>
<a rel="me nofollow" href="https://example.com">Link</a>
</body>
</html>
"""
        parser = RelMeParser()
        links = parser.parse_relme_links(html)

        assert len(links) == 1
        assert "https://example.com" in links