# Gondulf/tests/unit/test_relme_parser.py

"""Tests for rel=me parser service."""
import pytest

from gondulf.services.relme_parser import RelMeParser


class TestRelMeParser:
    """Unit tests for RelMeParser: rel=me discovery and mailto extraction."""

    def test_parse_relme_links_basic(self):
        """Two rel=me anchors (one https, one mailto) are both collected."""
        markup = """
        <html>
        <body>
            <a rel="me" href="https://github.com/user">GitHub</a>
            <a rel="me" href="mailto:user@example.com">Email</a>
        </body>
        </html>
        """
        found = RelMeParser().parse_relme_links(markup)

        assert len(found) == 2
        assert "https://github.com/user" in found
        assert "mailto:user@example.com" in found

    def test_parse_relme_links_link_tag(self):
        """A rel=me <link> element in the document head is collected."""
        markup = """
        <html>
        <head>
            <link rel="me" href="https://twitter.com/user">
        </head>
        </html>
        """
        found = RelMeParser().parse_relme_links(markup)

        assert len(found) == 1
        assert "https://twitter.com/user" in found

    def test_parse_relme_links_no_rel_me(self):
        """An anchor without a rel attribute yields no results."""
        markup = """
        <html>
        <body>
            <a href="https://example.com">Link</a>
        </body>
        </html>
        """
        found = RelMeParser().parse_relme_links(markup)

        assert len(found) == 0

    def test_parse_relme_links_no_href(self):
        """A rel=me anchor that lacks an href yields no results."""
        markup = """
        <html>
        <body>
            <a rel="me">No href</a>
        </body>
        </html>
        """
        found = RelMeParser().parse_relme_links(markup)

        assert len(found) == 0

    def test_parse_relme_links_malformed_html(self):
        """Malformed HTML is tolerated and the result is still a list."""
        markup = "<html><body><<>>broken"
        found = RelMeParser().parse_relme_links(markup)

        # Only the return type is pinned here: the parser must survive the
        # broken input; the list contents are deliberately left unchecked.
        assert isinstance(found, list)

    def test_extract_mailto_email_basic(self):
        """The address is taken from a plain mailto: link."""
        candidates = ["mailto:user@example.com"]
        address = RelMeParser().extract_mailto_email(candidates)

        assert address == "user@example.com"

    def test_extract_mailto_email_with_query(self):
        """A query string after the address is not part of the result."""
        candidates = ["mailto:user@example.com?subject=Hello"]
        address = RelMeParser().extract_mailto_email(candidates)

        assert address == "user@example.com"

    def test_extract_mailto_email_multiple_links(self):
        """With several links, the first mailto: entry supplies the address."""
        candidates = [
            "https://github.com/user",
            "mailto:user@example.com",
            "mailto:other@example.com",
        ]
        address = RelMeParser().extract_mailto_email(candidates)

        assert address == "user@example.com"

    def test_extract_mailto_email_no_mailto(self):
        """None is returned when no link uses the mailto: scheme."""
        candidates = ["https://github.com/user", "https://twitter.com/user"]
        address = RelMeParser().extract_mailto_email(candidates)

        assert address is None

    def test_extract_mailto_email_invalid_format(self):
        """A mailto: link whose target is not an email address gives None."""
        candidates = ["mailto:notanemail"]
        address = RelMeParser().extract_mailto_email(candidates)

        # "notanemail" is not a valid address, so nothing must be extracted.
        assert address is None

    def test_extract_mailto_email_empty_list(self):
        """An empty list of links gives None."""
        address = RelMeParser().extract_mailto_email([])

        assert address is None

    def test_find_email_success(self):
        """find_email returns the mailto address found in the HTML."""
        markup = """
        <html>
        <body>
            <a rel="me" href="https://github.com/user">GitHub</a>
            <a rel="me" href="mailto:user@example.com">Email</a>
        </body>
        </html>
        """
        address = RelMeParser().find_email(markup)

        assert address == "user@example.com"

    def test_find_email_no_email(self):
        """find_email returns None when the only rel=me link is not mailto."""
        markup = """
        <html>
        <body>
            <a rel="me" href="https://github.com/user">GitHub</a>
        </body>
        </html>
        """
        address = RelMeParser().find_email(markup)

        assert address is None

    def test_find_email_malformed_html(self):
        """find_email returns None for malformed HTML."""
        markup = "<html><<broken>>"
        address = RelMeParser().find_email(markup)

        assert address is None

    def test_parse_relme_multiple_rel_values(self):
        """A rel attribute listing 'me' among other values still matches."""
        markup = """
        <html>
        <body>
            <a rel="me nofollow" href="https://example.com">Link</a>
        </body>
        </html>
        """
        found = RelMeParser().parse_relme_links(markup)

        assert len(found) == 1
        assert "https://example.com" in found