"""Tests for h-app microformat parser service.""" import pytest from datetime import datetime, timedelta from unittest.mock import Mock, AsyncMock from gondulf.services.happ_parser import HAppParser, ClientMetadata from gondulf.services.html_fetcher import HTMLFetcherService class TestClientMetadata: """Tests for ClientMetadata dataclass.""" def test_client_metadata_creation(self): """Test creating ClientMetadata with all fields.""" metadata = ClientMetadata( name="Example App", logo="https://example.com/logo.png", url="https://example.com" ) assert metadata.name == "Example App" assert metadata.logo == "https://example.com/logo.png" assert metadata.url == "https://example.com" def test_client_metadata_optional_fields(self): """Test ClientMetadata with optional fields as None.""" metadata = ClientMetadata(name="Example App") assert metadata.name == "Example App" assert metadata.logo is None assert metadata.url is None class TestHAppParser: """Tests for HAppParser service.""" @pytest.fixture def mock_html_fetcher(self): """Create mock HTML fetcher.""" return Mock(spec=HTMLFetcherService) @pytest.fixture def parser(self, mock_html_fetcher): """Create HAppParser instance with mock fetcher.""" return HAppParser(html_fetcher=mock_html_fetcher) @pytest.mark.asyncio async def test_parse_extracts_app_name(self, parser, mock_html_fetcher): """Test parsing extracts application name from h-app.""" html = """
My IndieAuth Client
""" mock_html_fetcher.fetch.return_value = html metadata = await parser.fetch_and_parse("https://example.com") assert metadata.name == "My IndieAuth Client" @pytest.mark.asyncio async def test_parse_extracts_logo_url(self, parser, mock_html_fetcher): """Test parsing extracts logo URL from h-app.""" html = """
My App
""" mock_html_fetcher.fetch.return_value = html metadata = await parser.fetch_and_parse("https://example.com") # mf2py resolves relative URLs to absolute URLs assert metadata.logo == "https://example.com/icon.png" @pytest.mark.asyncio async def test_parse_extracts_app_url(self, parser, mock_html_fetcher): """Test parsing extracts application URL from h-app.""" html = """
My App
""" mock_html_fetcher.fetch.return_value = html metadata = await parser.fetch_and_parse("https://example.com") assert metadata.url == "https://example.com/app" @pytest.mark.asyncio async def test_parse_handles_missing_happ(self, parser, mock_html_fetcher): """Test parsing falls back to domain name when no h-app found.""" html = """

My Website

No microformat data here

""" mock_html_fetcher.fetch.return_value = html metadata = await parser.fetch_and_parse("https://example.com") assert metadata.name == "example.com" assert metadata.logo is None assert metadata.url is None @pytest.mark.asyncio async def test_parse_handles_partial_metadata(self, parser, mock_html_fetcher): """Test parsing handles h-app with only some properties.""" html = """
My App
""" mock_html_fetcher.fetch.return_value = html metadata = await parser.fetch_and_parse("https://example.com") assert metadata.name == "My App" assert metadata.logo is None # Should default to client_id assert metadata.url == "https://example.com" @pytest.mark.asyncio async def test_parse_handles_malformed_html(self, parser, mock_html_fetcher): """Test parsing handles malformed HTML gracefully.""" html = """
Incomplete """ mock_html_fetcher.fetch.return_value = html metadata = await parser.fetch_and_parse("https://example.com") # Should still extract something or fall back to domain assert metadata.name is not None @pytest.mark.asyncio async def test_fetch_failure_returns_domain_fallback(self, parser, mock_html_fetcher): """Test that fetch failure returns domain name fallback.""" mock_html_fetcher.fetch.side_effect = Exception("Network error") metadata = await parser.fetch_and_parse("https://example.com") assert metadata.name == "example.com" assert metadata.logo is None assert metadata.url is None @pytest.mark.asyncio async def test_fetch_none_returns_domain_fallback(self, parser, mock_html_fetcher): """Test that fetch returning None uses domain fallback.""" mock_html_fetcher.fetch.return_value = None metadata = await parser.fetch_and_parse("https://example.com") assert metadata.name == "example.com" @pytest.mark.asyncio async def test_caching_reduces_fetches(self, parser, mock_html_fetcher): """Test that caching reduces number of HTTP fetches.""" html = """
Cached App
""" mock_html_fetcher.fetch.return_value = html # First fetch metadata1 = await parser.fetch_and_parse("https://example.com") # Second fetch (should use cache) metadata2 = await parser.fetch_and_parse("https://example.com") assert metadata1.name == "Cached App" assert metadata2.name == "Cached App" # HTML fetcher should only be called once assert mock_html_fetcher.fetch.call_count == 1 @pytest.mark.asyncio async def test_cache_expiry_triggers_refetch(self, parser, mock_html_fetcher, monkeypatch): """Test that cache expiry triggers a new fetch.""" html = """
App Name
""" mock_html_fetcher.fetch.return_value = html # First fetch await parser.fetch_and_parse("https://example.com") # Manually expire the cache by setting TTL to 0 parser.cache_ttl = timedelta(seconds=0) # Second fetch (cache should be expired) await parser.fetch_and_parse("https://example.com") # Should have fetched twice due to cache expiry assert mock_html_fetcher.fetch.call_count == 2 @pytest.mark.asyncio async def test_extract_domain_name_basic(self, parser, mock_html_fetcher): """Test domain name extraction from basic URL.""" mock_html_fetcher.fetch.return_value = None metadata = await parser.fetch_and_parse("https://example.com/path") assert metadata.name == "example.com" @pytest.mark.asyncio async def test_extract_domain_name_with_port(self, parser, mock_html_fetcher): """Test domain name extraction from URL with port.""" mock_html_fetcher.fetch.return_value = None metadata = await parser.fetch_and_parse("https://example.com:8080/path") assert metadata.name == "example.com:8080" @pytest.mark.asyncio async def test_extract_domain_name_subdomain(self, parser, mock_html_fetcher): """Test domain name extraction from URL with subdomain.""" mock_html_fetcher.fetch.return_value = None metadata = await parser.fetch_and_parse("https://auth.example.com") assert metadata.name == "auth.example.com" @pytest.mark.asyncio async def test_multiple_happ_uses_first(self, parser, mock_html_fetcher): """Test that multiple h-app elements uses the first one.""" html = """
First App
Second App
""" mock_html_fetcher.fetch.return_value = html metadata = await parser.fetch_and_parse("https://example.com") assert metadata.name == "First App" @pytest.mark.asyncio async def test_parse_error_returns_domain_fallback(self, parser, mock_html_fetcher, monkeypatch): """Test that parse errors fall back to domain name.""" html = "Valid HTML" mock_html_fetcher.fetch.return_value = html # Mock mf2py.parse to raise exception def mock_parse_error(*args, **kwargs): raise Exception("Parse error") import gondulf.services.happ_parser as happ_module monkeypatch.setattr(happ_module, "mf2py", Mock(parse=mock_parse_error)) metadata = await parser.fetch_and_parse("https://example.com") # Should fall back to domain name assert metadata.name == "example.com" @pytest.mark.asyncio async def test_cache_different_clients_separately(self, parser, mock_html_fetcher): """Test that different client_ids are cached separately.""" html1 = '
App 1
' html2 = '
App 2
' mock_html_fetcher.fetch.side_effect = [html1, html2] metadata1 = await parser.fetch_and_parse("https://example1.com") metadata2 = await parser.fetch_and_parse("https://example2.com") assert metadata1.name == "App 1" assert metadata2.name == "App 2" assert mock_html_fetcher.fetch.call_count == 2