"""h-app microformat parser for client metadata extraction."""
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Dict
from urllib.parse import urlparse

import mf2py

from gondulf.services.html_fetcher import HTMLFetcherService

# Module-level logger shared by everything in this file; the logger name is
# given explicitly as a string rather than derived from __name__.
logger = logging.getLogger("gondulf.happ_parser")


@dataclass
class ClientMetadata:
    """Client metadata extracted from h-app markup."""
    name: str
    logo: str | None = None
    url: str | None = None


class HAppParser:
    """
    Parse h-app microformat data from client HTML.

    Results are kept in an in-process dictionary keyed by client_id and are
    reused until they are older than ``cache_ttl``. Fallback results (fetch
    failure or missing h-app markup) are cached in the same way as
    successful ones. Entries are only replaced when the same client_id is
    requested again; nothing prunes the cache.
    """

    def __init__(self, html_fetcher: HTMLFetcherService):
        """
        Initialize parser with HTML fetcher dependency.

        Args:
            html_fetcher: Service for fetching HTML content
        """
        self.html_fetcher = html_fetcher
        # client_id -> (metadata, naive UTC timestamp taken when it was cached)
        self.cache: Dict[str, tuple[ClientMetadata, datetime]] = {}
        self.cache_ttl = timedelta(hours=24)

    @staticmethod
    def _utcnow() -> datetime:
        """
        Return the current UTC time as a naive datetime.

        ``datetime.utcnow()`` is deprecated, but cache timestamps stay naive
        so that they remain comparable with naive values already stored in
        (or written by other code into) ``self.cache``.
        """
        return datetime.now(timezone.utc).replace(tzinfo=None)

    @staticmethod
    def _first_value(properties: dict, key: str) -> str | None:
        """
        Return the first value of a microformat property as a string.

        Args:
            properties: The ``properties`` mapping of a parsed item
            key: Property name without prefix (e.g. "name", "logo", "url")

        Returns:
            The first value if it is a non-empty string, the ``'value'``
            entry if the first value is a dict, otherwise None (missing
            key, empty list, or a value that is not textual).
        """
        values = properties.get(key)
        if not values:
            return None

        first = values[0]
        # mf2py may return a dict with a 'value' key instead of a plain string
        if isinstance(first, dict):
            first = first.get('value')

        return first if isinstance(first, str) and first else None

    async def fetch_and_parse(self, client_id: str) -> ClientMetadata:
        """
        Fetch client_id URL and parse h-app metadata.

        Uses 24-hour caching to reduce HTTP requests.
        Falls back to domain name if the fetch fails, returns nothing, or
        no h-app is found. The fallback result is cached as well.

        Args:
            client_id: Client application URL

        Returns:
            ClientMetadata with name (always populated) and optional logo/url
        """
        # Check cache
        cached = self.cache.get(client_id)
        if cached is not None:
            cached_metadata, cached_at = cached
            if self._utcnow() - cached_at < self.cache_ttl:
                logger.debug("Returning cached metadata for %s", client_id)
                return cached_metadata

        logger.info("Fetching h-app metadata from %s", client_id)

        # Fetch HTML. Any fetcher error is treated as "no HTML available" so
        # that callers always get usable metadata back.
        # NOTE(review): fetch() is called without await inside a coroutine;
        # if it performs blocking I/O it will stall the event loop - confirm
        # against HTMLFetcherService.
        try:
            html = self.html_fetcher.fetch(client_id)
        except Exception as e:
            logger.warning("Failed to fetch %s: %s", client_id, e)
            html = None

        # Parse h-app or fallback to domain name
        if html:
            metadata = self._parse_h_app(html, client_id)
        else:
            logger.info("Using domain fallback for %s", client_id)
            metadata = ClientMetadata(
                name=self._extract_domain_name(client_id)
            )

        # Cache result (timestamp taken after the fetch completed)
        self.cache[client_id] = (metadata, self._utcnow())
        logger.debug("Cached metadata for %s: %s", client_id, metadata.name)

        return metadata

    def _parse_h_app(self, html: str, client_id: str) -> ClientMetadata:
        """
        Parse h-app microformat from HTML.

        Only top-level items of the parsed document are considered, and the
        first one whose type list contains 'h-app' is used. Each property is
        resolved independently, so a missing or empty name does not discard
        a valid logo or url.

        Args:
            html: HTML content to parse
            client_id: Client URL (for resolving relative URLs)

        Returns:
            ClientMetadata with extracted values, or domain fallback if no
            h-app is present or parsing fails
        """
        fallback_name = self._extract_domain_name(client_id)

        # Broad catch is deliberate: markup is untrusted and any parser
        # failure must degrade to the domain fallback instead of propagating.
        try:
            # Parse microformats
            parsed = mf2py.parse(doc=html, url=client_id)

            # Use first h-app item
            h_app = next(
                (
                    item for item in parsed.get('items', [])
                    if 'h-app' in item.get('type', [])
                ),
                None,
            )

            if h_app is None:
                logger.info("No h-app markup found at %s", client_id)
                return ClientMetadata(name=fallback_name)

            properties = h_app.get('properties', {})

            # Extract properties
            name = self._first_value(properties, 'name') or fallback_name
            logo = self._first_value(properties, 'logo')
            url = self._first_value(properties, 'url') or client_id

        except Exception as e:
            logger.error("Failed to parse h-app from %s: %s", client_id, e)
            return ClientMetadata(name=fallback_name)

        logger.info(
            "Extracted h-app metadata from %s: name=%s", client_id, name
        )

        return ClientMetadata(
            name=name,
            logo=logo,
            url=url
        )

    def _extract_domain_name(self, client_id: str) -> str:
        """
        Extract domain name from client_id for fallback display.

        Args:
            client_id: Client URL

        Returns:
            The network location of the URL (e.g., "example.com"); the path
            when there is no network location (scheme-less input such as
            "example.com"); or client_id itself when neither is available
            or the URL cannot be parsed.
        """
        try:
            parsed = urlparse(client_id)
        except (ValueError, TypeError, AttributeError):
            # urlparse rejects some malformed URLs (e.g. unbalanced IPv6
            # brackets) and non-string input; show the raw value instead.
            return client_id

        # Never return an empty display name if client_id has any content
        return parsed.netloc or parsed.path or client_id