"""
External IndieAuth Token Verification with Endpoint Discovery

This module handles verification of bearer tokens issued by external
IndieAuth providers. Following the IndieAuth specification, endpoints
are discovered dynamically from the user's profile URL, not hardcoded.

For StarPunk V1 (single-user CMS), we always discover endpoints from
ADMIN_ME since only the site owner can post content.

Key Components:
    EndpointCache: Simple in-memory cache for discovered endpoints and tokens
    verify_external_token: Main entry point for token verification
    discover_endpoints: Discovers IndieAuth endpoints from profile URL

Configuration (via Flask app.config):
    ADMIN_ME: Site owner's profile URL (required)
    DEBUG: Allow HTTP endpoints in debug mode

ADR: ADR-031 IndieAuth Endpoint Discovery Implementation
Date: 2025-11-24
Version: v1.0.0-rc.5
"""

import hashlib
import logging
import re
import time
from typing import Dict, Optional, Any
from urllib.parse import urljoin, urlparse

import httpx
from bs4 import BeautifulSoup
from flask import current_app


# Timeouts, in seconds, passed as the `timeout` argument of httpx.get()
DISCOVERY_TIMEOUT = 5.0  # Profile fetch (result is cached, so it can be slower)
VERIFICATION_TIMEOUT = 3.0  # Token verification (runs on every token-cache miss)

# Cache TTLs, in seconds (added to time.time() to compute an expiry timestamp)
ENDPOINT_CACHE_TTL = 3600  # 1 hour for endpoints
TOKEN_CACHE_TTL = 300  # 5 minutes for token verifications


class EndpointCache:
    """
    Simple in-memory cache for endpoint discovery and token verification

    V1 single-user implementation: We only cache one user's endpoints
    since StarPunk V1 is explicitly single-user (only ADMIN_ME can post).

    When V2 adds multi-user support, this will need refactoring to
    cache endpoints per profile URL.

    Token verifications are stored per token hash together with an absolute
    expiry timestamp. Expired token entries are dropped both when they are
    looked up and whenever a new entry is stored, so entries for tokens that
    are never presented again do not accumulate.
    """

    def __init__(self):
        # Endpoint cache (single-user V1)
        self.endpoints: Optional[Dict[str, str]] = None
        # Absolute time.time() value after which `endpoints` is stale
        self.endpoints_expire: float = 0

        # Token verification cache (token_hash -> (info, expiry))
        self.token_cache: Dict[str, tuple[Dict[str, Any], float]] = {}

    def get_endpoints(self, ignore_expiry: bool = False) -> Optional[Dict[str, str]]:
        """
        Get cached endpoints if still valid

        Args:
            ignore_expiry: Return cached endpoints even if expired (grace period)

        Returns:
            Cached endpoints dict or None if not cached or expired
        """
        if self.endpoints is None:
            return None

        if ignore_expiry or time.time() < self.endpoints_expire:
            return self.endpoints

        return None

    def set_endpoints(self, endpoints: Dict[str, str], ttl: int = ENDPOINT_CACHE_TTL):
        """
        Cache discovered endpoints

        Args:
            endpoints: Endpoints dict to store (replaces any previous value)
            ttl: Lifetime in seconds, counted from now
        """
        self.endpoints = endpoints
        self.endpoints_expire = time.time() + ttl

    def get_token_info(self, token_hash: str) -> Optional[Dict[str, Any]]:
        """
        Get cached token verification if still valid

        Args:
            token_hash: Cache key for the token

        Returns:
            Cached token info, or None if absent or expired. An expired
            entry is removed as a side effect.
        """
        entry = self.token_cache.get(token_hash)
        if entry is None:
            return None

        info, expiry = entry
        if time.time() < expiry:
            return info

        # Expired, remove from cache
        self.token_cache.pop(token_hash, None)
        return None

    def set_token_info(self, token_hash: str, info: Dict[str, Any], ttl: int = TOKEN_CACHE_TTL):
        """
        Cache token verification result

        Args:
            token_hash: Cache key for the token
            info: Token info to store
            ttl: Lifetime in seconds, counted from now
        """
        now = time.time()
        # Without this sweep an entry is only ever removed when the very same
        # hash is looked up again after expiring, so abandoned tokens would
        # stay in memory indefinitely.
        self._purge_expired_tokens(now)
        self.token_cache[token_hash] = (info, now + ttl)

    def _purge_expired_tokens(self, now: float) -> None:
        """Remove every token entry whose expiry is at or before `now`."""
        # Collect keys first: a dict must not change size while being iterated
        expired = [
            key for key, (_, expiry) in self.token_cache.items() if expiry <= now
        ]
        for key in expired:
            self.token_cache.pop(key, None)


# Global cache instance (singleton for V1): module-level and in-memory,
# shared by verify_external_token() and discover_endpoints()
_cache = EndpointCache()


class DiscoveryError(Exception):
    """Signals that IndieAuth endpoint discovery could not be completed."""


class TokenVerificationError(Exception):
    """Signals that a bearer token could not be verified."""


def verify_external_token(token: str) -> Optional[Dict[str, Any]]:
    """
    Verify bearer token with external IndieAuth provider

    This is the main entry point for token verification. For StarPunk V1
    (single-user), we always discover endpoints from ADMIN_ME since only
    the site owner can post content.

    Process:
    1. Reject empty tokens without any network traffic
    2. Check token verification cache
    3. Discover endpoints from ADMIN_ME (with caching)
    4. Verify token with discovered endpoint
    5. Validate token belongs to ADMIN_ME
    6. Cache successful verification

    Every failure path logs the reason and returns None; this function does
    not raise for discovery or verification failures.

    Args:
        token: Bearer token to verify

    Returns:
        Dict with token info (me, client_id, scope) if valid
        None if token is invalid or verification fails

    Token info dict contains:
        me: User's profile URL
        client_id: Client application URL
        scope: Space-separated list of scopes
    """
    admin_me = current_app.config.get("ADMIN_ME")

    if not admin_me:
        current_app.logger.error(
            "ADMIN_ME not configured. Cannot verify token ownership."
        )
        return None

    # An empty (or non-string) token can never be valid; reject it before
    # spending discovery and verification round trips on it.
    if not token or not isinstance(token, str):
        current_app.logger.warning("Token verification failed: empty token")
        return None

    # Check token cache first
    token_hash = _hash_token(token)
    cached_info = _cache.get_token_info(token_hash)
    if cached_info:
        current_app.logger.debug("Token verification cache hit")
        return cached_info

    # Discover endpoints from ADMIN_ME (V1 single-user assumption)
    try:
        endpoints = discover_endpoints(admin_me)
    except DiscoveryError as e:
        current_app.logger.error(f"Endpoint discovery failed: {e}")
        return None

    token_endpoint = endpoints.get('token_endpoint')
    if not token_endpoint:
        current_app.logger.error("No token endpoint found in discovery")
        return None

    # Verify token with discovered endpoint
    try:
        token_info = _verify_with_endpoint(token_endpoint, token)
    except TokenVerificationError as e:
        current_app.logger.warning(f"Token verification failed: {e}")
        return None

    # The provider's response is untrusted input: 'me' may be null or a
    # non-string, which would otherwise raise inside normalize_url() instead
    # of producing the documented None result.
    token_me = token_info.get('me') if isinstance(token_info, dict) else None
    if not isinstance(token_me, str) or not token_me:
        current_app.logger.warning(
            "Token verification failed: malformed 'me' in token response"
        )
        return None

    # Validate token belongs to admin (single-user security check)
    if normalize_url(token_me) != normalize_url(admin_me):
        current_app.logger.warning(
            f"Token 'me' mismatch: {token_me} != {admin_me}"
        )
        return None

    # Cache successful verification
    _cache.set_token_info(token_hash, token_info)

    current_app.logger.debug(f"Token verified successfully for {token_me}")
    return token_info


def discover_endpoints(profile_url: str) -> Dict[str, str]:
    """
    Discover IndieAuth endpoints from a profile URL

    Implements IndieAuth endpoint discovery per W3C spec:
    https://www.w3.org/TR/indieauth/#discovery-by-clients

    Discovery priority:
    1. HTTP Link headers (highest priority)
    2. HTML link elements

    Caching: the module-level cache holds a single endpoints entry that is
    not keyed by profile URL (V1 single-user design), so a cache hit is
    returned regardless of the `profile_url` argument. If a fresh discovery
    attempt fails for any reason, a previously cached but expired entry is
    returned instead of failing (grace period).

    Args:
        profile_url: User's profile URL (their IndieWeb identity)

    Returns:
        Dict with discovered endpoints:
        {
            'authorization_endpoint': 'https://...',
            'token_endpoint': 'https://...'
        }

    Raises:
        DiscoveryError: If the profile URL is invalid, or if discovery fails
            and no (even expired) cached endpoints are available
    """
    # Check cache first
    cached_endpoints = _cache.get_endpoints()
    if cached_endpoints:
        current_app.logger.debug("Endpoint discovery cache hit")
        return cached_endpoints

    # Validate profile URL (raises DiscoveryError directly; a misconfigured
    # URL is not masked by the stale-cache fallback below)
    _validate_profile_url(profile_url)

    try:
        # Fetch profile with discovery
        endpoints = _fetch_and_parse(profile_url)

        # Cache successful discovery
        _cache.set_endpoints(endpoints)

        return endpoints

    except Exception as e:
        # Check cache even if expired (grace period for network failures)
        cached = _cache.get_endpoints(ignore_expiry=True)
        if cached:
            current_app.logger.warning(
                f"Using expired cache due to discovery failure: {e}"
            )
            return cached

        # No cache available, must fail. Chain the cause so the original
        # traceback stays attached to the DiscoveryError.
        raise DiscoveryError(f"Endpoint discovery failed: {e}") from e


def _fetch_and_parse(profile_url: str) -> Dict[str, str]:
    """
    Fetch profile URL and parse endpoints from headers and HTML

    Redirects are followed. Relative endpoint URLs are resolved against the
    URL of the final response (the document that actually carried the
    links), falling back to `profile_url` if the client does not report a
    usable final URL.

    Args:
        profile_url: User's profile URL

    Returns:
        Dict with discovered endpoints

    Raises:
        DiscoveryError: If fetch fails, no token endpoint is found, or a
            discovered endpoint URL fails validation
    """
    try:
        response = httpx.get(
            profile_url,
            timeout=DISCOVERY_TIMEOUT,
            follow_redirects=True,
            headers={
                'Accept': 'text/html,application/xhtml+xml',
                'User-Agent': f'StarPunk/{current_app.config.get("VERSION", "1.0")}'
            }
        )
        response.raise_for_status()

    except httpx.TimeoutException as e:
        raise DiscoveryError(f"Timeout fetching profile: {profile_url}") from e
    except httpx.HTTPStatusError as e:
        raise DiscoveryError(f"HTTP {e.response.status_code} fetching profile") from e
    except httpx.RequestError as e:
        raise DiscoveryError(f"Network error fetching profile: {e}") from e

    # After a redirect the links belong to the final document, so relative
    # references must be resolved against that URL, not the requested one.
    final_url = str(response.url)
    if urlparse(final_url).scheme in ('http', 'https'):
        base_url = final_url
    else:
        base_url = profile_url

    endpoints = {}

    # 1. Parse HTTP Link headers (highest priority)
    link_header = response.headers.get('Link', '')
    if link_header:
        link_endpoints = _parse_link_header(link_header, base_url)
        endpoints.update(link_endpoints)

    # 2. Parse HTML link elements
    # Media type names are case-insensitive, so compare in lower case
    content_type = response.headers.get('Content-Type', '').lower()
    if 'text/html' in content_type or 'application/xhtml+xml' in content_type:
        try:
            html_endpoints = _parse_html_links(response.text, base_url)
            # Merge: Link headers take priority (so update HTML first)
            html_endpoints.update(endpoints)
            endpoints = html_endpoints
        except Exception as e:
            current_app.logger.warning(f"HTML parsing failed: {e}")
            # Continue with Link header endpoints if HTML parsing fails

    # Validate we found required endpoints
    if 'token_endpoint' not in endpoints:
        raise DiscoveryError(
            f"No token endpoint found at {profile_url}. "
            "Ensure your profile has IndieAuth link elements or headers."
        )

    # Validate endpoint URLs
    for rel, url in endpoints.items():
        _validate_endpoint_url(url, rel)

    current_app.logger.info(
        f"Discovered endpoints from {profile_url}: "
        f"token={endpoints.get('token_endpoint')}, "
        f"auth={endpoints.get('authorization_endpoint')}"
    )

    return endpoints


def _parse_link_header(header: str, base_url: str) -> Dict[str, str]:
    """
    Parse HTTP Link header for IndieAuth endpoints

    Basic RFC 8288 support - handles simple Link headers.
    Limitations: Only supports quoted rel values, single Link headers.

    Example:
        Link: <https://auth.example.com/token>; rel="token_endpoint"

    Args:
        header: Link header value
        base_url: Base URL for resolving relative URLs

    Returns:
        Dict with discovered endpoints
    """
    endpoints = {}

    # Pattern: <url>; rel="relation"
    # Note: Simplified - doesn't handle all RFC 8288 edge cases
    pattern = r'<([^>]+)>;\s*rel="([^"]+)"'
    matches = re.findall(pattern, header)

    for url, rel in matches:
        if rel == 'authorization_endpoint':
            endpoints['authorization_endpoint'] = urljoin(base_url, url)
        elif rel == 'token_endpoint':
            endpoints['token_endpoint'] = urljoin(base_url, url)

    return endpoints


def _parse_html_links(html: str, base_url: str) -> Dict[str, str]:
    """
    Extract IndieAuth endpoints from HTML link elements

    Looks for:
        <link rel="authorization_endpoint" href="...">
        <link rel="token_endpoint" href="...">

    The rel attribute is treated as a set of whitespace-separated,
    case-insensitive tokens, so a relation only matches as a whole token
    and a single link element may declare both endpoints. When a relation
    appears on several link elements, the first one in document order wins.

    Parsing is best-effort: if the parser raises, the error is logged and
    whatever was collected up to that point is returned.

    Args:
        html: HTML content
        base_url: Base URL for resolving relative URLs

    Returns:
        Dict with discovered endpoints
    """
    wanted = ('authorization_endpoint', 'token_endpoint')
    endpoints: Dict[str, str] = {}

    try:
        soup = BeautifulSoup(html, 'html.parser')

        # Find all link elements (check both head and body - be liberal)
        for link in soup.find_all('link', rel=True):
            href = link.get('href')
            if not href:
                continue

            # rel can be a list or string depending on the parser
            rel = link.get('rel')
            rel_values = rel if isinstance(rel, list) else str(rel).split()
            relations = {str(value).lower() for value in rel_values}

            # Whole-token comparison: a substring test would also accept
            # unrelated relations that merely contain an endpoint name.
            for name in wanted:
                if name in relations:
                    # setdefault keeps the first occurrence of each relation
                    endpoints.setdefault(name, urljoin(base_url, href))

    except Exception as e:
        current_app.logger.warning(f"HTML parsing error: {e}")
        # Return what we found so far

    return endpoints


def _verify_with_endpoint(endpoint: str, token: str) -> Dict[str, Any]:
    """
    Verify token with the discovered token endpoint

    Makes GET request to endpoint with Authorization header.

    Retry policy (up to 3 attempts, sleeping 1s then 2s between them):
    - retried: timeouts, httpx network errors, HTTP 500/502/503/504
    - not retried: HTTP 400/401/403/404, any other status, malformed
      responses, and any other unexpected exception

    Args:
        endpoint: Token endpoint URL
        token: Bearer token to verify

    Returns:
        Token info dict from endpoint (a JSON object containing at least 'me')

    Raises:
        TokenVerificationError: If verification fails
    """
    headers = {
        'Authorization': f'Bearer {token}',
        'Accept': 'application/json',
    }

    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = httpx.get(
                endpoint,
                headers=headers,
                timeout=VERIFICATION_TIMEOUT,
                follow_redirects=True,
            )

            # Handle HTTP status codes
            if response.status_code == 200:
                token_info = response.json()

                # Valid JSON is not necessarily an object: a list or string
                # would slip through a bare membership test and then break
                # callers that treat the result as a mapping.
                if not isinstance(token_info, dict):
                    raise TokenVerificationError(
                        "Token response is not a JSON object"
                    )

                # Validate required fields
                if 'me' not in token_info:
                    raise TokenVerificationError("Token response missing 'me' field")

                return token_info

            # Client errors - don't retry
            elif response.status_code in [400, 401, 403, 404]:
                raise TokenVerificationError(
                    f"Token verification failed: HTTP {response.status_code}"
                )

            # Server errors - retry
            elif response.status_code in [500, 502, 503, 504]:
                if attempt < max_retries - 1:
                    wait_time = 2 ** attempt  # Exponential backoff
                    current_app.logger.debug(
                        f"Server error {response.status_code}, retrying in {wait_time}s..."
                    )
                    time.sleep(wait_time)
                    continue
                else:
                    raise TokenVerificationError(
                        f"Token endpoint error: HTTP {response.status_code}"
                    )

            # Other status codes
            else:
                raise TokenVerificationError(
                    f"Unexpected response: HTTP {response.status_code}"
                )

        except TokenVerificationError:
            # Raised deliberately above with a precise message; let it
            # through untouched instead of re-wrapping it in the generic
            # handler at the bottom.
            raise

        except httpx.TimeoutException as e:
            if attempt < max_retries - 1:
                wait_time = 2 ** attempt
                current_app.logger.debug(f"Timeout, retrying in {wait_time}s...")
                time.sleep(wait_time)
                continue
            else:
                raise TokenVerificationError("Token verification timeout") from e

        except httpx.NetworkError as e:
            if attempt < max_retries - 1:
                wait_time = 2 ** attempt
                current_app.logger.debug(f"Network error, retrying in {wait_time}s...")
                time.sleep(wait_time)
                continue
            else:
                raise TokenVerificationError(f"Network error: {e}") from e

        except Exception as e:
            # Don't retry for unexpected errors (e.g. a body that is not JSON)
            raise TokenVerificationError(f"Verification failed: {e}") from e

    # Should never reach here, but just in case
    raise TokenVerificationError("Maximum retries exceeded")


def _validate_profile_url(url: str) -> None:
    """
    Check that a profile URL is absolute and acceptable for this environment

    An absolute URL (scheme and network location both present) is always
    required. Outside debug mode the URL must additionally use HTTPS and
    must not point at a loopback host.

    Args:
        url: Profile URL to validate

    Raises:
        DiscoveryError: If URL is invalid or insecure
    """
    parts = urlparse(url)

    # Relative or scheme-less URLs are rejected in every mode
    if not (parts.scheme and parts.netloc):
        raise DiscoveryError(f"Invalid profile URL format: {url}")

    # Debug mode waives the remaining production-only restrictions
    if current_app.debug:
        return

    if parts.scheme != 'https':
        raise DiscoveryError(
            f"HTTPS required for profile URLs in production. Got: {url}"
        )

    if parts.hostname in ('localhost', '127.0.0.1', '::1'):
        raise DiscoveryError(
            "Localhost URLs not allowed in production"
        )


def _validate_endpoint_url(url: str, rel: str) -> None:
    """
    Check that a discovered endpoint URL is absolute and acceptable

    An absolute URL (scheme and network location both present) is always
    required. Outside debug mode the URL must additionally use HTTPS and
    must not point at a loopback host.

    Args:
        url: Endpoint URL to validate
        rel: Endpoint relation (for error messages)

    Raises:
        DiscoveryError: If URL is invalid or insecure
    """
    parts = urlparse(url)
    is_absolute = bool(parts.scheme) and bool(parts.netloc)

    # Relative or scheme-less URLs are rejected in every mode
    if not is_absolute:
        raise DiscoveryError(f"Invalid {rel} URL format: {url}")

    # Debug mode waives the remaining production-only restrictions
    if current_app.debug:
        return

    if parts.scheme != 'https':
        raise DiscoveryError(
            f"HTTPS required for {rel} in production. Got: {url}"
        )

    loopback_hosts = ('localhost', '127.0.0.1', '::1')
    if parts.hostname in loopback_hosts:
        raise DiscoveryError(
            f"Localhost not allowed for {rel} in production"
        )


def normalize_url(url: str) -> str:
    """
    Normalize URL for comparison

    Removes trailing slashes and lower-cases the parts of an absolute URL
    that are case-insensitive: the scheme and the network location (host,
    including any port/userinfo). Path, query and fragment keep their case,
    because they are case-sensitive: "https://example.com/Alice" and
    "https://example.com/alice" can be different profiles and must not
    compare equal in an ownership check.

    Strings that are not absolute URLs (no scheme or no host), or that
    cannot be parsed, are lower-cased as a whole.

    Used only for comparison, not for storage.

    Args:
        url: URL to normalize

    Returns:
        Normalized URL

    Examples:
        >>> normalize_url("HTTPS://Example.com/")
        'https://example.com'
        >>> normalize_url("https://example.com/Alice/")
        'https://example.com/Alice'
    """
    trimmed = url.rstrip('/')

    try:
        parts = urlparse(trimmed)
    except ValueError:
        # e.g. malformed IPv6 literal; nothing structured to work with
        return trimmed.lower()

    if not parts.scheme or not parts.netloc:
        return trimmed.lower()

    # urlparse() already lower-cases the scheme; only the host needs folding
    return parts._replace(netloc=parts.netloc.lower()).geturl()


def _hash_token(token: str) -> str:
    """
    Hash token for secure caching

    Uses SHA-256 to prevent tokens from appearing in logs
    and to create fixed-length cache keys.

    Args:
        token: Bearer token

    Returns:
        SHA-256 hash of token (hex)
    """
    return hashlib.sha256(token.encode()).hexdigest()


def check_scope(required_scope: str, token_scope: str) -> bool:
    """
    Tell whether a token's scope string grants a required scope

    The scope string is split on whitespace and the required scope must
    equal one of the resulting entries exactly. An empty or missing scope
    string grants nothing.

    Args:
        required_scope: Scope needed (e.g., "create")
        token_scope: Space-separated scope string from token

    Returns:
        True if token has required scope, False otherwise

    Examples:
        >>> check_scope("create", "create update")
        True
        >>> check_scope("create", "read")
        False
        >>> check_scope("create", "")
        False
    """
    granted = token_scope.split() if token_scope else []
    return required_scope in granted