feat(phase-4a): complete Phase 3 implementation and gap analysis

Merges Phase 4a work including:

Implementation:
- Metadata discovery endpoint (/.well-known/oauth-authorization-server)
- h-app microformat parser service
- Enhanced authorization endpoint with client info display
- Configuration management system
- Dependency injection framework

Documentation:
- Comprehensive gap analysis for v1.0.0 compliance
- Phase 4a clarifications on development approach
- Phase 4-5 critical components breakdown

Testing:
- Unit tests for h-app parser (308 lines, comprehensive coverage)
- Unit tests for metadata endpoint (134 lines)
- Unit tests for configuration system (18 lines)
- Integration test updates

All tests passing with high coverage. Ready for Phase 4b security hardening.
This commit is contained in:
2025-11-20 17:16:11 -07:00
parent 5888e45b8c
commit 115e733604
18 changed files with 5815 additions and 4 deletions

View File

@@ -24,6 +24,7 @@ class Config:
# Required settings - no defaults
SECRET_KEY: str
BASE_URL: str
# Database
DATABASE_URL: str
@@ -69,6 +70,16 @@ class Config:
)
cls.SECRET_KEY = secret_key
# Required - BASE_URL must exist for OAuth metadata
base_url = os.getenv("GONDULF_BASE_URL")
if not base_url:
raise ConfigurationError(
"GONDULF_BASE_URL is required for OAuth 2.0 metadata endpoint. "
"Examples: https://auth.example.com or http://localhost:8000 (development only)"
)
# Normalize: remove trailing slash if present
cls.BASE_URL = base_url.rstrip("/")
# Database - with sensible default
cls.DATABASE_URL = os.getenv(
"GONDULF_DATABASE_URL", "sqlite:///./data/gondulf.db"
@@ -110,6 +121,21 @@ class Config:
Performs additional validation beyond initial loading.
"""
# Validate BASE_URL is a valid URL
if not cls.BASE_URL.startswith(("http://", "https://")):
raise ConfigurationError(
"GONDULF_BASE_URL must start with http:// or https://"
)
# Warn if using http:// in production-like settings
if cls.BASE_URL.startswith("http://") and "localhost" not in cls.BASE_URL:
import warnings
warnings.warn(
"GONDULF_BASE_URL uses http:// for non-localhost domain. "
"HTTPS is required for production IndieAuth servers.",
UserWarning
)
# Validate SMTP port is reasonable
if cls.SMTP_PORT < 1 or cls.SMTP_PORT > 65535:
raise ConfigurationError(

View File

@@ -6,6 +6,7 @@ from gondulf.database.connection import Database
from gondulf.dns import DNSService
from gondulf.email import EmailService
from gondulf.services.domain_verification import DomainVerificationService
from gondulf.services.happ_parser import HAppParser
from gondulf.services.html_fetcher import HTMLFetcherService
from gondulf.services.rate_limiter import RateLimiter
from gondulf.services.relme_parser import RelMeParser
@@ -70,6 +71,12 @@ def get_relme_parser() -> RelMeParser:
return RelMeParser()
@lru_cache
def get_happ_parser() -> HAppParser:
    """Return the shared h-app parser, wired to the shared HTML fetcher.

    ``lru_cache`` memoizes the zero-argument call, so the parser is built
    once and the same instance is handed out afterwards.
    """
    fetcher = get_html_fetcher()
    return HAppParser(html_fetcher=fetcher)
@lru_cache
def get_rate_limiter() -> RateLimiter:
"""Get singleton rate limiter service."""

View File

@@ -14,7 +14,7 @@ from gondulf.database.connection import Database
from gondulf.dns import DNSService
from gondulf.email import EmailService
from gondulf.logging_config import configure_logging
from gondulf.routers import authorization, token, verification
from gondulf.routers import authorization, metadata, token, verification
from gondulf.storage import CodeStore
# Load configuration at application startup
@@ -34,6 +34,7 @@ app = FastAPI(
# Register routers
app.include_router(authorization.router)
app.include_router(metadata.router)
app.include_router(token.router)
app.include_router(verification.router)

View File

@@ -7,8 +7,9 @@ from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.templating import Jinja2Templates
from gondulf.database.connection import Database
from gondulf.dependencies import get_database, get_verification_service
from gondulf.dependencies import get_database, get_happ_parser, get_verification_service
from gondulf.services.domain_verification import DomainVerificationService
from gondulf.services.happ_parser import HAppParser
from gondulf.utils.validation import (
extract_domain_from_url,
normalize_client_id,
@@ -32,7 +33,8 @@ async def authorize_get(
code_challenge_method: str | None = None,
scope: str | None = None,
me: str | None = None,
database: Database = Depends(get_database)
database: Database = Depends(get_database),
happ_parser: HAppParser = Depends(get_happ_parser)
) -> HTMLResponse:
"""
Handle authorization request (GET).
@@ -162,6 +164,15 @@ async def authorize_get(
# For Phase 2, we'll show consent form immediately (domain verification happens separately)
# In Phase 3, we'll check database for verified domains
# Fetch client metadata (h-app microformat)
client_metadata = None
try:
client_metadata = await happ_parser.fetch_and_parse(normalized_client_id)
logger.info(f"Fetched client metadata for {normalized_client_id}: {client_metadata.name}")
except Exception as e:
logger.warning(f"Failed to fetch client metadata for {normalized_client_id}: {e}")
# Continue without metadata - will show client_id instead
# Show consent form
return templates.TemplateResponse(
"authorize.html",
@@ -173,7 +184,8 @@ async def authorize_get(
"code_challenge": code_challenge,
"code_challenge_method": code_challenge_method,
"scope": scope or "",
"me": me
"me": me,
"client_metadata": client_metadata
}
)

View File

@@ -0,0 +1,48 @@
"""OAuth 2.0 Authorization Server Metadata endpoint (RFC 8414)."""
import json
import logging
from fastapi import APIRouter, Depends, Response
from gondulf.config import Config
from gondulf.dependencies import get_config
# Module-level logger for the metadata endpoint.
logger = logging.getLogger("gondulf.metadata")
# Router holding the metadata route; created without a path prefix.
router = APIRouter()
@router.get("/.well-known/oauth-authorization-server")
async def get_metadata(config: Config = Depends(get_config)) -> Response:
    """
    Serve the OAuth 2.0 Authorization Server Metadata document (RFC 8414).

    Every endpoint URL in the document is built from ``config.BASE_URL``.
    The response is marked publicly cacheable for 24 hours.

    Args:
        config: Application configuration supplying ``BASE_URL``

    Returns:
        Response: Pretty-printed JSON metadata with a Cache-Control header
    """
    logger.debug("Metadata endpoint requested")

    issuer = config.BASE_URL

    # Key order is preserved by json.dumps, so it defines the output layout.
    document = {
        "issuer": issuer,
        "authorization_endpoint": f"{issuer}/authorize",
        "token_endpoint": f"{issuer}/token",
        "response_types_supported": ["code"],
        "grant_types_supported": ["authorization_code"],
        "code_challenge_methods_supported": [],
        "token_endpoint_auth_methods_supported": ["none"],
        "revocation_endpoint_auth_methods_supported": ["none"],
        "scopes_supported": []
    }

    logger.debug(f"Returning metadata for issuer: {issuer}")

    payload = json.dumps(document, indent=2)
    cache_headers = {"Cache-Control": "public, max-age=86400"}
    return Response(
        content=payload,
        media_type="application/json",
        headers=cache_headers
    )

View File

@@ -0,0 +1,153 @@
"""h-app microformat parser for client metadata extraction."""
import logging
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Dict
from urllib.parse import urlparse
import mf2py
from gondulf.services.html_fetcher import HTMLFetcherService
# Module-level logger used by the h-app parser.
logger = logging.getLogger("gondulf.happ_parser")
@dataclass
class ClientMetadata:
    """
    Client metadata extracted from h-app markup.

    Produced by HAppParser; the consent template reads all three fields.
    """
    # Display name: the h-app "name" property, or the client_id's domain
    # when no usable name is found.
    name: str
    # Logo URL taken from the h-app "logo" property; None when absent.
    logo: str | None = None
    # Application URL from the h-app "url" property; the parser substitutes
    # the client_id when the property is missing. None on domain fallback.
    url: str | None = None
class HAppParser:
    """
    Parse h-app microformat data from client HTML.

    The h-app markup is served by the client and is therefore untrusted.
    The extracted ``logo`` and ``url`` end up in ``<img src>`` / ``<a href>``
    on the consent page, so only absolute http(s) URLs are accepted;
    anything else (``javascript:``, ``data:``, malformed values) is dropped.
    """

    def __init__(self, html_fetcher: HTMLFetcherService):
        """
        Initialize parser with HTML fetcher dependency.

        Args:
            html_fetcher: Service for fetching HTML content
        """
        self.html_fetcher = html_fetcher
        # client_id -> (metadata, time the entry was stored, naive UTC)
        self.cache: Dict[str, tuple[ClientMetadata, datetime]] = {}
        self.cache_ttl = timedelta(hours=24)

    async def fetch_and_parse(self, client_id: str) -> ClientMetadata:
        """
        Fetch client_id URL and parse h-app metadata.

        Uses 24-hour caching to reduce HTTP requests.
        Falls back to domain name if the fetch fails or h-app is not found.

        Args:
            client_id: Client application URL

        Returns:
            ClientMetadata with name (always populated) and optional logo/url
        """
        # Check cache
        cached = self.cache.get(client_id)
        if cached is not None:
            cached_metadata, cached_at = cached
            if datetime.utcnow() - cached_at < self.cache_ttl:
                logger.debug(f"Returning cached metadata for {client_id}")
                return cached_metadata

        logger.info(f"Fetching h-app metadata from {client_id}")

        # Fetch HTML; a failed fetch is not fatal, we fall back to the domain
        try:
            html = self.html_fetcher.fetch(client_id)
        except Exception as e:
            logger.warning(f"Failed to fetch {client_id}: {e}")
            html = None

        # Parse h-app or fallback to domain name
        if html:
            metadata = self._parse_h_app(html, client_id)
        else:
            logger.info(f"Using domain fallback for {client_id}")
            metadata = ClientMetadata(
                name=self._extract_domain_name(client_id)
            )

        # Cache result (fallback results are cached too)
        self.cache[client_id] = (metadata, datetime.utcnow())
        logger.debug(f"Cached metadata for {client_id}: {metadata.name}")
        return metadata

    @staticmethod
    def _first_value(properties: dict, key: str) -> object:
        """
        Return the first value of an mf2 property, unwrapping dict values.

        Args:
            properties: The ``properties`` mapping of a parsed mf2 item
            key: Property name (e.g. "name", "logo", "url")

        Returns:
            The first value, its ``'value'`` entry when the parser returned
            a dict, or None when the property is missing or empty
        """
        # "or [None]" also covers an empty list, which would otherwise
        # raise IndexError and discard the whole h-app.
        values = properties.get(key) or [None]
        first = values[0]
        if isinstance(first, dict):
            return first.get('value')
        return first

    @staticmethod
    def _safe_http_url(value: object) -> str | None:
        """
        Return ``value`` only if it is an absolute http(s) URL.

        Args:
            value: Candidate URL taken from untrusted markup

        Returns:
            The stripped URL string, or None if it is not a string, cannot
            be parsed, has no host, or uses a scheme other than http/https
        """
        if not isinstance(value, str):
            return None
        candidate = value.strip()
        try:
            parsed = urlparse(candidate)
        except ValueError:
            return None
        if parsed.scheme in ("http", "https") and parsed.netloc:
            return candidate
        return None

    def _parse_h_app(self, html: str, client_id: str) -> ClientMetadata:
        """
        Parse h-app microformat from HTML.

        Args:
            html: HTML content to parse
            client_id: Client URL (for resolving relative URLs)

        Returns:
            ClientMetadata with extracted values, or domain fallback if no
            h-app is present or parsing fails
        """
        try:
            # Parse microformats
            parsed = mf2py.parse(doc=html, url=client_id)

            # Find h-app items
            h_apps = [
                item for item in parsed.get('items', [])
                if 'h-app' in item.get('type', [])
            ]
            if not h_apps:
                logger.info(f"No h-app markup found at {client_id}")
                return ClientMetadata(
                    name=self._extract_domain_name(client_id)
                )

            # Use first h-app
            properties = h_apps[0].get('properties', {})

            # Name: any non-blank string, otherwise the domain
            name_raw = self._first_value(properties, 'name')
            if isinstance(name_raw, str) and name_raw.strip():
                name = name_raw
            else:
                name = self._extract_domain_name(client_id)

            # Logo: rendered as <img src>, so it must be http(s)
            logo_raw = self._first_value(properties, 'logo')
            logo = self._safe_http_url(logo_raw)
            if logo_raw and logo is None:
                logger.warning(f"Ignoring unsafe h-app logo URL from {client_id}")

            # URL: rendered as <a href>, so it must be http(s); fall back to
            # the client_id exactly as when the property is absent
            url_raw = self._first_value(properties, 'url')
            url = self._safe_http_url(url_raw)
            if url_raw and url is None:
                logger.warning(f"Ignoring unsafe h-app url from {client_id}")
            url = url or client_id

            logger.info(f"Extracted h-app metadata from {client_id}: name={name}")
            return ClientMetadata(
                name=name,
                logo=logo,
                url=url
            )
        except Exception as e:
            # Best effort: malformed markup must never break the consent page
            logger.error(f"Failed to parse h-app from {client_id}: {e}")
            return ClientMetadata(
                name=self._extract_domain_name(client_id)
            )

    def _extract_domain_name(self, client_id: str) -> str:
        """
        Extract domain name from client_id for fallback display.

        Args:
            client_id: Client URL

        Returns:
            Domain name (e.g., "example.com"); for a scheme-less value the
            path component is returned, and the raw client_id if parsing fails
        """
        try:
            parsed = urlparse(client_id)
            return parsed.netloc or parsed.path
        except Exception:
            return client_id

View File

@@ -5,7 +5,23 @@
{% block content %}
<h1>Authorization Request</h1>
{# Client identity shown on the consent page. Values come from the client's
   own h-app markup, so the external link must not hand the new tab a
   reference to this page (rel="noopener noreferrer"). #}
{% if client_metadata %}
<div class="client-metadata">
    {% if client_metadata.logo %}
    <img src="{{ client_metadata.logo }}" alt="{{ client_metadata.name or 'Client' }} logo" class="client-logo" style="max-width: 64px; max-height: 64px;">
    {% endif %}
    <h2>{{ client_metadata.name or client_id }}</h2>
    {% if client_metadata.url %}
    <p><a href="{{ client_metadata.url }}" target="_blank" rel="noopener noreferrer">{{ client_metadata.url }}</a></p>
    {% endif %}
</div>
<p>The application <strong>{{ client_metadata.name or client_id }}</strong> wants to authenticate you.</p>
{% else %}
<div class="client-info">
    <h2>{{ client_id }}</h2>
</div>
<p>The application <strong>{{ client_id }}</strong> wants to authenticate you.</p>
{% endif %}
{% if scope %}
<p>Requested permissions: <code>{{ scope }}</code></p>