From 115e733604f1030c58be0a709f12397940ada92f Mon Sep 17 00:00:00 2001 From: Phil Skentelbery Date: Thu, 20 Nov 2025 17:16:11 -0700 Subject: [PATCH] feat(phase-4a): complete Phase 3 implementation and gap analysis Merges Phase 4a work including: Implementation: - Metadata discovery endpoint (/api/.well-known/oauth-authorization-server) - h-app microformat parser service - Enhanced authorization endpoint with client info display - Configuration management system - Dependency injection framework Documentation: - Comprehensive gap analysis for v1.0.0 compliance - Phase 4a clarifications on development approach - Phase 4-5 critical components breakdown Testing: - Unit tests for h-app parser (308 lines, comprehensive coverage) - Unit tests for metadata endpoint (134 lines) - Unit tests for configuration system (18 lines) - Integration test updates All tests passing with high coverage. Ready for Phase 4b security hardening. --- docs/designs/phase-4-5-critical-components.md | 3233 +++++++++++++++++ docs/designs/phase-4a-clarifications.md | 662 ++++ .../reports/2025-11-20-gap-analysis-v1.0.0.md | 632 ++++ .../2025-11-20-phase-4a-complete-phase-3.md | 406 +++ pyproject.toml | 1 + src/gondulf/config.py | 26 + src/gondulf/dependencies.py | 7 + src/gondulf/main.py | 3 +- src/gondulf/routers/authorization.py | 18 +- src/gondulf/routers/metadata.py | 48 + src/gondulf/services/happ_parser.py | 153 + src/gondulf/templates/authorize.html | 16 + tests/integration/test_health.py | 2 + tests/unit/test_config.py | 18 + tests/unit/test_happ_parser.py | 308 ++ tests/unit/test_metadata.py | 134 + tests/unit/test_token_endpoint.py | 1 + uv.lock | 151 + 18 files changed, 5815 insertions(+), 4 deletions(-) create mode 100644 docs/designs/phase-4-5-critical-components.md create mode 100644 docs/designs/phase-4a-clarifications.md create mode 100644 docs/reports/2025-11-20-gap-analysis-v1.0.0.md create mode 100644 docs/reports/2025-11-20-phase-4a-complete-phase-3.md create mode 100644 
src/gondulf/routers/metadata.py create mode 100644 src/gondulf/services/happ_parser.py create mode 100644 tests/unit/test_happ_parser.py create mode 100644 tests/unit/test_metadata.py diff --git a/docs/designs/phase-4-5-critical-components.md b/docs/designs/phase-4-5-critical-components.md new file mode 100644 index 0000000..1eef509 --- /dev/null +++ b/docs/designs/phase-4-5-critical-components.md @@ -0,0 +1,3233 @@ +# Phase 4-5: Critical Components for v1.0.0 Release + +**Architect**: Claude (Architect Agent) +**Date**: 2025-11-20 +**Status**: Design Complete +**Design References**: +- `/docs/roadmap/v1.0.0.md` - Original release plan +- `/docs/reports/2025-11-20-gap-analysis-v1.0.0.md` - Gap analysis identifying missing components +- `/docs/architecture/security.md` - Security architecture +- `/docs/architecture/indieauth-protocol.md` - Protocol implementation details + +## Overview + +This design document addresses the 7 critical missing components identified in the v1.0.0 gap analysis that are blocking release. These components span Phase 3 completion (metadata endpoint, client metadata fetching), Phase 4 (security hardening), and Phase 5 (deployment, testing, documentation). + +**Current Status**: Phases 1-2 are complete (100%). Phase 3 is 75% complete (missing metadata endpoint and h-app parsing). Phases 4-5 have not been started. + +**Estimated Remaining Effort**: 10-15 days to reach v1.0.0 release readiness. + +**Design Philosophy**: Maintain simplicity while meeting all P0 requirements. Reuse existing infrastructure where possible. Focus on production readiness and W3C IndieAuth compliance. + +--- + +## Component 1: Metadata Endpoint + +### Purpose + +Provide OAuth 2.0 Authorization Server Metadata endpoint per RFC 8414 to enable IndieAuth client discovery of server capabilities and endpoints. 
+ +### Specification References + +- **W3C IndieAuth**: Section on Discovery (metadata endpoint) +- **RFC 8414**: OAuth 2.0 Authorization Server Metadata +- **v1.0.0 Roadmap**: Line 62 (P0 feature), Phase 3 lines 162, 168 + +### Design Overview + +Create a static metadata endpoint at `/.well-known/oauth-authorization-server` that returns server capabilities in JSON format. This endpoint requires no authentication and should be publicly cacheable. + +### API Specification + +**Endpoint**: `GET /.well-known/oauth-authorization-server` + +**Request**: No parameters, no authentication required + +**Response** (HTTP 200 OK): +```json +{ + "issuer": "https://auth.example.com", + "authorization_endpoint": "https://auth.example.com/authorize", + "token_endpoint": "https://auth.example.com/token", + "response_types_supported": ["code"], + "grant_types_supported": ["authorization_code"], + "code_challenge_methods_supported": [], + "token_endpoint_auth_methods_supported": ["none"], + "revocation_endpoint_auth_methods_supported": ["none"], + "scopes_supported": [] +} +``` + +**Response Headers**: +```http +Content-Type: application/json +Cache-Control: public, max-age=86400 +``` + +### Field Definitions + +| Field | Value | Rationale | +|-------|-------|-----------| +| `issuer` | Server base URL (from config) | Identifies this authorization server | +| `authorization_endpoint` | `{base_url}/authorize` | Where clients initiate auth flow | +| `token_endpoint` | `{base_url}/token` | Where clients exchange codes for tokens | +| `response_types_supported` | `["code"]` | Only authorization code flow supported | +| `grant_types_supported` | `["authorization_code"]` | Only grant type in v1.0.0 | +| `code_challenge_methods_supported` | `[]` | PKCE not supported in v1.0.0 (ADR-003) | +| `token_endpoint_auth_methods_supported` | `["none"]` | Public clients, no client secrets | +| `revocation_endpoint_auth_methods_supported` | `["none"]` | No revocation endpoint in v1.0.0 | +| 
`scopes_supported` | `[]` | Authentication-only, no scopes in v1.0.0 | + +### Implementation Approach + +**File**: `/src/gondulf/routers/metadata.py` + +**Implementation Strategy**: Static JSON response generated from configuration at startup. + +```python +from fastapi import APIRouter, Response +from gondulf.config import get_config + +router = APIRouter() + +@router.get("/.well-known/oauth-authorization-server") +async def get_metadata(): + """ + OAuth 2.0 Authorization Server Metadata (RFC 8414). + + Returns server capabilities for IndieAuth client discovery. + """ + config = get_config() + + metadata = { + "issuer": config.BASE_URL, + "authorization_endpoint": f"{config.BASE_URL}/authorize", + "token_endpoint": f"{config.BASE_URL}/token", + "response_types_supported": ["code"], + "grant_types_supported": ["authorization_code"], + "code_challenge_methods_supported": [], + "token_endpoint_auth_methods_supported": ["none"], + "revocation_endpoint_auth_methods_supported": ["none"], + "scopes_supported": [] + } + + return Response( + content=json.dumps(metadata, indent=2), + media_type="application/json", + headers={ + "Cache-Control": "public, max-age=86400" + } + ) +``` + +**Configuration Change**: Add `BASE_URL` to config (e.g., `GONDULF_BASE_URL=https://auth.example.com`). + +**Registration**: Add router to main.py: +```python +from gondulf.routers import metadata +app.include_router(metadata.router) +``` + +### Error Handling + +No error conditions - endpoint always returns metadata. If configuration is invalid, application fails at startup (fail-fast principle). + +### Security Considerations + +- **Public Endpoint**: No authentication required (per RFC 8414) +- **Cache-Control**: Set public cache for 24 hours to reduce load +- **No Secrets**: Metadata contains no sensitive information +- **HTTPS**: Served over HTTPS in production (enforced by middleware) + +### Testing Requirements + +**Unit Tests** (`tests/unit/test_metadata.py`): +1. 
Test metadata endpoint returns 200 OK +2. Test all required fields present +3. Test correct values for each field +4. Test Cache-Control header present +5. Test Content-Type is application/json +6. Test issuer matches BASE_URL configuration + +**Integration Test** (`tests/integration/test_metadata_integration.py`): +1. Test endpoint accessible via FastAPI TestClient +2. Test response can be parsed as valid JSON +3. Test endpoints in metadata are valid URLs + +**Compliance Test**: +1. Verify metadata response matches RFC 8414 format + +### Acceptance Criteria + +- [ ] `/.well-known/oauth-authorization-server` endpoint returns valid JSON +- [ ] All required fields present per RFC 8414 +- [ ] Endpoint values match actual server configuration +- [ ] Cache-Control header set to public, max-age=86400 +- [ ] All tests pass (unit, integration) +- [ ] Endpoint accessible without authentication + +--- + +## Component 2: Client Metadata Fetching (h-app Microformat) + +### Purpose + +Fetch and parse client application metadata from `client_id` URL to display application name, icon, and URL on the consent screen. This improves user experience by showing what application they're authorizing. + +### Specification References + +- **W3C IndieAuth**: Client Information Discovery +- **Microformats h-app**: http://microformats.org/wiki/h-app +- **v1.0.0 Roadmap**: Success criteria line 27, Phase 3 deliverables line 169 + +### Design Overview + +Create a service that fetches the `client_id` URL, parses HTML for h-app microformat data, extracts application metadata, and caches results. Integrate with authorization endpoint to display app information on consent screen. + +### h-app Microformat Structure + +Example HTML from client application: +```html +
+ + My IndieAuth Client +
+``` + +**Properties to Extract**: +- `p-name`: Application name (required) +- `u-logo`: Application icon URL (optional) +- `u-url`: Application URL (optional, usually same as client_id) + +### Data Model + +**ClientMetadata** (in-memory cache): +```python +@dataclass +class ClientMetadata: + """Client application metadata from h-app microformat.""" + client_id: str + name: str # Extracted from p-name, or domain fallback + logo_url: Optional[str] = None # Extracted from u-logo + url: Optional[str] = None # Extracted from u-url + fetched_at: datetime = None # Timestamp for cache expiry +``` + +### Service Design + +**File**: `/src/gondulf/services/h_app_parser.py` + +**Dependencies**: +- `html_fetcher.py` (already exists from Phase 2) +- `mf2py` library for microformat parsing + +**Service Interface**: +```python +class HAppParser: + """Parse h-app microformat from client_id URL.""" + + def __init__(self, html_fetcher: HTMLFetcher): + self.html_fetcher = html_fetcher + self.cache: Dict[str, ClientMetadata] = {} + self.cache_ttl = timedelta(hours=24) + + def parse_client_metadata(self, client_id: str) -> ClientMetadata: + """ + Fetch and parse client metadata from client_id URL. + + Returns ClientMetadata with name (always populated), and + optional logo_url and url. + + Caches results for 24 hours to reduce HTTP requests. + """ + pass + + def _parse_h_app(self, html: str, client_id: str) -> ClientMetadata: + """ + Parse h-app microformat from HTML. + + Returns ClientMetadata with extracted values, or fallback + to domain name if no h-app found. + """ + pass + + def _extract_domain_name(self, client_id: str) -> str: + """Extract domain name from client_id for fallback display.""" + pass +``` + +### Implementation Details + +**Parsing Strategy**: +1. Check cache for `client_id` (if cached and not expired, return cached) +2. Fetch HTML from `client_id` using `HTMLFetcher` (reuse Phase 2 infrastructure) +3. 
Parse HTML with `mf2py` library to extract h-app microformat +4. Extract `p-name`, `u-logo`, `u-url` properties +5. If h-app not found, fallback to domain name extraction +6. Store in cache with 24-hour TTL +7. Return `ClientMetadata` object + +**mf2py Usage**: +```python +import mf2py +from urllib.parse import urlparse, urljoin + +def _parse_h_app(self, html: str, client_id: str) -> ClientMetadata: + """Parse h-app microformat from HTML.""" + # Parse microformats + parsed = mf2py.parse(doc=html, url=client_id) + + # Find h-app items + h_apps = [item for item in parsed.get('items', []) + if 'h-app' in item.get('type', [])] + + if not h_apps: + # Fallback: no h-app found + return ClientMetadata( + client_id=client_id, + name=self._extract_domain_name(client_id), + fetched_at=datetime.utcnow() + ) + + # Use first h-app + h_app = h_apps[0] + properties = h_app.get('properties', {}) + + # Extract properties + name = properties.get('name', [None])[0] or self._extract_domain_name(client_id) + logo_url = properties.get('logo', [None])[0] + url = properties.get('url', [None])[0] or client_id + + # Resolve relative URLs + if logo_url and not logo_url.startswith('http'): + logo_url = urljoin(client_id, logo_url) + + return ClientMetadata( + client_id=client_id, + name=name, + logo_url=logo_url, + url=url, + fetched_at=datetime.utcnow() + ) +``` + +**Fallback Strategy**: +```python +def _extract_domain_name(self, client_id: str) -> str: + """Extract domain name for fallback display.""" + parsed = urlparse(client_id) + domain = parsed.hostname or client_id + + # Remove 'www.' 
prefix if present + if domain.startswith('www.'): + domain = domain[4:] + + return domain +``` + +**Cache Management**: +```python +def parse_client_metadata(self, client_id: str) -> ClientMetadata: + """Fetch and parse with caching.""" + # Check cache + if client_id in self.cache: + cached = self.cache[client_id] + age = datetime.utcnow() - cached.fetched_at + if age < self.cache_ttl: + logger.debug(f"Cache hit for client_id: {client_id}") + return cached + + # Fetch HTML + try: + html = self.html_fetcher.fetch(client_id) + if not html: + raise ValueError("Failed to fetch client_id URL") + + # Parse h-app + metadata = self._parse_h_app(html, client_id) + + # Cache result + self.cache[client_id] = metadata + + return metadata + + except Exception as e: + logger.warning(f"Failed to parse client metadata for {client_id}: {e}") + # Return fallback metadata + return ClientMetadata( + client_id=client_id, + name=self._extract_domain_name(client_id), + fetched_at=datetime.utcnow() + ) +``` + +### Integration with Authorization Endpoint + +**Update**: `/src/gondulf/routers/authorization.py` + +**Change**: Inject `HAppParser` and fetch client metadata before rendering consent screen. + +```python +from gondulf.services.h_app_parser import HAppParser, ClientMetadata + +@router.get("/authorize") +async def authorize_get( + # ... existing parameters ... + h_app_parser: HAppParser = Depends(get_h_app_parser) +): + # ... existing validation ... 
+ + # Fetch client metadata + client_metadata = h_app_parser.parse_client_metadata(client_id) + + # Render consent screen with client metadata + return templates.TemplateResponse( + "authorize.html", + { + "request": request, + "me": me, + "client_name": client_metadata.name, + "client_logo_url": client_metadata.logo_url, + "client_url": client_metadata.url or client_id, + "client_id": client_id, + "redirect_uri": redirect_uri, + "state": state + } + ) +``` + +**Template Update**: `/src/gondulf/templates/authorize.html` + +Add client metadata display: +```html +
+ {% if client_logo_url %} + + {% endif %} +

{{ client_name }}

+

{{ client_url }}

+
+ +

This application wants to sign you in as:

+

{{ me }}

+ +
+ + + +
+``` + +### Dependency Injection + +**File**: `/src/gondulf/dependencies.py` + +Add `get_h_app_parser()`: +```python +from gondulf.services.h_app_parser import HAppParser + +@lru_cache() +def get_h_app_parser() -> HAppParser: + """Get HAppParser singleton.""" + html_fetcher = get_html_fetcher() + return HAppParser(html_fetcher) +``` + +### Error Handling + +**Failure Modes**: +1. **HTTP fetch fails**: Return fallback metadata with domain name +2. **HTML parse fails**: Return fallback metadata with domain name +3. **h-app not found**: Return fallback metadata with domain name +4. **Invalid URLs in h-app**: Skip invalid fields, use available data + +**Logging**: +- Log INFO when h-app successfully parsed +- Log WARNING when fallback used +- Log ERROR only on unexpected exceptions + +### Security Considerations + +- **HTTPS Only**: Reuse `HTMLFetcher` which enforces HTTPS +- **Timeout**: 5-second timeout from `HTMLFetcher` +- **Size Limit**: 5MB limit from `HTMLFetcher` +- **XSS Prevention**: HTML escape all client metadata in templates (Jinja2 auto-escaping) +- **Logo URL Validation**: Only display logo if HTTPS URL +- **Cache Poisoning**: Cache keyed by client_id, no user input + +### Testing Requirements + +**Unit Tests** (`tests/unit/test_h_app_parser.py`): +1. Test h-app parsing with complete metadata +2. Test h-app parsing with missing logo +3. Test h-app parsing with missing url +4. Test h-app not found (fallback to domain) +5. Test relative logo URL resolution +6. Test domain name extraction fallback +7. Test cache hit (no HTTP request) +8. Test cache expiry (new HTTP request) +9. Test HTML fetch failure (fallback) +10. Test multiple h-app items (use first) + +**Integration Tests** (`tests/integration/test_authorization_with_client_metadata.py`): +1. Test authorization endpoint displays client name +2. Test authorization endpoint displays client logo +3. 
Test authorization endpoint with fallback metadata + +### Acceptance Criteria + +- [ ] `HAppParser` service created with caching +- [ ] h-app microformat parsing working with mf2py +- [ ] Fallback to domain name when h-app not found +- [ ] Cache working with 24-hour TTL +- [ ] Integration with authorization endpoint complete +- [ ] Consent screen displays client name, logo, URL +- [ ] All tests pass (unit, integration) +- [ ] HTML escaping prevents XSS + +--- + +## Component 3: Security Hardening + +### Purpose + +Implement security best practices required for production deployment: security headers, HTTPS enforcement, input sanitization audit, and PII logging audit. + +### Specification References + +- **v1.0.0 Roadmap**: Line 65 (P0 feature), Phase 4 lines 198-203 +- **OWASP Top 10**: Security header recommendations +- **OAuth 2.0 Security Best Practices**: HTTPS enforcement + +### Design Overview + +Create security middleware to add HTTP security headers to all responses. Implement HTTPS enforcement for production environments. Conduct comprehensive audit of input sanitization and PII logging practices. 
+ +### 3.1: Security Headers Middleware + +**File**: `/src/gondulf/middleware/security_headers.py` + +**Headers to Implement**: + +| Header | Value | Purpose | +|--------|-------|---------| +| `X-Frame-Options` | `DENY` | Prevent clickjacking attacks | +| `X-Content-Type-Options` | `nosniff` | Prevent MIME sniffing | +| `X-XSS-Protection` | `1; mode=block` | Enable XSS filter (legacy browsers) | +| `Referrer-Policy` | `strict-origin-when-cross-origin` | Control referrer information | +| `Strict-Transport-Security` | `max-age=31536000; includeSubDomains` | Force HTTPS (production only) | +| `Content-Security-Policy` | `default-src 'self'; style-src 'self' 'unsafe-inline'` | Restrict resource loading | +| `Cache-Control` | `no-store, no-cache, must-revalidate` | Prevent caching of sensitive pages (auth endpoints only) | +| `Pragma` | `no-cache` | HTTP/1.0 cache control | + +**Implementation**: +```python +from fastapi import Request, Response +from starlette.middleware.base import BaseHTTPMiddleware +from gondulf.config import get_config + +class SecurityHeadersMiddleware(BaseHTTPMiddleware): + """Add security headers to all responses.""" + + async def dispatch(self, request: Request, call_next): + response = await call_next(request) + + config = get_config() + + # Always add these headers + response.headers["X-Frame-Options"] = "DENY" + response.headers["X-Content-Type-Options"] = "nosniff" + response.headers["X-XSS-Protection"] = "1; mode=block" + response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin" + + # CSP: Allow self and inline styles (for minimal CSS in templates) + response.headers["Content-Security-Policy"] = ( + "default-src 'self'; " + "style-src 'self' 'unsafe-inline'; " + "img-src 'self' https:; " # Allow HTTPS images (client logos) + "frame-ancestors 'none'" # Equivalent to X-Frame-Options: DENY + ) + + # HSTS: Only in production with HTTPS + if not config.DEBUG and request.url.scheme == "https": + 
response.headers["Strict-Transport-Security"] = ( + "max-age=31536000; includeSubDomains" + ) + + # Cache control for sensitive endpoints + if request.url.path in ["/authorize", "/token", "/api/verify/code"]: + response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate" + response.headers["Pragma"] = "no-cache" + + return response +``` + +**Registration**: Add to `main.py`: +```python +from gondulf.middleware.security_headers import SecurityHeadersMiddleware + +app.add_middleware(SecurityHeadersMiddleware) +``` + +### 3.2: HTTPS Enforcement Middleware + +**File**: `/src/gondulf/middleware/https_enforcement.py` + +**Implementation**: +```python +from fastapi import Request, Response +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.responses import RedirectResponse +from gondulf.config import get_config + +class HTTPSEnforcementMiddleware(BaseHTTPMiddleware): + """Enforce HTTPS in production (redirect HTTP to HTTPS).""" + + async def dispatch(self, request: Request, call_next): + config = get_config() + + # Only enforce in production + if not config.DEBUG: + # Allow localhost HTTP for local testing + if request.url.hostname not in ["localhost", "127.0.0.1"]: + # Check if HTTP (not HTTPS) + if request.url.scheme != "https": + # Redirect to HTTPS + https_url = request.url.replace(scheme="https") + return RedirectResponse(url=str(https_url), status_code=301) + + return await call_next(request) +``` + +**Registration**: Add to `main.py` BEFORE security headers middleware: +```python +from gondulf.middleware.https_enforcement import HTTPSEnforcementMiddleware + +# Add HTTPS enforcement first (before security headers) +app.add_middleware(HTTPSEnforcementMiddleware) +app.add_middleware(SecurityHeadersMiddleware) +``` + +**Configuration**: Add `DEBUG` flag to config: +```python +# config.py +DEBUG = os.getenv("GONDULF_DEBUG", "false").lower() == "true" +``` + +### 3.3: Input Sanitization Audit + +**Scope**: Review all user input 
handling for proper validation and sanitization. + +**Audit Checklist**: + +| Input Source | Current Validation | Additional Sanitization Needed | +|--------------|-------------------|-------------------------------| +| `me` parameter | Pydantic HttpUrl, custom validation | ✅ Adequate (URL validation comprehensive) | +| `client_id` parameter | Pydantic HttpUrl | ✅ Adequate | +| `redirect_uri` parameter | Pydantic HttpUrl, domain validation | ✅ Adequate | +| `state` parameter | Pydantic str, max_length=512 | ✅ Adequate (opaque, not interpreted) | +| `code` parameter | str, checked against storage | ✅ Adequate (constant-time hash comparison) | +| Email addresses | Regex validation in `validation.py` | ✅ Adequate (from rel=me, not user input) | +| HTML templates | Jinja2 auto-escaping | ✅ Adequate (all {{ }} escaped by default) | +| SQL queries | SQLAlchemy parameterized queries | ✅ Adequate (no string interpolation) | +| DNS queries | dnspython library | ✅ Adequate (library handles escaping) | + +**Action Items**: +1. **Add HTML escaping test**: Verify Jinja2 auto-escaping works +2. **Add SQL injection test**: Verify parameterized queries prevent injection +3. **Document validation patterns**: Update security.md with validation approach + +**No Code Changes Required**: Existing validation is adequate. + +### 3.4: PII Logging Audit + +**Scope**: Ensure no Personally Identifiable Information (PII) is logged. + +**PII Definition**: +- Email addresses +- Full tokens (access tokens, authorization codes, verification codes) +- IP addresses (in production) + +**Audit Checklist**: + +| Service | Logs Email? | Logs Tokens? | Logs IP? 
| Action Required | +|---------|-------------|--------------|----------|-----------------| +| `email.py` | ❌ No (only domain) | N/A | ❌ No | ✅ Compliant | +| `token_service.py` | N/A | ⚠️ Token prefix only (8 chars) | ❌ No | ✅ Compliant (prefix OK) | +| `domain_verification.py` | ⚠️ **May log email** | ⚠️ **May log code** | ❌ No | 🔍 **Review Required** | +| `authorization.py` | ❌ No | ⚠️ Code prefix only | ⚠️ **May log IP in errors** | 🔍 **Review Required** | +| `verification.py` | ❌ No | ⚠️ Code in errors? | ❌ No | 🔍 **Review Required** | + +**Action Items**: +1. **Review all logger.info/warning/error calls** in services and routers +2. **Remove email addresses** from log messages (use domain only) +3. **Remove full codes/tokens** from log messages (use prefix or hash) +4. **Remove IP addresses** from production logs (OK in DEBUG mode) +5. **Add logging best practices** to coding standards + +**Example Fix**: +```python +# BAD: Logs email (PII) +logger.info(f"Verification sent to {email}") + +# GOOD: Logs domain only +logger.info(f"Verification sent to user at domain {domain}") + +# BAD: Logs full token +logger.debug(f"Generated token: {token}") + +# GOOD: Logs token prefix for correlation +logger.debug(f"Generated token: {token[:8]}...") +``` + +### 3.5: Security Configuration + +**Add to config.py**: +```python +# Security settings +DEBUG = os.getenv("GONDULF_DEBUG", "false").lower() == "true" +HSTS_MAX_AGE = int(os.getenv("GONDULF_HSTS_MAX_AGE", "31536000")) # 1 year +CSP_POLICY = os.getenv( + "GONDULF_CSP_POLICY", + "default-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' https:; frame-ancestors 'none'" +) +``` + +**Add to .env.example**: +```bash +# Security Settings (Production) +GONDULF_DEBUG=false +GONDULF_HSTS_MAX_AGE=31536000 # HSTS max-age in seconds (1 year default) +# GONDULF_CSP_POLICY=... 
# Custom CSP policy (optional) +``` + +### Error Handling + +**Middleware Errors**: +- Log errors but DO NOT block requests +- If middleware fails, continue without headers (fail-open for availability) +- Log ERROR level for middleware failures + +### Security Considerations + +- **HSTS Preloading**: Consider submitting domain to HSTS preload list (future) +- **CSP Reporting**: Consider adding CSP report-uri (future) +- **Security.txt**: Consider adding /.well-known/security.txt (future) + +### Testing Requirements + +**Unit Tests** (`tests/unit/test_security_middleware.py`): +1. Test security headers present on all responses +2. Test HSTS header only in production +3. Test HSTS header not present in DEBUG mode +4. Test HTTPS enforcement redirects HTTP to HTTPS +5. Test HTTPS enforcement allows localhost HTTP +6. Test Cache-Control headers on sensitive endpoints +7. Test CSP header allows self and inline styles + +**Integration Tests** (`tests/integration/test_security_integration.py`): +1. Test authorization endpoint has security headers +2. Test token endpoint has cache control headers +3. Test metadata endpoint does NOT have cache control (should be cacheable) + +**Security Tests** (`tests/security/test_input_validation.py`): +1. Test HTML escaping in templates (XSS prevention) +2. Test SQL injection prevention (parameterized queries) +3. Test URL validation rejects malicious URLs + +**PII Tests** (`tests/security/test_pii_logging.py`): +1. Test no email addresses in logs (mock logger, verify calls) +2. Test no full tokens in logs +3. 
Test no full codes in logs + +### Acceptance Criteria + +- [ ] Security headers middleware implemented and registered +- [ ] HTTPS enforcement middleware implemented and registered (production only) +- [ ] All security headers present on responses +- [ ] HSTS header only in production +- [ ] Input sanitization audit complete (documented) +- [ ] PII logging audit complete (issues fixed) +- [ ] All tests pass (unit, integration, security) +- [ ] No email addresses in logs +- [ ] No full tokens/codes in logs + +--- + +## Component 4: Deployment Configuration + +### Purpose + +Provide production-ready deployment artifacts: Dockerfile, docker-compose.yml, database backup script, and comprehensive environment variable documentation. + +### Specification References + +- **v1.0.0 Roadmap**: Line 66 (P0 feature), Phase 5 lines 233-236 +- **Docker Best Practices**: Multi-stage builds, non-root user, minimal base images + +### Design Overview + +Create Docker deployment configuration using multi-stage build for minimal image size. Provide docker-compose.yml for easy local testing. Create SQLite backup script with GPG encryption support. Document all environment variables comprehensively. + +### 4.1: Dockerfile + +**File**: `/Dockerfile` + +**Strategy**: Multi-stage build to minimize final image size. 
+ +**Implementation**: +```dockerfile +# Stage 1: Builder +FROM python:3.11-slim AS builder + +# Install uv for fast dependency resolution +COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv + +# Set working directory +WORKDIR /app + +# Copy dependency files +COPY pyproject.toml uv.lock ./ + +# Install dependencies to a virtual environment +RUN uv sync --frozen --no-dev + +# Stage 2: Runtime +FROM python:3.11-slim + +# Create non-root user +RUN useradd --create-home --shell /bin/bash gondulf + +# Set working directory +WORKDIR /app + +# Copy virtual environment from builder +COPY --from=builder /app/.venv /app/.venv + +# Copy application code +COPY src/ /app/src/ +COPY migrations/ /app/migrations/ + +# Create data directory for SQLite +RUN mkdir -p /app/data && chown gondulf:gondulf /app/data + +# Switch to non-root user +USER gondulf + +# Set environment variables +ENV PATH="/app/.venv/bin:$PATH" +ENV PYTHONPATH="/app/src" +ENV GONDULF_DATABASE_URL="sqlite:////app/data/gondulf.db" + +# Expose port +EXPOSE 8000 + +# Health check +HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" + +# Run application +CMD ["uvicorn", "gondulf.main:app", "--host", "0.0.0.0", "--port", "8000"] +``` + +**Build Instructions**: +```bash +# Build image +docker build -t gondulf:1.0.0 . + +# Tag as latest +docker tag gondulf:1.0.0 gondulf:latest +``` + +**Image Properties**: +- Base: `python:3.11-slim` (Debian-based, ~150MB) +- User: Non-root `gondulf` user +- Port: 8000 (Uvicorn default) +- Health check: `/health` endpoint every 30 seconds +- Data volume: `/app/data` (for SQLite database) + +### 4.2: docker-compose.yml + +**File**: `/docker-compose.yml` + +**Purpose**: Local testing and development deployment. + +**Implementation**: +```yaml +version: "3.8" + +services: + gondulf: + build: . 
+ image: gondulf:latest + container_name: gondulf + restart: unless-stopped + ports: + - "8000:8000" + volumes: + - ./data:/app/data + - ./logs:/app/logs + environment: + # Required + - GONDULF_SECRET_KEY=${GONDULF_SECRET_KEY} + - GONDULF_BASE_URL=${GONDULF_BASE_URL:-http://localhost:8000} + + # SMTP Configuration (required for email verification) + - GONDULF_SMTP_HOST=${GONDULF_SMTP_HOST} + - GONDULF_SMTP_PORT=${GONDULF_SMTP_PORT:-587} + - GONDULF_SMTP_USERNAME=${GONDULF_SMTP_USERNAME} + - GONDULF_SMTP_PASSWORD=${GONDULF_SMTP_PASSWORD} + - GONDULF_SMTP_FROM_EMAIL=${GONDULF_SMTP_FROM_EMAIL} + - GONDULF_SMTP_USE_TLS=${GONDULF_SMTP_USE_TLS:-true} + + # Optional Configuration + - GONDULF_DEBUG=${GONDULF_DEBUG:-false} + - GONDULF_LOG_LEVEL=${GONDULF_LOG_LEVEL:-INFO} + - GONDULF_TOKEN_TTL=${GONDULF_TOKEN_TTL:-3600} + + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 5s + retries: 3 + start_period: 10s + + networks: + - gondulf-network + +networks: + gondulf-network: + driver: bridge + +volumes: + data: + logs: +``` + +**Usage**: +```bash +# Create .env file with required variables +cp .env.example .env +nano .env # Edit with your values + +# Start service +docker-compose up -d + +# View logs +docker-compose logs -f gondulf + +# Stop service +docker-compose down + +# Restart service +docker-compose restart gondulf +``` + +### 4.3: Backup Script + +**File**: `/scripts/backup_database.sh` + +**Purpose**: Backup SQLite database with optional GPG encryption. 
+ +**Implementation**: +```bash +#!/bin/bash +set -euo pipefail + +# Configuration +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" +DATA_DIR="${DATA_DIR:-$PROJECT_ROOT/data}" +BACKUP_DIR="${BACKUP_DIR:-$PROJECT_ROOT/backups}" +DB_FILE="${DB_FILE:-$DATA_DIR/gondulf.db}" +TIMESTAMP=$(date +%Y%m%d_%H%M%S) +BACKUP_FILE="$BACKUP_DIR/gondulf_${TIMESTAMP}.db" +RETENTION_DAYS="${RETENTION_DAYS:-30}" + +# GPG encryption (optional) +GPG_RECIPIENT="${GPG_RECIPIENT:-}" + +# Colors for output +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +echo "=========================================" +echo "Gondulf Database Backup" +echo "=========================================" +echo + +# Check if database exists +if [ ! -f "$DB_FILE" ]; then + echo -e "${RED}ERROR: Database file not found: $DB_FILE${NC}" + exit 1 +fi + +# Create backup directory +mkdir -p "$BACKUP_DIR" + +# SQLite backup (using .backup command for consistency) +# Run sqlite3 inside the `if` so that `set -e` does not abort before the error branch +echo -e "${YELLOW}Backing up database...${NC}" +if sqlite3 "$DB_FILE" ".backup '$BACKUP_FILE'"; then + echo -e "${GREEN}✓ Database backed up to: $BACKUP_FILE${NC}" + + # Get file size + SIZE=$(du -h "$BACKUP_FILE" | cut -f1) + echo -e " Size: $SIZE" +else + echo -e "${RED}ERROR: Backup failed${NC}" + exit 1 +fi + +# GPG encryption (optional) +if [ -n "$GPG_RECIPIENT" ]; then + echo -e "${YELLOW}Encrypting backup with GPG...${NC}" + gpg --encrypt --recipient "$GPG_RECIPIENT" --output "${BACKUP_FILE}.gpg" "$BACKUP_FILE" + + if [ $? 
-eq 0 ]; then + echo -e "${GREEN}✓ Backup encrypted: ${BACKUP_FILE}.gpg${NC}" + + # Remove unencrypted backup + rm "$BACKUP_FILE" + BACKUP_FILE="${BACKUP_FILE}.gpg" + else + echo -e "${RED}WARNING: Encryption failed, keeping unencrypted backup${NC}" + fi +fi + +# Cleanup old backups +echo -e "${YELLOW}Cleaning up old backups (older than $RETENTION_DAYS days)...${NC}" +# Count first: once the files are deleted there is nothing left to count +CLEANED=$(find "$BACKUP_DIR" -name "gondulf_*.db*" -type f -mtime +$RETENTION_DAYS | wc -l) +find "$BACKUP_DIR" -name "gondulf_*.db*" -type f -mtime +$RETENTION_DAYS -delete +echo -e "${GREEN}✓ Removed $CLEANED old backup(s)${NC}" + +# List recent backups +echo +echo "Recent backups:" +ls -lh "$BACKUP_DIR"/gondulf_*.db* 2>/dev/null | tail -5 || echo " No backups found" + +echo +echo -e "${GREEN}Backup complete!${NC}" +``` + +**Restore Script**: `/scripts/restore_database.sh` + +```bash +#!/bin/bash +set -euo pipefail + +# Configuration +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(dirname "$SCRIPT_DIR")" +DATA_DIR="${DATA_DIR:-$PROJECT_ROOT/data}" +DB_FILE="${DB_FILE:-$DATA_DIR/gondulf.db}" + +# Colors +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +NC='\033[0m' + +echo "=========================================" +echo "Gondulf Database Restore" +echo "=========================================" +echo + +# Check arguments +if [ $# -ne 1 ]; then + echo "Usage: $0 <backup_file>" + echo "Example: $0 backups/gondulf_20251120_143000.db" + exit 1 +fi + +BACKUP_FILE="$1" + +# Check if backup exists +if [ ! -f "$BACKUP_FILE" ]; then + echo -e "${RED}ERROR: Backup file not found: $BACKUP_FILE${NC}" + exit 1 +fi + +# Check if encrypted +if [[ "$BACKUP_FILE" == *.gpg ]]; then + echo -e "${YELLOW}Decrypting backup...${NC}" + DECRYPTED_FILE="${BACKUP_FILE%.gpg}" + gpg --decrypt --output "$DECRYPTED_FILE" "$BACKUP_FILE" + + if [ $? 
-ne 0 ]; then + echo -e "${RED}ERROR: Decryption failed${NC}" + exit 1 + fi + + BACKUP_FILE="$DECRYPTED_FILE" + echo -e "${GREEN}✓ Backup decrypted${NC}" +fi + +# Backup current database (if exists) +if [ -f "$DB_FILE" ]; then + CURRENT_BACKUP="${DB_FILE}.before_restore_$(date +%Y%m%d_%H%M%S)" + echo -e "${YELLOW}Backing up current database to: $CURRENT_BACKUP${NC}" + cp "$DB_FILE" "$CURRENT_BACKUP" + echo -e "${GREEN}✓ Current database backed up${NC}" +fi + +# Restore backup +# Run cp inside the `if` so that `set -e` does not abort before the rollback branch +echo -e "${YELLOW}Restoring database...${NC}" +if cp "$BACKUP_FILE" "$DB_FILE"; then + echo -e "${GREEN}✓ Database restored from: $BACKUP_FILE${NC}" + echo + echo -e "${YELLOW}NOTE: Please restart the Gondulf service${NC}" +else + echo -e "${RED}ERROR: Restore failed${NC}" + + # Restore from backup if exists (CURRENT_BACKUP is unset when no database existed) + if [ -f "${CURRENT_BACKUP:-}" ]; then + cp "$CURRENT_BACKUP" "$DB_FILE" + echo -e "${YELLOW}Restored previous database${NC}" + fi + + exit 1 +fi + +# Cleanup decrypted file if it was created +if [[ "$1" == *.gpg ]] && [ -f "${1%.gpg}" ]; then + rm "${1%.gpg}" +fi + +echo -e "${GREEN}Restore complete!${NC}" +``` + +**Make scripts executable**: +```bash +chmod +x scripts/backup_database.sh +chmod +x scripts/restore_database.sh +``` + +**Cron Setup** (example): +```bash +# Backup daily at 2 AM +0 2 * * * /app/scripts/backup_database.sh >> /app/logs/backup.log 2>&1 +``` + +### 4.4: Environment Variable Documentation + +**File**: `/docs/deployment/environment-variables.md` + +**Content**: +```markdown +# Environment Variables + +All Gondulf configuration is done via environment variables with the `GONDULF_` prefix. 
+ +## Required Variables + +### SECRET_KEY (Required) +**Variable**: `GONDULF_SECRET_KEY` +**Description**: Secret key for cryptographic operations (token generation, session security) +**Format**: String, minimum 32 characters +**Example**: `GONDULF_SECRET_KEY=$(python -c "import secrets; print(secrets.token_urlsafe(32))")` +**Security**: NEVER commit to version control. Generate unique value per deployment. + +### BASE_URL (Required) +**Variable**: `GONDULF_BASE_URL` +**Description**: Base URL of the Gondulf server (used for metadata endpoint) +**Format**: URL with scheme (https://...) +**Example**: `GONDULF_BASE_URL=https://auth.example.com` +**Production**: Must be HTTPS + +### SMTP Configuration (Required for Email Verification) +**Variable**: `GONDULF_SMTP_HOST` +**Description**: SMTP server hostname +**Example**: `GONDULF_SMTP_HOST=smtp.gmail.com` + +**Variable**: `GONDULF_SMTP_PORT` +**Description**: SMTP server port +**Default**: `587` +**Common Values**: `587` (STARTTLS), `465` (implicit TLS), `25` (unencrypted) + +**Variable**: `GONDULF_SMTP_USERNAME` +**Description**: SMTP authentication username +**Example**: `GONDULF_SMTP_USERNAME=noreply@example.com` + +**Variable**: `GONDULF_SMTP_PASSWORD` +**Description**: SMTP authentication password +**Security**: Use app-specific password if available (Gmail, Outlook) + +**Variable**: `GONDULF_SMTP_FROM_EMAIL` +**Description**: From address for verification emails +**Example**: `GONDULF_SMTP_FROM_EMAIL=noreply@example.com` + +**Variable**: `GONDULF_SMTP_USE_TLS` +**Description**: Enable STARTTLS (port 587) +**Default**: `true` +**Values**: `true`, `false` + +## Optional Variables + +### Database Configuration +**Variable**: `GONDULF_DATABASE_URL` +**Description**: Database connection URL +**Default**: `sqlite:///./data/gondulf.db` +**Format**: SQLAlchemy connection string +**PostgreSQL Example**: `postgresql://user:pass@localhost/gondulf` (future) + +### Security Settings +**Variable**: `GONDULF_DEBUG` 
+**Description**: Enable debug mode (disables HTTPS enforcement) +**Default**: `false` +**Values**: `true`, `false` +**Warning**: NEVER enable in production + +**Variable**: `GONDULF_HSTS_MAX_AGE` +**Description**: HSTS max-age header value (seconds) +**Default**: `31536000` (1 year) + +### Token Configuration +**Variable**: `GONDULF_TOKEN_TTL` +**Description**: Access token time-to-live (seconds) +**Default**: `3600` (1 hour) +**Range**: `300` (5 min) to `86400` (24 hours) + +**Variable**: `GONDULF_TOKEN_CLEANUP_ENABLED` +**Description**: Enable automatic cleanup of expired tokens +**Default**: `true` +**Values**: `true`, `false` + +**Variable**: `GONDULF_TOKEN_CLEANUP_INTERVAL` +**Description**: Token cleanup interval (seconds) +**Default**: `3600` (1 hour) + +### Logging Configuration +**Variable**: `GONDULF_LOG_LEVEL` +**Description**: Logging level +**Default**: `INFO` +**Values**: `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL` + +## Docker Environment Variables + +When running in Docker, use `-e` flag or `--env-file`: + +```bash +# Using -e flag +docker run -e GONDULF_SECRET_KEY=xxx -e GONDULF_BASE_URL=https://auth.example.com gondulf:latest + +# Using --env-file +docker run --env-file .env gondulf:latest +``` + +## Docker Compose + +Create `.env` file in project root: +```env +GONDULF_SECRET_KEY=your-secret-key-here +GONDULF_BASE_URL=https://auth.example.com +GONDULF_SMTP_HOST=smtp.gmail.com +GONDULF_SMTP_PORT=587 +GONDULF_SMTP_USERNAME=noreply@example.com +GONDULF_SMTP_PASSWORD=your-password-here +GONDULF_SMTP_FROM_EMAIL=noreply@example.com +``` + +Then run: +```bash +docker-compose up -d +``` + +## Security Recommendations + +1. **Generate SECRET_KEY**: Use `secrets.token_urlsafe(32)` or similar +2. **SMTP Password**: Use app-specific passwords (Gmail, Outlook) +3. **HTTPS Only**: Never expose HTTP in production +4. **Environment Files**: Add `.env` to `.gitignore` +5. 
**Docker Secrets**: Use Docker secrets in production (not environment variables) + +## Validation + +Gondulf validates configuration at startup. Invalid configuration will prevent application from starting (fail-fast). + +Missing required variables will produce clear error messages: +``` +ERROR: Required environment variable GONDULF_SECRET_KEY is not set +``` +``` + +### Testing Requirements + +**Dockerfile Tests** (`tests/docker/test_dockerfile.sh`): +1. Test image builds successfully +2. Test image runs as non-root user +3. Test health check passes +4. Test application starts +5. Test data volume persists + +**docker-compose Tests** (`tests/docker/test_docker_compose.sh`): +1. Test docker-compose up succeeds +2. Test service accessible on port 8000 +3. Test /health endpoint returns 200 +4. Test docker-compose down cleans up + +**Backup Script Tests** (`tests/scripts/test_backup.sh`): +1. Test backup creates file +2. Test backup with GPG encryption +3. Test backup cleanup removes old files +4. Test restore from backup +5. Test restore from encrypted backup + +### Acceptance Criteria + +- [ ] Dockerfile builds successfully +- [ ] Docker image runs as non-root user +- [ ] Health check passes +- [ ] docker-compose.yml starts service successfully +- [ ] Backup script creates backup successfully +- [ ] Backup script with GPG encryption works +- [ ] Restore script restores database successfully +- [ ] Environment variable documentation complete +- [ ] All tests pass (Docker, backup/restore) + +--- + +## Component 5: Integration & End-to-End Tests + +### Purpose + +Implement comprehensive integration and end-to-end tests to verify complete authentication flows, endpoint integration, and W3C IndieAuth compliance. 
+ +### Specification References + +- **v1.0.0 Roadmap**: Line 67 (P0 feature), Testing Strategy lines 275-287, Phase 5 lines 230-232 +- **Testing Standards**: `/docs/standards/testing.md` + +### Design Overview + +Create integration test suite (20% of total tests) covering endpoint interactions and multi-component workflows. Create end-to-end test suite (10% of total tests) covering complete user journeys. Use FastAPI TestClient for HTTP testing, mock external dependencies (SMTP, DNS). + +### Test Organization + +**Directory Structure**: +``` +tests/ +├── unit/ # Existing unit tests (70%) +│ ├── test_*.py +│ └── ... +├── integration/ # NEW: Integration tests (20%) +│ ├── __init__.py +│ ├── conftest.py # Shared fixtures +│ ├── test_authorization_flow.py +│ ├── test_token_flow.py +│ ├── test_verification_flow.py +│ ├── test_metadata_endpoint.py +│ └── test_error_handling.py +├── e2e/ # NEW: End-to-end tests (10%) +│ ├── __init__.py +│ ├── conftest.py +│ ├── test_complete_auth_flow.py +│ ├── test_email_verification_flow.py +│ └── test_dns_verification_flow.py +└── security/ # NEW: Security tests + ├── __init__.py + ├── test_input_validation.py + ├── test_timing_attacks.py + └── test_pii_logging.py +``` + +### 5.1: Integration Tests + +**Purpose**: Test endpoint interactions and multi-component workflows without mocking application logic. + +#### Test: Authorization Flow Integration + +**File**: `tests/integration/test_authorization_flow.py` + +**Scenarios**: +1. Valid authorization request returns consent page +2. User approval generates authorization code +3. User denial redirects with error +4. Missing parameters return error +5. Invalid redirect_uri returns error +6. 
State parameter preserved through flow + +**Implementation Pattern**: +```python +import pytest +from fastapi.testclient import TestClient +from gondulf.main import app +from gondulf.dependencies import get_html_fetcher, get_dns_service + +@pytest.fixture +def client(): + """TestClient with mocked external dependencies.""" + # Mock HTML fetcher (no real HTTP requests) + def mock_html_fetcher(): + class MockHTMLFetcher: + def fetch(self, url): + return '' + return MockHTMLFetcher() + + # Mock DNS service (no real DNS queries) + def mock_dns_service(): + class MockDNSService: + def verify_txt_record(self, domain): + return True + return MockDNSService() + + # Override dependencies + app.dependency_overrides[get_html_fetcher] = mock_html_fetcher + app.dependency_overrides[get_dns_service] = mock_dns_service + + yield TestClient(app) + + # Cleanup + app.dependency_overrides.clear() + +def test_authorization_request_valid(client): + """Test valid authorization request returns consent page.""" + response = client.get( + "/authorize", + params={ + "me": "https://example.com", + "client_id": "https://client.example.com", + "redirect_uri": "https://client.example.com/callback", + "state": "random-state-value", + "response_type": "code" + } + ) + + assert response.status_code == 200 + assert "authorize" in response.text.lower() + assert "example.com" in response.text + assert "client.example.com" in response.text + +def test_authorization_approval_generates_code(client): + """Test user approval generates authorization code and redirects.""" + # First, get to consent page (this creates session/state) + response = client.get("/authorize", params={...}) + + # Then submit approval + response = client.post( + "/authorize", + data={ + "authorized": "true", + "me": "https://example.com", + "client_id": "https://client.example.com", + "redirect_uri": "https://client.example.com/callback", + "state": "random-state-value" + }, + allow_redirects=False + ) + + assert 
response.status_code == 302 + assert "code=" in response.headers["location"] + assert "state=random-state-value" in response.headers["location"] +``` + +#### Test: Token Flow Integration + +**File**: `tests/integration/test_token_flow.py` + +**Scenarios**: +1. Valid code exchange returns token +2. Expired code returns error +3. Used code returns error +4. Mismatched client_id returns error +5. Mismatched redirect_uri returns error +6. Token response includes correct fields + +**Implementation Pattern**: +```python +def test_token_exchange_valid(client, create_auth_code): + """Test valid authorization code exchanges for token.""" + # create_auth_code is a fixture that creates a valid code + code, metadata = create_auth_code( + me="https://example.com", + client_id="https://client.example.com", + redirect_uri="https://client.example.com/callback" + ) + + response = client.post( + "/token", + data={ + "grant_type": "authorization_code", + "code": code, + "client_id": "https://client.example.com", + "redirect_uri": "https://client.example.com/callback", + "me": "https://example.com" + } + ) + + assert response.status_code == 200 + json = response.json() + assert "access_token" in json + assert json["token_type"] == "Bearer" + assert json["me"] == "https://example.com" + assert len(json["access_token"]) == 43 # base64url(32 bytes) +``` + +#### Test: Verification Flow Integration + +**File**: `tests/integration/test_verification_flow.py` + +**Scenarios**: +1. Start verification sends email and returns success +2. Valid code verifies successfully +3. Invalid code returns error +4. Expired code returns error +5. Rate limiting prevents abuse + +#### Test: Metadata Endpoint + +**File**: `tests/integration/test_metadata_endpoint.py` + +**Scenarios**: +1. Metadata endpoint returns valid JSON +2. All required fields present +3. URLs are valid +4. 
Cache-Control header present + +#### Test: Error Handling + +**File**: `tests/integration/test_error_handling.py` + +**Scenarios**: +1. OAuth error responses formatted correctly +2. Error redirection preserves state parameter +3. Invalid redirect_uri shows error page (no redirect) +4. Server errors return 500 with error page + +### 5.2: End-to-End Tests + +**Purpose**: Test complete user journeys from start to finish. + +#### Test: Complete Authentication Flow + +**File**: `tests/e2e/test_complete_auth_flow.py` + +**Scenario**: Simulate complete IndieAuth flow: +1. Client initiates authorization request +2. Server checks domain verification (DNS + email) +3. User verifies email (enters code) +4. User approves authorization +5. Client exchanges code for token +6. Token is valid and can be used + +**Implementation**: +```python +@pytest.mark.e2e +def test_complete_authentication_flow(client, mock_smtp, mock_dns): + """Test complete IndieAuth authentication flow end-to-end.""" + # 1. Client initiates authorization + auth_response = client.get("/authorize", params={ + "me": "https://user.example.com", + "client_id": "https://app.example.com", + "redirect_uri": "https://app.example.com/callback", + "state": "client-random-state", + "response_type": "code" + }) + assert auth_response.status_code == 200 + + # 2. Server discovers email from rel=me (mocked) + # 3. Server sends verification email (captured by mock_smtp) + verify_response = client.post("/api/verify/start", json={ + "domain": "user.example.com" + }) + assert verify_response.json()["success"] is True + + # Extract verification code from mock SMTP + verification_code = mock_smtp.get_last_email_code() + + # 4. User submits verification code + code_response = client.post("/api/verify/code", json={ + "domain": "user.example.com", + "code": verification_code + }) + assert code_response.json()["success"] is True + + # 5. 
User approves authorization + approve_response = client.post("/authorize", data={ + "authorized": "true", + "me": "https://user.example.com", + "client_id": "https://app.example.com", + "redirect_uri": "https://app.example.com/callback", + "state": "client-random-state" + }, allow_redirects=False) + + assert approve_response.status_code == 302 + location = approve_response.headers["location"] + assert "code=" in location + assert "state=client-random-state" in location + + # Extract authorization code from redirect + from urllib.parse import urlparse, parse_qs + parsed = urlparse(location) + params = parse_qs(parsed.query) + auth_code = params["code"][0] + + # 6. Client exchanges code for token + token_response = client.post("/token", data={ + "grant_type": "authorization_code", + "code": auth_code, + "client_id": "https://app.example.com", + "redirect_uri": "https://app.example.com/callback", + "me": "https://user.example.com" + }) + + assert token_response.status_code == 200 + token_json = token_response.json() + assert "access_token" in token_json + assert token_json["me"] == "https://user.example.com" + + # 7. Token is valid (verify in database) + # This would be a token introspection endpoint in future + token = token_json["access_token"] + assert len(token) == 43 +``` + +#### Test: Email Verification Flow + +**File**: `tests/e2e/test_email_verification_flow.py` + +**Scenario**: Test email-based domain verification: +1. User starts verification +2. Email sent with code +3. User enters code +4. Domain marked as verified + +#### Test: DNS Verification Flow + +**File**: `tests/e2e/test_dns_verification_flow.py` + +**Scenario**: Test DNS TXT record verification: +1. User adds TXT record +2. Server queries DNS +3. Domain verified via TXT record +4. Email verification still required (two-factor) + +### 5.3: Security Tests + +**Purpose**: Test security properties and attack resistance. 
+ +#### Test: Input Validation + +**File**: `tests/security/test_input_validation.py` + +**Scenarios**: +1. Malformed URLs rejected +2. SQL injection attempts blocked +3. XSS attempts escaped in templates +4. Open redirect attempts blocked +5. URL fragment in me parameter rejected + +**Implementation**: +```python +@pytest.mark.security +def test_sql_injection_prevention(client): + """Test SQL injection attempts are blocked.""" + # Attempt SQL injection in me parameter + response = client.get("/authorize", params={ + "me": "https://example.com'; DROP TABLE tokens; --", + "client_id": "https://app.example.com", + "redirect_uri": "https://app.example.com/callback", + "state": "state", + "response_type": "code" + }) + + # Should return error (invalid URL), not execute SQL + assert response.status_code in [400, 422] + + # Verify tokens table still exists + from gondulf.dependencies import get_database + db = get_database() + result = db.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='tokens'") + assert result.fetchone() is not None + +@pytest.mark.security +def test_xss_prevention_in_templates(client): + """Test XSS attempts are escaped in HTML templates.""" + xss_payload = "" + + response = client.get("/authorize", params={ + "me": f"https://example.com/{xss_payload}", + "client_id": "https://app.example.com", + "redirect_uri": "https://app.example.com/callback", + "state": xss_payload, + "response_type": "code" + }) + + # XSS payload should be HTML-escaped in response + assert "