fix(validation): implement W3C IndieAuth compliant client_id validation

Implements complete W3C IndieAuth Section 3.2 client identifier
validation including:
- Fragment rejection
- HTTP scheme support for localhost/loopback only
- Username/password component rejection
- Non-loopback IP address rejection
- Path traversal prevention (.. and . segments)
- Hostname case normalization
- Default port removal (80/443)
- Path component enforcement

All 75 validation tests passing with 99% coverage.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-24 18:14:55 -07:00
parent 1ef5cd9229
commit 526a21d3fb
7 changed files with 1842 additions and 25 deletions

View File

@@ -1,4 +1,5 @@
"""Client validation and utility functions."""
import ipaddress
import re
from urllib.parse import urlparse
@@ -24,41 +25,130 @@ def mask_email(email: str) -> str:
return f"{masked_local}@{domain}"
def normalize_client_id(client_id: str) -> str:
def validate_client_id(client_id: str) -> tuple[bool, str]:
"""
Normalize client_id URL to canonical form.
Rules:
- Ensure https:// scheme
- Remove default port (443)
- Preserve path
Validate client_id against W3C IndieAuth specification Section 3.2.
Args:
client_id: Client ID URL
client_id: The client identifier URL to validate
Returns:
Tuple of (is_valid, error_message)
- is_valid: True if client_id is valid, False otherwise
- error_message: Empty string if valid, specific error message if invalid
"""
try:
parsed = urlparse(client_id)
# 1. Check scheme
if parsed.scheme not in ['https', 'http']:
return False, "client_id must use https or http scheme"
# 2. HTTP only for localhost/loopback
if parsed.scheme == 'http':
# Note: parsed.hostname returns '::1' without brackets for IPv6
if parsed.hostname not in ['localhost', '127.0.0.1', '::1']:
return False, "client_id with http scheme is only allowed for localhost, 127.0.0.1, or [::1]"
# 3. No fragments allowed
if parsed.fragment:
return False, "client_id must not contain a fragment (#)"
# 4. No username/password allowed
if parsed.username or parsed.password:
return False, "client_id must not contain username or password"
# 5. Check for non-loopback IP addresses
if parsed.hostname:
try:
# parsed.hostname already has no brackets for IPv6
ip = ipaddress.ip_address(parsed.hostname)
if not ip.is_loopback:
return False, "client_id must not use IP address (except 127.0.0.1 or [::1])"
except ValueError:
# Not an IP address, it's a domain (valid)
pass
# 6. Check for . or .. path segments
if parsed.path:
segments = parsed.path.split('/')
for segment in segments:
if segment == '.' or segment == '..':
return False, "client_id must not contain single-dot (.) or double-dot (..) path segments"
return True, ""
except Exception as e:
return False, f"client_id must be a valid URL: {e}"
def normalize_client_id(client_id: str) -> str:
"""
Normalize client_id URL to canonical form per IndieAuth spec.
Normalization rules:
- Validate against specification first
- Convert hostname to lowercase
- Remove default ports (80 for http, 443 for https)
- Ensure path exists (default to "/" if empty)
- Preserve query string if present
- Never include fragments (already validated out)
Args:
client_id: Client ID URL to normalize
Returns:
Normalized client_id
Raises:
ValueError: If client_id does not use https scheme
ValueError: If client_id is not valid per specification
"""
# First validate
is_valid, error = validate_client_id(client_id)
if not is_valid:
raise ValueError(error)
parsed = urlparse(client_id)
# Ensure https
if parsed.scheme != 'https':
raise ValueError("client_id must use https scheme")
# Normalize hostname to lowercase
hostname = parsed.hostname.lower() if parsed.hostname else ''
# Remove default HTTPS port
netloc = parsed.netloc
if netloc.endswith(':443'):
netloc = netloc[:-4]
# Determine if this is an IPv6 address (for bracket handling)
is_ipv6 = ':' in hostname # Simple check since hostname has no brackets
# Reconstruct
normalized = f"https://{netloc}{parsed.path}"
# Handle port normalization
port = parsed.port
if (parsed.scheme == 'http' and port == 80) or \
(parsed.scheme == 'https' and port == 443):
# Default port, omit it
if is_ipv6:
netloc = f"[{hostname}]" # IPv6 needs brackets in URL
else:
netloc = hostname
elif port:
# Non-default port, include it
if is_ipv6:
netloc = f"[{hostname}]:{port}" # IPv6 with port needs brackets
else:
netloc = f"{hostname}:{port}"
else:
# No port
if is_ipv6:
netloc = f"[{hostname}]" # IPv6 needs brackets in URL
else:
netloc = hostname
# Ensure path exists
path = parsed.path if parsed.path else '/'
# Reconstruct URL
normalized = f"{parsed.scheme}://{netloc}{path}"
# Add query if present
if parsed.query:
normalized += f"?{parsed.query}"
if parsed.fragment:
normalized += f"#{parsed.fragment}"
# Never add fragment (validated out)
return normalized