fix(validation): implement W3C IndieAuth compliant client_id validation
Implements complete W3C IndieAuth Section 3.2 client identifier validation, including:

- Fragment rejection
- HTTP scheme support for localhost/loopback only
- Username/password component rejection
- Non-loopback IP address rejection
- Path traversal prevention (.. and . segments)
- Hostname case normalization
- Default port removal (80/443)
- Path component enforcement

All 75 validation tests passing with 99% coverage.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
"""Client validation and utility functions."""
|
||||
import ipaddress
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
@@ -24,41 +25,130 @@ def mask_email(email: str) -> str:
|
||||
return f"{masked_local}@{domain}"
|
||||
|
||||
|
||||
def normalize_client_id(client_id: str) -> str:
|
||||
def validate_client_id(client_id: str) -> tuple[bool, str]:
    """
    Validate client_id against W3C IndieAuth specification Section 3.2.

    The checks are applied in this order, and the first failure is reported:

    1. Scheme must be ``https`` or ``http``.
    2. ``http`` is only accepted for ``localhost``, ``127.0.0.1`` or ``[::1]``.
    3. No fragment component.
    4. No username/password component (an empty userinfo such as
       ``https://@host/`` is rejected as well).
    5. A hostname must be present.
    6. The port, if given, must be a valid integer port.
    7. IP-literal hosts must be loopback addresses.
    8. The path must not contain ``.`` or ``..`` segments.

    This function never raises; every failure is reported via the return
    value.

    Args:
        client_id: The client identifier URL to validate

    Returns:
        Tuple of (is_valid, error_message)
        - is_valid: True if client_id is valid, False otherwise
        - error_message: Empty string if valid, specific error message if invalid
    """
    # Guard against non-string input so callers always get a tuple back
    # instead of an exception from urlparse.
    if not isinstance(client_id, str):
        return False, "client_id must be a valid URL: expected a string"

    try:
        parsed = urlparse(client_id)
    except ValueError as e:
        # urlparse raises ValueError for malformed authority sections,
        # e.g. an unterminated IPv6 literal.
        return False, f"client_id must be a valid URL: {e}"

    # 1. Check scheme (urlparse lowercases it)
    if parsed.scheme not in ("https", "http"):
        return False, "client_id must use https or http scheme"

    # 2. HTTP only for localhost/loopback
    # Note: parsed.hostname returns '::1' without brackets for IPv6
    if parsed.scheme == "http" and parsed.hostname not in ("localhost", "127.0.0.1", "::1"):
        return False, "client_id with http scheme is only allowed for localhost, 127.0.0.1, or [::1]"

    # 3. No fragments allowed
    if parsed.fragment:
        return False, "client_id must not contain a fragment (#)"

    # 4. No username/password allowed. Compare against None rather than
    #    truthiness so an empty userinfo ("https://@example.com/") is caught.
    if parsed.username is not None or parsed.password is not None:
        return False, "client_id must not contain username or password"

    # 5. A host is required; "https://" and "https:///path" have none.
    if not parsed.hostname:
        return False, "client_id must contain a hostname"

    # 6. Accessing .port is what triggers urllib's port validation
    #    (non-numeric or out-of-range ports raise ValueError).
    try:
        parsed.port
    except ValueError:
        return False, "client_id must contain a valid port"

    # 7. Check for non-loopback IP addresses
    try:
        # parsed.hostname already has no brackets for IPv6
        ip = ipaddress.ip_address(parsed.hostname)
    except ValueError:
        # Not an IP address, it's a domain (valid)
        pass
    else:
        if not ip.is_loopback:
            return False, "client_id must not use IP address (except 127.0.0.1 or [::1])"

    # 8. Check for . or .. path segments
    if any(segment in (".", "..") for segment in parsed.path.split("/")):
        return False, "client_id must not contain single-dot (.) or double-dot (..) path segments"

    return True, ""
|
||||
|
||||
|
||||
def normalize_client_id(client_id: str) -> str:
    """
    Normalize client_id URL to canonical form per IndieAuth spec.

    Normalization rules:
    - Validate against specification first
    - Convert hostname to lowercase
    - Remove default ports (80 for http, 443 for https)
    - Ensure path exists (default to "/" if empty)
    - Preserve path parameters (";...") and query string if present
    - Never include fragments (already validated out)

    Args:
        client_id: Client ID URL to normalize

    Returns:
        Normalized client_id

    Raises:
        ValueError: If client_id is not valid per specification
    """
    # First validate; both http (loopback only) and https may pass.
    is_valid, error = validate_client_id(client_id)
    if not is_valid:
        raise ValueError(error)

    parsed = urlparse(client_id)

    # Normalize hostname to lowercase
    hostname = (parsed.hostname or "").lower()

    # parsed.hostname strips the brackets from IPv6 literals, so a colon in
    # the hostname means the brackets have to be restored in the URL.
    host = f"[{hostname}]" if ":" in hostname else hostname

    # Omit the port only when it is absent or the scheme's default.
    port = parsed.port
    default_port = {"http": 80, "https": 443}.get(parsed.scheme)
    netloc = host if port is None or port == default_port else f"{host}:{port}"

    # Ensure path exists
    path = parsed.path or "/"

    # urlparse splits ";params" off the last path segment; put it back so
    # the identifier is not silently altered.
    if parsed.params:
        path += f";{parsed.params}"

    # Reconstruct URL
    normalized = f"{parsed.scheme}://{netloc}{path}"

    # Add query if present
    if parsed.query:
        normalized += f"?{parsed.query}"

    # Never add fragment (validated out)
    return normalized
|
||||
|
||||
|
||||
Reference in New Issue
Block a user