Files
StarPunk/starpunk/slug_utils.py
Phil Skentelbery c7fcc21406 feat: Add custom slug support via mp-slug property
Implements custom slug handling for Micropub as specified in ADR-035.

Changes:
- Created starpunk/slug_utils.py with validation/sanitization functions
- Added RESERVED_SLUGS constant (api, admin, auth, feed, etc.)
- Modified create_note() to accept optional custom_slug parameter
- Integrated mp-slug extraction in Micropub handle_create()
- Slug sanitization: lowercase, hyphens, no special chars
- Conflict resolution: sequential numbering (-2, -3, etc.)
- Hierarchical slugs (/) rejected (deferred to v1.2.0)

Features:
- Custom slugs via Micropub's mp-slug property
- Automatic sanitization of invalid characters
- Reserved slug protection
- Sequential conflict resolution (not random)
- Clear error messages for validation failures

Part of v1.1.0 (Phase 4).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-25 10:05:38 -07:00

268 lines
6.4 KiB
Python

"""
Slug validation and sanitization utilities for StarPunk
This module provides functions for validating, sanitizing, and ensuring uniqueness
of note slugs. Supports custom slugs via Micropub's mp-slug property.
"""
import re
from typing import Optional, Set
# Slugs that notes are never allowed to use.
# Each entry corresponds to an application route or a special page.
RESERVED_SLUGS = frozenset({
    # Core routes
    "api", "admin", "auth", "feed", "static", "notes",
    # Auth/admin routes
    "login", "logout", "settings", "micropub", "callback",
    # Feed routes
    "feed.xml", "rss", "atom",
    # Special pages
    "index", "home", "about", "search",
})
# Slug validation regex.
# A valid slug is one or more runs of lowercase letters/digits joined by
# single hyphens. Consequently a slug:
#   - contains only lowercase letters, numbers, and hyphens
#   - starts with a letter or number
#   - ends with a letter or number
#   - never contains consecutive hyphens
# \Z (rather than $) anchors at the true end of the string, so a slug with a
# trailing newline is rejected instead of being accepted.
SLUG_PATTERN = re.compile(r'^[a-z0-9]+(?:-[a-z0-9]+)*\Z')

# Maximum slug length, in characters
MAX_SLUG_LENGTH = 200
def is_reserved_slug(slug: str) -> bool:
    """
    Report whether a slug collides with a reserved name.

    The lookup is case-insensitive: the slug is lowercased before it is
    compared against RESERVED_SLUGS.

    Args:
        slug: Slug to check

    Returns:
        bool: True if the lowercased slug is a member of RESERVED_SLUGS
    """
    lowered = slug.lower()
    return lowered in RESERVED_SLUGS
def sanitize_slug(slug: str) -> str:
    """
    Turn raw slug input into a lowercase, hyphen-separated slug.

    Steps, in order: lowercase the input, replace every run of characters
    other than a-z, 0-9 and hyphen with a single hyphen, collapse repeated
    hyphens, strip hyphens from both ends, and finally cut the result down
    to MAX_SLUG_LENGTH characters.

    Args:
        slug: Raw slug input

    Returns:
        Sanitized slug string. It is empty when the input contains no
        characters that survive sanitization.

    Examples:
        >>> sanitize_slug("Hello World!")
        'hello-world'
        >>> sanitize_slug("My--Post___Title")
        'my-post-title'
        >>> sanitize_slug(" leading-spaces ")
        'leading-spaces'
    """
    lowered = slug.lower()
    # Anything outside a-z, 0-9 and '-' is turned into a hyphen
    hyphenated = re.sub(r'[^a-z0-9-]+', '-', lowered)
    # Runs of hyphens (original or freshly inserted) shrink to one
    collapsed = re.sub(r'-+', '-', hyphenated)
    trimmed = collapsed.strip('-')
    if len(trimmed) <= MAX_SLUG_LENGTH:
        return trimmed
    # The cut may land right after a hyphen, so strip the tail again
    return trimmed[:MAX_SLUG_LENGTH].rstrip('-')
def validate_slug(slug: str) -> bool:
    """
    Validate slug format

    Checks if slug matches required pattern:
    - Only lowercase letters, numbers, hyphens
    - Starts with letter or number
    - Ends with letter or number
    - No consecutive hyphens
    - Not empty
    - Not too long (at most MAX_SLUG_LENGTH characters)
    - No trailing newline or other characters outside the pattern

    Args:
        slug: Slug to validate

    Returns:
        bool: True if valid, False otherwise

    Examples:
        >>> validate_slug("my-post")
        True
        >>> validate_slug("my--post")  # consecutive hyphens
        False
        >>> validate_slug("-my-post")  # starts with hyphen
        False
        >>> validate_slug("My-Post")  # uppercase
        False
    """
    if not slug:
        return False
    # Consecutive hyphens are rejected explicitly rather than relying on
    # SLUG_PATTERN to rule them out.
    if '--' in slug:
        return False
    if len(slug) > MAX_SLUG_LENGTH:
        return False
    # fullmatch requires the pattern to consume the entire string, so a slug
    # followed by a trailing newline does not slip through.
    return SLUG_PATTERN.fullmatch(slug) is not None
def make_slug_unique_with_suffix(base_slug: str, existing_slugs: Set[str], max_attempts: int = 99) -> str:
    """
    Return a slug that is not in existing_slugs, numbering it if necessary.

    The base slug is returned untouched when it is free. Otherwise the
    candidates base_slug-2, base_slug-3, ... are tried in ascending order
    and the first free one wins. Numbering is sequential (not random) so
    results are predictable.

    Args:
        base_slug: Base slug to make unique
        existing_slugs: Set of existing slugs to check against
        max_attempts: How many numbered candidates to try (default: 99),
            i.e. suffixes 2 through max_attempts + 1

    Returns:
        The base slug, or the base slug with a numeric suffix

    Raises:
        ValueError: If every numbered candidate is already taken

    Examples:
        >>> make_slug_unique_with_suffix("my-post", {"my-post"})
        'my-post-2'
        >>> make_slug_unique_with_suffix("my-post", {"my-post", "my-post-2"})
        'my-post-3'
        >>> make_slug_unique_with_suffix("my-post", set())
        'my-post'
    """
    if base_slug not in existing_slugs:
        return base_slug

    # Lazily produce base-2, base-3, ... and stop at the first free one
    numbered = (f"{base_slug}-{n}" for n in range(2, max_attempts + 2))
    first_free = next(
        (slug for slug in numbered if slug not in existing_slugs),
        None,
    )
    if first_free is None:
        raise ValueError(
            f"Could not create unique slug after {max_attempts} attempts. "
            f"Base slug: {base_slug}"
        )
    return first_free
def validate_and_sanitize_custom_slug(custom_slug: str, existing_slugs: Set[str]) -> tuple[bool, Optional[str], Optional[str]]:
    """
    Run a custom Micropub slug through the full validation pipeline.

    Pipeline, in order:
    1. Reject input containing '/' (hierarchical slugs are not supported)
    2. Sanitize the input
    3. Reject input that sanitizes to an empty string
    4. Reject reserved slugs
    5. Validate the format of the sanitized slug
    6. Make the slug unique against existing_slugs

    Failures are reported through the return value; this function catches
    the ValueError raised when no unique slug can be generated.

    Args:
        custom_slug: Raw custom slug from mp-slug property
        existing_slugs: Set of existing slugs

    Returns:
        Tuple of (success, slug_or_none, error_message_or_none). On success
        the error message is None; on failure the slug is None.

    Examples:
        >>> validate_and_sanitize_custom_slug("My Post", set())
        (True, 'my-post', None)
        >>> validate_and_sanitize_custom_slug("api", set())
        (False, None, 'Slug "api" is reserved and cannot be used')
        >>> validate_and_sanitize_custom_slug("/invalid/slug", set())
        (False, None, 'Slug "/invalid/slug" contains hierarchical paths which are not supported')
    """
    def reject(reason: str) -> tuple[bool, Optional[str], Optional[str]]:
        # Every failure has the same shape: no slug, just a message
        return (False, None, reason)

    # Hierarchical paths are checked on the raw input, before sanitization
    # would turn the slashes into hyphens.
    if '/' in custom_slug:
        return reject(
            f'Slug "{custom_slug}" contains hierarchical paths which are not supported'
        )

    candidate = sanitize_slug(custom_slug)
    if not candidate:
        return reject(
            f'Slug "{custom_slug}" could not be sanitized to valid format'
        )

    if is_reserved_slug(candidate):
        return reject(f'Slug "{candidate}" is reserved and cannot be used')

    if not validate_slug(candidate):
        return reject(
            f'Slug "{candidate}" does not match required format (lowercase letters, numbers, hyphens only)'
        )

    # NOTE(review): the uniqueness suffix is appended after validation and
    # the result is not re-checked, so a suffixed slug can be longer than
    # the sanitized one - confirm this is acceptable for storage.
    try:
        final_slug = make_slug_unique_with_suffix(candidate, existing_slugs)
    except ValueError as exc:
        return reject(str(exc))
    return (True, final_slug, None)