diff --git a/starpunk/micropub.py b/starpunk/micropub.py index 7b69f75..2bb05c0 100644 --- a/starpunk/micropub.py +++ b/starpunk/micropub.py @@ -294,6 +294,15 @@ def handle_create(data: dict, token_info: dict): title = extract_title(properties) tags = extract_tags(properties) published_date = extract_published_date(properties) + + # Extract custom slug if provided (Micropub extension) + custom_slug = None + if 'mp-slug' in properties: + # mp-slug is an array in Micropub format + slug_values = properties.get('mp-slug', []) + if slug_values and len(slug_values) > 0: + custom_slug = slug_values[0] + except MicropubValidationError as e: raise e except Exception as e: @@ -303,7 +312,10 @@ def handle_create(data: dict, token_info: dict): # Create note using existing CRUD try: note = create_note( - content=content, published=True, created_at=published_date # Micropub posts are published by default + content=content, + published=True, # Micropub posts are published by default + created_at=published_date, + custom_slug=custom_slug ) # Build permalink URL diff --git a/starpunk/notes.py b/starpunk/notes.py index 0385c76..7cb7415 100644 --- a/starpunk/notes.py +++ b/starpunk/notes.py @@ -134,7 +134,7 @@ def _get_existing_slugs(db) -> set[str]: def create_note( - content: str, published: bool = False, created_at: Optional[datetime] = None + content: str, published: bool = False, created_at: Optional[datetime] = None, custom_slug: Optional[str] = None ) -> Note: """ Create a new note @@ -147,6 +147,7 @@ def create_note( content: Markdown content for the note (must not be empty) published: Whether the note should be published (default: False) created_at: Creation timestamp (default: current UTC time) + custom_slug: Optional custom slug (from Micropub mp-slug property) Returns: Note object with all metadata and content loaded @@ -208,20 +209,27 @@ def create_note( data_dir = Path(current_app.config["DATA_PATH"]) - # 3. GENERATE UNIQUE SLUG + # 3. 
GENERATE OR VALIDATE SLUG # Query all existing slugs from database db = get_db(current_app) existing_slugs = _get_existing_slugs(db) - # Generate base slug from content - base_slug = generate_slug(content, created_at) + if custom_slug: + # Use custom slug (from Micropub mp-slug property) + from starpunk.slug_utils import validate_and_sanitize_custom_slug + success, slug, error = validate_and_sanitize_custom_slug(custom_slug, existing_slugs) + if not success: + raise InvalidNoteDataError("slug", custom_slug, error) + else: + # Generate base slug from content + base_slug = generate_slug(content, created_at) - # Make unique if collision - slug = make_slug_unique(base_slug, existing_slugs) + # Make unique if collision + slug = make_slug_unique(base_slug, existing_slugs) - # Validate final slug (defensive check) - if not validate_slug(slug): - raise InvalidNoteDataError("slug", slug, f"Generated slug is invalid: {slug}") + # Validate final slug (defensive check) + if not validate_slug(slug): + raise InvalidNoteDataError("slug", slug, f"Generated slug is invalid: {slug}") # 4. GENERATE FILE PATH note_path = generate_note_path(slug, created_at, data_dir) diff --git a/starpunk/slug_utils.py b/starpunk/slug_utils.py new file mode 100644 index 0000000..db36b09 --- /dev/null +++ b/starpunk/slug_utils.py @@ -0,0 +1,267 @@ +""" +Slug validation and sanitization utilities for StarPunk + +This module provides functions for validating, sanitizing, and ensuring uniqueness +of note slugs. Supports custom slugs via Micropub's mp-slug property. 
import re
from typing import Optional, Set

# Reserved slugs that cannot be used for notes.
# These correspond to application routes and special pages.
RESERVED_SLUGS = frozenset([
    # Core routes
    'api',
    'admin',
    'auth',
    'feed',
    'static',
    'notes',

    # Auth/admin routes
    'login',
    'logout',
    'settings',
    'micropub',
    'callback',

    # Feed routes
    'feed.xml',
    'rss',
    'atom',

    # Special pages
    'index',
    'home',
    'about',
    'search',
])

# Slug validation regex.
# One or more runs of [a-z0-9] joined by single hyphens, which enforces:
#   - only lowercase letters, numbers, hyphens
#   - starts and ends with a letter or number
#   - no consecutive hyphens
SLUG_PATTERN = re.compile(r'^[a-z0-9]+(?:-[a-z0-9]+)*$')

# Maximum slug length
MAX_SLUG_LENGTH = 200


def is_reserved_slug(slug: str) -> bool:
    """
    Check if slug is reserved

    The comparison is case-insensitive.

    Args:
        slug: Slug to check

    Returns:
        bool: True if slug is reserved
    """
    return slug.lower() in RESERVED_SLUGS


def sanitize_slug(slug: str) -> str:
    """
    Sanitize a custom slug

    Converts to lowercase, replaces invalid characters with hyphens,
    removes consecutive hyphens, and trims to max length.

    Args:
        slug: Raw slug input

    Returns:
        Sanitized slug string (may be empty if nothing usable remains)

    Examples:
        >>> sanitize_slug("Hello World!")
        'hello-world'

        >>> sanitize_slug("My--Post___Title")
        'my-post-title'

        >>> sanitize_slug(" leading-spaces ")
        'leading-spaces'
    """
    # Convert to lowercase
    slug = slug.lower()

    # Replace invalid characters with hyphens
    # Allow only: a-z, 0-9, hyphens
    slug = re.sub(r'[^a-z0-9-]+', '-', slug)

    # Remove consecutive hyphens
    slug = re.sub(r'-+', '-', slug)

    # Trim leading/trailing hyphens
    slug = slug.strip('-')

    # Trim to max length; the cut may land right after a hyphen, so strip again
    if len(slug) > MAX_SLUG_LENGTH:
        slug = slug[:MAX_SLUG_LENGTH].rstrip('-')

    return slug


def validate_slug(slug: str) -> bool:
    """
    Validate slug format

    Checks if slug matches required pattern:
    - Only lowercase letters, numbers, hyphens
    - Starts with letter or number
    - Ends with letter or number
    - No consecutive hyphens
    - Not empty
    - Not too long

    Args:
        slug: Slug to validate

    Returns:
        bool: True if valid, False otherwise

    Examples:
        >>> validate_slug("my-post")
        True

        >>> validate_slug("my--post")  # consecutive hyphens
        False

        >>> validate_slug("-my-post")  # starts with hyphen
        False

        >>> validate_slug("My-Post")  # uppercase
        False
    """
    # Non-strings (e.g. None or a number) are simply not valid slugs
    if not isinstance(slug, str) or not slug:
        return False

    if len(slug) > MAX_SLUG_LENGTH:
        return False

    # fullmatch (not match): "$" alone would also accept a trailing newline
    return SLUG_PATTERN.fullmatch(slug) is not None


def make_slug_unique_with_suffix(base_slug: str, existing_slugs: Set[str], max_attempts: int = 99) -> str:
    """
    Make slug unique by adding sequential numeric suffix

    If base_slug exists, tries base_slug-2, base_slug-3, etc.
    Uses sequential numbers (not random) for predictability.

    When appending the suffix would push the slug past MAX_SLUG_LENGTH,
    the base is shortened so the result still fits.

    Args:
        base_slug: Base slug to make unique
        existing_slugs: Set of existing slugs to check against
        max_attempts: Maximum number of attempts (default: 99)

    Returns:
        Unique slug with suffix if needed

    Raises:
        ValueError: If unique slug cannot be generated after max_attempts

    Examples:
        >>> make_slug_unique_with_suffix("my-post", {"my-post"})
        'my-post-2'

        >>> make_slug_unique_with_suffix("my-post", {"my-post", "my-post-2"})
        'my-post-3'

        >>> make_slug_unique_with_suffix("my-post", set())
        'my-post'
    """
    # If base slug is available, use it
    if base_slug not in existing_slugs:
        return base_slug

    # Try sequential suffixes: -2, -3, ..., -(max_attempts + 1)
    for i in range(2, max_attempts + 2):
        suffix = f"-{i}"
        room = MAX_SLUG_LENGTH - len(suffix)
        if len(base_slug) <= room:
            stem = base_slug
        else:
            # Shorten the base to leave room for the suffix; avoid "--N"
            stem = base_slug[:room].rstrip('-')
        candidate = f"{stem}{suffix}"
        if candidate not in existing_slugs:
            return candidate

    # Exhausted all attempts
    raise ValueError(
        f"Could not create unique slug after {max_attempts} attempts. "
        f"Base slug: {base_slug}"
    )


def validate_and_sanitize_custom_slug(custom_slug: str, existing_slugs: Set[str]) -> tuple[bool, Optional[str], Optional[str]]:
    """
    Validate and sanitize a custom slug from Micropub

    Performs full validation pipeline:
    1. Reject non-string input and hierarchical paths
    2. Sanitize the input
    3. Check if it's reserved
    4. Validate format
    5. Make unique if needed

    Args:
        custom_slug: Raw custom slug from mp-slug property
        existing_slugs: Set of existing slugs

    Returns:
        Tuple of (success, slug_or_none, error_message_or_none)

    Examples:
        >>> validate_and_sanitize_custom_slug("My Post", set())
        (True, 'my-post', None)

        >>> validate_and_sanitize_custom_slug("api", set())
        (False, None, 'Slug "api" is reserved and cannot be used')

        >>> validate_and_sanitize_custom_slug("/invalid/slug", set())
        (False, None, 'Slug "/invalid/slug" contains hierarchical paths which are not supported')
    """
    # The value comes from a client request, so it may not be a string;
    # report that as a validation failure instead of raising TypeError.
    if not isinstance(custom_slug, str):
        return (
            False,
            None,
            f'Slug {custom_slug!r} must be a string'
        )

    # Check for hierarchical paths (not supported in v1.1.0)
    if '/' in custom_slug:
        return (
            False,
            None,
            f'Slug "{custom_slug}" contains hierarchical paths which are not supported'
        )

    # Sanitize
    sanitized = sanitize_slug(custom_slug)

    # Check if sanitization resulted in empty slug
    if not sanitized:
        return (
            False,
            None,
            f'Slug "{custom_slug}" could not be sanitized to valid format'
        )

    # Check if reserved
    if is_reserved_slug(sanitized):
        return (
            False,
            None,
            f'Slug "{sanitized}" is reserved and cannot be used'
        )

    # Validate format
    if not validate_slug(sanitized):
        return (
            False,
            None,
            f'Slug "{sanitized}" does not match required format (lowercase letters, numbers, hyphens only)'
        )

    # Make unique if needed
    try:
        unique_slug = make_slug_unique_with_suffix(sanitized, existing_slugs)
    except ValueError as e:
        return (
            False,
            None,
            str(e)
        )
    return (True, unique_slug, None)