StarPunk/starpunk/micropub.py

"""
Micropub endpoint implementation for StarPunk

This module handles Micropub protocol requests, providing a standard IndieWeb
interface for creating posts via external clients.

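Functions:
    normalize_properties: Convert form/JSON data to Micropub properties format
    extract_content: Get content from Micropub properties
    extract_title: Get or generate title from Micropub properties
    extract_tags: Get category tags from Micropub properties
    extract_published_date: Parse the optional published date from Micropub properties
    extract_photos: Get photo URLs and alt text from Micropub properties
    handle_create: Process Micropub create action
    handle_query: Process Micropub query endpoints
    extract_bearer_token: Get token from Authorization header or form
    error_response: Build a JSON error response with an HTTP status code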

Exceptions:
    MicropubError: Base exception for Micropub operations
    MicropubAuthError: Authentication/authorization errors
    MicropubValidationError: Invalid request data

References:
    - W3C Micropub Specification: https://www.w3.org/TR/micropub/
    - IndieAuth Specification: https://www.w3.org/TR/indieauth/
"""

from datetime import datetime
from typing import Optional

from flask import Request, current_app, jsonify

from starpunk.notes import create_note, get_note, InvalidNoteDataError, NoteNotFoundError
from starpunk.auth_external import check_scope


# Custom Exceptions


class MicropubError(Exception):
    """
    Root of the Micropub exception hierarchy

    Carries the three pieces needed to build an error response: a
    machine-readable error code, a human-readable description, and the
    HTTP status code to send.

    Attributes:
        error: Error code (e.g., "invalid_request")
        error_description: Human-readable explanation; also used as the
            exception message
        status_code: HTTP status code (defaults to 400)
    """

    def __init__(self, error: str, error_description: str, status_code: int = 400):
        # The description doubles as the standard exception message (str(exc)).
        super().__init__(error_description)
        self.error, self.error_description, self.status_code = (
            error,
            error_description,
            status_code,
        )


class MicropubAuthError(MicropubError):
    """
    Raised when a request fails authentication or authorization

    Always uses the "unauthorized" error code; the HTTP status defaults
    to 401 but can be overridden by the caller.
    """

    def __init__(self, error_description: str, status_code: int = 401):
        super().__init__(
            error="unauthorized",
            error_description=error_description,
            status_code=status_code,
        )


class MicropubValidationError(MicropubError):
    """
    Raised when the request data is missing or malformed

    Always uses the "invalid_request" error code with HTTP status 400.
    """

    def __init__(self, error_description: str):
        super().__init__(
            error="invalid_request",
            error_description=error_description,
            status_code=400,
        )


# Response Helpers


def error_response(error: str, error_description: str, status_code: int = 400):
    """
    Build a JSON error response in the OAuth 2.0 error shape

    The body is a JSON object with exactly two keys, "error" and
    "error_description".

    Args:
        error: Error code (e.g., "invalid_request")
        error_description: Human-readable error description
        status_code: HTTP status code to return (defaults to 400)

    Returns:
        Tuple of (response, status_code)
    """
    payload = {"error": error, "error_description": error_description}
    body = jsonify(payload)
    return body, status_code


# Token Extraction


def extract_bearer_token(request: Request) -> Optional[str]:
    """
    Extract bearer token from Authorization header or request parameter

    Micropub spec allows token in either location:
    - Authorization: Bearer <token>
    - access_token parameter (form body for POST, query string for GET)

    The Authorization header takes precedence. The "Bearer" scheme name is
    matched case-insensitively, since HTTP authentication scheme names are
    case-insensitive, and surrounding whitespace is removed from the token.

    Args:
        request: Flask request object

    Returns:
        Token string if found, None otherwise
    """
    # Try Authorization header first
    auth_header = request.headers.get("Authorization", "")
    scheme, separator, credentials = auth_header.partition(" ")
    if separator and scheme.lower() == "bearer":
        # Tolerate extra padding between the scheme and the token
        return credentials.strip()

    # Fall back to the access_token parameter for the request method in use
    if request.method == "POST":
        return request.form.get("access_token")
    if request.method == "GET":
        return request.args.get("access_token")

    return None


# Property Normalization


def normalize_properties(data: dict) -> dict:
    """
    Normalize Micropub properties from both form and JSON formats

    Handles two input formats:
    - JSON: {"type": ["h-entry"], "properties": {"content": ["value"]}}
    - Form: {content: ["value"], "category[]": ["tag1", "tag2"]}

    For JSON input the nested "properties" object is returned as-is (it is
    not copied or validated). For form input, reserved parameters (anything
    starting with "mp-", plus "action", "url", "access_token" and "h") are
    dropped, a trailing "[]" array marker is removed from each key, and
    scalar values are wrapped in a single-element list.

    Args:
        data: Raw request data (form dict or JSON dict)

    Returns:
        Normalized properties dict with all values as lists
    """
    # JSON format has properties nested
    if "properties" in data:
        return data["properties"]

    reserved = ("action", "url", "access_token", "h")

    # Form format - convert to properties dict
    properties = {}
    for key, value in data.items():
        # Skip reserved Micropub parameters
        if key.startswith("mp-") or key in reserved:
            continue

        # Handle array notation: property[] -> property.
        # removesuffix drops only the literal "[]" marker; rstrip("[]") would
        # treat its argument as a character set and eat any trailing brackets.
        clean_key = key.removesuffix("[]")

        # Ensure value is always a list
        properties[clean_key] = value if isinstance(value, list) else [value]

    return properties


# Property Extraction


def extract_content(properties: dict) -> str:
    """
    Pull the post body out of normalized Micropub properties

    Only the first entry of the "content" list is used. A structured
    entry ({"html": "...", "text": "..."}) is reduced to its "text" value
    when that is non-empty, otherwise to its "html" value.

    Args:
        properties: Normalized Micropub properties dict

    Returns:
        Content string with surrounding whitespace removed

    Raises:
        MicropubValidationError: If content is missing or empty
    """
    values = properties.get("content", [])
    if not values:
        raise MicropubValidationError("Content is required")

    first = values[0]

    # Structured content: prefer plain text over HTML for markdown storage
    if isinstance(first, dict):
        first = first.get("text") or first.get("html", "")

    body = first.strip() if first else ""
    if not body:
        raise MicropubValidationError("Content cannot be empty")

    return body


def extract_title(properties: dict) -> Optional[str]:
    """
    Extract or generate title from Micropub properties

    Per ADR-029 mapping rules:
    1. Use 'name' property if provided (and not blank)
    2. If no name, extract from content: the first line of the content
       after surrounding whitespace is removed; lines longer than 50
       characters are cut to 50 characters followed by "..."

    Missing, empty ("name": []) or whitespace-only names are treated as
    "not provided" rather than raising or yielding an empty title.

    Args:
        properties: Normalized Micropub properties dict

    Returns:
        Title string, or None when neither a name nor usable content exists
    """
    # Try explicit name property first; `or [""]` also covers an empty list
    name = (properties.get("name") or [""])[0]
    if isinstance(name, str) and name.strip():
        return name.strip()

    # Generate from content (first line, max 50 chars)
    content_list = properties.get("content") or []
    if not content_list:
        return None

    content = content_list[0]
    # Handle structured content
    if isinstance(content, dict):
        content = content.get("text") or content.get("html", "")

    if not isinstance(content, str):
        return None

    # Strip first so leading blank lines do not produce an empty title
    stripped = content.strip()
    if not stripped:
        return None

    first_line = stripped.split("\n", 1)[0].strip()
    if len(first_line) > 50:
        return first_line[:50] + "..."
    return first_line


def extract_tags(properties: dict) -> list[str]:
    """
    Collect tags from the Micropub "category" property

    Each entry is stripped of surrounding whitespace; entries that are
    empty (before or after stripping) are dropped. Order is preserved.

    Args:
        properties: Normalized Micropub properties dict

    Returns:
        List of tag strings
    """
    tags = []
    for raw in properties.get("category", []):
        if not raw:
            continue
        cleaned = raw.strip()
        if cleaned:
            tags.append(cleaned)
    return tags


def extract_published_date(properties: dict) -> Optional[datetime]:
    """
    Extract published date from Micropub properties

    Only the first entry of the "published" list is used. A "Z" suffix is
    rewritten to "+00:00" before parsing with datetime.fromisoformat.
    A missing property, an empty list, or an empty value all yield None;
    an unparseable value is logged as a warning and also yields None.

    Args:
        properties: Normalized Micropub properties dict

    Returns:
        Datetime object if a parseable published date was provided,
        None otherwise
    """
    # `or []` covers both a missing key and an explicit empty list, which
    # would otherwise raise IndexError on [0]
    values = properties.get("published") or []
    published = values[0] if values else ""
    if not published:
        return None

    try:
        # Parse ISO 8601 datetime
        # datetime.fromisoformat handles most ISO formats
        return datetime.fromisoformat(published.replace("Z", "+00:00"))
    except (ValueError, AttributeError, TypeError):
        # If parsing fails (bad format or non-string value), log and return
        # None so the caller can fall back to its default timestamp
        current_app.logger.warning(f"Failed to parse published date: {published}")
        return None


# Action Handlers


def extract_photos(properties: dict) -> list[dict[str, str]]:
    """
    Extract photo URLs and alt text from Micropub properties

    Handles both simple URL strings and structured photo objects with alt
    text. For structured objects the URL is read from 'value', falling back
    to 'url'. Entries without a usable URL (blank strings, objects lacking a
    URL, values of other types) are skipped so they cannot take up space in
    the result, and a missing or null 'alt' becomes an empty string.
    A single string or object given in place of a list is treated as a
    one-element list.

    Args:
        properties: Normalized Micropub properties dict

    Returns:
        List of dicts with 'url' and 'alt' keys

    Examples:
        >>> # Simple URL
        >>> extract_photos({'photo': ['https://example.com/photo.jpg']})
        [{'url': 'https://example.com/photo.jpg', 'alt': ''}]

        >>> # With alt text
        >>> extract_photos({'photo': [{'value': 'https://example.com/photo.jpg', 'alt': 'Sunset'}]})
        [{'url': 'https://example.com/photo.jpg', 'alt': 'Sunset'}]
    """
    photos = properties.get("photo") or []
    # A bare string/dict would otherwise be iterated character by character
    # (or key by key)
    if isinstance(photos, (str, dict)):
        photos = [photos]

    result = []
    for photo in photos:
        if isinstance(photo, str):
            # Simple URL string
            url, alt = photo, ''
        elif isinstance(photo, dict):
            # Structured object with value and alt
            url = photo.get('value') or photo.get('url') or ''
            alt = photo.get('alt') or ''
        else:
            continue

        if not isinstance(url, str):
            continue

        url = url.strip()
        if url:
            result.append({'url': url, 'alt': alt})

    return result


def _attach_photos_to_note(note_id: int, photos: list[dict[str, str]]) -> None:
    """
    Link already-uploaded media to a note based on photo URLs

    Only the first four photos are considered (ADR-057); a warning is
    logged when more are supplied. A URL counts as local when it starts
    with the configured SITE_URL or with '/media/'. Local URLs are reduced
    to a path relative to '/media/' and looked up in the media table;
    matches are attached with their alt text as caption, misses are logged
    as warnings. Any other URL is logged at info level and skipped - nothing
    is downloaded or stored for it.

    Args:
        note_id: ID of the note to attach to
        photos: List of dicts with 'url' and 'alt' keys
    """
    from starpunk.database import get_db
    from starpunk.media import attach_media_to_note

    media_prefix = '/media/'

    # Drop any trailing slash from SITE_URL so prefix matching is consistent
    site_url = current_app.config.get("SITE_URL", "http://localhost:5000").rstrip('/')
    db = get_db(current_app)

    media_ids = []
    captions = []

    # Warn when the per-note photo cap forces truncation
    if len(photos) > 4:
        current_app.logger.warning(
            f"Micropub create received {len(photos)} photos, truncating to 4 per ADR-057"
        )

    for entry in photos[:4]:  # Max 4 photos per ADR-057
        url = entry['url']
        caption = entry.get('alt', '')

        # Work out the server-relative part of the URL, or skip external URLs
        if url.startswith(site_url):
            relative = url[len(site_url):]
        elif url.startswith(media_prefix):
            relative = url
        else:
            current_app.logger.info(f"External photo URL ignored: {url}")
            continue

        # The media table is keyed by path without the leading /media/
        if relative.startswith(media_prefix):
            relative = relative[len(media_prefix):]

        row = db.execute(
            "SELECT id FROM media WHERE path = ?",
            (relative,)
        ).fetchone()

        if not row:
            current_app.logger.warning(f"Photo URL not found in media: {url}")
            continue

        media_ids.append(row[0])
        captions.append(caption)

    if media_ids:
        attach_media_to_note(note_id, media_ids, captions)


def handle_create(data: dict, token_info: dict):
    """
    Handle Micropub create action

    Creates a note using StarPunk's notes.py CRUD functions after
    mapping Micropub properties to StarPunk's note format. Photos, when
    present, are attached after the note has been created.

    Args:
        data: Raw request data (form or JSON)
        token_info: Authenticated token information (me, client_id, scope)

    Returns:
        Tuple of (response_body, status_code, headers): an empty body,
        201, and a Location header holding the new note's permalink

    Raises:
        MicropubError: With "insufficient_scope" (403) if the token lacks
            the create scope, or "server_error" (500) if creation fails
        MicropubValidationError: If the request cannot be parsed or the
            note data is rejected as invalid
    """
    # Check scope
    if not check_scope("create", token_info.get("scope", "")):
        raise MicropubError(
            "insufficient_scope", "Token lacks create scope", status_code=403
        )

    # Extract mp-slug BEFORE normalizing properties (it's not a property!)
    # mp-slug is a Micropub server extension parameter that gets filtered during normalization
    custom_slug = None
    if isinstance(data, dict) and 'mp-slug' in data:
        # Handle both form-encoded (list) and JSON (could be string or list)
        slug_value = data.get('mp-slug')
        if isinstance(slug_value, list) and slug_value:
            custom_slug = slug_value[0]
        elif isinstance(slug_value, str):
            custom_slug = slug_value

    # Normalize and extract properties
    try:
        properties = normalize_properties(data)
        content = extract_content(properties)
        # NOTE(review): the title is extracted but not passed to create_note
        # below; the call is kept so malformed 'name' values are still
        # handled exactly as before - confirm whether it is still needed.
        title = extract_title(properties)
        tags = extract_tags(properties)
        published_date = extract_published_date(properties)
        photos = extract_photos(properties)  # v1.4.0

    except MicropubValidationError:
        # Already the right error type - re-raise with its traceback intact
        raise
    except Exception as e:
        current_app.logger.error(f"Property extraction failed: {e}")
        raise MicropubValidationError(f"Failed to parse request: {str(e)}") from e

    # Create note using existing CRUD
    try:
        note = create_note(
            content=content,
            published=True,  # Micropub posts are published by default
            created_at=published_date,
            custom_slug=custom_slug,
            tags=tags if tags else None  # Pass tags to create_note (v1.3.0)
        )

        # Attach photos if present (v1.4.0)
        if photos:
            _attach_photos_to_note(note.id, photos)

        # Build permalink URL
        # SITE_URL is expected to end with a slash, but the fallback default
        # does not; strip any trailing slash and add exactly one so the
        # permalink is well-formed either way.
        site_url = current_app.config.get("SITE_URL", "http://localhost:5000").rstrip('/')
        permalink = f"{site_url}/notes/{note.slug}"

        # Return 201 Created with Location header
        return "", 201, {"Location": permalink}

    except InvalidNoteDataError as e:
        raise MicropubValidationError(str(e)) from e
    except Exception as e:
        current_app.logger.error(f"Failed to create note via Micropub: {e}")
        raise MicropubError(
            "server_error", "Failed to create post", status_code=500
        ) from e


def handle_query(args: dict, token_info: dict):
    """
    Handle Micropub query endpoints

    Supports:
    - q=config: Return server configuration
    - q=source: Return post source in Microformats2 JSON
    - q=syndicate-to: Return syndication targets (empty for V1)

    Any other value of q (including a missing q) produces an
    "invalid_request" error response.

    Args:
        args: Query string arguments
        token_info: Authenticated token information (not used by any of
            the currently supported queries)

    Returns:
        Tuple of (response, status_code)
    """
    q = args.get("q")

    if q == "config":
        # Return server configuration with media endpoint (v1.4.0)
        site_url = current_app.config.get("SITE_URL", "http://localhost:5000").rstrip('/')
        config = {
            "media-endpoint": f"{site_url}/micropub/media",
            "syndicate-to": [],  # No syndication targets in V1
            "post-types": [
                {"type": "note", "name": "Note", "properties": ["content"]},
                {"type": "photo", "name": "Photo", "properties": ["photo"]}
            ],
        }
        return jsonify(config), 200

    if q == "syndicate-to":
        # Return syndication targets (none for V1)
        return jsonify({"syndicate-to": []}), 200

    if q != "source":
        return error_response("invalid_request", f"Unknown query: {q}")

    # q=source: return source of a specific post
    url = args.get("url")
    if not url:
        return error_response("invalid_request", "No URL provided")

    # Extract slug from URL
    try:
        # URL format: https://example.com/notes/{slug}
        slug = url.rstrip("/").split("/")[-1]
        note = get_note(slug)
    except NoteNotFoundError:
        return error_response("invalid_request", "Post not found")
    except Exception as e:
        current_app.logger.error(f"Failed to get note source: {e}")
        # A lookup failure is a server-side problem, so report it as 500
        # rather than the helper's default 400
        return error_response("server_error", "Failed to retrieve post", 500)

    # A missing note is handled whether it is signalled by None or by
    # NoteNotFoundError above
    if note is None:
        return error_response("invalid_request", "Post not found")

    # Convert note to Micropub Microformats2 format
    # SITE_URL is expected to end with a slash, but the fallback default does
    # not; strip any trailing slash and add exactly one so the URL is
    # well-formed either way.
    site_url = current_app.config.get("SITE_URL", "http://localhost:5000").rstrip('/')
    mf2 = {
        "type": ["h-entry"],
        "properties": {
            "content": [note.content],
            "published": [note.created_at.isoformat()],
            "url": [f"{site_url}/notes/{note.slug}"],
        },
    }

    # Add optional properties
    if note.title:
        mf2["properties"]["name"] = [note.title]

    # Add tags if present (v1.3.0)
    if note.tags:
        mf2["properties"]["category"] = [tag["display_name"] for tag in note.tags]

    return jsonify(mf2), 200