Files
StarPunk/starpunk/micropub.py
Phil Skentelbery c64feaea23 feat: v1.4.0 Phase 3 - Micropub Media Endpoint
Implement W3C Micropub media endpoint for external client uploads.

Changes:
- Add POST /micropub/media endpoint in routes/micropub.py
  - Accept multipart/form-data with 'file' field
  - Require bearer token with 'create' scope
  - Return 201 Created with Location header
  - Validate, optimize, and generate variants via save_media()

- Update q=config response to advertise media-endpoint
  - Include media-endpoint URL in config response
  - Add 'photo' post-type to supported types

- Add photo property support to Micropub create
  - extract_photos() function to parse photo property
  - Handles both simple URL strings and structured objects with alt text
  - _attach_photos_to_note() function to attach photos by URL
  - Only attach photos from our server (by URL match)
  - External URLs logged but ignored (no download)
  - Maximum 4 photos per note (per ADR-057)

- SITE_URL normalization pattern
  - Use .rstrip('/') for consistent URL comparison
  - Applied in media endpoint and photo attachment

Per design document: docs/design/v1.4.0/media-implementation-design.md

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-10 18:32:21 -07:00

528 lines
16 KiB
Python

"""
Micropub endpoint implementation for StarPunk
This module handles Micropub protocol requests, providing a standard IndieWeb
interface for creating posts via external clients.
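Functions:
error_response: Build an OAuth 2.0 style JSON error response
extract_bearer_token: Get token from Authorization header or form
normalize_properties: Convert form/JSON data to Micropub properties format
extract_content: Get content from Micropub properties
extract_title: Get or generate title from Micropub properties
extract_tags: Get category tags from Micropub properties
extract_published_date: Parse the optional published date from Micropub properties
extract_photos: Get photo URLs and alt text from Micropub properties
handle_create: Process Micropub create action
handle_query: Process Micropub query endpoints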
Exceptions:
MicropubError: Base exception for Micropub operations
MicropubAuthError: Authentication/authorization errors
MicropubValidationError: Invalid request data
References:
- W3C Micropub Specification: https://www.w3.org/TR/micropub/
- IndieAuth Specification: https://www.w3.org/TR/indieauth/
"""
from datetime import datetime
from typing import Optional
from flask import Request, current_app, jsonify
from starpunk.notes import create_note, get_note, InvalidNoteDataError, NoteNotFoundError
from starpunk.auth_external import check_scope
# Custom Exceptions
class MicropubError(Exception):
    """
    Root of the Micropub exception hierarchy.

    Carries the pieces needed to build an error response: a machine-readable
    error code, a human-readable description, and the HTTP status to send.
    The description doubles as the exception message.
    """

    def __init__(self, error: str, error_description: str, status_code: int = 400):
        # Initialise the base Exception with the description so str(exc)
        # yields the human-readable text.
        super().__init__(error_description)
        self.status_code = status_code
        self.error_description = error_description
        self.error = error
class MicropubAuthError(MicropubError):
    """
    Raised for authentication or authorization failures.

    Always reports the "unauthorized" error code; the HTTP status defaults
    to 401 but may be overridden by the caller.
    """

    def __init__(self, error_description: str, status_code: int = 401):
        super().__init__(
            error="unauthorized",
            error_description=error_description,
            status_code=status_code,
        )
class MicropubValidationError(MicropubError):
    """
    Raised when the request data is missing or malformed.

    Always reports the "invalid_request" error code with HTTP status 400.
    """

    def __init__(self, error_description: str):
        super().__init__(
            error="invalid_request",
            error_description=error_description,
            status_code=400,
        )
# Response Helpers
def error_response(error: str, error_description: str, status_code: int = 400):
    """
    Build a JSON error response in the OAuth 2.0 error shape.

    Args:
        error: Error code (e.g., "invalid_request")
        error_description: Human-readable explanation of the failure
        status_code: HTTP status code to pair with the body (default 400)

    Returns:
        Tuple of (response, status_code)
    """
    payload = {"error": error, "error_description": error_description}
    return jsonify(payload), status_code
# Token Extraction
def extract_bearer_token(request: Request) -> Optional[str]:
    """
    Pull the access token out of a request.

    Two locations are checked, in this order:
        1. The ``Authorization: Bearer <token>`` header
        2. The ``access_token`` parameter (form body for POST,
           query string for GET)

    Args:
        request: Flask request object

    Returns:
        The token string if one was supplied, otherwise None
    """
    scheme_prefix = "Bearer "

    # The header takes precedence over any parameter.
    header_value = request.headers.get("Authorization", "")
    if header_value.startswith(scheme_prefix):
        return header_value[len(scheme_prefix):]

    # Fall back to the access_token parameter, whose location depends on
    # the HTTP method.
    if request.method == "POST":
        return request.form.get("access_token")
    if request.method == "GET":
        return request.args.get("access_token")

    return None
# Property Normalization
def normalize_properties(data: dict) -> dict:
    """
    Normalize Micropub properties from both form and JSON formats

    Handles two input formats:
        - JSON: {"type": ["h-entry"], "properties": {"content": ["value"]}}
        - Form: {content: ["value"], "category[]": ["tag1", "tag2"]}

    Args:
        data: Raw request data (form dict or JSON dict)

    Returns:
        Properties dict. For form input every value is wrapped in a list;
        for JSON input the nested "properties" object is returned as-is.
    """
    # JSON format has properties nested; hand them back untouched
    if "properties" in data:
        return data["properties"]

    # Reserved Micropub parameters that are not post properties
    reserved = ("action", "url", "access_token", "h")

    # Form format - convert to properties dict
    properties = {}
    for key, value in data.items():
        # Skip reserved parameters and mp-* server commands
        if key.startswith("mp-") or key in reserved:
            continue

        # Handle array notation: property[] -> property.
        # removesuffix drops only a literal trailing "[]"; rstrip("[]")
        # would strip any run of '[' / ']' characters and mangle other keys.
        clean_key = key.removesuffix("[]")

        # Ensure value is always a list
        if not isinstance(value, list):
            value = [value]

        properties[clean_key] = value

    return properties
# Property Extraction
def extract_content(properties: dict) -> str:
    """
    Return the post body from Micropub properties.

    The first entry of the "content" property is used. It may be a plain
    string or a structured object ({"html": "...", "text": "..."}); for
    structured objects the "text" value wins over "html".

    Args:
        properties: Normalized Micropub properties dict

    Returns:
        Content string with surrounding whitespace removed

    Raises:
        MicropubValidationError: If content is missing or empty
    """
    values = properties.get("content", [])
    if not values:
        raise MicropubValidationError("Content is required")

    body = values[0]
    if isinstance(body, dict):
        # Plain text is preferred because notes are stored as markdown
        body = body.get("text") or body.get("html", "")

    trimmed = body.strip() if body else ""
    if not trimmed:
        raise MicropubValidationError("Content cannot be empty")

    return trimmed
def extract_title(properties: dict) -> Optional[str]:
    """
    Extract or generate title from Micropub properties

    Per ADR-029 mapping rules:
        1. Use 'name' property if provided
        2. If no name, extract from content (first line, max 50 chars)

    A 'name' that is absent, an empty list, or blank/whitespace-only is
    treated as "no name" and the content fallback is used.

    Args:
        properties: Normalized Micropub properties dict

    Returns:
        Title string, or None when neither name nor content is available
    """
    # Try explicit name property first. `or [""]` also covers an explicit
    # empty list, which would otherwise raise IndexError on [0].
    names = properties.get("name") or [""]
    name = names[0]
    if name:
        name = name.strip()
        if name:
            return name

    # Generate from content (first line, max 50 chars)
    content_list = properties.get("content", [])
    if content_list:
        content = content_list[0]

        # Handle structured content ({"html": "...", "text": "..."})
        if isinstance(content, dict):
            content = content.get("text") or content.get("html", "")

        if content:
            first_line = content.split("\n")[0].strip()
            if len(first_line) > 50:
                return first_line[:50] + "..."
            return first_line

    return None
def extract_tags(properties: dict) -> list[str]:
    """
    Collect tags from the Micropub "category" property.

    Empty and whitespace-only entries are dropped; the remaining entries
    are returned with surrounding whitespace removed, in their original
    order.

    Args:
        properties: Normalized Micropub properties dict

    Returns:
        List of tag strings
    """
    tags = []
    for raw_tag in properties.get("category", []):
        if not raw_tag:
            continue
        cleaned = raw_tag.strip()
        if cleaned:
            tags.append(cleaned)
    return tags
def extract_published_date(properties: dict) -> Optional[datetime]:
    """
    Extract published date from Micropub properties

    Args:
        properties: Normalized Micropub properties dict

    Returns:
        Datetime object if a parseable published date was provided,
        None otherwise (missing, empty list, empty string, or unparseable)
    """
    # `or [""]` also covers an explicit empty list, which would otherwise
    # raise IndexError on [0] before the try block below is reached.
    values = properties.get("published") or [""]
    published = values[0]
    if not published:
        return None

    try:
        # Parse ISO 8601 datetime. A trailing "Z" is rewritten to an
        # explicit UTC offset, which datetime.fromisoformat accepts on
        # every supported Python version.
        return datetime.fromisoformat(published.replace("Z", "+00:00"))
    except (ValueError, AttributeError):
        # If parsing fails (bad format or non-string value), log and
        # return None so the caller falls back to its default.
        current_app.logger.warning(f"Failed to parse published date: {published}")
        return None
# Action Handlers
def extract_photos(properties: dict) -> list[dict[str, str]]:
    """
    Collect photo URLs and alt text from the Micropub "photo" property.

    Each entry may be a bare URL string or a structured object whose URL is
    under "value" (or, failing that, "url") with optional "alt" text.
    Structured entries without a URL, and entries of any other type, are
    skipped.

    Args:
        properties: Normalized Micropub properties dict

    Returns:
        List of dicts, each with 'url' and 'alt' keys ('alt' is '' when
        no alt text was supplied)

    Examples:
        >>> extract_photos({'photo': ['https://example.com/photo.jpg']})
        [{'url': 'https://example.com/photo.jpg', 'alt': ''}]
        >>> extract_photos({'photo': [{'value': 'https://example.com/photo.jpg', 'alt': 'Sunset'}]})
        [{'url': 'https://example.com/photo.jpg', 'alt': 'Sunset'}]
    """
    extracted = []
    for entry in properties.get("photo", []):
        if isinstance(entry, str):
            # Bare URL: no alt text available
            extracted.append({'url': entry, 'alt': ''})
            continue
        if not isinstance(entry, dict):
            continue
        # Structured entry: "value" is preferred, "url" is the fallback
        location = entry.get('value') or entry.get('url', '')
        if location:
            extracted.append({'url': location, 'alt': entry.get('alt', '')})
    return extracted
def _attach_photos_to_note(note_id: int, photos: list[dict[str, str]]) -> None:
    """
    Attach photos to a note by URL

    Only photos that already exist in this server's media table are
    attached. A URL counts as local when it is under SITE_URL or is a
    site-relative "/media/..." path. Local URLs with no matching media row
    are logged as warnings. External URLs are logged and ignored: they are
    neither downloaded nor stored.

    At most the first 4 photos are considered (per ADR-057).

    Args:
        note_id: ID of the note to attach to
        photos: List of dicts with 'url' and 'alt' keys
    """
    from starpunk.database import get_db
    from starpunk.media import attach_media_to_note

    # Normalize SITE_URL by stripping trailing slash for consistent comparison
    site_url = current_app.config.get("SITE_URL", "http://localhost:5000").rstrip('/')
    # Require a path separator after the site URL so that a look-alike host
    # such as "https://example.com.evil.test" is not mistaken for ours.
    site_prefix = site_url + '/'
    db = get_db(current_app)

    media_ids = []
    captions = []

    # Log warning if photos are being truncated
    if len(photos) > 4:
        current_app.logger.warning(
            f"Micropub create received {len(photos)} photos, truncating to 4 per ADR-057"
        )

    for photo in photos[:4]:  # Max 4 photos per ADR-057
        url = photo['url']
        alt = photo.get('alt', '')

        # Client-supplied JSON may carry a non-string URL; skip it rather
        # than failing the whole request after the note has been created.
        if not isinstance(url, str):
            current_app.logger.warning(f"Photo URL is not a string, ignored: {url!r}")
            continue

        # Reduce the URL to a site-relative path, or skip if it is external
        if url.startswith(site_prefix):
            path = url[len(site_url):]
        elif url.startswith('/media/'):
            path = url
        else:
            # External URL - log but don't fail
            current_app.logger.info(f"External photo URL ignored: {url}")
            continue

        # Remove leading /media/ if present; the lookup below is done on
        # the remainder of the path.
        path = path.removeprefix('/media/')

        # Look up media by path
        row = db.execute(
            "SELECT id FROM media WHERE path = ?",
            (path,)
        ).fetchone()

        if row:
            media_ids.append(row[0])
            captions.append(alt)
        else:
            current_app.logger.warning(f"Photo URL not found in media: {url}")

    if media_ids:
        attach_media_to_note(note_id, media_ids, captions)
def handle_create(data: dict, token_info: dict):
    """
    Handle Micropub create action

    Creates a note using StarPunk's notes.py CRUD functions after
    mapping Micropub properties to StarPunk's note format.

    Args:
        data: Raw request data (form or JSON)
        token_info: Authenticated token information (me, client_id, scope)

    Returns:
        Tuple of (response_body, status_code, headers); on success this is
        an empty body, 201, and a Location header with the note permalink

    Raises:
        MicropubError: If scope insufficient (403) or creation fails (500)
        MicropubValidationError: If the request data cannot be parsed or
            the note data is rejected
    """
    # Check scope
    if not check_scope("create", token_info.get("scope", "")):
        raise MicropubError(
            "insufficient_scope", "Token lacks create scope", status_code=403
        )

    # Extract mp-slug BEFORE normalizing properties (it's not a property!)
    # mp-slug is a Micropub server extension parameter that gets filtered
    # during normalization
    custom_slug = None
    if isinstance(data, dict) and 'mp-slug' in data:
        # Handle both form-encoded (list) and JSON (could be string or list)
        slug_value = data.get('mp-slug')
        if isinstance(slug_value, list) and slug_value:
            custom_slug = slug_value[0]
        elif isinstance(slug_value, str):
            custom_slug = slug_value

    # Normalize and extract properties
    try:
        properties = normalize_properties(data)
        content = extract_content(properties)
        # NOTE(review): title is computed but not passed to create_note below
        title = extract_title(properties)
        tags = extract_tags(properties)
        published_date = extract_published_date(properties)
        photos = extract_photos(properties)  # v1.4.0
    except MicropubValidationError:
        # Already the right type; propagate with its original traceback
        raise
    except Exception as e:
        current_app.logger.error(f"Property extraction failed: {e}")
        raise MicropubValidationError(f"Failed to parse request: {str(e)}") from e

    # Create note using existing CRUD
    try:
        note = create_note(
            content=content,
            published=True,  # Micropub posts are published by default
            created_at=published_date,
            custom_slug=custom_slug,
            tags=tags if tags else None  # Pass tags to create_note (v1.3.0)
        )

        # Attach photos if present (v1.4.0)
        if photos:
            _attach_photos_to_note(note.id, photos)

        # Build permalink URL. Strip any trailing slash from SITE_URL and
        # add the separator explicitly, so the result is well-formed whether
        # or not the configured value (or the default) ends in "/".
        site_url = current_app.config.get("SITE_URL", "http://localhost:5000").rstrip('/')
        permalink = f"{site_url}/notes/{note.slug}"

        # Return 201 Created with Location header
        return "", 201, {"Location": permalink}

    except InvalidNoteDataError as e:
        raise MicropubValidationError(str(e)) from e
    except Exception as e:
        current_app.logger.error(f"Failed to create note via Micropub: {e}")
        raise MicropubError(
            "server_error", "Failed to create post", status_code=500
        ) from e
def handle_query(args: dict, token_info: dict):
    """
    Handle Micropub query endpoints

    Supports:
        - q=config: Return server configuration
        - q=source: Return post source in Microformats2 JSON
        - q=syndicate-to: Return syndication targets (empty for V1)

    Any other value of q yields an "invalid_request" error response.

    Args:
        args: Query string arguments
        token_info: Authenticated token information (not read by this
            function)

    Returns:
        Tuple of (response, status_code)
    """
    q = args.get("q")

    if q == "config":
        # Return server configuration with media endpoint (v1.4.0)
        site_url = current_app.config.get("SITE_URL", "http://localhost:5000").rstrip('/')
        config = {
            "media-endpoint": f"{site_url}/micropub/media",
            "syndicate-to": [],  # No syndication targets in V1
            "post-types": [
                {"type": "note", "name": "Note", "properties": ["content"]},
                {"type": "photo", "name": "Photo", "properties": ["photo"]}
            ],
        }
        return jsonify(config), 200

    elif q == "source":
        # Return source of a specific post
        url = args.get("url")
        if not url:
            return error_response("invalid_request", "No URL provided")

        # Extract slug from URL
        try:
            # URL format: https://example.com/notes/{slug}
            slug = url.rstrip("/").split("/")[-1]
            note = get_note(slug)

            # Check if note exists
            if note is None:
                return error_response("invalid_request", "Post not found")

        except NoteNotFoundError:
            return error_response("invalid_request", "Post not found")
        except Exception as e:
            current_app.logger.error(f"Failed to get note source: {e}")
            # NOTE(review): error_response defaults to HTTP 400 here even
            # though the error code is "server_error" - confirm intended.
            return error_response("server_error", "Failed to retrieve post")

        # Convert note to Micropub Microformats2 format.
        # Strip any trailing slash from SITE_URL and add the separator
        # explicitly, so the URL is well-formed whether or not the
        # configured value (or the default) ends in "/".
        site_url = current_app.config.get("SITE_URL", "http://localhost:5000").rstrip('/')
        mf2 = {
            "type": ["h-entry"],
            "properties": {
                "content": [note.content],
                "published": [note.created_at.isoformat()],
                "url": [f"{site_url}/notes/{note.slug}"],
            },
        }

        # Add optional properties
        if note.title:
            mf2["properties"]["name"] = [note.title]

        # Add tags if present (v1.3.0)
        if note.tags:
            mf2["properties"]["category"] = [tag["display_name"] for tag in note.tags]

        return jsonify(mf2), 200

    elif q == "syndicate-to":
        # Return syndication targets (none for V1)
        return jsonify({"syndicate-to": []}), 200

    else:
        return error_response("invalid_request", f"Unknown query: {q}")