Complete implementation of v1.2.0 "IndieWeb Features" release.

## Phase 1: Custom Slugs
- Optional custom slug field in note creation form
- Auto-sanitization (lowercase, hyphens only)
- Uniqueness validation with auto-numbering
- Read-only after creation to preserve permalinks
- Matches Micropub mp-slug behavior

## Phase 2: Author Discovery + Microformats2
- Automatic h-card discovery from IndieAuth identity URL
- 24-hour caching with graceful fallback
- Never blocks login (per ADR-061)
- Complete h-entry, h-card, h-feed markup
- All required Microformats2 properties
- rel-me links for identity verification
- Passes IndieWeb validation

## Phase 3: Media Upload
- Upload up to 4 images per note (JPEG, PNG, GIF, WebP)
- Automatic optimization with Pillow
- Auto-resize to 2048px
- EXIF orientation correction
- 95% quality compression
- Social media-style layout (media top, text below)
- Optional captions for accessibility
- Integration with all feed formats (RSS, ATOM, JSON Feed)
- Date-organized storage with UUID filenames
- Immutable caching (1 year)

## Database Changes
- migrations/006_add_author_profile.sql - Author discovery cache
- migrations/007_add_media_support.sql - Media storage

## New Modules
- starpunk/author_discovery.py - h-card discovery and caching
- starpunk/media.py - Image upload, validation, optimization

## Documentation
- 4 new ADRs (056, 057, 058, 061)
- Complete design specifications
- Developer Q&A with 40+ questions answered
- 3 implementation reports
- 3 architect reviews (all approved)

## Testing
- 56 new tests for v1.2.0 features
- 842 total tests in suite
- All v1.2.0 feature tests passing

## Dependencies
- Added: mf2py (Microformats2 parser)
- Added: Pillow (image processing)

Version: 1.2.0-rc.1

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
378 lines
11 KiB
Python
378 lines
11 KiB
Python
"""
|
|
Author profile discovery from IndieAuth identity
|
|
|
|
Per ADR-061 and v1.2.0 Phase 2:
|
|
- Discover h-card from user's IndieAuth 'me' URL
|
|
- Cache for 24 hours (per Q14)
|
|
- Graceful fallback if discovery fails
|
|
- Never block login functionality
|
|
|
|
Discovery Process:
|
|
1. Fetch user's profile URL
|
|
2. Parse h-card microformats using mf2py
|
|
3. Extract: name, photo, url, note (bio), rel-me links
|
|
4. Cache in author_profile table with 24-hour TTL
|
|
5. Return cached data on subsequent requests
|
|
|
|
Fallback Behavior (per Q14):
|
|
- If discovery fails, use cached data even if expired
|
|
- If no cache exists, use minimal defaults (domain as name)
|
|
- Never block or fail login due to discovery issues
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, Optional
|
|
from urllib.parse import urlparse
|
|
|
|
import httpx
|
|
import mf2py
|
|
from flask import current_app
|
|
|
|
from starpunk.database import get_db
|
|
|
|
|
|
# Discovery timeout (per Q&A Q38).
# Passed as the `timeout` argument to httpx.get when fetching the profile
# page; httpx interprets the value in seconds.
DISCOVERY_TIMEOUT = 5.0

# Cache TTL (per Q&A Q14, Q19).
# Number of hours added to the save time to compute a cached profile's
# `cached_until` expiry.
CACHE_TTL_HOURS = 24
|
|
|
|
|
|
class DiscoveryError(Exception):
    """Signal that author profile discovery could not be completed.

    Raised by the discovery routine for timeouts, HTTP error statuses,
    network errors, and any other unexpected failure while fetching or
    parsing the profile page.
    """
|
|
|
|
|
|
def discover_author_profile(me_url: str) -> Optional[Dict]:
    """
    Discover author h-card from IndieAuth profile URL

    Per Q15: Use mf2py library (already a dependency)
    Per Q14: Graceful fallback, never block login
    Per Q16: Use first representative h-card

    Fetches ``me_url`` over HTTP (following redirects, with a
    ``DISCOVERY_TIMEOUT`` timeout), parses the response body with mf2py and
    extracts the representative h-card plus the page's rel-me links.

    Args:
        me_url: User's IndieAuth identity URL

    Returns:
        Dict with author profile data, or None when the page was fetched
        successfully but contains no h-card.

        Profile dict contains:
        - name: Author name (from p-name)
        - photo: Author photo URL (from u-photo)
        - url: Author canonical URL (from u-url), falling back to me_url
        - note: Author bio (from p-note)
        - rel_me_links: List of rel-me URLs

    Raises:
        DiscoveryError: On timeout, HTTP error status, network error, or any
            other unexpected failure. The triggering exception is attached
            as ``__cause__`` so the root cause stays visible in tracebacks.
    """
    try:
        current_app.logger.info(f"Discovering author profile from {me_url}")

        # Fetch profile page with timeout
        response = httpx.get(
            me_url,
            timeout=DISCOVERY_TIMEOUT,
            follow_redirects=True,
            headers={
                'Accept': 'text/html,application/xhtml+xml',
                'User-Agent': f'StarPunk/{current_app.config.get("VERSION", "1.2.0")}',
            },
        )
        response.raise_for_status()

        # Parse microformats from HTML
        parsed = mf2py.parse(doc=response.text, url=me_url)

        # Extract h-card (per Q16: first representative h-card)
        hcard = _find_representative_hcard(parsed, me_url)

        if not hcard:
            current_app.logger.warning(f"No h-card found at {me_url}")
            return None

        # Extract h-card properties
        profile = {
            'name': _get_property(hcard, 'name'),
            'photo': _get_property(hcard, 'photo'),
            'url': _get_property(hcard, 'url') or me_url,
            'note': _get_property(hcard, 'note'),
        }

        # Extract rel-me links (per Q17: store as list)
        rel_me_links = parsed.get('rels', {}).get('me', [])
        profile['rel_me_links'] = rel_me_links

        current_app.logger.info(
            f"Discovered author profile: name={profile.get('name')}, "
            f"photo={'yes' if profile.get('photo') else 'no'}, "
            f"rel_me_count={len(rel_me_links)}"
        )

        return profile

    # TimeoutException is handled before RequestError so timeouts get their
    # own message instead of the generic network-error one.
    except httpx.TimeoutException as e:
        current_app.logger.warning(f"Timeout discovering profile at {me_url}")
        raise DiscoveryError(f"Timeout fetching profile: {me_url}") from e

    except httpx.HTTPStatusError as e:
        current_app.logger.warning(
            f"HTTP {e.response.status_code} discovering profile at {me_url}"
        )
        raise DiscoveryError(
            f"HTTP error fetching profile: {e.response.status_code}"
        ) from e

    except httpx.RequestError as e:
        current_app.logger.warning(f"Network error discovering profile at {me_url}: {e}")
        raise DiscoveryError(f"Network error: {e}") from e

    except Exception as e:
        # Deliberate catch-all: callers only handle DiscoveryError, so any
        # parsing/extraction failure is converted rather than leaked.
        # logger.exception records the traceback for later diagnosis.
        current_app.logger.exception(
            f"Unexpected error discovering profile at {me_url}: {e}"
        )
        raise DiscoveryError(f"Discovery failed: {e}") from e
|
|
|
|
|
|
def _find_representative_hcard(parsed: dict, me_url: str) -> Optional[dict]:
|
|
"""
|
|
Find representative h-card from parsed microformats
|
|
|
|
Per Q16: First representative h-card = first h-card with p-name
|
|
Per Q18: First h-card with url property matching profile URL
|
|
|
|
Args:
|
|
parsed: Parsed microformats data from mf2py
|
|
me_url: Profile URL for matching
|
|
|
|
Returns:
|
|
h-card dict or None if not found
|
|
"""
|
|
items = parsed.get('items', [])
|
|
|
|
# First try: h-card with matching URL (most specific)
|
|
for item in items:
|
|
if 'h-card' in item.get('type', []):
|
|
properties = item.get('properties', {})
|
|
urls = properties.get('url', [])
|
|
|
|
# Check if any URL matches the profile URL
|
|
for url in urls:
|
|
if isinstance(url, dict):
|
|
url = url.get('value', '')
|
|
if _normalize_url(url) == _normalize_url(me_url):
|
|
# Found matching h-card
|
|
return item
|
|
|
|
# Second try: First h-card with p-name (representative h-card)
|
|
for item in items:
|
|
if 'h-card' in item.get('type', []):
|
|
properties = item.get('properties', {})
|
|
if properties.get('name'):
|
|
return item
|
|
|
|
# Third try: Just use first h-card if any
|
|
for item in items:
|
|
if 'h-card' in item.get('type', []):
|
|
return item
|
|
|
|
return None
|
|
|
|
|
|
def _get_property(hcard: dict, prop_name: str) -> Optional[str]:
|
|
"""
|
|
Extract property value from h-card
|
|
|
|
Handles both string values and nested objects (for u-* properties)
|
|
|
|
Args:
|
|
hcard: h-card item dict
|
|
prop_name: Property name (e.g., 'name', 'photo', 'url')
|
|
|
|
Returns:
|
|
Property value as string or None
|
|
"""
|
|
properties = hcard.get('properties', {})
|
|
values = properties.get(prop_name, [])
|
|
|
|
if not values:
|
|
return None
|
|
|
|
# Get first value
|
|
value = values[0]
|
|
|
|
# Handle nested objects (e.g., u-photo might be {'value': '...', 'alt': '...'})
|
|
if isinstance(value, dict):
|
|
return value.get('value')
|
|
|
|
return value
|
|
|
|
|
|
def _normalize_url(url: str) -> str:
|
|
"""
|
|
Normalize URL for comparison
|
|
|
|
Removes trailing slash and converts to lowercase
|
|
|
|
Args:
|
|
url: URL to normalize
|
|
|
|
Returns:
|
|
Normalized URL
|
|
"""
|
|
if not url:
|
|
return ''
|
|
return url.rstrip('/').lower()
|
|
|
|
|
|
def _fetch_cached_row(db, me_url: str):
    """Return the author_profile row for me_url (falsy when none matches)."""
    return db.execute(
        """
        SELECT me, name, photo, url, note, rel_me_links, cached_until
        FROM author_profile
        WHERE me = ?
        """,
        (me_url,),
    ).fetchone()


def _cache_is_fresh(row) -> bool:
    """Return True when the row's cached_until lies in the future (naive UTC)."""
    try:
        return datetime.utcnow() < datetime.fromisoformat(row['cached_until'])
    except (TypeError, ValueError):
        # An unreadable expiry is treated as expired so that discovery is
        # retried instead of the lookup failing.
        return False


def _row_to_profile(row) -> Dict:
    """Convert an author_profile row into the public profile dict."""
    raw_links = row['rel_me_links']
    try:
        rel_me_links = json.loads(raw_links) if raw_links else []
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError. A corrupt cache entry must
        # not break the caller, so the links are dropped.
        current_app.logger.warning(
            f"Ignoring unreadable rel_me_links in cached profile for {row['me']}"
        )
        rel_me_links = []

    return {
        'me': row['me'],
        'name': row['name'],
        'photo': row['photo'],
        'url': row['url'],
        'note': row['note'],
        'rel_me_links': rel_me_links,
    }


def get_author_profile(me_url: str, refresh: bool = False) -> Dict:
    """
    Get author profile with caching

    Per Q14: 24-hour cache, never block on failure
    Per Q19: Use database for caching

    Resolution order:
      1. Unless ``refresh`` is set, return the cached row if it has not
         expired.
      2. Otherwise run discovery; on success save and return the result.
      3. If discovery fails or finds no h-card, return the cached row even
         if it has expired.
      4. With no cached row at all, return minimal defaults (domain as name).

    Args:
        me_url: User's IndieAuth identity URL
        refresh: If True, force refresh from profile URL

    Returns:
        Author profile dict (from cache or fresh discovery)
        Always returns a dict, never None (uses fallback defaults)

        Profile dict contains:
        - me: IndieAuth identity URL
        - name: Author name
        - photo: Author photo URL (may be None)
        - url: Author canonical URL
        - note: Author bio (may be None)
        - rel_me_links: List of rel-me URLs
    """
    db = get_db(current_app)

    # Check cache unless refresh requested
    cached = None
    if not refresh:
        cached = _fetch_cached_row(db, me_url)
        if cached and _cache_is_fresh(cached):
            current_app.logger.debug(f"Using cached author profile for {me_url}")
            return _row_to_profile(cached)

    # Attempt discovery
    try:
        profile = discover_author_profile(me_url)
    except DiscoveryError as e:
        current_app.logger.warning(f"Discovery failed: {e}")
    else:
        if profile:
            # Save to cache
            save_author_profile(me_url, profile)

            # Return with me_url added
            profile['me'] = me_url
            return profile

    # Try to use expired cache as fallback (per Q14). When refresh was not
    # requested the row was already fetched above and nothing has written to
    # it since, so it is only queried here on the refresh path.
    if refresh:
        cached = _fetch_cached_row(db, me_url)

    if cached:
        current_app.logger.info(f"Using expired cache as fallback for {me_url}")
        return _row_to_profile(cached)

    # No cache, discovery failed - use minimal defaults (per Q14, Q21)
    current_app.logger.warning(
        f"No cached profile for {me_url}, using default fallback"
    )

    # Extract domain from URL for default name
    try:
        default_name = urlparse(me_url).netloc or me_url
    except Exception:
        # Deliberately broad: the fallback path must never raise.
        default_name = me_url

    return {
        'me': me_url,
        'name': default_name,
        'photo': None,
        'url': me_url,
        'note': None,
        'rel_me_links': [],
    }
|
|
|
|
|
|
def save_author_profile(me_url: str, profile: Dict) -> None:
    """
    Persist a discovered author profile in the author_profile table.

    Per Q14: cached_until is set CACHE_TTL_HOURS (24 hours) from now
    Per Q17: rel-me links are stored as a JSON array

    Any existing row for the same ``me`` is replaced, and the change is
    committed before returning.

    Args:
        me_url: User's IndieAuth identity URL
        profile: Author profile dict from discovery
    """
    connection = get_db(current_app)

    # Expiry is a naive UTC timestamp, stored in ISO 8601 form.
    expires_at = datetime.utcnow() + timedelta(hours=CACHE_TTL_HOURS)

    row_values = (
        me_url,
        profile.get('name'),
        profile.get('photo'),
        profile.get('url'),
        profile.get('note'),
        json.dumps(profile.get('rel_me_links', [])),  # per Q17
        expires_at.isoformat(),
    )

    # Upsert: INSERT OR REPLACE overwrites any previous row for this identity.
    connection.execute(
        """
        INSERT OR REPLACE INTO author_profile
        (me, name, photo, url, note, rel_me_links, discovered_at, cached_until)
        VALUES (?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, ?)
        """,
        row_values
    )
    connection.commit()

    current_app.logger.info(f"Saved author profile for {me_url} (expires {expires_at})")
|