# StarPunk/starpunk/routes/public.py

"""
Public routes for StarPunk

Handles public-facing pages including homepage and note permalinks.
No authentication required for these routes.
"""

import hashlib
from datetime import datetime, timedelta

from flask import Blueprint, abort, render_template, Response, current_app, request, send_from_directory

from starpunk.notes import list_notes, get_note
from starpunk.feed import generate_feed_streaming  # Legacy RSS
from starpunk.feeds import (
    generate_rss,
    generate_rss_streaming,
    generate_atom,
    generate_atom_streaming,
    generate_json_feed,
    generate_json_feed_streaming,
    negotiate_feed_format,
    get_mime_type,
    get_cache,
    generate_opml,
)

# Create blueprint
# The route handlers defined below are registered on this blueprint.
bp = Blueprint("public", __name__)

# Simple in-memory cache for the feed note list.
# Caches the database query results to avoid repeated DB hits. It is a plain
# module-level dict that _get_cached_notes() reads and overwrites.
# Rendered feed content is cached separately via FeedCache (Phase 3).
# Structure: {'notes': list[Note] | None, 'timestamp': datetime | None}
# Both values start as None, i.e. nothing has been cached yet.
_feed_cache = {"notes": None, "timestamp": None}


def _get_cached_notes():
    """
    Get cached note list or fetch fresh notes

    Returns the note list stored in the module-level ``_feed_cache`` when it
    is younger than ``FEED_CACHE_SECONDS`` (default 300). Otherwise queries
    up to ``FEED_MAX_ITEMS`` (default 50) published notes, attaches media to
    each one, stores the result in ``_feed_cache`` and returns it.

    An empty note list is a valid cached value: a site without published
    notes is served from the cache like any other.

    Returns:
        List of published notes for feed generation (with media attached)
    """
    from starpunk.media import get_note_media

    # Get cache duration from config (in seconds)
    cache_seconds = current_app.config.get("FEED_CACHE_SECONDS", 300)
    cache_duration = timedelta(seconds=cache_seconds)
    # Naive UTC timestamp; it is only ever compared with the value stored below
    now = datetime.utcnow()

    cached_notes = _feed_cache["notes"]
    cached_at = _feed_cache["timestamp"]

    # Compare against None instead of relying on truthiness: an empty list is
    # falsy, and treating it as "not cached" would hit the database on every
    # request while the site has no published notes.
    if cached_notes is not None and cached_at is not None:
        if now - cached_at < cache_duration:
            return cached_notes

    # Cache expired or empty, fetch fresh notes
    max_items = current_app.config.get("FEED_MAX_ITEMS", 50)
    notes = list_notes(published_only=True, limit=max_items)

    # Attach media to each note (v1.2.0 Phase 3).
    # object.__setattr__ is used instead of plain attribute assignment
    # (other handlers in this module note that Note is a frozen dataclass).
    for note in notes:
        media = get_note_media(note.id)
        object.__setattr__(note, 'media', media)

    _feed_cache["notes"] = notes
    _feed_cache["timestamp"] = now

    return notes


def _etag_matches(if_none_match, etag):
    """
    Check whether an If-None-Match header value matches the current ETag

    Accepts a single entity tag, a comma-separated list of entity tags, or
    the wildcard ``*``. Tags are compared weakly: a leading ``W/`` marker is
    ignored on both sides before the quoted values are compared.

    Args:
        if_none_match: Raw If-None-Match header value, or None if absent
        etag: ETag of the current feed representation

    Returns:
        True if the client already holds the current representation
    """
    if not if_none_match or not etag:
        return False

    header_value = if_none_match.strip()
    # Exact equality is kept as a fast path; "*" matches any representation
    if header_value == "*" or header_value == etag:
        return True

    def opaque(tag):
        # Drop surrounding whitespace and the weak-validator prefix
        tag = tag.strip()
        return tag[2:] if tag.startswith("W/") else tag

    current = opaque(etag)
    candidates = [opaque(part) for part in header_value.split(",") if part.strip()]
    return current in candidates


def _generate_feed_with_cache(format_name: str, non_streaming_generator):
    """
    Generate feed with caching and ETag support.

    Implements Phase 3 feed caching:
    - Uses FeedCache (via get_cache()) for content caching, keyed by format
      name and a checksum of the note list
    - Checks the If-None-Match header for conditional requests, on both
      cache hits and freshly generated content
    - Returns 304 Not Modified when the client's ETag matches
    - Adds ETag and Cache-Control headers to every cached-path response,
      including 304 responses

    When ``FEED_CACHE_ENABLED`` is false the feed is generated on every
    request and returned without an ETag.

    Args:
        format_name: Feed format (rss, atom, json)
        non_streaming_generator: Function that returns full feed content (not streaming)

    Returns:
        Flask Response with appropriate headers and status
    """
    # Get cached notes
    notes = _get_cached_notes()

    config = current_app.config
    cache_seconds = config.get("FEED_CACHE_SECONDS", 300)
    cache_control = f"public, max-age={cache_seconds}"

    def render_feed():
        # Single place that builds the generator arguments, shared by the
        # cache-disabled and cache-miss paths
        return non_streaming_generator(
            site_url=config["SITE_URL"],
            site_name=config["SITE_NAME"],
            site_description=config.get("SITE_DESCRIPTION", ""),
            notes=notes,
            limit=config.get("FEED_MAX_ITEMS", 50),
        )

    if not config.get("FEED_CACHE_ENABLED", True):
        # Caching disabled, generate fresh feed (no ETag on this path)
        response = Response(render_feed(), mimetype=get_mime_type(format_name))
        response.headers["Cache-Control"] = cache_control
        return response

    # Caching enabled - use FeedCache
    feed_cache = get_cache()
    notes_checksum = feed_cache.generate_notes_checksum(notes)

    cached_result = feed_cache.get(format_name, notes_checksum)
    if cached_result:
        content, etag = cached_result
    else:
        # Cache miss - generate fresh feed, store it and get its ETag
        content = render_feed()
        etag = feed_cache.set(format_name, content, notes_checksum)

    # Evaluate the conditional request only after the ETag is known, so a
    # client with a still-valid ETag gets a 304 even after a cache miss.
    if _etag_matches(request.headers.get('If-None-Match'), etag):
        response = Response(status=304)
    else:
        response = Response(content, mimetype=get_mime_type(format_name))

    response.headers["ETag"] = etag
    # A 304 carries the same caching headers the 200 response would have
    response.headers["Cache-Control"] = cache_control

    return response


@bp.route('/media/<path:path>')
def media_file(path):
    """
    Serve media files

    Per Q10: Set cache headers for media
    Per Q26: Absolute URLs in feeds constructed from this route

    The requested path is resolved and must be located inside the media
    directory (``<DATA_PATH>/media``); anything that resolves elsewhere is
    answered with 404 before the file is served.

    Args:
        path: Relative path to media file (YYYY/MM/filename.ext)

    Returns:
        File response with caching headers

    Raises:
        404: If file not found or the path escapes the media directory

    Headers:
        Cache-Control: public, max-age=31536000, immutable

    Examples:
        >>> response = client.get('/media/2025/01/uuid.jpg')
        >>> response.status_code
        200
        >>> response.headers['Cache-Control']
        'public, max-age=31536000, immutable'
    """
    from pathlib import Path

    media_dir = Path(current_app.config.get('DATA_PATH', 'data')) / 'media'

    # Validate path is safe (prevent directory traversal)
    try:
        media_root = media_dir.resolve()
        requested_path = (media_dir / path).resolve()
        # relative_to() compares whole path components and raises ValueError
        # when requested_path is not under media_root. A plain string prefix
        # test would also accept sibling directories such as "media_evil".
        requested_path.relative_to(media_root)
    except (ValueError, OSError, RuntimeError):
        # RuntimeError: resolve() reports symlink loops this way on older Pythons
        abort(404)

    # Serve file with cache headers
    response = send_from_directory(media_dir, path)

    # Cache for 1 year (immutable content)
    # Media files are UUID-named, so changing content = new URL
    response.headers['Cache-Control'] = 'public, max-age=31536000, immutable'

    return response


@bp.route("/")
def index():
    """
    Homepage listing the most recent published notes

    Loads up to 20 published notes and decorates each one with its media
    and tags before rendering.

    Returns:
        Rendered homepage template with the decorated note list

    Template: templates/index.html
    Microformats: h-feed containing h-entry items with u-photo
    """
    from starpunk.media import get_note_media
    from starpunk.tags import get_note_tags

    # (attribute name, loader) pairs applied to every note, in this order:
    # media first (v1.2.0 Phase 3), then tags (v1.3.0 Phase 3)
    decorations = (
        ('media', get_note_media),
        ('_cached_tags', get_note_tags),
    )

    recent_notes = list_notes(published_only=True, limit=20)

    for entry in recent_notes:
        for attr_name, loader in decorations:
            # Note is a frozen dataclass, so go through object.__setattr__
            object.__setattr__(entry, attr_name, loader(entry.id))

    return render_template("index.html", notes=recent_notes)


@bp.route("/note/<slug>")
def note(slug: str):
    """
    Permalink page for a single note

    Args:
        slug: URL-safe note identifier

    Returns:
        Rendered note template for the matching published note

    Raises:
        404: If no note has this slug, or the note is not published

    Template: templates/note.html
    Microformats: h-entry
    """
    from starpunk.media import get_note_media
    from starpunk.tags import get_note_tags

    found = get_note(slug=slug)

    # Missing and unpublished notes are indistinguishable to the visitor
    if not (found and found.published):
        abort(404)

    # Note is a frozen dataclass, so attributes are attached through
    # object.__setattr__: media (v1.2.0 Phase 3), then tags (v1.3.0 Phase 3)
    object.__setattr__(found, 'media', get_note_media(found.id))
    object.__setattr__(found, '_cached_tags', get_note_tags(found.id))

    return render_template("note.html", note=found)


@bp.route("/tag/<tag>")
def tag(tag: str):
    """
    Archive page for a single tag

    Shows every note returned by get_notes_by_tag() for the requested tag.

    Args:
        tag: Tag name as it appears in the URL (normalized before lookup)

    Returns:
        Rendered tag archive template

    Raises:
        404: If the normalized tag is unknown

    Note:
        The URL may use any spelling of the tag; it is passed through
        normalize_tag() first, so /tag/IndieWeb and /tag/indieweb resolve
        to the same tag.

    Template: templates/tag.html
    Microformats: h-feed containing h-entry items
    """
    from starpunk.tags import get_notes_by_tag, get_tag_by_name, normalize_tag
    from starpunk.media import get_note_media

    # normalize_tag() returns a pair; only its first element is needed here
    canonical_name, _unused = normalize_tag(tag)

    tag_record = get_tag_by_name(canonical_name)
    if not tag_record:
        abort(404)

    tagged_notes = get_notes_by_tag(canonical_name)

    # Only media is attached here (tags already pre-loaded by get_notes_by_tag)
    for entry in tagged_notes:
        object.__setattr__(entry, 'media', get_note_media(entry.id))

    return render_template("tag.html", tag=tag_record, notes=tagged_notes)


@bp.route("/feed")
def feed():
    """
    Content negotiation endpoint for feeds

    Serves feed in format based on HTTP Accept header:
    - application/rss+xml → RSS 2.0
    - application/atom+xml → ATOM 1.0
    - application/feed+json or application/json → JSON Feed 1.1
    - */* → RSS 2.0 (default)

    If no acceptable format is available, returns 406 Not Acceptable with
    X-Available-Formats header listing supported formats.

    Because the body served at this URL depends on the Accept header, every
    response carries ``Vary: Accept`` so that caches keep the formats apart.

    Returns:
        Feed response in negotiated format (as produced by feed_rss(),
        feed_atom() or feed_json()), or 406 error

    Headers:
        Content-Type: Varies by format
        Cache-Control: public, max-age={FEED_CACHE_SECONDS}
        Vary: Accept
        X-Available-Formats: List of supported formats (on 406 error only)

    Examples:
        >>> # Request with Accept: application/atom+xml
        >>> response = client.get('/feed', headers={'Accept': 'application/atom+xml'})
        >>> response.headers['Content-Type']
        'application/atom+xml; charset=utf-8'

        >>> # Request with no Accept header (defaults to RSS)
        >>> response = client.get('/feed')
        >>> response.headers['Content-Type']
        'application/rss+xml; charset=utf-8'
    """
    # Get Accept header
    accept = request.headers.get('Accept', '*/*')

    # Negotiate format
    available_formats = ['rss', 'atom', 'json']
    try:
        format_name = negotiate_feed_format(accept, available_formats)
    except ValueError:
        # No acceptable format - return 406
        return (
            "Not Acceptable. Supported formats: application/rss+xml, application/atom+xml, application/feed+json",
            406,
            {
                'Content-Type': 'text/plain; charset=utf-8',
                'X-Available-Formats': 'application/rss+xml, application/atom+xml, application/feed+json',
                'Vary': 'Accept',
            }
        )

    # Route to appropriate generator; unknown names shouldn't occur, but
    # fall back to RSS defensively
    handlers = {
        'rss': feed_rss,
        'atom': feed_atom,
        'json': feed_json,
    }
    response = handlers.get(format_name, feed_rss)()

    # The response is publicly cacheable, so declare that it was selected by
    # the Accept header; otherwise a shared cache could hand an Atom body to
    # a client that asked for JSON Feed.
    response.vary.add('Accept')

    return response


@bp.route("/feed.rss")
def feed_rss():
    """
    Explicit RSS 2.0 feed endpoint (with caching)

    Thin wrapper: all note loading, content caching, ETag and conditional
    request handling is delegated to _generate_feed_with_cache(), using
    generate_rss as the content generator.

    Returns:
        Cached or fresh RSS 2.0 feed response

    Headers:
        Content-Type: application/rss+xml; charset=utf-8
        Cache-Control: public, max-age={FEED_CACHE_SECONDS}
        ETag: entity tag supplied by the feed cache (when caching is enabled)

    Examples:
        >>> response = client.get('/feed.rss')
        >>> response.status_code
        200
        >>> response.headers['Content-Type']
        'application/rss+xml; charset=utf-8'

        >>> # Conditional request with the ETag from a previous response
        >>> response = client.get('/feed.rss', headers={'If-None-Match': 'W/"abc123..."'})
        >>> response.status_code
        304
    """
    return _generate_feed_with_cache(
        format_name='rss',
        non_streaming_generator=generate_rss,
    )


@bp.route("/feed.atom")
def feed_atom():
    """
    Explicit ATOM 1.0 feed endpoint (with caching)

    Thin wrapper: delegates to _generate_feed_with_cache() with
    generate_atom as the content generator. The ATOM syndication format
    is specified by RFC 4287.

    Returns:
        Cached or fresh ATOM 1.0 feed response

    Headers:
        Content-Type: application/atom+xml; charset=utf-8
        Cache-Control: public, max-age={FEED_CACHE_SECONDS}
        ETag: entity tag supplied by the feed cache (when caching is enabled)

    Examples:
        >>> response = client.get('/feed.atom')
        >>> response.status_code
        200
        >>> response.headers['Content-Type']
        'application/atom+xml; charset=utf-8'
    """
    return _generate_feed_with_cache(
        format_name='atom',
        non_streaming_generator=generate_atom,
    )


@bp.route("/feed.json")
def feed_json():
    """
    Explicit JSON Feed 1.1 endpoint (with caching)

    Thin wrapper: delegates to _generate_feed_with_cache() with
    generate_json_feed as the content generator. The format is described
    at https://jsonfeed.org/version/1.1.

    Returns:
        Cached or fresh JSON Feed 1.1 response

    Headers:
        Content-Type: application/feed+json; charset=utf-8
        Cache-Control: public, max-age={FEED_CACHE_SECONDS}
        ETag: entity tag supplied by the feed cache (when caching is enabled)

    Examples:
        >>> response = client.get('/feed.json')
        >>> response.status_code
        200
        >>> response.headers['Content-Type']
        'application/feed+json; charset=utf-8'
    """
    return _generate_feed_with_cache(
        format_name='json',
        non_streaming_generator=generate_json_feed,
    )


@bp.route("/feed.xml")
def feed_xml_legacy():
    """
    Legacy RSS 2.0 feed endpoint (backward compatibility)

    Keeps the old /feed.xml URL working by returning exactly what
    feed_rss() returns. New code should use /feed.rss, or /feed with
    content negotiation.

    Returns:
        The RSS 2.0 feed response produced by feed_rss()
    """
    # Delegate to the current RSS endpoint so both URLs behave the same
    legacy_response = feed_rss()
    return legacy_response


@bp.route("/opml.xml")
def opml():
    """
    OPML 2.0 feed subscription list endpoint (Phase 3)

    Serves the document built by generate_opml() from the configured
    SITE_URL and SITE_NAME. Feed readers can import it to subscribe to
    all of the site's feeds at once.

    Per v1.1.2 Phase 3:
    - OPML 2.0 compliant
    - Lists RSS, ATOM, and JSON Feed formats
    - Public access (no authentication required per CQ8)

    Returns:
        OPML 2.0 XML document

    Headers:
        Content-Type: application/xml; charset=utf-8
        Cache-Control: public, max-age={FEED_CACHE_SECONDS}

    Examples:
        >>> response = client.get('/opml.xml')
        >>> response.status_code
        200
        >>> response.headers['Content-Type']
        'application/xml; charset=utf-8'
        >>> b'<opml version="2.0">' in response.data
        True

    Standards:
        - OPML 2.0: http://opml.org/spec2.opml
    """
    config = current_app.config

    document = generate_opml(
        site_url=config["SITE_URL"],
        site_name=config["SITE_NAME"],
    )

    # Reuse the feed cache duration for the subscription list
    max_age = config.get("FEED_CACHE_SECONDS", 300)

    response = Response(document, mimetype="application/xml")
    response.headers["Cache-Control"] = f"public, max-age={max_age}"
    return response