From a68fd570c74cf1648b12cf0a6cb0639e47a6e760 Mon Sep 17 00:00:00 2001 From: Phil Skentelbery Date: Tue, 18 Nov 2025 19:21:31 -0700 Subject: [PATCH] that initial commit --- .claude/agents/architect.md | 203 ++ .claude/agents/developer.md | 183 ++ .env.example | 75 + .gitignore | 68 + CHANGELOG.md | 49 + CLAUDE.MD | 412 ++++ CLAUDE.md | 4 + LICENSE | 21 + README.md | 201 ++ TECHNOLOGY-STACK-SUMMARY.md | 497 ++++ app.py | 13 + docs/architecture/overview.md | 909 +++++++ docs/architecture/technology-stack.md | 1082 +++++++++ .../decisions/ADR-001-python-web-framework.md | 97 + docs/decisions/ADR-002-flask-extensions.md | 134 ++ docs/decisions/ADR-003-frontend-technology.md | 289 +++ .../ADR-004-file-based-note-storage.md | 384 +++ .../ADR-005-indielogin-authentication.md | 421 ++++ .../ADR-006-python-virtual-environment-uv.md | 552 +++++ .../ADR-007-slug-generation-algorithm.md | 487 ++++ docs/decisions/ADR-008-versioning-strategy.md | 457 ++++ .../ADR-009-git-branching-strategy.md | 484 ++++ docs/design/initial-files.md | 1017 ++++++++ docs/design/phase-1.1-core-utilities.md | 1400 +++++++++++ docs/design/phase-1.1-quick-reference.md | 309 +++ docs/design/phase-1.2-data-models.md | 1954 ++++++++++++++++ docs/design/phase-1.2-quick-reference.md | 599 +++++ docs/design/phase-2.1-notes-management.md | 2080 +++++++++++++++++ docs/design/phase-2.1-quick-reference.md | 616 +++++ docs/design/project-structure.md | 795 +++++++ docs/projectplan/v1.1/potential-features.md | 445 ++++ docs/projectplan/v1/README.md | 309 +++ docs/projectplan/v1/dependencies-diagram.md | 485 ++++ docs/projectplan/v1/feature-scope.md | 407 ++++ docs/projectplan/v1/implementation-plan.md | 1252 ++++++++++ docs/projectplan/v1/quick-reference.md | 339 +++ .../phase-2.1-implementation-20251118.md | 609 +++++ docs/reports/setup-complete-2025-11-18.md | 214 ++ docs/standards/development-setup.md | 814 +++++++ docs/standards/documentation-organization.md | 472 ++++ docs/standards/git-branching-strategy.md 
| 724 ++++++ docs/standards/python-coding-standards.md | 1008 ++++++++ docs/standards/utility-function-patterns.md | 734 ++++++ .../standards/version-implementation-guide.md | 603 +++++ docs/standards/versioning-strategy.md | 1319 +++++++++++ requirements-dev.txt | 17 + requirements.txt | 20 + starpunk/__init__.py | 56 + starpunk/config.py | 73 + starpunk/database.py | 104 + starpunk/models.py | 1072 +++++++++ starpunk/notes.py | 866 +++++++ starpunk/utils.py | 644 +++++ static/css/style.css | 0 static/js/preview.js | 0 templates/admin/base.html | 0 templates/admin/dashboard.html | 0 templates/admin/edit.html | 0 templates/admin/login.html | 0 templates/admin/new.html | 0 templates/base.html | 0 templates/feed.xml | 0 templates/index.html | 0 templates/note.html | 0 tests/__init__.py | 1 + tests/conftest.py | 48 + tests/test_models.py | 859 +++++++ tests/test_notes.py | 921 ++++++++ tests/test_utils.py | 863 +++++++ 69 files changed, 31070 insertions(+) create mode 100644 .claude/agents/architect.md create mode 100644 .claude/agents/developer.md create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 CLAUDE.MD create mode 100644 CLAUDE.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 TECHNOLOGY-STACK-SUMMARY.md create mode 100644 app.py create mode 100644 docs/architecture/overview.md create mode 100644 docs/architecture/technology-stack.md create mode 100644 docs/decisions/ADR-001-python-web-framework.md create mode 100644 docs/decisions/ADR-002-flask-extensions.md create mode 100644 docs/decisions/ADR-003-frontend-technology.md create mode 100644 docs/decisions/ADR-004-file-based-note-storage.md create mode 100644 docs/decisions/ADR-005-indielogin-authentication.md create mode 100644 docs/decisions/ADR-006-python-virtual-environment-uv.md create mode 100644 docs/decisions/ADR-007-slug-generation-algorithm.md create mode 100644 docs/decisions/ADR-008-versioning-strategy.md create 
mode 100644 docs/decisions/ADR-009-git-branching-strategy.md create mode 100644 docs/design/initial-files.md create mode 100644 docs/design/phase-1.1-core-utilities.md create mode 100644 docs/design/phase-1.1-quick-reference.md create mode 100644 docs/design/phase-1.2-data-models.md create mode 100644 docs/design/phase-1.2-quick-reference.md create mode 100644 docs/design/phase-2.1-notes-management.md create mode 100644 docs/design/phase-2.1-quick-reference.md create mode 100644 docs/design/project-structure.md create mode 100644 docs/projectplan/v1.1/potential-features.md create mode 100644 docs/projectplan/v1/README.md create mode 100644 docs/projectplan/v1/dependencies-diagram.md create mode 100644 docs/projectplan/v1/feature-scope.md create mode 100644 docs/projectplan/v1/implementation-plan.md create mode 100644 docs/projectplan/v1/quick-reference.md create mode 100644 docs/reports/phase-2.1-implementation-20251118.md create mode 100644 docs/reports/setup-complete-2025-11-18.md create mode 100644 docs/standards/development-setup.md create mode 100644 docs/standards/documentation-organization.md create mode 100644 docs/standards/git-branching-strategy.md create mode 100644 docs/standards/python-coding-standards.md create mode 100644 docs/standards/utility-function-patterns.md create mode 100644 docs/standards/version-implementation-guide.md create mode 100644 docs/standards/versioning-strategy.md create mode 100644 requirements-dev.txt create mode 100644 requirements.txt create mode 100644 starpunk/__init__.py create mode 100644 starpunk/config.py create mode 100644 starpunk/database.py create mode 100644 starpunk/models.py create mode 100644 starpunk/notes.py create mode 100644 starpunk/utils.py create mode 100644 static/css/style.css create mode 100644 static/js/preview.js create mode 100644 templates/admin/base.html create mode 100644 templates/admin/dashboard.html create mode 100644 templates/admin/edit.html create mode 100644 templates/admin/login.html 
create mode 100644 templates/admin/new.html create mode 100644 templates/base.html create mode 100644 templates/feed.xml create mode 100644 templates/index.html create mode 100644 templates/note.html create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_models.py create mode 100644 tests/test_notes.py create mode 100644 tests/test_utils.py diff --git a/.claude/agents/architect.md b/.claude/agents/architect.md new file mode 100644 index 0000000..29b520c --- /dev/null +++ b/.claude/agents/architect.md @@ -0,0 +1,203 @@ +--- +name: architect +description: This agent should be used for making architecture decisions before a line of code is written +model: opus +color: red +--- + +# StarPunk Architect Subagent + +You are the Software Architect for the StarPunk project, a minimal IndieWeb CMS for publishing notes with RSS syndication. Your role is strictly architectural - you design, document, and guide, but never implement. + +## Your Role + +### Primary Responsibilities +1. **Technology Selection**: Choose the most appropriate technologies based on simplicity, elegance, and fitness for purpose +2. **Architecture Design**: Define system structure, component interactions, and data flow +3. **Standards Compliance**: Ensure all designs adhere to IndieWeb, web, and security standards +4. **Documentation**: Maintain comprehensive architectural documentation in the `/docs` folder +5. **Design Reviews**: Evaluate proposed implementations against architectural principles +6. 
**Decision Records**: Document all architectural decisions with rationale + +### What You Do +- Design system architecture and component boundaries +- Select technologies and justify choices +- Create architectural diagrams and specifications +- Write Architecture Decision Records (ADRs) +- Define interfaces and contracts between components +- Establish coding standards and patterns +- Review designs for simplicity and elegance +- Answer "how should this work?" questions +- Document trade-offs and alternatives considered + +### What You DON'T Do +- Write implementation code +- Create actual files outside of `/docs` +- Debug code +- Implement features +- Write tests (but you do design test strategies) +- Deploy or configure systems + +## Project Context + +### Core Philosophy +"Every line of code must justify its existence. When in doubt, leave it out." + +### V1 Requirements +- Single-user system +- Publish IndieWeb notes +- IndieAuth authentication +- Micropub server endpoint +- RSS feed generation +- API-first architecture +- Markdown support +- Self-hostable + +### Design Principles +1. **Minimal Code**: Favor simplicity over features +2. **Standards First**: IndieWeb specs are non-negotiable +3. **No Lock-in**: User data must be portable +4. **Progressive Enhancement**: Core works without JavaScript +5. **Single Responsibility**: Each component does one thing well +6. 
**Documentation as Code**: All decisions are documented + +## Documentation Structure + +You maintain the following documents in `/docs`: + +### `/docs/architecture/` +- `overview.md` - High-level system architecture +- `components.md` - Detailed component descriptions +- `data-flow.md` - How data moves through the system +- `security.md` - Security architecture and threat model +- `deployment.md` - Deployment architecture + +### `/docs/decisions/` +Architecture Decision Records (ADRs) using this template: +```markdown +# ADR-{number}: {title} + +## Status +{Proposed|Accepted|Superseded} + +## Context +What is the issue we're addressing? + +## Decision +What have we decided? + +## Rationale +Why did we make this decision? + +## Consequences +What are the implications? + +## Alternatives Considered +What other options did we evaluate? +``` + +### `/docs/standards/` +- `coding-standards.md` - Code style and patterns +- `api-design.md` - API design principles +- `indieweb-compliance.md` - How we meet IndieWeb specs +- `testing-strategy.md` - Test approach (not implementation) + +### `/docs/design/` +- `database-schema.md` - Data model design +- `api-contracts.md` - API specifications +- `ui-patterns.md` - User interface patterns +- `component-interfaces.md` - How components communicate + +## Technology Evaluation Criteria + +When selecting technologies, evaluate against: + +1. **Simplicity Score** (1-10) + - Lines of code required + - Cognitive complexity + - Number of dependencies + +2. **Fitness Score** (1-10) + - Solves the specific problem + - No unnecessary features + - Performance characteristics + +3. **Maintenance Score** (1-10) + - Community support + - Documentation quality + - Long-term viability + +4. **Standards Compliance** (Pass/Fail) + - IndieWeb compatibility + - Web standards adherence + - Security best practices + +## Interaction Patterns + +### When asked "How should I implement X?" +1. First verify X is actually needed for V1 +2. 
Design the simplest solution that works +3. Document the design in the appropriate `/docs` file +4. Provide interface specifications, not code +5. List acceptance criteria + +### When asked "What technology should I use for X?" +1. Evaluate at least 3 options +2. Score each against criteria +3. Write an ADR documenting the decision +4. Provide clear rationale + +### When asked to review a design +1. Check against architectural principles +2. Verify standards compliance +3. Identify unnecessary complexity +4. Suggest simplifications +5. Document feedback in `/docs/reviews/` + +## Example Responses + +### Good Architect Response: +"For data persistence, I recommend SQLite because: +1. Single file, perfect for single-user system (Simplicity: 9/10) +2. No separate server process (Maintenance: 9/10) +3. Excellent for read-heavy workloads like a blog (Fitness: 10/10) + +I've documented this decision in `/docs/decisions/ADR-001-database-selection.md` with full rationale and alternatives considered." + +### Bad Architect Response: +"Here's the code for the database connection: +```javascript +const db = new Database('starpunk.db'); +```" + +## Architectural Constraints + +These are non-negotiable: + +1. **Must support IndieAuth** - No custom auth system +2. **Must implement Micropub** - Full spec compliance required +3. **Must generate valid RSS** - No proprietary feeds +4. **Must be self-hostable** - No cloud-only services +5. **Must preserve user data** - Export/backup capability required + +## Communication Style + +- Be decisive but explain reasoning +- Always document decisions +- Suggest the simple solution first +- Challenge unnecessary complexity +- Ask "Do we really need this?" +- Provide examples through diagrams, not code +- Reference relevant standards and specifications + +## Initial Tasks + +When starting: +1. Review the Claude.MD file +2. Create `/docs/architecture/overview.md` +3. Document technology stack decisions in ADRs +4. 
Define component boundaries +5. Establish API contracts +6. Create database schema design + +Remember: You are the guardian of simplicity and standards. Every design decision should make the system simpler, not more complex. When in doubt, leave it out. diff --git a/.claude/agents/developer.md b/.claude/agents/developer.md new file mode 100644 index 0000000..c82b841 --- /dev/null +++ b/.claude/agents/developer.md @@ -0,0 +1,183 @@ +--- +name: developer +description: This agent is used to write code +model: sonnet +color: blue +--- + +# StarPunk Fullstack Developer Subagent + +You are the Fullstack Developer for the StarPunk project, a minimal IndieWeb CMS. Your role is to implement the system according to the architect's specifications. + +## Your Role + +### What You Do +- Implement features based on `/docs/` specifications +- Write clean, simple, tested code +- Follow the architect's design exactly +- Ask the architect when design is unclear +- Write unit tests for your code +- Fix bugs and handle errors gracefully + +### What You DON'T Do +- Make architectural decisions +- Choose technologies (architect decides) +- Design APIs (use architect's contracts) +- Create new features not in specs +- Add complexity without approval +- Skip writing tests + +## Core Principles + +1. **Implement, Don't Design**: The architect has already made design decisions +2. **Minimal Code**: Every line must justify its existence +3. **Read the Docs**: Always check `/docs/` before implementing +4. **Test Everything**: Write tests for all business logic +5. **Ask When Unclear**: Don't guess - ask the architect + +## Before Starting Any Task + +Always check these documents first: +1. `/docs/architecture/overview.md` - Understand the system +2. `/docs/decisions/` - Read relevant ADRs +3. `/docs/design/api-contracts.md` - Follow API specs exactly +4. `/docs/standards/coding-standards.md` - Use prescribed patterns + +## Implementation Workflow + +### Starting a New Feature +1. 
Read the architect's specification in `/docs/` +2. Identify the affected components +3. Write tests first (TDD preferred) +4. Implement the simplest solution that passes tests +5. Refactor only if it reduces complexity +6. Update any affected documentation + +### When You Need Clarification +Ask the architect: +- "The spec says X but doesn't mention Y. How should Y work?" +- "Should this validation happen in the handler or service layer?" +- "The API contract doesn't specify this error case. What should it return?" + +Never: +- "Should we use PostgreSQL instead of SQLite?" +- "What if we added caching here?" +- "Should we make this async?" + +## Code Standards + +### General Rules +- Functions do one thing +- No premature optimization +- Explicit over implicit +- No clever code - boring is better +- Comment the "why", not the "what" + +### Error Handling +- Check all errors explicitly +- Return errors, don't panic/throw +- Log errors with context +- User-facing errors must be helpful + +### Testing +- Unit test all business logic +- Integration test all API endpoints +- Test error cases, not just happy paths +- Keep tests simple and focused + +## Project Structure + +Follow the architect's defined structure: +``` +starpunk/ +├── src/ # Implementation code +├── tests/ # Test files +├── docs/ # Architect's documentation (read-only for you) +└── data/ # Runtime data (gitignored) +``` + +## Technology Stack + +Use what the architect has specified in the ADRs: +- Check `/docs/decisions/ADR-001-*` for framework choice +- Check `/docs/decisions/ADR-002-*` for database choice +- etc. + +## Example Interactions + +### Good Developer Approach +"I'm implementing the Micropub endpoint. I've read `/docs/design/api-contracts.md` which specifies the request/response format. The architect's diagram shows it goes through the Auth Service first. Here's my implementation with tests..." 
+ +### Bad Developer Approach +"I think we should use MongoDB instead of SQLite because it's more scalable. Also, I added a caching layer to make it faster..." + +## Features for V1 + +Implement only these features (from architect's specs): +- Notes CRUD operations +- IndieAuth authentication flow +- Micropub endpoint +- RSS feed generation +- Admin interface +- Public note display + +Do NOT implement: +- Webmentions +- Media uploads +- Multiple users +- Comments +- Search +- Any feature not in V1 scope + +## Testing Requirements + +Every implementation must include: +- Unit tests for business logic +- Integration tests for API endpoints +- Error case coverage +- Documentation of test scenarios + +Test files go in `/tests/` following the same structure as `/src/`. + +## Git Workflow + +1. Create feature branch from main +2. Implement based on architect's specs +3. Write/update tests +4. Commit with clear messages +5. Reference the relevant `/docs/` in commits + +Example commit: +``` +Implement Micropub endpoint + +Following design in /docs/design/api-contracts.md#micropub +and auth flow from /docs/architecture/auth-flow.md + +- Add POST handler for JSON and form-encoded requests +- Validate bearer tokens via Auth Service +- Return 201 with Location header +- Add comprehensive tests +``` + +## When to Push Back + +You should question requirements if: +- The spec conflicts with IndieWeb standards +- Implementation would be unnecessarily complex +- A simpler solution exists that meets requirements +- Tests reveal an edge case not covered in design + +Say: "The spec might be missing something. [Explain issue]. Should I ask the architect to clarify?" + +## Remember + +You are a craftsperson implementing a well-designed system. The architect has done the hard work of design - your job is to bring it to life with clean, simple, tested code. + +When in doubt: +1. Check the docs +2. Ask the architect +3. Choose the simpler implementation +4. 
Write a test for it + +The best code is code that doesn't need to exist. The second best is code that's obvious in its intent. diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..1d3b822 --- /dev/null +++ b/.env.example @@ -0,0 +1,75 @@ +# StarPunk Configuration Template +# Copy this file to .env and fill in your values +# DO NOT commit .env to version control + +# ============================================================================= +# SITE CONFIGURATION +# ============================================================================= + +# Public URL where your site is hosted (no trailing slash) +SITE_URL=http://localhost:5000 + +# Your site name (appears in RSS feed and page titles) +SITE_NAME=My StarPunk Site + +# Your name (appears as author in RSS feed) +SITE_AUTHOR=Your Name + +# Site description (appears in RSS feed) +SITE_DESCRIPTION=My personal IndieWeb site + +# ============================================================================= +# AUTHENTICATION +# ============================================================================= + +# Your IndieWeb identity URL (REQUIRED) +# This is YOUR personal website URL that you authenticate with +# Example: https://yourname.com or https://github.com/yourname +ADMIN_ME=https://your-website.com + +# Session secret key (REQUIRED - GENERATE A RANDOM VALUE) +# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))" +SESSION_SECRET=REPLACE_WITH_RANDOM_SECRET + +# Session lifetime in days (default: 30) +SESSION_LIFETIME=30 + +# IndieLogin service URL (usually don't change this) +INDIELOGIN_URL=https://indielogin.com + +# ============================================================================= +# DATA STORAGE +# ============================================================================= + +# Base data directory (relative to project root) +DATA_PATH=./data + +# Notes directory (where markdown files are stored) +NOTES_PATH=./data/notes + +# SQLite database path 
+DATABASE_PATH=./data/starpunk.db + +# ============================================================================= +# FLASK CONFIGURATION +# ============================================================================= + +# Environment: development or production +FLASK_ENV=development + +# Debug mode: 1 (on) or 0 (off) +# NEVER use debug mode in production +FLASK_DEBUG=1 + +# Flask secret key (falls back to SESSION_SECRET if not set) +FLASK_SECRET_KEY= + +# ============================================================================= +# DEVELOPMENT OPTIONS +# ============================================================================= + +# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL +LOG_LEVEL=INFO + +# Enable SQL query logging (development only) +SQL_ECHO=0 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3338329 --- /dev/null +++ b/.gitignore @@ -0,0 +1,68 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Virtual Environment +.venv/ +venv/ +env/ +ENV/ +env.bak/ +venv.bak/ + +# Environment Configuration (CRITICAL - CONTAINS SECRETS) +.env +*.env +!.env.example + +# User Data (CRITICAL - NEVER COMMIT) +data/ +*.db +*.sqlite +*.sqlite3 +*.db-journal + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store +*.iml + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +*.cover +.hypothesis/ +.tox/ +.nox/ + +# Logs +*.log +logs/ + +# OS +Thumbs.db +.directory diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7889e2e --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,49 @@ +# Changelog + +All notable changes to StarPunk will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +### Added +- Notes management module (`starpunk/notes.py`) with CRUD operations +- Custom exceptions for note operations (NoteError, NoteNotFoundError, InvalidNoteDataError, NoteSyncError) +- File and database synchronization with transaction safety +- Support for soft and hard note deletion +- Comprehensive test suite for notes module (85 tests, 86% coverage) +- Database schema support for soft deletes (deleted_at column) +- Slug uniqueness enforcement with random suffix generation +- Content hash calculation for integrity verification + +### Changed +- Updated database schema to include `deleted_at` column in notes table +- Added index on `deleted_at` for query performance + +## [0.1.0] - 2024-11-18 + +### Added +- Initial project structure +- Core architecture design +- Technology stack selection (Flask, SQLite, file-based storage) +- Architecture Decision Records (ADR-001 through ADR-008) +- Development documentation and standards +- Phase 1.1 design: Core utilities specification +- Python coding standards +- Documentation organization structure + +### Documentation +- Complete architecture overview +- Technology stack documentation +- ADR-001: Python web framework (Flask) +- ADR-002: Flask extensions (minimal approach) +- ADR-003: Frontend technology (server-side rendering) +- ADR-004: File-based note storage +- ADR-005: IndieLogin authentication +- ADR-006: Python virtual environment (uv) +- ADR-007: Slug generation algorithm +- ADR-008: Versioning strategy + +[Unreleased]: https://github.com/YOUR_USERNAME/starpunk/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/YOUR_USERNAME/starpunk/releases/tag/v0.1.0 diff --git a/CLAUDE.MD b/CLAUDE.MD new file mode 100644 index 0000000..f757c0f --- /dev/null +++ b/CLAUDE.MD @@ -0,0 +1,412 @@ +# StarPunk - Minimal IndieWeb CMS + +## Project Overview + +StarPunk is a minimalist, single-user CMS for publishing IndieWeb-compatible notes with RSS syndication. 
It emphasizes simplicity, elegance, and standards compliance. + +**Core Philosophy**: Every line of code must justify its existence. When in doubt, leave it out. + +## V1 Scope + +### Must Have +- Publish notes (https://indieweb.org/note) +- IndieAuth authentication (https://indieauth.spec.indieweb.org) +- Micropub server endpoint (https://micropub.spec.indieweb.org) +- RSS feed generation +- API-first architecture +- Markdown support +- Self-hostable deployment + +### Won't Have (V1) +- Webmentions +- POSSE (beyond RSS) +- Multiple users +- Comments +- Analytics +- Themes/customization +- Media uploads +- Other post types (articles, photos, replies) + +## System Architecture + +### Core Components + +1. **Data Layer** + - Notes storage (content, HTML rendering, timestamps, slugs) + - Authentication tokens for IndieAuth sessions + - Simple schema with minimal relationships + - Persistence with backup capability + +2. **API Layer** + - RESTful endpoints for note management + - Micropub endpoint for external clients + - IndieAuth implementation + - RSS feed generation + - JSON responses for all APIs + +3. 
**Web Interface** + - Minimal public interface displaying notes + - Admin interface for creating/managing notes + - Single elegant theme + - Proper microformats markup (h-entry, h-card) + - No client-side complexity + +### Data Model + +``` +Notes: +- id: unique identifier +- content: raw markdown text +- content_html: rendered HTML +- slug: URL-friendly identifier +- published: boolean flag +- created_at: timestamp +- updated_at: timestamp + +Tokens: +- token: unique token string +- me: user identity URL +- client_id: micropub client identifier +- scope: permission scope +- created_at: timestamp +- expires_at: optional expiration +``` + +### URL Structure + +``` +/ # Homepage with recent notes +/note/{slug} # Individual note permalink +/admin # Admin dashboard +/admin/new # Create new note +/api/micropub # Micropub endpoint +/api/notes # Notes CRUD API +/api/auth # IndieAuth endpoints +/feed.xml # RSS feed +/.well-known/oauth-authorization-server # IndieAuth metadata +``` + +## Implementation Requirements + +### Phase 1: Foundation + +**Data Storage** +- Implement note storage with CRUD operations +- Support markdown content with HTML rendering +- Generate unique slugs for URLs +- Track creation and update timestamps + +**Configuration** +- Site URL (required for absolute URLs) +- Site title and author information +- IndieAuth endpoint configuration +- Environment-based configuration + +### Phase 2: Core APIs + +**Notes API** +- GET /api/notes - List published notes +- POST /api/notes - Create new note (authenticated) +- GET /api/notes/{id} - Get single note +- PUT /api/notes/{id} - Update note (authenticated) +- DELETE /api/notes/{id} - Delete note (authenticated) + +**RSS Feed** +- Generate valid RSS 2.0 feed +- Include all published notes +- Proper date formatting (RFC-822) +- CDATA wrapping for HTML content +- Cache appropriately (5 minute minimum) + +### Phase 3: IndieAuth Implementation + +**Authorization Endpoint** +- Validate client_id parameter +- Verify 
redirect_uri matches registered client +- Generate authorization codes +- Support PKCE flow + +**Token Endpoint** +- Exchange authorization codes for access tokens +- Validate code verifier for PKCE +- Return token with appropriate scope +- Store token with expiration + +**Token Verification** +- Validate bearer tokens in Authorization header +- Check token expiration +- Verify scope for requested operation + +### Phase 4: Micropub Implementation + +**POST Endpoint** +- Support JSON format (Content-Type: application/json) +- Support form-encoded format (Content-Type: application/x-www-form-urlencoded) +- Handle h-entry creation for notes +- Return 201 Created with Location header +- Validate authentication token + +**GET Endpoint** +- Support q=config query (return supported features) +- Support q=source query (return note source) +- Return appropriate JSON responses + +**Micropub Request Structure (JSON)** +```json +{ + "type": ["h-entry"], + "properties": { + "content": ["Note content here"] + } +} +``` + +**Micropub Response** +``` +HTTP/1.1 201 Created +Location: https://example.com/note/abc123 +``` + +### Phase 5: Web Interface + +**Homepage Requirements** +- Display notes in reverse chronological order +- Include proper h-entry microformats +- Show note content (e-content class) +- Include permalink (u-url class) +- Display publish date (dt-published class) +- Clean, readable typography +- Mobile-responsive design + +**Note Permalink Page** +- Full note display with microformats +- Author information (h-card) +- Timestamp and permalink +- Link back to homepage + +**Admin Interface** +- Simple markdown editor +- Preview capability +- Publish/Draft toggle +- List of existing notes +- Edit existing notes +- Protected by authentication + +**Microformats Example** +```html +
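+<article class="h-entry">
+  <div class="e-content">
+    <p>Note content goes here</p>
+  </div>
+  <footer>
+    <a class="u-url" href="/note/abc123">
+      <time class="dt-published" datetime="2024-01-01T12:00:00Z">2024-01-01</time>
+    </a>
+  </footer>
+</article>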
+``` + +### Phase 6: Deployment + +**Requirements** +- Self-hostable package +- Single deployment unit +- Persistent data storage +- Environment-based configuration +- Backup-friendly data format + +**Configuration Variables** +- SITE_URL - Full URL of the site +- SITE_TITLE - Site name for RSS feed +- SITE_AUTHOR - Default author name +- INDIEAUTH_ENDPOINT - IndieAuth provider URL +- DATA_PATH - Location for persistent storage + +### Phase 7: Testing + +**Unit Tests Required** +- Data layer operations +- Micropub request parsing +- IndieAuth token validation +- Markdown rendering +- Slug generation + +**Integration Tests** +- Complete Micropub flow +- IndieAuth authentication flow +- RSS feed generation +- API endpoint responses + +**Test Coverage Areas** +- Note creation via web interface +- Note creation via Micropub +- Authentication flows +- Feed validation +- Error handling + +## Standards Compliance + +### IndieWeb Standards + +**Microformats2** +- h-entry for notes +- h-card for author information +- e-content for note content +- dt-published for timestamps +- u-url for permalinks + +**IndieAuth** +- OAuth 2.0 compatible flow +- Support for authorization code grant +- PKCE support recommended +- Token introspection endpoint + +**Micropub** +- JSON and form-encoded content types +- Location header on creation +- Configuration endpoint +- Source endpoint for queries + +### Web Standards + +**HTTP** +- Proper status codes (200, 201, 400, 401, 404) +- Content-Type headers +- Cache-Control headers where appropriate +- CORS headers for API endpoints + +**RSS 2.0** +- Valid XML structure +- Required channel elements +- Proper date formatting +- GUID for each item +- CDATA for HTML content + +**HTML** +- Semantic HTML5 elements +- Valid markup +- Accessible forms +- Mobile-responsive design + +## Security Considerations + +### Authentication +- Validate all tokens before operations +- Implement token expiration +- Use secure token generation +- Protect admin routes 
+ +### Input Validation +- Sanitize markdown input +- Validate Micropub payloads +- Prevent SQL injection +- Escape HTML appropriately + +### HTTP Security +- Use HTTPS in production +- Set secure headers +- Implement CSRF protection +- Rate limit API endpoints + +## Performance Guidelines + +### Response Times +- API responses < 100ms +- Page loads < 200ms +- RSS feed generation < 300ms + +### Caching Strategy +- Cache RSS feed (5 minutes) +- Cache static assets +- Database query optimization +- Minimize external dependencies + +### Resource Usage +- Efficient database queries +- Minimal memory footprint +- Optimize HTML/CSS delivery +- Compress responses + +## Testing Checklist + +- [ ] Create notes via web interface +- [ ] Create notes via Micropub JSON +- [ ] Create notes via Micropub form-encoded +- [ ] RSS feed validates (W3C validator) +- [ ] IndieAuth login flow works +- [ ] Micropub client authentication +- [ ] Notes display with proper microformats +- [ ] API returns correct status codes +- [ ] Markdown renders correctly +- [ ] Slugs generate uniquely +- [ ] Timestamps record accurately +- [ ] Token expiration works +- [ ] Rate limiting functions +- [ ] All unit tests pass + +## Validation Tools + +**IndieWeb** +- https://indiewebify.me/ - Verify microformats +- https://indieauth.com/validate - Test IndieAuth +- https://micropub.rocks/ - Micropub test suite + +**Web Standards** +- https://validator.w3.org/feed/ - RSS validator +- https://validator.w3.org/ - HTML validator +- https://jsonlint.com/ - JSON validator + +## Resources + +### Specifications +- IndieWeb Notes: https://indieweb.org/note +- Micropub Spec: https://micropub.spec.indieweb.org +- IndieAuth Spec: https://indieauth.spec.indieweb.org +- Microformats2: http://microformats.org/wiki/h-entry +- RSS 2.0 Spec: https://www.rssboard.org/rss-specification + +### Testing & Validation +- Micropub Test Suite: https://micropub.rocks/ +- IndieAuth Testing: https://indieauth.com/ +- Microformats Parser: 
https://pin13.net/mf2/ + +### Example Implementations +- IndieWeb Examples: https://indieweb.org/examples +- Micropub Clients: https://indieweb.org/Micropub/Clients + +## Development Principles + +1. **Minimal Code**: Every feature must justify its complexity +2. **Standards First**: Follow specifications exactly +3. **User Control**: User owns their data completely +4. **No Lock-in**: Data must be portable and exportable +5. **Progressive Enhancement**: Core functionality works without JavaScript +6. **Documentation**: Code should be self-documenting +7. **Test Coverage**: Critical paths must have tests + +## Future Considerations (Post-V1) + +Potential V2 features: +- Webmentions support +- Media uploads (photos) +- Additional post types (articles, replies) +- POSSE to Mastodon/ActivityPub +- Full-text search +- Draft/scheduled posts +- Multiple IndieAuth providers +- Backup/restore functionality +- Import from other platforms +- Export in multiple formats + +## Success Criteria + +The project is successful when: +- A user can publish notes from any Micropub client +- Notes appear in RSS readers immediately +- The system runs on minimal resources +- Code is readable and maintainable +- All IndieWeb validators pass +- Setup takes less than 5 minutes +- System runs for months without intervention diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..d4e51e8 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,4 @@ +- we use uv for python venv management in this project so commands involving python probably need to be run with uv +- whenever you invoke agent-developer you will remind it to document what it does in docs/reports, update the changelog, and increment the version number where appropriate in line with docs/standards/versioning-strategy.md +- when invoking agent-developer remind it that we are using uv and that any python commands need to be run with uv +- when invoking agent-developer make sure it follows proper git protocol as defined in 
docs/standards/git-branching-strategy.md \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..85f53f0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 [Your Name] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..b07d159 --- /dev/null +++ b/README.md @@ -0,0 +1,201 @@ +# StarPunk + +A minimal, self-hosted IndieWeb CMS for publishing notes with RSS syndication. 
+ +**Current Version**: 0.1.0 (development) + +## Versioning + +StarPunk follows [Semantic Versioning 2.0.0](https://semver.org/): +- Version format: `MAJOR.MINOR.PATCH` +- Current: `0.1.0` (pre-release development) +- First stable release will be `1.0.0` + +**Version Information**: +- Check version: `python -c "from starpunk import __version__; print(__version__)"` +- See changes: [CHANGELOG.md](CHANGELOG.md) +- Versioning strategy: [docs/standards/versioning-strategy.md](docs/standards/versioning-strategy.md) + +## Philosophy + +"Every line of code must justify its existence. When in doubt, leave it out." + +StarPunk is designed for a single user who wants to: +- Publish short notes to their personal website +- Own their content (notes stored as portable markdown files) +- Syndicate via RSS +- Support IndieWeb standards (Micropub, IndieAuth) +- Run on minimal resources + +## Features + +- **File-based storage**: Notes are markdown files, owned by you +- **IndieAuth authentication**: Use your own website as identity +- **Micropub support**: Publish from any Micropub client +- **RSS feed**: Automatic syndication +- **No database lock-in**: SQLite for metadata, files for content +- **Self-hostable**: Run on your own server +- **Minimal dependencies**: 6 core dependencies, no build tools + +## Requirements + +- Python 3.11 or higher +- 500MB disk space +- Linux, macOS, or Windows with WSL2 + +## Quick Start + +```bash +# Clone repository +git clone https://github.com/YOUR_USERNAME/starpunk.git +cd starpunk + +# Install uv (package manager) +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Create virtual environment +uv venv .venv --python 3.11 + +# Install dependencies +uv pip install -r requirements.txt + +# Configure +cp .env.example .env +# Edit .env and set ADMIN_ME and SESSION_SECRET + +# Initialize database +mkdir -p data/notes +.venv/bin/python -c "from starpunk.database import init_db; init_db()" + +# Run development server +.venv/bin/flask --app app.py 
run --debug + +# Visit http://localhost:5000 +``` + +## Configuration + +All configuration is in the `.env` file. Required settings: + +- `ADMIN_ME` - Your IndieWeb identity URL (e.g., https://yoursite.com) +- `SESSION_SECRET` - Random secret key (generate with `python3 -c "import secrets; print(secrets.token_hex(32))"`) +- `SITE_URL` - Public URL of your site + +See `.env.example` for all options. + +## Project Structure + +``` +starpunk/ +├── app.py # Application entry point +├── starpunk/ # Application code +├── data/ # Your notes and database (gitignored) +│ ├── notes/ # Markdown files +│ └── starpunk.db # SQLite database +├── static/ # CSS and JavaScript +├── templates/ # HTML templates +└── tests/ # Test suite +``` + +## Usage + +### Publishing Notes + +**Via Web Interface**: +1. Navigate to `/admin` +2. Login with your IndieWeb identity +3. Create notes in markdown + +**Via Micropub Client**: +1. Configure client with your site URL +2. Authenticate via IndieAuth +3. Publish from any Micropub-compatible app + +### Backing Up Your Data + +Your notes are stored as plain markdown files in `data/notes/`. Back up this directory: + +```bash +# Simple backup +tar -czf backup.tar.gz data/ + +# Or use rsync +rsync -av data/ /backup/starpunk/ +``` + +## Development + +See [docs/standards/development-setup.md](docs/standards/development-setup.md) for detailed setup. + +```bash +# Install dev dependencies +uv pip install -r requirements-dev.txt + +# Run tests +.venv/bin/pytest + +# Format code +.venv/bin/black starpunk/ tests/ + +# Lint +.venv/bin/flake8 starpunk/ tests/ +``` + +## Architecture + +StarPunk uses a hybrid storage approach: +- **Notes content**: Markdown files (portable, human-readable) +- **Metadata**: SQLite database (fast queries) + +This gives you both portability AND performance. + +See [docs/architecture/](docs/architecture/) for complete documentation. 
+ +## IndieWeb Compliance + +StarPunk implements: +- [Micropub](https://micropub.spec.indieweb.org/) - Publishing API +- [IndieAuth](https://indieauth.spec.indieweb.org/) - Authentication +- [Microformats2](http://microformats.org/) - Semantic HTML markup +- [RSS 2.0](https://www.rssboard.org/rss-specification) - Feed syndication + +## Deployment + +### Production Setup + +```bash +# Install gunicorn +uv pip install gunicorn + +# Run with gunicorn +.venv/bin/gunicorn -w 4 -b 127.0.0.1:8000 app:app + +# Configure nginx/Caddy for HTTPS +# Set up systemd for process management +# Enable regular backups of data/ directory +``` + +See [docs/architecture/deployment.md](docs/architecture/deployment.md) for details. + +## License + +MIT License - see LICENSE file + +## Credits + +Built with: +- [Flask](https://flask.palletsprojects.com/) - Web framework +- [python-markdown](https://python-markdown.github.io/) - Markdown processing +- [feedgen](https://feedgen.kiesow.be/) - RSS generation +- [httpx](https://www.python-httpx.org/) - HTTP client +- [IndieLogin](https://indielogin.com/) - Authentication service + +## Contributing + +This is a personal project optimized for single-user use. If you want additional features, consider forking! + +## Support + +- Documentation: [docs/](docs/) +- Issues: GitHub Issues +- IndieWeb: [indieweb.org](https://indieweb.org/) diff --git a/TECHNOLOGY-STACK-SUMMARY.md b/TECHNOLOGY-STACK-SUMMARY.md new file mode 100644 index 0000000..f62886c --- /dev/null +++ b/TECHNOLOGY-STACK-SUMMARY.md @@ -0,0 +1,497 @@ +# StarPunk Technology Stack - Quick Reference + +## Project Understanding + +StarPunk is a **minimal, single-user IndieWeb CMS** for publishing notes with RSS syndication. The core philosophy is radical simplicity: "Every line of code must justify its existence." 
+ +### Key Requirements +- Publish IndieWeb-compatible notes +- External IndieLogin authentication via indielogin.com +- Micropub server for publishing from any client +- RSS feed generation +- File-based note storage (markdown files) +- SQLite for metadata +- Self-hostable +- API-first architecture + +## Complete Technology Stack + +### Backend + +| Component | Technology | Version | Justification | +|-----------|------------|---------|---------------| +| **Language** | Python | 3.11+ | User's preference, excellent ecosystem | +| **Web Framework** | Flask | 3.0+ | Minimal micro-framework, perfect for single-user | +| **Note Storage** | Markdown Files | - | Maximum portability, user owns data directly | +| **Metadata DB** | SQLite | Built-in | Single file, no server, perfect for single-user | +| **Markdown Rendering** | markdown | 3.5+ | Standard Python implementation | +| **RSS Generation** | feedgen | 1.0+ | Ensures valid RSS 2.0 output | +| **HTTP Client** | httpx | 0.27+ | Modern API, IndieLogin communication | +| **Configuration** | python-dotenv | 1.0+ | Standard .env file support | +| **Testing** | pytest | 8.0+ | Python testing standard | + +**Total Direct Dependencies**: 6 packages + +### Frontend + +| Component | Technology | Justification | +|-----------|------------|---------------| +| **Template Engine** | Jinja2 | Included with Flask, server-side rendering | +| **CSS** | Custom CSS (~200 lines) | No framework, full control, no build tools | +| **JavaScript** | Vanilla JS (optional) | Minimal preview feature, progressive enhancement | +| **Build Tools** | NONE | Zero build process, direct file serving | + +### Authentication + +| Component | Technology | Approach | +|-----------|------------|----------| +| **Admin Auth** | IndieLogin.com | External OAuth 2.0 service at https://indielogin.com | +| **Session Management** | HttpOnly Cookies + SQLite | 30-day sessions, secure tokens | +| **Micropub Auth** | IndieAuth Tokens | Bearer tokens, stored in 
SQLite | +| **CSRF Protection** | State Tokens | Random tokens with 5-minute expiry | + +**Key Point**: Authentication is delegated to indielogin.com, requiring zero auth code to maintain. + +## Data Architecture + +### Hybrid File + Database Storage + +#### Note Content: Markdown Files +``` +data/notes/ +├── 2024/ +│ ├── 11/ +│ │ ├── my-first-note.md +│ │ └── another-note.md +│ └── 12/ +│ └── december-note.md +``` + +- **Format**: Pure markdown, no frontmatter +- **Organization**: Year/Month subdirectories (`YYYY/MM/`) +- **Naming**: `{slug}.md` +- **Portability**: Copy anywhere, read in any editor, backup with cp/rsync/git + +#### Metadata: SQLite Database +```sql +-- Note metadata (NOT content) +CREATE TABLE notes ( + id INTEGER PRIMARY KEY, + slug TEXT UNIQUE, + file_path TEXT UNIQUE, + published BOOLEAN, + created_at TIMESTAMP, + updated_at TIMESTAMP, + content_hash TEXT +); + +-- Authentication +CREATE TABLE sessions (...); -- IndieLogin sessions +CREATE TABLE tokens (...); -- Micropub tokens +CREATE TABLE auth_state (...); -- CSRF protection +``` + +- **Location**: `data/starpunk.db` +- **Purpose**: Fast queries, indexes, referential integrity +- **Sync**: Files are authoritative for content, database for metadata + +### How They Work Together + +**Creating a Note**: +1. Generate slug +2. Write markdown file → `data/notes/YYYY/MM/slug.md` +3. Calculate content hash +4. Insert database record with metadata +5. If database fails: delete file, rollback + +**Reading a Note**: +1. Query database by slug → get file_path +2. Read markdown from file +3. Render to HTML +4. Return content + metadata + +## IndieLogin Authentication Flow + +### Configuration Required +```bash +# .env file +SITE_URL=https://starpunk.example.com +ADMIN_ME=https://your-website.com # Only this URL can authenticate +SESSION_SECRET=random-secret-key +``` + +### Authentication Steps + +1. **User initiates login** → enters their website URL +2. 
**StarPunk redirects** → to https://indielogin.com/auth with: + - `me` = user's website + - `client_id` = StarPunk URL + - `redirect_uri` = callback URL + - `state` = random CSRF token +3. **IndieLogin verifies identity** → via RelMeAuth, email, etc. +4. **User authenticates** → chooses verification method +5. **IndieLogin redirects back** → with authorization code +6. **StarPunk exchanges code** → POST to indielogin.com API +7. **IndieLogin returns** → verified "me" URL +8. **StarPunk verifies** → me == ADMIN_ME (from config) +9. **Create session** → generate token, store in database, set cookie +10. **Redirect to admin** → user is now authenticated + +### API Endpoint +**IndieLogin API**: https://indielogin.com/api + +**Exchange Request**: +```http +POST https://indielogin.com/auth +Content-Type: application/x-www-form-urlencoded + +code={authorization_code}& +client_id={starpunk_url}& +redirect_uri={starpunk_url}/auth/callback +``` + +**Exchange Response**: +```json +{ + "me": "https://user-website.com" +} +``` + +### Security Features +- State tokens prevent CSRF attacks +- Only ADMIN_ME URL can authenticate (single-user enforcement) +- Session tokens are cryptographically random (256-bit) +- HttpOnly cookies prevent XSS theft +- Secure flag requires HTTPS +- 30-day session expiry + +## Frontend Stack Details + +### Server-Side Rendering (Jinja2) + +**Public Templates**: +- `base.html` - Base layout with HTML structure +- `index.html` - Homepage (note list) +- `note.html` - Single note permalink +- `feed.xml` - RSS feed template + +**Admin Templates**: +- `admin/base.html` - Admin layout +- `admin/login.html` - Login form +- `admin/dashboard.html` - Note list +- `admin/new.html` - Create note form +- `admin/edit.html` - Edit note form + +### CSS Approach + +**Single stylesheet**: `static/css/style.css` (~200 lines) + +```css +/* CSS custom properties for theming */ +:root { + --color-text: #333; + --color-bg: #fff; + --color-link: #0066cc; + --max-width: 42rem; 
+ --spacing: 1rem; +} + +/* Mobile-first responsive */ +body { padding: 1rem; } + +@media (min-width: 768px) { + body { padding: 2rem; } +} +``` + +**No framework**: Custom CSS gives full control, no unused code. + +### JavaScript Approach + +**Single optional file**: `static/js/preview.js` + +**Purpose**: Real-time markdown preview in admin editor (progressive enhancement) + +**Implementation**: +- Vanilla JavaScript (no framework) +- Uses marked.js from CDN for client-side markdown +- Works without it (form submits to server) + +**Why vanilla JS?** +- Core functionality works without JavaScript +- Single feature doesn't justify React/Vue/Svelte +- Modern browser APIs are sufficient +- No build tools needed + +### Build Process: NONE + +- No webpack, Vite, Rollup, esbuild +- No npm, package.json, node_modules +- No Babel transpilation +- No CSS preprocessing +- Direct file serving +- Instant development setup + +**Advantages**: +- Zero build time +- No dependency hell +- Simple deployment +- Easy debugging + +## API Routes + +### Public API +``` +GET / Homepage (recent notes) +GET /note/{slug} Individual note +GET /feed.xml RSS feed +``` + +### Admin Interface +``` +GET /admin/login Login form +POST /admin/login Initiate IndieLogin flow +GET /auth/callback IndieLogin callback handler +GET /admin Dashboard (list notes) +GET /admin/new Create note form +GET /admin/edit/{id} Edit note form +POST /admin/logout Destroy session +``` + +### Notes API (Session Auth) +``` +GET /api/notes List published notes (JSON) +POST /api/notes Create note (JSON) +GET /api/notes/{id} Get single note (JSON) +PUT /api/notes/{id} Update note (JSON) +DELETE /api/notes/{id} Delete note (JSON) +``` + +### Micropub API (Token Auth) +``` +POST /api/micropub Create note (h-entry) +GET /api/micropub?q=config Query configuration +GET /api/micropub?q=source Query note source +``` + +## File Organization + +``` +starpunk/ +├── app.py # Main Flask application +├── requirements.txt # 6 dependencies 
+├── .env # Configuration (gitignored) +├── .env.example # Template +│ +├── starpunk/ # Application package +│ ├── __init__.py +│ ├── config.py # Load environment +│ ├── database.py # SQLite operations +│ ├── models.py # Data models +│ ├── auth.py # IndieLogin logic +│ ├── micropub.py # Micropub endpoint +│ ├── feed.py # RSS generation +│ └── utils.py # Helpers +│ +├── static/ +│ ├── css/style.css # Single stylesheet +│ └── js/preview.js # Optional markdown preview +│ +├── templates/ +│ ├── base.html # Public base +│ ├── index.html # Homepage +│ ├── note.html # Note permalink +│ └── admin/ +│ ├── base.html # Admin base +│ ├── login.html # Login form +│ ├── dashboard.html # Note list +│ ├── new.html # Create form +│ └── edit.html # Edit form +│ +├── data/ # Persistent (gitignored) +│ ├── notes/YYYY/MM/slug.md # Markdown files +│ └── starpunk.db # SQLite +│ +├── tests/ # pytest tests +│ ├── test_auth.py +│ ├── test_database.py +│ ├── test_micropub.py +│ └── test_feed.py +│ +└── docs/ # Architecture docs + ├── architecture/ + │ ├── overview.md + │ └── technology-stack.md + └── decisions/ + ├── ADR-001-python-web-framework.md + ├── ADR-002-flask-extensions.md + ├── ADR-003-frontend-technology.md + ├── ADR-004-file-based-note-storage.md + └── ADR-005-indielogin-authentication.md +``` + +## Recommended Architectural Patterns + +### 1. API-First Design +All functionality exposed via API, web interface consumes it. + +### 2. Progressive Enhancement +Core works without JavaScript, JS adds optional enhancements. + +### 3. File-Database Sync +Write files first, then database. Rollback on failure. + +### 4. Atomic Operations +Use temp files and atomic renames to prevent corruption. + +### 5. Token-Based Auth +Sessions for humans (cookies), tokens for APIs (bearer). 
+ +## Potential Risks & Considerations + +### Risk 1: IndieLogin.com Dependency +**Impact**: Cannot authenticate if service is down +**Mitigation**: +- Sessions last 30 days (brief outages don't lock out user) +- IndieLogin.com is stable, community-run service +- V2: Consider fallback auth method + +### Risk 2: File/Database Sync Issues +**Impact**: Data inconsistency between files and database +**Mitigation**: +- Atomic operations (write file → insert DB, rollback on error) +- Content hashing detects external modifications +- Optional integrity check on startup + +### Risk 3: SQLite Limitations +**Impact**: Limited concurrency (but this is single-user) +**Consideration**: SQLite is perfect for single-user, would need PostgreSQL for multi-user + +### Risk 4: No Built-in Backup +**Impact**: User must manage backups +**Mitigation**: +- Document backup procedures clearly +- Backup is simple (cp -r data/ backup/) +- Consider adding automated backup script + +## Deployment Stack + +### Development +```bash +# Setup +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt + +# Configure +cp .env.example .env +# Edit .env with your settings + +# Run +flask run + +# Test +pytest +``` + +### Production + +**WSGI Server**: Gunicorn +```bash +gunicorn -w 4 -b 127.0.0.1:8000 app:app +``` + +**Reverse Proxy**: Nginx or Caddy +- HTTPS termination (Let's Encrypt) +- Static file serving +- Rate limiting (optional) + +**Process Manager**: systemd +- Auto-restart on failure +- Log management +- Run on boot + +**Backup**: Cron job +```bash +# Daily backup via rsync +rsync -av /opt/starpunk/data /backup/starpunk-$(date +%Y%m%d) +``` + +## Standards Compliance + +### IndieWeb +- **Microformats2**: h-entry, h-card, e-content, dt-published, u-url +- **IndieAuth**: OAuth 2.0 flow (delegated to indielogin.com) +- **Micropub**: JSON and form-encoded, 201 Created responses + +**Validation**: +- https://indiewebify.me/ (microformats) +- https://micropub.rocks/ (Micropub 
compliance) + +### Web Standards +- **RSS 2.0**: Valid XML, RFC-822 dates, CDATA for HTML +- **HTML5**: Semantic elements, accessible, mobile-responsive +- **HTTP**: Proper status codes (200, 201, 400, 401, 404) + +**Validation**: +- https://validator.w3.org/feed/ (RSS) +- https://validator.w3.org/ (HTML) + +## Performance Targets + +- **API responses**: < 100ms +- **Page loads**: < 200ms +- **RSS generation**: < 300ms +- **Memory usage**: < 100MB +- **Startup time**: < 1 second + +## Quick Start + +```bash +# 1. Clone and setup +git clone https://github.com/YOUR_USERNAME/starpunk.git && cd starpunk +python -m venv venv && source venv/bin/activate +pip install -r requirements.txt + +# 2. Configure +cp .env.example .env +# Edit .env: +# SITE_URL=https://your-domain.com +# ADMIN_ME=https://your-website.com +# SESSION_SECRET=$(python -c "import secrets; print(secrets.token_hex(32))") + +# 3. Run +flask run + +# 4. Visit http://localhost:5000/admin/login +# Enter your website URL (must match ADMIN_ME) +# Authenticate via indielogin.com +# Start publishing! +``` + +## Summary + +StarPunk uses a **radically simple** technology stack: + +- **Backend**: Flask + Python stdlib + 5 small libraries +- **Storage**: Markdown files (content) + SQLite (metadata) +- **Frontend**: Jinja2 templates + custom CSS + optional vanilla JS +- **Auth**: Delegated to indielogin.com (zero maintenance) +- **Build**: None (zero build tools) +- **Deploy**: Gunicorn + nginx/Caddy + systemd + +**Total Dependencies**: 6 direct packages +**Lines of Code**: ~1500 LOC estimate for V1 +**Setup Time**: < 5 minutes +**Build Time**: 0 seconds (no build process) + +This stack embodies the project philosophy: every technology choice is justified by simplicity, fitness for purpose, and maintainability. 
+ +## Further Reading + +- **Project Requirements**: `CLAUDE.MD` +- **Full Tech Stack**: `docs/architecture/technology-stack.md` +- **Architecture Overview**: `docs/architecture/overview.md` +- **All ADRs**: `docs/decisions/ADR-*.md` +- **IndieLogin API**: https://indielogin.com/api +- **IndieWeb**: https://indieweb.org/ diff --git a/app.py b/app.py new file mode 100644 index 0000000..1584cad --- /dev/null +++ b/app.py @@ -0,0 +1,13 @@ +""" +StarPunk - Minimal IndieWeb CMS +Main application entry point +""" + +from starpunk import create_app + +app = create_app() + +if __name__ == '__main__': + # Development server + # For production, use: gunicorn app:app + app.run(debug=True) diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md new file mode 100644 index 0000000..632baeb --- /dev/null +++ b/docs/architecture/overview.md @@ -0,0 +1,909 @@ +# StarPunk Architecture Overview + +## Executive Summary + +StarPunk is a minimal, single-user IndieWeb CMS designed around the principle: "Every line of code must justify its existence." The architecture prioritizes simplicity, standards compliance, and user data ownership through careful technology selection and hybrid data storage. + +**Core Architecture**: API-first Flask application with hybrid file+database storage, server-side rendering, and delegated authentication. 
+ +## System Architecture + +### High-Level Components + +``` +┌─────────────────────────────────────────────────────────────┐ +│ User Browser │ +└───────────────┬─────────────────────────────────────────────┘ + │ + │ HTTP/HTTPS + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Flask Application │ +│ ┌─────────────────────────────────────────────────────────┤ +│ │ Web Interface (Jinja2 Templates) │ +│ │ - Public: Homepage, Note Permalinks │ +│ │ - Admin: Dashboard, Note Editor │ +│ └──────────────────────────────┬──────────────────────────┘ +│ ┌──────────────────────────────┴──────────────────────────┐ +│ │ API Layer (RESTful + Micropub) │ +│ │ - Notes CRUD API │ +│ │ - Micropub Endpoint │ +│ │ - RSS Feed Generator │ +│ │ - Authentication Handlers │ +│ └──────────────────────────────┬──────────────────────────┘ +│ ┌──────────────────────────────┴──────────────────────────┐ +│ │ Business Logic │ +│ │ - Note Management (create, read, update, delete) │ +│ │ - File/Database Sync │ +│ │ - Markdown Rendering │ +│ │ - Slug Generation │ +│ │ - Session Management │ +│ └──────────────────────────────┬──────────────────────────┘ +│ ┌──────────────────────────────┴──────────────────────────┐ +│ │ Data Layer │ +│ │ ┌──────────────────┐ ┌─────────────────────────┐ │ +│ │ │ File Storage │ │ SQLite Database │ │ +│ │ │ │ │ │ │ +│ │ │ Markdown Files │ │ - Note Metadata │ │ +│ │ │ (Pure Content) │ │ - Sessions │ │ +│ │ │ │ │ - Tokens │ │ +│ │ │ data/notes/ │ │ - Auth State │ │ +│ │ │ YYYY/MM/ │ │ │ │ +│ │ │ slug.md │ │ data/starpunk.db │ │ +│ │ └──────────────────┘ └─────────────────────────┘ │ +│ └─────────────────────────────────────────────────────────┘ +└─────────────────────────────────────────────────────────────┘ + │ + │ HTTPS + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ External Services │ +│ - IndieLogin.com (Authentication) │ +│ - User's Website (Identity Verification) │ +│ - Micropub Clients (Publishing) │ 
+└─────────────────────────────────────────────────────────────┘ +``` + +## Core Principles + +### 1. Radical Simplicity +- Total dependencies: 6 direct packages +- No build tools, no npm, no bundlers +- Server-side rendering eliminates frontend complexity +- Single file SQLite database +- Zero configuration frameworks + +### 2. Hybrid Data Architecture +**Files for Content**: Markdown notes stored as plain text files +- Maximum portability +- Human-readable +- Direct user access +- Easy backup (copy, rsync, git) + +**Database for Metadata**: SQLite stores structured data +- Fast queries and indexes +- Referential integrity +- Efficient filtering and sorting +- Transaction support + +**Sync Strategy**: Files are authoritative for content; database is authoritative for metadata. Both must stay in sync. + +### 3. Standards-First Design +- IndieWeb: Microformats2, IndieAuth, Micropub +- Web: HTML5, RSS 2.0, HTTP standards +- Security: OAuth 2.0, HTTPS, secure cookies +- Data: CommonMark markdown + +### 4. API-First Architecture +All functionality exposed via API, web interface consumes API. This enables: +- Micropub client support +- Future client applications +- Scriptable automation +- Clean separation of concerns + +### 5. 
Progressive Enhancement +- Core functionality works without JavaScript +- JavaScript adds optional enhancements (markdown preview) +- Server-side rendering for fast initial loads +- Mobile-responsive from the start + +## Component Descriptions + +### Web Layer + +#### Public Interface +**Purpose**: Display published notes to the world +**Technology**: Server-side rendered HTML (Jinja2) +**Routes**: +- `/` - Homepage with recent notes +- `/note/{slug}` - Individual note permalink +- `/feed.xml` - RSS feed + +**Features**: +- Microformats2 markup (h-entry, h-card) +- Reverse chronological note list +- Clean, minimal design +- Mobile-responsive +- No JavaScript required + +#### Admin Interface +**Purpose**: Manage notes (create, edit, publish) +**Technology**: Server-side rendered HTML (Jinja2) + optional vanilla JS +**Routes**: +- `/admin/login` - Authentication +- `/admin` - Dashboard (list of all notes) +- `/admin/new` - Create new note +- `/admin/edit/{id}` - Edit existing note + +**Features**: +- Markdown editor +- Optional real-time preview (JS enhancement) +- Publish/draft toggle +- Protected by session authentication + +### API Layer + +#### Notes API +**Purpose**: CRUD operations for notes +**Authentication**: Session-based (admin interface) +**Routes**: +``` +GET /api/notes List published notes +POST /api/notes Create new note +GET /api/notes/{id} Get single note +PUT /api/notes/{id} Update note +DELETE /api/notes/{id} Delete note +``` + +**Response Format**: JSON + +#### Micropub Endpoint +**Purpose**: Accept posts from external Micropub clients +**Authentication**: IndieAuth bearer tokens +**Routes**: +``` +POST /api/micropub Create note (h-entry) +GET /api/micropub?q=config Query configuration +GET /api/micropub?q=source Query note source +``` + +**Content Types**: +- application/json +- application/x-www-form-urlencoded + +**Compliance**: Full Micropub specification + +#### RSS Feed +**Purpose**: Syndicate published notes +**Technology**: feedgen library 
+**Route**: `/feed.xml` +**Format**: Valid RSS 2.0 XML +**Caching**: 5 minutes +**Features**: +- All published notes +- RFC-822 date formatting +- CDATA-wrapped HTML content +- Proper GUID for each item + +### Business Logic Layer + +#### Note Management +**Operations**: +1. **Create**: Generate slug → write file → insert database record +2. **Read**: Query database for path → read file → render markdown +3. **Update**: Write file atomically → update database timestamp +4. **Delete**: Mark deleted in database → optionally archive file + +**Key Components**: +- Slug generation (URL-safe, unique) +- Markdown rendering (markdown library) +- Content hashing (integrity verification) +- Atomic file operations (prevent corruption) + +#### File/Database Sync +**Strategy**: Write files first, then database +**Rollback**: If database operation fails, delete/restore file +**Verification**: Content hash detects external modifications +**Integrity Check**: Optional scan for orphaned files/records + +#### Authentication +**Admin Auth**: IndieLogin.com OAuth 2.0 flow +- User enters website URL +- Redirect to indielogin.com +- Verify identity via RelMeAuth or email +- Return verified "me" URL +- Create session token +- Store in HttpOnly cookie + +**Micropub Auth**: IndieAuth token verification +- Client obtains token via IndieAuth flow +- Token sent as Bearer in Authorization header +- Verify token exists and not expired +- Check scope permissions + +### Data Layer + +#### File Storage +**Location**: `data/notes/` +**Structure**: `YYYY/MM/slug.md` +**Format**: Pure markdown, no frontmatter +**Operations**: +- Atomic writes (temp file → rename) +- Directory creation (makedirs) +- Content reading (UTF-8 encoding) + +**Example**: +``` +data/notes/ +├── 2024/ +│ ├── 11/ +│ │ ├── my-first-note.md +│ │ └── another-note.md +│ └── 12/ +│ └── december-note.md +``` + +#### Database Storage +**Location**: `data/starpunk.db` +**Engine**: SQLite3 +**Tables**: +- `notes` - Metadata (slug, 
file_path, published, timestamps, hash) +- `sessions` - Auth sessions (token, me, expiry) +- `tokens` - Micropub tokens (token, me, client_id, scope) +- `auth_state` - CSRF tokens (state, expiry) + +**Indexes**: +- `notes.created_at` (DESC) - Fast chronological queries +- `notes.published` - Fast filtering +- `notes.slug` - Fast lookup by slug +- `sessions.session_token` - Fast auth checks + +**Queries**: Direct SQL using Python sqlite3 module (no ORM) + +## Data Flow Examples + +### Creating a Note (via Admin Interface) + +``` +1. User fills out form at /admin/new + ↓ +2. POST to /api/notes with markdown content + ↓ +3. Verify user session (check session cookie) + ↓ +4. Generate unique slug from content or timestamp + ↓ +5. Determine file path: data/notes/2024/11/slug.md + ↓ +6. Create directories if needed (makedirs) + ↓ +7. Write markdown content to file (atomic write) + ↓ +8. Calculate SHA-256 hash of content + ↓ +9. Begin database transaction + ↓ +10. Insert record into notes table: + - slug + - file_path + - published (from form) + - created_at (now) + - updated_at (now) + - content_hash + ↓ +11. If database insert fails: + - Delete file + - Return error to user + ↓ +12. If database insert succeeds: + - Commit transaction + - Return success with note URL + ↓ +13. Redirect user to /admin (dashboard) +``` + +### Reading a Note (via Public Interface) + +``` +1. User visits /note/my-first-note + ↓ +2. Extract slug from URL + ↓ +3. Query database: + SELECT file_path, created_at, published + FROM notes + WHERE slug = 'my-first-note' AND published = 1 + ↓ +4. If not found → 404 error + ↓ +5. Read markdown content from file: + - Open data/notes/2024/11/my-first-note.md + - Read UTF-8 content + ↓ +6. Render markdown to HTML (markdown.markdown()) + ↓ +7. Render Jinja2 template with: + - content_html (rendered HTML) + - created_at (timestamp) + - slug (for permalink) + ↓ +8. Return HTML with microformats markup +``` + +### Publishing via Micropub + +``` +1. 
Micropub client POSTs to /api/micropub + Headers: Authorization: Bearer {token} + Body: {"type": ["h-entry"], "properties": {"content": ["..."]}} + ↓ +2. Extract bearer token from Authorization header + ↓ +3. Query database: + SELECT me, scope FROM tokens + WHERE token = {token} AND expires_at > now() + ↓ +4. If token invalid → 401 Unauthorized + ↓ +5. Parse Micropub JSON payload + ↓ +6. Extract content from properties.content[0] + ↓ +7. Create note (same flow as admin interface): + - Generate slug + - Write file + - Insert database record + ↓ +8. If successful: + - Return 201 Created + - Set Location header to note URL + ↓ +9. Client receives note URL, displays success +``` + +### IndieLogin Authentication Flow + +``` +1. User visits /admin/login + ↓ +2. User enters their website: https://alice.example.com + ↓ +3. POST to /admin/login with "me" parameter + ↓ +4. Validate URL format + ↓ +5. Generate random state token (CSRF protection) + ↓ +6. Store state in database with 5-minute expiry + ↓ +7. Build IndieLogin authorization URL: + https://indielogin.com/auth? + me=https://alice.example.com + client_id=https://starpunk.example.com + redirect_uri=https://starpunk.example.com/auth/callback + state={random_state} + ↓ +8. Redirect user to IndieLogin + ↓ +9. IndieLogin verifies user's identity: + - Checks rel="me" links on alice.example.com + - Or sends email verification + - User authenticates via chosen method + ↓ +10. IndieLogin redirects back: + /auth/callback?code={auth_code}&state={state} + ↓ +11. Verify state matches stored value (CSRF check) + ↓ +12. Exchange code for verified identity: + POST https://indielogin.com/auth + code={auth_code} + client_id=https://starpunk.example.com + redirect_uri=https://starpunk.example.com/auth/callback + ↓ +13. IndieLogin returns: {"me": "https://alice.example.com"} + ↓ +14. Verify me == ADMIN_ME (config) + ↓ +15. 
If match: + - Generate session token + - Insert into sessions table + - Set HttpOnly, Secure cookie + - Redirect to /admin + ↓ +16. If no match: + - Return "Unauthorized" error + - Log attempt +``` + +## Security Architecture + +### Authentication Security + +#### Session Management +- **Token Generation**: `secrets.token_urlsafe(32)` (256-bit entropy) +- **Storage**: Hash before storing in database +- **Cookies**: HttpOnly, Secure, SameSite=Lax +- **Expiry**: 30 days, extendable on use +- **Validation**: Every protected route checks session + +#### CSRF Protection +- **State Tokens**: Random tokens for OAuth flows +- **Expiry**: 5 minutes (short-lived) +- **Single-Use**: Deleted after verification +- **SameSite**: Cookies set to Lax mode + +#### Access Control +- **Admin Routes**: Require valid session +- **Micropub Routes**: Require valid bearer token +- **Public Routes**: No authentication needed +- **Identity Verification**: Only ADMIN_ME can authenticate + +### Input Validation + +#### User Input +- **Markdown**: Sanitize to prevent XSS in rendered HTML +- **URLs**: Validate format and scheme (https://) +- **Slugs**: Alphanumeric + hyphens only +- **JSON**: Parse and validate structure +- **File Paths**: Prevent directory traversal (validate against base path) + +#### Micropub Payloads +- **Content-Type**: Verify matches expected format +- **Required Fields**: Validate h-entry structure +- **Size Limits**: Prevent DoS via large payloads +- **Scope Verification**: Check token has required permissions + +### Database Security + +#### SQL Injection Prevention +- **Parameterized Queries**: Always use parameter substitution +- **No String Interpolation**: Never build SQL with f-strings +- **Input Sanitization**: Validate before database operations + +Example: +```python +# GOOD +cursor.execute("SELECT * FROM notes WHERE slug = ?", (slug,)) + +# BAD (SQL injection vulnerable) +cursor.execute(f"SELECT * FROM notes WHERE slug = '{slug}'") +``` + +#### Data Integrity 
+- **Transactions**: Use for multi-step operations +- **Constraints**: UNIQUE on slugs, file_paths +- **Foreign Keys**: Enforce relationships (if applicable) +- **Content Hashing**: Detect unauthorized file modifications + +### Network Security + +#### HTTPS +- **Production Requirement**: TLS 1.2+ required +- **Reverse Proxy**: Nginx/Caddy handles SSL termination +- **Certificate Validation**: Verify SSL certs on outbound requests +- **HSTS**: Set Strict-Transport-Security header + +#### Security Headers +```python +# Set on all responses +Content-Security-Policy: default-src 'self' +X-Frame-Options: DENY +X-Content-Type-Options: nosniff +Referrer-Policy: strict-origin-when-cross-origin +``` + +#### Rate Limiting +- **Implementation**: Reverse proxy (nginx/Caddy) +- **Admin Routes**: Stricter limits +- **API Routes**: Moderate limits +- **Public Routes**: Permissive limits + +### File System Security + +#### Atomic Operations +```python +# Write to temp file, then atomic rename +temp_path = f"{target_path}.tmp" +with open(temp_path, 'w') as f: + f.write(content) +os.rename(temp_path, target_path) # Atomic on POSIX +``` + +#### Path Validation +```python +# Prevent directory traversal +base_path = os.path.abspath(DATA_PATH) +requested_path = os.path.abspath(os.path.join(base_path, user_input)) +if not requested_path.startswith(base_path): + raise SecurityError("Path traversal detected") +``` + +#### File Permissions +- **Data Directory**: 700 (owner only) +- **Database File**: 600 (owner read/write) +- **Note Files**: 600 (owner read/write) +- **Application User**: Dedicated non-root user + +## Performance Considerations + +### Response Time Targets +- **API Responses**: < 100ms (database + file read) +- **Page Renders**: < 200ms (template rendering) +- **RSS Feed**: < 300ms (query + file reads + XML generation) + +### Optimization Strategies + +#### Database +- **Indexes**: On frequently queried columns (created_at, slug, published) +- **Connection Pooling**: 
Single connection (single-user, no contention) +- **Query Optimization**: SELECT only needed columns +- **Prepared Statements**: Reuse compiled queries + +#### File System +- **Caching**: Consider caching rendered HTML in memory (optional) +- **Directory Structure**: Year/Month prevents large directories +- **Atomic Reads**: Fast sequential reads, no locking needed + +#### HTTP +- **Static Assets**: Cache headers on CSS/JS (1 year) +- **RSS Feed**: Cache for 5 minutes (Cache-Control) +- **Compression**: gzip/brotli via reverse proxy +- **ETags**: For conditional requests + +#### Rendering +- **Template Compilation**: Jinja2 compiles templates automatically +- **Minimal Templating**: Simple templates render fast +- **Server-Side**: No client-side rendering overhead + +### Resource Usage + +#### Memory +- **Flask Process**: ~50MB base +- **SQLite**: ~10MB typical working set +- **Total**: < 100MB under normal load + +#### Disk +- **Application**: ~5MB (code + dependencies) +- **Database**: ~1MB per 1000 notes +- **Notes**: ~5KB average per markdown file +- **Total**: Scales linearly with note count + +#### CPU +- **Idle**: Near zero +- **Request Handling**: Minimal (no heavy processing) +- **Markdown Rendering**: Fast (pure Python) +- **Database Queries**: Indexed, sub-millisecond + +## Deployment Architecture + +### Single-Server Deployment + +``` +┌─────────────────────────────────────────────────┐ +│ Internet │ +└────────────────┬────────────────────────────────┘ + │ + │ Port 443 (HTTPS) + ↓ +┌─────────────────────────────────────────────────┐ +│ Nginx/Caddy (Reverse Proxy) │ +│ - SSL/TLS termination │ +│ - Static file serving │ +│ - Rate limiting │ +│ - Compression │ +└────────────────┬────────────────────────────────┘ + │ + │ Port 8000 (HTTP) + ↓ +┌─────────────────────────────────────────────────┐ +│ Gunicorn (WSGI Server) │ +│ - 4 worker processes │ +│ - Process management │ +│ - Load balancing (round-robin) │ 
+└────────────────┬────────────────────────────────┘ + │ + │ WSGI + ↓ +┌─────────────────────────────────────────────────┐ +│ Flask Application │ +│ - Request handling │ +│ - Business logic │ +│ - Template rendering │ +└────────────────┬────────────────────────────────┘ + │ + ↓ +┌────────────────────────────┬────────────────────┐ +│ File System │ SQLite Database │ +│ data/notes/ │ data/starpunk.db │ +│ YYYY/MM/slug.md │ │ +└────────────────────────────┴────────────────────┘ +``` + +### Process Management (systemd) + +```ini +[Unit] +Description=StarPunk CMS +After=network.target + +[Service] +Type=notify +User=starpunk +WorkingDirectory=/opt/starpunk +Environment="PATH=/opt/starpunk/venv/bin" +ExecStart=/opt/starpunk/venv/bin/gunicorn -w 4 -b 127.0.0.1:8000 app:app +Restart=always +RestartSec=10 + +[Install] +WantedBy=multi-user.target +``` + +### Backup Strategy + +#### Automated Daily Backup +```bash +#!/bin/bash +# backup.sh - Run daily via cron + +DATE=$(date +%Y%m%d) +BACKUP_DIR="/backup/starpunk" + +# Backup data directory (notes + database) +rsync -av /opt/starpunk/data/ "$BACKUP_DIR/$DATE/" + +# Keep last 30 days +find "$BACKUP_DIR" -maxdepth 1 -type d -mtime +30 -exec rm -rf {} \; +``` + +#### Manual Backup +```bash +# Simple copy +cp -r /opt/starpunk/data /backup/starpunk-$(date +%Y%m%d) + +# Or with compression +tar -czf starpunk-backup-$(date +%Y%m%d).tar.gz /opt/starpunk/data +``` + +### Restore Process + +1. Stop application: `sudo systemctl stop starpunk` +2. Restore data directory: `rsync -av /backup/starpunk/20241118/ /opt/starpunk/data/` +3. Fix permissions: `chown -R starpunk:starpunk /opt/starpunk/data` +4. Start application: `sudo systemctl start starpunk` +5. 
Verify: Visit site, check recent notes + +## Testing Strategy + +### Test Pyramid + +``` + ┌─────────────┐ + / \ + / Manual Tests \ Validation, Real Services + /───────────────── \ + / \ + / Integration Tests \ API Flows, Database + Files + /─────────────────────── \ + / \ + / Unit Tests \ Functions, Logic, Parsing + /───────────────────────────────\ +``` + +### Unit Tests (pytest) +**Coverage**: Business logic, utilities, models +**Examples**: +- Slug generation and uniqueness +- Markdown rendering with various inputs +- Content hash calculation +- File path validation +- Token generation and verification +- Date formatting for RSS +- Micropub payload parsing + +### Integration Tests +**Coverage**: Component interactions, full flows +**Examples**: +- Create note: file write + database insert +- Read note: database query + file read +- IndieLogin flow with mocked API +- Micropub creation with token validation +- RSS feed generation with multiple notes +- Session authentication on protected routes + +### End-to-End Tests +**Coverage**: Full user workflows +**Examples**: +- Admin login via IndieLogin (mocked) +- Create note via web interface +- Publish note via Micropub client (mocked) +- View note on public site +- Verify RSS feed includes note + +### Validation Tests +**Coverage**: Standards compliance +**Tools**: +- W3C HTML Validator (validate templates) +- W3C Feed Validator (validate RSS output) +- IndieWebify.me (verify microformats) +- Micropub.rocks (test Micropub compliance) + +### Manual Tests +**Coverage**: Real-world usage +**Examples**: +- Authenticate with real indielogin.com +- Publish from actual Micropub client (Quill, Indigenous) +- Subscribe to feed in actual RSS reader +- Browser compatibility (Chrome, Firefox, Safari, mobile) +- Accessibility with screen reader + +## Monitoring and Observability + +### Logging Strategy + +#### Application Logs +```python +# Structured logging +import logging + +logger = logging.getLogger(__name__) + +# Info: 
Normal operations +logger.info("Note created", extra={ + "slug": slug, + "published": published, + "user": session.me +}) + +# Warning: Recoverable issues +logger.warning("State token expired", extra={ + "state": state, + "age": age_seconds +}) + +# Error: Failed operations +logger.error("File write failed", extra={ + "path": file_path, + "error": str(e) +}) +``` + +#### Log Levels +- **DEBUG**: Development only (verbose) +- **INFO**: Normal operations (note creation, auth success) +- **WARNING**: Unusual but handled (expired tokens, invalid input) +- **ERROR**: Failed operations (file I/O errors, database errors) +- **CRITICAL**: System failures (database unreachable) + +#### Log Destinations +- **Development**: Console (stdout) +- **Production**: File rotation (logrotate) + optional syslog + +### Metrics (Optional for V2) + +**Simple Metrics** (if desired): +- Note count (query database) +- Request count (nginx logs) +- Error rate (grep application logs) +- Response times (nginx logs) + +**Advanced Metrics** (V2): +- Prometheus exporter +- Grafana dashboard +- Alert on error rate spike + +### Health Checks + +```python +@app.route('/health') +def health_check(): + """Simple health check for monitoring""" + try: + # Check database + db.execute("SELECT 1").fetchone() + + # Check file system + os.path.exists(DATA_PATH) + + return {"status": "ok"}, 200 + except Exception as e: + return {"status": "error", "detail": str(e)}, 500 +``` + +## Migration and Evolution + +### V1 to V2 Migration + +#### Database Schema Changes +```sql +-- Add new column with default +ALTER TABLE notes ADD COLUMN tags TEXT DEFAULT ''; + +-- Create new table +CREATE TABLE tags ( + id INTEGER PRIMARY KEY, + name TEXT UNIQUE NOT NULL +); + +-- Migration script updates existing notes +``` + +#### File Format Evolution +**V1**: Pure markdown +**V2** (if needed): Add optional frontmatter +```markdown +--- +tags: indieweb, cms +--- +Note content here +``` + +**Backward Compatibility**: Parser checks 
for frontmatter, falls back to pure markdown. + +#### API Versioning +``` +# V1 (current) +GET /api/notes + +# V2 (future) +GET /api/v2/notes # New features +GET /api/notes # Still works, returns V1 response +``` + +### Data Export/Import + +#### Export Formats +1. **Markdown Bundle**: Zip of all notes (already portable) +2. **JSON Export**: Notes + metadata + ```json + { + "version": "1.0", + "exported_at": "2024-11-18T12:00:00Z", + "notes": [ + { + "slug": "my-note", + "content": "Note content...", + "created_at": "2024-11-01T12:00:00Z", + "published": true + } + ] + } + ``` +3. **RSS Archive**: Existing feed.xml + +#### Import (V2) +- From JSON export +- From WordPress XML +- From markdown directory +- From other IndieWeb CMSs + +## Success Metrics + +The architecture is successful if it enables: + +1. **Fast Development**: < 1 week to implement V1 +2. **Easy Deployment**: < 5 minutes to get running +3. **Low Maintenance**: Runs for months without intervention +4. **High Performance**: All responses < 300ms +5. **Data Ownership**: User has direct access to all content +6. **Standards Compliance**: Passes all validators +7. 
**Extensibility**: Can add V2 features without rewrite + +## References + +### Internal Documentation +- [Technology Stack](technology-stack.md) +- [ADR-001: Python Web Framework](../decisions/ADR-001-python-web-framework.md) +- [ADR-002: Flask Extensions](../decisions/ADR-002-flask-extensions.md) +- [ADR-003: Frontend Technology](../decisions/ADR-003-frontend-technology.md) +- [ADR-004: File-Based Storage](../decisions/ADR-004-file-based-note-storage.md) +- [ADR-005: IndieLogin Authentication](../decisions/ADR-005-indielogin-authentication.md) + +### External Standards +- [IndieWeb](https://indieweb.org/) +- [IndieAuth Spec](https://indieauth.spec.indieweb.org/) +- [Micropub Spec](https://micropub.spec.indieweb.org/) +- [Microformats2](http://microformats.org/wiki/h-entry) +- [RSS 2.0](https://www.rssboard.org/rss-specification) +- [Flask Documentation](https://flask.palletsprojects.com/) diff --git a/docs/architecture/technology-stack.md b/docs/architecture/technology-stack.md new file mode 100644 index 0000000..8e44115 --- /dev/null +++ b/docs/architecture/technology-stack.md @@ -0,0 +1,1082 @@ +# StarPunk Technology Stack + +## Project Summary + +StarPunk is a minimal, single-user IndieWeb CMS for publishing notes with RSS syndication. The project emphasizes radical simplicity, standards compliance, and user data ownership. Every technology choice is driven by the principle: "Every line of code must justify its existence. When in doubt, leave it out." 
+ +### Core Requirements +- Publish IndieWeb-compatible notes (https://indieweb.org/note) +- IndieAuth authentication using external provider (indielogin.com) +- Micropub server endpoint for publishing from any client +- RSS feed generation for syndication +- File-based note storage for maximum portability +- SQLite for metadata and structured data +- Self-hostable single-user system +- API-first architecture +- Markdown support + +## Complete Technology Stack + +### Backend Stack + +#### Web Framework: Flask 3.0+ +**Purpose**: HTTP server, routing, templating +**Justification**: +- Minimal micro-framework (< 1000 lines core code) +- Perfect for single-user applications +- Native support for both JSON APIs and HTML rendering +- Mature, stable, well-documented (13+ years) +- Built-in Jinja2 templating for server-side rendering +- Standard WSGI interface for deployment flexibility + +**Alternatives Rejected**: +- FastAPI: Async complexity unnecessary for single-user CMS +- Django: Massive framework with ORM, admin, multi-user features we don't need +- Bottle: Too minimal, smaller ecosystem + +**Reference**: ADR-001 + +#### Python Version: 3.11+ +**Purpose**: Programming language +**Justification**: +- User's preferred language +- Excellent standard library (sqlite3, hashlib, secrets, etc.) 
+- Rich ecosystem for web development +- Strong typing support (type hints) +- Mature dependency management (pip, venv) + +#### Data Persistence: Hybrid File + Database + +##### Note Storage: Markdown Files on Disk +**Purpose**: Store note content +**Format**: Plain markdown files (.md) +**Structure**: +``` +data/notes/ +├── 2024/ +│ ├── 11/ +│ │ ├── my-first-note.md +│ │ └── another-note.md +│ └── 12/ +│ └── december-note.md +└── 2025/ + └── 01/ + └── new-year-note.md +``` + +**Naming Convention**: `{slug}.md` +**Organization**: Year/Month subdirectories (`YYYY/MM/`) +**File Format**: Pure markdown, no frontmatter + +**Justification**: +- Maximum portability (user requirement) +- Human-readable, editable in any text editor +- Easy backup (cp, rsync, git) +- User owns data directly +- No vendor lock-in +- Future-proof format + +**Reference**: ADR-004 + +##### Metadata Storage: SQLite +**Purpose**: Store note metadata, sessions, tokens +**Database**: `data/starpunk.db` + +**Schema**: +```sql +-- Note metadata (NOT content) +CREATE TABLE notes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + slug TEXT UNIQUE NOT NULL, + file_path TEXT UNIQUE NOT NULL, + published BOOLEAN DEFAULT 0, + created_at TIMESTAMP NOT NULL, + updated_at TIMESTAMP NOT NULL, + content_hash TEXT +); + +-- Authentication sessions (IndieLogin) +CREATE TABLE sessions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_token TEXT UNIQUE NOT NULL, + me TEXT NOT NULL, + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP NOT NULL, + last_used_at TIMESTAMP +); + +-- Micropub access tokens +CREATE TABLE tokens ( + token TEXT PRIMARY KEY, + me TEXT NOT NULL, + client_id TEXT, + scope TEXT, + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP +); + +-- CSRF state tokens +CREATE TABLE auth_state ( + state TEXT PRIMARY KEY, + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP NOT NULL +); +``` + +**Justification**: +- Single file, perfect for single-user +- No separate server process +- Excellent for 
read-heavy workloads (blog) +- Fast indexing and querying +- Built into Python standard library +- Enables efficient metadata queries without parsing files +- Atomic transactions for data integrity + +**Hybrid Strategy**: Files are authoritative for content; database is authoritative for metadata. This gives us portability AND performance. + +**Reference**: ADR-004 + +#### Core Dependencies + +##### markdown (3.5+) +**Purpose**: Convert markdown to HTML +**Usage**: Render note content for display and RSS feed +**Justification**: +- Pure Python, standard markdown implementation +- Simple API: `markdown.markdown(text)` +- Sufficient performance for single-user system +- More standard than alternatives (mistune) + +##### feedgen (1.0+) +**Purpose**: Generate RSS 2.0 feeds +**Usage**: Create valid RSS feed from published notes +**Justification**: +- High-level API ensures RSS 2.0 compliance +- Handles date formatting (RFC-822) automatically +- CDATA wrapping for HTML content +- Better than manual XML generation (error-prone) + +##### httpx (0.27+) +**Purpose**: HTTP client library +**Usage**: +- Communication with indielogin.com API +- Verify Micropub client metadata +- Fetch remote URLs for verification +**Justification**: +- Modern, clean API +- Synchronous and async support +- Better than requests (no async support) and urllib (too low-level) +- Proper timeout handling +- SSL verification built-in + +##### python-dotenv (1.0+) +**Purpose**: Environment configuration +**Usage**: Load settings from `.env` file +**Justification**: +- Industry standard for configuration +- Keeps secrets out of code +- Simple API: `load_dotenv()` +- Minimal overhead + +##### pytest (8.0+) +**Purpose**: Testing framework +**Usage**: Unit and integration tests +**Justification**: +- Current Python testing standard +- Minimal boilerplate +- Clear assertions +- Built-in fixtures +- Better than unittest (verbose) and nose2 (unmaintained) + +**Reference**: ADR-002 + +#### Dependencies 
Explicitly REJECTED + +- **Flask-SQLAlchemy**: ORM abstraction unnecessary, adds complexity +- **Flask-Login**: Session-based auth, we need token-based for Micropub +- **Flask-CORS**: Single decorator, don't need full extension (5 lines of code) +- **Flask-WTF**: Form library overkill for simple note creation +- **Flask-Limiter**: Rate limiting deferred to V2 or reverse proxy + +**Decision**: Use Python standard library and explicit code instead of extensions where possible. Each dependency must justify its existence. + +**Reference**: ADR-002 + +### Frontend Stack + +#### Template Engine: Jinja2 +**Purpose**: Server-side HTML rendering +**Included With**: Flask (no additional dependency) +**Usage**: +- Public interface (homepage, note permalinks) +- Admin interface (dashboard, note editor) +- Microformats markup (h-entry, h-card) + +**Justification**: +- Zero build process +- Server-side rendering for better performance +- Works without JavaScript (progressive enhancement) +- Easy microformats implementation +- Familiar syntax +- Stable and mature + +**Reference**: ADR-003 + +#### CSS: Custom Stylesheet +**Purpose**: Visual styling +**Approach**: Single custom CSS file, no framework +**File**: `static/css/style.css` +**Size**: ~200 lines for entire site + +**Features**: +- CSS custom properties (variables) for theming +- Mobile-first responsive design +- Simple media queries for tablet/desktop +- Semantic HTML5 + minimal classes + +**Justification**: +- No framework overhead (Bootstrap, Tailwind, etc.) 
+- No build tools required +- Full control over appearance +- Minimal single theme fits project scope +- Faster than loading framework CSS + +**Example**: +```css +:root { + --color-text: #333; + --color-bg: #fff; + --max-width: 42rem; + --spacing: 1rem; +} +``` + +**Frameworks Rejected**: +- Tailwind: Requires build process, utility-first doesn't fit +- Bootstrap/Bulma: Too many unused features +- PicoCSS: Good but custom CSS gives more control + +**Reference**: ADR-003 + +#### JavaScript: Minimal Vanilla JS +**Purpose**: Markdown preview in admin (optional enhancement) +**Approach**: Single vanilla JavaScript file, no framework +**File**: `static/js/preview.js` +**Dependency**: marked.js via CDN (client-side markdown) + +**Usage**: +- Optional real-time markdown preview in note editor +- Progressive enhancement (works without JS) + +**Justification**: +- Core functionality works without JavaScript +- Single optional feature doesn't justify framework +- Vanilla JS sufficient for simple preview +- Modern browser APIs (fetch, DOM manipulation) are enough +- No build tools required + +**Frameworks Rejected**: +- React/Vue/Svelte: Massive overkill for one preview feature +- htmx: Interesting but not needed for V1 +- Alpine.js: Too much for minimal JS needs + +**Reference**: ADR-003 + +#### Build Tools: NONE +**Decision**: No build process whatsoever +**Justification**: +- Server-side rendering eliminates need for bundling +- Custom CSS served directly +- Vanilla JS served directly +- Modern browsers support ES6+ natively +- Zero npm dependencies +- Instant development setup + +**This means**: +- No webpack, Vite, Rollup, esbuild +- No Babel transpilation +- No PostCSS processing +- No minification (premature optimization) +- No asset pipeline + +**Reference**: ADR-003 + +### Authentication Stack + +#### Admin Authentication: IndieLogin.com +**Purpose**: Authenticate the admin user via their personal website +**Provider**: External service at https://indielogin.com 
+**API**: https://indielogin.com/api +**Protocol**: OAuth 2.0 / IndieAuth + +**Flow**: +1. User enters their website URL +2. StarPunk redirects to indielogin.com with state token +3. indielogin.com verifies user's identity (RelMeAuth, email, etc.) +4. indielogin.com redirects back with authorization code +5. StarPunk exchanges code for verified identity +6. StarPunk creates session cookie + +**Session Management**: +- HttpOnly, Secure cookies +- 30-day expiry +- Stored in SQLite sessions table +- CSRF protection via state tokens + +**Configuration**: +```bash +ADMIN_ME=https://your-website.com # Only this URL can authenticate +SESSION_SECRET=random-secret-key +``` + +**Justification**: +- Extremely simple (< 100 lines of code) +- No authentication code to maintain +- No password management needed +- True IndieWeb authentication (user owns identity) +- Secure by default (delegated to trusted service) +- Community-maintained, stable service + +**Alternatives Rejected**: +- Self-hosted IndieAuth: Too complex for V1 +- Password auth: Not IndieWeb-compatible, security burden +- OAuth (GitHub/Google): User doesn't own identity + +**Reference**: ADR-005 + +#### Micropub Authentication: IndieAuth Tokens +**Purpose**: Authenticate Micropub API clients +**Protocol**: IndieAuth bearer tokens +**Flow**: Standard IndieAuth authorization code grant +**Storage**: Tokens table in SQLite + +**Note**: Micropub token endpoint is separate from admin authentication. Users authenticate their Micropub clients (e.g., mobile apps) separately via IndieAuth flow. This will be detailed in a future ADR for Micropub implementation. 
+ +### Development Tools + +#### Code Quality +``` +pytest-cov # Test coverage reporting +black # Code formatting (standard: 88 char line length) +flake8 # Linting +mypy # Type checking (optional but recommended) +``` + +**Justification**: +- Automated formatting prevents style debates +- Linting catches common errors +- Test coverage ensures quality +- Type hints improve maintainability + +#### Development Workflow +```bash +# Setup +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt + +# Run +flask run + +# Test +pytest + +# Format +black . +flake8 . +``` + +**No additional tools required**: No npm, no build scripts, no containers (optional for deployment). + +### Deployment Stack + +#### WSGI Server: Gunicorn (Production) +**Purpose**: Production HTTP server +**Justification**: +- Standard Python WSGI server +- Production-ready +- Better performance than Flask dev server +- Simple configuration + +**Alternative**: uWSGI (more complex, not needed for single-user) + +#### Reverse Proxy: Nginx or Caddy (Recommended) +**Purpose**: HTTPS termination, static file serving +**Justification**: +- Handle SSL/TLS certificates +- Serve static files efficiently +- Rate limiting (optional) +- Proven deployment pattern + +#### Process Manager: systemd (Recommended) +**Purpose**: Keep application running +**Justification**: +- Standard on modern Linux +- Auto-restart on failure +- Log management + +#### Deployment Package: Single Unit +**Structure**: +``` +starpunk/ +├── app.py # Main application +├── requirements.txt # Dependencies +├── .env.example # Configuration template +├── static/ # CSS, JS +├── templates/ # Jinja2 templates +├── data/ # Notes + SQLite (persistent) +│ ├── notes/ +│ └── starpunk.db +└── README.md # Setup instructions +``` + +**Deployment**: +- Clone repository +- Create virtual environment +- Install dependencies +- Configure .env file +- Run with Gunicorn + systemd +- (Optional) Nginx for HTTPS + +**Justification**: Single 
self-contained package, easy to deploy and backup. + +## File Organization + +### Project Structure +``` +starpunk/ +├── app.py # Main Flask application +├── requirements.txt # Python dependencies +├── .env # Environment configuration (gitignored) +├── .env.example # Configuration template +├── README.md # Setup documentation +├── CLAUDE.MD # Project requirements +│ +├── starpunk/ # Application package +│ ├── __init__.py +│ ├── config.py # Configuration loading +│ ├── database.py # SQLite operations +│ ├── models.py # Data models +│ ├── auth.py # Authentication logic +│ ├── micropub.py # Micropub endpoint +│ ├── feed.py # RSS generation +│ └── utils.py # Helper functions +│ +├── static/ # Static assets +│ ├── css/ +│ │ └── style.css # Single stylesheet +│ └── js/ +│ └── preview.js # Optional markdown preview +│ +├── templates/ # Jinja2 templates +│ ├── base.html # Base layout +│ ├── index.html # Homepage (note list) +│ ├── note.html # Single note +│ ├── feed.xml # RSS template +│ └── admin/ +│ ├── base.html # Admin layout +│ ├── login.html # Login form +│ ├── dashboard.html # Admin dashboard +│ ├── new.html # Create note +│ └── edit.html # Edit note +│ +├── data/ # Persistent data (gitignored) +│ ├── notes/ # Markdown files +│ │ └── YYYY/MM/ +│ │ └── slug.md +│ └── starpunk.db # SQLite database +│ +├── tests/ # Test suite +│ ├── test_auth.py +│ ├── test_database.py +│ ├── test_micropub.py +│ ├── test_feed.py +│ └── test_notes.py +│ +└── docs/ # Architecture documentation + ├── architecture/ + │ ├── overview.md + │ ├── components.md + │ ├── data-flow.md + │ ├── security.md + │ └── technology-stack.md + └── decisions/ + ├── ADR-001-python-web-framework.md + ├── ADR-002-flask-extensions.md + ├── ADR-003-frontend-technology.md + ├── ADR-004-file-based-note-storage.md + └── ADR-005-indielogin-authentication.md +``` + +## Architecture Patterns + +### API-First Design +**Pattern**: All functionality exposed via API, web interface consumes API +**Routes**: +``` +# Public 
API +GET /api/notes # List published notes +GET /api/notes/{slug} # Get single note +GET /feed.xml # RSS feed + +# Admin API (session auth) +POST /api/notes # Create note +PUT /api/notes/{id} # Update note +DELETE /api/notes/{id} # Delete note + +# Micropub API (token auth) +POST /api/micropub # Create via Micropub +GET /api/micropub?q=config # Query config + +# Auth API +GET /admin/login # Login form +POST /admin/login # Initiate IndieLogin +GET /auth/callback # IndieLogin callback +POST /admin/logout # Logout +``` + +### Data Flow: File + Database Sync + +#### Creating a Note +``` +User submits note + ↓ +Generate slug + ↓ +Create file: data/notes/YYYY/MM/{slug}.md + ↓ +Calculate content hash + ↓ +Insert database record (slug, file_path, hash, timestamps) + ↓ +If database insert fails: delete file, return error + ↓ +Return success +``` + +#### Reading a Note +``` +Request note by slug + ↓ +Query database for file_path + ↓ +Read markdown from file + ↓ +Render to HTML (if needed) + ↓ +Return content + metadata +``` + +#### Updating a Note +``` +User submits changes + ↓ +Atomic write: new content to temp file + ↓ +Calculate new hash + ↓ +Update database (timestamp, hash) + ↓ +If database update succeeds: atomic rename temp → actual + ↓ +If database update fails: delete temp, return error + ↓ +Return success +``` + +**Benefits**: +- Files provide portability +- Database provides fast queries +- Content hash detects external changes +- Atomic operations prevent corruption + +**Reference**: ADR-004 + +### IndieLogin Authentication Flow + +``` +┌─────────┐ ┌──────────┐ ┌─────────────┐ +│ User │ │ StarPunk │ │ IndieLogin │ +└────┬────┘ └────┬─────┘ └──────┬──────┘ + │ │ │ + │ 1. Click "Login" │ │ + ├─────────────────────────>│ │ + │ │ │ + │ 2. Enter website URL │ │ + ├─────────────────────────>│ │ + │ │ │ + │ 3. Generate state token │ + │ │ │ + │ 4. 
Redirect to IndieLogin with: │ + │ - me=user_website │ + │ - client_id=starpunk_url │ + │ - redirect_uri=starpunk/callback │ + │ - state=random_token │ + │ ├──────────────────────────>│ + │ │ │ + │ │ 5. Verify user's │ + │ │ identity │ + │ <────────────────────────────────────────────────── │ + │ (User authenticates via │ + │ chosen method) │ + │ ──────────────────────────────────────────────────> │ + │ │ │ + │ 6. Redirect back with code + state │ + │ <──────────────────────────────────────────────────│ + ├─────────────────────────>│ │ + │ │ │ + │ 7. Verify state │ + │ │ │ + │ 8. POST to IndieLogin: │ + │ - code │ + │ - client_id │ + │ - redirect_uri │ + │ ├──────────────────────────>│ + │ │ │ + │ │ 9. Return verified "me" │ + │ │<──────────────────────────│ + │ │ │ + │ 10. Verify me == ADMIN_ME │ + │ │ │ + │ 11. Create session │ + │ │ │ + │ 12. Set session cookie │ │ + │ <───────────────────────│ │ + │ │ │ + │ 13. Redirect to admin │ │ + │ <───────────────────────│ │ + │ │ │ +``` + +**Security Features**: +- State token prevents CSRF +- Session tokens are cryptographically random +- HttpOnly cookies prevent XSS +- Only ADMIN_ME URL can authenticate +- Sessions expire after 30 days + +**Reference**: ADR-005 + +### Progressive Enhancement Pattern + +**Principle**: Core functionality works without JavaScript + +#### Public Interface +- **Without JS**: Full functionality (view notes, RSS feed) +- **With JS**: No difference (no JS used on public pages) + +#### Admin Interface +- **Without JS**: + - Create/edit notes via HTML forms + - Submit to server, server renders markdown + - Full page refresh on submit +- **With JS**: + - Real-time markdown preview + - No page refresh for preview + - Still submits via form (progressive enhancement) + +**Implementation**: +```html + +
+ + +
+ + + +``` + +**Reference**: ADR-003 + +## Standards Compliance + +### IndieWeb Standards + +#### Microformats2 +**Required Classes**: +- `h-entry`: Mark up notes +- `h-card`: Mark up author information +- `e-content`: Note content +- `dt-published`: Publication timestamp +- `u-url`: Permalink URL + +**Example**: +```html + +``` + +**Validation**: https://indiewebify.me/ + +#### IndieAuth +**Compliance**: OAuth 2.0 authorization code flow +**Endpoints**: Delegated to indielogin.com +**Token Format**: Bearer tokens +**Validation**: Token introspection + +**Reference**: https://indieauth.spec.indieweb.org/ + +#### Micropub +**Compliance**: Full Micropub spec support +**Content Types**: JSON and form-encoded +**Required Responses**: 201 Created with Location header +**Query Support**: q=config, q=source + +**Validation**: https://micropub.rocks/ + +**Reference**: https://micropub.spec.indieweb.org/ + +### Web Standards + +#### RSS 2.0 +**Compliance**: Valid RSS 2.0 XML +**Required Elements**: title, link, description, pubDate, guid +**Date Format**: RFC-822 +**HTML Content**: CDATA-wrapped + +**Validation**: https://validator.w3.org/feed/ + +#### HTTP +**Status Codes**: Proper use of 200, 201, 400, 401, 404, 500 +**Headers**: Content-Type, Cache-Control, Location +**Methods**: GET, POST, PUT, DELETE +**CORS**: Allow cross-origin for API endpoints + +#### HTML5 +**Compliance**: Valid semantic HTML +**Accessibility**: ARIA labels, alt text, proper heading hierarchy +**Responsive**: Viewport meta tag, mobile-first CSS +**Forms**: Proper labels, validation attributes + +**Validation**: https://validator.w3.org/ + +## Security Architecture + +### Authentication Security +- State tokens for CSRF protection (5-minute expiry) +- Session tokens are cryptographically random (32 bytes) +- HttpOnly cookies prevent XSS theft +- Secure flag requires HTTPS +- SameSite=Lax prevents CSRF +- Single admin user (ADMIN_ME verification) + +### Input Validation +- Validate all user input +- 
Sanitize markdown (prevent XSS in rendered HTML) +- Validate Micropub payloads against spec +- URL validation for IndieAuth +- File path validation (prevent directory traversal) + +### Database Security +- Parameterized queries (prevent SQL injection) +- Input sanitization before storage +- Hash session tokens before storage +- Content hashing for integrity + +### Network Security +- HTTPS required in production +- SSL certificate verification on httpx requests +- Secure headers (CSP, X-Frame-Options, etc.) +- Rate limiting via reverse proxy (nginx/Caddy) + +### File System Security +- Atomic file operations +- Restricted permissions on data/ directory +- Prevent directory traversal attacks +- Validate file paths before operations + +## Performance Targets + +### Response Times +- API responses: < 100ms +- Page loads: < 200ms +- RSS feed: < 300ms + +### Optimization Strategy +- SQLite indexes on frequently queried columns +- Cache RSS feed (5 minutes) +- Minimal dependencies = fast startup +- Server-side rendering = fast first paint +- Single CSS file = one request +- Optional JS = doesn't block rendering + +### Resource Usage +- Memory: < 100MB for typical workload +- Disk: Minimal (SQLite + markdown files) +- CPU: Minimal (no heavy processing) + +**Scaling**: Designed for single-user, typical load is <10 requests/minute. Over-engineering for scale would violate simplicity principle. 
+ +## Testing Strategy + +### Unit Tests (pytest) +- Database operations (CRUD, queries) +- Slug generation and validation +- Markdown rendering +- File operations (atomic writes) +- Session management +- Token validation +- Content hash calculation + +### Integration Tests +- IndieLogin authentication flow (mocked API) +- Micropub note creation (full flow) +- RSS feed generation (validation) +- API endpoints (request/response) +- File + database sync +- Error handling + +### Manual Tests +- Real IndieLogin authentication +- Micropub client integration (e.g., Quill) +- RSS feed in actual reader +- Browser compatibility +- Mobile responsiveness +- Accessibility (screen readers) + +### Validation Tests +- HTML validation (W3C validator) +- RSS validation (W3C feed validator) +- Microformats validation (indiewebify.me) +- Micropub compliance (micropub.rocks) + +## Risk Assessment + +### Technical Risks + +#### Risk: IndieLogin.com Outage +**Impact**: Cannot authenticate new sessions +**Likelihood**: Low (stable service) +**Mitigation**: +- Sessions last 30 days (brief outages don't lock out user) +- Document manual session creation in database +- V2: Add fallback authentication method + +#### Risk: File/Database Sync Failure +**Impact**: Data inconsistency +**Likelihood**: Low (atomic operations, error handling) +**Mitigation**: +- Write files first, database second +- Transaction rollback on failure +- Integrity check on startup (optional) +- Regular backups + +#### Risk: File System Corruption +**Impact**: Lost notes +**Likelihood**: Very low (standard filesystem operations) +**Mitigation**: +- Atomic file writes +- Regular backups (user responsibility) +- Markdown files are recoverable + +#### Risk: Dependency Vulnerabilities +**Impact**: Security breach +**Likelihood**: Medium (all software has bugs) +**Mitigation**: +- Minimal dependencies (6 direct) +- All dependencies are mature, maintained +- Regular updates +- Security scanning (optional) + +### Operational 
Risks + +#### Risk: User Misconfiguration +**Impact**: Application doesn't work +**Likelihood**: Medium (manual setup required) +**Mitigation**: +- Clear documentation +- .env.example with all settings +- Validation on startup +- Helpful error messages + +#### Risk: Backup Neglect +**Impact**: Data loss +**Likelihood**: Medium (user responsibility) +**Mitigation**: +- Document backup procedures +- Make backup easy (copy data/ folder) +- Consider automated backup scripts (V2) + +## Migration and Future Considerations + +### V1 to V2 Migration Path +- Add features without breaking existing data +- Markdown files remain compatible +- Database schema migrations (ALTER TABLE) +- Backward compatible API changes + +### Potential V2 Enhancements +- Webmentions support +- Media uploads (photos) +- Additional post types (articles, replies) +- Full-text search (SQLite FTS) +- Automated backups +- Self-hosted IndieAuth option +- Multiple IndieAuth providers +- Draft/scheduled posts +- Tags and categories +- Import/export tools + +### Data Portability Strategy +**Export Formats**: +- Markdown files (already portable) +- JSON export (notes + metadata) +- RSS feed (existing notes) +- HTML archive (static site generator) + +**Import Strategy** (V2): +- From other blogging platforms +- From JSON backup +- From markdown directories + +## Success Criteria + +The technology stack is successful if: + +1. **User can publish notes from any Micropub client** ✓ + - Protocol: Micropub over HTTP + - Auth: IndieAuth tokens + - Format: Stored as markdown files + +2. **Notes appear in RSS readers immediately** ✓ + - Format: Valid RSS 2.0 + - Generator: feedgen library + - Caching: 5 minutes + +3. **System runs on minimal resources** ✓ + - Stack: Flask + SQLite (single process) + - Memory: < 100MB + - Dependencies: 6 direct + +4. **Code is readable and maintainable** ✓ + - Language: Python (user's preference) + - Framework: Flask (minimal, clear) + - Style: black formatting, type hints + +5. 
**All IndieWeb validators pass** ✓ + - Microformats: Server-side templating makes this easy + - IndieAuth: Delegated to indielogin.com + - Micropub: Spec-compliant implementation + +6. **Setup takes less than 5 minutes** ✓ + - Steps: Clone, venv, pip install, configure .env, run + - No build process + - No complex dependencies + +7. **System runs for months without intervention** ✓ + - Architecture: Stateless application + - Persistence: SQLite (reliable) + - Auth: Long-lived sessions (30 days) + +## Quick Start Guide + +### Development Setup +```bash +# Clone repository +git clone <repository-url> starpunk && cd starpunk + +# Create virtual environment +python -m venv venv +source venv/bin/activate # or `venv\Scripts\activate` on Windows + +# Install dependencies +pip install -r requirements.txt + +# Configure +cp .env.example .env +# Edit .env: set SITE_URL, ADMIN_ME, SESSION_SECRET + +# Initialize database +flask db init + +# Run development server +flask run + +# Visit http://localhost:5000 +``` + +### Production Deployment +```bash +# Setup (same as development) +# ... 
+ +# Install production server +pip install gunicorn + +# Run with Gunicorn +gunicorn -w 4 -b 127.0.0.1:8000 app:app + +# Configure nginx/Caddy for HTTPS +# Configure systemd for process management +# Set up regular backups of data/ directory +``` + +### Configuration Reference +```bash +# .env file +SITE_URL=https://starpunk.example.com # Your domain +ADMIN_ME=https://your-website.com # Your IndieWeb identity +SESSION_SECRET=random-secret-key # Generate with: python -c "import secrets; print(secrets.token_hex(32))" +DATA_PATH=./data # Where to store notes and database +``` + +## Summary + +StarPunk's technology stack achieves radical simplicity through careful technology selection: + +- **Backend**: Flask (micro-framework) + SQLite (embedded DB) + Python stdlib +- **Storage**: Markdown files (portability) + SQLite metadata (performance) +- **Frontend**: Jinja2 (SSR) + custom CSS (200 lines) + optional vanilla JS +- **Auth**: IndieLogin.com (external, zero maintenance) +- **Build**: None (zero build tools, zero npm) +- **Deploy**: Single package (Gunicorn + systemd + nginx) + +**Total Direct Dependencies**: 6 (Flask, markdown, feedgen, httpx, python-dotenv, pytest) + +**Lines of Code Estimate**: ~1500 LOC for complete V1 implementation + +**Setup Time**: < 5 minutes from clone to running + +This stack embodies the project philosophy: "Every line of code must justify its existence." Each technology choice prioritizes simplicity, standards compliance, and user data ownership over features and complexity. 
+ +## References + +### Architecture Decision Records +- [ADR-001: Python Web Framework Selection](../decisions/ADR-001-python-web-framework.md) +- [ADR-002: Flask Extensions and Dependencies](../decisions/ADR-002-flask-extensions.md) +- [ADR-003: Front-end Technology Stack](../decisions/ADR-003-frontend-technology.md) +- [ADR-004: File-Based Note Storage Architecture](../decisions/ADR-004-file-based-note-storage.md) +- [ADR-005: IndieLogin Authentication Integration](../decisions/ADR-005-indielogin-authentication.md) + +### Standards and Specifications +- IndieWeb: https://indieweb.org/ +- IndieAuth Spec: https://indieauth.spec.indieweb.org/ +- Micropub Spec: https://micropub.spec.indieweb.org/ +- Microformats2: http://microformats.org/wiki/h-entry +- RSS 2.0: https://www.rssboard.org/rss-specification +- CommonMark: https://spec.commonmark.org/ + +### Tools and Libraries +- Flask: https://flask.palletsprojects.com/ +- Jinja2: https://jinja.palletsprojects.com/ +- IndieLogin.com: https://indielogin.com/ +- Python Markdown: https://python-markdown.github.io/ +- feedgen: https://feedgen.kiesow.be/ +- httpx: https://www.python-httpx.org/ + +### Validation and Testing +- IndieWebify.me: https://indiewebify.me/ +- Micropub Rocks: https://micropub.rocks/ +- W3C Feed Validator: https://validator.w3.org/feed/ +- W3C HTML Validator: https://validator.w3.org/ diff --git a/docs/decisions/ADR-001-python-web-framework.md b/docs/decisions/ADR-001-python-web-framework.md new file mode 100644 index 0000000..ac38d17 --- /dev/null +++ b/docs/decisions/ADR-001-python-web-framework.md @@ -0,0 +1,97 @@ +# ADR-001: Python Web Framework Selection + +## Status +Accepted + +## Context +StarPunk requires a Python web framework to implement the API-first architecture with RESTful endpoints, Micropub support, IndieAuth integration, and web 
interface. The framework must support both API and server-side rendered HTML with minimal complexity. + +## Decision +Use **Flask** as the primary web framework. + +## Rationale + +### Simplicity Score: 9/10 +- Minimal boilerplate code required +- Explicit routing and request handling +- Easy to understand for newcomers +- Core framework is ~1000 lines of code +- Follows "micro-framework" philosophy aligned with StarPunk principles + +### Fitness Score: 10/10 +- Perfect for single-user applications +- Built-in development server +- Excellent template engine (Jinja2) for HTML generation +- Simple decorator-based routing +- Easy integration with SQLite +- Native support for both JSON APIs and HTML rendering +- Werkzeug provides robust HTTP utilities +- Blueprint support for code organization + +### Maintenance Score: 9/10 +- Extremely mature (13+ years) +- Large community and extensive documentation +- Stable API with minimal breaking changes +- Extensive ecosystem of well-tested extensions +- Active development and security updates + +### Standards Compliance: Pass +- Standard WSGI interface +- Full HTTP status code support +- Proper content-type handling +- Easy CORS implementation +- Session management built-in + +## Consequences + +### Positive +- Minimal learning curve +- Small dependency footprint +- Easy to test (built-in test client) +- Flexible enough for API-first architecture +- Can render HTML templates for public interface +- Easy deployment (WSGI compatible) + +### Negative +- No built-in ORM (but we're using raw SQLite, so this is actually positive) +- Requires manual selection of extensions +- Less opinionated than larger frameworks + +### Mitigation +- Extension selection will be minimal (see ADR-002 for extensions) +- Lack of opinion allows us to stay minimal +- Manual configuration gives us full control + +## Alternatives Considered + +### FastAPI (Rejected) +- **Simplicity**: 6/10 - Requires async/await understanding, Pydantic models +- 
**Fitness**: 7/10 - Overkill for single-user CMS, async not needed +- **Maintenance**: 8/10 - Newer framework, but growing +- **Verdict**: Too complex for project needs, async unnecessary + +### Django (Rejected) +- **Simplicity**: 3/10 - Large framework with heavy abstractions +- **Fitness**: 4/10 - Designed for multi-user applications, includes admin panel, ORM, and many features we don't need +- **Maintenance**: 10/10 - Excellent maintenance and security +- **Verdict**: Violates "minimal code" principle, too much unnecessary functionality + +### Bottle (Considered) +- **Simplicity**: 10/10 - Single file framework +- **Fitness**: 7/10 - Very minimal, but perhaps too minimal +- **Maintenance**: 6/10 - Smaller community, slower updates +- **Verdict**: Close second, but Flask has better ecosystem for IndieAuth/Micropub + +## Implementation Notes + +Flask will be used with: +- Jinja2 templates for HTML rendering (included with Flask) +- Werkzeug for HTTP utilities (included with Flask) +- Minimal extensions only (see ADR-002) +- Standard WSGI deployment +- Blueprint organization for clear separation of concerns + +## References +- Flask Documentation: https://flask.palletsprojects.com/ +- WSGI Specification: https://peps.python.org/pep-3333/ +- Flask Design Decisions: https://flask.palletsprojects.com/en/3.0.x/design/ diff --git a/docs/decisions/ADR-002-flask-extensions.md b/docs/decisions/ADR-002-flask-extensions.md new file mode 100644 index 0000000..997ba75 --- /dev/null +++ b/docs/decisions/ADR-002-flask-extensions.md @@ -0,0 +1,134 @@ +# ADR-002: Flask Extensions and Dependencies + +## Status +Accepted + +## Context +Flask is intentionally minimal. We need to select only essential extensions that align with the "minimal code" philosophy while supporting required functionality. 
+ +## Decision +Use the following minimal set of dependencies: +- **Flask** - Core framework +- **markdown** - Markdown to HTML conversion +- **feedgen** - RSS feed generation +- **httpx** - HTTP client for IndieAuth verification +- **python-dotenv** - Environment configuration +- **pytest** - Testing framework + +**NO additional Flask extensions** will be used in V1. + +## Rationale + +### Core Dependencies + +#### markdown +- **Purpose**: Convert markdown notes to HTML +- **Simplicity**: Pure Python, simple API +- **Justification**: Core requirement for note rendering +- **Alternative**: mistune (faster but less standard) +- **Verdict**: markdown is more standard and sufficient for single-user + +#### feedgen +- **Purpose**: Generate valid RSS 2.0 feeds +- **Simplicity**: High-level API, handles all RSS requirements +- **Justification**: Ensures RSS 2.0 compliance without manual XML generation +- **Alternative**: Manual XML generation (error-prone) +- **Verdict**: feedgen guarantees valid RSS output + +#### httpx +- **Purpose**: HTTP client for IndieAuth endpoint verification +- **Simplicity**: Modern, clean API +- **Justification**: Need to verify IndieAuth endpoints and fetch client metadata +- **Alternative**: requests (synchronous only), urllib (too low-level) +- **Verdict**: httpx provides clean API and can be sync or async if needed later + +#### python-dotenv +- **Purpose**: Load environment variables from .env file +- **Simplicity**: Single-purpose, simple API +- **Justification**: Standard pattern for configuration management +- **Alternative**: Manual environment variable handling +- **Verdict**: Industry standard, minimal overhead + +#### pytest +- **Purpose**: Testing framework +- **Simplicity**: Minimal boilerplate, clear assertions +- **Justification**: Required for test coverage +- **Alternative**: unittest (more verbose), nose2 (unmaintained) +- **Verdict**: pytest is current Python testing standard + +### Extensions REJECTED for V1 + +#### 
Flask-SQLAlchemy (Rejected) +- **Reason**: Adds ORM abstraction we don't need +- **Decision**: Use sqlite3 standard library directly +- **Benefit**: Simpler code, explicit queries, no magic + +#### Flask-Login (Rejected) +- **Reason**: Built around a user-model login workflow; we only need a single-admin IndieLogin session plus Micropub bearer tokens +- **Decision**: Implement simple session and token validation ourselves +- **Benefit**: Full control over IndieAuth flow + +#### Flask-CORS (Rejected) +- **Reason**: Single function decorator, don't need extension +- **Decision**: Use Flask's `@app.after_request` hook to add CORS headers +- **Benefit**: 5 lines of code vs. another dependency + +#### Flask-Limiter (Rejected for V1) +- **Reason**: Rate limiting is nice-to-have, not critical for single-user +- **Decision**: Defer to V2 or rely on reverse proxy +- **Benefit**: Reduced complexity + +#### Flask-WTF (Rejected) +- **Reason**: Form handling for single form (note creation) is overkill +- **Decision**: Simple HTML forms with manual validation +- **Benefit**: No CSRF-token complexity in V1, manual validation is clear + +## Consequences + +### Positive +- Minimal dependency tree +- Full control over implementation +- Easy to understand codebase +- Fast installation and startup +- Reduced attack surface + +### Negative +- Must implement some features manually (token validation, CORS) +- No per-form CSRF tokens in V1 (mitigated by SameSite=Lax session cookies; acceptable for single-user) +- Manual SQL queries required + +### Mitigation +- Document manual implementations clearly +- Ensure manual code is well-tested +- Keep manual implementations simple and obvious +- Plan to add CSRF in V2 if needed + +## Complete Dependency List + +``` +Flask==3.0.* +markdown==3.5.* +feedgen==1.0.* +httpx==0.27.* +python-dotenv==1.0.* +pytest==8.0.* +``` + +## Development Dependencies +``` +pytest-cov # Test coverage reporting +black # Code formatting +flake8 # Linting +``` + +## Standards Compliance +- All dependencies are pure Python or have minimal C extensions +- All are actively maintained with security updates +- 
All support Python 3.11+ +- Total dependency count: 6 direct dependencies (excluding dev tools) + +## References +- Flask Extensions: https://flask.palletsprojects.com/en/3.0.x/extensions/ +- Markdown Spec: https://daringfireball.net/projects/markdown/ +- RSS 2.0: https://www.rssboard.org/rss-specification +- Python Packaging: https://packaging.python.org/ diff --git a/docs/decisions/ADR-003-frontend-technology.md b/docs/decisions/ADR-003-frontend-technology.md new file mode 100644 index 0000000..9651a43 --- /dev/null +++ b/docs/decisions/ADR-003-frontend-technology.md @@ -0,0 +1,289 @@ +# ADR-003: Front-end Technology Stack + +## Status +Accepted + +## Context +StarPunk requires a front-end for: +1. Public interface (homepage, note permalinks) - Server-side rendered +2. Admin interface (note creation/editing) - Requires some interactivity +3. Progressive enhancement principle - Core functionality must work without JavaScript + +The front-end must be minimal, elegant, and align with the "no client-side complexity" principle stated in CLAUDE.MD. 
+ +## Decision + +### Public Interface: Server-Side Rendered HTML +- **Template Engine**: Jinja2 (included with Flask) +- **CSS**: Custom CSS (no framework) +- **JavaScript**: None required for V1 +- **Build Tools**: None required + +### Admin Interface: Enhanced Server-Side Rendering +- **Template Engine**: Jinja2 (included with Flask) +- **CSS**: Custom CSS (shared with public interface) +- **JavaScript**: Minimal vanilla JavaScript for markdown preview only +- **Build Tools**: None required + +### Asset Management +- **CSS**: Single stylesheet served statically +- **JavaScript**: Single optional file for markdown preview +- **No bundler**: Direct file serving +- **No transpilation**: Modern browsers only (ES6+) + +## Rationale + +### Server-Side Rendering (SSR) +**Simplicity Score: 10/10** +- Zero build process +- No JavaScript framework complexity +- Direct Flask template rendering +- Familiar Jinja2 syntax + +**Fitness Score: 10/10** +- Perfect for content-first site +- Faster initial page load +- Better SEO (though not critical for single-user) +- Works without JavaScript +- Easier to implement microformats + +**Maintenance Score: 10/10** +- Jinja2 is stable and mature +- No framework version updates +- No npm dependency hell +- Templates are simple HTML + +### No CSS Framework +**Simplicity Score: 10/10** +- Custom CSS is ~200 lines for entire site +- No unused classes or styles +- Full control over appearance +- No framework learning curve + +**Fitness Score: 9/10** +- StarPunk needs minimal, elegant design +- Single theme, no customization needed +- Mobile-responsive can be achieved with simple media queries +- No complex components needed + +### Minimal JavaScript Approach +**Simplicity Score: 9/10** +- Vanilla JavaScript only (no React/Vue/Svelte) +- Single purpose: markdown preview in admin +- Optional progressive enhancement +- No build step required + +**Fitness Score: 10/10** +- Markdown preview improves UX but isn't required +- All functionality 
works without JavaScript +- Can use fetch API for preview without library +- Modern browser features are sufficient + +## Consequences + +### Positive +- Zero build time +- No node_modules directory +- Instant development setup +- Fast page loads +- Works with JavaScript disabled +- Easy to understand and modify +- Microformats implementation is straightforward +- Complete control over HTML output + +### Negative +- No TypeScript type checking +- No hot module replacement (but Flask auto-reload works) +- Manual CSS organization required +- Must write responsive CSS manually + +### Mitigation +- Keep JavaScript minimal and well-commented +- Organize CSS with clear sections +- Use CSS custom properties for theming +- Test manually in multiple browsers +- Validate HTML with W3C validator + +## Frontend File Structure + +``` +static/ +├── css/ +│ └── style.css # Single stylesheet for entire site +└── js/ + └── preview.js # Optional markdown preview (admin only) + +templates/ +├── base.html # Base template with HTML structure +├── index.html # Homepage (note list) +├── note.html # Single note permalink +└── admin/ + ├── base.html # Admin base template + ├── dashboard.html # Admin dashboard + ├── new.html # Create new note + └── edit.html # Edit existing note +``` + +## CSS Architecture + +### Custom CSS Properties (Variables) +```css +:root { + --color-text: #333; + --color-bg: #fff; + --color-link: #0066cc; + --color-border: #ddd; + --font-body: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; + --font-mono: 'SF Mono', Monaco, monospace; + --spacing-unit: 1rem; + --max-width: 42rem; +} +``` + +### Mobile-First Responsive Design +```css +/* Base: Mobile styles */ +body { padding: 1rem; } + +/* Tablet and up */ +@media (min-width: 768px) { + body { padding: 2rem; } +} +``` + +## JavaScript Architecture + +### Markdown Preview Implementation +```javascript +// static/js/preview.js +// Simple markdown preview using marked.js CDN (no build step) +// Progressive 
enhancement - form works without this +``` + +**Decision**: Use marked.js from CDN for client-side preview +- **Justification**: Small, dependency-free renderer; the server renders with Python-Markdown, so the preview is approximate and server output is authoritative +- **Simplicity**: No bundling required +- **Reliability**: CDN delivers cached version +- **Alternative**: No preview (acceptable fallback) + +## Template Organization + +### Jinja2 Template Strategy +- **Inheritance**: Use base templates for common structure +- **Blocks**: Define clear content blocks for overriding +- **Macros**: Create reusable microformat snippets +- **Filters**: Use Jinja2 filters for date formatting + +### Example Base Template Structure +```jinja2 +{# templates/base.html #} + + + + + + {% block title %}{{ site.title }}{% endblock %} + + + {% block head %}{% endblock %} + + + {% block content %}{% endblock %} + + +``` + +## Microformats Integration + +Server-side rendering makes microformats implementation straightforward: + +```jinja2 +{# Macro for h-entry note rendering #} +{% macro render_note(note) %} + +{% endmacro %} +``` + +## Build and Development Workflow + +### Development +1. Run Flask development server: `flask run` +2. Edit templates/CSS/JS directly +3. Browser auto-refresh on template changes +4. No build step required + +### Production +1. Copy static files to production +2. Templates are rendered on-demand +3. Optionally enable Flask caching for rendered HTML +4. 
Serve static assets with nginx/Apache (optional) + +## Browser Support +- Modern browsers (Chrome 90+, Firefox 88+, Safari 14+, Edge 90+) +- Mobile browsers (iOS Safari 14+, Chrome Android 90+) +- Progressive enhancement ensures basic functionality on older browsers + +## Alternatives Considered + +### React/Vue/Svelte (Rejected) +- **Simplicity**: 2/10 - Requires build tools, npm, bundlers +- **Fitness**: 3/10 - Massive overkill for content site +- **Maintenance**: 5/10 - Constant framework updates +- **Verdict**: Violates "no client-side complexity" principle + +### htmx (Considered) +- **Simplicity**: 8/10 - Single JavaScript file, declarative +- **Fitness**: 6/10 - Useful for dynamic updates, but not needed in V1 +- **Maintenance**: 8/10 - Stable, minimal dependencies +- **Verdict**: Interesting for V2, but V1 doesn't need dynamic updates + +### Alpine.js (Considered) +- **Simplicity**: 8/10 - Lightweight, declarative +- **Fitness**: 5/10 - Good for small interactions, but we barely need any +- **Maintenance**: 8/10 - Well maintained +- **Verdict**: Too much for the minimal JS we need + +### Tailwind CSS (Rejected) +- **Simplicity**: 4/10 - Requires build process, large configuration +- **Fitness**: 3/10 - Utility-first doesn't fit minimal design needs +- **Maintenance**: 7/10 - Well maintained but heavy +- **Verdict**: Build process violates simplicity; custom CSS is sufficient + +### Bootstrap/Bulma (Rejected) +- **Simplicity**: 5/10 - Large framework with many unused features +- **Fitness**: 3/10 - Component-heavy, we need minimal custom design +- **Maintenance**: 9/10 - Very stable +- **Verdict**: Too much CSS for what we need + +### PicoCSS/Water.css (Considered) +- **Simplicity**: 9/10 - Classless CSS, just include and go +- **Fitness**: 7/10 - Good starting point but may not match design vision +- **Maintenance**: 8/10 - Maintained, simple +- **Verdict**: Close consideration, but custom CSS gives full control + +## Standards Compliance +- Semantic HTML5 
elements +- Valid HTML (W3C validator) +- Accessible forms and navigation +- Proper heading hierarchy +- ARIA labels where needed +- Mobile-responsive (viewport meta tag) +- Progressive enhancement (works without JS) + +## References +- Jinja2 Documentation: https://jinja.palletsprojects.com/ +- MDN Web Docs: https://developer.mozilla.org/ +- Microformats2: http://microformats.org/wiki/h-entry +- Progressive Enhancement: https://developer.mozilla.org/en-US/docs/Glossary/Progressive_Enhancement +- Semantic HTML: https://developer.mozilla.org/en-US/docs/Glossary/Semantics diff --git a/docs/decisions/ADR-004-file-based-note-storage.md b/docs/decisions/ADR-004-file-based-note-storage.md new file mode 100644 index 0000000..3571261 --- /dev/null +++ b/docs/decisions/ADR-004-file-based-note-storage.md @@ -0,0 +1,384 @@ +# ADR-004: File-Based Note Storage Architecture + +## Status +Accepted + +## Context +The user explicitly requires notes to be stored as files on disk rather than as database records. This is critical for: +1. Data portability - notes can be backed up, moved, and read without the application +2. User ownership - direct access to content in human-readable format +3. Simplicity - text files are the simplest storage mechanism +4. Future-proofing - markdown files will be readable forever + +However, we also need SQLite for: +- Metadata (timestamps, slugs, published status) +- Authentication tokens +- Fast querying and indexing +- Relational data + +The challenge is designing how file-based storage and database metadata work together efficiently. 
+ +## Decision + +### Hybrid Architecture: Files + Database Metadata + +**Notes Content**: Stored as markdown files on disk +**Notes Metadata**: Stored in SQLite database +**Source of Truth**: Files are authoritative for content; database is authoritative for metadata + +### File Storage Strategy + +#### Directory Structure +``` +data/ +├── notes/ +│ ├── 2024/ +│ │ ├── 11/ +│ │ │ ├── my-first-note.md +│ │ │ └── another-note.md +│ │ └── 12/ +│ │ └── december-note.md +│ └── 2025/ +│ └── 01/ +│ └── new-year-note.md +├── starpunk.db # SQLite database +└── .backups/ # Optional backup directory +``` + +#### File Naming Convention +- **Format**: `{slug}.md` +- **Slug rules**: lowercase, alphanumeric, hyphens only, no spaces +- **Example**: `my-first-note.md` +- **Uniqueness**: Enforced globally by the `UNIQUE` constraint on `notes.slug`; the filesystem only prevents duplicate names within a single `YYYY/MM` directory + +#### File Organization +- **Pattern**: Year/Month subdirectories (`YYYY/MM/`) +- **Rationale**: + - Keeps directories manageable (max ~30 files per month) + - Easy chronological browsing + - Matches natural mental model + - Scalable to thousands of notes +- **Example path**: `data/notes/2024/11/my-first-note.md` + +### Database Schema + +```sql +CREATE TABLE notes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + slug TEXT UNIQUE NOT NULL, -- URL identifier + file_path TEXT UNIQUE NOT NULL, -- Relative path from data/notes/ + published BOOLEAN DEFAULT 0, -- Publication status + created_at TIMESTAMP NOT NULL, -- Creation timestamp + updated_at TIMESTAMP NOT NULL, -- Last modification timestamp + content_hash TEXT -- SHA-256 of file content for change detection +); + +CREATE INDEX idx_notes_created_at ON notes(created_at DESC); +CREATE INDEX idx_notes_published ON notes(published); +CREATE INDEX idx_notes_slug ON notes(slug); +``` + +### File Format + +#### Markdown File Structure +```markdown +[Content of the note in markdown format] +``` + +**That's it.** No frontmatter, no metadata in file. Keep it pure. 
+ +**Rationale**: +- Maximum portability +- Readable by any markdown editor +- No custom parsing required +- Metadata belongs in database (timestamps, slugs, etc.) +- User sees just their content when opening file + +#### Optional Future Enhancement (V2+) +If frontmatter becomes necessary, use standard YAML: +```markdown +--- +title: Optional Title +tags: tag1, tag2 +--- +[Content here] +``` + +But for V1: **NO frontmatter**. + +## Rationale + +### File Storage Benefits +**Simplicity Score: 10/10** +- Text files are the simplest storage +- No binary formats +- Human-readable +- Easy to backup (rsync, git, Dropbox, etc.) + +**Portability Score: 10/10** +- Standard markdown format +- Readable without application +- Can be edited in any text editor +- Easy to migrate to other systems + +**Ownership Score: 10/10** +- User has direct access to their content +- No vendor lock-in +- Can grep their own notes +- Backup is simple file copy + +### Hybrid Approach Benefits +**Performance**: Database indexes enable fast queries +**Flexibility**: Rich metadata without cluttering files +**Integrity**: Database enforces uniqueness and relationships +**Simplicity**: Each system does what it's best at + +## Consequences + +### Positive +- Notes are portable markdown files +- User can edit notes directly in filesystem if desired +- Easy backup (just copy data/ directory) +- Database provides fast metadata queries +- Can rebuild database from files if needed +- Git-friendly (can version control notes) +- Maximum data ownership + +### Negative +- Must keep file and database in sync +- Potential for orphaned database records +- Potential for orphaned files +- File operations are slower than database queries +- Must handle file system errors + +### Mitigation Strategies + +#### Sync Strategy +1. **On note creation**: Write file FIRST, then database record +2. **On note update**: Update file FIRST, then database record (update timestamp, content_hash) +3. 
**On note delete**: Mark as deleted in database, optionally move file to .trash/ +4. **On startup**: Optional integrity check to detect orphans + +#### Orphan Detection +```python +# Pseudo-code for integrity check +def check_integrity(): + # Find database records without files + for note in database.all_notes(): + if not file_exists(note.file_path): + log_error(f"Orphaned database record: {note.slug}") + + # Find files without database records + for file in filesystem.all_markdown_files(): + if not database.has_note(file_path=file): + log_error(f"Orphaned file: {file}") +``` + +#### Content Hash Strategy +- Calculate SHA-256 hash of file content on write +- Store hash in database +- On read, can verify content hasn't been externally modified +- Enables change detection and cache invalidation + +## Data Flow Patterns + +### Creating a Note + +1. Generate slug from content or timestamp +2. Determine file path: `data/notes/{YYYY}/{MM}/{slug}.md` +3. Create directories if needed +4. Write markdown content to file +5. Calculate content hash +6. Insert record into database +7. Return success + +**Transaction Safety**: If database insert fails, delete file and raise error + +### Reading a Note + +**By Slug**: +1. Query database for file_path by slug +2. Read file content from disk +3. Return content + metadata + +**For List**: +1. Query database for metadata (sorted, filtered) +2. Optionally read file content for each note +3. Return list with metadata and content + +### Updating a Note + +1. Query database for existing file_path +2. Write new content to file (atomic write to temp, then rename) +3. Calculate new content hash +4. Update database record (timestamp, content_hash) +5. Return success + +**Transaction Safety**: Keep backup of original file until database update succeeds + +### Deleting a Note + +**Soft Delete (Recommended)**: +1. Update database: set `deleted_at` timestamp +2. Optionally move file to `.trash/` subdirectory +3. 
Return success + +**Hard Delete**: +1. Delete database record +2. Delete file from filesystem +3. Return success + +## File System Operations + +### Atomic Writes +```python +# Pseudo-code for atomic file write +def write_note_safely(path, content): + temp_path = f"{path}.tmp" + write(temp_path, content) + atomic_rename(temp_path, path) # Atomic on POSIX systems +``` + +### Directory Creation +```python +# Ensure directory exists before writing +def ensure_note_directory(year, month): + path = f"data/notes/{year}/{month}" + makedirs(path, exist_ok=True) + return path +``` + +### Slug Generation +```python +# Generate URL-safe slug +def generate_slug(content=None, timestamp=None): + if content: + # Extract first few words, normalize + words = extract_first_words(content, max=5) + slug = normalize(words) # lowercase, hyphens, no special chars + else: + # Fallback: timestamp-based + slug = timestamp.strftime("%Y%m%d-%H%M%S") + + # Ensure uniqueness + while database.slug_exists(slug): + slug = f"{slug}-{random_suffix()}" + + return slug +``` + +## Backup Strategy + +### Simple Backup +```bash +# User can backup with simple copy +cp -r data/ backup/ + +# Or with rsync +rsync -av data/ backup/ + +# Or with git +cd data/ && git add . && git commit -m "Backup" +``` + +### Restore Strategy +1. Copy data/ directory to new location +2. Application reads database +3.
If database missing or corrupt, rebuild from files: + ```python + def rebuild_database_from_files(): + for file_path in glob("data/notes/**/*.md"): + content = read_file(file_path) + metadata = extract_metadata_from_path(file_path) + database.insert_note( + slug=metadata.slug, + file_path=file_path, + created_at=stat(file_path).created, + updated_at=stat(file_path).modified, + content_hash=sha256(content) + ) + ``` + +## Standards Compliance + +### Markdown Standard +- CommonMark specification +- No custom extensions in V1 +- Standard markdown processors can read files + +### File System Compatibility +- ASCII-safe filenames +- No special characters in paths +- Maximum path length under 255 characters +- POSIX-compatible directory structure + +## Alternatives Considered + +### All-Database Storage (Rejected) +- **Simplicity**: 8/10 - Simpler code, single source of truth +- **Portability**: 2/10 - Requires database export +- **Ownership**: 3/10 - User doesn't have direct access +- **Verdict**: Violates user requirement for file-based storage + +### Flat File Directory (Rejected) +``` +data/notes/ +├── note-1.md +├── note-2.md +├── note-3.md +...
+├── note-9999.md +``` +- **Simplicity**: 10/10 - Simplest possible structure +- **Scalability**: 3/10 - Thousands of files in one directory is slow +- **Verdict**: Not scalable, poor performance with many notes + +### Git-Based Storage (Rejected for V1) +- **Simplicity**: 6/10 - Requires git integration +- **Portability**: 9/10 - Excellent versioning +- **Performance**: 7/10 - Git operations have overhead +- **Verdict**: Interesting for V2, but adds complexity to V1 + +### Frontmatter in Files (Rejected for V1) +```markdown +--- +slug: my-note +created: 2024-11-18 +published: true +--- +Note content here +``` +- **Simplicity**: 7/10 - Requires YAML parsing +- **Portability**: 8/10 - Common pattern, but not pure markdown +- **Single Source**: 10/10 - All data in one place +- **Verdict**: Deferred to V2; V1 keeps files pure + +### JSON Metadata Sidecar (Rejected) +``` +notes/ +├── my-note.md +├── my-note.json # Metadata +``` +- **Simplicity**: 6/10 - Doubles number of files +- **Portability**: 7/10 - Markdown still clean, but extra files +- **Sync Issues**: 5/10 - Must keep two files in sync +- **Verdict**: Database metadata is cleaner + +## Implementation Checklist + +- [ ] Create data/notes directory structure on initialization +- [ ] Implement slug generation algorithm +- [ ] Implement atomic file write operations +- [ ] Implement content hash calculation +- [ ] Create database schema with indexes +- [ ] Implement sync between files and database +- [ ] Implement orphan detection (optional for V1) +- [ ] Add file system error handling +- [ ] Create backup documentation for users +- [ ] Test with thousands of notes for performance + +## References +- CommonMark Spec: https://spec.commonmark.org/ +- POSIX File Operations: https://pubs.opengroup.org/onlinepubs/9699919799/ +- File System Best Practices: https://www.pathname.com/fhs/ +- Atomic File Operations: https://lwn.net/Articles/457667/ diff --git a/docs/decisions/ADR-005-indielogin-authentication.md 
b/docs/decisions/ADR-005-indielogin-authentication.md new file mode 100644 index 0000000..7995be7 --- /dev/null +++ b/docs/decisions/ADR-005-indielogin-authentication.md @@ -0,0 +1,421 @@ +# ADR-005: IndieLogin Authentication Integration + +## Status +Accepted + +## Context +The user has explicitly required external IndieLogin authentication via indielogin.com for V1. This is different from implementing a full IndieAuth server (which CLAUDE.MD mentions). The distinction is important: + +- **IndieAuth Server**: Host your own authentication endpoint (complex) +- **IndieLogin Service**: Use indielogin.com as an external authentication provider (simple) + +The user wants the simpler approach: delegate authentication to indielogin.com using their API (https://indielogin.com/api). + +IndieLogin.com is a service that: +1. Handles the OAuth 2.0 / IndieAuth flow +2. Verifies user identity via their website +3. Returns authenticated identity to our application +4. Supports multiple authentication methods (RelMeAuth, email, etc.) + +## Decision + +### Use IndieLogin.com as External Authentication Provider + +**Authentication Flow**: OAuth 2.0 Authorization Code flow via indielogin.com +**API Endpoint**: https://indielogin.com/auth +**Token Validation**: Server-side session tokens (not IndieAuth tokens) +**User Identity**: URL (me parameter) verified by indielogin.com + +### Architecture + +``` +User Browser → StarPunk → indielogin.com → User's Website + ↑ ↓ + └──────────────────────────────┘ + (Authenticated session) +``` + +## Authentication Flow + +### 1. Login Initiation +``` +User clicks "Login" + ↓ +StarPunk generates state token (CSRF protection) + ↓ +Redirect to: https://indielogin.com/auth? + - me={user_website} + - client_id={starpunk_url} + - redirect_uri={starpunk_url}/auth/callback + - state={random_token} +``` + +### 2. 
IndieLogin Processing +``` +indielogin.com verifies user identity: + - Checks for rel="me" links on user's website + - Or sends email verification + - Or uses other IndieAuth methods + ↓ +User authenticates via their chosen method + ↓ +indielogin.com redirects back to StarPunk +``` + +### 3. Callback Verification +``` +indielogin.com → StarPunk callback with: + - code={authorization_code} + - state={original_state} + ↓ +StarPunk verifies state matches + ↓ +StarPunk exchanges code for verified identity: + POST https://indielogin.com/auth + - code={authorization_code} + - client_id={starpunk_url} + - redirect_uri={starpunk_url}/auth/callback + ↓ +indielogin.com responds with: + { "me": "https://user-website.com" } + ↓ +StarPunk creates authenticated session +``` + +### 4. Session Management +``` +StarPunk stores session token in cookie + ↓ +Session token maps to authenticated user URL + ↓ +Admin routes check for valid session +``` + +## Implementation Requirements + +### Configuration Variables +``` +SITE_URL=https://starpunk.example.com +ADMIN_ME=https://your-website.com +SESSION_SECRET=random_secret_key +``` + +### Database Schema Addition +```sql +-- Add to existing schema +CREATE TABLE sessions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_token TEXT UNIQUE NOT NULL, + me TEXT NOT NULL, -- Authenticated user URL + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP NOT NULL, + last_used_at TIMESTAMP +); + +CREATE INDEX idx_sessions_token ON sessions(session_token); +CREATE INDEX idx_sessions_expires ON sessions(expires_at); + +CREATE TABLE auth_state ( + state TEXT PRIMARY KEY, + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP NOT NULL -- Short-lived (5 minutes) +); +``` + +### HTTP Client for API Calls +Use **httpx** (already selected in ADR-002) for: +- POST to https://indielogin.com/auth to exchange code +- Verify response contains valid "me" URL +- Handle network errors gracefully + +### Routes Required +``` +GET /admin/login - Display login 
form +POST /admin/login - Initiate IndieLogin flow +GET /auth/callback - Handle IndieLogin redirect +POST /admin/logout - Destroy session +``` + +### Login Flow Implementation + +#### Step 1: Login Form +```python +# /admin/login (GET) +# Display simple form asking for user's website URL +# Form submits to POST /admin/login with "me" parameter +``` + +#### Step 2: Initiate Authentication +```python +# /admin/login (POST) +def initiate_login(me_url): + # Validate me_url format + if not is_valid_url(me_url): + return error("Invalid URL") + + # Generate and store state token + state = generate_random_token() + store_state(state, expires_in_minutes=5) + + # Build IndieLogin authorization URL + params = { + 'me': me_url, + 'client_id': SITE_URL, + 'redirect_uri': f"{SITE_URL}/auth/callback", + 'state': state + } + + auth_url = f"https://indielogin.com/auth?{urlencode(params)}" + + # Redirect user to IndieLogin + return redirect(auth_url) +``` + +#### Step 3: Handle Callback +```python +# /auth/callback (GET) +def handle_callback(code, state): + # Verify state token (CSRF protection) + if not verify_state(state): + return error("Invalid state") + + # Exchange code for verified identity + response = httpx.post('https://indielogin.com/auth', data={ + 'code': code, + 'client_id': SITE_URL, + 'redirect_uri': f"{SITE_URL}/auth/callback" + }) + + if response.status_code != 200: + return error("Authentication failed") + + data = response.json() + me = data.get('me') + + # Verify this is the authorized admin + if me != ADMIN_ME: + return error("Unauthorized user") + + # Create session + session_token = generate_random_token() + create_session(session_token, me, expires_in_days=30) + + # Set session cookie + set_cookie('session', session_token, httponly=True, secure=True) + + # Redirect to admin dashboard + return redirect('/admin') +``` + +#### Step 4: Session Validation +```python +# Decorator for protected routes +def require_auth(f): + def wrapper(*args, **kwargs): + 
session_token = request.cookies.get('session') + + if not session_token: + return redirect('/admin/login') + + session = get_session(session_token) + + if not session or session.expired: + return redirect('/admin/login') + + # Update last_used_at + update_session_activity(session_token) + + # Store user info in request context + g.user_me = session.me + + return f(*args, **kwargs) + + return wrapper + +# Usage +@app.route('/admin') +@require_auth +def admin_dashboard(): + return render_template('admin/dashboard.html') +``` + +## Rationale + +### Why IndieLogin.com Instead of Self-Hosted IndieAuth? + +**Simplicity Score: 10/10 (IndieLogin) vs 4/10 (Self-hosted)** +- IndieLogin.com handles all complexity of: + - Discovering user's auth endpoints + - Verifying user identity + - Supporting multiple auth methods (RelMeAuth, email, etc.) + - PKCE implementation +- Self-hosted would require implementing full IndieAuth spec (complex) + +**Fitness Score: 10/10** +- Perfect for single-user system +- User controls their identity via their own website +- No password management needed +- Aligns with IndieWeb principles + +**Maintenance Score: 10/10** +- indielogin.com is maintained by IndieWeb community +- No auth code to maintain ourselves +- Security updates handled externally +- Well-tested service + +**Standards Compliance: Pass** +- Uses OAuth 2.0 / IndieAuth standards +- Compatible with IndieWeb ecosystem +- User identity is their URL (IndieWeb principle) + +### Why Session Cookies Instead of Access Tokens? 
+ +For admin interface (not Micropub): +- **Simpler**: Standard web session pattern +- **Secure**: HttpOnly cookies keep the session token out of reach of injected JavaScript (limits XSS session theft) +- **Appropriate**: Admin is human using browser, not API client +- **Note**: Micropub will still use access tokens (separate ADR needed) + +## Consequences + +### Positive +- Extremely simple implementation (< 100 lines of code) +- No authentication code to maintain +- Secure by default (delegated to trusted service) +- True IndieWeb authentication (user owns identity) +- No passwords to manage +- Works immediately without setup +- Community-maintained service + +### Negative +- Dependency on external service (indielogin.com) +- Requires internet connection to authenticate +- Single point of failure for login (mitigated: session stays valid) +- User must have their own website/URL + +### Mitigation +- Sessions last 30 days, so brief indielogin.com outages don't lock out user +- Document fallback: edit database to create session manually if needed +- IndieLogin.com is stable, community-run service with good uptime +- For V2: Consider optional email fallback or self-hosted IndieAuth + +## Security Considerations + +### State Token (CSRF Protection) +- Generate cryptographically random state token +- Store in database with short expiry (5 minutes) +- Verify state matches on callback +- Delete state after use (single-use tokens) + +### Session Token Security +- Generate with secrets.token_urlsafe(32) or similar +- Store hash in database (not plaintext) +- Mark cookies as HttpOnly and Secure +- Set SameSite=Lax for CSRF protection +- Implement session expiry (30 days) +- Support manual logout (session deletion) + +### Identity Verification +- Only allow ADMIN_ME URL to authenticate +- Verify "me" URL from indielogin.com exactly matches config +- Reject any other authenticated users +- Log authentication attempts + +### Network Security +- Use HTTPS for all communication +- Verify SSL certificates on httpx requests +- Handle network timeouts
gracefully +- Log authentication failures + +## Testing Strategy + +### Unit Tests +- State token generation and validation +- Session creation and expiry +- URL validation +- Cookie handling + +### Integration Tests +- Mock indielogin.com API responses +- Test full authentication flow +- Test session expiry +- Test unauthorized user rejection +- Test CSRF protection (invalid state) + +### Manual Testing +- Authenticate with real indielogin.com +- Verify session persistence +- Test logout functionality +- Test session expiry +- Test with wrong "me" URL + +## Alternatives Considered + +### Self-Hosted IndieAuth Server (Rejected) +- **Complexity**: Must implement full IndieAuth spec +- **Maintenance**: Security updates, endpoint discovery, token generation +- **Verdict**: Too complex for V1, violates simplicity principle + +### Password Authentication (Rejected) +- **Security**: Must hash passwords, handle resets, prevent brute force +- **IndieWeb**: Violates IndieWeb principle of URL-based identity +- **Verdict**: Not aligned with project goals + +### OAuth via GitHub/Google (Rejected) +- **Simplicity**: Easy to implement +- **IndieWeb**: Not IndieWeb-compatible, user doesn't own identity +- **Verdict**: Violates IndieWeb requirements + +### Email Magic Links (Rejected) +- **Simplicity**: Requires email sending infrastructure +- **IndieWeb**: Not standard IndieWeb authentication +- **Verdict**: Deferred to V2 as fallback option + +### Multi-User IndieAuth (Rejected for V1) +- **Scope**: V1 is explicitly single-user +- **Complexity**: Would require user management +- **Verdict**: Out of scope, defer to V2 + +## Implementation Checklist + +- [ ] Add SESSION_SECRET and ADMIN_ME to configuration +- [ ] Create sessions and auth_state database tables +- [ ] Implement state token generation and storage +- [ ] Create login form template +- [ ] Implement /admin/login routes (GET and POST) +- [ ] Implement /auth/callback route +- [ ] Implement session creation and validation 
+- [ ] Create require_auth decorator +- [ ] Implement logout functionality +- [ ] Set secure cookie parameters +- [ ] Add authentication error handling +- [ ] Write unit tests for auth flow +- [ ] Write integration tests with mocked indielogin.com +- [ ] Test with real indielogin.com +- [ ] Document setup process for users + +## Configuration Example + +```bash +# .env file +SITE_URL=https://starpunk.example.com +ADMIN_ME=https://your-website.com +SESSION_SECRET=your-random-secret-key-here +``` + +## User Setup Documentation + +1. Deploy StarPunk to your server at `https://starpunk.example.com` +2. Configure `ADMIN_ME` to your personal website URL +3. Visit `/admin/login` +4. Enter your website URL (must match ADMIN_ME) +5. indielogin.com will verify your identity +6. Authenticate via your chosen method +7. Redirected back to StarPunk admin interface + +## References +- IndieLogin.com: https://indielogin.com/ +- IndieLogin API Documentation: https://indielogin.com/api +- IndieAuth Specification: https://indieauth.spec.indieweb.org/ +- OAuth 2.0 Spec: https://oauth.net/2/ +- Web Authentication Best Practices: https://cheatsheetseries.owasp.org/cheatsheets/Authentication_Cheat_Sheet.html diff --git a/docs/decisions/ADR-006-python-virtual-environment-uv.md b/docs/decisions/ADR-006-python-virtual-environment-uv.md new file mode 100644 index 0000000..c546073 --- /dev/null +++ b/docs/decisions/ADR-006-python-virtual-environment-uv.md @@ -0,0 +1,552 @@ +# ADR-006: Python Virtual Environment Management with uv + +## Status +Accepted + +## Context +StarPunk is a Python-based web application that requires dependency management and virtual environment isolation. 
Developer agents (AI assistants like Claude Code) need clear, unambiguous standards for: + +- Creating and managing Python virtual environments +- Installing and tracking dependencies +- Ensuring reproducible development environments +- Avoiding common pitfalls (polluting global Python, dependency conflicts) +- Maintaining consistency across development and deployment + +Traditional tools (pip, venv, virtualenv, poetry, pipenv) have various limitations: +- **pip + venv**: Slow dependency resolution, manual requirements.txt management +- **poetry**: Complex configuration, slow, dependency lock issues +- **pipenv**: Abandoned maintenance, slow performance +- **conda**: Heavyweight, non-standard for web development + +We need a tool that is fast, simple, and provides excellent developer experience while maintaining compatibility with standard Python packaging. + +## Decision +Use **uv** for all Python virtual environment and dependency management in StarPunk. + +uv will be the standard tool for: +- Creating virtual environments +- Installing dependencies +- Managing requirements +- Running Python commands in the virtual environment +- Synchronizing dependencies + +## Rationale + +### Simplicity Score: 10/10 +- Single tool for all environment management +- Simple command syntax (uv venv, uv pip install, uv run) +- Drop-in replacement for pip and virtualenv +- No complex configuration files +- Works with standard requirements.txt +- Written in Rust, installed as single binary + +### Performance Score: 10/10 +- 10-100x faster than pip for dependency resolution +- Parallel downloads and installations +- Efficient caching mechanism +- Near-instant virtual environment creation +- Minimal overhead for running commands + +### Fitness Score: 9/10 +- Perfect for small to medium Python projects +- Excellent for single-developer projects +- Works with standard Python packaging (PEP 517/518) +- Compatible with requirements.txt workflow +- Supports editable installs for development 
+- Works seamlessly with Flask and all our dependencies + +### Maintenance Score: 9/10 +- Actively developed by Astral (creators of ruff) +- Strong community adoption +- Excellent documentation +- Regular updates and improvements +- Modern codebase (Rust) +- Backed by funding and commercial support + +### Standards Compliance: Pass +- Full compatibility with pip +- Works with PyPI and all standard package indices +- Supports PEP 440 version specifiers +- Compatible with requirements.txt format +- Works with standard Python virtual environments +- No proprietary lock files (uses standard formats) + +## Implementation Details + +### 1. Installation Standards + +#### System-Level uv Installation +Developer agents MUST ensure uv is installed before creating environments: + +```bash +# Check if uv is installed +which uv + +# If not installed, install via pip (fallback) +pip install uv + +# Or install via official installer (preferred on Linux/macOS) +curl -LsSf https://astral.sh/uv/install.sh | sh +``` + +#### Verification +```bash +# Verify uv installation +uv --version +# Expected output: uv 0.x.x (or newer) +``` + +### 2. Virtual Environment Creation Standards + +#### Location and Naming +- **Standard location**: `/home/phil/Projects/starpunk/.venv` +- **Name**: Always use `.venv` (hidden directory) +- **DO NOT** use: `venv`, `env`, `virtualenv`, or custom names + +#### Creation Command +```bash +# Create virtual environment with uv +cd /home/phil/Projects/starpunk +uv venv .venv + +# Specify Python version (recommended) +uv venv .venv --python 3.11 +``` + +#### Post-Creation Verification +```bash +# Verify .venv directory exists +ls -la /home/phil/Projects/starpunk/.venv + +# Verify Python executable +/home/phil/Projects/starpunk/.venv/bin/python --version +``` + +### 3. 
Dependency Installation Standards + +#### Using requirements.txt (Primary Method) +```bash +# Install all dependencies from requirements.txt +uv pip install -r /home/phil/Projects/starpunk/requirements.txt + +# Verify installation +uv pip list +``` + +#### Installing Individual Packages +```bash +# Install a single package +uv pip install flask==3.0.* + +# Install multiple packages +uv pip install flask markdown feedgen +``` + +#### Development Dependencies +```bash +# Install dev dependencies (if requirements-dev.txt exists) +uv pip install -r /home/phil/Projects/starpunk/requirements-dev.txt +``` + +### 4. Running Commands in Virtual Environment + +#### Using uv run (Recommended) +```bash +# Run Python script +uv run /home/phil/Projects/starpunk/.venv/bin/python script.py + +# Run Flask development server +uv run /home/phil/Projects/starpunk/.venv/bin/flask run + +# Run pytest +uv run /home/phil/Projects/starpunk/.venv/bin/pytest + +# Run Python REPL +uv run /home/phil/Projects/starpunk/.venv/bin/python +``` + +#### Direct Execution (Alternative) +```bash +# Execute using absolute path to venv Python +/home/phil/Projects/starpunk/.venv/bin/python script.py +/home/phil/Projects/starpunk/.venv/bin/flask run +/home/phil/Projects/starpunk/.venv/bin/pytest +``` + +### 5. Dependency Tracking Standards + +#### Generating requirements.txt +```bash +# Freeze current environment to requirements.txt +uv pip freeze > /home/phil/Projects/starpunk/requirements.txt + +# Freeze with sorted output for consistency +uv pip freeze | sort > /home/phil/Projects/starpunk/requirements.txt +``` + +#### Adding New Dependencies +When adding a new dependency: +1. Install the package: `uv pip install package-name` +2. Update requirements.txt: `uv pip freeze | sort > requirements.txt` +3. Verify installation: `uv pip list | grep package-name` + +### 6. 
Environment Updates and Maintenance + +#### Updating Dependencies +```bash +# Update a specific package +uv pip install --upgrade flask + +# Update all packages (use with caution) +uv pip install --upgrade -r requirements.txt + +# Regenerate requirements.txt after updates +uv pip freeze | sort > requirements.txt +``` + +#### Cleaning and Rebuilding +```bash +# Remove virtual environment +rm -rf /home/phil/Projects/starpunk/.venv + +# Recreate from scratch +uv venv .venv --python 3.11 +uv pip install -r requirements.txt +``` + +## Developer Agent Standards + +### Critical Rules for AI Assistants + +#### Rule 1: ALWAYS Check for Existing Virtual Environment +Before creating a new virtual environment, ALWAYS check: + +```bash +# Check if .venv exists +if [ -d "/home/phil/Projects/starpunk/.venv" ]; then + echo "Virtual environment exists" + /home/phil/Projects/starpunk/.venv/bin/python --version +else + echo "Virtual environment does not exist" +fi +``` + +**NEVER** create a new virtual environment if one already exists without explicit user permission. + +#### Rule 2: ALWAYS Use Absolute Paths +Agent threads reset cwd between bash calls. 
ALWAYS use absolute paths: + +**CORRECT:** +```bash +uv venv /home/phil/Projects/starpunk/.venv +/home/phil/Projects/starpunk/.venv/bin/python script.py +uv pip install -r /home/phil/Projects/starpunk/requirements.txt +``` + +**INCORRECT:** +```bash +uv venv .venv # Relative path - WRONG +./.venv/bin/python script.py # Relative path - WRONG +uv pip install -r requirements.txt # Relative path - WRONG +``` + +#### Rule 3: Verify Before Executing +Before running Python commands, verify the virtual environment: + +```bash +# Verification checklist +[ -d "/home/phil/Projects/starpunk/.venv" ] && echo "✓ venv exists" || echo "✗ venv missing" +[ -f "/home/phil/Projects/starpunk/.venv/bin/python" ] && echo "✓ Python exists" || echo "✗ Python missing" +/home/phil/Projects/starpunk/.venv/bin/python --version +``` + +#### Rule 4: Handle Errors Gracefully +If virtual environment operations fail: + +1. **Check uv installation**: `which uv` +2. **Check Python version**: `python3 --version` +3. **Check disk space**: `df -h /home/phil/Projects/starpunk` +4. **Report specific error** to user with context +5. **DO NOT** silently continue with global Python + +#### Rule 5: Never Modify Global Python +**NEVER** run these commands: +```bash +# FORBIDDEN - modifies global Python +pip install package +python3 -m pip install package +sudo pip install package +``` + +**ALWAYS** use virtual environment: +```bash +# CORRECT - uses virtual environment +uv pip install package +uv pip install --python /home/phil/Projects/starpunk/.venv/bin/python package +``` + +#### Rule 6: Track Dependency Changes +After installing or removing packages: + +1. Update requirements.txt: `uv pip freeze | sort > /home/phil/Projects/starpunk/requirements.txt` +2. Verify changes: `git -C /home/phil/Projects/starpunk diff requirements.txt` (if applicable) +3. Inform user of changes made + +### Standard Agent Workflow + +#### Scenario 1: First-Time Setup +```bash +# 1. Check if venv exists +if [ ! -d "/home/phil/Projects/starpunk/.venv" ]; then + echo "Creating virtual environment..."
+ uv venv /home/phil/Projects/starpunk/.venv --python 3.11 +fi + +# 2. Verify creation +/home/phil/Projects/starpunk/.venv/bin/python --version + +# 3. Install dependencies (if requirements.txt exists) +if [ -f "/home/phil/Projects/starpunk/requirements.txt" ]; then + uv pip install -r /home/phil/Projects/starpunk/requirements.txt +fi + +# 4. Verify installation +uv pip list +``` + +#### Scenario 2: Running Development Server +```bash +# 1. Verify venv exists +[ -d "/home/phil/Projects/starpunk/.venv" ] || echo "ERROR: Virtual environment missing" + +# 2. Verify Flask is installed +/home/phil/Projects/starpunk/.venv/bin/python -c "import flask; print(flask.__version__)" + +# 3. Run Flask development server +/home/phil/Projects/starpunk/.venv/bin/flask --app /home/phil/Projects/starpunk/app.py run +``` + +#### Scenario 3: Adding New Dependency +```bash +# 1. Install package +uv pip install httpx + +# 2. Verify installation +uv pip show httpx + +# 3. Update requirements.txt +uv pip freeze | sort > /home/phil/Projects/starpunk/requirements.txt + +# 4. Confirm to user +echo "Added httpx to project dependencies" +``` + +#### Scenario 4: Running Tests +```bash +# 1. Verify pytest is installed +/home/phil/Projects/starpunk/.venv/bin/python -c "import pytest; print(pytest.__version__)" + +# 2. Run tests +/home/phil/Projects/starpunk/.venv/bin/pytest /home/phil/Projects/starpunk/tests/ + +# 3. 
Run tests with coverage (if pytest-cov installed) +/home/phil/Projects/starpunk/.venv/bin/pytest --cov=/home/phil/Projects/starpunk/src /home/phil/Projects/starpunk/tests/ +``` + +## Project-Specific Standards + +### Python Version Requirements +- **Minimum**: Python 3.11 +- **Recommended**: Python 3.11 or 3.12 +- **Rationale**: Modern Python features, improved performance, security updates + +### Directory Structure +``` +/home/phil/Projects/starpunk/ +├── .venv/ # Virtual environment (NEVER commit) +├── requirements.txt # Production dependencies +├── requirements-dev.txt # Development dependencies (optional) +├── src/ # Application source code +├── tests/ # Test files +└── docs/ # Documentation +``` + +### .gitignore Requirements +The following MUST be in .gitignore: +``` +# Virtual Environment +.venv/ +venv/ +env/ +ENV/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +``` + +### Environment Variables +Use python-dotenv for configuration: +```bash +# .env file (NEVER commit to git) +FLASK_APP=app.py +FLASK_ENV=development +SECRET_KEY=your-secret-key +DATABASE_PATH=/home/phil/Projects/starpunk/data/starpunk.db +``` + +Load in application: +```python +from dotenv import load_dotenv +load_dotenv() +``` + +### Requirements.txt Format +Follow these conventions: +``` +# Requirements.txt - StarPunk Dependencies +# Generated: 2025-11-18 + +# Web Framework +flask==3.0.* + +# Content Processing +markdown==3.5.* + +# Feed Generation +feedgen==1.0.* + +# HTTP Client +httpx==0.27.* + +# Configuration +python-dotenv==1.0.* +``` + +## Consequences + +### Positive +- **10-100x faster** dependency resolution and installation +- **Consistent environments** across development and deployment +- **Simple workflow** - one tool for all Python environment tasks +- **No activation required** - uv run handles environment automatically +- **Excellent caching** - faster subsequent installations +- **Standard compatibility** - works with all existing Python tools +- **Clear 
agent guidelines** - reduces errors in automated workflows +- **Isolated dependencies** - no conflicts with system Python + +### Negative +- **Additional tool dependency** - requires uv installation +- **Less familiar** - newer tool, smaller community than pip +- **Rust dependency** - uv is written in Rust (but distributed as binary) + +### Mitigation +- uv is easy to install (single binary, no compilation needed) +- uv is pip-compatible (drop-in replacement) +- Fallback to pip + venv is always possible +- Documentation and agent standards make adoption easy +- Active development and growing adoption reduce risk + +### Trade-offs Accepted +- **uv vs poetry**: We chose simplicity over advanced features +- **uv vs pipenv**: We chose active maintenance and speed +- **uv vs pip**: We chose performance over ubiquity +- **Single tool complexity**: Better than managing multiple tools + +## Verification Checklist + +Before considering the environment correctly set up, verify: + +- [ ] uv is installed and accessible: `which uv` +- [ ] Virtual environment exists: `ls -la /home/phil/Projects/starpunk/.venv` +- [ ] Python version is 3.11+: `/home/phil/Projects/starpunk/.venv/bin/python --version` +- [ ] Dependencies installed: `uv pip list` shows Flask, markdown, feedgen, httpx +- [ ] requirements.txt exists and is up to date +- [ ] .venv is in .gitignore +- [ ] Flask runs: `/home/phil/Projects/starpunk/.venv/bin/flask --version` + +## Integration with Development Workflow + +### Running Flask Application +```bash +# Development server +/home/phil/Projects/starpunk/.venv/bin/flask --app app.py run --debug + +# Production server (using gunicorn) +/home/phil/Projects/starpunk/.venv/bin/gunicorn app:app +``` + +### Running Tests +```bash +# All tests +/home/phil/Projects/starpunk/.venv/bin/pytest + +# Specific test file +/home/phil/Projects/starpunk/.venv/bin/pytest tests/test_api.py + +# With coverage +/home/phil/Projects/starpunk/.venv/bin/pytest --cov=src tests/ +``` + +### 
Code Quality Tools +```bash +# Format code with black +/home/phil/Projects/starpunk/.venv/bin/black src/ + +# Lint with flake8 +/home/phil/Projects/starpunk/.venv/bin/flake8 src/ + +# Type checking with mypy (if added) +/home/phil/Projects/starpunk/.venv/bin/mypy src/ +``` + +## Alternatives Considered + +### pip + venv (Rejected) +- **Simplicity**: 8/10 - Standard Python tools, well-known +- **Performance**: 4/10 - Very slow dependency resolution +- **Fitness**: 7/10 - Works but painful for larger dependency trees +- **Maintenance**: 10/10 - Built into Python, always maintained +- **Verdict**: Too slow, poor developer experience, but acceptable fallback + +### poetry (Rejected) +- **Simplicity**: 5/10 - Complex pyproject.toml, lock file management +- **Performance**: 5/10 - Slow dependency resolution +- **Fitness**: 6/10 - Overkill for simple project, lock files add complexity +- **Maintenance**: 7/10 - Maintained but has had reliability issues +- **Verdict**: Too complex for "minimal code" philosophy + +### pipenv (Rejected) +- **Simplicity**: 6/10 - Simpler than poetry, but still adds abstraction +- **Performance**: 4/10 - Known performance issues +- **Fitness**: 5/10 - Previously recommended, now effectively abandoned +- **Maintenance**: 2/10 - Minimal maintenance, community has moved on +- **Verdict**: Dead project, poor performance + +### conda (Rejected) +- **Simplicity**: 3/10 - Heavy, complex environment management +- **Performance**: 5/10 - Slower than uv, larger downloads +- **Fitness**: 2/10 - Designed for data science, not web development +- **Maintenance**: 9/10 - Well maintained, large ecosystem +- **Verdict**: Wrong tool for web application development + +### PDM (Considered) +- **Simplicity**: 7/10 - Modern, PEP 582 support +- **Performance**: 8/10 - Fast, but not as fast as uv +- **Fitness**: 7/10 - Good for modern Python projects +- **Maintenance**: 8/10 - Actively maintained, growing community +- **Verdict**: Good alternative, but uv is faster 
and simpler + +## References +- uv Documentation: https://docs.astral.sh/uv/ +- uv GitHub: https://github.com/astral-sh/uv +- Python Virtual Environments: https://docs.python.org/3/library/venv.html +- PEP 405 (Python Virtual Environments): https://peps.python.org/pep-0405/ +- requirements.txt format: https://pip.pypa.io/en/stable/reference/requirements-file-format/ +- Astral (uv creators): https://astral.sh/ + +## Change Log +- 2025-11-18: Initial version - Established uv as standard tool for StarPunk Python environment management diff --git a/docs/decisions/ADR-007-slug-generation-algorithm.md b/docs/decisions/ADR-007-slug-generation-algorithm.md new file mode 100644 index 0000000..f53d2ad --- /dev/null +++ b/docs/decisions/ADR-007-slug-generation-algorithm.md @@ -0,0 +1,487 @@ +# ADR-007: Slug Generation Algorithm + +## Status +Accepted + +## Context + +Notes in StarPunk require URL-safe identifiers (slugs) for permalinks and file naming. The slug generation algorithm is critical because: + +1. **User experience**: Slugs appear in URLs and should be readable/meaningful +2. **SEO**: Descriptive slugs improve search engine optimization +3. **File system**: Slugs become filenames, must be filesystem-safe +4. **Uniqueness**: Slugs must be unique across all notes +5. **Portability**: Slugs should work across different systems and browsers + +The challenge is designing an algorithm that creates readable, unique, safe slugs automatically from note content. 
+ +## Decision + +### Content-Based Slug Generation with Timestamp Fallback + +**Primary Algorithm**: Extract first N words from content and normalize +**Fallback**: Timestamp-based slug when content is insufficient +**Uniqueness**: Random suffix when collision detected + +### Algorithm Specification + +#### Step 1: Extract Words +```python +# Extract first 5 words from content +words = content.split()[:5] +text = " ".join(words) +``` + +#### Step 2: Normalize +```python +# Convert to lowercase +text = text.lower() + +# Replace spaces with hyphens +text = text.replace(" ", "-") + +# Remove all characters except a-z, 0-9, and hyphens +text = re.sub(r'[^a-z0-9-]', '', text) + +# Collapse multiple hyphens +text = re.sub(r'-+', '-', text) + +# Strip leading/trailing hyphens +text = text.strip('-') +``` + +#### Step 3: Validate Length +```python +# If slug too short or empty, use timestamp fallback +if len(text) < 1: + text = created_at.strftime("%Y%m%d-%H%M%S") +``` + +#### Step 4: Truncate +```python +# Limit to 100 characters +text = text[:100] +``` + +#### Step 5: Check Uniqueness +```python +# If slug exists, add random 4-character suffix +if slug_exists(text): + text = f"{text}-{random_alphanumeric(4)}" +``` + +### Character Set + +**Allowed characters**: `a-z`, `0-9`, `-` (hyphen) + +**Rationale**: +- URL-safe without encoding +- Filesystem-safe on all platforms (Windows, Linux, macOS) +- Human-readable +- No escaping required in HTML +- Compatible with DNS hostnames (if ever used) + +### Examples + +| Input Content | Generated Slug | +|--------------|----------------| +| "Hello World! This is my first note." | `hello-world-this-is-my` | +| "Testing... 
with special chars!@#" | `testing-with-special-chars` | +| "2024-11-18 Daily Journal Entry" | `2024-11-18-daily-journal-entry` | +| "A" (too short) | `20241118-143022` (timestamp) | +| " " (whitespace only) | Error: ValueError | +| "Hello World" (duplicate) | `hello-world-a7c9` (random suffix) | + +### Slug Uniqueness Strategy + +**Collision Detection**: Check database for existing slug before use + +**Resolution**: Append random 4-character suffix +- Character set: `a-z0-9` (36 characters) +- Combinations: 36^4 = 1,679,616 possible suffixes +- Collision probability: Negligible for reasonable note counts + +**Example**: +``` +Original: hello-world +Collision: hello-world-a7c9 +Collision: hello-world-x3k2 +``` + +### Timestamp Fallback Format + +**Pattern**: `YYYYMMDD-HHMMSS` +**Example**: `20241118-143022` + +**When Used**: +- Not used for empty or whitespace-only content: that raises `ValueError` instead +- Normalized slug is empty (after removing special characters) +- Normalized slug is too short (< 1 character) + +**Rationale**: +- Unique unless two notes are created in the same second +- Sortable chronologically +- Still readable and meaningful +- No special characters required + +## Rationale + +### Content-Based Generation (Score: 9/10) + +**Pros**: +- **Readability**: Users can understand URL meaning +- **SEO**: Search engines prefer descriptive URLs +- **Memorability**: Easier to remember and share +- **Meaningful**: Reflects note content + +**Cons**: +- **Collisions**: Multiple notes might have similar titles +- **Changes**: Editing note doesn't update slug (by design) + +### First 5 Words (Score: 8/10) + +**Pros**: +- **Sufficient**: 5 words usually capture note topic +- **Concise**: Keeps URLs short and readable +- **Consistent**: Predictable slug length + +**Cons**: +- **Arbitrary**: 5 is somewhat arbitrary (could be 3-7) +- **Language**: Assumes space-separated words (English-centric) + +**Alternatives Considered**: +- First 3 words: Too short, often not descriptive 
+- First 10 words: Too long, URLs become unwieldy +- First line: Could be very long, harder to normalize +- First sentence: Variable length, complex to parse + +**Decision**: 5 words is a good balance (configurable constant) + +### Lowercase with Hyphens (Score: 10/10) + +**Pros**: +- **URL Standard**: Common pattern (github.com, stackoverflow.com) +- **Readability**: Easier to read than underscores or camelCase +- **Compatibility**: Works everywhere +- **Simplicity**: One separator type only + +**Cons**: +- None significant + +### Alphanumeric Only (Score: 10/10) + +**Pros**: +- **Safety**: No escaping required in URLs or filenames +- **Portability**: Works on all filesystems (FAT32, NTFS, ext4, APFS) +- **Predictability**: No ambiguity about character handling + +**Cons**: +- **Unicode Loss**: Non-ASCII characters stripped (acceptable trade-off) + +### Random Suffix for Uniqueness (Score: 9/10) + +**Pros**: +- **Simplicity**: No complex conflict resolution +- **Security**: Cryptographically secure random (secrets module) +- **Scalability**: 1.6M possible suffixes per base slug + +**Cons**: +- **Ugliness**: Suffix looks less clean (but rare occurrence) +- **Unpredictability**: User can't control suffix + +**Alternatives Considered**: +- Incrementing numbers (`hello-world-2`, `hello-world-3`): More predictable but reveals note count +- Longer random suffix: More secure but uglier URLs +- User-specified slug: More complex, deferred to V2 + +**Decision**: 4-character random suffix is good balance + +## Consequences + +### Positive + +1. **Automatic**: No user input required for slug +2. **Readable**: Slugs are human-readable and meaningful +3. **Safe**: Works on all platforms and browsers +4. **Unique**: Collision resolution ensures uniqueness +5. **SEO-friendly**: Descriptive URLs help search ranking +6. **Predictable**: User can anticipate what slug will be +7. **Simple**: Single, consistent algorithm + +### Negative + +1. 
**Not editable**: User can't customize slug in V1 +2. **English-biased**: Assumes space-separated words +3. **Unicode stripped**: Non-ASCII content loses characters +4. **Content-dependent**: Similar content = similar slugs +5. **Timestamp fallback**: Short notes get ugly timestamp slugs + +### Mitigations + +**Non-editable slugs**: +- V1 trade-off for simplicity +- V2 can add custom slug support +- Users can still reference notes by slug once created + +**English-bias**: +- Acceptable for V1 (English-first IndieWeb) +- V2 can add Unicode slug support (requires more complex normalization) + +**Unicode stripping**: +- Markdown content can still contain Unicode (only slug is ASCII) +- Timestamp fallback ensures note is still creatable +- V2 can use Unicode normalization (transliteration) + +**Timestamp fallback**: +- Rare occurrence (most notes have >5 words) +- Still functional and unique +- V2 can improve (use first word if exists + timestamp) + +## Standards Compliance + +### URL Standards (RFC 3986) + +Slugs comply with URL path segment requirements: +- No percent-encoding required +- No reserved characters (`/`, `?`, `#`, etc.) +- Case-insensitive safe (always lowercase) + +### Filesystem Standards + +Slugs work on all major filesystems: +- **FAT32**: Yes (no special chars, length OK) +- **NTFS**: Yes +- **ext4**: Yes +- **APFS**: Yes +- **HFS+**: Yes + +**Reserved names**: None of our slugs conflict with OS reserved names (CON, PRN, etc.) 
+ +### IndieWeb Recommendations + +Aligns with IndieWeb permalink best practices: +- Descriptive URLs +- No query parameters +- Short and memorable +- Permanent (don't change after creation) + +## Implementation Requirements + +### Validation Rules + +```python +# Valid slug pattern +SLUG_PATTERN = r'^[a-z0-9]+(?:-[a-z0-9]+)*$' + +# Constraints +MIN_SLUG_LENGTH = 1 +MAX_SLUG_LENGTH = 100 +``` + +### Reserved Slugs + +Certain slugs should be reserved for system routes: + +**Reserved List** (reject these slugs): +- `admin` +- `api` +- `static` +- `auth` +- `feed` +- `login` +- `logout` + +Implementation: +```python +RESERVED_SLUGS = {'admin', 'api', 'static', 'auth', 'feed', 'login', 'logout'} + +def is_slug_reserved(slug: str) -> bool: + return slug in RESERVED_SLUGS +``` + +### Error Cases + +```python +# Empty content +generate_slug("") # Raises ValueError + +# Whitespace only +generate_slug(" ") # Raises ValueError + +# Valid but short +generate_slug("Hi") # Returns timestamp: "20241118-143022" + +# Special characters only +generate_slug("!@#$%") # Returns timestamp: "20241118-143022" +``` + +## Alternatives Considered + +### UUID-based Slugs (Rejected) + +```python +slug = str(uuid.uuid4()) # "550e8400-e29b-41d4-a716-446655440000" +``` + +**Pros**: Guaranteed unique, no collision checking +**Cons**: Not human-readable, poor SEO, not memorable + +**Verdict**: Violates principle of readable URLs + +### Hash-based Slugs (Rejected) + +```python +slug = hashlib.sha256(content.encode()).hexdigest()[:12] # "a591a6d40bf4" +``` + +**Pros**: Deterministic, unique +**Cons**: Not human-readable, changes if content edited + +**Verdict**: Not meaningful to users + +### Title Extraction (Rejected for V1) + +```python +# Extract from # heading or first line +title = extract_title_from_markdown(content) +slug = normalize(title) +``` + +**Pros**: More semantic, uses actual title +**Cons**: Requires markdown parsing, more complex, title might not exist + +**Verdict**: Deferred to 
V2 (V1 uses first N words which is simpler) + +### User-Specified Slugs (Rejected for V1) + +```python +def create_note(content, custom_slug=None): + if custom_slug: + slug = validate_and_use(custom_slug) + else: + slug = generate_slug(content) +``` + +**Pros**: Maximum user control, no surprises +**Cons**: Requires UI input, validation complexity, user burden + +**Verdict**: Deferred to V2 (V1 auto-generates for simplicity) + +### Incrementing Numbers (Rejected) + +```python +# If collision, increment +slug = "hello-world" +slug = "hello-world-2" # Collision +slug = "hello-world-3" # Collision +``` + +**Pros**: Predictable, simple +**Cons**: Reveals note count, enumeration attack vector, less random + +**Verdict**: Random suffix is more secure and scales better + +## Performance Considerations + +### Generation Speed + +- Extract words: O(n) where n = content length (negligible, content is small) +- Normalize: O(m) where m = extracted text length (< 100 chars) +- Uniqueness check: O(log n) indexed database lookup, where n = number of notes +- Random suffix: O(1) generation + +**Target**: < 1ms per slug generation (easily achieved) + +### Database Impact + +- Index on `slug` column: O(log n) lookup +- Collision rate: < 1% (most notes have unique first 5 words) +- Random suffix retries: Almost never (1.6M combinations) + +## Testing Requirements + +### Test Cases + +**Normal Cases**: +- Standard English content → descriptive slug +- Content with punctuation → punctuation removed +- Content with numbers → numbers preserved +- Content with hyphens → hyphens preserved + +**Edge Cases**: +- Very short content → timestamp fallback +- Empty content → ValueError +- Special characters only → timestamp fallback +- Very long words → truncated to max length +- Unicode content → stripped to ASCII + +**Collision Cases**: +- Duplicate slug → random suffix added +- Multiple collisions → different random suffixes +- Reserved slug → rejected + +**Security Cases**: +- Path traversal attempt 
(`../../../etc/passwd`) +- Special characters (` My Note Title") + assert "<" not in slug + assert ">" not in slug + assert "script" in slug # The word itself is fine + # Special chars removed, becomes one word, then first 5 words total + assert slug == "scriptalertxssscript-my-note-title" + + def test_random_suffix_uses_secrets_module(self): + """Test random suffix is cryptographically secure (not predictable).""" + # Generate many suffixes and ensure high entropy + suffixes = [generate_random_suffix() for _ in range(1000)] + unique_count = len(set(suffixes)) + # Should have very high uniqueness (>99%) + assert unique_count > 990 + + +class TestContentHashing: + """Test content hashing functions""" + + def test_calculate_content_hash_consistency(self): + """Test hash is consistent for same content.""" + hash1 = calculate_content_hash("Test content") + hash2 = calculate_content_hash("Test content") + assert hash1 == hash2 + + def test_calculate_content_hash_different(self): + """Test different content produces different hash.""" + hash1 = calculate_content_hash("Test content 1") + hash2 = calculate_content_hash("Test content 2") + assert hash1 != hash2 + + def test_calculate_content_hash_empty(self): + """Test hash of empty string.""" + hash_empty = calculate_content_hash("") + assert len(hash_empty) == 64 # SHA-256 produces 64 hex chars + assert hash_empty.isalnum() + + def test_calculate_content_hash_unicode(self): + """Test hash handles unicode correctly.""" + hash_val = calculate_content_hash("Hello 世界") + assert len(hash_val) == 64 + assert hash_val.isalnum() + + def test_calculate_content_hash_known_value(self): + """Test hash matches known SHA-256 value.""" + # Known SHA-256 hash for "Hello World" + expected = "a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e" + actual = calculate_content_hash("Hello World") + assert actual == expected + + def test_calculate_content_hash_multiline(self): + """Test hash of multiline content.""" + content = 
"Line 1\nLine 2\nLine 3" + hash_val = calculate_content_hash(content) + assert len(hash_val) == 64 + + def test_calculate_content_hash_special_characters(self): + """Test hash handles special characters.""" + content = "Special chars: !@#$%^&*()_+-=[]{}|;:',.<>?/~`" + hash_val = calculate_content_hash(content) + assert len(hash_val) == 64 + + +class TestFilePathOperations: + """Test file path generation and validation""" + + def test_generate_note_path_basic(self): + """Test basic note path generation.""" + dt = datetime(2024, 11, 18, 14, 30) + path = generate_note_path("test-note", dt, Path("data")) + assert path == Path("data/notes/2024/11/test-note.md") + + def test_generate_note_path_different_months(self): + """Test path generation for different months.""" + dt_jan = datetime(2024, 1, 5, 10, 0) + dt_dec = datetime(2024, 12, 25, 15, 30) + + path_jan = generate_note_path("jan-note", dt_jan, Path("data")) + path_dec = generate_note_path("dec-note", dt_dec, Path("data")) + + assert path_jan == Path("data/notes/2024/01/jan-note.md") + assert path_dec == Path("data/notes/2024/12/dec-note.md") + + def test_generate_note_path_different_years(self): + """Test path generation for different years.""" + dt_2024 = datetime(2024, 6, 15) + dt_2025 = datetime(2025, 6, 15) + + path_2024 = generate_note_path("note-2024", dt_2024, Path("data")) + path_2025 = generate_note_path("note-2025", dt_2025, Path("data")) + + assert path_2024 == Path("data/notes/2024/06/note-2024.md") + assert path_2025 == Path("data/notes/2025/06/note-2025.md") + + def test_generate_note_path_invalid_slug(self): + """Test note path generation rejects invalid slug.""" + dt = datetime(2024, 11, 18) + with pytest.raises(ValueError, match="Invalid slug"): + generate_note_path("Invalid Slug!", dt, Path("data")) + + def test_generate_note_path_with_numbers(self): + """Test path generation with slug containing numbers.""" + dt = datetime(2024, 11, 18) + path = generate_note_path("note-123-test", dt, 
Path("data")) + assert path == Path("data/notes/2024/11/note-123-test.md") + + def test_ensure_note_directory_creates_dirs(self, tmp_path): + """Test ensure_note_directory creates directories.""" + note_path = tmp_path / "notes" / "2024" / "11" / "test.md" + assert not note_path.parent.exists() + + result = ensure_note_directory(note_path) + + assert note_path.parent.exists() + assert result == note_path.parent + + def test_ensure_note_directory_existing_dirs(self, tmp_path): + """Test ensure_note_directory with existing directories.""" + note_path = tmp_path / "notes" / "2024" / "11" / "test.md" + note_path.parent.mkdir(parents=True) + + # Should not raise error + result = ensure_note_directory(note_path) + assert result == note_path.parent + + def test_ensure_note_directory_deep_structure(self, tmp_path): + """Test ensure_note_directory with deep directory structure.""" + note_path = tmp_path / "a" / "b" / "c" / "d" / "e" / "test.md" + result = ensure_note_directory(note_path) + + assert note_path.parent.exists() + assert result == note_path.parent + + def test_validate_note_path_safe(self, tmp_path): + """Test path validation accepts safe paths.""" + note_path = tmp_path / "notes" / "2024" / "11" / "note.md" + assert validate_note_path(note_path, tmp_path) is True + + def test_validate_note_path_traversal_dotdot(self, tmp_path): + """Test path validation rejects .. traversal.""" + note_path = tmp_path / "notes" / ".." / ".." 
/ "etc" / "passwd" + assert validate_note_path(note_path, tmp_path) is False + + def test_validate_note_path_absolute_outside(self, tmp_path): + """Test path validation rejects absolute paths outside data dir.""" + assert validate_note_path(Path("/etc/passwd"), tmp_path) is False + + def test_validate_note_path_within_subdirectory(self, tmp_path): + """Test path validation accepts paths in subdirectories.""" + note_path = tmp_path / "notes" / "2024" / "11" / "subfolder" / "note.md" + assert validate_note_path(note_path, tmp_path) is True + + def test_validate_note_path_symlink_outside(self, tmp_path): + """Test path validation handles symlinks pointing outside.""" + # Create a symlink pointing outside data_dir + outside_dir = tmp_path.parent / "outside" + outside_dir.mkdir(exist_ok=True) + + link_path = tmp_path / "link" + link_path.symlink_to(outside_dir) + + target_path = link_path / "file.md" + assert validate_note_path(target_path, tmp_path) is False + + def test_validate_note_path_same_directory(self, tmp_path): + """Test path validation for file in data_dir root.""" + note_path = tmp_path / "note.md" + assert validate_note_path(note_path, tmp_path) is True + + +class TestAtomicFileOperations: + """Test atomic file write/read/delete operations""" + + def test_write_and_read_note_file(self, tmp_path): + """Test writing and reading note file.""" + file_path = tmp_path / "test.md" + content = "# Test Note\n\nThis is a test." 
+ + write_note_file(file_path, content) + assert file_path.exists() + + read_content = read_note_file(file_path) + assert read_content == content + + def test_write_note_file_atomic(self, tmp_path): + """Test write is atomic (temp file cleaned up).""" + file_path = tmp_path / "test.md" + temp_path = file_path.with_suffix(".md.tmp") + + write_note_file(file_path, "Test") + + # Temp file should not exist after write + assert not temp_path.exists() + assert file_path.exists() + + def test_write_note_file_overwrites(self, tmp_path): + """Test writing overwrites existing file.""" + file_path = tmp_path / "test.md" + + write_note_file(file_path, "Original content") + write_note_file(file_path, "New content") + + content = read_note_file(file_path) + assert content == "New content" + + def test_write_note_file_unicode(self, tmp_path): + """Test writing unicode content.""" + file_path = tmp_path / "test.md" + content = "Unicode: 你好世界 🌍" + + write_note_file(file_path, content) + read_content = read_note_file(file_path) + + assert read_content == content + + def test_write_note_file_empty(self, tmp_path): + """Test writing empty file.""" + file_path = tmp_path / "test.md" + write_note_file(file_path, "") + + content = read_note_file(file_path) + assert content == "" + + def test_write_note_file_multiline(self, tmp_path): + """Test writing multiline content.""" + file_path = tmp_path / "test.md" + content = "Line 1\nLine 2\nLine 3\n" + + write_note_file(file_path, content) + read_content = read_note_file(file_path) + + assert read_content == content + + def test_read_note_file_not_found(self, tmp_path): + """Test reading non-existent file raises error.""" + file_path = tmp_path / "nonexistent.md" + with pytest.raises(FileNotFoundError): + read_note_file(file_path) + + def test_delete_note_file_hard(self, tmp_path): + """Test hard delete removes file.""" + file_path = tmp_path / "test.md" + file_path.write_text("Test") + + delete_note_file(file_path, soft=False) + assert not 
file_path.exists() + + def test_delete_note_file_soft(self, tmp_path): + """Test soft delete moves file to trash.""" + # Create note file + notes_dir = tmp_path / "notes" / "2024" / "11" + notes_dir.mkdir(parents=True) + file_path = notes_dir / "test.md" + file_path.write_text("Test") + + # Soft delete + delete_note_file(file_path, soft=True, data_dir=tmp_path) + + # Original should be gone + assert not file_path.exists() + + # Should be in trash + trash_path = tmp_path / TRASH_DIR_NAME / "2024" / "11" / "test.md" + assert trash_path.exists() + assert trash_path.read_text() == "Test" + + def test_delete_note_file_soft_without_data_dir(self, tmp_path): + """Test soft delete requires data_dir.""" + file_path = tmp_path / "test.md" + file_path.write_text("Test") + + with pytest.raises(ValueError, match="data_dir is required"): + delete_note_file(file_path, soft=True, data_dir=None) + + def test_delete_note_file_soft_different_months(self, tmp_path): + """Test soft delete preserves year/month structure.""" + # Create note in January + jan_dir = tmp_path / "notes" / "2024" / "01" + jan_dir.mkdir(parents=True) + jan_file = jan_dir / "jan-note.md" + jan_file.write_text("January note") + + # Create note in December + dec_dir = tmp_path / "notes" / "2024" / "12" + dec_dir.mkdir(parents=True) + dec_file = dec_dir / "dec-note.md" + dec_file.write_text("December note") + + # Soft delete both + delete_note_file(jan_file, soft=True, data_dir=tmp_path) + delete_note_file(dec_file, soft=True, data_dir=tmp_path) + + # Check trash structure + jan_trash = tmp_path / TRASH_DIR_NAME / "2024" / "01" / "jan-note.md" + dec_trash = tmp_path / TRASH_DIR_NAME / "2024" / "12" / "dec-note.md" + + assert jan_trash.exists() + assert dec_trash.exists() + + def test_delete_note_file_hard_not_found(self, tmp_path): + """Test hard delete of non-existent file raises error.""" + file_path = tmp_path / "nonexistent.md" + with pytest.raises(FileNotFoundError): + delete_note_file(file_path, soft=False) + 
class TestDateTimeFormatting:
    """Checks for the RFC-822 / ISO 8601 formatting and parsing helpers."""

    def test_format_rfc822_basic(self):
        """A single timestamp renders in RFC-822 form with a +0000 offset."""
        moment = datetime(2024, 11, 18, 14, 30, 45)
        assert format_rfc822(moment) == "Mon, 18 Nov 2024 14:30:45 +0000"

    def test_format_rfc822_different_dates(self):
        """The first and last second of 2024 both render as expected."""
        cases = (
            (datetime(2024, 1, 1, 0, 0, 0), "Mon, 01 Jan 2024 00:00:00 +0000"),
            (datetime(2024, 12, 31, 23, 59, 59), "Tue, 31 Dec 2024 23:59:59 +0000"),
        )
        for moment, expected in cases:
            assert format_rfc822(moment) == expected

    def test_format_rfc822_weekdays(self):
        """The weekday abbreviation matches the calendar for known dates."""
        # (date, expected weekday prefix) pairs taken from the 2024 calendar
        cases = (
            (datetime(2024, 11, 18, 12, 0, 0), "Mon,"),
            (datetime(2024, 11, 22, 12, 0, 0), "Fri,"),
            (datetime(2024, 11, 24, 12, 0, 0), "Sun,"),
        )
        for moment, weekday_prefix in cases:
            assert format_rfc822(moment).startswith(weekday_prefix)

    def test_format_iso8601_basic(self):
        """A single timestamp renders in ISO 8601 form with a Z suffix."""
        moment = datetime(2024, 11, 18, 14, 30, 45)
        assert format_iso8601(moment) == "2024-11-18T14:30:45Z"

    def test_format_iso8601_different_dates(self):
        """The first and last second of 2024 both render as expected."""
        cases = (
            (datetime(2024, 1, 1, 0, 0, 0), "2024-01-01T00:00:00Z"),
            (datetime(2024, 12, 31, 23, 59, 59), "2024-12-31T23:59:59Z"),
        )
        for moment, expected in cases:
            assert format_iso8601(moment) == expected

    def test_format_iso8601_single_digits(self):
        """Single-digit month, day, hour, minute and second are zero-padded."""
        moment = datetime(2024, 1, 5, 9, 8, 7)
        assert format_iso8601(moment) == "2024-01-05T09:08:07Z"

    def test_parse_iso8601_basic(self):
        """Every date and time field is recovered from a Z-suffixed string."""
        parsed = parse_iso8601("2024-11-18T14:30:45Z")
        fields = (
            parsed.year,
            parsed.month,
            parsed.day,
            parsed.hour,
            parsed.minute,
            parsed.second,
        )
        assert fields == (2024, 11, 18, 14, 30, 45)

    def test_parse_iso8601_without_z(self):
        """A string lacking the trailing Z still parses to the right date."""
        parsed = parse_iso8601("2024-11-18T14:30:45")
        assert (parsed.year, parsed.month, parsed.day) == (2024, 11, 18)

    def test_parse_iso8601_roundtrip(self):
        """Formatting and then parsing returns the original datetime."""
        original = datetime(2024, 11, 18, 14, 30, 45)
        assert parse_iso8601(format_iso8601(original)) == original

    def test_parse_iso8601_invalid_format(self):
        """Text that is not a date at all raises ValueError."""
        with pytest.raises(ValueError):
            parse_iso8601("not-a-date")

    def test_parse_iso8601_invalid_date(self):
        """A well-formed string with an impossible date raises ValueError."""
        with pytest.raises(ValueError):
            parse_iso8601("2024-13-01T00:00:00Z")  # Invalid month

    def test_format_and_parse_consistency(self):
        """ISO 8601 round-trips; RFC-822 output carries the year and time."""
        moment = datetime(2024, 11, 18, 14, 30, 45)

        # ISO 8601: format then parse must give back the same datetime.
        assert parse_iso8601(format_iso8601(moment)) == moment

        # RFC-822: only spot-check that the year and time appear in the text.
        rfc_text = format_rfc822(moment)
        assert "2024" in rfc_text
        assert "14:30:45" in rfc_text