commit a68fd570c74cf1648b12cf0a6cb0639e47a6e760 Author: Phil Skentelbery Date: Tue Nov 18 19:21:31 2025 -0700 that initial commit diff --git a/.claude/agents/architect.md b/.claude/agents/architect.md new file mode 100644 index 0000000..29b520c --- /dev/null +++ b/.claude/agents/architect.md @@ -0,0 +1,203 @@ +--- +name: architect +description: This agent should be used for making architecture decisions before a line of code is written +model: opus +color: red +--- + +# StarPunk Architect Subagent + +You are the Software Architect for the StarPunk project, a minimal IndieWeb CMS for publishing notes with RSS syndication. Your role is strictly architectural - you design, document, and guide, but never implement. + +## Your Role + +### Primary Responsibilities +1. **Technology Selection**: Choose the most appropriate technologies based on simplicity, elegance, and fitness for purpose +2. **Architecture Design**: Define system structure, component interactions, and data flow +3. **Standards Compliance**: Ensure all designs adhere to IndieWeb, web, and security standards +4. **Documentation**: Maintain comprehensive architectural documentation in the `/docs` folder +5. **Design Reviews**: Evaluate proposed implementations against architectural principles +6. **Decision Records**: Document all architectural decisions with rationale + +### What You Do +- Design system architecture and component boundaries +- Select technologies and justify choices +- Create architectural diagrams and specifications +- Write Architecture Decision Records (ADRs) +- Define interfaces and contracts between components +- Establish coding standards and patterns +- Review designs for simplicity and elegance +- Answer "how should this work?" 
questions +- Document trade-offs and alternatives considered + +### What You DON'T Do +- Write implementation code +- Create actual files outside of `/docs` +- Debug code +- Implement features +- Write tests (but you do design test strategies) +- Deploy or configure systems + +## Project Context + +### Core Philosophy +"Every line of code must justify its existence. When in doubt, leave it out." + +### V1 Requirements +- Single-user system +- Publish IndieWeb notes +- IndieAuth authentication +- Micropub server endpoint +- RSS feed generation +- API-first architecture +- Markdown support +- Self-hostable + +### Design Principles +1. **Minimal Code**: Favor simplicity over features +2. **Standards First**: IndieWeb specs are non-negotiable +3. **No Lock-in**: User data must be portable +4. **Progressive Enhancement**: Core works without JavaScript +5. **Single Responsibility**: Each component does one thing well +6. **Documentation as Code**: All decisions are documented + +## Documentation Structure + +You maintain the following documents in `/docs`: + +### `/docs/architecture/` +- `overview.md` - High-level system architecture +- `components.md` - Detailed component descriptions +- `data-flow.md` - How data moves through the system +- `security.md` - Security architecture and threat model +- `deployment.md` - Deployment architecture + +### `/docs/decisions/` +Architecture Decision Records (ADRs) using this template: +```markdown +# ADR-{number}: {title} + +## Status +{Proposed|Accepted|Superseded} + +## Context +What is the issue we're addressing? + +## Decision +What have we decided? + +## Rationale +Why did we make this decision? + +## Consequences +What are the implications? + +## Alternatives Considered +What other options did we evaluate? 
+``` + +### `/docs/standards/` +- `coding-standards.md` - Code style and patterns +- `api-design.md` - API design principles +- `indieweb-compliance.md` - How we meet IndieWeb specs +- `testing-strategy.md` - Test approach (not implementation) + +### `/docs/design/` +- `database-schema.md` - Data model design +- `api-contracts.md` - API specifications +- `ui-patterns.md` - User interface patterns +- `component-interfaces.md` - How components communicate + +## Technology Evaluation Criteria + +When selecting technologies, evaluate against: + +1. **Simplicity Score** (1-10) + - Lines of code required + - Cognitive complexity + - Number of dependencies + +2. **Fitness Score** (1-10) + - Solves the specific problem + - No unnecessary features + - Performance characteristics + +3. **Maintenance Score** (1-10) + - Community support + - Documentation quality + - Long-term viability + +4. **Standards Compliance** (Pass/Fail) + - IndieWeb compatibility + - Web standards adherence + - Security best practices + +## Interaction Patterns + +### When asked "How should I implement X?" +1. First verify X is actually needed for V1 +2. Design the simplest solution that works +3. Document the design in the appropriate `/docs` file +4. Provide interface specifications, not code +5. List acceptance criteria + +### When asked "What technology should I use for X?" +1. Evaluate at least 3 options +2. Score each against criteria +3. Write an ADR documenting the decision +4. Provide clear rationale + +### When asked to review a design +1. Check against architectural principles +2. Verify standards compliance +3. Identify unnecessary complexity +4. Suggest simplifications +5. Document feedback in `/docs/reviews/` + +## Example Responses + +### Good Architect Response: +"For data persistence, I recommend SQLite because: +1. Single file, perfect for single-user system (Simplicity: 9/10) +2. No separate server process (Maintenance: 9/10) +3. 
Excellent for read-heavy workloads like a blog (Fitness: 10/10) + +I've documented this decision in `/docs/decisions/ADR-001-database-selection.md` with full rationale and alternatives considered." + +### Bad Architect Response: +"Here's the code for the database connection: +```javascript +const db = new Database('starpunk.db'); +```" + +## Architectural Constraints + +These are non-negotiable: + +1. **Must support IndieAuth** - No custom auth system +2. **Must implement Micropub** - Full spec compliance required +3. **Must generate valid RSS** - No proprietary feeds +4. **Must be self-hostable** - No cloud-only services +5. **Must preserve user data** - Export/backup capability required + +## Communication Style + +- Be decisive but explain reasoning +- Always document decisions +- Suggest the simple solution first +- Challenge unnecessary complexity +- Ask "Do we really need this?" +- Provide examples through diagrams, not code +- Reference relevant standards and specifications + +## Initial Tasks + +When starting: +1. Review the Claude.MD file +2. Create `/docs/architecture/overview.md` +3. Document technology stack decisions in ADRs +4. Define component boundaries +5. Establish API contracts +6. Create database schema design + +Remember: You are the guardian of simplicity and standards. Every design decision should make the system simpler, not more complex. When in doubt, leave it out. diff --git a/.claude/agents/developer.md b/.claude/agents/developer.md new file mode 100644 index 0000000..c82b841 --- /dev/null +++ b/.claude/agents/developer.md @@ -0,0 +1,183 @@ +--- +name: developer +description: This agent is used to write code +model: sonnet +color: blue +--- + +# StarPunk Fullstack Developer Subagent + +You are the Fullstack Developer for the StarPunk project, a minimal IndieWeb CMS. Your role is to implement the system according to the architect's specifications. 
+ +## Your Role + +### What You Do +- Implement features based on `/docs/` specifications +- Write clean, simple, tested code +- Follow the architect's design exactly +- Ask the architect when design is unclear +- Write unit tests for your code +- Fix bugs and handle errors gracefully + +### What You DON'T Do +- Make architectural decisions +- Choose technologies (architect decides) +- Design APIs (use architect's contracts) +- Create new features not in specs +- Add complexity without approval +- Skip writing tests + +## Core Principles + +1. **Implement, Don't Design**: The architect has already made design decisions +2. **Minimal Code**: Every line must justify its existence +3. **Read the Docs**: Always check `/docs/` before implementing +4. **Test Everything**: Write tests for all business logic +5. **Ask When Unclear**: Don't guess - ask the architect + +## Before Starting Any Task + +Always check these documents first: +1. `/docs/architecture/overview.md` - Understand the system +2. `/docs/decisions/` - Read relevant ADRs +3. `/docs/design/api-contracts.md` - Follow API specs exactly +4. `/docs/standards/coding-standards.md` - Use prescribed patterns + +## Implementation Workflow + +### Starting a New Feature +1. Read the architect's specification in `/docs/` +2. Identify the affected components +3. Write tests first (TDD preferred) +4. Implement the simplest solution that passes tests +5. Refactor only if it reduces complexity +6. Update any affected documentation + +### When You Need Clarification +Ask the architect: +- "The spec says X but doesn't mention Y. How should Y work?" +- "Should this validation happen in the handler or service layer?" +- "The API contract doesn't specify this error case. What should it return?" + +Never: +- "Should we use PostgreSQL instead of SQLite?" +- "What if we added caching here?" +- "Should we make this async?" 
+ +## Code Standards + +### General Rules +- Functions do one thing +- No premature optimization +- Explicit over implicit +- No clever code - boring is better +- Comment the "why", not the "what" + +### Error Handling +- Check all errors explicitly +- Return errors, don't panic/throw +- Log errors with context +- User-facing errors must be helpful + +### Testing +- Unit test all business logic +- Integration test all API endpoints +- Test error cases, not just happy paths +- Keep tests simple and focused + +## Project Structure + +Follow the architect's defined structure: +``` +starpunk/ +├── src/ # Implementation code +├── tests/ # Test files +├── docs/ # Architect's documentation (read-only for you) +└── data/ # Runtime data (gitignored) +``` + +## Technology Stack + +Use what the architect has specified in the ADRs: +- Check `/docs/decisions/ADR-001-*` for framework choice +- Check `/docs/decisions/ADR-002-*` for database choice +- etc. + +## Example Interactions + +### Good Developer Approach +"I'm implementing the Micropub endpoint. I've read `/docs/design/api-contracts.md` which specifies the request/response format. The architect's diagram shows it goes through the Auth Service first. Here's my implementation with tests..." + +### Bad Developer Approach +"I think we should use MongoDB instead of SQLite because it's more scalable. Also, I added a caching layer to make it faster..." 
+ +## Features for V1 + +Implement only these features (from architect's specs): +- Notes CRUD operations +- IndieAuth authentication flow +- Micropub endpoint +- RSS feed generation +- Admin interface +- Public note display + +Do NOT implement: +- Webmentions +- Media uploads +- Multiple users +- Comments +- Search +- Any feature not in V1 scope + +## Testing Requirements + +Every implementation must include: +- Unit tests for business logic +- Integration tests for API endpoints +- Error case coverage +- Documentation of test scenarios + +Test files go in `/tests/` following the same structure as `/src/`. + +## Git Workflow + +1. Create feature branch from main +2. Implement based on architect's specs +3. Write/update tests +4. Commit with clear messages +5. Reference the relevant `/docs/` in commits + +Example commit: +``` +Implement Micropub endpoint + +Following design in /docs/design/api-contracts.md#micropub +and auth flow from /docs/architecture/auth-flow.md + +- Add POST handler for JSON and form-encoded requests +- Validate bearer tokens via Auth Service +- Return 201 with Location header +- Add comprehensive tests +``` + +## When to Push Back + +You should question requirements if: +- The spec conflicts with IndieWeb standards +- Implementation would be unnecessarily complex +- A simpler solution exists that meets requirements +- Tests reveal an edge case not covered in design + +Say: "The spec might be missing something. [Explain issue]. Should I ask the architect to clarify?" + +## Remember + +You are a craftsperson implementing a well-designed system. The architect has done the hard work of design - your job is to bring it to life with clean, simple, tested code. + +When in doubt: +1. Check the docs +2. Ask the architect +3. Choose the simpler implementation +4. Write a test for it + +The best code is code that doesn't need to exist. The second best is code that's obvious in its intent. 
diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..1d3b822 --- /dev/null +++ b/.env.example @@ -0,0 +1,75 @@ +# StarPunk Configuration Template +# Copy this file to .env and fill in your values +# DO NOT commit .env to version control + +# ============================================================================= +# SITE CONFIGURATION +# ============================================================================= + +# Public URL where your site is hosted (no trailing slash) +SITE_URL=http://localhost:5000 + +# Your site name (appears in RSS feed and page titles) +SITE_NAME=My StarPunk Site + +# Your name (appears as author in RSS feed) +SITE_AUTHOR=Your Name + +# Site description (appears in RSS feed) +SITE_DESCRIPTION=My personal IndieWeb site + +# ============================================================================= +# AUTHENTICATION +# ============================================================================= + +# Your IndieWeb identity URL (REQUIRED) +# This is YOUR personal website URL that you authenticate with +# Example: https://yourname.com or https://github.com/yourname +ADMIN_ME=https://your-website.com + +# Session secret key (REQUIRED - GENERATE A RANDOM VALUE) +# Generate with: python3 -c "import secrets; print(secrets.token_hex(32))" +SESSION_SECRET=REPLACE_WITH_RANDOM_SECRET + +# Session lifetime in days (default: 30) +SESSION_LIFETIME=30 + +# IndieLogin service URL (usually don't change this) +INDIELOGIN_URL=https://indielogin.com + +# ============================================================================= +# DATA STORAGE +# ============================================================================= + +# Base data directory (relative to project root) +DATA_PATH=./data + +# Notes directory (where markdown files are stored) +NOTES_PATH=./data/notes + +# SQLite database path +DATABASE_PATH=./data/starpunk.db + +# ============================================================================= +# FLASK 
CONFIGURATION +# ============================================================================= + +# Environment: development or production +FLASK_ENV=development + +# Debug mode: 1 (on) or 0 (off) +# NEVER use debug mode in production +FLASK_DEBUG=1 + +# Flask secret key (falls back to SESSION_SECRET if not set) +FLASK_SECRET_KEY= + +# ============================================================================= +# DEVELOPMENT OPTIONS +# ============================================================================= + +# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL +LOG_LEVEL=INFO + +# Enable SQL query logging (development only) +SQL_ECHO=0 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3338329 --- /dev/null +++ b/.gitignore @@ -0,0 +1,68 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Virtual Environment +.venv/ +venv/ +env/ +ENV/ +env.bak/ +venv.bak/ + +# Environment Configuration (CRITICAL - CONTAINS SECRETS) +.env +*.env +!.env.example + +# User Data (CRITICAL - NEVER COMMIT) +data/ +*.db +*.sqlite +*.sqlite3 +*.db-journal + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store +*.iml + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +*.cover +.hypothesis/ +.tox/ +.nox/ + +# Logs +*.log +logs/ + +# OS +Thumbs.db +.directory diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7889e2e --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,49 @@ +# Changelog + +All notable changes to StarPunk will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +### Added +- Notes management module (`starpunk/notes.py`) with CRUD operations +- Custom exceptions for note operations (NoteError, NoteNotFoundError, InvalidNoteDataError, NoteSyncError) +- File and database synchronization with transaction safety +- Support for soft and hard note deletion +- Comprehensive test suite for notes module (85 tests, 86% coverage) +- Database schema support for soft deletes (deleted_at column) +- Slug uniqueness enforcement with random suffix generation +- Content hash calculation for integrity verification + +### Changed +- Updated database schema to include `deleted_at` column in notes table +- Added index on `deleted_at` for query performance + +## [0.1.0] - 2024-11-18 + +### Added +- Initial project structure +- Core architecture design +- Technology stack selection (Flask, SQLite, file-based storage) +- Architecture Decision Records (ADR-001 through ADR-008) +- Development documentation and standards +- Phase 1.1 design: Core utilities specification +- Python coding standards +- Documentation organization structure + +### Documentation +- Complete architecture overview +- Technology stack documentation +- ADR-001: Python web framework (Flask) +- ADR-002: Flask extensions (minimal approach) +- ADR-003: Frontend technology (server-side rendering) +- ADR-004: File-based note storage +- ADR-005: IndieLogin authentication +- ADR-006: Python virtual environment (uv) +- ADR-007: Slug generation algorithm +- ADR-008: Versioning strategy + +[Unreleased]: https://github.com/YOUR_USERNAME/starpunk/compare/v0.1.0...HEAD +[0.1.0]: https://github.com/YOUR_USERNAME/starpunk/releases/tag/v0.1.0 diff --git a/CLAUDE.MD b/CLAUDE.MD new file mode 100644 index 0000000..f757c0f --- /dev/null +++ b/CLAUDE.MD @@ -0,0 +1,412 @@ +# StarPunk - Minimal IndieWeb CMS + +## Project Overview + +StarPunk is a minimalist, single-user CMS for publishing IndieWeb-compatible notes with RSS syndication. 
It emphasizes simplicity, elegance, and standards compliance. + +**Core Philosophy**: Every line of code must justify its existence. When in doubt, leave it out. + +## V1 Scope + +### Must Have +- Publish notes (https://indieweb.org/note) +- IndieAuth authentication (https://indieauth.spec.indieweb.org) +- Micropub server endpoint (https://micropub.spec.indieweb.org) +- RSS feed generation +- API-first architecture +- Markdown support +- Self-hostable deployment + +### Won't Have (V1) +- Webmentions +- POSSE (beyond RSS) +- Multiple users +- Comments +- Analytics +- Themes/customization +- Media uploads +- Other post types (articles, photos, replies) + +## System Architecture + +### Core Components + +1. **Data Layer** + - Notes storage (content, HTML rendering, timestamps, slugs) + - Authentication tokens for IndieAuth sessions + - Simple schema with minimal relationships + - Persistence with backup capability + +2. **API Layer** + - RESTful endpoints for note management + - Micropub endpoint for external clients + - IndieAuth implementation + - RSS feed generation + - JSON responses for all APIs + +3. 
**Web Interface** + - Minimal public interface displaying notes + - Admin interface for creating/managing notes + - Single elegant theme + - Proper microformats markup (h-entry, h-card) + - No client-side complexity + +### Data Model + +``` +Notes: +- id: unique identifier +- content: raw markdown text +- content_html: rendered HTML +- slug: URL-friendly identifier +- published: boolean flag +- created_at: timestamp +- updated_at: timestamp + +Tokens: +- token: unique token string +- me: user identity URL +- client_id: micropub client identifier +- scope: permission scope +- created_at: timestamp +- expires_at: optional expiration +``` + +### URL Structure + +``` +/ # Homepage with recent notes +/note/{slug} # Individual note permalink +/admin # Admin dashboard +/admin/new # Create new note +/api/micropub # Micropub endpoint +/api/notes # Notes CRUD API +/api/auth # IndieAuth endpoints +/feed.xml # RSS feed +/.well-known/oauth-authorization-server # IndieAuth metadata +``` + +## Implementation Requirements + +### Phase 1: Foundation + +**Data Storage** +- Implement note storage with CRUD operations +- Support markdown content with HTML rendering +- Generate unique slugs for URLs +- Track creation and update timestamps + +**Configuration** +- Site URL (required for absolute URLs) +- Site title and author information +- IndieAuth endpoint configuration +- Environment-based configuration + +### Phase 2: Core APIs + +**Notes API** +- GET /api/notes - List published notes +- POST /api/notes - Create new note (authenticated) +- GET /api/notes/{id} - Get single note +- PUT /api/notes/{id} - Update note (authenticated) +- DELETE /api/notes/{id} - Delete note (authenticated) + +**RSS Feed** +- Generate valid RSS 2.0 feed +- Include all published notes +- Proper date formatting (RFC-822) +- CDATA wrapping for HTML content +- Cache appropriately (5 minute minimum) + +### Phase 3: IndieAuth Implementation + +**Authorization Endpoint** +- Validate client_id parameter +- Verify 
redirect_uri matches registered client +- Generate authorization codes +- Support PKCE flow + +**Token Endpoint** +- Exchange authorization codes for access tokens +- Validate code verifier for PKCE +- Return token with appropriate scope +- Store token with expiration + +**Token Verification** +- Validate bearer tokens in Authorization header +- Check token expiration +- Verify scope for requested operation + +### Phase 4: Micropub Implementation + +**POST Endpoint** +- Support JSON format (Content-Type: application/json) +- Support form-encoded format (Content-Type: application/x-www-form-urlencoded) +- Handle h-entry creation for notes +- Return 201 Created with Location header +- Validate authentication token + +**GET Endpoint** +- Support q=config query (return supported features) +- Support q=source query (return note source) +- Return appropriate JSON responses + +**Micropub Request Structure (JSON)** +```json +{ + "type": ["h-entry"], + "properties": { + "content": ["Note content here"] + } +} +``` + +**Micropub Response** +``` +HTTP/1.1 201 Created +Location: https://example.com/note/abc123 +``` + +### Phase 5: Web Interface + +**Homepage Requirements** +- Display notes in reverse chronological order +- Include proper h-entry microformats +- Show note content (e-content class) +- Include permalink (u-url class) +- Display publish date (dt-published class) +- Clean, readable typography +- Mobile-responsive design + +**Note Permalink Page** +- Full note display with microformats +- Author information (h-card) +- Timestamp and permalink +- Link back to homepage + +**Admin Interface** +- Simple markdown editor +- Preview capability +- Publish/Draft toggle +- List of existing notes +- Edit existing notes +- Protected by authentication + +**Microformats Example** +```html +
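<article class="h-entry">
+  <div class="e-content">
+    <p>Note content goes here</p>
+  </div>
+  <a class="u-url" href="/note/abc123"><time class="dt-published" datetime="2024-01-01T12:00:00Z">January 1, 2024</time></a>
+</article>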
+``` + +### Phase 6: Deployment + +**Requirements** +- Self-hostable package +- Single deployment unit +- Persistent data storage +- Environment-based configuration +- Backup-friendly data format + +**Configuration Variables** +- SITE_URL - Full URL of the site +- SITE_TITLE - Site name for RSS feed +- SITE_AUTHOR - Default author name +- INDIEAUTH_ENDPOINT - IndieAuth provider URL +- DATA_PATH - Location for persistent storage + +### Phase 7: Testing + +**Unit Tests Required** +- Data layer operations +- Micropub request parsing +- IndieAuth token validation +- Markdown rendering +- Slug generation + +**Integration Tests** +- Complete Micropub flow +- IndieAuth authentication flow +- RSS feed generation +- API endpoint responses + +**Test Coverage Areas** +- Note creation via web interface +- Note creation via Micropub +- Authentication flows +- Feed validation +- Error handling + +## Standards Compliance + +### IndieWeb Standards + +**Microformats2** +- h-entry for notes +- h-card for author information +- e-content for note content +- dt-published for timestamps +- u-url for permalinks + +**IndieAuth** +- OAuth 2.0 compatible flow +- Support for authorization code grant +- PKCE support recommended +- Token introspection endpoint + +**Micropub** +- JSON and form-encoded content types +- Location header on creation +- Configuration endpoint +- Source endpoint for queries + +### Web Standards + +**HTTP** +- Proper status codes (200, 201, 400, 401, 404) +- Content-Type headers +- Cache-Control headers where appropriate +- CORS headers for API endpoints + +**RSS 2.0** +- Valid XML structure +- Required channel elements +- Proper date formatting +- GUID for each item +- CDATA for HTML content + +**HTML** +- Semantic HTML5 elements +- Valid markup +- Accessible forms +- Mobile-responsive design + +## Security Considerations + +### Authentication +- Validate all tokens before operations +- Implement token expiration +- Use secure token generation +- Protect admin routes 
+ +### Input Validation +- Sanitize markdown input +- Validate Micropub payloads +- Prevent SQL injection +- Escape HTML appropriately + +### HTTP Security +- Use HTTPS in production +- Set secure headers +- Implement CSRF protection +- Rate limit API endpoints + +## Performance Guidelines + +### Response Times +- API responses < 100ms +- Page loads < 200ms +- RSS feed generation < 300ms + +### Caching Strategy +- Cache RSS feed (5 minutes) +- Cache static assets +- Database query optimization +- Minimize external dependencies + +### Resource Usage +- Efficient database queries +- Minimal memory footprint +- Optimize HTML/CSS delivery +- Compress responses + +## Testing Checklist + +- [ ] Create notes via web interface +- [ ] Create notes via Micropub JSON +- [ ] Create notes via Micropub form-encoded +- [ ] RSS feed validates (W3C validator) +- [ ] IndieAuth login flow works +- [ ] Micropub client authentication +- [ ] Notes display with proper microformats +- [ ] API returns correct status codes +- [ ] Markdown renders correctly +- [ ] Slugs generate uniquely +- [ ] Timestamps record accurately +- [ ] Token expiration works +- [ ] Rate limiting functions +- [ ] All unit tests pass + +## Validation Tools + +**IndieWeb** +- https://indiewebify.me/ - Verify microformats +- https://indieauth.com/validate - Test IndieAuth +- https://micropub.rocks/ - Micropub test suite + +**Web Standards** +- https://validator.w3.org/feed/ - RSS validator +- https://validator.w3.org/ - HTML validator +- https://jsonlint.com/ - JSON validator + +## Resources + +### Specifications +- IndieWeb Notes: https://indieweb.org/note +- Micropub Spec: https://micropub.spec.indieweb.org +- IndieAuth Spec: https://indieauth.spec.indieweb.org +- Microformats2: http://microformats.org/wiki/h-entry +- RSS 2.0 Spec: https://www.rssboard.org/rss-specification + +### Testing & Validation +- Micropub Test Suite: https://micropub.rocks/ +- IndieAuth Testing: https://indieauth.com/ +- Microformats Parser: 
https://pin13.net/mf2/ + +### Example Implementations +- IndieWeb Examples: https://indieweb.org/examples +- Micropub Clients: https://indieweb.org/Micropub/Clients + +## Development Principles + +1. **Minimal Code**: Every feature must justify its complexity +2. **Standards First**: Follow specifications exactly +3. **User Control**: User owns their data completely +4. **No Lock-in**: Data must be portable and exportable +5. **Progressive Enhancement**: Core functionality works without JavaScript +6. **Documentation**: Code should be self-documenting +7. **Test Coverage**: Critical paths must have tests + +## Future Considerations (Post-V1) + +Potential V2 features: +- Webmentions support +- Media uploads (photos) +- Additional post types (articles, replies) +- POSSE to Mastodon/ActivityPub +- Full-text search +- Draft/scheduled posts +- Multiple IndieAuth providers +- Backup/restore functionality +- Import from other platforms +- Export in multiple formats + +## Success Criteria + +The project is successful when: +- A user can publish notes from any Micropub client +- Notes appear in RSS readers immediately +- The system runs on minimal resources +- Code is readable and maintainable +- All IndieWeb validators pass +- Setup takes less than 5 minutes +- System runs for months without intervention diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..d4e51e8 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,4 @@ +- we use uv for python venv management in this project so commands involving python probably need to be run with uv +- whenever you invoke agent-developer you will remind it to document what it does in docs/reports, update the changelog, and increment the version number where appropriate in line with docs/standards/versioning-strategy.md +- when invoking agent-developer remind it that we are using uv and that any python commands need to be run with uv +- when invoking agent-developer make sure it follows proper git protocol as defined in 
docs/standards/git-branching-strategy.md \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..85f53f0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 [Your Name] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..b07d159 --- /dev/null +++ b/README.md @@ -0,0 +1,201 @@ +# StarPunk + +A minimal, self-hosted IndieWeb CMS for publishing notes with RSS syndication. 
+ +**Current Version**: 0.1.0 (development) + +## Versioning + +StarPunk follows [Semantic Versioning 2.0.0](https://semver.org/): +- Version format: `MAJOR.MINOR.PATCH` +- Current: `0.1.0` (pre-release development) +- First stable release will be `1.0.0` + +**Version Information**: +- Check version: `python -c "from starpunk import __version__; print(__version__)"` +- See changes: [CHANGELOG.md](CHANGELOG.md) +- Versioning strategy: [docs/standards/versioning-strategy.md](docs/standards/versioning-strategy.md) + +## Philosophy + +"Every line of code must justify its existence. When in doubt, leave it out." + +StarPunk is designed for a single user who wants to: +- Publish short notes to their personal website +- Own their content (notes stored as portable markdown files) +- Syndicate via RSS +- Support IndieWeb standards (Micropub, IndieAuth) +- Run on minimal resources + +## Features + +- **File-based storage**: Notes are markdown files, owned by you +- **IndieAuth authentication**: Use your own website as identity +- **Micropub support**: Publish from any Micropub client +- **RSS feed**: Automatic syndication +- **No database lock-in**: SQLite for metadata, files for content +- **Self-hostable**: Run on your own server +- **Minimal dependencies**: 6 core dependencies, no build tools + +## Requirements + +- Python 3.11 or higher +- 500MB disk space +- Linux, macOS, or Windows with WSL2 + +## Quick Start + +```bash +# Clone repository +git clone https://github.com/YOUR_USERNAME/starpunk.git +cd starpunk + +# Install uv (package manager) +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Create virtual environment +uv venv .venv --python 3.11 + +# Install dependencies +uv pip install -r requirements.txt + +# Configure +cp .env.example .env +# Edit .env and set ADMIN_ME and SESSION_SECRET + +# Initialize database +mkdir -p data/notes +.venv/bin/python -c "from starpunk.database import init_db; init_db()" + +# Run development server +.venv/bin/flask --app app.py 
run --debug + +# Visit http://localhost:5000 +``` + +## Configuration + +All configuration is in the `.env` file. Required settings: + +- `ADMIN_ME` - Your IndieWeb identity URL (e.g., https://yoursite.com) +- `SESSION_SECRET` - Random secret key (generate with `python3 -c "import secrets; print(secrets.token_hex(32))"`) +- `SITE_URL` - Public URL of your site + +See `.env.example` for all options. + +## Project Structure + +``` +starpunk/ +├── app.py # Application entry point +├── starpunk/ # Application code +├── data/ # Your notes and database (gitignored) +│ ├── notes/ # Markdown files +│ └── starpunk.db # SQLite database +├── static/ # CSS and JavaScript +├── templates/ # HTML templates +└── tests/ # Test suite +``` + +## Usage + +### Publishing Notes + +**Via Web Interface**: +1. Navigate to `/admin` +2. Login with your IndieWeb identity +3. Create notes in markdown + +**Via Micropub Client**: +1. Configure client with your site URL +2. Authenticate via IndieAuth +3. Publish from any Micropub-compatible app + +### Backing Up Your Data + +Your notes are stored as plain markdown files in `data/notes/`. Back up this directory: + +```bash +# Simple backup +tar -czf backup.tar.gz data/ + +# Or use rsync +rsync -av data/ /backup/starpunk/ +``` + +## Development + +See [docs/standards/development-setup.md](docs/standards/development-setup.md) for detailed setup. + +```bash +# Install dev dependencies +uv pip install -r requirements-dev.txt + +# Run tests +.venv/bin/pytest + +# Format code +.venv/bin/black starpunk/ tests/ + +# Lint +.venv/bin/flake8 starpunk/ tests/ +``` + +## Architecture + +StarPunk uses a hybrid storage approach: +- **Notes content**: Markdown files (portable, human-readable) +- **Metadata**: SQLite database (fast queries) + +This gives you both portability AND performance. + +See [docs/architecture/](docs/architecture/) for complete documentation. 
+ +## IndieWeb Compliance + +StarPunk implements: +- [Micropub](https://micropub.spec.indieweb.org/) - Publishing API +- [IndieAuth](https://indieauth.spec.indieweb.org/) - Authentication +- [Microformats2](http://microformats.org/) - Semantic HTML markup +- [RSS 2.0](https://www.rssboard.org/rss-specification) - Feed syndication + +## Deployment + +### Production Setup + +```bash +# Install gunicorn +uv pip install gunicorn + +# Run with gunicorn +.venv/bin/gunicorn -w 4 -b 127.0.0.1:8000 app:app + +# Configure nginx/Caddy for HTTPS +# Set up systemd for process management +# Enable regular backups of data/ directory +``` + +See [docs/architecture/deployment.md](docs/architecture/deployment.md) for details. + +## License + +MIT License - see LICENSE file + +## Credits + +Built with: +- [Flask](https://flask.palletsprojects.com/) - Web framework +- [python-markdown](https://python-markdown.github.io/) - Markdown processing +- [feedgen](https://feedgen.kiesow.be/) - RSS generation +- [httpx](https://www.python-httpx.org/) - HTTP client +- [IndieLogin](https://indielogin.com/) - Authentication service + +## Contributing + +This is a personal project optimized for single-user use. If you want additional features, consider forking! + +## Support + +- Documentation: [docs/](docs/) +- Issues: GitHub Issues +- IndieWeb: [indieweb.org](https://indieweb.org/) diff --git a/TECHNOLOGY-STACK-SUMMARY.md b/TECHNOLOGY-STACK-SUMMARY.md new file mode 100644 index 0000000..f62886c --- /dev/null +++ b/TECHNOLOGY-STACK-SUMMARY.md @@ -0,0 +1,497 @@ +# StarPunk Technology Stack - Quick Reference + +## Project Understanding + +StarPunk is a **minimal, single-user IndieWeb CMS** for publishing notes with RSS syndication. The core philosophy is radical simplicity: "Every line of code must justify its existence." 
+ +### Key Requirements +- Publish IndieWeb-compatible notes +- External IndieLogin authentication via indielogin.com +- Micropub server for publishing from any client +- RSS feed generation +- File-based note storage (markdown files) +- SQLite for metadata +- Self-hostable +- API-first architecture + +## Complete Technology Stack + +### Backend + +| Component | Technology | Version | Justification | +|-----------|------------|---------|---------------| +| **Language** | Python | 3.11+ | User's preference, excellent ecosystem | +| **Web Framework** | Flask | 3.0+ | Minimal micro-framework, perfect for single-user | +| **Note Storage** | Markdown Files | - | Maximum portability, user owns data directly | +| **Metadata DB** | SQLite | Built-in | Single file, no server, perfect for single-user | +| **Markdown Rendering** | markdown | 3.5+ | Standard Python implementation | +| **RSS Generation** | feedgen | 1.0+ | Ensures valid RSS 2.0 output | +| **HTTP Client** | httpx | 0.27+ | Modern API, IndieLogin communication | +| **Configuration** | python-dotenv | 1.0+ | Standard .env file support | +| **Testing** | pytest | 8.0+ | Python testing standard | + +**Total Direct Dependencies**: 6 packages + +### Frontend + +| Component | Technology | Justification | +|-----------|------------|---------------| +| **Template Engine** | Jinja2 | Included with Flask, server-side rendering | +| **CSS** | Custom CSS (~200 lines) | No framework, full control, no build tools | +| **JavaScript** | Vanilla JS (optional) | Minimal preview feature, progressive enhancement | +| **Build Tools** | NONE | Zero build process, direct file serving | + +### Authentication + +| Component | Technology | Approach | +|-----------|------------|----------| +| **Admin Auth** | IndieLogin.com | External OAuth 2.0 service at https://indielogin.com | +| **Session Management** | HttpOnly Cookies + SQLite | 30-day sessions, secure tokens | +| **Micropub Auth** | IndieAuth Tokens | Bearer tokens, stored in 
SQLite | +| **CSRF Protection** | State Tokens | Random tokens with 5-minute expiry | + +**Key Point**: Authentication is delegated to indielogin.com, requiring zero auth code to maintain. + +## Data Architecture + +### Hybrid File + Database Storage + +#### Note Content: Markdown Files +``` +data/notes/ +├── 2024/ +│ ├── 11/ +│ │ ├── my-first-note.md +│ │ └── another-note.md +│ └── 12/ +│ └── december-note.md +``` + +- **Format**: Pure markdown, no frontmatter +- **Organization**: Year/Month subdirectories (`YYYY/MM/`) +- **Naming**: `{slug}.md` +- **Portability**: Copy anywhere, read in any editor, backup with cp/rsync/git + +#### Metadata: SQLite Database +```sql +-- Note metadata (NOT content) +CREATE TABLE notes ( + id INTEGER PRIMARY KEY, + slug TEXT UNIQUE, + file_path TEXT UNIQUE, + published BOOLEAN, + created_at TIMESTAMP, + updated_at TIMESTAMP, + content_hash TEXT +); + +-- Authentication +CREATE TABLE sessions (...); -- IndieLogin sessions +CREATE TABLE tokens (...); -- Micropub tokens +CREATE TABLE auth_state (...); -- CSRF protection +``` + +- **Location**: `data/starpunk.db` +- **Purpose**: Fast queries, indexes, referential integrity +- **Sync**: Files are authoritative for content, database for metadata + +### How They Work Together + +**Creating a Note**: +1. Generate slug +2. Write markdown file → `data/notes/YYYY/MM/slug.md` +3. Calculate content hash +4. Insert database record with metadata +5. If database fails: delete file, rollback + +**Reading a Note**: +1. Query database by slug → get file_path +2. Read markdown from file +3. Render to HTML +4. Return content + metadata + +## IndieLogin Authentication Flow + +### Configuration Required +```bash +# .env file +SITE_URL=https://starpunk.example.com +ADMIN_ME=https://your-website.com # Only this URL can authenticate +SESSION_SECRET=random-secret-key +``` + +### Authentication Steps + +1. **User initiates login** → enters their website URL +2. 
**StarPunk redirects** → to https://indielogin.com/auth with: + - `me` = user's website + - `client_id` = StarPunk URL + - `redirect_uri` = callback URL + - `state` = random CSRF token +3. **IndieLogin verifies identity** → via RelMeAuth, email, etc. +4. **User authenticates** → chooses verification method +5. **IndieLogin redirects back** → with authorization code +6. **StarPunk exchanges code** → POST to indielogin.com API +7. **IndieLogin returns** → verified "me" URL +8. **StarPunk verifies** → me == ADMIN_ME (from config) +9. **Create session** → generate token, store in database, set cookie +10. **Redirect to admin** → user is now authenticated + +### API Endpoint +**IndieLogin API documentation**: https://indielogin.com/api (both the authorization redirect and the code exchange use `https://indielogin.com/auth`) + +**Exchange Request**: +```http +POST https://indielogin.com/auth +Content-Type: application/x-www-form-urlencoded + +code={authorization_code}& +client_id={starpunk_url}& +redirect_uri={starpunk_url}/auth/callback +``` + +**Exchange Response**: +```json +{ + "me": "https://user-website.com" +} +``` + +### Security Features +- State tokens prevent CSRF attacks +- Only ADMIN_ME URL can authenticate (single-user enforcement) +- Session tokens are cryptographically random (256-bit) +- HttpOnly cookies prevent session-token theft via XSS +- Secure flag requires HTTPS +- 30-day session expiry + +## Frontend Stack Details + +### Server-Side Rendering (Jinja2) + +**Public Templates**: +- `base.html` - Base layout with HTML structure +- `index.html` - Homepage (note list) +- `note.html` - Single note permalink +- `feed.xml` - RSS feed template + +**Admin Templates**: +- `admin/base.html` - Admin layout +- `admin/login.html` - Login form +- `admin/dashboard.html` - Note list +- `admin/new.html` - Create note form +- `admin/edit.html` - Edit note form + +### CSS Approach + +**Single stylesheet**: `static/css/style.css` (~200 lines) + +```css +/* CSS custom properties for theming */ +:root { + --color-text: #333; + --color-bg: #fff; + --color-link: #0066cc; + --max-width: 42rem;
+ --spacing: 1rem; +} + +/* Mobile-first responsive */ +body { padding: 1rem; } + +@media (min-width: 768px) { + body { padding: 2rem; } +} +``` + +**No framework**: Custom CSS gives full control, no unused code. + +### JavaScript Approach + +**Single optional file**: `static/js/preview.js` + +**Purpose**: Real-time markdown preview in admin editor (progressive enhancement) + +**Implementation**: +- Vanilla JavaScript (no framework) +- Uses marked.js from CDN for client-side markdown +- Works without it (form submits to server) + +**Why vanilla JS?** +- Core functionality works without JavaScript +- Single feature doesn't justify React/Vue/Svelte +- Modern browser APIs are sufficient +- No build tools needed + +### Build Process: NONE + +- No webpack, Vite, Rollup, esbuild +- No npm, package.json, node_modules +- No Babel transpilation +- No CSS preprocessing +- Direct file serving +- Instant development setup + +**Advantages**: +- Zero build time +- No dependency hell +- Simple deployment +- Easy debugging + +## API Routes + +### Public API +``` +GET / Homepage (recent notes) +GET /note/{slug} Individual note +GET /feed.xml RSS feed +``` + +### Admin Interface +``` +GET /admin/login Login form +POST /admin/login Initiate IndieLogin flow +GET /auth/callback IndieLogin callback handler +GET /admin Dashboard (list notes) +GET /admin/new Create note form +GET /admin/edit/{id} Edit note form +POST /admin/logout Destroy session +``` + +### Notes API (Session Auth) +``` +GET /api/notes List published notes (JSON) +POST /api/notes Create note (JSON) +GET /api/notes/{id} Get single note (JSON) +PUT /api/notes/{id} Update note (JSON) +DELETE /api/notes/{id} Delete note (JSON) +``` + +### Micropub API (Token Auth) +``` +POST /api/micropub Create note (h-entry) +GET /api/micropub?q=config Query configuration +GET /api/micropub?q=source Query note source +``` + +## File Organization + +``` +starpunk/ +├── app.py # Main Flask application +├── requirements.txt # 6 dependencies 
+├── .env # Configuration (gitignored) +├── .env.example # Template +│ +├── starpunk/ # Application package +│ ├── __init__.py +│ ├── config.py # Load environment +│ ├── database.py # SQLite operations +│ ├── models.py # Data models +│ ├── auth.py # IndieLogin logic +│ ├── micropub.py # Micropub endpoint +│ ├── feed.py # RSS generation +│ └── utils.py # Helpers +│ +├── static/ +│ ├── css/style.css # Single stylesheet +│ └── js/preview.js # Optional markdown preview +│ +├── templates/ +│ ├── base.html # Public base +│ ├── index.html # Homepage +│ ├── note.html # Note permalink +│ └── admin/ +│ ├── base.html # Admin base +│ ├── login.html # Login form +│ ├── dashboard.html # Note list +│ ├── new.html # Create form +│ └── edit.html # Edit form +│ +├── data/ # Persistent (gitignored) +│ ├── notes/YYYY/MM/slug.md # Markdown files +│ └── starpunk.db # SQLite +│ +├── tests/ # pytest tests +│ ├── test_auth.py +│ ├── test_database.py +│ ├── test_micropub.py +│ └── test_feed.py +│ +└── docs/ # Architecture docs + ├── architecture/ + │ ├── overview.md + │ └── technology-stack.md + └── decisions/ + ├── ADR-001-python-web-framework.md + ├── ADR-002-flask-extensions.md + ├── ADR-003-frontend-technology.md + ├── ADR-004-file-based-note-storage.md + └── ADR-005-indielogin-authentication.md +``` + +## Recommended Architectural Patterns + +### 1. API-First Design +All functionality exposed via API, web interface consumes it. + +### 2. Progressive Enhancement +Core works without JavaScript, JS adds optional enhancements. + +### 3. File-Database Sync +Write files first, then database. Rollback on failure. + +### 4. Atomic Operations +Use temp files and atomic renames to prevent corruption. + +### 5. Token-Based Auth +Sessions for humans (cookies), tokens for APIs (bearer). 
+ +## Potential Risks & Considerations + +### Risk 1: IndieLogin.com Dependency +**Impact**: Cannot authenticate if service is down +**Mitigation**: +- Sessions last 30 days (brief outages don't lock out user) +- IndieLogin.com is stable, community-run service +- V2: Consider fallback auth method + +### Risk 2: File/Database Sync Issues +**Impact**: Data inconsistency between files and database +**Mitigation**: +- Atomic operations (write file → insert DB, rollback on error) +- Content hashing detects external modifications +- Optional integrity check on startup + +### Risk 3: SQLite Limitations +**Impact**: Limited concurrency (but this is single-user) +**Consideration**: SQLite is perfect for single-user, would need PostgreSQL for multi-user + +### Risk 4: No Built-in Backup +**Impact**: User must manage backups +**Mitigation**: +- Document backup procedures clearly +- Backup is simple (cp -r data/ backup/) +- Consider adding automated backup script + +## Deployment Stack + +### Development +```bash +# Setup +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt + +# Configure +cp .env.example .env +# Edit .env with your settings + +# Run +flask run + +# Test +pytest +``` + +### Production + +**WSGI Server**: Gunicorn +```bash +gunicorn -w 4 -b 127.0.0.1:8000 app:app +``` + +**Reverse Proxy**: Nginx or Caddy +- HTTPS termination (Let's Encrypt) +- Static file serving +- Rate limiting (optional) + +**Process Manager**: systemd +- Auto-restart on failure +- Log management +- Run on boot + +**Backup**: Cron job +```bash +# Daily backup via rsync +rsync -av /opt/starpunk/data /backup/starpunk-$(date +%Y%m%d) +``` + +## Standards Compliance + +### IndieWeb +- **Microformats2**: h-entry, h-card, e-content, dt-published, u-url +- **IndieAuth**: OAuth 2.0 flow (delegated to indielogin.com) +- **Micropub**: JSON and form-encoded, 201 Created responses + +**Validation**: +- https://indiewebify.me/ (microformats) +- https://micropub.rocks/ (Micropub 
compliance) + +### Web Standards +- **RSS 2.0**: Valid XML, RFC-822 dates, CDATA for HTML +- **HTML5**: Semantic elements, accessible, mobile-responsive +- **HTTP**: Proper status codes (200, 201, 400, 401, 404) + +**Validation**: +- https://validator.w3.org/feed/ (RSS) +- https://validator.w3.org/ (HTML) + +## Performance Targets + +- **API responses**: < 100ms +- **Page loads**: < 200ms +- **RSS generation**: < 300ms +- **Memory usage**: < 100MB +- **Startup time**: < 1 second + +## Quick Start + +```bash +# 1. Clone and setup +git clone https://github.com/YOUR_USERNAME/starpunk.git && cd starpunk +python -m venv venv && source venv/bin/activate +pip install -r requirements.txt + +# 2. Configure +cp .env.example .env +# Edit .env: +# SITE_URL=https://your-domain.com +# ADMIN_ME=https://your-website.com +# SESSION_SECRET=<paste the output of: python -c "import secrets; print(secrets.token_hex(32))"> + +# 3. Run +flask run + +# 4. Visit http://localhost:5000/admin/login +# Enter your website URL (must match ADMIN_ME) +# Authenticate via indielogin.com +# Start publishing! +``` + +## Summary + +StarPunk uses a **radically simple** technology stack: + +- **Backend**: Flask + Python stdlib + 5 small libraries +- **Storage**: Markdown files (content) + SQLite (metadata) +- **Frontend**: Jinja2 templates + custom CSS + optional vanilla JS +- **Auth**: Delegated to indielogin.com (zero maintenance) +- **Build**: None (zero build tools) +- **Deploy**: Gunicorn + nginx/Caddy + systemd + +**Total Dependencies**: 6 direct packages +**Lines of Code**: ~1500 LOC estimate for V1 +**Setup Time**: < 5 minutes +**Build Time**: 0 seconds (no build process) + +This stack embodies the project philosophy: every technology choice is justified by simplicity, fitness for purpose, and maintainability.
+ +## Further Reading + +- **Project Requirements**: `CLAUDE.MD` +- **Full Tech Stack**: `docs/architecture/technology-stack.md` +- **Architecture Overview**: `docs/architecture/overview.md` +- **All ADRs**: `docs/decisions/ADR-*.md` +- **IndieLogin API**: https://indielogin.com/api +- **IndieWeb**: https://indieweb.org/ diff --git a/app.py b/app.py new file mode 100644 index 0000000..1584cad --- /dev/null +++ b/app.py @@ -0,0 +1,13 @@ +""" +StarPunk - Minimal IndieWeb CMS +Main application entry point +""" + +from starpunk import create_app + +app = create_app() + +if __name__ == '__main__': + # Development server + # For production, use: gunicorn app:app + app.run(debug=True) diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md new file mode 100644 index 0000000..632baeb --- /dev/null +++ b/docs/architecture/overview.md @@ -0,0 +1,909 @@ +# StarPunk Architecture Overview + +## Executive Summary + +StarPunk is a minimal, single-user IndieWeb CMS designed around the principle: "Every line of code must justify its existence." The architecture prioritizes simplicity, standards compliance, and user data ownership through careful technology selection and hybrid data storage. + +**Core Architecture**: API-first Flask application with hybrid file+database storage, server-side rendering, and delegated authentication.
+ +## System Architecture + +### High-Level Components + +``` +┌─────────────────────────────────────────────────────────────┐ +│ User Browser │ +└───────────────┬─────────────────────────────────────────────┘ + │ + │ HTTP/HTTPS + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ Flask Application │ +│ ┌─────────────────────────────────────────────────────────┤ +│ │ Web Interface (Jinja2 Templates) │ +│ │ - Public: Homepage, Note Permalinks │ +│ │ - Admin: Dashboard, Note Editor │ +│ └──────────────────────────────┬──────────────────────────┘ +│ ┌──────────────────────────────┴──────────────────────────┐ +│ │ API Layer (RESTful + Micropub) │ +│ │ - Notes CRUD API │ +│ │ - Micropub Endpoint │ +│ │ - RSS Feed Generator │ +│ │ - Authentication Handlers │ +│ └──────────────────────────────┬──────────────────────────┘ +│ ┌──────────────────────────────┴──────────────────────────┐ +│ │ Business Logic │ +│ │ - Note Management (create, read, update, delete) │ +│ │ - File/Database Sync │ +│ │ - Markdown Rendering │ +│ │ - Slug Generation │ +│ │ - Session Management │ +│ └──────────────────────────────┬──────────────────────────┘ +│ ┌──────────────────────────────┴──────────────────────────┐ +│ │ Data Layer │ +│ │ ┌──────────────────┐ ┌─────────────────────────┐ │ +│ │ │ File Storage │ │ SQLite Database │ │ +│ │ │ │ │ │ │ +│ │ │ Markdown Files │ │ - Note Metadata │ │ +│ │ │ (Pure Content) │ │ - Sessions │ │ +│ │ │ │ │ - Tokens │ │ +│ │ │ data/notes/ │ │ - Auth State │ │ +│ │ │ YYYY/MM/ │ │ │ │ +│ │ │ slug.md │ │ data/starpunk.db │ │ +│ │ └──────────────────┘ └─────────────────────────┘ │ +│ └─────────────────────────────────────────────────────────┘ +└─────────────────────────────────────────────────────────────┘ + │ + │ HTTPS + ↓ +┌─────────────────────────────────────────────────────────────┐ +│ External Services │ +│ - IndieLogin.com (Authentication) │ +│ - User's Website (Identity Verification) │ +│ - Micropub Clients (Publishing) │ 
+└─────────────────────────────────────────────────────────────┘ +``` + +## Core Principles + +### 1. Radical Simplicity +- Total dependencies: 6 direct packages +- No build tools, no npm, no bundlers +- Server-side rendering eliminates frontend complexity +- Single file SQLite database +- Zero configuration frameworks + +### 2. Hybrid Data Architecture +**Files for Content**: Markdown notes stored as plain text files +- Maximum portability +- Human-readable +- Direct user access +- Easy backup (copy, rsync, git) + +**Database for Metadata**: SQLite stores structured data +- Fast queries and indexes +- Referential integrity +- Efficient filtering and sorting +- Transaction support + +**Sync Strategy**: Files are authoritative for content; database is authoritative for metadata. Both must stay in sync. + +### 3. Standards-First Design +- IndieWeb: Microformats2, IndieAuth, Micropub +- Web: HTML5, RSS 2.0, HTTP standards +- Security: OAuth 2.0, HTTPS, secure cookies +- Data: CommonMark markdown + +### 4. API-First Architecture +All functionality exposed via API, web interface consumes API. This enables: +- Micropub client support +- Future client applications +- Scriptable automation +- Clean separation of concerns + +### 5. 
Progressive Enhancement +- Core functionality works without JavaScript +- JavaScript adds optional enhancements (markdown preview) +- Server-side rendering for fast initial loads +- Mobile-responsive from the start + +## Component Descriptions + +### Web Layer + +#### Public Interface +**Purpose**: Display published notes to the world +**Technology**: Server-side rendered HTML (Jinja2) +**Routes**: +- `/` - Homepage with recent notes +- `/note/{slug}` - Individual note permalink +- `/feed.xml` - RSS feed + +**Features**: +- Microformats2 markup (h-entry, h-card) +- Reverse chronological note list +- Clean, minimal design +- Mobile-responsive +- No JavaScript required + +#### Admin Interface +**Purpose**: Manage notes (create, edit, publish) +**Technology**: Server-side rendered HTML (Jinja2) + optional vanilla JS +**Routes**: +- `/admin/login` - Authentication +- `/admin` - Dashboard (list of all notes) +- `/admin/new` - Create new note +- `/admin/edit/{id}` - Edit existing note + +**Features**: +- Markdown editor +- Optional real-time preview (JS enhancement) +- Publish/draft toggle +- Protected by session authentication + +### API Layer + +#### Notes API +**Purpose**: CRUD operations for notes +**Authentication**: Session-based (admin interface) +**Routes**: +``` +GET /api/notes List published notes +POST /api/notes Create new note +GET /api/notes/{id} Get single note +PUT /api/notes/{id} Update note +DELETE /api/notes/{id} Delete note +``` + +**Response Format**: JSON + +#### Micropub Endpoint +**Purpose**: Accept posts from external Micropub clients +**Authentication**: IndieAuth bearer tokens +**Routes**: +``` +POST /api/micropub Create note (h-entry) +GET /api/micropub?q=config Query configuration +GET /api/micropub?q=source Query note source +``` + +**Content Types**: +- application/json +- application/x-www-form-urlencoded + +**Compliance**: Full Micropub specification + +#### RSS Feed +**Purpose**: Syndicate published notes +**Technology**: feedgen library 
+**Route**: `/feed.xml` +**Format**: Valid RSS 2.0 XML +**Caching**: 5 minutes +**Features**: +- All published notes +- RFC-822 date formatting +- CDATA-wrapped HTML content +- Proper GUID for each item + +### Business Logic Layer + +#### Note Management +**Operations**: +1. **Create**: Generate slug → write file → insert database record +2. **Read**: Query database for path → read file → render markdown +3. **Update**: Write file atomically → update database timestamp +4. **Delete**: Mark deleted in database → optionally archive file + +**Key Components**: +- Slug generation (URL-safe, unique) +- Markdown rendering (markdown library) +- Content hashing (integrity verification) +- Atomic file operations (prevent corruption) + +#### File/Database Sync +**Strategy**: Write files first, then database +**Rollback**: If database operation fails, delete/restore file +**Verification**: Content hash detects external modifications +**Integrity Check**: Optional scan for orphaned files/records + +#### Authentication +**Admin Auth**: IndieLogin.com OAuth 2.0 flow +- User enters website URL +- Redirect to indielogin.com +- Verify identity via RelMeAuth or email +- Return verified "me" URL +- Create session token +- Store in HttpOnly cookie + +**Micropub Auth**: IndieAuth token verification +- Client obtains token via IndieAuth flow +- Token sent as Bearer in Authorization header +- Verify token exists and not expired +- Check scope permissions + +### Data Layer + +#### File Storage +**Location**: `data/notes/` +**Structure**: `YYYY/MM/slug.md` +**Format**: Pure markdown, no frontmatter +**Operations**: +- Atomic writes (temp file → rename) +- Directory creation (makedirs) +- Content reading (UTF-8 encoding) + +**Example**: +``` +data/notes/ +├── 2024/ +│ ├── 11/ +│ │ ├── my-first-note.md +│ │ └── another-note.md +│ └── 12/ +│ └── december-note.md +``` + +#### Database Storage +**Location**: `data/starpunk.db` +**Engine**: SQLite3 +**Tables**: +- `notes` - Metadata (slug, 
file_path, published, timestamps, hash) +- `sessions` - Auth sessions (token, me, expiry) +- `tokens` - Micropub tokens (token, me, client_id, scope) +- `auth_state` - CSRF tokens (state, expiry) + +**Indexes**: +- `notes.created_at` (DESC) - Fast chronological queries +- `notes.published` - Fast filtering +- `notes.slug` - Fast lookup by slug +- `sessions.session_token` - Fast auth checks + +**Queries**: Direct SQL using Python sqlite3 module (no ORM) + +## Data Flow Examples + +### Creating a Note (via Admin Interface) + +``` +1. User fills out form at /admin/new + ↓ +2. POST to /api/notes with markdown content + ↓ +3. Verify user session (check session cookie) + ↓ +4. Generate unique slug from content or timestamp + ↓ +5. Determine file path: data/notes/2024/11/slug.md + ↓ +6. Create directories if needed (makedirs) + ↓ +7. Write markdown content to file (atomic write) + ↓ +8. Calculate SHA-256 hash of content + ↓ +9. Begin database transaction + ↓ +10. Insert record into notes table: + - slug + - file_path + - published (from form) + - created_at (now) + - updated_at (now) + - content_hash + ↓ +11. If database insert fails: + - Delete file + - Return error to user + ↓ +12. If database insert succeeds: + - Commit transaction + - Return success with note URL + ↓ +13. Redirect user to /admin (dashboard) +``` + +### Reading a Note (via Public Interface) + +``` +1. User visits /note/my-first-note + ↓ +2. Extract slug from URL + ↓ +3. Query database: + SELECT file_path, created_at, published + FROM notes + WHERE slug = 'my-first-note' AND published = 1 + ↓ +4. If not found → 404 error + ↓ +5. Read markdown content from file: + - Open data/notes/2024/11/my-first-note.md + - Read UTF-8 content + ↓ +6. Render markdown to HTML (markdown.markdown()) + ↓ +7. Render Jinja2 template with: + - content_html (rendered HTML) + - created_at (timestamp) + - slug (for permalink) + ↓ +8. Return HTML with microformats markup +``` + +### Publishing via Micropub + +``` +1. 
Micropub client POSTs to /api/micropub + Headers: Authorization: Bearer {token} + Body: {"type": ["h-entry"], "properties": {"content": ["..."]}} + ↓ +2. Extract bearer token from Authorization header + ↓ +3. Query database: + SELECT me, scope FROM tokens + WHERE token = {token} AND expires_at > now() + ↓ +4. If token invalid → 401 Unauthorized + ↓ +5. Parse Micropub JSON payload + ↓ +6. Extract content from properties.content[0] + ↓ +7. Create note (same flow as admin interface): + - Generate slug + - Write file + - Insert database record + ↓ +8. If successful: + - Return 201 Created + - Set Location header to note URL + ↓ +9. Client receives note URL, displays success +``` + +### IndieLogin Authentication Flow + +``` +1. User visits /admin/login + ↓ +2. User enters their website: https://alice.example.com + ↓ +3. POST to /admin/login with "me" parameter + ↓ +4. Validate URL format + ↓ +5. Generate random state token (CSRF protection) + ↓ +6. Store state in database with 5-minute expiry + ↓ +7. Build IndieLogin authorization URL: + https://indielogin.com/auth? + me=https://alice.example.com + client_id=https://starpunk.example.com + redirect_uri=https://starpunk.example.com/auth/callback + state={random_state} + ↓ +8. Redirect user to IndieLogin + ↓ +9. IndieLogin verifies user's identity: + - Checks rel="me" links on alice.example.com + - Or sends email verification + - User authenticates via chosen method + ↓ +10. IndieLogin redirects back: + /auth/callback?code={auth_code}&state={state} + ↓ +11. Verify state matches stored value (CSRF check) + ↓ +12. Exchange code for verified identity: + POST https://indielogin.com/auth + code={auth_code} + client_id=https://starpunk.example.com + redirect_uri=https://starpunk.example.com/auth/callback + ↓ +13. IndieLogin returns: {"me": "https://alice.example.com"} + ↓ +14. Verify me == ADMIN_ME (config) + ↓ +15. 
If match: + - Generate session token + - Insert into sessions table + - Set HttpOnly, Secure cookie + - Redirect to /admin + ↓ +16. If no match: + - Return "Unauthorized" error + - Log attempt +``` + +## Security Architecture + +### Authentication Security + +#### Session Management +- **Token Generation**: `secrets.token_urlsafe(32)` (256-bit entropy) +- **Storage**: Hash before storing in database +- **Cookies**: HttpOnly, Secure, SameSite=Lax +- **Expiry**: 30 days, extendable on use +- **Validation**: Every protected route checks session + +#### CSRF Protection +- **State Tokens**: Random tokens for OAuth flows +- **Expiry**: 5 minutes (short-lived) +- **Single-Use**: Deleted after verification +- **SameSite**: Cookies set to Lax mode + +#### Access Control +- **Admin Routes**: Require valid session +- **Micropub Routes**: Require valid bearer token +- **Public Routes**: No authentication needed +- **Identity Verification**: Only ADMIN_ME can authenticate + +### Input Validation + +#### User Input +- **Markdown**: Sanitize to prevent XSS in rendered HTML +- **URLs**: Validate format and scheme (https://) +- **Slugs**: Alphanumeric + hyphens only +- **JSON**: Parse and validate structure +- **File Paths**: Prevent directory traversal (validate against base path) + +#### Micropub Payloads +- **Content-Type**: Verify matches expected format +- **Required Fields**: Validate h-entry structure +- **Size Limits**: Prevent DoS via large payloads +- **Scope Verification**: Check token has required permissions + +### Database Security + +#### SQL Injection Prevention +- **Parameterized Queries**: Always use parameter substitution +- **No String Interpolation**: Never build SQL with f-strings +- **Input Sanitization**: Validate before database operations + +Example: +```python +# GOOD +cursor.execute("SELECT * FROM notes WHERE slug = ?", (slug,)) + +# BAD (SQL injection vulnerable) +cursor.execute(f"SELECT * FROM notes WHERE slug = '{slug}'") +``` + +#### Data Integrity 
+- **Transactions**: Use for multi-step operations +- **Constraints**: UNIQUE on slugs, file_paths +- **Foreign Keys**: Enforce relationships (if applicable) +- **Content Hashing**: Detect unauthorized file modifications + +### Network Security + +#### HTTPS +- **Production Requirement**: TLS 1.2+ required +- **Reverse Proxy**: Nginx/Caddy handles SSL termination +- **Certificate Validation**: Verify SSL certs on outbound requests +- **HSTS**: Set Strict-Transport-Security header + +#### Security Headers +```python +# Set on all responses +Content-Security-Policy: default-src 'self' +X-Frame-Options: DENY +X-Content-Type-Options: nosniff +Referrer-Policy: strict-origin-when-cross-origin +``` + +#### Rate Limiting +- **Implementation**: Reverse proxy (nginx/Caddy) +- **Admin Routes**: Stricter limits +- **API Routes**: Moderate limits +- **Public Routes**: Permissive limits + +### File System Security + +#### Atomic Operations +```python +# Write to temp file, then atomic rename +temp_path = f"{target_path}.tmp" +with open(temp_path, 'w') as f: + f.write(content) +os.rename(temp_path, target_path) # Atomic on POSIX +``` + +#### Path Validation +```python +# Prevent directory traversal +base_path = os.path.abspath(DATA_PATH) +requested_path = os.path.abspath(os.path.join(base_path, user_input)) +if not requested_path.startswith(base_path): + raise SecurityError("Path traversal detected") +``` + +#### File Permissions +- **Data Directory**: 700 (owner only) +- **Database File**: 600 (owner read/write) +- **Note Files**: 600 (owner read/write) +- **Application User**: Dedicated non-root user + +## Performance Considerations + +### Response Time Targets +- **API Responses**: < 100ms (database + file read) +- **Page Renders**: < 200ms (template rendering) +- **RSS Feed**: < 300ms (query + file reads + XML generation) + +### Optimization Strategies + +#### Database +- **Indexes**: On frequently queried columns (created_at, slug, published) +- **Connection Pooling**: 
Not needed; one SQLite connection per worker process (single-user, minimal contention) +- **Query Optimization**: SELECT only needed columns +- **Prepared Statements**: Reuse compiled queries + +#### File System +- **Caching**: Consider caching rendered HTML in memory (optional) +- **Directory Structure**: Year/Month prevents large directories +- **Atomic Reads**: Fast sequential reads, no locking needed + +#### HTTP +- **Static Assets**: Cache headers on CSS/JS (1 year) +- **RSS Feed**: Cache for 5 minutes (Cache-Control) +- **Compression**: gzip/brotli via reverse proxy +- **ETags**: For conditional requests + +#### Rendering +- **Template Compilation**: Jinja2 compiles templates automatically +- **Minimal Templating**: Simple templates render fast +- **Server-Side**: No client-side rendering overhead + +### Resource Usage + +#### Memory +- **Flask Process**: ~50MB base +- **SQLite**: ~10MB typical working set +- **Total**: < 100MB under normal load + +#### Disk +- **Application**: ~5MB (code + dependencies) +- **Database**: ~1MB per 1000 notes +- **Notes**: ~5KB average per markdown file +- **Total**: Scales linearly with note count + +#### CPU +- **Idle**: Near zero +- **Request Handling**: Minimal (no heavy processing) +- **Markdown Rendering**: Fast (pure Python) +- **Database Queries**: Indexed, sub-millisecond + +## Deployment Architecture + +### Single-Server Deployment + +``` +┌─────────────────────────────────────────────────┐ +│ Internet │ +└────────────────┬────────────────────────────────┘ + │ + │ Port 443 (HTTPS) + ↓ +┌─────────────────────────────────────────────────┐ +│ Nginx/Caddy (Reverse Proxy) │ +│ - SSL/TLS termination │ +│ - Static file serving │ +│ - Rate limiting │ +│ - Compression │ +└────────────────┬────────────────────────────────┘ + │ + │ Port 8000 (HTTP) + ↓ +┌─────────────────────────────────────────────────┐ +│ Gunicorn (WSGI Server) │ +│ - 4 worker processes │ +│ - Process management │ +│ - Load balancing (round-robin) │ 
+└────────────────┬────────────────────────────────┘ + │ + │ WSGI + ↓ +┌─────────────────────────────────────────────────┐ +│ Flask Application │ +│ - Request handling │ +│ - Business logic │ +│ - Template rendering │ +└────────────────┬────────────────────────────────┘ + │ + ↓ +┌────────────────────────────┬────────────────────┐ +│ File System │ SQLite Database │ +│ data/notes/ │ data/starpunk.db │ +│ YYYY/MM/slug.md │ │ +└────────────────────────────┴────────────────────┘ +``` + +### Process Management (systemd) + +```ini +[Unit] +Description=StarPunk CMS +After=network.target + +[Service] +Type=notify +User=starpunk +WorkingDirectory=/opt/starpunk +Environment="PATH=/opt/starpunk/venv/bin" +ExecStart=/opt/starpunk/venv/bin/gunicorn -w 4 -b 127.0.0.1:8000 app:app +Restart=always +RestartSec=10 + +[Install] +WantedBy=multi-user.target +``` + +### Backup Strategy + +#### Automated Daily Backup +```bash +#!/bin/bash +# backup.sh - Run daily via cron + +DATE=$(date +%Y%m%d) +BACKUP_DIR="/backup/starpunk" + +# Backup data directory (notes + database) +rsync -av /opt/starpunk/data/ "$BACKUP_DIR/$DATE/" + +# Keep last 30 days +find "$BACKUP_DIR" -maxdepth 1 -type d -mtime +30 -exec rm -rf {} \; +``` + +#### Manual Backup +```bash +# Simple copy +cp -r /opt/starpunk/data /backup/starpunk-$(date +%Y%m%d) + +# Or with compression +tar -czf starpunk-backup-$(date +%Y%m%d).tar.gz /opt/starpunk/data +``` + +### Restore Process + +1. Stop application: `sudo systemctl stop starpunk` +2. Restore data directory: `rsync -av /backup/starpunk/20241118/ /opt/starpunk/data/` +3. Fix permissions: `chown -R starpunk:starpunk /opt/starpunk/data` +4. Start application: `sudo systemctl start starpunk` +5. 
Verify: Visit site, check recent notes + +## Testing Strategy + +### Test Pyramid + +``` + ┌─────────────┐ + / \ + / Manual Tests \ Validation, Real Services + /───────────────── \ + / \ + / Integration Tests \ API Flows, Database + Files + /─────────────────────── \ + / \ + / Unit Tests \ Functions, Logic, Parsing + /───────────────────────────────\ +``` + +### Unit Tests (pytest) +**Coverage**: Business logic, utilities, models +**Examples**: +- Slug generation and uniqueness +- Markdown rendering with various inputs +- Content hash calculation +- File path validation +- Token generation and verification +- Date formatting for RSS +- Micropub payload parsing + +### Integration Tests +**Coverage**: Component interactions, full flows +**Examples**: +- Create note: file write + database insert +- Read note: database query + file read +- IndieLogin flow with mocked API +- Micropub creation with token validation +- RSS feed generation with multiple notes +- Session authentication on protected routes + +### End-to-End Tests +**Coverage**: Full user workflows +**Examples**: +- Admin login via IndieLogin (mocked) +- Create note via web interface +- Publish note via Micropub client (mocked) +- View note on public site +- Verify RSS feed includes note + +### Validation Tests +**Coverage**: Standards compliance +**Tools**: +- W3C HTML Validator (validate templates) +- W3C Feed Validator (validate RSS output) +- IndieWebify.me (verify microformats) +- Micropub.rocks (test Micropub compliance) + +### Manual Tests +**Coverage**: Real-world usage +**Examples**: +- Authenticate with real indielogin.com +- Publish from actual Micropub client (Quill, Indigenous) +- Subscribe to feed in actual RSS reader +- Browser compatibility (Chrome, Firefox, Safari, mobile) +- Accessibility with screen reader + +## Monitoring and Observability + +### Logging Strategy + +#### Application Logs +```python +# Structured logging +import logging + +logger = logging.getLogger(__name__) + +# Info: 
Normal operations +logger.info("Note created", extra={ + "slug": slug, + "published": published, + "user": session.me +}) + +# Warning: Recoverable issues +logger.warning("State token expired", extra={ + "state": state, + "age": age_seconds +}) + +# Error: Failed operations +logger.error("File write failed", extra={ + "path": file_path, + "error": str(e) +}) +``` + +#### Log Levels +- **DEBUG**: Development only (verbose) +- **INFO**: Normal operations (note creation, auth success) +- **WARNING**: Unusual but handled (expired tokens, invalid input) +- **ERROR**: Failed operations (file I/O errors, database errors) +- **CRITICAL**: System failures (database unreachable) + +#### Log Destinations +- **Development**: Console (stdout) +- **Production**: File rotation (logrotate) + optional syslog + +### Metrics (Optional for V2) + +**Simple Metrics** (if desired): +- Note count (query database) +- Request count (nginx logs) +- Error rate (grep application logs) +- Response times (nginx logs) + +**Advanced Metrics** (V2): +- Prometheus exporter +- Grafana dashboard +- Alert on error rate spike + +### Health Checks + +```python +@app.route('/health') +def health_check(): + """Simple health check for monitoring""" + try: + # Check database + db.execute("SELECT 1").fetchone() + + # Check file system + os.path.exists(DATA_PATH) + + return {"status": "ok"}, 200 + except Exception as e: + return {"status": "error", "detail": str(e)}, 500 +``` + +## Migration and Evolution + +### V1 to V2 Migration + +#### Database Schema Changes +```sql +-- Add new column with default +ALTER TABLE notes ADD COLUMN tags TEXT DEFAULT ''; + +-- Create new table +CREATE TABLE tags ( + id INTEGER PRIMARY KEY, + name TEXT UNIQUE NOT NULL +); + +-- Migration script updates existing notes +``` + +#### File Format Evolution +**V1**: Pure markdown +**V2** (if needed): Add optional frontmatter +```markdown +--- +tags: indieweb, cms +--- +Note content here +``` + +**Backward Compatibility**: Parser checks 
for frontmatter, falls back to pure markdown. + +#### API Versioning +``` +# V1 (current) +GET /api/notes + +# V2 (future) +GET /api/v2/notes # New features +GET /api/notes # Still works, returns V1 response +``` + +### Data Export/Import + +#### Export Formats +1. **Markdown Bundle**: Zip of all notes (already portable) +2. **JSON Export**: Notes + metadata + ```json + { + "version": "1.0", + "exported_at": "2024-11-18T12:00:00Z", + "notes": [ + { + "slug": "my-note", + "content": "Note content...", + "created_at": "2024-11-01T12:00:00Z", + "published": true + } + ] + } + ``` +3. **RSS Archive**: Existing feed.xml + +#### Import (V2) +- From JSON export +- From WordPress XML +- From markdown directory +- From other IndieWeb CMSs + +## Success Metrics + +The architecture is successful if it enables: + +1. **Fast Development**: < 1 week to implement V1 +2. **Easy Deployment**: < 5 minutes to get running +3. **Low Maintenance**: Runs for months without intervention +4. **High Performance**: All responses < 300ms +5. **Data Ownership**: User has direct access to all content +6. **Standards Compliance**: Passes all validators +7. 
**Extensibility**: Can add V2 features without rewrite + +## References + +### Internal Documentation +- [Technology Stack](technology-stack.md) +- [ADR-001: Python Web Framework](../decisions/ADR-001-python-web-framework.md) +- [ADR-002: Flask Extensions](../decisions/ADR-002-flask-extensions.md) +- [ADR-003: Frontend Technology](../decisions/ADR-003-frontend-technology.md) +- [ADR-004: File-Based Storage](../decisions/ADR-004-file-based-note-storage.md) +- [ADR-005: IndieLogin Authentication](../decisions/ADR-005-indielogin-authentication.md) + +### External Standards +- [IndieWeb](https://indieweb.org/) +- [IndieAuth Spec](https://indieauth.spec.indieweb.org/) +- [Micropub Spec](https://micropub.spec.indieweb.org/) +- [Microformats2](https://microformats.org/wiki/h-entry) +- [RSS 2.0](https://www.rssboard.org/rss-specification) +- [Flask Documentation](https://flask.palletsprojects.com/) diff --git a/docs/architecture/technology-stack.md b/docs/architecture/technology-stack.md new file mode 100644 index 0000000..8e44115 --- /dev/null +++ b/docs/architecture/technology-stack.md @@ -0,0 +1,1082 @@ +# StarPunk Technology Stack + +## Project Summary + +StarPunk is a minimal, single-user IndieWeb CMS for publishing notes with RSS syndication. The project emphasizes radical simplicity, standards compliance, and user data ownership. Every technology choice is driven by the principle: "Every line of code must justify its existence. When in doubt, leave it out." 
+ +### Core Requirements +- Publish IndieWeb-compatible notes (https://indieweb.org/note) +- IndieAuth authentication using external provider (indielogin.com) +- Micropub server endpoint for publishing from any client +- RSS feed generation for syndication +- File-based note storage for maximum portability +- SQLite for metadata and structured data +- Self-hostable single-user system +- API-first architecture +- Markdown support + +## Complete Technology Stack + +### Backend Stack + +#### Web Framework: Flask 3.0+ +**Purpose**: HTTP server, routing, templating +**Justification**: +- Minimal micro-framework (< 1000 lines core code) +- Perfect for single-user applications +- Native support for both JSON APIs and HTML rendering +- Mature, stable, well-documented (13+ years) +- Built-in Jinja2 templating for server-side rendering +- Standard WSGI interface for deployment flexibility + +**Alternatives Rejected**: +- FastAPI: Async complexity unnecessary for single-user CMS +- Django: Massive framework with ORM, admin, multi-user features we don't need +- Bottle: Too minimal, smaller ecosystem + +**Reference**: ADR-001 + +#### Python Version: 3.11+ +**Purpose**: Programming language +**Justification**: +- User's preferred language +- Excellent standard library (sqlite3, hashlib, secrets, etc.) 
+- Rich ecosystem for web development +- Strong typing support (type hints) +- Mature dependency management (pip, venv) + +#### Data Persistence: Hybrid File + Database + +##### Note Storage: Markdown Files on Disk +**Purpose**: Store note content +**Format**: Plain markdown files (.md) +**Structure**: +``` +data/notes/ +├── 2024/ +│ ├── 11/ +│ │ ├── my-first-note.md +│ │ └── another-note.md +│ └── 12/ +│ └── december-note.md +└── 2025/ + └── 01/ + └── new-year-note.md +``` + +**Naming Convention**: `{slug}.md` +**Organization**: Year/Month subdirectories (`YYYY/MM/`) +**File Format**: Pure markdown, no frontmatter + +**Justification**: +- Maximum portability (user requirement) +- Human-readable, editable in any text editor +- Easy backup (cp, rsync, git) +- User owns data directly +- No vendor lock-in +- Future-proof format + +**Reference**: ADR-004 + +##### Metadata Storage: SQLite +**Purpose**: Store note metadata, sessions, tokens +**Database**: `data/starpunk.db` + +**Schema**: +```sql +-- Note metadata (NOT content) +CREATE TABLE notes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + slug TEXT UNIQUE NOT NULL, + file_path TEXT UNIQUE NOT NULL, + published BOOLEAN DEFAULT 0, + created_at TIMESTAMP NOT NULL, + updated_at TIMESTAMP NOT NULL, + content_hash TEXT +); + +-- Authentication sessions (IndieLogin) +CREATE TABLE sessions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_token TEXT UNIQUE NOT NULL, + me TEXT NOT NULL, + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP NOT NULL, + last_used_at TIMESTAMP +); + +-- Micropub access tokens +CREATE TABLE tokens ( + token TEXT PRIMARY KEY, + me TEXT NOT NULL, + client_id TEXT, + scope TEXT, + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP +); + +-- CSRF state tokens +CREATE TABLE auth_state ( + state TEXT PRIMARY KEY, + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP NOT NULL +); +``` + +**Justification**: +- Single file, perfect for single-user +- No separate server process +- Excellent for 
read-heavy workloads (blog) +- Fast indexing and querying +- Built into Python standard library +- Enables efficient metadata queries without parsing files +- Atomic transactions for data integrity + +**Hybrid Strategy**: Files are authoritative for content; database is authoritative for metadata. This gives us portability AND performance. + +**Reference**: ADR-004 + +#### Core Dependencies + +##### markdown (3.5+) +**Purpose**: Convert markdown to HTML +**Usage**: Render note content for display and RSS feed +**Justification**: +- Pure Python, standard markdown implementation +- Simple API: `markdown.markdown(text)` +- Sufficient performance for single-user system +- More standard than alternatives (mistune) + +##### feedgen (1.0+) +**Purpose**: Generate RSS 2.0 feeds +**Usage**: Create valid RSS feed from published notes +**Justification**: +- High-level API ensures RSS 2.0 compliance +- Handles date formatting (RFC-822) automatically +- CDATA wrapping for HTML content +- Better than manual XML generation (error-prone) + +##### httpx (0.27+) +**Purpose**: HTTP client library +**Usage**: +- Communication with indielogin.com API +- Verify Micropub client metadata +- Fetch remote URLs for verification +**Justification**: +- Modern, clean API +- Synchronous and async support +- Preferred over requests (no async support) and urllib (too low-level) +- Proper timeout handling +- SSL verification built-in + +##### python-dotenv (1.0+) +**Purpose**: Environment configuration +**Usage**: Load settings from `.env` file +**Justification**: +- Industry standard for configuration +- Keeps secrets out of code +- Simple API: `load_dotenv()` +- Minimal overhead + +##### pytest (8.0+) +**Purpose**: Testing framework +**Usage**: Unit and integration tests +**Justification**: +- Current Python testing standard +- Minimal boilerplate +- Clear assertions +- Built-in fixtures +- Better than unittest (verbose) and nose2 (unmaintained) + +**Reference**: ADR-002 + +#### Dependencies 
Explicitly REJECTED + +- **Flask-SQLAlchemy**: ORM abstraction unnecessary, adds complexity +- **Flask-Login**: Session-based auth, we need token-based for Micropub +- **Flask-CORS**: Single decorator, don't need full extension (5 lines of code) +- **Flask-WTF**: Form library overkill for simple note creation +- **Flask-Limiter**: Rate limiting deferred to V2 or reverse proxy + +**Decision**: Use Python standard library and explicit code instead of extensions where possible. Each dependency must justify its existence. + +**Reference**: ADR-002 + +### Frontend Stack + +#### Template Engine: Jinja2 +**Purpose**: Server-side HTML rendering +**Included With**: Flask (no additional dependency) +**Usage**: +- Public interface (homepage, note permalinks) +- Admin interface (dashboard, note editor) +- Microformats markup (h-entry, h-card) + +**Justification**: +- Zero build process +- Server-side rendering for better performance +- Works without JavaScript (progressive enhancement) +- Easy microformats implementation +- Familiar syntax +- Stable and mature + +**Reference**: ADR-003 + +#### CSS: Custom Stylesheet +**Purpose**: Visual styling +**Approach**: Single custom CSS file, no framework +**File**: `static/css/style.css` +**Size**: ~200 lines for entire site + +**Features**: +- CSS custom properties (variables) for theming +- Mobile-first responsive design +- Simple media queries for tablet/desktop +- Semantic HTML5 + minimal classes + +**Justification**: +- No framework overhead (Bootstrap, Tailwind, etc.) 
+- No build tools required +- Full control over appearance +- Minimal single theme fits project scope +- Faster than loading framework CSS + +**Example**: +```css +:root { + --color-text: #333; + --color-bg: #fff; + --max-width: 42rem; + --spacing: 1rem; +} +``` + +**Frameworks Rejected**: +- Tailwind: Requires build process, utility-first doesn't fit +- Bootstrap/Bulma: Too many unused features +- PicoCSS: Good but custom CSS gives more control + +**Reference**: ADR-003 + +#### JavaScript: Minimal Vanilla JS +**Purpose**: Markdown preview in admin (optional enhancement) +**Approach**: Single vanilla JavaScript file, no framework +**File**: `static/js/preview.js` +**Dependency**: marked.js via CDN (client-side markdown) + +**Usage**: +- Optional real-time markdown preview in note editor +- Progressive enhancement (works without JS) + +**Justification**: +- Core functionality works without JavaScript +- Single optional feature doesn't justify framework +- Vanilla JS sufficient for simple preview +- Modern browser APIs (fetch, DOM manipulation) are enough +- No build tools required + +**Frameworks Rejected**: +- React/Vue/Svelte: Massive overkill for one preview feature +- htmx: Interesting but not needed for V1 +- Alpine.js: Too much for minimal JS needs + +**Reference**: ADR-003 + +#### Build Tools: NONE +**Decision**: No build process whatsoever +**Justification**: +- Server-side rendering eliminates need for bundling +- Custom CSS served directly +- Vanilla JS served directly +- Modern browsers support ES6+ natively +- Zero npm dependencies +- Instant development setup + +**This means**: +- No webpack, Vite, Rollup, esbuild +- No Babel transpilation +- No PostCSS processing +- No minification (premature optimization) +- No asset pipeline + +**Reference**: ADR-003 + +### Authentication Stack + +#### Admin Authentication: IndieLogin.com +**Purpose**: Authenticate the admin user via their personal website +**Provider**: External service at https://indielogin.com 
+**API**: https://indielogin.com/api +**Protocol**: OAuth 2.0 / IndieAuth + +**Flow**: +1. User enters their website URL +2. StarPunk redirects to indielogin.com with state token +3. indielogin.com verifies user's identity (RelMeAuth, email, etc.) +4. indielogin.com redirects back with authorization code +5. StarPunk exchanges code for verified identity +6. StarPunk creates session cookie + +**Session Management**: +- HttpOnly, Secure cookies +- 30-day expiry +- Stored in SQLite sessions table +- CSRF protection via state tokens + +**Configuration**: +```bash +ADMIN_ME=https://your-website.com # Only this URL can authenticate +SESSION_SECRET=random-secret-key +``` + +**Justification**: +- Extremely simple (< 100 lines of code) +- No authentication code to maintain +- No password management needed +- True IndieWeb authentication (user owns identity) +- Secure by default (delegated to trusted service) +- Community-maintained, stable service + +**Alternatives Rejected**: +- Self-hosted IndieAuth: Too complex for V1 +- Password auth: Not IndieWeb-compatible, security burden +- OAuth (GitHub/Google): User doesn't own identity + +**Reference**: ADR-005 + +#### Micropub Authentication: IndieAuth Tokens +**Purpose**: Authenticate Micropub API clients +**Protocol**: IndieAuth bearer tokens +**Flow**: Standard IndieAuth authorization code grant +**Storage**: Tokens table in SQLite + +**Note**: Micropub token endpoint is separate from admin authentication. Users authenticate their Micropub clients (e.g., mobile apps) separately via IndieAuth flow. This will be detailed in a future ADR for Micropub implementation. 
+ +### Development Tools + +#### Code Quality +``` +pytest-cov # Test coverage reporting +black # Code formatting (standard: 88 char line length) +flake8 # Linting +mypy # Type checking (optional but recommended) +``` + +**Justification**: +- Automated formatting prevents style debates +- Linting catches common errors +- Test coverage ensures quality +- Type hints improve maintainability + +#### Development Workflow +```bash +# Setup +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt + +# Run +flask run + +# Test +pytest + +# Format +black . +flake8 . +``` + +**No additional tools required**: No npm, no build scripts, no containers (optional for deployment). + +### Deployment Stack + +#### WSGI Server: Gunicorn (Production) +**Purpose**: Production HTTP server +**Justification**: +- Standard Python WSGI server +- Production-ready +- Better performance than Flask dev server +- Simple configuration + +**Alternative**: uWSGI (more complex, not needed for single-user) + +#### Reverse Proxy: Nginx or Caddy (Recommended) +**Purpose**: HTTPS termination, static file serving +**Justification**: +- Handle SSL/TLS certificates +- Serve static files efficiently +- Rate limiting (optional) +- Proven deployment pattern + +#### Process Manager: systemd (Recommended) +**Purpose**: Keep application running +**Justification**: +- Standard on modern Linux +- Auto-restart on failure +- Log management + +#### Deployment Package: Single Unit +**Structure**: +``` +starpunk/ +├── app.py # Main application +├── requirements.txt # Dependencies +├── .env.example # Configuration template +├── static/ # CSS, JS +├── templates/ # Jinja2 templates +├── data/ # Notes + SQLite (persistent) +│ ├── notes/ +│ └── starpunk.db +└── README.md # Setup instructions +``` + +**Deployment**: +- Clone repository +- Create virtual environment +- Install dependencies +- Configure .env file +- Run with Gunicorn + systemd +- (Optional) Nginx for HTTPS + +**Justification**: Single 
self-contained package, easy to deploy and backup. + +## File Organization + +### Project Structure +``` +starpunk/ +├── app.py # Main Flask application +├── requirements.txt # Python dependencies +├── .env # Environment configuration (gitignored) +├── .env.example # Configuration template +├── README.md # Setup documentation +├── CLAUDE.MD # Project requirements +│ +├── starpunk/ # Application package +│ ├── __init__.py +│ ├── config.py # Configuration loading +│ ├── database.py # SQLite operations +│ ├── models.py # Data models +│ ├── auth.py # Authentication logic +│ ├── micropub.py # Micropub endpoint +│ ├── feed.py # RSS generation +│ └── utils.py # Helper functions +│ +├── static/ # Static assets +│ ├── css/ +│ │ └── style.css # Single stylesheet +│ └── js/ +│ └── preview.js # Optional markdown preview +│ +├── templates/ # Jinja2 templates +│ ├── base.html # Base layout +│ ├── index.html # Homepage (note list) +│ ├── note.html # Single note +│ ├── feed.xml # RSS template +│ └── admin/ +│ ├── base.html # Admin layout +│ ├── login.html # Login form +│ ├── dashboard.html # Admin dashboard +│ ├── new.html # Create note +│ └── edit.html # Edit note +│ +├── data/ # Persistent data (gitignored) +│ ├── notes/ # Markdown files +│ │ └── YYYY/MM/ +│ │ └── slug.md +│ └── starpunk.db # SQLite database +│ +├── tests/ # Test suite +│ ├── test_auth.py +│ ├── test_database.py +│ ├── test_micropub.py +│ ├── test_feed.py +│ └── test_notes.py +│ +└── docs/ # Architecture documentation + ├── architecture/ + │ ├── overview.md + │ ├── components.md + │ ├── data-flow.md + │ ├── security.md + │ └── technology-stack.md + └── decisions/ + ├── ADR-001-python-web-framework.md + ├── ADR-002-flask-extensions.md + ├── ADR-003-frontend-technology.md + ├── ADR-004-file-based-note-storage.md + └── ADR-005-indielogin-authentication.md +``` + +## Architecture Patterns + +### API-First Design +**Pattern**: All functionality exposed via API, web interface consumes API +**Routes**: +``` +# Public 
API +GET /api/notes # List published notes +GET /api/notes/{slug} # Get single note +GET /feed.xml # RSS feed + +# Admin API (session auth) +POST /api/notes # Create note +PUT /api/notes/{id} # Update note +DELETE /api/notes/{id} # Delete note + +# Micropub API (token auth) +POST /api/micropub # Create via Micropub +GET /api/micropub?q=config # Query config + +# Auth API +GET /admin/login # Login form +POST /admin/login # Initiate IndieLogin +GET /auth/callback # IndieLogin callback +POST /admin/logout # Logout +``` + +### Data Flow: File + Database Sync + +#### Creating a Note +``` +User submits note + ↓ +Generate slug + ↓ +Create file: data/notes/YYYY/MM/{slug}.md + ↓ +Calculate content hash + ↓ +Insert database record (slug, file_path, hash, timestamps) + ↓ +If database insert fails: delete file, return error + ↓ +Return success +``` + +#### Reading a Note +``` +Request note by slug + ↓ +Query database for file_path + ↓ +Read markdown from file + ↓ +Render to HTML (if needed) + ↓ +Return content + metadata +``` + +#### Updating a Note +``` +User submits changes + ↓ +Atomic write: new content to temp file + ↓ +Calculate new hash + ↓ +Update database (timestamp, hash) + ↓ +If database update succeeds: atomic rename temp → actual + ↓ +If database update fails: delete temp, return error + ↓ +Return success +``` + +**Benefits**: +- Files provide portability +- Database provides fast queries +- Content hash detects external changes +- Atomic operations prevent corruption + +**Reference**: ADR-004 + +### IndieLogin Authentication Flow + +``` +┌─────────┐ ┌──────────┐ ┌─────────────┐ +│ User │ │ StarPunk │ │ IndieLogin │ +└────┬────┘ └────┬─────┘ └──────┬──────┘ + │ │ │ + │ 1. Click "Login" │ │ + ├─────────────────────────>│ │ + │ │ │ + │ 2. Enter website URL │ │ + ├─────────────────────────>│ │ + │ │ │ + │ 3. Generate state token │ + │ │ │ + │ 4. 
Redirect to IndieLogin with: │ + │ - me=user_website │ + │ - client_id=starpunk_url │ + │ - redirect_uri=starpunk/callback │ + │ - state=random_token │ + │ ├──────────────────────────>│ + │ │ │ + │ │ 5. Verify user's │ + │ │ identity │ + │ <────────────────────────────────────────────────── │ + │ (User authenticates via │ + │ chosen method) │ + │ ──────────────────────────────────────────────────> │ + │ │ │ + │ 6. Redirect back with code + state │ + │ <──────────────────────────────────────────────────│ + ├─────────────────────────>│ │ + │ │ │ + │ 7. Verify state │ + │ │ │ + │ 8. POST to IndieLogin: │ + │ - code │ + │ - client_id │ + │ - redirect_uri │ + │ ├──────────────────────────>│ + │ │ │ + │ │ 9. Return verified "me" │ + │ │<──────────────────────────│ + │ │ │ + │ 10. Verify me == ADMIN_ME │ + │ │ │ + │ 11. Create session │ + │ │ │ + │ 12. Set session cookie │ │ + │ <───────────────────────│ │ + │ │ │ + │ 13. Redirect to admin │ │ + │ <───────────────────────│ │ + │ │ │ +``` + +**Security Features**: +- State token prevents CSRF +- Session tokens are cryptographically random +- HttpOnly cookies prevent session-token theft via XSS +- Only ADMIN_ME URL can authenticate +- Sessions expire after 30 days + +**Reference**: ADR-005 + +### Progressive Enhancement Pattern + +**Principle**: Core functionality works without JavaScript + +#### Public Interface +- **Without JS**: Full functionality (view notes, RSS feed) +- **With JS**: No difference (no JS used on public pages) + +#### Admin Interface +- **Without JS**: + - Create/edit notes via HTML forms + - Submit to server, server renders markdown + - Full page refresh on submit +- **With JS**: + - Real-time markdown preview + - No page refresh for preview + - Still submits via form (progressive enhancement) + +**Implementation**: +```html + +
+ + +
+ + + +``` + +**Reference**: ADR-003 + +## Standards Compliance + +### IndieWeb Standards + +#### Microformats2 +**Required Classes**: +- `h-entry`: Mark up notes +- `h-card`: Mark up author information +- `e-content`: Note content +- `dt-published`: Publication timestamp +- `u-url`: Permalink URL + +**Example**: +```html + +``` + +**Validation**: https://indiewebify.me/ + +#### IndieAuth +**Compliance**: OAuth 2.0 authorization code flow +**Endpoints**: Delegated to indielogin.com +**Token Format**: Bearer tokens +**Validation**: Token introspection + +**Reference**: https://indieauth.spec.indieweb.org/ + +#### Micropub +**Compliance**: Full Micropub spec support +**Content Types**: JSON and form-encoded +**Required Responses**: 201 Created with Location header +**Query Support**: q=config, q=source + +**Validation**: https://micropub.rocks/ + +**Reference**: https://micropub.spec.indieweb.org/ + +### Web Standards + +#### RSS 2.0 +**Compliance**: Valid RSS 2.0 XML +**Required Elements**: title, link, description, pubDate, guid +**Date Format**: RFC-822 +**HTML Content**: CDATA-wrapped + +**Validation**: https://validator.w3.org/feed/ + +#### HTTP +**Status Codes**: Proper use of 200, 201, 400, 401, 404, 500 +**Headers**: Content-Type, Cache-Control, Location +**Methods**: GET, POST, PUT, DELETE +**CORS**: Allow cross-origin for API endpoints + +#### HTML5 +**Compliance**: Valid semantic HTML +**Accessibility**: ARIA labels, alt text, proper heading hierarchy +**Responsive**: Viewport meta tag, mobile-first CSS +**Forms**: Proper labels, validation attributes + +**Validation**: https://validator.w3.org/ + +## Security Architecture + +### Authentication Security +- State tokens for CSRF protection (5-minute expiry) +- Session tokens are cryptographically random (32 bytes) +- HttpOnly cookies prevent XSS theft +- Secure flag requires HTTPS +- SameSite=Lax prevents CSRF +- Single admin user (ADMIN_ME verification) + +### Input Validation +- Validate all user input +- 
Sanitize markdown (prevent XSS in rendered HTML) +- Validate Micropub payloads against spec +- URL validation for IndieAuth +- File path validation (prevent directory traversal) + +### Database Security +- Parameterized queries (prevent SQL injection) +- Input sanitization before storage +- Hash session tokens before storage +- Content hashing for integrity + +### Network Security +- HTTPS required in production +- SSL certificate verification on httpx requests +- Secure headers (CSP, X-Frame-Options, etc.) +- Rate limiting via reverse proxy (nginx/Caddy) + +### File System Security +- Atomic file operations +- Restricted permissions on data/ directory +- Prevent directory traversal attacks +- Validate file paths before operations + +## Performance Targets + +### Response Times +- API responses: < 100ms +- Page loads: < 200ms +- RSS feed: < 300ms + +### Optimization Strategy +- SQLite indexes on frequently queried columns +- Cache RSS feed (5 minutes) +- Minimal dependencies = fast startup +- Server-side rendering = fast first paint +- Single CSS file = one request +- Optional JS = doesn't block rendering + +### Resource Usage +- Memory: < 100MB for typical workload +- Disk: Minimal (SQLite + markdown files) +- CPU: Minimal (no heavy processing) + +**Scaling**: Designed for single-user, typical load is <10 requests/minute. Over-engineering for scale would violate simplicity principle. 
+ +## Testing Strategy + +### Unit Tests (pytest) +- Database operations (CRUD, queries) +- Slug generation and validation +- Markdown rendering +- File operations (atomic writes) +- Session management +- Token validation +- Content hash calculation + +### Integration Tests +- IndieLogin authentication flow (mocked API) +- Micropub note creation (full flow) +- RSS feed generation (validation) +- API endpoints (request/response) +- File + database sync +- Error handling + +### Manual Tests +- Real IndieLogin authentication +- Micropub client integration (e.g., Quill) +- RSS feed in actual reader +- Browser compatibility +- Mobile responsiveness +- Accessibility (screen readers) + +### Validation Tests +- HTML validation (W3C validator) +- RSS validation (W3C feed validator) +- Microformats validation (indiewebify.me) +- Micropub compliance (micropub.rocks) + +## Risk Assessment + +### Technical Risks + +#### Risk: IndieLogin.com Outage +**Impact**: Cannot authenticate new sessions +**Likelihood**: Low (stable service) +**Mitigation**: +- Sessions last 30 days (brief outages don't lock out user) +- Document manual session creation in database +- V2: Add fallback authentication method + +#### Risk: File/Database Sync Failure +**Impact**: Data inconsistency +**Likelihood**: Low (atomic operations, error handling) +**Mitigation**: +- Write files first, database second +- Transaction rollback on failure +- Integrity check on startup (optional) +- Regular backups + +#### Risk: File System Corruption +**Impact**: Lost notes +**Likelihood**: Very low (standard filesystem operations) +**Mitigation**: +- Atomic file writes +- Regular backups (user responsibility) +- Markdown files are recoverable + +#### Risk: Dependency Vulnerabilities +**Impact**: Security breach +**Likelihood**: Medium (all software has bugs) +**Mitigation**: +- Minimal dependencies (6 direct) +- All dependencies are mature, maintained +- Regular updates +- Security scanning (optional) + +### Operational 
Risks + +#### Risk: User Misconfiguration +**Impact**: Application doesn't work +**Likelihood**: Medium (manual setup required) +**Mitigation**: +- Clear documentation +- .env.example with all settings +- Validation on startup +- Helpful error messages + +#### Risk: Backup Neglect +**Impact**: Data loss +**Likelihood**: Medium (user responsibility) +**Mitigation**: +- Document backup procedures +- Make backup easy (copy data/ folder) +- Consider automated backup scripts (V2) + +## Migration and Future Considerations + +### V1 to V2 Migration Path +- Add features without breaking existing data +- Markdown files remain compatible +- Database schema migrations (ALTER TABLE) +- Backward compatible API changes + +### Potential V2 Enhancements +- Webmentions support +- Media uploads (photos) +- Additional post types (articles, replies) +- Full-text search (SQLite FTS) +- Automated backups +- Self-hosted IndieAuth option +- Multiple IndieAuth providers +- Draft/scheduled posts +- Tags and categories +- Import/export tools + +### Data Portability Strategy +**Export Formats**: +- Markdown files (already portable) +- JSON export (notes + metadata) +- RSS feed (existing notes) +- HTML archive (static site generator) + +**Import Strategy** (V2): +- From other blogging platforms +- From JSON backup +- From markdown directories + +## Success Criteria + +The technology stack is successful if: + +1. **User can publish notes from any Micropub client** ✓ + - Protocol: Micropub over HTTP + - Auth: IndieAuth tokens + - Format: Stored as markdown files + +2. **Notes appear in RSS readers immediately** ✓ + - Format: Valid RSS 2.0 + - Generator: feedgen library + - Caching: 5 minutes + +3. **System runs on minimal resources** ✓ + - Stack: Flask + SQLite (single process) + - Memory: < 100MB + - Dependencies: 6 direct + +4. **Code is readable and maintainable** ✓ + - Language: Python (user's preference) + - Framework: Flask (minimal, clear) + - Style: black formatting, type hints + +5. 
**All IndieWeb validators pass** ✓ + - Microformats: Server-side templating makes this easy + - IndieAuth: Delegated to indielogin.com + - Micropub: Spec-compliant implementation + +6. **Setup takes less than 5 minutes** ✓ + - Steps: Clone, venv, pip install, configure .env, run + - No build process + - No complex dependencies + +7. **System runs for months without intervention** ✓ + - Architecture: Stateless application + - Persistence: SQLite (reliable) + - Auth: Long-lived sessions (30 days) + +## Quick Start Guide + +### Development Setup +```bash +# Clone repository (replace the placeholder with your repository URL) +git clone https://example.com/your-user/starpunk.git && cd starpunk + +# Create virtual environment +python -m venv venv +source venv/bin/activate # or `venv\Scripts\activate` on Windows + +# Install dependencies +pip install -r requirements.txt + +# Configure +cp .env.example .env +# Edit .env: set SITE_URL, ADMIN_ME, SESSION_SECRET + +# Initialize database +flask db init + +# Run development server +flask run + +# Visit http://localhost:5000 +``` + +### Production Deployment +```bash +# Setup (same as development) +# ...
+ +# Install production server +pip install gunicorn + +# Run with Gunicorn +gunicorn -w 4 -b 127.0.0.1:8000 app:app + +# Configure nginx/Caddy for HTTPS +# Configure systemd for process management +# Set up regular backups of data/ directory +``` + +### Configuration Reference +```bash +# .env file +SITE_URL=https://starpunk.example.com # Your domain +ADMIN_ME=https://your-website.com # Your IndieWeb identity +SESSION_SECRET=random-secret-key # Generate with: python -c "import secrets; print(secrets.token_hex(32))" +DATA_PATH=./data # Where to store notes and database +``` + +## Summary + +StarPunk's technology stack achieves radical simplicity through careful technology selection: + +- **Backend**: Flask (micro-framework) + SQLite (embedded DB) + Python stdlib +- **Storage**: Markdown files (portability) + SQLite metadata (performance) +- **Frontend**: Jinja2 (SSR) + custom CSS (200 lines) + optional vanilla JS +- **Auth**: IndieLogin.com (external, zero maintenance) +- **Build**: None (zero build tools, zero npm) +- **Deploy**: Single package (Gunicorn + systemd + nginx) + +**Total Direct Dependencies**: 6 (Flask, markdown, feedgen, httpx, python-dotenv, pytest) + +**Lines of Code Estimate**: ~1500 LOC for complete V1 implementation + +**Setup Time**: < 5 minutes from clone to running + +This stack embodies the project philosophy: "Every line of code must justify its existence." Each technology choice prioritizes simplicity, standards compliance, and user data ownership over features and complexity. 
+ +## References + +### Architecture Decision Records +- [ADR-001: Python Web Framework Selection](/docs/decisions/ADR-001-python-web-framework.md) +- [ADR-002: Flask Extensions and Dependencies](/docs/decisions/ADR-002-flask-extensions.md) +- [ADR-003: Front-end Technology Stack](/docs/decisions/ADR-003-frontend-technology.md) +- [ADR-004: File-Based Note Storage Architecture](/docs/decisions/ADR-004-file-based-note-storage.md) +- [ADR-005: IndieLogin Authentication Integration](/docs/decisions/ADR-005-indielogin-authentication.md) + +### Standards and Specifications +- IndieWeb: https://indieweb.org/ +- IndieAuth Spec: https://indieauth.spec.indieweb.org/ +- Micropub Spec: https://micropub.spec.indieweb.org/ +- Microformats2: http://microformats.org/wiki/h-entry +- RSS 2.0: https://www.rssboard.org/rss-specification +- CommonMark: https://spec.commonmark.org/ + +### Tools and Libraries +- Flask: https://flask.palletsprojects.com/ +- Jinja2: https://jinja.palletsprojects.com/ +- IndieLogin.com: https://indielogin.com/ +- Python Markdown: https://python-markdown.github.io/ +- feedgen: https://feedgen.kiesow.be/ +- httpx: https://www.python-httpx.org/ + +### Validation and Testing +- IndieWebify.me: https://indiewebify.me/ +- Micropub Rocks: https://micropub.rocks/ +- W3C Feed Validator: https://validator.w3.org/feed/ +- W3C HTML Validator: https://validator.w3.org/ diff --git a/docs/decisions/ADR-001-python-web-framework.md b/docs/decisions/ADR-001-python-web-framework.md new file mode 100644 index 0000000..ac38d17 --- /dev/null +++ b/docs/decisions/ADR-001-python-web-framework.md @@ -0,0 +1,97 @@ +# ADR-001: Python Web Framework Selection + +## Status +Accepted + +## Context +StarPunk requires a Python web framework to implement the API-first architecture with RESTful endpoints, Micropub support, IndieAuth integration, and web
interface. The framework must support both API and server-side rendered HTML with minimal complexity. + +## Decision +Use **Flask** as the primary web framework. + +## Rationale + +### Simplicity Score: 9/10 +- Minimal boilerplate code required +- Explicit routing and request handling +- Easy to understand for newcomers +- Core framework is ~1000 lines of code +- Follows "micro-framework" philosophy aligned with StarPunk principles + +### Fitness Score: 10/10 +- Perfect for single-user applications +- Built-in development server +- Excellent template engine (Jinja2) for HTML generation +- Simple decorator-based routing +- Easy integration with SQLite +- Native support for both JSON APIs and HTML rendering +- Werkzeug provides robust HTTP utilities +- Blueprint support for code organization + +### Maintenance Score: 9/10 +- Extremely mature (13+ years) +- Large community and extensive documentation +- Stable API with minimal breaking changes +- Extensive ecosystem of well-tested extensions +- Active development and security updates + +### Standards Compliance: Pass +- Standard WSGI interface +- Full HTTP status code support +- Proper content-type handling +- Easy CORS implementation +- Session management built-in + +## Consequences + +### Positive +- Minimal learning curve +- Small dependency footprint +- Easy to test (built-in test client) +- Flexible enough for API-first architecture +- Can render HTML templates for public interface +- Easy deployment (WSGI compatible) + +### Negative +- No built-in ORM (but we're using raw SQLite, so this is actually positive) +- Requires manual selection of extensions +- Less opinionated than larger frameworks + +### Mitigation +- Extension selection will be minimal (see ADR-002 for extensions) +- Lack of opinion allows us to stay minimal +- Manual configuration gives us full control + +## Alternatives Considered + +### FastAPI (Rejected) +- **Simplicity**: 6/10 - Requires async/await understanding, Pydantic models +- 
**Fitness**: 7/10 - Overkill for single-user CMS, async not needed +- **Maintenance**: 8/10 - Newer framework, but growing +- **Verdict**: Too complex for project needs, async unnecessary + +### Django (Rejected) +- **Simplicity**: 3/10 - Large framework with heavy abstractions +- **Fitness**: 4/10 - Designed for multi-user applications, includes admin panel, ORM, and many features we don't need +- **Maintenance**: 10/10 - Excellent maintenance and security +- **Verdict**: Violates "minimal code" principle, too much unnecessary functionality + +### Bottle (Considered) +- **Simplicity**: 10/10 - Single file framework +- **Fitness**: 7/10 - Very minimal, but perhaps too minimal +- **Maintenance**: 6/10 - Smaller community, slower updates +- **Verdict**: Close second, but Flask has better ecosystem for IndieAuth/Micropub + +## Implementation Notes + +Flask will be used with: +- Jinja2 templates for HTML rendering (included with Flask) +- Werkzeug for HTTP utilities (included with Flask) +- Minimal extensions only (see ADR-002) +- Standard WSGI deployment +- Blueprint organization for clear separation of concerns + +## References +- Flask Documentation: https://flask.palletsprojects.com/ +- WSGI Specification: https://peps.python.org/pep-3333/ +- Flask Design Decisions: https://flask.palletsprojects.com/en/3.0.x/design/ diff --git a/docs/decisions/ADR-002-flask-extensions.md b/docs/decisions/ADR-002-flask-extensions.md new file mode 100644 index 0000000..997ba75 --- /dev/null +++ b/docs/decisions/ADR-002-flask-extensions.md @@ -0,0 +1,134 @@ +# ADR-002: Flask Extensions and Dependencies + +## Status +Accepted + +## Context +Flask is intentionally minimal. We need to select only essential extensions that align with the "minimal code" philosophy while supporting required functionality. 
+ +## Decision +Use the following minimal set of dependencies: +- **Flask** - Core framework +- **markdown** - Markdown to HTML conversion +- **feedgen** - RSS feed generation +- **httpx** - HTTP client for IndieAuth verification +- **python-dotenv** - Environment configuration +- **pytest** - Testing framework + +**NO additional Flask extensions** will be used in V1. + +## Rationale + +### Core Dependencies + +#### markdown +- **Purpose**: Convert markdown notes to HTML +- **Simplicity**: Pure Python, simple API +- **Justification**: Core requirement for note rendering +- **Alternative**: mistune (faster but less standard) +- **Verdict**: markdown is more standard and sufficient for single-user + +#### feedgen +- **Purpose**: Generate valid RSS 2.0 feeds +- **Simplicity**: High-level API, handles all RSS requirements +- **Justification**: Ensures RSS 2.0 compliance without manual XML generation +- **Alternative**: Manual XML generation (error-prone) +- **Verdict**: feedgen guarantees valid RSS output + +#### httpx +- **Purpose**: HTTP client for IndieAuth endpoint verification +- **Simplicity**: Modern, clean API +- **Justification**: Need to verify IndieAuth endpoints and fetch client metadata +- **Alternative**: requests (synchronous only), urllib (too low-level) +- **Verdict**: httpx provides clean API and can be sync or async if needed later + +#### python-dotenv +- **Purpose**: Load environment variables from .env file +- **Simplicity**: Single-purpose, simple API +- **Justification**: Standard pattern for configuration management +- **Alternative**: Manual environment variable handling +- **Verdict**: Industry standard, minimal overhead + +#### pytest +- **Purpose**: Testing framework +- **Simplicity**: Minimal boilerplate, clear assertions +- **Justification**: Required for test coverage +- **Alternative**: unittest (more verbose), nose2 (unmaintained) +- **Verdict**: pytest is current Python testing standard + +### Extensions REJECTED for V1 + +#### 
Flask-SQLAlchemy (Rejected) +- **Reason**: Adds ORM abstraction we don't need +- **Decision**: Use sqlite3 standard library directly +- **Benefit**: Simpler code, explicit queries, no magic + +#### Flask-Login (Rejected) +- **Reason**: Session-based authentication, we need token-based +- **Decision**: Implement simple token validation ourselves +- **Benefit**: Full control over IndieAuth flow + +#### Flask-CORS (Rejected) +- **Reason**: Single function decorator, don't need extension +- **Decision**: Use @after_request decorator for CORS headers +- **Benefit**: 5 lines of code vs. another dependency + +#### Flask-Limiter (Rejected for V1) +- **Reason**: Rate limiting is nice-to-have, not critical for single-user +- **Decision**: Defer to V2 or rely on reverse proxy +- **Benefit**: Reduced complexity + +#### Flask-WTF (Rejected) +- **Reason**: Form handling for single form (note creation) is overkill +- **Decision**: Simple HTML forms with manual validation +- **Benefit**: No CSRF complexity in V1, manual validation is clear + +## Consequences + +### Positive +- Minimal dependency tree +- Full control over implementation +- Easy to understand codebase +- Fast installation and startup +- Reduced attack surface + +### Negative +- Must implement some features manually (token validation, CORS) +- No form CSRF protection in V1 (acceptable for single-user) +- Manual SQL queries required + +### Mitigation +- Document manual implementations clearly +- Ensure manual code is well-tested +- Keep manual implementations simple and obvious +- Plan to add CSRF in V2 if needed + +## Complete Dependency List + +``` +Flask==3.0.* +markdown==3.5.* +feedgen==1.0.* +httpx==0.27.* +python-dotenv==1.0.* +pytest==8.0.* +``` + +## Development Dependencies +``` +pytest-cov # Test coverage reporting +black # Code formatting +flake8 # Linting +``` + +## Standards Compliance +- All dependencies are pure Python or have minimal C extensions +- All are actively maintained with security updates +- 
All support Python 3.11+ +- Total dependency count: 6 direct dependencies (excluding dev tools) + +## References +- Flask Extensions: https://flask.palletsprojects.com/en/3.0.x/extensions/ +- Markdown Spec: https://daringfireball.net/projects/markdown/ +- RSS 2.0: https://www.rssboard.org/rss-specification +- Python Packaging: https://packaging.python.org/ diff --git a/docs/decisions/ADR-003-frontend-technology.md b/docs/decisions/ADR-003-frontend-technology.md new file mode 100644 index 0000000..9651a43 --- /dev/null +++ b/docs/decisions/ADR-003-frontend-technology.md @@ -0,0 +1,289 @@ +# ADR-003: Front-end Technology Stack + +## Status +Accepted + +## Context +StarPunk requires a front-end for: +1. Public interface (homepage, note permalinks) - Server-side rendered +2. Admin interface (note creation/editing) - Requires some interactivity +3. Progressive enhancement principle - Core functionality must work without JavaScript + +The front-end must be minimal, elegant, and align with the "no client-side complexity" principle stated in CLAUDE.MD. 
+ +## Decision + +### Public Interface: Server-Side Rendered HTML +- **Template Engine**: Jinja2 (included with Flask) +- **CSS**: Custom CSS (no framework) +- **JavaScript**: None required for V1 +- **Build Tools**: None required + +### Admin Interface: Enhanced Server-Side Rendering +- **Template Engine**: Jinja2 (included with Flask) +- **CSS**: Custom CSS (shared with public interface) +- **JavaScript**: Minimal vanilla JavaScript for markdown preview only +- **Build Tools**: None required + +### Asset Management +- **CSS**: Single stylesheet served statically +- **JavaScript**: Single optional file for markdown preview +- **No bundler**: Direct file serving +- **No transpilation**: Modern browsers only (ES6+) + +## Rationale + +### Server-Side Rendering (SSR) +**Simplicity Score: 10/10** +- Zero build process +- No JavaScript framework complexity +- Direct Flask template rendering +- Familiar Jinja2 syntax + +**Fitness Score: 10/10** +- Perfect for content-first site +- Faster initial page load +- Better SEO (though not critical for single-user) +- Works without JavaScript +- Easier to implement microformats + +**Maintenance Score: 10/10** +- Jinja2 is stable and mature +- No framework version updates +- No npm dependency hell +- Templates are simple HTML + +### No CSS Framework +**Simplicity Score: 10/10** +- Custom CSS is ~200 lines for entire site +- No unused classes or styles +- Full control over appearance +- No framework learning curve + +**Fitness Score: 9/10** +- StarPunk needs minimal, elegant design +- Single theme, no customization needed +- Mobile-responsive can be achieved with simple media queries +- No complex components needed + +### Minimal JavaScript Approach +**Simplicity Score: 9/10** +- Vanilla JavaScript only (no React/Vue/Svelte) +- Single purpose: markdown preview in admin +- Optional progressive enhancement +- No build step required + +**Fitness Score: 10/10** +- Markdown preview improves UX but isn't required +- All functionality 
works without JavaScript +- Can use fetch API for preview without library +- Modern browser features are sufficient + +## Consequences + +### Positive +- Zero build time +- No node_modules directory +- Instant development setup +- Fast page loads +- Works with JavaScript disabled +- Easy to understand and modify +- Microformats implementation is straightforward +- Complete control over HTML output + +### Negative +- No TypeScript type checking +- No hot module replacement (but Flask auto-reload works) +- Manual CSS organization required +- Must write responsive CSS manually + +### Mitigation +- Keep JavaScript minimal and well-commented +- Organize CSS with clear sections +- Use CSS custom properties for theming +- Test manually in multiple browsers +- Validate HTML with W3C validator + +## Frontend File Structure + +``` +static/ +├── css/ +│ └── style.css # Single stylesheet for entire site +└── js/ + └── preview.js # Optional markdown preview (admin only) + +templates/ +├── base.html # Base template with HTML structure +├── index.html # Homepage (note list) +├── note.html # Single note permalink +└── admin/ + ├── base.html # Admin base template + ├── dashboard.html # Admin dashboard + ├── new.html # Create new note + └── edit.html # Edit existing note +``` + +## CSS Architecture + +### Custom CSS Properties (Variables) +```css +:root { + --color-text: #333; + --color-bg: #fff; + --color-link: #0066cc; + --color-border: #ddd; + --font-body: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; + --font-mono: 'SF Mono', Monaco, monospace; + --spacing-unit: 1rem; + --max-width: 42rem; +} +``` + +### Mobile-First Responsive Design +```css +/* Base: Mobile styles */ +body { padding: 1rem; } + +/* Tablet and up */ +@media (min-width: 768px) { + body { padding: 2rem; } +} +``` + +## JavaScript Architecture + +### Markdown Preview Implementation +```javascript +// static/js/preview.js +// Simple markdown preview using marked.js CDN (no build step) +// Progressive 
enhancement - form works without this +``` + +**Decision**: Use marked.js from CDN for client-side preview +- **Justification**: Small, dependency-free renderer; the server renders with Python `markdown` (see ADR-002), so the preview approximates the final HTML rather than matching it exactly +- **Simplicity**: No bundling required +- **Reliability**: CDN delivers cached version +- **Alternative**: No preview (acceptable fallback) + +## Template Organization + +### Jinja2 Template Strategy +- **Inheritance**: Use base templates for common structure +- **Blocks**: Define clear content blocks for overriding +- **Macros**: Create reusable microformat snippets +- **Filters**: Use Jinja2 filters for date formatting + +### Example Base Template Structure +```jinja2 +{# templates/base.html #} + + + + + + {% block title %}{{ site.title }}{% endblock %} + + + {% block head %}{% endblock %} + + + {% block content %}{% endblock %} + + +``` + +## Microformats Integration + +Server-side rendering makes microformats implementation straightforward: + +```jinja2 +{# Macro for h-entry note rendering #} +{% macro render_note(note) %} + +{% endmacro %} +``` + +## Build and Development Workflow + +### Development +1. Run Flask development server: `flask run` +2. Edit templates/CSS/JS directly +3. Browser auto-refresh on template changes +4. No build step required + +### Production +1. Copy static files to production +2. Templates are rendered on-demand +3. Optionally enable Flask caching for rendered HTML +4.
Serve static assets with nginx/Apache (optional) + +## Browser Support +- Modern browsers (Chrome 90+, Firefox 88+, Safari 14+, Edge 90+) +- Mobile browsers (iOS Safari 14+, Chrome Android 90+) +- Progressive enhancement ensures basic functionality on older browsers + +## Alternatives Considered + +### React/Vue/Svelte (Rejected) +- **Simplicity**: 2/10 - Requires build tools, npm, bundlers +- **Fitness**: 3/10 - Massive overkill for content site +- **Maintenance**: 5/10 - Constant framework updates +- **Verdict**: Violates "no client-side complexity" principle + +### htmx (Considered) +- **Simplicity**: 8/10 - Single JavaScript file, declarative +- **Fitness**: 6/10 - Useful for dynamic updates, but not needed in V1 +- **Maintenance**: 8/10 - Stable, minimal dependencies +- **Verdict**: Interesting for V2, but V1 doesn't need dynamic updates + +### Alpine.js (Considered) +- **Simplicity**: 8/10 - Lightweight, declarative +- **Fitness**: 5/10 - Good for small interactions, but we barely need any +- **Maintenance**: 8/10 - Well maintained +- **Verdict**: Too much for the minimal JS we need + +### Tailwind CSS (Rejected) +- **Simplicity**: 4/10 - Requires build process, large configuration +- **Fitness**: 3/10 - Utility-first doesn't fit minimal design needs +- **Maintenance**: 7/10 - Well maintained but heavy +- **Verdict**: Build process violates simplicity; custom CSS is sufficient + +### Bootstrap/Bulma (Rejected) +- **Simplicity**: 5/10 - Large framework with many unused features +- **Fitness**: 3/10 - Component-heavy, we need minimal custom design +- **Maintenance**: 9/10 - Very stable +- **Verdict**: Too much CSS for what we need + +### PicoCSS/Water.css (Considered) +- **Simplicity**: 9/10 - Classless CSS, just include and go +- **Fitness**: 7/10 - Good starting point but may not match design vision +- **Maintenance**: 8/10 - Maintained, simple +- **Verdict**: Close consideration, but custom CSS gives full control + +## Standards Compliance +- Semantic HTML5 
elements +- Valid HTML (W3C validator) +- Accessible forms and navigation +- Proper heading hierarchy +- ARIA labels where needed +- Mobile-responsive (viewport meta tag) +- Progressive enhancement (works without JS) + +## References +- Jinja2 Documentation: https://jinja.palletsprojects.com/ +- MDN Web Docs: https://developer.mozilla.org/ +- Microformats2: http://microformats.org/wiki/h-entry +- Progressive Enhancement: https://developer.mozilla.org/en-US/docs/Glossary/Progressive_Enhancement +- Semantic HTML: https://developer.mozilla.org/en-US/docs/Glossary/Semantics diff --git a/docs/decisions/ADR-004-file-based-note-storage.md b/docs/decisions/ADR-004-file-based-note-storage.md new file mode 100644 index 0000000..3571261 --- /dev/null +++ b/docs/decisions/ADR-004-file-based-note-storage.md @@ -0,0 +1,384 @@ +# ADR-004: File-Based Note Storage Architecture + +## Status +Accepted + +## Context +The user explicitly requires notes to be stored as files on disk rather than as database records. This is critical for: +1. Data portability - notes can be backed up, moved, and read without the application +2. User ownership - direct access to content in human-readable format +3. Simplicity - text files are the simplest storage mechanism +4. Future-proofing - markdown files will be readable forever + +However, we also need SQLite for: +- Metadata (timestamps, slugs, published status) +- Authentication tokens +- Fast querying and indexing +- Relational data + +The challenge is designing how file-based storage and database metadata work together efficiently. 
+ +## Decision + +### Hybrid Architecture: Files + Database Metadata + +**Notes Content**: Stored as markdown files on disk +**Notes Metadata**: Stored in SQLite database +**Source of Truth**: Files are authoritative for content; database is authoritative for metadata + +### File Storage Strategy + +#### Directory Structure +``` +data/ +├── notes/ +│ ├── 2024/ +│ │ ├── 11/ +│ │ │ ├── my-first-note.md +│ │ │ └── another-note.md +│ │ └── 12/ +│ │ └── december-note.md +│ └── 2025/ +│ └── 01/ +│ └── new-year-note.md +├── starpunk.db # SQLite database +└── .backups/ # Optional backup directory +``` + +#### File Naming Convention +- **Format**: `{slug}.md` +- **Slug rules**: lowercase, alphanumeric, hyphens only, no spaces +- **Example**: `my-first-note.md` +- **Uniqueness**: Enforced globally by the database (`slug` is `UNIQUE`); the filesystem only prevents duplicate names within a single `YYYY/MM/` directory + +#### File Organization +- **Pattern**: Year/Month subdirectories (`YYYY/MM/`) +- **Rationale**: + - Keeps directories manageable (max ~30 files per month) + - Easy chronological browsing + - Matches natural mental model + - Scalable to thousands of notes +- **Example path**: `data/notes/2024/11/my-first-note.md` + +### Database Schema + +```sql +CREATE TABLE notes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + slug TEXT UNIQUE NOT NULL, -- URL identifier + file_path TEXT UNIQUE NOT NULL, -- Relative path from data/notes/ + published BOOLEAN DEFAULT 0, -- Publication status + created_at TIMESTAMP NOT NULL, -- Creation timestamp + updated_at TIMESTAMP NOT NULL, -- Last modification timestamp + content_hash TEXT -- SHA-256 of file content for change detection +); + +CREATE INDEX idx_notes_created_at ON notes(created_at DESC); +CREATE INDEX idx_notes_published ON notes(published); +CREATE INDEX idx_notes_slug ON notes(slug); +``` + +### File Format + +#### Markdown File Structure +```markdown +[Content of the note in markdown format] +``` + +**That's it.** No frontmatter, no metadata in file. Keep it pure.
+ +**Rationale**: +- Maximum portability +- Readable by any markdown editor +- No custom parsing required +- Metadata belongs in database (timestamps, slugs, etc.) +- User sees just their content when opening file + +#### Optional Future Enhancement (V2+) +If frontmatter becomes necessary, use standard YAML: +```markdown +--- +title: Optional Title +tags: tag1, tag2 +--- +[Content here] +``` + +But for V1: **NO frontmatter**. + +## Rationale + +### File Storage Benefits +**Simplicity Score: 10/10** +- Text files are the simplest storage +- No binary formats +- Human-readable +- Easy to backup (rsync, git, Dropbox, etc.) + +**Portability Score: 10/10** +- Standard markdown format +- Readable without application +- Can be edited in any text editor +- Easy to migrate to other systems + +**Ownership Score: 10/10** +- User has direct access to their content +- No vendor lock-in +- Can grep their own notes +- Backup is simple file copy + +### Hybrid Approach Benefits +**Performance**: Database indexes enable fast queries +**Flexibility**: Rich metadata without cluttering files +**Integrity**: Database enforces uniqueness and relationships +**Simplicity**: Each system does what it's best at + +## Consequences + +### Positive +- Notes are portable markdown files +- User can edit notes directly in filesystem if desired +- Easy backup (just copy data/ directory) +- Database provides fast metadata queries +- Can rebuild database from files if needed +- Git-friendly (can version control notes) +- Maximum data ownership + +### Negative +- Must keep file and database in sync +- Potential for orphaned database records +- Potential for orphaned files +- File operations are slower than database queries +- Must handle file system errors + +### Mitigation Strategies + +#### Sync Strategy +1. **On note creation**: Write file FIRST, then database record +2. **On note update**: Update file FIRST, then database record (update timestamp, content_hash) +3. 
**On note delete**: Mark as deleted in database, optionally move file to .trash/ +4. **On startup**: Optional integrity check to detect orphans + +#### Orphan Detection +```python +# Pseudo-code for integrity check +def check_integrity(): + # Find database records without files + for note in database.all_notes(): + if not file_exists(note.file_path): + log_error(f"Orphaned database record: {note.slug}") + + # Find files without database records + for file in filesystem.all_markdown_files(): + if not database.has_note(file_path=file): + log_error(f"Orphaned file: {file}") +``` + +#### Content Hash Strategy +- Calculate SHA-256 hash of file content on write +- Store hash in database +- On read, can verify content hasn't been externally modified +- Enables change detection and cache invalidation + +## Data Flow Patterns + +### Creating a Note + +1. Generate slug from content or timestamp +2. Determine file path: `data/notes/{YYYY}/{MM}/{slug}.md` +3. Create directories if needed +4. Write markdown content to file +5. Calculate content hash +6. Insert record into database +7. Return success + +**Transaction Safety**: If database insert fails, delete file and raise error + +### Reading a Note + +**By Slug**: +1. Query database for file_path by slug +2. Read file content from disk +3. Return content + metadata + +**For List**: +1. Query database for metadata (sorted, filtered) +2. Optionally read file content for each note +3. Return list with metadata and content + +### Updating a Note + +1. Query database for existing file_path +2. Write new content to file (atomic write to temp, then rename) +3. Calculate new content hash +4. Update database record (timestamp, content_hash) +5. Return success + +**Transaction Safety**: Keep backup of original file until database update succeeds + +### Deleting a Note + +**Soft Delete (Recommended)**: +1. Update database: set `deleted_at` timestamp (requires adding a nullable `deleted_at` column; the `notes` schema in this ADR does not yet define one) +2. Optionally move file to `.trash/` subdirectory +3.
Return success + +**Hard Delete**: +1. Delete database record +2. Delete file from filesystem +3. Return success + +## File System Operations + +### Atomic Writes +```python +# Pseudo-code for atomic file write +def write_note_safely(path, content): + temp_path = f"{path}.tmp" + write(temp_path, content) + atomic_rename(temp_path, path) # Atomic on POSIX systems +``` + +### Directory Creation +```python +# Ensure directory exists before writing +def ensure_note_directory(year, month): + path = f"data/notes/{year}/{month}" + makedirs(path, exist_ok=True) + return path +``` + +### Slug Generation +```python +# Generate URL-safe slug +def generate_slug(content=None, timestamp=None): + if content: + # Extract first few words, normalize + words = extract_first_words(content, max=5) + slug = normalize(words) # lowercase, hyphens, no special chars + else: + # Fallback: timestamp-based + slug = timestamp.strftime("%Y%m%d-%H%M%S") + + # Ensure uniqueness + if database.slug_exists(slug): + slug = f"{slug}-{random_suffix()}" + + return slug +``` + +## Backup Strategy + +### Simple Backup +```bash +# User can backup with simple copy +cp -r data/ backup/ + +# Or with rsync +rsync -av data/ backup/ + +# Or with git +cd data/ && git add . && git commit -m "Backup" +``` + +### Restore Strategy +1. Copy data/ directory to new location +2. Application reads database +3. 
If database missing or corrupt, rebuild from files: + ```python + def rebuild_database_from_files(): + for file_path in glob("data/notes/**/*.md"): + content = read_file(file_path) + metadata = extract_metadata_from_path(file_path) + database.insert_note( + slug=metadata.slug, + file_path=file_path, + created_at=file_stat.created, + updated_at=file_stat.modified, + content_hash=hash(content) + ) + ``` + +## Standards Compliance + +### Markdown Standard +- CommonMark specification +- No custom extensions in V1 +- Standard markdown processors can read files + +### File System Compatibility +- ASCII-safe filenames +- No special characters in paths +- Maximum path length under 255 characters +- POSIX-compatible directory structure + +## Alternatives Considered + +### All-Database Storage (Rejected) +- **Simplicity**: 8/10 - Simpler code, single source of truth +- **Portability**: 2/10 - Requires database export +- **Ownership**: 3/10 - User doesn't have direct access +- **Verdict**: Violates user requirement for file-based storage + +### Flat File Directory (Rejected) +``` +data/notes/ +├── note-1.md +├── note-2.md +├── note-3.md +... 
+├── note-9999.md +``` +- **Simplicity**: 10/10 - Simplest possible structure +- **Scalability**: 3/10 - Thousands of files in one directory is slow +- **Verdict**: Not scalable, poor performance with many notes + +### Git-Based Storage (Rejected for V1) +- **Simplicity**: 6/10 - Requires git integration +- **Portability**: 9/10 - Excellent versioning +- **Performance**: 7/10 - Git operations have overhead +- **Verdict**: Interesting for V2, but adds complexity to V1 + +### Frontmatter in Files (Rejected for V1) +```markdown +--- +slug: my-note +created: 2024-11-18 +published: true +--- +Note content here +``` +- **Simplicity**: 7/10 - Requires YAML parsing +- **Portability**: 8/10 - Common pattern, but not pure markdown +- **Single Source**: 10/10 - All data in one place +- **Verdict**: Deferred to V2; V1 keeps files pure + +### JSON Metadata Sidecar (Rejected) +``` +notes/ +├── my-note.md +├── my-note.json # Metadata +``` +- **Simplicity**: 6/10 - Doubles number of files +- **Portability**: 7/10 - Markdown still clean, but extra files +- **Sync Issues**: 5/10 - Must keep two files in sync +- **Verdict**: Database metadata is cleaner + +## Implementation Checklist + +- [ ] Create data/notes directory structure on initialization +- [ ] Implement slug generation algorithm +- [ ] Implement atomic file write operations +- [ ] Implement content hash calculation +- [ ] Create database schema with indexes +- [ ] Implement sync between files and database +- [ ] Implement orphan detection (optional for V1) +- [ ] Add file system error handling +- [ ] Create backup documentation for users +- [ ] Test with thousands of notes for performance + +## References +- CommonMark Spec: https://spec.commonmark.org/ +- POSIX File Operations: https://pubs.opengroup.org/onlinepubs/9699919799/ +- File System Best Practices: https://www.pathname.com/fhs/ +- Atomic File Operations: https://lwn.net/Articles/457667/ diff --git a/docs/decisions/ADR-005-indielogin-authentication.md 
b/docs/decisions/ADR-005-indielogin-authentication.md new file mode 100644 index 0000000..7995be7 --- /dev/null +++ b/docs/decisions/ADR-005-indielogin-authentication.md @@ -0,0 +1,421 @@ +# ADR-005: IndieLogin Authentication Integration + +## Status +Accepted + +## Context +The user has explicitly required external IndieLogin authentication via indielogin.com for V1. This is different from implementing a full IndieAuth server (which CLAUDE.MD mentions). The distinction is important: + +- **IndieAuth Server**: Host your own authentication endpoint (complex) +- **IndieLogin Service**: Use indielogin.com as an external authentication provider (simple) + +The user wants the simpler approach: delegate authentication to indielogin.com using their API (https://indielogin.com/api). + +IndieLogin.com is a service that: +1. Handles the OAuth 2.0 / IndieAuth flow +2. Verifies user identity via their website +3. Returns authenticated identity to our application +4. Supports multiple authentication methods (RelMeAuth, email, etc.) + +## Decision + +### Use IndieLogin.com as External Authentication Provider + +**Authentication Flow**: OAuth 2.0 Authorization Code flow via indielogin.com +**API Endpoint**: https://indielogin.com/auth +**Token Validation**: Server-side session tokens (not IndieAuth tokens) +**User Identity**: URL (me parameter) verified by indielogin.com + +### Architecture + +``` +User Browser → StarPunk → indielogin.com → User's Website + ↑ ↓ + └──────────────────────────────┘ + (Authenticated session) +``` + +## Authentication Flow + +### 1. Login Initiation +``` +User clicks "Login" + ↓ +StarPunk generates state token (CSRF protection) + ↓ +Redirect to: https://indielogin.com/auth? + - me={user_website} + - client_id={starpunk_url} + - redirect_uri={starpunk_url}/auth/callback + - state={random_token} +``` + +### 2. 
IndieLogin Processing +``` +indielogin.com verifies user identity: + - Checks for rel="me" links on user's website + - Or sends email verification + - Or uses other IndieAuth methods + ↓ +User authenticates via their chosen method + ↓ +indielogin.com redirects back to StarPunk +``` + +### 3. Callback Verification +``` +indielogin.com → StarPunk callback with: + - code={authorization_code} + - state={original_state} + ↓ +StarPunk verifies state matches + ↓ +StarPunk exchanges code for verified identity: + POST https://indielogin.com/auth + - code={authorization_code} + - client_id={starpunk_url} + - redirect_uri={starpunk_url}/auth/callback + ↓ +indielogin.com responds with: + { "me": "https://user-website.com" } + ↓ +StarPunk creates authenticated session +``` + +### 4. Session Management +``` +StarPunk stores session token in cookie + ↓ +Session token maps to authenticated user URL + ↓ +Admin routes check for valid session +``` + +## Implementation Requirements + +### Configuration Variables +``` +SITE_URL=https://starpunk.example.com +ADMIN_ME=https://your-website.com +SESSION_SECRET=random_secret_key +``` + +### Database Schema Addition +```sql +-- Add to existing schema +CREATE TABLE sessions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_token TEXT UNIQUE NOT NULL, + me TEXT NOT NULL, -- Authenticated user URL + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP NOT NULL, + last_used_at TIMESTAMP +); + +CREATE INDEX idx_sessions_token ON sessions(session_token); +CREATE INDEX idx_sessions_expires ON sessions(expires_at); + +CREATE TABLE auth_state ( + state TEXT PRIMARY KEY, + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP NOT NULL -- Short-lived (5 minutes) +); +``` + +### HTTP Client for API Calls +Use **httpx** (already selected in ADR-002) for: +- POST to https://indielogin.com/auth to exchange code +- Verify response contains valid "me" URL +- Handle network errors gracefully + +### Routes Required +``` +GET /admin/login - Display login 
form +POST /admin/login - Initiate IndieLogin flow +GET /auth/callback - Handle IndieLogin redirect +POST /admin/logout - Destroy session +``` + +### Login Flow Implementation + +#### Step 1: Login Form +```python +# /admin/login (GET) +# Display simple form asking for user's website URL +# Form submits to POST /admin/login with "me" parameter +``` + +#### Step 2: Initiate Authentication +```python +# /admin/login (POST) +def initiate_login(me_url): + # Validate me_url format + if not is_valid_url(me_url): + return error("Invalid URL") + + # Generate and store state token + state = generate_random_token() + store_state(state, expires_in_minutes=5) + + # Build IndieLogin authorization URL + params = { + 'me': me_url, + 'client_id': SITE_URL, + 'redirect_uri': f"{SITE_URL}/auth/callback", + 'state': state + } + + auth_url = f"https://indielogin.com/auth?{urlencode(params)}" + + # Redirect user to IndieLogin + return redirect(auth_url) +``` + +#### Step 3: Handle Callback +```python +# /auth/callback (GET) +def handle_callback(code, state): + # Verify state token (CSRF protection) + if not verify_state(state): + return error("Invalid state") + + # Exchange code for verified identity + response = httpx.post('https://indielogin.com/auth', data={ + 'code': code, + 'client_id': SITE_URL, + 'redirect_uri': f"{SITE_URL}/auth/callback" + }) + + if response.status_code != 200: + return error("Authentication failed") + + data = response.json() + me = data.get('me') + + # Verify this is the authorized admin + if me != ADMIN_ME: + return error("Unauthorized user") + + # Create session + session_token = generate_random_token() + create_session(session_token, me, expires_in_days=30) + + # Set session cookie + set_cookie('session', session_token, httponly=True, secure=True) + + # Redirect to admin dashboard + return redirect('/admin') +``` + +#### Step 4: Session Validation +```python +# Decorator for protected routes +def require_auth(f): + def wrapper(*args, **kwargs): + 
session_token = request.cookies.get('session') + + if not session_token: + return redirect('/admin/login') + + session = get_session(session_token) + + if not session or session.expired: + return redirect('/admin/login') + + # Update last_used_at + update_session_activity(session_token) + + # Store user info in request context + g.user_me = session.me + + return f(*args, **kwargs) + + return wrapper + +# Usage +@app.route('/admin') +@require_auth +def admin_dashboard(): + return render_template('admin/dashboard.html') +``` + +## Rationale + +### Why IndieLogin.com Instead of Self-Hosted IndieAuth? + +**Simplicity Score: 10/10 (IndieLogin) vs 4/10 (Self-hosted)** +- IndieLogin.com handles all complexity of: + - Discovering user's auth endpoints + - Verifying user identity + - Supporting multiple auth methods (RelMeAuth, email, etc.) + - PKCE implementation +- Self-hosted would require implementing full IndieAuth spec (complex) + +**Fitness Score: 10/10** +- Perfect for single-user system +- User controls their identity via their own website +- No password management needed +- Aligns with IndieWeb principles + +**Maintenance Score: 10/10** +- indielogin.com is maintained by IndieWeb community +- No auth code to maintain ourselves +- Security updates handled externally +- Well-tested service + +**Standards Compliance: Pass** +- Uses OAuth 2.0 / IndieAuth standards +- Compatible with IndieWeb ecosystem +- User identity is their URL (IndieWeb principle) + +### Why Session Cookies Instead of Access Tokens? 
+ +For admin interface (not Micropub): +- **Simpler**: Standard web session pattern +- **Secure**: HttpOnly cookies cannot be read by injected scripts, limiting session theft via XSS +- **Appropriate**: Admin is human using browser, not API client +- **Note**: Micropub will still use access tokens (separate ADR needed) + +## Consequences + +### Positive +- Extremely simple implementation (< 100 lines of code) +- No authentication code to maintain +- Secure by default (delegated to trusted service) +- True IndieWeb authentication (user owns identity) +- No passwords to manage +- Works immediately without setup +- Community-maintained service + +### Negative +- Dependency on external service (indielogin.com) +- Requires internet connection to authenticate +- Single point of failure for login (mitigated: session stays valid) +- User must have their own website/URL + +### Mitigation +- Sessions last 30 days, so brief indielogin.com outages don't lock out user +- Document fallback: edit database to create session manually if needed +- IndieLogin.com is stable, community-run service with good uptime +- For V2: Consider optional email fallback or self-hosted IndieAuth + +## Security Considerations + +### State Token (CSRF Protection) +- Generate cryptographically random state token +- Store in database with short expiry (5 minutes) +- Verify state matches on callback +- Delete state after use (single-use tokens) + +### Session Token Security +- Generate with secrets.token_urlsafe(32) or similar +- Store hash in database (not plaintext) +- Mark cookies as HttpOnly and Secure +- Set SameSite=Lax for CSRF protection +- Implement session expiry (30 days) +- Support manual logout (session deletion) + +### Identity Verification +- Only allow ADMIN_ME URL to authenticate +- Verify "me" URL from indielogin.com exactly matches config +- Reject any other authenticated users +- Log authentication attempts + +### Network Security +- Use HTTPS for all communication +- Verify SSL certificates on httpx requests +- Handle network timeouts
gracefully +- Log authentication failures + +## Testing Strategy + +### Unit Tests +- State token generation and validation +- Session creation and expiry +- URL validation +- Cookie handling + +### Integration Tests +- Mock indielogin.com API responses +- Test full authentication flow +- Test session expiry +- Test unauthorized user rejection +- Test CSRF protection (invalid state) + +### Manual Testing +- Authenticate with real indielogin.com +- Verify session persistence +- Test logout functionality +- Test session expiry +- Test with wrong "me" URL + +## Alternatives Considered + +### Self-Hosted IndieAuth Server (Rejected) +- **Complexity**: Must implement full IndieAuth spec +- **Maintenance**: Security updates, endpoint discovery, token generation +- **Verdict**: Too complex for V1, violates simplicity principle + +### Password Authentication (Rejected) +- **Security**: Must hash passwords, handle resets, prevent brute force +- **IndieWeb**: Violates IndieWeb principle of URL-based identity +- **Verdict**: Not aligned with project goals + +### OAuth via GitHub/Google (Rejected) +- **Simplicity**: Easy to implement +- **IndieWeb**: Not IndieWeb-compatible, user doesn't own identity +- **Verdict**: Violates IndieWeb requirements + +### Email Magic Links (Rejected) +- **Simplicity**: Requires email sending infrastructure +- **IndieWeb**: Not standard IndieWeb authentication +- **Verdict**: Deferred to V2 as fallback option + +### Multi-User IndieAuth (Rejected for V1) +- **Scope**: V1 is explicitly single-user +- **Complexity**: Would require user management +- **Verdict**: Out of scope, defer to V2 + +## Implementation Checklist + +- [ ] Add SESSION_SECRET and ADMIN_ME to configuration +- [ ] Create sessions and auth_state database tables +- [ ] Implement state token generation and storage +- [ ] Create login form template +- [ ] Implement /admin/login routes (GET and POST) +- [ ] Implement /auth/callback route +- [ ] Implement session creation and validation 
+- [ ] Create require_auth decorator +- [ ] Implement logout functionality +- [ ] Set secure cookie parameters +- [ ] Add authentication error handling +- [ ] Write unit tests for auth flow +- [ ] Write integration tests with mocked indielogin.com +- [ ] Test with real indielogin.com +- [ ] Document setup process for users + +## Configuration Example + +```bash +# .env file +SITE_URL=https://starpunk.example.com +ADMIN_ME=https://your-website.com +SESSION_SECRET=your-random-secret-key-here +``` + +## User Setup Documentation + +1. Deploy StarPunk to your server at `https://starpunk.example.com` +2. Configure `ADMIN_ME` to your personal website URL +3. Visit `/admin/login` +4. Enter your website URL (must match ADMIN_ME) +5. indielogin.com will verify your identity +6. Authenticate via your chosen method +7. Redirected back to StarPunk admin interface + +## References +- IndieLogin.com: https://indielogin.com/ +- IndieLogin API Documentation: https://indielogin.com/api +- IndieAuth Specification: https://indieauth.spec.indieweb.org/ +- OAuth 2.0 Spec: https://oauth.net/2/ +- Web Authentication Best Practices: https://cheatsheetseries.owasp.org/cheatsheets/Authentication_Cheat_Sheet.html diff --git a/docs/decisions/ADR-006-python-virtual-environment-uv.md b/docs/decisions/ADR-006-python-virtual-environment-uv.md new file mode 100644 index 0000000..c546073 --- /dev/null +++ b/docs/decisions/ADR-006-python-virtual-environment-uv.md @@ -0,0 +1,552 @@ +# ADR-006: Python Virtual Environment Management with uv + +## Status +Accepted + +## Context +StarPunk is a Python-based web application that requires dependency management and virtual environment isolation. 
Developer agents (AI assistants like Claude Code) need clear, unambiguous standards for: + +- Creating and managing Python virtual environments +- Installing and tracking dependencies +- Ensuring reproducible development environments +- Avoiding common pitfalls (polluting global Python, dependency conflicts) +- Maintaining consistency across development and deployment + +Traditional tools (pip, venv, virtualenv, poetry, pipenv) have various limitations: +- **pip + venv**: Slow dependency resolution, manual requirements.txt management +- **poetry**: Complex configuration, slow, dependency lock issues +- **pipenv**: Abandoned maintenance, slow performance +- **conda**: Heavyweight, non-standard for web development + +We need a tool that is fast, simple, and provides excellent developer experience while maintaining compatibility with standard Python packaging. + +## Decision +Use **uv** for all Python virtual environment and dependency management in StarPunk. + +uv will be the standard tool for: +- Creating virtual environments +- Installing dependencies +- Managing requirements +- Running Python commands in the virtual environment +- Synchronizing dependencies + +## Rationale + +### Simplicity Score: 10/10 +- Single tool for all environment management +- Simple command syntax (uv venv, uv pip install, uv run) +- Drop-in replacement for pip and virtualenv +- No complex configuration files +- Works with standard requirements.txt +- Written in Rust, installed as single binary + +### Performance Score: 10/10 +- 10-100x faster than pip for dependency resolution +- Parallel downloads and installations +- Efficient caching mechanism +- Near-instant virtual environment creation +- Minimal overhead for running commands + +### Fitness Score: 9/10 +- Perfect for small to medium Python projects +- Excellent for single-developer projects +- Works with standard Python packaging (PEP 517/518) +- Compatible with requirements.txt workflow +- Supports editable installs for development 
+- Works seamlessly with Flask and all our dependencies + +### Maintenance Score: 9/10 +- Actively developed by Astral (creators of ruff) +- Strong community adoption +- Excellent documentation +- Regular updates and improvements +- Modern codebase (Rust) +- Backed by funding and commercial support + +### Standards Compliance: Pass +- Full compatibility with pip +- Works with PyPI and all standard package indices +- Supports PEP 440 version specifiers +- Compatible with requirements.txt format +- Works with standard Python virtual environments +- No proprietary lock files (uses standard formats) + +## Implementation Details + +### 1. Installation Standards + +#### System-Level uv Installation +Developer agents MUST ensure uv is installed before creating environments: + +```bash +# Check if uv is installed +which uv + +# If not installed, install via pip (fallback) +pip install uv + +# Or install via official installer (preferred on Linux/macOS) +curl -LsSf https://astral.sh/uv/install.sh | sh +``` + +#### Verification +```bash +# Verify uv installation +uv --version +# Expected output: uv 0.x.x (or newer) +``` + +### 2. Virtual Environment Creation Standards + +#### Location and Naming +- **Standard location**: `/home/phil/Projects/starpunk/.venv` +- **Name**: Always use `.venv` (hidden directory) +- **DO NOT** use: `venv`, `env`, `virtualenv`, or custom names + +#### Creation Command +```bash +# Create virtual environment with uv +cd /home/phil/Projects/starpunk +uv venv .venv + +# Specify Python version (recommended) +uv venv .venv --python 3.11 +``` + +#### Post-Creation Verification +```bash +# Verify .venv directory exists +ls -la /home/phil/Projects/starpunk/.venv + +# Verify Python executable +/home/phil/Projects/starpunk/.venv/bin/python --version +``` + +### 3. 
Dependency Installation Standards + +#### Using requirements.txt (Primary Method) +```bash +# Install all dependencies from requirements.txt +uv pip install -r /home/phil/Projects/starpunk/requirements.txt + +# Verify installation +uv pip list +``` + +#### Installing Individual Packages +```bash +# Install a single package +uv pip install flask==3.0.* + +# Install multiple packages +uv pip install flask markdown feedgen +``` + +#### Development Dependencies +```bash +# Install dev dependencies (if requirements-dev.txt exists) +uv pip install -r /home/phil/Projects/starpunk/requirements-dev.txt +``` + +### 4. Running Commands in Virtual Environment + +#### Using uv run (Recommended) +```bash +# Run Python script +uv run /home/phil/Projects/starpunk/.venv/bin/python script.py + +# Run Flask development server +uv run /home/phil/Projects/starpunk/.venv/bin/flask run + +# Run pytest +uv run /home/phil/Projects/starpunk/.venv/bin/pytest + +# Run Python REPL +uv run /home/phil/Projects/starpunk/.venv/bin/python +``` + +#### Direct Execution (Alternative) +```bash +# Execute using absolute path to venv Python +/home/phil/Projects/starpunk/.venv/bin/python script.py +/home/phil/Projects/starpunk/.venv/bin/flask run +/home/phil/Projects/starpunk/.venv/bin/pytest +``` + +### 5. Dependency Tracking Standards + +#### Generating requirements.txt +```bash +# Freeze current environment to requirements.txt +uv pip freeze > /home/phil/Projects/starpunk/requirements.txt + +# Freeze with sorted output for consistency +uv pip freeze | sort > /home/phil/Projects/starpunk/requirements.txt +``` + +#### Adding New Dependencies +When adding a new dependency: +1. Install the package: `uv pip install package-name` +2. Update requirements.txt: `uv pip freeze | sort > requirements.txt` +3. Verify installation: `uv pip list | grep package-name` + +### 6. 
Environment Updates and Maintenance + +#### Updating Dependencies +```bash +# Update a specific package +uv pip install --upgrade flask + +# Update all packages (use with caution) +uv pip install --upgrade -r requirements.txt + +# Regenerate requirements.txt after updates +uv pip freeze | sort > requirements.txt +``` + +#### Cleaning and Rebuilding +```bash +# Remove virtual environment +rm -rf /home/phil/Projects/starpunk/.venv + +# Recreate from scratch +uv venv .venv --python 3.11 +uv pip install -r requirements.txt +``` + +## Developer Agent Standards + +### Critical Rules for AI Assistants + +#### Rule 1: ALWAYS Check for Existing Virtual Environment +Before creating a new virtual environment, ALWAYS check: + +```bash +# Check if .venv exists +if [ -d "/home/phil/Projects/starpunk/.venv" ]; then + echo "Virtual environment exists" + /home/phil/Projects/starpunk/.venv/bin/python --version +else + echo "Virtual environment does not exist" +fi +``` + +**NEVER** create a new virtual environment if one already exists without explicit user permission. + +#### Rule 2: ALWAYS Use Absolute Paths +Agent threads reset cwd between bash calls. 
ALWAYS use absolute paths: + +**CORRECT:** +```bash +uv venv /home/phil/Projects/starpunk/.venv +/home/phil/Projects/starpunk/.venv/bin/python script.py +uv pip install -r /home/phil/Projects/starpunk/requirements.txt +``` + +**INCORRECT:** +```bash +uv venv .venv # Relative path - WRONG +./.venv/bin/python script.py # Relative path - WRONG +uv pip install -r requirements.txt # Relative path - WRONG +``` + +#### Rule 3: Verify Before Executing +Before running Python commands, verify the virtual environment: + +```bash +# Verification checklist +[ -d "/home/phil/Projects/starpunk/.venv" ] && echo "✓ venv exists" || echo "✗ venv missing" +[ -f "/home/phil/Projects/starpunk/.venv/bin/python" ] && echo "✓ Python exists" || echo "✗ Python missing" +/home/phil/Projects/starpunk/.venv/bin/python --version +``` + +#### Rule 4: Handle Errors Gracefully +If virtual environment operations fail: + +1. **Check uv installation**: `which uv` +2. **Check Python version**: `python3 --version` +3. **Check disk space**: `df -h /home/phil/Projects/starpunk` +4. **Report specific error** to user with context +5. **DO NOT** silently continue with global Python + +#### Rule 5: Never Modify Global Python +**NEVER** run these commands: +```bash +# FORBIDDEN - modifies global Python +pip install package +python3 -m pip install package +sudo pip install package +``` + +**ALWAYS** use virtual environment: +```bash +# CORRECT - uses virtual environment +uv pip install package +/home/phil/Projects/starpunk/.venv/bin/pip install package +``` + +#### Rule 6: Track Dependency Changes +After installing or removing packages: + +1. Update requirements.txt: `uv pip freeze | sort > /home/phil/Projects/starpunk/requirements.txt` +2. Verify changes: `git diff requirements.txt` (if applicable) +3. Inform user of changes made + +### Standard Agent Workflow + +#### Scenario 1: First-Time Setup +```bash +# 1. Check if venv exists +if [ ! -d "/home/phil/Projects/starpunk/.venv" ]; then + echo "Creating virtual environment..." 
+ uv venv /home/phil/Projects/starpunk/.venv --python 3.11 +fi + +# 2. Verify creation +/home/phil/Projects/starpunk/.venv/bin/python --version + +# 3. Install dependencies (if requirements.txt exists) +if [ -f "/home/phil/Projects/starpunk/requirements.txt" ]; then + uv pip install -r /home/phil/Projects/starpunk/requirements.txt +fi + +# 4. Verify installation +uv pip list +``` + +#### Scenario 2: Running Development Server +```bash +# 1. Verify venv exists +[ -d "/home/phil/Projects/starpunk/.venv" ] || echo "ERROR: Virtual environment missing" + +# 2. Verify Flask is installed +/home/phil/Projects/starpunk/.venv/bin/python -c "import flask; print(flask.__version__)" + +# 3. Run Flask development server +/home/phil/Projects/starpunk/.venv/bin/flask --app /home/phil/Projects/starpunk/app.py run +``` + +#### Scenario 3: Adding New Dependency +```bash +# 1. Install package +uv pip install httpx + +# 2. Verify installation +uv pip show httpx + +# 3. Update requirements.txt +uv pip freeze | sort > /home/phil/Projects/starpunk/requirements.txt + +# 4. Confirm to user +echo "Added httpx to project dependencies" +``` + +#### Scenario 4: Running Tests +```bash +# 1. Verify pytest is installed +/home/phil/Projects/starpunk/.venv/bin/python -c "import pytest; print(pytest.__version__)" + +# 2. Run tests +/home/phil/Projects/starpunk/.venv/bin/pytest /home/phil/Projects/starpunk/tests/ + +# 3. 
Run tests with coverage (if pytest-cov installed) +/home/phil/Projects/starpunk/.venv/bin/pytest --cov=/home/phil/Projects/starpunk/src /home/phil/Projects/starpunk/tests/ +``` + +## Project-Specific Standards + +### Python Version Requirements +- **Minimum**: Python 3.11 +- **Recommended**: Python 3.11 or 3.12 +- **Rationale**: Modern Python features, improved performance, security updates + +### Directory Structure +``` +/home/phil/Projects/starpunk/ +├── .venv/ # Virtual environment (NEVER commit) +├── requirements.txt # Production dependencies +├── requirements-dev.txt # Development dependencies (optional) +├── src/ # Application source code +├── tests/ # Test files +└── docs/ # Documentation +``` + +### .gitignore Requirements +The following MUST be in .gitignore: +``` +# Virtual Environment +.venv/ +venv/ +env/ +ENV/ + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +``` + +### Environment Variables +Use python-dotenv for configuration: +```bash +# .env file (NEVER commit to git) +FLASK_APP=app.py +FLASK_ENV=development +SECRET_KEY=your-secret-key +DATABASE_PATH=/home/phil/Projects/starpunk/data/starpunk.db +``` + +Load in application: +```python +from dotenv import load_dotenv +load_dotenv() +``` + +### Requirements.txt Format +Follow these conventions: +``` +# Requirements.txt - StarPunk Dependencies +# Generated: 2025-11-18 + +# Web Framework +flask==3.0.* + +# Content Processing +markdown==3.5.* + +# Feed Generation +feedgen==1.0.* + +# HTTP Client +httpx==0.27.* + +# Configuration +python-dotenv==1.0.* +``` + +## Consequences + +### Positive +- **10-100x faster** dependency resolution and installation +- **Consistent environments** across development and deployment +- **Simple workflow** - one tool for all Python environment tasks +- **No activation required** - uv run handles environment automatically +- **Excellent caching** - faster subsequent installations +- **Standard compatibility** - works with all existing Python tools +- **Clear 
agent guidelines** - reduces errors in automated workflows +- **Isolated dependencies** - no conflicts with system Python + +### Negative +- **Additional tool dependency** - requires uv installation +- **Less familiar** - newer tool, smaller community than pip +- **Rust dependency** - uv is written in Rust (but distributed as binary) + +### Mitigation +- uv is easy to install (single binary, no compilation needed) +- uv is pip-compatible (drop-in replacement) +- Fallback to pip + venv is always possible +- Documentation and agent standards make adoption easy +- Active development and growing adoption reduce risk + +### Trade-offs Accepted +- **uv vs poetry**: We chose simplicity over advanced features +- **uv vs pipenv**: We chose active maintenance and speed +- **uv vs pip**: We chose performance over ubiquity +- **Single tool complexity**: Better than managing multiple tools + +## Verification Checklist + +Before considering the environment correctly set up, verify: + +- [ ] uv is installed and accessible: `which uv` +- [ ] Virtual environment exists: `ls -la /home/phil/Projects/starpunk/.venv` +- [ ] Python version is 3.11+: `/home/phil/Projects/starpunk/.venv/bin/python --version` +- [ ] Dependencies installed: `uv pip list` shows Flask, markdown, feedgen, httpx +- [ ] requirements.txt exists and is up to date +- [ ] .venv is in .gitignore +- [ ] Flask runs: `/home/phil/Projects/starpunk/.venv/bin/flask --version` + +## Integration with Development Workflow + +### Running Flask Application +```bash +# Development server +/home/phil/Projects/starpunk/.venv/bin/flask --app app.py run --debug + +# Production server (using gunicorn) +/home/phil/Projects/starpunk/.venv/bin/gunicorn app:app +``` + +### Running Tests +```bash +# All tests +/home/phil/Projects/starpunk/.venv/bin/pytest + +# Specific test file +/home/phil/Projects/starpunk/.venv/bin/pytest tests/test_api.py + +# With coverage +/home/phil/Projects/starpunk/.venv/bin/pytest --cov=src tests/ +``` + +### 
Code Quality Tools +```bash +# Format code with black +/home/phil/Projects/starpunk/.venv/bin/black src/ + +# Lint with flake8 +/home/phil/Projects/starpunk/.venv/bin/flake8 src/ + +# Type checking with mypy (if added) +/home/phil/Projects/starpunk/.venv/bin/mypy src/ +``` + +## Alternatives Considered + +### pip + venv (Rejected) +- **Simplicity**: 8/10 - Standard Python tools, well-known +- **Performance**: 4/10 - Very slow dependency resolution +- **Fitness**: 7/10 - Works but painful for larger dependency trees +- **Maintenance**: 10/10 - Built into Python, always maintained +- **Verdict**: Too slow, poor developer experience, but acceptable fallback + +### poetry (Rejected) +- **Simplicity**: 5/10 - Complex pyproject.toml, lock file management +- **Performance**: 5/10 - Slow dependency resolution +- **Fitness**: 6/10 - Overkill for simple project, lock files add complexity +- **Maintenance**: 7/10 - Maintained but has had reliability issues +- **Verdict**: Too complex for "minimal code" philosophy + +### pipenv (Rejected) +- **Simplicity**: 6/10 - Simpler than poetry, but still adds abstraction +- **Performance**: 4/10 - Known performance issues +- **Fitness**: 5/10 - Previously recommended, now effectively abandoned +- **Maintenance**: 2/10 - Minimal maintenance, community has moved on +- **Verdict**: Dead project, poor performance + +### conda (Rejected) +- **Simplicity**: 3/10 - Heavy, complex environment management +- **Performance**: 5/10 - Slower than uv, larger downloads +- **Fitness**: 2/10 - Designed for data science, not web development +- **Maintenance**: 9/10 - Well maintained, large ecosystem +- **Verdict**: Wrong tool for web application development + +### PDM (Considered) +- **Simplicity**: 7/10 - Modern, PEP 582 support +- **Performance**: 8/10 - Fast, but not as fast as uv +- **Fitness**: 7/10 - Good for modern Python projects +- **Maintenance**: 8/10 - Actively maintained, growing community +- **Verdict**: Good alternative, but uv is faster 
and simpler + +## References +- uv Documentation: https://docs.astral.sh/uv/ +- uv GitHub: https://github.com/astral-sh/uv +- Python Virtual Environments: https://docs.python.org/3/library/venv.html +- PEP 405 (Python Virtual Environments): https://peps.python.org/pep-0405/ +- requirements.txt format: https://pip.pypa.io/en/stable/reference/requirements-file-format/ +- Astral (uv creators): https://astral.sh/ + +## Change Log +- 2025-11-18: Initial version - Established uv as standard tool for StarPunk Python environment management diff --git a/docs/decisions/ADR-007-slug-generation-algorithm.md b/docs/decisions/ADR-007-slug-generation-algorithm.md new file mode 100644 index 0000000..f53d2ad --- /dev/null +++ b/docs/decisions/ADR-007-slug-generation-algorithm.md @@ -0,0 +1,487 @@ +# ADR-007: Slug Generation Algorithm + +## Status +Accepted + +## Context + +Notes in StarPunk require URL-safe identifiers (slugs) for permalinks and file naming. The slug generation algorithm is critical because: + +1. **User experience**: Slugs appear in URLs and should be readable/meaningful +2. **SEO**: Descriptive slugs improve search engine optimization +3. **File system**: Slugs become filenames, must be filesystem-safe +4. **Uniqueness**: Slugs must be unique across all notes +5. **Portability**: Slugs should work across different systems and browsers + +The challenge is designing an algorithm that creates readable, unique, safe slugs automatically from note content. 
+ +## Decision + +### Content-Based Slug Generation with Timestamp Fallback + +**Primary Algorithm**: Extract first N words from content and normalize +**Fallback**: Timestamp-based slug when content is insufficient +**Uniqueness**: Random suffix when collision detected + +### Algorithm Specification + +#### Step 1: Extract Words +```python +# Extract first 5 words from content +words = content.split()[:5] +text = " ".join(words) +``` + +#### Step 2: Normalize +```python +# Convert to lowercase +text = text.lower() + +# Replace spaces with hyphens +text = text.replace(" ", "-") + +# Remove all characters except a-z, 0-9, and hyphens +text = re.sub(r'[^a-z0-9-]', '', text) + +# Collapse multiple hyphens +text = re.sub(r'-+', '-', text) + +# Strip leading/trailing hyphens +text = text.strip('-') +``` + +#### Step 3: Validate Length +```python +# If slug too short or empty, use timestamp fallback +if len(text) < 1: + text = created_at.strftime("%Y%m%d-%H%M%S") +``` + +#### Step 4: Truncate +```python +# Limit to 100 characters +text = text[:100] +``` + +#### Step 5: Check Uniqueness +```python +# If slug exists, add random 4-character suffix +if slug_exists(text): + text = f"{text}-{random_alphanumeric(4)}" +``` + +### Character Set + +**Allowed characters**: `a-z`, `0-9`, `-` (hyphen) + +**Rationale**: +- URL-safe without encoding +- Filesystem-safe on all platforms (Windows, Linux, macOS) +- Human-readable +- No escaping required in HTML +- Compatible with DNS hostnames (if ever used) + +### Examples + +| Input Content | Generated Slug | +|--------------|----------------| +| "Hello World! This is my first note." | `hello-world-this-is-my` | +| "Testing... 
with special chars!@#" | `testing-with-special-chars` | +| "2024-11-18 Daily Journal Entry" | `2024-11-18-daily-journal-entry` | +| "A" (too short) | `20241118-143022` (timestamp) | +| " " (whitespace only) | Error: ValueError | +| "Hello World" (duplicate) | `hello-world-a7c9` (random suffix) | + +### Slug Uniqueness Strategy + +**Collision Detection**: Check database for existing slug before use + +**Resolution**: Append random 4-character suffix +- Character set: `a-z0-9` (36 characters) +- Combinations: 36^4 = 1,679,616 possible suffixes +- Collision probability: Negligible for reasonable note counts + +**Example**: +``` +Original: hello-world +Collision: hello-world-a7c9 +Collision: hello-world-x3k2 +``` + +### Timestamp Fallback Format + +**Pattern**: `YYYYMMDD-HHMMSS` +**Example**: `20241118-143022` + +**When Used**: +- Content is empty or whitespace-only (raises error instead) +- Normalized slug is empty (after removing special characters) +- Normalized slug is too short (< 1 character) + +**Rationale**: +- Guaranteed unique (unless two notes created in same second) +- Sortable chronologically +- Still readable and meaningful +- No special characters required + +## Rationale + +### Content-Based Generation (Score: 9/10) + +**Pros**: +- **Readability**: Users can understand URL meaning +- **SEO**: Search engines prefer descriptive URLs +- **Memorability**: Easier to remember and share +- **Meaningful**: Reflects note content + +**Cons**: +- **Collisions**: Multiple notes might have similar titles +- **Changes**: Editing note doesn't update slug (by design) + +### First 5 Words (Score: 8/10) + +**Pros**: +- **Sufficient**: 5 words usually capture note topic +- **Concise**: Keeps URLs short and readable +- **Consistent**: Predictable slug length + +**Cons**: +- **Arbitrary**: 5 is somewhat arbitrary (could be 3-7) +- **Language**: Assumes space-separated words (English-centric) + +**Alternatives Considered**: +- First 3 words: Too short, often not descriptive 
+- First 10 words: Too long, URLs become unwieldy +- First line: Could be very long, harder to normalize +- First sentence: Variable length, complex to parse + +**Decision**: 5 words is a good balance (configurable constant) + +### Lowercase with Hyphens (Score: 10/10) + +**Pros**: +- **URL Standard**: Common pattern (github.com, stackoverflow.com) +- **Readability**: Easier to read than underscores or camelCase +- **Compatibility**: Works everywhere +- **Simplicity**: One separator type only + +**Cons**: +- None significant + +### Alphanumeric Only (Score: 10/10) + +**Pros**: +- **Safety**: No escaping required in URLs or filenames +- **Portability**: Works on all filesystems (FAT32, NTFS, ext4, APFS) +- **Predictability**: No ambiguity about character handling + +**Cons**: +- **Unicode Loss**: Non-ASCII characters stripped (acceptable trade-off) + +### Random Suffix for Uniqueness (Score: 9/10) + +**Pros**: +- **Simplicity**: No complex conflict resolution +- **Security**: Cryptographically secure random (secrets module) +- **Scalability**: 1.6M possible suffixes per base slug + +**Cons**: +- **Ugliness**: Suffix looks less clean (but rare occurrence) +- **Unpredictability**: User can't control suffix + +**Alternatives Considered**: +- Incrementing numbers (`hello-world-2`, `hello-world-3`): More predictable but reveals note count +- Longer random suffix: More secure but uglier URLs +- User-specified slug: More complex, deferred to V2 + +**Decision**: 4-character random suffix is good balance + +## Consequences + +### Positive + +1. **Automatic**: No user input required for slug +2. **Readable**: Slugs are human-readable and meaningful +3. **Safe**: Works on all platforms and browsers +4. **Unique**: Collision resolution ensures uniqueness +5. **SEO-friendly**: Descriptive URLs help search ranking +6. **Predictable**: User can anticipate what slug will be +7. **Simple**: Single, consistent algorithm + +### Negative + +1. 
**Not editable**: User can't customize slug in V1 +2. **English-biased**: Assumes space-separated words +3. **Unicode stripped**: Non-ASCII content loses characters +4. **Content-dependent**: Similar content = similar slugs +5. **Timestamp fallback**: Short notes get ugly timestamp slugs + +### Mitigations + +**Non-editable slugs**: +- V1 trade-off for simplicity +- V2 can add custom slug support +- Users can still reference notes by slug once created + +**English-bias**: +- Acceptable for V1 (English-first IndieWeb) +- V2 can add Unicode slug support (requires more complex normalization) + +**Unicode stripping**: +- Markdown content can still contain Unicode (only slug is ASCII) +- Timestamp fallback ensures note is still creatable +- V2 can use Unicode normalization (transliteration) + +**Timestamp fallback**: +- Rare occurrence (most notes have >5 words) +- Still functional and unique +- V2 can improve (use first word if exists + timestamp) + +## Standards Compliance + +### URL Standards (RFC 3986) + +Slugs comply with URL path segment requirements: +- No percent-encoding required +- No reserved characters (`/`, `?`, `#`, etc.) +- Case-insensitive safe (always lowercase) + +### Filesystem Standards + +Slugs work on all major filesystems: +- **FAT32**: Yes (no special chars, length OK) +- **NTFS**: Yes +- **ext4**: Yes +- **APFS**: Yes +- **HFS+**: Yes + +**Reserved names**: None of our slugs conflict with OS reserved names (CON, PRN, etc.) 
+ +### IndieWeb Recommendations + +Aligns with IndieWeb permalink best practices: +- Descriptive URLs +- No query parameters +- Short and memorable +- Permanent (don't change after creation) + +## Implementation Requirements + +### Validation Rules + +```python +# Valid slug pattern +SLUG_PATTERN = r'^[a-z0-9]+(?:-[a-z0-9]+)*$' + +# Constraints +MIN_SLUG_LENGTH = 1 +MAX_SLUG_LENGTH = 100 +``` + +### Reserved Slugs + +Certain slugs should be reserved for system routes: + +**Reserved List** (reject these slugs): +- `admin` +- `api` +- `static` +- `auth` +- `feed` +- `login` +- `logout` + +Implementation: +```python +RESERVED_SLUGS = {'admin', 'api', 'static', 'auth', 'feed', 'login', 'logout'} + +def is_slug_reserved(slug: str) -> bool: + return slug in RESERVED_SLUGS +``` + +### Error Cases + +```python +# Empty content +generate_slug("") # Raises ValueError + +# Whitespace only +generate_slug(" ") # Raises ValueError + +# Valid but short +generate_slug("Hi") # Returns timestamp: "20241118-143022" + +# Special characters only +generate_slug("!@#$%") # Returns timestamp: "20241118-143022" +``` + +## Alternatives Considered + +### UUID-based Slugs (Rejected) + +```python +slug = str(uuid.uuid4()) # "550e8400-e29b-41d4-a716-446655440000" +``` + +**Pros**: Guaranteed unique, no collision checking +**Cons**: Not human-readable, poor SEO, not memorable + +**Verdict**: Violates principle of readable URLs + +### Hash-based Slugs (Rejected) + +```python +slug = hashlib.sha256(content.encode()).hexdigest()[:12] # "a591a6d40bf4" +``` + +**Pros**: Deterministic, unique +**Cons**: Not human-readable, changes if content edited + +**Verdict**: Not meaningful to users + +### Title Extraction (Rejected for V1) + +```python +# Extract from # heading or first line +title = extract_title_from_markdown(content) +slug = normalize(title) +``` + +**Pros**: More semantic, uses actual title +**Cons**: Requires markdown parsing, more complex, title might not exist + +**Verdict**: Deferred to 
V2 (V1 uses first N words which is simpler) + +### User-Specified Slugs (Rejected for V1) + +```python +def create_note(content, custom_slug=None): + if custom_slug: + slug = validate_and_use(custom_slug) + else: + slug = generate_slug(content) +``` + +**Pros**: Maximum user control, no surprises +**Cons**: Requires UI input, validation complexity, user burden + +**Verdict**: Deferred to V2 (V1 auto-generates for simplicity) + +### Incrementing Numbers (Rejected) + +```python +# If collision, increment +slug = "hello-world" +slug = "hello-world-2" # Collision +slug = "hello-world-3" # Collision +``` + +**Pros**: Predictable, simple +**Cons**: Reveals note count, enumeration attack vector, less random + +**Verdict**: Random suffix is more secure and scales better + +## Performance Considerations + +### Generation Speed + +- Extract words: O(n) where n = content length (negligible, content is small) +- Normalize: O(m) where m = extracted text length (< 100 chars) +- Uniqueness check: O(1) database lookup with index +- Random suffix: O(1) generation + +**Target**: < 1ms per slug generation (easily achieved) + +### Database Impact + +- Index on `slug` column: O(log n) lookup +- Collision rate: < 1% (most notes have unique first 5 words) +- Random suffix retries: Nearly never (1.6M combinations) + +## Testing Requirements + +### Test Cases + +**Normal Cases**: +- Standard English content → descriptive slug +- Content with punctuation → punctuation removed +- Content with numbers → numbers preserved +- Content with hyphens → hyphens preserved + +**Edge Cases**: +- Very short content → timestamp fallback +- Empty content → ValueError +- Special characters only → timestamp fallback +- Very long words → truncated to max length +- Unicode content → stripped to ASCII + +**Collision Cases**: +- Duplicate slug → random suffix added +- Multiple collisions → different random suffixes +- Reserved slug → rejected + +**Security Cases**: +- Path traversal attempt 
(`../../../etc/passwd`) +- Special characters (` My Note Title") + assert "<" not in slug + assert ">" not in slug + assert "script" in slug # The word itself is fine + # Special chars removed, becomes one word, then first 5 words total + assert slug == "scriptalertxssscript-my-note-title" + + def test_random_suffix_uses_secrets_module(self): + """Test random suffix is cryptographically secure (not predictable).""" + # Generate many suffixes and ensure high entropy + suffixes = [generate_random_suffix() for _ in range(1000)] + unique_count = len(set(suffixes)) + # Should have very high uniqueness (>99%) + assert unique_count > 990 + + +class TestContentHashing: + """Test content hashing functions""" + + def test_calculate_content_hash_consistency(self): + """Test hash is consistent for same content.""" + hash1 = calculate_content_hash("Test content") + hash2 = calculate_content_hash("Test content") + assert hash1 == hash2 + + def test_calculate_content_hash_different(self): + """Test different content produces different hash.""" + hash1 = calculate_content_hash("Test content 1") + hash2 = calculate_content_hash("Test content 2") + assert hash1 != hash2 + + def test_calculate_content_hash_empty(self): + """Test hash of empty string.""" + hash_empty = calculate_content_hash("") + assert len(hash_empty) == 64 # SHA-256 produces 64 hex chars + assert hash_empty.isalnum() + + def test_calculate_content_hash_unicode(self): + """Test hash handles unicode correctly.""" + hash_val = calculate_content_hash("Hello 世界") + assert len(hash_val) == 64 + assert hash_val.isalnum() + + def test_calculate_content_hash_known_value(self): + """Test hash matches known SHA-256 value.""" + # Known SHA-256 hash for "Hello World" + expected = "a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e" + actual = calculate_content_hash("Hello World") + assert actual == expected + + def test_calculate_content_hash_multiline(self): + """Test hash of multiline content.""" + content = 
"Line 1\nLine 2\nLine 3" + hash_val = calculate_content_hash(content) + assert len(hash_val) == 64 + + def test_calculate_content_hash_special_characters(self): + """Test hash handles special characters.""" + content = "Special chars: !@#$%^&*()_+-=[]{}|;:',.<>?/~`" + hash_val = calculate_content_hash(content) + assert len(hash_val) == 64 + + +class TestFilePathOperations: + """Test file path generation and validation""" + + def test_generate_note_path_basic(self): + """Test basic note path generation.""" + dt = datetime(2024, 11, 18, 14, 30) + path = generate_note_path("test-note", dt, Path("data")) + assert path == Path("data/notes/2024/11/test-note.md") + + def test_generate_note_path_different_months(self): + """Test path generation for different months.""" + dt_jan = datetime(2024, 1, 5, 10, 0) + dt_dec = datetime(2024, 12, 25, 15, 30) + + path_jan = generate_note_path("jan-note", dt_jan, Path("data")) + path_dec = generate_note_path("dec-note", dt_dec, Path("data")) + + assert path_jan == Path("data/notes/2024/01/jan-note.md") + assert path_dec == Path("data/notes/2024/12/dec-note.md") + + def test_generate_note_path_different_years(self): + """Test path generation for different years.""" + dt_2024 = datetime(2024, 6, 15) + dt_2025 = datetime(2025, 6, 15) + + path_2024 = generate_note_path("note-2024", dt_2024, Path("data")) + path_2025 = generate_note_path("note-2025", dt_2025, Path("data")) + + assert path_2024 == Path("data/notes/2024/06/note-2024.md") + assert path_2025 == Path("data/notes/2025/06/note-2025.md") + + def test_generate_note_path_invalid_slug(self): + """Test note path generation rejects invalid slug.""" + dt = datetime(2024, 11, 18) + with pytest.raises(ValueError, match="Invalid slug"): + generate_note_path("Invalid Slug!", dt, Path("data")) + + def test_generate_note_path_with_numbers(self): + """Test path generation with slug containing numbers.""" + dt = datetime(2024, 11, 18) + path = generate_note_path("note-123-test", dt, 
Path("data")) + assert path == Path("data/notes/2024/11/note-123-test.md") + + def test_ensure_note_directory_creates_dirs(self, tmp_path): + """Test ensure_note_directory creates directories.""" + note_path = tmp_path / "notes" / "2024" / "11" / "test.md" + assert not note_path.parent.exists() + + result = ensure_note_directory(note_path) + + assert note_path.parent.exists() + assert result == note_path.parent + + def test_ensure_note_directory_existing_dirs(self, tmp_path): + """Test ensure_note_directory with existing directories.""" + note_path = tmp_path / "notes" / "2024" / "11" / "test.md" + note_path.parent.mkdir(parents=True) + + # Should not raise error + result = ensure_note_directory(note_path) + assert result == note_path.parent + + def test_ensure_note_directory_deep_structure(self, tmp_path): + """Test ensure_note_directory with deep directory structure.""" + note_path = tmp_path / "a" / "b" / "c" / "d" / "e" / "test.md" + result = ensure_note_directory(note_path) + + assert note_path.parent.exists() + assert result == note_path.parent + + def test_validate_note_path_safe(self, tmp_path): + """Test path validation accepts safe paths.""" + note_path = tmp_path / "notes" / "2024" / "11" / "note.md" + assert validate_note_path(note_path, tmp_path) is True + + def test_validate_note_path_traversal_dotdot(self, tmp_path): + """Test path validation rejects .. traversal.""" + note_path = tmp_path / "notes" / ".." / ".." 
/ "etc" / "passwd" + assert validate_note_path(note_path, tmp_path) is False + + def test_validate_note_path_absolute_outside(self, tmp_path): + """Test path validation rejects absolute paths outside data dir.""" + assert validate_note_path(Path("/etc/passwd"), tmp_path) is False + + def test_validate_note_path_within_subdirectory(self, tmp_path): + """Test path validation accepts paths in subdirectories.""" + note_path = tmp_path / "notes" / "2024" / "11" / "subfolder" / "note.md" + assert validate_note_path(note_path, tmp_path) is True + + def test_validate_note_path_symlink_outside(self, tmp_path): + """Test path validation handles symlinks pointing outside.""" + # Create a symlink pointing outside data_dir + outside_dir = tmp_path.parent / "outside" + outside_dir.mkdir(exist_ok=True) + + link_path = tmp_path / "link" + link_path.symlink_to(outside_dir) + + target_path = link_path / "file.md" + assert validate_note_path(target_path, tmp_path) is False + + def test_validate_note_path_same_directory(self, tmp_path): + """Test path validation for file in data_dir root.""" + note_path = tmp_path / "note.md" + assert validate_note_path(note_path, tmp_path) is True + + +class TestAtomicFileOperations: + """Test atomic file write/read/delete operations""" + + def test_write_and_read_note_file(self, tmp_path): + """Test writing and reading note file.""" + file_path = tmp_path / "test.md" + content = "# Test Note\n\nThis is a test." 
+ + write_note_file(file_path, content) + assert file_path.exists() + + read_content = read_note_file(file_path) + assert read_content == content + + def test_write_note_file_atomic(self, tmp_path): + """Test write is atomic (temp file cleaned up).""" + file_path = tmp_path / "test.md" + temp_path = file_path.with_suffix(".md.tmp") + + write_note_file(file_path, "Test") + + # Temp file should not exist after write + assert not temp_path.exists() + assert file_path.exists() + + def test_write_note_file_overwrites(self, tmp_path): + """Test writing overwrites existing file.""" + file_path = tmp_path / "test.md" + + write_note_file(file_path, "Original content") + write_note_file(file_path, "New content") + + content = read_note_file(file_path) + assert content == "New content" + + def test_write_note_file_unicode(self, tmp_path): + """Test writing unicode content.""" + file_path = tmp_path / "test.md" + content = "Unicode: 你好世界 🌍" + + write_note_file(file_path, content) + read_content = read_note_file(file_path) + + assert read_content == content + + def test_write_note_file_empty(self, tmp_path): + """Test writing empty file.""" + file_path = tmp_path / "test.md" + write_note_file(file_path, "") + + content = read_note_file(file_path) + assert content == "" + + def test_write_note_file_multiline(self, tmp_path): + """Test writing multiline content.""" + file_path = tmp_path / "test.md" + content = "Line 1\nLine 2\nLine 3\n" + + write_note_file(file_path, content) + read_content = read_note_file(file_path) + + assert read_content == content + + def test_read_note_file_not_found(self, tmp_path): + """Test reading non-existent file raises error.""" + file_path = tmp_path / "nonexistent.md" + with pytest.raises(FileNotFoundError): + read_note_file(file_path) + + def test_delete_note_file_hard(self, tmp_path): + """Test hard delete removes file.""" + file_path = tmp_path / "test.md" + file_path.write_text("Test") + + delete_note_file(file_path, soft=False) + assert not 
file_path.exists() + + def test_delete_note_file_soft(self, tmp_path): + """Test soft delete moves file to trash.""" + # Create note file + notes_dir = tmp_path / "notes" / "2024" / "11" + notes_dir.mkdir(parents=True) + file_path = notes_dir / "test.md" + file_path.write_text("Test") + + # Soft delete + delete_note_file(file_path, soft=True, data_dir=tmp_path) + + # Original should be gone + assert not file_path.exists() + + # Should be in trash + trash_path = tmp_path / TRASH_DIR_NAME / "2024" / "11" / "test.md" + assert trash_path.exists() + assert trash_path.read_text() == "Test" + + def test_delete_note_file_soft_without_data_dir(self, tmp_path): + """Test soft delete requires data_dir.""" + file_path = tmp_path / "test.md" + file_path.write_text("Test") + + with pytest.raises(ValueError, match="data_dir is required"): + delete_note_file(file_path, soft=True, data_dir=None) + + def test_delete_note_file_soft_different_months(self, tmp_path): + """Test soft delete preserves year/month structure.""" + # Create note in January + jan_dir = tmp_path / "notes" / "2024" / "01" + jan_dir.mkdir(parents=True) + jan_file = jan_dir / "jan-note.md" + jan_file.write_text("January note") + + # Create note in December + dec_dir = tmp_path / "notes" / "2024" / "12" + dec_dir.mkdir(parents=True) + dec_file = dec_dir / "dec-note.md" + dec_file.write_text("December note") + + # Soft delete both + delete_note_file(jan_file, soft=True, data_dir=tmp_path) + delete_note_file(dec_file, soft=True, data_dir=tmp_path) + + # Check trash structure + jan_trash = tmp_path / TRASH_DIR_NAME / "2024" / "01" / "jan-note.md" + dec_trash = tmp_path / TRASH_DIR_NAME / "2024" / "12" / "dec-note.md" + + assert jan_trash.exists() + assert dec_trash.exists() + + def test_delete_note_file_hard_not_found(self, tmp_path): + """Test hard delete of non-existent file raises error.""" + file_path = tmp_path / "nonexistent.md" + with pytest.raises(FileNotFoundError): + delete_note_file(file_path, soft=False) + 
+ +class TestDateTimeFormatting: + """Test date/time formatting functions""" + + def test_format_rfc822_basic(self): + """Test RFC-822 date formatting.""" + dt = datetime(2024, 11, 18, 14, 30, 45) + formatted = format_rfc822(dt) + assert formatted == "Mon, 18 Nov 2024 14:30:45 +0000" + + def test_format_rfc822_different_dates(self): + """Test RFC-822 formatting for different dates.""" + dt1 = datetime(2024, 1, 1, 0, 0, 0) + dt2 = datetime(2024, 12, 31, 23, 59, 59) + + assert format_rfc822(dt1) == "Mon, 01 Jan 2024 00:00:00 +0000" + assert format_rfc822(dt2) == "Tue, 31 Dec 2024 23:59:59 +0000" + + def test_format_rfc822_weekdays(self): + """Test RFC-822 format includes correct weekday.""" + # Known dates and weekdays + monday = datetime(2024, 11, 18, 12, 0, 0) + friday = datetime(2024, 11, 22, 12, 0, 0) + sunday = datetime(2024, 11, 24, 12, 0, 0) + + assert format_rfc822(monday).startswith("Mon,") + assert format_rfc822(friday).startswith("Fri,") + assert format_rfc822(sunday).startswith("Sun,") + + def test_format_iso8601_basic(self): + """Test ISO 8601 date formatting.""" + dt = datetime(2024, 11, 18, 14, 30, 45) + formatted = format_iso8601(dt) + assert formatted == "2024-11-18T14:30:45Z" + + def test_format_iso8601_different_dates(self): + """Test ISO 8601 formatting for different dates.""" + dt1 = datetime(2024, 1, 1, 0, 0, 0) + dt2 = datetime(2024, 12, 31, 23, 59, 59) + + assert format_iso8601(dt1) == "2024-01-01T00:00:00Z" + assert format_iso8601(dt2) == "2024-12-31T23:59:59Z" + + def test_format_iso8601_single_digits(self): + """Test ISO 8601 format pads single digits.""" + dt = datetime(2024, 1, 5, 9, 8, 7) + formatted = format_iso8601(dt) + assert formatted == "2024-01-05T09:08:07Z" + + def test_parse_iso8601_basic(self): + """Test ISO 8601 date parsing.""" + dt = parse_iso8601("2024-11-18T14:30:45Z") + assert dt.year == 2024 + assert dt.month == 11 + assert dt.day == 18 + assert dt.hour == 14 + assert dt.minute == 30 + assert dt.second == 45 + + def 
test_parse_iso8601_without_z(self): + """Test ISO 8601 parsing without Z suffix.""" + dt = parse_iso8601("2024-11-18T14:30:45") + assert dt.year == 2024 + assert dt.month == 11 + assert dt.day == 18 + + def test_parse_iso8601_roundtrip(self): + """Test ISO 8601 format and parse roundtrip.""" + original = datetime(2024, 11, 18, 14, 30, 45) + formatted = format_iso8601(original) + parsed = parse_iso8601(formatted) + + assert parsed == original + + def test_parse_iso8601_invalid_format(self): + """Test ISO 8601 parsing rejects invalid format.""" + with pytest.raises(ValueError): + parse_iso8601("not-a-date") + + def test_parse_iso8601_invalid_date(self): + """Test ISO 8601 parsing rejects invalid date values.""" + with pytest.raises(ValueError): + parse_iso8601("2024-13-01T00:00:00Z") # Invalid month + + def test_format_and_parse_consistency(self): + """Test RFC-822 and ISO 8601 are both consistent.""" + dt = datetime(2024, 11, 18, 14, 30, 45) + + # ISO 8601 roundtrip + iso_formatted = format_iso8601(dt) + iso_parsed = parse_iso8601(iso_formatted) + assert iso_parsed == dt + + # RFC-822 format is consistent + rfc_formatted = format_rfc822(dt) + assert "2024" in rfc_formatted + assert "14:30:45" in rfc_formatted