diff --git a/migrations/005_add_fts5_search.sql b/migrations/005_add_fts5_search.sql new file mode 100644 index 0000000..9518c0b --- /dev/null +++ b/migrations/005_add_fts5_search.sql @@ -0,0 +1,44 @@ +-- Migration 005: Add full-text search using FTS5 +-- +-- Creates FTS5 virtual table for full-text search of notes. +-- Since note content is stored in external files (not in the database), +-- the FTS index must be maintained by application code, not SQL triggers. +-- +-- Requirements: +-- - SQLite compiled with FTS5 support +-- - Application code handles index synchronization +-- +-- Features: +-- - Full-text search on note content +-- - Porter stemming for better English search results +-- - Unicode normalization for international characters +-- - rowid matches notes.id for efficient lookups + +-- Create FTS5 virtual table for note search +-- Using porter stemmer for better English search results +-- Unicode61 tokenizer for international character support +-- Note: slug is UNINDEXED (not searchable, just for result display) +CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5( + slug UNINDEXED, -- Slug for result linking (not searchable) + title, -- First line of note (searchable, high weight) + content, -- Full markdown content (searchable) + tokenize='porter unicode61' +); + +-- Create delete trigger to remove from FTS when note is deleted +-- This is the only trigger we can use since deletion doesn't require file access +CREATE TRIGGER IF NOT EXISTS notes_fts_delete +AFTER DELETE ON notes +BEGIN + DELETE FROM notes_fts WHERE rowid = OLD.id; +END; + +-- Note: INSERT and UPDATE triggers cannot be used because they would need +-- to read content from external files, which SQLite triggers cannot do. +-- The application code in starpunk/notes.py handles FTS updates for +-- create and update operations. + +-- Initial index population: +-- After this migration runs, the FTS index must be populated with existing notes. +-- This happens automatically on application startup via starpunk/search.py:rebuild_fts_index() +-- or can be triggered manually if needed. diff --git a/starpunk/notes.py b/starpunk/notes.py index 57b77d6..0385c76 100644 --- a/starpunk/notes.py +++ b/starpunk/notes.py @@ -286,6 +286,17 @@ def create_note( # Create Note object note = Note.from_row(row, data_dir) + # 9. UPDATE FTS INDEX (if available) + try: + from starpunk.search import update_fts_index, has_fts_table + db_path = Path(current_app.config["DATABASE_PATH"]) + if has_fts_table(db_path): + update_fts_index(db, note_id, slug, content) + db.commit() + except Exception as e: + # FTS update failure should not prevent note creation + current_app.logger.warning(f"Failed to update FTS index for note {slug}: {e}") + return note @@ -676,7 +687,19 @@ def update_note( f"Failed to update note: {existing_note.slug}", ) - # 6. RETURN UPDATED NOTE + # 6. UPDATE FTS INDEX (if available and content changed) + if content is not None: + try: + from starpunk.search import update_fts_index, has_fts_table + db_path = Path(current_app.config["DATABASE_PATH"]) + if has_fts_table(db_path): + update_fts_index(db, existing_note.id, existing_note.slug, content) + db.commit() + except Exception as e: + # FTS update failure should not prevent note update + current_app.logger.warning(f"Failed to update FTS index for note {existing_note.slug}: {e}") + + # 7. RETURN UPDATED NOTE updated_note = get_note(slug=existing_note.slug, load_content=True) return updated_note diff --git a/starpunk/search.py b/starpunk/search.py new file mode 100644 index 0000000..46acf52 --- /dev/null +++ b/starpunk/search.py @@ -0,0 +1,246 @@ +""" +Full-text search functionality for StarPunk + +This module provides FTS5-based search capabilities for notes. It handles: +- Search query execution with relevance ranking +- FTS index population and maintenance +- Graceful degradation when FTS5 is unavailable + +The FTS index is maintained by application code (not SQL triggers) because +note content is stored in external files that SQLite cannot access. +""" + +import sqlite3 +import logging +from pathlib import Path +from typing import Optional +from flask import current_app + +logger = logging.getLogger(__name__) + + +def check_fts5_support(db_path: Path) -> bool: + """ + Check if SQLite was compiled with FTS5 support + + Args: + db_path: Path to SQLite database + + Returns: + bool: True if FTS5 is available, False otherwise + """ + try: + conn = sqlite3.connect(db_path) + # Try to create a test FTS5 table + conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS _fts5_test USING fts5(content)") + conn.execute("DROP TABLE IF EXISTS _fts5_test") + conn.close() + return True + except sqlite3.OperationalError as e: + if "no such module" in str(e).lower(): + logger.warning(f"FTS5 not available in SQLite: {e}") + return False + raise + + +def has_fts_table(db_path: Path) -> bool: + """ + Check if FTS table exists in database + + Args: + db_path: Path to SQLite database + + Returns: + bool: True if notes_fts table exists + """ + try: + conn = sqlite3.connect(db_path) + cursor = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='notes_fts'" + ) + exists = cursor.fetchone() is not None + conn.close() + return exists + except sqlite3.Error: + return False + + +def update_fts_index(conn: sqlite3.Connection, note_id: int, slug: str, content: str): + """ + Update FTS index for a note (insert or replace) + + Extracts title from first line of content and updates the FTS index. + Uses REPLACE to handle both new notes and updates. + + Args: + conn: SQLite database connection + note_id: Note ID (used as FTS rowid) + slug: Note slug + content: Full markdown content + + Raises: + sqlite3.Error: If FTS update fails + """ + # Extract title from first line + lines = content.split('\n', 1) + title = lines[0].strip() if lines else '' + + # Remove markdown heading syntax (# ## ###) + if title.startswith('#'): + title = title.lstrip('#').strip() + + # Limit title length + if len(title) > 100: + title = title[:100] + '...' + + # Use REPLACE to handle both insert and update + # rowid explicitly set to match note ID for efficient lookups + conn.execute( + "REPLACE INTO notes_fts (rowid, slug, title, content) VALUES (?, ?, ?, ?)", + (note_id, slug, title, content) + ) + + +def delete_from_fts_index(conn: sqlite3.Connection, note_id: int): + """ + Remove note from FTS index + + Args: + conn: SQLite database connection + note_id: Note ID to remove + """ + conn.execute("DELETE FROM notes_fts WHERE rowid = ?", (note_id,)) + + +def rebuild_fts_index(db_path: Path, data_dir: Path): + """ + Rebuild entire FTS index from existing notes + + This is used during migration and can be run manually if the index + becomes corrupted. Reads all notes and re-indexes them. + + Args: + db_path: Path to SQLite database + data_dir: Path to data directory containing note files + + Raises: + sqlite3.Error: If rebuild fails + """ + logger.info("Rebuilding FTS index from existing notes") + + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + + try: + # Clear existing index + conn.execute("DELETE FROM notes_fts") + + # Get all non-deleted notes + cursor = conn.execute( + "SELECT id, slug, file_path FROM notes WHERE deleted_at IS NULL" + ) + + indexed_count = 0 + error_count = 0 + + for row in cursor: + try: + # Read note content from file + note_path = data_dir / row['file_path'] + if not note_path.exists(): + logger.warning(f"Note file not found: {note_path}") + error_count += 1 + continue + + content = note_path.read_text(encoding='utf-8') + + # Update FTS index + update_fts_index(conn, row['id'], row['slug'], content) + indexed_count += 1 + + except Exception as e: + logger.error(f"Failed to index note {row['slug']}: {e}") + error_count += 1 + + conn.commit() + logger.info(f"FTS index rebuilt: {indexed_count} notes indexed, {error_count} errors") + + except Exception as e: + conn.rollback() + logger.error(f"Failed to rebuild FTS index: {e}") + raise + finally: + conn.close() + + +def search_notes( + query: str, + db_path: Path, + published_only: bool = True, + limit: int = 50, + offset: int = 0 +) -> list[dict]: + """ + Search notes using FTS5 + + Args: + query: Search query (FTS5 query syntax supported) + db_path: Path to SQLite database + published_only: If True, only return published notes + limit: Maximum number of results + offset: Number of results to skip (for pagination) + + Returns: + List of dicts with keys: id, slug, title, rank, snippet + + Raises: + sqlite3.Error: If search fails + """ + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + + try: + # Build query + # FTS5 returns results ordered by relevance (rank) + # Lower rank = better match + sql = """ + SELECT + notes.id, + notes.slug, + notes_fts.title, + notes.published, + notes.created_at, + rank AS relevance, + snippet(notes_fts, 2, '', '', '...', 40) AS snippet + FROM notes_fts + INNER JOIN notes ON notes_fts.rowid = notes.id + WHERE notes_fts MATCH ? + AND notes.deleted_at IS NULL + """ + + params = [query] + + if published_only: + sql += " AND notes.published = 1" + + sql += " ORDER BY rank LIMIT ? OFFSET ?" + params.extend([limit, offset]) + + cursor = conn.execute(sql, params) + + results = [] + for row in cursor: + results.append({ + 'id': row['id'], + 'slug': row['slug'], + 'title': row['title'], + 'snippet': row['snippet'], + 'relevance': row['relevance'], + 'published': bool(row['published']), + 'created_at': row['created_at'], + }) + + return results + + finally: + conn.close()