""" Full-text search functionality for StarPunk This module provides FTS5-based search capabilities for notes. It handles: - Search query execution with relevance ranking - FTS index population and maintenance - Graceful degradation when FTS5 is unavailable The FTS index is maintained by application code (not SQL triggers) because note content is stored in external files that SQLite cannot access. """ import sqlite3 import logging from pathlib import Path from typing import Optional from flask import current_app logger = logging.getLogger(__name__) def check_fts5_support(db_path: Path) -> bool: """ Check if SQLite was compiled with FTS5 support Args: db_path: Path to SQLite database Returns: bool: True if FTS5 is available, False otherwise """ try: conn = sqlite3.connect(db_path) # Try to create a test FTS5 table conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS _fts5_test USING fts5(content)") conn.execute("DROP TABLE IF EXISTS _fts5_test") conn.close() return True except sqlite3.OperationalError as e: if "no such module" in str(e).lower(): logger.warning(f"FTS5 not available in SQLite: {e}") return False raise def has_fts_table(db_path: Path) -> bool: """ Check if FTS table exists in database Args: db_path: Path to SQLite database Returns: bool: True if notes_fts table exists """ try: conn = sqlite3.connect(db_path) cursor = conn.execute( "SELECT name FROM sqlite_master WHERE type='table' AND name='notes_fts'" ) exists = cursor.fetchone() is not None conn.close() return exists except sqlite3.Error: return False def update_fts_index(conn: sqlite3.Connection, note_id: int, slug: str, content: str): """ Update FTS index for a note (insert or replace) Extracts title from first line of content and updates the FTS index. Uses REPLACE to handle both new notes and updates. Args: conn: SQLite database connection note_id: Note ID (used as FTS rowid) slug: Note slug content: Full markdown content Raises: sqlite3.Error: If FTS update fails """ # Extract title from first line lines = content.split('\n', 1) title = lines[0].strip() if lines else '' # Remove markdown heading syntax (# ## ###) if title.startswith('#'): title = title.lstrip('#').strip() # Limit title length if len(title) > 100: title = title[:100] + '...' # Use REPLACE to handle both insert and update # rowid explicitly set to match note ID for efficient lookups conn.execute( "REPLACE INTO notes_fts (rowid, slug, title, content) VALUES (?, ?, ?, ?)", (note_id, slug, title, content) ) def delete_from_fts_index(conn: sqlite3.Connection, note_id: int): """ Remove note from FTS index Args: conn: SQLite database connection note_id: Note ID to remove """ conn.execute("DELETE FROM notes_fts WHERE rowid = ?", (note_id,)) def rebuild_fts_index(db_path: Path, data_dir: Path): """ Rebuild entire FTS index from existing notes This is used during migration and can be run manually if the index becomes corrupted. Reads all notes and re-indexes them. Args: db_path: Path to SQLite database data_dir: Path to data directory containing note files Raises: sqlite3.Error: If rebuild fails """ logger.info("Rebuilding FTS index from existing notes") conn = sqlite3.connect(db_path) conn.row_factory = sqlite3.Row try: # Clear existing index conn.execute("DELETE FROM notes_fts") # Get all non-deleted notes cursor = conn.execute( "SELECT id, slug, file_path FROM notes WHERE deleted_at IS NULL" ) indexed_count = 0 error_count = 0 for row in cursor: try: # Read note content from file note_path = data_dir / row['file_path'] if not note_path.exists(): logger.warning(f"Note file not found: {note_path}") error_count += 1 continue content = note_path.read_text(encoding='utf-8') # Update FTS index update_fts_index(conn, row['id'], row['slug'], content) indexed_count += 1 except Exception as e: logger.error(f"Failed to index note {row['slug']}: {e}") error_count += 1 conn.commit() logger.info(f"FTS index rebuilt: {indexed_count} notes indexed, {error_count} errors") except Exception as e: conn.rollback() logger.error(f"Failed to rebuild FTS index: {e}") raise finally: conn.close() def search_notes( query: str, db_path: Path, published_only: bool = True, limit: int = 50, offset: int = 0 ) -> list[dict]: """ Search notes using FTS5 Args: query: Search query (FTS5 query syntax supported) db_path: Path to SQLite database published_only: If True, only return published notes limit: Maximum number of results offset: Number of results to skip (for pagination) Returns: List of dicts with keys: id, slug, title, rank, snippet Raises: sqlite3.Error: If search fails """ conn = sqlite3.connect(db_path) conn.row_factory = sqlite3.Row try: # Build query # FTS5 returns results ordered by relevance (rank) # Lower rank = better match sql = """ SELECT notes.id, notes.slug, notes_fts.title, notes.published, notes.created_at, rank AS relevance, snippet(notes_fts, 2, '', '', '...', 40) AS snippet FROM notes_fts INNER JOIN notes ON notes_fts.rowid = notes.id WHERE notes_fts MATCH ? AND notes.deleted_at IS NULL """ params = [query] if published_only: sql += " AND notes.published = 1" sql += " ORDER BY rank LIMIT ? OFFSET ?" params.extend([limit, offset]) cursor = conn.execute(sql, params) results = [] for row in cursor: results.append({ 'id': row['id'], 'slug': row['slug'], 'title': row['title'], 'snippet': row['snippet'], 'relevance': row['relevance'], 'published': bool(row['published']), 'created_at': row['created_at'], }) return results finally: conn.close()