feat(tags): Add database schema and tags module (v1.3.0 Phase 1)

Implements tag/category system backend following microformats2 p-category specification.

Database changes:
- Migration 008: Add tags and note_tags tables
- Normalized tag storage (case-insensitive lookup, display name preserved)
- Indexes for performance

New module:
- starpunk/tags.py: Tag management functions
  - normalize_tag: Normalize tag strings
  - get_or_create_tag: Get or create tag records
  - add_tags_to_note: Associate tags with notes (replaces existing)
  - get_note_tags: Retrieve note tags (alphabetically ordered)
  - get_tag_by_name: Lookup tag by normalized name
  - get_notes_by_tag: Get all notes with specific tag
  - parse_tag_input: Parse comma-separated tag input

Model updates:
- Note.tags property (lazy-loaded, prefer pre-loading in routes)
- Note.to_dict(): add include_tags parameter

CRUD updates:
- create_note() accepts tags parameter
- update_note() accepts tags parameter (None = no change, [] = remove all)

Micropub integration:
- Pass tags to create_note() (tags already extracted by extract_tags())
- Return tags in q=source response

Per design doc: docs/design/v1.3.0/microformats-tags-design.md

Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-12-10 11:24:23 -07:00
parent 927db4aea0
commit f10d0679da
188 changed files with 601 additions and 945 deletions

View File

@@ -318,7 +318,8 @@ def handle_create(data: dict, token_info: dict):
content=content,
published=True, # Micropub posts are published by default
created_at=published_date,
custom_slug=custom_slug
custom_slug=custom_slug,
tags=tags if tags else None # Pass tags to create_note (v1.3.0)
)
# Build permalink URL
@@ -403,9 +404,9 @@ def handle_query(args: dict, token_info: dict):
if note.title:
mf2["properties"]["name"] = [note.title]
# Tags not implemented in V1, skip category property
# if hasattr(note, 'tags') and note.tags:
# mf2["properties"]["category"] = note.tags
# Add tags if present (v1.3.0)
if note.tags:
mf2["properties"]["category"] = [tag["display_name"] for tag in note.tags]
return jsonify(mf2), 200

View File

@@ -121,6 +121,11 @@ class Note:
default=None, repr=False, compare=False, init=False
)
# Cached tags (loaded separately, not from database row)
_cached_tags: Optional[list[dict]] = field(
default=None, repr=False, compare=False, init=False
)
@classmethod
def from_row(cls, row: sqlite3.Row | dict[str, Any], data_dir: Path) -> "Note":
"""
@@ -358,8 +363,27 @@ class Note:
"""
return self.published
@property
def tags(self) -> list[dict]:
    """
    Return the tags attached to this note

    The value is served from ``_cached_tags`` when that field has been
    populated. Routes are expected to fill the cache up front with:

        object.__setattr__(note, '_cached_tags', tags)

    When the cache is still ``None`` the tags are fetched on demand via
    ``starpunk.tags.get_note_tags`` and stored for subsequent accesses.

    Returns:
        List of tag dicts with 'name' and 'display_name'
    """
    cached = self._cached_tags
    if cached is not None:
        return cached

    # Imported here rather than at module level, as in the original code.
    from starpunk.tags import get_note_tags

    loaded = get_note_tags(self.id)
    # Written through object.__setattr__, the same way routes pre-load it.
    object.__setattr__(self, "_cached_tags", loaded)
    return self._cached_tags
def to_dict(
self, include_content: bool = False, include_html: bool = False
self, include_content: bool = False, include_html: bool = False, include_tags: bool = False
) -> dict[str, Any]:
"""
Serialize note to dictionary
@@ -370,6 +394,7 @@ class Note:
Args:
include_content: Include markdown content in output
include_html: Include rendered HTML in output
include_tags: Include tags in output (v1.3.0)
Returns:
Dictionary with note data
@@ -410,6 +435,9 @@ class Note:
if include_html:
data["html"] = self.html
if include_tags:
data["tags"] = [tag["display_name"] for tag in self.tags]
return data
def verify_integrity(self) -> bool:

View File

@@ -134,7 +134,11 @@ def _get_existing_slugs(db) -> set[str]:
def create_note(
content: str, published: bool = False, created_at: Optional[datetime] = None, custom_slug: Optional[str] = None
content: str,
published: bool = False,
created_at: Optional[datetime] = None,
custom_slug: Optional[str] = None,
tags: Optional[list[str]] = None
) -> Note:
"""
Create a new note
@@ -148,6 +152,7 @@ def create_note(
published: Whether the note should be published (default: False)
created_at: Creation timestamp (default: current UTC time)
custom_slug: Optional custom slug (from Micropub mp-slug property)
tags: Optional list of tag display names (v1.3.0)
Returns:
Note object with all metadata and content loaded
@@ -294,7 +299,16 @@ def create_note(
# Create Note object
note = Note.from_row(row, data_dir)
# 9. UPDATE FTS INDEX (if available)
# 9. ADD TAGS (v1.3.0)
if tags:
try:
from starpunk.tags import add_tags_to_note
add_tags_to_note(note_id, tags)
except Exception as e:
# Tag addition failure should not prevent note creation
current_app.logger.warning(f"Failed to add tags to note {slug}: {e}")
# 10. UPDATE FTS INDEX (if available)
try:
from starpunk.search import update_fts_index, has_fts_table
db_path = Path(current_app.config["DATABASE_PATH"])
@@ -540,6 +554,7 @@ def update_note(
id: Optional[int] = None,
content: Optional[str] = None,
published: Optional[bool] = None,
tags: Optional[list[str]] = None
) -> Note:
"""
Update a note's content and/or published status
@@ -553,6 +568,7 @@ def update_note(
id: Note ID to update (mutually exclusive with slug)
content: New markdown content (None = no change)
published: New published status (None = no change)
tags: New tags list (None = no change, [] = remove all tags) (v1.3.0)
Returns:
Updated Note object with new content and metadata
@@ -608,8 +624,8 @@ def update_note(
if slug is not None and id is not None:
raise ValueError("Cannot provide both slug and id")
if content is None and published is None:
raise ValueError("Must provide at least one of content or published to update")
if content is None and published is None and tags is None:
raise ValueError("Must provide at least one of content, published, or tags to update")
# Validate content if provided
if content is not None:
@@ -695,7 +711,16 @@ def update_note(
f"Failed to update note: {existing_note.slug}",
)
# 6. UPDATE FTS INDEX (if available and content changed)
# 6. UPDATE TAGS (v1.3.0)
if tags is not None:
try:
from starpunk.tags import add_tags_to_note
add_tags_to_note(existing_note.id, tags)
except Exception as e:
# Tag update failure should not prevent note update
current_app.logger.warning(f"Failed to update tags for note {existing_note.slug}: {e}")
# 7. UPDATE FTS INDEX (if available and content changed)
if content is not None:
try:
from starpunk.search import update_fts_index, has_fts_table
@@ -707,7 +732,7 @@ def update_note(
# FTS update failure should not prevent note update
current_app.logger.warning(f"Failed to update FTS index for note {existing_note.slug}: {e}")
# 7. RETURN UPDATED NOTE
# 8. RETURN UPDATED NOTE
updated_note = get_note(slug=existing_note.slug, load_content=True)
return updated_note

243
starpunk/tags.py Normal file
View File

@@ -0,0 +1,243 @@
"""
Tag management for StarPunk

Functions:
    normalize_tag: Normalize tag string for storage
    get_or_create_tag: Get existing tag or create new one
    add_tags_to_note: Associate tags with a note (replaces existing tags;
        pass an empty list to remove all tags)
    get_note_tags: Get all tags for a note
    get_tag_by_name: Look up a tag by its normalized name
    get_notes_by_tag: Get all published notes with a specific tag
    parse_tag_input: Parse comma-separated tag input
"""
import re
from typing import Optional
from flask import current_app
from starpunk.database import get_db
def normalize_tag(tag: str) -> tuple[str, str]:
    """
    Derive the lookup key and the display form of a raw tag

    The display form is the input with surrounding whitespace removed.
    The lookup key is built from the display form: leading ``#`` characters
    are dropped, spaces and slashes become hyphens, every character outside
    ``[a-zA-Z0-9_-]`` is removed, runs of hyphens are collapsed, leading and
    trailing hyphens are trimmed, and the result is lowercased.

    Args:
        tag: Raw tag string

    Returns:
        Tuple of (normalized_name, display_name). The normalized name may be
        an empty string when the input has no characters that survive the
        filter.

    Examples:
        >>> normalize_tag("  IndieWeb  ")
        ('indieweb', 'IndieWeb')
        >>> normalize_tag("Machine Learning")
        ('machine-learning', 'Machine Learning')
        >>> normalize_tag("#python")
        ('python', '#python')
    """
    display_name = tag.strip()

    key = display_name.lstrip("#")
    # Spaces and slashes are word separators; turn both into hyphens.
    key = re.sub(r"[ /]", "-", key)
    # Drop everything that is not an ASCII letter, digit, underscore or hyphen.
    key = re.sub(r'[^a-zA-Z0-9_-]', "", key)
    # Removing characters can leave hyphen runs or hyphens at the edges.
    key = re.sub(r"-+", "-", key).strip("-")

    # Lowercasing is deliberately the last step: the filter above must see the
    # original characters, so non-ASCII letters are removed rather than being
    # case-mapped into ASCII first.
    return key.lower(), display_name
def get_or_create_tag(display_name: str) -> int:
    """
    Return the ID of the tag matching ``display_name``, creating it if needed

    The lookup is done on the normalized name produced by ``normalize_tag``.
    When a tag with that normalized name already exists its ID is returned
    and its stored display name is left untouched. Otherwise a new row is
    inserted with the whitespace-stripped display name and committed.

    Args:
        display_name: Tag display name

    Returns:
        Tag ID
    """
    db = get_db(current_app)
    normalized_name, clean_display = normalize_tag(display_name)

    existing = db.execute(
        "SELECT id FROM tags WHERE name = ?",
        (normalized_name,)
    ).fetchone()

    if not existing:
        # No tag with this normalized name yet: insert and persist it.
        inserted = db.execute(
            "INSERT INTO tags (name, display_name) VALUES (?, ?)",
            (normalized_name, clean_display)
        )
        db.commit()
        return inserted.lastrowid

    return existing['id']
def add_tags_to_note(note_id: int, tags: list[str]) -> None:
    """
    Replace the set of tags associated with a note

    All existing associations for the note are removed and the given tags
    are attached instead. Passing an empty list removes every tag.

    Tags whose normalized name is empty (for example ``"!!!"``) are skipped,
    matching the filtering done by ``parse_tag_input``. Tags that resolve to
    the same tag record (for example ``"Python"`` and ``"python"``) are
    attached only once.

    Args:
        note_id: Note database ID
        tags: List of tag display names

    Raises:
        Exception: Any database error raised while replacing the
            associations is re-raised after the pending changes have been
            rolled back.
    """
    db = get_db(current_app)

    # Resolve every tag to an ID *before* touching note_tags.
    # get_or_create_tag() commits when it inserts a new tag; if the DELETE
    # below had already run, that commit would persist a half-finished
    # replacement. dict.fromkeys() drops duplicate IDs while keeping order.
    tag_ids = list(
        dict.fromkeys(
            get_or_create_tag(tag_display)
            for tag_display in tags
            if normalize_tag(tag_display)[0]
        )
    )

    try:
        # Remove existing associations and add the new ones as one unit.
        db.execute("DELETE FROM note_tags WHERE note_id = ?", (note_id,))
        for tag_id in tag_ids:
            db.execute(
                "INSERT INTO note_tags (note_id, tag_id) VALUES (?, ?)",
                (note_id, tag_id)
            )
        db.commit()
    except Exception:
        # Do not leave an uncommitted DELETE behind for a later commit to
        # pick up; the note keeps its previous tags.
        db.rollback()
        raise
def get_note_tags(note_id: int) -> list[dict]:
    """
    Fetch the tags attached to a note

    Args:
        note_id: Note database ID

    Returns:
        List of tag dicts with 'name' and 'display_name', sorted
        alphabetically by display name without regard to case. Empty list
        when the note has no tags.
    """
    db = get_db(current_app)
    rows = db.execute(
        """
        SELECT tags.name, tags.display_name
        FROM tags
        JOIN note_tags ON tags.id = note_tags.tag_id
        WHERE note_tags.note_id = ?
        ORDER BY LOWER(tags.display_name) ASC
        """,
        (note_id,)
    ).fetchall()

    # Convert database rows to plain dicts for callers.
    return list(map(dict, rows))
def get_tag_by_name(name: str) -> Optional[dict]:
    """
    Look up a single tag by name

    Args:
        name: Tag name; it is passed through ``normalize_tag`` first, so a
            display name works as well as a normalized one

    Returns:
        Tag dict with 'id', 'name', 'display_name', or None when no tag
        has that normalized name
    """
    db = get_db(current_app)
    lookup_key = normalize_tag(name)[0]

    row = db.execute(
        "SELECT id, name, display_name FROM tags WHERE name = ?",
        (lookup_key,)
    ).fetchone()

    if row:
        return dict(row)
    return None
def get_notes_by_tag(tag_name: str) -> list:
    """
    Get all published notes with a specific tag

    The name is normalized with ``normalize_tag`` before the lookup, the
    same way ``get_tag_by_name`` does, so both a normalized name
    (``"indieweb"``) and a display name (``"IndieWeb"``) find the tag.
    Normalizing an already-normalized name leaves it unchanged.

    Args:
        tag_name: Tag name (normalized before lookup)

    Returns:
        List of Note objects, newest first by creation time, each with its
        tags pre-loaded into ``_cached_tags``. Empty list when no published
        note carries the tag.
    """
    # Imported inside the function, as in the original code.
    from starpunk.notes import get_note

    db = get_db(current_app)
    normalized_name, _ = normalize_tag(tag_name)

    rows = db.execute(
        """
        SELECT DISTINCT notes.id
        FROM notes
        JOIN note_tags ON notes.id = note_tags.note_id
        JOIN tags ON note_tags.tag_id = tags.id
        WHERE tags.name = ? AND notes.published = 1
        ORDER BY notes.created_at DESC
        """,
        (normalized_name,)
    ).fetchall()

    notes = []
    for row in rows:
        note = get_note(id=row['id'])
        if note:
            # Pre-load tags so the Note.tags property does not have to
            # query again. object.__setattr__ is used here as it is
            # everywhere else this field is written.
            object.__setattr__(note, '_cached_tags', get_note_tags(note.id))
            notes.append(note)
    return notes
def parse_tag_input(input_string: str) -> list[str]:
    """
    Turn comma-separated admin-form input into a list of tag display names

    Each comma-separated piece is stripped of surrounding whitespace. Pieces
    that are empty, or whose normalized name is empty, are dropped. When
    several pieces share a normalized name only the first one is kept, in
    its original spelling.

    Args:
        input_string: Comma-separated tags (e.g., "Python, IndieWeb, Web")

    Returns:
        List of unique tag display names, in input order

    Examples:
        >>> parse_tag_input("Python, IndieWeb, Web")
        ['Python', 'IndieWeb', 'Web']
        >>> parse_tag_input("Python, python, PYTHON")
        ['Python']
        >>> parse_tag_input("  , , valid ,  ")
        ['valid']
    """
    if not input_string:
        return []

    unique_tags = []
    seen_keys = set()
    for piece in input_string.split(','):
        candidate = piece.strip()
        if not candidate:
            continue

        key, _ = normalize_tag(candidate)
        # Skip tags with nothing left after normalization, and repeats.
        if not key or key in seen_keys:
            continue

        seen_keys.add(key)
        unique_tags.append(candidate)

    return unique_tags