Implements feed caching layer with LRU eviction, TTL expiration, and ETag support.

Phase 3.1: Feed Caching (Complete)
- LRU cache with configurable max_size (default: 50 feeds)
- TTL-based expiration (default: 300 seconds = 5 minutes)
- SHA-256 checksums for cache keys and ETags
- Weak ETag generation (W/"checksum")
- If-None-Match header support for 304 Not Modified responses
- Cache invalidation (entire cache or per-format)
- Hit/miss/eviction statistics tracking
- Content-based cache keys (changes when notes are modified)

Implementation:
- Created starpunk/feeds/cache.py with FeedCache class
- Integrated caching into feed routes (RSS, ATOM, JSON Feed)
- Added ETag headers to all feed responses
- 304 Not Modified responses for conditional requests
- Configuration: FEED_CACHE_ENABLED, FEED_CACHE_MAX_SIZE
- Global cache instance with singleton pattern

Architecture:
- Two-level caching:
  1. Note list cache (simple dict, existing)
  2. Feed content cache (LRU with TTL, new)
- Cache keys include format + notes checksum
- Checksums based on note IDs + updated timestamps
- Non-streaming generators used for cacheable content

Testing:
- 25 comprehensive cache tests (100% passing)
- Tests for LRU eviction, TTL expiration, statistics
- Tests for checksum generation and consistency
- Tests for ETag generation and uniqueness
- All 114 feed tests passing (no regressions)

Quality Metrics:
- 114/114 tests passing (100%)
- Zero breaking changes
- Full backward compatibility
- Cache disabled mode supported (FEED_CACHE_ENABLED=false)

Performance Benefits:
- Database queries reduced (note list cached)
- Feed generation reduced (content cached)
- Bandwidth saved (304 responses)
- Memory efficient (LRU eviction)

Note: Phase 3 is partially complete. Still pending:
- Feed statistics dashboard
- OPML 2.0 export endpoint

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
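starpunk/feeds/cache.py itself is not shown on this page, only its tests. As a rough, hypothetical sketch of the behavior those tests pin down (an OrderedDict-based LRU with TTL timestamps and weak SHA-256 ETags), something along these lines would satisfy them; the real module may differ in its details:

import hashlib
import time
from collections import OrderedDict
from typing import Optional, Tuple


class FeedCache:
    """Sketch of an LRU + TTL feed cache with weak ETags (not the real module)."""

    def __init__(self, max_size: int = 50, ttl: int = 300):
        self.max_size = max_size
        self.ttl = ttl
        self._cache: OrderedDict = OrderedDict()  # key -> (content, etag, stored_at)
        self._hits = 0
        self._misses = 0
        self._evictions = 0

    def _generate_cache_key(self, feed_format: str, checksum: str) -> str:
        # Matches the 'feed:rss:abc123' shape asserted in the tests
        return f"feed:{feed_format}:{checksum}"

    def _generate_etag(self, content: str) -> str:
        # Weak ETag: W/"<sha256-of-content>"
        digest = hashlib.sha256(content.encode("utf-8")).hexdigest()
        return f'W/"{digest}"'

    def set(self, feed_format: str, content: str, checksum: str) -> str:
        key = self._generate_cache_key(feed_format, checksum)
        etag = self._generate_etag(content)
        self._cache[key] = (content, etag, time.monotonic())
        self._cache.move_to_end(key)
        if len(self._cache) > self.max_size:
            self._cache.popitem(last=False)  # evict least recently used
            self._evictions += 1
        return etag

    def get(self, feed_format: str, checksum: str) -> Optional[Tuple[str, str]]:
        key = self._generate_cache_key(feed_format, checksum)
        entry = self._cache.get(key)
        if entry is None:
            self._misses += 1
            return None
        content, etag, stored_at = entry
        if time.monotonic() - stored_at > self.ttl:
            del self._cache[key]  # expired: treat as a miss
            self._misses += 1
            return None
        self._cache.move_to_end(key)  # refresh LRU position
        self._hits += 1
        return content, etag

    def invalidate(self, feed_format: Optional[str] = None) -> int:
        if feed_format is None:
            count = len(self._cache)
            self._cache.clear()
            return count
        prefix = f"feed:{feed_format}:"
        stale = [k for k in self._cache if k.startswith(prefix)]
        for k in stale:
            del self._cache[k]
        return len(stale)

    def get_stats(self) -> dict:
        total = self._hits + self._misses
        return {
            "hits": self._hits,
            "misses": self._misses,
            "evictions": self._evictions,
            "hit_rate": (self._hits / total) if total else 0.0,
        }

    def generate_notes_checksum(self, notes) -> str:
        # Content-based key: changes when a note is added or its
        # updated_at timestamp moves
        h = hashlib.sha256()
        for note in notes:
            h.update(f"{note.id}:{note.updated_at.isoformat()}".encode("utf-8"))
        return h.hexdigest()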
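The feed-route integration is also off-page. Assuming the feed routes are Flask views (the commit mentions app configuration such as FEED_CACHE_ENABLED), and with get_published_notes and generate_rss as hypothetical stand-ins for the real helpers, the ETag / If-None-Match flow described above would look roughly like this:

# Sketch only: route path, helpers, and wiring are assumptions;
# the cache calls mirror what the tests on this page exercise.
from flask import Flask, Response, request

from starpunk.feeds.cache import get_cache

app = Flask(__name__)


def get_published_notes():
    """Stand-in for the real note query (hypothetical)."""
    return []


def generate_rss(notes):
    """Stand-in for the real non-streaming RSS generator (hypothetical)."""
    return "<?xml version='1.0'?><rss/>"


@app.route("/feed.xml")
def rss_feed():
    notes = get_published_notes()

    if not app.config.get("FEED_CACHE_ENABLED", True):
        # Cache disabled mode: generate fresh on every request
        return Response(generate_rss(notes), mimetype="application/rss+xml")

    cache = get_cache()
    checksum = cache.generate_notes_checksum(notes)

    cached = cache.get("rss", checksum)
    if cached is not None:
        content, etag = cached
    else:
        content = generate_rss(notes)
        etag = cache.set("rss", content, checksum)

    # Conditional request: the client already holds this version,
    # so answer 304 Not Modified with no body
    if request.headers.get("If-None-Match") == etag:
        return Response(status=304, headers={"ETag": etag})

    return Response(content, mimetype="application/rss+xml",
                    headers={"ETag": etag})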
"""
|
|
Tests for feed caching layer (v1.1.2 Phase 3)
|
|
|
|
Tests the FeedCache class and caching integration with feed routes.
|
|
"""
|
|
|
|
import time
|
|
from datetime import datetime, timezone
|
|
|
|
import pytest
|
|
|
|
from starpunk.feeds.cache import FeedCache
|
|
from starpunk.models import Note
|
|
|
|
|
|
class TestFeedCacheBasics:
|
|
"""Test basic cache operations"""
|
|
|
|
def test_cache_initialization(self):
|
|
"""Cache initializes with correct settings"""
|
|
cache = FeedCache(max_size=100, ttl=600)
|
|
assert cache.max_size == 100
|
|
assert cache.ttl == 600
|
|
assert len(cache._cache) == 0
|
|
|
|
def test_cache_key_generation(self):
|
|
"""Cache keys are generated consistently"""
|
|
cache = FeedCache()
|
|
key1 = cache._generate_cache_key('rss', 'abc123')
|
|
key2 = cache._generate_cache_key('rss', 'abc123')
|
|
key3 = cache._generate_cache_key('atom', 'abc123')
|
|
|
|
assert key1 == key2
|
|
assert key1 != key3
|
|
assert key1 == 'feed:rss:abc123'
|
|
|
|
def test_etag_generation(self):
|
|
"""ETags are generated with weak format"""
|
|
cache = FeedCache()
|
|
content = "<?xml version='1.0'?><rss>...</rss>"
|
|
etag = cache._generate_etag(content)
|
|
|
|
assert etag.startswith('W/"')
|
|
assert etag.endswith('"')
|
|
assert len(etag) > 10 # SHA-256 hash is long
|
|
|
|
def test_etag_consistency(self):
|
|
"""Same content generates same ETag"""
|
|
cache = FeedCache()
|
|
content = "test content"
|
|
etag1 = cache._generate_etag(content)
|
|
etag2 = cache._generate_etag(content)
|
|
|
|
assert etag1 == etag2
|
|
|
|
def test_etag_uniqueness(self):
|
|
"""Different content generates different ETags"""
|
|
cache = FeedCache()
|
|
etag1 = cache._generate_etag("content 1")
|
|
etag2 = cache._generate_etag("content 2")
|
|
|
|
assert etag1 != etag2
|
|
|
|
|
|
class TestCacheOperations:
|
|
"""Test cache get/set operations"""
|
|
|
|
def test_set_and_get(self):
|
|
"""Can store and retrieve feed content"""
|
|
cache = FeedCache()
|
|
content = "<?xml version='1.0'?><rss>test</rss>"
|
|
checksum = "test123"
|
|
|
|
etag = cache.set('rss', content, checksum)
|
|
result = cache.get('rss', checksum)
|
|
|
|
assert result is not None
|
|
cached_content, cached_etag = result
|
|
assert cached_content == content
|
|
assert cached_etag == etag
|
|
assert cached_etag.startswith('W/"')
|
|
|
|
def test_cache_miss(self):
|
|
"""Returns None for cache miss"""
|
|
cache = FeedCache()
|
|
result = cache.get('rss', 'nonexistent')
|
|
assert result is None
|
|
|
|
def test_different_formats_cached_separately(self):
|
|
"""Different formats with same checksum are cached separately"""
|
|
cache = FeedCache()
|
|
rss_content = "RSS content"
|
|
atom_content = "ATOM content"
|
|
checksum = "same_checksum"
|
|
|
|
rss_etag = cache.set('rss', rss_content, checksum)
|
|
atom_etag = cache.set('atom', atom_content, checksum)
|
|
|
|
rss_result = cache.get('rss', checksum)
|
|
atom_result = cache.get('atom', checksum)
|
|
|
|
assert rss_result[0] == rss_content
|
|
assert atom_result[0] == atom_content
|
|
assert rss_etag != atom_etag
|
|
|
|
|
|
class TestCacheTTL:
|
|
"""Test TTL expiration"""
|
|
|
|
def test_ttl_expiration(self):
|
|
"""Cached entries expire after TTL"""
|
|
cache = FeedCache(ttl=1) # 1 second TTL
|
|
content = "test content"
|
|
checksum = "test123"
|
|
|
|
cache.set('rss', content, checksum)
|
|
|
|
# Should be cached initially
|
|
assert cache.get('rss', checksum) is not None
|
|
|
|
# Wait for TTL to expire
|
|
time.sleep(1.1)
|
|
|
|
# Should be expired
|
|
assert cache.get('rss', checksum) is None
|
|
|
|
def test_ttl_not_expired(self):
|
|
"""Cached entries remain valid within TTL"""
|
|
cache = FeedCache(ttl=10) # 10 second TTL
|
|
content = "test content"
|
|
checksum = "test123"
|
|
|
|
cache.set('rss', content, checksum)
|
|
time.sleep(0.1) # Small delay
|
|
|
|
# Should still be cached
|
|
assert cache.get('rss', checksum) is not None
|
|
|
|
|
|
class TestLRUEviction:
|
|
"""Test LRU eviction strategy"""
|
|
|
|
def test_lru_eviction(self):
|
|
"""LRU entries are evicted when cache is full"""
|
|
cache = FeedCache(max_size=3)
|
|
|
|
# Fill cache
|
|
cache.set('rss', 'content1', 'check1')
|
|
cache.set('rss', 'content2', 'check2')
|
|
cache.set('rss', 'content3', 'check3')
|
|
|
|
# All should be cached
|
|
assert cache.get('rss', 'check1') is not None
|
|
assert cache.get('rss', 'check2') is not None
|
|
assert cache.get('rss', 'check3') is not None
|
|
|
|
# Add one more (should evict oldest)
|
|
cache.set('rss', 'content4', 'check4')
|
|
|
|
# First entry should be evicted
|
|
assert cache.get('rss', 'check1') is None
|
|
assert cache.get('rss', 'check2') is not None
|
|
assert cache.get('rss', 'check3') is not None
|
|
assert cache.get('rss', 'check4') is not None
|
|
|
|
def test_lru_access_updates_order(self):
|
|
"""Accessing an entry moves it to end (most recently used)"""
|
|
cache = FeedCache(max_size=3)
|
|
|
|
# Fill cache
|
|
cache.set('rss', 'content1', 'check1')
|
|
cache.set('rss', 'content2', 'check2')
|
|
cache.set('rss', 'content3', 'check3')
|
|
|
|
# Access first entry (makes it most recent)
|
|
cache.get('rss', 'check1')
|
|
|
|
# Add new entry (should evict check2, not check1)
|
|
cache.set('rss', 'content4', 'check4')
|
|
|
|
assert cache.get('rss', 'check1') is not None # Still cached (accessed recently)
|
|
assert cache.get('rss', 'check2') is None # Evicted (oldest)
|
|
assert cache.get('rss', 'check3') is not None
|
|
assert cache.get('rss', 'check4') is not None
|
|
|
|
|
|
class TestCacheInvalidation:
|
|
"""Test cache invalidation"""
|
|
|
|
def test_invalidate_all(self):
|
|
"""Can invalidate entire cache"""
|
|
cache = FeedCache()
|
|
|
|
cache.set('rss', 'content1', 'check1')
|
|
cache.set('atom', 'content2', 'check2')
|
|
cache.set('json', 'content3', 'check3')
|
|
|
|
count = cache.invalidate()
|
|
|
|
assert count == 3
|
|
assert cache.get('rss', 'check1') is None
|
|
assert cache.get('atom', 'check2') is None
|
|
assert cache.get('json', 'check3') is None
|
|
|
|
def test_invalidate_specific_format(self):
|
|
"""Can invalidate specific format only"""
|
|
cache = FeedCache()
|
|
|
|
cache.set('rss', 'content1', 'check1')
|
|
cache.set('atom', 'content2', 'check2')
|
|
cache.set('json', 'content3', 'check3')
|
|
|
|
count = cache.invalidate('rss')
|
|
|
|
assert count == 1
|
|
assert cache.get('rss', 'check1') is None
|
|
assert cache.get('atom', 'check2') is not None
|
|
assert cache.get('json', 'check3') is not None
|
|
|
|
|
|
class TestCacheStatistics:
|
|
"""Test cache statistics tracking"""
|
|
|
|
def test_hit_tracking(self):
|
|
"""Cache hits are tracked"""
|
|
cache = FeedCache()
|
|
cache.set('rss', 'content', 'check1')
|
|
|
|
stats = cache.get_stats()
|
|
assert stats['hits'] == 0
|
|
|
|
cache.get('rss', 'check1') # Hit
|
|
stats = cache.get_stats()
|
|
assert stats['hits'] == 1
|
|
|
|
def test_miss_tracking(self):
|
|
"""Cache misses are tracked"""
|
|
cache = FeedCache()
|
|
|
|
stats = cache.get_stats()
|
|
assert stats['misses'] == 0
|
|
|
|
cache.get('rss', 'nonexistent') # Miss
|
|
stats = cache.get_stats()
|
|
assert stats['misses'] == 1
|
|
|
|
def test_hit_rate_calculation(self):
|
|
"""Hit rate is calculated correctly"""
|
|
cache = FeedCache()
|
|
cache.set('rss', 'content', 'check1')
|
|
|
|
cache.get('rss', 'check1') # Hit
|
|
cache.get('rss', 'nonexistent') # Miss
|
|
cache.get('rss', 'check1') # Hit
|
|
|
|
stats = cache.get_stats()
|
|
assert stats['hits'] == 2
|
|
assert stats['misses'] == 1
|
|
assert stats['hit_rate'] == 2.0 / 3.0 # 66.67%
|
|
|
|
def test_eviction_tracking(self):
|
|
"""Evictions are tracked"""
|
|
cache = FeedCache(max_size=2)
|
|
|
|
cache.set('rss', 'content1', 'check1')
|
|
cache.set('rss', 'content2', 'check2')
|
|
cache.set('rss', 'content3', 'check3') # Triggers eviction
|
|
|
|
stats = cache.get_stats()
|
|
assert stats['evictions'] == 1
|
|
|
|
|
|
class TestNotesChecksum:
|
|
"""Test notes checksum generation"""
|
|
|
|
def test_checksum_generation(self):
|
|
"""Can generate checksum from note list"""
|
|
cache = FeedCache()
|
|
now = datetime.now(timezone.utc)
|
|
from pathlib import Path
|
|
|
|
notes = [
|
|
Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
|
|
Note(id=2, slug="note2", file_path="note2.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
|
|
]
|
|
|
|
checksum = cache.generate_notes_checksum(notes)
|
|
|
|
assert isinstance(checksum, str)
|
|
assert len(checksum) == 64 # SHA-256 hex digest length
|
|
|
|
def test_checksum_consistency(self):
|
|
"""Same notes generate same checksum"""
|
|
cache = FeedCache()
|
|
now = datetime.now(timezone.utc)
|
|
from pathlib import Path
|
|
|
|
notes = [
|
|
Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
|
|
Note(id=2, slug="note2", file_path="note2.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
|
|
]
|
|
|
|
checksum1 = cache.generate_notes_checksum(notes)
|
|
checksum2 = cache.generate_notes_checksum(notes)
|
|
|
|
assert checksum1 == checksum2
|
|
|
|
def test_checksum_changes_on_note_change(self):
|
|
"""Checksum changes when notes are modified"""
|
|
cache = FeedCache()
|
|
now = datetime.now(timezone.utc)
|
|
later = datetime(2025, 11, 27, 12, 0, 0, tzinfo=timezone.utc)
|
|
from pathlib import Path
|
|
|
|
notes1 = [
|
|
Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
|
|
]
|
|
|
|
notes2 = [
|
|
Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=later, published=True, _data_dir=Path("/tmp")),
|
|
]
|
|
|
|
checksum1 = cache.generate_notes_checksum(notes1)
|
|
checksum2 = cache.generate_notes_checksum(notes2)
|
|
|
|
assert checksum1 != checksum2
|
|
|
|
def test_checksum_changes_on_note_addition(self):
|
|
"""Checksum changes when notes are added"""
|
|
cache = FeedCache()
|
|
now = datetime.now(timezone.utc)
|
|
from pathlib import Path
|
|
|
|
notes1 = [
|
|
Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
|
|
]
|
|
|
|
notes2 = [
|
|
Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
|
|
Note(id=2, slug="note2", file_path="note2.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
|
|
]
|
|
|
|
checksum1 = cache.generate_notes_checksum(notes1)
|
|
checksum2 = cache.generate_notes_checksum(notes2)
|
|
|
|
assert checksum1 != checksum2
|
|
|
|
|
|
class TestGlobalCache:
|
|
"""Test global cache instance"""
|
|
|
|
def test_get_cache_returns_instance(self):
|
|
"""get_cache() returns FeedCache instance"""
|
|
from starpunk.feeds.cache import get_cache
|
|
cache = get_cache()
|
|
assert isinstance(cache, FeedCache)
|
|
|
|
def test_get_cache_returns_same_instance(self):
|
|
"""get_cache() returns singleton instance"""
|
|
from starpunk.feeds.cache import get_cache
|
|
cache1 = get_cache()
|
|
cache2 = get_cache()
|
|
assert cache1 is cache2
|
|
|
|
def test_configure_cache(self):
|
|
"""configure_cache() sets up global cache with params"""
|
|
from starpunk.feeds.cache import configure_cache, get_cache
|
|
|
|
configure_cache(max_size=100, ttl=600)
|
|
cache = get_cache()
|
|
|
|
assert cache.max_size == 100
|
|
assert cache.ttl == 600
|