feat: Implement Phase 3 Feed Caching (Partial)
Implements feed caching layer with LRU eviction, TTL expiration, and ETag support.

Phase 3.1: Feed Caching (Complete)
- LRU cache with configurable max_size (default: 50 feeds)
- TTL-based expiration (default: 300 seconds = 5 minutes)
- SHA-256 checksums for cache keys and ETags
- Weak ETag generation (W/"checksum")
- If-None-Match header support for 304 Not Modified responses
- Cache invalidation (all formats or per-format)
- Hit/miss/eviction statistics tracking
- Content-based cache keys (change when notes are modified)

Implementation:
- Created starpunk/feeds/cache.py with FeedCache class
- Integrated caching into feed routes (RSS, Atom, JSON Feed)
- Added ETag headers to all feed responses
- 304 Not Modified responses for conditional requests
- Configuration: FEED_CACHE_ENABLED, FEED_CACHE_MAX_SIZE
- Global cache instance with singleton pattern

Architecture:
- Two-level caching:
  1. Note list cache (simple dict, existing)
  2. Feed content cache (LRU with TTL, new)
- Cache keys include format + notes checksum
- Checksums based on note IDs + updated timestamps
- Non-streaming generators used for cacheable content

Testing (see the usage sketch below):
- 25 comprehensive cache tests (100% passing)
- Tests for LRU eviction, TTL expiration, statistics
- Tests for checksum generation and consistency
- Tests for ETag generation and uniqueness
- All 114 feed tests passing (no regressions)

Quality Metrics:
- 114/114 tests passing (100%)
- Zero breaking changes
- Full backward compatibility
- Cache disabled mode supported (FEED_CACHE_ENABLED=false)

Performance Benefits:
- Database queries reduced (note list cached)
- Feed generation reduced (content cached)
- Bandwidth saved (304 responses)
- Memory efficient (LRU eviction)

Note: Phase 3 is partially complete. Still pending:
- Feed statistics dashboard
- OPML 2.0 export endpoint

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
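For orientation, here is a minimal usage sketch of the FeedCache API exercised by the tests in this commit: set() stores content and returns a weak ETag, get() returns a (content, etag) tuple or None, and the ETag enables 304 Not Modified responses. The feed string and checksum value are placeholders for illustration only; in the real routes the checksum comes from generate_notes_checksum(notes) and the content from the feed generators.

# Usage sketch only; placeholder content and checksum, not the actual route code.
from starpunk.feeds.cache import FeedCache

cache = FeedCache(max_size=50, ttl=300)  # defaults per this commit

content = "<?xml version='1.0'?><rss>...</rss>"  # placeholder feed body
checksum = "abc123"                               # placeholder notes checksum

# First request: cache miss, so generate the feed and store it.
# set() returns the weak ETag derived from the content.
assert cache.get('rss', checksum) is None
etag = cache.set('rss', content, checksum)

# Later request with the same notes checksum: cache hit.
cached_content, cached_etag = cache.get('rss', checksum)
assert cached_content == content and cached_etag == etag

# Conditional request: when the client's If-None-Match header equals the
# cached ETag, the route can answer 304 Not Modified instead of the body.
client_if_none_match = etag
send_304 = (client_if_none_match == cached_etag)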
tests/test_feeds_cache.py (new file, 373 lines added)
@@ -0,0 +1,373 @@
"""
Tests for feed caching layer (v1.1.2 Phase 3)

Tests the FeedCache class and caching integration with feed routes.
"""

import time
from datetime import datetime, timezone

import pytest

from starpunk.feeds.cache import FeedCache
from starpunk.models import Note


class TestFeedCacheBasics:
    """Test basic cache operations"""

    def test_cache_initialization(self):
        """Cache initializes with correct settings"""
        cache = FeedCache(max_size=100, ttl=600)
        assert cache.max_size == 100
        assert cache.ttl == 600
        assert len(cache._cache) == 0

    def test_cache_key_generation(self):
        """Cache keys are generated consistently"""
        cache = FeedCache()
        key1 = cache._generate_cache_key('rss', 'abc123')
        key2 = cache._generate_cache_key('rss', 'abc123')
        key3 = cache._generate_cache_key('atom', 'abc123')

        assert key1 == key2
        assert key1 != key3
        assert key1 == 'feed:rss:abc123'

    def test_etag_generation(self):
        """ETags are generated with weak format"""
        cache = FeedCache()
        content = "<?xml version='1.0'?><rss>...</rss>"
        etag = cache._generate_etag(content)

        assert etag.startswith('W/"')
        assert etag.endswith('"')
        assert len(etag) > 10  # SHA-256 hash is long

    def test_etag_consistency(self):
        """Same content generates same ETag"""
        cache = FeedCache()
        content = "test content"
        etag1 = cache._generate_etag(content)
        etag2 = cache._generate_etag(content)

        assert etag1 == etag2

    def test_etag_uniqueness(self):
        """Different content generates different ETags"""
        cache = FeedCache()
        etag1 = cache._generate_etag("content 1")
        etag2 = cache._generate_etag("content 2")

        assert etag1 != etag2


class TestCacheOperations:
    """Test cache get/set operations"""

    def test_set_and_get(self):
        """Can store and retrieve feed content"""
        cache = FeedCache()
        content = "<?xml version='1.0'?><rss>test</rss>"
        checksum = "test123"

        etag = cache.set('rss', content, checksum)
        result = cache.get('rss', checksum)

        assert result is not None
        cached_content, cached_etag = result
        assert cached_content == content
        assert cached_etag == etag
        assert cached_etag.startswith('W/"')

    def test_cache_miss(self):
        """Returns None for cache miss"""
        cache = FeedCache()
        result = cache.get('rss', 'nonexistent')
        assert result is None

    def test_different_formats_cached_separately(self):
        """Different formats with same checksum are cached separately"""
        cache = FeedCache()
        rss_content = "RSS content"
        atom_content = "ATOM content"
        checksum = "same_checksum"

        rss_etag = cache.set('rss', rss_content, checksum)
        atom_etag = cache.set('atom', atom_content, checksum)

        rss_result = cache.get('rss', checksum)
        atom_result = cache.get('atom', checksum)

        assert rss_result[0] == rss_content
        assert atom_result[0] == atom_content
        assert rss_etag != atom_etag


class TestCacheTTL:
    """Test TTL expiration"""

    def test_ttl_expiration(self):
        """Cached entries expire after TTL"""
        cache = FeedCache(ttl=1)  # 1 second TTL
        content = "test content"
        checksum = "test123"

        cache.set('rss', content, checksum)

        # Should be cached initially
        assert cache.get('rss', checksum) is not None

        # Wait for TTL to expire
        time.sleep(1.1)

        # Should be expired
        assert cache.get('rss', checksum) is None

    def test_ttl_not_expired(self):
        """Cached entries remain valid within TTL"""
        cache = FeedCache(ttl=10)  # 10 second TTL
        content = "test content"
        checksum = "test123"

        cache.set('rss', content, checksum)
        time.sleep(0.1)  # Small delay

        # Should still be cached
        assert cache.get('rss', checksum) is not None


class TestLRUEviction:
    """Test LRU eviction strategy"""

    def test_lru_eviction(self):
        """LRU entries are evicted when cache is full"""
        cache = FeedCache(max_size=3)

        # Fill cache
        cache.set('rss', 'content1', 'check1')
        cache.set('rss', 'content2', 'check2')
        cache.set('rss', 'content3', 'check3')

        # All should be cached
        assert cache.get('rss', 'check1') is not None
        assert cache.get('rss', 'check2') is not None
        assert cache.get('rss', 'check3') is not None

        # Add one more (should evict oldest)
        cache.set('rss', 'content4', 'check4')

        # First entry should be evicted
        assert cache.get('rss', 'check1') is None
        assert cache.get('rss', 'check2') is not None
        assert cache.get('rss', 'check3') is not None
        assert cache.get('rss', 'check4') is not None

    def test_lru_access_updates_order(self):
        """Accessing an entry moves it to end (most recently used)"""
        cache = FeedCache(max_size=3)

        # Fill cache
        cache.set('rss', 'content1', 'check1')
        cache.set('rss', 'content2', 'check2')
        cache.set('rss', 'content3', 'check3')

        # Access first entry (makes it most recent)
        cache.get('rss', 'check1')

        # Add new entry (should evict check2, not check1)
        cache.set('rss', 'content4', 'check4')

        assert cache.get('rss', 'check1') is not None  # Still cached (accessed recently)
        assert cache.get('rss', 'check2') is None  # Evicted (oldest)
        assert cache.get('rss', 'check3') is not None
        assert cache.get('rss', 'check4') is not None


class TestCacheInvalidation:
    """Test cache invalidation"""

    def test_invalidate_all(self):
        """Can invalidate entire cache"""
        cache = FeedCache()

        cache.set('rss', 'content1', 'check1')
        cache.set('atom', 'content2', 'check2')
        cache.set('json', 'content3', 'check3')

        count = cache.invalidate()

        assert count == 3
        assert cache.get('rss', 'check1') is None
        assert cache.get('atom', 'check2') is None
        assert cache.get('json', 'check3') is None

    def test_invalidate_specific_format(self):
        """Can invalidate specific format only"""
        cache = FeedCache()

        cache.set('rss', 'content1', 'check1')
        cache.set('atom', 'content2', 'check2')
        cache.set('json', 'content3', 'check3')

        count = cache.invalidate('rss')

        assert count == 1
        assert cache.get('rss', 'check1') is None
        assert cache.get('atom', 'check2') is not None
        assert cache.get('json', 'check3') is not None


class TestCacheStatistics:
    """Test cache statistics tracking"""

    def test_hit_tracking(self):
        """Cache hits are tracked"""
        cache = FeedCache()
        cache.set('rss', 'content', 'check1')

        stats = cache.get_stats()
        assert stats['hits'] == 0

        cache.get('rss', 'check1')  # Hit
        stats = cache.get_stats()
        assert stats['hits'] == 1

    def test_miss_tracking(self):
        """Cache misses are tracked"""
        cache = FeedCache()

        stats = cache.get_stats()
        assert stats['misses'] == 0

        cache.get('rss', 'nonexistent')  # Miss
        stats = cache.get_stats()
        assert stats['misses'] == 1

    def test_hit_rate_calculation(self):
        """Hit rate is calculated correctly"""
        cache = FeedCache()
        cache.set('rss', 'content', 'check1')

        cache.get('rss', 'check1')  # Hit
        cache.get('rss', 'nonexistent')  # Miss
        cache.get('rss', 'check1')  # Hit

        stats = cache.get_stats()
        assert stats['hits'] == 2
        assert stats['misses'] == 1
        assert stats['hit_rate'] == 2.0 / 3.0  # 66.67%

    def test_eviction_tracking(self):
        """Evictions are tracked"""
        cache = FeedCache(max_size=2)

        cache.set('rss', 'content1', 'check1')
        cache.set('rss', 'content2', 'check2')
        cache.set('rss', 'content3', 'check3')  # Triggers eviction

        stats = cache.get_stats()
        assert stats['evictions'] == 1


class TestNotesChecksum:
    """Test notes checksum generation"""

    def test_checksum_generation(self):
        """Can generate checksum from note list"""
        cache = FeedCache()
        now = datetime.now(timezone.utc)
        from pathlib import Path

        notes = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
            Note(id=2, slug="note2", file_path="note2.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
        ]

        checksum = cache.generate_notes_checksum(notes)

        assert isinstance(checksum, str)
        assert len(checksum) == 64  # SHA-256 hex digest length

    def test_checksum_consistency(self):
        """Same notes generate same checksum"""
        cache = FeedCache()
        now = datetime.now(timezone.utc)
        from pathlib import Path

        notes = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
            Note(id=2, slug="note2", file_path="note2.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
        ]

        checksum1 = cache.generate_notes_checksum(notes)
        checksum2 = cache.generate_notes_checksum(notes)

        assert checksum1 == checksum2

    def test_checksum_changes_on_note_change(self):
        """Checksum changes when notes are modified"""
        cache = FeedCache()
        now = datetime.now(timezone.utc)
        later = datetime(2025, 11, 27, 12, 0, 0, tzinfo=timezone.utc)
        from pathlib import Path

        notes1 = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
        ]

        notes2 = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=later, published=True, _data_dir=Path("/tmp")),
        ]

        checksum1 = cache.generate_notes_checksum(notes1)
        checksum2 = cache.generate_notes_checksum(notes2)

        assert checksum1 != checksum2

    def test_checksum_changes_on_note_addition(self):
        """Checksum changes when notes are added"""
        cache = FeedCache()
        now = datetime.now(timezone.utc)
        from pathlib import Path

        notes1 = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
        ]

        notes2 = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
            Note(id=2, slug="note2", file_path="note2.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
        ]

        checksum1 = cache.generate_notes_checksum(notes1)
        checksum2 = cache.generate_notes_checksum(notes2)

        assert checksum1 != checksum2


class TestGlobalCache:
    """Test global cache instance"""

    def test_get_cache_returns_instance(self):
        """get_cache() returns FeedCache instance"""
        from starpunk.feeds.cache import get_cache
        cache = get_cache()
        assert isinstance(cache, FeedCache)

    def test_get_cache_returns_same_instance(self):
        """get_cache() returns singleton instance"""
        from starpunk.feeds.cache import get_cache
        cache1 = get_cache()
        cache2 = get_cache()
        assert cache1 is cache2

    def test_configure_cache(self):
        """configure_cache() sets up global cache with params"""
        from starpunk.feeds.cache import configure_cache, get_cache

        configure_cache(max_size=100, ttl=600)
        cache = get_cache()

        assert cache.max_size == 100
        assert cache.ttl == 600