feat: Implement Phase 3 Feed Caching (Partial)
Implements feed caching layer with LRU eviction, TTL expiration, and ETag support.

Phase 3.1: Feed Caching (Complete)
- LRU cache with configurable max_size (default: 50 feeds)
- TTL-based expiration (default: 300 seconds = 5 minutes)
- SHA-256 checksums for cache keys and ETags
- Weak ETag generation (W/"checksum")
- If-None-Match header support for 304 Not Modified responses
- Cache invalidation (all formats or per-format)
- Hit/miss/eviction statistics tracking
- Content-based cache keys (change when notes are modified)

Implementation:
- Created starpunk/feeds/cache.py with FeedCache class
- Integrated caching into feed routes (RSS, Atom, JSON Feed)
- Added ETag headers to all feed responses
- 304 Not Modified responses for conditional requests
- Configuration: FEED_CACHE_ENABLED, FEED_CACHE_MAX_SIZE
- Global cache instance with singleton pattern

Architecture:
- Two-level caching:
  1. Note list cache (simple dict, existing)
  2. Feed content cache (LRU with TTL, new)
- Cache keys include format + notes checksum
- Checksums based on note IDs + updated timestamps
- Non-streaming generators used for cacheable content

Testing (see the usage sketch below):
- 25 comprehensive cache tests (100% passing)
- Tests for LRU eviction, TTL expiration, statistics
- Tests for checksum generation and consistency
- Tests for ETag generation and uniqueness
- All 114 feed tests passing (no regressions)

Quality Metrics:
- 114/114 tests passing (100%)
- Zero breaking changes
- Full backward compatibility
- Cache disabled mode supported (FEED_CACHE_ENABLED=false)

Performance Benefits:
- Database queries reduced (note list cached)
- Feed generation reduced (content cached)
- Bandwidth saved (304 responses)
- Memory efficient (LRU eviction)

Note: Phase 3 is partially complete. Still pending:
- Feed statistics dashboard
- OPML 2.0 export endpoint

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
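For orientation, here is a minimal usage sketch of the FeedCache API exercised by the tests in this commit: set() stores content and returns a weak ETag, get() returns a (content, etag) tuple or None, and the ETag enables 304 Not Modified responses. The feed string and checksum value are placeholders for illustration only; in the real routes the checksum comes from generate_notes_checksum(notes) and the content from the feed generators.

# Usage sketch only; placeholder content and checksum, not the actual route code.
from starpunk.feeds.cache import FeedCache

cache = FeedCache(max_size=50, ttl=300)  # defaults per this commit

content = "<?xml version='1.0'?><rss>...</rss>"  # placeholder feed body
checksum = "abc123"                               # placeholder notes checksum

# First request: cache miss, so generate the feed and store it.
# set() returns the weak ETag derived from the content.
assert cache.get('rss', checksum) is None
etag = cache.set('rss', content, checksum)

# Later request with the same notes checksum: cache hit.
cached_content, cached_etag = cache.get('rss', checksum)
assert cached_content == content and cached_etag == etag

# Conditional request: when the client's If-None-Match header equals the
# cached ETag, the route can answer 304 Not Modified instead of the body.
client_if_none_match = etag
send_304 = (client_if_none_match == cached_etag)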
tests/test_feeds_cache.py (new file, 373 lines added)
@@ -0,0 +1,373 @@
"""
Tests for feed caching layer (v1.1.2 Phase 3)

Tests the FeedCache class and caching integration with feed routes.
"""

import time
from datetime import datetime, timezone

import pytest

from starpunk.feeds.cache import FeedCache
from starpunk.models import Note


class TestFeedCacheBasics:
    """Test basic cache operations"""

    def test_cache_initialization(self):
        """Cache initializes with correct settings"""
        cache = FeedCache(max_size=100, ttl=600)
        assert cache.max_size == 100
        assert cache.ttl == 600
        assert len(cache._cache) == 0

    def test_cache_key_generation(self):
        """Cache keys are generated consistently"""
        cache = FeedCache()
        key1 = cache._generate_cache_key('rss', 'abc123')
        key2 = cache._generate_cache_key('rss', 'abc123')
        key3 = cache._generate_cache_key('atom', 'abc123')

        assert key1 == key2
        assert key1 != key3
        assert key1 == 'feed:rss:abc123'

    def test_etag_generation(self):
        """ETags are generated with weak format"""
        cache = FeedCache()
        content = "<?xml version='1.0'?><rss>...</rss>"
        etag = cache._generate_etag(content)

        assert etag.startswith('W/"')
        assert etag.endswith('"')
        assert len(etag) > 10  # SHA-256 hash is long

    def test_etag_consistency(self):
        """Same content generates same ETag"""
        cache = FeedCache()
        content = "test content"
        etag1 = cache._generate_etag(content)
        etag2 = cache._generate_etag(content)

        assert etag1 == etag2

    def test_etag_uniqueness(self):
        """Different content generates different ETags"""
        cache = FeedCache()
        etag1 = cache._generate_etag("content 1")
        etag2 = cache._generate_etag("content 2")

        assert etag1 != etag2


class TestCacheOperations:
    """Test cache get/set operations"""

    def test_set_and_get(self):
        """Can store and retrieve feed content"""
        cache = FeedCache()
        content = "<?xml version='1.0'?><rss>test</rss>"
        checksum = "test123"

        etag = cache.set('rss', content, checksum)
        result = cache.get('rss', checksum)

        assert result is not None
        cached_content, cached_etag = result
        assert cached_content == content
        assert cached_etag == etag
        assert cached_etag.startswith('W/"')

    def test_cache_miss(self):
        """Returns None for cache miss"""
        cache = FeedCache()
        result = cache.get('rss', 'nonexistent')
        assert result is None

    def test_different_formats_cached_separately(self):
        """Different formats with same checksum are cached separately"""
        cache = FeedCache()
        rss_content = "RSS content"
        atom_content = "ATOM content"
        checksum = "same_checksum"

        rss_etag = cache.set('rss', rss_content, checksum)
        atom_etag = cache.set('atom', atom_content, checksum)

        rss_result = cache.get('rss', checksum)
        atom_result = cache.get('atom', checksum)

        assert rss_result[0] == rss_content
        assert atom_result[0] == atom_content
        assert rss_etag != atom_etag


class TestCacheTTL:
    """Test TTL expiration"""

    def test_ttl_expiration(self):
        """Cached entries expire after TTL"""
        cache = FeedCache(ttl=1)  # 1 second TTL
        content = "test content"
        checksum = "test123"

        cache.set('rss', content, checksum)

        # Should be cached initially
        assert cache.get('rss', checksum) is not None

        # Wait for TTL to expire
        time.sleep(1.1)

        # Should be expired
        assert cache.get('rss', checksum) is None

    def test_ttl_not_expired(self):
        """Cached entries remain valid within TTL"""
        cache = FeedCache(ttl=10)  # 10 second TTL
        content = "test content"
        checksum = "test123"

        cache.set('rss', content, checksum)
        time.sleep(0.1)  # Small delay

        # Should still be cached
        assert cache.get('rss', checksum) is not None


class TestLRUEviction:
    """Test LRU eviction strategy"""

    def test_lru_eviction(self):
        """LRU entries are evicted when cache is full"""
        cache = FeedCache(max_size=3)

        # Fill cache
        cache.set('rss', 'content1', 'check1')
        cache.set('rss', 'content2', 'check2')
        cache.set('rss', 'content3', 'check3')

        # All should be cached
        assert cache.get('rss', 'check1') is not None
        assert cache.get('rss', 'check2') is not None
        assert cache.get('rss', 'check3') is not None

        # Add one more (should evict oldest)
        cache.set('rss', 'content4', 'check4')

        # First entry should be evicted
        assert cache.get('rss', 'check1') is None
        assert cache.get('rss', 'check2') is not None
        assert cache.get('rss', 'check3') is not None
        assert cache.get('rss', 'check4') is not None

    def test_lru_access_updates_order(self):
        """Accessing an entry moves it to end (most recently used)"""
        cache = FeedCache(max_size=3)

        # Fill cache
        cache.set('rss', 'content1', 'check1')
        cache.set('rss', 'content2', 'check2')
        cache.set('rss', 'content3', 'check3')

        # Access first entry (makes it most recent)
        cache.get('rss', 'check1')

        # Add new entry (should evict check2, not check1)
        cache.set('rss', 'content4', 'check4')

        assert cache.get('rss', 'check1') is not None  # Still cached (accessed recently)
        assert cache.get('rss', 'check2') is None  # Evicted (oldest)
        assert cache.get('rss', 'check3') is not None
        assert cache.get('rss', 'check4') is not None


class TestCacheInvalidation:
    """Test cache invalidation"""

    def test_invalidate_all(self):
        """Can invalidate entire cache"""
        cache = FeedCache()

        cache.set('rss', 'content1', 'check1')
        cache.set('atom', 'content2', 'check2')
        cache.set('json', 'content3', 'check3')

        count = cache.invalidate()

        assert count == 3
        assert cache.get('rss', 'check1') is None
        assert cache.get('atom', 'check2') is None
        assert cache.get('json', 'check3') is None

    def test_invalidate_specific_format(self):
        """Can invalidate specific format only"""
        cache = FeedCache()

        cache.set('rss', 'content1', 'check1')
        cache.set('atom', 'content2', 'check2')
        cache.set('json', 'content3', 'check3')

        count = cache.invalidate('rss')

        assert count == 1
        assert cache.get('rss', 'check1') is None
        assert cache.get('atom', 'check2') is not None
        assert cache.get('json', 'check3') is not None


class TestCacheStatistics:
    """Test cache statistics tracking"""

    def test_hit_tracking(self):
        """Cache hits are tracked"""
        cache = FeedCache()
        cache.set('rss', 'content', 'check1')

        stats = cache.get_stats()
        assert stats['hits'] == 0

        cache.get('rss', 'check1')  # Hit
        stats = cache.get_stats()
        assert stats['hits'] == 1

    def test_miss_tracking(self):
        """Cache misses are tracked"""
        cache = FeedCache()

        stats = cache.get_stats()
        assert stats['misses'] == 0

        cache.get('rss', 'nonexistent')  # Miss
        stats = cache.get_stats()
        assert stats['misses'] == 1

    def test_hit_rate_calculation(self):
        """Hit rate is calculated correctly"""
        cache = FeedCache()
        cache.set('rss', 'content', 'check1')

        cache.get('rss', 'check1')  # Hit
        cache.get('rss', 'nonexistent')  # Miss
        cache.get('rss', 'check1')  # Hit

        stats = cache.get_stats()
        assert stats['hits'] == 2
        assert stats['misses'] == 1
        assert stats['hit_rate'] == 2.0 / 3.0  # 66.67%

    def test_eviction_tracking(self):
        """Evictions are tracked"""
        cache = FeedCache(max_size=2)

        cache.set('rss', 'content1', 'check1')
        cache.set('rss', 'content2', 'check2')
        cache.set('rss', 'content3', 'check3')  # Triggers eviction

        stats = cache.get_stats()
        assert stats['evictions'] == 1


class TestNotesChecksum:
    """Test notes checksum generation"""

    def test_checksum_generation(self):
        """Can generate checksum from note list"""
        cache = FeedCache()
        now = datetime.now(timezone.utc)
        from pathlib import Path

        notes = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
            Note(id=2, slug="note2", file_path="note2.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
        ]

        checksum = cache.generate_notes_checksum(notes)

        assert isinstance(checksum, str)
        assert len(checksum) == 64  # SHA-256 hex digest length

    def test_checksum_consistency(self):
        """Same notes generate same checksum"""
        cache = FeedCache()
        now = datetime.now(timezone.utc)
        from pathlib import Path

        notes = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
            Note(id=2, slug="note2", file_path="note2.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
        ]

        checksum1 = cache.generate_notes_checksum(notes)
        checksum2 = cache.generate_notes_checksum(notes)

        assert checksum1 == checksum2

    def test_checksum_changes_on_note_change(self):
        """Checksum changes when notes are modified"""
        cache = FeedCache()
        now = datetime.now(timezone.utc)
        later = datetime(2025, 11, 27, 12, 0, 0, tzinfo=timezone.utc)
        from pathlib import Path

        notes1 = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
        ]

        notes2 = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=later, published=True, _data_dir=Path("/tmp")),
        ]

        checksum1 = cache.generate_notes_checksum(notes1)
        checksum2 = cache.generate_notes_checksum(notes2)

        assert checksum1 != checksum2

    def test_checksum_changes_on_note_addition(self):
        """Checksum changes when notes are added"""
        cache = FeedCache()
        now = datetime.now(timezone.utc)
        from pathlib import Path

        notes1 = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
        ]

        notes2 = [
            Note(id=1, slug="note1", file_path="note1.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
            Note(id=2, slug="note2", file_path="note2.md", created_at=now, updated_at=now, published=True, _data_dir=Path("/tmp")),
        ]

        checksum1 = cache.generate_notes_checksum(notes1)
        checksum2 = cache.generate_notes_checksum(notes2)

        assert checksum1 != checksum2


class TestGlobalCache:
    """Test global cache instance"""

    def test_get_cache_returns_instance(self):
        """get_cache() returns FeedCache instance"""
        from starpunk.feeds.cache import get_cache
        cache = get_cache()
        assert isinstance(cache, FeedCache)

    def test_get_cache_returns_same_instance(self):
        """get_cache() returns singleton instance"""
        from starpunk.feeds.cache import get_cache
        cache1 = get_cache()
        cache2 = get_cache()
        assert cache1 is cache2

    def test_configure_cache(self):
        """configure_cache() sets up global cache with params"""
        from starpunk.feeds.cache import configure_cache, get_cache

        configure_cache(max_size=100, ttl=600)
        cache = get_cache()

        assert cache.max_size == 100
        assert cache.ttl == 600