Files
StarPunk/tests/test_feeds_cache.py
Phil Skentelbery c1dd706b8f feat: Implement Phase 3 Feed Caching (Partial)
Implements feed caching layer with LRU eviction, TTL expiration, and ETag support.

Phase 3.1: Feed Caching (Complete)
- LRU cache with configurable max_size (default: 50 feeds)
- TTL-based expiration (default: 300 seconds = 5 minutes)
- SHA-256 checksums for cache keys and ETags
- Weak ETag generation (W/"checksum")
- If-None-Match header support for 304 Not Modified responses
- Cache invalidation (all or per-format)
- Hit/miss/eviction statistics tracking
- Content-based cache keys (changes when notes are modified)

Implementation:
- Created starpunk/feeds/cache.py with FeedCache class
- Integrated caching into feed routes (RSS, ATOM, JSON Feed)
- Added ETag headers to all feed responses
- 304 Not Modified responses for conditional requests
- Configuration: FEED_CACHE_ENABLED, FEED_CACHE_MAX_SIZE
- Global cache instance with singleton pattern

Architecture:
- Two-level caching:
  1. Note list cache (simple dict, existing)
  2. Feed content cache (LRU with TTL, new)
- Cache keys include format + notes checksum
- Checksums based on note IDs + updated timestamps
- Non-streaming generators used for cacheable content

Testing:
- 25 comprehensive cache tests (100% passing)
- Tests for LRU eviction, TTL expiration, statistics
- Tests for checksum generation and consistency
- Tests for ETag generation and uniqueness
- All 114 feed tests passing (no regressions)

Quality Metrics:
- 114/114 tests passing (100%)
- Zero breaking changes
- Full backward compatibility
- Cache disabled mode supported (FEED_CACHE_ENABLED=false)

Performance Benefits:
- Database queries reduced (note list cached)
- Feed generation reduced (content cached)
- Bandwidth saved (304 responses)
- Memory efficient (LRU eviction)

Note: Phase 3 is partially complete. Still pending:
- Feed statistics dashboard
- OPML 2.0 export endpoint

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 21:14:03 -07:00

374 lines
12 KiB
Python

"""
Tests for feed caching layer (v1.1.2 Phase 3)
Tests the FeedCache class and caching integration with feed routes.
"""
import time
from datetime import datetime, timezone
from pathlib import Path

import pytest

from starpunk.feeds.cache import FeedCache
from starpunk.models import Note
class TestFeedCacheBasics:
    """Checks for FeedCache construction and its key/ETag helpers."""

    def test_cache_initialization(self):
        """Constructor arguments are stored and the cache starts out empty."""
        feed_cache = FeedCache(max_size=100, ttl=600)

        assert (feed_cache.max_size, feed_cache.ttl) == (100, 600)
        assert len(feed_cache._cache) == 0

    def test_cache_key_generation(self):
        """Identical inputs give identical keys; the format is part of the key."""
        feed_cache = FeedCache()

        rss_key = feed_cache._generate_cache_key('rss', 'abc123')
        rss_key_again = feed_cache._generate_cache_key('rss', 'abc123')
        atom_key = feed_cache._generate_cache_key('atom', 'abc123')

        assert rss_key_again == rss_key
        assert atom_key != rss_key
        assert rss_key == 'feed:rss:abc123'

    def test_etag_generation(self):
        """A generated ETag is wrapped as a weak validator: W/"..."."""
        feed_cache = FeedCache()
        body = "<?xml version='1.0'?><rss>...</rss>"

        tag = feed_cache._generate_etag(body)

        assert tag.startswith('W/"') and tag.endswith('"')
        # The quoted part carries a hash, so the tag is well over 10 chars.
        assert len(tag) > 10

    def test_etag_consistency(self):
        """Hashing the same content twice yields the same ETag."""
        feed_cache = FeedCache()
        body = "test content"

        first_tag = feed_cache._generate_etag(body)
        second_tag = feed_cache._generate_etag(body)

        assert second_tag == first_tag

    def test_etag_uniqueness(self):
        """Two different bodies do not share an ETag."""
        feed_cache = FeedCache()

        tag_one = feed_cache._generate_etag("content 1")
        tag_two = feed_cache._generate_etag("content 2")

        assert tag_two != tag_one
class TestCacheOperations:
    """Round-trip checks for FeedCache.set() and FeedCache.get()."""

    def test_set_and_get(self):
        """A stored feed comes back with its content and the ETag set() returned."""
        feed_cache = FeedCache()
        body = "<?xml version='1.0'?><rss>test</rss>"
        notes_checksum = "test123"

        stored_etag = feed_cache.set('rss', body, notes_checksum)
        entry = feed_cache.get('rss', notes_checksum)

        assert entry is not None
        cached_body, cached_etag = entry
        assert cached_body == body
        assert cached_etag == stored_etag
        assert cached_etag.startswith('W/"')

    def test_cache_miss(self):
        """Looking up a checksum that was never stored yields None."""
        feed_cache = FeedCache()

        assert feed_cache.get('rss', 'nonexistent') is None

    def test_different_formats_cached_separately(self):
        """The same checksum under two formats maps to two independent entries."""
        feed_cache = FeedCache()
        shared_checksum = "same_checksum"
        rss_body = "RSS content"
        atom_body = "ATOM content"

        etag_for_rss = feed_cache.set('rss', rss_body, shared_checksum)
        etag_for_atom = feed_cache.set('atom', atom_body, shared_checksum)

        rss_entry = feed_cache.get('rss', shared_checksum)
        atom_entry = feed_cache.get('atom', shared_checksum)

        assert rss_entry[0] == rss_body
        assert atom_entry[0] == atom_body
        assert etag_for_rss != etag_for_atom
class TestCacheTTL:
    """Time-to-live behaviour of cached entries."""

    # NOTE(review): both tests wait on the real clock via time.sleep, which
    # makes them slow; which clock FeedCache reads is not visible from this
    # file, so the waits are left as-is rather than patched out.

    def test_ttl_expiration(self):
        """An entry is served before its TTL elapses and gone afterwards."""
        feed_cache = FeedCache(ttl=1)  # 1 second TTL
        notes_checksum = "test123"
        feed_cache.set('rss', "test content", notes_checksum)

        # Fresh entry: still available.
        assert feed_cache.get('rss', notes_checksum) is not None

        # Wait slightly longer than the TTL.
        time.sleep(1.1)

        # Stale entry: no longer returned.
        assert feed_cache.get('rss', notes_checksum) is None

    def test_ttl_not_expired(self):
        """An entry stays available while its TTL has not elapsed."""
        feed_cache = FeedCache(ttl=10)  # 10 second TTL
        notes_checksum = "test123"
        feed_cache.set('rss', "test content", notes_checksum)

        time.sleep(0.1)  # Short pause, far below the TTL

        assert feed_cache.get('rss', notes_checksum) is not None
class TestLRUEviction:
    """Least-recently-used eviction once max_size is exceeded."""

    def test_lru_eviction(self):
        """Adding a fourth entry to a three-slot cache drops the oldest one."""
        feed_cache = FeedCache(max_size=3)

        # Fill every slot: check1..check3.
        for n in (1, 2, 3):
            feed_cache.set('rss', f'content{n}', f'check{n}')

        # Everything fits, so every entry is readable (read in insertion order).
        for n in (1, 2, 3):
            assert feed_cache.get('rss', f'check{n}') is not None

        # One entry too many: something has to go.
        feed_cache.set('rss', 'content4', 'check4')

        # check1 is gone; the remaining three entries are still there.
        assert feed_cache.get('rss', 'check1') is None
        for n in (2, 3, 4):
            assert feed_cache.get('rss', f'check{n}') is not None

    def test_lru_access_updates_order(self):
        """Reading an entry protects it from being the next one evicted."""
        feed_cache = FeedCache(max_size=3)

        # Fill every slot: check1..check3.
        for n in (1, 2, 3):
            feed_cache.set('rss', f'content{n}', f'check{n}')

        # Touch check1 so that check2 becomes the least recently used entry.
        feed_cache.get('rss', 'check1')

        # Overflow the cache; check2 (not check1) should be dropped.
        feed_cache.set('rss', 'content4', 'check4')

        assert feed_cache.get('rss', 'check1') is not None  # kept: recently read
        assert feed_cache.get('rss', 'check2') is None  # dropped: least recent
        assert feed_cache.get('rss', 'check3') is not None
        assert feed_cache.get('rss', 'check4') is not None
class TestCacheInvalidation:
    """Whole-cache and per-format invalidation."""

    def test_invalidate_all(self):
        """invalidate() with no argument removes every entry and reports the count."""
        feed_cache = FeedCache()
        entries = [
            ('rss', 'content1', 'check1'),
            ('atom', 'content2', 'check2'),
            ('json', 'content3', 'check3'),
        ]
        for feed_format, body, notes_checksum in entries:
            feed_cache.set(feed_format, body, notes_checksum)

        removed = feed_cache.invalidate()

        assert removed == 3
        for feed_format, _body, notes_checksum in entries:
            assert feed_cache.get(feed_format, notes_checksum) is None

    def test_invalidate_specific_format(self):
        """invalidate('rss') removes the RSS entry and leaves the other formats."""
        feed_cache = FeedCache()
        feed_cache.set('rss', 'content1', 'check1')
        feed_cache.set('atom', 'content2', 'check2')
        feed_cache.set('json', 'content3', 'check3')

        removed = feed_cache.invalidate('rss')

        assert removed == 1
        assert feed_cache.get('rss', 'check1') is None
        for feed_format, notes_checksum in (('atom', 'check2'), ('json', 'check3')):
            assert feed_cache.get(feed_format, notes_checksum) is not None
class TestCacheStatistics:
    """Test cache statistics tracking (hits, misses, hit rate, evictions)."""

    def test_hit_tracking(self):
        """Cache hits are tracked"""
        cache = FeedCache()
        cache.set('rss', 'content', 'check1')

        # Storing an entry on its own must not register as a hit.
        stats = cache.get_stats()
        assert stats['hits'] == 0

        cache.get('rss', 'check1')  # Hit

        stats = cache.get_stats()
        assert stats['hits'] == 1

    def test_miss_tracking(self):
        """Cache misses are tracked"""
        cache = FeedCache()

        stats = cache.get_stats()
        assert stats['misses'] == 0

        cache.get('rss', 'nonexistent')  # Miss

        stats = cache.get_stats()
        assert stats['misses'] == 1

    def test_hit_rate_calculation(self):
        """Hit rate is calculated correctly"""
        cache = FeedCache()
        cache.set('rss', 'content', 'check1')

        cache.get('rss', 'check1')  # Hit
        cache.get('rss', 'nonexistent')  # Miss
        cache.get('rss', 'check1')  # Hit

        stats = cache.get_stats()
        assert stats['hits'] == 2
        assert stats['misses'] == 1
        # Compare the ratio with a tolerance: exact float equality would tie
        # the test to one particular way of computing 2/3.
        assert stats['hit_rate'] == pytest.approx(2.0 / 3.0)  # 66.67%

    def test_eviction_tracking(self):
        """Evictions are tracked"""
        cache = FeedCache(max_size=2)
        cache.set('rss', 'content1', 'check1')
        cache.set('rss', 'content2', 'check2')
        cache.set('rss', 'content3', 'check3')  # Triggers eviction

        stats = cache.get_stats()
        assert stats['evictions'] == 1
class TestNotesChecksum:
    """Test notes checksum generation"""

    # Fixed, timezone-aware timestamps keep the fixtures deterministic and
    # guarantee that _EDITED really is later than _CREATED.
    _CREATED = datetime(2025, 11, 27, 12, 0, 0, tzinfo=timezone.utc)
    _EDITED = datetime(2025, 11, 27, 13, 0, 0, tzinfo=timezone.utc)

    @classmethod
    def _make_note(cls, note_id, updated_at=None):
        """Build a published Note fixture named after ``note_id``.

        Args:
            note_id: Used as the note id and to derive slug and file path.
            updated_at: Modification timestamp; defaults to the creation time.

        Returns:
            A Note created at ``_CREATED`` with its data dir set to /tmp.
        """
        return Note(
            id=note_id,
            slug=f"note{note_id}",
            file_path=f"note{note_id}.md",
            created_at=cls._CREATED,
            updated_at=cls._CREATED if updated_at is None else updated_at,
            published=True,
            _data_dir=Path("/tmp"),
        )

    def test_checksum_generation(self):
        """Can generate checksum from note list"""
        cache = FeedCache()
        notes = [self._make_note(1), self._make_note(2)]

        checksum = cache.generate_notes_checksum(notes)

        assert isinstance(checksum, str)
        assert len(checksum) == 64  # SHA-256 hex digest length

    def test_checksum_consistency(self):
        """Same notes generate same checksum"""
        cache = FeedCache()
        notes = [self._make_note(1), self._make_note(2)]

        checksum1 = cache.generate_notes_checksum(notes)
        checksum2 = cache.generate_notes_checksum(notes)

        assert checksum1 == checksum2

    def test_checksum_changes_on_note_change(self):
        """Checksum changes when notes are modified"""
        cache = FeedCache()
        # Same note, differing only in its updated_at timestamp.
        notes1 = [self._make_note(1)]
        notes2 = [self._make_note(1, updated_at=self._EDITED)]

        checksum1 = cache.generate_notes_checksum(notes1)
        checksum2 = cache.generate_notes_checksum(notes2)

        assert checksum1 != checksum2

    def test_checksum_changes_on_note_addition(self):
        """Checksum changes when notes are added"""
        cache = FeedCache()
        notes1 = [self._make_note(1)]
        notes2 = [self._make_note(1), self._make_note(2)]

        checksum1 = cache.generate_notes_checksum(notes1)
        checksum2 = cache.generate_notes_checksum(notes2)

        assert checksum1 != checksum2
class TestGlobalCache:
    """Checks for the module-level cache accessors in starpunk.feeds.cache."""

    def test_get_cache_returns_instance(self):
        """get_cache() hands back a FeedCache object."""
        from starpunk.feeds.cache import get_cache

        assert isinstance(get_cache(), FeedCache)

    def test_get_cache_returns_same_instance(self):
        """Two consecutive get_cache() calls return the very same object."""
        from starpunk.feeds.cache import get_cache

        first = get_cache()
        second = get_cache()

        assert first is second

    def test_configure_cache(self):
        """After configure_cache(), get_cache() reflects the given settings."""
        from starpunk.feeds.cache import configure_cache, get_cache

        # NOTE(review): this changes what get_cache() returns and nothing here
        # restores it - confirm no other test relies on the default settings.
        configure_cache(max_size=100, ttl=600)
        shared_cache = get_cache()

        assert (shared_cache.max_size, shared_cache.ttl) == (100, 600)