# Performance Monitoring Foundation Specification

## Overview

The performance monitoring foundation gives operators visibility into StarPunk's runtime behavior, helping them identify bottlenecks, track resource usage, and keep production performance healthy.

## Requirements

### Functional Requirements

1. **Timing Instrumentation**
   - Measure execution time for key operations
   - Track request processing duration
   - Monitor database query execution time
   - Measure template rendering time
   - Track static file serving time

2. **Database Performance Logging**
   - Log all queries when enabled
   - Detect and warn about slow queries
   - Track connection pool usage
   - Monitor transaction duration
   - Count query frequency by type

3. **Memory Usage Tracking**
   - Monitor process RSS memory
   - Track memory growth over time
   - Detect memory leaks
   - Per-request memory delta
   - Memory high water mark

4. **Performance Dashboard**
   - Real-time metrics display
   - Historical data (last 15 minutes)
   - Slow query log
   - Memory usage visualization
   - Endpoint performance table

### Non-Functional Requirements

1. **Performance Impact**
   - Monitoring overhead <1% when enabled
   - Zero impact when disabled
   - Efficient memory usage (<1MB for metrics)
   - No blocking operations

2. **Usability**
   - Simple enable/disable via configuration
   - Clear, actionable metrics
   - Self-explanatory dashboard
   - No external dependencies

## Design

### Architecture

```
┌──────────────────────────────────────┐
│             HTTP Request             │
│                  ↓                   │
│        Performance Middleware        │
│            (start timer)             │
│                  ↓                   │
│        ┌─────────────────┐           │
│        │ Request Handler │           │
│        │        ↓        │           │
│        │ Database Layer  │←── Query Monitor
│        │        ↓        │           │
│        │ Business Logic  │←── Function Timer
│        │        ↓        │           │
│        │ Response Build  │           │
│        └─────────────────┘           │
│                  ↓                   │
│        Performance Middleware        │
│            (stop timer)              │
│                  ↓                   │
│   Metrics Collector ←── Memory Monitor
│                  ↓                   │
│            Circular Buffer           │
│                  ↓                   │
│            Admin Dashboard           │
└──────────────────────────────────────┘
```

### Data Model

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from datetime import datetime, timedelta
from collections import defaultdict, deque


@dataclass
class PerformanceMetric:
    """Single performance measurement"""
    timestamp: datetime
    category: str                        # 'http', 'db', 'function', 'memory'
    operation: str                       # Specific operation name
    duration_ms: Optional[float] = None  # For timed operations
    value: Optional[float] = None        # For point-in-time measurements
    metadata: Dict[str, Any] = field(default_factory=dict)  # Additional context


class MetricsBuffer:
    """Circular buffer for metrics storage"""

    def __init__(self, max_size: int = 1000):
        self.metrics = deque(maxlen=max_size)
        self.slow_queries = deque(maxlen=100)

    def add_metric(self, metric: PerformanceMetric):
        """Add metric to buffer"""
        self.metrics.append(metric)

        # Special handling for slow queries (threshold is in seconds)
        if (metric.category == 'db'
                and metric.duration_ms is not None
                and metric.duration_ms > config.PERF_SLOW_QUERY_THRESHOLD * 1000):
            self.slow_queries.append(metric)

    def get_recent(self, seconds: int = 900) -> List[PerformanceMetric]:
        """Get metrics from the last N seconds"""
        cutoff = datetime.now() - timedelta(seconds=seconds)
        return [m for m in self.metrics if m.timestamp > cutoff]

    def get_summary(self) -> Dict[str, Any]:
        """Get summary statistics"""
        recent = self.get_recent()

        # Group by category and operation
        summary = defaultdict(lambda: {
            'count': 0,
            'total_ms': 0,
            'avg_ms': 0,
            'max_ms': 0,
            'p95_ms': 0,
            'p99_ms': 0
        })

        # Calculate statistics...
        return dict(summary)
```
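The statistics computation is elided above. As a rough sketch of what it could look like inside `get_summary()` after the grouping step, using a naive nearest-rank percentile over the `recent` list (the `_percentile` helper and the `category:operation` key format are illustrative choices, not mandated by this spec):

```python
import math


def _percentile(sorted_values: List[float], pct: float) -> float:
    """Nearest-rank percentile over a pre-sorted list."""
    if not sorted_values:
        return 0.0
    rank = max(1, math.ceil(pct / 100 * len(sorted_values)))
    return sorted_values[rank - 1]


# Inside MetricsBuffer.get_summary(), after the grouping setup:
durations_by_key: Dict[str, List[float]] = defaultdict(list)
for m in recent:
    if m.duration_ms is not None:
        durations_by_key[f"{m.category}:{m.operation}"].append(m.duration_ms)

for key, durations in durations_by_key.items():
    durations.sort()
    stats = summary[key]
    stats['count'] = len(durations)
    stats['total_ms'] = sum(durations)
    stats['avg_ms'] = stats['total_ms'] / len(durations)
    stats['max_ms'] = durations[-1]
    stats['p95_ms'] = _percentile(durations, 95)
    stats['p99_ms'] = _percentile(durations, 99)
```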
### Instrumentation Implementation

#### Database Query Monitoring

```python
import sqlite3
import time
from contextlib import contextmanager
from datetime import datetime


class MonitoredConnection(sqlite3.Connection):
    """Connection subclass that times every execute() call.

    sqlite3.Connection is a C type whose methods cannot be
    monkey-patched, so instrumentation is installed through a
    connection factory instead.
    """

    def execute(self, sql, params=()):
        if not config.PERF_MONITORING_ENABLED:
            return super().execute(sql, params)

        start_time = time.perf_counter()
        result = super().execute(sql, params)
        duration = time.perf_counter() - start_time

        metric = PerformanceMetric(
            timestamp=datetime.now(),
            category='db',
            operation=sql.split()[0].upper(),  # SELECT, INSERT, etc.
            duration_ms=duration * 1000,
            metadata={
                'query': sql if config.PERF_LOG_QUERIES else None,
                'params_count': len(params) if params else 0
            }
        )
        metrics_buffer.add_metric(metric)

        if duration > config.PERF_SLOW_QUERY_THRESHOLD:
            logger.warning(
                "Slow query detected",
                extra={
                    'query': sql,
                    'duration_ms': duration * 1000
                }
            )
        return result


@contextmanager
def monitored_connection():
    """Database connection with monitoring"""
    conn = sqlite3.connect(DATABASE_PATH, factory=MonitoredConnection)
    try:
        yield conn
    finally:
        conn.close()
```

#### HTTP Request Monitoring

```python
from flask import g, request
from datetime import datetime
import time


@app.before_request
def start_request_timer():
    """Start timing the request"""
    if config.PERF_MONITORING_ENABLED:
        g.start_time = time.perf_counter()
        g.start_memory = get_memory_usage()


@app.after_request
def end_request_timer(response):
    """End timing and record metrics"""
    if config.PERF_MONITORING_ENABLED and hasattr(g, 'start_time'):
        duration = time.perf_counter() - g.start_time
        memory_delta = get_memory_usage() - g.start_memory

        metric = PerformanceMetric(
            timestamp=datetime.now(),
            category='http',
            operation=f"{request.method} {request.endpoint}",
            duration_ms=duration * 1000,
            metadata={
                'method': request.method,
                'path': request.path,
                'status': response.status_code,
                'size': len(response.get_data()),
                'memory_delta': memory_delta
            }
        )
        metrics_buffer.add_metric(metric)
    return response
```

#### Memory Monitoring

```python
import resource
import threading
import time
from datetime import datetime


class MemoryMonitor:
    """Background thread for memory monitoring"""

    def __init__(self):
        self.running = False
        self.thread = None
        self.high_water_mark = 0

    def start(self):
        """Start memory monitoring"""
        if not config.PERF_MEMORY_TRACKING:
            return
        self.running = True
        self.thread = threading.Thread(target=self._monitor, daemon=True)
        self.thread.start()

    def stop(self):
        """Signal the monitoring loop to exit"""
        self.running = False

    def _monitor(self):
        """Sample memory usage on a fixed interval"""
        while self.running:
            memory_mb = get_memory_usage()
            self.high_water_mark = max(self.high_water_mark, memory_mb)

            metric = PerformanceMetric(
                timestamp=datetime.now(),
                category='memory',
                operation='rss',
                value=memory_mb,
                metadata={
                    'high_water_mark': self.high_water_mark
                }
            )
            metrics_buffer.add_metric(metric)
            time.sleep(10)  # Check every 10 seconds


def get_memory_usage() -> float:
    """Get process RSS in MB.

    Note: ru_maxrss is the peak (high water mark) RSS, not the current
    value, and is reported in KB on Linux (bytes on macOS). Per-request
    memory deltas therefore only register when the peak grows.
    """
    usage = resource.getrusage(resource.RUSAGE_SELF)
    return usage.ru_maxrss / 1024  # KB -> MB on Linux
```
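#### Function Timing

The architecture diagram also shows a Function Timer on the business-logic layer, but no interface is specified for it. A decorator along the following lines would fit the data model; the name `timed` and its argument are illustrative, not part of this specification:

```python
import functools
import time
from datetime import datetime


def timed(operation: str):
    """Record a 'function' metric for each call to the wrapped function.

    Sketch only: decorator name and signature are assumptions.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if not config.PERF_MONITORING_ENABLED:
                return func(*args, **kwargs)
            start = time.perf_counter()
            try:
                return func(*args, **kwargs)
            finally:
                metrics_buffer.add_metric(PerformanceMetric(
                    timestamp=datetime.now(),
                    category='function',
                    operation=operation,
                    duration_ms=(time.perf_counter() - start) * 1000
                ))
        return wrapper
    return decorator


# Usage (hypothetical function name):
# @timed('render_note')
# def render_note(note): ...
```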
### Performance Dashboard

#### Dashboard Route

```python
@app.route('/admin/performance')
@require_admin
def performance_dashboard():
    """Display performance metrics"""
    if not config.PERF_MONITORING_ENABLED:
        return render_template('admin/performance_disabled.html')

    summary = metrics_buffer.get_summary()
    slow_queries = list(metrics_buffer.slow_queries)
    memory_data = get_memory_graph_data()

    return render_template(
        'admin/performance.html',
        summary=summary,
        slow_queries=slow_queries,
        memory_data=memory_data,
        current_memory=get_memory_usage(),  # referenced by the template below
        uptime=get_uptime(),
        config={
            'slow_threshold': config.PERF_SLOW_QUERY_THRESHOLD,
            'monitoring_enabled': config.PERF_MONITORING_ENABLED,
            'memory_tracking': config.PERF_MEMORY_TRACKING
        }
    )
```
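`get_memory_graph_data()` is not defined in this spec. One plausible shape, reading the `'memory'` metrics already held in the buffer (the function body and return shape below are assumptions of this example):

```python
def get_memory_graph_data(seconds: int = 900) -> List[Dict[str, float]]:
    """Time-series points for the dashboard's memory chart (sketch)."""
    return [
        {'t': m.timestamp.timestamp(), 'mb': m.value}
        for m in metrics_buffer.get_recent(seconds)
        if m.category == 'memory' and m.value is not None
    ]
```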
#### Dashboard Template Structure

```html
<div class="perf-summary">
  <div class="stat"><span class="label">Uptime</span> {{ uptime }}</div>
  <div class="stat"><span class="label">Requests</span> {{ summary.http.count }}</div>
  <div class="stat"><span class="label">Avg Response</span> {{ summary.http.avg_ms|round(2) }}ms</div>
  <div class="stat"><span class="label">Memory</span> {{ current_memory }}MB</div>
</div>

<h2>Slow Queries</h2>
<table>
  <thead>
    <tr><th>Time</th><th>Duration</th><th>Query</th></tr>
  </thead>
  <tbody>
    {% for query in slow_queries %}
    <tr>
      <td>{{ query.timestamp|timeago }}</td>
      <td>{{ query.duration_ms|round(2) }}ms</td>
      <td>{{ query.metadata.query|truncate(100) }}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>

<h2>Memory Usage</h2>
<!-- memory_data drives the memory chart; rendering is implementation-defined -->
<div id="memory-chart" data-points='{{ memory_data|tojson }}'></div>

<h2>Endpoint Performance</h2>
<table>
  <thead>
    <tr><th>Endpoint</th><th>Calls</th><th>Avg (ms)</th><th>P95 (ms)</th><th>P99 (ms)</th></tr>
  </thead>
  <tbody>
    {% for endpoint, stats in summary.items() %}
    <tr>
      <td>{{ endpoint }}</td>
      <td>{{ stats.count }}</td>
      <td>{{ stats.avg_ms|round(2) }}</td>
      <td>{{ stats.p95_ms|round(2) }}</td>
      <td>{{ stats.p99_ms|round(2) }}</td>
    </tr>
    {% endfor %}
  </tbody>
</table>
```
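The snippets above reference module-level `config`, `logger`, `metrics_buffer`, and `memory_monitor` objects without defining them. A minimal wiring sketch, assuming the configuration flag names used throughout this spec (the default values shown are illustrative; actual values come from application configuration):

```python
import logging

logger = logging.getLogger('starpunk.perf')


class config:
    """Illustrative defaults; real values come from app configuration."""
    PERF_MONITORING_ENABLED = False   # zero overhead when disabled
    PERF_LOG_QUERIES = False          # include SQL text in metrics
    PERF_SLOW_QUERY_THRESHOLD = 0.1   # seconds (assumed default)
    PERF_MEMORY_TRACKING = False      # run the background MemoryMonitor


# Shared singletons used by the middleware, DB layer, and dashboard
metrics_buffer = MetricsBuffer(max_size=1000)
memory_monitor = MemoryMonitor()
memory_monitor.start()  # no-op unless PERF_MEMORY_TRACKING is set
```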