"""
Metrics collection and buffering for performance monitoring

Per ADR-053 and developer Q&A Q6, Q12:
- Per-process circular buffers using deque
- Configurable buffer size (default 1000 entries)
- Include process ID in all metrics
- Configuration-based sampling rates
- Operation types: database, http, render

Example usage:
    >>> from starpunk.monitoring import record_metric, get_metrics
    >>>
    >>> # Record a database operation (subject to sampling unless force=True)
    >>> record_metric('database', 'query', duration_ms=45.2,
    ...               metadata={'query': 'SELECT * FROM notes'})
    >>>
    >>> # Get all metrics
    >>> metrics = get_metrics()
    >>> print(f"Collected {len(metrics)} metrics")
"""

import os
import random
import time
from collections import deque
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from itertools import islice
from threading import Lock
from typing import Any, Deque, Dict, List, Literal, Optional

# Operation types for categorizing metrics
OperationType = Literal["database", "http", "render"]

# Operation types that always get an entry in MetricsBuffer.get_stats()
_OPERATION_TYPES = ("database", "http", "render")

# Sampling rate applied to operation types with no configured rate
_DEFAULT_SAMPLING_RATE = 0.1

# Module-level circular buffer (per-process)
# Each process in a multi-process deployment maintains its own buffer
_metrics_buffer: Optional["MetricsBuffer"] = None
_buffer_lock = Lock()


@dataclass
class Metric:
    """
    Represents a single performance metric

    Attributes:
        operation_type: Type of operation (database/http/render)
        operation_name: Name/description of operation
        timestamp: When the metric was recorded (ISO format, UTC, "Z" suffix)
        duration_ms: Duration in milliseconds
        process_id: Process ID that recorded the metric
        metadata: Additional operation-specific data
    """

    operation_type: OperationType
    operation_name: str
    timestamp: str
    duration_ms: float
    process_id: int
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert metric to dictionary for serialization"""
        return asdict(self)


class MetricsBuffer:
    """
    Circular buffer for storing performance metrics

    Per developer Q&A Q6:
    - Uses deque for efficient circular buffer
    - Per-process storage (not shared across workers)
    - Buffer access is guarded by a lock
    - Configurable max size (default 1000)
    - Automatic eviction of oldest entries when full

    Per developer Q&A Q12:
    - Configurable sampling rates per operation type
    - Default 10% sampling
    - Callers can bypass sampling with force=True (e.g. for slow queries)

    Example:
        >>> buffer = MetricsBuffer(max_size=1000)
        >>> buffer.record('database', 'query', 45.2, {'query': 'SELECT ...'},
        ...               force=True)
        True
        >>> metrics = buffer.get_all()
    """

    def __init__(
        self,
        max_size: int = 1000,
        sampling_rates: Optional[Dict[OperationType, float]] = None
    ):
        """
        Initialize metrics buffer

        Args:
            max_size: Maximum number of metrics to store
            sampling_rates: Dict mapping operation type to sampling rate (0.0-1.0)
                           Default: {'database': 0.1, 'http': 0.1, 'render': 0.1}
                           The dict is copied, so later calls to
                           set_sampling_rate() never modify the caller's object.
        """
        self.max_size = max_size
        self._buffer: Deque[Metric] = deque(maxlen=max_size)
        self._lock = Lock()
        self._process_id = os.getpid()

        if sampling_rates:
            # Copy so the buffer owns its configuration; the argument may be
            # a shared object such as an application config value.
            self._sampling_rates: Dict[str, float] = dict(sampling_rates)
        else:
            # Default sampling rates (10% for all operation types)
            self._sampling_rates = {
                op_type: _DEFAULT_SAMPLING_RATE for op_type in _OPERATION_TYPES
            }

    def record(
        self,
        operation_type: OperationType,
        operation_name: str,
        duration_ms: float,
        metadata: Optional[Dict[str, Any]] = None,
        force: bool = False
    ) -> bool:
        """
        Record a performance metric

        Args:
            operation_type: Type of operation (database/http/render)
            operation_name: Name/description of operation
            duration_ms: Duration in milliseconds
            metadata: Additional operation-specific data (shallow-copied)
            force: If True, bypass sampling (for slow query logging)

        Returns:
            True if metric was recorded, False if skipped due to sampling

        Example:
            >>> buffer.record('database', 'SELECT notes', 45.2,
            ...               {'query': 'SELECT * FROM notes LIMIT 10'},
            ...               force=True)
            True
        """
        # Apply sampling (unless forced)
        if not force:
            sampling_rate = self._sampling_rates.get(
                operation_type, _DEFAULT_SAMPLING_RATE
            )
            # random.random() is in [0.0, 1.0); ">=" guarantees that a rate
            # of 0.0 never records and a rate of 1.0 always records.
            if random.random() >= sampling_rate:
                return False

        # Aware UTC clock (datetime.utcnow() is deprecated); tzinfo is dropped
        # so the serialized form keeps the "<naive ISO timestamp>Z" layout.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

        metric = Metric(
            operation_type=operation_type,
            operation_name=operation_name,
            timestamp=now_utc.isoformat() + "Z",
            duration_ms=duration_ms,
            process_id=self._process_id,
            # Shallow copy: later mutation of the caller's dict must not
            # rewrite an already-buffered metric.
            metadata=dict(metadata) if metadata else {},
        )

        with self._lock:
            self._buffer.append(metric)

        return True

    def get_all(self) -> List[Metric]:
        """
        Get all metrics from buffer

        Returns:
            List of metrics (oldest to newest)

        Example:
            >>> metrics = buffer.get_all()
        """
        with self._lock:
            return list(self._buffer)

    def get_recent(self, count: int) -> List[Metric]:
        """
        Get most recent N metrics

        Args:
            count: Number of recent metrics to return; values <= 0 yield
                   an empty list

        Returns:
            List of most recent metrics (newest first)

        Example:
            >>> recent = buffer.get_recent(10)
        """
        if count <= 0:
            return []

        with self._lock:
            # Walk backwards from the newest entry and stop after `count`
            # items instead of copying and reversing the whole buffer.
            return list(islice(reversed(self._buffer), count))

    def get_by_type(self, operation_type: OperationType) -> List[Metric]:
        """
        Get all metrics of a specific type

        Args:
            operation_type: Type to filter by (database/http/render)

        Returns:
            List of metrics matching the type (oldest to newest)

        Example:
            >>> db_metrics = buffer.get_by_type('database')
        """
        with self._lock:
            return [m for m in self._buffer if m.operation_type == operation_type]

    def get_slow_operations(
        self,
        threshold_ms: float = 1000.0,
        operation_type: Optional[OperationType] = None
    ) -> List[Metric]:
        """
        Get operations that met or exceeded a duration threshold

        Args:
            threshold_ms: Duration threshold in milliseconds (inclusive)
            operation_type: Optional type filter; a falsy value means no filter

        Returns:
            List of slow operations (oldest to newest)

        Example:
            >>> slow_queries = buffer.get_slow_operations(1000, 'database')
        """
        with self._lock:
            metrics = list(self._buffer)

        # Filter by type if specified
        if operation_type:
            metrics = [m for m in metrics if m.operation_type == operation_type]

        # Filter by duration threshold
        return [m for m in metrics if m.duration_ms >= threshold_ms]

    def get_stats(self) -> Dict[str, Any]:
        """
        Get statistics about the buffer

        Returns:
            Dict with keys:
                total_count: Number of buffered metrics (all operation types)
                max_size: Configured buffer capacity
                process_id: PID captured when the buffer was created
                sampling_rates: Copy of the current sampling configuration
                by_type: Per-type count/avg/min/max duration for database,
                         http and render (zeros when a type has no metrics)

        Example:
            >>> stats = buffer.get_stats()
            >>> stats['total_count']
        """
        with self._lock:
            metrics = list(self._buffer)
            # Snapshot under the lock; callers get a copy they can mutate
            # without affecting the buffer's configuration.
            sampling_rates = dict(self._sampling_rates)

        # Group durations per known operation type in a single pass
        durations_by_type: Dict[str, List[float]] = {
            op_type: [] for op_type in _OPERATION_TYPES
        }
        for metric in metrics:
            bucket = durations_by_type.get(metric.operation_type)
            if bucket is not None:
                bucket.append(metric.duration_ms)

        type_stats = {}
        for op_type, durations in durations_by_type.items():
            if durations:
                type_stats[op_type] = {
                    "count": len(durations),
                    "avg_duration_ms": sum(durations) / len(durations),
                    "min_duration_ms": min(durations),
                    "max_duration_ms": max(durations),
                }
            else:
                type_stats[op_type] = {
                    "count": 0,
                    "avg_duration_ms": 0.0,
                    "min_duration_ms": 0.0,
                    "max_duration_ms": 0.0,
                }

        return {
            "total_count": len(metrics),
            "max_size": self.max_size,
            "process_id": self._process_id,
            "sampling_rates": sampling_rates,
            "by_type": type_stats,
        }

    def clear(self) -> None:
        """
        Clear all metrics from buffer

        Example:
            >>> buffer.clear()
        """
        with self._lock:
            self._buffer.clear()

    def set_sampling_rate(
        self,
        operation_type: OperationType,
        rate: float
    ) -> None:
        """
        Update sampling rate for an operation type

        Args:
            operation_type: Type to update
            rate: New sampling rate (0.0-1.0)

        Raises:
            ValueError: If rate is outside the range 0.0-1.0

        Example:
            >>> buffer.set_sampling_rate('database', 0.5)  # 50% sampling
        """
        if not 0.0 <= rate <= 1.0:
            raise ValueError("Sampling rate must be between 0.0 and 1.0")

        with self._lock:
            self._sampling_rates[operation_type] = rate


def get_buffer() -> MetricsBuffer:
    """
    Get or create the module-level metrics buffer

    The buffer is created on first use and stored in a module global, so
    each process that imports this module gets its own instance. In
    multi-process deployments (e.g., gunicorn), each worker process will
    have its own buffer.

    Configuration is read from the Flask app config keys
    METRICS_BUFFER_SIZE and METRICS_SAMPLING_RATES when an app context is
    available; otherwise the defaults (1000 entries, default sampling)
    are used.

    Returns:
        MetricsBuffer instance for this process

    Example:
        >>> buffer = get_buffer()
        >>> buffer.record('database', 'query', 45.2)
    """
    global _metrics_buffer

    if _metrics_buffer is None:
        with _buffer_lock:
            # Re-check after acquiring the lock: another thread may have
            # created the buffer while this one was waiting.
            if _metrics_buffer is None:
                # Get configuration from Flask app if available
                try:
                    from flask import current_app
                    max_size = current_app.config.get('METRICS_BUFFER_SIZE', 1000)
                    sampling_rates = current_app.config.get('METRICS_SAMPLING_RATES', None)
                except (ImportError, RuntimeError):
                    # Flask not available or no app context
                    max_size = 1000
                    sampling_rates = None

                _metrics_buffer = MetricsBuffer(
                    max_size=max_size,
                    sampling_rates=sampling_rates
                )

    return _metrics_buffer


def record_metric(
    operation_type: OperationType,
    operation_name: str,
    duration_ms: float,
    metadata: Optional[Dict[str, Any]] = None,
    force: bool = False
) -> bool:
    """
    Record a metric using the module-level buffer

    Convenience function that uses get_buffer() internally.

    Args:
        operation_type: Type of operation (database/http/render)
        operation_name: Name/description of operation
        duration_ms: Duration in milliseconds
        metadata: Additional operation-specific data
        force: If True, bypass sampling (for slow query logging)

    Returns:
        True if metric was recorded, False if skipped due to sampling

    Example:
        >>> record_metric('database', 'SELECT notes', 45.2,
        ...               {'query': 'SELECT * FROM notes LIMIT 10'},
        ...               force=True)
        True
    """
    buffer = get_buffer()
    return buffer.record(operation_type, operation_name, duration_ms, metadata, force)


def get_metrics() -> List[Metric]:
    """
    Get all metrics from the module-level buffer

    Returns:
        List of metrics (oldest to newest)

    Example:
        >>> metrics = get_metrics()
    """
    buffer = get_buffer()
    return buffer.get_all()


def get_metrics_stats() -> Dict[str, Any]:
    """
    Get statistics from the module-level buffer

    Returns:
        Dict with buffer statistics (see MetricsBuffer.get_stats)

    Example:
        >>> stats = get_metrics_stats()
        >>> print(f"Total metrics: {stats['total_count']}")
    """
    buffer = get_buffer()
    return buffer.get_stats()