mirror of
https://github.com/ChuckBuilds/LEDMatrix.git
synced 2026-04-10 21:03:01 +00:00
feat(cache): Add intelligent disk cache cleanup with retention policies (#199)
* feat(cache): Add intelligent disk cache cleanup with retention policies - Add cleanup_expired_files() method to DiskCache class - Implement retention policies based on cache data types: * Odds data: 2 days (lines move frequently) * Live/recent/leaderboard: 7 days (weekly updates) * News/stocks: 14 days * Upcoming/schedules/team_info/logos: 60 days (stable data) - Add cleanup_disk_cache() orchestration in CacheManager - Start background cleanup thread running every 24 hours - Run cleanup on application startup - Add disk cleanup metrics tracking - Comprehensive logging with cleanup statistics This prevents disk cache from accumulating indefinitely while preserving important season data longer than volatile live game data. * refactor(cache): improve disk cache cleanup implementation - Implement force parameter throttle mechanism in cleanup_disk_cache - Fix TOCTOU race condition in disk cache cleanup (getsize/remove) - Reduce lock contention by processing files outside lock where possible - Add CacheStrategyProtocol for better type safety (replaces Any) - Move time import to module level in cache_metrics - Defer initial cleanup to background thread for non-blocking startup - Add graceful shutdown mechanism with threading.Event for cleanup thread - Add stop_cleanup_thread() method for controlled thread termination * fix(cache): improve disk cache cleanup initialization and error handling - Only start cleanup thread when disk caching is enabled (cache_dir is set) - Remove unused retention policy keys (leaderboard, live_scores, logos) - Handle FileNotFoundError as benign race condition in cleanup - Preserve existing OSError handling for actual file system errors --------- Co-authored-by: Chuck <chuck@example.com>
This commit is contained in:
30
src/cache/cache_metrics.py
vendored
30
src/cache/cache_metrics.py
vendored
@@ -5,6 +5,7 @@ Tracks cache performance metrics including hit rates, miss rates, and fetch time
|
||||
"""
|
||||
|
||||
import threading
|
||||
import time
|
||||
import logging
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
@@ -28,7 +29,12 @@ class CacheMetrics:
|
||||
'background_hits': 0,
|
||||
'background_misses': 0,
|
||||
'total_fetch_time': 0.0,
|
||||
'fetch_count': 0
|
||||
'fetch_count': 0,
|
||||
# Disk cleanup metrics
|
||||
'last_disk_cleanup': 0.0,
|
||||
'total_files_cleaned': 0,
|
||||
'total_space_freed_mb': 0.0,
|
||||
'last_cleanup_duration_sec': 0.0
|
||||
}
|
||||
|
||||
def record_hit(self, cache_type: str = 'regular') -> None:
|
||||
@@ -69,6 +75,21 @@ class CacheMetrics:
|
||||
self._metrics['total_fetch_time'] += duration
|
||||
self._metrics['fetch_count'] += 1
|
||||
|
||||
def record_disk_cleanup(self, files_cleaned: int, space_freed_mb: float, duration_sec: float) -> None:
    """
    Record disk cleanup operation results.

    The file and space counters accumulate across cleanup runs, while the
    timestamp and duration always describe the most recent run only.

    Args:
        files_cleaned: Number of files deleted
        space_freed_mb: Space freed in megabytes
        duration_sec: Duration of cleanup operation in seconds
    """
    with self._lock:
        # Timestamp of this run, as returned by time.time().
        self._metrics['last_disk_cleanup'] = time.time()
        # Running totals: added to, never reset here.
        self._metrics['total_files_cleaned'] += files_cleaned
        self._metrics['total_space_freed_mb'] += space_freed_mb
        # Overwritten each run; only the latest duration is kept.
        self._metrics['last_cleanup_duration_sec'] = duration_sec
|
||||
|
||||
def get_metrics(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get current cache performance metrics.
|
||||
@@ -93,7 +114,12 @@ class CacheMetrics:
|
||||
'api_calls_saved': self._metrics['api_calls_saved'],
|
||||
'average_fetch_time': avg_fetch_time,
|
||||
'total_fetch_time': self._metrics['total_fetch_time'],
|
||||
'fetch_count': self._metrics['fetch_count']
|
||||
'fetch_count': self._metrics['fetch_count'],
|
||||
# Disk cleanup metrics
|
||||
'last_disk_cleanup': self._metrics['last_disk_cleanup'],
|
||||
'total_files_cleaned': self._metrics['total_files_cleaned'],
|
||||
'total_space_freed_mb': self._metrics['total_space_freed_mb'],
|
||||
'last_cleanup_duration_sec': self._metrics['last_cleanup_duration_sec']
|
||||
}
|
||||
|
||||
def log_metrics(self) -> None:
|
||||
|
||||
130
src/cache/disk_cache.py
vendored
130
src/cache/disk_cache.py
vendored
@@ -10,12 +10,28 @@ import time
|
||||
import tempfile
|
||||
import logging
|
||||
import threading
|
||||
from typing import Dict, Any, Optional
|
||||
from typing import Dict, Any, Optional, Protocol
|
||||
from datetime import datetime
|
||||
|
||||
from src.exceptions import CacheError
|
||||
|
||||
|
||||
class CacheStrategyProtocol(Protocol):
    """Protocol for cache strategy objects that categorize cache keys."""

    def get_data_type_from_key(self, key: str) -> str:
        """
        Determine the data type from a cache key.

        Args:
            key: Cache key

        Returns:
            Data type string for strategy lookup
        """
        ...
|
||||
|
||||
|
||||
class DateTimeEncoder(json.JSONEncoder):
|
||||
"""JSON encoder that handles datetime objects."""
|
||||
def default(self, obj: Any) -> Any:
|
||||
@@ -269,4 +285,116 @@ class DiskCache:
|
||||
def get_cache_dir(self) -> Optional[str]:
    """Get the cache directory path."""
    return self.cache_dir
|
||||
|
||||
def cleanup_expired_files(self, cache_strategy: "CacheStrategyProtocol", retention_policies: Dict[str, int]) -> Dict[str, Any]:
    """
    Clean up expired cache files based on retention policies.

    Only ``*.json`` files directly inside the cache directory are considered.
    A file is expired when its modification time is more than the retention
    period (in days) for its data type in the past. The data type comes from
    ``cache_strategy.get_data_type_from_key`` applied to the filename without
    its ``.json`` suffix; unknown types fall back to
    ``retention_policies['default']``, or 30 days if that key is absent.

    The lock is held only while listing the directory and while deleting a
    single file, so the per-file inspection does not hold it. Because the
    age is first measured outside the lock, it is measured again under the
    lock immediately before deletion; a file rewritten in between is kept.

    Args:
        cache_strategy: Object implementing CacheStrategyProtocol for categorizing files
        retention_policies: Dict mapping data types to retention days

    Returns:
        Dictionary with cleanup statistics:
        - files_scanned: Total files checked
        - files_deleted: Files removed
        - space_freed_bytes: Bytes freed
        - errors: Number of errors encountered
    """
    stats = {
        'files_scanned': 0,
        'files_deleted': 0,
        'space_freed_bytes': 0,
        'errors': 0
    }

    if not self.cache_dir or not os.path.exists(self.cache_dir):
        self.logger.warning("Cache directory not available for cleanup")
        return stats

    current_time = time.time()
    default_retention = retention_policies.get('default', 30)

    # Take a snapshot of the directory while holding the lock briefly.
    try:
        with self._lock:
            filenames = [f for f in os.listdir(self.cache_dir) if f.endswith('.json')]
    except OSError as list_error:
        self.logger.error("Error listing cache directory %s: %s", self.cache_dir, list_error, exc_info=True)
        stats['errors'] += 1
        return stats

    # Inspect files without holding the lock.
    for filename in filenames:
        stats['files_scanned'] += 1
        file_path = os.path.join(self.cache_dir, filename)

        try:
            # Cheap pre-filter outside the lock.
            file_age_days = (current_time - os.path.getmtime(file_path)) / 86400  # Convert to days

            # Extract cache key from filename (remove .json extension)
            cache_key = filename[:-5]

            data_type = cache_strategy.get_data_type_from_key(cache_key)
            retention_days = retention_policies.get(data_type, default_retention)

            if file_age_days <= retention_days:
                continue

            try:
                with self._lock:
                    # Re-read size and mtime under the lock: the file may have
                    # been rewritten since the unlocked check above, in which
                    # case it is no longer expired and must be kept.
                    # NOTE(review): this relies on writers taking self._lock
                    # around their file writes - confirm against set().
                    file_stat = os.stat(file_path)
                    file_age_days = (current_time - file_stat.st_mtime) / 86400
                    if file_age_days <= retention_days:
                        continue
                    os.remove(file_path)
            except FileNotFoundError:
                # File vanished before it could be removed. This is a benign
                # race condition, silently continue.
                continue
            except OSError as e:
                # Other file system errors, log but don't fail the entire cleanup
                stats['errors'] += 1
                self.logger.warning("Error deleting cache file %s: %s", filename, e)
                continue

            # Only count the file once removal has succeeded.
            stats['files_deleted'] += 1
            stats['space_freed_bytes'] += file_stat.st_size
            self.logger.debug(
                "Deleted expired cache file: %s (age: %.1f days, type: %s, retention: %d days)",
                filename, file_age_days, data_type, retention_days
            )

        except FileNotFoundError:
            # File was deleted between listing and processing.
            # This is a benign race condition, silently continue.
            continue
        except OSError as e:
            stats['errors'] += 1
            self.logger.warning("Error processing cache file %s: %s", filename, e)
            continue
        except Exception as e:
            # Best-effort cleanup: one bad file (e.g. a strategy failure)
            # must not abort the whole pass.
            stats['errors'] += 1
            self.logger.error("Unexpected error processing cache file %s: %s", filename, e, exc_info=True)
            continue

    return stats
|
||||
|
||||
|
||||
Reference in New Issue
Block a user