import json import os import time from datetime import datetime import pytz from typing import Any, Dict, Optional import logging import stat import threading import tempfile from pathlib import Path class DateTimeEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, datetime): return obj.isoformat() return super().default(obj) class CacheManager: """Manages caching of API responses to reduce API calls.""" def __init__(self): # Initialize logger first self.logger = logging.getLogger(__name__) # Determine the most reliable writable directory self.cache_dir = self._get_writable_cache_dir() if self.cache_dir: self.logger.info(f"Using cache directory: {self.cache_dir}") else: # This is a critical failure, as caching is essential. self.logger.error("Could not find or create a writable cache directory. Caching will be disabled.") self.cache_dir = None self._memory_cache = {} # In-memory cache for faster access self._memory_cache_timestamps = {} self._cache_lock = threading.Lock() # Initialize config manager for sport-specific intervals try: from src.config_manager import ConfigManager self.config_manager = ConfigManager() self.config_manager.load_config() except ImportError: self.config_manager = None self.logger.warning("ConfigManager not available, using default cache intervals") def _get_writable_cache_dir(self) -> Optional[str]: """Tries to find or create a writable cache directory, preferring a system path when available.""" # Attempt 1: System-wide persistent cache directory (preferred for services) try: system_cache_dir = '/var/cache/ledmatrix' if os.path.exists(system_cache_dir): test_file = os.path.join(system_cache_dir, '.writetest') try: with open(test_file, 'w') as f: f.write('test') os.remove(test_file) return system_cache_dir except (IOError, OSError): self.logger.warning(f"Directory exists but is not writable: {system_cache_dir}") else: os.makedirs(system_cache_dir, exist_ok=True) if os.access(system_cache_dir, os.W_OK): return system_cache_dir except Exception as e: self.logger.warning(f"Could not use /var/cache/ledmatrix: {e}") # Attempt 2: User's home directory (handling sudo), but avoid /root preference try: real_user = os.environ.get('SUDO_USER') or os.environ.get('USER', 'default') if real_user and real_user != 'root': home_dir = os.path.expanduser(f"~{real_user}") else: # When running as root and /var/cache/ledmatrix failed, still allow fallback to /root home_dir = os.path.expanduser('~') user_cache_dir = os.path.join(home_dir, '.ledmatrix_cache') os.makedirs(user_cache_dir, exist_ok=True) test_file = os.path.join(user_cache_dir, '.writetest') with open(test_file, 'w') as f: f.write('test') os.remove(test_file) return user_cache_dir except Exception as e: self.logger.warning(f"Could not use user-specific cache directory: {e}") # Attempt 3: /opt/ledmatrix/cache (alternative persistent location) try: opt_cache_dir = '/opt/ledmatrix/cache' # Check if directory exists and we can write to it if os.path.exists(opt_cache_dir): # Test if we can write to the existing directory test_file = os.path.join(opt_cache_dir, '.writetest') try: with open(test_file, 'w') as f: f.write('test') os.remove(test_file) return opt_cache_dir except (IOError, OSError): self.logger.warning(f"Directory exists but is not writable: {opt_cache_dir}") else: # Try to create the directory os.makedirs(opt_cache_dir, exist_ok=True) if os.access(opt_cache_dir, os.W_OK): return opt_cache_dir except Exception as e: self.logger.warning(f"Could not use /opt/ledmatrix/cache: {e}") # Attempt 4: System-wide temporary directory (fallback, not persistent) try: temp_cache_dir = os.path.join(tempfile.gettempdir(), 'ledmatrix_cache') os.makedirs(temp_cache_dir, exist_ok=True) if os.access(temp_cache_dir, os.W_OK): self.logger.warning("Using temporary cache directory - cache will NOT persist across restarts") return temp_cache_dir except Exception as e: self.logger.warning(f"Could not use system-wide temporary cache directory: {e}") # Return None if no directory is writable return None def _ensure_cache_dir(self): """This method is deprecated and no longer needed.""" pass def _get_cache_path(self, key: str) -> Optional[str]: """Get the path for a cache file.""" if not self.cache_dir: return None return os.path.join(self.cache_dir, f"{key}.json") def get_cached_data(self, key: str, max_age: int = 300, memory_ttl: Optional[int] = None) -> Optional[Dict]: """Get data from cache (memory first, then disk) honoring TTLs. - memory_ttl: TTL for in-memory entry; defaults to max_age if not provided - max_age: TTL for persisted (on-disk) entry based on the stored timestamp """ now = time.time() in_memory_ttl = memory_ttl if memory_ttl is not None else max_age # 1) Memory cache if key in self._memory_cache: timestamp = self._memory_cache_timestamps.get(key) if isinstance(timestamp, str): try: timestamp = float(timestamp) except ValueError: self.logger.error(f"Invalid timestamp format for key {key}: {timestamp}") timestamp = None if timestamp is not None and (now - float(timestamp) <= in_memory_ttl): return self._memory_cache[key] # Expired memory entry → evict and fall through to disk self._memory_cache.pop(key, None) self._memory_cache_timestamps.pop(key, None) # 2) Disk cache cache_path = self._get_cache_path(key) if cache_path and os.path.exists(cache_path): try: with self._cache_lock: with open(cache_path, 'r') as f: record = json.load(f) # Determine record timestamp (prefer embedded, else file mtime) record_ts = None if isinstance(record, dict): record_ts = record.get('timestamp') if record_ts is None: try: record_ts = os.path.getmtime(cache_path) except OSError: record_ts = None if record_ts is not None: try: record_ts = float(record_ts) except (TypeError, ValueError): record_ts = None if record_ts is None or (now - record_ts) <= max_age: # Hydrate memory cache (use current time to start memory TTL window) self._memory_cache[key] = record self._memory_cache_timestamps[key] = now return record else: # Stale on disk; keep file for potential diagnostics but treat as miss return None except json.JSONDecodeError as e: self.logger.error(f"Error parsing cache file for {key}: {e}") # If the file is corrupted, remove it try: os.remove(cache_path) except OSError: pass return None except Exception as e: self.logger.error(f"Error loading cache for {key}: {e}") return None # 3) Miss return None def save_cache(self, key: str, data: Dict) -> None: """ Save data to cache. Args: key: Cache key data: Data to cache """ try: # Update memory cache first self._memory_cache[key] = data self._memory_cache_timestamps[key] = time.time() # Save to file if a cache directory is available cache_path = self._get_cache_path(key) if cache_path: # Atomic write to avoid partial/corrupt files with self._cache_lock: tmp_dir = os.path.dirname(cache_path) try: fd, tmp_path = tempfile.mkstemp(prefix=f".{os.path.basename(cache_path)}.", dir=tmp_dir) try: with os.fdopen(fd, 'w') as tmp_file: json.dump(data, tmp_file, indent=4, cls=DateTimeEncoder) tmp_file.flush() os.fsync(tmp_file.fileno()) os.replace(tmp_path, cache_path) finally: if os.path.exists(tmp_path): try: os.remove(tmp_path) except OSError: pass except Exception as e: self.logger.error(f"Atomic write failed for key '{key}': {e}") # Attempt one-time fallback write directly into /var/cache/ledmatrix if available try: fallback_dir = '/var/cache/ledmatrix' if os.path.isdir(fallback_dir) and os.access(fallback_dir, os.W_OK): fallback_path = os.path.join(fallback_dir, os.path.basename(cache_path)) with open(fallback_path, 'w') as tmp_file: json.dump(data, tmp_file, indent=4, cls=DateTimeEncoder) self.logger.warning(f"Cache wrote to fallback location: {fallback_path}") except Exception as e2: self.logger.error(f"Fallback cache write also failed: {e2}") except (IOError, OSError) as e: self.logger.error(f"Failed to save cache for key '{key}': {e}") except Exception as e: self.logger.error(f"An unexpected error occurred while saving cache for key '{key}': {e}") def load_cache(self, key: str) -> Optional[Dict[str, Any]]: """Load data from cache with memory caching.""" current_time = time.time() # Check memory cache first if key in self._memory_cache: if current_time - self._memory_cache_timestamps.get(key, 0) < 60: # 1 minute TTL return self._memory_cache[key] else: # Clear expired memory cache if key in self._memory_cache: del self._memory_cache[key] if key in self._memory_cache_timestamps: del self._memory_cache_timestamps[key] cache_path = self._get_cache_path(key) if not cache_path or not os.path.exists(cache_path): return None try: with self._cache_lock: with open(cache_path, 'r') as f: try: data = json.load(f) # Update memory cache self._memory_cache[key] = data self._memory_cache_timestamps[key] = current_time return data except json.JSONDecodeError as e: self.logger.error(f"Error parsing cache file for {key}: {e}") # If the file is corrupted, remove it os.remove(cache_path) return None except Exception as e: self.logger.error(f"Error loading cache for {key}: {e}") return None def clear_cache(self, key: Optional[str] = None) -> None: """Clear cache for a specific key or all keys.""" with self._cache_lock: if key: # Clear specific key if key in self._memory_cache: del self._memory_cache[key] del self._memory_cache_timestamps[key] cache_path = self._get_cache_path(key) if cache_path and os.path.exists(cache_path): os.remove(cache_path) else: # Clear all keys self._memory_cache.clear() self._memory_cache_timestamps.clear() if self.cache_dir: for file in os.listdir(self.cache_dir): if file.endswith('.json'): os.remove(os.path.join(self.cache_dir, file)) def has_data_changed(self, data_type: str, new_data: Dict[str, Any]) -> bool: """Check if data has changed from cached version.""" cached_data = self.load_cache(data_type) if not cached_data: return True if data_type == 'weather': return self._has_weather_changed(cached_data, new_data) elif data_type == 'stocks': return self._has_stocks_changed(cached_data, new_data) elif data_type == 'stock_news': return self._has_news_changed(cached_data, new_data) elif data_type == 'nhl': return self._has_nhl_changed(cached_data, new_data) elif data_type == 'mlb': return self._has_mlb_changed(cached_data, new_data) return True def _has_weather_changed(self, cached: Dict[str, Any], new: Dict[str, Any]) -> bool: """Check if weather data has changed.""" # Handle new cache structure where data is nested under 'data' key if 'data' in cached: cached = cached['data'] # Handle case where cached data might be the weather data directly if 'current' in cached: # This is the new structure with 'current' and 'forecast' keys current_weather = cached.get('current', {}) if current_weather and 'main' in current_weather and 'weather' in current_weather: cached_temp = round(current_weather['main']['temp']) cached_condition = current_weather['weather'][0]['main'] return (cached_temp != new.get('temp') or cached_condition != new.get('condition')) # Handle old structure where temp and condition are directly accessible return (cached.get('temp') != new.get('temp') or cached.get('condition') != new.get('condition')) def _has_stocks_changed(self, cached: Dict[str, Any], new: Dict[str, Any]) -> bool: """Check if stock data has changed.""" if not self._is_market_open(): return False return cached.get('price') != new.get('price') def _has_news_changed(self, cached: Dict[str, Any], new: Dict[str, Any]) -> bool: """Check if news data has changed.""" # Handle both dictionary and list formats if isinstance(new, list): # If new data is a list, cached data should also be a list if not isinstance(cached, list): return True # Compare lengths and content if len(cached) != len(new): return True # Compare titles since they're unique enough for our purposes cached_titles = set(item.get('title', '') for item in cached) new_titles = set(item.get('title', '') for item in new) return cached_titles != new_titles else: # Original dictionary format handling cached_headlines = set(h.get('id') for h in cached.get('headlines', [])) new_headlines = set(h.get('id') for h in new.get('headlines', [])) return not cached_headlines.issuperset(new_headlines) def _has_nhl_changed(self, cached: Dict[str, Any], new: Dict[str, Any]) -> bool: """Check if NHL data has changed.""" return (cached.get('game_status') != new.get('game_status') or cached.get('score') != new.get('score')) def _has_mlb_changed(self, cached: Dict[str, Any], new: Dict[str, Any]) -> bool: """Check if MLB game data has changed.""" if not cached or not new: return True # Check if any games have changed status or score for game_id, new_game in new.items(): cached_game = cached.get(game_id) if not cached_game: return True # Check for score changes if (new_game['away_score'] != cached_game['away_score'] or new_game['home_score'] != cached_game['home_score']): return True # Check for status changes if new_game['status'] != cached_game['status']: return True # For live games, check inning and count if new_game['status'] == 'in': if (new_game['inning'] != cached_game['inning'] or new_game['inning_half'] != cached_game['inning_half'] or new_game['balls'] != cached_game['balls'] or new_game['strikes'] != cached_game['strikes'] or new_game['bases_occupied'] != cached_game['bases_occupied']): return True return False def _is_market_open(self) -> bool: """Check if the US stock market is currently open.""" et_tz = pytz.timezone('America/New_York') now = datetime.now(et_tz) # Check if it's a weekday if now.weekday() >= 5: # 5 = Saturday, 6 = Sunday return False # Convert current time to ET current_time = now.time() market_open = datetime.strptime('09:30', '%H:%M').time() market_close = datetime.strptime('16:00', '%H:%M').time() return market_open <= current_time <= market_close def update_cache(self, data_type: str, data: Dict[str, Any]) -> bool: """Update cache with new data.""" cache_data = { 'data': data, 'timestamp': time.time() } return self.save_cache(data_type, cache_data) def get(self, key: str, max_age: int = 300) -> Optional[Dict]: """Get data from cache if it exists and is not stale.""" cached_data = self.get_cached_data(key, max_age) if cached_data and 'data' in cached_data: return cached_data['data'] return cached_data def set(self, key: str, data: Dict) -> None: """Store data in cache with current timestamp.""" cache_data = { 'data': data, 'timestamp': time.time() } self.save_cache(key, cache_data) def setup_persistent_cache(self) -> bool: """ Set up a persistent cache directory with proper permissions. This should be run once with sudo to create the directory. """ try: # Try to create /var/cache/ledmatrix with proper permissions cache_dir = '/var/cache/ledmatrix' os.makedirs(cache_dir, exist_ok=True) # Set ownership to the real user (not root) real_user = os.environ.get('SUDO_USER') if real_user: import pwd try: uid = pwd.getpwnam(real_user).pw_uid gid = pwd.getpwnam(real_user).pw_gid os.chown(cache_dir, uid, gid) self.logger.info(f"Set ownership of {cache_dir} to {real_user}") except Exception as e: self.logger.warning(f"Could not set ownership: {e}") # Set permissions to 755 (rwxr-xr-x) os.chmod(cache_dir, 0o755) self.logger.info(f"Successfully set up persistent cache directory: {cache_dir}") return True except Exception as e: self.logger.error(f"Failed to set up persistent cache directory: {e}") return False def get_sport_live_interval(self, sport_key: str) -> int: """ Get the live_update_interval for a specific sport from config. Falls back to default values if config is not available. """ if not self.config_manager: # Default intervals - all sports use 60 seconds as default default_intervals = { 'soccer': 60, # Soccer default 'nfl': 60, # NFL default 'nhl': 60, # NHL default 'nba': 60, # NBA default 'mlb': 60, # MLB default 'milb': 60, # Minor league default 'ncaa_fb': 60, # College football default 'ncaa_baseball': 60, # College baseball default 'ncaam_basketball': 60, # College basketball default } return default_intervals.get(sport_key, 60) try: config = self.config_manager.config # For MiLB, look for "milb" config instead of "milb_scoreboard" if sport_key == 'milb': sport_config = config.get("milb", {}) else: sport_config = config.get(f"{sport_key}_scoreboard", {}) return sport_config.get("live_update_interval", 60) # Default to 60 seconds except Exception as e: self.logger.warning(f"Could not get live_update_interval for {sport_key}: {e}") return 60 # Default to 60 seconds def get_cache_strategy(self, data_type: str, sport_key: str = None) -> Dict[str, Any]: """ Get cache strategy for different data types. Now respects sport-specific live_update_interval configurations. """ # Get sport-specific live interval if provided live_interval = None if sport_key and data_type in ['sports_live', 'live_scores']: live_interval = self.get_sport_live_interval(sport_key) # Try to read sport-specific config for recent/upcoming recent_interval = None upcoming_interval = None if self.config_manager and sport_key: try: if sport_key == 'milb': sport_cfg = self.config_manager.config.get('milb', {}) else: sport_cfg = self.config_manager.config.get(f"{sport_key}_scoreboard", {}) recent_interval = sport_cfg.get('recent_update_interval') upcoming_interval = sport_cfg.get('upcoming_update_interval') except Exception as e: self.logger.debug(f"Could not read sport-specific recent/upcoming intervals for {sport_key}: {e}") strategies = { # Ultra time-sensitive data (live scores, current weather) 'live_scores': { 'max_age': live_interval or 15, # Use sport-specific interval 'memory_ttl': (live_interval or 15) * 2, # 2x for memory cache 'force_refresh': True }, 'sports_live': { 'max_age': live_interval or 30, # Use sport-specific interval 'memory_ttl': (live_interval or 30) * 2, 'force_refresh': True }, 'weather_current': { 'max_age': 300, # 5 minutes 'memory_ttl': 600, 'force_refresh': False }, # Market data (stocks, crypto) 'stocks': { 'max_age': 600, # 10 minutes 'memory_ttl': 1200, 'market_hours_only': True, 'force_refresh': False }, 'crypto': { 'max_age': 300, # 5 minutes (crypto trades 24/7) 'memory_ttl': 600, 'force_refresh': False }, # Sports data 'sports_recent': { 'max_age': recent_interval or 1800, # 30 minutes default; override by config 'memory_ttl': (recent_interval or 1800) * 2, 'force_refresh': False }, 'sports_upcoming': { 'max_age': upcoming_interval or 10800, # 3 hours default; override by config 'memory_ttl': (upcoming_interval or 10800) * 2, 'force_refresh': False }, 'sports_schedules': { 'max_age': 86400, # 24 hours 'memory_ttl': 172800, 'force_refresh': False }, 'leaderboard': { 'max_age': 604800, # 7 days (1 week) - football rankings updated weekly 'memory_ttl': 1209600, # 14 days in memory 'force_refresh': False }, # News and odds 'news': { 'max_age': 3600, # 1 hour 'memory_ttl': 7200, 'force_refresh': False }, 'odds': { 'max_age': 1800, # 30 minutes for upcoming games 'memory_ttl': 3600, 'force_refresh': False }, 'odds_live': { 'max_age': 120, # 2 minutes for live games (odds change rapidly) 'memory_ttl': 240, 'force_refresh': False }, # Static/stable data 'team_info': { 'max_age': 604800, # 1 week 'memory_ttl': 1209600, 'force_refresh': False }, 'logos': { 'max_age': 2592000, # 30 days 'memory_ttl': 5184000, 'force_refresh': False }, # Default fallback 'default': { 'max_age': 300, # 5 minutes 'memory_ttl': 600, 'force_refresh': False } } return strategies.get(data_type, strategies['default']) def get_data_type_from_key(self, key: str) -> str: """ Determine the appropriate cache strategy based on the cache key. This helps automatically select the right cache duration. """ key_lower = key.lower() # Live sports data if any(x in key_lower for x in ['live', 'current', 'scoreboard']): if 'soccer' in key_lower: return 'sports_live' # Soccer live data is very time-sensitive return 'sports_live' # Weather data if 'weather' in key_lower: return 'weather_current' # Market data if 'stock' in key_lower or 'crypto' in key_lower: if 'crypto' in key_lower: return 'crypto' return 'stocks' # News data if 'news' in key_lower: return 'news' # Odds data - differentiate between live and upcoming games if 'odds' in key_lower: # For live games, use shorter cache; for upcoming games, use longer cache if any(x in key_lower for x in ['live', 'current']): return 'odds_live' # Live odds change more frequently return 'odds' # Regular odds for upcoming games # Sports schedules and team info if any(x in key_lower for x in ['schedule', 'team_map', 'league']): return 'sports_schedules' # Recent games (last few hours) if 'recent' in key_lower: return 'sports_recent' # Upcoming games if 'upcoming' in key_lower: return 'sports_upcoming' # Static data like logos, team info if any(x in key_lower for x in ['logo', 'team_info', 'config']): return 'team_info' # Default fallback return 'default' def get_sport_key_from_cache_key(self, key: str) -> Optional[str]: """ Extract sport key from cache key to determine appropriate live_update_interval. """ key_lower = key.lower() # Map cache key patterns to sport keys sport_patterns = { 'nfl': ['nfl'], 'nba': ['nba', 'basketball'], 'mlb': ['mlb', 'baseball'], 'nhl': ['nhl', 'hockey'], 'soccer': ['soccer'], 'ncaa_fb': ['ncaa_fb', 'ncaafb', 'college_football'], 'ncaa_baseball': ['ncaa_baseball', 'college_baseball'], 'ncaam_basketball': ['ncaam_basketball', 'college_basketball'], 'milb': ['milb', 'minor_league'], } for sport_key, patterns in sport_patterns.items(): if any(pattern in key_lower for pattern in patterns): return sport_key return None def get_cached_data_with_strategy(self, key: str, data_type: str = 'default') -> Optional[Dict]: """ Get data from cache using data-type-specific strategy. Now respects sport-specific live_update_interval configurations. """ # Extract sport key for live sports data sport_key = None if data_type in ['sports_live', 'live_scores']: sport_key = self.get_sport_key_from_cache_key(key) strategy = self.get_cache_strategy(data_type, sport_key) max_age = strategy['max_age'] memory_ttl = strategy.get('memory_ttl', max_age) # For market data, check if market is open if strategy.get('market_hours_only', False) and not self._is_market_open(): # During off-hours, extend cache duration max_age *= 4 # 4x longer cache during off-hours record = self.get_cached_data(key, max_age, memory_ttl) # Unwrap if stored in { 'data': ..., 'timestamp': ... } if isinstance(record, dict) and 'data' in record: return record['data'] return record def get_with_auto_strategy(self, key: str) -> Optional[Dict]: """ Get cached data using automatically determined strategy. Now respects sport-specific live_update_interval configurations. """ data_type = self.get_data_type_from_key(key) return self.get_cached_data_with_strategy(key, data_type)