mirror of
https://github.com/ChuckBuilds/LEDMatrix.git
synced 2026-04-10 13:02:59 +00:00
feat: add error detection, monitoring, and code quality improvements (#223)
* feat: add error detection, monitoring, and code quality improvements This comprehensive update addresses automatic error detection, code quality, and plugin development experience: ## Error Detection & Monitoring - Add ErrorAggregator service for centralized error tracking - Add pattern detection for recurring errors (5+ in 60 min) - Add error dashboard API endpoints (/api/v3/errors/*) - Integrate error recording into plugin executor ## Code Quality - Remove 10 silent `except: pass` blocks in sports.py and football.py - Remove hardcoded debug log paths - Add pre-commit hooks to prevent future bare except clauses ## Validation & Type Safety - Add warnings when plugins lack config_schema.json - Add config key collision detection for plugins - Improve type coercion logging in BasePlugin ## Testing - Add test_config_validation_edge_cases.py - Add test_plugin_loading_failures.py - Add test_error_aggregator.py ## Documentation - Add PLUGIN_ERROR_HANDLING.md guide - Add CONFIG_DEBUGGING.md guide Note: GitHub Actions CI workflow is available in the plan but requires workflow scope to push. Add .github/workflows/ci.yml manually. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: address code review issues - Fix GitHub issues URL in CONFIG_DEBUGGING.md - Use RLock in error_aggregator.py to prevent deadlock in export_to_file - Distinguish missing vs invalid schema files in plugin_manager.py - Add assertions to test_null_value_for_required_field test - Remove unused initial_count variable in test_plugin_load_error_recorded - Add validation for max_age_hours in clear_old_errors API endpoint Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Chuck <chuck@example.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -387,43 +387,8 @@ class FootballLive(Football, SportsLive):
|
||||
main_img = main_img.convert('RGB') # Convert for display
|
||||
|
||||
# Display the final image
|
||||
# #region agent log
|
||||
import json
|
||||
import time
|
||||
try:
|
||||
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
|
||||
f.write(json.dumps({
|
||||
"sessionId": "debug-session",
|
||||
"runId": "run1",
|
||||
"hypothesisId": "C",
|
||||
"location": "football.py:390",
|
||||
"message": "About to update display",
|
||||
"data": {
|
||||
"force_clear": force_clear,
|
||||
"game": game.get('away_abbr', '') + "@" + game.get('home_abbr', '')
|
||||
},
|
||||
"timestamp": int(time.time() * 1000)
|
||||
}) + "\n")
|
||||
except: pass
|
||||
# #endregion
|
||||
self.display_manager.image.paste(main_img, (0, 0))
|
||||
self.display_manager.update_display() # Update display here for live
|
||||
# #region agent log
|
||||
try:
|
||||
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
|
||||
f.write(json.dumps({
|
||||
"sessionId": "debug-session",
|
||||
"runId": "run1",
|
||||
"hypothesisId": "C",
|
||||
"location": "football.py:392",
|
||||
"message": "After update display",
|
||||
"data": {
|
||||
"force_clear": force_clear
|
||||
},
|
||||
"timestamp": int(time.time() * 1000)
|
||||
}) + "\n")
|
||||
except: pass
|
||||
# #endregion
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error displaying live Football game: {e}", exc_info=True) # Changed log prefix
|
||||
|
||||
@@ -207,25 +207,6 @@ class SportsCore(ABC):
|
||||
|
||||
def display(self, force_clear: bool = False) -> bool:
|
||||
"""Common display method for all NCAA FB managers""" # Updated docstring
|
||||
# #region agent log
|
||||
import json
|
||||
try:
|
||||
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
|
||||
f.write(json.dumps({
|
||||
"sessionId": "debug-session",
|
||||
"runId": "run1",
|
||||
"hypothesisId": "D",
|
||||
"location": "sports.py:208",
|
||||
"message": "Display called",
|
||||
"data": {
|
||||
"force_clear": force_clear,
|
||||
"has_current_game": self.current_game is not None,
|
||||
"current_game": self.current_game['away_abbr'] + "@" + self.current_game['home_abbr'] if self.current_game else None
|
||||
},
|
||||
"timestamp": int(time.time() * 1000)
|
||||
}) + "\n")
|
||||
except: pass
|
||||
# #endregion
|
||||
if not self.is_enabled: # Check if module is enabled
|
||||
return False
|
||||
|
||||
@@ -248,40 +229,7 @@ class SportsCore(ABC):
|
||||
return False
|
||||
|
||||
try:
|
||||
# #region agent log
|
||||
try:
|
||||
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
|
||||
f.write(json.dumps({
|
||||
"sessionId": "debug-session",
|
||||
"runId": "run1",
|
||||
"hypothesisId": "D",
|
||||
"location": "sports.py:232",
|
||||
"message": "About to draw scorebug",
|
||||
"data": {
|
||||
"force_clear": force_clear,
|
||||
"game": self.current_game['away_abbr'] + "@" + self.current_game['home_abbr'] if self.current_game else None
|
||||
},
|
||||
"timestamp": int(time.time() * 1000)
|
||||
}) + "\n")
|
||||
except: pass
|
||||
# #endregion
|
||||
self._draw_scorebug_layout(self.current_game, force_clear)
|
||||
# #region agent log
|
||||
try:
|
||||
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
|
||||
f.write(json.dumps({
|
||||
"sessionId": "debug-session",
|
||||
"runId": "run1",
|
||||
"hypothesisId": "D",
|
||||
"location": "sports.py:235",
|
||||
"message": "After draw scorebug",
|
||||
"data": {
|
||||
"force_clear": force_clear
|
||||
},
|
||||
"timestamp": int(time.time() * 1000)
|
||||
}) + "\n")
|
||||
except: pass
|
||||
# #endregion
|
||||
# display_manager.update_display() should be called within subclass draw methods
|
||||
# or after calling display() in the main loop. Let's keep it out of the base display.
|
||||
return True
|
||||
@@ -1443,48 +1391,9 @@ class SportsLive(SportsCore):
|
||||
self.live_games = sorted(new_live_games, key=lambda g: g.get('start_time_utc') or datetime.now(timezone.utc)) # Sort by start time
|
||||
# Reset index if current game is gone or list is new
|
||||
if not self.current_game or self.current_game['id'] not in new_game_ids:
|
||||
# #region agent log
|
||||
import json
|
||||
try:
|
||||
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
|
||||
f.write(json.dumps({
|
||||
"sessionId": "debug-session",
|
||||
"runId": "run1",
|
||||
"hypothesisId": "B",
|
||||
"location": "sports.py:1393",
|
||||
"message": "Games loaded - resetting index and last_game_switch",
|
||||
"data": {
|
||||
"current_game_before": self.current_game['id'] if self.current_game else None,
|
||||
"live_games_count": len(self.live_games),
|
||||
"last_game_switch_before": self.last_game_switch,
|
||||
"current_time": current_time,
|
||||
"time_since_init": current_time - self.last_game_switch if self.last_game_switch > 0 else None
|
||||
},
|
||||
"timestamp": int(time.time() * 1000)
|
||||
}) + "\n")
|
||||
except: pass
|
||||
# #endregion
|
||||
self.current_game_index = 0
|
||||
self.current_game = self.live_games[0] if self.live_games else None
|
||||
self.last_game_switch = current_time
|
||||
# #region agent log
|
||||
try:
|
||||
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
|
||||
f.write(json.dumps({
|
||||
"sessionId": "debug-session",
|
||||
"runId": "run1",
|
||||
"hypothesisId": "B",
|
||||
"location": "sports.py:1396",
|
||||
"message": "Games loaded - after setting last_game_switch",
|
||||
"data": {
|
||||
"current_game_after": self.current_game['id'] if self.current_game else None,
|
||||
"last_game_switch_after": self.last_game_switch,
|
||||
"first_game": self.current_game['away_abbr'] + "@" + self.current_game['home_abbr'] if self.current_game else None
|
||||
},
|
||||
"timestamp": int(time.time() * 1000)
|
||||
}) + "\n")
|
||||
except: pass
|
||||
# #endregion
|
||||
else:
|
||||
# Find current game's new index if it still exists
|
||||
try:
|
||||
@@ -1530,70 +1439,9 @@ class SportsLive(SportsCore):
|
||||
# Handle game switching (outside test mode check)
|
||||
# Fix: Don't check for switching if last_game_switch is still 0 (games haven't been loaded yet)
|
||||
# This prevents immediate switching when the system has been running for a while before games load
|
||||
# #region agent log
|
||||
import json
|
||||
try:
|
||||
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
|
||||
f.write(json.dumps({
|
||||
"sessionId": "debug-session",
|
||||
"runId": "run1",
|
||||
"hypothesisId": "A",
|
||||
"location": "sports.py:1432",
|
||||
"message": "Game switch check - before condition",
|
||||
"data": {
|
||||
"test_mode": self.test_mode,
|
||||
"live_games_count": len(self.live_games),
|
||||
"current_time": current_time,
|
||||
"last_game_switch": self.last_game_switch,
|
||||
"time_since_switch": current_time - self.last_game_switch,
|
||||
"game_display_duration": self.game_display_duration,
|
||||
"current_game_index": self.current_game_index,
|
||||
"will_switch": not self.test_mode and len(self.live_games) > 1 and self.last_game_switch > 0 and (current_time - self.last_game_switch) >= self.game_display_duration
|
||||
},
|
||||
"timestamp": int(time.time() * 1000)
|
||||
}) + "\n")
|
||||
except: pass
|
||||
# #endregion
|
||||
if not self.test_mode and len(self.live_games) > 1 and self.last_game_switch > 0 and (current_time - self.last_game_switch) >= self.game_display_duration:
|
||||
# #region agent log
|
||||
try:
|
||||
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
|
||||
f.write(json.dumps({
|
||||
"sessionId": "debug-session",
|
||||
"runId": "run1",
|
||||
"hypothesisId": "A",
|
||||
"location": "sports.py:1433",
|
||||
"message": "Game switch triggered",
|
||||
"data": {
|
||||
"old_index": self.current_game_index,
|
||||
"old_game": self.current_game['away_abbr'] + "@" + self.current_game['home_abbr'] if self.current_game else None,
|
||||
"time_since_switch": current_time - self.last_game_switch,
|
||||
"last_game_switch_before": self.last_game_switch
|
||||
},
|
||||
"timestamp": int(time.time() * 1000)
|
||||
}) + "\n")
|
||||
except: pass
|
||||
# #endregion
|
||||
self.current_game_index = (self.current_game_index + 1) % len(self.live_games)
|
||||
self.current_game = self.live_games[self.current_game_index]
|
||||
self.last_game_switch = current_time
|
||||
# #region agent log
|
||||
try:
|
||||
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
|
||||
f.write(json.dumps({
|
||||
"sessionId": "debug-session",
|
||||
"runId": "run1",
|
||||
"hypothesisId": "A",
|
||||
"location": "sports.py:1436",
|
||||
"message": "Game switch completed",
|
||||
"data": {
|
||||
"new_index": self.current_game_index,
|
||||
"new_game": self.current_game['away_abbr'] + "@" + self.current_game['home_abbr'] if self.current_game else None,
|
||||
"last_game_switch_after": self.last_game_switch
|
||||
},
|
||||
"timestamp": int(time.time() * 1000)
|
||||
}) + "\n")
|
||||
except: pass
|
||||
# #endregion
|
||||
self.logger.info(f"Switched live view to: {self.current_game['away_abbr']}@{self.current_game['home_abbr']}") # Changed log prefix
|
||||
# Force display update via flag or direct call if needed, but usually let main loop handle
|
||||
|
||||
418
src/error_aggregator.py
Normal file
418
src/error_aggregator.py
Normal file
@@ -0,0 +1,418 @@
|
||||
"""
|
||||
Error Aggregation Service
|
||||
|
||||
Provides centralized error tracking, pattern detection, and reporting
|
||||
for the LEDMatrix system. Enables automatic bug detection by tracking
|
||||
error frequency, patterns, and context.
|
||||
|
||||
This is a local-only implementation with no external dependencies.
|
||||
Errors are stored in memory with optional JSON export.
|
||||
"""
|
||||
|
||||
import threading
|
||||
import traceback
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Any, Callable
|
||||
import logging
|
||||
|
||||
from src.exceptions import LEDMatrixError
|
||||
|
||||
|
||||
@dataclass
class ErrorRecord:
    """A single error occurrence plus the context it was captured with."""

    error_type: str                     # exception class name, e.g. "KeyError"
    message: str                        # str(error) at record time
    timestamp: datetime                 # wall-clock time the error was recorded
    context: Dict[str, Any] = field(default_factory=dict)  # extra structured details
    plugin_id: Optional[str] = None     # plugin that raised, if known
    operation: Optional[str] = None     # operation name, e.g. "update", "display"
    stack_trace: Optional[str] = None   # formatted traceback text, if captured

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dictionary view of this record."""
        # dict(...) keyword form keeps the same key order as a literal would.
        return dict(
            error_type=self.error_type,
            message=self.message,
            timestamp=self.timestamp.isoformat(),
            context=self.context,
            plugin_id=self.plugin_id,
            operation=self.operation,
            stack_trace=self.stack_trace,
        )
|
||||
|
||||
|
||||
@dataclass
class ErrorPattern:
    """Summary of a recurring error type detected within the time window."""

    error_type: str
    count: int
    first_seen: datetime
    last_seen: datetime
    affected_plugins: List[str] = field(default_factory=list)
    sample_messages: List[str] = field(default_factory=list)
    severity: str = "warning"  # one of: "warning", "error", "critical"

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dictionary view of this pattern."""
        # De-duplicate plugin IDs; set() means the order is not guaranteed.
        unique_plugins = list(set(self.affected_plugins))
        return dict(
            error_type=self.error_type,
            count=self.count,
            first_seen=self.first_seen.isoformat(),
            last_seen=self.last_seen.isoformat(),
            affected_plugins=unique_plugins,
            sample_messages=self.sample_messages[:3],  # cap payload at 3 samples
            severity=self.severity,
        )
|
||||
|
||||
|
||||
class ErrorAggregator:
|
||||
"""
|
||||
Aggregates and analyzes errors across the system.
|
||||
|
||||
Features:
|
||||
- Error counting by type, plugin, and time window
|
||||
- Pattern detection (recurring errors)
|
||||
- Error rate alerting via callbacks
|
||||
- Export for analytics/reporting
|
||||
|
||||
Thread-safe for concurrent access.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
max_records: int = 1000,
|
||||
pattern_threshold: int = 5,
|
||||
pattern_window_minutes: int = 60,
|
||||
export_path: Optional[Path] = None
|
||||
):
|
||||
"""
|
||||
Initialize the error aggregator.
|
||||
|
||||
Args:
|
||||
max_records: Maximum number of error records to keep in memory
|
||||
pattern_threshold: Number of occurrences to detect a pattern
|
||||
pattern_window_minutes: Time window for pattern detection
|
||||
export_path: Optional path for JSON export (auto-export on pattern detection)
|
||||
"""
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.max_records = max_records
|
||||
self.pattern_threshold = pattern_threshold
|
||||
self.pattern_window = timedelta(minutes=pattern_window_minutes)
|
||||
self.export_path = export_path
|
||||
|
||||
self._records: List[ErrorRecord] = []
|
||||
self._error_counts: Dict[str, int] = defaultdict(int)
|
||||
self._plugin_error_counts: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
|
||||
self._patterns: Dict[str, ErrorPattern] = {}
|
||||
self._pattern_callbacks: List[Callable[[ErrorPattern], None]] = []
|
||||
self._lock = threading.RLock() # RLock allows nested acquisition for export_to_file
|
||||
|
||||
# Track session start for relative timing
|
||||
self._session_start = datetime.now()
|
||||
|
||||
def record_error(
|
||||
self,
|
||||
error: Exception,
|
||||
context: Optional[Dict[str, Any]] = None,
|
||||
plugin_id: Optional[str] = None,
|
||||
operation: Optional[str] = None
|
||||
) -> ErrorRecord:
|
||||
"""
|
||||
Record an error occurrence.
|
||||
|
||||
Args:
|
||||
error: The exception that occurred
|
||||
context: Optional context dictionary with additional details
|
||||
plugin_id: Optional plugin ID that caused the error
|
||||
operation: Optional operation name (e.g., "update", "display")
|
||||
|
||||
Returns:
|
||||
The created ErrorRecord
|
||||
"""
|
||||
with self._lock:
|
||||
error_type = type(error).__name__
|
||||
|
||||
# Extract additional context from LEDMatrixError subclasses
|
||||
error_context = context or {}
|
||||
if isinstance(error, LEDMatrixError) and error.context:
|
||||
error_context.update(error.context)
|
||||
|
||||
record = ErrorRecord(
|
||||
error_type=error_type,
|
||||
message=str(error),
|
||||
timestamp=datetime.now(),
|
||||
context=error_context,
|
||||
plugin_id=plugin_id,
|
||||
operation=operation,
|
||||
stack_trace=traceback.format_exc()
|
||||
)
|
||||
|
||||
# Add record (with size limit)
|
||||
self._records.append(record)
|
||||
if len(self._records) > self.max_records:
|
||||
self._records.pop(0)
|
||||
|
||||
# Update counts
|
||||
self._error_counts[error_type] += 1
|
||||
if plugin_id:
|
||||
self._plugin_error_counts[plugin_id][error_type] += 1
|
||||
|
||||
# Check for patterns
|
||||
self._detect_pattern(record)
|
||||
|
||||
# Log the error
|
||||
self.logger.debug(
|
||||
f"Error recorded: {error_type} - {str(error)[:100]}",
|
||||
extra={"plugin_id": plugin_id, "operation": operation}
|
||||
)
|
||||
|
||||
return record
|
||||
|
||||
def _detect_pattern(self, record: ErrorRecord) -> None:
|
||||
"""Detect recurring error patterns."""
|
||||
cutoff = datetime.now() - self.pattern_window
|
||||
recent_same_type = [
|
||||
r for r in self._records
|
||||
if r.error_type == record.error_type and r.timestamp > cutoff
|
||||
]
|
||||
|
||||
if len(recent_same_type) >= self.pattern_threshold:
|
||||
pattern_key = record.error_type
|
||||
is_new_pattern = pattern_key not in self._patterns
|
||||
|
||||
# Determine severity based on count
|
||||
count = len(recent_same_type)
|
||||
if count > self.pattern_threshold * 3:
|
||||
severity = "critical"
|
||||
elif count > self.pattern_threshold * 2:
|
||||
severity = "error"
|
||||
else:
|
||||
severity = "warning"
|
||||
|
||||
# Collect affected plugins
|
||||
affected_plugins = [r.plugin_id for r in recent_same_type if r.plugin_id]
|
||||
|
||||
# Collect sample messages
|
||||
sample_messages = list(set(r.message for r in recent_same_type[:5]))
|
||||
|
||||
if is_new_pattern:
|
||||
pattern = ErrorPattern(
|
||||
error_type=record.error_type,
|
||||
count=count,
|
||||
first_seen=recent_same_type[0].timestamp,
|
||||
last_seen=record.timestamp,
|
||||
affected_plugins=affected_plugins,
|
||||
sample_messages=sample_messages,
|
||||
severity=severity
|
||||
)
|
||||
self._patterns[pattern_key] = pattern
|
||||
|
||||
self.logger.warning(
|
||||
f"Error pattern detected: {record.error_type} occurred "
|
||||
f"{count} times in last {self.pattern_window}. "
|
||||
f"Affected plugins: {set(affected_plugins) or 'unknown'}"
|
||||
)
|
||||
|
||||
# Notify callbacks
|
||||
for callback in self._pattern_callbacks:
|
||||
try:
|
||||
callback(pattern)
|
||||
except Exception as e:
|
||||
self.logger.error(f"Pattern callback failed: {e}")
|
||||
|
||||
# Auto-export if path configured
|
||||
if self.export_path:
|
||||
self._auto_export()
|
||||
else:
|
||||
# Update existing pattern
|
||||
self._patterns[pattern_key].count = count
|
||||
self._patterns[pattern_key].last_seen = record.timestamp
|
||||
self._patterns[pattern_key].severity = severity
|
||||
self._patterns[pattern_key].affected_plugins.extend(affected_plugins)
|
||||
|
||||
def on_pattern_detected(self, callback: Callable[[ErrorPattern], None]) -> None:
|
||||
"""
|
||||
Register a callback to be called when a new error pattern is detected.
|
||||
|
||||
Args:
|
||||
callback: Function that takes an ErrorPattern as argument
|
||||
"""
|
||||
self._pattern_callbacks.append(callback)
|
||||
|
||||
def get_error_summary(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get summary of all errors for reporting.
|
||||
|
||||
Returns:
|
||||
Dictionary with error statistics and recent errors
|
||||
"""
|
||||
with self._lock:
|
||||
# Calculate error rate (errors per hour)
|
||||
session_duration = (datetime.now() - self._session_start).total_seconds() / 3600
|
||||
error_rate = len(self._records) / max(session_duration, 0.01)
|
||||
|
||||
return {
|
||||
"session_start": self._session_start.isoformat(),
|
||||
"total_errors": len(self._records),
|
||||
"error_rate_per_hour": round(error_rate, 2),
|
||||
"error_counts_by_type": dict(self._error_counts),
|
||||
"plugin_error_counts": {
|
||||
k: dict(v) for k, v in self._plugin_error_counts.items()
|
||||
},
|
||||
"active_patterns": {
|
||||
k: v.to_dict() for k, v in self._patterns.items()
|
||||
},
|
||||
"recent_errors": [
|
||||
r.to_dict() for r in self._records[-20:]
|
||||
]
|
||||
}
|
||||
|
||||
def get_plugin_health(self, plugin_id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Get health status for a specific plugin.
|
||||
|
||||
Args:
|
||||
plugin_id: Plugin ID to check
|
||||
|
||||
Returns:
|
||||
Dictionary with plugin error statistics
|
||||
"""
|
||||
with self._lock:
|
||||
plugin_errors = self._plugin_error_counts.get(plugin_id, {})
|
||||
recent_plugin_errors = [
|
||||
r for r in self._records[-100:]
|
||||
if r.plugin_id == plugin_id
|
||||
]
|
||||
|
||||
# Determine health status
|
||||
recent_count = len(recent_plugin_errors)
|
||||
if recent_count == 0:
|
||||
status = "healthy"
|
||||
elif recent_count < 5:
|
||||
status = "degraded"
|
||||
else:
|
||||
status = "unhealthy"
|
||||
|
||||
return {
|
||||
"plugin_id": plugin_id,
|
||||
"status": status,
|
||||
"total_errors": sum(plugin_errors.values()),
|
||||
"error_types": dict(plugin_errors),
|
||||
"recent_error_count": recent_count,
|
||||
"last_error": recent_plugin_errors[-1].to_dict() if recent_plugin_errors else None
|
||||
}
|
||||
|
||||
def clear_old_records(self, max_age_hours: int = 24) -> int:
|
||||
"""
|
||||
Clear records older than specified age.
|
||||
|
||||
Args:
|
||||
max_age_hours: Maximum age in hours
|
||||
|
||||
Returns:
|
||||
Number of records cleared
|
||||
"""
|
||||
with self._lock:
|
||||
cutoff = datetime.now() - timedelta(hours=max_age_hours)
|
||||
original_count = len(self._records)
|
||||
self._records = [r for r in self._records if r.timestamp > cutoff]
|
||||
cleared = original_count - len(self._records)
|
||||
|
||||
if cleared > 0:
|
||||
self.logger.info(f"Cleared {cleared} old error records")
|
||||
|
||||
return cleared
|
||||
|
||||
def export_to_file(self, filepath: Path) -> None:
|
||||
"""
|
||||
Export error data to JSON file.
|
||||
|
||||
Args:
|
||||
filepath: Path to export file
|
||||
"""
|
||||
with self._lock:
|
||||
data = {
|
||||
"exported_at": datetime.now().isoformat(),
|
||||
"summary": self.get_error_summary(),
|
||||
"all_records": [r.to_dict() for r in self._records]
|
||||
}
|
||||
filepath.parent.mkdir(parents=True, exist_ok=True)
|
||||
filepath.write_text(json.dumps(data, indent=2))
|
||||
self.logger.info(f"Exported error data to {filepath}")
|
||||
|
||||
def _auto_export(self) -> None:
|
||||
"""Auto-export on pattern detection (if export_path configured)."""
|
||||
if self.export_path:
|
||||
try:
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
filepath = self.export_path / f"errors_{timestamp}.json"
|
||||
self.export_to_file(filepath)
|
||||
except Exception as e:
|
||||
self.logger.error(f"Auto-export failed: {e}")
|
||||
|
||||
|
||||
# Global singleton instance; lazily created under _aggregator_lock.
_error_aggregator: Optional[ErrorAggregator] = None
_aggregator_lock = threading.Lock()


def get_error_aggregator(
    max_records: int = 1000,
    pattern_threshold: int = 5,
    pattern_window_minutes: int = 60,
    export_path: Optional[Path] = None
) -> ErrorAggregator:
    """
    Get or create the global error aggregator instance.

    The keyword arguments only take effect on the very first call, when the
    singleton is actually constructed; every later call ignores them and
    returns the existing instance.

    Args:
        max_records: Maximum records to keep (only used on first call)
        pattern_threshold: Pattern detection threshold (only used on first call)
        pattern_window_minutes: Pattern detection window (only used on first call)
        export_path: Export path for auto-export (only used on first call)

    Returns:
        The global ErrorAggregator instance
    """
    global _error_aggregator

    # Lock guards the check-then-create so two threads cannot both build
    # an aggregator during first use.
    with _aggregator_lock:
        if _error_aggregator is None:
            _error_aggregator = ErrorAggregator(
                max_records=max_records,
                pattern_threshold=pattern_threshold,
                pattern_window_minutes=pattern_window_minutes,
                export_path=export_path,
            )
        return _error_aggregator
|
||||
|
||||
|
||||
def record_error(
    error: Exception,
    context: Optional[Dict[str, Any]] = None,
    plugin_id: Optional[str] = None,
    operation: Optional[str] = None
) -> ErrorRecord:
    """
    Convenience function to record an error to the global aggregator.

    Args:
        error: The exception that occurred
        context: Optional context dictionary
        plugin_id: Optional plugin ID
        operation: Optional operation name

    Returns:
        The created ErrorRecord
    """
    # Delegate straight to the singleton; all bookkeeping lives there.
    aggregator = get_error_aggregator()
    return aggregator.record_error(
        error=error,
        context=context,
        plugin_id=plugin_id,
        operation=operation,
    )
|
||||
@@ -133,11 +133,11 @@ class BasePlugin(ABC):
|
||||
def get_display_duration(self) -> float:
|
||||
"""
|
||||
Get the display duration for this plugin instance.
|
||||
|
||||
|
||||
Automatically detects duration from:
|
||||
1. self.display_duration instance variable (if exists)
|
||||
2. self.config.get("display_duration", 15.0) (fallback)
|
||||
|
||||
|
||||
Can be overridden by plugins to provide dynamic durations based
|
||||
on content (e.g., longer duration for more complex displays).
|
||||
|
||||
@@ -155,27 +155,78 @@ class BasePlugin(ABC):
|
||||
elif isinstance(duration, (int, float)):
|
||||
if duration > 0:
|
||||
return float(duration)
|
||||
else:
|
||||
self.logger.debug(
|
||||
"display_duration instance variable is non-positive (%s), using config fallback",
|
||||
duration
|
||||
)
|
||||
# Try converting string representations of numbers
|
||||
elif isinstance(duration, str):
|
||||
try:
|
||||
duration_float = float(duration)
|
||||
if duration_float > 0:
|
||||
return duration_float
|
||||
else:
|
||||
self.logger.debug(
|
||||
"display_duration string value is non-positive (%s), using config fallback",
|
||||
duration
|
||||
)
|
||||
except (ValueError, TypeError):
|
||||
pass # Fall through to config
|
||||
except (TypeError, ValueError, AttributeError):
|
||||
pass # Fall through to config
|
||||
self.logger.warning(
|
||||
"display_duration instance variable has invalid string value '%s', using config fallback",
|
||||
duration
|
||||
)
|
||||
else:
|
||||
self.logger.warning(
|
||||
"display_duration instance variable has unexpected type %s (value: %s), using config fallback",
|
||||
type(duration).__name__, duration
|
||||
)
|
||||
except (TypeError, ValueError, AttributeError) as e:
|
||||
self.logger.warning(
|
||||
"Error reading display_duration instance variable: %s, using config fallback",
|
||||
e
|
||||
)
|
||||
|
||||
# Fall back to config
|
||||
config_duration = self.config.get("display_duration", 15.0)
|
||||
try:
|
||||
# Ensure config value is also a valid float
|
||||
if isinstance(config_duration, (int, float)):
|
||||
return float(config_duration) if config_duration > 0 else 15.0
|
||||
if config_duration > 0:
|
||||
return float(config_duration)
|
||||
else:
|
||||
self.logger.debug(
|
||||
"Config display_duration is non-positive (%s), using default 15.0",
|
||||
config_duration
|
||||
)
|
||||
return 15.0
|
||||
elif isinstance(config_duration, str):
|
||||
return float(config_duration) if float(config_duration) > 0 else 15.0
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
try:
|
||||
duration_float = float(config_duration)
|
||||
if duration_float > 0:
|
||||
return duration_float
|
||||
else:
|
||||
self.logger.debug(
|
||||
"Config display_duration string is non-positive (%s), using default 15.0",
|
||||
config_duration
|
||||
)
|
||||
return 15.0
|
||||
except ValueError:
|
||||
self.logger.warning(
|
||||
"Config display_duration has invalid string value '%s', using default 15.0",
|
||||
config_duration
|
||||
)
|
||||
return 15.0
|
||||
else:
|
||||
self.logger.warning(
|
||||
"Config display_duration has unexpected type %s (value: %s), using default 15.0",
|
||||
type(config_duration).__name__, config_duration
|
||||
)
|
||||
except (ValueError, TypeError) as e:
|
||||
self.logger.warning(
|
||||
"Error processing config display_duration: %s, using default 15.0",
|
||||
e
|
||||
)
|
||||
|
||||
return 15.0
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ import logging
|
||||
|
||||
from src.exceptions import PluginError
|
||||
from src.logging_config import get_logger
|
||||
from src.error_aggregator import record_error
|
||||
|
||||
|
||||
class TimeoutError(Exception):
|
||||
@@ -80,12 +81,15 @@ class PluginExecutor:
|
||||
if not result_container['completed']:
|
||||
error_msg = f"{plugin_context} operation timed out after {timeout}s"
|
||||
self.logger.error(error_msg)
|
||||
raise TimeoutError(error_msg)
|
||||
|
||||
timeout_error = TimeoutError(error_msg)
|
||||
record_error(timeout_error, plugin_id=plugin_id, operation="timeout")
|
||||
raise timeout_error
|
||||
|
||||
if result_container['exception']:
|
||||
error = result_container['exception']
|
||||
error_msg = f"{plugin_context} operation failed: {error}"
|
||||
self.logger.error(error_msg, exc_info=True)
|
||||
record_error(error, plugin_id=plugin_id, operation="execute")
|
||||
raise PluginError(error_msg, plugin_id=plugin_id) from error
|
||||
|
||||
return result_container['value']
|
||||
@@ -128,7 +132,7 @@ class PluginExecutor:
|
||||
self.logger.error("Plugin %s update() timed out", plugin_id)
|
||||
return False
|
||||
except PluginError:
|
||||
# Already logged in execute_with_timeout
|
||||
# Already logged and recorded in execute_with_timeout
|
||||
return False
|
||||
except Exception as e:
|
||||
self.logger.error(
|
||||
@@ -137,6 +141,7 @@ class PluginExecutor:
|
||||
e,
|
||||
exc_info=True
|
||||
)
|
||||
record_error(e, plugin_id=plugin_id, operation="update")
|
||||
return False
|
||||
|
||||
def execute_display(
|
||||
@@ -203,7 +208,7 @@ class PluginExecutor:
|
||||
self.logger.error("Plugin %s display() timed out", plugin_id)
|
||||
return False
|
||||
except PluginError:
|
||||
# Already logged in execute_with_timeout
|
||||
# Already logged and recorded in execute_with_timeout
|
||||
return False
|
||||
except Exception as e:
|
||||
self.logger.error(
|
||||
@@ -212,6 +217,7 @@ class PluginExecutor:
|
||||
e,
|
||||
exc_info=True
|
||||
)
|
||||
record_error(e, plugin_id=plugin_id, operation="display")
|
||||
return False
|
||||
|
||||
def execute_safe(
|
||||
|
||||
@@ -136,13 +136,24 @@ class PluginManager:
|
||||
def discover_plugins(self) -> List[str]:
    """
    Discover all plugins in the plugins directory.

    Also checks for potential config key collisions and logs warnings.

    Returns:
        List of plugin IDs
    """
    self.logger.info("Discovering plugins in %s", self.plugins_dir)
    plugin_ids = self._scan_directory_for_plugins(self.plugins_dir)
    self.logger.info("Discovered %d plugin(s)", len(plugin_ids))

    # Surface (but never fail on) plugin-ID collisions with reserved config
    # keys or with other plugins.
    for collision in self.schema_manager.detect_config_key_collisions(plugin_ids):
        self.logger.warning(
            "Config collision detected: %s",
            collision.get('message', str(collision))
        )

    return plugin_ids
|
||||
|
||||
def _get_dependency_marker_path(self, plugin_id: str) -> Path:
|
||||
@@ -288,6 +299,24 @@ class PluginManager:
|
||||
else:
|
||||
config = {}
|
||||
|
||||
# Check if plugin has a config schema
|
||||
schema_path = self.schema_manager.get_schema_path(plugin_id)
|
||||
if schema_path is None:
|
||||
# Schema file doesn't exist
|
||||
self.logger.warning(
|
||||
f"Plugin '{plugin_id}' has no config_schema.json - configuration will not be validated. "
|
||||
f"Consider adding a schema file for better error detection and user experience."
|
||||
)
|
||||
else:
|
||||
# Schema file exists, try to load it
|
||||
schema = self.schema_manager.load_schema(plugin_id)
|
||||
if schema is None:
|
||||
# Schema exists but couldn't be loaded (likely invalid JSON or schema)
|
||||
self.logger.warning(
|
||||
f"Plugin '{plugin_id}' has a config_schema.json but it could not be loaded. "
|
||||
f"The schema may be invalid. Please verify the schema file at: {schema_path}"
|
||||
)
|
||||
|
||||
# Merge config with schema defaults to ensure all defaults are applied
|
||||
try:
|
||||
defaults = self.schema_manager.generate_default_config(plugin_id, use_cache=True)
|
||||
|
||||
@@ -445,3 +445,62 @@ class SchemaManager:
|
||||
replace_none_with_defaults(merged, defaults)
|
||||
return merged
|
||||
|
||||
def detect_config_key_collisions(
    self,
    plugin_ids: List[str]
) -> List[Dict[str, Any]]:
    """
    Detect config key collisions between plugins.

    Checks for:
    1. Plugin IDs that collide with reserved system config keys
    2. Plugin IDs that might cause confusion or conflicts (case-insensitive
       duplicates among the given IDs)

    Args:
        plugin_ids: List of plugin identifiers to check

    Returns:
        List of collision warnings, each containing:
        - type: 'reserved_key_collision' or 'case_collision'
        - plugin_id: The plugin ID involved
        - message: Human-readable warning message
        ('case_collision' entries additionally carry 'conflicting_id'.)
    """
    collisions: List[Dict[str, Any]] = []

    # Reserved top-level config keys that plugins should not use as IDs
    reserved_keys = {
        'display', 'schedule', 'timezone', 'plugin_system',
        'display_modes', 'system', 'hardware', 'debug',
        'log_level', 'emulator', 'web_interface'
    }
    # Fix: hoist the lowercased reserved set out of the loop — the previous
    # code rebuilt {k.lower() for k in reserved_keys} once per plugin_id.
    reserved_lower = {k.lower() for k in reserved_keys}

    # Maps lowercase ID -> first plugin ID seen with that spelling,
    # for case-insensitive collision detection.
    lowercase_ids: Dict[str, str] = {}

    for plugin_id in plugin_ids:
        lower_id = plugin_id.lower()

        # Check reserved key collision
        if lower_id in reserved_lower:
            collisions.append({
                "type": "reserved_key_collision",
                "plugin_id": plugin_id,
                "message": f"Plugin ID '{plugin_id}' conflicts with reserved config key. "
                           f"This may cause configuration issues."
            })

        # Check for case-insensitive collisions between plugins; an exact
        # duplicate ID is not reported, only a differently-cased one.
        if lower_id in lowercase_ids:
            existing_id = lowercase_ids[lower_id]
            if existing_id != plugin_id:
                collisions.append({
                    "type": "case_collision",
                    "plugin_id": plugin_id,
                    "conflicting_id": existing_id,
                    "message": f"Plugin ID '{plugin_id}' may conflict with '{existing_id}' "
                               f"on case-insensitive file systems."
                })
        else:
            lowercase_ids[lower_id] = plugin_id

    return collisions
|
||||
|
||||
|
||||
Reference in New Issue
Block a user