feat: add error detection, monitoring, and code quality improvements (#223)

* feat: add error detection, monitoring, and code quality improvements

This comprehensive update addresses automatic error detection, code
quality, and plugin development experience:

## Error Detection & Monitoring
- Add ErrorAggregator service for centralized error tracking
- Add pattern detection for recurring errors (5+ in 60 min)
- Add error dashboard API endpoints (/api/v3/errors/*)
- Integrate error recording into plugin executor

## Code Quality
- Remove 10 silent `except: pass` blocks in sports.py and football.py
- Remove hardcoded debug log paths
- Add pre-commit hooks to prevent future bare except clauses

## Validation & Type Safety
- Add warnings when plugins lack config_schema.json
- Add config key collision detection for plugins
- Improve type coercion logging in BasePlugin

## Testing
- Add test_config_validation_edge_cases.py
- Add test_plugin_loading_failures.py
- Add test_error_aggregator.py

## Documentation
- Add PLUGIN_ERROR_HANDLING.md guide
- Add CONFIG_DEBUGGING.md guide

Note: GitHub Actions CI workflow is available in the plan but requires
workflow scope to push. Add .github/workflows/ci.yml manually.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* fix: address code review issues

- Fix GitHub issues URL in CONFIG_DEBUGGING.md
- Use RLock in error_aggregator.py to prevent deadlock in export_to_file
- Distinguish missing vs invalid schema files in plugin_manager.py
- Add assertions to test_null_value_for_required_field test
- Remove unused initial_count variable in test_plugin_load_error_recorded
- Add validation for max_age_hours in clear_old_errors API endpoint

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Chuck <chuck@example.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Chuck
2026-01-30 10:05:09 -05:00
committed by GitHub
parent 8912501604
commit 8fb2800495
14 changed files with 2330 additions and 202 deletions

View File

@@ -387,43 +387,8 @@ class FootballLive(Football, SportsLive):
main_img = main_img.convert('RGB') # Convert for display
# Display the final image
# #region agent log
import json
import time
try:
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
f.write(json.dumps({
"sessionId": "debug-session",
"runId": "run1",
"hypothesisId": "C",
"location": "football.py:390",
"message": "About to update display",
"data": {
"force_clear": force_clear,
"game": game.get('away_abbr', '') + "@" + game.get('home_abbr', '')
},
"timestamp": int(time.time() * 1000)
}) + "\n")
except: pass
# #endregion
self.display_manager.image.paste(main_img, (0, 0))
self.display_manager.update_display() # Update display here for live
# #region agent log
try:
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
f.write(json.dumps({
"sessionId": "debug-session",
"runId": "run1",
"hypothesisId": "C",
"location": "football.py:392",
"message": "After update display",
"data": {
"force_clear": force_clear
},
"timestamp": int(time.time() * 1000)
}) + "\n")
except: pass
# #endregion
except Exception as e:
self.logger.error(f"Error displaying live Football game: {e}", exc_info=True) # Changed log prefix

View File

@@ -207,25 +207,6 @@ class SportsCore(ABC):
def display(self, force_clear: bool = False) -> bool:
"""Common display method for all NCAA FB managers""" # Updated docstring
# #region agent log
import json
try:
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
f.write(json.dumps({
"sessionId": "debug-session",
"runId": "run1",
"hypothesisId": "D",
"location": "sports.py:208",
"message": "Display called",
"data": {
"force_clear": force_clear,
"has_current_game": self.current_game is not None,
"current_game": self.current_game['away_abbr'] + "@" + self.current_game['home_abbr'] if self.current_game else None
},
"timestamp": int(time.time() * 1000)
}) + "\n")
except: pass
# #endregion
if not self.is_enabled: # Check if module is enabled
return False
@@ -248,40 +229,7 @@ class SportsCore(ABC):
return False
try:
# #region agent log
try:
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
f.write(json.dumps({
"sessionId": "debug-session",
"runId": "run1",
"hypothesisId": "D",
"location": "sports.py:232",
"message": "About to draw scorebug",
"data": {
"force_clear": force_clear,
"game": self.current_game['away_abbr'] + "@" + self.current_game['home_abbr'] if self.current_game else None
},
"timestamp": int(time.time() * 1000)
}) + "\n")
except: pass
# #endregion
self._draw_scorebug_layout(self.current_game, force_clear)
# #region agent log
try:
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
f.write(json.dumps({
"sessionId": "debug-session",
"runId": "run1",
"hypothesisId": "D",
"location": "sports.py:235",
"message": "After draw scorebug",
"data": {
"force_clear": force_clear
},
"timestamp": int(time.time() * 1000)
}) + "\n")
except: pass
# #endregion
# display_manager.update_display() should be called within subclass draw methods
# or after calling display() in the main loop. Let's keep it out of the base display.
return True
@@ -1443,48 +1391,9 @@ class SportsLive(SportsCore):
self.live_games = sorted(new_live_games, key=lambda g: g.get('start_time_utc') or datetime.now(timezone.utc)) # Sort by start time
# Reset index if current game is gone or list is new
if not self.current_game or self.current_game['id'] not in new_game_ids:
# #region agent log
import json
try:
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
f.write(json.dumps({
"sessionId": "debug-session",
"runId": "run1",
"hypothesisId": "B",
"location": "sports.py:1393",
"message": "Games loaded - resetting index and last_game_switch",
"data": {
"current_game_before": self.current_game['id'] if self.current_game else None,
"live_games_count": len(self.live_games),
"last_game_switch_before": self.last_game_switch,
"current_time": current_time,
"time_since_init": current_time - self.last_game_switch if self.last_game_switch > 0 else None
},
"timestamp": int(time.time() * 1000)
}) + "\n")
except: pass
# #endregion
self.current_game_index = 0
self.current_game = self.live_games[0] if self.live_games else None
self.last_game_switch = current_time
# #region agent log
try:
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
f.write(json.dumps({
"sessionId": "debug-session",
"runId": "run1",
"hypothesisId": "B",
"location": "sports.py:1396",
"message": "Games loaded - after setting last_game_switch",
"data": {
"current_game_after": self.current_game['id'] if self.current_game else None,
"last_game_switch_after": self.last_game_switch,
"first_game": self.current_game['away_abbr'] + "@" + self.current_game['home_abbr'] if self.current_game else None
},
"timestamp": int(time.time() * 1000)
}) + "\n")
except: pass
# #endregion
else:
# Find current game's new index if it still exists
try:
@@ -1530,70 +1439,9 @@ class SportsLive(SportsCore):
# Handle game switching (outside test mode check)
# Fix: Don't check for switching if last_game_switch is still 0 (games haven't been loaded yet)
# This prevents immediate switching when the system has been running for a while before games load
# #region agent log
import json
try:
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
f.write(json.dumps({
"sessionId": "debug-session",
"runId": "run1",
"hypothesisId": "A",
"location": "sports.py:1432",
"message": "Game switch check - before condition",
"data": {
"test_mode": self.test_mode,
"live_games_count": len(self.live_games),
"current_time": current_time,
"last_game_switch": self.last_game_switch,
"time_since_switch": current_time - self.last_game_switch,
"game_display_duration": self.game_display_duration,
"current_game_index": self.current_game_index,
"will_switch": not self.test_mode and len(self.live_games) > 1 and self.last_game_switch > 0 and (current_time - self.last_game_switch) >= self.game_display_duration
},
"timestamp": int(time.time() * 1000)
}) + "\n")
except: pass
# #endregion
if not self.test_mode and len(self.live_games) > 1 and self.last_game_switch > 0 and (current_time - self.last_game_switch) >= self.game_display_duration:
# #region agent log
try:
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
f.write(json.dumps({
"sessionId": "debug-session",
"runId": "run1",
"hypothesisId": "A",
"location": "sports.py:1433",
"message": "Game switch triggered",
"data": {
"old_index": self.current_game_index,
"old_game": self.current_game['away_abbr'] + "@" + self.current_game['home_abbr'] if self.current_game else None,
"time_since_switch": current_time - self.last_game_switch,
"last_game_switch_before": self.last_game_switch
},
"timestamp": int(time.time() * 1000)
}) + "\n")
except: pass
# #endregion
self.current_game_index = (self.current_game_index + 1) % len(self.live_games)
self.current_game = self.live_games[self.current_game_index]
self.last_game_switch = current_time
# #region agent log
try:
with open('/home/chuck/Github/LEDMatrix/.cursor/debug.log', 'a') as f:
f.write(json.dumps({
"sessionId": "debug-session",
"runId": "run1",
"hypothesisId": "A",
"location": "sports.py:1436",
"message": "Game switch completed",
"data": {
"new_index": self.current_game_index,
"new_game": self.current_game['away_abbr'] + "@" + self.current_game['home_abbr'] if self.current_game else None,
"last_game_switch_after": self.last_game_switch
},
"timestamp": int(time.time() * 1000)
}) + "\n")
except: pass
# #endregion
self.logger.info(f"Switched live view to: {self.current_game['away_abbr']}@{self.current_game['home_abbr']}") # Changed log prefix
# Force display update via flag or direct call if needed, but usually let main loop handle

418
src/error_aggregator.py Normal file
View File

@@ -0,0 +1,418 @@
"""
Error Aggregation Service
Provides centralized error tracking, pattern detection, and reporting
for the LEDMatrix system. Enables automatic bug detection by tracking
error frequency, patterns, and context.
This is a local-only implementation with no external dependencies.
Errors are stored in memory with optional JSON export.
"""
import threading
import traceback
import json
from collections import defaultdict
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Any, Callable
import logging
from src.exceptions import LEDMatrixError
@dataclass
class ErrorRecord:
    """Record of a single error occurrence."""
    error_type: str
    message: str
    timestamp: datetime
    context: Dict[str, Any] = field(default_factory=dict)
    plugin_id: Optional[str] = None
    operation: Optional[str] = None
    stack_trace: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this record into a JSON-friendly dictionary.

        The timestamp is rendered as an ISO-8601 string; all other fields
        are passed through unchanged.
        """
        serialized = dict(
            error_type=self.error_type,
            message=self.message,
            timestamp=self.timestamp.isoformat(),
            context=self.context,
            plugin_id=self.plugin_id,
            operation=self.operation,
            stack_trace=self.stack_trace,
        )
        return serialized
@dataclass
class ErrorPattern:
    """Detected error pattern for automatic detection."""
    error_type: str
    count: int
    first_seen: datetime
    last_seen: datetime
    affected_plugins: List[str] = field(default_factory=list)
    sample_messages: List[str] = field(default_factory=list)
    severity: str = "warning"  # warning, error, critical

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this pattern into a JSON-friendly dictionary.

        Plugin IDs are de-duplicated and at most three sample messages
        are included; timestamps are rendered as ISO-8601 strings.
        """
        unique_plugins = set(self.affected_plugins)
        return dict(
            error_type=self.error_type,
            count=self.count,
            first_seen=self.first_seen.isoformat(),
            last_seen=self.last_seen.isoformat(),
            affected_plugins=list(unique_plugins),
            sample_messages=self.sample_messages[:3],  # keep only 3 samples
            severity=self.severity,
        )
class ErrorAggregator:
    """
    Aggregates and analyzes errors across the system.

    Features:
    - Error counting by type, plugin, and time window
    - Pattern detection (recurring errors)
    - Error rate alerting via callbacks
    - Export for analytics/reporting

    Thread-safe for concurrent access.
    """

    def __init__(
        self,
        max_records: int = 1000,
        pattern_threshold: int = 5,
        pattern_window_minutes: int = 60,
        export_path: Optional[Path] = None
    ):
        """
        Initialize the error aggregator.

        Args:
            max_records: Maximum number of error records to keep in memory
            pattern_threshold: Number of occurrences to detect a pattern
            pattern_window_minutes: Time window for pattern detection
            export_path: Optional path for JSON export (auto-export on pattern detection)
        """
        self.logger = logging.getLogger(__name__)
        self.max_records = max_records
        self.pattern_threshold = pattern_threshold
        self.pattern_window = timedelta(minutes=pattern_window_minutes)
        self.export_path = export_path
        self._records: List[ErrorRecord] = []
        self._error_counts: Dict[str, int] = defaultdict(int)
        self._plugin_error_counts: Dict[str, Dict[str, int]] = defaultdict(lambda: defaultdict(int))
        self._patterns: Dict[str, ErrorPattern] = {}
        self._pattern_callbacks: List[Callable[[ErrorPattern], None]] = []
        # RLock (not Lock) so export_to_file can call get_error_summary
        # while already holding the lock, without deadlocking.
        self._lock = threading.RLock()
        # Track session start for error-rate calculation (errors per hour).
        self._session_start = datetime.now()

    def record_error(
        self,
        error: Exception,
        context: Optional[Dict[str, Any]] = None,
        plugin_id: Optional[str] = None,
        operation: Optional[str] = None
    ) -> ErrorRecord:
        """
        Record an error occurrence.

        Args:
            error: The exception that occurred
            context: Optional context dictionary with additional details
            plugin_id: Optional plugin ID that caused the error
            operation: Optional operation name (e.g., "update", "display")

        Returns:
            The created ErrorRecord
        """
        with self._lock:
            error_type = type(error).__name__
            # Copy the caller's context so merging LEDMatrixError context
            # below never mutates the dictionary the caller passed in.
            error_context = dict(context) if context else {}
            if isinstance(error, LEDMatrixError) and error.context:
                error_context.update(error.context)
            record = ErrorRecord(
                error_type=error_type,
                message=str(error),
                timestamp=datetime.now(),
                context=error_context,
                plugin_id=plugin_id,
                operation=operation,
                stack_trace=traceback.format_exc()
            )
            # Add record, evicting the oldest once max_records is exceeded.
            self._records.append(record)
            if len(self._records) > self.max_records:
                self._records.pop(0)
            # Update per-type and per-plugin counters (cumulative for the
            # session; clear_old_records does not decrement them).
            self._error_counts[error_type] += 1
            if plugin_id:
                self._plugin_error_counts[plugin_id][error_type] += 1
            # Check whether this occurrence completes a recurring pattern.
            self._detect_pattern(record)
            self.logger.debug(
                f"Error recorded: {error_type} - {str(error)[:100]}",
                extra={"plugin_id": plugin_id, "operation": operation}
            )
            return record

    def _detect_pattern(self, record: ErrorRecord) -> None:
        """Detect recurring error patterns.

        Must be called with self._lock held (only called from record_error).
        """
        cutoff = datetime.now() - self.pattern_window
        recent_same_type = [
            r for r in self._records
            if r.error_type == record.error_type and r.timestamp > cutoff
        ]
        if len(recent_same_type) < self.pattern_threshold:
            return
        pattern_key = record.error_type
        is_new_pattern = pattern_key not in self._patterns
        # Escalate severity as the count passes multiples of the threshold.
        count = len(recent_same_type)
        if count > self.pattern_threshold * 3:
            severity = "critical"
        elif count > self.pattern_threshold * 2:
            severity = "error"
        else:
            severity = "warning"
        # Collect affected plugins (may contain duplicates; de-duplicated
        # on export and when merging into an existing pattern).
        affected_plugins = [r.plugin_id for r in recent_same_type if r.plugin_id]
        # De-duplicated messages drawn from the first few matching records.
        sample_messages = list(set(r.message for r in recent_same_type[:5]))
        if is_new_pattern:
            pattern = ErrorPattern(
                error_type=record.error_type,
                count=count,
                first_seen=recent_same_type[0].timestamp,
                last_seen=record.timestamp,
                affected_plugins=affected_plugins,
                sample_messages=sample_messages,
                severity=severity
            )
            self._patterns[pattern_key] = pattern
            self.logger.warning(
                f"Error pattern detected: {record.error_type} occurred "
                f"{count} times in last {self.pattern_window}. "
                f"Affected plugins: {set(affected_plugins) or 'unknown'}"
            )
            # Notify callbacks; a failing callback must not break recording.
            for callback in self._pattern_callbacks:
                try:
                    callback(pattern)
                except Exception as e:
                    self.logger.error(f"Pattern callback failed: {e}")
            # Auto-export if path configured
            if self.export_path:
                self._auto_export()
        else:
            # Update the existing pattern in place.
            existing = self._patterns[pattern_key]
            existing.count = count
            existing.last_seen = record.timestamp
            existing.severity = severity
            # Merge (de-duplicated) rather than blindly extend, so the
            # plugin list cannot grow without bound as the same pattern
            # keeps recurring.
            merged = set(existing.affected_plugins)
            merged.update(affected_plugins)
            existing.affected_plugins = list(merged)

    def on_pattern_detected(self, callback: Callable[[ErrorPattern], None]) -> None:
        """
        Register a callback to be called when a new error pattern is detected.

        Args:
            callback: Function that takes an ErrorPattern as argument
        """
        # Registration happens under the lock so it is safe relative to
        # concurrent record_error calls that iterate the callback list.
        with self._lock:
            self._pattern_callbacks.append(callback)

    def get_error_summary(self) -> Dict[str, Any]:
        """
        Get summary of all errors for reporting.

        Returns:
            Dictionary with error statistics and recent errors
        """
        with self._lock:
            # Error rate in errors per hour; the 0.01h floor avoids a
            # divide-by-zero immediately after startup.
            session_duration = (datetime.now() - self._session_start).total_seconds() / 3600
            error_rate = len(self._records) / max(session_duration, 0.01)
            return {
                "session_start": self._session_start.isoformat(),
                "total_errors": len(self._records),
                "error_rate_per_hour": round(error_rate, 2),
                "error_counts_by_type": dict(self._error_counts),
                "plugin_error_counts": {
                    k: dict(v) for k, v in self._plugin_error_counts.items()
                },
                "active_patterns": {
                    k: v.to_dict() for k, v in self._patterns.items()
                },
                "recent_errors": [
                    r.to_dict() for r in self._records[-20:]
                ]
            }

    def get_plugin_health(self, plugin_id: str) -> Dict[str, Any]:
        """
        Get health status for a specific plugin.

        Args:
            plugin_id: Plugin ID to check

        Returns:
            Dictionary with plugin error statistics
        """
        with self._lock:
            # .get() (not indexing) so a lookup never creates a defaultdict entry.
            plugin_errors = self._plugin_error_counts.get(plugin_id, {})
            # Only the 100 most recent records count toward "recent" health.
            recent_plugin_errors = [
                r for r in self._records[-100:]
                if r.plugin_id == plugin_id
            ]
            # Determine health status from the recent error count.
            recent_count = len(recent_plugin_errors)
            if recent_count == 0:
                status = "healthy"
            elif recent_count < 5:
                status = "degraded"
            else:
                status = "unhealthy"
            return {
                "plugin_id": plugin_id,
                "status": status,
                "total_errors": sum(plugin_errors.values()),
                "error_types": dict(plugin_errors),
                "recent_error_count": recent_count,
                "last_error": recent_plugin_errors[-1].to_dict() if recent_plugin_errors else None
            }

    def clear_old_records(self, max_age_hours: int = 24) -> int:
        """
        Clear records older than specified age.

        Note: only the record list is pruned; the cumulative per-type and
        per-plugin counters are intentionally left untouched.

        Args:
            max_age_hours: Maximum age in hours

        Returns:
            Number of records cleared
        """
        with self._lock:
            cutoff = datetime.now() - timedelta(hours=max_age_hours)
            original_count = len(self._records)
            self._records = [r for r in self._records if r.timestamp > cutoff]
            cleared = original_count - len(self._records)
            if cleared > 0:
                self.logger.info(f"Cleared {cleared} old error records")
            return cleared

    def export_to_file(self, filepath: Path) -> None:
        """
        Export error data to JSON file.

        Args:
            filepath: Path to export file
        """
        # get_error_summary re-acquires the RLock; safe because _lock is reentrant.
        with self._lock:
            data = {
                "exported_at": datetime.now().isoformat(),
                "summary": self.get_error_summary(),
                "all_records": [r.to_dict() for r in self._records]
            }
            filepath.parent.mkdir(parents=True, exist_ok=True)
            filepath.write_text(json.dumps(data, indent=2))
            self.logger.info(f"Exported error data to {filepath}")

    def _auto_export(self) -> None:
        """Auto-export on pattern detection (if export_path configured)."""
        if self.export_path:
            try:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                filepath = self.export_path / f"errors_{timestamp}.json"
                self.export_to_file(filepath)
            except Exception as e:
                # Export failure must never break error recording itself.
                self.logger.error(f"Auto-export failed: {e}")
# Global singleton instance, guarded by its own lock for lazy creation.
_error_aggregator: Optional[ErrorAggregator] = None
_aggregator_lock = threading.Lock()


def get_error_aggregator(
    max_records: int = 1000,
    pattern_threshold: int = 5,
    pattern_window_minutes: int = 60,
    export_path: Optional[Path] = None
) -> ErrorAggregator:
    """
    Get or create the global error aggregator instance.

    Args:
        max_records: Maximum records to keep (only used on first call)
        pattern_threshold: Pattern detection threshold (only used on first call)
        pattern_window_minutes: Pattern detection window (only used on first call)
        export_path: Export path for auto-export (only used on first call)

    Returns:
        The global ErrorAggregator instance
    """
    global _error_aggregator
    with _aggregator_lock:
        # Fast path: singleton already built — the arguments are ignored.
        if _error_aggregator is not None:
            return _error_aggregator
        # First call wins: its arguments configure the singleton.
        _error_aggregator = ErrorAggregator(
            max_records=max_records,
            pattern_threshold=pattern_threshold,
            pattern_window_minutes=pattern_window_minutes,
            export_path=export_path,
        )
        return _error_aggregator
def record_error(
    error: Exception,
    context: Optional[Dict[str, Any]] = None,
    plugin_id: Optional[str] = None,
    operation: Optional[str] = None
) -> ErrorRecord:
    """
    Convenience function to record an error to the global aggregator.

    Args:
        error: The exception that occurred
        context: Optional context dictionary
        plugin_id: Optional plugin ID
        operation: Optional operation name

    Returns:
        The created ErrorRecord
    """
    # Delegate to the process-wide singleton aggregator.
    aggregator = get_error_aggregator()
    return aggregator.record_error(
        error=error,
        context=context,
        plugin_id=plugin_id,
        operation=operation,
    )

View File

@@ -133,11 +133,11 @@ class BasePlugin(ABC):
def get_display_duration(self) -> float:
"""
Get the display duration for this plugin instance.
Automatically detects duration from:
1. self.display_duration instance variable (if exists)
2. self.config.get("display_duration", 15.0) (fallback)
Can be overridden by plugins to provide dynamic durations based
on content (e.g., longer duration for more complex displays).
@@ -155,27 +155,78 @@ class BasePlugin(ABC):
elif isinstance(duration, (int, float)):
if duration > 0:
return float(duration)
else:
self.logger.debug(
"display_duration instance variable is non-positive (%s), using config fallback",
duration
)
# Try converting string representations of numbers
elif isinstance(duration, str):
try:
duration_float = float(duration)
if duration_float > 0:
return duration_float
else:
self.logger.debug(
"display_duration string value is non-positive (%s), using config fallback",
duration
)
except (ValueError, TypeError):
pass # Fall through to config
except (TypeError, ValueError, AttributeError):
pass # Fall through to config
self.logger.warning(
"display_duration instance variable has invalid string value '%s', using config fallback",
duration
)
else:
self.logger.warning(
"display_duration instance variable has unexpected type %s (value: %s), using config fallback",
type(duration).__name__, duration
)
except (TypeError, ValueError, AttributeError) as e:
self.logger.warning(
"Error reading display_duration instance variable: %s, using config fallback",
e
)
# Fall back to config
config_duration = self.config.get("display_duration", 15.0)
try:
# Ensure config value is also a valid float
if isinstance(config_duration, (int, float)):
return float(config_duration) if config_duration > 0 else 15.0
if config_duration > 0:
return float(config_duration)
else:
self.logger.debug(
"Config display_duration is non-positive (%s), using default 15.0",
config_duration
)
return 15.0
elif isinstance(config_duration, str):
return float(config_duration) if float(config_duration) > 0 else 15.0
except (ValueError, TypeError):
pass
try:
duration_float = float(config_duration)
if duration_float > 0:
return duration_float
else:
self.logger.debug(
"Config display_duration string is non-positive (%s), using default 15.0",
config_duration
)
return 15.0
except ValueError:
self.logger.warning(
"Config display_duration has invalid string value '%s', using default 15.0",
config_duration
)
return 15.0
else:
self.logger.warning(
"Config display_duration has unexpected type %s (value: %s), using default 15.0",
type(config_duration).__name__, config_duration
)
except (ValueError, TypeError) as e:
self.logger.warning(
"Error processing config display_duration: %s, using default 15.0",
e
)
return 15.0

View File

@@ -13,6 +13,7 @@ import logging
from src.exceptions import PluginError
from src.logging_config import get_logger
from src.error_aggregator import record_error
class TimeoutError(Exception):
@@ -80,12 +81,15 @@ class PluginExecutor:
if not result_container['completed']:
error_msg = f"{plugin_context} operation timed out after {timeout}s"
self.logger.error(error_msg)
raise TimeoutError(error_msg)
timeout_error = TimeoutError(error_msg)
record_error(timeout_error, plugin_id=plugin_id, operation="timeout")
raise timeout_error
if result_container['exception']:
error = result_container['exception']
error_msg = f"{plugin_context} operation failed: {error}"
self.logger.error(error_msg, exc_info=True)
record_error(error, plugin_id=plugin_id, operation="execute")
raise PluginError(error_msg, plugin_id=plugin_id) from error
return result_container['value']
@@ -128,7 +132,7 @@ class PluginExecutor:
self.logger.error("Plugin %s update() timed out", plugin_id)
return False
except PluginError:
# Already logged in execute_with_timeout
# Already logged and recorded in execute_with_timeout
return False
except Exception as e:
self.logger.error(
@@ -137,6 +141,7 @@ class PluginExecutor:
e,
exc_info=True
)
record_error(e, plugin_id=plugin_id, operation="update")
return False
def execute_display(
@@ -203,7 +208,7 @@ class PluginExecutor:
self.logger.error("Plugin %s display() timed out", plugin_id)
return False
except PluginError:
# Already logged in execute_with_timeout
# Already logged and recorded in execute_with_timeout
return False
except Exception as e:
self.logger.error(
@@ -212,6 +217,7 @@ class PluginExecutor:
e,
exc_info=True
)
record_error(e, plugin_id=plugin_id, operation="display")
return False
def execute_safe(

View File

@@ -136,13 +136,24 @@ class PluginManager:
def discover_plugins(self) -> List[str]:
    """
    Discover all plugins in the plugins directory.

    Also checks for potential config key collisions and logs warnings.

    Returns:
        List of plugin IDs
    """
    self.logger.info("Discovering plugins in %s", self.plugins_dir)
    plugin_ids = self._scan_directory_for_plugins(self.plugins_dir)
    self.logger.info("Discovered %d plugin(s)", len(plugin_ids))
    # Surface config key collisions as warnings; discovery still succeeds.
    for collision in self.schema_manager.detect_config_key_collisions(plugin_ids):
        self.logger.warning(
            "Config collision detected: %s",
            collision.get('message', str(collision))
        )
    return plugin_ids
def _get_dependency_marker_path(self, plugin_id: str) -> Path:
@@ -288,6 +299,24 @@ class PluginManager:
else:
config = {}
# Check if plugin has a config schema
schema_path = self.schema_manager.get_schema_path(plugin_id)
if schema_path is None:
# Schema file doesn't exist
self.logger.warning(
f"Plugin '{plugin_id}' has no config_schema.json - configuration will not be validated. "
f"Consider adding a schema file for better error detection and user experience."
)
else:
# Schema file exists, try to load it
schema = self.schema_manager.load_schema(plugin_id)
if schema is None:
# Schema exists but couldn't be loaded (likely invalid JSON or schema)
self.logger.warning(
f"Plugin '{plugin_id}' has a config_schema.json but it could not be loaded. "
f"The schema may be invalid. Please verify the schema file at: {schema_path}"
)
# Merge config with schema defaults to ensure all defaults are applied
try:
defaults = self.schema_manager.generate_default_config(plugin_id, use_cache=True)

View File

@@ -445,3 +445,62 @@ class SchemaManager:
replace_none_with_defaults(merged, defaults)
return merged
def detect_config_key_collisions(
    self,
    plugin_ids: List[str]
) -> List[Dict[str, Any]]:
    """
    Detect config key collisions between plugins.

    Checks for:
    1. Plugin IDs that collide with reserved system config keys
    2. Plugin IDs that differ only by case (may conflict or confuse)

    Args:
        plugin_ids: List of plugin identifiers to check

    Returns:
        List of collision warnings, each containing:
        - type: 'reserved_key_collision' or 'case_collision'
        - plugin_id: The plugin ID involved
        - message: Human-readable warning message
        (case_collision entries also carry 'conflicting_id')
    """
    collisions: List[Dict[str, Any]] = []
    # Reserved top-level config keys that plugins should not use as IDs.
    # All entries are lowercase, so one lowercased membership test per
    # plugin suffices (previously a lowered set was rebuilt every iteration).
    reserved_keys = {
        'display', 'schedule', 'timezone', 'plugin_system',
        'display_modes', 'system', 'hardware', 'debug',
        'log_level', 'emulator', 'web_interface'
    }
    # Map of lowercased ID -> first plugin ID seen with that spelling.
    lowercase_ids: Dict[str, str] = {}
    for plugin_id in plugin_ids:
        lower_id = plugin_id.lower()
        # 1. Collision with a reserved system config key.
        if lower_id in reserved_keys:
            collisions.append({
                "type": "reserved_key_collision",
                "plugin_id": plugin_id,
                "message": f"Plugin ID '{plugin_id}' conflicts with reserved config key. "
                           f"This may cause configuration issues."
            })
        # 2. Case-insensitive collision with a previously seen plugin.
        if lower_id in lowercase_ids:
            existing_id = lowercase_ids[lower_id]
            if existing_id != plugin_id:
                collisions.append({
                    "type": "case_collision",
                    "plugin_id": plugin_id,
                    "conflicting_id": existing_id,
                    "message": f"Plugin ID '{plugin_id}' may conflict with '{existing_id}' "
                               f"on case-insensitive file systems."
                })
        else:
            lowercase_ids[lower_id] = plugin_id
    return collisions