feat: add error detection, monitoring, and code quality improvements (#223)

* feat: add error detection, monitoring, and code quality improvements

This comprehensive update addresses automatic error detection, code
quality, and plugin development experience:

## Error Detection & Monitoring
- Add ErrorAggregator service for centralized error tracking
- Add pattern detection for recurring errors (5+ in 60 min)
- Add error dashboard API endpoints (/api/v3/errors/*)
- Integrate error recording into plugin executor

## Code Quality
- Remove 10 silent `except: pass` blocks in sports.py and football.py
- Remove hardcoded debug log paths
- Add pre-commit hooks to prevent future bare except clauses

## Validation & Type Safety
- Add warnings when plugins lack config_schema.json
- Add config key collision detection for plugins
- Improve type coercion logging in BasePlugin

## Testing
- Add test_config_validation_edge_cases.py
- Add test_plugin_loading_failures.py
- Add test_error_aggregator.py

## Documentation
- Add PLUGIN_ERROR_HANDLING.md guide
- Add CONFIG_DEBUGGING.md guide

Note: GitHub Actions CI workflow is available in the plan but requires
workflow scope to push. Add .github/workflows/ci.yml manually.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* fix: address code review issues

- Fix GitHub issues URL in CONFIG_DEBUGGING.md
- Use RLock in error_aggregator.py to prevent deadlock in export_to_file
- Distinguish missing vs invalid schema files in plugin_manager.py
- Add assertions to test_null_value_for_required_field test
- Remove unused initial_count variable in test_plugin_load_error_recorded
- Add validation for max_age_hours in clear_old_errors API endpoint

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Chuck <chuck@example.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Chuck
2026-01-30 10:05:09 -05:00
committed by GitHub
parent 8912501604
commit 8fb2800495
14 changed files with 2330 additions and 202 deletions

View File

@@ -133,11 +133,11 @@ class BasePlugin(ABC):
def get_display_duration(self) -> float:
"""
Get the display duration for this plugin instance.
Automatically detects duration from:
1. self.display_duration instance variable (if exists)
2. self.config.get("display_duration", 15.0) (fallback)
Can be overridden by plugins to provide dynamic durations based
on content (e.g., longer duration for more complex displays).
@@ -155,27 +155,78 @@ class BasePlugin(ABC):
elif isinstance(duration, (int, float)):
if duration > 0:
return float(duration)
else:
self.logger.debug(
"display_duration instance variable is non-positive (%s), using config fallback",
duration
)
# Try converting string representations of numbers
elif isinstance(duration, str):
try:
duration_float = float(duration)
if duration_float > 0:
return duration_float
else:
self.logger.debug(
"display_duration string value is non-positive (%s), using config fallback",
duration
)
except (ValueError, TypeError):
pass # Fall through to config
except (TypeError, ValueError, AttributeError):
pass # Fall through to config
self.logger.warning(
"display_duration instance variable has invalid string value '%s', using config fallback",
duration
)
else:
self.logger.warning(
"display_duration instance variable has unexpected type %s (value: %s), using config fallback",
type(duration).__name__, duration
)
except (TypeError, ValueError, AttributeError) as e:
self.logger.warning(
"Error reading display_duration instance variable: %s, using config fallback",
e
)
# Fall back to config
config_duration = self.config.get("display_duration", 15.0)
try:
# Ensure config value is also a valid float
if isinstance(config_duration, (int, float)):
return float(config_duration) if config_duration > 0 else 15.0
if config_duration > 0:
return float(config_duration)
else:
self.logger.debug(
"Config display_duration is non-positive (%s), using default 15.0",
config_duration
)
return 15.0
elif isinstance(config_duration, str):
return float(config_duration) if float(config_duration) > 0 else 15.0
except (ValueError, TypeError):
pass
try:
duration_float = float(config_duration)
if duration_float > 0:
return duration_float
else:
self.logger.debug(
"Config display_duration string is non-positive (%s), using default 15.0",
config_duration
)
return 15.0
except ValueError:
self.logger.warning(
"Config display_duration has invalid string value '%s', using default 15.0",
config_duration
)
return 15.0
else:
self.logger.warning(
"Config display_duration has unexpected type %s (value: %s), using default 15.0",
type(config_duration).__name__, config_duration
)
except (ValueError, TypeError) as e:
self.logger.warning(
"Error processing config display_duration: %s, using default 15.0",
e
)
return 15.0

View File

@@ -13,6 +13,7 @@ import logging
from src.exceptions import PluginError
from src.logging_config import get_logger
from src.error_aggregator import record_error
class TimeoutError(Exception):
@@ -80,12 +81,15 @@ class PluginExecutor:
if not result_container['completed']:
error_msg = f"{plugin_context} operation timed out after {timeout}s"
self.logger.error(error_msg)
raise TimeoutError(error_msg)
timeout_error = TimeoutError(error_msg)
record_error(timeout_error, plugin_id=plugin_id, operation="timeout")
raise timeout_error
if result_container['exception']:
error = result_container['exception']
error_msg = f"{plugin_context} operation failed: {error}"
self.logger.error(error_msg, exc_info=True)
record_error(error, plugin_id=plugin_id, operation="execute")
raise PluginError(error_msg, plugin_id=plugin_id) from error
return result_container['value']
@@ -128,7 +132,7 @@ class PluginExecutor:
self.logger.error("Plugin %s update() timed out", plugin_id)
return False
except PluginError:
# Already logged in execute_with_timeout
# Already logged and recorded in execute_with_timeout
return False
except Exception as e:
self.logger.error(
@@ -137,6 +141,7 @@ class PluginExecutor:
e,
exc_info=True
)
record_error(e, plugin_id=plugin_id, operation="update")
return False
def execute_display(
@@ -203,7 +208,7 @@ class PluginExecutor:
self.logger.error("Plugin %s display() timed out", plugin_id)
return False
except PluginError:
# Already logged in execute_with_timeout
# Already logged and recorded in execute_with_timeout
return False
except Exception as e:
self.logger.error(
@@ -212,6 +217,7 @@ class PluginExecutor:
e,
exc_info=True
)
record_error(e, plugin_id=plugin_id, operation="display")
return False
def execute_safe(

View File

@@ -136,13 +136,24 @@ class PluginManager:
def discover_plugins(self) -> List[str]:
"""
Discover all plugins in the plugins directory.
Also checks for potential config key collisions and logs warnings.
Returns:
List of plugin IDs
"""
self.logger.info("Discovering plugins in %s", self.plugins_dir)
plugin_ids = self._scan_directory_for_plugins(self.plugins_dir)
self.logger.info("Discovered %d plugin(s)", len(plugin_ids))
# Check for config key collisions
collisions = self.schema_manager.detect_config_key_collisions(plugin_ids)
for collision in collisions:
self.logger.warning(
"Config collision detected: %s",
collision.get('message', str(collision))
)
return plugin_ids
def _get_dependency_marker_path(self, plugin_id: str) -> Path:
@@ -288,6 +299,24 @@ class PluginManager:
else:
config = {}
# Check if plugin has a config schema
schema_path = self.schema_manager.get_schema_path(plugin_id)
if schema_path is None:
# Schema file doesn't exist
self.logger.warning(
f"Plugin '{plugin_id}' has no config_schema.json - configuration will not be validated. "
f"Consider adding a schema file for better error detection and user experience."
)
else:
# Schema file exists, try to load it
schema = self.schema_manager.load_schema(plugin_id)
if schema is None:
# Schema exists but couldn't be loaded (likely invalid JSON or schema)
self.logger.warning(
f"Plugin '{plugin_id}' has a config_schema.json but it could not be loaded. "
f"The schema may be invalid. Please verify the schema file at: {schema_path}"
)
# Merge config with schema defaults to ensure all defaults are applied
try:
defaults = self.schema_manager.generate_default_config(plugin_id, use_cache=True)

View File

@@ -445,3 +445,62 @@ class SchemaManager:
replace_none_with_defaults(merged, defaults)
return merged
def detect_config_key_collisions(
self,
plugin_ids: List[str]
) -> List[Dict[str, Any]]:
"""
Detect config key collisions between plugins.
Checks for:
1. Plugin IDs that collide with reserved system config keys
2. Plugin IDs that might cause confusion or conflicts
Args:
plugin_ids: List of plugin identifiers to check
Returns:
List of collision warnings, each containing:
- type: 'reserved_key_collision' or 'case_collision'
- plugin_id: The plugin ID involved
- message: Human-readable warning message
"""
collisions = []
# Reserved top-level config keys that plugins should not use as IDs
reserved_keys = {
'display', 'schedule', 'timezone', 'plugin_system',
'display_modes', 'system', 'hardware', 'debug',
'log_level', 'emulator', 'web_interface'
}
# Track plugin IDs for case collision detection
lowercase_ids: Dict[str, str] = {}
for plugin_id in plugin_ids:
# Check reserved key collision
if plugin_id.lower() in {k.lower() for k in reserved_keys}:
collisions.append({
"type": "reserved_key_collision",
"plugin_id": plugin_id,
"message": f"Plugin ID '{plugin_id}' conflicts with reserved config key. "
f"This may cause configuration issues."
})
# Check for case-insensitive collisions between plugins
lower_id = plugin_id.lower()
if lower_id in lowercase_ids:
existing_id = lowercase_ids[lower_id]
if existing_id != plugin_id:
collisions.append({
"type": "case_collision",
"plugin_id": plugin_id,
"conflicting_id": existing_id,
"message": f"Plugin ID '{plugin_id}' may conflict with '{existing_id}' "
f"on case-insensitive file systems."
})
else:
lowercase_ids[lower_id] = plugin_id
return collisions