2 Commits

Author SHA1 Message Date
Chuck
37566d93ac fix(plugin_manager): address PR review — failure timestamp and error context
- Use time.time() at the point of failure instead of reusing current_time
  (captured before execution), so the full retry interval always elapses
  after a timeout instead of being shortened by the execution duration

- Add PluginStateManager.set_error_info() to persist structured error context
  without changing plugin state; call it in both failure branches so
  get_error_info() / get_state_info() surface recoverable errors alongside
  ERROR-state errors

- Add warning log on the success=False branch (was previously silent)

- Pass a descriptive Exception (not a generic "Plugin execution failed") to
  health_tracker.record_failure() in the timeout/executor-error path

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 14:53:23 -04:00
Chuck
d0969ad57a fix(plugin_manager): prevent permanent ERROR state after update timeout
When execute_update() failed (timeout or unhandled exception), the plugin
state was set to ERROR with no recovery path. Because can_execute() returns
False for ERROR state, the plugin's update() was never called again, leaving
it showing stale data indefinitely.

Instead, update plugin_last_update so the plugin waits one configured
interval before retrying, and keep the state ENABLED so recovery is
automatic on the next cycle.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 09:35:38 -04:00
2 changed files with 46 additions and 9 deletions

View File

@@ -734,14 +734,35 @@ class PluginManager:
if self.health_tracker:
self.health_tracker.record_success(plugin_id)
else:
# Execution failed (timeout or error)
self.state_manager.set_state(plugin_id, PluginState.ERROR)
# Execution failed (timeout or executor error) — stamp with the
# actual failure time (not current_time captured before execution)
# so the full interval elapses before the next retry.
failure_time = time.time()
err = Exception(f"Plugin {plugin_id} execution failed (timeout or executor error)")
error_info = {
'error': str(err),
'error_type': 'ExecutionFailure',
'timestamp': failure_time,
'recoverable': True,
}
self.logger.warning("Plugin %s update() failed; will retry after interval", plugin_id)
self.plugin_last_update[plugin_id] = failure_time
self.state_manager.set_state(plugin_id, PluginState.ENABLED)
self.state_manager.set_error_info(plugin_id, error_info)
if self.health_tracker:
self.health_tracker.record_failure(plugin_id, Exception("Plugin execution failed"))
self.health_tracker.record_failure(plugin_id, err)
except Exception as exc: # pylint: disable=broad-except
failure_time = time.time()
self.logger.exception("Error updating plugin %s: %s", plugin_id, exc)
self.state_manager.set_state(plugin_id, PluginState.ERROR, error=exc)
# Record failure
error_info = {
'error': str(exc),
'error_type': type(exc).__name__,
'timestamp': failure_time,
'recoverable': True,
}
self.plugin_last_update[plugin_id] = failure_time
self.state_manager.set_state(plugin_id, PluginState.ENABLED)
self.state_manager.set_error_info(plugin_id, error_info)
if self.health_tracker:
self.health_tracker.record_failure(plugin_id, exc)

View File

@@ -136,13 +136,29 @@ class PluginStateManager:
"""
return self._state_history.get(plugin_id, [])
def get_error_info(self, plugin_id: str) -> Optional[Dict[str, Any]]:
def set_error_info(self, plugin_id: str, error_info: Dict[str, Any]) -> None:
"""
Get error information for a plugin in ERROR state.
Persist structured error context without changing plugin state.
Used for recoverable failures (e.g. update timeout) where the plugin
stays ENABLED but the error details should remain queryable.
Args:
plugin_id: Plugin identifier
error_info: Arbitrary dict describing the error
"""
self._error_info[plugin_id] = error_info
def get_error_info(self, plugin_id: str) -> Optional[Dict[str, Any]]:
"""
Get error information for a plugin.
Returns the stored error dict whether the plugin is in ERROR state or
still ENABLED after a recoverable failure.
Args:
plugin_id: Plugin identifier
Returns:
Error information dict or None
"""