2 Commits

Author SHA1 Message Date
Chuck
37566d93ac fix(plugin_manager): address PR review — failure timestamp and error context
- Use time.time() at the point of failure instead of reusing current_time
  (captured before execution), so the full retry interval always elapses
  after a timeout rather than one execution-duration shorter

- Add PluginStateManager.set_error_info() to persist structured error context
  without changing plugin state; call it in both failure branches so
  get_error_info() / get_state_info() surface recoverable errors alongside
  ERROR-state errors

- Add warning log on the success=False branch (was previously silent)

- Pass a descriptive Exception (not a generic "Plugin execution failed") to
  health_tracker.record_failure() in the timeout/executor-error path

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 14:53:23 -04:00
Chuck
d0969ad57a fix(plugin_manager): prevent permanent ERROR state after update timeout
When execute_update() fails (timeout or unhandled exception), the plugin
state was set to ERROR with no recovery path. can_execute() returns False
for ERROR state, so the plugin's update() was never called again, leaving
it showing stale data indefinitely.

Instead, update plugin_last_update so the plugin waits one configured
interval before retrying, and keep the state ENABLED so recovery is
automatic on the next cycle.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 09:35:38 -04:00
2 changed files with 46 additions and 9 deletions

View File

@@ -734,14 +734,35 @@ class PluginManager:
if self.health_tracker:
self.health_tracker.record_success(plugin_id)
else:
# Execution failed (timeout or error)
self.state_manager.set_state(plugin_id, PluginState.ERROR)
# Execution failed (timeout or executor error) — stamp with the
# actual failure time (not current_time captured before execution)
# so the full interval elapses before the next retry.
failure_time = time.time()
err = Exception(f"Plugin {plugin_id} execution failed (timeout or executor error)")
error_info = {
'error': str(err),
'error_type': 'ExecutionFailure',
'timestamp': failure_time,
'recoverable': True,
}
self.logger.warning("Plugin %s update() failed; will retry after interval", plugin_id)
self.plugin_last_update[plugin_id] = failure_time
self.state_manager.set_state(plugin_id, PluginState.ENABLED)
self.state_manager.set_error_info(plugin_id, error_info)
if self.health_tracker:
self.health_tracker.record_failure(plugin_id, Exception("Plugin execution failed"))
self.health_tracker.record_failure(plugin_id, err)
except Exception as exc: # pylint: disable=broad-except
failure_time = time.time()
self.logger.exception("Error updating plugin %s: %s", plugin_id, exc)
self.state_manager.set_state(plugin_id, PluginState.ERROR, error=exc)
# Record failure
error_info = {
'error': str(exc),
'error_type': type(exc).__name__,
'timestamp': failure_time,
'recoverable': True,
}
self.plugin_last_update[plugin_id] = failure_time
self.state_manager.set_state(plugin_id, PluginState.ENABLED)
self.state_manager.set_error_info(plugin_id, error_info)
if self.health_tracker:
self.health_tracker.record_failure(plugin_id, exc)

View File

@@ -136,9 +136,25 @@ class PluginStateManager:
"""
return self._state_history.get(plugin_id, [])
def set_error_info(self, plugin_id: str, error_info: Dict[str, Any]) -> None:
"""
Persist structured error context without changing plugin state.
Used for recoverable failures (e.g. update timeout) where the plugin
stays ENABLED but the error details should remain queryable.
Args:
plugin_id: Plugin identifier
error_info: Arbitrary dict describing the error
"""
self._error_info[plugin_id] = error_info
def get_error_info(self, plugin_id: str) -> Optional[Dict[str, Any]]:
"""
Get error information for a plugin in ERROR state.
Get error information for a plugin.
Returns the stored error dict whether the plugin is in ERROR state or
still ENABLED after a recoverable failure.
Args:
plugin_id: Plugin identifier