fix(plugin_manager): address PR review — failure timestamp and error context

- Use time.time() at the point of failure instead of reusing current_time
  (captured before execution), so the full retry interval always elapses
  after a timeout rather than one execution-duration shorter

- Add PluginStateManager.set_error_info() to persist structured error context
  without changing plugin state; call it in both failure branches so
  get_error_info() / get_state_info() surface recoverable errors alongside
  ERROR-state errors

- Add warning log on the success=False branch (was previously silent)

- Pass a descriptive Exception (not a generic "Plugin execution failed") to
  health_tracker.record_failure() in the timeout/executor-error path

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Chuck
2026-04-28 14:53:23 -04:00
parent d0969ad57a
commit 37566d93ac
2 changed files with 44 additions and 11 deletions

View File

@@ -734,18 +734,35 @@ class PluginManager:
if self.health_tracker: if self.health_tracker:
self.health_tracker.record_success(plugin_id) self.health_tracker.record_success(plugin_id)
else: else:
# Execution failed (timeout or error) — update timestamp so the # Execution failed (timeout or executor error) — stamp with the
# plugin waits one full interval before retrying, but keep state # actual failure time (not current_time captured before execution)
# ENABLED so can_execute() returns True and recovery is automatic. # so the full interval elapses before the next retry.
self.plugin_last_update[plugin_id] = current_time failure_time = time.time()
err = Exception(f"Plugin {plugin_id} execution failed (timeout or executor error)")
error_info = {
'error': str(err),
'error_type': 'ExecutionFailure',
'timestamp': failure_time,
'recoverable': True,
}
self.logger.warning("Plugin %s update() failed; will retry after interval", plugin_id)
self.plugin_last_update[plugin_id] = failure_time
self.state_manager.set_state(plugin_id, PluginState.ENABLED) self.state_manager.set_state(plugin_id, PluginState.ENABLED)
self.state_manager.set_error_info(plugin_id, error_info)
if self.health_tracker: if self.health_tracker:
self.health_tracker.record_failure(plugin_id, Exception("Plugin execution failed")) self.health_tracker.record_failure(plugin_id, err)
except Exception as exc: # pylint: disable=broad-except except Exception as exc: # pylint: disable=broad-except
failure_time = time.time()
self.logger.exception("Error updating plugin %s: %s", plugin_id, exc) self.logger.exception("Error updating plugin %s: %s", plugin_id, exc)
# Same as the failure path above: stay ENABLED and wait one interval. error_info = {
self.plugin_last_update[plugin_id] = current_time 'error': str(exc),
'error_type': type(exc).__name__,
'timestamp': failure_time,
'recoverable': True,
}
self.plugin_last_update[plugin_id] = failure_time
self.state_manager.set_state(plugin_id, PluginState.ENABLED) self.state_manager.set_state(plugin_id, PluginState.ENABLED)
self.state_manager.set_error_info(plugin_id, error_info)
if self.health_tracker: if self.health_tracker:
self.health_tracker.record_failure(plugin_id, exc) self.health_tracker.record_failure(plugin_id, exc)

View File

@@ -136,13 +136,29 @@ class PluginStateManager:
""" """
return self._state_history.get(plugin_id, []) return self._state_history.get(plugin_id, [])
def get_error_info(self, plugin_id: str) -> Optional[Dict[str, Any]]: def set_error_info(self, plugin_id: str, error_info: Dict[str, Any]) -> None:
""" """
Get error information for a plugin in ERROR state. Persist structured error context without changing plugin state.
Used for recoverable failures (e.g. update timeout) where the plugin
stays ENABLED but the error details should remain queryable.
Args: Args:
plugin_id: Plugin identifier plugin_id: Plugin identifier
error_info: Arbitrary dict describing the error
"""
self._error_info[plugin_id] = error_info
def get_error_info(self, plugin_id: str) -> Optional[Dict[str, Any]]:
"""
Get error information for a plugin.
Returns the stored error dict whether the plugin is in ERROR state or
still ENABLED after a recoverable failure.
Args:
plugin_id: Plugin identifier
Returns: Returns:
Error information dict or None Error information dict or None
""" """