fix(plugin_manager): address PR review — failure timestamp and error context

- Use time.time() at the point of failure instead of reusing current_time (captured before execution), so the full retry interval always elapses after a timeout rather than one execution-duration shorter - Add PluginStateManager.set_error_info() to persist structured error context without changing plugin state; call it in both failure branches so get_error_info() / get_state_info() surface recoverable errors alongside ERROR-state errors - Add warning log on the success=False branch (was previously silent) - Pass a descriptive Exception (not a generic "Plugin execution failed") to health_tracker.record_failure() in the timeout/executor-error path Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
fix(plugin_manager): prevent permanent ERROR state after update timeout
2026-04-29 12:03:00 +00:00 · 2026-04-28 14:53:23 -04:00 · 2026-04-28 09:35:38 -04:00
2 changed files with 46 additions and 9 deletions
--- a/src/plugin_system/plugin_manager.py
+++ b/src/plugin_system/plugin_manager.py
@@ -734,14 +734,35 @@ class PluginManager:
                        if self.health_tracker:
                            self.health_tracker.record_success(plugin_id)
                    else:
-                        # Execution failed (timeout or error)
-                        self.state_manager.set_state(plugin_id, PluginState.ERROR)
+                        # Execution failed (timeout or executor error) — stamp with the
+                        # actual failure time (not current_time captured before execution)
+                        # so the full interval elapses before the next retry.
+                        failure_time = time.time()
+                        err = Exception(f"Plugin {plugin_id} execution failed (timeout or executor error)")
+                        error_info = {
+                            'error': str(err),
+                            'error_type': 'ExecutionFailure',
+                            'timestamp': failure_time,
+                            'recoverable': True,
+                        }
+                        self.logger.warning("Plugin %s update() failed; will retry after interval", plugin_id)
+                        self.plugin_last_update[plugin_id] = failure_time
+                        self.state_manager.set_state(plugin_id, PluginState.ENABLED)
+                        self.state_manager.set_error_info(plugin_id, error_info)
                        if self.health_tracker:
-                            self.health_tracker.record_failure(plugin_id, Exception("Plugin execution failed"))
+                            self.health_tracker.record_failure(plugin_id, err)
                except Exception as exc:  # pylint: disable=broad-except
+                    failure_time = time.time()
                    self.logger.exception("Error updating plugin %s: %s", plugin_id, exc)
-                    self.state_manager.set_state(plugin_id, PluginState.ERROR, error=exc)
-                    # Record failure
+                    error_info = {
+                        'error': str(exc),
+                        'error_type': type(exc).__name__,
+                        'timestamp': failure_time,
+                        'recoverable': True,
+                    }
+                    self.plugin_last_update[plugin_id] = failure_time
+                    self.state_manager.set_state(plugin_id, PluginState.ENABLED)
+                    self.state_manager.set_error_info(plugin_id, error_info)
                    if self.health_tracker:
                        self.health_tracker.record_failure(plugin_id, exc)

--- a/src/plugin_system/plugin_state.py
+++ b/src/plugin_system/plugin_state.py
@@ -136,9 +136,25 @@ class PluginStateManager:
        """
        return self._state_history.get(plugin_id, [])
    
+    def set_error_info(self, plugin_id: str, error_info: Dict[str, Any]) -> None:
+        """
+        Persist structured error context without changing plugin state.
+
+        Used for recoverable failures (e.g. update timeout) where the plugin
+        stays ENABLED but the error details should remain queryable.
+
+        Args:
+            plugin_id: Plugin identifier
+            error_info: Arbitrary dict describing the error
+        """
+        self._error_info[plugin_id] = error_info
+
    def get_error_info(self, plugin_id: str) -> Optional[Dict[str, Any]]:
        """
-        Get error information for a plugin in ERROR state.
+        Get error information for a plugin.
+
+        Returns the stored error dict whether the plugin is in ERROR state or
+        still ENABLED after a recoverable failure.

        Args:
            plugin_id: Plugin identifier