fix(plugin_manager): apply recovery logic to update_all_plugins; extract helper

update_all_plugins still set PluginState.ERROR on both failure paths, leaving
it inconsistent with the run_scheduled_updates fix from the same PR.

Extract _record_update_failure(plugin_id, exc=None) to hold all shared failure
logic: capture actual failure time, build structured error_info, log the retry
warning, stamp plugin_last_update, call set_state_with_error(ENABLED), and
forward to health_tracker. Replace all four failure sites (two in
run_scheduled_updates, two in update_all_plugins) with calls to this helper.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Chuck
2026-04-29 14:53:23 -04:00
parent 7b50a2db59
commit 54b131c9f4

View File

@@ -677,6 +677,44 @@ class PluginManager:
# Default: 60 seconds # Default: 60 seconds
return 60.0 return 60.0
def _record_update_failure(
self,
plugin_id: str,
exc: Optional[Exception] = None,
) -> None:
"""Apply the standard failure-recovery path for a plugin update.
Stamps plugin_last_update with the actual failure time so the full
configured interval elapses before the next retry, then transitions
the plugin back to ENABLED (not ERROR) with structured error context
so automatic recovery happens on the next scheduled cycle.
Args:
plugin_id: Plugin identifier
exc: The exception that caused the failure, if any. When None a
synthetic ExecutionFailure exception is constructed from the
timeout/executor-error path.
"""
failure_time = time.time()
if exc is not None:
err: Exception = exc
error_type = type(exc).__name__
else:
err = Exception(f"Plugin {plugin_id} execution failed (timeout or executor error)")
error_type = 'ExecutionFailure'
error_info = {
'error': str(err),
'error_type': error_type,
'timestamp': failure_time,
'recoverable': True,
}
self.logger.warning("Plugin %s update() failed; will retry after interval", plugin_id)
self.plugin_last_update[plugin_id] = failure_time
self.state_manager.set_state_with_error(plugin_id, PluginState.ENABLED, error_info, error=err)
if self.health_tracker:
self.health_tracker.record_failure(plugin_id, err)
def run_scheduled_updates(self, current_time: Optional[float] = None) -> None: def run_scheduled_updates(self, current_time: Optional[float] = None) -> None:
""" """
Trigger plugin updates based on their defined update intervals. Trigger plugin updates based on their defined update intervals.
@@ -734,35 +772,10 @@ class PluginManager:
if self.health_tracker: if self.health_tracker:
self.health_tracker.record_success(plugin_id) self.health_tracker.record_success(plugin_id)
else: else:
# Execution failed (timeout or executor error) — stamp with the self._record_update_failure(plugin_id)
# actual failure time (not current_time captured before execution)
# so the full interval elapses before the next retry.
failure_time = time.time()
err = Exception(f"Plugin {plugin_id} execution failed (timeout or executor error)")
error_info = {
'error': str(err),
'error_type': 'ExecutionFailure',
'timestamp': failure_time,
'recoverable': True,
}
self.logger.warning("Plugin %s update() failed; will retry after interval", plugin_id)
self.plugin_last_update[plugin_id] = failure_time
self.state_manager.set_state_with_error(plugin_id, PluginState.ENABLED, error_info, error=err)
if self.health_tracker:
self.health_tracker.record_failure(plugin_id, err)
except Exception as exc: # pylint: disable=broad-except except Exception as exc: # pylint: disable=broad-except
failure_time = time.time()
self.logger.exception("Error updating plugin %s: %s", plugin_id, exc) self.logger.exception("Error updating plugin %s: %s", plugin_id, exc)
error_info = { self._record_update_failure(plugin_id, exc=exc)
'error': str(exc),
'error_type': type(exc).__name__,
'timestamp': failure_time,
'recoverable': True,
}
self.plugin_last_update[plugin_id] = failure_time
self.state_manager.set_state_with_error(plugin_id, PluginState.ENABLED, error_info, error=exc)
if self.health_tracker:
self.health_tracker.record_failure(plugin_id, exc)
def update_all_plugins(self) -> None: def update_all_plugins(self) -> None:
""" """
@@ -788,14 +801,12 @@ class PluginManager:
if success: if success:
self.plugin_last_update[plugin_id] = time.time() self.plugin_last_update[plugin_id] = time.time()
self.state_manager.record_update(plugin_id) self.state_manager.record_update(plugin_id)
# Update state back to ENABLED
self.state_manager.set_state(plugin_id, PluginState.ENABLED) self.state_manager.set_state(plugin_id, PluginState.ENABLED)
else: else:
# Execution failed self._record_update_failure(plugin_id)
self.state_manager.set_state(plugin_id, PluginState.ERROR)
except Exception as exc: # pylint: disable=broad-except except Exception as exc: # pylint: disable=broad-except
self.logger.exception("Error updating plugin %s: %s", plugin_id, exc) self.logger.exception("Error updating plugin %s: %s", plugin_id, exc)
self.state_manager.set_state(plugin_id, PluginState.ERROR, error=exc) self._record_update_failure(plugin_id, exc=exc)
def get_plugin_health_metrics(self) -> Dict[str, Any]: def get_plugin_health_metrics(self) -> Dict[str, Any]:
""" """