From 09c79409863d121b4ddf794060d8c2971ab625fc Mon Sep 17 00:00:00 2001 From: Chuck Date: Wed, 29 Apr 2026 09:31:04 -0400 Subject: [PATCH] fix(plugin_manager): atomic state+error write via set_state_with_error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two-step set_state() / set_error_info() sequence left a window where readers could observe ENABLED state without the accompanying error context. Add threading.RLock to PluginStateManager and a new set_state_with_error() method that holds the lock for both the state-transition write and the _error_info write together. The method inlines the state-transition logic rather than calling set_state() internally to intentionally skip the "clear _error_info for non-ERROR states" side effect — the recoverable error dict is exactly what we want stored. Replace both paired set_state / set_error_info call sites in run_scheduled_updates() with the single atomic method. Co-Authored-By: Claude Sonnet 4.6 --- src/plugin_system/plugin_manager.py | 6 ++-- src/plugin_system/plugin_state.py | 47 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/plugin_system/plugin_manager.py b/src/plugin_system/plugin_manager.py index 07764dcb..7f84178a 100644 --- a/src/plugin_system/plugin_manager.py +++ b/src/plugin_system/plugin_manager.py @@ -747,8 +747,7 @@ class PluginManager: } self.logger.warning("Plugin %s update() failed; will retry after interval", plugin_id) self.plugin_last_update[plugin_id] = failure_time - self.state_manager.set_state(plugin_id, PluginState.ENABLED) - self.state_manager.set_error_info(plugin_id, error_info) + self.state_manager.set_state_with_error(plugin_id, PluginState.ENABLED, error_info, error=err) if self.health_tracker: self.health_tracker.record_failure(plugin_id, err) except Exception as exc: # pylint: disable=broad-except @@ -761,8 +760,7 @@ class PluginManager: 'recoverable': True, } self.plugin_last_update[plugin_id] = failure_time - self.state_manager.set_state(plugin_id, PluginState.ENABLED) - self.state_manager.set_error_info(plugin_id, error_info) + self.state_manager.set_state_with_error(plugin_id, PluginState.ENABLED, error_info, error=exc) if self.health_tracker: self.health_tracker.record_failure(plugin_id, exc) diff --git a/src/plugin_system/plugin_state.py b/src/plugin_system/plugin_state.py index 1351c933..1d595d26 100644 --- a/src/plugin_system/plugin_state.py +++ b/src/plugin_system/plugin_state.py @@ -5,6 +5,7 @@ Manages plugin state machine (loaded → enabled → running → error) with state transitions and queries. """ +import threading from enum import Enum from typing import Optional, Dict, Any from datetime import datetime @@ -34,6 +35,7 @@ class PluginStateManager: logger: Optional logger instance """ self.logger = logger or get_logger(__name__) + self._lock = threading.RLock() self._states: Dict[str, PluginState] = {} self._state_history: Dict[str, list] = {} self._error_info: Dict[str, Dict[str, Any]] = {} @@ -149,6 +151,51 @@ class PluginStateManager: """ self._error_info[plugin_id] = error_info + def set_state_with_error( + self, + plugin_id: str, + state: PluginState, + error_info: Dict[str, Any], + error: Optional[Exception] = None, + ) -> None: + """Set plugin state and persist error context atomically. + + Unlike calling set_state() then set_error_info() separately, this + method holds ``_lock`` for both writes so no reader can observe the + new state without the accompanying error context. + + Intentionally does not clear ``_error_info`` the way set_state() does + for non-ERROR transitions — this is the recoverable-failure path where + the error dict is the entire point. + + Args: + plugin_id: Plugin identifier + state: New state + error_info: Structured error dict to persist alongside the state + error: Optional exception recorded in the transition history + """ + with self._lock: + old_state = self._states.get(plugin_id, PluginState.UNLOADED) + self._states[plugin_id] = state + + if plugin_id not in self._state_history: + self._state_history[plugin_id] = [] + self._state_history[plugin_id].append({ + 'timestamp': datetime.now(), + 'from': old_state.value, + 'to': state.value, + 'error': str(error) if error else None, + }) + + self._error_info[plugin_id] = error_info + + self.logger.debug( + "Plugin %s state transition: %s → %s (recoverable error stored)", + plugin_id, + old_state.value, + state.value, + ) + def get_error_info(self, plugin_id: str) -> Optional[Dict[str, Any]]: """ Get error information for a plugin.