diff --git a/src/plugin_system/state_reconciliation.py b/src/plugin_system/state_reconciliation.py index ca381cfd..f6a6ce46 100644 --- a/src/plugin_system/state_reconciliation.py +++ b/src/plugin_system/state_reconciliation.py @@ -86,16 +86,38 @@ class StateReconciliation: self.plugins_dir = Path(plugins_dir) self.store_manager = store_manager self.logger = get_logger(__name__) + + # Plugin IDs that failed auto-repair and should NOT be retried this + # process lifetime. Prevents the infinite "attempt to reinstall missing + # plugin" loop when a config entry references a plugin that isn't in + # the registry (e.g. legacy 'github', 'youtube' entries). A process + # restart — or an explicit user-initiated reconcile with force=True — + # clears this so recovery is possible after the underlying issue is + # fixed. + self._unrecoverable_missing_on_disk: set = set() - def reconcile_state(self) -> ReconciliationResult: + def reconcile_state(self, force: bool = False) -> ReconciliationResult: """ Perform state reconciliation. - + Compares state from all sources and fixes safe inconsistencies. - + + Args: + force: If True, clear the unrecoverable-plugin cache before + reconciling so previously-failed auto-repairs are retried. + Intended for user-initiated reconcile requests after the + underlying issue (e.g. registry update) has been fixed. + Returns: ReconciliationResult with findings and fixes """ + if force and self._unrecoverable_missing_on_disk: + self.logger.info( + "Force reconcile requested; clearing %d cached unrecoverable plugin(s)", + len(self._unrecoverable_missing_on_disk), + ) + self._unrecoverable_missing_on_disk.clear() + self.logger.info("Starting state reconciliation") inconsistencies = [] @@ -280,7 +302,26 @@ class StateReconciliation: # Check: Plugin in config but not on disk if config.get('exists_in_config') and not disk.get('exists_on_disk'): - can_repair = self.store_manager is not None + # Skip plugins that previously failed auto-repair in this process. + # Re-attempting wastes CPU (network + git clone each request) and + # spams the logs with the same "Plugin not found in registry" + # error. The entry is still surfaced as MANUAL_FIX_REQUIRED so the + # UI can show it, but no auto-repair will run. + previously_unrecoverable = plugin_id in self._unrecoverable_missing_on_disk + # Also refuse to re-install a plugin that the user just uninstalled + # through the UI — prevents a race where the reconciler fires + # between file removal and config cleanup and resurrects the + # plugin the user just deleted. + recently_uninstalled = ( + self.store_manager is not None + and hasattr(self.store_manager, 'was_recently_uninstalled') + and self.store_manager.was_recently_uninstalled(plugin_id) + ) + can_repair = ( + self.store_manager is not None + and not previously_unrecoverable + and not recently_uninstalled + ) inconsistencies.append(Inconsistency( plugin_id=plugin_id, inconsistency_type=InconsistencyType.PLUGIN_MISSING_ON_DISK, @@ -342,7 +383,13 @@ class StateReconciliation: return False def _auto_repair_missing_plugin(self, plugin_id: str) -> bool: - """Attempt to reinstall a missing plugin from the store.""" + """Attempt to reinstall a missing plugin from the store. + + On failure, records plugin_id in ``_unrecoverable_missing_on_disk`` so + subsequent reconciliation passes within this process do not retry and + spam the log / CPU. A process restart (or an explicit ``force=True`` + reconcile) is required to clear the cache. + """ if not self.store_manager: return False @@ -351,6 +398,35 @@ class StateReconciliation: if plugin_id.startswith('ledmatrix-'): candidates.append(plugin_id[len('ledmatrix-'):]) + # Cheap pre-check: is any candidate actually present in the registry + # at all? If not, we know up-front this is unrecoverable and can skip + # the expensive install_plugin path (which does a forced GitHub fetch + # before failing). + registry_has_candidate = False + try: + registry = self.store_manager.fetch_registry() + registry_ids = { + p.get('id') for p in (registry.get('plugins', []) or []) if p.get('id') + } + registry_has_candidate = any(c in registry_ids for c in candidates) + except Exception as e: + # If we can't reach the registry, treat this as transient — don't + # mark unrecoverable, let the next pass try again. + self.logger.warning( + "[AutoRepair] Could not read registry to check %s: %s", plugin_id, e + ) + return False + + if not registry_has_candidate: + self.logger.warning( + "[AutoRepair] %s not present in registry; marking unrecoverable " + "(will not retry this session). Reinstall from the Plugin Store " + "or remove the stale config entry to clear this warning.", + plugin_id, + ) + self._unrecoverable_missing_on_disk.add(plugin_id) + return False + for candidate_id in candidates: try: self.logger.info("[AutoRepair] Attempting to reinstall missing plugin: %s", candidate_id) @@ -366,6 +442,11 @@ class StateReconciliation: except Exception as e: self.logger.error("[AutoRepair] Error reinstalling %s: %s", candidate_id, e, exc_info=True) - self.logger.warning("[AutoRepair] Could not reinstall %s from store", plugin_id) + self.logger.warning( + "[AutoRepair] Could not reinstall %s from store; marking unrecoverable " + "(will not retry this session).", + plugin_id, + ) + self._unrecoverable_missing_on_disk.add(plugin_id) return False diff --git a/src/plugin_system/store_manager.py b/src/plugin_system/store_manager.py index fba7dc36..42cb8176 100644 --- a/src/plugin_system/store_manager.py +++ b/src/plugin_system/store_manager.py @@ -53,7 +53,12 @@ class PluginStoreManager: self.registry_cache_time = None # Timestamp of when registry was cached self.github_cache = {} # Cache for GitHub API responses self.cache_timeout = 3600 # 1 hour cache timeout - self.registry_cache_timeout = 300 # 5 minutes for registry cache + # 15 minutes for registry cache. Long enough that the plugin list + # endpoint on a warm cache never hits the network, short enough that + # new plugins show up within a reasonable window. See also the + # stale-cache fallback in fetch_registry for transient network + # failures. + self.registry_cache_timeout = 900 self.commit_info_cache = {} # Cache for latest commit info: {key: (timestamp, data)} self.commit_cache_timeout = 300 # 5 minutes (same as registry) self.manifest_cache = {} # Cache for GitHub manifest fetches: {key: (timestamp, data)} @@ -62,9 +67,38 @@ class PluginStoreManager: self._token_validation_cache = {} # Cache for token validation results: {token: (is_valid, timestamp, error_message)} self._token_validation_cache_timeout = 300 # 5 minutes cache for token validation + # Per-plugin tombstone timestamps for plugins that were uninstalled + # recently via the UI. Used by the state reconciler to avoid + # resurrecting a plugin the user just deleted when reconciliation + # races against the uninstall operation. Cleared after ``_uninstall_tombstone_ttl``. + self._uninstall_tombstones: Dict[str, float] = {} + self._uninstall_tombstone_ttl = 300 # 5 minutes + + # Cache for _get_local_git_info: {plugin_path_str: (head_mtime, data)} + # Keyed on the mtime of .git/HEAD so we re-run git only when the + # working copy actually moved. Before this cache, every + # /plugins/installed request fired 4 git subprocesses per plugin, + # which pegged the CPU on a Pi4 with a dozen plugins. + self._git_info_cache: Dict[str, tuple] = {} + # Ensure plugins directory exists self.plugins_dir.mkdir(exist_ok=True) + def mark_recently_uninstalled(self, plugin_id: str) -> None: + """Record that ``plugin_id`` was just uninstalled by the user.""" + self._uninstall_tombstones[plugin_id] = time.time() + + def was_recently_uninstalled(self, plugin_id: str) -> bool: + """Return True if ``plugin_id`` has an active uninstall tombstone.""" + ts = self._uninstall_tombstones.get(plugin_id) + if ts is None: + return False + if time.time() - ts > self._uninstall_tombstone_ttl: + # Expired — clean up so the dict doesn't grow unbounded. + self._uninstall_tombstones.pop(plugin_id, None) + return False + return True + def _load_github_token(self) -> Optional[str]: """ Load GitHub API token from config_secrets.json if available. @@ -469,9 +503,16 @@ class PluginStoreManager: return self.registry_cache except requests.RequestException as e: self.logger.error(f"Error fetching registry: {e}") + # Prefer stale cache over an empty list so the plugin list UI + # keeps working on a flaky connection (e.g. Pi on WiFi). + if self.registry_cache: + self.logger.warning("Falling back to stale registry cache") + return self.registry_cache return {"plugins": []} except json.JSONDecodeError as e: self.logger.error(f"Error parsing registry JSON: {e}") + if self.registry_cache: + return self.registry_cache return {"plugins": []} def search_plugins(self, query: str = "", category: str = "", tags: List[str] = None, fetch_commit_info: bool = True, include_saved_repos: bool = True, saved_repositories_manager = None) -> List[Dict]: @@ -1561,11 +1602,29 @@ class PluginStoreManager: return False def _get_local_git_info(self, plugin_path: Path) -> Optional[Dict[str, str]]: - """Return local git branch, commit hash, and commit date if the plugin is a git checkout.""" + """Return local git branch, commit hash, and commit date if the plugin is a git checkout. + + Results are cached keyed on the mtime of ``.git/HEAD`` so repeated + calls (e.g. one per plugin on every ``/plugins/installed`` request) + skip the four ``git`` subprocesses when nothing has changed. + """ git_dir = plugin_path / '.git' if not git_dir.exists(): return None + # mtime-gated cache lookup. + head_file = git_dir / 'HEAD' + cache_key = str(plugin_path) + try: + head_mtime = head_file.stat().st_mtime + except OSError: + head_mtime = None + + if head_mtime is not None: + cached = self._git_info_cache.get(cache_key) + if cached is not None and cached[0] == head_mtime: + return cached[1] + try: sha_result = subprocess.run( ['git', '-C', str(plugin_path), 'rev-parse', 'HEAD'], @@ -1623,6 +1682,8 @@ class PluginStoreManager: result['date_iso'] = commit_date_iso result['date'] = self._iso_to_date(commit_date_iso) + if head_mtime is not None: + self._git_info_cache[cache_key] = (head_mtime, result) return result except subprocess.CalledProcessError as err: self.logger.debug(f"Failed to read git info for {plugin_path.name}: {err}") diff --git a/test/test_store_manager_caches.py b/test/test_store_manager_caches.py new file mode 100644 index 00000000..73ef53ef --- /dev/null +++ b/test/test_store_manager_caches.py @@ -0,0 +1,151 @@ +""" +Tests for the caching and tombstone behaviors added to PluginStoreManager +to fix the plugin-list slowness and the uninstall-resurrection bugs. + +Coverage targets: +- ``mark_recently_uninstalled`` / ``was_recently_uninstalled`` lifecycle and + TTL expiry. +- ``_get_local_git_info`` mtime-gated cache: ``git`` subprocesses only run + when ``.git/HEAD`` mtime changes. +- ``fetch_registry`` stale-cache fallback on network failure. +""" + +import os +import time +import unittest +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import patch, MagicMock + +from src.plugin_system.store_manager import PluginStoreManager + + +class TestUninstallTombstone(unittest.TestCase): + def setUp(self): + self._tmp = TemporaryDirectory() + self.addCleanup(self._tmp.cleanup) + self.sm = PluginStoreManager(plugins_dir=self._tmp.name) + + def test_unmarked_plugin_is_not_recent(self): + self.assertFalse(self.sm.was_recently_uninstalled("foo")) + + def test_marking_makes_it_recent(self): + self.sm.mark_recently_uninstalled("foo") + self.assertTrue(self.sm.was_recently_uninstalled("foo")) + + def test_tombstone_expires_after_ttl(self): + self.sm._uninstall_tombstone_ttl = 0.05 + self.sm.mark_recently_uninstalled("foo") + self.assertTrue(self.sm.was_recently_uninstalled("foo")) + time.sleep(0.1) + self.assertFalse(self.sm.was_recently_uninstalled("foo")) + # Expired entry should also be pruned from the dict. + self.assertNotIn("foo", self.sm._uninstall_tombstones) + + +class TestGitInfoCache(unittest.TestCase): + def setUp(self): + self._tmp = TemporaryDirectory() + self.addCleanup(self._tmp.cleanup) + self.plugins_dir = Path(self._tmp.name) + self.sm = PluginStoreManager(plugins_dir=str(self.plugins_dir)) + + # Minimal fake git checkout: .git/HEAD needs to exist so the cache + # key (its mtime) is stable, but we mock subprocess so no actual git + # is required. + self.plugin_path = self.plugins_dir / "plg" + (self.plugin_path / ".git").mkdir(parents=True) + (self.plugin_path / ".git" / "HEAD").write_text("ref: refs/heads/main\n") + + def _fake_subprocess_run(self, *args, **kwargs): + # Return different dummy values depending on which git subcommand + # was invoked so the code paths that parse output all succeed. + cmd = args[0] + result = MagicMock() + result.returncode = 0 + if "rev-parse" in cmd and "HEAD" in cmd and "--abbrev-ref" not in cmd: + result.stdout = "abcdef1234567890\n" + elif "--abbrev-ref" in cmd: + result.stdout = "main\n" + elif "config" in cmd: + result.stdout = "https://example.com/repo.git\n" + elif "log" in cmd: + result.stdout = "2026-04-08T12:00:00+00:00\n" + else: + result.stdout = "" + return result + + def test_cache_hits_avoid_subprocess_calls(self): + with patch( + "src.plugin_system.store_manager.subprocess.run", + side_effect=self._fake_subprocess_run, + ) as mock_run: + first = self.sm._get_local_git_info(self.plugin_path) + self.assertIsNotNone(first) + self.assertEqual(first["short_sha"], "abcdef1") + calls_after_first = mock_run.call_count + self.assertEqual(calls_after_first, 4) + + # Second call with unchanged HEAD: zero new subprocess calls. + second = self.sm._get_local_git_info(self.plugin_path) + self.assertEqual(second, first) + self.assertEqual(mock_run.call_count, calls_after_first) + + def test_cache_invalidates_on_head_mtime_change(self): + with patch( + "src.plugin_system.store_manager.subprocess.run", + side_effect=self._fake_subprocess_run, + ) as mock_run: + self.sm._get_local_git_info(self.plugin_path) + calls_after_first = mock_run.call_count + + # Bump mtime on .git/HEAD to simulate a new commit being checked out. + head = self.plugin_path / ".git" / "HEAD" + new_time = head.stat().st_mtime + 10 + os.utime(head, (new_time, new_time)) + + self.sm._get_local_git_info(self.plugin_path) + self.assertEqual(mock_run.call_count, calls_after_first + 4) + + def test_no_git_directory_returns_none(self): + non_git = self.plugins_dir / "no_git" + non_git.mkdir() + self.assertIsNone(self.sm._get_local_git_info(non_git)) + + +class TestRegistryStaleCacheFallback(unittest.TestCase): + def setUp(self): + self._tmp = TemporaryDirectory() + self.addCleanup(self._tmp.cleanup) + self.sm = PluginStoreManager(plugins_dir=self._tmp.name) + + def test_network_failure_returns_stale_cache(self): + # Prime the cache with a known-good registry. + self.sm.registry_cache = {"plugins": [{"id": "cached"}]} + self.sm.registry_cache_time = time.time() - 10_000 # very old + self.sm.registry_cache_timeout = 1 # force re-fetch attempt + + import requests as real_requests + with patch.object( + self.sm, + "_http_get_with_retries", + side_effect=real_requests.RequestException("boom"), + ): + result = self.sm.fetch_registry() + + self.assertEqual(result, {"plugins": [{"id": "cached"}]}) + + def test_network_failure_with_no_cache_returns_empty(self): + self.sm.registry_cache = None + import requests as real_requests + with patch.object( + self.sm, + "_http_get_with_retries", + side_effect=real_requests.RequestException("boom"), + ): + result = self.sm.fetch_registry() + self.assertEqual(result, {"plugins": []}) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/web_interface/test_state_reconciliation.py b/test/web_interface/test_state_reconciliation.py index 538c252f..55d842ea 100644 --- a/test/web_interface/test_state_reconciliation.py +++ b/test/web_interface/test_state_reconciliation.py @@ -342,6 +342,121 @@ class TestStateReconciliation(unittest.TestCase): self.assertEqual(state, {}) +class TestStateReconciliationUnrecoverable(unittest.TestCase): + """Tests for the unrecoverable-plugin cache and force reconcile. + + Regression coverage for the infinite reinstall loop where a config + entry referenced a plugin not present in the registry (e.g. legacy + 'github' / 'youtube' entries). The reconciler used to retry the + install on every HTTP request; it now caches the failure for the + process lifetime and only retries on an explicit ``force=True`` + reconcile call. + """ + + def setUp(self): + self.temp_dir = Path(tempfile.mkdtemp()) + self.plugins_dir = self.temp_dir / "plugins" + self.plugins_dir.mkdir() + + self.state_manager = Mock(spec=PluginStateManager) + self.state_manager.get_all_states.return_value = {} + self.config_manager = Mock() + self.config_manager.load_config.return_value = { + "ghost": {"enabled": True} + } + self.plugin_manager = Mock() + self.plugin_manager.plugin_manifests = {} + self.plugin_manager.plugins = {} + + # Store manager with an empty registry — install_plugin always fails + self.store_manager = Mock() + self.store_manager.fetch_registry.return_value = {"plugins": []} + self.store_manager.install_plugin.return_value = False + self.store_manager.was_recently_uninstalled.return_value = False + + self.reconciler = StateReconciliation( + state_manager=self.state_manager, + config_manager=self.config_manager, + plugin_manager=self.plugin_manager, + plugins_dir=self.plugins_dir, + store_manager=self.store_manager, + ) + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_not_in_registry_marks_unrecoverable_without_install(self): + """If the plugin isn't in the registry at all, skip install_plugin.""" + result = self.reconciler.reconcile_state() + + # One inconsistency, unfixable, no install attempt made. + self.assertEqual(len(result.inconsistencies_found), 1) + self.assertEqual(len(result.inconsistencies_fixed), 0) + self.store_manager.install_plugin.assert_not_called() + self.assertIn("ghost", self.reconciler._unrecoverable_missing_on_disk) + + def test_subsequent_reconcile_does_not_retry(self): + """Second reconcile pass must not touch install_plugin or fetch_registry again.""" + self.reconciler.reconcile_state() + self.store_manager.fetch_registry.reset_mock() + self.store_manager.install_plugin.reset_mock() + + result = self.reconciler.reconcile_state() + + # Still one inconsistency, still no install attempt, no new registry fetch + self.assertEqual(len(result.inconsistencies_found), 1) + inc = result.inconsistencies_found[0] + self.assertFalse(inc.can_auto_fix) + self.assertEqual(inc.fix_action, FixAction.MANUAL_FIX_REQUIRED) + self.store_manager.install_plugin.assert_not_called() + self.store_manager.fetch_registry.assert_not_called() + + def test_force_reconcile_clears_unrecoverable_cache(self): + """force=True must re-attempt previously-failed plugins.""" + self.reconciler.reconcile_state() + self.assertIn("ghost", self.reconciler._unrecoverable_missing_on_disk) + + # Now pretend the registry gained the plugin so the pre-check passes + # and install_plugin is actually invoked. + self.store_manager.fetch_registry.return_value = { + "plugins": [{"id": "ghost"}] + } + self.store_manager.install_plugin.return_value = True + self.store_manager.install_plugin.reset_mock() + + # Config still references ghost; disk still missing it — the + # reconciler should re-attempt install now that force=True cleared + # the cache. + result = self.reconciler.reconcile_state(force=True) + + self.store_manager.install_plugin.assert_called_with("ghost") + + def test_registry_unreachable_does_not_mark_unrecoverable(self): + """Transient registry failures should not poison the cache.""" + self.store_manager.fetch_registry.side_effect = Exception("network down") + + result = self.reconciler.reconcile_state() + + self.assertEqual(len(result.inconsistencies_found), 1) + self.assertNotIn("ghost", self.reconciler._unrecoverable_missing_on_disk) + self.store_manager.install_plugin.assert_not_called() + + def test_recently_uninstalled_skips_auto_repair(self): + """A freshly-uninstalled plugin must not be resurrected by the reconciler.""" + self.store_manager.was_recently_uninstalled.return_value = True + self.store_manager.fetch_registry.return_value = { + "plugins": [{"id": "ghost"}] + } + + result = self.reconciler.reconcile_state() + + self.assertEqual(len(result.inconsistencies_found), 1) + inc = result.inconsistencies_found[0] + self.assertFalse(inc.can_auto_fix) + self.assertEqual(inc.fix_action, FixAction.MANUAL_FIX_REQUIRED) + self.store_manager.install_plugin.assert_not_called() + + if __name__ == '__main__': unittest.main() diff --git a/web_interface/app.py b/web_interface/app.py index 66684f86..42568ce0 100644 --- a/web_interface/app.py +++ b/web_interface/app.py @@ -667,8 +667,20 @@ import threading as _threading _reconciliation_lock = _threading.Lock() def _run_startup_reconciliation() -> None: - """Run state reconciliation in background to auto-repair missing plugins.""" - global _reconciliation_done, _reconciliation_started + """Run state reconciliation in background to auto-repair missing plugins. + + Reconciliation runs exactly once per process lifetime, regardless of + whether every inconsistency could be auto-fixed. Previously, a failed + auto-repair (e.g. a config entry referencing a plugin that no longer + exists in the registry) would reset ``_reconciliation_started`` to False, + causing the ``@app.before_request`` hook to re-trigger reconciliation on + every single HTTP request — an infinite install-retry loop that pegged + the CPU and flooded the log. Unresolved issues are now left in place for + the user to address via the UI; the reconciler itself also caches + per-plugin unrecoverable failures internally so repeated reconcile calls + stay cheap. + """ + global _reconciliation_done from src.logging_config import get_logger _logger = get_logger('reconciliation') @@ -684,18 +696,22 @@ def _run_startup_reconciliation() -> None: result = reconciler.reconcile_state() if result.inconsistencies_found: _logger.info("[Reconciliation] %s", result.message) - if result.reconciliation_successful: - if result.inconsistencies_fixed: - plugin_manager.discover_plugins() - _reconciliation_done = True - else: - _logger.warning("[Reconciliation] Finished with unresolved issues, will retry") - with _reconciliation_lock: - _reconciliation_started = False + if result.inconsistencies_fixed: + plugin_manager.discover_plugins() + if not result.reconciliation_successful: + _logger.warning( + "[Reconciliation] Finished with %d unresolved issue(s); " + "will not retry automatically. Use the Plugin Store or the " + "manual 'Reconcile' action to resolve.", + len(result.inconsistencies_manual), + ) except Exception as e: _logger.error("[Reconciliation] Error: %s", e, exc_info=True) - with _reconciliation_lock: - _reconciliation_started = False + finally: + # Always mark done — we do not want an unhandled exception (or an + # unresolved inconsistency) to cause the @before_request hook to + # retrigger reconciliation on every subsequent request. + _reconciliation_done = True # Initialize health monitor and run reconciliation on first request @app.before_request diff --git a/web_interface/blueprints/api_v3.py b/web_interface/blueprints/api_v3.py index 7fe9c7b3..06cfc79e 100644 --- a/web_interface/blueprints/api_v3.py +++ b/web_interface/blueprints/api_v3.py @@ -1714,9 +1714,23 @@ def get_installed_plugins(): import json from pathlib import Path - # Re-discover plugins to ensure we have the latest list - # This handles cases where plugins are added/removed after app startup - api_v3.plugin_manager.discover_plugins() + # Re-discover plugins only if the plugins directory has actually + # changed since our last scan, or if the caller explicitly asked + # for a refresh. The previous unconditional ``discover_plugins()`` + # call (plus a per-plugin manifest re-read) made this endpoint + # O(plugins) in disk I/O on every page refresh, which on an SD-card + # Pi4 with ~15 plugins was pegging the CPU and blocking the UI + # "connecting to display" spinner for minutes. + force_refresh = request.args.get('refresh', '').lower() in ('1', 'true', 'yes') + plugins_dir_path = Path(api_v3.plugin_manager.plugins_dir) + try: + current_mtime = plugins_dir_path.stat().st_mtime if plugins_dir_path.exists() else 0 + except OSError: + current_mtime = 0 + last_mtime = getattr(api_v3, '_installed_plugins_dir_mtime', None) + if force_refresh or last_mtime != current_mtime: + api_v3.plugin_manager.discover_plugins() + api_v3._installed_plugins_dir_mtime = current_mtime # Get all installed plugin info from the plugin manager all_plugin_info = api_v3.plugin_manager.get_all_plugin_info() @@ -1729,17 +1743,10 @@ def get_installed_plugins(): for plugin_info in all_plugin_info: plugin_id = plugin_info.get('id') - # Re-read manifest from disk to ensure we have the latest metadata - manifest_path = Path(api_v3.plugin_manager.plugins_dir) / plugin_id / "manifest.json" - if manifest_path.exists(): - try: - with open(manifest_path, 'r', encoding='utf-8') as f: - fresh_manifest = json.load(f) - # Update plugin_info with fresh manifest data - plugin_info.update(fresh_manifest) - except Exception as e: - # If we can't read the fresh manifest, use the cached one - logger.warning("[PluginStore] Could not read fresh manifest for %s: %s", plugin_id, e) + # Note: we intentionally do NOT re-read manifest.json here. + # discover_plugins() above already reparses manifests on change; + # re-reading on every request added ~1 syscall+json.loads per + # plugin per request for no benefit. # Get enabled status from config (source of truth) # Read from config file first, fall back to plugin instance if config doesn't have the key @@ -2369,14 +2376,25 @@ def reconcile_plugin_state(): from src.plugin_system.state_reconciliation import StateReconciliation + # Pass the store manager so auto-repair of missing-on-disk plugins + # can actually run. Previously this endpoint silently degraded to + # MANUAL_FIX_REQUIRED because store_manager was omitted. reconciler = StateReconciliation( state_manager=api_v3.plugin_state_manager, config_manager=api_v3.config_manager, plugin_manager=api_v3.plugin_manager, - plugins_dir=Path(api_v3.plugin_manager.plugins_dir) + plugins_dir=Path(api_v3.plugin_manager.plugins_dir), + store_manager=api_v3.plugin_store_manager, ) - result = reconciler.reconcile_state() + # Allow the caller to force a retry of previously-unrecoverable + # plugins (e.g. after the registry has been updated or a typo fixed). + force = False + if request.is_json: + payload = request.get_json(silent=True) or {} + force = bool(payload.get('force', False)) + + result = reconciler.reconcile_state(force=force) return success_response( data={ @@ -2822,6 +2840,22 @@ def uninstall_plugin(): if api_v3.operation_queue: def uninstall_callback(operation): """Callback to execute plugin uninstallation.""" + # Drop a tombstone *first* so a background reconciliation pass + # that interleaves with us cannot see the transient + # "config-entry-with-no-files" state and resurrect the + # plugin the user just asked to delete. + if hasattr(api_v3.plugin_store_manager, 'mark_recently_uninstalled'): + api_v3.plugin_store_manager.mark_recently_uninstalled(plugin_id) + + # Clean up plugin configuration BEFORE removing files so the + # config entry and the on-disk files disappear together from + # the reconciler's point of view. + if not preserve_config: + try: + api_v3.config_manager.cleanup_plugin_config(plugin_id, remove_secrets=True) + except Exception as cleanup_err: + logger.warning("[PluginUninstall] Failed to cleanup config for %s: %s", plugin_id, cleanup_err) + # Unload the plugin first if it's loaded if api_v3.plugin_manager and plugin_id in api_v3.plugin_manager.plugins: api_v3.plugin_manager.unload_plugin(plugin_id) @@ -2844,13 +2878,6 @@ def uninstall_plugin(): if api_v3.schema_manager: api_v3.schema_manager.invalidate_cache(plugin_id) - # Clean up plugin configuration if not preserving - if not preserve_config: - try: - api_v3.config_manager.cleanup_plugin_config(plugin_id, remove_secrets=True) - except Exception as cleanup_err: - logger.warning("[PluginUninstall] Failed to cleanup config for %s: %s", plugin_id, cleanup_err) - # Remove from state manager if api_v3.plugin_state_manager: api_v3.plugin_state_manager.remove_plugin_state(plugin_id) @@ -2879,6 +2906,18 @@ def uninstall_plugin(): ) else: # Fallback to direct uninstall + # Tombstone + config cleanup happen BEFORE file removal — see + # queue path above for the full rationale (prevents reconciler + # resurrection race). + if hasattr(api_v3.plugin_store_manager, 'mark_recently_uninstalled'): + api_v3.plugin_store_manager.mark_recently_uninstalled(plugin_id) + + if not preserve_config: + try: + api_v3.config_manager.cleanup_plugin_config(plugin_id, remove_secrets=True) + except Exception as cleanup_err: + logger.warning("[PluginUninstall] Failed to cleanup config for %s: %s", plugin_id, cleanup_err) + # Unload the plugin first if it's loaded if api_v3.plugin_manager and plugin_id in api_v3.plugin_manager.plugins: api_v3.plugin_manager.unload_plugin(plugin_id) @@ -2891,13 +2930,6 @@ def uninstall_plugin(): if api_v3.schema_manager: api_v3.schema_manager.invalidate_cache(plugin_id) - # Clean up plugin configuration if not preserving - if not preserve_config: - try: - api_v3.config_manager.cleanup_plugin_config(plugin_id, remove_secrets=True) - except Exception as cleanup_err: - logger.warning("[PluginUninstall] Failed to cleanup config for %s: %s", plugin_id, cleanup_err) - # Remove from state manager if api_v3.plugin_state_manager: api_v3.plugin_state_manager.remove_plugin_state(plugin_id)