fix(plugins): stop reconciliation install loop, slow plugin list, and uninstall resurrection

Three interacting bugs reported by a user (Discord/ericepe) on a fresh install:

1. The state reconciler retried failed auto-repairs on every HTTP request,
   pegging CPU and flooding logs with "Plugin not found in registry: github
   / youtube". Root cause: ``_run_startup_reconciliation`` reset
   ``_reconciliation_started`` to False on any unresolved inconsistency, so
   ``@app.before_request`` re-fired the entire pass on the next request.
   Fix: run reconciliation exactly once per process; cache per-plugin
   unrecoverable failures inside the reconciler so even an explicit
   re-trigger stays cheap; add a registry pre-check to skip the expensive
   GitHub fetch when we already know the plugin is missing; expose
   ``force=True`` on ``/plugins/state/reconcile`` so users can retry after
   fixing the underlying issue.

2. Uninstalling a plugin via the UI succeeded but the plugin reappeared.
   Root cause: a race between ``store_manager.uninstall_plugin`` (removes
   files) and ``cleanup_plugin_config`` (removes config entry) — if
   reconciliation fired in the gap it saw "config entry with no files" and
   reinstalled. Fix: reorder uninstall to clean config FIRST, drop a
   short-lived "recently uninstalled" tombstone on the store manager that
   the reconciler honors, and pass ``store_manager`` to the manual
   ``/plugins/state/reconcile`` endpoint (it was previously omitted, which
   silently disabled auto-repair entirely).

3. ``GET /plugins/installed`` was very slow on a Pi4 (UI hung on
   "connecting to display" for minutes, ~98% CPU). Root causes: per-request
   ``discover_plugins()`` + manifest re-read + four ``git`` subprocesses per
   plugin (``rev-parse``, ``rev-parse --abbrev-ref``, ``config``, ``log``). Fix:
   mtime-gate ``discover_plugins()`` and drop the per-plugin manifest
   re-read in the endpoint; cache ``_get_local_git_info`` keyed on
   ``.git/HEAD`` mtime so subprocesses only run when the working copy
   actually moved; bump registry cache TTL from 5 to 15 minutes and fall
   back to stale cache on transient network failure.

Tests: 16 reconciliation cases (including 5 new ones covering the
unrecoverable cache, force-reconcile path, transient-failure handling, and
recently-uninstalled tombstone) and 8 new store_manager cache tests
covering tombstone TTL, git-info mtime cache hit/miss, and the registry
stale-cache fallback. All 24 pass; the broader 288-test suite continues to
pass with no new failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Chuck
2026-04-08 09:46:55 -04:00
parent 781224591f
commit b031b99130
6 changed files with 506 additions and 50 deletions

View File

@@ -342,6 +342,121 @@ class TestStateReconciliation(unittest.TestCase):
self.assertEqual(state, {})
class TestStateReconciliationUnrecoverable(unittest.TestCase):
    """Tests for the unrecoverable-plugin cache and force reconcile.

    Regression coverage for the infinite reinstall loop where a config
    entry referenced a plugin not present in the registry (e.g. legacy
    'github' / 'youtube' entries). The reconciler used to retry the
    install on every HTTP request; it now caches the failure for the
    process lifetime and only retries on an explicit ``force=True``
    reconcile call.

    Every test starts from the same fixture: the config enables a plugin
    named "ghost", the plugins directory on disk is empty, and the mocked
    store manager reports an empty registry.
    """

    def setUp(self):
        """Build a reconciler whose config references a plugin missing on disk."""
        self.temp_dir = Path(tempfile.mkdtemp())
        # Register cleanup right away: tearDown is skipped when setUp raises,
        # whereas cleanups registered before the failure still run.
        self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
        self.plugins_dir = self.temp_dir / "plugins"
        self.plugins_dir.mkdir()

        self.state_manager = Mock(spec=PluginStateManager)
        self.state_manager.get_all_states.return_value = {}

        # The config is the only place that knows about "ghost".
        self.config_manager = Mock()
        self.config_manager.load_config.return_value = {
            "ghost": {"enabled": True}
        }

        self.plugin_manager = Mock()
        self.plugin_manager.plugin_manifests = {}
        self.plugin_manager.plugins = {}

        # Store manager with an empty registry — install_plugin always fails
        self.store_manager = Mock()
        self.store_manager.fetch_registry.return_value = {"plugins": []}
        self.store_manager.install_plugin.return_value = False
        self.store_manager.was_recently_uninstalled.return_value = False

        self.reconciler = StateReconciliation(
            state_manager=self.state_manager,
            config_manager=self.config_manager,
            plugin_manager=self.plugin_manager,
            plugins_dir=self.plugins_dir,
            store_manager=self.store_manager,
        )

    def test_not_in_registry_marks_unrecoverable_without_install(self):
        """If the plugin isn't in the registry at all, skip install_plugin."""
        result = self.reconciler.reconcile_state()

        # One inconsistency, unfixable, no install attempt made.
        self.assertEqual(len(result.inconsistencies_found), 1)
        self.assertEqual(len(result.inconsistencies_fixed), 0)
        self.store_manager.install_plugin.assert_not_called()
        self.assertIn("ghost", self.reconciler._unrecoverable_missing_on_disk)

    def test_subsequent_reconcile_does_not_retry(self):
        """Second reconcile pass must not touch install_plugin or fetch_registry again."""
        self.reconciler.reconcile_state()
        # Forget the calls made by the first pass so only the second is judged.
        self.store_manager.fetch_registry.reset_mock()
        self.store_manager.install_plugin.reset_mock()

        result = self.reconciler.reconcile_state()

        # Still one inconsistency, still no install attempt, no new registry fetch
        self.assertEqual(len(result.inconsistencies_found), 1)
        inc = result.inconsistencies_found[0]
        self.assertFalse(inc.can_auto_fix)
        self.assertEqual(inc.fix_action, FixAction.MANUAL_FIX_REQUIRED)
        self.store_manager.install_plugin.assert_not_called()
        self.store_manager.fetch_registry.assert_not_called()

    def test_force_reconcile_clears_unrecoverable_cache(self):
        """force=True must re-attempt previously-failed plugins."""
        self.reconciler.reconcile_state()
        self.assertIn("ghost", self.reconciler._unrecoverable_missing_on_disk)

        # Now pretend the registry gained the plugin so the pre-check passes
        # and install_plugin is actually invoked.
        self.store_manager.fetch_registry.return_value = {
            "plugins": [{"id": "ghost"}]
        }
        self.store_manager.install_plugin.return_value = True
        self.store_manager.install_plugin.reset_mock()

        # Config still references ghost; disk still missing it — the
        # reconciler should re-attempt install now that force=True cleared
        # the cache.
        self.reconciler.reconcile_state(force=True)

        self.store_manager.install_plugin.assert_called_with("ghost")

    def test_registry_unreachable_does_not_mark_unrecoverable(self):
        """Transient registry failures should not poison the cache."""
        self.store_manager.fetch_registry.side_effect = Exception("network down")

        result = self.reconciler.reconcile_state()

        # The inconsistency is still reported, but the plugin is neither
        # cached as unrecoverable nor installed.
        self.assertEqual(len(result.inconsistencies_found), 1)
        self.assertNotIn("ghost", self.reconciler._unrecoverable_missing_on_disk)
        self.store_manager.install_plugin.assert_not_called()

    def test_recently_uninstalled_skips_auto_repair(self):
        """A freshly-uninstalled plugin must not be resurrected by the reconciler."""
        self.store_manager.was_recently_uninstalled.return_value = True
        # The registry does list the plugin, so the not-in-registry path
        # cannot be what blocks the install here.
        self.store_manager.fetch_registry.return_value = {
            "plugins": [{"id": "ghost"}]
        }

        result = self.reconciler.reconcile_state()

        self.assertEqual(len(result.inconsistencies_found), 1)
        inc = result.inconsistencies_found[0]
        self.assertFalse(inc.can_auto_fix)
        self.assertEqual(inc.fix_action, FixAction.MANUAL_FIX_REQUIRED)
        self.store_manager.install_plugin.assert_not_called()
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()