mirror of
https://github.com/ChuckBuilds/LEDMatrix.git
synced 2026-04-10 21:03:01 +00:00
fix(plugins): stop reconciliation install loop, slow plugin list, and uninstall resurrection
Three interacting bugs reported by a user (Discord/ericepe) on a fresh install:

1. The state reconciler retried failed auto-repairs on every HTTP request, pegging CPU and flooding logs with "Plugin not found in registry: github / youtube". Root cause: ``_run_startup_reconciliation`` reset ``_reconciliation_started`` to False on any unresolved inconsistency, so ``@app.before_request`` re-fired the entire pass on the next request. Fix: run reconciliation exactly once per process; cache per-plugin unrecoverable failures inside the reconciler so even an explicit re-trigger stays cheap; add a registry pre-check to skip the expensive GitHub fetch when we already know the plugin is missing; expose ``force=True`` on ``/plugins/state/reconcile`` so users can retry after fixing the underlying issue.

2. Uninstalling a plugin via the UI succeeded but the plugin reappeared. Root cause: a race between ``store_manager.uninstall_plugin`` (removes files) and ``cleanup_plugin_config`` (removes config entry) — if reconciliation fired in the gap it saw "config entry with no files" and reinstalled. Fix: reorder uninstall to clean config FIRST, drop a short-lived "recently uninstalled" tombstone on the store manager that the reconciler honors, and pass ``store_manager`` to the manual ``/plugins/state/reconcile`` endpoint (it was previously omitted, which silently disabled auto-repair entirely).

3. ``GET /plugins/installed`` was very slow on a Pi4 (UI hung on "connecting to display" for minutes, ~98% CPU). Root causes: per-request ``discover_plugins()`` + manifest re-read + four ``git`` subprocesses per plugin (``rev-parse``, ``rev-parse --abbrev-ref``, ``config``, ``log``). Fix: mtime-gate ``discover_plugins()`` and drop the per-plugin manifest re-read in the endpoint; cache ``_get_local_git_info`` keyed on ``.git/HEAD`` mtime so subprocesses only run when the working copy actually moved; bump registry cache TTL from 5 to 15 minutes and fall back to stale cache on transient network failure.
Tests: 16 reconciliation cases (including 5 new ones covering the unrecoverable cache, force-reconcile path, transient-failure handling, and recently-uninstalled tombstone) and 8 new store_manager cache tests covering tombstone TTL, git-info mtime cache hit/miss, and the registry stale-cache fallback. All 24 pass; the broader 288-test suite continues to pass with no new failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -667,8 +667,20 @@ import threading as _threading
|
||||
_reconciliation_lock = _threading.Lock()
|
||||
|
||||
def _run_startup_reconciliation() -> None:
|
||||
"""Run state reconciliation in background to auto-repair missing plugins."""
|
||||
global _reconciliation_done, _reconciliation_started
|
||||
"""Run state reconciliation in background to auto-repair missing plugins.
|
||||
|
||||
Reconciliation runs exactly once per process lifetime, regardless of
|
||||
whether every inconsistency could be auto-fixed. Previously, a failed
|
||||
auto-repair (e.g. a config entry referencing a plugin that no longer
|
||||
exists in the registry) would reset ``_reconciliation_started`` to False,
|
||||
causing the ``@app.before_request`` hook to re-trigger reconciliation on
|
||||
every single HTTP request — an infinite install-retry loop that pegged
|
||||
the CPU and flooded the log. Unresolved issues are now left in place for
|
||||
the user to address via the UI; the reconciler itself also caches
|
||||
per-plugin unrecoverable failures internally so repeated reconcile calls
|
||||
stay cheap.
|
||||
"""
|
||||
global _reconciliation_done
|
||||
from src.logging_config import get_logger
|
||||
_logger = get_logger('reconciliation')
|
||||
|
||||
@@ -684,18 +696,22 @@ def _run_startup_reconciliation() -> None:
|
||||
result = reconciler.reconcile_state()
|
||||
if result.inconsistencies_found:
|
||||
_logger.info("[Reconciliation] %s", result.message)
|
||||
if result.reconciliation_successful:
|
||||
if result.inconsistencies_fixed:
|
||||
plugin_manager.discover_plugins()
|
||||
_reconciliation_done = True
|
||||
else:
|
||||
_logger.warning("[Reconciliation] Finished with unresolved issues, will retry")
|
||||
with _reconciliation_lock:
|
||||
_reconciliation_started = False
|
||||
if result.inconsistencies_fixed:
|
||||
plugin_manager.discover_plugins()
|
||||
if not result.reconciliation_successful:
|
||||
_logger.warning(
|
||||
"[Reconciliation] Finished with %d unresolved issue(s); "
|
||||
"will not retry automatically. Use the Plugin Store or the "
|
||||
"manual 'Reconcile' action to resolve.",
|
||||
len(result.inconsistencies_manual),
|
||||
)
|
||||
except Exception as e:
|
||||
_logger.error("[Reconciliation] Error: %s", e, exc_info=True)
|
||||
with _reconciliation_lock:
|
||||
_reconciliation_started = False
|
||||
finally:
|
||||
# Always mark done — we do not want an unhandled exception (or an
|
||||
# unresolved inconsistency) to cause the @before_request hook to
|
||||
# retrigger reconciliation on every subsequent request.
|
||||
_reconciliation_done = True
|
||||
|
||||
# Initialize health monitor and run reconciliation on first request
|
||||
@app.before_request
|
||||
|
||||
@@ -1714,9 +1714,23 @@ def get_installed_plugins():
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
# Re-discover plugins to ensure we have the latest list
|
||||
# This handles cases where plugins are added/removed after app startup
|
||||
api_v3.plugin_manager.discover_plugins()
|
||||
# Re-discover plugins only if the plugins directory has actually
|
||||
# changed since our last scan, or if the caller explicitly asked
|
||||
# for a refresh. The previous unconditional ``discover_plugins()``
|
||||
# call (plus a per-plugin manifest re-read) made this endpoint
|
||||
# O(plugins) in disk I/O on every page refresh, which on an SD-card
|
||||
# Pi4 with ~15 plugins was pegging the CPU and blocking the UI
|
||||
# "connecting to display" spinner for minutes.
|
||||
force_refresh = request.args.get('refresh', '').lower() in ('1', 'true', 'yes')
|
||||
plugins_dir_path = Path(api_v3.plugin_manager.plugins_dir)
|
||||
try:
|
||||
current_mtime = plugins_dir_path.stat().st_mtime if plugins_dir_path.exists() else 0
|
||||
except OSError:
|
||||
current_mtime = 0
|
||||
last_mtime = getattr(api_v3, '_installed_plugins_dir_mtime', None)
|
||||
if force_refresh or last_mtime != current_mtime:
|
||||
api_v3.plugin_manager.discover_plugins()
|
||||
api_v3._installed_plugins_dir_mtime = current_mtime
|
||||
|
||||
# Get all installed plugin info from the plugin manager
|
||||
all_plugin_info = api_v3.plugin_manager.get_all_plugin_info()
|
||||
@@ -1729,17 +1743,10 @@ def get_installed_plugins():
|
||||
for plugin_info in all_plugin_info:
|
||||
plugin_id = plugin_info.get('id')
|
||||
|
||||
# Re-read manifest from disk to ensure we have the latest metadata
|
||||
manifest_path = Path(api_v3.plugin_manager.plugins_dir) / plugin_id / "manifest.json"
|
||||
if manifest_path.exists():
|
||||
try:
|
||||
with open(manifest_path, 'r', encoding='utf-8') as f:
|
||||
fresh_manifest = json.load(f)
|
||||
# Update plugin_info with fresh manifest data
|
||||
plugin_info.update(fresh_manifest)
|
||||
except Exception as e:
|
||||
# If we can't read the fresh manifest, use the cached one
|
||||
logger.warning("[PluginStore] Could not read fresh manifest for %s: %s", plugin_id, e)
|
||||
# Note: we intentionally do NOT re-read manifest.json here.
|
||||
# discover_plugins() above already reparses manifests on change;
|
||||
# re-reading on every request added ~1 syscall+json.loads per
|
||||
# plugin per request for no benefit.
|
||||
|
||||
# Get enabled status from config (source of truth)
|
||||
# Read from config file first, fall back to plugin instance if config doesn't have the key
|
||||
@@ -2369,14 +2376,25 @@ def reconcile_plugin_state():
|
||||
|
||||
from src.plugin_system.state_reconciliation import StateReconciliation
|
||||
|
||||
# Pass the store manager so auto-repair of missing-on-disk plugins
|
||||
# can actually run. Previously this endpoint silently degraded to
|
||||
# MANUAL_FIX_REQUIRED because store_manager was omitted.
|
||||
reconciler = StateReconciliation(
|
||||
state_manager=api_v3.plugin_state_manager,
|
||||
config_manager=api_v3.config_manager,
|
||||
plugin_manager=api_v3.plugin_manager,
|
||||
plugins_dir=Path(api_v3.plugin_manager.plugins_dir)
|
||||
plugins_dir=Path(api_v3.plugin_manager.plugins_dir),
|
||||
store_manager=api_v3.plugin_store_manager,
|
||||
)
|
||||
|
||||
result = reconciler.reconcile_state()
|
||||
# Allow the caller to force a retry of previously-unrecoverable
|
||||
# plugins (e.g. after the registry has been updated or a typo fixed).
|
||||
force = False
|
||||
if request.is_json:
|
||||
payload = request.get_json(silent=True) or {}
|
||||
force = bool(payload.get('force', False))
|
||||
|
||||
result = reconciler.reconcile_state(force=force)
|
||||
|
||||
return success_response(
|
||||
data={
|
||||
@@ -2822,6 +2840,22 @@ def uninstall_plugin():
|
||||
if api_v3.operation_queue:
|
||||
def uninstall_callback(operation):
|
||||
"""Callback to execute plugin uninstallation."""
|
||||
# Drop a tombstone *first* so a background reconciliation pass
|
||||
# that interleaves with us cannot see the transient
|
||||
# "config-entry-with-no-files" state and resurrect the
|
||||
# plugin the user just asked to delete.
|
||||
if hasattr(api_v3.plugin_store_manager, 'mark_recently_uninstalled'):
|
||||
api_v3.plugin_store_manager.mark_recently_uninstalled(plugin_id)
|
||||
|
||||
# Clean up plugin configuration BEFORE removing files so the
|
||||
# config entry and the on-disk files disappear together from
|
||||
# the reconciler's point of view.
|
||||
if not preserve_config:
|
||||
try:
|
||||
api_v3.config_manager.cleanup_plugin_config(plugin_id, remove_secrets=True)
|
||||
except Exception as cleanup_err:
|
||||
logger.warning("[PluginUninstall] Failed to cleanup config for %s: %s", plugin_id, cleanup_err)
|
||||
|
||||
# Unload the plugin first if it's loaded
|
||||
if api_v3.plugin_manager and plugin_id in api_v3.plugin_manager.plugins:
|
||||
api_v3.plugin_manager.unload_plugin(plugin_id)
|
||||
@@ -2844,13 +2878,6 @@ def uninstall_plugin():
|
||||
if api_v3.schema_manager:
|
||||
api_v3.schema_manager.invalidate_cache(plugin_id)
|
||||
|
||||
# Clean up plugin configuration if not preserving
|
||||
if not preserve_config:
|
||||
try:
|
||||
api_v3.config_manager.cleanup_plugin_config(plugin_id, remove_secrets=True)
|
||||
except Exception as cleanup_err:
|
||||
logger.warning("[PluginUninstall] Failed to cleanup config for %s: %s", plugin_id, cleanup_err)
|
||||
|
||||
# Remove from state manager
|
||||
if api_v3.plugin_state_manager:
|
||||
api_v3.plugin_state_manager.remove_plugin_state(plugin_id)
|
||||
@@ -2879,6 +2906,18 @@ def uninstall_plugin():
|
||||
)
|
||||
else:
|
||||
# Fallback to direct uninstall
|
||||
# Tombstone + config cleanup happen BEFORE file removal — see
|
||||
# queue path above for the full rationale (prevents reconciler
|
||||
# resurrection race).
|
||||
if hasattr(api_v3.plugin_store_manager, 'mark_recently_uninstalled'):
|
||||
api_v3.plugin_store_manager.mark_recently_uninstalled(plugin_id)
|
||||
|
||||
if not preserve_config:
|
||||
try:
|
||||
api_v3.config_manager.cleanup_plugin_config(plugin_id, remove_secrets=True)
|
||||
except Exception as cleanup_err:
|
||||
logger.warning("[PluginUninstall] Failed to cleanup config for %s: %s", plugin_id, cleanup_err)
|
||||
|
||||
# Unload the plugin first if it's loaded
|
||||
if api_v3.plugin_manager and plugin_id in api_v3.plugin_manager.plugins:
|
||||
api_v3.plugin_manager.unload_plugin(plugin_id)
|
||||
@@ -2891,13 +2930,6 @@ def uninstall_plugin():
|
||||
if api_v3.schema_manager:
|
||||
api_v3.schema_manager.invalidate_cache(plugin_id)
|
||||
|
||||
# Clean up plugin configuration if not preserving
|
||||
if not preserve_config:
|
||||
try:
|
||||
api_v3.config_manager.cleanup_plugin_config(plugin_id, remove_secrets=True)
|
||||
except Exception as cleanup_err:
|
||||
logger.warning("[PluginUninstall] Failed to cleanup config for %s: %s", plugin_id, cleanup_err)
|
||||
|
||||
# Remove from state manager
|
||||
if api_v3.plugin_state_manager:
|
||||
api_v3.plugin_state_manager.remove_plugin_state(plugin_id)
|
||||
|
||||
Reference in New Issue
Block a user