fix: auto-repair missing plugins on startup (#293)

* fix: auto-repair missing plugins and graceful config fallback

Plugins whose directories are missing (failed update, migration, etc.)
now get automatically reinstalled from the store on startup. The config
endpoint no longer returns a hard 500 when a schema is unavailable —
it falls back to conservative key-name-based masking so the settings
page stays functional.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: handle ledmatrix- prefix in plugin updates and reconciliation

The store registry uses unprefixed IDs (e.g., 'weather') while older
installs used prefixed config keys (e.g., 'ledmatrix-weather'). Both
update_plugin() and auto-repair now try the unprefixed ID as a fallback
when the prefixed one isn't found in the registry.

Also filters system config keys (schedule, display, etc.) from
reconciliation to avoid false positives.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: address code review findings for plugin auto-repair

- Move backup-folder filter from _get_config_state to _get_disk_state
  where the artifact actually lives
- Run startup reconciliation in a background thread so requests aren't
  blocked by plugin reinstallation
- Set _reconciliation_done only after success so failures allow retries
- Replace print() with proper logger in reconciliation
- Wrap load_schema in try/except so exceptions fall through to
  conservative masking instead of 500
- Handle list values in _conservative_mask_config for nested secrets
- Remove duplicate import re

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: add thread-safe locking to PluginManager and fix reconciliation retry

PluginManager thread safety:
- Add RLock protecting plugin_manifests and plugin_directories
- Build scan results locally in _scan_directory_for_plugins, then update
  shared state under lock
- Protect reads in get_plugin_info, get_all_plugin_info,
  get_plugin_directory, get_plugin_display_modes, find_plugin_for_mode
- Protect manifest mutation in reload_plugin
- Prevents races between background reconciliation thread and request
  handlers reading plugin state

Reconciliation retry:
- Clear _reconciliation_started on exception so next request retries
- Check result.reconciliation_successful before marking done
- Reset _reconciliation_started on non-success results to allow retry

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Chuck
2026-03-25 17:09:49 -04:00
committed by GitHub
parent 81a022dbe8
commit 640a4c1706
5 changed files with 196 additions and 54 deletions

View File

@@ -14,6 +14,7 @@ import importlib.util
import sys
import subprocess
import time
import threading
from pathlib import Path
from typing import Dict, List, Optional, Any
import logging
@@ -74,6 +75,10 @@ class PluginManager:
self.state_manager = PluginStateManager(logger=self.logger)
self.schema_manager = SchemaManager(plugins_dir=self.plugins_dir, logger=self.logger)
# Lock protecting plugin_manifests and plugin_directories from
# concurrent mutation (background reconciliation) and reads (requests).
self._discovery_lock = threading.RLock()
# Active plugins
self.plugins: Dict[str, Any] = {}
self.plugin_manifests: Dict[str, Dict[str, Any]] = {}
@@ -106,6 +111,10 @@ class PluginManager:
if not directory.exists():
return plugin_ids
# Build new state locally before acquiring lock
new_manifests: Dict[str, Dict[str, Any]] = {}
new_directories: Dict[str, Path] = {}
try:
for item in directory.iterdir():
if not item.is_dir():
@@ -119,18 +128,21 @@ class PluginManager:
plugin_id = manifest.get('id')
if plugin_id:
plugin_ids.append(plugin_id)
self.plugin_manifests[plugin_id] = manifest
# Store directory mapping
if not hasattr(self, 'plugin_directories'):
self.plugin_directories = {}
self.plugin_directories[plugin_id] = item
new_manifests[plugin_id] = manifest
new_directories[plugin_id] = item
except (json.JSONDecodeError, PermissionError, OSError) as e:
self.logger.warning("Error reading manifest from %s: %s", manifest_path, e, exc_info=True)
continue
except (OSError, PermissionError) as e:
self.logger.error("Error scanning directory %s: %s", directory, e, exc_info=True)
# Update shared state under lock
with self._discovery_lock:
self.plugin_manifests.update(new_manifests)
if not hasattr(self, 'plugin_directories'):
self.plugin_directories = {}
self.plugin_directories.update(new_directories)
return plugin_ids
def discover_plugins(self) -> List[str]:
@@ -459,7 +471,9 @@ class PluginManager:
if manifest_path.exists():
try:
with open(manifest_path, 'r', encoding='utf-8') as f:
self.plugin_manifests[plugin_id] = json.load(f)
manifest = json.load(f)
with self._discovery_lock:
self.plugin_manifests[plugin_id] = manifest
except Exception as e:
self.logger.error("Error reading manifest: %s", e, exc_info=True)
return False
@@ -506,7 +520,8 @@ class PluginManager:
Returns:
Dict with plugin information or None if not found
"""
manifest = self.plugin_manifests.get(plugin_id)
with self._discovery_lock:
manifest = self.plugin_manifests.get(plugin_id)
if not manifest:
return None
@@ -533,7 +548,9 @@ class PluginManager:
Returns:
List of plugin info dictionaries
"""
return [info for info in [self.get_plugin_info(pid) for pid in self.plugin_manifests.keys()] if info]
with self._discovery_lock:
pids = list(self.plugin_manifests.keys())
return [info for info in [self.get_plugin_info(pid) for pid in pids] if info]
def get_plugin_directory(self, plugin_id: str) -> Optional[str]:
"""
@@ -545,8 +562,9 @@ class PluginManager:
Returns:
Directory path as string or None if not found
"""
if hasattr(self, 'plugin_directories') and plugin_id in self.plugin_directories:
return str(self.plugin_directories[plugin_id])
with self._discovery_lock:
if hasattr(self, 'plugin_directories') and plugin_id in self.plugin_directories:
return str(self.plugin_directories[plugin_id])
plugin_dir = self.plugins_dir / plugin_id
if plugin_dir.exists():
@@ -568,7 +586,8 @@ class PluginManager:
Returns:
List of display mode names
"""
manifest = self.plugin_manifests.get(plugin_id)
with self._discovery_lock:
manifest = self.plugin_manifests.get(plugin_id)
if not manifest:
return []
@@ -588,7 +607,9 @@ class PluginManager:
Plugin identifier or None if not found.
"""
normalized_mode = mode.strip().lower()
for plugin_id, manifest in self.plugin_manifests.items():
with self._discovery_lock:
manifests_snapshot = dict(self.plugin_manifests)
for plugin_id, manifest in manifests_snapshot.items():
display_modes = manifest.get('display_modes')
if isinstance(display_modes, list) and display_modes:
if any(m.lower() == normalized_mode for m in display_modes):

View File

@@ -67,7 +67,8 @@ class StateReconciliation:
state_manager: PluginStateManager,
config_manager,
plugin_manager,
plugins_dir: Path
plugins_dir: Path,
store_manager=None
):
"""
Initialize reconciliation system.
@@ -77,11 +78,13 @@ class StateReconciliation:
config_manager: ConfigManager instance
plugin_manager: PluginManager instance
plugins_dir: Path to plugins directory
store_manager: Optional PluginStoreManager for auto-repair
"""
self.state_manager = state_manager
self.config_manager = config_manager
self.plugin_manager = plugin_manager
self.plugins_dir = Path(plugins_dir)
self.store_manager = store_manager
self.logger = get_logger(__name__)
def reconcile_state(self) -> ReconciliationResult:
@@ -160,18 +163,32 @@ class StateReconciliation:
message=f"Reconciliation failed: {str(e)}"
)
# Top-level config keys that are NOT plugins
_SYSTEM_CONFIG_KEYS = frozenset({
'web_display_autostart', 'timezone', 'location', 'display',
'plugin_system', 'vegas_scroll_speed', 'vegas_separator_width',
'vegas_target_fps', 'vegas_buffer_ahead', 'vegas_plugin_order',
'vegas_excluded_plugins', 'vegas_scroll_enabled', 'logging',
'dim_schedule', 'network', 'system', 'schedule',
})
def _get_config_state(self) -> Dict[str, Dict[str, Any]]:
"""Get plugin state from config file."""
state = {}
try:
config = self.config_manager.load_config()
for plugin_id, plugin_config in config.items():
if isinstance(plugin_config, dict):
state[plugin_id] = {
'enabled': plugin_config.get('enabled', False),
'version': plugin_config.get('version'),
'exists_in_config': True
}
if not isinstance(plugin_config, dict):
continue
if plugin_id in self._SYSTEM_CONFIG_KEYS:
continue
if 'enabled' not in plugin_config:
continue
state[plugin_id] = {
'enabled': plugin_config.get('enabled', False),
'version': plugin_config.get('version'),
'exists_in_config': True
}
except Exception as e:
self.logger.warning(f"Error reading config state: {e}")
return state
@@ -184,6 +201,8 @@ class StateReconciliation:
for plugin_dir in self.plugins_dir.iterdir():
if plugin_dir.is_dir():
plugin_id = plugin_dir.name
if '.standalone-backup-' in plugin_id:
continue
manifest_path = plugin_dir / "manifest.json"
if manifest_path.exists():
import json
@@ -263,14 +282,15 @@ class StateReconciliation:
# Check: Plugin in config but not on disk
if config.get('exists_in_config') and not disk.get('exists_on_disk'):
can_repair = self.store_manager is not None
inconsistencies.append(Inconsistency(
plugin_id=plugin_id,
inconsistency_type=InconsistencyType.PLUGIN_MISSING_ON_DISK,
description=f"Plugin {plugin_id} in config but not on disk",
fix_action=FixAction.MANUAL_FIX_REQUIRED,
fix_action=FixAction.AUTO_FIX if can_repair else FixAction.MANUAL_FIX_REQUIRED,
current_state={'exists_on_disk': False},
expected_state={'exists_on_disk': True},
can_auto_fix=False
can_auto_fix=can_repair
))
# Check: Enabled state mismatch
@@ -303,6 +323,9 @@ class StateReconciliation:
self.logger.info(f"Fixed: Added {inconsistency.plugin_id} to config")
return True
elif inconsistency.inconsistency_type == InconsistencyType.PLUGIN_MISSING_ON_DISK:
return self._auto_repair_missing_plugin(inconsistency.plugin_id)
elif inconsistency.inconsistency_type == InconsistencyType.PLUGIN_ENABLED_MISMATCH:
# Sync enabled state from state manager to config
expected_enabled = inconsistency.expected_state.get('enabled')
@@ -320,3 +343,31 @@ class StateReconciliation:
return False
def _auto_repair_missing_plugin(self, plugin_id: str) -> bool:
"""Attempt to reinstall a missing plugin from the store."""
if not self.store_manager:
return False
# Try the plugin_id as-is, then without 'ledmatrix-' prefix
candidates = [plugin_id]
if plugin_id.startswith('ledmatrix-'):
candidates.append(plugin_id[len('ledmatrix-'):])
for candidate_id in candidates:
try:
self.logger.info("[AutoRepair] Attempting to reinstall missing plugin: %s", candidate_id)
result = self.store_manager.install_plugin(candidate_id)
if isinstance(result, dict):
success = result.get('success', False)
else:
success = bool(result)
if success:
self.logger.info("[AutoRepair] Successfully reinstalled plugin: %s (config key: %s)", candidate_id, plugin_id)
return True
except Exception as e:
self.logger.error("[AutoRepair] Error reinstalling %s: %s", candidate_id, e, exc_info=True)
self.logger.warning("[AutoRepair] Could not reinstall %s from store", plugin_id)
return False

View File

@@ -1784,6 +1784,12 @@ class PluginStoreManager:
# Try to get remote info from registry (optional)
self.fetch_registry(force_refresh=True)
plugin_info_remote = self.get_plugin_info(plugin_id, fetch_latest_from_github=True, force_refresh=True)
# Try without 'ledmatrix-' prefix (monorepo migration)
if not plugin_info_remote and plugin_id.startswith('ledmatrix-'):
alt_id = plugin_id[len('ledmatrix-'):]
plugin_info_remote = self.get_plugin_info(alt_id, fetch_latest_from_github=True, force_refresh=True)
if plugin_info_remote:
self.logger.info(f"Plugin {plugin_id} found in registry as {alt_id}")
remote_branch = None
remote_sha = None
@@ -2059,6 +2065,15 @@ class PluginStoreManager:
self.fetch_registry(force_refresh=True)
plugin_info_remote = self.get_plugin_info(plugin_id, fetch_latest_from_github=True, force_refresh=True)
# If not found, try without 'ledmatrix-' prefix (monorepo migration)
registry_id = plugin_id
if not plugin_info_remote and plugin_id.startswith('ledmatrix-'):
alt_id = plugin_id[len('ledmatrix-'):]
plugin_info_remote = self.get_plugin_info(alt_id, fetch_latest_from_github=True, force_refresh=True)
if plugin_info_remote:
registry_id = alt_id
self.logger.info(f"Plugin {plugin_id} found in registry as {alt_id}")
# If not in registry but we have a repo URL, try reinstalling from that URL
if not plugin_info_remote and repo_url:
self.logger.info(f"Plugin {plugin_id} not in registry but has git remote URL. Reinstalling from {repo_url} to enable updates...")
@@ -2111,13 +2126,13 @@ class PluginStoreManager:
self.logger.debug(f"Could not compare versions for {plugin_id}: {e}")
# Plugin is not a git repo but is in registry and has a newer version - reinstall
self.logger.info(f"Plugin {plugin_id} not installed via git; re-installing latest archive")
self.logger.info(f"Plugin {plugin_id} not installed via git; re-installing latest archive (registry id: {registry_id})")
# Remove directory and reinstall fresh
if not self._safe_remove_directory(plugin_path):
self.logger.error(f"Failed to remove old plugin directory for {plugin_id}")
return False
return self.install_plugin(plugin_id)
return self.install_plugin(registry_id)
except Exception as e:
import traceback

View File

@@ -651,12 +651,49 @@ def _initialize_health_monitor():
_health_monitor_initialized = True
# Initialize health monitor on first request (using before_request for compatibility)
_reconciliation_done = False
_reconciliation_started = False
def _run_startup_reconciliation():
"""Run state reconciliation in background to auto-repair missing plugins."""
global _reconciliation_done, _reconciliation_started
from src.logging_config import get_logger
_logger = get_logger('reconciliation')
try:
from src.plugin_system.state_reconciliation import StateReconciliation
reconciler = StateReconciliation(
state_manager=plugin_state_manager,
config_manager=config_manager,
plugin_manager=plugin_manager,
plugins_dir=plugins_dir,
store_manager=plugin_store_manager
)
result = reconciler.reconcile_state()
if result.inconsistencies_found:
_logger.info("[Reconciliation] %s", result.message)
if result.reconciliation_successful:
if result.inconsistencies_fixed:
plugin_manager.discover_plugins()
_reconciliation_done = True
else:
_logger.warning("[Reconciliation] Finished with unresolved issues, will retry")
_reconciliation_started = False
except Exception as e:
_logger.error("[Reconciliation] Error: %s", e, exc_info=True)
_reconciliation_started = False
# Initialize health monitor and run reconciliation on first request
@app.before_request
def check_health_monitor():
"""Ensure health monitor is initialized on first request."""
"""Ensure health monitor is initialized; launch reconciliation in background."""
global _reconciliation_started
if not _health_monitor_initialized:
_initialize_health_monitor()
if not _reconciliation_started:
_reconciliation_started = True
import threading
threading.Thread(target=_run_startup_reconciliation, daemon=True).start()
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=True)

View File

@@ -33,6 +33,29 @@ from src.web_interface.secret_helpers import (
separate_secrets,
)
_SECRET_KEY_PATTERN = re.compile(
r'(api_key|api_secret|password|secret|token|auth_key|credential)',
re.IGNORECASE,
)
def _conservative_mask_config(config, _parent_key=None):
"""Mask string values whose keys look like secrets (no schema available)."""
if isinstance(config, list):
return [
_conservative_mask_config(item, _parent_key) if isinstance(item, (dict, list))
else ('' if isinstance(item, str) and item and _parent_key and _SECRET_KEY_PATTERN.search(_parent_key) else item)
for item in config
]
result = dict(config)
for key, value in result.items():
if isinstance(value, dict):
result[key] = _conservative_mask_config(value)
elif isinstance(value, list):
result[key] = _conservative_mask_config(value, key)
elif isinstance(value, str) and value and _SECRET_KEY_PATTERN.search(key):
result[key] = ''
return result
# Will be initialized when blueprint is registered
config_manager = None
plugin_manager = None
@@ -2505,24 +2528,19 @@ def get_plugin_config():
}
# Mask secret fields before returning to prevent exposing API keys
# Fail closed — if schema unavailable, refuse to return unmasked config
schema_mgr = api_v3.schema_manager
if not schema_mgr:
return error_response(
ErrorCode.CONFIG_LOAD_FAILED,
f"Cannot safely return config for {plugin_id}: schema manager unavailable",
status_code=500
)
schema_for_mask = None
if schema_mgr:
try:
schema_for_mask = schema_mgr.load_schema(plugin_id, use_cache=True)
except Exception as e:
logger.error("[PluginConfig] Error loading schema for %s: %s", plugin_id, e, exc_info=True)
schema_for_mask = schema_mgr.load_schema(plugin_id, use_cache=True)
if not schema_for_mask or 'properties' not in schema_for_mask:
return error_response(
ErrorCode.CONFIG_LOAD_FAILED,
f"Cannot safely return config for {plugin_id}: schema unavailable for secret masking",
status_code=500
)
plugin_config = mask_secret_fields(plugin_config, schema_for_mask['properties'])
if schema_for_mask and 'properties' in schema_for_mask:
plugin_config = mask_secret_fields(plugin_config, schema_for_mask['properties'])
else:
logger.warning("[PluginConfig] Schema unavailable for %s, applying conservative masking", plugin_id)
plugin_config = _conservative_mask_config(plugin_config)
return success_response(data=plugin_config)
except Exception as e: