From 9a74db6de3fc6b1c554f86f72fcffaf01d9c9f8c Mon Sep 17 00:00:00 2001 From: Chuck Date: Tue, 12 May 2026 11:47:56 -0400 Subject: [PATCH] fix: service control buttons and AP-mode SSH lockout post-install MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two user-reported issues after fresh install: 1. All service buttons (Start/Stop/Restart Display, Restart Web Service) failed silently — only Reboot worked. Root cause: sudoers rules use `ledmatrix.service` (with suffix) but api_v3.py called `sudo systemctl start ledmatrix` (no suffix). sudo does exact string matching, so every service action was rejected with returncode=1. Also missing from sudoers: ledmatrix-web, journalctl, and is-active entries. Fix: - Add `.service` suffix to all 9 sudo systemctl call sites in api_v3.py (_ensure_display_service_running, _stop_display_service, and the seven systemctl calls in execute_system_action, including the ledmatrix-web restart). - Add timeouts to the subprocess.run calls for service and power actions in execute_system_action: timeout=15 for systemctl, timeout=10 for reboot/poweroff (previously could hang indefinitely). - Add missing sudoers rules to first_time_install.sh: ledmatrix-web.service start/stop/restart, is-active for both name forms, and journalctl -u/-t ledmatrix rules. In configure_web_sudo.sh, switch the existing ledmatrix-web start/stop/restart rules to the `.service` form so they match the new call sites. 2. SSH and web UI became inaccessible after ~1 hour even though the display kept running. Root cause: wifi_monitor_daemon restarts NetworkManager after 5 consecutive internet failures (~2.5 min). Each NM restart drops WiFi briefly. During that window check_and_manage_ap_mode() increments _disconnected_checks but the daemon never reset it after the restart. After 3 such NM-restart cycles, _disconnected_checks reached 3 and AP mode activated — changing the Pi from WiFi client to hotspot (192.168.4.1) and killing SSH on the old IP. Fix: - Reset wifi_manager._disconnected_checks = 0 in the daemon immediately after a successful NM restart so the brief drop it causes doesn't count toward AP-mode activation. 
- Increase _disconnected_checks_required from 3 to 6 (90s → 3min) as an additional buffer against transient network flaps. Co-Authored-By: Claude Sonnet 4.6 --- first_time_install.sh | 13 ++++++++++ scripts/install/configure_web_sudo.sh | 6 ++--- scripts/utils/wifi_monitor_daemon.py | 4 +++ src/wifi_manager.py | 2 +- web_interface/blueprints/api_v3.py | 37 +++++++++++++-------------- 5 files changed, 39 insertions(+), 23 deletions(-) diff --git a/first_time_install.sh b/first_time_install.sh index a1a792a3..3da73157 100644 --- a/first_time_install.sh +++ b/first_time_install.sh @@ -1086,6 +1086,7 @@ SYSTEMCTL_PATH=$(which systemctl) REBOOT_PATH=$(which reboot) POWEROFF_PATH=$(which poweroff) BASH_PATH=$(which bash) +JOURNALCTL_PATH=$(which journalctl 2>/dev/null || true) # Create sudoers content cat > /tmp/ledmatrix_web_sudoers << EOF @@ -1101,10 +1102,22 @@ $ACTUAL_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH restart ledmatrix.service $ACTUAL_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH enable ledmatrix.service $ACTUAL_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH disable ledmatrix.service $ACTUAL_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH status ledmatrix.service +$ACTUAL_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH is-active ledmatrix +$ACTUAL_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH is-active ledmatrix.service +$ACTUAL_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH start ledmatrix-web.service +$ACTUAL_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH stop ledmatrix-web.service +$ACTUAL_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH restart ledmatrix-web.service $ACTUAL_USER ALL=(ALL) NOPASSWD: $PYTHON_PATH $PROJECT_ROOT_DIR/display_controller.py $ACTUAL_USER ALL=(ALL) NOPASSWD: $BASH_PATH $PROJECT_ROOT_DIR/start_display.sh $ACTUAL_USER ALL=(ALL) NOPASSWD: $BASH_PATH $PROJECT_ROOT_DIR/stop_display.sh EOF +if [ -n "$JOURNALCTL_PATH" ]; then + cat >> /tmp/ledmatrix_web_sudoers << EOF +$ACTUAL_USER ALL=(ALL) NOPASSWD: $JOURNALCTL_PATH -u ledmatrix.service * +$ACTUAL_USER ALL=(ALL) NOPASSWD: 
$JOURNALCTL_PATH -u ledmatrix * +$ACTUAL_USER ALL=(ALL) NOPASSWD: $JOURNALCTL_PATH -t ledmatrix * +EOF +fi if [ -f "$SUDOERS_FILE" ] && cmp -s /tmp/ledmatrix_web_sudoers "$SUDOERS_FILE"; then echo "Sudoers configuration already up to date" diff --git a/scripts/install/configure_web_sudo.sh b/scripts/install/configure_web_sudo.sh index 9cc6ad5c..0a9df2da 100644 --- a/scripts/install/configure_web_sudo.sh +++ b/scripts/install/configure_web_sudo.sh @@ -89,9 +89,9 @@ TEMP_SUDOERS="/tmp/ledmatrix_web_sudoers_$$" echo "$WEB_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH status ledmatrix.service" echo "$WEB_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH is-active ledmatrix" echo "$WEB_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH is-active ledmatrix.service" - echo "$WEB_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH start ledmatrix-web" - echo "$WEB_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH stop ledmatrix-web" - echo "$WEB_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH restart ledmatrix-web" + echo "$WEB_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH start ledmatrix-web.service" + echo "$WEB_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH stop ledmatrix-web.service" + echo "$WEB_USER ALL=(ALL) NOPASSWD: $SYSTEMCTL_PATH restart ledmatrix-web.service" # Optional: journalctl (non-critical — skip if not found) if [ -n "$JOURNALCTL_PATH" ]; then diff --git a/scripts/utils/wifi_monitor_daemon.py b/scripts/utils/wifi_monitor_daemon.py index ea5efb69..8d83cf40 100755 --- a/scripts/utils/wifi_monitor_daemon.py +++ b/scripts/utils/wifi_monitor_daemon.py @@ -146,6 +146,10 @@ class WiFiMonitorDaemon: capture_output=True, timeout=20, check=True ) self._consecutive_internet_failures = 0 + # NM restart causes a brief WiFi drop; reset the AP-mode grace + # counter so that transient disconnect doesn't count toward + # triggering AP mode. 
+ self.wifi_manager._disconnected_checks = 0 except subprocess.CalledProcessError as e: logger.error(f"NetworkManager restart failed (rc={e.returncode}); " "keeping failure counter unchanged") diff --git a/src/wifi_manager.py b/src/wifi_manager.py index e31bd5fb..cdc1f625 100644 --- a/src/wifi_manager.py +++ b/src/wifi_manager.py @@ -140,7 +140,7 @@ class WiFiManager: # Initialize disconnected check counter for grace period # This prevents AP mode from enabling on transient network hiccups self._disconnected_checks = 0 - self._disconnected_checks_required = 3 # Require 3 consecutive disconnected checks (90 seconds at 30s interval) + self._disconnected_checks_required = 6 # Require 6 consecutive disconnected checks (3 minutes at 30s interval) # Timestamp set when AP mode is enabled; used for the idle-timeout check self._ap_enabled_at: Optional[float] = None diff --git a/web_interface/blueprints/api_v3.py b/web_interface/blueprints/api_v3.py index 9b8fbb5e..c8c2fa44 100644 --- a/web_interface/blueprints/api_v3.py +++ b/web_interface/blueprints/api_v3.py @@ -218,7 +218,7 @@ def _ensure_display_service_running(): if status.get('active'): status['started'] = False return status - result = _run_systemctl_command(['sudo', 'systemctl', 'start', 'ledmatrix']) + result = _run_systemctl_command(['sudo', 'systemctl', 'start', 'ledmatrix.service']) service_status = _get_display_service_status() result['started'] = result.get('returncode') == 0 result['active'] = service_status.get('active') @@ -227,7 +227,7 @@ def _ensure_display_service_running(): def _stop_display_service(): """Stop the ledmatrix display service.""" - result = _run_systemctl_command(['sudo', 'systemctl', 'stop', 'ledmatrix']) + result = _run_systemctl_command(['sudo', 'systemctl', 'stop', 'ledmatrix.service']) status = _get_display_service_status() result['active'] = status.get('active') result['status'] = status @@ -1716,8 +1716,8 @@ def execute_system_action(): if mode: # For on-demand modes, we would need 
to integrate with the display controller # For now, just start the display service - result = subprocess.run(['sudo', 'systemctl', 'start', 'ledmatrix'], - capture_output=True, text=True) + result = subprocess.run(['sudo', 'systemctl', 'start', 'ledmatrix.service'], + capture_output=True, text=True, timeout=15) return jsonify({ 'status': 'success' if result.returncode == 0 else 'error', 'message': f'Started display in {mode} mode', @@ -1726,23 +1726,23 @@ def execute_system_action(): 'stderr': result.stderr }) else: - result = subprocess.run(['sudo', 'systemctl', 'start', 'ledmatrix'], - capture_output=True, text=True) + result = subprocess.run(['sudo', 'systemctl', 'start', 'ledmatrix.service'], + capture_output=True, text=True, timeout=15) elif action == 'stop_display': - result = subprocess.run(['sudo', 'systemctl', 'stop', 'ledmatrix'], - capture_output=True, text=True) + result = subprocess.run(['sudo', 'systemctl', 'stop', 'ledmatrix.service'], + capture_output=True, text=True, timeout=15) elif action == 'enable_autostart': - result = subprocess.run(['sudo', 'systemctl', 'enable', 'ledmatrix'], - capture_output=True, text=True) + result = subprocess.run(['sudo', 'systemctl', 'enable', 'ledmatrix.service'], + capture_output=True, text=True, timeout=15) elif action == 'disable_autostart': - result = subprocess.run(['sudo', 'systemctl', 'disable', 'ledmatrix'], - capture_output=True, text=True) + result = subprocess.run(['sudo', 'systemctl', 'disable', 'ledmatrix.service'], + capture_output=True, text=True, timeout=15) elif action == 'reboot_system': result = subprocess.run(['sudo', 'reboot'], - capture_output=True, text=True) + capture_output=True, text=True, timeout=10) elif action == 'shutdown_system': result = subprocess.run(['sudo', 'poweroff'], - capture_output=True, text=True) + capture_output=True, text=True, timeout=10) elif action == 'git_pull': # Use PROJECT_ROOT instead of hardcoded path project_dir = str(PROJECT_ROOT) @@ -1823,12 +1823,11 @@ def 
execute_system_action(): 'stderr': result.stderr }) elif action == 'restart_display_service': - result = subprocess.run(['sudo', 'systemctl', 'restart', 'ledmatrix'], - capture_output=True, text=True) + result = subprocess.run(['sudo', 'systemctl', 'restart', 'ledmatrix.service'], + capture_output=True, text=True, timeout=15) elif action == 'restart_web_service': - # Try to restart the web service (assuming it's ledmatrix-web.service) - result = subprocess.run(['sudo', 'systemctl', 'restart', 'ledmatrix-web'], - capture_output=True, text=True) + result = subprocess.run(['sudo', 'systemctl', 'restart', 'ledmatrix-web.service'], + capture_output=True, text=True, timeout=15) else: return jsonify({'status': 'error', 'message': f'Unknown action: {action}'}), 400