From 5beef0aa013bd1c7c8cf87f9eb8e92c3171fa342 Mon Sep 17 00:00:00 2001 From: Chuck <33324927+ChuckBuilds@users.noreply.github.com> Date: Thu, 11 Jun 2026 18:12:35 -0400 Subject: [PATCH] Improve first-time install error diagnostics and resilience (#369) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(install): don't let outer ERR trap mask first_time_install.sh failures set +e alone doesn't suppress bash's ERR trap, so any non-zero exit from first_time_install.sh inside the one-shot installer immediately triggered the outer on_error handler with a generic "Main installation, line 370" message — before the script could report the real exit code or point to logs/. Suspend the trap for that block so the existing if/else handling runs instead. * feat(install): surface root cause of web dependency install failures install_dependencies_apt.py previously reported only which packages failed, not why - the actual apt/pip error was discarded (apt) or could scroll out of the on_error log tail (pip), leaving "Step 7: Install web interface dependencies (line 915)" as the only visible detail. Capture command output for each install attempt and print a compact DEPENDENCY INSTALLATION FAILURES summary with the last lines of error output per package. Also run the installer with `python3 -u` for real-time, correctly-ordered logging, and widen the on_error tail from 50 to 100 lines so the summary isn't cut off. * feat(install): harden first-time install against common Pi failure modes - wait_for_apt_lock: apt_update/apt_install now wait (up to 3min) for unattended-upgrades to release the dpkg lock instead of failing outright with "Command failed after 3 attempts" right after first boot. - check_disk_space: new pre-flight check (Step 1) so a full SD card fails fast with a clear message instead of a cryptic mid-build error. - Step 6: wrap rpi-rgb-led-matrix git clone/submodule operations in retry for resilience to transient network issues. - Step 6: capture `pip install .` build output and print the last 50 lines on failure, so the actual cmake/compiler error is visible instead of just "Failed to install rpi-rgb-led-matrix Python package". * fix(install): bound subprocess output and dedupe apt update in dependency installer Address coderabbitai review on PR #369: - _run() now streams combined stdout/stderr to a temp file and returns only the last ERROR_TAIL_LINES lines, instead of buffering full output in memory (Codacy also flagged the previous capture_output call as a subprocess-without-static-string security issue; the new call is annotated as safe since cmd is built from hardcoded args). - `apt update` now runs once in main() instead of once per package needing an apt fallback. * fix(install): suppress remaining Codacy subprocess false-positive Codacy's Semgrep-based check still flagged the cmd-built subprocess.run call as "without a static string" even with the Bandit nosec applied. Add a nosemgrep marker alongside it - cmd is always a hardcoded apt/pip argument list, never user input. * fix(install): correctly detect already-installed dateutil/websocket-client Address remaining coderabbitai findings on PR #369: - check_package_installed() did __import__(package_name) directly, but python-dateutil and websocket-client import as dateutil/websocket. Both always failed the "already installed" check and were reinstalled on every run. Add an IMPORT_NAME_MAP for the mismatched names. - _run() still read the entire temp file into memory before slicing the tail. Stream it line-by-line into a deque(maxlen=ERROR_TAIL_LINES) instead so memory use stays bounded for very chatty commands. --------- Co-authored-by: Chuck --- first_time_install.sh | 81 +++++++++-- scripts/install/one-shot-install.sh | 10 +- scripts/install_dependencies_apt.py | 216 ++++++++++++++++++---------- 3 files changed, 213 insertions(+), 94 deletions(-) diff --git a/first_time_install.sh b/first_time_install.sh index e43a7b8b..44eed924 100644 --- a/first_time_install.sh +++ b/first_time_install.sh @@ -15,8 +15,8 @@ on_error() { echo "✗ An error occurred during: $CURRENT_STEP (line $line_no, exit $exit_code)" >&2 if [ -n "${LOG_FILE:-}" ]; then echo "See the log for details: $LOG_FILE" >&2 - echo "-- Last 50 lines from log --" >&2 - tail -n 50 "$LOG_FILE" >&2 || true + echo "-- Last 100 lines from log --" >&2 + tail -n 100 "$LOG_FILE" >&2 || true fi echo "\nCommon fixes:" >&2 echo "- Ensure the Pi is online (try: ping -c1 8.8.8.8)." >&2 @@ -202,8 +202,33 @@ retry() { done } -apt_update() { retry apt update; } -apt_install() { retry apt install -y "$@"; } +# Wait for another apt/dpkg process (commonly unattended-upgrades running +# shortly after first boot) to release its lock before we try apt ourselves. +# Without this, apt_update/apt_install can fail outright in the first couple +# minutes after a fresh Pi OS boot with a generic "Command failed after 3 +# attempts" error. +wait_for_apt_lock() { + command -v flock >/dev/null 2>&1 || return 0 + local lock_file="/var/lib/dpkg/lock-frontend" + local max_wait=180 + local waited=0 + local printed=0 + while ! flock -n "$lock_file" -c true 2>/dev/null; do + if [ "$printed" -eq 0 ]; then + echo "⚠ Waiting for another apt/dpkg process to finish (e.g. unattended-upgrades on first boot)..." + printed=1 + fi + if [ "$waited" -ge "$max_wait" ]; then + echo "⚠ Still waiting after ${max_wait}s; proceeding anyway." + break + fi + sleep 5 + waited=$((waited+5)) + done +} + +apt_update() { wait_for_apt_lock; retry apt update; } +apt_install() { wait_for_apt_lock; retry apt install -y "$@"; } apt_remove() { apt-get remove -y "$@" || true; } check_network() { @@ -222,6 +247,22 @@ check_network() { exit 1 } +check_disk_space() { + command -v df >/dev/null 2>&1 || return 0 + local available_mb + available_mb=$(df -m "$PROJECT_ROOT_DIR" | awk 'NR==2{print $4}') + available_mb=${available_mb:-0} + if [ "$available_mb" -lt 500 ]; then + echo "✗ ERROR: Insufficient disk space: ${available_mb}MB available (need at least 500MB)" + echo " Free up space first, e.g.: sudo apt clean && sudo apt autoremove" + exit 1 + elif [ "$available_mb" -lt 1024 ]; then + echo "⚠ Limited disk space: ${available_mb}MB available (recommend at least 1GB for the rpi-rgb-led-matrix build in Step 6)" + else + echo "✓ Disk space sufficient: ${available_mb}MB available" + fi +} + echo "" echo "This script will perform the following steps:" echo "1. Install system dependencies" @@ -271,8 +312,9 @@ CURRENT_STEP="Install system dependencies" echo "Step 1: Installing system dependencies..." echo "----------------------------------------" -# Ensure network is available before APT operations +# Pre-flight checks before APT operations check_network +check_disk_space # Update package list apt_update @@ -822,14 +864,14 @@ else # Try to initialize submodule if .gitmodules exists if [ -f "$PROJECT_ROOT_DIR/.gitmodules" ] && grep -q "rpi-rgb-led-matrix" "$PROJECT_ROOT_DIR/.gitmodules"; then echo "Initializing rpi-rgb-led-matrix submodule..." - if ! git submodule update --init --recursive rpi-rgb-led-matrix-master 2>&1; then + if ! retry git submodule update --init --recursive rpi-rgb-led-matrix-master; then echo "⚠ Submodule init failed, cloning directly from GitHub..." - git clone https://github.com/hzeller/rpi-rgb-led-matrix.git rpi-rgb-led-matrix-master + retry git clone https://github.com/hzeller/rpi-rgb-led-matrix.git rpi-rgb-led-matrix-master fi else # Fallback: clone directly if submodule not configured echo "Submodule not configured, cloning directly from GitHub..." - git clone https://github.com/hzeller/rpi-rgb-led-matrix.git rpi-rgb-led-matrix-master + retry git clone https://github.com/hzeller/rpi-rgb-led-matrix.git rpi-rgb-led-matrix-master fi fi @@ -841,23 +883,34 @@ else cd "$PROJECT_ROOT_DIR" rm -rf rpi-rgb-led-matrix-master if [ -f "$PROJECT_ROOT_DIR/.gitmodules" ] && grep -q "rpi-rgb-led-matrix" "$PROJECT_ROOT_DIR/.gitmodules"; then - git submodule update --init --recursive rpi-rgb-led-matrix-master + retry git submodule update --init --recursive rpi-rgb-led-matrix-master else - git clone https://github.com/hzeller/rpi-rgb-led-matrix.git rpi-rgb-led-matrix-master + retry git clone https://github.com/hzeller/rpi-rgb-led-matrix.git rpi-rgb-led-matrix-master fi fi - + pushd "$PROJECT_ROOT_DIR/rpi-rgb-led-matrix-master" >/dev/null echo "Installing rpi-rgb-led-matrix Python package (scikit-build-core + cmake)..." echo " Build deps required: python-dev-is-python3 cmake" echo " This compiles C++ — may take 2-5 minutes on Pi 4/5..." - if ! python3 -m pip install --break-system-packages .; then + BUILD_OUTPUT=$(mktemp) + BUILD_SUCCESS=false + if python3 -m pip install --break-system-packages . > "$BUILD_OUTPUT" 2>&1; then + BUILD_SUCCESS=true + fi + cat "$BUILD_OUTPUT" >> "$LOG_FILE" + if [ "$BUILD_SUCCESS" != true ]; then echo "✗ Failed to install rpi-rgb-led-matrix Python package" echo " Ensure build tools are installed:" echo " sudo apt install -y python-dev-is-python3 cmake build-essential" + echo "" + echo "-- Last 50 lines of build output --" + tail -n 50 "$BUILD_OUTPUT" + rm -f "$BUILD_OUTPUT" popd >/dev/null exit 1 fi + rm -f "$BUILD_OUTPUT" popd >/dev/null else echo "✗ rpi-rgb-led-matrix-master directory not found at $PROJECT_ROOT_DIR" @@ -912,7 +965,9 @@ else # Try to install dependencies using the smart installer if available if [ -f "$PROJECT_ROOT_DIR/scripts/install_dependencies_apt.py" ]; then echo "Using smart dependency installer..." - python3 "$PROJECT_ROOT_DIR/scripts/install_dependencies_apt.py" + # -u: unbuffered stdout/stderr so output is captured in $LOG_FILE in + # real time and in order relative to this script's own echo statements + python3 -u "$PROJECT_ROOT_DIR/scripts/install_dependencies_apt.py" else echo "Using pip to install dependencies..." if [ -f "$PROJECT_ROOT_DIR/requirements_web_v2.txt" ]; then diff --git a/scripts/install/one-shot-install.sh b/scripts/install/one-shot-install.sh index ddac1115..472b687f 100755 --- a/scripts/install/one-shot-install.sh +++ b/scripts/install/one-shot-install.sh @@ -340,9 +340,14 @@ main() { echo "" # Execute with proper error handling and non-interactive mode - # Temporarily disable errexit to capture exit code instead of exiting immediately + # Temporarily disable errexit AND the ERR trap to capture exit code instead of + # exiting immediately. `set +e` alone does not suppress the ERR trap, so without + # `trap '' ERR` a non-zero exit from first_time_install.sh would trigger on_error + # here with the generic "Main installation" message instead of the detailed + # if/else handling below. set +e - + trap '' ERR + # Check /tmp permissions - only fix if actually wrong (common in automated scenarios) # When running manually, /tmp usually has correct permissions (1777) TMP_PERMS=$(stat -c '%a' /tmp 2>/dev/null || echo "unknown") @@ -370,6 +375,7 @@ main() { sudo -E env TMPDIR=/tmp LEDMATRIX_ASSUME_YES=1 bash ./first_time_install.sh -y =6.5.0,<7.0.0', @@ -122,47 +178,49 @@ def main(): 'python-socketio>=5.11.0,<6.0.0', 'python-engineio>=4.9.0,<5.0.0' ] - + for package in special_packages: - if not install_via_pip(package): + ok, pip_output = install_via_pip(package) + if not ok: failed_packages.append(package) - + failure_details[package] = pip_output + # Install rgbmatrix module from local source (optional - may already be installed in Step 6) # Check if already installed first if check_package_installed('rgbmatrix'): print("rgbmatrix module already installed, skipping...") else: print("Installing rgbmatrix module from local source...") - try: - # Get project root (parent of scripts directory) - PROJECT_ROOT = Path(__file__).parent.parent - rgbmatrix_path = PROJECT_ROOT / 'rpi-rgb-led-matrix-master' / 'bindings' / 'python' - if rgbmatrix_path.exists(): - # Check if the module has been built (look for setup.py) - setup_py = rgbmatrix_path / 'setup.py' - if setup_py.exists(): - # Try installing - use regular install, not editable mode - # This is optional for web interface and should already be installed in Step 6 - subprocess.check_call([ - sys.executable, '-m', 'pip', 'install', '--break-system-packages', str(rgbmatrix_path) - ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + # Get project root (parent of scripts directory) + PROJECT_ROOT = Path(__file__).parent.parent + rgbmatrix_path = PROJECT_ROOT / 'rpi-rgb-led-matrix-master' / 'bindings' / 'python' + if rgbmatrix_path.exists(): + # Check if the module has been built (look for setup.py) + setup_py = rgbmatrix_path / 'setup.py' + if setup_py.exists(): + # Try installing - use regular install, not editable mode + # This is optional for web interface and should already be installed in Step 6 + ok, output = _run([sys.executable, '-m', 'pip', 'install', '--break-system-packages', str(rgbmatrix_path)]) + if ok: print("rgbmatrix module installed successfully") else: - print("Warning: rgbmatrix setup.py not found, module may need to be built first") - print(" This is normal if Step 6 hasn't completed yet.") + # Don't fail the whole installation - rgbmatrix is optional for web interface + # and should be installed in Step 6 of first_time_install.sh + print("Warning: Failed to install rgbmatrix module:") + for line in output.strip().splitlines()[-ERROR_TAIL_LINES:]: + print(f" {line}") + print(" This is normal if rgbmatrix hasn't been built yet (Step 6).") + print(" The web interface will work without it.") else: - print("Warning: rgbmatrix source not found (this is normal if Step 6 hasn't run yet)") - except subprocess.CalledProcessError as e: - # Don't fail the whole installation - rgbmatrix is optional for web interface - # and should be installed in Step 6 of first_time_install.sh - print(f"Warning: Failed to install rgbmatrix module: {e}") - print(" This is normal if rgbmatrix hasn't been built yet (Step 6).") - print(" The web interface will work without it.") - # Don't add to failed_packages since it's optional - + print("Warning: rgbmatrix setup.py not found, module may need to be built first") + print(" This is normal if Step 6 hasn't completed yet.") + else: + print("Warning: rgbmatrix source not found (this is normal if Step 6 hasn't run yet)") + if failed_packages: print(f"\nFailed to install the following packages: {failed_packages}") print("You may need to install them manually or check your system configuration.") + print_failure_summary(failed_packages, failure_details) return False else: print("\nAll dependencies installed successfully!")