mirror of
https://github.com/ChuckBuilds/LEDMatrix.git
synced 2026-05-01 21:13:01 +00:00
feat(wifi): adopt adsb-feeder-image hotspot patterns — DNS spoofing, connectivity check, idle timeout, wrong-password UX, watchdog escalation
Inspired by the production-proven approach in dirkhh/adsb-feeder-image. 1. DNS spoofing for automatic captive-portal popup (Change 1 — Critical) Write /etc/NetworkManager/dnsmasq-shared.d/ledmatrix-captive.conf with address=/#/192.168.4.1 before nmcli connection up so NM's built-in dnsmasq (ipv4.method=shared) resolves every hostname to the AP IP. This triggers the OS captive-portal popup automatically on iOS / Android / Windows / macOS — no manual navigation to 192.168.4.1:5000/setup required. New helpers: _write_nm_dnsmasq_captive_conf / _remove_nm_dnsmasq_captive_conf. New constants: NM_DNSMASQ_SHARED_DIR / NM_DNSMASQ_SHARED_CONF. 2. Real internet connectivity check (Change 2 — High) Add _check_internet_connectivity() (ping 8.8.8.8 + HTTP fallback). check_and_manage_ap_mode() now considers a device "disconnected" when nmcli shows connected but no real internet reachability, matching adsb-feeder's multi-method gateway/DNS/HTTP test approach. 3. AP idle timeout (Change 3 — Medium) Track _ap_enabled_at timestamp in enable_ap_mode(). Add _has_ap_clients() using 'iw dev <iface> station dump'. check_and_manage_ap_mode() auto-disables AP after ap_idle_timeout_minutes (default 15) with no associated clients. 4. Wrong-password error feedback (Change 4 — Medium) _connect_nmcli() detects "Secrets were required" / "authentication rejected" in nmcli stderr and prefixes the message with "wrong_password: ". The /api/v3/wifi/connect route propagates error_type="wrong_password" in the JSON response. captive_setup.html shows "Incorrect password — try again" (keeping the form active) instead of the generic failure message. 5. Escalating watchdog NM restart (Change 5 — Low) wifi_monitor_daemon.py tracks _consecutive_internet_failures. After _nm_restart_threshold (5) consecutive checks where nmcli shows connected but internet is unreachable, restart NetworkManager as a recovery step. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -43,6 +43,10 @@ class WiFiMonitorDaemon:
|
||||
self.wifi_manager = WiFiManager()
|
||||
self.running = True
|
||||
self.last_state = None
|
||||
# Counts consecutive checks where nmcli says "connected" but internet is unreachable.
|
||||
# After _nm_restart_threshold failures, NetworkManager is restarted as a recovery step.
|
||||
self._consecutive_internet_failures = 0
|
||||
self._nm_restart_threshold = 5 # ~2.5 min at 30s interval
|
||||
|
||||
# Register signal handlers for graceful shutdown
|
||||
signal.signal(signal.SIGINT, self._signal_handler)
|
||||
@@ -122,6 +126,28 @@ class WiFiMonitorDaemon:
|
||||
else:
|
||||
logger.debug(f"Status check: WiFi=disconnected, Ethernet={updated_ethernet}, AP={updated_status.ap_mode_active}")
|
||||
|
||||
# Escalating recovery: if nmcli reports connected but internet is
|
||||
# unreachable for several consecutive checks, restart NetworkManager.
|
||||
# check_and_manage_ap_mode() already calls _check_internet_connectivity()
|
||||
# internally; we track the failure count here from the observed state.
|
||||
if updated_status.connected and not updated_status.ap_mode_active:
|
||||
if not self.wifi_manager._check_internet_connectivity():
|
||||
self._consecutive_internet_failures += 1
|
||||
logger.warning(
|
||||
f"Internet unreachable despite nmcli connection "
|
||||
f"({self._consecutive_internet_failures}/{self._nm_restart_threshold})"
|
||||
)
|
||||
if self._consecutive_internet_failures >= self._nm_restart_threshold:
|
||||
logger.warning("Restarting NetworkManager to recover internet connectivity")
|
||||
import subprocess as _sp
|
||||
_sp.run(["sudo", "systemctl", "restart", "NetworkManager"],
|
||||
capture_output=True, timeout=20)
|
||||
self._consecutive_internet_failures = 0
|
||||
else:
|
||||
self._consecutive_internet_failures = 0
|
||||
else:
|
||||
self._consecutive_internet_failures = 0
|
||||
|
||||
# Sleep until next check
|
||||
time.sleep(self.check_interval)
|
||||
|
||||
|
||||
@@ -60,6 +60,11 @@ def get_wifi_config_path():
|
||||
|
||||
HOSTAPD_CONFIG_PATH = Path("/etc/hostapd/hostapd.conf")
|
||||
DNSMASQ_CONFIG_PATH = Path("/etc/dnsmasq.d/ledmatrix-captive.conf")
|
||||
# Drop-in config for NetworkManager's built-in dnsmasq (ipv4.method=shared).
|
||||
# Writing address=/#/<ap_ip> here causes NM to resolve every hostname to the AP,
|
||||
# triggering the OS captive-portal popup automatically on iOS/Android/Windows/macOS.
|
||||
NM_DNSMASQ_SHARED_DIR = Path("/etc/NetworkManager/dnsmasq-shared.d")
|
||||
NM_DNSMASQ_SHARED_CONF = NM_DNSMASQ_SHARED_DIR / "ledmatrix-captive.conf"
|
||||
HOSTAPD_SERVICE = "hostapd"
|
||||
DNSMASQ_SERVICE = "dnsmasq"
|
||||
|
||||
@@ -137,6 +142,9 @@ class WiFiManager:
|
||||
self._disconnected_checks = 0
|
||||
self._disconnected_checks_required = 3 # Require 3 consecutive disconnected checks (90 seconds at 30s interval)
|
||||
|
||||
# Timestamp set when AP mode is enabled; used for the idle-timeout check
|
||||
self._ap_enabled_at: Optional[float] = None
|
||||
|
||||
logger.info(f"WiFi Manager initialized - nmcli: {self.has_nmcli}, iwlist: {self.has_iwlist}, "
|
||||
f"hostapd: {self.has_hostapd}, dnsmasq: {self.has_dnsmasq}, "
|
||||
f"interface: {self._wifi_interface}, trixie: {self._is_trixie}")
|
||||
@@ -837,6 +845,88 @@ class WiFiManager:
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not tear down iptables redirect: {e}")
|
||||
|
||||
def _write_nm_dnsmasq_captive_conf(self, ap_ip: str = "192.168.4.1") -> None:
|
||||
"""
|
||||
Write the NM dnsmasq-shared.d drop-in that makes NM's built-in dnsmasq
|
||||
resolve every hostname to the AP IP. This triggers the OS captive-portal
|
||||
popup automatically on iOS / Android / Windows / macOS as soon as the
|
||||
device connects — no manual navigation required.
|
||||
|
||||
NetworkManager reads /etc/NetworkManager/dnsmasq-shared.d/*.conf when it
|
||||
starts the dnsmasq instance for ipv4.method=shared connections.
|
||||
"""
|
||||
try:
|
||||
content = f"# LEDMatrix captive portal: resolve all hostnames to AP\naddress=/#/{ap_ip}\n"
|
||||
with open("/tmp/ledmatrix-nm-dnsmasq.conf", "w") as f:
|
||||
f.write(content)
|
||||
subprocess.run(
|
||||
["sudo", "mkdir", "-p", str(NM_DNSMASQ_SHARED_DIR)],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
subprocess.run(
|
||||
["sudo", "cp", "/tmp/ledmatrix-nm-dnsmasq.conf", str(NM_DNSMASQ_SHARED_CONF)],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
logger.info(f"Wrote NM dnsmasq captive-portal config: {NM_DNSMASQ_SHARED_CONF}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not write NM dnsmasq captive config: {e}")
|
||||
|
||||
def _remove_nm_dnsmasq_captive_conf(self) -> None:
|
||||
"""Remove the NM dnsmasq-shared.d drop-in written by _write_nm_dnsmasq_captive_conf."""
|
||||
try:
|
||||
subprocess.run(
|
||||
["sudo", "rm", "-f", str(NM_DNSMASQ_SHARED_CONF)],
|
||||
capture_output=True, timeout=5
|
||||
)
|
||||
logger.info("Removed NM dnsmasq captive-portal config")
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not remove NM dnsmasq captive config: {e}")
|
||||
|
||||
def _check_internet_connectivity(self, timeout: int = 5) -> bool:
|
||||
"""
|
||||
Test actual internet reachability — not just nmcli association state.
|
||||
|
||||
A device can be 'connected' in nmcli (associated with an AP) while the
|
||||
router has no WAN link. This check catches that case so the daemon can
|
||||
auto-enable AP mode even when nmcli reports a connection.
|
||||
|
||||
Returns True if at least one reachability method succeeds.
|
||||
"""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
["ping", "-c", "1", "-W", str(timeout), "8.8.8.8"],
|
||||
capture_output=True, timeout=timeout + 1
|
||||
)
|
||||
if r.returncode == 0:
|
||||
logger.debug("Internet connectivity confirmed via ping 8.8.8.8")
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
import urllib.request as _ureq
|
||||
_ureq.urlopen("http://connectivity-check.ubuntu.com/", timeout=timeout)
|
||||
logger.debug("Internet connectivity confirmed via HTTP check")
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
logger.debug("Internet connectivity check failed (both ping and HTTP)")
|
||||
return False
|
||||
|
||||
def _has_ap_clients(self) -> bool:
|
||||
"""
|
||||
Return True if at least one client is associated with the AP.
|
||||
Uses 'iw dev <iface> station dump' which works for both hostapd and
|
||||
nmcli AP modes.
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["iw", "dev", self._wifi_interface, "station", "dump"],
|
||||
capture_output=True, text=True, timeout=5
|
||||
)
|
||||
return bool(result.stdout.strip())
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def scan_networks(self, allow_cached: bool = True) -> Tuple[List[WiFiNetwork], bool]:
|
||||
"""
|
||||
Scan for available WiFi networks.
|
||||
@@ -1471,12 +1561,27 @@ class WiFiManager:
|
||||
error_msg = result.stderr.strip() or result.stdout.strip()
|
||||
logger.error(f"Failed to connect to {ssid}: {error_msg}")
|
||||
self._show_led_message("Connection failed", duration=5)
|
||||
if self._is_wrong_password_error(error_msg):
|
||||
return False, f"wrong_password: {error_msg}"
|
||||
return False, error_msg
|
||||
except Exception as e:
|
||||
logger.error(f"Error connecting with nmcli: {e}")
|
||||
self._show_led_message("Connection error", duration=5)
|
||||
return False, str(e)
|
||||
|
||||
@staticmethod
|
||||
def _is_wrong_password_error(error_msg: str) -> bool:
|
||||
"""Return True when nmcli's error output indicates an authentication failure."""
|
||||
indicators = [
|
||||
"secrets were required",
|
||||
"no secret agent",
|
||||
"802-11-wireless-security.psk",
|
||||
"authentication rejected",
|
||||
"association rejected",
|
||||
]
|
||||
lower = error_msg.lower()
|
||||
return any(ind in lower for ind in indicators)
|
||||
|
||||
def _connect_wpa_supplicant(self, ssid: str, password: str) -> Tuple[bool, str]:
|
||||
"""Connect using wpa_supplicant (fallback)"""
|
||||
try:
|
||||
@@ -1748,13 +1853,17 @@ class WiFiManager:
|
||||
if self.has_hostapd and self.has_dnsmasq:
|
||||
result = self._enable_ap_mode_hostapd()
|
||||
if result[0]:
|
||||
self._ap_enabled_at = time.time()
|
||||
return result
|
||||
|
||||
# Fallback to nmcli hotspot (simpler, no captive portal)
|
||||
if self.has_nmcli:
|
||||
logger.info("hostapd/dnsmasq failed or unavailable, trying nmcli hotspot fallback...")
|
||||
self._show_led_message("Setup Mode", duration=5)
|
||||
return self._enable_ap_mode_nmcli_hotspot()
|
||||
result = self._enable_ap_mode_nmcli_hotspot()
|
||||
if result[0]:
|
||||
self._ap_enabled_at = time.time()
|
||||
return result
|
||||
|
||||
return False, "No WiFi tools available (nmcli, hostapd, or dnsmasq required)"
|
||||
except Exception as e:
|
||||
@@ -1911,6 +2020,12 @@ class WiFiManager:
|
||||
self._show_led_message("AP mode failed", duration=5)
|
||||
return False, f"Failed to create AP profile: {error_msg}"
|
||||
|
||||
# Write the NM dnsmasq-shared.d captive-portal config BEFORE bringing up
|
||||
# the connection so NM's dnsmasq picks it up at start time.
|
||||
# This causes every hostname DNS query from a connected device to resolve
|
||||
# to 192.168.4.1, automatically triggering the OS captive-portal popup.
|
||||
self._write_nm_dnsmasq_captive_conf()
|
||||
|
||||
logger.info("AP connection profile created, bringing it up...")
|
||||
up_result = subprocess.run(
|
||||
["nmcli", "connection", "up", "LEDMatrix-Setup-AP"],
|
||||
@@ -2127,11 +2242,13 @@ class WiFiManager:
|
||||
# so we only need to remove the iptables redirect rules we added.
|
||||
logger.info("Skipping NetworkManager restart (nmcli AP mode, restart not needed)")
|
||||
self._teardown_iptables_redirect()
|
||||
self._remove_nm_dnsmasq_captive_conf()
|
||||
# Ensure WiFi radio is enabled after nmcli operations
|
||||
wifi_enabled = self._ensure_wifi_radio_enabled(max_retries=3)
|
||||
if not wifi_enabled:
|
||||
logger.warning("WiFi radio may be disabled after nmcli AP cleanup")
|
||||
|
||||
self._ap_enabled_at = None
|
||||
logger.info("AP mode disabled successfully")
|
||||
return True, "AP mode disabled"
|
||||
except Exception as e:
|
||||
@@ -2278,22 +2395,30 @@ address=/detectportal.firefox.com/192.168.4.1
|
||||
f"Ethernet={ethernet_connected}, AP_active={ap_active}, "
|
||||
f"auto_enable={auto_enable}, disconnected_checks={self._disconnected_checks}")
|
||||
|
||||
# Determine if we should have AP mode active
|
||||
# AP mode should only be auto-enabled if:
|
||||
# - auto_enable_ap_mode is True AND
|
||||
# - WiFi is NOT connected AND
|
||||
# - Ethernet is NOT connected AND
|
||||
# - We've had multiple consecutive disconnected checks (grace period)
|
||||
is_disconnected = not status.connected and not ethernet_connected
|
||||
# Determine if we should have AP mode active.
|
||||
# "Disconnected" means either:
|
||||
# (a) nmcli reports no WiFi/Ethernet association, OR
|
||||
# (b) nmcli reports connected but there is no actual internet reachability.
|
||||
# Case (b) catches the common scenario where the Pi is associated with a
|
||||
# router whose WAN link is down (e.g. ISP outage, user moved home).
|
||||
has_association = status.connected or ethernet_connected
|
||||
if has_association:
|
||||
has_internet = self._check_internet_connectivity()
|
||||
else:
|
||||
has_internet = False
|
||||
|
||||
is_disconnected = not has_association or not has_internet
|
||||
|
||||
if is_disconnected:
|
||||
# Increment disconnected check counter
|
||||
self._disconnected_checks += 1
|
||||
logger.debug(f"Network disconnected (check {self._disconnected_checks}/{self._disconnected_checks_required})")
|
||||
reason = "no association" if not has_association else "no internet reachability"
|
||||
logger.debug(f"Network effectively disconnected ({reason}) "
|
||||
f"(check {self._disconnected_checks}/{self._disconnected_checks_required})")
|
||||
else:
|
||||
# Reset counter if we're connected
|
||||
# Reset counter if we're genuinely connected with internet
|
||||
if self._disconnected_checks > 0:
|
||||
logger.debug(f"Network connected, resetting disconnected check counter")
|
||||
logger.debug("Network connected with internet reachability, resetting counter")
|
||||
self._disconnected_checks = 0
|
||||
|
||||
# Only enable AP if we've had enough consecutive disconnected checks
|
||||
@@ -2323,11 +2448,11 @@ address=/detectportal.firefox.com/192.168.4.1
|
||||
elif not should_have_ap and ap_active:
|
||||
# Should not have AP but do - disable AP mode
|
||||
# Always disable if WiFi or Ethernet connects, regardless of auto_enable setting
|
||||
if status.connected or ethernet_connected:
|
||||
if not is_disconnected:
|
||||
success, message = self.disable_ap_mode()
|
||||
if success:
|
||||
if status.connected:
|
||||
logger.info("Auto-disabled AP mode (WiFi connected)")
|
||||
logger.info("Auto-disabled AP mode (WiFi connected with internet)")
|
||||
elif ethernet_connected:
|
||||
logger.info("Auto-disabled AP mode (Ethernet connected)")
|
||||
self._disconnected_checks = 0 # Reset counter
|
||||
@@ -2339,6 +2464,21 @@ address=/detectportal.firefox.com/192.168.4.1
|
||||
# Don't disable it automatically, let it stay active
|
||||
logger.debug("AP mode is active (manually enabled), keeping active")
|
||||
|
||||
# Idle-timeout check: disable AP if no client has connected within the window.
|
||||
# Only applies when AP is active and we haven't just decided to enable/disable it.
|
||||
if ap_active and self._ap_enabled_at is not None:
|
||||
idle_timeout_min = self.config.get("ap_idle_timeout_minutes", 15)
|
||||
elapsed = time.time() - self._ap_enabled_at
|
||||
if elapsed > idle_timeout_min * 60 and not self._has_ap_clients():
|
||||
logger.info(
|
||||
f"AP idle timeout ({idle_timeout_min} min, no clients) — disabling AP"
|
||||
)
|
||||
success, message = self.disable_ap_mode()
|
||||
if success:
|
||||
return True
|
||||
else:
|
||||
logger.warning(f"Failed to disable AP on idle timeout: {message}")
|
||||
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking AP mode: {e}", exc_info=True)
|
||||
|
||||
@@ -7133,9 +7133,14 @@ def connect_wifi():
|
||||
'message': message
|
||||
})
|
||||
else:
|
||||
# Propagate structured error type so the captive portal UI can show
|
||||
# "Wrong password — try again" instead of a generic failure message.
|
||||
error_type = "wrong_password" if (message or "").startswith("wrong_password:") else "connection_failed"
|
||||
clean_message = (message or "").removeprefix("wrong_password: ") or "Failed to connect to network"
|
||||
return jsonify({
|
||||
'status': 'error',
|
||||
'message': message or 'Failed to connect to network'
|
||||
'message': clean_message,
|
||||
'error_type': error_type
|
||||
}), 400
|
||||
except Exception as e:
|
||||
logger.exception("[WiFi] Failed connecting to WiFi network")
|
||||
|
||||
@@ -191,7 +191,10 @@ function doConnect() {
|
||||
// Poll for the new IP
|
||||
setTimeout(function() { checkNewIP(ssid); }, 3000);
|
||||
} else {
|
||||
showMsg(data.message || 'Connection failed', 'err');
|
||||
var msg = data.error_type === 'wrong_password'
|
||||
? 'Incorrect password — please try again'
|
||||
: (data.message || 'Connection failed');
|
||||
showMsg(msg, 'err');
|
||||
connecting = false;
|
||||
btn.disabled = false;
|
||||
btn.innerHTML = 'Connect';
|
||||
|
||||
Reference in New Issue
Block a user