LEDMatrix/web_interface/app.py

from flask import Flask, Blueprint, render_template, request, redirect, url_for, flash, jsonify, Response, send_from_directory
import json
import logging
import os
import sys
import subprocess
import time
from pathlib import Path
from datetime import datetime, timedelta

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))

from src.config_manager import ConfigManager
from src.exceptions import ConfigError
from src.plugin_system.plugin_manager import PluginManager
from src.plugin_system.store_manager import PluginStoreManager
from src.plugin_system.saved_repositories import SavedRepositoriesManager
from src.plugin_system.schema_manager import SchemaManager
from src.plugin_system.operation_queue import PluginOperationQueue
from src.plugin_system.state_manager import PluginStateManager
from src.plugin_system.operation_history import OperationHistory
from src.plugin_system.health_monitor import PluginHealthMonitor
from src.wifi_manager import WiFiManager

# Create Flask app
app = Flask(__name__)
app.secret_key = os.urandom(24)
config_manager = ConfigManager()

# CSRF protection disabled for local-only application
# CSRF is designed for internet-facing web apps to prevent cross-site request forgery.
# For a local-only Raspberry Pi application, the threat model is different:
# - If an attacker has network access to perform CSRF, they have other attack vectors
# - All API endpoints are programmatic (HTMX/fetch) and don't include CSRF tokens
# - Forms use HTMX which doesn't automatically include CSRF tokens
# If you need CSRF protection (e.g., exposing to internet), properly implement CSRF tokens in HTMX forms
csrf = None

# Initialize rate limiting (prevent accidental abuse, not security)
try:
    from flask_limiter import Limiter
    from flask_limiter.util import get_remote_address

    limiter = Limiter(
        app=app,
        key_func=get_remote_address,
        default_limits=["1000 per minute"],  # Generous limit for local use
        storage_uri="memory://"  # In-memory storage for simplicity
    )
except ImportError:
    # flask-limiter not installed, rate limiting disabled
    limiter = None
    pass

# Import cache functions from separate module to avoid circular imports
from web_interface.cache import get_cached, set_cached, invalidate_cache

# Initialize plugin managers - read plugins directory from config
config = config_manager.load_config()
plugin_system_config = config.get('plugin_system', {})
plugins_dir_name = plugin_system_config.get('plugins_directory', 'plugin-repos')

# Resolve plugin directory - handle both absolute and relative paths
if os.path.isabs(plugins_dir_name):
    plugins_dir = Path(plugins_dir_name)
else:
    # If relative, resolve relative to the project root (LEDMatrix directory)
    project_root = Path(__file__).parent.parent
    plugins_dir = project_root / plugins_dir_name

plugin_manager = PluginManager(
    plugins_dir=str(plugins_dir),
    config_manager=config_manager,
    display_manager=None,  # Not needed for web interface
    cache_manager=None     # Not needed for web interface
)
plugin_store_manager = PluginStoreManager(plugins_dir=str(plugins_dir))
saved_repositories_manager = SavedRepositoriesManager()

# Initialize schema manager
schema_manager = SchemaManager(
    plugins_dir=plugins_dir,
    project_root=project_root,
    logger=None
)

# Initialize operation queue for plugin operations
# Use lazy_load=True to defer file loading until first use (improves startup time)
operation_queue = PluginOperationQueue(
    history_file=str(project_root / "data" / "plugin_operations.json"),
    max_history=500,
    lazy_load=True
)

# Initialize plugin state manager
# Use lazy_load=True to defer file loading until first use (improves startup time)
plugin_state_manager = PluginStateManager(
    state_file=str(project_root / "data" / "plugin_state.json"),
    auto_save=True,
    lazy_load=True
)

# Initialize operation history
# Use lazy_load=True to defer file loading until first use (improves startup time)
operation_history = OperationHistory(
    history_file=str(project_root / "data" / "operation_history.json"),
    max_records=1000,
    lazy_load=True
)

# Initialize health monitoring (if health tracker is available)
# Deferred until first request to improve startup time
health_monitor = None
_health_monitor_initialized = False

# Plugin discovery is deferred until first API request that needs it
# This improves startup time - endpoints will call discover_plugins() when needed

# Register blueprints
from web_interface.blueprints.pages_v3 import pages_v3
from web_interface.blueprints.api_v3 import api_v3

# Initialize managers in blueprints
pages_v3.config_manager = config_manager
pages_v3.plugin_manager = plugin_manager
pages_v3.plugin_store_manager = plugin_store_manager
pages_v3.saved_repositories_manager = saved_repositories_manager

api_v3.config_manager = config_manager
api_v3.plugin_manager = plugin_manager
api_v3.plugin_store_manager = plugin_store_manager
api_v3.saved_repositories_manager = saved_repositories_manager
api_v3.schema_manager = schema_manager
api_v3.operation_queue = operation_queue
api_v3.plugin_state_manager = plugin_state_manager
api_v3.operation_history = operation_history
api_v3.health_monitor = health_monitor
# Initialize cache manager for API endpoints
from src.cache_manager import CacheManager
api_v3.cache_manager = CacheManager()

app.register_blueprint(pages_v3, url_prefix='/v3')
app.register_blueprint(api_v3, url_prefix='/api/v3')

# Route to serve plugin asset files (registered on main app, not blueprint, for /assets/... path)
@app.route('/assets/plugins/<plugin_id>/uploads/<path:filename>', methods=['GET'])
def serve_plugin_asset(plugin_id, filename):
    """Serve uploaded asset files from assets/plugins/{plugin_id}/uploads/"""
    try:
        # Build the asset directory path
        assets_dir = project_root / 'assets' / 'plugins' / plugin_id / 'uploads'
        assets_dir = assets_dir.resolve()

        # Security check: ensure the assets directory exists and is within project_root
        if not assets_dir.exists() or not assets_dir.is_dir():
            return jsonify({'status': 'error', 'message': 'Asset directory not found'}), 404

        # Ensure we're serving from within the assets directory (prevent directory traversal)
        # Use proper path resolution instead of string prefix matching to prevent bypasses
        assets_dir_resolved = assets_dir.resolve()
        project_root_resolved = project_root.resolve()

        # Check that assets_dir is actually within project_root using commonpath
        try:
            common_path = os.path.commonpath([str(assets_dir_resolved), str(project_root_resolved)])
            if common_path != str(project_root_resolved):
                return jsonify({'status': 'error', 'message': 'Invalid asset path'}), 403
        except ValueError:
            # commonpath raises ValueError if paths are on different drives (Windows)
            return jsonify({'status': 'error', 'message': 'Invalid asset path'}), 403

        # Resolve the requested file path
        requested_file = (assets_dir / filename).resolve()

        # Security check: ensure file is within the assets directory using proper path comparison
        # Use commonpath to ensure assets_dir is a true parent of requested_file
        try:
            common_path = os.path.commonpath([str(requested_file), str(assets_dir_resolved)])
            if common_path != str(assets_dir_resolved):
                return jsonify({'status': 'error', 'message': 'Invalid file path'}), 403
        except ValueError:
            # commonpath raises ValueError if paths are on different drives (Windows)
            return jsonify({'status': 'error', 'message': 'Invalid file path'}), 403

        # Check if file exists
        if not requested_file.exists() or not requested_file.is_file():
            return jsonify({'status': 'error', 'message': 'File not found'}), 404

        # Determine content type based on file extension
        content_type = 'application/octet-stream'
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            content_type = 'image/jpeg' if filename.lower().endswith(('.jpg', '.jpeg')) else 'image/png'
        elif filename.lower().endswith('.gif'):
            content_type = 'image/gif'
        elif filename.lower().endswith('.bmp'):
            content_type = 'image/bmp'
        elif filename.lower().endswith('.webp'):
            content_type = 'image/webp'
        elif filename.lower().endswith('.svg'):
            content_type = 'image/svg+xml'
        elif filename.lower().endswith('.json'):
            content_type = 'application/json'
        elif filename.lower().endswith('.txt'):
            content_type = 'text/plain'

        # Use send_from_directory to serve the file
        return send_from_directory(str(assets_dir), filename, mimetype=content_type)

    except Exception as e:
        # Log the exception with full traceback server-side
        import traceback
        app.logger.exception('Error serving plugin asset file')

        # Return generic error message to client (avoid leaking internal details)
        # Only include detailed error information when in debug mode
        if app.debug:
            return jsonify({
                'status': 'error',
                'message': str(e),
                'traceback': traceback.format_exc()
            }), 500
        else:
            return jsonify({
                'status': 'error',
                'message': 'Internal server error'
            }), 500

# Prime psutil CPU measurement once at startup so interval=None returns a real value
try:
    import psutil as _psutil_prime
    _psutil_prime.cpu_percent(interval=None)
except ImportError:
    pass

# Cached AP mode check — avoids creating a WiFiManager per request
_ap_mode_cache = {'value': False, 'timestamp': 0}
_AP_MODE_CACHE_TTL = 30  # seconds — AP mode is user-initiated; 30s is fine

# Cached ledmatrix service status for SSE stats stream
_ledmatrix_service_cache = {'active': False, 'timestamp': 0}
_LEDMATRIX_SERVICE_CACHE_TTL = 15  # seconds

def is_ap_mode_active():
    """
    Check if access point mode is currently active (cached, 30s TTL).
    Uses a direct systemctl check instead of instantiating WiFiManager.
    """
    now = time.time()
    if (now - _ap_mode_cache['timestamp']) < _AP_MODE_CACHE_TTL:
        return _ap_mode_cache['value']
    try:
        result = subprocess.run(
            ['systemctl', 'is-active', 'hostapd'],
            capture_output=True, text=True, timeout=2
        )
        active = result.stdout.strip() == 'active'
        _ap_mode_cache['value'] = active
        _ap_mode_cache['timestamp'] = now
        return active
    except (subprocess.SubprocessError, OSError) as e:
        logging.getLogger('web_interface').error(f"AP mode check failed: {e}")
        return _ap_mode_cache['value']

# Captive portal detection endpoints
# When AP mode is active, return responses that TRIGGER the captive portal popup.
# When not in AP mode, return normal "success" responses so connectivity checks pass.
@app.route('/hotspot-detect.html')
def hotspot_detect():
    """iOS/macOS captive portal detection endpoint"""
    if is_ap_mode_active():
        # Non-"Success" title triggers iOS captive portal popup
        return redirect(url_for('pages_v3.captive_setup'), code=302)
    return '<HTML><HEAD><TITLE>Success</TITLE></HEAD><BODY>Success</BODY></HTML>', 200

@app.route('/generate_204')
def generate_204():
    """Android captive portal detection endpoint"""
    if is_ap_mode_active():
        # Android expects 204 = "internet works". Non-204 triggers portal popup.
        return redirect(url_for('pages_v3.captive_setup'), code=302)
    return '', 204

@app.route('/connecttest.txt')
def connecttest_txt():
    """Windows captive portal detection endpoint"""
    if is_ap_mode_active():
        return redirect(url_for('pages_v3.captive_setup'), code=302)
    return 'Microsoft Connect Test', 200

@app.route('/success.txt')
def success_txt():
    """Firefox captive portal detection endpoint"""
    if is_ap_mode_active():
        return redirect(url_for('pages_v3.captive_setup'), code=302)
    return 'success', 200

# Initialize logging
try:
    from web_interface.logging_config import setup_web_interface_logging, log_api_request
    # Use JSON logging in production, readable logs in development
    use_json_logging = os.environ.get('LEDMATRIX_JSON_LOGGING', 'false').lower() == 'true'
    setup_web_interface_logging(level='INFO', use_json=use_json_logging)
except ImportError:
    # Logging config not available, use default
    log_api_request = None
    pass

# Request timing and logging middleware
@app.before_request
def before_request():
    """Track request start time for logging."""
    from flask import request
    request.start_time = time.time()

@app.after_request
def after_request_logging(response):
    """Log API requests after response."""
    if log_api_request:
        try:
            from flask import request
            duration_ms = (time.time() - getattr(request, 'start_time', time.time())) * 1000
            ip_address = request.remote_addr if hasattr(request, 'remote_addr') else None
            log_api_request(
                method=request.method,
                path=request.path,
                status_code=response.status_code,
                duration_ms=duration_ms,
                ip_address=ip_address
            )
        except Exception:
            pass  # Don't break response if logging fails
    return response

# Global error handlers
@app.errorhandler(404)
def not_found_error(error):
    """Handle 404 errors."""
    return jsonify({
        'status': 'error',
        'error_code': 'NOT_FOUND',
        'message': 'Resource not found',
        'path': request.path
    }), 404

@app.errorhandler(500)
def internal_error(error):
    """Handle 500 errors."""
    import traceback
    error_details = traceback.format_exc()

    # Log the error
    import logging
    logger = logging.getLogger('web_interface')
    logger.error(f"Internal server error: {error}", exc_info=True)

    # Return user-friendly error (hide internal details in production)
    return jsonify({
        'status': 'error',
        'error_code': 'INTERNAL_ERROR',
        'message': 'An internal error occurred',
        'details': error_details if app.debug else None
    }), 500

@app.errorhandler(Exception)
def handle_exception(error):
    """Handle all unhandled exceptions."""
    import traceback
    import logging
    logger = logging.getLogger('web_interface')
    logger.error(f"Unhandled exception: {error}", exc_info=True)

    return jsonify({
        'status': 'error',
        'error_code': 'UNKNOWN_ERROR',
        'message': str(error) if app.debug else 'An error occurred',
        'details': traceback.format_exc() if app.debug else None
    }), 500

# Captive portal redirect middleware
@app.before_request
def captive_portal_redirect():
    """
    Redirect all HTTP requests to WiFi setup page when AP mode is active.
    This creates a captive portal experience where users are automatically
    directed to the WiFi configuration page.
    """
    # Check if AP mode is active
    if not is_ap_mode_active():
        return None  # Continue normal request processing

    # Get the request path
    path = request.path

    # List of paths that should NOT be redirected (allow normal operation)
    allowed_paths = [
        '/v3',  # Main interface and all sub-paths (includes /v3/setup)
        '/api/v3/',  # All API endpoints
        '/static/',  # Static files (CSS, JS, images)
        '/hotspot-detect.html',  # iOS/macOS detection
        '/generate_204',  # Android detection
        '/connecttest.txt',  # Windows detection
        '/success.txt',  # Firefox detection
        '/favicon.ico',  # Favicon
    ]

    for allowed_path in allowed_paths:
        if path.startswith(allowed_path):
            return None

    # Redirect to lightweight captive portal setup page (not the full UI)
    return redirect(url_for('pages_v3.captive_setup'), code=302)

# Add security headers and caching to all responses
@app.after_request
def add_security_headers(response):
    """Add security headers and caching to all responses"""
    # Only set standard security headers - avoid Permissions-Policy to prevent browser warnings
    # about unrecognized features
    response.headers['X-Content-Type-Options'] = 'nosniff'
    response.headers['X-Frame-Options'] = 'SAMEORIGIN'
    response.headers['X-XSS-Protection'] = '1; mode=block'

    # Add caching headers for static assets
    if request.path.startswith('/static/'):
        # Cache static assets for 1 year (with versioning via query params)
        response.headers['Cache-Control'] = 'public, max-age=31536000, immutable'
        response.headers['Expires'] = (datetime.now() + timedelta(days=365)).strftime('%a, %d %b %Y %H:%M:%S GMT')
    elif request.path.startswith('/api/v3/'):
        # Short cache for API responses (5 seconds) to allow for quick updates
        # but reduce server load for repeated requests
        if request.method == 'GET' and 'stream' not in request.path:
            response.headers['Cache-Control'] = 'private, max-age=5, must-revalidate'
    else:
        # No cache for HTML pages to ensure fresh content
        response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
        response.headers['Pragma'] = 'no-cache'
        response.headers['Expires'] = '0'

    return response

# SSE helper function
def sse_response(generator_func):
    """Helper to create SSE responses"""
    def generate():
        for data in generator_func():
            yield f"data: {json.dumps(data)}\n\n"
    return Response(generate(), mimetype='text/event-stream')

# System status generator for SSE
def system_status_generator():
    """Generate system status updates"""
    while True:
        try:
            # Try to import psutil for system stats
            try:
                import psutil
                # interval=None is non-blocking; primed at module startup above
                cpu_percent = round(psutil.cpu_percent(interval=None), 1)
                memory = psutil.virtual_memory()
                memory_used_percent = round(memory.percent, 1)

                # Try to get CPU temperature (Raspberry Pi specific)
                cpu_temp = 0
                try:
                    with open('/sys/class/thermal/thermal_zone0/temp', 'r') as f:
                        cpu_temp = round(float(f.read()) / 1000.0, 1)
                except (OSError, ValueError):
                    pass

            except ImportError:
                cpu_percent = 0
                memory_used_percent = 0
                cpu_temp = 0

            # Check if display service is running (cached to avoid per-client subprocess forks)
            now = time.time()
            if (now - _ledmatrix_service_cache['timestamp']) >= _LEDMATRIX_SERVICE_CACHE_TTL:
                try:
                    result = subprocess.run(['systemctl', 'is-active', 'ledmatrix'],
                                            capture_output=True, text=True, timeout=2)
                    _ledmatrix_service_cache['active'] = result.stdout.strip() == 'active'
                except (subprocess.SubprocessError, OSError):
                    pass
                _ledmatrix_service_cache['timestamp'] = now
            service_active = _ledmatrix_service_cache['active']

            status = {
                'timestamp': time.time(),
                'uptime': 'Running',
                'service_active': service_active,
                'cpu_percent': cpu_percent,
                'memory_used_percent': memory_used_percent,
                'cpu_temp': cpu_temp,
                'disk_used_percent': 0
            }
            yield status
        except Exception as e:
            yield {'error': str(e)}
        time.sleep(10)  # Update every 10 seconds (reduced frequency for better performance)

# Display preview generator for SSE
def display_preview_generator():
    """Generate display preview updates from snapshot file"""
    import base64
    from PIL import Image
    import io

    snapshot_path = "/tmp/led_matrix_preview.png"
    last_modified = None

    # Get display dimensions from config
    try:
        main_config = config_manager.load_config()
        cols = main_config.get('display', {}).get('hardware', {}).get('cols', 64)
        chain_length = main_config.get('display', {}).get('hardware', {}).get('chain_length', 2)
        rows = main_config.get('display', {}).get('hardware', {}).get('rows', 32)
        parallel = main_config.get('display', {}).get('hardware', {}).get('parallel', 1)
        width = cols * chain_length
        height = rows * parallel
    except (KeyError, TypeError, ValueError, ConfigError):
        width = 128
        height = 64

    while True:
        try:
            # Check if snapshot file exists and has been modified
            if os.path.exists(snapshot_path):
                current_modified = os.path.getmtime(snapshot_path)

                # Only read if file is new or has been updated
                if last_modified is None or current_modified > last_modified:
                    try:
                        # Read and encode the image
                        with Image.open(snapshot_path) as img:
                            # Convert to PNG and encode as base64
                            buffer = io.BytesIO()
                            img.save(buffer, format='PNG')
                            img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')

                            preview_data = {
                                'timestamp': time.time(),
                                'width': width,
                                'height': height,
                                'image': img_str
                            }
                            last_modified = current_modified
                            yield preview_data
                    except Exception as read_err:
                        # File might be being written, skip this update
                        pass
            else:
                # No snapshot available
                yield {
                    'timestamp': time.time(),
                    'width': width,
                    'height': height,
                    'image': None
                }

        except Exception as e:
            yield {'error': str(e)}

        time.sleep(1.0)  # Check once per second — halves PIL encode overhead vs 0.5s

# Logs generator for SSE
def logs_generator():
    """Generate log updates from journalctl"""
    while True:
        try:
            # Get recent logs from journalctl (simplified version)
            # Note: User should be in systemd-journal group to read logs without sudo
            try:
                result = subprocess.run(
                    ['journalctl', '-u', 'ledmatrix.service', '-n', '50', '--no-pager'],
                    capture_output=True, text=True, timeout=5
                )

                if result.returncode == 0:
                    logs_text = result.stdout.strip()
                    if logs_text:
                        logs_data = {
                            'timestamp': time.time(),
                            'logs': logs_text
                        }
                        yield logs_data
                    else:
                        # No logs available
                        logs_data = {
                            'timestamp': time.time(),
                            'logs': 'No logs available from ledmatrix service'
                        }
                        yield logs_data
                else:
                    # journalctl failed
                    error_data = {
                        'timestamp': time.time(),
                        'logs': f'journalctl failed with return code {result.returncode}: {result.stderr.strip()}'
                    }
                    yield error_data

            except subprocess.TimeoutExpired:
                # Timeout - just skip this update
                pass
            except Exception as e:
                error_data = {
                    'timestamp': time.time(),
                    'logs': f'Error running journalctl: {str(e)}'
                }
                yield error_data

        except Exception as e:
            error_data = {
                'timestamp': time.time(),
                'logs': f'Unexpected error in logs generator: {str(e)}'
            }
            yield error_data

        time.sleep(5)  # Update every 5 seconds (reduced frequency for better performance)

# SSE endpoints
@app.route('/api/v3/stream/stats')
def stream_stats():
    return sse_response(system_status_generator)

@app.route('/api/v3/stream/display')
def stream_display():
    return sse_response(display_preview_generator)

@app.route('/api/v3/stream/logs')
def stream_logs():
    return sse_response(logs_generator)

# Exempt SSE streams from CSRF and add rate limiting
if csrf:
    csrf.exempt(stream_stats)
    csrf.exempt(stream_display)
    csrf.exempt(stream_logs)
    # Note: api_v3 blueprint is exempted above after registration

if limiter:
    limiter.limit("20 per minute")(stream_stats)
    limiter.limit("20 per minute")(stream_display)
    limiter.limit("20 per minute")(stream_logs)

# Main route - redirect to v3 interface as default
@app.route('/')
def index():
    """Redirect to v3 interface"""
    return redirect(url_for('pages_v3.index'))

@app.route('/favicon.ico')
def favicon():
    """Return 204 No Content for favicon to avoid 404 errors"""
    return '', 204

def _initialize_health_monitor():
    """Initialize health monitoring after server is ready to accept requests."""
    global health_monitor, _health_monitor_initialized
    if _health_monitor_initialized:
        return

    if health_monitor is None and hasattr(plugin_manager, 'health_tracker') and plugin_manager.health_tracker:
        try:
            health_monitor = PluginHealthMonitor(
                health_tracker=plugin_manager.health_tracker,
                check_interval=60.0,  # Check every minute
                degraded_threshold=0.5,
                unhealthy_threshold=0.8,
                max_response_time=5.0
            )
            health_monitor.start_monitoring()
            print("✓ Plugin health monitoring started")
        except Exception as e:
            print(f"⚠ Could not start health monitoring: {e}")

    _health_monitor_initialized = True

_reconciliation_done = False
_reconciliation_started = False
import threading as _threading
_reconciliation_lock = _threading.Lock()

def _run_startup_reconciliation() -> None:
    """Run state reconciliation in background to auto-repair missing plugins.

    Reconciliation runs exactly once per process lifetime, regardless of
    whether every inconsistency could be auto-fixed. Previously, a failed
    auto-repair (e.g. a config entry referencing a plugin that no longer
    exists in the registry) would reset ``_reconciliation_started`` to False,
    causing the ``@app.before_request`` hook to re-trigger reconciliation on
    every single HTTP request — an infinite install-retry loop that pegged
    the CPU and flooded the log. Unresolved issues are now left in place for
    the user to address via the UI; the reconciler itself also caches
    per-plugin unrecoverable failures internally so repeated reconcile calls
    stay cheap.
    """
    global _reconciliation_done
    from src.logging_config import get_logger
    _logger = get_logger('reconciliation')

    try:
        from src.plugin_system.state_reconciliation import StateReconciliation
        reconciler = StateReconciliation(
            state_manager=plugin_state_manager,
            config_manager=config_manager,
            plugin_manager=plugin_manager,
            plugins_dir=plugins_dir,
            store_manager=plugin_store_manager
        )
        result = reconciler.reconcile_state()
        if result.inconsistencies_found:
            _logger.info("[Reconciliation] %s", result.message)
        if result.inconsistencies_fixed:
            plugin_manager.discover_plugins()
        if not result.reconciliation_successful:
            _logger.warning(
                "[Reconciliation] Finished with %d unresolved issue(s); "
                "will not retry automatically. Use the Plugin Store or the "
                "manual 'Reconcile' action to resolve.",
                len(result.inconsistencies_manual),
            )
    except Exception as e:
        _logger.error("[Reconciliation] Error: %s", e, exc_info=True)
    finally:
        # Always mark done — we do not want an unhandled exception (or an
        # unresolved inconsistency) to cause the @before_request hook to
        # retrigger reconciliation on every subsequent request.
        _reconciliation_done = True

# Initialize health monitor and run reconciliation on first request
@app.before_request
def check_health_monitor():
    """Ensure health monitor is initialized; launch reconciliation in background."""
    global _reconciliation_started
    if not _health_monitor_initialized:
        _initialize_health_monitor()
    with _reconciliation_lock:
        if not _reconciliation_started:
            _reconciliation_started = True
            _threading.Thread(target=_run_startup_reconciliation, daemon=True).start()

if __name__ == '__main__':
    # threaded=True is Flask's default since 1.0 but stated explicitly so that
    # long-lived /api/v3/stream/* SSE connections don't starve other requests.
    app.run(host='0.0.0.0', port=5000, debug=True, threaded=True)