diff --git a/src/plugin_system/store_manager.py b/src/plugin_system/store_manager.py index 42cb8176..8c0fff39 100644 --- a/src/plugin_system/store_manager.py +++ b/src/plugin_system/store_manager.py @@ -14,6 +14,7 @@ import zipfile import tempfile import requests import time +from concurrent.futures import ThreadPoolExecutor from datetime import datetime from pathlib import Path from typing import List, Dict, Optional, Any @@ -52,7 +53,7 @@ class PluginStoreManager: self.registry_cache = None self.registry_cache_time = None # Timestamp of when registry was cached self.github_cache = {} # Cache for GitHub API responses - self.cache_timeout = 3600 # 1 hour cache timeout + self.cache_timeout = 3600 # 1 hour cache timeout (repo info: stars, default_branch) # 15 minutes for registry cache. Long enough that the plugin list # endpoint on a warm cache never hits the network, short enough that # new plugins show up within a reasonable window. See also the @@ -60,9 +61,15 @@ class PluginStoreManager: # failures. self.registry_cache_timeout = 900 self.commit_info_cache = {} # Cache for latest commit info: {key: (timestamp, data)} - self.commit_cache_timeout = 300 # 5 minutes (same as registry) + # 30 minutes for commit/manifest caches. Plugin Store users browse + # the catalog via /plugins/store/list which fetches commit info and + # manifest data per plugin. 5-min TTLs meant every fresh browse on + # a Pi4 paid for ~3 HTTP requests × N plugins (30-60s serial). 30 + # minutes keeps the cache warm across a realistic session while + # still picking up upstream updates within a reasonable window. + self.commit_cache_timeout = 1800 self.manifest_cache = {} # Cache for GitHub manifest fetches: {key: (timestamp, data)} - self.manifest_cache_timeout = 300 # 5 minutes + self.manifest_cache_timeout = 1800 self.github_token = self._load_github_token() self._token_validation_cache = {} # Cache for token validation results: {token: (is_valid, timestamp, error_message)} self._token_validation_cache_timeout = 300 # 5 minutes cache for token validation @@ -342,7 +349,21 @@ class PluginStoreManager: if self.github_token: headers['Authorization'] = f'token {self.github_token}' - response = requests.get(api_url, headers=headers, timeout=10) + try: + response = requests.get(api_url, headers=headers, timeout=10) + except requests.RequestException as req_err: + # Network error: prefer a stale cache hit over an + # empty default so the UI keeps working on a flaky + # Pi WiFi link. + if cache_key in self.github_cache: + _, stale = self.github_cache[cache_key] + self.logger.warning( + "GitHub repo info fetch failed for %s (%s); serving stale cache.", + cache_key, req_err, + ) + return stale + raise + if response.status_code == 200: data = response.json() pushed_at = data.get('pushed_at', '') or data.get('updated_at', '') @@ -362,7 +383,16 @@ class PluginStoreManager: self.github_cache[cache_key] = (time.time(), repo_info) return repo_info elif response.status_code == 403: - # Rate limit or authentication issue + # Rate limit or authentication issue. If we have a + # previously-cached value, serve it rather than + # returning empty defaults — a stale star count is + # better than a reset to zero. + if cache_key in self.github_cache: + _, stale = self.github_cache[cache_key] + self.logger.warning( + "GitHub API 403 for %s; serving stale cache.", cache_key, + ) + return stale if not self.github_token: self.logger.warning( f"GitHub API rate limit likely exceeded (403). " @@ -376,6 +406,9 @@ class PluginStoreManager: ) else: self.logger.warning(f"GitHub API request failed: {response.status_code} for {api_url}") + if cache_key in self.github_cache: + _, stale = self.github_cache[cache_key] + return stale return { 'stars': 0, @@ -558,68 +591,91 @@ class PluginStoreManager: except Exception as e: self.logger.warning(f"Failed to fetch plugins from saved repository {repo_url}: {e}") - results = [] + # First pass: apply cheap filters (category/tags/query) so we only + # fetch GitHub metadata for plugins that will actually be returned. + filtered: List[Dict] = [] for plugin in plugins: - # Category filter if category and plugin.get('category') != category: continue - - # Tags filter (match any tag) if tags and not any(tag in plugin.get('tags', []) for tag in tags): continue - - # Query search (case-insensitive) if query: query_lower = query.lower() searchable_text = ' '.join([ plugin.get('name', ''), plugin.get('description', ''), plugin.get('id', ''), - plugin.get('author', '') + plugin.get('author', ''), ]).lower() - if query_lower not in searchable_text: continue + filtered.append(plugin) - # Enhance plugin data with GitHub metadata + def _enrich(plugin: Dict) -> Dict: + """Enrich a single plugin with GitHub metadata. + + Called concurrently from a ThreadPoolExecutor. Each underlying + HTTP helper (``_get_github_repo_info`` / ``_get_latest_commit_info`` + / ``_fetch_manifest_from_github``) is thread-safe — they use + ``requests`` and write their own cache keys on Python dicts, + which is atomic under the GIL for single-key assignments. + """ enhanced_plugin = plugin.copy() - - # Get real GitHub stars repo_url = plugin.get('repo', '') - if repo_url: - github_info = self._get_github_repo_info(repo_url) - enhanced_plugin['stars'] = github_info.get('stars', plugin.get('stars', 0)) - enhanced_plugin['default_branch'] = github_info.get('default_branch', plugin.get('branch', 'main')) - enhanced_plugin['last_updated_iso'] = github_info.get('last_commit_iso') - enhanced_plugin['last_updated'] = github_info.get('last_commit_date') + if not repo_url: + return enhanced_plugin - if fetch_commit_info: - branch = plugin.get('branch') or github_info.get('default_branch', 'main') + github_info = self._get_github_repo_info(repo_url) + enhanced_plugin['stars'] = github_info.get('stars', plugin.get('stars', 0)) + enhanced_plugin['default_branch'] = github_info.get('default_branch', plugin.get('branch', 'main')) + enhanced_plugin['last_updated_iso'] = github_info.get('last_commit_iso') + enhanced_plugin['last_updated'] = github_info.get('last_commit_date') - commit_info = self._get_latest_commit_info(repo_url, branch) - if commit_info: - enhanced_plugin['last_commit'] = commit_info.get('short_sha') - enhanced_plugin['last_commit_sha'] = commit_info.get('sha') - enhanced_plugin['last_updated'] = commit_info.get('date') or enhanced_plugin.get('last_updated') - enhanced_plugin['last_updated_iso'] = commit_info.get('date_iso') or enhanced_plugin.get('last_updated_iso') - enhanced_plugin['last_commit_message'] = commit_info.get('message') - enhanced_plugin['last_commit_author'] = commit_info.get('author') - enhanced_plugin['branch'] = commit_info.get('branch', branch) - enhanced_plugin['last_commit_branch'] = commit_info.get('branch') + if fetch_commit_info: + branch = plugin.get('branch') or github_info.get('default_branch', 'main') - # Fetch manifest from GitHub for additional metadata (description, etc.) - plugin_subpath = plugin.get('plugin_path', '') - manifest_rel = f"{plugin_subpath}/manifest.json" if plugin_subpath else "manifest.json" - github_manifest = self._fetch_manifest_from_github(repo_url, branch, manifest_rel) - if github_manifest: - if 'last_updated' in github_manifest and not enhanced_plugin.get('last_updated'): - enhanced_plugin['last_updated'] = github_manifest['last_updated'] - if 'description' in github_manifest: - enhanced_plugin['description'] = github_manifest['description'] + commit_info = self._get_latest_commit_info(repo_url, branch) + if commit_info: + enhanced_plugin['last_commit'] = commit_info.get('short_sha') + enhanced_plugin['last_commit_sha'] = commit_info.get('sha') + enhanced_plugin['last_updated'] = commit_info.get('date') or enhanced_plugin.get('last_updated') + enhanced_plugin['last_updated_iso'] = commit_info.get('date_iso') or enhanced_plugin.get('last_updated_iso') + enhanced_plugin['last_commit_message'] = commit_info.get('message') + enhanced_plugin['last_commit_author'] = commit_info.get('author') + enhanced_plugin['branch'] = commit_info.get('branch', branch) + enhanced_plugin['last_commit_branch'] = commit_info.get('branch') - results.append(enhanced_plugin) + plugin_subpath = plugin.get('plugin_path', '') + manifest_rel = f"{plugin_subpath}/manifest.json" if plugin_subpath else "manifest.json" + github_manifest = self._fetch_manifest_from_github(repo_url, branch, manifest_rel) + if github_manifest: + if 'last_updated' in github_manifest and not enhanced_plugin.get('last_updated'): + enhanced_plugin['last_updated'] = github_manifest['last_updated'] + if 'description' in github_manifest: + enhanced_plugin['description'] = github_manifest['description'] - return results + return enhanced_plugin + + # Fan out the per-plugin GitHub enrichment. The previous + # implementation did this serially, which on a Pi4 with ~15 plugins + # and a fresh cache meant 30+ HTTP requests in strict sequence (the + # "connecting to display" hang reported by users). With a thread + # pool, latency is dominated by the slowest request rather than + # their sum. Workers capped at 10 to stay well under the + # unauthenticated GitHub rate limit burst and avoid overwhelming a + # Pi's WiFi link. For a small number of plugins the pool is + # essentially free. + if not filtered: + return [] + + if len(filtered) == 1 or not fetch_commit_info and len(filtered) < 4: + # Not worth the pool overhead for tiny workloads. + return [_enrich(p) for p in filtered] + + max_workers = min(10, len(filtered)) + with ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix='plugin-search') as executor: + # executor.map preserves input order, which the UI relies on. + return list(executor.map(_enrich, filtered)) def _fetch_manifest_from_github(self, repo_url: str, branch: str = "master", manifest_path: str = "manifest.json", force_refresh: bool = False) -> Optional[Dict]: """ @@ -717,7 +773,22 @@ class PluginStoreManager: last_error = None for branch_name in branches_to_try: api_url = f"https://api.github.com/repos/{owner}/{repo}/commits/{branch_name}" - response = requests.get(api_url, headers=headers, timeout=10) + try: + response = requests.get(api_url, headers=headers, timeout=10) + except requests.RequestException as req_err: + # Network failure: fall back to a stale cache hit if + # available so the plugin store UI keeps populating + # commit info on a flaky WiFi link. + if cache_key in self.commit_info_cache: + _, stale = self.commit_info_cache[cache_key] + if stale is not None: + self.logger.warning( + "GitHub commit fetch failed for %s (%s); serving stale cache.", + cache_key, req_err, + ) + return stale + last_error = str(req_err) + continue if response.status_code == 200: commit_data = response.json() commit_sha_full = commit_data.get('sha', '') diff --git a/test/test_store_manager_caches.py b/test/test_store_manager_caches.py index 73ef53ef..dfebf963 100644 --- a/test/test_store_manager_caches.py +++ b/test/test_store_manager_caches.py @@ -113,6 +113,152 @@ class TestGitInfoCache(unittest.TestCase): self.assertIsNone(self.sm._get_local_git_info(non_git)) +class TestSearchPluginsParallel(unittest.TestCase): + """Plugin Store browse path — the per-plugin GitHub enrichment used to + run serially, turning a browse of 15 plugins into 30–45 sequential HTTP + requests on a cold cache. This batch of tests locks in the parallel + fan-out and verifies output shape/ordering haven't regressed. + """ + + def setUp(self): + self._tmp = TemporaryDirectory() + self.addCleanup(self._tmp.cleanup) + self.sm = PluginStoreManager(plugins_dir=self._tmp.name) + + # Fake registry with 5 plugins. + self.registry = { + "plugins": [ + {"id": f"plg{i}", "name": f"Plugin {i}", + "repo": f"https://github.com/owner/plg{i}", "category": "util"} + for i in range(5) + ] + } + self.sm.registry_cache = self.registry + self.sm.registry_cache_time = time.time() + + self._enrich_calls = [] + + def fake_repo(repo_url): + self._enrich_calls.append(("repo", repo_url)) + return {"stars": 1, "default_branch": "main", + "last_commit_iso": "2026-04-08T00:00:00Z", + "last_commit_date": "2026-04-08"} + + def fake_commit(repo_url, branch): + self._enrich_calls.append(("commit", repo_url, branch)) + return {"short_sha": "abc1234", "sha": "abc1234" + "0" * 33, + "date_iso": "2026-04-08T00:00:00Z", "date": "2026-04-08", + "message": "m", "author": "a", "branch": branch} + + def fake_manifest(repo_url, branch, manifest_path): + self._enrich_calls.append(("manifest", repo_url, branch)) + return {"description": "desc"} + + self.sm._get_github_repo_info = fake_repo + self.sm._get_latest_commit_info = fake_commit + self.sm._fetch_manifest_from_github = fake_manifest + + def test_results_preserve_registry_order(self): + results = self.sm.search_plugins(include_saved_repos=False) + self.assertEqual([p["id"] for p in results], + [f"plg{i}" for i in range(5)]) + + def test_filters_applied_before_enrichment(self): + # Filter down to a single plugin via category — ensures we don't + # waste GitHub calls enriching plugins that won't be returned. + self.registry["plugins"][2]["category"] = "special" + self.sm.registry_cache = self.registry + self._enrich_calls.clear() + results = self.sm.search_plugins(category="special", include_saved_repos=False) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]["id"], "plg2") + # Only one plugin should have been enriched. + repo_calls = [c for c in self._enrich_calls if c[0] == "repo"] + self.assertEqual(len(repo_calls), 1) + + def test_enrichment_runs_concurrently(self): + """Verify the thread pool actually runs fetches in parallel. + + We make each fake fetch sleep briefly and check wall time — if the + code had regressed to serial execution, 5 plugins × ~60ms sleep + would be ≥ 300ms. Parallel should be well under that. + """ + import threading + concurrent_workers = [] + peak_lock = threading.Lock() + peak = {"count": 0, "current": 0} + + def slow_repo(repo_url): + with peak_lock: + peak["current"] += 1 + if peak["current"] > peak["count"]: + peak["count"] = peak["current"] + time.sleep(0.05) + with peak_lock: + peak["current"] -= 1 + return {"stars": 0, "default_branch": "main", + "last_commit_iso": "", "last_commit_date": ""} + + self.sm._get_github_repo_info = slow_repo + self.sm._get_latest_commit_info = lambda *a, **k: None + self.sm._fetch_manifest_from_github = lambda *a, **k: None + + t0 = time.time() + results = self.sm.search_plugins(fetch_commit_info=False, include_saved_repos=False) + elapsed = time.time() - t0 + + self.assertEqual(len(results), 5) + # Serial would be ~250ms; parallel should be well under 200ms. + self.assertLess(elapsed, 0.2, + f"search_plugins took {elapsed:.3f}s — appears to have regressed to serial") + self.assertGreaterEqual(peak["count"], 2, + "no concurrent fetches observed — thread pool not engaging") + + +class TestStaleOnErrorFallbacks(unittest.TestCase): + """When GitHub is unreachable, previously-cached values should still be + returned rather than zero/None. Important on Pi's WiFi links. + """ + + def setUp(self): + self._tmp = TemporaryDirectory() + self.addCleanup(self._tmp.cleanup) + self.sm = PluginStoreManager(plugins_dir=self._tmp.name) + + def test_repo_info_stale_on_network_error(self): + cache_key = "owner/repo" + good = {"stars": 42, "default_branch": "main", + "last_commit_iso": "", "last_commit_date": "", + "forks": 0, "open_issues": 0, "updated_at_iso": "", + "language": "", "license": ""} + # Seed the cache with a known-good value, then force expiry. + self.sm.github_cache[cache_key] = (time.time() - 10_000, good) + self.sm.cache_timeout = 1 # force re-fetch + + import requests as real_requests + with patch("src.plugin_system.store_manager.requests.get", + side_effect=real_requests.ConnectionError("boom")): + result = self.sm._get_github_repo_info("https://github.com/owner/repo") + self.assertEqual(result["stars"], 42) + + def test_commit_info_stale_on_network_error(self): + cache_key = "owner/repo:main" + good = {"branch": "main", "sha": "a" * 40, "short_sha": "aaaaaaa", + "date_iso": "2026-04-08T00:00:00Z", "date": "2026-04-08", + "author": "x", "message": "y"} + self.sm.commit_info_cache[cache_key] = (time.time() - 10_000, good) + self.sm.commit_cache_timeout = 1 # force re-fetch + + import requests as real_requests + with patch("src.plugin_system.store_manager.requests.get", + side_effect=real_requests.ConnectionError("boom")): + result = self.sm._get_latest_commit_info( + "https://github.com/owner/repo", branch="main" + ) + self.assertIsNotNone(result) + self.assertEqual(result["short_sha"], "aaaaaaa") + + class TestRegistryStaleCacheFallback(unittest.TestCase): def setUp(self): self._tmp = TemporaryDirectory()