perf(plugins): parallelize Plugin Store browse and extend metadata cache TTLs

Follow-up to the previous commit addressing the Plugin Store browse path
specifically. Most users install plugins via the store (ZIP extraction,
no .git directory) so the git-info mtime cache from the previous commit
was a no-op for them; their pain was coming from /plugins/store/list.

Root cause. search_plugins() enriched each returned plugin with three
serial GitHub fetches: _get_github_repo_info (repo API), _get_latest_commit_info
(commits API), _fetch_manifest_from_github (raw.githubusercontent.com).
Fifteen plugins × three requests × serial HTTP = 30–45 sequential round
trips on every cold browse. On a Pi4 over WiFi that translated directly
into the "connecting to display" hang users reported. The commit and
manifest caches had a 5-minute TTL, so even a brief absence re-paid the
full cost.

Changes.

- ``search_plugins``: fan out per-plugin enrichment through a
  ``ThreadPoolExecutor`` (max 10 workers, stays well under unauthenticated
  GitHub rate limits). Apply category/tag/query filters before enrichment
  so we never waste requests on plugins that will be filtered out.
  ``executor.map`` preserves input order, which the UI depends on.
- ``commit_cache_timeout`` and ``manifest_cache_timeout``: 5 min → 30 min.
  Keeps the cache warm across a realistic session while still picking up
  upstream updates in a reasonable window.
- ``_get_github_repo_info`` and ``_get_latest_commit_info``: stale-on-error
  fallback. On a network failure or a 403 we now prefer a previously-
  cached value over the zero-default, matching the pattern already in
  ``fetch_registry``. Flaky Pi WiFi no longer causes star counts to flip
  to 0 and commit info to disappear.

Tests (5 new in test_store_manager_caches.py).

- ``test_results_preserve_registry_order`` — the parallel map must still
  return plugins in input order.
- ``test_filters_applied_before_enrichment`` — category/tag/query filters
  run first so we don't waste HTTP calls.
- ``test_enrichment_runs_concurrently`` — peak-concurrency check plus a
  wall-time bound that would fail if the code regressed to serial.
- ``test_repo_info_stale_on_network_error`` — repo info falls back to
  stale cache on RequestException.
- ``test_commit_info_stale_on_network_error`` — commit info falls back to
  stale cache on RequestException.

All 29 tests (16 reconciliation, 13 store_manager caches) pass.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Chuck
2026-04-08 10:57:12 -04:00
parent b031b99130
commit c03eb8dbcc
2 changed files with 263 additions and 46 deletions

View File

@@ -14,6 +14,7 @@ import zipfile
import tempfile import tempfile
import requests import requests
import time import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import List, Dict, Optional, Any from typing import List, Dict, Optional, Any
@@ -52,7 +53,7 @@ class PluginStoreManager:
self.registry_cache = None self.registry_cache = None
self.registry_cache_time = None # Timestamp of when registry was cached self.registry_cache_time = None # Timestamp of when registry was cached
self.github_cache = {} # Cache for GitHub API responses self.github_cache = {} # Cache for GitHub API responses
self.cache_timeout = 3600 # 1 hour cache timeout self.cache_timeout = 3600 # 1 hour cache timeout (repo info: stars, default_branch)
# 15 minutes for registry cache. Long enough that the plugin list # 15 minutes for registry cache. Long enough that the plugin list
# endpoint on a warm cache never hits the network, short enough that # endpoint on a warm cache never hits the network, short enough that
# new plugins show up within a reasonable window. See also the # new plugins show up within a reasonable window. See also the
@@ -60,9 +61,15 @@ class PluginStoreManager:
# failures. # failures.
self.registry_cache_timeout = 900 self.registry_cache_timeout = 900
self.commit_info_cache = {} # Cache for latest commit info: {key: (timestamp, data)} self.commit_info_cache = {} # Cache for latest commit info: {key: (timestamp, data)}
self.commit_cache_timeout = 300 # 5 minutes (same as registry) # 30 minutes for commit/manifest caches. Plugin Store users browse
# the catalog via /plugins/store/list which fetches commit info and
# manifest data per plugin. 5-min TTLs meant every fresh browse on
# a Pi4 paid for ~3 HTTP requests × N plugins (30-60s serial). 30
# minutes keeps the cache warm across a realistic session while
# still picking up upstream updates within a reasonable window.
self.commit_cache_timeout = 1800
self.manifest_cache = {} # Cache for GitHub manifest fetches: {key: (timestamp, data)} self.manifest_cache = {} # Cache for GitHub manifest fetches: {key: (timestamp, data)}
self.manifest_cache_timeout = 300 # 5 minutes self.manifest_cache_timeout = 1800
self.github_token = self._load_github_token() self.github_token = self._load_github_token()
self._token_validation_cache = {} # Cache for token validation results: {token: (is_valid, timestamp, error_message)} self._token_validation_cache = {} # Cache for token validation results: {token: (is_valid, timestamp, error_message)}
self._token_validation_cache_timeout = 300 # 5 minutes cache for token validation self._token_validation_cache_timeout = 300 # 5 minutes cache for token validation
@@ -342,7 +349,21 @@ class PluginStoreManager:
if self.github_token: if self.github_token:
headers['Authorization'] = f'token {self.github_token}' headers['Authorization'] = f'token {self.github_token}'
response = requests.get(api_url, headers=headers, timeout=10) try:
response = requests.get(api_url, headers=headers, timeout=10)
except requests.RequestException as req_err:
# Network error: prefer a stale cache hit over an
# empty default so the UI keeps working on a flaky
# Pi WiFi link.
if cache_key in self.github_cache:
_, stale = self.github_cache[cache_key]
self.logger.warning(
"GitHub repo info fetch failed for %s (%s); serving stale cache.",
cache_key, req_err,
)
return stale
raise
if response.status_code == 200: if response.status_code == 200:
data = response.json() data = response.json()
pushed_at = data.get('pushed_at', '') or data.get('updated_at', '') pushed_at = data.get('pushed_at', '') or data.get('updated_at', '')
@@ -362,7 +383,16 @@ class PluginStoreManager:
self.github_cache[cache_key] = (time.time(), repo_info) self.github_cache[cache_key] = (time.time(), repo_info)
return repo_info return repo_info
elif response.status_code == 403: elif response.status_code == 403:
# Rate limit or authentication issue # Rate limit or authentication issue. If we have a
# previously-cached value, serve it rather than
# returning empty defaults — a stale star count is
# better than a reset to zero.
if cache_key in self.github_cache:
_, stale = self.github_cache[cache_key]
self.logger.warning(
"GitHub API 403 for %s; serving stale cache.", cache_key,
)
return stale
if not self.github_token: if not self.github_token:
self.logger.warning( self.logger.warning(
f"GitHub API rate limit likely exceeded (403). " f"GitHub API rate limit likely exceeded (403). "
@@ -376,6 +406,9 @@ class PluginStoreManager:
) )
else: else:
self.logger.warning(f"GitHub API request failed: {response.status_code} for {api_url}") self.logger.warning(f"GitHub API request failed: {response.status_code} for {api_url}")
if cache_key in self.github_cache:
_, stale = self.github_cache[cache_key]
return stale
return { return {
'stars': 0, 'stars': 0,
@@ -558,68 +591,91 @@ class PluginStoreManager:
except Exception as e: except Exception as e:
self.logger.warning(f"Failed to fetch plugins from saved repository {repo_url}: {e}") self.logger.warning(f"Failed to fetch plugins from saved repository {repo_url}: {e}")
results = [] # First pass: apply cheap filters (category/tags/query) so we only
# fetch GitHub metadata for plugins that will actually be returned.
filtered: List[Dict] = []
for plugin in plugins: for plugin in plugins:
# Category filter
if category and plugin.get('category') != category: if category and plugin.get('category') != category:
continue continue
# Tags filter (match any tag)
if tags and not any(tag in plugin.get('tags', []) for tag in tags): if tags and not any(tag in plugin.get('tags', []) for tag in tags):
continue continue
# Query search (case-insensitive)
if query: if query:
query_lower = query.lower() query_lower = query.lower()
searchable_text = ' '.join([ searchable_text = ' '.join([
plugin.get('name', ''), plugin.get('name', ''),
plugin.get('description', ''), plugin.get('description', ''),
plugin.get('id', ''), plugin.get('id', ''),
plugin.get('author', '') plugin.get('author', ''),
]).lower() ]).lower()
if query_lower not in searchable_text: if query_lower not in searchable_text:
continue continue
filtered.append(plugin)
# Enhance plugin data with GitHub metadata def _enrich(plugin: Dict) -> Dict:
"""Enrich a single plugin with GitHub metadata.
Called concurrently from a ThreadPoolExecutor. Each underlying
HTTP helper (``_get_github_repo_info`` / ``_get_latest_commit_info``
/ ``_fetch_manifest_from_github``) is thread-safe — they use
``requests`` and write their own cache keys on Python dicts,
which is atomic under the GIL for single-key assignments.
"""
enhanced_plugin = plugin.copy() enhanced_plugin = plugin.copy()
# Get real GitHub stars
repo_url = plugin.get('repo', '') repo_url = plugin.get('repo', '')
if repo_url: if not repo_url:
github_info = self._get_github_repo_info(repo_url) return enhanced_plugin
enhanced_plugin['stars'] = github_info.get('stars', plugin.get('stars', 0))
enhanced_plugin['default_branch'] = github_info.get('default_branch', plugin.get('branch', 'main'))
enhanced_plugin['last_updated_iso'] = github_info.get('last_commit_iso')
enhanced_plugin['last_updated'] = github_info.get('last_commit_date')
if fetch_commit_info: github_info = self._get_github_repo_info(repo_url)
branch = plugin.get('branch') or github_info.get('default_branch', 'main') enhanced_plugin['stars'] = github_info.get('stars', plugin.get('stars', 0))
enhanced_plugin['default_branch'] = github_info.get('default_branch', plugin.get('branch', 'main'))
enhanced_plugin['last_updated_iso'] = github_info.get('last_commit_iso')
enhanced_plugin['last_updated'] = github_info.get('last_commit_date')
commit_info = self._get_latest_commit_info(repo_url, branch) if fetch_commit_info:
if commit_info: branch = plugin.get('branch') or github_info.get('default_branch', 'main')
enhanced_plugin['last_commit'] = commit_info.get('short_sha')
enhanced_plugin['last_commit_sha'] = commit_info.get('sha')
enhanced_plugin['last_updated'] = commit_info.get('date') or enhanced_plugin.get('last_updated')
enhanced_plugin['last_updated_iso'] = commit_info.get('date_iso') or enhanced_plugin.get('last_updated_iso')
enhanced_plugin['last_commit_message'] = commit_info.get('message')
enhanced_plugin['last_commit_author'] = commit_info.get('author')
enhanced_plugin['branch'] = commit_info.get('branch', branch)
enhanced_plugin['last_commit_branch'] = commit_info.get('branch')
# Fetch manifest from GitHub for additional metadata (description, etc.) commit_info = self._get_latest_commit_info(repo_url, branch)
plugin_subpath = plugin.get('plugin_path', '') if commit_info:
manifest_rel = f"{plugin_subpath}/manifest.json" if plugin_subpath else "manifest.json" enhanced_plugin['last_commit'] = commit_info.get('short_sha')
github_manifest = self._fetch_manifest_from_github(repo_url, branch, manifest_rel) enhanced_plugin['last_commit_sha'] = commit_info.get('sha')
if github_manifest: enhanced_plugin['last_updated'] = commit_info.get('date') or enhanced_plugin.get('last_updated')
if 'last_updated' in github_manifest and not enhanced_plugin.get('last_updated'): enhanced_plugin['last_updated_iso'] = commit_info.get('date_iso') or enhanced_plugin.get('last_updated_iso')
enhanced_plugin['last_updated'] = github_manifest['last_updated'] enhanced_plugin['last_commit_message'] = commit_info.get('message')
if 'description' in github_manifest: enhanced_plugin['last_commit_author'] = commit_info.get('author')
enhanced_plugin['description'] = github_manifest['description'] enhanced_plugin['branch'] = commit_info.get('branch', branch)
enhanced_plugin['last_commit_branch'] = commit_info.get('branch')
results.append(enhanced_plugin) plugin_subpath = plugin.get('plugin_path', '')
manifest_rel = f"{plugin_subpath}/manifest.json" if plugin_subpath else "manifest.json"
github_manifest = self._fetch_manifest_from_github(repo_url, branch, manifest_rel)
if github_manifest:
if 'last_updated' in github_manifest and not enhanced_plugin.get('last_updated'):
enhanced_plugin['last_updated'] = github_manifest['last_updated']
if 'description' in github_manifest:
enhanced_plugin['description'] = github_manifest['description']
return results return enhanced_plugin
# Fan out the per-plugin GitHub enrichment. The previous
# implementation did this serially, which on a Pi4 with ~15 plugins
# and a fresh cache meant 30+ HTTP requests in strict sequence (the
# "connecting to display" hang reported by users). With a thread
# pool, latency is dominated by the slowest request rather than
# their sum. Workers capped at 10 to stay well under the
# unauthenticated GitHub rate limit burst and avoid overwhelming a
# Pi's WiFi link. For a small number of plugins the pool is
# essentially free.
if not filtered:
return []
if len(filtered) == 1 or not fetch_commit_info and len(filtered) < 4:
# Not worth the pool overhead for tiny workloads.
return [_enrich(p) for p in filtered]
max_workers = min(10, len(filtered))
with ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix='plugin-search') as executor:
# executor.map preserves input order, which the UI relies on.
return list(executor.map(_enrich, filtered))
def _fetch_manifest_from_github(self, repo_url: str, branch: str = "master", manifest_path: str = "manifest.json", force_refresh: bool = False) -> Optional[Dict]: def _fetch_manifest_from_github(self, repo_url: str, branch: str = "master", manifest_path: str = "manifest.json", force_refresh: bool = False) -> Optional[Dict]:
""" """
@@ -717,7 +773,22 @@ class PluginStoreManager:
last_error = None last_error = None
for branch_name in branches_to_try: for branch_name in branches_to_try:
api_url = f"https://api.github.com/repos/{owner}/{repo}/commits/{branch_name}" api_url = f"https://api.github.com/repos/{owner}/{repo}/commits/{branch_name}"
response = requests.get(api_url, headers=headers, timeout=10) try:
response = requests.get(api_url, headers=headers, timeout=10)
except requests.RequestException as req_err:
# Network failure: fall back to a stale cache hit if
# available so the plugin store UI keeps populating
# commit info on a flaky WiFi link.
if cache_key in self.commit_info_cache:
_, stale = self.commit_info_cache[cache_key]
if stale is not None:
self.logger.warning(
"GitHub commit fetch failed for %s (%s); serving stale cache.",
cache_key, req_err,
)
return stale
last_error = str(req_err)
continue
if response.status_code == 200: if response.status_code == 200:
commit_data = response.json() commit_data = response.json()
commit_sha_full = commit_data.get('sha', '') commit_sha_full = commit_data.get('sha', '')

View File

@@ -113,6 +113,152 @@ class TestGitInfoCache(unittest.TestCase):
self.assertIsNone(self.sm._get_local_git_info(non_git)) self.assertIsNone(self.sm._get_local_git_info(non_git))
class TestSearchPluginsParallel(unittest.TestCase):
"""Plugin Store browse path — the per-plugin GitHub enrichment used to
run serially, turning a browse of 15 plugins into 30–45 sequential HTTP
requests on a cold cache. This batch of tests locks in the parallel
fan-out and verifies output shape/ordering haven't regressed.
"""
def setUp(self):
self._tmp = TemporaryDirectory()
self.addCleanup(self._tmp.cleanup)
self.sm = PluginStoreManager(plugins_dir=self._tmp.name)
# Fake registry with 5 plugins.
self.registry = {
"plugins": [
{"id": f"plg{i}", "name": f"Plugin {i}",
"repo": f"https://github.com/owner/plg{i}", "category": "util"}
for i in range(5)
]
}
self.sm.registry_cache = self.registry
self.sm.registry_cache_time = time.time()
self._enrich_calls = []
def fake_repo(repo_url):
self._enrich_calls.append(("repo", repo_url))
return {"stars": 1, "default_branch": "main",
"last_commit_iso": "2026-04-08T00:00:00Z",
"last_commit_date": "2026-04-08"}
def fake_commit(repo_url, branch):
self._enrich_calls.append(("commit", repo_url, branch))
return {"short_sha": "abc1234", "sha": "abc1234" + "0" * 33,
"date_iso": "2026-04-08T00:00:00Z", "date": "2026-04-08",
"message": "m", "author": "a", "branch": branch}
def fake_manifest(repo_url, branch, manifest_path):
self._enrich_calls.append(("manifest", repo_url, branch))
return {"description": "desc"}
self.sm._get_github_repo_info = fake_repo
self.sm._get_latest_commit_info = fake_commit
self.sm._fetch_manifest_from_github = fake_manifest
def test_results_preserve_registry_order(self):
results = self.sm.search_plugins(include_saved_repos=False)
self.assertEqual([p["id"] for p in results],
[f"plg{i}" for i in range(5)])
def test_filters_applied_before_enrichment(self):
# Filter down to a single plugin via category — ensures we don't
# waste GitHub calls enriching plugins that won't be returned.
self.registry["plugins"][2]["category"] = "special"
self.sm.registry_cache = self.registry
self._enrich_calls.clear()
results = self.sm.search_plugins(category="special", include_saved_repos=False)
self.assertEqual(len(results), 1)
self.assertEqual(results[0]["id"], "plg2")
# Only one plugin should have been enriched.
repo_calls = [c for c in self._enrich_calls if c[0] == "repo"]
self.assertEqual(len(repo_calls), 1)
def test_enrichment_runs_concurrently(self):
"""Verify the thread pool actually runs fetches in parallel.
We make each fake fetch sleep briefly and check wall time — if the
code had regressed to serial execution, 5 plugins × ~50ms sleep
would be ≥ 250ms. Parallel should be well under that.
"""
import threading
concurrent_workers = []
peak_lock = threading.Lock()
peak = {"count": 0, "current": 0}
def slow_repo(repo_url):
with peak_lock:
peak["current"] += 1
if peak["current"] > peak["count"]:
peak["count"] = peak["current"]
time.sleep(0.05)
with peak_lock:
peak["current"] -= 1
return {"stars": 0, "default_branch": "main",
"last_commit_iso": "", "last_commit_date": ""}
self.sm._get_github_repo_info = slow_repo
self.sm._get_latest_commit_info = lambda *a, **k: None
self.sm._fetch_manifest_from_github = lambda *a, **k: None
t0 = time.time()
results = self.sm.search_plugins(fetch_commit_info=False, include_saved_repos=False)
elapsed = time.time() - t0
self.assertEqual(len(results), 5)
# Serial would be ~250ms; parallel should be well under 200ms.
self.assertLess(elapsed, 0.2,
f"search_plugins took {elapsed:.3f}s — appears to have regressed to serial")
self.assertGreaterEqual(peak["count"], 2,
"no concurrent fetches observed — thread pool not engaging")
class TestStaleOnErrorFallbacks(unittest.TestCase):
"""When GitHub is unreachable, previously-cached values should still be
returned rather than zero/None. Important on Pi's WiFi links.
"""
def setUp(self):
self._tmp = TemporaryDirectory()
self.addCleanup(self._tmp.cleanup)
self.sm = PluginStoreManager(plugins_dir=self._tmp.name)
def test_repo_info_stale_on_network_error(self):
cache_key = "owner/repo"
good = {"stars": 42, "default_branch": "main",
"last_commit_iso": "", "last_commit_date": "",
"forks": 0, "open_issues": 0, "updated_at_iso": "",
"language": "", "license": ""}
# Seed the cache with a known-good value, then force expiry.
self.sm.github_cache[cache_key] = (time.time() - 10_000, good)
self.sm.cache_timeout = 1 # force re-fetch
import requests as real_requests
with patch("src.plugin_system.store_manager.requests.get",
side_effect=real_requests.ConnectionError("boom")):
result = self.sm._get_github_repo_info("https://github.com/owner/repo")
self.assertEqual(result["stars"], 42)
def test_commit_info_stale_on_network_error(self):
cache_key = "owner/repo:main"
good = {"branch": "main", "sha": "a" * 40, "short_sha": "aaaaaaa",
"date_iso": "2026-04-08T00:00:00Z", "date": "2026-04-08",
"author": "x", "message": "y"}
self.sm.commit_info_cache[cache_key] = (time.time() - 10_000, good)
self.sm.commit_cache_timeout = 1 # force re-fetch
import requests as real_requests
with patch("src.plugin_system.store_manager.requests.get",
side_effect=real_requests.ConnectionError("boom")):
result = self.sm._get_latest_commit_info(
"https://github.com/owner/repo", branch="main"
)
self.assertIsNotNone(result)
self.assertEqual(result["short_sha"], "aaaaaaa")
class TestRegistryStaleCacheFallback(unittest.TestCase): class TestRegistryStaleCacheFallback(unittest.TestCase):
def setUp(self): def setUp(self):
self._tmp = TemporaryDirectory() self._tmp = TemporaryDirectory()