perf(plugins): parallelize Plugin Store browse and extend metadata cache TTLs

Follow-up to the previous commit addressing the Plugin Store browse path
specifically. Most users install plugins via the store (ZIP extraction,
no .git directory) so the git-info mtime cache from the previous commit
was a no-op for them; their pain was coming from /plugins/store/list.

Root cause. search_plugins() enriched each returned plugin with three
serial GitHub fetches: _get_github_repo_info (repo API), _get_latest_commit_info
(commits API), _fetch_manifest_from_github (raw.githubusercontent.com).
Fifteen plugins × three requests × serial HTTP = 30–45 sequential round
trips on every cold browse. On a Pi4 over WiFi that translated directly
into the "connecting to display" hang users reported. The commit and
manifest caches had a 5-minute TTL, so even a brief absence re-paid the
full cost.

Changes.

- ``search_plugins``: fan out per-plugin enrichment through a
  ``ThreadPoolExecutor`` (max 10 workers, stays well under unauthenticated
  GitHub rate limits). Apply category/tag/query filters before enrichment
  so we never waste requests on plugins that will be filtered out.
  ``executor.map`` preserves input order, which the UI depends on.
- ``commit_cache_timeout`` and ``manifest_cache_timeout``: 5 min → 30 min.
  Keeps the cache warm across a realistic session while still picking up
  upstream updates in a reasonable window.
- ``_get_github_repo_info`` and ``_get_latest_commit_info``: stale-on-error
  fallback. On a network failure or a 403 we now prefer a previously-
  cached value over the zero-default, matching the pattern already in
  ``fetch_registry``. Flaky Pi WiFi no longer causes star counts to flip
  to 0 and commit info to disappear.

Tests (5 new in test_store_manager_caches.py).

- ``test_results_preserve_registry_order`` — the parallel map must still
  return plugins in input order.
- ``test_filters_applied_before_enrichment`` — category/tag/query filters
  run first so we don't waste HTTP calls.
- ``test_enrichment_runs_concurrently`` — peak-concurrency check plus a
  wall-time bound that would fail if the code regressed to serial.
- ``test_repo_info_stale_on_network_error`` — repo info falls back to
  stale cache on RequestException.
- ``test_commit_info_stale_on_network_error`` — commit info falls back to
  stale cache on RequestException.

All 29 tests (16 reconciliation, 13 store_manager caches) pass.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Chuck
2026-04-08 10:57:12 -04:00
parent b031b99130
commit c03eb8dbcc
2 changed files with 263 additions and 46 deletions

View File

@@ -14,6 +14,7 @@ import zipfile
import tempfile
import requests
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional, Any
@@ -52,7 +53,7 @@ class PluginStoreManager:
self.registry_cache = None
self.registry_cache_time = None # Timestamp of when registry was cached
self.github_cache = {} # Cache for GitHub API responses
self.cache_timeout = 3600 # 1 hour cache timeout
self.cache_timeout = 3600 # 1 hour cache timeout (repo info: stars, default_branch)
# 15 minutes for registry cache. Long enough that the plugin list
# endpoint on a warm cache never hits the network, short enough that
# new plugins show up within a reasonable window. See also the
@@ -60,9 +61,15 @@ class PluginStoreManager:
# failures.
self.registry_cache_timeout = 900
self.commit_info_cache = {} # Cache for latest commit info: {key: (timestamp, data)}
self.commit_cache_timeout = 300 # 5 minutes (same as registry)
# 30 minutes for commit/manifest caches. Plugin Store users browse
# the catalog via /plugins/store/list which fetches commit info and
# manifest data per plugin. 5-min TTLs meant every fresh browse on
# a Pi4 paid for ~3 HTTP requests × N plugins (30-60s serial). 30
# minutes keeps the cache warm across a realistic session while
# still picking up upstream updates within a reasonable window.
self.commit_cache_timeout = 1800
self.manifest_cache = {} # Cache for GitHub manifest fetches: {key: (timestamp, data)}
self.manifest_cache_timeout = 300 # 5 minutes
self.manifest_cache_timeout = 1800
self.github_token = self._load_github_token()
self._token_validation_cache = {} # Cache for token validation results: {token: (is_valid, timestamp, error_message)}
self._token_validation_cache_timeout = 300 # 5 minutes cache for token validation
@@ -342,7 +349,21 @@ class PluginStoreManager:
if self.github_token:
headers['Authorization'] = f'token {self.github_token}'
response = requests.get(api_url, headers=headers, timeout=10)
try:
response = requests.get(api_url, headers=headers, timeout=10)
except requests.RequestException as req_err:
# Network error: prefer a stale cache hit over an
# empty default so the UI keeps working on a flaky
# Pi WiFi link.
if cache_key in self.github_cache:
_, stale = self.github_cache[cache_key]
self.logger.warning(
"GitHub repo info fetch failed for %s (%s); serving stale cache.",
cache_key, req_err,
)
return stale
raise
if response.status_code == 200:
data = response.json()
pushed_at = data.get('pushed_at', '') or data.get('updated_at', '')
@@ -362,7 +383,16 @@ class PluginStoreManager:
self.github_cache[cache_key] = (time.time(), repo_info)
return repo_info
elif response.status_code == 403:
# Rate limit or authentication issue
# Rate limit or authentication issue. If we have a
# previously-cached value, serve it rather than
# returning empty defaults — a stale star count is
# better than a reset to zero.
if cache_key in self.github_cache:
_, stale = self.github_cache[cache_key]
self.logger.warning(
"GitHub API 403 for %s; serving stale cache.", cache_key,
)
return stale
if not self.github_token:
self.logger.warning(
f"GitHub API rate limit likely exceeded (403). "
@@ -376,6 +406,9 @@ class PluginStoreManager:
)
else:
self.logger.warning(f"GitHub API request failed: {response.status_code} for {api_url}")
if cache_key in self.github_cache:
_, stale = self.github_cache[cache_key]
return stale
return {
'stars': 0,
@@ -558,68 +591,91 @@ class PluginStoreManager:
except Exception as e:
self.logger.warning(f"Failed to fetch plugins from saved repository {repo_url}: {e}")
results = []
# First pass: apply cheap filters (category/tags/query) so we only
# fetch GitHub metadata for plugins that will actually be returned.
filtered: List[Dict] = []
for plugin in plugins:
# Category filter
if category and plugin.get('category') != category:
continue
# Tags filter (match any tag)
if tags and not any(tag in plugin.get('tags', []) for tag in tags):
continue
# Query search (case-insensitive)
if query:
query_lower = query.lower()
searchable_text = ' '.join([
plugin.get('name', ''),
plugin.get('description', ''),
plugin.get('id', ''),
plugin.get('author', '')
plugin.get('author', ''),
]).lower()
if query_lower not in searchable_text:
continue
filtered.append(plugin)
# Enhance plugin data with GitHub metadata
def _enrich(plugin: Dict) -> Dict:
"""Enrich a single plugin with GitHub metadata.
Called concurrently from a ThreadPoolExecutor. Each underlying
HTTP helper (``_get_github_repo_info`` / ``_get_latest_commit_info``
/ ``_fetch_manifest_from_github``) is thread-safe — they use
``requests`` and write their own cache keys on Python dicts,
which is atomic under the GIL for single-key assignments.
"""
enhanced_plugin = plugin.copy()
# Get real GitHub stars
repo_url = plugin.get('repo', '')
if repo_url:
github_info = self._get_github_repo_info(repo_url)
enhanced_plugin['stars'] = github_info.get('stars', plugin.get('stars', 0))
enhanced_plugin['default_branch'] = github_info.get('default_branch', plugin.get('branch', 'main'))
enhanced_plugin['last_updated_iso'] = github_info.get('last_commit_iso')
enhanced_plugin['last_updated'] = github_info.get('last_commit_date')
if not repo_url:
return enhanced_plugin
if fetch_commit_info:
branch = plugin.get('branch') or github_info.get('default_branch', 'main')
github_info = self._get_github_repo_info(repo_url)
enhanced_plugin['stars'] = github_info.get('stars', plugin.get('stars', 0))
enhanced_plugin['default_branch'] = github_info.get('default_branch', plugin.get('branch', 'main'))
enhanced_plugin['last_updated_iso'] = github_info.get('last_commit_iso')
enhanced_plugin['last_updated'] = github_info.get('last_commit_date')
commit_info = self._get_latest_commit_info(repo_url, branch)
if commit_info:
enhanced_plugin['last_commit'] = commit_info.get('short_sha')
enhanced_plugin['last_commit_sha'] = commit_info.get('sha')
enhanced_plugin['last_updated'] = commit_info.get('date') or enhanced_plugin.get('last_updated')
enhanced_plugin['last_updated_iso'] = commit_info.get('date_iso') or enhanced_plugin.get('last_updated_iso')
enhanced_plugin['last_commit_message'] = commit_info.get('message')
enhanced_plugin['last_commit_author'] = commit_info.get('author')
enhanced_plugin['branch'] = commit_info.get('branch', branch)
enhanced_plugin['last_commit_branch'] = commit_info.get('branch')
if fetch_commit_info:
branch = plugin.get('branch') or github_info.get('default_branch', 'main')
# Fetch manifest from GitHub for additional metadata (description, etc.)
plugin_subpath = plugin.get('plugin_path', '')
manifest_rel = f"{plugin_subpath}/manifest.json" if plugin_subpath else "manifest.json"
github_manifest = self._fetch_manifest_from_github(repo_url, branch, manifest_rel)
if github_manifest:
if 'last_updated' in github_manifest and not enhanced_plugin.get('last_updated'):
enhanced_plugin['last_updated'] = github_manifest['last_updated']
if 'description' in github_manifest:
enhanced_plugin['description'] = github_manifest['description']
commit_info = self._get_latest_commit_info(repo_url, branch)
if commit_info:
enhanced_plugin['last_commit'] = commit_info.get('short_sha')
enhanced_plugin['last_commit_sha'] = commit_info.get('sha')
enhanced_plugin['last_updated'] = commit_info.get('date') or enhanced_plugin.get('last_updated')
enhanced_plugin['last_updated_iso'] = commit_info.get('date_iso') or enhanced_plugin.get('last_updated_iso')
enhanced_plugin['last_commit_message'] = commit_info.get('message')
enhanced_plugin['last_commit_author'] = commit_info.get('author')
enhanced_plugin['branch'] = commit_info.get('branch', branch)
enhanced_plugin['last_commit_branch'] = commit_info.get('branch')
results.append(enhanced_plugin)
plugin_subpath = plugin.get('plugin_path', '')
manifest_rel = f"{plugin_subpath}/manifest.json" if plugin_subpath else "manifest.json"
github_manifest = self._fetch_manifest_from_github(repo_url, branch, manifest_rel)
if github_manifest:
if 'last_updated' in github_manifest and not enhanced_plugin.get('last_updated'):
enhanced_plugin['last_updated'] = github_manifest['last_updated']
if 'description' in github_manifest:
enhanced_plugin['description'] = github_manifest['description']
return results
return enhanced_plugin
# Fan out the per-plugin GitHub enrichment. The previous
# implementation did this serially, which on a Pi4 with ~15 plugins
# and a fresh cache meant 30+ HTTP requests in strict sequence (the
# "connecting to display" hang reported by users). With a thread
# pool, latency is dominated by the slowest request rather than
# their sum. Workers capped at 10 to stay well under the
# unauthenticated GitHub rate limit burst and avoid overwhelming a
# Pi's WiFi link. For a small number of plugins the pool is
# essentially free.
if not filtered:
return []
if len(filtered) == 1 or not fetch_commit_info and len(filtered) < 4:
# Not worth the pool overhead for tiny workloads.
return [_enrich(p) for p in filtered]
max_workers = min(10, len(filtered))
with ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix='plugin-search') as executor:
# executor.map preserves input order, which the UI relies on.
return list(executor.map(_enrich, filtered))
def _fetch_manifest_from_github(self, repo_url: str, branch: str = "master", manifest_path: str = "manifest.json", force_refresh: bool = False) -> Optional[Dict]:
"""
@@ -717,7 +773,22 @@ class PluginStoreManager:
last_error = None
for branch_name in branches_to_try:
api_url = f"https://api.github.com/repos/{owner}/{repo}/commits/{branch_name}"
response = requests.get(api_url, headers=headers, timeout=10)
try:
response = requests.get(api_url, headers=headers, timeout=10)
except requests.RequestException as req_err:
# Network failure: fall back to a stale cache hit if
# available so the plugin store UI keeps populating
# commit info on a flaky WiFi link.
if cache_key in self.commit_info_cache:
_, stale = self.commit_info_cache[cache_key]
if stale is not None:
self.logger.warning(
"GitHub commit fetch failed for %s (%s); serving stale cache.",
cache_key, req_err,
)
return stale
last_error = str(req_err)
continue
if response.status_code == 200:
commit_data = response.json()
commit_sha_full = commit_data.get('sha', '')