bench_cli.py
python
sha256:4992098130166d191cefed0a2821d19cd3cdd3cf50867a4e715c2b30636826c7
fix: repair syntax errors from typing annotation cleanup
Sonnet 4.6
20 days ago
| 1 | """MuseWire CLI benchmark — times actual muse CLI commands against localhost and staging. |
| 2 | |
| 3 | Every timed operation uses the muse CLI. No HTTP clients. Seed repos are built in-process with muse.core helpers so seeding does not cost one subprocess per commit. |
| 4 | |
| 5 | Usage: |
| 6 | python3 tests/bench_cli.py --size xs |
| 7 | python3 tests/bench_cli.py --size xs s m --hubs localhost staging |
| 8 | python3 tests/bench_cli.py --size all --runs 3 |
| 9 | """ |
| 10 | from __future__ import annotations |
| 11 | |
| 12 | import argparse |
| 13 | import itertools |
| 14 | import json |
| 15 | import math |
| 16 | import os |
| 17 | import re |
| 18 | import shutil |
| 19 | import statistics |
| 20 | import subprocess |
| 21 | import sys |
| 22 | import tempfile |
| 23 | import time |
| 24 | from pathlib import Path |
| 25 | |
| 26 | from mnemonic import Mnemonic |
| 27 | |
| 28 | import datetime |
| 29 | |
| 30 | from muse.core import transport, mpack |
| 31 | from muse.core.object_store import write_object, _created_object_shards |
| 32 | from muse.core.paths import muse_dir, server_objects_dir # noqa: F401 |
| 33 | from muse.core.snapshot import compute_commit_id, compute_snapshot_id |
| 34 | from muse.core.commits import CommitRecord, write_commit |
| 35 | from muse.core.refs import write_branch_ref |
| 36 | from muse.core.snapshots import SnapshotRecord, write_snapshot |
| 37 | from muse.core.types import blob_id, hash_file |
| 38 | import musehub.services.musehub_wire as musehub_wire |
| 39 | |
# Parent of the directory containing this file; used as cwd for hub commands.
REPO_ROOT = Path(__file__).parent.parent
# Base URLs of the hubs the benchmark can target.
LOCALHOST = "https://localhost:1337"
STAGING = "https://staging.musehub.ai"
# Maps the --hubs CLI alias to its base URL.
HUB_URLS = {"localhost": LOCALHOST, "staging": STAGING}
# Prefix that create_repo() requires on every repo name it creates.
BENCH_PREFIX = "bench-"
SEED_PREFIX = "bench-seed-"  # persistent — never auto-purged
# Root of the on-disk cache; ensure_local_seed() keeps one seed repo per size here.
CACHE_DIR = Path.home() / ".cache" / "muse_bench"
| 47 | |
# Repos that must never be deleted under any circumstances.
# _safe_delete_repo() compares the repo name against this set case-insensitively.
_PERMANENT_REPOS: frozenset[str] = frozenset({
    "muse", "musehub", "agentception", "contracts",
    "stori", "Stori", "maestro", "muse-zsh", "identity",
})
| 53 | |
# Exact pattern a transient bench repo name must match before deletion is allowed.
# Covers all historical naming conventions:
#   bench-push-xs-0-abc123        (current: integer run index)
#   bench-push-xs-p2-abc123       (old: letter-prefixed run index)
#   bench-fetch-xs-abc123         (old: no run index, hex only)
#   bench-clone-xs-debug-abc123   (ad-hoc debug runs)
#   bench-reftest-abc123          (ref-test repos)
# Also covers ad-hoc debug/diag repos created during debugging sessions
# (the final "dbg"/"diag" alternative).
# Names starting with "bench-seed-" match none of the alternatives, so the
# persistent seed repos are never selected by this pattern.
_TRANSIENT_RE = re.compile(
    r"^bench-(push|clone|fetch|pull)-[a-z]+-([a-z]*\d+|debug)-[0-9a-f]{6,}$"
    r"|^bench-(push|clone|fetch|pull)-[a-z]+-[0-9a-f]{6,}$"
    r"|^bench-reftest-[0-9a-f]{6,}$"
    r"|^(dbg|diag)\d*-[0-9a-f]{6,}$"
)
| 68 | |
# Benchmark size presets.
# Each value is (commits, files_per_commit, file_size_bytes).
SIZE_MATRIX: dict[str, tuple[int, int, int]] = {
    "xs": (1, 1, 4_096),
    "s": (10, 5, 4_096),
    "m": (100, 5, 4_096),
    "l": (1_000, 5, 4_096),
    "xl": (2_000, 5, 4_096),
}
| 77 | |
# Gates in ms, keyed by (verb, size, hub) — None means no gate (expected to
# be slow / CF ceiling). All four verbs share the same limits for a given
# hub and size, so the table is generated from one row per hub.
_GATE_VERBS: tuple[str, ...] = ("push", "clone", "fetch", "pull")
_GATE_MS_BY_HUB: dict[str, dict[str, float | None]] = {
    "localhost": {"xs": 2_000, "s": 5_000, "m": 15_000, "l": 15_000, "xl": None},
    "staging": {"xs": 5_000, "s": 10_000, "m": 20_000, "l": 30_000, "xl": None},
}
GATES: dict[tuple[str, str, str], float | None] = {
    (verb, size, hub): limit_ms
    for hub, limits in _GATE_MS_BY_HUB.items()
    for verb in _GATE_VERBS
    for size, limit_ms in limits.items()
}
| 121 | |
| 122 | |
# Source files whose content determines wire protocol correctness.
# Resolved from the actual loaded modules — robust regardless of install path.
# wire_hash() digests these files, in this order, to fingerprint the protocol.
_WIRE_SOURCES: list[Path] = [
    Path(transport.__file__),
    Path(mpack.__file__),
    Path(musehub_wire.__file__),
]
| 130 | |
| 131 | |
def wire_hash() -> str:
    """Return a short hex fingerprint of the wire protocol source files.

    Each path in _WIRE_SOURCES that exists on disk is hashed with
    muse.core.types.hash_file; the per-file digests are concatenated in list
    order and hashed again with blob_id. The text after the last ":" of that
    id, truncated to 16 characters, is returned.

    Editing any of the files behind the transport, mpack or musehub_wire
    modules therefore yields a new value, which the seed caches compare
    against to detect that they are stale. Paths that do not exist are
    skipped silently.
    """
    per_file_digests = [
        hash_file(source).encode()
        for source in _WIRE_SOURCES
        if source.exists()
    ]
    fingerprint = blob_id(b"".join(per_file_digests))
    hex_part = fingerprint.split(":")[-1]
    return hex_part[:16]
| 146 | |
| 147 | |
| 148 | # ── muse wrappers ───────────────────────────────────────────────────────────── |
| 149 | |
def muse(*args: str, cwd: Path, timeout: int = 300) -> subprocess.CompletedProcess:
    """Run ``muse <args>`` inside *cwd* and return the completed process.

    stdout and stderr are captured as text. The exit status is NOT checked —
    callers inspect ``returncode`` themselves. ``subprocess.TimeoutExpired``
    propagates if the command runs longer than *timeout* seconds.
    """
    command = ["muse", *args]
    return subprocess.run(
        command,
        cwd=str(cwd),
        capture_output=True,
        text=True,
        timeout=timeout,
    )
| 155 | |
| 156 | |
def muse_check(*args: str, cwd: Path, timeout: int = 300) -> str:
    """Run a muse command that must succeed and return its stdout.

    Raises:
        RuntimeError: if the command exits non-zero; the message carries the
            first 400 characters of stderr.
    """
    result = muse(*args, cwd=cwd, timeout=timeout)
    if result.returncode == 0:
        return result.stdout
    raise RuntimeError(f"muse {' '.join(args)} failed:\n{result.stderr[:400]}")
| 162 | |
| 163 | |
def timed_muse(*args: str, cwd: Path, timeout: int = 600) -> tuple[float, bool, str]:
    """Run a muse command under a stopwatch.

    Returns (elapsed_ms, success, error_snippet). Every non-empty stderr line
    is echoed with a ``[muse-log]`` prefix regardless of the exit status;
    error_snippet is the first 400 characters of stderr on failure and ""
    on success.
    """
    started = time.perf_counter()
    result = muse(*args, cwd=cwd, timeout=timeout)
    elapsed_ms = (time.perf_counter() - started) * 1000

    log_text = result.stderr.strip()
    if log_text:
        for log_line in log_text.splitlines():
            print(f"\n [muse-log] {log_line}", flush=True)

    succeeded = result.returncode == 0
    error_snippet = "" if succeeded else result.stderr[:400]
    return elapsed_ms, succeeded, error_snippet
| 173 | |
| 174 | |
def _wait_indexed(hub_url: str, slug: str, n_commits: int, *, timeout: int = 600) -> None:
    """Block until the hub has HEAD accessible via clone+read.

    Verifies with `muse read --json` (HEAD only) — avoids serializing thousands
    of commits with `muse log` which hangs on large repos.

    The repo is cloned into a throwaway temp directory; once a clone has
    succeeded, later attempts only `muse fetch origin` before re-reading HEAD.
    Attempts are spaced two seconds apart.

    Args:
        hub_url: Base URL of the hub.
        slug: Repo slug; its last path component is the clone directory name.
        n_commits: Expected commit count. Currently unused — only HEAD is probed.
        timeout: Overall budget in seconds.

    Raises:
        TimeoutError: if no non-empty commit_id could be read within *timeout*.
    """
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    t_start = time.monotonic()
    deadline = t_start + timeout
    attempt = 0
    tmp = Path(tempfile.mkdtemp(prefix="muse_probe_"))
    name = slug.split("/")[-1]
    clone_dir = tmp / name
    cloned = False
    try:
        while time.monotonic() < deadline:
            attempt += 1
            if not cloned:
                t_clone0 = time.monotonic()
                r = muse("clone", f"{hub_url}/{slug}", cwd=tmp, timeout=300)
                clone_ms = (time.monotonic() - t_clone0) * 1000
                if r.stderr.strip():
                    for line in r.stderr.strip().splitlines():
                        print(f"\n [clone-log] {line}", flush=True)
                if r.returncode != 0:
                    print(f"\n [_wait_indexed] attempt={attempt} clone FAILED in {clone_ms:.0f}ms: {r.stderr[-200:]}", flush=True)
                    # A failed clone may leave a partial directory behind;
                    # remove it so the next attempt starts from a clean slate.
                    shutil.rmtree(clone_dir, ignore_errors=True)
                    time.sleep(2)
                    continue
                cloned = True
            else:
                t_fetch0 = time.monotonic()
                fr = muse("fetch", "origin", cwd=clone_dir, timeout=300)
                # Reuses the clone_ms name so the log line below has one shape.
                clone_ms = (time.monotonic() - t_fetch0) * 1000
                if fr.stderr.strip():
                    for line in fr.stderr.strip().splitlines():
                        print(f"\n [fetch-log] {line}", flush=True)

            t_read0 = time.monotonic()
            read_r = muse("read", "--json", cwd=clone_dir)
            read_ms = (time.monotonic() - t_read0) * 1000
            elapsed = time.monotonic() - t_start
            if read_r.returncode != 0:
                print(f"\n [_wait_indexed] attempt={attempt} clone={clone_ms:.0f}ms read FAILED in {read_ms:.0f}ms: {read_r.stderr[:120]}", flush=True)
            else:
                try:
                    commit_id = json.loads(read_r.stdout).get("commit_id", "")
                    print(f"\n [_wait_indexed] attempt={attempt} elapsed={elapsed:.1f}s clone={clone_ms:.0f}ms read={read_ms:.0f}ms commit={commit_id[:16]}", flush=True)
                    if commit_id:
                        return
                except (ValueError, KeyError, AttributeError, TypeError) as exc:
                    # Malformed or unexpected JSON (non-object payload, null
                    # commit_id) counts as "not ready yet": log and retry.
                    print(f"\n [_wait_indexed] attempt={attempt} JSON parse error: {exc}", flush=True)
            time.sleep(2)
    finally:
        shutil.rmtree(tmp, ignore_errors=True)
    raise TimeoutError(f"{slug}: HEAD not indexed within {timeout}s")
| 229 | |
| 230 | |
| 231 | # ── repo lifecycle (muse CLI only) ──────────────────────────────────────────── |
| 232 | |
def create_repo(hub_url: str, name: str) -> str:
    """Create a public, uninitialised bench repo on the hub and return its slug.

    Args:
        hub_url: Base URL of the hub to create the repo on.
        name: Repo name; must start with BENCH_PREFIX.

    Returns:
        The "slug" field of the JSON printed by `muse hub repo create`.

    Raises:
        AssertionError: if *name* lacks the bench prefix. Raised explicitly
            rather than via `assert` so the guard still runs under `python -O`.
        RuntimeError: if the muse command fails.
    """
    if not name.startswith(BENCH_PREFIX):
        raise AssertionError(
            f"bench repo name must start with {BENCH_PREFIX!r}: {name!r}"
        )
    out = muse_check(
        "hub", "repo", "create", "--name", name, "--visibility", "public",
        "--no-init", "--hub", hub_url, "--json",
        cwd=REPO_ROOT,
    )
    return json.loads(out)["slug"]
| 242 | |
| 243 | |
def _safe_delete_repo(hub_url: str, slug: str) -> None:
    """Delete a hub repo — only if it passes both independent safety guards.

    Guard 1 — permanent blocklist: repo name must not be in _PERMANENT_REPOS
              (compared case-insensitively).
    Guard 2 — exact pattern: name must match _TRANSIENT_RE.

    Both must pass. Any failure raises AssertionError before any network call.
    The guards are explicit `raise` statements, not `assert`, so they cannot
    be stripped by running the interpreter with -O.

    A failed delete is only reported as a warning, unless stderr mentions
    "404", in which case it is ignored entirely.

    Args:
        hub_url: Base URL of the hub.
        slug: Repo slug; only its last path component is checked by the guards.

    Raises:
        AssertionError: if either guard rejects the repo name.
    """
    name = slug.split("/")[-1]
    if name.lower() in {r.lower() for r in _PERMANENT_REPOS}:
        raise AssertionError(f"SAFETY: refusing to delete permanent repo '{slug}'")
    if not _TRANSIENT_RE.match(name):
        raise AssertionError(
            f"SAFETY: refusing to delete repo whose name doesn't match transient pattern: '{slug}'"
        )
    r = muse("hub", "repo", "delete", slug, "--yes", "--hub", hub_url, "--json", cwd=REPO_ROOT)
    if r.returncode != 0 and "404" not in r.stderr:
        # Warn but don't raise — stale repos are purged at the next run's start.
        # Deletion can fail transiently if a background job is still writing to the repo.
        print(f"\n WARN: repo delete {slug} failed (will be purged next run): {r.stderr[:200]}", flush=True)
| 264 | |
| 265 | |
def purge_stale(hub_url: str) -> None:
    """Delete leftover transient bench repos on *hub_url*.

    Lists up to 200 repos, keeps those whose name matches _TRANSIENT_RE, and
    hands each one's slug to _safe_delete_repo. Does nothing (and prints
    nothing) when no repo matches.
    """
    listing = muse_check(
        "hub", "repo", "list", "--limit", "200", "--hub", hub_url, "--json",
        cwd=REPO_ROOT,
    )
    leftovers = [
        repo
        for repo in json.loads(listing).get("repos", [])
        if _TRANSIENT_RE.match(repo["name"])
    ]
    if not leftovers:
        return
    print(f" purging {len(leftovers)} stale bench repo(s) on {hub_url}…")
    for repo in leftovers:
        _safe_delete_repo(hub_url, repo["slug"])
| 275 | |
| 276 | |
| 277 | # ── local repo population ───────────────────────────────────────────────────── |
| 278 | |
# BIP39 English wordlist — the same list used to back up your muse identity
# mnemonic. Each bench file is a unique deterministic slice, formatted as verse.
# Loaded once at import time from the `mnemonic` package and frozen as a tuple.
BIP39_WORDS: tuple[str, ...] = tuple(Mnemonic('english').wordlist)
| 282 | |
def bench_text(size: int, commit: int, file: int) -> bytes:
    """Build a deterministic BIP39 "verse" truncated to *size* characters.

    The text starts with a header naming *commit* and *file*, followed by
    words from BIP39_WORDS, beginning at an offset derived from both numbers
    and wrapping around the list. Layout: four words per line, a blank line
    after every sixth line. Generation stops once at least *size* bytes have
    been produced; the result is then cut to *size* characters and encoded.

    NOTE(review): the cut is made on characters before encoding, so the
    "exactly *size* bytes" guarantee assumes the wordlist is pure ASCII.
    """
    header = f"# muse bench commit={commit} file={file}\n\n"
    start = (commit * 17 + file * 7) % len(BIP39_WORDS)
    rotated = BIP39_WORDS[start:] + BIP39_WORDS[:start]

    pieces = [header]
    produced = len(header.encode())
    lines_done = 0
    # At most *size* words are ever needed: every word contributes >= 1 byte.
    for index, word in enumerate(itertools.islice(itertools.cycle(rotated), size)):
        ends_line = index % 4 == 3
        piece = word + ("\n" if ends_line else " ")
        pieces.append(piece)
        produced += len(piece.encode())
        if ends_line:
            lines_done += 1
            if lines_done % 6 == 0:
                # Stanza break after every sixth line.
                pieces.append("\n")
                produced += 1
        if produced >= size:
            break
    return "".join(pieces)[:size].encode()
| 310 | |
| 311 | |
def make_local_repo(n_commits: int, files_per_commit: int, file_size: int) -> Path:
    """Build a fresh muse repo in a temp directory using the muse CLI.

    Runs `muse init`, then for each of *n_commits* commits writes
    *files_per_commit* bench_text files of *file_size* bytes, stages them and
    commits. Returns the repo directory; the caller owns its cleanup.
    """
    repo_dir = Path(tempfile.mkdtemp(prefix="muse_bench_"))
    muse_check("init", cwd=repo_dir)

    for commit_no in range(n_commits):
        for file_no in range(files_per_commit):
            payload = bench_text(file_size, commit_no, file_no)
            (repo_dir / f"f{commit_no}_{file_no}.txt").write_bytes(payload)
        muse_check("code", "add", ".", cwd=repo_dir)
        muse_check(
            "commit", "-m", f"bench commit {commit_no}",
            "--agent-id", "bench", "--model-id", "bench",
            cwd=repo_dir,
        )

    return repo_dir
| 326 | |
| 327 | |
| 328 | # ── persistent seed helpers (Phase 2–3 implementation) ─────────────────────── |
| 329 | |
def ensure_local_seed(size: str, *, reseed: bool = False) -> Path:
    """Return path to a cached local muse repo seeded for *size*.

    Cache lives at CACHE_DIR/{size}/. Metadata is verified on every hit;
    stale or missing metadata triggers a full rebuild. reseed=True forces
    a rebuild even when metadata is valid.

    The cache counts as valid only when cache_meta.json records the same
    commit count, files per commit, file size and wire_hash() as the current
    run. A rebuild writes objects, snapshots and commits directly through the
    muse.core helpers rather than by invoking the CLI.

    Args:
        size: Key into SIZE_MATRIX.
        reseed: Rebuild even if the cached metadata matches.

    Returns:
        The seed repo directory.

    Raises:
        KeyError: if *size* is not a key of SIZE_MATRIX.
    """
    n_commits, files_per_commit, file_size = SIZE_MATRIX[size]
    seed_dir = CACHE_DIR / size
    meta_path = seed_dir / "cache_meta.json"

    def _valid_cache() -> bool:
        # True only when the metadata file exists, parses, and matches the
        # current size parameters and wire hash.
        if not seed_dir.exists() or not meta_path.exists():
            return False
        try:
            meta = json.loads(meta_path.read_text())
            return (
                meta.get("n_commits") == n_commits and
                meta.get("files_per_commit") == files_per_commit and
                meta.get("file_size") == file_size and
                meta.get("wire_hash") == wire_hash()
            )
        except Exception:
            # Unreadable or malformed metadata is treated as a cache miss.
            return False

    if not reseed and _valid_cache():
        return seed_dir

    # Build (or rebuild) the seeded repo in-process — no subprocess per commit.
    if seed_dir.exists():
        # Purge any stale shard-cache entries for this seed_dir before deleting
        # it. _created_object_shards is a module-level set in object_store that
        # skips mkdir on subsequent writes to the same shard path. Without this
        # purge, a reseed would rmtree the directory but leave stale entries in
        # the set, causing write_object to skip mkdir and fail with ENOENT.
        stale_prefix = str(seed_dir) + "/"
        stale = {s for s in _created_object_shards if s.startswith(stale_prefix)}
        _created_object_shards.difference_update(stale)
        shutil.rmtree(seed_dir)
    seed_dir.mkdir(parents=True)

    # Lay out an empty repo skeleton by hand under muse_dir(seed_dir).
    dot = muse_dir(seed_dir)
    dot.mkdir()
    # repo_id is derived from the size name, so it is the same on every rebuild.
    repo_id = blob_id(f"bench-seed-{size}".encode())
    (dot / "repo.json").write_text(json.dumps({"repo_id": repo_id, "owner": "gabriel"}))
    for d in ("commits", "snapshots", "objects"):
        (dot / d).mkdir()
    (dot / "refs" / "heads").mkdir(parents=True)
    (dot / "HEAD").write_text("ref: refs/heads/main\n")
    (dot / "config.toml").write_text("")

    # Fixed starting timestamp, advanced by one second per commit below.
    # NOTE(review): presumably this keeps commit ids reproducible across
    # rebuilds — confirm against compute_commit_id.
    ts = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    parent: str | None = None
    tip = ""

    for ci in range(n_commits):
        # Write this commit's file contents and record filename -> object id.
        blobs: dict[str, str] = {}
        for fi in range(files_per_commit):
            data = bench_text(file_size, ci, fi)
            oid = blob_id(data)
            write_object(seed_dir, oid, data)
            blobs[f"f{ci}_{fi}.txt"] = oid

        # The snapshot manifest holds only this commit's files.
        sid = compute_snapshot_id(blobs)
        write_snapshot(seed_dir, SnapshotRecord(snapshot_id=sid, manifest=blobs))

        cid = compute_commit_id(
            parent_ids=[parent] if parent else [],
            snapshot_id=sid,
            message=f"bench commit {ci}",
            committed_at_iso=ts.isoformat(),
            author="gabriel",
        )
        write_commit(seed_dir, CommitRecord(
            commit_id=cid,
            branch="main",
            snapshot_id=sid,
            message=f"bench commit {ci}",
            committed_at=ts,
            parent_commit_id=parent,
            parent2_commit_id=None,
            author="gabriel",
            metadata={},
            structured_delta=None,
            sem_ver_bump="none",
            breaking_changes=[],
            agent_id="bench",
            model_id="bench",
            toolchain_id="",
            prompt_hash="",
            signature="",
            signer_key_id="",
        ))
        # Chain the next commit onto this one.
        parent = cid
        tip = cid
        ts = ts + datetime.timedelta(seconds=1)

    # Point main at the last commit written (tip stays "" if n_commits is 0).
    write_branch_ref(seed_dir, "main", tip)

    # Written last, so an interrupted build never looks like a valid cache.
    meta_path.write_text(json.dumps({
        "n_commits": n_commits,
        "files_per_commit": files_per_commit,
        "file_size": file_size,
        "wire_hash": wire_hash(),
    }))
    return seed_dir
| 436 | |
| 437 | |
# In-process cache: once a hub seed is confirmed valid, skip all checks for the rest of the run.
# Keyed by (hub_url, size); the value is the seed repo's slug.
_hub_seed_cache: dict[tuple[str, str], str] = {}
| 440 | |
| 441 | |
def ensure_hub_seed(hub_url: str, hub_alias: str, size: str, *, reseed: bool = False) -> str:
    """Ensure bench-seed-{size} exists on hub; return slug gabriel/bench-seed-{size}.

    Behaviour by case:
      * Already confirmed during this process (and not reseed): return the
        cached slug without touching the hub.
      * Repo present, description carries the current wire_hash, and a head
        commit is reported: accept it as-is.
      * Repo present but stale (wire_hash differs, or no head commit): delete
        it, recreate it, and push the local seed.
      * Repo absent: create it and push the local seed.
      * reseed=True with the repo present: never delete — re-push on top of
        the existing repo.

    After any push, blocks in _wait_indexed until the hub serves HEAD.

    Args:
        hub_url: Base URL of the hub.
        hub_alias: Unused; kept for signature parity with the bench_* functions.
        size: Key into SIZE_MATRIX.
        reseed: Force a re-push even when the hub seed looks valid.

    Returns:
        The seed repo slug.

    Raises:
        RuntimeError: if a required muse command fails.
        TimeoutError: if the pushed seed is not readable in time.
    """
    name = f"{SEED_PREFIX}{size}"
    slug = f"gabriel/{name}"

    cache_key = (hub_url, size)
    if not reseed and cache_key in _hub_seed_cache:
        return _hub_seed_cache[cache_key]

    current_hash = wire_hash()

    out = muse_check("hub", "repo", "list", "--limit", "200",
                     "--hub", hub_url, "--json", cwd=REPO_ROOT)
    repos = json.loads(out).get("repos", [])
    match = next((r for r in repos if r["name"] == name), None)

    if not reseed and match is not None:
        # The wire hash is stored in the repo description as "wire_hash=<hex>".
        # `or ""` also covers a description that is present but null.
        desc = match.get("description") or ""
        stored_hash = ""
        for part in desc.split():
            if part.startswith("wire_hash="):
                stored_hash = part.split("=", 1)[1]
        # Also verify the push completed — a repo created via wizard but
        # never pushed has no head commit.
        head_commit = match.get("head_commit_id") or match.get("head_commit") or ""

        if stored_hash == current_hash and head_commit:
            _hub_seed_cache[cache_key] = slug
            return slug

        # Stale seed: report the actual reason exactly once, then drop it.
        if stored_hash == current_hash:
            print(f" head_commit missing — rebuilding hub seed {name}…")
        else:
            print(f" wire_hash changed — rebuilding hub seed {name}…")
        muse_check("hub", "repo", "delete", slug, "--yes",
                   "--hub", hub_url, "--json", cwd=REPO_ROOT)

    # Create the repo when it is absent or was just deleted above. With
    # reseed=True and an existing repo the create is skipped and the push
    # below lands on top of what is already there.
    if match is None or not reseed:
        muse_check("hub", "repo", "create", "--name", name,
                   "--description", f"wire_hash={current_hash}",
                   "--visibility", "public", "--no-init", "--hub", hub_url, "--json",
                   cwd=REPO_ROOT)

    seed_dir = ensure_local_seed(size)
    # Always reset origin so stale tracking refs never cause a silent no-op push.
    # bench_push uses the same remove+add pattern on its per-run copies.
    muse("remote", "remove", "origin", cwd=seed_dir)  # ignore error if absent
    muse_check("remote", "add", "origin", f"{hub_url}/{slug}", cwd=seed_dir)
    muse_check("push", "origin", "main", cwd=seed_dir)
    n_commits, _, _ = SIZE_MATRIX[size]
    _wait_indexed(hub_url, slug, n_commits)
    _hub_seed_cache[cache_key] = slug
    return slug
| 501 | |
| 502 | |
| 503 | # ── verb benchmarks ─────────────────────────────────────────────────────────── |
| 504 | |
def bench_push(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Time `muse push` of the cached local seed into a fresh hub repo.

    For each run a new hub repo is created, the seed is copied to a scratch
    directory (so the cached seed's remote is never modified), origin is
    re-pointed at the new repo, and the push is timed. Only successful pushes
    contribute a duration in milliseconds; failures are printed. The scratch
    directory is always removed, and the hub repo is deleted when *cleanup*
    is true. *hub_alias* is unused.

    NOTE(review): the push URL prefixes the created slug with "gabriel/"
    while the delete call passes the slug bare — confirm which form
    create_repo() actually returns.
    """
    SIZE_MATRIX[size]  # raises KeyError up front for an unknown size
    durations: list[float] = []
    seed_repo = ensure_local_seed(size)

    for run_i in range(runs):
        suffix = os.urandom(3).hex()
        slug = create_repo(hub_url, f"{BENCH_PREFIX}push-{size}-{run_i}-{suffix}")
        scratch = Path(tempfile.mkdtemp(prefix="muse_bench_push_"))
        try:
            shutil.copytree(str(seed_repo), str(scratch / "repo"), symlinks=False)
            run_repo = scratch / "repo"
            # Removing origin may fail when none exists; that is fine.
            muse("remote", "remove", "origin", cwd=run_repo)
            muse_check("remote", "add", "origin", f"{hub_url}/gabriel/{slug}", cwd=run_repo)
            ms, ok, err = timed_muse("push", "origin", "main", cwd=run_repo)
            if not ok:
                print(f"\n ERROR: {err}", flush=True)
            else:
                durations.append(ms)
                print(f" {ms:.0f}ms", end="", flush=True)
        finally:
            shutil.rmtree(scratch, ignore_errors=True)
            if cleanup:
                _safe_delete_repo(hub_url, slug)

    return durations
| 538 | |
| 539 | |
def bench_clone(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Measure clone throughput against the persistent hub seed repo.

    Ensures the hub seed for *size* exists, then clones it *runs* times, each
    into a fresh temp directory that is removed afterwards.

    Args:
        hub_url: Base URL of the hub.
        hub_alias: Passed through to ensure_hub_seed.
        size: Key into SIZE_MATRIX.
        runs: Number of timed clones.
        cleanup: Unused — the seed repo is persistent and nothing is created
            on the hub per run.

    Returns:
        Elapsed milliseconds of each successful clone; failed clones are
        printed and omitted.
    """
    times: list[float] = []
    slug = ensure_hub_seed(hub_url, hub_alias, size)

    for _ in range(runs):
        clone_parent = Path(tempfile.mkdtemp(prefix="muse_bench_clone_"))
        try:
            ms, ok, err = timed_muse("clone", f"{hub_url}/{slug}", cwd=clone_parent)
            if ok:
                times.append(ms)
                print(f" {ms:.0f}ms", end="", flush=True)
            else:
                print(f"\n ERROR: {err}", flush=True)
        finally:
            shutil.rmtree(clone_parent, ignore_errors=True)

    return times
| 560 | |
| 561 | |
def _bench_fetch_or_pull(
    verb: str,
    hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool,
) -> list[float]:
    """Measure fetch/pull against a per-run copy of the seed.

    User story:
    1. Hub seed already exists (ensure_hub_seed).
    2. Push the local seed to a per-run hub repo and clone it — client is at
       the seeded state.
    3. Add exactly 1 delta commit to a run-local copy and push it.
    4. Measure fetch/pull from the clone (now 1 commit behind).

    The delta is always 1 commit — we measure wire-protocol latency,
    not local commit-creation overhead.

    Args:
        verb: "fetch" times `muse fetch origin`; any other value times
            `muse pull origin main`.
        hub_url: Base URL of the hub.
        hub_alias: Passed through to ensure_hub_seed.
        size: Key into SIZE_MATRIX.
        runs: Number of timed runs.
        cleanup: Delete the per-run hub repo after each run.

    Returns:
        Elapsed milliseconds of each successful fetch/pull.
    """
    n_commits, _, file_size = SIZE_MATRIX[size]
    times: list[float] = []
    # Called for its side effect; the returned slug is not used below.
    seed_slug = ensure_hub_seed(hub_url, hub_alias, size)

    for run_i in range(runs):
        # Per-run hub repo so delta pushes don't accumulate on the seed.
        run_name = f"{BENCH_PREFIX}{verb}-{size}-{run_i}-{os.urandom(3).hex()}"
        run_slug = create_repo(hub_url, run_name)

        run_dir = Path(tempfile.mkdtemp(prefix=f"muse_bench_{verb}_src_"))
        clone_parent = Path(tempfile.mkdtemp(prefix=f"muse_bench_{verb}_dst_"))
        try:
            _t = time.time
            def _step(label: str, t0: float) -> float:
                # Print the time since t0 under *label*; return "now" so the
                # caller can chain it as the next step's start.
                t1 = _t()
                print(f"\n [step] {label}: {(t1-t0)*1000:.0f}ms", flush=True)
                return t1

            t0 = _t()
            # Copy local seed → run dir, push to run hub repo.
            local_seed = ensure_local_seed(size)
            shutil.copytree(str(local_seed), str(run_dir / "repo"), symlinks=False)
            run_repo = run_dir / "repo"
            t0 = _step("copytree", t0)

            muse("remote", "remove", "origin", cwd=run_repo)  # ignore error if absent
            # NOTE(review): assumes create_repo() returns the bare repo name,
            # with the "gabriel/" owner prefix added here — confirm.
            muse_check("remote", "add", "origin",
                       f"{hub_url}/gabriel/{run_slug}", cwd=run_repo)
            muse_check("push", "origin", "main", cwd=run_repo)
            t0 = _step("seed push", t0)

            # Large mpacks defer commit writes to a background job — wait until
            # the server's commit graph is fully indexed before cloning.
            _wait_indexed(hub_url, f"gabriel/{run_slug}", n_commits)
            t0 = _step("wait_indexed seed", t0)

            # Clone run repo — client is now at seeded state.
            muse_check("clone", f"{hub_url}/gabriel/{run_slug}", cwd=clone_parent)
            cloned = clone_parent / run_slug
            t0 = _step("clone", t0)

            # Add exactly 1 delta commit and push.
            # Materialise the working tree first so the delta commit doesn't
            # incorrectly delete the seed files that are absent from disk.
            # --force discards the "pending deletions" muse sees for unwritten seed files.
            muse_check("checkout", "--force", "main", cwd=run_repo)
            (run_repo / f"delta_{run_i}.txt").write_bytes(bench_text(file_size, run_i, 0))
            muse_check("code", "add", ".", cwd=run_repo)
            muse_check("commit", "-m", f"delta {run_i}",
                       "--agent-id", "bench", "--model-id", "bench", cwd=run_repo)
            muse_check("push", "origin", "main", cwd=run_repo)
            t0 = _step("delta push", t0)

            # Wait for the delta commit to be indexed (inline for small mpacks,
            # but generation computation depends on seed commits being in commit_graph).
            _wait_indexed(hub_url, f"gabriel/{run_slug}", n_commits + 1)
            t0 = _step("wait_indexed delta", t0)

            # Measure fetch/pull (client is 1 commit behind).
            if verb == "fetch":
                ms, ok, err = timed_muse("fetch", "origin", cwd=cloned)
            else:
                ms, ok, err = timed_muse("pull", "origin", "main", cwd=cloned)
            _step(f"{verb} measurement", t0)

            # Only successful runs contribute a sample.
            if ok:
                times.append(ms)
                print(f" {ms:.0f}ms", end="", flush=True)
            else:
                print(f"\n ERROR: {err}", flush=True)
        finally:
            # Always drop both scratch directories; the hub repo only on request.
            shutil.rmtree(run_dir, ignore_errors=True)
            shutil.rmtree(clone_parent, ignore_errors=True)
            if cleanup:
                _safe_delete_repo(hub_url, f"gabriel/{run_slug}")

    return times
| 654 | |
| 655 | |
def bench_fetch(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Time `muse fetch` on a clone that is one commit behind its hub repo."""
    return _bench_fetch_or_pull(
        verb="fetch",
        hub_url=hub_url,
        hub_alias=hub_alias,
        size=size,
        runs=runs,
        cleanup=cleanup,
    )
| 658 | |
| 659 | |
def bench_pull(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Time `muse pull` on a clone that is one commit behind its hub repo."""
    return _bench_fetch_or_pull(
        verb="pull",
        hub_url=hub_url,
        hub_alias=hub_alias,
        size=size,
        runs=runs,
        cleanup=cleanup,
    )
| 662 | |
| 663 | |
| 664 | |
| 665 | |
# Dispatch table: --verb value -> benchmark function. All entries share the
# signature (hub_url, hub_alias, size, runs, cleanup) -> list of timings in ms.
VERB_FNS = {
    "push": bench_push,
    "clone": bench_clone,
    "fetch": bench_fetch,
    "pull": bench_pull,
}
| 672 | |
| 673 | |
| 674 | # ── output ──────────────────────────────────────────────────────────────────── |
| 675 | |
def gate_str(verb: str, size: str, hub_alias: str, p50: float) -> str:
    """Render the pass/fail gate cell for one result row.

    Returns "—" when GATES has no limit (missing key or None) for the
    combination; otherwise a check mark if *p50* is within the limit or a
    cross if it exceeds it, followed by the limit in whole seconds.
    """
    limit_ms = GATES.get((verb, size, hub_alias))
    if limit_ms is None:
        return "—"
    mark = "✓" if p50 <= limit_ms else "✗"
    return f"{mark} <{int(limit_ms/1000)}s"
| 681 | |
| 682 | |
def print_table(rows: list[tuple], size: str) -> None:
    """Print a fixed-width results table for one size to stdout.

    Each row is (verb, size, hub_alias, times). Rows with no timings are
    shown as FAILED; otherwise p50 is the median and p95 the nearest-rank
    95th percentile of the sorted timings.
    """
    headers = ["verb", "size", "hub", "p50 (ms)", "p95 (ms)", "gate"]
    widths = [6, 5, 12, 10, 10, 12]
    sep = " "
    rule_len = sum(widths) + len(sep) * (len(widths) - 1)

    def _emit(cells: list[str]) -> None:
        # Left-justify each cell to its column width and print the line.
        print(sep.join(cell.ljust(width) for cell, width in zip(cells, widths)))

    print()
    print(f"muse CLI bench — size={size.upper()}")
    print("=" * rule_len)
    _emit(headers)
    print(sep.join("-" * width for width in widths))
    for verb, sz, hub_alias, times in rows:
        if times:
            p50 = statistics.median(times)
            ordered = sorted(times)
            p95_index = min(len(ordered) - 1, max(0, math.ceil(len(ordered) * 0.95) - 1))
            p95 = ordered[p95_index]
            cells = [verb, sz, hub_alias, f"{p50:.0f}", f"{p95:.0f}",
                     gate_str(verb, sz, hub_alias, p50)]
        else:
            cells = [verb, sz, hub_alias, "FAILED", "FAILED", "✗"]
        _emit(cells)
    print()
| 703 | |
| 704 | |
def markdown_table(rows: list[tuple], size: str) -> str:
    """Render the results for one size as a Markdown section.

    The heading summarises the size preset (commits, total files, payload in
    whole MB, or "<1" when that rounds down to zero). Each row is
    (verb, size, hub_alias, times); rows without timings are marked FAILED.
    """
    n_commits, fpc, fsz = SIZE_MATRIX[size]
    mb = n_commits * fpc * fsz // 1024 // 1024
    out = [
        f"### {size.upper()} ({n_commits} commits, {n_commits*fpc} files, ~{mb or '<1'} MB)",
        "",
        "| verb | hub | p50 (ms) | p95 (ms) | gate |",
        "|------|-----|----------|----------|------|",
    ]
    for verb, sz, hub_alias, times in rows:
        if not times:
            out.append(f"| {verb} | {hub_alias} | FAILED | FAILED | ✗ |")
            continue
        p50 = statistics.median(times)
        ordered = sorted(times)
        p95_index = min(len(ordered) - 1, max(0, math.ceil(len(ordered) * 0.95) - 1))
        p95 = ordered[p95_index]
        gate = gate_str(verb, sz, hub_alias, p50)
        out.append(f"| {verb} | {hub_alias} | **{p50:.0f}** | {p95:.0f} |"
                   f" {gate} |")
    return "\n".join(out)
| 724 | |
| 725 | |
| 726 | # ── main ────────────────────────────────────────────────────────────────────── |
| 727 | |
def main() -> None:
    """Parse CLI flags, run the selected benchmarks, and print the results.

    For every requested size: purge stale transient repos on each hub, apply
    --reseed / --reseed-hub if given, run each verb against each hub, then
    print a fixed-width table. A Markdown rendering of all sizes is printed
    at the end.

    --size is validated by argparse, so an unknown size is rejected up front
    instead of failing late in markdown_table. A failing verb or reseed is
    reported and recorded as FAILED rather than aborting the run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--size", nargs="+", default=["xs"],
                        choices=[*SIZE_MATRIX, "all"],
                        help="xs s m l xl all")
    parser.add_argument("--hubs", nargs="+", default=["localhost", "staging"],
                        choices=["localhost", "staging"])
    parser.add_argument("--verb", nargs="+", default=["push", "clone", "fetch", "pull"],
                        choices=["push", "clone", "fetch", "pull"])
    parser.add_argument("--runs", type=int, default=1)
    parser.add_argument("--no-cleanup", action="store_true")
    parser.add_argument("--reseed", action="store_true",
                        help="Rebuild local seed cache even if valid")
    parser.add_argument("--reseed-hub", action="store_true",
                        help="Re-push hub seed repos even if present")
    args = parser.parse_args()

    sizes = list(SIZE_MATRIX) if "all" in args.size else args.size
    cleanup = not args.no_cleanup

    print(f"muse CLI bench hubs={args.hubs} verbs={args.verb} "
          f"sizes={sizes} runs={args.runs}")
    print()

    all_markdown: list[str] = []
    total_start = time.perf_counter()

    for size in sizes:
        for hub_alias in args.hubs:
            hub_url = HUB_URLS[hub_alias]
            purge_stale(hub_url)

        # Honour the reseed flags before any timing starts, so seeding cost
        # never leaks into the per-size wall time. Local first: the hub
        # re-push uploads whatever the local seed cache contains.
        if args.reseed:
            try:
                ensure_local_seed(size, reseed=True)
            except Exception as exc:
                print(f"\n ERROR: local reseed {size} failed: {exc}", flush=True)
        if args.reseed_hub:
            for hub_alias in args.hubs:
                try:
                    ensure_hub_seed(HUB_URLS[hub_alias], hub_alias, size, reseed=True)
                except Exception as exc:
                    print(f"\n ERROR: hub reseed {size}/{hub_alias} failed: {exc}", flush=True)

        rows: list[tuple] = []
        size_start = time.perf_counter()

        for verb in args.verb:
            for hub_alias in args.hubs:
                hub_url = HUB_URLS[hub_alias]
                verb_start = time.perf_counter()
                print(f" {verb}/{size}/{hub_alias}…", end="", flush=True)
                try:
                    times = VERB_FNS[verb](hub_url, hub_alias, size, args.runs, cleanup)
                except Exception as exc:
                    # Top-level boundary: report and keep going with the
                    # remaining verb/hub combinations.
                    print(f"\n ERROR: {exc}", flush=True)
                    times = []
                verb_elapsed = (time.perf_counter() - verb_start) * 1000
                # Inline verb summary: p50 + gate + total wall time for this verb
                if times:
                    p50 = statistics.median(times)
                    g = gate_str(verb, size, hub_alias, p50)
                    print(f" → p50={p50:.0f}ms {g} (verb wall={verb_elapsed:.0f}ms)")
                else:
                    print(f" → FAILED (verb wall={verb_elapsed:.0f}ms)")
                rows.append((verb, size, hub_alias, times))

        size_elapsed = (time.perf_counter() - size_start) * 1000
        print_table(rows, size)
        print(f" size={size.upper()} total: {size_elapsed:.0f}ms")
        all_markdown.append(markdown_table(rows, size))

    total_elapsed = (time.perf_counter() - total_start) * 1000
    print(f"\n ── overall: {total_elapsed:.0f}ms ──\n")

    if all_markdown:
        print("Markdown (copy to issue comment):")
        print("\n\n".join(all_markdown))
| 794 | |
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
File History
2 commits
sha256:4992098130166d191cefed0a2821d19cd3cdd3cf50867a4e715c2b30636826c7
fix: repair syntax errors from typing annotation cleanup
Sonnet 4.6
20 days ago
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595
fix: typing audit — 0 violations, 0 untyped defs across all…
Sonnet 4.6
minor
⚠
20 days ago