gabriel / musehub public
bench_cli.py python
796 lines 32.2 KB
Raw
sha256:4992098130166d191cefed0a2821d19cd3cdd3cf50867a4e715c2b30636826c7 fix: repair syntax errors from typing annotation cleanup Sonnet 4.6 20 days ago
1 """MuseWire CLI benchmark — times actual muse CLI commands against localhost and staging.
2
3 Every operation uses the muse CLI. No HTTP clients. No internal imports.
4
5 Usage:
6 python3 tests/bench_cli.py --size xs
7 python3 tests/bench_cli.py --size xs s m --hubs localhost staging
8 python3 tests/bench_cli.py --size all --runs 3
9 """
10 from __future__ import annotations
11
12 import argparse
13 import itertools
14 import json
15 import math
16 import os
17 import re
18 import shutil
19 import statistics
20 import subprocess
21 import sys
22 import tempfile
23 import time
24 from pathlib import Path
25
26 from mnemonic import Mnemonic
27
28 import datetime
29
30 from muse.core import transport, mpack
31 from muse.core.object_store import write_object, _created_object_shards
32 from muse.core.paths import muse_dir, server_objects_dir # noqa: F401
33 from muse.core.snapshot import compute_commit_id, compute_snapshot_id
34 from muse.core.commits import CommitRecord, write_commit
35 from muse.core.refs import write_branch_ref
36 from muse.core.snapshots import SnapshotRecord, write_snapshot
37 from muse.core.types import blob_id, hash_file
38 import musehub.services.musehub_wire as musehub_wire
39
# Repository root: two directory levels above this file. Used as the working
# directory for hub-level `muse hub …` commands.
REPO_ROOT = Path(__file__).parent.parent
# Base URLs of the hubs that can be benchmarked, keyed by the alias accepted
# by the --hubs command-line option.
LOCALHOST = "https://localhost:1337"
STAGING = "https://staging.musehub.ai"
HUB_URLS = {"localhost": LOCALHOST, "staging": STAGING}
# Name prefix required of every repo created through create_repo().
BENCH_PREFIX = "bench-"
SEED_PREFIX = "bench-seed-" # persistent — never auto-purged
# On-disk location of the cached local seed repos (one sub-directory per size).
CACHE_DIR = Path.home() / ".cache" / "muse_bench"

# Repos that must never be deleted under any circumstances.
_PERMANENT_REPOS: frozenset[str] = frozenset({
    "muse", "musehub", "agentception", "contracts",
    "stori", "Stori", "maestro", "muse-zsh", "identity",
})

# Exact pattern a transient bench repo name must match before deletion is allowed.
# Covers all historical naming conventions:
# bench-push-xs-0-abc123 (current: integer run index)
# bench-push-xs-p2-abc123 (old: letter-prefixed run index)
# bench-fetch-xs-abc123 (old: no run index, hex only)
# bench-clone-xs-debug-abc123 (ad-hoc debug runs)
# bench-reftest-abc123 (ref-test repos)
# Also covers ad-hoc debug/diag repos created during debugging sessions.
# Names starting with SEED_PREFIX ("bench-seed-") match none of the
# alternatives below, so seed repos are never selected by this pattern.
_TRANSIENT_RE = re.compile(
    r"^bench-(push|clone|fetch|pull)-[a-z]+-([a-z]*\d+|debug)-[0-9a-f]{6,}$"
    r"|^bench-(push|clone|fetch|pull)-[a-z]+-[0-9a-f]{6,}$"
    r"|^bench-reftest-[0-9a-f]{6,}$"
    r"|^(dbg|diag)\d*-[0-9a-f]{6,}$"
)
68
# Size label -> (commits, files_per_commit, file_size_bytes).
SIZE_MATRIX = dict(
    xs=(1, 1, 4_096),
    s=(10, 5, 4_096),
    m=(100, 5, 4_096),
    l=(1_000, 5, 4_096),
    xl=(2_000, 5, 4_096),
)

# (verb, size, hub) -> p50 gate in ms; None means no gate (expected to be
# slow / CF ceiling). Every verb shares the same per-size limits on a given
# hub, so the table is expanded from one row of limits per hub. Insertion
# order is hub, then verb, then size.
GATES: dict[tuple[str, str, str], float | None] = {
    (verb, size, hub): limit_ms
    for hub, limits in (
        ("localhost", (2_000, 5_000, 15_000, 15_000, None)),
        ("staging", (5_000, 10_000, 20_000, 30_000, None)),
    )
    for verb in ("push", "clone", "fetch", "pull")
    for size, limit_ms in zip(("xs", "s", "m", "l", "xl"), limits)
}
121
122
# Source files whose content determines wire protocol correctness.
# Resolved from the actual loaded modules — robust regardless of install path.
# wire_hash() fingerprints exactly these files, in this order.
_WIRE_SOURCES: list[Path] = [
    Path(transport.__file__),
    Path(mpack.__file__),
    Path(musehub_wire.__file__),
]
130
131
def wire_hash() -> str:
    """Return a short hex fingerprint of the wire-protocol source files.

    Each path in ``_WIRE_SOURCES`` that exists on disk is hashed with
    ``hash_file``; the per-file digests are concatenated in list order and
    hashed once more with ``blob_id``. The result is the first 16 characters
    of whatever follows the last ``:`` in that id.

    Editing any listed file changes the fingerprint, which is how stale seed
    caches are detected. Paths that do not exist are skipped silently.
    """
    digests = [hash_file(src).encode() for src in _WIRE_SOURCES if src.exists()]
    fingerprint = blob_id(b"".join(digests))
    return fingerprint.rpartition(":")[2][:16]
146
147
148 # ── muse wrappers ─────────────────────────────────────────────────────────────
149
def muse(*args: str, cwd: Path, timeout: int = 300) -> subprocess.CompletedProcess:
    """Run ``muse <args>`` in *cwd* and return the finished process.

    stdout and stderr are captured as text. A non-zero exit status is NOT an
    error here — callers inspect ``returncode`` themselves. *timeout* (seconds)
    is handed to ``subprocess.run``.
    """
    command = ["muse", *args]
    return subprocess.run(
        command,
        cwd=str(cwd),
        capture_output=True,
        text=True,
        timeout=timeout,
    )
155
156
def muse_check(*args: str, cwd: Path, timeout: int = 300) -> str:
    """Run a muse command that must succeed and return its stdout.

    Raises:
        RuntimeError: when the command exits non-zero; the message carries the
            command line and the first 400 characters of stderr.
    """
    result = muse(*args, cwd=cwd, timeout=timeout)
    if result.returncode == 0:
        return result.stdout
    raise RuntimeError(f"muse {' '.join(args)} failed:\n{result.stderr[:400]}")
162
163
def timed_muse(*args: str, cwd: Path, timeout: int = 600) -> tuple[float, bool, str]:
    """Run a muse command under a wall-clock timer.

    Returns ``(elapsed_ms, success, error_snippet)``. Any stderr output is
    echoed line by line with a ``[muse-log]`` tag after the timer has stopped.
    ``error_snippet`` is the first 400 characters of stderr on failure and the
    empty string on success.
    """
    started = time.perf_counter()
    result = muse(*args, cwd=cwd, timeout=timeout)
    elapsed_ms = (time.perf_counter() - started) * 1000

    log_text = result.stderr.strip()
    if log_text:
        for log_line in log_text.splitlines():
            print(f"\n [muse-log] {log_line}", flush=True)

    succeeded = result.returncode == 0
    error_snippet = "" if succeeded else result.stderr[:400]
    return elapsed_ms, succeeded, error_snippet
173
174
def _wait_indexed(hub_url: str, slug: str, n_commits: int, *, timeout: int = 600) -> None:
    """Block until the hub has HEAD accessible via clone+read.

    Verifies with `muse read --json` (HEAD only) — avoids serializing thousands
    of commits with `muse log` which hangs on large repos.

    Args:
        hub_url: Base URL of the hub; the probe clones ``{hub_url}/{slug}``.
        slug: Repo slug. Its last ``/``-separated component is used as the
            name of the clone directory inside the probe's temp dir.
        n_commits: Not read anywhere in this function — only HEAD readability
            is checked, not the commit count.
        timeout: Overall polling budget in seconds. The deadline is tested only
            at the top of each attempt, so one attempt may overrun it.

    Raises:
        TimeoutError: no non-empty ``commit_id`` was read before the deadline.
    """
    deadline = time.time() + timeout
    attempt = 0
    t_start = time.time()
    # All probe work happens in a throw-away directory removed in `finally`.
    tmp = Path(tempfile.mkdtemp(prefix="muse_probe_"))
    name = slug.split("/")[-1]
    clone_dir = tmp / name
    # Clone once; every later attempt refreshes the same clone with a fetch.
    cloned = False
    try:
        while time.time() < deadline:
            attempt += 1
            if not cloned:
                t_clone0 = time.time()
                r = muse("clone", f"{hub_url}/{slug}", cwd=tmp, timeout=300)
                clone_ms = (time.time() - t_clone0) * 1000
                if r.stderr.strip():
                    for line in r.stderr.strip().splitlines():
                        print(f"\n [clone-log] {line}", flush=True)
                if r.returncode != 0:
                    print(f"\n [_wait_indexed] attempt={attempt} clone FAILED in {clone_ms:.0f}ms: {r.stderr[-200:]}", flush=True)
                    # Back off and retry the clone on the next iteration.
                    time.sleep(2)
                    continue
                cloned = True
            else:
                t_fetch0 = time.time()
                fr = muse("fetch", "origin", cwd=clone_dir, timeout=300)
                # clone_ms is reused for the fetch duration so the log lines
                # below keep a single format.
                clone_ms = (time.time() - t_fetch0) * 1000
                if fr.stderr.strip():
                    for line in fr.stderr.strip().splitlines():
                        print(f"\n [fetch-log] {line}", flush=True)
                # NOTE(review): the fetch return code is not checked; a failed
                # fetch simply falls through to the read below.

            t_read0 = time.time()
            read_r = muse("read", "--json", cwd=clone_dir)
            read_ms = (time.time() - t_read0) * 1000
            elapsed = time.time() - t_start
            if read_r.returncode != 0:
                print(f"\n [_wait_indexed] attempt={attempt} clone={clone_ms:.0f}ms read FAILED in {read_ms:.0f}ms: {read_r.stderr[:120]}", flush=True)
            else:
                try:
                    commit_id = json.loads(read_r.stdout).get("commit_id", "")
                    print(f"\n [_wait_indexed] attempt={attempt} elapsed={elapsed:.1f}s clone={clone_ms:.0f}ms read={read_ms:.0f}ms commit={commit_id[:16]}", flush=True)
                    # Any non-empty commit id counts as "HEAD is readable".
                    if commit_id:
                        return
                except (ValueError, KeyError) as exc:
                    print(f"\n [_wait_indexed] attempt={attempt} JSON parse error: {exc}", flush=True)
            time.sleep(2)
    finally:
        shutil.rmtree(tmp, ignore_errors=True)
    raise TimeoutError(f"{slug}: HEAD not indexed within {timeout}s")
229
230
231 # ── repo lifecycle (muse CLI only) ────────────────────────────────────────────
232
def create_repo(hub_url: str, name: str) -> str:
    """Create a public, uninitialised bench repo on the hub and return its slug.

    Args:
        hub_url: Base URL of the hub to create the repo on.
        name: Repo name; must start with ``BENCH_PREFIX``.

    Returns:
        The ``slug`` field of the JSON printed by ``muse hub repo create``.

    Raises:
        AssertionError: *name* does not start with ``BENCH_PREFIX``.
        RuntimeError: the muse command failed.
    """
    # Raised explicitly rather than with `assert`, so the guard is still
    # enforced when Python runs with -O (which strips assert statements).
    if not name.startswith(BENCH_PREFIX):
        raise AssertionError(
            f"SAFETY: bench repo name must start with '{BENCH_PREFIX}': '{name}'"
        )
    out = muse_check(
        "hub", "repo", "create", "--name", name, "--visibility", "public",
        "--no-init", "--hub", hub_url, "--json",
        cwd=REPO_ROOT,
    )
    return json.loads(out)["slug"]
242
243
def _safe_delete_repo(hub_url: str, slug: str) -> None:
    """Delete a hub repo — only if it passes both independent safety guards.

    Guard 1 — permanent blocklist: repo name must not be in _PERMANENT_REPOS
    (compared case-insensitively).
    Guard 2 — exact pattern: name must match _TRANSIENT_RE.

    Both must pass. Any failure raises AssertionError before any network call.
    The guards are explicit ``raise`` statements rather than ``assert`` so they
    cannot be stripped by running Python with -O.

    Args:
        hub_url: Base URL of the hub that owns the repo.
        slug: Repo slug; the guards inspect its last ``/``-separated component.

    Raises:
        AssertionError: either guard rejected the repo name.
    """
    name = slug.split("/")[-1]
    if name.lower() in {r.lower() for r in _PERMANENT_REPOS}:
        raise AssertionError(f"SAFETY: refusing to delete permanent repo '{slug}'")
    if not _TRANSIENT_RE.match(name):
        raise AssertionError(
            f"SAFETY: refusing to delete repo whose name doesn't match transient pattern: '{slug}'"
        )
    r = muse("hub", "repo", "delete", slug, "--yes", "--hub", hub_url, "--json", cwd=REPO_ROOT)
    if r.returncode != 0 and "404" not in r.stderr:
        # Warn but don't raise — stale repos are purged at the next run's start.
        # Deletion can fail transiently if a background job is still writing to the repo.
        print(f"\n WARN: repo delete {slug} failed (will be purged next run): {r.stderr[:200]}", flush=True)
264
265
def purge_stale(hub_url: str) -> None:
    """Delete every repo on the hub whose name matches ``_TRANSIENT_RE``.

    Only the first 200 repos returned by ``muse hub repo list`` are examined.
    Each deletion goes through ``_safe_delete_repo`` and its guards.
    """
    listing = muse_check(
        "hub", "repo", "list", "--limit", "200", "--hub", hub_url, "--json",
        cwd=REPO_ROOT,
    )
    candidates = json.loads(listing).get("repos", [])
    stale = list(filter(lambda repo: _TRANSIENT_RE.match(repo["name"]), candidates))
    if not stale:
        return
    print(f" purging {len(stale)} stale bench repo(s) on {hub_url}…")
    for repo in stale:
        _safe_delete_repo(hub_url, repo["slug"])
275
276
277 # ── local repo population ─────────────────────────────────────────────────────
278
# BIP39 English wordlist — the same list used to back up your muse identity
# mnemonic. Each bench file is a unique deterministic slice, formatted as verse.
# Frozen into a tuple at import time so bench_text() can slice and rotate it.
BIP39_WORDS: tuple[str, ...] = tuple(Mnemonic('english').wordlist)
282
def bench_text(size: int, commit: int, file: int) -> bytes:
    """Build a deterministic BIP39 "verse" truncated to *size* characters.

    The text opens with a header naming *commit* and *file*, followed by words
    from ``BIP39_WORDS``. The starting word is chosen by
    ``(commit * 17 + file * 7) % len(BIP39_WORDS)``. Words are laid out four
    per line, with a blank line after every sixth line. Generation stops once
    at least *size* bytes have been produced, and the text is cut to *size*.
    """
    header = f"# muse bench commit={commit} file={file}\n\n"
    start = (commit * 17 + file * 7) % len(BIP39_WORDS)
    rotated = BIP39_WORDS[start:] + BIP39_WORDS[:start]

    pieces = [header]
    written = len(header.encode())
    lines_done = 0
    # At most *size* words are ever needed: each word adds at least one byte.
    for index, word in enumerate(itertools.islice(itertools.cycle(rotated), size)):
        ends_line = index % 4 == 3
        piece = word + ("\n" if ends_line else " ")
        pieces.append(piece)
        written += len(piece.encode())
        if ends_line:
            lines_done += 1
            if lines_done % 6 == 0:
                # Stanza break.
                pieces.append("\n")
                written += 1
        if written >= size:
            break
    return "".join(pieces)[:size].encode()
310
311
def make_local_repo(n_commits: int, files_per_commit: int, file_size: int) -> Path:
    """Build a fresh muse repo in a new temp dir using the CLI only.

    For each of *n_commits* commits, *files_per_commit* files named
    ``f{commit}_{file}.txt`` are written with ``bench_text`` content, staged
    with ``muse code add .`` and committed. Returns the repo directory; the
    caller is responsible for removing it.
    """
    repo_dir = Path(tempfile.mkdtemp(prefix="muse_bench_"))
    muse_check("init", cwd=repo_dir)

    for commit_no in range(n_commits):
        for file_no in range(files_per_commit):
            target = repo_dir / f"f{commit_no}_{file_no}.txt"
            target.write_bytes(bench_text(file_size, commit_no, file_no))
        muse_check("code", "add", ".", cwd=repo_dir)
        muse_check(
            "commit", "-m", f"bench commit {commit_no}",
            "--agent-id", "bench", "--model-id", "bench",
            cwd=repo_dir,
        )

    return repo_dir
326
327
328 # ── persistent seed helpers (Phase 2–3 implementation) ───────────────────────
329
def ensure_local_seed(size: str, *, reseed: bool = False) -> Path:
    """Return path to a cached local muse repo seeded for *size*.

    Cache lives at CACHE_DIR/{size}/. Metadata is verified on every hit;
    stale or missing metadata triggers a full rebuild. reseed=True forces
    a rebuild even when metadata is valid.

    Args:
        size: Key into SIZE_MATRIX selecting commit count, files per commit
            and file size.
        reseed: Rebuild even if the cached metadata is valid.

    Returns:
        The seed repo directory (``CACHE_DIR / size``).

    Raises:
        KeyError: *size* is not a key of SIZE_MATRIX.
    """
    n_commits, files_per_commit, file_size = SIZE_MATRIX[size]
    seed_dir = CACHE_DIR / size
    meta_path = seed_dir / "cache_meta.json"

    def _valid_cache() -> bool:
        # The cache is valid only if cache_meta.json exists and records the
        # same size parameters and the same wire_hash as the current code.
        if not seed_dir.exists() or not meta_path.exists():
            return False
        try:
            meta = json.loads(meta_path.read_text())
            return (
                meta.get("n_commits") == n_commits and
                meta.get("files_per_commit") == files_per_commit and
                meta.get("file_size") == file_size and
                meta.get("wire_hash") == wire_hash()
            )
        except Exception:
            # Unreadable or malformed metadata is treated as a cache miss.
            return False

    if not reseed and _valid_cache():
        return seed_dir

    # Build (or rebuild) the seeded repo in-process — no subprocess per commit.
    if seed_dir.exists():
        # Purge any stale shard-cache entries for this seed_dir before deleting
        # it. _created_object_shards is a module-level set in object_store that
        # skips mkdir on subsequent writes to the same shard path. Without this
        # purge, a reseed would rmtree the directory but leave stale entries in
        # the set, causing write_object to skip mkdir and fail with ENOENT.
        stale_prefix = str(seed_dir) + "/"
        stale = {s for s in _created_object_shards if s.startswith(stale_prefix)}
        _created_object_shards.difference_update(stale)
        shutil.rmtree(seed_dir)
    seed_dir.mkdir(parents=True)

    # Lay out the repo metadata directory by hand: repo.json, the commits /
    # snapshots / objects stores, refs/heads, HEAD and an empty config.toml.
    dot = muse_dir(seed_dir)
    dot.mkdir()
    repo_id = blob_id(f"bench-seed-{size}".encode())
    (dot / "repo.json").write_text(json.dumps({"repo_id": repo_id, "owner": "gabriel"}))
    for d in ("commits", "snapshots", "objects"):
        (dot / d).mkdir()
    (dot / "refs" / "heads").mkdir(parents=True)
    (dot / "HEAD").write_text("ref: refs/heads/main\n")
    (dot / "config.toml").write_text("")

    # Fixed starting timestamp, advanced by one second per commit, so the
    # inputs to compute_commit_id are the same on every rebuild.
    ts = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    parent: str | None = None
    tip = ""

    for ci in range(n_commits):
        # The manifest for this commit holds only the files generated for it.
        blobs: dict[str, str] = {}
        for fi in range(files_per_commit):
            data = bench_text(file_size, ci, fi)
            oid = blob_id(data)
            write_object(seed_dir, oid, data)
            blobs[f"f{ci}_{fi}.txt"] = oid

        sid = compute_snapshot_id(blobs)
        write_snapshot(seed_dir, SnapshotRecord(snapshot_id=sid, manifest=blobs))

        cid = compute_commit_id(
            parent_ids=[parent] if parent else [],
            snapshot_id=sid,
            message=f"bench commit {ci}",
            committed_at_iso=ts.isoformat(),
            author="gabriel",
        )
        write_commit(seed_dir, CommitRecord(
            commit_id=cid,
            branch="main",
            snapshot_id=sid,
            message=f"bench commit {ci}",
            committed_at=ts,
            parent_commit_id=parent,
            parent2_commit_id=None,
            author="gabriel",
            metadata={},
            structured_delta=None,
            sem_ver_bump="none",
            breaking_changes=[],
            agent_id="bench",
            model_id="bench",
            toolchain_id="",
            prompt_hash="",
            signature="",
            signer_key_id="",
        ))
        # Chain the next commit onto this one.
        parent = cid
        tip = cid
        ts = ts + datetime.timedelta(seconds=1)

    write_branch_ref(seed_dir, "main", tip)

    # Written last: a build that fails part-way leaves no metadata file, so
    # the next call sees a cache miss and rebuilds.
    meta_path.write_text(json.dumps({
        "n_commits": n_commits,
        "files_per_commit": files_per_commit,
        "file_size": file_size,
        "wire_hash": wire_hash(),
    }))
    return seed_dir
436
437
# In-process cache: once a hub seed is confirmed valid, skip all checks for the rest of the run.
_hub_seed_cache: dict[tuple[str, str], str] = {}


def ensure_hub_seed(hub_url: str, hub_alias: str, size: str, *, reseed: bool = False) -> str:
    """Ensure bench-seed-{size} exists on hub; return slug gabriel/bench-seed-{size}.

    Behaviour by case:

    * Seed present, its description carries the current ``wire_hash`` and the
      listing reports a head commit: reused as-is, nothing is pushed.
    * Seed present but stale (hash mismatch) or never pushed (no head commit):
      the hub repo is deleted, recreated and pushed from the local seed.
    * Seed absent: created and pushed.
    * ``reseed=True``: an existing repo is kept and the local seed is pushed
      on top of it; an absent repo is created first.

    Args:
        hub_url: Base URL of the hub.
        hub_alias: Hub alias; accepted for signature parity with the bench
            functions and not used here.
        size: Key into SIZE_MATRIX.
        reseed: Force a re-push even if the hub seed looks valid.

    Returns:
        The seed slug, ``gabriel/bench-seed-{size}``.

    Raises:
        RuntimeError: a required muse command failed.
        TimeoutError: the pushed seed did not become readable in time.
    """
    name = f"{SEED_PREFIX}{size}"
    slug = f"gabriel/{name}"

    cache_key = (hub_url, size)
    if not reseed and cache_key in _hub_seed_cache:
        return _hub_seed_cache[cache_key]

    current_hash = wire_hash()

    out = muse_check("hub", "repo", "list", "--limit", "200",
                     "--hub", hub_url, "--json", cwd=REPO_ROOT)
    repos = json.loads(out).get("repos", [])
    match = next((r for r in repos if r["name"] == name), None)

    if not reseed and match is not None:
        # Validate wire_hash stored in repo description. `or ""` also covers a
        # description that is present but null in the JSON listing.
        desc = match.get("description") or ""
        stored_hash = ""
        for part in desc.split():
            if part.startswith("wire_hash="):
                stored_hash = part.split("=", 1)[1]
        # Also verify the push completed — a repo that was created but never
        # pushed reports no head commit.
        head_commit = match.get("head_commit_id") or match.get("head_commit") or ""

        if stored_hash == current_hash and head_commit:
            _hub_seed_cache[cache_key] = slug
            return slug

        # Report the actual reason for the rebuild, then drop the stale seed.
        if stored_hash != current_hash:
            print(f" wire_hash changed — rebuilding hub seed {name}…")
        else:
            print(f" head_commit missing — rebuilding hub seed {name}…")
        muse_check("hub", "repo", "delete", slug, "--yes",
                   "--hub", hub_url, "--json", cwd=REPO_ROOT)

    # Create the repo unless this is a reseed on top of an existing one. A
    # stale repo has already been deleted above, so it is recreated here.
    if match is None or not reseed:
        muse_check("hub", "repo", "create", "--name", name,
                   "--description", f"wire_hash={current_hash}",
                   "--visibility", "public", "--no-init", "--hub", hub_url, "--json",
                   cwd=REPO_ROOT)

    seed_dir = ensure_local_seed(size)
    # Always reset origin so stale tracking refs never cause a silent no-op push.
    # bench_push uses the same remove+add pattern on its per-run copies.
    muse("remote", "remove", "origin", cwd=seed_dir)  # ignore error if absent
    muse_check("remote", "add", "origin", f"{hub_url}/{slug}", cwd=seed_dir)
    muse_check("push", "origin", "main", cwd=seed_dir)
    n_commits, _, _ = SIZE_MATRIX[size]
    _wait_indexed(hub_url, slug, n_commits)
    _hub_seed_cache[cache_key] = slug
    return slug
501
502
503 # ── verb benchmarks ───────────────────────────────────────────────────────────
504
def bench_push(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Measure push throughput using the cached local seed repo.

    Creates a fresh hub repo per run and pushes a copy of the cached local
    seed into it. No commit creation overhead after the seed is built.

    Args:
        hub_url: Base URL of the hub to push to.
        hub_alias: Hub alias; part of the common bench signature, not used here.
        size: Key into SIZE_MATRIX.
        runs: Number of timed pushes.
        cleanup: Delete each per-run hub repo afterwards.

    Returns:
        Elapsed milliseconds of every successful push; failed runs are
        reported on stdout and omitted.
    """
    times: list[float] = []
    local = ensure_local_seed(size)

    for run_i in range(runs):
        name = f"{BENCH_PREFIX}push-{size}-{run_i}-{os.urandom(3).hex()}"
        slug = create_repo(hub_url, name)
        # Use one owner-qualified slug for both the remote URL and the
        # cleanup call, matching how the other benchmarks address repos.
        full_slug = f"gabriel/{slug}"
        run_dir = Path(tempfile.mkdtemp(prefix="muse_bench_push_"))
        try:
            # Work from a copy so the cached seed's remote stays clean.
            run_repo = run_dir / "repo"
            shutil.copytree(str(local), str(run_repo), symlinks=False)
            # Wire a fresh remote for this run's hub repo.
            muse("remote", "remove", "origin", cwd=run_repo)  # ignore error if absent
            muse_check("remote", "add", "origin", f"{hub_url}/{full_slug}", cwd=run_repo)
            ms, ok, err = timed_muse("push", "origin", "main", cwd=run_repo)
            if ok:
                times.append(ms)
                print(f" {ms:.0f}ms", end="", flush=True)
            else:
                print(f"\n ERROR: {err}", flush=True)
        finally:
            shutil.rmtree(run_dir, ignore_errors=True)
            if cleanup:
                _safe_delete_repo(hub_url, full_slug)

    return times
538
539
def bench_clone(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Measure clone throughput against the persistent hub seed repo.

    Args:
        hub_url: Base URL of the hub to clone from.
        hub_alias: Hub alias, forwarded to ``ensure_hub_seed``.
        size: Key into SIZE_MATRIX.
        runs: Number of timed clones.
        cleanup: Part of the common bench signature; unused because this
            benchmark creates no hub repos of its own.

    Returns:
        Elapsed milliseconds of every successful clone; failed runs are
        reported on stdout and omitted.
    """
    times: list[float] = []
    slug = ensure_hub_seed(hub_url, hub_alias, size)

    for _ in range(runs):
        # Each run clones into its own empty directory, removed afterwards.
        clone_parent = Path(tempfile.mkdtemp(prefix="muse_bench_clone_"))
        try:
            ms, ok, err = timed_muse("clone", f"{hub_url}/{slug}", cwd=clone_parent)
            if ok:
                times.append(ms)
                print(f" {ms:.0f}ms", end="", flush=True)
            else:
                print(f"\n ERROR: {err}", flush=True)
        finally:
            shutil.rmtree(clone_parent, ignore_errors=True)

    return times
560
561
def _bench_fetch_or_pull(
    verb: str,
    hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool,
) -> list[float]:
    """Measure fetch/pull against the persistent hub seed.

    User story:
    1. Hub seed already exists (ensure_hub_seed).
    2. Clone the seed — client is at the seeded state.
    3. Add exactly 1 delta commit to a run-local copy and push it.
    4. Measure fetch/pull from the clone (now 1 commit behind).

    The delta is always 1 commit — we measure wire-protocol latency,
    not local commit-creation overhead.

    As implemented below, each run pushes the local seed into its own
    per-run hub repo and clones THAT repo; the persistent hub seed is only
    ensured up front and its slug is not used afterwards.

    Args:
        verb: ``"fetch"`` selects ``muse fetch origin``; any other value
            runs ``muse pull origin main``.
        hub_url: Base URL of the hub.
        hub_alias: Hub alias, forwarded to ``ensure_hub_seed``.
        size: Key into SIZE_MATRIX.
        runs: Number of timed runs.
        cleanup: Delete each per-run hub repo afterwards.

    Returns:
        Elapsed milliseconds of every successful fetch/pull.
    """
    n_commits, _, file_size = SIZE_MATRIX[size]
    times: list[float] = []
    # NOTE(review): seed_slug is never read below — confirm whether ensuring
    # the hub seed is still needed for this benchmark.
    seed_slug = ensure_hub_seed(hub_url, hub_alias, size)

    for run_i in range(runs):
        # Per-run hub repo so delta pushes don't accumulate on the seed.
        run_name = f"{BENCH_PREFIX}{verb}-{size}-{run_i}-{os.urandom(3).hex()}"
        run_slug = create_repo(hub_url, run_name)

        run_dir = Path(tempfile.mkdtemp(prefix=f"muse_bench_{verb}_src_"))
        clone_parent = Path(tempfile.mkdtemp(prefix=f"muse_bench_{verb}_dst_"))
        try:
            _t = time.time
            def _step(label: str, t0: float) -> float:
                # Print how long the step that started at t0 took and return
                # the current time as the start of the next step.
                t1 = _t()
                print(f"\n [step] {label}: {(t1-t0)*1000:.0f}ms", flush=True)
                return t1

            t0 = _t()
            # Copy local seed → run dir, push to run hub repo.
            local_seed = ensure_local_seed(size)
            shutil.copytree(str(local_seed), str(run_dir / "repo"), symlinks=False)
            run_repo = run_dir / "repo"
            t0 = _step("copytree", t0)

            muse("remote", "remove", "origin", cwd=run_repo) # ignore error if absent
            muse_check("remote", "add", "origin",
                       f"{hub_url}/gabriel/{run_slug}", cwd=run_repo)
            muse_check("push", "origin", "main", cwd=run_repo)
            t0 = _step("seed push", t0)

            # Large mpacks defer commit writes to a background job — wait until
            # the server's commit graph is fully indexed before cloning.
            _wait_indexed(hub_url, f"gabriel/{run_slug}", n_commits)
            t0 = _step("wait_indexed seed", t0)

            # Clone run repo — client is now at seeded state.
            muse_check("clone", f"{hub_url}/gabriel/{run_slug}", cwd=clone_parent)
            # The clone is expected in a directory named after the repo slug.
            cloned = clone_parent / run_slug
            t0 = _step("clone", t0)

            # Add exactly 1 delta commit and push.
            # Materialise the working tree first so the delta commit doesn't
            # incorrectly delete the seed files that are absent from disk.
            # --force discards the "pending deletions" muse sees for unwritten seed files.
            muse_check("checkout", "--force", "main", cwd=run_repo)
            (run_repo / f"delta_{run_i}.txt").write_bytes(bench_text(file_size, run_i, 0))
            muse_check("code", "add", ".", cwd=run_repo)
            muse_check("commit", "-m", f"delta {run_i}",
                       "--agent-id", "bench", "--model-id", "bench", cwd=run_repo)
            muse_check("push", "origin", "main", cwd=run_repo)
            t0 = _step("delta push", t0)

            # Wait for the delta commit to be indexed (inline for small mpacks,
            # but generation computation depends on seed commits being in commit_graph).
            _wait_indexed(hub_url, f"gabriel/{run_slug}", n_commits + 1)
            t0 = _step("wait_indexed delta", t0)

            # Measure fetch/pull (client is 1 commit behind).
            if verb == "fetch":
                ms, ok, err = timed_muse("fetch", "origin", cwd=cloned)
            else:
                ms, ok, err = timed_muse("pull", "origin", "main", cwd=cloned)
            _step(f"{verb} measurement", t0)

            # Only the timed_muse duration is recorded; the [step] timings
            # above are diagnostic output.
            if ok:
                times.append(ms)
                print(f" {ms:.0f}ms", end="", flush=True)
            else:
                print(f"\n ERROR: {err}", flush=True)
        finally:
            shutil.rmtree(run_dir, ignore_errors=True)
            shutil.rmtree(clone_parent, ignore_errors=True)
            if cleanup:
                _safe_delete_repo(hub_url, f"gabriel/{run_slug}")

    return times
654
655
def bench_fetch(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Benchmark ``muse fetch``; returns the per-run timings in milliseconds."""
    return _bench_fetch_or_pull(
        "fetch",
        hub_url=hub_url,
        hub_alias=hub_alias,
        size=size,
        runs=runs,
        cleanup=cleanup,
    )
658
659
def bench_pull(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Benchmark ``muse pull``; returns the per-run timings in milliseconds."""
    return _bench_fetch_or_pull(
        "pull",
        hub_url=hub_url,
        hub_alias=hub_alias,
        size=size,
        runs=runs,
        cleanup=cleanup,
    )
662
663
664
665
# Dispatch table: --verb value -> benchmark function. Every function takes
# (hub_url, hub_alias, size, runs, cleanup) and returns a list of timings in ms.
VERB_FNS = {
    "push": bench_push,
    "clone": bench_clone,
    "fetch": bench_fetch,
    "pull": bench_pull,
}
672
673
674 # ── output ────────────────────────────────────────────────────────────────────
675
def gate_str(verb: str, size: str, hub_alias: str, p50: float) -> str:
    """Format the pass/fail verdict of *p50* against its gate.

    Returns "—" when GATES has no entry (or a None entry) for the
    combination. Otherwise returns a check mark or cross followed by the
    gate limit in whole seconds.
    """
    limit_ms = GATES.get((verb, size, hub_alias))
    if limit_ms is None:
        return "—"
    mark = "✓" if p50 <= limit_ms else "✗"
    return f"{mark} <{int(limit_ms/1000)}s"
681
682
def print_table(rows: list[tuple], size: str) -> None:
    """Print a fixed-width results table for one size to stdout.

    Each row is ``(verb, size, hub_alias, times)``. An empty ``times`` list
    renders as a FAILED row. Otherwise the row shows the median as p50, a
    nearest-rank p95, and the gate verdict for the p50.
    """
    headers = ["verb", "size", "hub", "p50 (ms)", "p95 (ms)", "gate"]
    widths = [6, 5, 12, 10, 10, 12]
    gap = " "

    def render(cells: list[str]) -> str:
        # Left-justify each cell to its column width and join with the gap.
        return gap.join(cell.ljust(width) for cell, width in zip(cells, widths))

    rule_len = sum(widths) + len(gap) * (len(widths) - 1)
    print()
    print(f"muse CLI bench — size={size.upper()}")
    print("=" * rule_len)
    print(render(headers))
    print(render(["-" * width for width in widths]))
    for verb, sz, hub_alias, times in rows:
        if times:
            p50 = statistics.median(times)
            ordered = sorted(times)
            # Nearest-rank percentile, clamped to a valid index.
            rank = math.ceil(len(ordered) * 0.95) - 1
            p95 = ordered[min(len(ordered) - 1, max(0, rank))]
            cells = [verb, sz, hub_alias, f"{p50:.0f}", f"{p95:.0f}",
                     gate_str(verb, sz, hub_alias, p50)]
        else:
            cells = [verb, sz, hub_alias, "FAILED", "FAILED", "✗"]
        print(render(cells))
    print()
703
704
def markdown_table(rows: list[tuple], size: str) -> str:
    """Render the results for one size as a Markdown section.

    The heading summarises the size (commits, total files, approximate MB
    from SIZE_MATRIX). Each row is ``(verb, size, hub_alias, times)``; an
    empty ``times`` list renders as a FAILED row.
    """
    n_commits, per_commit, file_bytes = SIZE_MATRIX[size]
    total_mb = n_commits * per_commit * file_bytes // 1024 // 1024
    out = [
        f"### {size.upper()} ({n_commits} commits, {n_commits*per_commit} files, ~{total_mb or '<1'} MB)",
        "",
        "| verb | hub | p50 (ms) | p95 (ms) | gate |",
        "|------|-----|----------|----------|------|",
    ]
    for verb, sz, hub_alias, times in rows:
        if times:
            p50 = statistics.median(times)
            ordered = sorted(times)
            # Nearest-rank percentile, clamped to a valid index.
            rank = math.ceil(len(ordered) * 0.95) - 1
            p95 = ordered[min(len(ordered) - 1, max(0, rank))]
            verdict = gate_str(verb, sz, hub_alias, p50)
            out.append(f"| {verb} | {hub_alias} | **{p50:.0f}** | {p95:.0f} | {verdict} |")
        else:
            out.append(f"| {verb} | {hub_alias} | FAILED | FAILED | ✗ |")
    return "\n".join(out)
724
725
726 # ── main ──────────────────────────────────────────────────────────────────────
727
def main() -> None:
    """Parse the command line and run the requested benchmarks.

    For every requested size: purge stale transient repos on each hub, apply
    the --reseed / --reseed-hub requests, run every verb against every hub,
    then print a fixed-width table. The Markdown tables for all sizes are
    printed at the end.

    Exceptions raised by a single verb are reported and recorded as a FAILED
    row; failures while purging or reseeding abort the run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--size", nargs="+", default=["xs"],
                        help="xs s m l xl all")
    parser.add_argument("--hubs", nargs="+", default=["localhost", "staging"],
                        choices=["localhost", "staging"])
    parser.add_argument("--verb", nargs="+", default=["push", "clone", "fetch", "pull"],
                        choices=["push", "clone", "fetch", "pull"])
    parser.add_argument("--runs", type=int, default=1)
    parser.add_argument("--no-cleanup", action="store_true")
    parser.add_argument("--reseed", action="store_true",
                        help="Rebuild local seed cache even if valid")
    parser.add_argument("--reseed-hub", action="store_true",
                        help="Re-push hub seed repos even if present")
    args = parser.parse_args()

    sizes = list(SIZE_MATRIX) if "all" in args.size else args.size
    # Reject unknown sizes up front; otherwise they surface later as an
    # uncaught KeyError when the results table is rendered.
    unknown = [s for s in sizes if s not in SIZE_MATRIX]
    if unknown:
        parser.error(
            f"unknown --size value(s): {', '.join(unknown)} "
            f"(choose from: {' '.join(SIZE_MATRIX)} all)"
        )
    cleanup = not args.no_cleanup

    print(f"muse CLI bench hubs={args.hubs} verbs={args.verb} "
          f"sizes={sizes} runs={args.runs}")
    print()

    all_markdown: list[str] = []
    total_start = time.perf_counter()

    for size in sizes:
        for hub_alias in args.hubs:
            hub_url = HUB_URLS[hub_alias]
            purge_stale(hub_url)

        # Honour the reseed flags before anything is timed, so the rebuild
        # cost never leaks into a measurement.
        if args.reseed:
            ensure_local_seed(size, reseed=True)
        if args.reseed_hub:
            for hub_alias in args.hubs:
                ensure_hub_seed(HUB_URLS[hub_alias], hub_alias, size, reseed=True)

        rows: list[tuple] = []
        size_start = time.perf_counter()

        for verb in args.verb:
            for hub_alias in args.hubs:
                hub_url = HUB_URLS[hub_alias]
                verb_start = time.perf_counter()
                print(f" {verb}/{size}/{hub_alias}…", end="", flush=True)
                try:
                    times = VERB_FNS[verb](hub_url, hub_alias, size, args.runs, cleanup)
                except Exception as exc:
                    # Top-level boundary: one failing verb must not abort
                    # the remaining benchmarks.
                    print(f"\n ERROR: {exc}", flush=True)
                    times = []
                verb_elapsed = (time.perf_counter() - verb_start) * 1000
                # Inline verb summary: p50 + gate + total wall time for this verb
                if times:
                    p50 = statistics.median(times)
                    g = gate_str(verb, size, hub_alias, p50)
                    print(f" → p50={p50:.0f}ms {g} (verb wall={verb_elapsed:.0f}ms)")
                else:
                    print(f" → FAILED (verb wall={verb_elapsed:.0f}ms)")
                rows.append((verb, size, hub_alias, times))

        size_elapsed = (time.perf_counter() - size_start) * 1000
        print_table(rows, size)
        print(f" size={size.upper()} total: {size_elapsed:.0f}ms")
        all_markdown.append(markdown_table(rows, size))

    total_elapsed = (time.perf_counter() - total_start) * 1000
    print(f"\n ── overall: {total_elapsed:.0f}ms ──\n")

    if all_markdown:
        print("Markdown (copy to issue comment):")
        print("\n\n".join(all_markdown))
793
794
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
File History 2 commits
sha256:4992098130166d191cefed0a2821d19cd3cdd3cf50867a4e715c2b30636826c7 fix: repair syntax errors from typing annotation cleanup Sonnet 4.6 20 days ago
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 20 days ago