tests/bench_cli.py · gabriel/musehub

bench_cli.py python

796 lines 32.2 KB

sha256:4992098130166d191cefed0a2821d19cd3cdd3cf50867a4e715c2b30636826c7 fix: repair syntax errors from typing annotation cleanup Sonnet 4.6 20 days ago

1	"""MuseWire CLI benchmark — times actual muse CLI commands against localhost and staging.
2
3	Every operation uses the muse CLI. No HTTP clients. No internal imports.
4
5	Usage:
6	python3 tests/bench_cli.py --size xs
7	python3 tests/bench_cli.py --size xs s m --hubs localhost staging
8	python3 tests/bench_cli.py --size all --runs 3
9	"""
10	from __future__ import annotations
11
12	import argparse
13	import itertools
14	import json
15	import math
16	import os
17	import re
18	import shutil
19	import statistics
20	import subprocess
21	import sys
22	import tempfile
23	import time
24	from pathlib import Path
25
26	from mnemonic import Mnemonic
27
28	import datetime
29
30	from muse.core import transport, mpack
31	from muse.core.object_store import write_object, _created_object_shards
32	from muse.core.paths import muse_dir, server_objects_dir # noqa: F401
33	from muse.core.snapshot import compute_commit_id, compute_snapshot_id
34	from muse.core.commits import CommitRecord, write_commit
35	from muse.core.refs import write_branch_ref
36	from muse.core.snapshots import SnapshotRecord, write_snapshot
37	from muse.core.types import blob_id, hash_file
38	import musehub.services.musehub_wire as musehub_wire
39
# Repository checkout root (two directory levels above this file); used as the
# working directory for hub-level muse commands.
REPO_ROOT = Path(__file__).parent.parent

# Hub endpoints the benchmark can target, keyed by the alias accepted by --hubs.
LOCALHOST = "https://localhost:1337"
STAGING = "https://staging.musehub.ai"
HUB_URLS = dict(localhost=LOCALHOST, staging=STAGING)

# Name prefixes for the repos this script creates on a hub.
BENCH_PREFIX = "bench-"
SEED_PREFIX = "bench-seed-"  # persistent — never auto-purged

# Root of the on-disk cache holding one seeded local repo per size.
CACHE_DIR = Path.home().joinpath(".cache", "muse_bench")
47
48	# Repos that must never be deleted under any circumstances.
49	_PERMANENT_REPOS: frozenset[str] = frozenset({
50	"muse", "musehub", "agentception", "contracts",
51	"stori", "Stori", "maestro", "muse-zsh", "identity",
52	})
53
54	# Exact pattern a transient bench repo name must match before deletion is allowed.
55	# Covers all historical naming conventions:
56	# bench-push-xs-0-abc123 (current: integer run index)
57	# bench-push-xs-p2-abc123 (old: letter-prefixed run index)
58	# bench-fetch-xs-abc123 (old: no run index, hex only)
59	# bench-clone-xs-debug-abc123 (ad-hoc debug runs)
60	# bench-reftest-abc123 (ref-test repos)
61	# Also covers ad-hoc debug/diag repos created during debugging sessions.
62	_TRANSIENT_RE = re.compile(
63	r"^bench-(push\|clone\|fetch\|pull)-[a-z]+-([a-z]*\d+\|debug)-[0-9a-f]{6,}$"
64	r"\|^bench-(push\|clone\|fetch\|pull)-[a-z]+-[0-9a-f]{6,}$"
65	r"\|^bench-reftest-[0-9a-f]{6,}$"
66	r"\|^(dbg\|diag)\d*-[0-9a-f]{6,}$"
67	)
68
# Benchmark sizes: label -> (commits, files_per_commit, file_size_bytes).
# Every size uses 4 KiB files; only the commit and file counts vary.
SIZE_MATRIX = {
    label: (n_commits, files_per_commit, 4_096)
    for label, n_commits, files_per_commit in (
        ("xs", 1, 1),
        ("s", 10, 5),
        ("m", 100, 5),
        ("l", 1_000, 5),
        ("xl", 2_000, 5),
    )
}
77
# Latency gates in ms, keyed by (verb, size, hub alias). None means no gate
# (expected to be slow / CF ceiling). All four verbs share the same per-size
# limits on a given hub, so the table is generated from one row per hub.
GATES: dict[tuple[str, str, str], float | None] = {
    (verb, size, hub): limit_ms
    for hub, limits in (
        ("localhost", (("xs", 2_000), ("s", 5_000), ("m", 15_000),
                       ("l", 15_000), ("xl", None))),
        ("staging", (("xs", 5_000), ("s", 10_000), ("m", 20_000),
                     ("l", 30_000), ("xl", None))),
    )
    for verb in ("push", "clone", "fetch", "pull")
    for size, limit_ms in limits
}
121
122
# Files whose content is fingerprinted by wire_hash(). Paths are taken from the
# modules as actually imported, so they are correct whatever the install path.
_WIRE_SOURCES: list[Path] = [
    Path(module.__file__)
    for module in (transport, mpack, musehub_wire)
]
130
131
def wire_hash() -> str:
    """Return a short fingerprint of the wire-protocol source files.

    Each path in _WIRE_SOURCES that exists on disk is hashed with
    muse.core.types.hash_file; the per-file digests are concatenated and
    hashed again with blob_id. The result is the first 16 characters of the
    part of that id following its last ":". Seed caches store this value, so
    a change to any listed source file invalidates them.
    """
    digests = [hash_file(src).encode() for src in _WIRE_SOURCES if src.exists()]
    combined_id = blob_id(b"".join(digests))
    return combined_id.split(":")[-1][:16]
146
147
148	# ── muse wrappers ─────────────────────────────────────────────────────────────
149
def muse(*args: str, cwd: Path, timeout: int = 300) -> subprocess.CompletedProcess:
    """Run ``muse <args>`` in *cwd* and return the completed process.

    stdout and stderr are captured as text. A non-zero exit status is NOT
    treated as an error here; callers inspect ``returncode`` themselves.
    subprocess.TimeoutExpired propagates if the command exceeds *timeout*
    seconds.
    """
    command = ["muse", *args]
    return subprocess.run(
        command,
        cwd=str(cwd),
        capture_output=True,
        text=True,
        timeout=timeout,
    )
155
156
def muse_check(*args: str, cwd: Path, timeout: int = 300) -> str:
    """Run a muse command that must succeed and return its stdout.

    Raises RuntimeError, carrying the first 400 characters of stderr, when
    the command exits with a non-zero status.
    """
    result = muse(*args, cwd=cwd, timeout=timeout)
    if result.returncode == 0:
        return result.stdout
    raise RuntimeError(f"muse {' '.join(args)} failed:\n{result.stderr[:400]}")
162
163
def timed_muse(*args: str, cwd: Path, timeout: int = 600) -> tuple[float, bool, str]:
    """Run a muse command and time it.

    Returns ``(elapsed_ms, success, error_snippet)``. Anything the command
    wrote to stderr is echoed line by line with a ``[muse-log]`` tag.
    ``error_snippet`` is the first 400 characters of stderr on failure and an
    empty string on success.
    """
    started = time.perf_counter()
    result = muse(*args, cwd=cwd, timeout=timeout)
    elapsed_ms = (time.perf_counter() - started) * 1000

    log_text = result.stderr.strip()
    if log_text:
        for log_line in log_text.splitlines():
            print(f"\n [muse-log] {log_line}", flush=True)

    succeeded = result.returncode == 0
    error_snippet = "" if succeeded else result.stderr[:400]
    return elapsed_ms, succeeded, error_snippet
173
174
def _wait_indexed(hub_url: str, slug: str, n_commits: int, *, timeout: int = 600) -> None:
    """Block until the hub serves a readable HEAD for *slug*.

    Polls every 2 seconds: the first successful attempt clones the repo into
    a throwaway directory, later attempts run ``muse fetch origin`` in that
    clone. After each sync, ``muse read --json`` (HEAD only) is run and the
    function returns as soon as it reports a non-empty ``commit_id``. Using
    ``read`` avoids serializing thousands of commits with ``muse log``, which
    hangs on large repos.

    Args:
        hub_url: Base URL of the hub.
        slug: Repo slug; its last "/"-separated component is the directory
            name the clone is expected to create.
        n_commits: Currently unused; kept so existing callers keep working.
        timeout: Overall budget in seconds.

    Raises:
        TimeoutError: No readable HEAD was observed within *timeout* seconds.
    """
    # Monotonic clock: the budget must not stretch or shrink when the wall
    # clock is adjusted while we are polling.
    started = time.monotonic()
    deadline = started + timeout
    attempt = 0
    tmp = Path(tempfile.mkdtemp(prefix="muse_probe_"))
    clone_dir = tmp / slug.split("/")[-1]
    cloned = False
    try:
        while time.monotonic() < deadline:
            attempt += 1
            sync_t0 = time.monotonic()
            if not cloned:
                r = muse("clone", f"{hub_url}/{slug}", cwd=tmp, timeout=300)
                clone_ms = (time.monotonic() - sync_t0) * 1000
                if r.stderr.strip():
                    for line in r.stderr.strip().splitlines():
                        print(f"\n [clone-log] {line}", flush=True)
                if r.returncode != 0:
                    print(f"\n [_wait_indexed] attempt={attempt} clone FAILED in {clone_ms:.0f}ms: {r.stderr[-200:]}", flush=True)
                    # A failed clone may leave a partial directory behind;
                    # remove it so the next attempt starts from a clean slate.
                    shutil.rmtree(clone_dir, ignore_errors=True)
                    time.sleep(2)
                    continue
                cloned = True
            else:
                fr = muse("fetch", "origin", cwd=clone_dir, timeout=300)
                # Reported under the "clone=" label below, as before.
                clone_ms = (time.monotonic() - sync_t0) * 1000
                if fr.stderr.strip():
                    for line in fr.stderr.strip().splitlines():
                        print(f"\n [fetch-log] {line}", flush=True)

            read_t0 = time.monotonic()
            read_r = muse("read", "--json", cwd=clone_dir)
            read_ms = (time.monotonic() - read_t0) * 1000
            elapsed = time.monotonic() - started
            if read_r.returncode != 0:
                print(f"\n [_wait_indexed] attempt={attempt} clone={clone_ms:.0f}ms read FAILED in {read_ms:.0f}ms: {read_r.stderr[:120]}", flush=True)
            else:
                try:
                    commit_id = json.loads(read_r.stdout).get("commit_id", "")
                    print(f"\n [_wait_indexed] attempt={attempt} elapsed={elapsed:.1f}s clone={clone_ms:.0f}ms read={read_ms:.0f}ms commit={commit_id[:16]}", flush=True)
                    if commit_id:
                        return
                except (ValueError, KeyError, AttributeError, TypeError) as exc:
                    # AttributeError/TypeError: valid JSON of an unexpected
                    # shape (non-object payload, non-string commit_id).
                    print(f"\n [_wait_indexed] attempt={attempt} JSON parse error: {exc}", flush=True)
            time.sleep(2)
    finally:
        shutil.rmtree(tmp, ignore_errors=True)
    raise TimeoutError(f"{slug}: HEAD not indexed within {timeout}s")
229
230
231	# ── repo lifecycle (muse CLI only) ────────────────────────────────────────────
232
def create_repo(hub_url: str, name: str) -> str:
    """Create a public, uninitialised bench repo on the hub and return its slug.

    Args:
        hub_url: Base URL of the hub to create the repo on.
        name: Repo name; must start with BENCH_PREFIX.

    Returns:
        The ``slug`` field of the CLI's JSON response.

    Raises:
        AssertionError: *name* does not start with BENCH_PREFIX.
        RuntimeError: The muse CLI exited with a non-zero status.
    """
    # Raised explicitly rather than via ``assert`` so the guard still runs
    # under ``python -O``, which strips assert statements.
    if not name.startswith(BENCH_PREFIX):
        raise AssertionError(
            f"SAFETY: refusing to create repo without '{BENCH_PREFIX}' prefix: '{name}'"
        )
    out = muse_check(
        "hub", "repo", "create", "--name", name, "--visibility", "public",
        "--no-init", "--hub", hub_url, "--json",
        cwd=REPO_ROOT,
    )
    return json.loads(out)["slug"]
242
243
def _safe_delete_repo(hub_url: str, slug: str) -> None:
    """Delete a hub repo — only if it passes both independent safety guards.

    Guard 1 — permanent blocklist: the repo name (last "/"-separated part of
    *slug*, compared case-insensitively) must not be in _PERMANENT_REPOS.
    Guard 2 — exact pattern: the name must match _TRANSIENT_RE.

    Both must pass. Any failure raises AssertionError before any network call.
    A failed delete is only warned about, unless stderr mentions "404", in
    which case it is silently accepted.

    Raises:
        AssertionError: One of the two guards rejected the repo.
    """
    name = slug.split("/")[-1]
    permanent = {repo.lower() for repo in _PERMANENT_REPOS}
    # Explicit raises instead of ``assert``: these guards protect real repos
    # and must not disappear when Python runs with -O.
    if name.lower() in permanent:
        raise AssertionError(f"SAFETY: refusing to delete permanent repo '{slug}'")
    if not _TRANSIENT_RE.match(name):
        raise AssertionError(
            f"SAFETY: refusing to delete repo whose name doesn't match transient pattern: '{slug}'"
        )
    r = muse("hub", "repo", "delete", slug, "--yes", "--hub", hub_url, "--json", cwd=REPO_ROOT)
    if r.returncode != 0 and "404" not in r.stderr:
        # Warn but don't raise — stale repos are purged at the next run's start.
        # Deletion can fail transiently if a background job is still writing to the repo.
        print(f"\n WARN: repo delete {slug} failed (will be purged next run): {r.stderr[:200]}", flush=True)
264
265
def purge_stale(hub_url: str) -> None:
    """Delete every transient bench repo currently listed on the hub.

    Lists up to 200 repos via the CLI, keeps those whose name matches
    _TRANSIENT_RE, and hands each one's slug to _safe_delete_repo.
    """
    listing = muse_check(
        "hub", "repo", "list", "--limit", "200", "--hub", hub_url, "--json",
        cwd=REPO_ROOT,
    )
    stale = [
        repo
        for repo in json.loads(listing).get("repos", [])
        if _TRANSIENT_RE.match(repo["name"])
    ]
    if not stale:
        return
    print(f" purging {len(stale)} stale bench repo(s) on {hub_url}…")
    for repo in stale:
        _safe_delete_repo(hub_url, repo["slug"])
275
276
277	# ── local repo population ─────────────────────────────────────────────────────
278
# The BIP39 English wordlist (the list behind muse identity backup mnemonics),
# frozen into a tuple. bench_text() draws every file's content from it.
BIP39_WORDS: tuple[str, ...] = (*Mnemonic("english").wordlist,)
282
def bench_text(size: int, commit: int, file: int) -> bytes:
    """Build the deterministic file body for one (commit, file) pair.

    The text opens with a ``# muse bench`` header naming the commit and file,
    followed by words from BIP39_WORDS starting at an offset derived from
    both indices. Words are laid out four per line with a blank line after
    every sixth line. Generation stops once *size* bytes have been produced
    and the text is cut to its first *size* characters before encoding.
    """
    header = f"# muse bench commit={commit} file={file}\n\n"
    start = (commit * 17 + file * 7) % len(BIP39_WORDS)
    rotated = BIP39_WORDS[start:] + BIP39_WORDS[:start]

    parts = [header]
    written = len(header.encode())
    # 1-based word index: every 4th word closes a line, every 24th a stanza.
    for index, word in enumerate(itertools.islice(itertools.cycle(rotated), size), start=1):
        closes_line = index % 4 == 0
        piece = word + ("\n" if closes_line else " ")
        parts.append(piece)
        written += len(piece.encode())
        if closes_line and (index // 4) % 6 == 0:
            parts.append("\n")
            written += 1
        if written >= size:
            break
    return "".join(parts)[:size].encode()
310
311
def make_local_repo(n_commits: int, files_per_commit: int, file_size: int) -> Path:
    """Build a fresh muse repo in a new temp directory via the CLI.

    Runs ``muse init``, then for each of *n_commits* commits writes
    *files_per_commit* generated files named ``f{commit}_{file}.txt``, stages
    everything and commits. Returns the repo directory; the caller is
    responsible for removing it.
    """
    repo_dir = Path(tempfile.mkdtemp(prefix="muse_bench_"))
    muse_check("init", cwd=repo_dir)

    for ci in range(n_commits):
        for fi in range(files_per_commit):
            target = repo_dir / f"f{ci}_{fi}.txt"
            target.write_bytes(bench_text(file_size, ci, fi))
        muse_check("code", "add", ".", cwd=repo_dir)
        commit_args = (
            "commit", "-m", f"bench commit {ci}",
            "--agent-id", "bench", "--model-id", "bench",
        )
        muse_check(*commit_args, cwd=repo_dir)

    return repo_dir
326
327
328	# ── persistent seed helpers (Phase 2–3 implementation) ───────────────────────
329
def ensure_local_seed(size: str, *, reseed: bool = False) -> Path:
    """Return the cached local muse repo for *size*, building it if needed.

    The repo lives at CACHE_DIR/{size}/ next to a ``cache_meta.json`` that
    records the size parameters and the wire hash it was built with. A cache
    hit requires that metadata to match the current values exactly; anything
    else (missing, unreadable, different) causes a full rebuild, as does
    ``reseed=True``.

    The rebuild writes objects, snapshots, commits and the ``main`` ref
    directly through muse.core — no subprocess per commit.
    """
    n_commits, files_per_commit, file_size = SIZE_MATRIX[size]
    seed_dir = CACHE_DIR / size
    meta_path = seed_dir / "cache_meta.json"

    def _valid_cache() -> bool:
        if not (seed_dir.exists() and meta_path.exists()):
            return False
        try:
            meta = json.loads(meta_path.read_text())
            shape_matches = (
                meta.get("n_commits"),
                meta.get("files_per_commit"),
                meta.get("file_size"),
            ) == (n_commits, files_per_commit, file_size)
            # wire_hash() is evaluated only once the cheap checks have passed.
            return shape_matches and meta.get("wire_hash") == wire_hash()
        except Exception:
            return False

    if not reseed and _valid_cache():
        return seed_dir

    if seed_dir.exists():
        # object_store remembers which shard directories it has already
        # created (_created_object_shards) and skips mkdir for them. Entries
        # under seed_dir must be forgotten before the tree is removed,
        # otherwise write_object would skip mkdir for a directory that no
        # longer exists and fail with ENOENT.
        shard_prefix = f"{seed_dir}/"
        _created_object_shards.difference_update(
            [shard for shard in _created_object_shards if shard.startswith(shard_prefix)]
        )
        shutil.rmtree(seed_dir)
    seed_dir.mkdir(parents=True)

    # Lay out an empty repo skeleton by hand.
    dot = muse_dir(seed_dir)
    dot.mkdir()
    repo_id = blob_id(f"bench-seed-{size}".encode())
    (dot / "repo.json").write_text(json.dumps({"repo_id": repo_id, "owner": "gabriel"}))
    for subdir in ("commits", "snapshots", "objects"):
        (dot / subdir).mkdir()
    (dot / "refs" / "heads").mkdir(parents=True)
    (dot / "HEAD").write_text("ref: refs/heads/main\n")
    (dot / "config.toml").write_text("")

    # Fixed epoch plus one second per commit keeps commit ids reproducible.
    epoch = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    parent: str | None = None
    tip = ""

    for ci in range(n_commits):
        committed_at = epoch + datetime.timedelta(seconds=ci)
        message = f"bench commit {ci}"

        blobs: dict[str, str] = {}
        for fi in range(files_per_commit):
            data = bench_text(file_size, ci, fi)
            oid = blob_id(data)
            write_object(seed_dir, oid, data)
            blobs[f"f{ci}_{fi}.txt"] = oid

        sid = compute_snapshot_id(blobs)
        write_snapshot(seed_dir, SnapshotRecord(snapshot_id=sid, manifest=blobs))

        cid = compute_commit_id(
            parent_ids=[parent] if parent else [],
            snapshot_id=sid,
            message=message,
            committed_at_iso=committed_at.isoformat(),
            author="gabriel",
        )
        record = CommitRecord(
            commit_id=cid,
            branch="main",
            snapshot_id=sid,
            message=message,
            committed_at=committed_at,
            parent_commit_id=parent,
            parent2_commit_id=None,
            author="gabriel",
            metadata={},
            structured_delta=None,
            sem_ver_bump="none",
            breaking_changes=[],
            agent_id="bench",
            model_id="bench",
            toolchain_id="",
            prompt_hash="",
            signature="",
            signer_key_id="",
        )
        write_commit(seed_dir, record)
        parent = tip = cid

    write_branch_ref(seed_dir, "main", tip)

    # Written last: a build that dies midway leaves no metadata, so the next
    # call sees an invalid cache and rebuilds.
    meta_path.write_text(json.dumps({
        "n_commits": n_commits,
        "files_per_commit": files_per_commit,
        "file_size": file_size,
        "wire_hash": wire_hash(),
    }))
    return seed_dir
436
437
438	# In-process cache: once a hub seed is confirmed valid, skip all checks for the rest of the run.
439	_hub_seed_cache: dict[tuple[str, str], str] = {}
440
441
def ensure_hub_seed(hub_url: str, hub_alias: str, size: str, *, reseed: bool = False) -> str:
    """Ensure bench-seed-{size} exists on hub; return slug gabriel/bench-seed-{size}.

    Behaviour, in order:

    * A seed already confirmed during this process is returned from
      _hub_seed_cache without touching the hub (unless ``reseed``).
    * An existing hub repo is accepted when the ``wire_hash=`` token in its
      description equals the current wire_hash() AND the listing reports a
      head commit for it.
    * An existing repo that fails either check is stale: it is deleted and
      recreated.
    * With ``reseed=True`` an existing repo is neither validated nor deleted;
      the local seed is pushed on top of it.
    * A repo is created only when none exists (never listed, or just deleted).

    After pushing, the call blocks in _wait_indexed until the hub serves HEAD.

    Args:
        hub_url: Base URL of the hub.
        hub_alias: Unused here; kept for a uniform signature with the bench
            functions.
        size: Key into SIZE_MATRIX.
        reseed: Force a re-push even when the hub seed looks valid.

    Raises:
        RuntimeError: A required muse CLI command failed.
        TimeoutError: The pushed seed did not become readable in time.
    """
    name = f"{SEED_PREFIX}{size}"
    slug = f"gabriel/{name}"

    cache_key = (hub_url, size)
    if not reseed and cache_key in _hub_seed_cache:
        return _hub_seed_cache[cache_key]

    current_hash = wire_hash()

    out = muse_check("hub", "repo", "list", "--limit", "200",
                     "--hub", hub_url, "--json", cwd=REPO_ROOT)
    repos = json.loads(out).get("repos", [])
    match = next((r for r in repos if r["name"] == name), None)

    if not reseed and match is not None:
        # The wire hash travels in the repo description as "wire_hash=<hex>".
        # ``or ""`` tolerates a description that is present but null.
        desc = match.get("description") or ""
        stored_hash = ""
        for part in desc.split():
            if part.startswith("wire_hash="):
                stored_hash = part.split("=", 1)[1]
        # A repo created via wizard but never pushed has no usable head commit.
        head_commit = match.get("head_commit_id") or match.get("head_commit") or ""

        if stored_hash == current_hash and head_commit:
            _hub_seed_cache[cache_key] = slug
            return slug

        # Report the actual reason exactly once.
        if stored_hash == current_hash:
            print(f" head_commit missing — rebuilding hub seed {name}…")
        else:
            print(f" wire_hash changed — rebuilding hub seed {name}…")
        muse_check("hub", "repo", "delete", slug, "--yes",
                   "--hub", hub_url, "--json", cwd=REPO_ROOT)
        match = None  # the repo is gone; it must be recreated below

    # Create only when no repo exists. On reseed of an existing repo, creation
    # is skipped and the push below lands on top of it.
    if match is None:
        muse_check("hub", "repo", "create", "--name", name,
                   "--description", f"wire_hash={current_hash}",
                   "--visibility", "public", "--no-init", "--hub", hub_url, "--json",
                   cwd=REPO_ROOT)

    seed_dir = ensure_local_seed(size)
    # Always reset origin so stale tracking refs never cause a silent no-op push.
    # bench_push uses the same remove+add pattern on its per-run copies.
    muse("remote", "remove", "origin", cwd=seed_dir)  # ignore error if absent
    muse_check("remote", "add", "origin", f"{hub_url}/{slug}", cwd=seed_dir)
    muse_check("push", "origin", "main", cwd=seed_dir)
    n_commits, _, _ = SIZE_MATRIX[size]
    _wait_indexed(hub_url, slug, n_commits)
    _hub_seed_cache[cache_key] = slug
    return slug
501
502
503	# ── verb benchmarks ───────────────────────────────────────────────────────────
504
def bench_push(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Time ``muse push origin main`` of the cached local seed, once per run.

    Every run creates a fresh hub repo, copies the cached seed into a
    throwaway directory (so the cache's own remote stays untouched), points
    ``origin`` at the new repo and times the push. Only successful pushes
    contribute a sample; failures are printed. ``hub_alias`` is not used.

    Returns the elapsed time in ms of each successful run.
    """
    _ = SIZE_MATRIX[size]  # unknown size → KeyError before any hub work
    samples: list[float] = []
    local = ensure_local_seed(size)

    for run_i in range(runs):
        suffix = os.urandom(3).hex()
        slug = create_repo(hub_url, f"{BENCH_PREFIX}push-{size}-{run_i}-{suffix}")
        run_dir = Path(tempfile.mkdtemp(prefix="muse_bench_push_"))
        run_repo = run_dir / "repo"
        try:
            shutil.copytree(str(local), str(run_repo), symlinks=False)
            muse("remote", "remove", "origin", cwd=run_repo)  # ignore error if absent
            # NOTE(review): the remote URL prefixes "gabriel/" to the value
            # returned by create_repo, while the delete below passes that value
            # as-is (and _bench_fetch_or_pull deletes "gabriel/<slug>").
            # Confirm which form `hub repo delete` expects.
            remote_url = f"{hub_url}/gabriel/{slug}"
            muse_check("remote", "add", "origin", remote_url, cwd=run_repo)
            ms, ok, err = timed_muse("push", "origin", "main", cwd=run_repo)
            if not ok:
                print(f"\n ERROR: {err}", flush=True)
            else:
                samples.append(ms)
                print(f" {ms:.0f}ms", end="", flush=True)
        finally:
            shutil.rmtree(run_dir, ignore_errors=True)
            if cleanup:
                _safe_delete_repo(hub_url, slug)

    return samples
538
539
def bench_clone(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Measure clone throughput against the persistent hub seed repo.

    Ensures the hub seed for *size* exists, then times ``muse clone`` of it
    into a fresh temp directory once per run. Only successful clones
    contribute a sample; failures are printed.

    Args:
        hub_url: Base URL of the hub.
        hub_alias: Passed through to ensure_hub_seed.
        size: Key into SIZE_MATRIX.
        runs: Number of clones to time.
        cleanup: Unused here — the seed repo is persistent and the local
            clone directory is always removed.

    Returns:
        Elapsed time in ms of each successful clone.
    """
    times: list[float] = []
    slug = ensure_hub_seed(hub_url, hub_alias, size)
    clone_url = f"{hub_url}/{slug}"

    for _ in range(runs):
        clone_parent = Path(tempfile.mkdtemp(prefix="muse_bench_clone_"))
        try:
            ms, ok, err = timed_muse("clone", clone_url, cwd=clone_parent)
            if ok:
                times.append(ms)
                print(f" {ms:.0f}ms", end="", flush=True)
            else:
                print(f"\n ERROR: {err}", flush=True)
        finally:
            shutil.rmtree(clone_parent, ignore_errors=True)

    return times
560
561
def _bench_fetch_or_pull(
    verb: str,
    hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool,
) -> list[float]:
    """Time ``muse fetch`` (verb == "fetch") or ``muse pull`` (any other verb).

    Scenario per run:
      1. Make sure the persistent hub seed exists (ensure_hub_seed).
      2. Create a per-run hub repo and push a copy of the local seed to it,
         so delta pushes never accumulate on the persistent seed.
      3. Clone the per-run repo — the client is at the seeded state.
      4. Add exactly one delta commit to the source copy and push it.
      5. Time fetch/pull in the clone, which is now one commit behind.

    The delta is always a single commit, so the sample reflects wire-protocol
    latency rather than local commit-creation cost. Each preparatory stage
    prints a ``[step]`` timing line. Returns the ms of each successful run.
    """
    n_commits, _, file_size = SIZE_MATRIX[size]
    samples: list[float] = []
    ensure_hub_seed(hub_url, hub_alias, size)

    now = time.time

    def _lap(label: str, since: float) -> float:
        """Print the time spent since *since* and return the new timestamp."""
        stamp = now()
        print(f"\n [step] {label}: {(stamp - since) * 1000:.0f}ms", flush=True)
        return stamp

    # The command under measurement, chosen once.
    measured_args = ("fetch", "origin") if verb == "fetch" else ("pull", "origin", "main")

    for run_i in range(runs):
        run_name = f"{BENCH_PREFIX}{verb}-{size}-{run_i}-{os.urandom(3).hex()}"
        run_slug = create_repo(hub_url, run_name)
        full_slug = f"gabriel/{run_slug}"
        remote_url = f"{hub_url}/{full_slug}"

        run_dir = Path(tempfile.mkdtemp(prefix=f"muse_bench_{verb}_src_"))
        clone_parent = Path(tempfile.mkdtemp(prefix=f"muse_bench_{verb}_dst_"))
        try:
            mark = now()
            # Source side: copy of the local seed, pushed to the per-run repo.
            local_seed = ensure_local_seed(size)
            run_repo = run_dir / "repo"
            shutil.copytree(str(local_seed), str(run_repo), symlinks=False)
            mark = _lap("copytree", mark)

            muse("remote", "remove", "origin", cwd=run_repo)  # ignore error if absent
            muse_check("remote", "add", "origin", remote_url, cwd=run_repo)
            muse_check("push", "origin", "main", cwd=run_repo)
            mark = _lap("seed push", mark)

            # Large mpacks defer commit writes to a background job, so wait
            # for the hub to serve the repo before cloning it.
            _wait_indexed(hub_url, full_slug, n_commits)
            mark = _lap("wait_indexed seed", mark)

            # Client side: clone at the seeded state.
            muse_check("clone", remote_url, cwd=clone_parent)
            cloned = clone_parent / run_slug
            mark = _lap("clone", mark)

            # The copied seed has no files on disk. Materialise the working
            # tree first (--force discards the pending deletions muse would
            # otherwise see) so the delta commit adds a file instead of
            # deleting the seed files.
            muse_check("checkout", "--force", "main", cwd=run_repo)
            delta_file = run_repo / f"delta_{run_i}.txt"
            delta_file.write_bytes(bench_text(file_size, run_i, 0))
            muse_check("code", "add", ".", cwd=run_repo)
            muse_check("commit", "-m", f"delta {run_i}",
                       "--agent-id", "bench", "--model-id", "bench", cwd=run_repo)
            muse_check("push", "origin", "main", cwd=run_repo)
            mark = _lap("delta push", mark)

            _wait_indexed(hub_url, full_slug, n_commits + 1)
            mark = _lap("wait_indexed delta", mark)

            # The measurement itself: the clone is one commit behind.
            ms, ok, err = timed_muse(*measured_args, cwd=cloned)
            _lap(f"{verb} measurement", mark)

            if not ok:
                print(f"\n ERROR: {err}", flush=True)
            else:
                samples.append(ms)
                print(f" {ms:.0f}ms", end="", flush=True)
        finally:
            shutil.rmtree(run_dir, ignore_errors=True)
            shutil.rmtree(clone_parent, ignore_errors=True)
            if cleanup:
                _safe_delete_repo(hub_url, full_slug)

    return samples
654
655
def bench_fetch(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Benchmark ``muse fetch``; thin wrapper over _bench_fetch_or_pull."""
    return _bench_fetch_or_pull(
        "fetch",
        hub_url=hub_url,
        hub_alias=hub_alias,
        size=size,
        runs=runs,
        cleanup=cleanup,
    )
658
659
def bench_pull(hub_url: str, hub_alias: str, size: str, runs: int, cleanup: bool) -> list[float]:
    """Benchmark ``muse pull``; thin wrapper over _bench_fetch_or_pull."""
    return _bench_fetch_or_pull(
        "pull",
        hub_url=hub_url,
        hub_alias=hub_alias,
        size=size,
        runs=runs,
        cleanup=cleanup,
    )
662
663
664
665
# Dispatch table: --verb value -> benchmark function. All entries share the
# signature (hub_url, hub_alias, size, runs, cleanup) -> list of ms samples.
VERB_FNS = dict(
    push=bench_push,
    clone=bench_clone,
    fetch=bench_fetch,
    pull=bench_pull,
)
672
673
674	# ── output ────────────────────────────────────────────────────────────────────
675
def gate_str(verb: str, size: str, hub_alias: str, p50: float) -> str:
    """Render the pass/fail cell for one (verb, size, hub) combination.

    Returns "—" when GATES has no limit for the combination (missing key or
    None). Otherwise returns "✓" or "✗" followed by the limit in whole
    seconds; the mark is "✓" when *p50* does not exceed the limit.
    """
    limit_ms = GATES.get((verb, size, hub_alias))
    if limit_ms is None:
        return "—"
    mark = "✓" if p50 <= limit_ms else "✗"
    return f"{mark} <{int(limit_ms / 1000)}s"
681
682
def print_table(rows: list[tuple], size: str) -> None:
    """Print a fixed-width result table for one size to stdout.

    Each row is ``(verb, size, hub_alias, times)``. A row with no samples is
    shown as FAILED; otherwise the median is reported as p50 and p95 is the
    nearest-rank 95th percentile of the sorted samples.
    """
    headers = ("verb", "size", "hub", "p50 (ms)", "p95 (ms)", "gate")
    widths = (6, 5, 12, 10, 10, 12)
    sep = " "

    def _line(cells) -> str:
        return sep.join(cell.ljust(width) for cell, width in zip(cells, widths))

    print()
    print(f"muse CLI bench — size={size.upper()}")
    print("=" * (sum(widths) + len(sep) * (len(widths) - 1)))
    print(_line(headers))
    print(sep.join("-" * width for width in widths))

    for verb, sz, hub_alias, times in rows:
        if times:
            ordered = sorted(times)
            p50 = statistics.median(ordered)
            rank = math.ceil(len(ordered) * 0.95) - 1
            p95 = ordered[min(len(ordered) - 1, max(0, rank))]
            cells = [verb, sz, hub_alias, f"{p50:.0f}", f"{p95:.0f}",
                     gate_str(verb, sz, hub_alias, p50)]
        else:
            cells = [verb, sz, hub_alias, "FAILED", "FAILED", "✗"]
        print(_line(cells))

    print()
703
704
def markdown_table(rows: list[tuple], size: str) -> str:
    """Render the results for one size as a Markdown section.

    The heading states the commit count, total file count and approximate
    payload in MB ("<1" when it rounds down to zero). Rows follow the same
    FAILED / p50 / p95 / gate rules as print_table.
    """
    n_commits, fpc, fsz = SIZE_MATRIX[size]
    total_mb = (n_commits * fpc * fsz) // (1024 * 1024)
    out = [
        f"### {size.upper()} ({n_commits} commits, {n_commits*fpc} files, ~{total_mb or '<1'} MB)",
        "",
        "| verb | hub | p50 (ms) | p95 (ms) | gate |",
        "|------|-----|----------|----------|------|",
    ]
    for verb, sz, hub_alias, times in rows:
        if not times:
            out.append(f"| {verb} | {hub_alias} | FAILED | FAILED | ✗ |")
            continue
        ordered = sorted(times)
        p50 = statistics.median(ordered)
        rank = math.ceil(len(ordered) * 0.95) - 1
        p95 = ordered[min(len(ordered) - 1, max(0, rank))]
        gate = gate_str(verb, sz, hub_alias, p50)
        out.append(f"| {verb} | {hub_alias} | {p50:.0f} | {p95:.0f} | {gate} |")
    return "\n".join(out)
724
725
726	# ── main ──────────────────────────────────────────────────────────────────────
727
def main() -> None:
    """Parse the command line, run the selected benchmarks and print results.

    For every requested size: optionally rebuild the local seed (--reseed),
    purge stale bench repos on each hub, optionally re-push the hub seed
    (--reseed-hub), run each verb against each hub, then print a text table.
    A Markdown version of all tables is printed at the end.

    A failing verb is reported and recorded as FAILED; it does not abort the
    remaining benchmarks.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--size", nargs="+", default=["xs"],
                        help="xs s m l xl all")
    parser.add_argument("--hubs", nargs="+", default=["localhost", "staging"],
                        choices=["localhost", "staging"])
    parser.add_argument("--verb", nargs="+", default=["push", "clone", "fetch", "pull"],
                        choices=["push", "clone", "fetch", "pull"])
    parser.add_argument("--runs", type=int, default=1)
    parser.add_argument("--no-cleanup", action="store_true")
    parser.add_argument("--reseed", action="store_true",
                        help="Rebuild local seed cache even if valid")
    parser.add_argument("--reseed-hub", action="store_true",
                        help="Re-push hub seed repos even if present")
    args = parser.parse_args()

    sizes = list(SIZE_MATRIX) if "all" in args.size else args.size
    # Reject bad input up front: an unknown size would otherwise fail every
    # verb and then crash with KeyError while rendering the Markdown table.
    unknown = [s for s in sizes if s not in SIZE_MATRIX]
    if unknown:
        parser.error(
            f"unknown --size value(s): {', '.join(unknown)} "
            f"(choose from {', '.join(SIZE_MATRIX)}, all)"
        )
    if args.runs < 1:
        parser.error("--runs must be at least 1")
    cleanup = not args.no_cleanup

    print(f"muse CLI bench hubs={args.hubs} verbs={args.verb} "
          f"sizes={sizes} runs={args.runs}")
    print()

    all_markdown: list[str] = []
    total_start = time.perf_counter()

    for size in sizes:
        if args.reseed:
            # Honour --reseed: force a rebuild of the cached local seed.
            print(f" rebuilding local seed {size}…", flush=True)
            ensure_local_seed(size, reseed=True)

        for hub_alias in args.hubs:
            hub_url = HUB_URLS[hub_alias]
            purge_stale(hub_url)
            if args.reseed_hub:
                # Honour --reseed-hub. A failure is reported but does not
                # abort the run; the verbs below surface any real problem.
                print(f" re-pushing hub seed {size}/{hub_alias}…", flush=True)
                try:
                    ensure_hub_seed(hub_url, hub_alias, size, reseed=True)
                except Exception as exc:
                    print(f"\n ERROR: hub reseed {size}/{hub_alias} failed: {exc}", flush=True)

        rows: list[tuple] = []
        size_start = time.perf_counter()

        for verb in args.verb:
            for hub_alias in args.hubs:
                hub_url = HUB_URLS[hub_alias]
                verb_start = time.perf_counter()
                print(f" {verb}/{size}/{hub_alias}…", end="", flush=True)
                try:
                    times = VERB_FNS[verb](hub_url, hub_alias, size, args.runs, cleanup)
                except Exception as exc:
                    # Top-level boundary: report and record the verb as failed.
                    print(f"\n ERROR: {exc}", flush=True)
                    times = []
                verb_elapsed = (time.perf_counter() - verb_start) * 1000
                # Inline verb summary: p50 + gate + total wall time for this verb
                if times:
                    p50 = statistics.median(times)
                    g = gate_str(verb, size, hub_alias, p50)
                    print(f" → p50={p50:.0f}ms {g} (verb wall={verb_elapsed:.0f}ms)")
                else:
                    print(f" → FAILED (verb wall={verb_elapsed:.0f}ms)")
                rows.append((verb, size, hub_alias, times))

        size_elapsed = (time.perf_counter() - size_start) * 1000
        print_table(rows, size)
        print(f" size={size.upper()} total: {size_elapsed:.0f}ms")
        all_markdown.append(markdown_table(rows, size))

    total_elapsed = (time.perf_counter() - total_start) * 1000
    print(f"\n ── overall: {total_elapsed:.0f}ms ──\n")

    if all_markdown:
        print("Markdown (copy to issue comment):")
        print("\n\n".join(all_markdown))


if __name__ == "__main__":
    main()

File History 2 commits

sha256:4992098130166d191cefed0a2821d19cd3cdd3cf50867a4e715c2b30636826c7 fix: repair syntax errors from typing annotation cleanup Sonnet 4.6 20 days ago

sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ 20 days ago

patch bench_cli.py 2× 20 days ago

insert bench_clone 2× 20 days ago

insert bench_fetch 2× 20 days ago

insert _bench_fetch_or_pull 2× 20 days ago

insert BENCH_PREFIX 2× 20 days ago

insert bench_pull 2× 20 days ago

insert bench_push 2× 20 days ago

insert bench_text 2× 20 days ago

insert BIP39_WORDS 2× 20 days ago

insert CACHE_DIR 2× 20 days ago

insert create_repo 2× 20 days ago

insert ensure_hub_seed 2× 20 days ago

insert ensure_local_seed 2× 20 days ago

insert GATES 2× 20 days ago

insert gate_str 2× 20 days ago

insert _hub_seed_cache 2× 20 days ago

insert HUB_URLS 2× 20 days ago

insert annotations 2× 20 days ago

insert argparse 2× 20 days ago

insert blob_id 2× 20 days ago

insert CommitRecord 2× 20 days ago

insert compute_commit_id 2× 20 days ago

insert compute_snapshot_id 2× 20 days ago

insert _created_object_shards 2× 20 days ago

insert datetime 2× 20 days ago

insert hash_file 2× 20 days ago

insert itertools 2× 20 days ago

insert json 2× 20 days ago

insert math 2× 20 days ago

insert Mnemonic 2× 20 days ago

insert mpack 2× 20 days ago

insert muse_dir 2× 20 days ago

insert musehub_wire 2× 20 days ago

insert os 2× 20 days ago

insert Path 2× 20 days ago

insert re 2× 20 days ago

insert server_objects_dir 2× 20 days ago

insert shutil 2× 20 days ago

insert SnapshotRecord 2× 20 days ago

insert statistics 2× 20 days ago

insert subprocess 2× 20 days ago

insert sys 2× 20 days ago

insert tempfile 2× 20 days ago

insert time 2× 20 days ago

insert transport 2× 20 days ago

insert write_branch_ref 2× 20 days ago

insert write_commit 2× 20 days ago

insert write_object 2× 20 days ago

insert write_snapshot 2× 20 days ago

insert LOCALHOST 2× 20 days ago

Pathtests/bench_cli.py

Lines796

Size32.2 KB

LangPython

Refsha256:3ff9c9863a9891bdcde71b4a43228f66d0493e38b7cc1d09fe9eb7de774046b2

Object ID

sha256:f4b52e231604ed2d8c8cafa8cb4a54afc5963cafbfc96d5efb7c8e74409fdc1e…

Last commit

sha256:4992098130166d191cefed0a2821d19cd3cdd3cf50867a4e715c2b30636826c7

fix: repair syntax errors from typing annotation …

20 days ago

Quick links

Blame History