gabriel / muse public

test_cmd_verify_shallow.py file-level

at sha256:d · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:4 Merge branch 'dev' into main · gabriel · Jun 17, 2026
1 """Integration tests for shallow-object-store + promisor-remote architecture.
2
3 Core semantics
4 --------------
5 A Muse repo's local object store is legitimately *shallow*: it may not hold
6 every historical object blob. Missing objects are not automatically failures.
7 Their status depends on what is known about the remote:
8
9 PRESENT → object file exists locally → verified (checked)
10 PROMISED → absent but a promisor remote exists → not a failure; counted
11 MISSING → absent AND no promisor remote at all → kind="object" failure
12
13 Shallow graft semantics
14 -----------------------
15 `.muse/shallow` lists the deepest commits included in local history. The BFS
16 walk in run_verify stops at these commits — it does NOT enqueue their parents.
17 Objects beyond the graft boundary are not expected locally.
18
19 strict mode
20 -----------
21 `run_verify(strict=True)` treats every absent object as a failure, regardless
22 of promisor remotes. Use this when you need to prove complete local integrity.
23
24 Coverage
25 --------
26 U — unit: VerifyResult has promised_objects, shallow_commits, is_shallow,
27 promisor_remotes fields
28 E — promisor: missing objects with promisor → not failures
29 missing objects without promisor → failures
30 F — strict: strict=True fails on promised objects
31 S — shallow: BFS stops at graft boundary; parents beyond not checked
32 C — CLI: --strict flag; JSON output includes new fields
33 I — integration: real repo layout, multi-branch, orphan sweep
34 """
35
36 from __future__ import annotations
37 from collections.abc import Mapping
38
39 import datetime
40 import json
41 import pathlib
42 import threading
43
44 import pytest
45 from tests.cli_test_helper import CliRunner, InvokeResult
46
47 from muse.core.types import blob_id, long_id, fake_id
48 from muse.core.object_store import object_path, write_object
49 from muse.core.shallow import add_shallow, write_shallow
50 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
51 from muse.core.commits import (
52 CommitRecord,
53 write_commit,
54 )
55 from muse.core.snapshots import (
56 SnapshotRecord,
57 write_snapshot,
58 )
59 from muse.core.verify import run_verify
60 from muse.core.paths import muse_dir, ref_path
61
# Repository identifier that _init_repo writes into each fixture's repo.json.
_REPO_ID = "shallow-verify-test"
# Shared CLI runner; _invoke drives every command-line test through it.
runner = CliRunner()
64
65
66 # ---------------------------------------------------------------------------
67 # Helpers
68 # ---------------------------------------------------------------------------
69
70
71
def _init_repo(
    path: pathlib.Path,
    remotes: Mapping[str, Mapping[str, object]] | None = None,
) -> pathlib.Path:
    """Lay out a minimal on-disk repository under *path* and return *path*.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories inside the directory returned by ``muse_dir(path)``, points
    ``HEAD`` at ``refs/heads/main`` and writes ``repo.json``.

    Args:
        path: Directory that becomes the repository root.
        remotes: Optional mapping of remote name to its settings.  Each
            settings mapping must contain ``"url"`` and may contain
            ``"promisor"``; when the key is present its truthiness is written
            as a TOML boolean.  When *remotes* is empty or ``None`` no
            ``config.toml`` is written at all.

    Returns:
        The same *path* that was passed in.
    """
    dot_muse = muse_dir(path)
    for sub in ("commits", "snapshots", "objects", "refs/heads"):
        (dot_muse / sub).mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8 keeps the fixture files independent of the host locale.
    (dot_muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")
    (dot_muse / "repo.json").write_text(
        json.dumps({"repo_id": _REPO_ID, "domain": "code"}),
        encoding="utf-8",
    )

    if remotes:
        lines: list[str] = []
        for name, cfg in remotes.items():
            lines.append(f"[remotes.{name}]")
            lines.append(f'url = "{cfg["url"]}"')
            # Only emit the key when the caller set it, so an absent
            # "promisor" entry stays distinguishable from an explicit False.
            if "promisor" in cfg:
                val = "true" if cfg["promisor"] else "false"
                lines.append(f"promisor = {val}")
        (dot_muse / "config.toml").write_text(
            "\n".join(lines) + "\n", encoding="utf-8"
        )
    return path
93
94
def _make_commit(
    root: pathlib.Path,
    parent_id: str | None = None,
    content: bytes = b"data",
    branch: str = "main",
    idx: int = 0,
    write_objects: bool = True,
) -> tuple[str, str]:
    """Create a commit and return ``(commit_id, obj_id)``.

    The blob payload is *content* followed by the decimal form of *idx*, and
    the snapshot manifest maps ``file_{idx}.txt`` to that blob's id.  The
    commit timestamp is 2026-01-01 UTC plus *idx* hours.  After the commit
    record is written, the ref file for *branch* is overwritten with the new
    commit id.

    Args:
        root: Repository root.
        parent_id: Parent commit id, or ``None`` for a commit with no parent.
        content: Base bytes of the blob payload.
        branch: Branch recorded on the commit and whose ref is updated.
        idx: Index that varies the payload, file name, message and timestamp.
        write_objects: When ``False`` the blob is NOT written to the object
            store — simulating a shallow gap.  The snapshot and commit
            records are written either way.

    Returns:
        The new commit id and the id of the blob it references.
    """
    raw = content + str(idx).encode()
    obj_id = blob_id(raw)
    if write_objects:
        write_object(root, obj_id, raw)

    manifest = {f"file_{idx}.txt": obj_id}
    snap_id = compute_snapshot_id(manifest)
    # The snapshot record is written unconditionally so that the commit can
    # be read back; only the object file is omitted for a shallow commit.
    write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest))

    committed_at = (
        datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        + datetime.timedelta(hours=idx)
    )
    message = f"commit {idx}"
    parent_ids = [parent_id] if parent_id else []
    commit_id = compute_commit_id(
        parent_ids, snap_id, message, committed_at.isoformat(),
    )
    write_commit(
        root,
        CommitRecord(
            commit_id=commit_id,
            branch=branch,
            snapshot_id=snap_id,
            message=message,
            committed_at=committed_at,
            parent_commit_id=parent_id,
        ),
    )
    # Move the branch tip to the commit that was just written.
    ref_path(root, branch).write_text(commit_id)
    return commit_id, obj_id
141
142
def _env(repo: pathlib.Path) -> Mapping[str, str]:
    """Return the environment overrides that carry *repo* as a string."""
    overrides: dict[str, str] = {}
    overrides["MUSE_REPO_ROOT"] = str(repo)
    return overrides
145
146
def _invoke(repo: pathlib.Path, *args: str) -> InvokeResult:
    """Invoke the CLI entry point with ``verify`` followed by *args*.

    The call goes through the module-level ``runner`` with the environment
    produced by ``_env(repo)``.
    """
    # Imported here rather than at module level, as in the original helper.
    from muse.cli.app import main as cli_main

    argv = ["verify"]
    argv.extend(args)
    return runner.invoke(cli_main, argv, env=_env(repo))
150
151
152 # ---------------------------------------------------------------------------
153 # U — VerifyResult shape: new fields present
154 # ---------------------------------------------------------------------------
155
class TestVerifyResultShape:
    """The run_verify result exposes the shallow/promisor keys with sane defaults."""

    def test_promised_objects_field_present(self, tmp_path: pathlib.Path) -> None:
        """An empty repo's result contains ``promised_objects``."""
        report = run_verify(_init_repo(tmp_path))
        assert "promised_objects" in report

    def test_shallow_commits_field_present(self, tmp_path: pathlib.Path) -> None:
        """An empty repo's result contains ``shallow_commits``."""
        report = run_verify(_init_repo(tmp_path))
        assert "shallow_commits" in report

    def test_is_shallow_field_present(self, tmp_path: pathlib.Path) -> None:
        """An empty repo's result contains ``is_shallow``."""
        report = run_verify(_init_repo(tmp_path))
        assert "is_shallow" in report

    def test_promisor_remotes_field_present(self, tmp_path: pathlib.Path) -> None:
        """An empty repo's result contains ``promisor_remotes``."""
        report = run_verify(_init_repo(tmp_path))
        assert "promisor_remotes" in report

    def test_promised_objects_zero_for_clean_repo(self, tmp_path: pathlib.Path) -> None:
        """With one fully written commit nothing is counted as promised."""
        root = _init_repo(tmp_path)
        _make_commit(root, idx=0)
        report = run_verify(root)
        assert report["promised_objects"] == 0

    def test_is_shallow_false_without_shallow_file(self, tmp_path: pathlib.Path) -> None:
        """A repo that never recorded a shallow commit reports ``is_shallow`` False."""
        report = run_verify(_init_repo(tmp_path))
        assert report["is_shallow"] is False

    def test_promisor_remotes_empty_without_config(self, tmp_path: pathlib.Path) -> None:
        """Without any configured remote the promisor list is empty."""
        report = run_verify(_init_repo(tmp_path))
        assert report["promisor_remotes"] == []
192
193
194 # ---------------------------------------------------------------------------
195 # E — Promisor semantics: PROMISED ≠ failure
196 # ---------------------------------------------------------------------------
197
class TestPromisorSemantics:
    """Absent objects are expected to be tolerated only when a promisor remote exists."""

    def test_missing_object_with_promisor_not_a_failure(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A configured remote turns a missing object into a promised one."""
        repo = _init_repo(tmp_path, remotes={
            "local": {"url": "https://localhost:1337/gabriel/muse"},
        })
        # Write commit + snapshot but NOT the object — shallow gap
        _make_commit(repo, idx=0, write_objects=False)
        result = run_verify(repo)
        assert result["all_ok"] is True
        assert result["promised_objects"] >= 1
        assert result["failures"] == []

    def test_missing_object_without_promisor_is_failure(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With no remotes a missing object is reported as a kind="object" failure."""
        repo = _init_repo(tmp_path)  # no remotes
        _make_commit(repo, idx=0, write_objects=False)
        result = run_verify(repo)
        assert result["all_ok"] is False
        assert any(f["kind"] == "object" for f in result["failures"])

    def test_promised_objects_counted_correctly(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Three chained commits with absent objects yield exactly three promised."""
        repo = _init_repo(tmp_path, remotes={
            "local": {"url": "https://localhost:1337/gabriel/muse"},
        })
        # 3 commits, each with a missing object
        prev: str | None = None
        for i in range(3):
            prev, _ = _make_commit(repo, parent_id=prev, idx=i, write_objects=False)
        result = run_verify(repo)
        assert result["promised_objects"] == 3
        assert result["all_ok"] is True

    def test_present_objects_not_counted_as_promised(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object that exists locally never counts as promised."""
        repo = _init_repo(tmp_path, remotes={
            "local": {"url": "https://localhost:1337/gabriel/muse"},
        })
        _make_commit(repo, idx=0, write_objects=True)  # object IS present
        result = run_verify(repo)
        assert result["promised_objects"] == 0

    def test_promisor_false_opt_out_causes_failure(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A remote configured with ``promisor = false`` does not cover missing objects."""
        repo = _init_repo(tmp_path, remotes={
            "mirror": {"url": "http://mirror.example.com/muse", "promisor": False},
        })
        _make_commit(repo, idx=0, write_objects=False)
        result = run_verify(repo)
        assert result["all_ok"] is False
        assert result["promised_objects"] == 0

    def test_promisor_remotes_listed_in_result(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Every configured remote name appears in ``promisor_remotes``."""
        repo = _init_repo(tmp_path, remotes={
            "local": {"url": "https://localhost:1337/gabriel/muse"},
            "staging": {"url": "https://staging.musehub.ai/gabriel/muse"},
        })
        result = run_verify(repo)
        assert "local" in result["promisor_remotes"]
        assert "staging" in result["promisor_remotes"]

    def test_mixed_present_and_promised(self, tmp_path: pathlib.Path) -> None:
        """One present and one absent object are both accounted for without failure."""
        repo = _init_repo(tmp_path, remotes={
            "local": {"url": "https://localhost:1337/gabriel/muse"},
        })
        _make_commit(repo, idx=0, write_objects=True)  # PRESENT
        # The second commit has no parent link; _make_commit itself rewrites
        # the "main" ref, so the branch tip is the idx=1 commit from here on.
        _make_commit(repo, parent_id=None, idx=1, write_objects=False)  # PROMISED
        result = run_verify(repo)
        assert result["all_ok"] is True
        assert result["objects_checked"] >= 1  # idx=0 present and checked
        assert result["promised_objects"] >= 1  # idx=1 promised
278
279
280 # ---------------------------------------------------------------------------
281 # F — strict mode: promised objects become failures
282 # ---------------------------------------------------------------------------
283
class TestStrictMode:
    """With ``strict=True`` an absent object is a failure even if a remote exists."""

    def test_strict_fails_on_promised_object(self, tmp_path: pathlib.Path) -> None:
        """A missing object is reported as a kind="object" failure under strict."""
        remote_cfg = {"local": {"url": "https://localhost:1337/gabriel/muse"}}
        root = _init_repo(tmp_path, remotes=remote_cfg)
        _make_commit(root, idx=0, write_objects=False)
        report = run_verify(root, strict=True)
        assert report["all_ok"] is False
        assert any(entry["kind"] == "object" for entry in report["failures"])

    def test_strict_does_not_change_result_for_present_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A fully populated store still verifies cleanly under strict."""
        remote_cfg = {"local": {"url": "https://localhost:1337/gabriel/muse"}}
        root = _init_repo(tmp_path, remotes=remote_cfg)
        _make_commit(root, idx=0, write_objects=True)
        report = run_verify(root, strict=True)
        assert report["all_ok"] is True
        assert report["promised_objects"] == 0

    def test_strict_promised_objects_still_zero_in_strict(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Under strict, absent objects land in failures and are not counted as promised."""
        remote_cfg = {"local": {"url": "https://localhost:1337/gabriel/muse"}}
        root = _init_repo(tmp_path, remotes=remote_cfg)
        _make_commit(root, idx=0, write_objects=False)
        report = run_verify(root, strict=True)
        assert report["promised_objects"] == 0

    def test_strict_fail_fast(self, tmp_path: pathlib.Path) -> None:
        """``fail_fast=True`` yields exactly one failure for five missing objects."""
        remote_cfg = {"local": {"url": "https://localhost:1337/gabriel/muse"}}
        root = _init_repo(tmp_path, remotes=remote_cfg)
        # Build a linear chain of five commits, none of which has its object.
        parent: str | None = None
        for n in range(5):
            parent, _ = _make_commit(root, parent_id=parent, idx=n, write_objects=False)
        report = run_verify(root, strict=True, fail_fast=True)
        assert report["all_ok"] is False
        assert len(report["failures"]) == 1
326
327
328 # ---------------------------------------------------------------------------
329 # S — shallow graft: BFS stops at boundary
330 # ---------------------------------------------------------------------------
331
class TestShallowGraft:
    """Commits recorded as shallow grafts bound how far verification looks."""

    def test_is_shallow_true_when_shallow_file_exists(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Recording one shallow commit makes the result report ``is_shallow``."""
        repo = _init_repo(tmp_path)
        cid, _ = _make_commit(repo, idx=0)
        add_shallow(repo, cid)
        result = run_verify(repo)
        assert result["is_shallow"] is True

    def test_shallow_commits_counted(self, tmp_path: pathlib.Path) -> None:
        """A recorded shallow commit is reflected in ``shallow_commits``."""
        repo = _init_repo(tmp_path)
        cid, _ = _make_commit(repo, idx=0)
        add_shallow(repo, cid)
        result = run_verify(repo)
        assert result["shallow_commits"] >= 1

    def test_parents_beyond_graft_not_checked(self, tmp_path: pathlib.Path) -> None:
        """Commit chain: old → graft → new.

        The graft is in .muse/shallow. The old commit's objects are not in
        the local store. Verify must NOT report the old commit's objects as
        missing — they're beyond the graft boundary.
        """
        repo = _init_repo(tmp_path)  # no remotes — would fail if walked past graft

        # old commit: object NOT in store
        old_cid, _ = _make_commit(repo, idx=0, write_objects=False)

        # graft commit: parent=old, object IS in store
        graft_cid, _ = _make_commit(repo, parent_id=old_cid, idx=1, write_objects=True)
        add_shallow(repo, graft_cid)

        # current tip: parent=graft, object IS in store
        _make_commit(repo, parent_id=graft_cid, idx=2, write_objects=True)

        result = run_verify(repo)
        # No failures: old commit's objects are beyond the graft, not checked
        assert result["all_ok"] is True, f"Unexpected failures: {result['failures']}"

    def test_graft_objects_themselves_are_checked(self, tmp_path: pathlib.Path) -> None:
        """The graft commit's own objects ARE expected locally."""
        repo = _init_repo(tmp_path)
        cid, _ = _make_commit(repo, idx=0, write_objects=True)
        add_shallow(repo, cid)
        result = run_verify(repo)
        assert result["all_ok"] is True
        assert result["objects_checked"] >= 1

    def test_multiple_grafts(self, tmp_path: pathlib.Path) -> None:
        """Two grafts written in one call are both counted."""
        # Two grafts on separate branches so both are reachable from branch refs.
        repo = _init_repo(tmp_path)
        cid_a, _ = _make_commit(repo, idx=0, branch="main", write_objects=True)
        cid_b, _ = _make_commit(repo, idx=1, branch="dev", write_objects=True)
        write_shallow(repo, {cid_a, cid_b})
        result = run_verify(repo)
        assert result["shallow_commits"] >= 2
        assert result["is_shallow"] is True
389
390
391 # ---------------------------------------------------------------------------
392 # C — CLI surface
393 # ---------------------------------------------------------------------------
394
class TestCLIShallow:
    """The ``verify`` command's ``--json`` output and ``--strict`` flag."""

    def test_json_has_promised_objects(self, tmp_path: pathlib.Path) -> None:
        """The JSON payload carries a ``promised_objects`` key."""
        root = _init_repo(tmp_path)
        _make_commit(root, idx=0)
        outcome = _invoke(root, "--json")
        payload = json.loads(outcome.output)
        assert "promised_objects" in payload

    def test_json_has_shallow_commits(self, tmp_path: pathlib.Path) -> None:
        """The JSON payload carries a ``shallow_commits`` key."""
        root = _init_repo(tmp_path)
        _make_commit(root, idx=0)
        outcome = _invoke(root, "--json")
        payload = json.loads(outcome.output)
        assert "shallow_commits" in payload

    def test_json_has_is_shallow(self, tmp_path: pathlib.Path) -> None:
        """The JSON payload carries an ``is_shallow`` key."""
        root = _init_repo(tmp_path)
        _make_commit(root, idx=0)
        outcome = _invoke(root, "--json")
        payload = json.loads(outcome.output)
        assert "is_shallow" in payload

    def test_json_has_promisor_remotes(self, tmp_path: pathlib.Path) -> None:
        """The JSON payload carries a ``promisor_remotes`` key."""
        root = _init_repo(tmp_path)
        _make_commit(root, idx=0)
        outcome = _invoke(root, "--json")
        payload = json.loads(outcome.output)
        assert "promisor_remotes" in payload

    def test_strict_flag_exists(self, tmp_path: pathlib.Path) -> None:
        """``--strict`` is accepted: the exit code is 0 (ok) or 1 (failures)."""
        root = _init_repo(tmp_path)
        _make_commit(root, idx=0)
        outcome = _invoke(root, "--strict", "--json")
        # Any other exit code would mean the flag itself was rejected.
        assert outcome.exit_code in (0, 1)  # 0=ok 1=failures

    def test_strict_fails_on_promised_via_cli(self, tmp_path: pathlib.Path) -> None:
        """The same repo passes by default and fails with ``--strict``."""
        remote_cfg = {"local": {"url": "https://localhost:1337/gabriel/muse"}}
        root = _init_repo(tmp_path, remotes=remote_cfg)
        _make_commit(root, idx=0, write_objects=False)

        # Default run: the absent object is tolerated.
        lenient = _invoke(root, "--json")
        lenient_payload = json.loads(lenient.output)
        assert lenient_payload["all_ok"] is True

        # Strict run: the absent object is a failure and the exit code is 1.
        strict = _invoke(root, "--strict", "--json")
        assert strict.exit_code == 1
        strict_payload = json.loads(strict.output)
        assert strict_payload["all_ok"] is False

    def test_is_shallow_true_in_json_when_shallow_file(
        self, tmp_path: pathlib.Path
    ) -> None:
        """After a shallow commit is recorded the JSON reports ``is_shallow`` true."""
        root = _init_repo(tmp_path)
        tip, _ = _make_commit(root, idx=0)
        add_shallow(root, tip)
        outcome = _invoke(root, "--json")
        payload = json.loads(outcome.output)
        assert payload["is_shallow"] is True

    def test_promisor_remotes_listed_in_json(self, tmp_path: pathlib.Path) -> None:
        """A configured remote name shows up in the JSON ``promisor_remotes``."""
        remote_cfg = {"local": {"url": "https://localhost:1337/gabriel/muse"}}
        root = _init_repo(tmp_path, remotes=remote_cfg)
        _make_commit(root, idx=0)
        outcome = _invoke(root, "--json")
        payload = json.loads(outcome.output)
        assert "local" in payload["promisor_remotes"]
458
459
460 # ---------------------------------------------------------------------------
461 # I — Integration: realistic scenario
462 # ---------------------------------------------------------------------------
463
class TestIntegration:
    """End-to-end scenarios combining commit chains, grafts, orphans and the CLI."""

    def test_clean_repo_no_remotes_all_ok(self, tmp_path: pathlib.Path) -> None:
        """Five fully written commits and no remotes verify with nothing promised."""
        root = _init_repo(tmp_path)
        parent: str | None = None
        for n in range(5):
            parent, _ = _make_commit(root, parent_id=parent, idx=n)
        report = run_verify(root)
        assert report["all_ok"] is True
        assert report["promised_objects"] == 0

    def test_shallow_repo_with_promisor_all_ok(self, tmp_path: pathlib.Path) -> None:
        """Simulate a normal agent repo: recent objects present, history shallow."""
        remote_cfg = {"local": {"url": "https://localhost:1337/gabriel/muse"}}
        root = _init_repo(tmp_path, remotes=remote_cfg)

        # "Old" history: ten commits whose objects are not local (shallow gap).
        tip: str | None = None
        for n in range(10):
            tip, _ = _make_commit(root, parent_id=tip, idx=n, write_objects=False)
        # The last old commit becomes the graft boundary.
        add_shallow(root, tip)

        # "Recent" history: five commits whose objects are local.
        for n in range(10, 15):
            tip, _ = _make_commit(root, parent_id=tip, idx=n, write_objects=True)

        report = run_verify(root)
        assert report["all_ok"] is True
        assert report["is_shallow"] is True
        # The graft commit's own objects are verified (they may be absent/promised).
        # Its ancestors' snapshots are collected during the graft walk and skipped
        # by the orphan sweep — so only the graft's own missing object counts.
        assert report["promised_objects"] <= 1  # at most the graft's own object
        assert report["objects_checked"] >= 5  # recent objects verified

    def test_orphan_snapshot_with_missing_object_and_promisor(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A snapshot with no commit or ref, and no object, counts as promised."""
        remote_cfg = {"local": {"url": "https://localhost:1337/gabriel/muse"}}
        root = _init_repo(tmp_path, remotes=remote_cfg)
        # Only the snapshot record is written; its object never is.
        manifest = {"orphan.py": fake_id("orphan-obj-f")}
        record = SnapshotRecord(
            snapshot_id=compute_snapshot_id(manifest), manifest=manifest
        )
        write_snapshot(root, record)
        report = run_verify(root)
        assert report["all_ok"] is True
        assert report["promised_objects"] >= 1

    def test_concurrent_reads_stable(self, tmp_path: pathlib.Path) -> None:
        """Eight threads running ``verify --json`` all succeed and report all_ok."""
        remote_cfg = {"local": {"url": "https://localhost:1337/gabriel/muse"}}
        root = _init_repo(tmp_path, remotes=remote_cfg)
        parent: str | None = None
        for n in range(5):
            parent, _ = _make_commit(root, parent_id=parent, idx=n)

        payloads: list[dict] = []
        failures: list[Exception] = []
        guard = threading.Lock()

        def _worker() -> None:
            # Collect any exception instead of letting it die with the thread,
            # so the assertion below can surface it.
            try:
                outcome = _invoke(root, "--json")
                parsed = json.loads(outcome.output)
                with guard:
                    payloads.append(parsed)
            except Exception as exc:
                with guard:
                    failures.append(exc)

        workers = [threading.Thread(target=_worker) for _ in range(8)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert failures == []
        assert len(payloads) == 8
        assert all(parsed["all_ok"] is True for parsed in payloads)