gabriel / muse public
test_restore_supercharge.py python
530 lines 19.6 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """Supercharge tests for ``muse restore`` — performance, data integrity,
2 object-store corruption, concurrency, and source+staged combos.
3
4 Coverage tiers added here:
5 - Performance: duration_ms present, non-negative, and reasonable
6 - Data integrity: complete JSON schema, correct types, exit_code field
7 - Error mapping: object store corruption → exit code 3 (INTERNAL_ERROR)
8 - Concurrent: two threads restore independent files without racing
9 - Source+staged: --source --staged restores stage entry from source commit
10 - Text summary: text output includes "Restored N" summary line
11 - Docstring gap: _resolve_source_manifest returns {} for bad ref (not raises)
12 """
13
14 from __future__ import annotations
15 from collections.abc import Mapping
16
17 import json
18 import pathlib
19 import threading
20 import time
21 import datetime
22 import pytest
23
24 from tests.cli_test_helper import CliRunner, InvokeResult
25
26 from muse.core.object_store import write_object
27 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
28 from muse.core.commits import (
29 CommitRecord,
30 write_commit,
31 )
32 from muse.core.snapshots import (
33 SnapshotRecord,
34 write_snapshot,
35 )
36 from muse.core.types import Manifest, blob_id
37 from muse.plugins.code.stage import StagedFileMap, make_entry, read_stage, write_stage
38 from muse.core.paths import heads_dir, muse_dir, ref_path
39
# Module-wide CLI runner; ``_invoke`` routes every ``muse restore`` call through it.
runner = CliRunner()

# ``repo_id`` value that ``_init_repo`` writes into each test repo's ``repo.json``.
_REPO_ID = "restore-supercharge-test"
# Running number embedded in generated commit messages; helpers bump it via
# ``global _counter`` before each commit they create.
_counter = 1000  # offset to avoid collisions with test_cmd_restore.py
44
45
46
47
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Lay out a bare muse repository under *path* and return *path*.

    Creates the store sub-directories, points ``HEAD`` at ``refs/heads/main``
    and writes a ``repo.json`` carrying the module's repo id and the ``code``
    domain.  No commit is created here.
    """
    store = muse_dir(path)
    subdirs = ("commits", "snapshots", "objects", "refs/heads", "code")
    for name in subdirs:
        (store / name).mkdir(parents=True, exist_ok=True)

    repo_meta = json.dumps({"repo_id": _REPO_ID, "domain": "code"})
    (store / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")
    (store / "repo.json").write_text(repo_meta, encoding="utf-8")
    return path
57
58
def _env(repo: pathlib.Path) -> Mapping[str, str]:
    """Return the environment overrides that point the CLI at *repo*."""
    repo_root = str(repo)
    return dict(MUSE_REPO_ROOT=repo_root)
61
62
def _commit_files(root: pathlib.Path, files: Mapping[str, bytes], branch: str = "main") -> str:
    """Store *files*, snapshot them, commit, and move *branch* to the commit.

    Each file's bytes are written both to the object store and to the working
    tree under *root*.  The commit is always created with ``parent_ids=[]``,
    so successive calls do not chain onto one another.

    Returns the id of the commit that was written.
    """
    global _counter
    _counter += 1
    message = f"commit {_counter}"

    manifest: Manifest = {}
    for rel_path, payload in files.items():
        object_id = blob_id(payload)
        write_object(root, object_id, payload)
        manifest[rel_path] = object_id
        # Mirror the committed content onto disk so the worktree starts clean.
        target = root / rel_path
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(payload)

    snapshot_id = compute_snapshot_id(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    stamp = datetime.datetime.now(datetime.timezone.utc)
    commit_id = compute_commit_id(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=stamp.isoformat(),
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=message,
        committed_at=stamp,
    )
    write_commit(root, record)
    ref_path(root, branch).write_text(commit_id, encoding="utf-8")
    return commit_id
95
96
def _invoke(repo: pathlib.Path, *args: str) -> InvokeResult:
    """Run ``muse restore <args...>`` against *repo* through the shared runner."""
    # Imported lazily, exactly as the original helper did.
    from muse.cli.app import main as cli

    argv = ["restore", *args]
    return runner.invoke(cli, argv, env=_env(repo))
100
101
102 # ---------------------------------------------------------------------------
103 # Performance tier
104 # ---------------------------------------------------------------------------
105
106
def test_restore_json_has_duration_ms(tmp_path: pathlib.Path) -> None:
    """JSON output must include 'duration_ms' as a non-negative number.

    A file is committed, dirtied on disk, then restored with ``--json``; the
    emitted payload is parsed and its ``duration_ms`` field is validated.
    """
    root = _init_repo(tmp_path)
    _commit_files(root, {"a.py": b"# orig\n"})
    (root / "a.py").write_bytes(b"# dirty\n")

    result = _invoke(root, "--json", "a.py")
    assert result.exit_code == 0
    data = json.loads(result.stdout)
    assert "duration_ms" in data, "JSON must include 'duration_ms'"

    duration = data["duration_ms"]
    # bool is a subclass of int, so a JSON ``true`` would otherwise satisfy
    # the numeric check; reject it explicitly.
    assert isinstance(duration, (int, float)) and not isinstance(duration, bool), (
        "duration_ms must be numeric"
    )
    assert duration >= 0, "duration_ms must be non-negative"
119
120
def test_restore_duration_ms_is_reasonable(tmp_path: pathlib.Path) -> None:
    """A single-file restore must report a duration_ms below 5000."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"a.py": b"# orig\n"})
    (repo / "a.py").write_bytes(b"# dirty\n")

    outcome = _invoke(repo, "--json", "a.py")
    assert outcome.exit_code == 0

    elapsed_ms = json.loads(outcome.stdout)["duration_ms"]
    assert elapsed_ms < 5_000, f"duration_ms={elapsed_ms} is suspiciously large"
131
132
def test_restore_dry_run_json_has_duration_ms(tmp_path: pathlib.Path) -> None:
    """``--dry-run --json`` output still carries a duration_ms field."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"a.py": b"# orig\n"})
    (repo / "a.py").write_bytes(b"# dirty\n")

    outcome = _invoke(repo, "--dry-run", "--json", "a.py")
    assert outcome.exit_code == 0

    payload = json.loads(outcome.stdout)
    assert "duration_ms" in payload
143
144
145 # ---------------------------------------------------------------------------
146 # Data integrity tier
147 # ---------------------------------------------------------------------------
148
149
def test_restore_json_schema_complete_on_success(tmp_path: pathlib.Path) -> None:
    """All required JSON fields are present with correct types on success.

    Checked fields: ``restored`` and ``not_found`` (lists), ``dry_run``,
    ``staged`` and ``worktree`` (booleans), ``duration_ms`` (number) and
    ``exit_code`` (integer).  A missing field surfaces as ``KeyError``.
    """
    root = _init_repo(tmp_path)
    _commit_files(root, {"s.py": b"# orig\n"})
    (root / "s.py").write_bytes(b"# dirty\n")

    result = _invoke(root, "--json", "s.py")
    assert result.exit_code == 0
    data = json.loads(result.stdout)

    for key in ("restored", "not_found"):
        assert isinstance(data[key], list), f"{key} must be a list"
    for key in ("dry_run", "staged", "worktree"):
        assert isinstance(data[key], bool), f"{key} must be a bool"

    # bool is a subclass of int, so the numeric fields must reject it
    # explicitly or a JSON ``true``/``false`` would pass the type check.
    duration = data["duration_ms"]
    assert isinstance(duration, (int, float)) and not isinstance(duration, bool)
    exit_code = data["exit_code"]
    assert isinstance(exit_code, int) and not isinstance(exit_code, bool)
167
168
def test_restore_json_exit_code_zero_on_success(tmp_path: pathlib.Path) -> None:
    """A fully successful restore reports ``exit_code`` 0 inside the JSON."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"ok.py": b"# orig\n"})
    (repo / "ok.py").write_bytes(b"# dirty\n")

    outcome = _invoke(repo, "--json", "ok.py")
    assert outcome.exit_code == 0

    payload = json.loads(outcome.stdout)
    assert payload["exit_code"] == 0
179
180
def test_restore_json_exit_code_one_when_file_not_found(tmp_path: pathlib.Path) -> None:
    """Restoring a path absent from the source yields JSON ``exit_code`` 1."""
    repo = _init_repo(tmp_path)
    # Commit an unrelated file so the repo has a commit to restore from.
    _commit_files(repo, {"anchor.py": b"# anchor\n"})

    outcome = _invoke(repo, "--json", "ghost.py")
    assert outcome.exit_code != 0

    payload = json.loads(outcome.stdout)
    assert payload["exit_code"] == 1
190
191
def test_restore_json_restored_list_correct(tmp_path: pathlib.Path) -> None:
    """``restored`` lists exactly the restored paths; ``not_found`` is empty."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"x.py": b"# x\n", "y.py": b"# y\n"})
    dirty_versions = (("x.py", b"# dirty x\n"), ("y.py", b"# dirty y\n"))
    for name, junk in dirty_versions:
        (repo / name).write_bytes(junk)

    outcome = _invoke(repo, "--json", "x.py", "y.py")
    payload = json.loads(outcome.stdout)

    assert sorted(payload["restored"]) == ["x.py", "y.py"]
    assert payload["not_found"] == []
203
204
def test_restore_json_not_found_list_correct(tmp_path: pathlib.Path) -> None:
    """A path missing from the source manifest is reported under ``not_found``."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"real.py": b"# real\n"})
    (repo / "real.py").write_bytes(b"# dirty\n")

    # One path is in the commit, the other is not.
    outcome = _invoke(repo, "--json", "real.py", "ghost.py")
    payload = json.loads(outcome.stdout)

    assert "real.py" in payload["restored"]
    assert "ghost.py" in payload["not_found"]
215
216
def test_restore_json_staged_and_worktree_flags_reflect_args(tmp_path: pathlib.Path) -> None:
    """Passing ``--staged --worktree`` sets both JSON flags to ``True``."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"f.py": b"# orig\n"})

    # Stage a modified version of the committed file.
    staged_bytes = b"# mod\n"
    staged_id = blob_id(staged_bytes)
    write_object(repo, staged_id, staged_bytes)
    staged: StagedFileMap = {"f.py": make_entry(staged_id, "M")}
    write_stage(repo, staged)

    outcome = _invoke(repo, "--staged", "--worktree", "--json", "f.py")
    payload = json.loads(outcome.stdout)

    assert payload["staged"] is True
    assert payload["worktree"] is True
230
231
232 # ---------------------------------------------------------------------------
233 # Error mapping — object store corruption → exit code 3
234 # ---------------------------------------------------------------------------
235
236
def test_restore_missing_object_exits_3(tmp_path: pathlib.Path) -> None:
    """A manifest entry whose object is absent from the store must exit with 3."""
    global _counter
    _counter += 1
    message = f"corrupt commit {_counter}"

    repo = _init_repo(tmp_path)

    # The manifest references this id, but write_object is deliberately never
    # called, so the store has no matching object.
    dangling_id = blob_id(b"# original\n")
    manifest: Manifest = {"corrupt.py": dangling_id}
    snapshot_id = compute_snapshot_id(manifest)
    write_snapshot(repo, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    stamp = datetime.datetime.now(datetime.timezone.utc)
    commit_id = compute_commit_id(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=stamp.isoformat(),
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=stamp,
    )
    write_commit(repo, record)
    (heads_dir(repo) / "main").write_text(commit_id, encoding="utf-8")

    # Create the file on disk so path resolution doesn't fail.
    (repo / "corrupt.py").write_bytes(b"# dirty\n")

    outcome = _invoke(repo, "corrupt.py")
    assert outcome.exit_code == 3, (
        f"Expected exit code 3 (INTERNAL_ERROR) for missing object, got {outcome.exit_code}"
    )
274
275
def test_restore_missing_object_json_exit_code_3(tmp_path: pathlib.Path) -> None:
    """With ``--json``, a missing store object gives process and JSON exit code 3."""
    global _counter
    _counter += 1
    message = f"corrupt2 {_counter}"

    repo = _init_repo(tmp_path)

    # The object id is recorded in the manifest but never written to the store.
    dangling_id = blob_id(b"# original\n")
    manifest: Manifest = {"corrupt2.py": dangling_id}
    snapshot_id = compute_snapshot_id(manifest)
    write_snapshot(repo, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    stamp = datetime.datetime.now(datetime.timezone.utc)
    commit_id = compute_commit_id(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=stamp.isoformat(),
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch="main",
        snapshot_id=snapshot_id,
        message=message,
        committed_at=stamp,
    )
    write_commit(repo, record)
    (heads_dir(repo) / "main").write_text(commit_id, encoding="utf-8")
    (repo / "corrupt2.py").write_bytes(b"# dirty\n")

    outcome = _invoke(repo, "--json", "corrupt2.py")
    assert outcome.exit_code == 3

    payload = json.loads(outcome.stdout)
    assert payload["exit_code"] == 3
310
311
312 # ---------------------------------------------------------------------------
313 # Concurrent restore
314 # ---------------------------------------------------------------------------
315
316
def test_restore_concurrent_independent_files(tmp_path: pathlib.Path) -> None:
    """Two threads restore independent files without racing or corrupting each other.

    Both files are committed and then dirtied on disk.  Each worker thread
    restores one of them; the test fails if a worker raises, reports a
    non-zero exit code, is still running after its 10-second join timeout,
    or leaves its file with content other than the committed bytes.
    """
    root = _init_repo(tmp_path)
    original_a = b"# thread-a original\n"
    original_b = b"# thread-b original\n"
    _commit_files(root, {"ta.py": original_a, "tb.py": original_b})
    (root / "ta.py").write_bytes(b"# dirty a\n")
    (root / "tb.py").write_bytes(b"# dirty b\n")

    errors: list[Exception] = []

    def restore(filename: str, label: str) -> None:
        # Failures (including AssertionError) are collected rather than
        # raised, because an exception inside a thread never reaches pytest.
        try:
            result = _invoke(root, filename)
            assert result.exit_code == 0, f"{label} exit {result.exit_code}"
        except Exception as exc:
            errors.append(exc)

    # Daemon threads: a hung restore must not keep the interpreter alive
    # after the test has already reported the failure.
    workers = [
        threading.Thread(target=restore, args=("ta.py", "thread-a"), name="thread-a", daemon=True),
        threading.Thread(target=restore, args=("tb.py", "thread-b"), name="thread-b", daemon=True),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join(timeout=10)

    # join() returns silently on timeout, so liveness must be checked
    # explicitly before the results can be trusted.
    hung = [worker.name for worker in workers if worker.is_alive()]
    assert not hung, f"Concurrent restore threads did not finish within 10s: {hung}"

    assert not errors, f"Concurrent restore errors: {errors}"
    assert (root / "ta.py").read_bytes() == original_a
    assert (root / "tb.py").read_bytes() == original_b
352
353
354 # ---------------------------------------------------------------------------
355 # --source --staged combo
356 # ---------------------------------------------------------------------------
357
358
def test_restore_source_and_staged_clears_stage_from_source(tmp_path: pathlib.Path) -> None:
    """``--source <ref> --staged`` removes the path's entry from the stage."""
    repo = _init_repo(tmp_path)

    # Two successive commits of the same path; only the first id is needed.
    first_commit = _commit_files(repo, {"versioned.py": b"# v1\n"})
    _commit_files(repo, {"versioned.py": b"# v2\n"})

    # Stage a further modification of that path.
    staged_bytes = b"# staged mod\n"
    staged_id = blob_id(staged_bytes)
    write_object(repo, staged_id, staged_bytes)
    staged: StagedFileMap = {"versioned.py": make_entry(staged_id, "M")}
    write_stage(repo, staged)

    outcome = _invoke(repo, "--source", first_commit, "--staged", "versioned.py")
    assert outcome.exit_code == 0

    remaining = read_stage(repo)
    assert "versioned.py" not in remaining
381
382
def test_restore_source_staged_worktree_restores_from_source(tmp_path: pathlib.Path) -> None:
    """``--source <ref> --staged --worktree`` rewrites disk from source and unstages."""
    repo = _init_repo(tmp_path)
    source_bytes = b"# v1 original\n"
    source_commit = _commit_files(repo, {"combo.py": source_bytes})
    _commit_files(repo, {"combo.py": b"# v2\n"})

    # Stage one modification and leave a different one on disk.
    staged_bytes = b"# staged mod\n"
    staged_id = blob_id(staged_bytes)
    write_object(repo, staged_id, staged_bytes)
    staged: StagedFileMap = {"combo.py": make_entry(staged_id, "M")}
    write_stage(repo, staged)
    (repo / "combo.py").write_bytes(b"# dirty disk\n")

    outcome = _invoke(repo, "--source", source_commit, "--staged", "--worktree", "combo.py")
    assert outcome.exit_code == 0
    assert (repo / "combo.py").read_bytes() == source_bytes

    remaining = read_stage(repo)
    assert "combo.py" not in remaining
402
403
404 # ---------------------------------------------------------------------------
405 # Text summary output
406 # ---------------------------------------------------------------------------
407
408
def test_restore_text_output_summary_line(tmp_path: pathlib.Path) -> None:
    """Text output for a two-file restore mentions the count.

    NOTE(review): only the digit "2" is looked for anywhere in the combined
    output, not a specific "Restored 2 file(s)" line — confirm whether a
    stricter match is wanted.
    """
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"p.py": b"# p\n", "q.py": b"# q\n"})
    dirty_versions = (("p.py", b"# dirty p\n"), ("q.py", b"# dirty q\n"))
    for name, junk in dirty_versions:
        (repo / name).write_bytes(junk)

    outcome = _invoke(repo, "p.py", "q.py")
    assert outcome.exit_code == 0

    combined = outcome.stdout + (outcome.stderr or "")
    assert "2" in combined, f"Expected count in output: {combined!r}"
420
421
def test_restore_text_output_errors_noted(tmp_path: pathlib.Path) -> None:
    """A partially failing restore exits non-zero and mentions the failure in text."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"real.py": b"# real\n"})
    (repo / "real.py").write_bytes(b"# dirty\n")

    outcome = _invoke(repo, "real.py", "ghost.py")
    assert outcome.exit_code != 0

    combined = (outcome.stdout or "") + (outcome.stderr or "")
    lowered = combined.lower()
    # Any one of these markers counts as the failure having been mentioned.
    assert "ghost" in combined or "error" in lowered or "not" in lowered
433
434
435 # ---------------------------------------------------------------------------
436 # _resolve_source_manifest — docstring gap: bad ref returns {}, never raises
437 # ---------------------------------------------------------------------------
438
439
def test_resolve_source_manifest_bad_ref_returns_empty(tmp_path: pathlib.Path) -> None:
    """An unknown ref makes ``_resolve_source_manifest`` return ``{}`` instead of raising."""
    from muse.cli.commands.restore import _resolve_source_manifest

    repo = _init_repo(tmp_path)
    _commit_files(repo, {"a.py": b"# a\n"})

    resolved = _resolve_source_manifest(repo, source_ref="nonexistent-branch-xyz")
    assert resolved == {}
447
448
def test_resolve_source_manifest_valid_ref(tmp_path: pathlib.Path) -> None:
    """A valid branch name resolves to the manifest of that branch's commit."""
    from muse.cli.commands.restore import _resolve_source_manifest

    repo = _init_repo(tmp_path)
    branch_bytes = b"# branch content\n"
    _commit_files(repo, {"b.py": branch_bytes}, branch="main")

    resolved = _resolve_source_manifest(repo, source_ref="main")
    assert "b.py" in resolved
    assert resolved["b.py"] == blob_id(branch_bytes)
458
459
460 # ---------------------------------------------------------------------------
461 # Edge: restore staged-only with --source doesn't require file on disk
462 # ---------------------------------------------------------------------------
463
464
def test_restore_staged_only_source_does_not_require_disk_file(tmp_path: pathlib.Path) -> None:
    """``--source <ref> --staged`` succeeds even though the file is gone from disk."""
    repo = _init_repo(tmp_path)
    source_commit = _commit_files(repo, {"staged_only.py": b"# v1\n"})

    # Stage a modification of the committed path.
    staged_bytes = b"# mod\n"
    staged_id = blob_id(staged_bytes)
    write_object(repo, staged_id, staged_bytes)
    staged: StagedFileMap = {"staged_only.py": make_entry(staged_id, "M")}
    write_stage(repo, staged)

    # Remove the working-tree copy before restoring.
    (repo / "staged_only.py").unlink()

    outcome = _invoke(repo, "--source", source_commit, "--staged", "staged_only.py")
    assert outcome.exit_code == 0

    remaining = read_stage(repo)
    assert "staged_only.py" not in remaining
481
482
483 # ---------------------------------------------------------------------------
484 # Performance: duration_ms for 50-file restore is under 10 seconds
485 # ---------------------------------------------------------------------------
486
487
def test_restore_50_files_duration_ms_reasonable(tmp_path: pathlib.Path) -> None:
    """Restoring 50 files reports duration_ms below 10000 and lists all 50 paths."""
    repo = _init_repo(tmp_path)
    committed = {f"perf_{idx}.py": f"# orig {idx}\n".encode() for idx in range(50)}
    _commit_files(repo, committed)

    # Dirty every committed file so each one actually needs restoring.
    for filename in committed:
        (repo / filename).write_bytes(b"# dirty\n")

    outcome = _invoke(repo, "--json", *committed)
    assert outcome.exit_code == 0

    payload = json.loads(outcome.stdout)
    assert "duration_ms" in payload
    assert payload["duration_ms"] < 10_000
    assert len(payload["restored"]) == 50
502
503
class TestRegisterFlags:
    """Argument-parser checks for the ``json_out`` flag of ``muse restore``."""

    @staticmethod
    def _json_out_for(*argv: str) -> object:
        """Parse *argv* with a fresh parser and return the ``json_out`` attribute."""
        import argparse
        from muse.cli.commands.restore import register

        parser = argparse.ArgumentParser()
        subparsers = parser.add_subparsers()
        register(subparsers)
        namespace = parser.parse_args(list(argv))
        return namespace.json_out

    def test_default_json_out_is_false(self) -> None:
        """Without any JSON flag, ``json_out`` is ``False``."""
        assert self._json_out_for("restore", "src/billing.py") is False

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` turns ``json_out`` on."""
        assert self._json_out_for("restore", "src/billing.py", "--json") is True

    def test_j_shorthand_sets_json_out(self) -> None:
        """The ``-j`` shorthand turns ``json_out`` on as well."""
        assert self._json_out_for("restore", "src/billing.py", "-j") is True
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago