gabriel / muse public
test_read_snapshot_supercharge.py python
425 lines 16.7 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Supercharge tests for ``muse read-snapshot``.
2
3 Coverage tiers
4 --------------
5 - Unit: snapshot ID format — sha256: prefix preservation, full 71-char length
6 - Integration: duration_ms + exit_code in JSON; text full-ID format
7 - Flag interaction: --no-manifest + --path-prefix together
8 - Data integrity: sha256: on snapshot_id; valid JSON; unicode paths
9 - Path prefix edge cases: empty prefix, no trailing slash, exact filename
10 - Performance: single read and 1000-file manifest under threshold
11 """
12 from __future__ import annotations
13 from collections.abc import Mapping
14
15 import datetime
16 import json
17 import pathlib
18 import re
19 import time
20
21 from muse.core.errors import ExitCode
22 from muse.core.ids import hash_snapshot as compute_snapshot_id
23 from muse.core.snapshots import (
24 SnapshotRecord,
25 write_snapshot,
26 )
27 from tests.cli_test_helper import CliRunner, InvokeResult
28 from muse.core.types import NULL_LONG_ID, fake_id, long_id, split_id
29 from muse.core.paths import muse_dir
30
# Shared CLI runner used by every invocation helper in this module.
runner = CliRunner()

# Fixed, timezone-aware creation timestamp stamped onto every test snapshot.
_CREATED_AT = datetime.datetime(
    year=2026,
    month=3,
    day=18,
    hour=12,
    minute=0,
    tzinfo=datetime.timezone.utc,
)

# Full ID: "sha256:" followed by exactly 64 lowercase hex digits (71 chars).
_SHA256_FULL = re.compile(r"^sha256:[0-9a-f]{64}$")
# Short ID: "sha256:" followed by exactly 12 lowercase hex digits (19 chars).
_SHA256_SHORT_19 = re.compile(r"^sha256:[0-9a-f]{12}$")
37
38
39 # ---------------------------------------------------------------------------
40 # Helpers
41 # ---------------------------------------------------------------------------
42
43
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create a minimal on-disk repository layout under *tmp_path*.

    Creates the ``objects``, ``commits``, ``snapshots`` and ``refs/heads``
    directories inside the directory returned by ``muse_dir``, then writes a
    ``HEAD`` file pointing at ``refs/heads/main`` and a ``repo.json``
    descriptor.

    Returns:
        The repository root, ``tmp_path / "repo"``.
    """
    root = tmp_path / "repo"
    meta = muse_dir(root)

    subdirs = ("objects", "commits", "snapshots", "refs/heads")
    for name in subdirs:
        target = meta / name
        target.mkdir(parents=True)

    head_file = meta / "HEAD"
    head_file.write_text("ref: refs/heads/main")

    descriptor = {"repo_id": "test-repo", "domain": "code"}
    config_file = meta / "repo.json"
    config_file.write_text(json.dumps(descriptor))
    return root
52
53
def _snap(repo: pathlib.Path, manifest: Mapping[str, object] | None = None) -> str:
    """Write a snapshot for *manifest* into *repo* and return its ID.

    A missing or empty *manifest* is stored as an empty dict.  The ID is
    computed from the manifest with ``compute_snapshot_id`` and the record is
    stamped with the module-level ``_CREATED_AT`` timestamp.
    """
    files = manifest if manifest else {}
    snapshot_id = compute_snapshot_id(files)
    record = SnapshotRecord(
        snapshot_id=snapshot_id,
        manifest=files,
        created_at=_CREATED_AT,
    )
    write_snapshot(repo, record)
    return snapshot_id
59
60
def _rs(repo: pathlib.Path, *args: str) -> InvokeResult:
    """Run ``read-snapshot`` with *args* through the shared CLI runner.

    The repository is passed to the CLI via the ``MUSE_REPO_ROOT``
    environment variable.
    """
    from muse.cli.app import main as cli

    argv = ["read-snapshot"]
    argv.extend(args)
    environment = {"MUSE_REPO_ROOT": str(repo)}
    return runner.invoke(cli, argv, env=environment)
64
65
def _rsj(repo: pathlib.Path, *args: str) -> InvokeResult:
    """Run ``read-snapshot`` like ``_rs`` with ``--json`` prepended to *args*."""
    json_args = ("--json", *args)
    return _rs(repo, *json_args)
69
70
def _oid(n: int) -> str:
    """Build a test object ID from *n*.

    *n* is rendered as 64 zero-padded lowercase hex digits and handed to
    ``long_id``.
    """
    hex_digits = f"{n:064x}"
    return long_id(hex_digits)
74
75
76 # ---------------------------------------------------------------------------
77 # Unit — snapshot ID format
78 # ---------------------------------------------------------------------------
79
80
class TestSnapshotIdFormat:
    """Snapshot IDs are full sha256:<64-hex> (71 chars)."""

    def test_snapshot_id_keeps_sha256_prefix(self) -> None:
        """``long_id`` output starts with the ``sha256:`` prefix."""
        hex_part = "a" * 64
        assert long_id(hex_part).startswith("sha256:")

    def test_snapshot_id_total_length_is_71(self) -> None:
        """The null long ID is 71 characters long."""
        assert len(NULL_LONG_ID) == 71

    def test_snapshot_id_matches_full_regex(self) -> None:
        """``long_id`` output matches the full-ID pattern."""
        candidate = long_id("abcdef01" * 8)
        assert _SHA256_FULL.match(candidate) is not None
95
96
97 # ---------------------------------------------------------------------------
98 # Integration — duration_ms and exit_code
99 # ---------------------------------------------------------------------------
100
101
class TestDurationAndExitCode:
    """JSON output carries ``duration_ms`` and ``exit_code`` fields."""

    @staticmethod
    def _payload(
        tmp_path: pathlib.Path,
        *flags: str,
        manifest: Mapping[str, object] | None = None,
    ) -> dict:
        """Create a repo and snapshot, run ``--json`` with *flags*, parse the output."""
        repo = _make_repo(tmp_path)
        snapshot_id = _snap(repo, manifest)
        outcome = _rsj(repo, *flags, snapshot_id)
        return json.loads(outcome.output)

    def test_duration_ms_present_on_success(self, tmp_path: pathlib.Path) -> None:
        payload = self._payload(tmp_path)
        assert "duration_ms" in payload, "duration_ms must be present in JSON output"

    def test_exit_code_zero_on_success(self, tmp_path: pathlib.Path) -> None:
        payload = self._payload(tmp_path)
        assert payload["exit_code"] == 0

    def test_duration_ms_is_float(self, tmp_path: pathlib.Path) -> None:
        payload = self._payload(tmp_path)
        assert isinstance(payload["duration_ms"], float)

    def test_duration_ms_non_negative(self, tmp_path: pathlib.Path) -> None:
        payload = self._payload(tmp_path)
        assert payload["duration_ms"] >= 0.0

    def test_duration_ms_3dp_precision(self, tmp_path: pathlib.Path) -> None:
        """Rounding to three decimals must leave the value unchanged."""
        elapsed = self._payload(tmp_path)["duration_ms"]
        assert elapsed == round(elapsed, 3)

    def test_duration_ms_present_with_no_manifest(self, tmp_path: pathlib.Path) -> None:
        payload = self._payload(
            tmp_path,
            "--no-manifest",
            manifest={"a.py": _oid(1)},
        )
        assert "duration_ms" in payload
        assert "exit_code" in payload

    def test_duration_ms_present_with_path_prefix(self, tmp_path: pathlib.Path) -> None:
        payload = self._payload(
            tmp_path,
            "--path-prefix",
            "src/",
            manifest={"src/a.py": _oid(1), "tests/b.py": _oid(2)},
        )
        assert "duration_ms" in payload
        assert payload["exit_code"] == 0
146
147
148 # ---------------------------------------------------------------------------
149 # Integration — text format full ID
150 # ---------------------------------------------------------------------------
151
152
class TestTextFormatFullId:
    """Text format must emit the full sha256:<64-hex> (71 chars) snapshot ID."""

    def _full_token(self, line: str) -> str | None:
        """Return the first whitespace-separated token that is a full ID, else None."""
        candidates = (word for word in line.split() if _SHA256_FULL.match(word))
        return next(candidates, None)

    @staticmethod
    def _run(
        tmp_path: pathlib.Path,
        manifest: Mapping[str, object] | None = None,
    ) -> tuple[str, InvokeResult]:
        """Create a repo and snapshot, then run the text-mode command on it."""
        repo = _make_repo(tmp_path)
        snapshot_id = _snap(repo, manifest)
        return snapshot_id, _rs(repo, snapshot_id)

    def test_text_full_id_has_sha256_prefix(self, tmp_path: pathlib.Path) -> None:
        _, result = self._run(tmp_path, {"f.py": _oid(1)})
        assert result.exit_code == 0
        found = self._full_token(result.output.strip())
        assert found is not None, f"no sha256:<64-hex> token in: {result.output!r}"
        assert found.startswith("sha256:")

    def test_text_full_id_has_64_hex_chars(self, tmp_path: pathlib.Path) -> None:
        _, result = self._run(tmp_path)
        found = self._full_token(result.output.strip())
        assert found is not None
        assert found.startswith("sha256:")
        # Everything after the first colon is the hex digest.
        _, _, hex_part = found.partition(":")
        assert len(hex_part) == 64

    def test_text_full_id_total_length_is_71(self, tmp_path: pathlib.Path) -> None:
        _, result = self._run(tmp_path)
        found = self._full_token(result.output.strip())
        assert found is not None
        assert len(found) == 71

    def test_text_full_id_matches_snapshot_id(self, tmp_path: pathlib.Path) -> None:
        snapshot_id, result = self._run(tmp_path, {"x.py": _oid(9)})
        found = self._full_token(result.output.strip())
        assert found is not None
        assert found == snapshot_id, (
            f"text output ID {found!r} does not match snapshot_id {snapshot_id!r}"
        )
195
196
197 # ---------------------------------------------------------------------------
198 # Flag interaction — --no-manifest + --path-prefix together
199 # ---------------------------------------------------------------------------
200
201
class TestFlagInteraction:
    """--no-manifest and --path-prefix may be combined.

    Use case: "how many files are under src/ without downloading any OIDs?"
    The file_count reflects the filtered count; manifest is omitted.
    """

    @staticmethod
    def _query(tmp_path: pathlib.Path, manifest: Mapping[str, object]) -> InvokeResult:
        """Snapshot *manifest* and read it with both flags, filtering on ``src/``."""
        repo = _make_repo(tmp_path)
        snapshot_id = _snap(repo, manifest)
        return _rsj(repo, "--no-manifest", "--path-prefix", "src/", snapshot_id)

    @staticmethod
    def _three_files() -> dict[str, str]:
        """Two files under ``src/`` plus one under ``tests/``."""
        return {
            "src/a.py": _oid(1),
            "src/b.py": _oid(2),
            "tests/c.py": _oid(3),
        }

    def test_no_manifest_plus_path_prefix_succeeds(self, tmp_path: pathlib.Path) -> None:
        outcome = self._query(tmp_path, self._three_files())
        assert outcome.exit_code == 0, outcome.output

    def test_no_manifest_plus_path_prefix_file_count_is_filtered(self, tmp_path: pathlib.Path) -> None:
        outcome = self._query(tmp_path, self._three_files())
        payload = json.loads(outcome.output)
        assert payload["file_count"] == 2, "file_count must reflect the prefix-filtered count"

    def test_no_manifest_plus_path_prefix_manifest_absent(self, tmp_path: pathlib.Path) -> None:
        outcome = self._query(tmp_path, {"src/a.py": _oid(1)})
        payload = json.loads(outcome.output)
        assert "manifest" not in payload

    def test_no_manifest_plus_path_prefix_has_duration_ms(self, tmp_path: pathlib.Path) -> None:
        outcome = self._query(tmp_path, {"src/a.py": _oid(1)})
        payload = json.loads(outcome.output)
        assert "duration_ms" in payload
        assert payload["exit_code"] == 0
241
242
243 # ---------------------------------------------------------------------------
244 # Data integrity
245 # ---------------------------------------------------------------------------
246
247
class TestDataIntegrity:
    """Shape and type checks on the JSON payload."""

    def test_snapshot_id_has_sha256_prefix(self, tmp_path: pathlib.Path) -> None:
        """The reported ``snapshot_id`` matches the full-ID pattern."""
        repo = _make_repo(tmp_path)
        snapshot_id = _snap(repo)
        payload = json.loads(_rsj(repo, snapshot_id).output)
        assert _SHA256_FULL.match(payload["snapshot_id"]), (
            f"snapshot_id must be sha256:<64hex>, got {payload['snapshot_id']!r}"
        )

    def test_json_output_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """``--json`` output parses to a JSON object."""
        repo = _make_repo(tmp_path)
        snapshot_id = _snap(repo, {"a.py": _oid(1)})
        outcome = _rsj(repo, snapshot_id)
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert isinstance(payload, dict)

    def test_manifest_values_are_strings(self, tmp_path: pathlib.Path) -> None:
        """Manifest object IDs are strings — no type coercion."""
        repo = _make_repo(tmp_path)
        snapshot_id = _snap(repo, {"a.py": _oid(1), "b.py": _oid(2)})
        payload = json.loads(_rsj(repo, snapshot_id).output)
        entries = payload["manifest"]
        for name, value in entries.items():
            assert isinstance(value, str), f"manifest[{name!r}] must be a string, got {type(value)}"

    def test_unicode_paths_in_manifest(self, tmp_path: pathlib.Path) -> None:
        """Unicode file paths round-trip through JSON without corruption."""
        repo = _make_repo(tmp_path)
        expected = {
            "src/音楽.py": _oid(1),
            "tracks/café/main.mid": _oid(2),
            "docs/naïve_approach.md": _oid(3),
        }
        snapshot_id = _snap(repo, expected)
        payload = json.loads(_rsj(repo, snapshot_id).output)
        assert payload["file_count"] == 3
        for name in expected:
            assert name in payload["manifest"], f"unicode path {name!r} missing from manifest"

    def test_created_at_iso8601_with_timezone(self, tmp_path: pathlib.Path) -> None:
        """``created_at`` parses as ISO 8601 and is timezone-aware."""
        repo = _make_repo(tmp_path)
        snapshot_id = _snap(repo)
        payload = json.loads(_rsj(repo, snapshot_id).output)
        parsed = datetime.datetime.fromisoformat(payload["created_at"])
        assert parsed.tzinfo is not None, "created_at must include timezone"

    def test_file_count_matches_manifest_length(self, tmp_path: pathlib.Path) -> None:
        """file_count must equal len(manifest) in the response."""
        repo = _make_repo(tmp_path)
        total = 17
        files = {f"f{index}.py": _oid(index) for index in range(total)}
        snapshot_id = _snap(repo, files)
        payload = json.loads(_rsj(repo, snapshot_id).output)
        assert payload["file_count"] == total
        assert len(payload["manifest"]) == total
301
302
303 # ---------------------------------------------------------------------------
304 # Path prefix edge cases
305 # ---------------------------------------------------------------------------
306
307
class TestPathPrefixEdgeCases:
    """Boundary behaviour of ``--path-prefix`` filtering.

    The assertions below pin the filter as a plain string-prefix match on
    manifest keys: it is not directory-aware and not an exact-name match.
    """

    def test_empty_prefix_matches_all(self, tmp_path: pathlib.Path) -> None:
        """Empty --path-prefix matches every path (prefix of every string)."""
        repo = _make_repo(tmp_path)
        sid = _snap(repo, {"src/a.py": _oid(1), "tests/b.py": _oid(2)})
        data = json.loads(_rsj(repo, "--path-prefix", "", sid).output)
        assert data["file_count"] == 2

    def test_prefix_without_trailing_slash(self, tmp_path: pathlib.Path) -> None:
        """Prefix 'src' (no slash) matches 'src/a.py' and also 'src_util.py'."""
        repo = _make_repo(tmp_path)
        sid = _snap(repo, {
            "src/a.py": _oid(1),
            "src_util.py": _oid(2),
            "tests/b.py": _oid(3),
        })
        data = json.loads(_rsj(repo, "--path-prefix", "src", sid).output)
        assert "src/a.py" in data["manifest"]
        assert "src_util.py" in data["manifest"]
        assert "tests/b.py" not in data["manifest"]

    def test_prefix_exact_filename_match(self, tmp_path: pathlib.Path) -> None:
        """A prefix equal to a filename also matches longer names starting with it.

        'README.md' is a string prefix of 'README.md.bak', so both keys must
        appear in the filtered manifest.
        """
        repo = _make_repo(tmp_path)
        sid = _snap(repo, {"README.md": _oid(1), "README.md.bak": _oid(2)})
        data = json.loads(_rsj(repo, "--path-prefix", "README.md", sid).output)
        assert "README.md" in data["manifest"]
        assert "README.md.bak" in data["manifest"]  # startswith matches both

    def test_prefix_no_match_empty_manifest_with_duration(self, tmp_path: pathlib.Path) -> None:
        """No-match prefix returns empty manifest with duration_ms."""
        repo = _make_repo(tmp_path)
        sid = _snap(repo, {"src/a.py": _oid(1)})
        data = json.loads(_rsj(repo, "--path-prefix", "nonexistent/", sid).output)
        assert data["file_count"] == 0
        assert data["manifest"] == {}
        assert "duration_ms" in data
345
346
347 # ---------------------------------------------------------------------------
348 # Security
349 # ---------------------------------------------------------------------------
350
351
class TestSecuritySupercharge:
    """Hostile-looking input must be handled cleanly."""

    def test_path_prefix_with_traversal_attempt(self, tmp_path: pathlib.Path) -> None:
        """A manifest key starting with '../' is excluded by a 'src/' prefix filter."""
        repo = _make_repo(tmp_path)
        # One key is literally '../etc/passwd'; it could only match a '../'
        # prefix, so filtering on 'src/' must leave it out.
        files = {"src/a.py": _oid(1), "../etc/passwd": _oid(2)}
        snapshot_id = _snap(repo, files)
        outcome = _rsj(repo, "--path-prefix", "src/", snapshot_id)
        filtered = json.loads(outcome.output)["manifest"]
        assert "src/a.py" in filtered
        assert "../etc/passwd" not in filtered

    def test_no_traceback_on_sha256_prefixed_missing_id(self, tmp_path: pathlib.Path) -> None:
        """Valid sha256: format but non-existent ID — no traceback, clean error."""
        repo = _make_repo(tmp_path)
        absent_id = long_id("dead" * 16)
        outcome = _rs(repo, absent_id)
        assert outcome.exit_code == ExitCode.USER_ERROR
        assert "Traceback" not in outcome.output
371
372
373 # ---------------------------------------------------------------------------
374 # Performance
375 # ---------------------------------------------------------------------------
376
377
class TestPerformanceSupercharge:
    """Latency bounds for ``read-snapshot``.

    Wall-clock checks time the whole runner invocation from the test side;
    the last test checks the ``duration_ms`` value the command itself reports.
    """

    def test_single_read_under_500ms(self, tmp_path: pathlib.Path) -> None:
        """Reading a one-file snapshot completes in under 500 ms wall-clock."""
        repo = _make_repo(tmp_path)
        sid = _snap(repo, {"a.py": _oid(0)})
        # perf_counter is the clock intended for measuring short intervals.
        t0 = time.perf_counter()
        result = _rs(repo, sid)
        duration_ms = (time.perf_counter() - t0) * 1000
        assert result.exit_code == 0, result.output
        assert duration_ms < 500, f"read took {duration_ms:.1f} ms (limit 500 ms)"

    def test_1000_file_manifest_under_1000ms(self, tmp_path: pathlib.Path) -> None:
        """Reading a 1000-file snapshot completes in under 1000 ms wall-clock."""
        repo = _make_repo(tmp_path)
        manifest = {f"src/module{i:04d}.py": _oid(i) for i in range(1000)}
        sid = _snap(repo, manifest)
        t0 = time.perf_counter()
        result = _rs(repo, sid)
        duration_ms = (time.perf_counter() - t0) * 1000
        assert result.exit_code == 0, result.output
        assert duration_ms < 1000, f"read took {duration_ms:.1f} ms (limit 1000 ms)"

    def test_duration_ms_plausible(self, tmp_path: pathlib.Path) -> None:
        """duration_ms reported in the JSON output must be < 500 for a one-file read."""
        repo = _make_repo(tmp_path)
        sid = _snap(repo, {"a.py": _oid(0)})
        data = json.loads(_rsj(repo, sid).output)
        assert data["duration_ms"] < 500, f"reported duration_ms={data['duration_ms']!r}"
404
405
class TestRegisterFlags:
    """Argument-parser wiring for the ``read-snapshot`` JSON flag."""

    def _parse(self, *args: str) -> "argparse.Namespace":
        """Register the subcommand on a fresh parser and parse a fake ID plus *args*."""
        import argparse
        from muse.cli.commands.read_snapshot import register

        parser = argparse.ArgumentParser()
        register(parser.add_subparsers())
        argv = ["read-snapshot", fake_id("a")]
        argv.extend(args)
        return parser.parse_args(argv)

    def test_json_short_flag(self) -> None:
        parsed = self._parse("-j")
        assert parsed.json_out is True

    def test_json_long_flag(self) -> None:
        parsed = self._parse("--json")
        assert parsed.json_out is True

    def test_default_no_json(self) -> None:
        parsed = self._parse()
        assert parsed.json_out is False
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago