gabriel / muse public
test_cmd_ls_tree.py python
527 lines 18.4 KB
Raw
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor ⚠ breaking 28 days ago
1 """Tests for ``muse ls-tree`` — directory-aware tree listing from a snapshot.
2
3 Coverage tiers:
4 - Unit: _build_tree_entries, _synthetic_tree_id helpers
5 - Integration: root listing (files + synthetic dirs), path-scoped listing,
6 -r/--recursive (all blobs, no synthetic dirs), --name-only,
7 -l/--long (includes object size), -d/--dirs-only,
8 branch ref, commit ID ref, --json schema, text format,
9 mode strings (100644 for blob, 040000 for tree)
10 - End-to-end: full CLI via CliRunner
11 - Security: path traversal in path arg rejected, ANSI in ref rejected
12 - Edge cases: empty repo, nonexistent ref, path not in tree
13 - Stress: 500-file repo, tree listing root and deep prefix
14 """
15
16 from __future__ import annotations
17 from collections.abc import Mapping
18
19 import datetime
20 import json
21 import pathlib
22
23 import pytest
24
25 from tests.cli_test_helper import CliRunner
26
27 from muse.core.object_store import write_object
28 from muse.core.ids import hash_commit, hash_snapshot
29 from muse.core.store import CommitRecord, SnapshotRecord, write_commit, write_snapshot
30 from muse.core.types import Manifest, blob_id
31 from muse.core.paths import muse_dir, ref_path
32
# Single CLI runner shared by every test in this module (see _invoke).
runner = CliRunner()

# Repository identifier written into repo.json by _init_repo.
_REPO_ID: str = "ls-tree-test"


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


# Incremented by _commit_files on every call so each commit message differs.
_counter: int = 0
46
47
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Create a bare repository layout for *path* and return *path* unchanged.

    Inside the directory returned by ``muse_dir(path)`` this creates the
    storage sub-directories, a ``HEAD`` file pointing at ``refs/heads/main``
    and a ``repo.json`` carrying the test repo ID and the ``code`` domain.
    """
    store = muse_dir(path)
    subdirs = ("commits", "snapshots", "objects", "refs/heads", "code")
    for name in subdirs:
        (store / name).mkdir(parents=True, exist_ok=True)

    head_file = store / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = json.dumps({"repo_id": _REPO_ID, "domain": "code"})
    (store / "repo.json").write_text(repo_meta, encoding="utf-8")
    return path
57
58
59 def _env(repo: pathlib.Path) -> Mapping[str, str]:
60 return {"MUSE_REPO_ROOT": str(repo)}
61
62
def _commit_files(
    root: pathlib.Path,
    files: Mapping[str, bytes],
    branch: str = "main",
) -> str:
    """Store *files* as objects, snapshot them and record a commit on *branch*.

    Every file is also written to disk under *root*. The commit is created
    with an empty parent list, and the ref file for *branch* is overwritten
    with the new commit ID.

    Returns:
        The ID of the commit that was written.
    """
    global _counter
    _counter += 1
    message = f"commit {_counter}"

    manifest: Manifest = {}
    for rel_path, content in files.items():
        object_id = blob_id(content)
        write_object(root, object_id, content)
        manifest[rel_path] = object_id
        # Mirror the content on disk next to the object store.
        target = root / rel_path
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(content)

    snapshot_id = hash_snapshot(manifest)
    snapshot = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest)
    write_snapshot(root, snapshot)

    now = datetime.datetime.now(datetime.timezone.utc)
    commit_id = hash_commit(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=now.isoformat(),
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=message,
        committed_at=now,
    )
    write_commit(root, record)

    ref_path(root, branch).write_text(commit_id, encoding="utf-8")
    return commit_id
98
99
def _invoke(repo: pathlib.Path, *args: str) -> "InvokeResult":
    """Run the ``ls-tree`` subcommand with *args* against *repo*.

    The CLI entry point is imported lazily and invoked through the shared
    module-level runner with the environment produced by ``_env(repo)``.
    """
    from muse.cli.app import main as cli

    argv = ["ls-tree"]
    argv.extend(args)
    return runner.invoke(cli, argv, env=_env(repo))
103
104
105 # ---------------------------------------------------------------------------
106 # Unit — _build_tree_entries
107 # ---------------------------------------------------------------------------
108
109
def test_build_tree_entries_separates_blobs_and_dirs() -> None:
    """A non-recursive root listing has blobs for root files and trees for subdirs."""
    from muse.cli.commands.ls_tree import _build_tree_entries

    manifest = {
        "README.md": "a" * 64,
        "src/main.py": "b" * 64,
        "src/utils.py": "c" * 64,
        "docs/guide.md": "d" * 64,
    }
    listing = _build_tree_entries(manifest, path_prefix="", recursive=False)
    kind_by_path = dict((entry["path"], entry["type"]) for entry in listing)

    assert kind_by_path["README.md"] == "blob"
    assert kind_by_path["src/"] == "tree"
    assert kind_by_path["docs/"] == "tree"
    # Nested files must not surface at root level when not recursive.
    assert "src/main.py" not in kind_by_path
    assert "src/utils.py" not in kind_by_path
126
127
def test_build_tree_entries_recursive_only_blobs() -> None:
    """A recursive listing contains blob entries only, including nested files."""
    from muse.cli.commands.ls_tree import _build_tree_entries

    manifest = {
        "README.md": "a" * 64,
        "src/main.py": "b" * 64,
        "src/sub/helper.py": "c" * 64,
    }
    entries = _build_tree_entries(manifest, path_prefix="", recursive=True)

    types = [entry["type"] for entry in entries]
    non_blobs = [kind for kind in types if kind != "blob"]
    assert not non_blobs, f"Got non-blob entries: {types}"

    listed = [entry["path"] for entry in entries]
    assert "src/main.py" in listed
    assert "src/sub/helper.py" in listed
141
142
def test_build_tree_entries_path_prefix_scoping() -> None:
    """With ``path_prefix="src/"`` only entries under that prefix are listed."""
    from muse.cli.commands.ls_tree import _build_tree_entries

    manifest = {
        "src/main.py": "b" * 64,
        "src/sub/helper.py": "c" * 64,
        "root.py": "d" * 64,
    }
    scoped = _build_tree_entries(manifest, path_prefix="src/", recursive=False)
    listed = [entry["path"] for entry in scoped]

    assert "src/main.py" in listed
    assert "src/sub/" in listed
    assert "root.py" not in listed
155
156
def test_build_tree_entries_sorted() -> None:
    """Entries are returned in sorted path order regardless of manifest order."""
    from muse.cli.commands.ls_tree import _build_tree_entries

    manifest = {
        "z.py": "a" * 64,
        "a.py": "b" * 64,
        "m.py": "c" * 64,
    }
    listing = _build_tree_entries(manifest, path_prefix="", recursive=False)
    actual_order = [entry["path"] for entry in listing]

    expected_order = list(actual_order)
    expected_order.sort()
    assert actual_order == expected_order
167
168
def test_synthetic_tree_id_is_deterministic() -> None:
    """Two calls with the same inputs give the same ``sha256:``-prefixed ID."""
    from muse.cli.commands.ls_tree import _synthetic_tree_id

    manifest = {"src/a.py": "x" * 64, "src/b.py": "y" * 64}
    first = _synthetic_tree_id(manifest, "src/")
    second = _synthetic_tree_id(manifest, "src/")

    assert first == second
    prefix = "sha256:"
    assert first.startswith(prefix)
    # Prefix (7 chars) followed by 64 hex characters.
    assert len(first) == len(prefix) + 64
177
178
def test_synthetic_tree_id_differs_for_different_content() -> None:
    """Different manifests under the same prefix give different tree IDs."""
    from muse.cli.commands.ls_tree import _synthetic_tree_id

    id_a = _synthetic_tree_id({"src/a.py": "x" * 64}, "src/")
    id_b = _synthetic_tree_id({"src/b.py": "y" * 64}, "src/")
    assert id_a != id_b
184
185
186 # ---------------------------------------------------------------------------
187 # Integration — root listing (non-recursive)
188 # ---------------------------------------------------------------------------
189
190
def test_ls_tree_root_shows_blob_for_root_file(tmp_path: pathlib.Path) -> None:
    """A file committed at the repo root appears in the root listing."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"README.md": b"# readme\n"})

    outcome = _invoke(repo, "HEAD", "--json")

    assert outcome.exit_code == 0
    payload = json.loads(outcome.stdout)
    listed = [entry["path"] for entry in payload["entries"]]
    assert "README.md" in listed
199
200
def test_ls_tree_root_shows_synthetic_tree_for_subdir(tmp_path: pathlib.Path) -> None:
    """The root listing reports a subdirectory as a ``tree`` entry, not its files."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"src/main.py": b"# main\n", "README.md": b"# r\n"})

    outcome = _invoke(repo, "HEAD", "--json")
    assert outcome.exit_code == 0

    payload = json.loads(outcome.stdout)
    kind_by_path = dict((entry["path"], entry["type"]) for entry in payload["entries"])
    assert kind_by_path.get("README.md") == "blob"
    assert kind_by_path.get("src/") == "tree"
    # The nested file must stay hidden behind its directory entry.
    assert "src/main.py" not in kind_by_path
212
213
def test_ls_tree_root_blob_mode_is_100644(tmp_path: pathlib.Path) -> None:
    """The first blob entry in the JSON output carries mode ``100644``."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"a.py": b"# a\n"})

    payload = json.loads(_invoke(repo, "HEAD", "--json").stdout)

    first_blob = next(filter(lambda entry: entry["type"] == "blob", payload["entries"]))
    assert first_blob["mode"] == "100644"
221
222
def test_ls_tree_root_tree_mode_is_040000(tmp_path: pathlib.Path) -> None:
    """The first tree entry in the JSON output carries mode ``040000``."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"src/a.py": b"# a\n"})

    payload = json.loads(_invoke(repo, "HEAD", "--json").stdout)

    first_tree = next(filter(lambda entry: entry["type"] == "tree", payload["entries"]))
    assert first_tree["mode"] == "040000"
230
231
def test_ls_tree_entries_sorted_alphabetically(tmp_path: pathlib.Path) -> None:
    """Root listing entries come back in sorted path order.

    The exit code and a non-empty listing are asserted first: an empty list
    is trivially equal to its sorted copy, so without these guards the
    ordering check could pass without checking anything.
    """
    root = _init_repo(tmp_path)
    _commit_files(root, {"z.py": b"# z\n", "a.py": b"# a\n", "src/m.py": b"# m\n"})
    result = _invoke(root, "HEAD", "--json")
    assert result.exit_code == 0
    data = json.loads(result.stdout)
    paths = [e["path"] for e in data["entries"]]
    # Guard against a vacuous pass on an empty listing.
    assert paths, "root listing returned no entries"
    assert paths == sorted(paths)
239
240
241 # ---------------------------------------------------------------------------
242 # Integration — path-scoped listing
243 # ---------------------------------------------------------------------------
244
245
def test_ls_tree_path_arg_scopes_to_directory(tmp_path: pathlib.Path) -> None:
    """Passing ``src/`` as the path argument lists only entries under ``src/``."""
    repo = _init_repo(tmp_path)
    committed = {
        "src/main.py": b"# main\n",
        "src/sub/helper.py": b"# helper\n",
        "root.py": b"# root\n",
    }
    _commit_files(repo, committed)

    scoped = _invoke(repo, "HEAD", "src/", "--json")
    assert scoped.exit_code == 0

    payload = json.loads(scoped.stdout)
    listed = [entry["path"] for entry in payload["entries"]]
    assert "src/main.py" in listed
    assert "src/sub/" in listed
    assert "root.py" not in listed
260
261
def test_ls_tree_path_arg_nonexistent_shows_empty(tmp_path: pathlib.Path) -> None:
    """A path that matches nothing exits 0 with an empty ``entries`` list."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"a.py": b"# a\n"})

    outcome = _invoke(repo, "HEAD", "nonexistent/", "--json")

    assert outcome.exit_code == 0
    payload = json.loads(outcome.stdout)
    assert payload["entries"] == []
269
270
271 # ---------------------------------------------------------------------------
272 # Integration — --recursive
273 # ---------------------------------------------------------------------------
274
275
def test_ls_tree_recursive_lists_all_blobs(tmp_path: pathlib.Path) -> None:
    """``-r`` lists files at every depth by their full path."""
    repo = _init_repo(tmp_path)
    committed = {
        "a.py": b"# a\n",
        "src/b.py": b"# b\n",
        "src/deep/c.py": b"# c\n",
    }
    _commit_files(repo, committed)

    outcome = _invoke(repo, "-r", "HEAD", "--json")
    assert outcome.exit_code == 0

    payload = json.loads(outcome.stdout)
    listed = [entry["path"] for entry in payload["entries"]]
    assert "a.py" in listed
    assert "src/b.py" in listed
    assert "src/deep/c.py" in listed
290
291
def test_ls_tree_recursive_no_tree_entries(tmp_path: pathlib.Path) -> None:
    """``-r`` output contains blob entries only, with no tree entries.

    ``all()`` over an empty iterable is ``True``, so the exit code and a
    non-empty entry list are asserted before the type check. Otherwise a
    listing with no entries would pass the test.
    """
    root = _init_repo(tmp_path)
    _commit_files(root, {"src/a.py": b"# a\n", "src/b.py": b"# b\n"})
    result = _invoke(root, "-r", "HEAD", "--json")
    assert result.exit_code == 0
    data = json.loads(result.stdout)
    entries = data["entries"]
    # Guard against a vacuous pass on an empty listing.
    assert entries, "recursive listing returned no entries"
    assert all(e["type"] == "blob" for e in entries)
298
299
def test_ls_tree_recursive_with_path_prefix(tmp_path: pathlib.Path) -> None:
    """``-r`` combined with a path argument stays within that path."""
    repo = _init_repo(tmp_path)
    committed = {
        "src/a.py": b"# a\n",
        "lib/b.py": b"# b\n",
    }
    _commit_files(repo, committed)

    payload = json.loads(_invoke(repo, "-r", "HEAD", "src/", "--json").stdout)

    listed = [entry["path"] for entry in payload["entries"]]
    assert "src/a.py" in listed
    assert "lib/b.py" not in listed
311
312
313 # ---------------------------------------------------------------------------
314 # Integration — --name-only
315 # ---------------------------------------------------------------------------
316
317
def test_ls_tree_name_only_text_no_metadata(tmp_path: pathlib.Path) -> None:
    """``--name-only`` text output prints bare names without metadata columns.

    The output is required to be non-empty before the per-line checks run.
    Otherwise the loop body would never execute and the test would pass on
    blank output.
    """
    root = _init_repo(tmp_path)
    _commit_files(root, {"a.py": b"# a\n", "src/b.py": b"# b\n"})
    result = _invoke(root, "HEAD", "--name-only")
    assert result.exit_code == 0
    lines = result.stdout.strip().splitlines()
    # Guard against a vacuous pass when nothing was printed.
    assert lines, "--name-only produced no output"
    # Should have just names, no tabs or object IDs
    for line in lines:
        assert "\t" not in line
        assert len(line) < 100  # no 64-char SHA
327
328
def test_ls_tree_name_only_json(tmp_path: pathlib.Path) -> None:
    """``--name-only --json`` entries carry ``path`` but no ``object_id``.

    The exit code and a non-empty entry list are asserted first, so the
    per-entry checks cannot pass without any entry being inspected.
    """
    root = _init_repo(tmp_path)
    _commit_files(root, {"a.py": b"# a\n"})
    result = _invoke(root, "HEAD", "--name-only", "--json")
    assert result.exit_code == 0
    data = json.loads(result.stdout)
    entries = data["entries"]
    # Guard against a vacuous pass on an empty listing.
    assert entries, "--name-only --json returned no entries"
    # entries should have 'path' but no 'object_id'
    for e in entries:
        assert "path" in e
        assert "object_id" not in e
338
339
340 # ---------------------------------------------------------------------------
341 # Integration — --long (-l)
342 # ---------------------------------------------------------------------------
343
344
def test_ls_tree_long_includes_size_for_blobs(tmp_path: pathlib.Path) -> None:
    """``-l`` reports each blob's ``size`` as the byte length of its content."""
    repo = _init_repo(tmp_path)
    payload_bytes = b"hello world\n"
    _commit_files(repo, {"hello.py": payload_bytes})

    outcome = _invoke(repo, "-l", "HEAD", "--json")
    assert outcome.exit_code == 0

    listing = json.loads(outcome.stdout)
    first_blob = next(filter(lambda entry: entry["type"] == "blob", listing["entries"]))
    assert first_blob["size"] == len(payload_bytes)
354
355
def test_ls_tree_long_tree_size_is_none(tmp_path: pathlib.Path) -> None:
    """``-l`` reports ``size`` as ``None`` (JSON ``null``) for tree entries."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"src/a.py": b"# a\n"})

    listing = json.loads(_invoke(repo, "-l", "HEAD", "--json").stdout)

    first_tree = next(filter(lambda entry: entry["type"] == "tree", listing["entries"]))
    assert first_tree["size"] is None
363
364
365 # ---------------------------------------------------------------------------
366 # Integration — -d / --dirs-only
367 # ---------------------------------------------------------------------------
368
369
def test_ls_tree_dirs_only_shows_only_trees(tmp_path: pathlib.Path) -> None:
    """``--dirs-only`` lists directory entries and omits root-level files."""
    repo = _init_repo(tmp_path)
    committed = {"root.py": b"# r\n", "src/a.py": b"# a\n", "lib/b.py": b"# b\n"}
    _commit_files(repo, committed)

    outcome = _invoke(repo, "--dirs-only", "HEAD", "--json")
    assert outcome.exit_code == 0

    payload = json.loads(outcome.stdout)
    assert all(entry["type"] == "tree" for entry in payload["entries"])
    dir_paths = [entry["path"] for entry in payload["entries"]]
    assert "src/" in dir_paths
    assert "lib/" in dir_paths
    assert "root.py" not in dir_paths
381
382
383 # ---------------------------------------------------------------------------
384 # Integration — ref targeting (branch name and commit ID)
385 # ---------------------------------------------------------------------------
386
387
def test_ls_tree_branch_name_ref(tmp_path: pathlib.Path) -> None:
    """A branch name is accepted as the ref and resolves to its committed files."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"a.py": b"# a\n"}, branch="main")

    outcome = _invoke(repo, "main", "--json")

    assert outcome.exit_code == 0
    payload = json.loads(outcome.stdout)
    matches = (entry["path"] == "a.py" for entry in payload["entries"])
    assert any(matches)
395
396
def test_ls_tree_commit_id_ref(tmp_path: pathlib.Path) -> None:
    """A full commit ID is accepted as the ref and resolves to its committed files."""
    repo = _init_repo(tmp_path)
    head_id = _commit_files(repo, {"b.py": b"# b\n"})

    outcome = _invoke(repo, head_id, "--json")

    assert outcome.exit_code == 0
    payload = json.loads(outcome.stdout)
    matches = (entry["path"] == "b.py" for entry in payload["entries"])
    assert any(matches)
404
405
def test_ls_tree_nonexistent_ref_exits_nonzero(tmp_path: pathlib.Path) -> None:
    """An unknown ref name makes the command exit with a non-zero status."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"a.py": b"# a\n"})

    outcome = _invoke(repo, "no-such-branch", "--json")

    assert outcome.exit_code != 0
411
412
def test_ls_tree_empty_repo_exits_nonzero(tmp_path: pathlib.Path) -> None:
    """Listing ``HEAD`` in a repo with no commits exits with a non-zero status."""
    repo = _init_repo(tmp_path)

    outcome = _invoke(repo, "HEAD", "--json")

    assert outcome.exit_code != 0
417
418
419 # ---------------------------------------------------------------------------
420 # Integration — text format
421 # ---------------------------------------------------------------------------
422
423
def test_ls_tree_text_format_tab_separated(tmp_path: pathlib.Path) -> None:
    """Default text output is ``<mode> <type> <object_id>\\t<path>`` per line."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"a.py": b"# a\n"})

    outcome = _invoke(repo, "HEAD")
    assert outcome.exit_code == 0

    rows = [row for row in outcome.stdout.strip().splitlines() if row]
    assert len(rows) >= 1
    for row in rows:
        assert "\t" in row
        # Everything before the first tab is the metadata column group.
        meta, _, _path = row.partition("\t")
        fields = meta.split()
        assert len(fields) == 3
        mode, kind = fields[0], fields[1]
        assert mode in ("100644", "040000")
        assert kind in ("blob", "tree")
439
440
def test_ls_tree_json_output_has_commit_id(tmp_path: pathlib.Path) -> None:
    """JSON output has the resolved ``commit_id`` plus ``entries`` and ``treeish`` keys."""
    repo = _init_repo(tmp_path)
    head_id = _commit_files(repo, {"a.py": b"# a\n"})

    payload = json.loads(_invoke(repo, "HEAD", "--json").stdout)

    assert payload["commit_id"] == head_id
    assert "entries" in payload
    assert "treeish" in payload
449
450
451 # ---------------------------------------------------------------------------
452 # Security
453 # ---------------------------------------------------------------------------
454
455
def test_ls_tree_path_traversal_in_path_arg_rejected(tmp_path: pathlib.Path) -> None:
    """A path argument containing ``..`` segments gives a non-zero exit."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"a.py": b"# a\n"})

    outcome = _invoke(repo, "HEAD", "../../../etc/")

    assert outcome.exit_code != 0
461
462
def test_ls_tree_ansi_in_ref_rejected(tmp_path: pathlib.Path) -> None:
    """A ref containing ANSI escape sequences gives a non-zero exit."""
    repo = _init_repo(tmp_path)
    _commit_files(repo, {"a.py": b"# a\n"})

    hostile_ref = "\x1b[31mbad\x1b[0m"
    outcome = _invoke(repo, hostile_ref)

    assert outcome.exit_code != 0
468
469
470 # ---------------------------------------------------------------------------
471 # Stress
472 # ---------------------------------------------------------------------------
473
474
def test_ls_tree_500_files_root_listing(tmp_path: pathlib.Path) -> None:
    """Root listing of a 500-file repo must complete and show correct dir entries."""
    repo = _init_repo(tmp_path)
    # 10 packages x 50 modules, each with distinct content.
    files = {
        f"pkg_{i}/module_{j}.py": f"# {i},{j}\n".encode()
        for i in range(10)
        for j in range(50)
    }
    _commit_files(repo, files)

    outcome = _invoke(repo, "HEAD", "--json")
    assert outcome.exit_code == 0

    payload = json.loads(outcome.stdout)
    # Root level should have 10 synthetic tree entries, one per pkg_*
    tree_count = sum(1 for entry in payload["entries"] if entry["type"] == "tree")
    assert tree_count == 10
489
490
def test_ls_tree_500_files_recursive(tmp_path: pathlib.Path) -> None:
    """Recursive listing of a 500-file repo returns exactly 500 entries."""
    repo = _init_repo(tmp_path)
    files = {}
    for i in range(10):
        for j in range(50):
            files[f"pkg_{i}/mod_{j}.py"] = b"# x\n"
    _commit_files(repo, files)

    outcome = _invoke(repo, "-r", "HEAD", "--json")

    assert outcome.exit_code == 0
    payload = json.loads(outcome.stdout)
    assert len(payload["entries"]) == 500
499
500
class TestRegisterFlags:
    """Parser wiring for the ``--json`` / ``-j`` flag registered by ``ls-tree``."""

    @staticmethod
    def _parse(*argv: str) -> "argparse.Namespace":
        """Register ``ls-tree`` on a fresh parser and parse ``ls-tree`` plus *argv*."""
        import argparse
        from muse.cli.commands.ls_tree import register

        parser = argparse.ArgumentParser()
        subparsers = parser.add_subparsers()
        register(subparsers)
        return parser.parse_args(["ls-tree", *argv])

    def test_default_json_out_is_false(self) -> None:
        """Without any flag, ``json_out`` is ``False``."""
        namespace = self._parse()
        assert namespace.json_out is False

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` sets ``json_out`` to ``True``."""
        namespace = self._parse("--json")
        assert namespace.json_out is True

    def test_j_shorthand_sets_json_out(self) -> None:
        """``-j`` sets ``json_out`` to ``True``."""
        namespace = self._parse("-j")
        assert namespace.json_out is True
File History 1 commit
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago