"""Tests for ``muse ls-tree`` — directory-aware tree listing from a snapshot. Coverage tiers: - Unit: _build_tree_entries, _synthetic_tree_id helpers - Integration: root listing (files + synthetic dirs), path-scoped listing, -r/--recursive (all blobs, no synthetic dirs), --name-only, -l/--long (includes object size), -d/--dirs-only, branch ref, commit ID ref, --json schema, text format, mode strings (100644 for blob, 040000 for tree) - End-to-end: full CLI via CliRunner - Security: path traversal in path arg rejected, ANSI in ref rejected - Edge cases: empty repo, nonexistent ref, path not in tree - Stress: 500-file repo, tree listing root and deep prefix """ from __future__ import annotations from collections.abc import Mapping import datetime import json import pathlib import pytest from tests.cli_test_helper import CliRunner from muse.core.object_store import write_object from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.types import Manifest, blob_id from muse.core.paths import muse_dir, ref_path runner = CliRunner() _REPO_ID = "ls-tree-test" # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- _counter = 0 def _init_repo(path: pathlib.Path) -> pathlib.Path: dot_muse = muse_dir(path) for d in ("commits", "snapshots", "objects", "refs/heads", "code"): (dot_muse / d).mkdir(parents=True, exist_ok=True) (dot_muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8") (dot_muse / "repo.json").write_text( json.dumps({"repo_id": _REPO_ID, "domain": "code"}), encoding="utf-8" ) return path def _env(repo: pathlib.Path) -> Mapping[str, str]: return {"MUSE_REPO_ROOT": str(repo)} def _commit_files( root: pathlib.Path, files: Mapping[str, bytes], branch: str = "main", ) -> str: global _counter _counter += 1 manifest: Manifest = {} for rel_path, content in files.items(): obj_id = blob_id(content) write_object(root, obj_id, content) manifest[rel_path] = obj_id abs_path = root / rel_path abs_path.parent.mkdir(parents=True, exist_ok=True) abs_path.write_bytes(content) snap_id = hash_snapshot(manifest) write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) committed_at = datetime.datetime.now(datetime.timezone.utc) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message=f"commit {_counter}", committed_at_iso=committed_at.isoformat(), ) write_commit( root, CommitRecord( commit_id=commit_id, branch=branch, snapshot_id=snap_id, message=f"commit {_counter}", committed_at=committed_at, ), ) (ref_path(root, branch)).write_text(commit_id, encoding="utf-8") return commit_id def _invoke(repo: pathlib.Path, *args: str) -> "InvokeResult": from muse.cli.app import main as cli return runner.invoke(cli, ["ls-tree", *args], env=_env(repo)) # --------------------------------------------------------------------------- # Unit — _build_tree_entries # --------------------------------------------------------------------------- def test_build_tree_entries_separates_blobs_and_dirs() -> None: from muse.cli.commands.ls_tree import _build_tree_entries manifest = { "README.md": "a" * 64, "src/main.py": "b" * 64, "src/utils.py": "c" * 64, "docs/guide.md": "d" * 64, } entries = _build_tree_entries(manifest, path_prefix="", recursive=False) types = {e["path"]: e["type"] for e in entries} assert types["README.md"] == "blob" assert types["src/"] == "tree" assert types["docs/"] == "tree" # Should not show src/main.py at root level (not recursive) assert "src/main.py" not in types assert "src/utils.py" not in types def test_build_tree_entries_recursive_only_blobs() -> None: from muse.cli.commands.ls_tree import _build_tree_entries manifest = { "README.md": "a" * 64, "src/main.py": "b" * 64, "src/sub/helper.py": "c" * 64, } entries = _build_tree_entries(manifest, path_prefix="", recursive=True) types = [e["type"] for e in entries] assert all(t == "blob" for t in types), f"Got non-blob entries: {types}" paths = [e["path"] for e in entries] assert "src/main.py" in paths assert "src/sub/helper.py" in paths def test_build_tree_entries_path_prefix_scoping() -> None: from muse.cli.commands.ls_tree import _build_tree_entries manifest = { "src/main.py": "b" * 64, "src/sub/helper.py": "c" * 64, "root.py": "d" * 64, } entries = _build_tree_entries(manifest, path_prefix="src/", recursive=False) paths = [e["path"] for e in entries] assert "src/main.py" in paths assert "src/sub/" in paths assert "root.py" not in paths def test_build_tree_entries_sorted() -> None: from muse.cli.commands.ls_tree import _build_tree_entries manifest = { "z.py": "a" * 64, "a.py": "b" * 64, "m.py": "c" * 64, } entries = _build_tree_entries(manifest, path_prefix="", recursive=False) paths = [e["path"] for e in entries] assert paths == sorted(paths) def test_synthetic_tree_id_is_deterministic() -> None: from muse.cli.commands.ls_tree import _synthetic_tree_id manifest = {"src/a.py": "x" * 64, "src/b.py": "y" * 64} id1 = _synthetic_tree_id(manifest, "src/") id2 = _synthetic_tree_id(manifest, "src/") assert id1 == id2 assert id1.startswith("sha256:") assert len(id1) == 71 # "sha256:" (7) + 64 hex chars def test_synthetic_tree_id_differs_for_different_content() -> None: from muse.cli.commands.ls_tree import _synthetic_tree_id manifest_a = {"src/a.py": "x" * 64} manifest_b = {"src/b.py": "y" * 64} assert _synthetic_tree_id(manifest_a, "src/") != _synthetic_tree_id(manifest_b, "src/") # --------------------------------------------------------------------------- # Integration — root listing (non-recursive) # --------------------------------------------------------------------------- def test_ls_tree_root_shows_blob_for_root_file(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"README.md": b"# readme\n"}) result = _invoke(root, "HEAD", "--json") assert result.exit_code == 0 data = json.loads(result.stdout) paths = [e["path"] for e in data["entries"]] assert "README.md" in paths def test_ls_tree_root_shows_synthetic_tree_for_subdir(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"src/main.py": b"# main\n", "README.md": b"# r\n"}) result = _invoke(root, "HEAD", "--json") assert result.exit_code == 0 data = json.loads(result.stdout) types = {e["path"]: e["type"] for e in data["entries"]} assert types.get("README.md") == "blob" assert types.get("src/") == "tree" # src/main.py should NOT appear at root level assert "src/main.py" not in types def test_ls_tree_root_blob_mode_is_100644(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"a.py": b"# a\n"}) result = _invoke(root, "HEAD", "--json") data = json.loads(result.stdout) blob = next(e for e in data["entries"] if e["type"] == "blob") assert blob["mode"] == "100644" def test_ls_tree_root_tree_mode_is_040000(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"src/a.py": b"# a\n"}) result = _invoke(root, "HEAD", "--json") data = json.loads(result.stdout) tree = next(e for e in data["entries"] if e["type"] == "tree") assert tree["mode"] == "040000" def test_ls_tree_entries_sorted_alphabetically(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"z.py": b"# z\n", "a.py": b"# a\n", "src/m.py": b"# m\n"}) result = _invoke(root, "HEAD", "--json") data = json.loads(result.stdout) paths = [e["path"] for e in data["entries"]] assert paths == sorted(paths) # --------------------------------------------------------------------------- # Integration — path-scoped listing # --------------------------------------------------------------------------- def test_ls_tree_path_arg_scopes_to_directory(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, { "src/main.py": b"# main\n", "src/sub/helper.py": b"# helper\n", "root.py": b"# root\n", }) result = _invoke(root, "HEAD", "src/", "--json") assert result.exit_code == 0 data = json.loads(result.stdout) paths = [e["path"] for e in data["entries"]] assert "src/main.py" in paths assert "src/sub/" in paths assert "root.py" not in paths def test_ls_tree_path_arg_nonexistent_shows_empty(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"a.py": b"# a\n"}) result = _invoke(root, "HEAD", "nonexistent/", "--json") assert result.exit_code == 0 data = json.loads(result.stdout) assert data["entries"] == [] # --------------------------------------------------------------------------- # Integration — --recursive # --------------------------------------------------------------------------- def test_ls_tree_recursive_lists_all_blobs(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, { "a.py": b"# a\n", "src/b.py": b"# b\n", "src/deep/c.py": b"# c\n", }) result = _invoke(root, "-r", "HEAD", "--json") assert result.exit_code == 0 data = json.loads(result.stdout) paths = [e["path"] for e in data["entries"]] assert "a.py" in paths assert "src/b.py" in paths assert "src/deep/c.py" in paths def test_ls_tree_recursive_no_tree_entries(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"src/a.py": b"# a\n", "src/b.py": b"# b\n"}) result = _invoke(root, "-r", "HEAD", "--json") data = json.loads(result.stdout) assert all(e["type"] == "blob" for e in data["entries"]) def test_ls_tree_recursive_with_path_prefix(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, { "src/a.py": b"# a\n", "lib/b.py": b"# b\n", }) result = _invoke(root, "-r", "HEAD", "src/", "--json") data = json.loads(result.stdout) paths = [e["path"] for e in data["entries"]] assert "src/a.py" in paths assert "lib/b.py" not in paths # --------------------------------------------------------------------------- # Integration — --name-only # --------------------------------------------------------------------------- def test_ls_tree_name_only_text_no_metadata(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"a.py": b"# a\n", "src/b.py": b"# b\n"}) result = _invoke(root, "HEAD", "--name-only") assert result.exit_code == 0 # Should have just names, no tabs or object IDs for line in result.stdout.strip().splitlines(): assert "\t" not in line assert len(line) < 100 # no 64-char SHA def test_ls_tree_name_only_json(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"a.py": b"# a\n"}) result = _invoke(root, "HEAD", "--name-only", "--json") data = json.loads(result.stdout) # entries should have 'path' but no 'object_id' for e in data["entries"]: assert "path" in e assert "object_id" not in e # --------------------------------------------------------------------------- # Integration — --long (-l) # --------------------------------------------------------------------------- def test_ls_tree_long_includes_size_for_blobs(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) content = b"hello world\n" _commit_files(root, {"hello.py": content}) result = _invoke(root, "-l", "HEAD", "--json") assert result.exit_code == 0 data = json.loads(result.stdout) blob = next(e for e in data["entries"] if e["type"] == "blob") assert blob["size"] == len(content) def test_ls_tree_long_tree_size_is_none(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"src/a.py": b"# a\n"}) result = _invoke(root, "-l", "HEAD", "--json") data = json.loads(result.stdout) tree = next(e for e in data["entries"] if e["type"] == "tree") assert tree["size"] is None # --------------------------------------------------------------------------- # Integration — -d / --dirs-only # --------------------------------------------------------------------------- def test_ls_tree_dirs_only_shows_only_trees(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"root.py": b"# r\n", "src/a.py": b"# a\n", "lib/b.py": b"# b\n"}) result = _invoke(root, "--dirs-only", "HEAD", "--json") assert result.exit_code == 0 data = json.loads(result.stdout) assert all(e["type"] == "tree" for e in data["entries"]) types = [e["path"] for e in data["entries"]] assert "src/" in types assert "lib/" in types assert "root.py" not in types # --------------------------------------------------------------------------- # Integration — ref targeting (branch name and commit ID) # --------------------------------------------------------------------------- def test_ls_tree_branch_name_ref(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"a.py": b"# a\n"}, branch="main") result = _invoke(root, "main", "--json") assert result.exit_code == 0 data = json.loads(result.stdout) assert any(e["path"] == "a.py" for e in data["entries"]) def test_ls_tree_commit_id_ref(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) commit_id = _commit_files(root, {"b.py": b"# b\n"}) result = _invoke(root, commit_id, "--json") assert result.exit_code == 0 data = json.loads(result.stdout) assert any(e["path"] == "b.py" for e in data["entries"]) def test_ls_tree_nonexistent_ref_exits_nonzero(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"a.py": b"# a\n"}) result = _invoke(root, "no-such-branch", "--json") assert result.exit_code != 0 def test_ls_tree_empty_repo_exits_nonzero(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) result = _invoke(root, "HEAD", "--json") assert result.exit_code != 0 # --------------------------------------------------------------------------- # Integration — text format # --------------------------------------------------------------------------- def test_ls_tree_text_format_tab_separated(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"a.py": b"# a\n"}) result = _invoke(root, "HEAD") assert result.exit_code == 0 lines = [l for l in result.stdout.strip().splitlines() if l] assert len(lines) >= 1 # Default text format: " \t" for line in lines: assert "\t" in line meta, path = line.split("\t", 1) parts = meta.split() assert len(parts) == 3 assert parts[0] in ("100644", "040000") assert parts[1] in ("blob", "tree") def test_ls_tree_json_output_has_commit_id(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) commit_id = _commit_files(root, {"a.py": b"# a\n"}) result = _invoke(root, "HEAD", "--json") data = json.loads(result.stdout) assert data["commit_id"] == commit_id assert "entries" in data assert "treeish" in data # --------------------------------------------------------------------------- # Security # --------------------------------------------------------------------------- def test_ls_tree_path_traversal_in_path_arg_rejected(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"a.py": b"# a\n"}) result = _invoke(root, "HEAD", "../../../etc/") assert result.exit_code != 0 def test_ls_tree_ansi_in_ref_rejected(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) _commit_files(root, {"a.py": b"# a\n"}) result = _invoke(root, "\x1b[31mbad\x1b[0m") assert result.exit_code != 0 # --------------------------------------------------------------------------- # Stress # --------------------------------------------------------------------------- def test_ls_tree_500_files_root_listing(tmp_path: pathlib.Path) -> None: """Root listing of a 500-file repo must complete and show correct dir entries.""" root = _init_repo(tmp_path) files = {} for i in range(10): for j in range(50): files[f"pkg_{i}/module_{j}.py"] = f"# {i},{j}\n".encode() _commit_files(root, files) result = _invoke(root, "HEAD", "--json") assert result.exit_code == 0 data = json.loads(result.stdout) # Root level should have 10 synthetic tree entries, one per pkg_* trees = [e for e in data["entries"] if e["type"] == "tree"] assert len(trees) == 10 def test_ls_tree_500_files_recursive(tmp_path: pathlib.Path) -> None: root = _init_repo(tmp_path) files = {f"pkg_{i}/mod_{j}.py": b"# x\n" for i in range(10) for j in range(50)} _commit_files(root, files) result = _invoke(root, "-r", "HEAD", "--json") assert result.exit_code == 0 data = json.loads(result.stdout) assert len(data["entries"]) == 500 class TestRegisterFlags: def test_default_json_out_is_false(self) -> None: import argparse from muse.cli.commands.ls_tree import register p = argparse.ArgumentParser() subs = p.add_subparsers() register(subs) args = p.parse_args(["ls-tree"]) assert args.json_out is False def test_json_flag_sets_json_out(self) -> None: import argparse from muse.cli.commands.ls_tree import register p = argparse.ArgumentParser() subs = p.add_subparsers() register(subs) args = p.parse_args(["ls-tree", "--json"]) assert args.json_out is True def test_j_shorthand_sets_json_out(self) -> None: import argparse from muse.cli.commands.ls_tree import register p = argparse.ArgumentParser() subs = p.add_subparsers() register(subs) args = p.parse_args(["ls-tree", "-j"]) assert args.json_out is True