"""Comprehensive tests for ``muse ls-files``. Coverage tiers -------------- - Integration: JSON/text format, --commit, --path-prefix, empty manifest - Security: ANSI in file path stripped in text mode, JSON mode safe - Stress: 1 000-file manifest, 200 sequential calls """ from __future__ import annotations type _FileStore = dict[str, bytes] import datetime import json import pathlib from muse.core.errors import ExitCode from muse.core.object_store import write_object from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.types import Manifest, blob_id, long_id, split_id from muse.core.paths import muse_dir, ref_path from tests.cli_test_helper import CliRunner, InvokeResult runner = CliRunner() # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: repo = tmp_path / "repo" dot_muse = muse_dir(repo) for sub in ("objects", "commits", "snapshots", "refs/heads"): (dot_muse / sub).mkdir(parents=True) (dot_muse / "HEAD").write_text("ref: refs/heads/main") (dot_muse / "repo.json").write_text(json.dumps({"repo_id": "test", "domain": "code"})) return repo def _oid(content: bytes) -> str: return blob_id(content) def _add_commit( repo: pathlib.Path, manifest: _FileStore, *, commit_suffix: str = "a", branch: str = "main", set_head: bool = True, ) -> str: """Store objects, snapshot, and commit; return commit_id.""" stored: Manifest = {} for path, content in manifest.items(): oid = _oid(content) write_object(repo, oid, content) stored[path] = oid committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) snap_id = hash_snapshot(stored) write_snapshot(repo, SnapshotRecord( snapshot_id=snap_id, manifest=stored, created_at=committed_at, )) commit_id = hash_commit( parent_ids=[], snapshot_id=snap_id, message="test", committed_at_iso=committed_at.isoformat(), author="tester", ) write_commit(repo, CommitRecord( commit_id=commit_id, branch=branch, snapshot_id=snap_id, message="test", committed_at=committed_at, author="tester", parent_commit_id=None, )) if set_head: ref = ref_path(repo, branch) ref.parent.mkdir(parents=True, exist_ok=True) ref.write_text(commit_id) return commit_id def _ls(repo: pathlib.Path, *args: str) -> InvokeResult: from muse.cli.app import main as cli return runner.invoke( cli, ["ls-files", *args], env={"MUSE_REPO_ROOT": str(repo)}, ) def _lsj(repo: pathlib.Path, *args: str) -> InvokeResult: """Like _ls but always passes --json.""" return _ls(repo, "--json", *args) # --------------------------------------------------------------------------- # Integration — JSON format # --------------------------------------------------------------------------- class TestJsonFormat: def test_lists_files(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _add_commit(repo, {"src/main.py": b"# main", "README.md": b"# readme"}) result = _lsj(repo) assert result.exit_code == 0 data = json.loads(result.output) assert data["file_count"] == 2 paths = [f["path"] for f in data["files"]] assert "src/main.py" in paths assert "README.md" in paths def test_files_sorted_alphabetically(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, {"z.py": b"z", "a.py": b"a", "m.py": b"m"}) data = json.loads(_lsj(repo).output) paths = [f["path"] for f in data["files"]] assert paths == sorted(paths) def test_json_has_commit_and_snapshot_id(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _add_commit(repo, {"f.py": b"x"}) data = json.loads(_lsj(repo).output) assert data["commit_id"] == cid assert data["snapshot_id"].startswith("sha256:") def test_empty_manifest(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, {}) data = json.loads(_lsj(repo).output) assert data["file_count"] == 0 assert data["files"] == [] def test_json_flag_shorthand(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, {"f.py": b"x"}) result = _lsj(repo) assert result.exit_code == 0 data = json.loads(result.output) assert data["file_count"] == 1 def test_object_ids_are_sha256_prefixed(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, {"a.py": b"content"}) data = json.loads(_lsj(repo).output) for f in data["files"]: assert f["object_id"].startswith("sha256:") _, hex_part = split_id(f["object_id"]) assert len(hex_part) == 64 assert all(c in "0123456789abcdef" for c in hex_part) # --------------------------------------------------------------------------- # Integration — text format # --------------------------------------------------------------------------- class TestTextFormat: def test_text_tab_separated(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, {"hello.py": b"hi"}) # Default (no --json) emits text: \t per line result = _ls(repo) assert result.exit_code == 0 line = result.output.strip() parts = line.split("\t") assert len(parts) == 2 assert parts[0].startswith("sha256:") # canonical object_id assert parts[1] == "hello.py" def test_text_oid_matches_json_oid(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, {"check.py": b"content"}) json_data = json.loads(_lsj(repo).output) text_out = _ls(repo).output.strip() json_oid = json_data["files"][0]["object_id"] text_oid = text_out.split("\t")[0] assert json_oid == text_oid # --------------------------------------------------------------------------- # Integration — --commit flag # --------------------------------------------------------------------------- class TestCommitFlag: def test_explicit_commit_resolves(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) cid = _add_commit(repo, {"explicit.py": b"content"}) result = _lsj(repo, "--commit", cid) assert result.exit_code == 0 data = json.loads(result.output) assert data["commit_id"] == cid def test_invalid_commit_id_errors(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _ls(repo, "--commit", "not-a-valid-id") assert result.exit_code == ExitCode.USER_ERROR def test_nonexistent_commit_id_errors(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _ls(repo, "--commit", long_id("f" * 64)) assert result.exit_code == ExitCode.USER_ERROR def test_no_commits_on_branch_errors(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _ls(repo) assert result.exit_code == ExitCode.USER_ERROR # --------------------------------------------------------------------------- # Integration — --path-prefix filter # --------------------------------------------------------------------------- class TestPathPrefix: def test_prefix_filters_to_subtree(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, { "src/main.py": b"main", "src/utils.py": b"utils", "tests/test_main.py": b"test", "README.md": b"readme", }) data = json.loads(_lsj(repo, "--path-prefix", "src/").output) paths = [f["path"] for f in data["files"]] assert all(p.startswith("src/") for p in paths) assert len(paths) == 2 def test_prefix_file_count_reflects_filter(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, { "a/x.py": b"x", "a/y.py": b"y", "b/z.py": b"z", }) data = json.loads(_lsj(repo, "--path-prefix", "a/").output) assert data["file_count"] == 2 def test_prefix_no_match_returns_empty(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, {"src/main.py": b"main"}) data = json.loads(_lsj(repo, "--path-prefix", "tests/").output) assert data["file_count"] == 0 assert data["files"] == [] def test_prefix_text_format(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, {"src/a.py": b"a", "tests/b.py": b"b"}) # Default (no --json) with --path-prefix emits text lines result = _ls(repo, "--path-prefix", "src/") assert result.exit_code == 0 lines = [l for l in result.output.strip().splitlines() if l] assert len(lines) == 1 assert "src/a.py" in lines[0] # --------------------------------------------------------------------------- # Security # --------------------------------------------------------------------------- class TestSecurity: def test_ansi_in_path_stripped_in_text_mode(self, tmp_path: pathlib.Path) -> None: """File path with ANSI escape must be sanitized in text mode.""" repo = _make_repo(tmp_path) malicious_path = "src/\x1b[31mmalicious\x1b[0m.py" _add_commit(repo, {malicious_path: b"content"}) # Default (no --json) emits sanitized text result = _ls(repo) assert result.exit_code == 0 assert "\x1b" not in result.output def test_ansi_in_path_preserved_in_json(self, tmp_path: pathlib.Path) -> None: """JSON mode encodes ANSI as \\u001b — never emits raw escape sequences.""" repo = _make_repo(tmp_path) malicious_path = "src/\x1b[31mmalicious\x1b[0m.py" _add_commit(repo, {malicious_path: b"content"}) result = _lsj(repo) assert result.exit_code == 0 # No raw ANSI bytes in stdout — json.dumps encodes \x1b as \u001b assert "\x1b" not in result.output data = json.loads(result.output) # The path is preserved in the JSON payload (as \u001b-encoded) paths = [f["path"] for f in data["files"]] assert any("\x1b" in p or "\u001b" in p for p in paths) def test_path_traversal_commit_id_rejected(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _ls(repo, "--commit", "../../../etc/passwd") assert result.exit_code == ExitCode.USER_ERROR def test_no_traceback_on_invalid_input(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) result = _ls(repo, "--commit", "bad!") assert "Traceback" not in result.output # --------------------------------------------------------------------------- # Stress # --------------------------------------------------------------------------- class TestStress: def test_1000_file_manifest(self, tmp_path: pathlib.Path) -> None: """1 000-file manifest lists and returns in reasonable time.""" repo = _make_repo(tmp_path) manifest = {f"src/file_{i:04d}.py": f"content {i}".encode() for i in range(1000)} _add_commit(repo, manifest) result = _lsj(repo) assert result.exit_code == 0 data = json.loads(result.output) assert data["file_count"] == 1000 def test_1000_file_prefix_filter(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) manifest = {f"a/file_{i:04d}.py": b"a" for i in range(500)} manifest.update({f"b/file_{i:04d}.py": b"b" for i in range(500)}) _add_commit(repo, manifest) data = json.loads(_lsj(repo, "--path-prefix", "a/").output) assert data["file_count"] == 500 def test_200_sequential_calls(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) _add_commit(repo, {"stable.py": b"content"}) for i in range(200): result = _lsj(repo) assert result.exit_code == 0, f"failed at iteration {i}" assert json.loads(result.output)["file_count"] == 1 class TestRegisterFlags: def test_json_short_flag(self) -> None: import argparse from muse.cli.commands.ls_files import register p = argparse.ArgumentParser() subs = p.add_subparsers() register(subs) args = p.parse_args(["ls-files", "-j"]) assert args.json_out is True def test_json_long_flag(self) -> None: import argparse from muse.cli.commands.ls_files import register p = argparse.ArgumentParser() subs = p.add_subparsers() register(subs) args = p.parse_args(["ls-files", "--json"]) assert args.json_out is True def test_default_no_json(self) -> None: import argparse from muse.cli.commands.ls_files import register p = argparse.ArgumentParser() subs = p.add_subparsers() register(subs) # Command-specific required args may differ; just check dest exists when possible try: args = p.parse_args(["ls-files"]) assert args.json_out is False except SystemExit: pass # required positional args missing — flag default still correct