"""Supercharge tests for ``muse read-snapshot``. Coverage tiers -------------- - Unit: _short_id helper — prefix preservation, hex length - Integration: duration_ms + exit_code in JSON; text short-ID format - Flag interaction: --no-manifest + --path-prefix together - Data integrity: sha256: on snapshot_id; valid JSON; unicode paths - Path prefix edge cases: empty prefix, no trailing slash, exact filename - Performance: single read and 1000-file manifest under threshold """ from __future__ import annotations from collections.abc import Mapping import datetime import json import pathlib import re import time from muse.core.errors import ExitCode from muse.core.ids import hash_snapshot as compute_snapshot_id from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from tests.cli_test_helper import CliRunner, InvokeResult from muse.core.types import NULL_LONG_ID, fake_id, long_id, split_id from muse.core.paths import muse_dir runner = CliRunner() _CREATED_AT = datetime.datetime(2026, 3, 18, 12, 0, tzinfo=datetime.timezone.utc) _SHA256_FULL = re.compile(r"^sha256:[0-9a-f]{64}$") _SHA256_SHORT_19 = re.compile(r"^sha256:[0-9a-f]{12}$") # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: repo = tmp_path / "repo" dot_muse = muse_dir(repo) for sub in ("objects", "commits", "snapshots", "refs/heads"): (dot_muse / sub).mkdir(parents=True) (dot_muse / "HEAD").write_text("ref: refs/heads/main") (dot_muse / "repo.json").write_text(json.dumps({"repo_id": "test-repo", "domain": "code"})) return repo def _snap(repo: pathlib.Path, manifest: Mapping[str, object] | None = None) -> str: m = manifest or {} sid = compute_snapshot_id(m) write_snapshot(repo, SnapshotRecord(snapshot_id=sid, manifest=m, created_at=_CREATED_AT)) return sid def _rs(repo: pathlib.Path, *args: str) -> InvokeResult: from muse.cli.app import main as cli return runner.invoke(cli, ["read-snapshot", *args], env={"MUSE_REPO_ROOT": str(repo)}) def _rsj(repo: pathlib.Path, *args: str) -> InvokeResult: """Like _rs but always passes --json for JSON-output tests.""" return _rs(repo, "--json", *args) def _oid(n: int) -> str: """Canonical sha256:-prefixed object ID for test manifests.""" return long_id(format(n, "064x")) # --------------------------------------------------------------------------- # Unit — _short_id # --------------------------------------------------------------------------- class TestSnapshotIdFormat: """Snapshot IDs are full sha256:<64-hex> (71 chars).""" def test_snapshot_id_keeps_sha256_prefix(self) -> None: sid = long_id("a" * 64) assert sid.startswith("sha256:") def test_snapshot_id_total_length_is_71(self) -> None: sid = NULL_LONG_ID assert len(sid) == 71 def test_snapshot_id_matches_full_regex(self) -> None: sid = long_id("abcdef01" * 8) assert _SHA256_FULL.match(sid) # --------------------------------------------------------------------------- # Integration — duration_ms and exit_code # --------------------------------------------------------------------------- class TestDurationAndExitCode: def test_duration_ms_present_on_success(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo) data = json.loads(_rsj(repo, sid).output) assert "duration_ms" in data, "duration_ms must be present in JSON output" def test_exit_code_zero_on_success(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo) data = json.loads(_rsj(repo, sid).output) assert data["exit_code"] == 0 def test_duration_ms_is_float(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo) data = json.loads(_rsj(repo, sid).output) assert isinstance(data["duration_ms"], float) def test_duration_ms_non_negative(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo) data = json.loads(_rsj(repo, sid).output) assert data["duration_ms"] >= 0.0 def test_duration_ms_3dp_precision(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo) ms = json.loads(_rsj(repo, sid).output)["duration_ms"] assert round(ms, 3) == ms def test_duration_ms_present_with_no_manifest(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo, {"a.py": _oid(1)}) data = json.loads(_rsj(repo, "--no-manifest", sid).output) assert "duration_ms" in data assert "exit_code" in data def test_duration_ms_present_with_path_prefix(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo, {"src/a.py": _oid(1), "tests/b.py": _oid(2)}) data = json.loads(_rsj(repo, "--path-prefix", "src/", sid).output) assert "duration_ms" in data assert data["exit_code"] == 0 # --------------------------------------------------------------------------- # Integration — text format short ID # --------------------------------------------------------------------------- class TestTextFormatFullId: """Text format must emit the full sha256:<64-hex> (71 chars) snapshot ID.""" def _full_token(self, line: str) -> str | None: for tok in line.split(): if _SHA256_FULL.match(tok): return tok return None def test_text_full_id_has_sha256_prefix(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo, {"f.py": _oid(1)}) result = _rs(repo, sid) assert result.exit_code == 0 tok = self._full_token(result.output.strip()) assert tok is not None, f"no sha256:<64-hex> token in: {result.output!r}" assert tok.startswith("sha256:") def test_text_full_id_has_64_hex_chars(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo) result = _rs(repo, sid) tok = self._full_token(result.output.strip()) assert tok is not None assert tok.startswith("sha256:") assert len(tok[len("sha256:"):]) == 64 def test_text_full_id_total_length_is_71(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo) result = _rs(repo, sid) tok = self._full_token(result.output.strip()) assert tok is not None assert len(tok) == 71 def test_text_full_id_matches_snapshot_id(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo, {"x.py": _oid(9)}) result = _rs(repo, sid) tok = self._full_token(result.output.strip()) assert tok is not None assert tok == sid, f"text output ID {tok!r} does not match snapshot_id {sid!r}" # --------------------------------------------------------------------------- # Flag interaction — --no-manifest + --path-prefix together # --------------------------------------------------------------------------- class TestFlagInteraction: """--no-manifest and --path-prefix may be combined. Use case: "how many files are under src/ without downloading any OIDs?" The file_count reflects the filtered count; manifest is omitted. """ def test_no_manifest_plus_path_prefix_succeeds(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo, { "src/a.py": _oid(1), "src/b.py": _oid(2), "tests/c.py": _oid(3), }) result = _rsj(repo, "--no-manifest", "--path-prefix", "src/", sid) assert result.exit_code == 0, result.output def test_no_manifest_plus_path_prefix_file_count_is_filtered(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo, { "src/a.py": _oid(1), "src/b.py": _oid(2), "tests/c.py": _oid(3), }) data = json.loads(_rsj(repo, "--no-manifest", "--path-prefix", "src/", sid).output) assert data["file_count"] == 2, "file_count must reflect the prefix-filtered count" def test_no_manifest_plus_path_prefix_manifest_absent(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo, {"src/a.py": _oid(1)}) data = json.loads(_rsj(repo, "--no-manifest", "--path-prefix", "src/", sid).output) assert "manifest" not in data def test_no_manifest_plus_path_prefix_has_duration_ms(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo, {"src/a.py": _oid(1)}) data = json.loads(_rsj(repo, "--no-manifest", "--path-prefix", "src/", sid).output) assert "duration_ms" in data assert data["exit_code"] == 0 # --------------------------------------------------------------------------- # Data integrity # --------------------------------------------------------------------------- class TestDataIntegrity: def test_snapshot_id_has_sha256_prefix(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo) data = json.loads(_rsj(repo, sid).output) assert _SHA256_FULL.match(data["snapshot_id"]), \ f"snapshot_id must be sha256:<64hex>, got {data['snapshot_id']!r}" def test_json_output_is_valid_json(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo, {"a.py": _oid(1)}) result = _rsj(repo, sid) assert result.exit_code == 0 data = json.loads(result.output) assert isinstance(data, dict) def test_manifest_values_are_strings(self, tmp_path: pathlib.Path) -> None: """Manifest object IDs are strings — no type coercion.""" repo = _make_repo(tmp_path) sid = _snap(repo, {"a.py": _oid(1), "b.py": _oid(2)}) data = json.loads(_rsj(repo, sid).output) for path, oid in data["manifest"].items(): assert isinstance(oid, str), f"manifest[{path!r}] must be a string, got {type(oid)}" def test_unicode_paths_in_manifest(self, tmp_path: pathlib.Path) -> None: """Unicode file paths round-trip through JSON without corruption.""" repo = _make_repo(tmp_path) paths = { "src/音楽.py": _oid(1), "tracks/café/main.mid": _oid(2), "docs/naïve_approach.md": _oid(3), } sid = _snap(repo, paths) data = json.loads(_rsj(repo, sid).output) assert data["file_count"] == 3 for p in paths: assert p in data["manifest"], f"unicode path {p!r} missing from manifest" def test_created_at_iso8601_with_timezone(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo) data = json.loads(_rsj(repo, sid).output) dt = datetime.datetime.fromisoformat(data["created_at"]) assert dt.tzinfo is not None, "created_at must include timezone" def test_file_count_matches_manifest_length(self, tmp_path: pathlib.Path) -> None: """file_count must equal len(manifest) in the response.""" repo = _make_repo(tmp_path) n = 17 sid = _snap(repo, {f"f{i}.py": _oid(i) for i in range(n)}) data = json.loads(_rsj(repo, sid).output) assert data["file_count"] == n assert len(data["manifest"]) == n # --------------------------------------------------------------------------- # Path prefix edge cases # --------------------------------------------------------------------------- class TestPathPrefixEdgeCases: def test_empty_prefix_matches_all(self, tmp_path: pathlib.Path) -> None: """Empty --path-prefix matches every path (prefix of every string).""" repo = _make_repo(tmp_path) sid = _snap(repo, {"src/a.py": _oid(1), "tests/b.py": _oid(2)}) data = json.loads(_rsj(repo, "--path-prefix", "", sid).output) assert data["file_count"] == 2 def test_prefix_without_trailing_slash(self, tmp_path: pathlib.Path) -> None: """Prefix 'src' (no slash) matches 'src/a.py' and also 'src_util.py'.""" repo = _make_repo(tmp_path) sid = _snap(repo, { "src/a.py": _oid(1), "src_util.py": _oid(2), "tests/b.py": _oid(3), }) data = json.loads(_rsj(repo, "--path-prefix", "src", sid).output) assert "src/a.py" in data["manifest"] assert "src_util.py" in data["manifest"] assert "tests/b.py" not in data["manifest"] def test_prefix_exact_filename_match(self, tmp_path: pathlib.Path) -> None: """A prefix equal to an exact filename matches only that file.""" repo = _make_repo(tmp_path) sid = _snap(repo, {"README.md": _oid(1), "README.md.bak": _oid(2)}) data = json.loads(_rsj(repo, "--path-prefix", "README.md", sid).output) assert "README.md" in data["manifest"] assert "README.md.bak" in data["manifest"] # startswith matches both def test_prefix_no_match_empty_manifest_with_duration(self, tmp_path: pathlib.Path) -> None: """No-match prefix returns empty manifest with duration_ms.""" repo = _make_repo(tmp_path) sid = _snap(repo, {"src/a.py": _oid(1)}) data = json.loads(_rsj(repo, "--path-prefix", "nonexistent/", sid).output) assert data["file_count"] == 0 assert data["manifest"] == {} assert "duration_ms" in data # --------------------------------------------------------------------------- # Security # --------------------------------------------------------------------------- class TestSecuritySupercharge: def test_path_prefix_with_traversal_attempt(self, tmp_path: pathlib.Path) -> None: """Path prefix with '../' traversal must not escape manifest keys.""" repo = _make_repo(tmp_path) sid = _snap(repo, {"src/a.py": _oid(1), "../etc/passwd": _oid(2)}) # The manifest key itself is literally '../etc/passwd' — filter should match it # only if the prefix is '../', not silently escape the repo root data = json.loads(_rsj(repo, "--path-prefix", "src/", sid).output) # Only src/a.py should match src/ prefix assert "src/a.py" in data["manifest"] assert "../etc/passwd" not in data["manifest"] def test_no_traceback_on_sha256_prefixed_missing_id(self, tmp_path: pathlib.Path) -> None: """Valid sha256: format but non-existent ID — no traceback, clean error.""" repo = _make_repo(tmp_path) missing = long_id("dead" * 16) result = _rs(repo, missing) assert result.exit_code == ExitCode.USER_ERROR assert "Traceback" not in result.output # --------------------------------------------------------------------------- # Performance # --------------------------------------------------------------------------- class TestPerformanceSupercharge: def test_single_read_under_500ms(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) sid = _snap(repo, {"a.py": _oid(0)}) t0 = time.monotonic() result = _rs(repo, sid) duration_ms = (time.monotonic() - t0) * 1000 assert result.exit_code == 0 assert duration_ms < 500 def test_1000_file_manifest_under_1000ms(self, tmp_path: pathlib.Path) -> None: repo = _make_repo(tmp_path) manifest = {f"src/module{i:04d}.py": _oid(i) for i in range(1000)} sid = _snap(repo, manifest) t0 = time.monotonic() result = _rs(repo, sid) duration_ms = (time.monotonic() - t0) * 1000 assert result.exit_code == 0 assert duration_ms < 1000 def test_duration_ms_plausible(self, tmp_path: pathlib.Path) -> None: """duration_ms from the output itself must be < 500ms for a warm read.""" repo = _make_repo(tmp_path) sid = _snap(repo, {"a.py": _oid(0)}) data = json.loads(_rsj(repo, sid).output) assert data["duration_ms"] < 500 class TestRegisterFlags: def _parse(self, *args: str) -> "argparse.Namespace": import argparse from muse.cli.commands.read_snapshot import register p = argparse.ArgumentParser() subs = p.add_subparsers() register(subs) return p.parse_args(["read-snapshot", fake_id("a"), *args]) def test_json_short_flag(self) -> None: args = self._parse("-j") assert args.json_out is True def test_json_long_flag(self) -> None: args = self._parse("--json") assert args.json_out is True def test_default_no_json(self) -> None: args = self._parse() assert args.json_out is False