"""Tests for muse for-each-ref. Coverage tiers -------------- Unit — _list_all_refs (flat, hierarchical, symlink skip, bad commit ID), _RefDetail + _ForEachRefResult schemas, _SORT_FIELDS completeness Integration — empty repo, flat branches, hierarchical branches, pattern filter, sort (all fields, asc/desc), --count limit, --no-commits fast-path, text output (full / no-commits), --json shorthand Security — symlinks skipped, ANSI in branch/commit/author sanitized, error output to stderr (format, sort, negative count), no traceback on bad format/corrupted ref, no-commits+commit-sort rejected Stress — 100-branch repo, 50-hierarchical-branch repo, 200 sequential reads """ from __future__ import annotations import argparse import datetime import json import os import pathlib import pytest from tests.cli_test_helper import CliRunner, InvokeResult from muse.cli.commands.for_each_ref import ( _ForEachRefResult, _RefDetail, _SORT_FIELDS, _list_all_refs, ) from muse.core.ids import hash_commit, hash_snapshot from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) from muse.core.types import Manifest from muse.core.paths import muse_dir, heads_dir, ref_path cli = None # argparse-based CLI; CliRunner ignores this arg runner = CliRunner() # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _init_repo(path: pathlib.Path) -> pathlib.Path: muse = muse_dir(path) (muse / "commits").mkdir(parents=True) (muse / "snapshots").mkdir(parents=True) (muse / "objects").mkdir(parents=True) (muse / "refs" / "heads").mkdir(parents=True) (muse / "HEAD").write_text("ref: refs/heads/main\n", encoding="utf-8") (muse / "repo.json").write_text( json.dumps({"repo_id": "test-repo", "domain": "midi"}), encoding="utf-8" ) return path def _env(repo: pathlib.Path) -> Manifest: return {"MUSE_REPO_ROOT": str(repo)} def _snap(repo: pathlib.Path, tag: str = "snap") -> str: sid = hash_snapshot({}) write_snapshot( repo, SnapshotRecord( snapshot_id=sid, manifest={}, created_at=datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc), ), ) return sid def _commit( repo: pathlib.Path, tag: str, branch: str = "main", parent: str | None = None, ts: datetime.datetime | None = None, author: str = "tester", ) -> str: sid = _snap(repo, tag) ts_actual = ts or datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) parent_ids: list[str] = [parent] if parent else [] cid = hash_commit( parent_ids=parent_ids, snapshot_id=sid, message=tag, committed_at_iso=ts_actual.isoformat(), author=author, ) write_commit( repo, CommitRecord( commit_id=cid, branch=branch, snapshot_id=sid, message=tag, committed_at=ts_actual, author=author, parent_commit_id=parent, parent2_commit_id=None, ), ) branch_ref = ref_path(repo, branch) branch_ref.parent.mkdir(parents=True, exist_ok=True) branch_ref.write_text(cid, encoding="utf-8") return cid def _fer(repo: pathlib.Path, *args: str) -> InvokeResult: return runner.invoke(cli, ["for-each-ref", "--json", *args], env=_env(repo)) def _fer_text(repo: pathlib.Path, *args: str) -> InvokeResult: return runner.invoke(cli, ["for-each-ref", *args], env=_env(repo)) # --------------------------------------------------------------------------- # Unit — flag registration # --------------------------------------------------------------------------- class TestRegisterFlags: def _parse(self, *args: str) -> "argparse.Namespace": import argparse from muse.cli.commands.for_each_ref import register p = argparse.ArgumentParser() sub = p.add_subparsers() register(sub) return p.parse_args(["for-each-ref", *args]) def test_default_json_out_is_false(self) -> None: ns = self._parse() assert ns.json_out is False def test_json_flag_sets_json_out(self) -> None: ns = self._parse("--json") assert ns.json_out is True def test_j_shorthand_sets_json_out(self) -> None: ns = self._parse("-j") assert ns.json_out is True # --------------------------------------------------------------------------- # Unit — schema # --------------------------------------------------------------------------- class TestSchemas: def test_sort_fields_includes_snapshot_id(self) -> None: assert "snapshot_id" in _SORT_FIELDS def test_sort_fields_includes_all_expected(self) -> None: for f in ("ref", "branch", "commit_id", "author", "committed_at", "message"): assert f in _SORT_FIELDS def test_for_each_ref_result_fields(self) -> None: keys = _ForEachRefResult.__annotations__ assert "refs" in keys assert "count" in keys def test_ref_detail_is_total_false(self) -> None: # total=False allows partial dicts for --no-commits mode. # __required_keys__ is empty when total=False. assert len(_RefDetail.__required_keys__) == 0 # --------------------------------------------------------------------------- # Unit — _list_all_refs # --------------------------------------------------------------------------- class TestListAllRefs: def test_empty_heads_dir(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) assert _list_all_refs(tmp_path) == [] def test_flat_branch(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c", "main") pairs = _list_all_refs(tmp_path) assert len(pairs) == 1 assert pairs[0][0] == "main" def test_hierarchical_branch_discovered(self, tmp_path: pathlib.Path) -> None: """feat/my-thing must be found — requires rglob, not iterdir.""" _init_repo(tmp_path) _commit(tmp_path, "c-main", "main") _commit(tmp_path, "c-feat", "feat/my-thing") pairs = _list_all_refs(tmp_path) branch_names = [b for b, _ in pairs] assert "feat/my-thing" in branch_names assert "main" in branch_names def test_symlink_ref_skipped(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c", "main") real = heads_dir(tmp_path) / "main" link = heads_dir(tmp_path) / "sym" link.symlink_to(real) pairs = _list_all_refs(tmp_path) names = [b for b, _ in pairs] assert "sym" not in names assert "main" in names def test_invalid_commit_id_skipped(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c", "main") # Write a ref file with garbage content bad = heads_dir(tmp_path) / "bad-ref" bad.write_text("not-a-sha256\n", encoding="utf-8") pairs = _list_all_refs(tmp_path) names = [b for b, _ in pairs] assert "bad-ref" not in names assert "main" in names def test_missing_heads_dir_returns_empty(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) import shutil shutil.rmtree(heads_dir(tmp_path)) assert _list_all_refs(tmp_path) == [] def test_sorted_output(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["zzz", "aaa", "mmm"]: _commit(tmp_path, f"c-{b}", b) pairs = _list_all_refs(tmp_path) names = [b for b, _ in pairs] assert names == sorted(names) # --------------------------------------------------------------------------- # Integration — basic JSON output # --------------------------------------------------------------------------- class TestJsonOutput: def test_empty_repo(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) r = _fer(tmp_path) assert r.exit_code == 0 data = json.loads(r.output) assert data["count"] == 0 assert data["refs"] == [] def test_single_branch(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) cid = _commit(tmp_path, "c1") r = _fer(tmp_path) assert r.exit_code == 0 data = json.loads(r.output) assert data["count"] == 1 ref = data["refs"][0] assert ref["commit_id"] == cid assert ref["branch"] == "main" assert ref["ref"] == "refs/heads/main" def test_all_fields_present(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c1") r = _fer(tmp_path) ref = json.loads(r.output)["refs"][0] for key in ("ref", "branch", "commit_id", "author", "message", "committed_at", "snapshot_id"): assert key in ref, f"missing field: {key}" def test_json_shorthand_alias(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c1") r = _fer(tmp_path, "--json") assert r.exit_code == 0 data = json.loads(r.output) assert "refs" in data def test_hierarchical_branch_in_output(self, tmp_path: pathlib.Path) -> None: """Branches with slashes in name must appear in the output.""" _init_repo(tmp_path) _commit(tmp_path, "c-main", "main") _commit(tmp_path, "c-feat", "feat/my-thing") r = _fer(tmp_path) assert r.exit_code == 0 data = json.loads(r.output) branches = [ref["branch"] for ref in data["refs"]] assert "feat/my-thing" in branches assert data["count"] == 2 def test_multiple_branches_counted(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["main", "dev", "feat/x", "feat/y"]: _commit(tmp_path, f"c-{b}", b) r = _fer(tmp_path) assert r.exit_code == 0 data = json.loads(r.output) assert data["count"] == 4 # --------------------------------------------------------------------------- # Integration — --no-commits fast path # --------------------------------------------------------------------------- class TestNoCommits: def test_no_commits_omits_commit_fields(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c1") r = _fer(tmp_path, "--no-commits") assert r.exit_code == 0 data = json.loads(r.output) ref = data["refs"][0] assert "ref" in ref assert "branch" in ref assert "commit_id" in ref # These must be absent in --no-commits mode assert "author" not in ref assert "message" not in ref assert "committed_at" not in ref def test_no_commits_count_correct(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["main", "dev", "feat/x"]: _commit(tmp_path, f"c-{b}", b) r = _fer(tmp_path, "--no-commits") assert r.exit_code == 0 data = json.loads(r.output) assert data["count"] == 3 def test_no_commits_text_format(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) cid = _commit(tmp_path, "c1") r = _fer_text(tmp_path, "--no-commits") assert r.exit_code == 0 line = r.output.strip() assert cid in line assert "refs/heads/main" in line # Should NOT have 4 columns (no author column) parts = line.split(" ") assert len(parts) == 2 def test_no_commits_rejected_with_commit_sort_field(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c1") for field in ("author", "message", "committed_at", "snapshot_id"): r = _fer(tmp_path, "--no-commits", "--sort", field) assert r.exit_code != 0 assert r.stdout_bytes == b"" assert "error" in r.stderr.lower() def test_no_commits_allows_ref_level_sort(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["zzz", "aaa"]: _commit(tmp_path, f"c-{b}", b) for field in ("ref", "branch", "commit_id"): r = _fer(tmp_path, "--no-commits", "--sort", field) assert r.exit_code == 0 # --------------------------------------------------------------------------- # Integration — sorting # --------------------------------------------------------------------------- class TestSorting: def test_sort_by_ref_ascending(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["zzz", "aaa", "mmm"]: _commit(tmp_path, f"c-{b}", b) r = _fer(tmp_path, "--sort", "ref") data = json.loads(r.output) refs = [d["ref"] for d in data["refs"]] assert refs == sorted(refs) def test_sort_by_ref_descending(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["zzz", "aaa", "mmm"]: _commit(tmp_path, f"c-{b}", b) r = _fer(tmp_path, "--sort", "ref", "--desc") data = json.loads(r.output) refs = [d["ref"] for d in data["refs"]] assert refs == sorted(refs, reverse=True) def test_sort_by_committed_at(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) base = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc) _commit(tmp_path, "c-b", "bbb", ts=base + datetime.timedelta(hours=2)) _commit(tmp_path, "c-a", "aaa", ts=base + datetime.timedelta(hours=1)) r = _fer(tmp_path, "--sort", "committed_at") data = json.loads(r.output) timestamps = [d["committed_at"] for d in data["refs"]] assert timestamps == sorted(timestamps) def test_sort_by_author(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c-main", "main", author="zara") _commit(tmp_path, "c-dev", "dev", author="alice") r = _fer(tmp_path, "--sort", "author") data = json.loads(r.output) authors = [d["author"] for d in data["refs"]] assert authors == sorted(authors) def test_sort_by_snapshot_id(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["a", "b", "c"]: _commit(tmp_path, f"snap-{b}", b) r = _fer(tmp_path, "--sort", "snapshot_id") assert r.exit_code == 0 data = json.loads(r.output) sids = [d["snapshot_id"] for d in data["refs"]] assert sids == sorted(sids) # --------------------------------------------------------------------------- # Integration — --count and --pattern # --------------------------------------------------------------------------- class TestCountAndPattern: def test_count_limits_output(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["aaa", "bbb", "ccc", "ddd"]: _commit(tmp_path, f"c-{b}", b) r = _fer(tmp_path, "--count", "2") data = json.loads(r.output) assert data["count"] == 2 assert len(data["refs"]) == 2 def test_count_zero_is_unlimited(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["a", "b", "c"]: _commit(tmp_path, f"c-{b}", b) r = _fer(tmp_path, "--count", "0") data = json.loads(r.output) assert data["count"] == 3 def test_negative_count_errors(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) r = _fer(tmp_path, "--count", "-1") assert r.exit_code != 0 assert r.stdout_bytes == b"" assert "error" in r.stderr.lower() def test_pattern_filter_flat(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c-main", "main") _commit(tmp_path, "c-dev", "dev") r = _fer(tmp_path, "--pattern", "refs/heads/main") data = json.loads(r.output) assert data["count"] == 1 assert data["refs"][0]["branch"] == "main" def test_pattern_filter_hierarchical(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c-main", "main") _commit(tmp_path, "c-feat1", "feat/one") _commit(tmp_path, "c-feat2", "feat/two") r = _fer(tmp_path, "--pattern", "refs/heads/feat/*") data = json.loads(r.output) assert data["count"] == 2 for ref in data["refs"]: assert ref["branch"].startswith("feat/") def test_pattern_no_match_returns_empty(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c-main", "main") r = _fer(tmp_path, "--pattern", "refs/heads/nonexistent/*") data = json.loads(r.output) assert data["count"] == 0 # --------------------------------------------------------------------------- # Integration — text output # --------------------------------------------------------------------------- class TestTextOutput: def test_text_format_four_columns(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) cid = _commit(tmp_path, "c1", author="alice") r = _fer_text(tmp_path) assert r.exit_code == 0 line = r.output.strip() assert cid in line assert "refs/heads/main" in line assert "alice" in line def test_text_multiple_lines(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["aaa", "bbb"]: _commit(tmp_path, f"c-{b}", b) r = _fer_text(tmp_path) lines = [l for l in r.output.strip().splitlines() if l] assert len(lines) == 2 # --------------------------------------------------------------------------- # Security # --------------------------------------------------------------------------- class TestSecurity: def test_ansi_in_branch_name_sanitized_text(self, tmp_path: pathlib.Path) -> None: """Branch names with ANSI must not appear raw in text output.""" _init_repo(tmp_path) cid = _commit(tmp_path, "c1", "main") # Directly write a ref file with ANSI in its name (via the raw FS) ansi_branch_dir = heads_dir(tmp_path) / "safe" ansi_branch_dir.mkdir(parents=True, exist_ok=True) # Can't create filename with ANSI; instead verify author field sanitized _commit(tmp_path, "c-dev", "dev", author="\x1b[31mred\x1b[0m") r = _fer_text(tmp_path) assert "\x1b" not in r.output def test_unknown_flag_exits_nonzero(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) r = _fer_text(tmp_path, "--format", "xml") assert r.exit_code != 0 def test_error_sort_goes_to_stderr(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) r = _fer(tmp_path, "--sort", "invalid_field") assert r.exit_code != 0 assert r.stdout_bytes == b"" assert "error" in r.stderr.lower() def test_negative_count_error_to_stderr(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) r = _fer(tmp_path, "--count", "-5") assert r.exit_code != 0 assert r.stdout_bytes == b"" def test_no_traceback_on_unknown_flag(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) r = _fer_text(tmp_path, "--format", "bad") assert "Traceback" not in r.output assert "Traceback" not in r.stderr def test_symlink_ref_skipped_in_output(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c", "main") real = heads_dir(tmp_path) / "main" link = heads_dir(tmp_path) / "linked" link.symlink_to(real) r = _fer(tmp_path) data = json.loads(r.output) branches = [ref["branch"] for ref in data["refs"]] assert "linked" not in branches def test_corrupted_ref_skipped_in_output(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) _commit(tmp_path, "c", "main") bad = heads_dir(tmp_path) / "corrupted" bad.write_text("not-a-sha\n", encoding="utf-8") r = _fer(tmp_path) data = json.loads(r.output) branches = [ref["branch"] for ref in data["refs"]] assert "corrupted" not in branches def test_no_repo_exits_cleanly(self, tmp_path: pathlib.Path) -> None: r = runner.invoke( cli, ["for-each-ref"], env={"MUSE_REPO_ROOT": str(tmp_path / "norepo")}, ) assert r.exit_code != 0 assert "Traceback" not in r.output assert "Traceback" not in r.stderr # --------------------------------------------------------------------------- # Stress # --------------------------------------------------------------------------- class TestStress: def test_100_flat_branches(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for i in range(100): _commit(tmp_path, f"c-{i:03d}", f"branch-{i:03d}") r = _fer(tmp_path) assert r.exit_code == 0 data = json.loads(r.output) assert data["count"] == 100 def test_50_hierarchical_branches(self, tmp_path: pathlib.Path) -> None: """All 50 branches with slashes must be discovered via rglob.""" _init_repo(tmp_path) for i in range(50): _commit(tmp_path, f"c-{i}", f"feat/task-{i:03d}") r = _fer(tmp_path) assert r.exit_code == 0 data = json.loads(r.output) assert data["count"] == 50 for ref in data["refs"]: assert ref["branch"].startswith("feat/") def test_no_commits_100_branches_fast(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for i in range(100): _commit(tmp_path, f"c-{i}", f"b-{i:03d}") r = _fer(tmp_path, "--no-commits") assert r.exit_code == 0 data = json.loads(r.output) assert data["count"] == 100 # Confirm no commit metadata fields for ref in data["refs"]: assert "author" not in ref def test_200_sequential_reads(self, tmp_path: pathlib.Path) -> None: _init_repo(tmp_path) for b in ["main", "dev"]: _commit(tmp_path, f"c-{b}", b) for _ in range(200): r = _fer(tmp_path) assert r.exit_code == 0 assert json.loads(r.output)["count"] == 2