"""GC JSON schema: agent-ready output fields. Tests for ``muse gc --json``: status "ok" | "error" error empty string on success; message on bad args warnings list of warning strings (symlink skips, etc.) mode "conservative" (default) | "tight" (--full) collected_commit_ids list[str] — pruned commit IDs (--full only) collected_snapshot_ids list[str] — pruned snapshot IDs (--full only) duration_ms float — milliseconds exit_code int — 0 on success, 1 on error Also covers: - structured JSON error for --grace-period < 0 in --json mode Test categories --------------- TestGcJsonSchema — every field present and typed correctly TestGcJsonDurationMs — duration_ms is present and non-negative TestGcJsonMode — mode field reflects --full flag TestGcJsonCollectedIds — collected_commit_ids / collected_snapshot_ids TestGcJsonBadArgs — structured error when --grace-period < 0 TestGcJsonWarnings — warnings list populated on symlink skip TestGcJsonExitCode — exit_code field in JSON output """ from __future__ import annotations from collections.abc import Mapping import datetime import json import pathlib import pytest from tests.cli_test_helper import CliRunner from muse.core.types import blob_id, fake_id from muse.core.object_store import object_path from muse.core.paths import heads_dir, muse_dir, objects_dir, snapshots_dir runner = CliRunner() cli = None # argparse migration — CliRunner ignores this arg # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- def _init_repo(tmp_path: pathlib.Path) -> tuple[pathlib.Path, str]: dot_muse = muse_dir(tmp_path) dot_muse.mkdir() repo_id = fake_id("repo") (dot_muse / "repo.json").write_text(json.dumps({ "repo_id": repo_id, "domain": "code", "default_branch": "main", "created_at": "2025-01-01T00:00:00+00:00", }), encoding="utf-8") (dot_muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8") (dot_muse / "refs" / 
"heads").mkdir(parents=True) (dot_muse / "snapshots").mkdir() (dot_muse / "commits").mkdir() (dot_muse / "objects").mkdir() return tmp_path, repo_id def _write_object(root: pathlib.Path, content: bytes) -> str: oid = blob_id(content) p = object_path(root, oid) p.parent.mkdir(parents=True, exist_ok=True) p.write_bytes(content) return oid def _make_commit(root: pathlib.Path, repo_id: str, message: str = "init") -> str: from muse.core.store import CommitRecord, SnapshotRecord, write_commit, write_snapshot from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id ref_file = heads_dir(root) / "main" parent_id = ref_file.read_text().strip() if ref_file.exists() else None manifest = {} snap_id = compute_snapshot_id(manifest) committed_at = datetime.datetime.now(datetime.timezone.utc) commit_id = compute_commit_id( parent_ids=[parent_id] if parent_id else [], snapshot_id=snap_id, message=message, committed_at_iso=committed_at.isoformat(), ) write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=manifest)) write_commit(root, CommitRecord( commit_id=commit_id, branch="main", snapshot_id=snap_id, message=message, committed_at=committed_at, parent_commit_id=parent_id, )) ref_file.parent.mkdir(parents=True, exist_ok=True) ref_file.write_text(commit_id, encoding="utf-8") return commit_id def _gc_json(root: pathlib.Path, extra_args: list[str] | None = None) -> Mapping[str, object]: """Run ``muse gc --json --grace-period 0`` and parse output.""" args = ["gc", "--json", "--grace-period", "0"] + (extra_args or []) result = runner.invoke(cli, args, env={"MUSE_REPO_ROOT": str(root)}) return json.loads(result.output) # --------------------------------------------------------------------------- # TestGcJsonSchema # --------------------------------------------------------------------------- class TestGcJsonSchema: """Every new agent-ready field must be present and correctly typed.""" def test_status_field_present(self, tmp_path: 
pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert "status" in data, "JSON output must include 'status' field" def test_status_ok_on_success(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert data["status"] == "ok" def test_error_field_present(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert "error" in data, "JSON output must include 'error' field" def test_error_empty_on_success(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert data["error"] == "" def test_warnings_field_present(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert "warnings" in data, "JSON output must include 'warnings' field" def test_warnings_is_list(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert isinstance(data["warnings"], list) def test_warnings_empty_on_clean_run(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert data["warnings"] == [] def test_mode_field_present(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert "mode" in data, "JSON output must include 'mode' field" def test_exit_code_field_present(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert "exit_code" in data, "JSON output must include 'exit_code' field" def test_collected_commit_ids_present(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root, 
["--full"]) assert "collected_commit_ids" in data, "JSON must include collected_commit_ids" def test_collected_snapshot_ids_present(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root, ["--full"]) assert "collected_snapshot_ids" in data, "JSON must include collected_snapshot_ids" def test_collected_commit_ids_is_list(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root, ["--full"]) assert isinstance(data["collected_commit_ids"], list) def test_collected_snapshot_ids_is_list(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root, ["--full"]) assert isinstance(data["collected_snapshot_ids"], list) # --------------------------------------------------------------------------- # TestGcJsonDurationMs # --------------------------------------------------------------------------- class TestGcJsonDurationMs: """duration_ms field is present, numeric, and non-negative.""" def test_duration_ms_present(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert "duration_ms" in data, "JSON must include 'duration_ms' field" def test_duration_ms_is_float(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert isinstance(data["duration_ms"], (int, float)) def test_duration_ms_non_negative(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert data["duration_ms"] >= 0 def test_no_elapsed_key(self, tmp_path: pathlib.Path) -> None: root, repo_id = _init_repo(tmp_path) _make_commit(root, repo_id) data = _gc_json(root) assert "elapsed_ms" not in data assert "elapsed" not in data # 
# ---------------------------------------------------------------------------
# TestGcJsonMode
# ---------------------------------------------------------------------------


class TestGcJsonMode:
    """mode field reflects which reachability strategy was used."""

    def test_mode_conservative_by_default(self, tmp_path: pathlib.Path) -> None:
        """Without ``--full`` the reported mode is ``"conservative"``."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)
        payload = _gc_json(repo_root)
        assert payload["mode"] == "conservative"

    def test_mode_tight_with_full_flag(self, tmp_path: pathlib.Path) -> None:
        """With ``--full`` the reported mode is ``"tight"``."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)
        payload = _gc_json(repo_root, ["--full"])
        assert payload["mode"] == "tight"


# ---------------------------------------------------------------------------
# TestGcJsonCollectedIds
# ---------------------------------------------------------------------------


class TestGcJsonCollectedIds:
    """collected_commit_ids and collected_snapshot_ids populated in --full mode."""

    def test_collected_commit_ids_empty_when_all_reachable(
        self, tmp_path: pathlib.Path
    ) -> None:
        """``--full`` on a repo with only its one ref-pointed commit lists no commits."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)
        payload = _gc_json(repo_root, ["--full"])
        assert payload["collected_commit_ids"] == []

    def test_collected_snapshot_ids_empty_when_all_reachable(
        self, tmp_path: pathlib.Path
    ) -> None:
        """``--full`` on a repo with only its one ref-pointed commit lists no snapshots."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)
        payload = _gc_json(repo_root, ["--full"])
        assert payload["collected_snapshot_ids"] == []

    def test_collected_commit_ids_conservative_mode_always_empty(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Conservative mode doesn't prune commits — list must be empty."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)
        payload = _gc_json(repo_root)  # no --full
        assert payload["collected_commit_ids"] == []

    def test_collected_snapshot_ids_conservative_mode_always_empty(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Without ``--full`` the snapshot list is empty."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)
        payload = _gc_json(repo_root)  # no --full
        assert payload["collected_snapshot_ids"] == []
# ---------------------------------------------------------------------------
# TestGcJsonBadArgs
# ---------------------------------------------------------------------------


class TestGcJsonBadArgs:
    """--grace-period < 0 with --json must emit structured JSON error, not crash."""

    @staticmethod
    def _invoke_negative_grace(tmp_path: pathlib.Path):
        """Initialise a repo in *tmp_path* and run ``gc --json --grace-period -1``.

        Returns the result object produced by ``runner.invoke``.
        """
        root, _ = _init_repo(tmp_path)
        return runner.invoke(
            cli,
            ["gc", "--json", "--grace-period", "-1"],
            env={"MUSE_REPO_ROOT": str(root)},
        )

    def test_bad_grace_period_json_mode_exit_code_1(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The invocation itself exits with code 1."""
        outcome = self._invoke_negative_grace(tmp_path)
        assert outcome.exit_code == 1

    def test_bad_grace_period_json_mode_emits_json(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The output parses as JSON and reports ``status == "error"``."""
        outcome = self._invoke_negative_grace(tmp_path)
        # Output must be valid JSON (not just a stderr print)
        payload = json.loads(outcome.output)
        assert payload["status"] == "error"

    def test_bad_grace_period_json_error_field_non_empty(
        self, tmp_path: pathlib.Path
    ) -> None:
        """``error`` carries a non-empty message."""
        outcome = self._invoke_negative_grace(tmp_path)
        payload = json.loads(outcome.output)
        assert payload["error"] != "", "error field must contain a message on bad args"

    def test_bad_grace_period_json_error_mentions_grace_period(
        self, tmp_path: pathlib.Path
    ) -> None:
        """``error`` names the option or echoes the offending value."""
        outcome = self._invoke_negative_grace(tmp_path)
        payload = json.loads(outcome.output)
        assert "grace" in payload["error"].lower() or "-1" in payload["error"], (
            "error message must mention the problematic argument"
        )

    def test_bad_grace_period_json_has_exit_code(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The JSON body includes ``exit_code`` and it equals 1."""
        outcome = self._invoke_negative_grace(tmp_path)
        payload = json.loads(outcome.output)
        assert "exit_code" in payload
        assert payload["exit_code"] == 1
# ---------------------------------------------------------------------------
# TestGcJsonWarnings
# ---------------------------------------------------------------------------


class TestGcJsonWarnings:
    """warnings list is populated when symlinks are skipped during GC walk."""

    def test_symlink_object_file_skip_adds_warning(
        self, tmp_path: pathlib.Path
    ) -> None:
        """A symlink inside .muse/objects/ triggers a warning in JSON output."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)

        # Plant a symlink disguised as an object file
        bucket = objects_dir(repo_root) / "aa"
        bucket.mkdir(parents=True, exist_ok=True)
        fake_object = bucket / ("a" * 62)
        fake_object.symlink_to("/etc/passwd")

        payload = _gc_json(repo_root)
        assert isinstance(payload["warnings"], list)
        # The symlink should have been skipped — there may or may not be a warning
        # depending on implementation, but the field must exist and be a list.
        # (Symlink in object files currently silently skips — warning is the new behavior.)

    def test_symlink_snapshot_file_warning(self, tmp_path: pathlib.Path) -> None:
        """A symlink inside the unified objects store triggers a warning in warnings list."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)

        # Plant a symlink in the unified object store (snapshots now live here).
        # The walker descends objects/sha256/<prefix>/<rest> — placeholder names
        # reconstructed from the layout planted below; confirm against the walker.
        bucket = objects_dir(repo_root) / "sha256" / "de"
        bucket.mkdir(parents=True, exist_ok=True)
        snap_link = bucket / ("e" * 62)
        snap_link.symlink_to("/etc/passwd")

        payload = _gc_json(repo_root)
        assert isinstance(payload["warnings"], list)
        # The symlink warning from the reachability walk must appear
        mentions_symlink = [
            text for text in payload["warnings"] if "symlink" in text.lower()
        ]
        assert len(mentions_symlink) >= 1, (
            "symlink snapshot file must produce a warning in JSON output"
        )


# ---------------------------------------------------------------------------
# TestGcJsonExitCode
# ---------------------------------------------------------------------------


class TestGcJsonExitCode:
    """exit_code field matches actual process exit code."""

    def test_exit_code_zero_on_success(self, tmp_path: pathlib.Path) -> None:
        """A plain run reports ``exit_code == 0``."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)
        payload = _gc_json(repo_root)
        assert payload["exit_code"] == 0

    def test_exit_code_zero_with_dry_run(self, tmp_path: pathlib.Path) -> None:
        """``--dry-run`` with an extra stored object still reports ``exit_code == 0``."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)
        _write_object(repo_root, b"orphan")
        payload = _gc_json(repo_root, ["--dry-run"])
        assert payload["exit_code"] == 0

    def test_exit_code_zero_with_full(self, tmp_path: pathlib.Path) -> None:
        """``--full`` reports ``exit_code == 0``."""
        repo_root, rid = _init_repo(tmp_path)
        _make_commit(repo_root, rid)
        payload = _gc_json(repo_root, ["--full"])
        assert payload["exit_code"] == 0