"""Hardening test suite for ``muse diff``.

Coverage tiers:

- JSON schema: duration_ms, exit_code, summary always present on every path
- duration_ms: type, non-negative, under 10 s
- exit_code: always 0 in payload on success
- summary field: correct human string, empty on clean tree
- staged integration: --staged --json, --stat, --text
- unstaged integration: --unstaged --json
- symbols field: structure validated for real diffs
- sem_ver_bump / breaking_changes: present in JSON output
- conflict JSON: duration_ms, exit_code, all fields
- security: ANSI in commit IDs sanitized in error paths
- stress: 200-file diff under 5 s
"""

from __future__ import annotations

import json
import os
import pathlib

import pytest

from tests.cli_test_helper import CliRunner, InvokeResult

runner = CliRunner()


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _invoke(repo: pathlib.Path, args: list[str]) -> InvokeResult:
    """Run the CLI with *args* while the working directory is *repo*.

    The previous working directory is restored even if the invocation raises.
    """
    saved = os.getcwd()
    try:
        os.chdir(repo)
        return runner.invoke(None, args)
    finally:
        os.chdir(saved)


def _diff(repo: pathlib.Path, *extra: str) -> InvokeResult:
    """Run ``diff`` inside *repo*, appending any *extra* CLI arguments."""
    return _invoke(repo, ["diff", *extra])


def _commit(repo: pathlib.Path, msg: str = "commit") -> None:
    """Run ``commit -m <msg>`` inside *repo*; the result is discarded."""
    _invoke(repo, ["commit", "-m", msg])


def _stage(repo: pathlib.Path, path: str) -> None:
    """Run ``code add <path>`` inside *repo*; the result is discarded."""
    _invoke(repo, ["code", "add", path])


@pytest.fixture()
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Return a fresh repository in ``tmp_path`` with one commit of ``a.py``."""
    _invoke(tmp_path, ["init"])
    (tmp_path / "a.py").write_text("x = 1\n")
    _commit(tmp_path, "first")
    return tmp_path


# ---------------------------------------------------------------------------
# JSON schema: duration_ms + exit_code + summary always present
# ---------------------------------------------------------------------------


class TestJsonSchemaComplete:
    """Every --json path emits duration_ms, exit_code, and summary."""

    def test_all_fields_clean_tree(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        assert r.exit_code == 0
        data = json.loads(r.output)
        for field in (
            "duration_ms",
            "exit_code",
            "summary",
            "from_ref",
            "to_ref",
            "has_changes",
            "added",
            "deleted",
            "modified",
            "total_changes",
        ):
            assert field in data, f"Missing: {field}"

    def test_all_fields_dirty_tree(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("x = 99\n")
        r = _diff(repo, "--json")
        assert r.exit_code == 0
        data = json.loads(r.output)
        for field in ("duration_ms", "exit_code", "summary"):
            assert field in data, f"Missing: {field}"

    def test_all_fields_two_commit(self, repo: pathlib.Path) -> None:
        (repo / "b.py").write_text("y = 2\n")
        _commit(repo, "second")
        # Text-output smoke call; its result is intentionally not inspected.
        _diff(repo, "HEAD")
        # text output, no --json needed here — just verify JSON path explicitly
        r = _diff(repo, "--json")
        assert r.exit_code == 0
        data = json.loads(r.output)
        assert "duration_ms" in data
        assert "exit_code" in data

    def test_all_fields_staged(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("staged change\n")
        _stage(repo, "a.py")
        r = _diff(repo, "--staged", "--json")
        assert r.exit_code == 0
        data = json.loads(r.output)
        for field in ("duration_ms", "exit_code", "summary"):
            assert field in data, f"Missing: {field}"

    def test_all_fields_unstaged(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("staged\n")
        _stage(repo, "a.py")
        (repo / "a.py").write_text("unstaged on top\n")
        r = _diff(repo, "--unstaged", "--json")
        assert r.exit_code == 0
        data = json.loads(r.output)
        for field in ("duration_ms", "exit_code", "summary"):
            assert field in data, f"Missing: {field}"

    def test_exit_code_field_zero(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        assert r.exit_code == 0
        assert json.loads(r.output)["exit_code"] == 0

    def test_exit_code_field_zero_with_changes(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("changed\n")
        r = _diff(repo, "--json")
        assert r.exit_code == 0
        assert json.loads(r.output)["exit_code"] == 0

    def test_json_is_compact(self, repo: pathlib.Path) -> None:
        """The JSON payload is emitted on a single line."""
        r = _diff(repo, "--json")
        assert "\n" not in r.output.strip()


# ---------------------------------------------------------------------------
# duration_ms: type, magnitude, precision
# ---------------------------------------------------------------------------


class TestElapsedSeconds:
    """Checks on the ``duration_ms`` field of the JSON payload.

    The class name mentions seconds, but every test here reads ``duration_ms``.
    """

    def test_is_float(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        assert isinstance(json.loads(r.output)["duration_ms"], float)

    def test_non_negative(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        assert json.loads(r.output)["duration_ms"] >= 0.0

    def test_under_ten_seconds(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        # Ten seconds expressed in milliseconds; comparing against 10.0 would
        # assert "under 10 ms". Assumes duration_ms is in milliseconds, as the
        # field name indicates.
        assert json.loads(r.output)["duration_ms"] < 10_000.0

    def test_present_on_dirty_tree(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("dirty\n")
        r = _diff(repo, "--json")
        assert json.loads(r.output)["duration_ms"] >= 0.0

    def test_present_on_staged(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("s\n")
        _stage(repo, "a.py")
        r = _diff(repo, "--staged", "--json")
        data = json.loads(r.output)
        assert "duration_ms" in data
        assert data["duration_ms"] >= 0.0

    def test_six_decimal_precision(self, repo: pathlib.Path) -> None:
        """The string form of duration_ms has at most six characters after '.'."""
        r = _diff(repo, "--json")
        s = str(json.loads(r.output)["duration_ms"])
        dec = s.split(".")[-1] if "." in s else ""
        assert len(dec) <= 6


# ---------------------------------------------------------------------------
# summary field
# ---------------------------------------------------------------------------


class TestSummaryField:
    """Checks on the ``summary`` field of the JSON payload."""

    def test_summary_is_string(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        assert isinstance(json.loads(r.output)["summary"], str)

    def test_summary_empty_string_on_clean(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert data["has_changes"] is False
        # Clean tree → no summary needed; empty string or "No differences"
        assert isinstance(data["summary"], str)

    def test_summary_non_empty_when_dirty(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("modified\n")
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert data["has_changes"] is True
        assert len(data["summary"]) > 0

    def test_summary_contains_change_count(self, repo: pathlib.Path) -> None:
        (repo / "b.py").write_text("new\n")
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        # delta["summary"] typically contains a digit for change count
        assert any(c.isdigit() for c in data["summary"]) or data["summary"] == ""

    def test_summary_matches_total_changes(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("mod\n")
        (repo / "c.py").write_text("new\n")
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        # summary is a string derived from the delta, total_changes is numeric
        assert data["total_changes"] >= 1


# ---------------------------------------------------------------------------
# --staged integration
# ---------------------------------------------------------------------------


class TestStagedIntegration:
    """``diff --staged`` combined with --json, --stat and --exit-code."""

    def test_staged_json_shows_staged_file(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("staged change\n")
        _stage(repo, "a.py")
        r = _diff(repo, "--staged", "--json")
        assert r.exit_code == 0
        data = json.loads(r.output)
        assert data["has_changes"] is True
        assert "a.py" in data["modified"] or "a.py" in data["added"]

    def test_staged_json_clean_when_nothing_staged(self, repo: pathlib.Path) -> None:
        # Modify but don't stage
        (repo / "a.py").write_text("unstaged only\n")
        r = _diff(repo, "--staged", "--json")
        assert r.exit_code == 0
        data = json.loads(r.output)
        assert data["has_changes"] is False

    def test_staged_json_from_ref_is_head(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("s\n")
        _stage(repo, "a.py")
        r = _diff(repo, "--staged", "--json")
        data = json.loads(r.output)
        assert data["from_ref"] == "HEAD"

    def test_staged_json_to_ref_is_staged(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("s\n")
        _stage(repo, "a.py")
        r = _diff(repo, "--staged", "--json")
        data = json.loads(r.output)
        assert "staged" in data["to_ref"].lower()

    def test_staged_new_file(self, repo: pathlib.Path) -> None:
        (repo / "new.py").write_text("brand new\n")
        _stage(repo, "new.py")
        r = _diff(repo, "--staged", "--json")
        assert r.exit_code == 0
        data = json.loads(r.output)
        assert data["has_changes"] is True

    def test_staged_stat_output(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("changed\n")
        _stage(repo, "a.py")
        r = _diff(repo, "--staged", "--stat")
        assert r.exit_code == 0
        assert len(r.output.strip()) > 0

    def test_staged_exit_code_flag(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("changed\n")
        _stage(repo, "a.py")
        r = _diff(repo, "--staged", "--exit-code")
        assert r.exit_code == 1  # changes present

    def test_staged_exit_code_clean(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--staged", "--exit-code")
        assert r.exit_code == 0  # nothing staged


# ---------------------------------------------------------------------------
# --unstaged integration
# ---------------------------------------------------------------------------


class TestUnstagedIntegration:
    """``diff --unstaged`` combined with --json and --exit-code."""

    def test_unstaged_json_shows_unstaged_only(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("staged\n")
        _stage(repo, "a.py")
        (repo / "a.py").write_text("plus unstaged edits\n")
        r = _diff(repo, "--unstaged", "--json")
        assert r.exit_code == 0
        data = json.loads(r.output)
        assert data["has_changes"] is True

    def test_unstaged_clean_when_all_staged(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("all staged\n")
        _stage(repo, "a.py")
        r = _diff(repo, "--unstaged", "--json")
        assert r.exit_code == 0
        data = json.loads(r.output)
        assert data["has_changes"] is False

    def test_unstaged_json_has_elapsed(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--unstaged", "--json")
        assert r.exit_code == 0
        assert "duration_ms" in json.loads(r.output)

    def test_unstaged_exit_code_when_dirty(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("staged\n")
        _stage(repo, "a.py")
        (repo / "a.py").write_text("extra unstaged\n")
        r = _diff(repo, "--unstaged", "--exit-code")
        assert r.exit_code == 1


# ---------------------------------------------------------------------------
# symbols field
# ---------------------------------------------------------------------------


class TestSymbolsField:
    """Structural checks on the ``symbols`` field of the JSON payload."""

    def test_symbols_is_dict(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert isinstance(data["symbols"], dict)

    def test_symbols_empty_on_clean(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert data["has_changes"] is False
        assert data["symbols"] == {}

    def test_symbols_present_on_dirty(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("x = 99\n")
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        # symbols may be {} if no symbol-level tracking for this domain
        assert isinstance(data["symbols"], dict)

    def test_symbols_keys_are_file_paths(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("x = 99\n")
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        for key in data["symbols"]:
            assert isinstance(key, str)

    def test_symbols_values_have_buckets(self, repo: pathlib.Path) -> None:
        (repo / "a.py").write_text("def foo(): pass\n")
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        for _path, buckets in data["symbols"].items():
            assert isinstance(buckets, dict)
            for bucket in buckets.values():
                assert isinstance(bucket, list)


# ---------------------------------------------------------------------------
# sem_ver_bump and breaking_changes
# ---------------------------------------------------------------------------


class TestSemVerFields:
    """Presence and shape of ``sem_ver_bump`` and ``breaking_changes``."""

    def test_sem_ver_bump_present(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert "sem_ver_bump" in data

    def test_sem_ver_bump_null_on_clean(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert data["sem_ver_bump"] in (None, "", "none", "patch", "minor", "major")

    def test_breaking_changes_present(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert "breaking_changes" in data

    def test_breaking_changes_is_list(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert isinstance(data["breaking_changes"], list)

    def test_breaking_changes_empty_on_clean(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert data["breaking_changes"] == []


# ---------------------------------------------------------------------------
# renamed field
# ---------------------------------------------------------------------------


class TestRenamedField:
    """Shape of the ``renamed`` field on a clean tree."""

    def test_renamed_is_dict(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert isinstance(data["renamed"], dict)

    def test_renamed_empty_on_clean(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert data["renamed"] == {}


# ---------------------------------------------------------------------------
# from_commit_id / to_commit_id canonical sha256: prefix
# ---------------------------------------------------------------------------


class TestCommitIdFormat:
    """Format of ``from_commit_id`` / ``to_commit_id`` in the JSON payload."""

    def test_from_commit_id_has_sha256_prefix(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert data["from_commit_id"].startswith("sha256:")

    def test_from_commit_id_full_length(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        cid = data["from_commit_id"]
        # "sha256:" prefix followed by 64 characters.
        assert len(cid) == len("sha256:") + 64

    def test_to_commit_id_null_for_workdir(self, repo: pathlib.Path) -> None:
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert data["to_commit_id"] is None

    def test_both_commit_ids_present_for_two_commit_diff(
        self, repo: pathlib.Path
    ) -> None:
        (repo / "b.py").write_text("y = 2\n")
        _commit(repo, "second")
        # diff between the two branch tips via explicit refs would need two SHAs;
        # for now verify that HEAD diff still has sha256: on from_commit_id
        r = _diff(repo, "--json")
        data = json.loads(r.output)
        assert data["from_commit_id"].startswith("sha256:")


# ---------------------------------------------------------------------------
# E2E: help output
# ---------------------------------------------------------------------------


class TestHelp:
    """``diff --help`` exits cleanly and mentions the key flags."""

    def test_help_exits_0(self) -> None:
        r = runner.invoke(None, ["diff", "--help"])
        assert r.exit_code == 0

    def test_help_mentions_json(self) -> None:
        r = runner.invoke(None, ["diff", "--help"])
        assert "--json" in r.output

    def test_help_mentions_staged(self) -> None:
        r = runner.invoke(None, ["diff", "--help"])
        assert "--staged" in r.output

    def test_help_mentions_exit_code(self) -> None:
        r = runner.invoke(None, ["diff", "--help"])
        assert "--exit-code" in r.output or "-z" in r.output


# ---------------------------------------------------------------------------
# Stress: 200-file diff under 5 s
# ---------------------------------------------------------------------------


class TestStressElapsed:
    """Timing checks on larger working trees."""

    def test_200_file_diff_under_5s(self, tmp_path: pathlib.Path) -> None:
        """A diff over 200 modified files completes in under 5 s wall-clock."""
        import time as _time

        _invoke(tmp_path, ["init"])
        for i in range(200):
            (tmp_path / f"f{i:03d}.py").write_text(f"x = {i}\n")
        _commit(tmp_path, "bulk")
        for i in range(200):
            (tmp_path / f"f{i:03d}.py").write_text(f"x = {i * 2}\n")

        # Only the diff invocation itself is timed, not the setup above.
        t0 = _time.monotonic()
        r = _diff(tmp_path, "--json")
        elapsed = _time.monotonic() - t0

        assert r.exit_code == 0
        data = json.loads(r.output)
        assert data["has_changes"] is True
        assert elapsed < 5.0, f"200-file diff took {elapsed:.2f}s"

    def test_duration_ms_reflects_real_work(self, tmp_path: pathlib.Path) -> None:
        """duration_ms is reported and non-negative after modifying 50 files.

        NOTE(review): the assertion is ``>= 0.0``; tighten it to ``> 0.0`` if
        the reported value is guaranteed to be non-zero for real work.
        """
        _invoke(tmp_path, ["init"])
        for i in range(50):
            (tmp_path / f"g{i}.py").write_text(f"y = {i}\n")
        _commit(tmp_path, "init")
        for i in range(50):
            (tmp_path / f"g{i}.py").write_text(f"y = {i + 1}\n")
        r = _diff(tmp_path, "--json")
        assert r.exit_code == 0
        assert json.loads(r.output)["duration_ms"] >= 0.0