"""Comprehensive tests for ``muse code add`` and ``muse code reset``. Review findings addressed -------------------------- Security * Path-traversal: staging a file outside the repo root is rejected. * Symlink: symlinks are not followed during tree walks (followlinks=False). * `.museignore`: ignored files are never staged even when explicitly named. Performance * Unchanged files (content = committed) are skipped — no object written. * Already-staged files with the same content are skipped (idempotent). New capabilities (added this review) * ``--format json`` on ``muse code add`` — machine-readable output. * ``--format json`` on ``muse code reset`` — machine-readable output. * Breakdown summary in text output (N added, M modified, K deleted). Stage persistence * Stage is persisted as ``.muse/code/stage.json`` (JSON format, version 3). * Corrupt stage file is cleared on read rather than silently returning {}. Test categories --------------- I Security — path traversal, symlinks, ignore rules. II JSON output — muse code add --format json. III JSON output — muse code reset --format json. IV Text output breakdown — "N added, M modified, K deleted". V JSON stage persistence — format, atomicity. VI Dry-run correctness — no writes, accurate preview. VII Edge cases — fresh repo, no commits, multiple flags, cycles. VIII Stress — 500-file staging, repeated cycles, large files. 
""" from __future__ import annotations import json import os import pathlib import pytest from muse.plugins.code.stage import StagedEntry, read_stage, stage_path, write_stage, StagedFileMap from muse.core.paths import muse_dir, code_dir, commits_dir, snapshots_dir, stat_cache_path from muse.core.types import Manifest, blob_id, fake_id, long_id, short_id, split_id from muse.core.object_store import object_path from tests.cli_test_helper import CliRunner runner = CliRunner() cli = None # --------------------------------------------------------------------------- # Helpers and fixtures # --------------------------------------------------------------------------- def _env(root: pathlib.Path) -> Manifest: return {"MUSE_REPO_ROOT": str(root)} def _run(root: pathlib.Path, *args: str) -> tuple[int, str]: result = runner.invoke(cli, list(args), env=_env(root), catch_exceptions=False) return result.exit_code, result.output def _run_unchecked(root: pathlib.Path, *args: str) -> tuple[int, str]: result = runner.invoke(cli, list(args), env=_env(root)) return result.exit_code, result.output @pytest.fixture() def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path: """Fresh code-domain repo with one committed file (main.py = 'x = 1').""" monkeypatch.chdir(tmp_path) r = runner.invoke(cli, ["init", "--domain", "code"], env=_env(tmp_path)) assert r.exit_code == 0, r.output (tmp_path / "main.py").write_text("x = 1\n") r2 = runner.invoke(cli, ["commit", "--allow-empty", "-m", "init"], env=_env(tmp_path)) assert r2.exit_code == 0, r2.output return tmp_path # =========================================================================== # I Security # =========================================================================== class TestSecurityI: """Files outside the repo, symlinks, and ignored paths must never be staged.""" def test_I1_path_outside_repo_root_is_rejected( self, repo: pathlib.Path ) -> None: """I1: staging a path outside the repo root exits 
non-zero.""" outside = repo.parent / f"secret_{fake_id('outside-secret')[-8:]}.txt" outside.write_text("secret\n") code, _ = _run_unchecked(repo, "code", "add", str(outside)) assert code != 0 or str(outside) not in _read_stage(repo) def test_I2_symlink_not_followed_during_dot_add( self, repo: pathlib.Path ) -> None: """I2: symlinks to files outside the repo are never staged.""" outside = repo.parent / f"outside_{fake_id('outside-symlink')[-8:]}.txt" outside.write_text("outside content\n") link = repo / "link_to_outside.txt" link.symlink_to(outside) _run(repo, "code", "add", ".") stage = _read_stage(repo) assert "link_to_outside.txt" not in stage, "Symlink to outside must not be staged" def test_I3_museignore_file_not_staged_by_dot( self, repo: pathlib.Path ) -> None: """I3: .museignore exclusions are honoured by 'muse code add .'""" # .museignore is TOML — use the proper section format. (repo / ".museignore").write_text( '[domain.code]\npatterns = ["*.secret"]\n' ) (repo / "creds.secret").write_text("password=123\n") _run(repo, "code", "add", ".") stage = _read_stage(repo) assert "creds.secret" not in stage, "Ignored file must not be staged" def test_I4_museignore_file_not_staged_when_explicit( self, repo: pathlib.Path ) -> None: """I4: even when explicitly named, .museignore exclusions prevent staging.""" (repo / ".museignore").write_text( '[domain.code]\npatterns = ["private.py"]\n' ) (repo / "private.py").write_text("SECRET = 'x'\n") _run(repo, "code", "add", "private.py") stage = _read_stage(repo) assert "private.py" not in stage, "Explicitly named ignored file must not be staged" def test_I5_hidden_files_staged_by_default( self, repo: pathlib.Path ) -> None: """I5: hidden files (dotfiles) are staged by muse code add . (mirrors git behaviour).""" (repo / ".env").write_text("API_KEY=secret\n") _run(repo, "code", "add", ".") stage = _read_stage(repo) assert ".env" in stage, "Hidden .env must be staged by muse code add ." 
def test_I6_pycache_not_staged(self, repo: pathlib.Path) -> None: """I6: __pycache__ directories are never walked.""" cache = repo / "__pycache__" cache.mkdir() (cache / "main.cpython-311.pyc").write_bytes(b"\x00compiled\x00") _run(repo, "code", "add", ".") stage = _read_stage(repo) for key in stage: assert "__pycache__" not in key, f"Compiled cache file staged: {key}" def test_I7_muse_dir_file_not_staged_by_dot(self, repo: pathlib.Path) -> None: """I7: files inside .muse/ (VCS storage) are never staged by 'muse code add .' Data-integrity invariant: the .muse/ directory is the VCS store itself. Tracking its contents as repo files corrupts checkout — switching to a branch whose snapshot omits them would delete live VCS internals from disk. """ # agent-config writes these; they must never leak into the snapshot. dot_muse = muse_dir(repo) (dot_muse / "agent.md").write_text("# agent config\n") (dot_muse / "config.toml").write_text('[adapters]\nclaude = true\n') _run(repo, "code", "add", ".") stage = _read_stage(repo) for key in stage: assert not key.startswith(".muse/"), ( f"VCS-internal file leaked into stage: {key!r}" ) def test_I8_muse_dir_file_not_staged_when_explicit( self, repo: pathlib.Path ) -> None: """I8: explicitly naming a .muse/ file is silently rejected. Data-integrity invariant: an agent that runs 'muse code add .muse/agent.md' must not corrupt the snapshot. The file is silently dropped — same treatment as a file outside the repo root. 
""" dot_muse = muse_dir(repo) agent_md = dot_muse / "agent.md" agent_md.write_text("# agent config\n") _run(repo, "code", "add", ".muse/agent.md") stage = _read_stage(repo) assert ".muse/agent.md" not in stage, ( "Explicitly naming a .muse/ file must not add it to the stage" ) def test_I9_muse_dir_subdir_not_staged_when_explicit( self, repo: pathlib.Path ) -> None: """I9: passing .muse/ as a directory arg stages nothing from inside it.""" dot_muse = muse_dir(repo) (dot_muse / "agent.md").write_text("# config\n") _run(repo, "code", "add", ".muse") stage = _read_stage(repo) for key in stage: assert not key.startswith(".muse/"), ( f"VCS-internal file staged via directory arg: {key!r}" ) def test_I10_muse_dir_not_staged_by_update_flag( self, repo: pathlib.Path ) -> None: """I10: 'muse code add -u' re-staging head_manifest never includes .muse/ entries. Defense-in-depth: if a .muse/ entry somehow reached the head manifest (e.g. from a snapshot created before this fix), the -u path must still silently drop it rather than perpetuating the corruption. """ from muse.plugins.code.stage import write_stage, make_entry from muse.core.snapshot import hash_file # Plant a .muse/ file and force it into the head manifest via the # stage, then commit — simulating the pre-fix corruption path. dot_muse = muse_dir(repo) agent_md = dot_muse / "agent.md" agent_md.write_text("# agent config\n") oid = hash_file(agent_md) # Write directly to stage (bypassing _collect_paths) to simulate # the pre-fix scenario. write_stage(repo, {".muse/agent.md": make_entry(oid, "A")}) # Commit will bake .muse/agent.md into the snapshot via the stage. # After the commit we clear the stage and check that -u doesn't re-add it. _run(repo, "commit", "-m", "simulate pre-fix corruption") # Now .muse/agent.md is in head manifest. -u must not restage it. 
_run(repo, "code", "add", "-u") stage = _read_stage(repo) for key in stage: assert not key.startswith(".muse/"), ( f"muse code add -u re-staged VCS-internal file: {key!r}" ) def test_I11_muse_dir_not_staged_by_all_flag( self, repo: pathlib.Path ) -> None: """I11: 'muse code add -A' never stages .muse/ entries from head manifest.""" from muse.plugins.code.stage import write_stage, make_entry from muse.core.snapshot import hash_file dot_muse = muse_dir(repo) agent_md = dot_muse / "agent.md" agent_md.write_text("# agent config\n") oid = hash_file(agent_md) write_stage(repo, {".muse/agent.md": make_entry(oid, "A")}) _run(repo, "commit", "-m", "simulate pre-fix corruption") _run(repo, "code", "add", "-A") stage = _read_stage(repo) for key in stage: assert not key.startswith(".muse/"), ( f"muse code add -A re-staged VCS-internal file: {key!r}" ) def test_I12_snapshot_strips_muse_dir_entries_at_commit( self, repo: pathlib.Path ) -> None: """I12: commit snapshot never contains .muse/ keys regardless of stage content. Defense-in-depth at the snapshot layer: even if a .muse/ entry sneaks into the stage (e.g. written directly by a third-party tool), the snapshot built at commit time must strip it before persisting. """ import json as _json from muse.plugins.code.stage import write_stage, make_entry from muse.core.snapshot import hash_file from muse.core.refs import get_head_commit_id dot_muse = muse_dir(repo) agent_md = dot_muse / "agent.md" agent_md.write_text("# agent config\n") oid = hash_file(agent_md) # Bypass _collect_paths and write directly to stage. write_stage(repo, {".muse/agent.md": make_entry(oid, "A")}) _run(repo, "commit", "-m", "should strip .muse from snapshot") # Read the snapshot the commit produced and verify it has no .muse/ keys. 
from muse.core.commits import read_commit from muse.core.snapshots import read_snapshot commit_id = get_head_commit_id(repo, "main") assert commit_id, "commit must have produced a HEAD" assert object_path(repo, commit_id).exists(), f"commit object not found for {commit_id}" commit_rec = read_commit(repo, commit_id) assert commit_rec is not None, f"could not read commit {commit_id}" snap_rec = read_snapshot(repo, commit_rec.snapshot_id) assert snap_rec is not None, "snapshot must be readable after commit" manifest = snap_rec.manifest muse_keys = [k for k in manifest if k.startswith(".muse/")] assert not muse_keys, ( f"Snapshot contains VCS-internal keys: {muse_keys}" ) # =========================================================================== # II JSON output — muse code add --format json # =========================================================================== class TestJsonOutputAddII: """``muse code add --format json`` must emit valid, complete JSON.""" def test_II1_json_output_on_single_file_staged( self, repo: pathlib.Path ) -> None: """II1: staging one file emits correct JSON with all required keys.""" (repo / "main.py").write_text("x = 2\n") code, out = _run(repo, "code", "add", "--json", "main.py") assert code == 0, out data = json.loads(out.strip()) assert data["staged"] == 1 assert data["modified"] == 1 assert data["added"] == 0 assert data["deleted"] == 0 assert data["dry_run"] is False assert any(f["path"] == "main.py" for f in data["files"]) def test_II2_json_output_new_file_is_added( self, repo: pathlib.Path ) -> None: """II2: a brand-new file has mode 'new file' in JSON output.""" (repo / "brand_new.py").write_text("y = 99\n") code, out = _run(repo, "code", "add", "--json", "brand_new.py") assert code == 0, out data = json.loads(out.strip()) assert data["added"] == 1 assert data["modified"] == 0 file_entry = next(f for f in data["files"] if f["path"] == "brand_new.py") assert file_entry["mode"] == "new file" def 
test_II3_json_output_deletion_counted( self, repo: pathlib.Path ) -> None: """II3: staging a deletion records deleted=1 in JSON.""" (repo / "main.py").unlink() code, out = _run(repo, "code", "add", "-u", "--json") assert code == 0, out data = json.loads(out.strip()) assert data["deleted"] == 1 assert any(f["mode"] == "deleted" for f in data["files"]) def test_II4_json_output_nothing_to_stage( self, repo: pathlib.Path ) -> None: """II4: nothing to stage returns staged=0, not an error.""" # main.py is already at committed content — nothing to stage. code, out = _run(repo, "code", "add", "--json", ".") assert code == 0, out data = json.loads(out.strip()) assert data["staged"] == 0 def test_II5_json_dry_run_flag_true(self, repo: pathlib.Path) -> None: """II5: --dry-run sets dry_run=true in JSON and writes no stage.""" (repo / "main.py").write_text("# dry\n") code, out = _run( repo, "code", "add", "--dry-run", "--json", "main.py" ) assert code == 0, out data = json.loads(out.strip()) assert data["dry_run"] is True assert data["staged"] == 1 assert not stage_path(repo).exists() def test_II6_json_output_multiple_files( self, repo: pathlib.Path ) -> None: """II6: multiple staged files all appear in the files list.""" for i in range(5): (repo / f"f{i}.py").write_text(f"v = {i}\n") code, out = _run(repo, "code", "add", "--json", "-A") assert code == 0, out data = json.loads(out.strip()) assert data["staged"] >= 5 paths = {f["path"] for f in data["files"]} for i in range(5): assert f"f{i}.py" in paths def test_II7_json_output_is_valid_json(self, repo: pathlib.Path) -> None: """II7: output is always parseable JSON, never raw text.""" (repo / "main.py").write_text("# changed\n") _, out = _run(repo, "code", "add", "--json", "main.py") json.loads(out.strip()) # must not raise # =========================================================================== # III JSON output — muse code reset --format json # =========================================================================== 
class TestJsonOutputResetIII:
    """``muse code reset --format json`` must emit valid, complete JSON.

    Every test in this class requests JSON by passing ``--json``.
    """

    def test_III1_json_reset_specific_file(self, repo: pathlib.Path) -> None:
        """III1: resetting a staged file returns unstaged=1 in JSON."""
        (repo / "main.py").write_text("# staged\n")
        _run(repo, "code", "add", "main.py")
        code, out = _run(repo, "code", "reset", "--json", "main.py")
        assert code == 0, out
        data = json.loads(out.strip())
        assert data["unstaged"] == 1
        assert "main.py" in data["files"]

    def test_III2_json_reset_all(self, repo: pathlib.Path) -> None:
        """III2: reset with no args clears all staged files, reports count in JSON."""
        for i in range(3):
            (repo / f"f{i}.py").write_text(f"x = {i}\n")
        _run(repo, "code", "add", "-A")
        code, out = _run(repo, "code", "reset", "--json")
        assert code == 0, out
        data = json.loads(out.strip())
        assert data["unstaged"] >= 3

    def test_III3_json_reset_nothing_staged(self, repo: pathlib.Path) -> None:
        """III3: reset with nothing staged returns unstaged=0 in JSON."""
        code, out = _run(repo, "code", "reset", "--json")
        assert code == 0, out
        data = json.loads(out.strip())
        assert data["unstaged"] == 0
        assert data["files"] == []

    def test_III4_json_reset_preserves_other_staged_files(
        self, repo: pathlib.Path
    ) -> None:
        """III4: resetting one file leaves others staged."""
        (repo / "main.py").write_text("# changed\n")
        (repo / "other.py").write_text("y = 9\n")
        _run(repo, "code", "add", "-A")
        code, out = _run(repo, "code", "reset", "--json", "other.py")
        assert code == 0, out
        data = json.loads(out.strip())
        assert data["unstaged"] == 1
        assert "other.py" in data["files"]
        remaining = read_stage(repo)
        assert "main.py" in remaining, "main.py must still be staged"
        assert "other.py" not in remaining


# ===========================================================================
# IV  Text output breakdown
# ===========================================================================


class TestTextOutputBreakdownIV:
    """The text summary must show a breakdown: N added, M modified, K deleted."""

    def test_IV1_text_shows_added_count(self, repo: pathlib.Path) -> None:
        """IV1: new files appear in 'added' part of the breakdown."""
        (repo / "new.py").write_text("z = 0\n")
        _, out = _run(repo, "code", "add", "new.py")
        assert "added" in out

    def test_IV2_text_shows_modified_count(self, repo: pathlib.Path) -> None:
        """IV2: modified tracked files appear in 'modified' part."""
        (repo / "main.py").write_text("x = 999\n")
        _, out = _run(repo, "code", "add", "main.py")
        assert "modified" in out

    def test_IV3_text_shows_deleted_count(self, repo: pathlib.Path) -> None:
        """IV3: staged deletions appear in 'deleted' part."""
        (repo / "main.py").unlink()
        _, out = _run(repo, "code", "add", "-u")
        assert "deleted" in out

    def test_IV4_text_nothing_to_stage_message(
        self, repo: pathlib.Path
    ) -> None:
        """IV4: when nothing changed, output explains nothing to stage."""
        _, out = _run(repo, "code", "add", ".")
        assert "Nothing" in out or "already up to date" in out

    def test_IV5_text_breakdown_counts_match_actual(
        self, repo: pathlib.Path
    ) -> None:
        """IV5: text breakdown totals match what was actually staged."""
        (repo / "main.py").write_text("x = 2\n")  # modified
        (repo / "a.py").write_text("a = 1\n")  # new
        (repo / "b.py").write_text("b = 2\n")  # new
        _, out = _run(repo, "code", "add", "-A")
        assert "1 modified" in out
        assert "2 added" in out


# ===========================================================================
# V  JSON stage persistence
# ===========================================================================


class TestJsonPersistenceV:
    """The stage index must be persisted as JSON and survive round-trips."""

    def test_V1_stage_file_is_json(
        self, repo: pathlib.Path
    ) -> None:
        """V1: after staging, the file on disk is valid JSON."""
        (repo / "main.py").write_text("x = 9\n")
        _run(repo, "code", "add", "main.py")
        path = stage_path(repo)
        assert path.exists(), "stage.json must exist after staging"
        raw = path.read_bytes()
        assert raw.startswith(b"{"), "Stage file must be JSON"
        data = json.loads(raw)
        assert "entries" in data
        assert "main.py" in data["entries"]

    def test_V2_stage_round_trips_all_entry_fields(
        self, repo: pathlib.Path
    ) -> None:
        """V2: object_id, mode, and staged_at survive a write/read cycle."""
        (repo / "main.py").write_text("x = 42\n")
        _run(repo, "code", "add", "main.py")
        stage = read_stage(repo)
        entry = stage["main.py"]
        # "sha256:" (7 chars) + 64 hex digits = 71 characters.
        assert entry["object_id"].startswith("sha256:") and len(entry["object_id"]) == 71, \
            "object_id must be a canonical long_id (sha256:<64hex>)"
        assert entry["mode"] in ("A", "M", "D")
        assert entry["staged_at"]

    def test_V3_stage_atomic_write_no_tmp_file_after_success(
        self, repo: pathlib.Path
    ) -> None:
        """V3: no .stage-tmp-* file lingers after a successful write."""
        (repo / "main.py").write_text("x = 1\n")
        _run(repo, "code", "add", "main.py")
        stage_dir = code_dir(repo)
        tmps = list(stage_dir.glob(".stage-tmp-*"))
        assert tmps == [], f"Stale tmp files: {tmps}"

    def test_V5_corrupt_json_clears_and_returns_empty(
        self, repo: pathlib.Path
    ) -> None:
        """V5: corrupt JSON stage file is deleted and read_stage returns {}."""
        stage_dir = code_dir(repo)
        stage_dir.mkdir(parents=True, exist_ok=True)
        stage_path(repo).write_bytes(b"\xde\xad\xbe\xef garbage")
        entries = read_stage(repo)
        assert entries == {}
        assert not stage_path(repo).exists(), "Corrupt stage file must be removed"

    def test_V6_write_empty_removes_json_file(
        self, repo: pathlib.Path
    ) -> None:
        """V6: write_stage({}) removes stage.json (clear the stage)."""
        # Change main.py so it's different from the committed content.
        (repo / "main.py").write_text("x = 999\n")
        _run(repo, "code", "add", "main.py")
        assert stage_path(repo).exists(), "Stage must exist after staging a changed file"
        write_stage(repo, {})
        assert not stage_path(repo).exists()

    def test_V7_stage_version_is_3_in_json(
        self, repo: pathlib.Path
    ) -> None:
        """V7: JSON file carries version=3."""
        (repo / "main.py").write_text("x = 999\n")
        _run(repo, "code", "add", "main.py")
        assert stage_path(repo).exists(), "Stage must exist after staging"
        raw = json.loads(stage_path(repo).read_bytes())
        assert raw["version"] == 3


# ===========================================================================
# VI  Dry-run correctness
# ===========================================================================


class TestDryRunVI:
    """--dry-run must preview accurately and never write anything."""

    def test_VI1_dry_run_lists_files_that_would_be_staged(
        self, repo: pathlib.Path
    ) -> None:
        """VI1: output lists every file that would be staged."""
        (repo / "main.py").write_text("x = 3\n")
        (repo / "new.py").write_text("y = 0\n")
        _, out = _run(repo, "code", "add", "--dry-run", "-A")
        assert "main.py" in out
        assert "new.py" in out

    def test_VI2_dry_run_does_not_write_stage_file(
        self, repo: pathlib.Path
    ) -> None:
        """VI2: after dry-run, stage.json must not exist."""
        (repo / "main.py").write_text("x = 3\n")
        _run(repo, "code", "add", "--dry-run", "main.py")
        assert not stage_path(repo).exists()

    def test_VI3_dry_run_does_not_write_objects(
        self, repo: pathlib.Path
    ) -> None:
        """VI3: dry-run must not write any blobs to the object store."""
        content = b"brand new content\n"
        (repo / "brand_new.py").write_bytes(content)
        oid = blob_id(content)
        obj_path = object_path(repo, oid)
        _run(repo, "code", "add", "--dry-run", "brand_new.py")
        assert not obj_path.exists(), "Dry-run must not write objects to the store"

    def test_VI4_dry_run_json_shows_correct_counts(
        self, repo: pathlib.Path
    ) -> None:
        """VI4: --dry-run --format json shows accurate counts."""
        (repo / "main.py").write_text("x = 5\n")  # modified
        (repo / "extra.py").write_text("z = 0\n")  # new
        _, out = _run(
            repo, "code", "add", "--dry-run", "--json", "-A"
        )
        data = json.loads(out.strip())
        assert data["dry_run"] is True
        assert data["modified"] >= 1
        assert data["added"] >= 1

    def test_VI5_dry_run_output_stable_across_runs(
        self, repo: pathlib.Path
    ) -> None:
        """VI5: running dry-run twice on the same tree produces identical output."""
        (repo / "main.py").write_text("x = 7\n")
        _, out1 = _run(repo, "code", "add", "--dry-run", "--json", ".")
        _, out2 = _run(repo, "code", "add", "--dry-run", "--json", ".")
        # Keys excluded from the comparison because they may differ per run.
        _volatile = {"duration_ms", "timestamp"}
        d1 = {k: v for k, v in json.loads(out1).items() if k not in _volatile}
        d2 = {k: v for k, v in json.loads(out2).items() if k not in _volatile}
        assert d1 == d2


# ===========================================================================
# VII  Edge cases
# ===========================================================================


class TestEdgeCasesVII:
    """Edge cases: fresh repo, no commits, conflicting flags, etc."""

    def test_VII1_stage_on_fresh_repo_no_commits(
        self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch
    ) -> None:
        """VII1: staging works on a repo with no prior commits."""
        monkeypatch.chdir(tmp_path)
        init = runner.invoke(cli, ["init", "--domain", "code"], env=_env(tmp_path))
        # Fail here, with the init output, rather than obscurely in the add below.
        assert init.exit_code == 0, init.output
        (tmp_path / "first.py").write_text("x = 1\n")
        code, out = _run(tmp_path, "code", "add", "first.py")
        assert code == 0, out
        stage = read_stage(tmp_path)
        assert "first.py" in stage
        assert stage["first.py"]["mode"] == "A"

    def test_VII2_staging_identical_content_is_idempotent(
        self, repo: pathlib.Path
    ) -> None:
        """VII2: staging the same file twice with identical content is a no-op."""
        (repo / "main.py").write_text("x = 10\n")
        _run(repo, "code", "add", "main.py")
        code, out = _run(repo, "code", "add", "main.py")
        assert code == 0
        assert "already up to date" in out or "Nothing" in out

    def test_VII3_restaging_after_modification_updates_object_id(
        self, repo: pathlib.Path
    ) -> None:
        """VII3: re-staging a file after modification updates the object_id."""
        (repo / "main.py").write_text("v1\n")
        _run(repo, "code", "add", "main.py")
        oid_v1 = read_stage(repo)["main.py"]["object_id"]
        (repo / "main.py").write_text("v2\n")
        _run(repo, "code", "add", "main.py")
        oid_v2 = read_stage(repo)["main.py"]["object_id"]
        assert oid_v1 != oid_v2

    def test_VII4_nonexistent_path_exits_nonzero(
        self, repo: pathlib.Path
    ) -> None:
        """VII4: staging a non-existent, untracked path exits non-zero."""
        code, _ = _run_unchecked(repo, "code", "add", "ghost.py")
        assert code != 0

    def test_VII5_directory_scoped_add_leaves_top_level_unstaged(
        self, repo: pathlib.Path
    ) -> None:
        """VII5: 'muse code add subdir' stages only files under that directory."""
        sub = repo / "sub"
        sub.mkdir()
        (sub / "a.py").write_text("a = 1\n")
        (repo / "top.py").write_text("t = 1\n")
        _run(repo, "code", "add", "sub")
        stage = read_stage(repo)
        assert "sub/a.py" in stage
        assert "top.py" not in stage

    def test_VII6_verbose_shows_per_file_mode(
        self, repo: pathlib.Path
    ) -> None:
        """VII6: --verbose shows one line per staged file."""
        (repo / "main.py").write_text("x = 2\n")
        _, out = _run(repo, "code", "add", "-v", "main.py")
        assert "main.py" in out

    def test_VII7_reset_HEAD_syntax_alias(self, repo: pathlib.Path) -> None:
        """VII7: 'muse code reset HEAD <file>' is identical to 'muse code reset <file>'."""
        (repo / "main.py").write_text("x = 3\n")
        _run(repo, "code", "add", "main.py")
        code, _ = _run(repo, "code", "reset", "HEAD", "main.py")
        assert code == 0
        assert not stage_path(repo).exists()

    def test_VII8_stage_then_commit_then_restage_works(
        self, repo: pathlib.Path
    ) -> None:
        """VII8: full stage → commit → re-stage cycle works end-to-end."""
        (repo / "main.py").write_text("x = 5\n")
        _run(repo, "code", "add", "main.py")
        _run(repo, "commit", "-m", "v2")
        # The commit is expected to consume the stage.
        assert not stage_path(repo).exists()
        (repo / "main.py").write_text("x = 6\n")
        code, _ = _run(repo, "code", "add", "main.py")
        assert code == 0
        assert "main.py" in read_stage(repo)

    def test_VII9_update_flag_includes_modifications_not_new(
        self, repo: pathlib.Path
    ) -> None:
        """VII9: -u stages tracked modifications but not new untracked files."""
        (repo / "main.py").write_text("x = 99\n")  # tracked, modified
        (repo / "untracked.py").write_text("u = 0\n")  # new, untracked
        _run(repo, "code", "add", "-u")
        stage = read_stage(repo)
        assert "main.py" in stage
        assert "untracked.py" not in stage


# ===========================================================================
# VIII  Stress tests
# ===========================================================================


class TestStressVIII:
    """High-volume and adversarial scenarios."""

    def test_VIII1_stage_500_files_correct_count(
        self, repo: pathlib.Path
    ) -> None:
        """VIII1: staging 500 files produces at least 500 entries in the stage index."""
        for i in range(500):
            (repo / f"module_{i:04d}.py").write_text(f"X = {i}\n")
        code, out = _run(repo, "code", "add", "-A")
        assert code == 0, out
        stage = read_stage(repo)
        assert len(stage) >= 500

    def test_VIII2_500_files_json_output_correct(
        self, repo: pathlib.Path
    ) -> None:
        """VIII2: JSON output for 500 files reports at least 500 added and staged."""
        for i in range(500):
            (repo / f"f_{i:04d}.py").write_text(f"X = {i}\n")
        _, out = _run(repo, "code", "add", "-A", "--json")
        data = json.loads(out.strip())
        assert data["added"] >= 500
        assert data["staged"] >= 500

    def test_VIII3_stage_add_reset_cycle_50_times(
        self, repo: pathlib.Path
    ) -> None:
        """VIII3: 50 add/reset cycles leave a clean stage each time."""
        (repo / "main.py").write_text("x = 0\n")
        for cycle in range(50):
            (repo / "main.py").write_text(f"x = {cycle}\n")
            code, _ = _run(repo, "code", "add", "main.py")
            assert code == 0, f"Cycle {cycle}: add failed"
            code, _ = _run(repo, "code", "reset", "main.py")
            assert code == 0, f"Cycle {cycle}: reset failed"
            assert not stage_path(repo).exists(), (
                f"Cycle {cycle}: stage not cleared after reset"
            )

    def test_VIII4_large_file_stages_correctly(
        self, repo: pathlib.Path
    ) -> None:
        """VIII4: a 5 MiB file stages and its object_id is correct."""
        content = os.urandom(5 * 1024 * 1024)
        (repo / "big.bin").write_bytes(content)
        code, _ = _run(repo, "code", "add", "big.bin")
        assert code == 0
        stage = read_stage(repo)
        assert "big.bin" in stage
        expected_oid = blob_id(content)
        assert stage["big.bin"]["object_id"] == expected_oid

    def test_VIII5_all_modes_in_single_add(
        self, repo: pathlib.Path
    ) -> None:
        """VIII5: a single add can capture added, modified, and deleted in one shot."""
        # Add extra tracked file and commit first.
        (repo / "to_delete.py").write_text("del = 1\n")
        _run(repo, "code", "add", "to_delete.py")
        _run(repo, "commit", "-m", "add to_delete")
        (repo / "main.py").write_text("x = modified\n")
        (repo / "to_delete.py").unlink()
        (repo / "brand_new.py").write_text("new = True\n")
        code, out = _run(repo, "code", "add", "--json", "-A")
        assert code == 0, out
        data = json.loads(out.strip())
        assert data["modified"] >= 1
        assert data["added"] >= 1
        assert data["deleted"] >= 1

    def test_VIII6_staging_after_many_commits_works(
        self, repo: pathlib.Path
    ) -> None:
        """VIII6: staging still works correctly after many commits."""
        for i in range(50):
            (repo / "main.py").write_text(f"x = {i}\n")
            _run(repo, "commit", "--allow-empty", "-m", f"commit {i}")
        (repo / "main.py").write_text("x = final\n")
        code, _ = _run(repo, "code", "add", "main.py")
        assert code == 0
        stage = read_stage(repo)
        assert "main.py" in stage


# ===========================================================================
# IX  Stat-cache performance — muse code add must use StatCache, not hash_file
# ===========================================================================


class TestStatCacheIX:
    """muse code add must use the stat cache, not raw hash_file on every call."""

    def test_IX1_stat_cache_used_structurally(self) -> None:
        """IX1: code_stage module must import and use StatCache or load_cache."""
        import inspect
        from muse.cli.commands import code_stage as cs_module

        source = inspect.getsource(cs_module)
        assert "load_cache" in source or "StatCache" in source, (
            "code_stage must import and use load_cache or StatCache for hashing"
        )

    def test_IX2_hash_file_not_called_on_unchanged_file(
        self, repo: pathlib.Path
    ) -> None:
        """IX2: second code add on unchanged file must not rehash from disk.

        After the first add the stat cache has a valid entry.  The second add
        must return the cached hash without calling _hash_str again.
        """
        from unittest.mock import patch

        (repo / "cached.txt").write_text("stable content\n")
        # First add — computes and caches the hash.
        code, _ = _run(repo, "code", "add", "cached.txt")
        assert code == 0
        # Reset stage so the file is re-evaluated on the second add.
        _run(repo, "code", "reset", "cached.txt")
        # Second add — must hit the cache; _hash_str must NOT be called.
        with patch("muse.core.stat_cache._hash_str") as mock_hash:
            code2, _ = _run(repo, "code", "add", "cached.txt")
        assert code2 == 0
        # Plain assert so the explanatory message is actually reported on
        # failure (a trailing ", (msg)" after assert_not_called() is a no-op).
        assert not mock_hash.called, (
            "second code add on unchanged file called _hash_str — stat cache not used"
        )

    def test_IX3_stat_cache_file_written_after_add(
        self, repo: pathlib.Path
    ) -> None:
        """IX3: .muse/cache/stat.json must exist after code add (cache was saved)."""
        (repo / "new_file.py").write_text("y = 2\n")
        code, _ = _run(repo, "code", "add", "new_file.py")
        assert code == 0
        cache_path = stat_cache_path(repo)
        assert cache_path.exists(), (
            "cache/stat.json not found — cache.save() not called after code add"
        )

    def test_IX4_modified_file_is_rehashed(
        self, repo: pathlib.Path
    ) -> None:
        """IX4: modifying a file invalidates the cache entry so it is rehashed."""
        from unittest.mock import patch
        import muse.core.stat_cache as _sc

        (repo / "mutable.py").write_text("v = 1\n")
        _run(repo, "code", "add", "mutable.py")
        _run(repo, "code", "reset", "mutable.py")
        # Modify the file — mtime/size change → cache miss.
        (repo / "mutable.py").write_text("v = 2\n")
        # Spy on _hash_str but let the real function run so object_store
        # integrity checks still pass.
        with patch.object(_sc, "_hash_str", wraps=_sc._hash_str) as mock_hash:
            code, _ = _run(repo, "code", "add", "mutable.py")
        assert code == 0
        # Plain assert so the explanatory message is actually reported on failure.
        assert mock_hash.called, (
            "modified file should trigger a _hash_str call (cache miss)"
        )


# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------


def _read_stage(root: pathlib.Path) -> StagedFileMap:
    """Return the staged-file map for *root* (thin wrapper over ``read_stage``)."""
    return read_stage(root)