"""Tests for Bug 14: rebase/squash proceeds with theirs_manifest={} when the commit's snapshot is missing or corrupt — silently deleting all files from that commit in the rebased history. Root cause: both muse/core/rebase.py::replay_one and the squash path in muse/cli/commands/rebase.py had: theirs_manifest = theirs_snap.manifest if theirs_snap else {} If theirs_snap is None (snapshot missing or corrupt), theirs_manifest={} causes the three-way merge engine to treat all files from that commit as "deleted" — producing a rebased history that is missing the commit's content. This is silent data loss. The fix: if theirs_snap is None, raise ValueError (in replay_one) or abort with SystemExit (in the squash path) rather than proceeding with empty manifest. Scope of tests -------------- Unit (replay_one missing snapshot): - replay_one raises ValueError when commit snapshot is missing - replay_one raises ValueError when commit snapshot is corrupt (unreadable) - replay_one succeeds when all snapshots are present Integration (the pre-fix empty-manifest behavior): - Documents that theirs_snap=None → theirs_manifest={} would delete all files - Validates that the fix prevents wrong merge from occurring """ from __future__ import annotations import datetime import pathlib from typing import TYPE_CHECKING import pytest if TYPE_CHECKING: from muse.plugins.registry import MuseDomainPlugin from muse.core.rebase import replay_one from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id from muse.core.types import Manifest, blob_id, fake_id from muse.core.paths import muse_dir from muse.core.object_store import object_path as _obj_path from muse.core.refs import write_branch_ref from muse.core.commits import ( CommitRecord, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, write_snapshot, ) _TS = datetime.datetime(2024, 6, 15, 10, 0, 0, tzinfo=datetime.timezone.utc) def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: import json repo = tmp_path / "repo" repo.mkdir() dot_muse = muse_dir(repo) (dot_muse / "commits").mkdir(parents=True) (dot_muse / "snapshots").mkdir() (dot_muse / "objects").mkdir() (dot_muse / "refs" / "heads").mkdir(parents=True) (dot_muse / "HEAD").write_text("ref: refs/heads/main\n") (dot_muse / "refs" / "heads" / "main").write_text("") (dot_muse / "repo.json").write_text(json.dumps({ "repo_id": fake_id("repo"), "domain": "code", "default_branch": "main", })) return repo def _write_commit( repo: pathlib.Path, message: str, manifest: Manifest, parent: str | None = None, *, write_objects: bool = False, ) -> CommitRecord: if write_objects: from muse.core.object_store import write_object real_manifest: Manifest = {} for path, content in manifest.items(): raw = content.encode() oid = blob_id(raw) write_object(repo, oid, raw) real_manifest[path] = oid manifest = real_manifest snap_id = compute_snapshot_id(manifest) snap = SnapshotRecord(snapshot_id=snap_id, manifest=manifest, created_at=_TS) write_snapshot(repo, snap) parent_ids = [parent] if parent else [] cid = compute_commit_id( parent_ids=parent_ids, snapshot_id=snap_id, message=message, committed_at_iso=_TS.isoformat(), author="tester", ) c = CommitRecord( commit_id=cid, branch="main", snapshot_id=snap_id, message=message, committed_at=_TS, author="tester", parent_commit_id=parent, parent2_commit_id=None, ) write_commit(repo, c) return c def _get_plugin(repo: pathlib.Path) -> "MuseDomainPlugin": from muse.plugins.registry import resolve_plugin return resolve_plugin(repo) # ────────────────────────────────────────────────────────────────────────────── # Unit: replay_one raises when commit snapshot is missing # ────────────────────────────────────────────────────────────────────────────── class TestReplayOneMissingSnapshot: def test_replay_one_raises_valueerror_when_snapshot_missing(self, tmp_path: pathlib.Path) -> None: """Bug 14: replay_one must raise ValueError when the commit's snapshot is missing.""" repo = _make_repo(tmp_path) # Base: initial commit c1 = _write_commit(repo, "initial", {"a.py": "a" * 64}) write_branch_ref(repo, "main", c1.commit_id) # The commit to replay c2 = _write_commit(repo, "target", {"b.py": "b" * 64}, parent=c1.commit_id) # Delete c2's snapshot to simulate corruption snap_path = _obj_path(repo, c2.snapshot_id) snap_path.unlink() plugin = _get_plugin(repo) domain = "code" with pytest.raises(ValueError, match="missing or corrupt"): replay_one(repo, c2, c1.commit_id, plugin, domain, "main") def test_replay_one_raises_when_snapshot_corrupt(self, tmp_path: pathlib.Path) -> None: """replay_one must raise ValueError when the commit's snapshot is corrupt.""" repo = _make_repo(tmp_path) c1 = _write_commit(repo, "initial", {"a.py": "a" * 64}) write_branch_ref(repo, "main", c1.commit_id) c2 = _write_commit(repo, "target", {"b.py": "b" * 64}, parent=c1.commit_id) # Corrupt c2's snapshot snap_path = _obj_path(repo, c2.snapshot_id) snap_path.write_bytes(b"\xff\x00garbage-bytes") plugin = _get_plugin(repo) with pytest.raises(ValueError, match="missing or corrupt"): replay_one(repo, c2, c1.commit_id, plugin, "code", "main") def test_replay_one_succeeds_when_all_snapshots_present(self, tmp_path: pathlib.Path) -> None: """Regression: replay_one must work normally when all snapshots exist.""" repo = _make_repo(tmp_path) c1 = _write_commit(repo, "initial", {"a.py": "hello"}, write_objects=True) write_branch_ref(repo, "main", c1.commit_id) c2 = _write_commit(repo, "target", {"b.py": "world"}, parent=c1.commit_id, write_objects=True) plugin = _get_plugin(repo) result = replay_one(repo, c2, c1.commit_id, plugin, "code", "main") # Should return a new CommitRecord (or conflict list), NOT raise assert result is not None # If clean merge, result is a CommitRecord from muse.core.commits import CommitRecord as CR if isinstance(result, CR): assert result.message == c2.message def test_before_fix_would_produce_wrong_manifest(self, tmp_path: pathlib.Path) -> None: """Document the pre-fix behavior: missing snapshot → empty theirs_manifest. With theirs_manifest={}, the three-way merge would treat ALL files in the commit as deleted — producing a rebased commit with no content. """ repo = _make_repo(tmp_path) c1 = _write_commit(repo, "initial", {"a.py": "a" * 64}) c2 = _write_commit(repo, "target", {"b.py": "b" * 64}, parent=c1.commit_id) # Simulate the pre-fix fallback from muse.core.snapshots import read_snapshot theirs_snap = read_snapshot(repo, c2.snapshot_id) assert theirs_snap is not None # snapshot exists # Now delete it to show what would happen snap_path = _obj_path(repo, c2.snapshot_id) snap_path.unlink() theirs_snap = read_snapshot(repo, c2.snapshot_id) old_behavior_manifest: Manifest = theirs_snap.manifest if theirs_snap else {} # Pre-fix: empty manifest would be used, silently deleting b.py assert old_behavior_manifest == {}, ( "BUG 14: missing snapshot caused theirs_manifest={} in replay_one, " "which would silently delete all files from the rebased commit" ) # ────────────────────────────────────────────────────────────────────────────── # Integration: snapshot missing guard in rebase path # ────────────────────────────────────────────────────────────────────────────── class TestRebaseSnapshotMissingIntegration: def test_replay_one_raises_valueerror_not_returns_empty_commit(self, tmp_path: pathlib.Path) -> None: """ValueError from replay_one must propagate — not be swallowed.""" repo = _make_repo(tmp_path) c1 = _write_commit(repo, "initial", {"main.py": "a" * 64}) write_branch_ref(repo, "main", c1.commit_id) c2 = _write_commit(repo, "add feature", {"feature.py": "b" * 64}, parent=c1.commit_id) # Remove snapshot for c2 (_obj_path(repo, c2.snapshot_id)).unlink() plugin = _get_plugin(repo) # Should raise, not silently return an empty commit with pytest.raises(ValueError): replay_one(repo, c2, c1.commit_id, plugin, "code", "main") # c2's commit still exists on disk (replay_one didn't corrupt anything) from muse.core.commits import read_commit assert read_commit(repo, c2.commit_id) is not None, ( "replay_one raising ValueError must not corrupt the original commit" )