"""TDD — Phase 2: migrate legacy .muse/commits/ and .muse/snapshots/ to unified object store. Phase 2 requirements (issue #12): - muse code migrate reads binary msgpack files from .muse/commits/sha256/ and .muse/snapshots/sha256/ and writes them to .muse/objects/sha256/<2>/<62> - After migration every legacy commit/snapshot is readable via read_commit() - After migration .muse/commits/ and .muse/snapshots/ are removed entirely - dry_run=True must not remove the legacy directories - muse init (init_repo_dirs) must NOT create .muse/commits/ or .muse/snapshots/ - read_commit() returns None for IDs that only exist in the legacy dir (pre-migrate) """ from __future__ import annotations import hashlib import json import pathlib import msgpack import pytest from muse.core.ids import hash_blob, hash_snapshot, hash_commit from muse.core.object_store import object_path, objects_dir, read_muse_object from muse.core.paths import commits_dir, snapshots_dir, init_repo_dirs from muse.core.commits import read_commit from muse.core.types import long_id # --------------------------------------------------------------------------- # Helpers — reproduce pre-Phase-2 legacy on-disk shapes # --------------------------------------------------------------------------- BLOB_A = b"# Hello\n" def _old_blob_id(data: bytes) -> str: return long_id(hashlib.sha256(data).hexdigest()) def _write_old_blob(repo: pathlib.Path, data: bytes) -> str: old_id = _old_blob_id(data) path = object_path(repo, old_id) path.parent.mkdir(parents=True, exist_ok=True) path.write_bytes(data) return old_id def _old_snapshot_id(manifest: dict[str, str]) -> str: from muse.core.types import split_id _SEP = "\x00" parts = sorted(f"{p}{_SEP}{split_id(oid)[1]}" for p, oid in manifest.items()) canonical = _SEP.join(parts).encode() return long_id(hashlib.sha256(canonical).hexdigest()) def _write_legacy_snapshot_msgpack( repo: pathlib.Path, manifest: dict[str, str], created_at: str = "2026-05-20T16:00:00+00:00", ) -> str: """Write a legacy binary msgpack snapshot into .muse/snapshots/sha256/.""" old_id = _old_snapshot_id(manifest) _, hex_id = old_id.split(":", 1) snap_dir = snapshots_dir(repo) / "sha256" snap_dir.mkdir(parents=True, exist_ok=True) record = { "schema_version": 1, "snapshot_id": old_id, "manifest": manifest, "directories": [], "created_at": created_at, "note": "", } (snap_dir / f"{hex_id}.msgpack").write_bytes( msgpack.packb(record, use_bin_type=True) ) return old_id def _old_commit_id( parent_ids: list[str], snapshot_id: str, message: str, committed_at_iso: str, author: str = "", signer_public_key: str = "", ) -> str: from muse.core.types import split_id _SEP = "\x00" parts = [ _SEP.join(sorted(split_id(p)[1] for p in parent_ids)), split_id(snapshot_id)[1] if snapshot_id else "", message, committed_at_iso, author, signer_public_key, ] canonical = _SEP.join(parts).encode() return long_id(hashlib.sha256(canonical).hexdigest()) def _write_legacy_commit_msgpack( repo: pathlib.Path, snapshot_id: str, message: str = "initial commit", committed_at: str = "2026-05-20T16:00:00+00:00", author: str = "gabriel", parent_ids: list[str] | None = None, ) -> str: """Write a legacy binary msgpack commit into .muse/commits/sha256/.""" parents = parent_ids or [] old_id = _old_commit_id( parent_ids=parents, snapshot_id=snapshot_id, message=message, committed_at_iso=committed_at, author=author, ) _, hex_id = old_id.split(":", 1) cmt_dir = commits_dir(repo) / "sha256" cmt_dir.mkdir(parents=True, exist_ok=True) record: dict = { "commit_id": old_id, "branch": "main", "snapshot_id": snapshot_id, "message": message, "committed_at": committed_at, "parent_commit_id": parents[0] if parents else None, "parent2_commit_id": parents[1] if len(parents) > 1 else None, "author": author, "signature": "", "signer_public_key": "", "format_version": 8, "metadata": {}, "structured_delta": None, "sem_ver_bump": "none", "breaking_changes": [], "agent_id": "claude-code", "model_id": "claude-sonnet-4-6", "toolchain_id": "", "prompt_hash": "", "reviewed_by": [], "test_runs": 0, "labels": [], "status": "", "notes": [], "score": None, } (cmt_dir / f"{hex_id}.msgpack").write_bytes( msgpack.packb(record, use_bin_type=True) ) return old_id # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @pytest.fixture def repo(tmp_path: pathlib.Path) -> pathlib.Path: objects_dir(tmp_path).mkdir(parents=True, exist_ok=True) return tmp_path # --------------------------------------------------------------------------- # Pre-migrate: read_commit does NOT look in legacy dir # --------------------------------------------------------------------------- class TestReadCommitDoesNotTouchLegacyDir: def test_returns_none_for_id_only_in_commits_dir(self, repo: pathlib.Path) -> None: """read_commit() returns None for a commit that only exists in .muse/commits/.""" old_blob_id = _write_old_blob(repo, BLOB_A) old_snap_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": old_blob_id}) old_cmt_id = _write_legacy_commit_msgpack(repo, snapshot_id=old_snap_id) assert read_commit(repo, old_cmt_id) is None # --------------------------------------------------------------------------- # Post-migrate: legacy commits land in the object store # --------------------------------------------------------------------------- class TestLegacyCommitsMigrate: def test_legacy_commit_readable_after_migrate(self, repo: pathlib.Path) -> None: """After migrate(), a commit from .muse/commits/ is findable via read_commit().""" from muse.core.migrate import migrate old_blob_id = _write_old_blob(repo, BLOB_A) old_snap_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": old_blob_id}) old_cmt_id = _write_legacy_commit_msgpack(repo, snapshot_id=old_snap_id) result = migrate(repo) new_cmt_id = result.id_map.get(old_cmt_id, old_cmt_id) commit = read_commit(repo, new_cmt_id) assert commit is not None assert commit.message == "initial commit" def test_legacy_commit_fields_preserved_after_migrate(self, repo: pathlib.Path) -> None: """Commit fields survive the migration round-trip.""" from muse.core.migrate import migrate old_blob_id = _write_old_blob(repo, BLOB_A) old_snap_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": old_blob_id}) old_cmt_id = _write_legacy_commit_msgpack( repo, snapshot_id=old_snap_id, message="preserve me", author="gabriel", ) result = migrate(repo) new_cmt_id = result.id_map.get(old_cmt_id, old_cmt_id) commit = read_commit(repo, new_cmt_id) assert commit is not None assert commit.message == "preserve me" assert commit.branch == "main" def test_two_legacy_commits_both_readable_after_migrate(self, repo: pathlib.Path) -> None: """A two-commit chain in the legacy dir is fully migrated.""" from muse.core.migrate import migrate old_blob_id = _write_old_blob(repo, BLOB_A) old_snap_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": old_blob_id}) old_cmt_a = _write_legacy_commit_msgpack( repo, snapshot_id=old_snap_id, message="first" ) old_cmt_b = _write_legacy_commit_msgpack( repo, snapshot_id=old_snap_id, message="second", parent_ids=[old_cmt_a] ) result = migrate(repo) for old_id in (old_cmt_a, old_cmt_b): new_id = result.id_map.get(old_id, old_id) assert read_commit(repo, new_id) is not None, \ f"commit {old_id} not found after migration" # --------------------------------------------------------------------------- # Post-migrate: legacy snapshots land in the object store # --------------------------------------------------------------------------- class TestLegacySnapshotsMigrate: def test_legacy_snapshot_in_object_store_after_migrate(self, repo: pathlib.Path) -> None: """After migrate(), a snapshot from .muse/snapshots/ is in the object store.""" from muse.core.migrate import migrate old_blob_id = _write_old_blob(repo, BLOB_A) old_snap_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": old_blob_id}) _write_legacy_commit_msgpack(repo, snapshot_id=old_snap_id) migrate(repo) new_blob_id = hash_blob(BLOB_A) new_snap_id = hash_snapshot({"hello.md": new_blob_id}) obj = read_muse_object(repo, new_snap_id) assert obj is not None type_str, raw = obj assert type_str == "snapshot" data = json.loads(raw) assert data["snapshot_id"] == new_snap_id # --------------------------------------------------------------------------- # Post-migrate: legacy directories are removed # --------------------------------------------------------------------------- class TestLegacyDirCleanedAfterMigrate: def test_commits_dir_removed_after_migrate(self, repo: pathlib.Path) -> None: """.muse/commits/ is deleted entirely after migrate().""" from muse.core.migrate import migrate old_blob_id = _write_old_blob(repo, BLOB_A) old_snap_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": old_blob_id}) _write_legacy_commit_msgpack(repo, snapshot_id=old_snap_id) migrate(repo) assert not commits_dir(repo).exists(), \ ".muse/commits/ must be removed after migration" def test_snapshots_dir_removed_after_migrate(self, repo: pathlib.Path) -> None: """.muse/snapshots/ is deleted entirely after migrate().""" from muse.core.migrate import migrate old_blob_id = _write_old_blob(repo, BLOB_A) old_snap_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": old_blob_id}) _write_legacy_commit_msgpack(repo, snapshot_id=old_snap_id) migrate(repo) assert not snapshots_dir(repo).exists(), \ ".muse/snapshots/ must be removed after migration" def test_empty_commits_dir_removed_after_migrate(self, repo: pathlib.Path) -> None: """migrate() removes an empty .muse/commits/ dir even with no msgpack files.""" from muse.core.migrate import migrate commits_dir(repo).mkdir(parents=True, exist_ok=True) migrate(repo) assert not commits_dir(repo).exists() def test_dry_run_does_not_remove_commits_dir(self, repo: pathlib.Path) -> None: """dry_run=True must not delete .muse/commits/.""" from muse.core.migrate import migrate old_blob_id = _write_old_blob(repo, BLOB_A) old_snap_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": old_blob_id}) _write_legacy_commit_msgpack(repo, snapshot_id=old_snap_id) migrate(repo, dry_run=True) assert commits_dir(repo).exists(), "dry_run must not remove .muse/commits/" def test_dry_run_does_not_remove_snapshots_dir(self, repo: pathlib.Path) -> None: """dry_run=True must not delete .muse/snapshots/.""" from muse.core.migrate import migrate old_blob_id = _write_old_blob(repo, BLOB_A) old_snap_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": old_blob_id}) _write_legacy_commit_msgpack(repo, snapshot_id=old_snap_id) migrate(repo, dry_run=True) assert snapshots_dir(repo).exists(), "dry_run must not remove .muse/snapshots/" # --------------------------------------------------------------------------- # init_repo_dirs does NOT create legacy directories # --------------------------------------------------------------------------- class TestInitDoesNotCreateLegacyDirs: def test_init_does_not_create_commits_dir(self, tmp_path: pathlib.Path) -> None: """init_repo_dirs() must not create .muse/commits/.""" init_repo_dirs(tmp_path) assert not commits_dir(tmp_path).exists(), \ ".muse/commits/ must not be created by init_repo_dirs" def test_init_does_not_create_snapshots_dir(self, tmp_path: pathlib.Path) -> None: """init_repo_dirs() must not create .muse/snapshots/.""" init_repo_dirs(tmp_path) assert not snapshots_dir(tmp_path).exists(), \ ".muse/snapshots/ must not be created by init_repo_dirs"