test_phase2_legacy_store_migration.py
python
sha256:7781e508756c81b7ddb0b08b408fd2b99bad87798cefa596773373efc360952c
chore: typing audit — zero violations, zero untyped defs
Sonnet 4.6
patch
23 days ago
| 1 | """TDD — Phase 2: migrate legacy .muse/commits/ and .muse/snapshots/ to unified object store. |
| 2 | |
| 3 | Phase 2 requirements (issue #12): |
| 4 | - muse code migrate reads binary msgpack files from .muse/commits/sha256/ and |
| 5 | .muse/snapshots/sha256/ and writes them to .muse/objects/sha256/<2>/<62> |
| 6 | - After migration every legacy commit/snapshot is readable via read_commit() |
| 7 | - After migration .muse/commits/ and .muse/snapshots/ are removed entirely |
| 8 | - dry_run=True must not remove the legacy directories |
| 9 | - muse init (init_repo_dirs) must NOT create .muse/commits/ or .muse/snapshots/ |
| 10 | - read_commit() returns None for IDs that only exist in the legacy dir (pre-migrate) |
| 11 | """ |
| 12 | |
| 13 | from __future__ import annotations |
| 14 | |
| 15 | import hashlib |
| 16 | import json |
| 17 | import pathlib |
| 18 | |
| 19 | import msgpack |
| 20 | import pytest |
| 21 | |
| 22 | from collections.abc import Mapping |
| 23 | |
| 24 | from muse.core.ids import hash_blob, hash_snapshot, hash_commit |
| 25 | from muse.core.object_store import object_path, objects_dir, read_muse_object |
| 26 | from muse.core.paths import commits_dir, snapshots_dir, init_repo_dirs |
| 27 | from muse.core.commits import read_commit, CommitDict |
| 28 | from muse.core.types import long_id |
| 29 | |
| 30 | |
| 31 | # --------------------------------------------------------------------------- |
| 32 | # Helpers — reproduce pre-Phase-2 legacy on-disk shapes |
| 33 | # --------------------------------------------------------------------------- |
| 34 | |
| 35 | BLOB_A = b"# Hello\n" |
| 36 | |
| 37 | |
def _old_blob_id(data: bytes) -> str:
    """Return the old-style blob ID for *data*.

    The ID is ``long_id`` applied to the SHA-256 hex digest of the raw bytes.
    """
    digest = hashlib.sha256(data).hexdigest()
    return long_id(digest)
| 40 | |
| 41 | |
def _write_old_blob(repo: pathlib.Path, data: bytes) -> str:
    """Write *data* verbatim to its object-store path and return its old-style ID.

    The destination is whatever ``object_path(repo, <id>)`` resolves to; missing
    parent directories are created first.
    """
    blob_id = _old_blob_id(data)
    destination = object_path(repo, blob_id)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(data)
    return blob_id
| 48 | |
| 49 | |
def _old_snapshot_id(manifest: Mapping[str, str]) -> str:
    """Return the old-style snapshot ID for *manifest* (path -> object ID).

    Each entry becomes ``<path>\\x00<second element of split_id(object ID)>``;
    the entries are sorted, joined with NUL, encoded, and SHA-256 hashed. The
    hex digest is then passed through ``long_id``.
    """
    from muse.core.types import split_id

    nul = "\x00"
    entries = [
        f"{path}{nul}{split_id(object_id)[1]}"
        for path, object_id in manifest.items()
    ]
    entries.sort()
    payload = nul.join(entries).encode()
    digest = hashlib.sha256(payload).hexdigest()
    return long_id(digest)
| 56 | |
| 57 | |
def _write_legacy_snapshot_msgpack(
    repo: pathlib.Path,
    manifest: dict[str, str],
    created_at: str = "2026-05-20T16:00:00+00:00",
) -> str:
    """Write a legacy binary msgpack snapshot into .muse/snapshots/sha256/.

    The file is named ``<hex>.msgpack`` (the part of the old snapshot ID after
    the first ``:``) and lives under ``snapshots_dir(repo) / "sha256"``.

    Returns:
        The old-style snapshot ID computed from *manifest*.
    """
    snapshot_id = _old_snapshot_id(manifest)
    _prefix, hex_digest = snapshot_id.split(":", 1)

    target_dir = snapshots_dir(repo) / "sha256"
    target_dir.mkdir(parents=True, exist_ok=True)

    # Key order is kept as-is so the packed bytes match the original helper.
    payload = msgpack.packb(
        {
            "schema_version": 1,
            "snapshot_id": snapshot_id,
            "manifest": manifest,
            "directories": [],
            "created_at": created_at,
            "note": "",
        },
        use_bin_type=True,
    )
    (target_dir / f"{hex_digest}.msgpack").write_bytes(payload)
    return snapshot_id
| 80 | |
| 81 | |
def _old_commit_id(
    parent_ids: list[str],
    snapshot_id: str,
    message: str,
    committed_at_iso: str,
    author: str = "",
    signer_public_key: str = "",
) -> str:
    """Return the old-style commit ID for the given commit fields.

    The canonical form is six NUL-separated fields, in order: the sorted parent
    hex IDs (themselves NUL-joined), the snapshot hex ID (empty when
    *snapshot_id* is falsy), *message*, *committed_at_iso*, *author*, and
    *signer_public_key*. Its SHA-256 hex digest is passed through ``long_id``.
    """
    from muse.core.types import split_id

    nul = "\x00"
    parent_hexes = sorted(split_id(parent)[1] for parent in parent_ids)
    snapshot_hex = split_id(snapshot_id)[1] if snapshot_id else ""
    fields = (
        nul.join(parent_hexes),
        snapshot_hex,
        message,
        committed_at_iso,
        author,
        signer_public_key,
    )
    digest = hashlib.sha256(nul.join(fields).encode()).hexdigest()
    return long_id(digest)
| 102 | |
| 103 | |
def _write_legacy_commit_msgpack(
    repo: pathlib.Path,
    snapshot_id: str,
    message: str = "initial commit",
    committed_at: str = "2026-05-20T16:00:00+00:00",
    author: str = "gabriel",
    parent_ids: list[str] | None = None,
) -> str:
    """Write a legacy binary msgpack commit into .muse/commits/sha256/.

    The file is named ``<hex>.msgpack`` (the part of the old commit ID after
    the first ``:``) and lives under ``commits_dir(repo) / "sha256"``. At most
    the first two entries of *parent_ids* are stored in the record, although
    all of them feed into the commit ID.

    Returns:
        The old-style commit ID computed from the commit fields.
    """
    parents: list[str] = parent_ids if parent_ids else []
    commit_id = _old_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=committed_at,
        author=author,
    )
    _prefix, hex_digest = commit_id.split(":", 1)

    target_dir = commits_dir(repo) / "sha256"
    target_dir.mkdir(parents=True, exist_ok=True)

    first_parent = parents[0] if parents else None
    second_parent = parents[1] if len(parents) > 1 else None

    # Key order is kept as-is so the packed bytes match the original helper.
    record: CommitDict = {
        "commit_id": commit_id,
        "branch": "main",
        "snapshot_id": snapshot_id,
        "message": message,
        "committed_at": committed_at,
        "parent_commit_id": first_parent,
        "parent2_commit_id": second_parent,
        "author": author,
        "signature": "",
        "signer_public_key": "",
        "format_version": 8,
        "metadata": {},
        "structured_delta": None,
        "sem_ver_bump": "none",
        "breaking_changes": [],
        "agent_id": "claude-code",
        "model_id": "claude-sonnet-4-6",
        "toolchain_id": "",
        "prompt_hash": "",
        "reviewed_by": [],
        "test_runs": 0,
        "labels": [],
        "status": "",
        "notes": [],
        "score": None,
    }
    packed = msgpack.packb(record, use_bin_type=True)
    (target_dir / f"{hex_digest}.msgpack").write_bytes(packed)
    return commit_id
| 155 | |
| 156 | |
| 157 | # --------------------------------------------------------------------------- |
| 158 | # Fixtures |
| 159 | # --------------------------------------------------------------------------- |
| 160 | |
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Provide a temporary repo root whose ``objects_dir`` already exists."""
    store = objects_dir(tmp_path)
    store.mkdir(parents=True, exist_ok=True)
    return tmp_path
| 165 | |
| 166 | |
| 167 | # --------------------------------------------------------------------------- |
| 168 | # Pre-migrate: read_commit does NOT look in legacy dir |
| 169 | # --------------------------------------------------------------------------- |
| 170 | |
class TestReadCommitDoesNotTouchLegacyDir:
    """Before migration, read_commit() must not find commits stored only in the legacy dir."""

    def test_returns_none_for_id_only_in_commits_dir(self, repo: pathlib.Path) -> None:
        """read_commit() returns None for a commit that only exists in .muse/commits/."""
        blob_id = _write_old_blob(repo, BLOB_A)
        snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        legacy_commit_id = _write_legacy_commit_msgpack(repo, snapshot_id=snapshot_id)

        found = read_commit(repo, legacy_commit_id)

        assert found is None
| 179 | |
| 180 | |
| 181 | # --------------------------------------------------------------------------- |
| 182 | # Post-migrate: legacy commits land in the object store |
| 183 | # --------------------------------------------------------------------------- |
| 184 | |
class TestLegacyCommitsMigrate:
    """migrate() must make commits from the legacy commits dir readable via read_commit()."""

    def test_legacy_commit_readable_after_migrate(self, repo: pathlib.Path) -> None:
        """After migrate(), a commit from .muse/commits/ is findable via read_commit()."""
        from muse.core.migrate import migrate

        blob_id = _write_old_blob(repo, BLOB_A)
        snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        legacy_commit_id = _write_legacy_commit_msgpack(repo, snapshot_id=snapshot_id)

        outcome = migrate(repo)

        # Fall back to the legacy ID when the migration did not remap it.
        migrated_id = outcome.id_map.get(legacy_commit_id, legacy_commit_id)
        migrated = read_commit(repo, migrated_id)
        assert migrated is not None
        assert migrated.message == "initial commit"

    def test_legacy_commit_fields_preserved_after_migrate(self, repo: pathlib.Path) -> None:
        """Commit fields survive the migration round-trip."""
        from muse.core.migrate import migrate

        blob_id = _write_old_blob(repo, BLOB_A)
        snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        legacy_commit_id = _write_legacy_commit_msgpack(
            repo,
            snapshot_id=snapshot_id,
            message="preserve me",
            author="gabriel",
        )

        outcome = migrate(repo)

        migrated_id = outcome.id_map.get(legacy_commit_id, legacy_commit_id)
        migrated = read_commit(repo, migrated_id)
        assert migrated is not None
        assert migrated.message == "preserve me"
        assert migrated.branch == "main"

    def test_two_legacy_commits_both_readable_after_migrate(self, repo: pathlib.Path) -> None:
        """A two-commit chain in the legacy dir is fully migrated."""
        from muse.core.migrate import migrate

        blob_id = _write_old_blob(repo, BLOB_A)
        snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        first_id = _write_legacy_commit_msgpack(
            repo, snapshot_id=snapshot_id, message="first"
        )
        second_id = _write_legacy_commit_msgpack(
            repo, snapshot_id=snapshot_id, message="second", parent_ids=[first_id]
        )

        outcome = migrate(repo)

        for old_id in (first_id, second_id):
            migrated_id = outcome.id_map.get(old_id, old_id)
            migrated = read_commit(repo, migrated_id)
            assert migrated is not None, (
                f"commit {old_id} not found after migration"
            )
| 241 | |
| 242 | |
| 243 | # --------------------------------------------------------------------------- |
| 244 | # Post-migrate: legacy snapshots land in the object store |
| 245 | # --------------------------------------------------------------------------- |
| 246 | |
class TestLegacySnapshotsMigrate:
    """migrate() must re-home legacy snapshots in the object store under their new ID."""

    def test_legacy_snapshot_in_object_store_after_migrate(self, repo: pathlib.Path) -> None:
        """After migrate(), a snapshot from .muse/snapshots/ is in the object store."""
        from muse.core.migrate import migrate

        blob_id = _write_old_blob(repo, BLOB_A)
        legacy_snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        _write_legacy_commit_msgpack(repo, snapshot_id=legacy_snapshot_id)

        migrate(repo)

        # The expected new snapshot ID is derived from the new-style blob hash.
        expected_snapshot_id = hash_snapshot({"hello.md": hash_blob(BLOB_A)})
        stored = read_muse_object(repo, expected_snapshot_id)
        assert stored is not None
        kind, payload = stored
        assert kind == "snapshot"
        decoded = json.loads(payload)
        assert decoded["snapshot_id"] == expected_snapshot_id
| 266 | |
| 267 | |
| 268 | # --------------------------------------------------------------------------- |
| 269 | # Post-migrate: legacy directories are removed |
| 270 | # --------------------------------------------------------------------------- |
| 271 | |
class TestLegacyDirCleanedAfterMigrate:
    """migrate() removes the legacy directories, unless it is run with dry_run=True."""

    @staticmethod
    def _seed_legacy_history(repo: pathlib.Path) -> None:
        """Write one old-style blob plus a legacy snapshot and commit that reference it."""
        blob_id = _write_old_blob(repo, BLOB_A)
        snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        _write_legacy_commit_msgpack(repo, snapshot_id=snapshot_id)

    def test_commits_dir_removed_after_migrate(self, repo: pathlib.Path) -> None:
        """.muse/commits/ is deleted entirely after migrate()."""
        from muse.core.migrate import migrate

        self._seed_legacy_history(repo)

        migrate(repo)

        assert not commits_dir(repo).exists(), (
            ".muse/commits/ must be removed after migration"
        )

    def test_snapshots_dir_removed_after_migrate(self, repo: pathlib.Path) -> None:
        """.muse/snapshots/ is deleted entirely after migrate()."""
        from muse.core.migrate import migrate

        self._seed_legacy_history(repo)

        migrate(repo)

        assert not snapshots_dir(repo).exists(), (
            ".muse/snapshots/ must be removed after migration"
        )

    def test_empty_commits_dir_removed_after_migrate(self, repo: pathlib.Path) -> None:
        """migrate() removes an empty .muse/commits/ dir even with no msgpack files."""
        from muse.core.migrate import migrate

        legacy_commits = commits_dir(repo)
        legacy_commits.mkdir(parents=True, exist_ok=True)

        migrate(repo)

        assert not commits_dir(repo).exists()

    def test_dry_run_does_not_remove_commits_dir(self, repo: pathlib.Path) -> None:
        """dry_run=True must not delete .muse/commits/."""
        from muse.core.migrate import migrate

        self._seed_legacy_history(repo)

        migrate(repo, dry_run=True)

        assert commits_dir(repo).exists(), "dry_run must not remove .muse/commits/"

    def test_dry_run_does_not_remove_snapshots_dir(self, repo: pathlib.Path) -> None:
        """dry_run=True must not delete .muse/snapshots/."""
        from muse.core.migrate import migrate

        self._seed_legacy_history(repo)

        migrate(repo, dry_run=True)

        assert snapshots_dir(repo).exists(), "dry_run must not remove .muse/snapshots/"
| 332 | |
| 333 | |
| 334 | # --------------------------------------------------------------------------- |
| 335 | # init_repo_dirs does NOT create legacy directories |
| 336 | # --------------------------------------------------------------------------- |
| 337 | |
class TestInitDoesNotCreateLegacyDirs:
    """init_repo_dirs() must leave both legacy directories uncreated."""

    def test_init_does_not_create_commits_dir(self, tmp_path: pathlib.Path) -> None:
        """init_repo_dirs() must not create .muse/commits/."""
        init_repo_dirs(tmp_path)

        legacy_commits = commits_dir(tmp_path)
        assert not legacy_commits.exists(), (
            ".muse/commits/ must not be created by init_repo_dirs"
        )

    def test_init_does_not_create_snapshots_dir(self, tmp_path: pathlib.Path) -> None:
        """init_repo_dirs() must not create .muse/snapshots/."""
        init_repo_dirs(tmp_path)

        legacy_snapshots = snapshots_dir(tmp_path)
        assert not legacy_snapshots.exists(), (
            ".muse/snapshots/ must not be created by init_repo_dirs"
        )
File History
3 commits
sha256:7781e508756c81b7ddb0b08b408fd2b99bad87798cefa596773373efc360952c
chore: typing audit — zero violations, zero untyped defs
Sonnet 4.6
patch
23 days ago
sha256:09656d1b0772ea4c96f8911d7bf8042b33eb0596992c6546dfab3d21e9dee330
fix: align muse read --json schema and test contracts
Sonnet 4.6
minor
⚠
23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
29 days ago