gabriel / muse public
test_phase2_legacy_store_migration.py python
351 lines 12.8 KB
Raw
sha256:7781e508756c81b7ddb0b08b408fd2b99bad87798cefa596773373efc360952c chore: typing audit — zero violations, zero untyped defs Sonnet 4.6 patch 23 days ago
1 """TDD — Phase 2: migrate legacy .muse/commits/ and .muse/snapshots/ to unified object store.
2
3 Phase 2 requirements (issue #12):
4 - muse code migrate reads binary msgpack files from .muse/commits/sha256/ and
5 .muse/snapshots/sha256/ and writes them to .muse/objects/sha256/<2>/<62>
6 - After migration every legacy commit/snapshot is readable via read_commit()
7 - After migration .muse/commits/ and .muse/snapshots/ are removed entirely
8 - dry_run=True must not remove the legacy directories
9 - muse init (init_repo_dirs) must NOT create .muse/commits/ or .muse/snapshots/
10 - read_commit() returns None for IDs that only exist in the legacy dir (pre-migrate)
11 """
12
13 from __future__ import annotations
14
15 import hashlib
16 import json
17 import pathlib
18
19 import msgpack
20 import pytest
21
22 from collections.abc import Mapping
23
24 from muse.core.ids import hash_blob, hash_snapshot, hash_commit
25 from muse.core.object_store import object_path, objects_dir, read_muse_object
26 from muse.core.paths import commits_dir, snapshots_dir, init_repo_dirs
27 from muse.core.commits import read_commit, CommitDict
28 from muse.core.types import long_id
29
30
31 # ---------------------------------------------------------------------------
32 # Helpers — reproduce pre-Phase-2 legacy on-disk shapes
33 # ---------------------------------------------------------------------------
34
35 BLOB_A = b"# Hello\n"
36
37
def _old_blob_id(data: bytes) -> str:
    """Return the legacy blob ID for *data*.

    The ID is the SHA-256 hex digest of the raw bytes, passed through
    ``long_id``.
    """
    digest = hashlib.sha256(data).hexdigest()
    return long_id(digest)
40
41
def _write_old_blob(repo: pathlib.Path, data: bytes) -> str:
    """Write *data* to the object path of its legacy blob ID and return that ID.

    Parent directories of the target path are created when missing.
    """
    blob_id = _old_blob_id(data)
    target = object_path(repo, blob_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(data)
    return blob_id
48
49
def _old_snapshot_id(manifest: Mapping[str, str]) -> str:
    """Compute the legacy snapshot ID for a path -> object-ID manifest.

    Every entry is rendered as the path, a NUL byte, and the second element
    of ``split_id(object_id)``.  The rendered entries are sorted, joined with
    NUL, encoded, SHA-256 hashed, and the hex digest is passed to ``long_id``.
    """
    from muse.core.types import split_id

    nul = "\x00"
    entries = [
        f"{path}{nul}{split_id(object_id)[1]}"
        for path, object_id in manifest.items()
    ]
    entries.sort()
    payload = nul.join(entries).encode()
    return long_id(hashlib.sha256(payload).hexdigest())
56
57
def _write_legacy_snapshot_msgpack(
    repo: pathlib.Path,
    manifest: dict[str, str],
    created_at: str = "2026-05-20T16:00:00+00:00",
) -> str:
    """Write a legacy msgpack snapshot record under the legacy snapshots dir.

    The record is stored as ``<hex>.msgpack`` inside the ``sha256``
    sub-directory of ``snapshots_dir(repo)``, where ``<hex>`` is the part of
    the legacy snapshot ID after the first ``:``.

    Returns the legacy snapshot ID.
    """
    snapshot_id = _old_snapshot_id(manifest)
    _prefix, digest = snapshot_id.split(":", 1)

    legacy_dir = snapshots_dir(repo) / "sha256"
    legacy_dir.mkdir(parents=True, exist_ok=True)

    # Key order is kept stable: msgpack serialises maps in insertion order.
    payload = msgpack.packb(
        {
            "schema_version": 1,
            "snapshot_id": snapshot_id,
            "manifest": manifest,
            "directories": [],
            "created_at": created_at,
            "note": "",
        },
        use_bin_type=True,
    )
    target = legacy_dir / f"{digest}.msgpack"
    target.write_bytes(payload)
    return snapshot_id
80
81
def _old_commit_id(
    parent_ids: list[str],
    snapshot_id: str,
    message: str,
    committed_at_iso: str,
    author: str = "",
    signer_public_key: str = "",
) -> str:
    """Compute the legacy commit ID from the commit's identifying fields.

    The canonical form is six NUL-joined fields, in this order: the sorted,
    NUL-joined second elements of ``split_id`` for each parent; the second
    element of ``split_id(snapshot_id)`` (empty string when *snapshot_id* is
    falsy); *message*; *committed_at_iso*; *author*; *signer_public_key*.
    The encoded canonical form is SHA-256 hashed and the hex digest is passed
    to ``long_id``.
    """
    from muse.core.types import split_id

    nul = "\x00"
    parent_hexes = sorted(split_id(parent)[1] for parent in parent_ids)
    snapshot_hex = split_id(snapshot_id)[1] if snapshot_id else ""
    fields = (
        nul.join(parent_hexes),
        snapshot_hex,
        message,
        committed_at_iso,
        author,
        signer_public_key,
    )
    digest = hashlib.sha256(nul.join(fields).encode()).hexdigest()
    return long_id(digest)
102
103
def _write_legacy_commit_msgpack(
    repo: pathlib.Path,
    snapshot_id: str,
    message: str = "initial commit",
    committed_at: str = "2026-05-20T16:00:00+00:00",
    author: str = "gabriel",
    parent_ids: list[str] | None = None,
) -> str:
    """Write a legacy msgpack commit record under the legacy commits dir.

    The record is stored as ``<hex>.msgpack`` inside the ``sha256``
    sub-directory of ``commits_dir(repo)``, where ``<hex>`` is the part of the
    legacy commit ID after the first ``:``.  The first two entries of
    *parent_ids* (when present) populate the two parent fields of the record.

    Returns the legacy commit ID.
    """
    parents = parent_ids if parent_ids else []
    commit_id = _old_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=committed_at,
        author=author,
    )
    _prefix, digest = commit_id.split(":", 1)

    legacy_dir = commits_dir(repo) / "sha256"
    legacy_dir.mkdir(parents=True, exist_ok=True)

    first_parent = parents[0] if parents else None
    second_parent = parents[1] if len(parents) > 1 else None

    # Key order is kept stable: msgpack serialises maps in insertion order.
    record: CommitDict = {
        "commit_id": commit_id,
        "branch": "main",
        "snapshot_id": snapshot_id,
        "message": message,
        "committed_at": committed_at,
        "parent_commit_id": first_parent,
        "parent2_commit_id": second_parent,
        "author": author,
        "signature": "",
        "signer_public_key": "",
        "format_version": 8,
        "metadata": {},
        "structured_delta": None,
        "sem_ver_bump": "none",
        "breaking_changes": [],
        "agent_id": "claude-code",
        "model_id": "claude-sonnet-4-6",
        "toolchain_id": "",
        "prompt_hash": "",
        "reviewed_by": [],
        "test_runs": 0,
        "labels": [],
        "status": "",
        "notes": [],
        "score": None,
    }
    target = legacy_dir / f"{digest}.msgpack"
    target.write_bytes(msgpack.packb(record, use_bin_type=True))
    return commit_id
155
156
157 # ---------------------------------------------------------------------------
158 # Fixtures
159 # ---------------------------------------------------------------------------
160
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Provide a temporary repo root whose objects directory already exists."""
    store = objects_dir(tmp_path)
    store.mkdir(parents=True, exist_ok=True)
    return tmp_path
165
166
167 # ---------------------------------------------------------------------------
168 # Pre-migrate: read_commit does NOT look in legacy dir
169 # ---------------------------------------------------------------------------
170
class TestReadCommitDoesNotTouchLegacyDir:
    """Before migration, read_commit() must not find legacy-only commits."""

    def test_returns_none_for_id_only_in_commits_dir(self, repo: pathlib.Path) -> None:
        """A commit written only to the legacy commits dir is not returned."""
        blob_id = _write_old_blob(repo, BLOB_A)
        snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        commit_id = _write_legacy_commit_msgpack(repo, snapshot_id=snapshot_id)

        found = read_commit(repo, commit_id)

        assert found is None
179
180
181 # ---------------------------------------------------------------------------
182 # Post-migrate: legacy commits land in the object store
183 # ---------------------------------------------------------------------------
184
class TestLegacyCommitsMigrate:
    """After migrate(), legacy commits are reachable through read_commit()."""

    def test_legacy_commit_readable_after_migrate(self, repo: pathlib.Path) -> None:
        """A commit written to the legacy dir can be read once migrate() has run."""
        from muse.core.migrate import migrate

        blob_id = _write_old_blob(repo, BLOB_A)
        snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        legacy_commit_id = _write_legacy_commit_msgpack(repo, snapshot_id=snapshot_id)

        outcome = migrate(repo)

        # Fall back to the legacy ID when the migration did not remap it.
        migrated_id = outcome.id_map.get(legacy_commit_id, legacy_commit_id)
        commit = read_commit(repo, migrated_id)
        assert commit is not None
        assert commit.message == "initial commit"

    def test_legacy_commit_fields_preserved_after_migrate(self, repo: pathlib.Path) -> None:
        """The message and branch of a legacy commit are intact after migration."""
        from muse.core.migrate import migrate

        blob_id = _write_old_blob(repo, BLOB_A)
        snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        legacy_commit_id = _write_legacy_commit_msgpack(
            repo,
            snapshot_id=snapshot_id,
            message="preserve me",
            author="gabriel",
        )

        outcome = migrate(repo)

        # Fall back to the legacy ID when the migration did not remap it.
        migrated_id = outcome.id_map.get(legacy_commit_id, legacy_commit_id)
        commit = read_commit(repo, migrated_id)
        assert commit is not None
        assert commit.message == "preserve me"
        assert commit.branch == "main"

    def test_two_legacy_commits_both_readable_after_migrate(self, repo: pathlib.Path) -> None:
        """Both commits of a parent -> child legacy chain are readable after migration."""
        from muse.core.migrate import migrate

        blob_id = _write_old_blob(repo, BLOB_A)
        snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        first_id = _write_legacy_commit_msgpack(
            repo, snapshot_id=snapshot_id, message="first"
        )
        second_id = _write_legacy_commit_msgpack(
            repo, snapshot_id=snapshot_id, message="second", parent_ids=[first_id]
        )

        outcome = migrate(repo)

        for old_id in (first_id, second_id):
            migrated_id = outcome.id_map.get(old_id, old_id)
            commit = read_commit(repo, migrated_id)
            assert commit is not None, (
                f"commit {old_id} not found after migration"
            )
241
242
243 # ---------------------------------------------------------------------------
244 # Post-migrate: legacy snapshots land in the object store
245 # ---------------------------------------------------------------------------
246
class TestLegacySnapshotsMigrate:
    """After migrate(), legacy snapshots are stored in the object store."""

    def test_legacy_snapshot_in_object_store_after_migrate(self, repo: pathlib.Path) -> None:
        """The migrated snapshot is readable under its re-hashed ID."""
        from muse.core.migrate import migrate

        legacy_blob_id = _write_old_blob(repo, BLOB_A)
        legacy_snapshot_id = _write_legacy_snapshot_msgpack(
            repo, {"hello.md": legacy_blob_id}
        )
        _write_legacy_commit_msgpack(repo, snapshot_id=legacy_snapshot_id)

        migrate(repo)

        # The expected ID is derived with the current hashing helpers.
        expected_snapshot_id = hash_snapshot({"hello.md": hash_blob(BLOB_A)})
        stored = read_muse_object(repo, expected_snapshot_id)
        assert stored is not None
        kind, raw = stored
        assert kind == "snapshot"
        decoded = json.loads(raw)
        assert decoded["snapshot_id"] == expected_snapshot_id
266
267
268 # ---------------------------------------------------------------------------
269 # Post-migrate: legacy directories are removed
270 # ---------------------------------------------------------------------------
271
class TestLegacyDirCleanedAfterMigrate:
    """migrate() removes the legacy dirs unless it runs with dry_run=True."""

    @staticmethod
    def _seed_legacy_history(repo: pathlib.Path) -> None:
        """Write one blob, one legacy snapshot and one legacy commit into *repo*."""
        blob_id = _write_old_blob(repo, BLOB_A)
        snapshot_id = _write_legacy_snapshot_msgpack(repo, {"hello.md": blob_id})
        _write_legacy_commit_msgpack(repo, snapshot_id=snapshot_id)

    def test_commits_dir_removed_after_migrate(self, repo: pathlib.Path) -> None:
        """The legacy commits dir no longer exists after migrate()."""
        from muse.core.migrate import migrate

        self._seed_legacy_history(repo)

        migrate(repo)

        legacy_dir = commits_dir(repo)
        assert not legacy_dir.exists(), (
            ".muse/commits/ must be removed after migration"
        )

    def test_snapshots_dir_removed_after_migrate(self, repo: pathlib.Path) -> None:
        """The legacy snapshots dir no longer exists after migrate()."""
        from muse.core.migrate import migrate

        self._seed_legacy_history(repo)

        migrate(repo)

        legacy_dir = snapshots_dir(repo)
        assert not legacy_dir.exists(), (
            ".muse/snapshots/ must be removed after migration"
        )

    def test_empty_commits_dir_removed_after_migrate(self, repo: pathlib.Path) -> None:
        """An empty legacy commits dir (no msgpack files) is removed as well."""
        from muse.core.migrate import migrate

        legacy_dir = commits_dir(repo)
        legacy_dir.mkdir(parents=True, exist_ok=True)

        migrate(repo)

        assert not commits_dir(repo).exists()

    def test_dry_run_does_not_remove_commits_dir(self, repo: pathlib.Path) -> None:
        """With dry_run=True the legacy commits dir is left in place."""
        from muse.core.migrate import migrate

        self._seed_legacy_history(repo)

        migrate(repo, dry_run=True)

        legacy_dir = commits_dir(repo)
        assert legacy_dir.exists(), "dry_run must not remove .muse/commits/"

    def test_dry_run_does_not_remove_snapshots_dir(self, repo: pathlib.Path) -> None:
        """With dry_run=True the legacy snapshots dir is left in place."""
        from muse.core.migrate import migrate

        self._seed_legacy_history(repo)

        migrate(repo, dry_run=True)

        legacy_dir = snapshots_dir(repo)
        assert legacy_dir.exists(), "dry_run must not remove .muse/snapshots/"
332
333
334 # ---------------------------------------------------------------------------
335 # init_repo_dirs does NOT create legacy directories
336 # ---------------------------------------------------------------------------
337
class TestInitDoesNotCreateLegacyDirs:
    """init_repo_dirs() must leave the legacy directories uncreated."""

    def test_init_does_not_create_commits_dir(self, tmp_path: pathlib.Path) -> None:
        """No legacy commits dir exists after init_repo_dirs()."""
        init_repo_dirs(tmp_path)

        legacy_dir = commits_dir(tmp_path)
        assert not legacy_dir.exists(), (
            ".muse/commits/ must not be created by init_repo_dirs"
        )

    def test_init_does_not_create_snapshots_dir(self, tmp_path: pathlib.Path) -> None:
        """No legacy snapshots dir exists after init_repo_dirs()."""
        init_repo_dirs(tmp_path)

        legacy_dir = snapshots_dir(tmp_path)
        assert not legacy_dir.exists(), (
            ".muse/snapshots/ must not be created by init_repo_dirs"
        )
File History 3 commits
sha256:7781e508756c81b7ddb0b08b408fd2b99bad87798cefa596773373efc360952c chore: typing audit — zero violations, zero untyped defs Sonnet 4.6 patch 23 days ago
sha256:09656d1b0772ea4c96f8911d7bf8042b33eb0596992c6546dfab3d21e9dee330 fix: align muse read --json schema and test contracts Sonnet 4.6 minor 23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 29 days ago