gabriel / muse public

test_migrate_object_store.py file-level

at sha256:8 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:b adding issues docs to bust staging mpack prebuild cache. · gabriel · Jun 20, 2026
1 """TDD — Phase 7: migrate old object store to Git-idiomatic muse format.
2
3 All three ID formulas changed in Phase 2:
4 old hash_blob(data) = sha256(data)
5 new hash_blob(data) = sha256("blob <size>\\0<data>")
6
7 old hash_snapshot(manifest) = sha256(canonical)
8 new hash_snapshot(manifest) = sha256("snapshot <size>\\0<canonical>")
9
10 old hash_commit(...) = sha256(canonical)
11 new hash_commit(...) = sha256("commit <size>\\0<canonical>")
12
13 The migration is a full DAG rewrite in three passes:
14 Pass 1 — blobs: old raw bytes → muse-format at new path; build old→new map
15 Pass 2 — snapshots: update manifest blob IDs, recompute snapshot ID; build map
16 Pass 3 — commits: update snapshot_id, recompute commit ID; update refs
17
18 Every pass is non-destructive: old files are never deleted.
19 """
20 from __future__ import annotations
21
22 import hashlib
23 import json
24 import pathlib
25
26 import msgpack
27 import pytest
28
29 import msgpack
30
31 from muse.core.ids import hash_blob, hash_snapshot, hash_commit
32 from muse.core.object_store import object_path, objects_dir, read_muse_object
33 from muse.core.paths import commits_dir, snapshots_dir
34 from muse.core.types import long_id
35
# Alias for the manifest mappings passed to the snapshot helpers in this file:
# each key is a file path and each value is the object ID of that file's blob.
_Manifest = dict[str, str]  # path → object_id
37
38
39 # ---------------------------------------------------------------------------
40 # Helpers — produce old-formula IDs the same way pre-Phase-2 code did
41 # ---------------------------------------------------------------------------
42
def _old_blob_id(data: bytes) -> str:
    """Return the pre-Phase-2 blob ID: SHA-256 over the raw bytes, with no type header."""
    raw_digest = hashlib.sha256(data).hexdigest()
    return long_id(raw_digest)
46
47
def _write_old_blob(repo: pathlib.Path, data: bytes) -> str:
    """Store *data* verbatim under its old-formula ID and return that ID.

    The bytes are written unmodified (no type header) to the path that
    ``object_path`` derives from the old ID; missing parent directories
    are created first.
    """
    legacy_id = _old_blob_id(data)
    destination = object_path(repo, legacy_id)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(data)
    return legacy_id
55
56
57 # ---------------------------------------------------------------------------
58 # Fixtures
59 # ---------------------------------------------------------------------------
60
# Sample blob payloads shared by the tests in this file.
# BLOB_A is exactly 8 bytes long; the Pass 1 test that checks for the literal
# b"blob 8\0" header depends on that length.
BLOB_A = b"# Hello\n"
BLOB_B = b"print('world')\n"
63
64
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Yield a temporary repository root whose objects directory has been created."""
    store_root = objects_dir(tmp_path)
    store_root.mkdir(parents=True, exist_ok=True)
    return tmp_path
69
70
71 # ---------------------------------------------------------------------------
72 # Pass 1 — blob migration
73 # ---------------------------------------------------------------------------
74
def test_migrate_blob_ids_writes_muse_format_copy(repo: pathlib.Path) -> None:
    """Migrating a raw old-format blob writes a headered copy at the new hash_blob path."""
    from muse.core.migrate import migrate_blob_ids

    legacy_id = _write_old_blob(repo, BLOB_A)
    outcome = migrate_blob_ids(repo, dry_run=False)

    migrated_id = hash_blob(BLOB_A)
    migrated_file = object_path(repo, migrated_id)
    assert migrated_file.exists(), "muse-format blob was not written"

    # The new file must be the original payload prefixed by the "blob <size>\0" header.
    expected_bytes = b"blob 8\0" + BLOB_A
    assert migrated_file.read_bytes() == expected_bytes
    assert outcome.id_map[legacy_id] == migrated_id
87
88
def test_migrate_blob_ids_dry_run_writes_nothing(repo: pathlib.Path) -> None:
    """With dry_run=True the id_map and counter are filled in but no new file appears."""
    from muse.core.migrate import migrate_blob_ids

    legacy_id = _write_old_blob(repo, BLOB_A)
    outcome = migrate_blob_ids(repo, dry_run=True)

    migrated_id = hash_blob(BLOB_A)
    would_be_file = object_path(repo, migrated_id)
    assert not would_be_file.exists(), "dry_run must not write any files"
    assert outcome.id_map[legacy_id] == migrated_id
    # The counter is still expected to report the blob even though nothing was written.
    assert outcome.blobs_written == 1
101
102
def test_migrate_blob_ids_skips_already_muse_format(repo: pathlib.Path) -> None:
    """A blob written via write_muse_object maps to its own ID and is counted as skipped."""
    from muse.core.migrate import migrate_blob_ids
    from muse.core.object_store import write_muse_object

    current_id = write_muse_object(repo, "blob", BLOB_A)
    outcome = migrate_blob_ids(repo, dry_run=False)

    # Identity mapping: the object keeps its ID, nothing is written, one skip is recorded.
    assert outcome.id_map[current_id] == current_id
    assert outcome.blobs_written == 0
    assert outcome.blobs_skipped == 1
114
115
def test_migrate_blob_ids_multiple_blobs(repo: pathlib.Path) -> None:
    """Every old-format blob in the store is migrated and present in id_map."""
    from muse.core.migrate import migrate_blob_ids

    legacy_a = _write_old_blob(repo, BLOB_A)
    legacy_b = _write_old_blob(repo, BLOB_B)
    outcome = migrate_blob_ids(repo, dry_run=False)

    # Each old ID must map to the new-formula hash of the same payload.
    for legacy_id, payload in ((legacy_a, BLOB_A), (legacy_b, BLOB_B)):
        assert outcome.id_map[legacy_id] == hash_blob(payload)
    assert outcome.blobs_written == 2
127
128
129 # ---------------------------------------------------------------------------
130 # Pass 2 — snapshot migration helpers
131 # ---------------------------------------------------------------------------
132
def _old_snapshot_id(manifest: _Manifest) -> str:
    """Return the pre-Phase-2 snapshot ID: SHA-256 of the canonical form, no type prefix.

    The canonical form is the sorted list of ``<path>\\x00<second part of
    split_id(object_id)>`` entries, joined with ``\\x00`` and encoded to bytes.
    """
    from muse.core.types import split_id

    separator = "\x00"
    entries = [
        f"{path}{separator}{split_id(oid)[1]}" for path, oid in manifest.items()
    ]
    entries.sort()
    canonical_bytes = separator.join(entries).encode()
    digest = hashlib.sha256(canonical_bytes).hexdigest()
    return long_id(digest)
140
141
def _write_old_snapshot(
    repo: pathlib.Path,
    manifest: _Manifest,
    created_at: str = "2026-05-20T16:00:00+00:00",
) -> str:
    """Write an old-format snapshot record as msgpack and return its old snapshot ID.

    The file is placed at ``<snapshots_dir>/sha256/<hex>.msgpack``, where
    ``<hex>`` is the part of the old ID after the first ``:``.
    """
    legacy_id = _old_snapshot_id(manifest)
    _, digest = legacy_id.split(":", 1)

    target_dir = snapshots_dir(repo) / "sha256"
    target_dir.mkdir(parents=True, exist_ok=True)

    # Key order is kept as-is so the serialized bytes stay the same.
    payload = msgpack.packb(
        {
            "schema_version": 1,
            "snapshot_id": legacy_id,
            "manifest": manifest,
            "directories": [],
            "created_at": created_at,
            "note": "",
        },
        use_bin_type=True,
    )
    (target_dir / f"{digest}.msgpack").write_bytes(payload)
    return legacy_id
163
164
165 # ---------------------------------------------------------------------------
166 # Pass 2 — snapshot migration tests
167 # ---------------------------------------------------------------------------
168
def test_migrate_snapshot_ids_writes_to_object_store(repo: pathlib.Path) -> None:
    """A migrated snapshot is readable from the object store with rewritten IDs."""
    from muse.core.migrate import migrate_blob_ids, migrate_snapshot_ids

    legacy_blob = _write_old_blob(repo, BLOB_A)
    blob_pass = migrate_blob_ids(repo, dry_run=False)

    legacy_snapshot = _write_old_snapshot(repo, {"hello.md": legacy_blob})
    snapshot_pass = migrate_snapshot_ids(repo, blob_pass.id_map, dry_run=False)

    expected_blob = hash_blob(BLOB_A)
    expected_snapshot = hash_snapshot({"hello.md": expected_blob})
    assert snapshot_pass.id_map[legacy_snapshot] == expected_snapshot

    stored = read_muse_object(repo, expected_snapshot)
    assert stored is not None
    kind, payload = stored
    assert kind == "snapshot"

    # The stored payload is JSON carrying the new snapshot ID and the new blob ID.
    decoded = json.loads(payload)
    assert decoded["snapshot_id"] == expected_snapshot
    assert decoded["manifest"]["hello.md"] == expected_blob
190
191
def test_migrate_snapshot_ids_dry_run_writes_nothing(repo: pathlib.Path) -> None:
    """With dry_run=True the snapshot id_map is built but no object file is created."""
    from muse.core.migrate import migrate_blob_ids, migrate_snapshot_ids

    legacy_blob = _write_old_blob(repo, BLOB_A)
    blob_pass = migrate_blob_ids(repo, dry_run=True)

    legacy_snapshot = _write_old_snapshot(repo, {"hello.md": legacy_blob})
    snapshot_pass = migrate_snapshot_ids(repo, blob_pass.id_map, dry_run=True)

    expected_snapshot = hash_snapshot({"hello.md": hash_blob(BLOB_A)})
    assert snapshot_pass.id_map[legacy_snapshot] == expected_snapshot

    would_be_file = object_path(repo, expected_snapshot)
    assert not would_be_file.exists()
205
206
def test_migrate_snapshot_ids_non_destructive(repo: pathlib.Path) -> None:
    """The original snapshot msgpack file is still on disk after migration."""
    from muse.core.migrate import migrate_blob_ids, migrate_snapshot_ids

    legacy_blob = _write_old_blob(repo, BLOB_A)
    blob_pass = migrate_blob_ids(repo, dry_run=False)

    legacy_snapshot = _write_old_snapshot(repo, {"hello.md": legacy_blob})
    _, legacy_hex = legacy_snapshot.split(":", 1)
    legacy_file = snapshots_dir(repo) / "sha256" / f"{legacy_hex}.msgpack"

    migrate_snapshot_ids(repo, blob_pass.id_map, dry_run=False)

    assert legacy_file.exists(), "old snapshot msgpack must not be deleted"
221
222
223 # ---------------------------------------------------------------------------
224 # Pass 3 — commit migration helpers
225 # ---------------------------------------------------------------------------
226
def _old_commit_id(
    parent_ids: list[str],
    snapshot_id: str,
    message: str,
    committed_at_iso: str,
    author: str = "",
    signer_public_key: str = "",
) -> str:
    """Return the pre-Phase-2 commit ID: SHA-256 of the canonical form, no type prefix.

    The canonical form joins six fields with ``\\x00``: the sorted parent
    hex parts (themselves ``\\x00``-joined), the snapshot hex part (empty
    when *snapshot_id* is falsy), then message, timestamp, author and
    signer public key.
    """
    from muse.core.types import split_id

    separator = "\x00"
    parent_hexes = sorted(split_id(parent)[1] for parent in parent_ids)
    snapshot_hex = split_id(snapshot_id)[1] if snapshot_id else ""
    fields = (
        separator.join(parent_hexes),
        snapshot_hex,
        message,
        committed_at_iso,
        author,
        signer_public_key,
    )
    canonical_bytes = separator.join(fields).encode()
    digest = hashlib.sha256(canonical_bytes).hexdigest()
    return long_id(digest)
248
249
def _write_old_commit(
    repo: pathlib.Path,
    snapshot_id: str,
    message: str = "initial commit",
    committed_at: str = "2026-05-20T16:00:00+00:00",
    author: str = "gabriel",
    parent_ids: list[str] | None = None,
) -> str:
    """Write an old-format commit record as msgpack and return its old commit ID.

    The file is placed at ``<commits_dir>/sha256/<hex>.msgpack``, where
    ``<hex>`` is the part of the old ID after the first ``:``. At most the
    first two entries of *parent_ids* are stored in the record.
    """
    parents = parent_ids or []
    legacy_id = _old_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=committed_at,
        author=author,
    )
    _, digest = legacy_id.split(":", 1)

    target_dir = commits_dir(repo) / "sha256"
    target_dir.mkdir(parents=True, exist_ok=True)

    first_parent = parents[0] if parents else None
    second_parent = parents[1] if len(parents) > 1 else None

    # Key order and values are kept as-is so the serialized bytes stay the same.
    payload = msgpack.packb(
        {
            "commit_id": legacy_id,
            "branch": "main",
            "snapshot_id": snapshot_id,
            "message": message,
            "committed_at": committed_at,
            "parent_commit_id": first_parent,
            "parent2_commit_id": second_parent,
            "author": author,
            "signature": "",
            "signer_public_key": "",
            "format_version": 8,
            "metadata": {},
            "structured_delta": None,
            "sem_ver_bump": "none",
            "breaking_changes": [],
            "agent_id": "claude-code",
            "model_id": "claude-sonnet-4-6",
            "toolchain_id": "",
            "prompt_hash": "",
            "reviewed_by": [],
            "test_runs": 0,
            "labels": [],
            "status": "",
            "notes": [],
            "score": None,
        },
        use_bin_type=True,
    )
    (target_dir / f"{digest}.msgpack").write_bytes(payload)
    return legacy_id
300
301
302 # ---------------------------------------------------------------------------
303 # Pass 3 — commit migration tests
304 # ---------------------------------------------------------------------------
305
def test_migrate_commit_ids_writes_to_object_store(repo: pathlib.Path) -> None:
    """A migrated commit is readable from the object store and points at the new snapshot."""
    from muse.core.migrate import migrate_blob_ids, migrate_snapshot_ids, migrate_commit_ids

    # Run the three passes in order, feeding each pass's id_map into the next.
    legacy_blob = _write_old_blob(repo, BLOB_A)
    blob_pass = migrate_blob_ids(repo, dry_run=False)

    legacy_snapshot = _write_old_snapshot(repo, {"hello.md": legacy_blob})
    snapshot_pass = migrate_snapshot_ids(repo, blob_pass.id_map, dry_run=False)

    legacy_commit = _write_old_commit(repo, snapshot_id=legacy_snapshot)
    commit_pass = migrate_commit_ids(repo, snapshot_pass.id_map, dry_run=False)

    expected_blob = hash_blob(BLOB_A)
    expected_snapshot = hash_snapshot({"hello.md": expected_blob})
    expected_commit = hash_commit(
        parent_ids=[],
        snapshot_id=expected_snapshot,
        message="initial commit",
        committed_at_iso="2026-05-20T16:00:00+00:00",
        author="gabriel",
    )
    assert commit_pass.id_map[legacy_commit] == expected_commit

    stored = read_muse_object(repo, expected_commit)
    assert stored is not None
    kind, payload = stored
    assert kind == "commit"

    decoded = json.loads(payload)
    assert decoded["commit_id"] == expected_commit
    assert decoded["snapshot_id"] == expected_snapshot
337
338
def test_migrate_commit_ids_non_destructive(repo: pathlib.Path) -> None:
    """The original commit msgpack file is still on disk after migration."""
    from muse.core.migrate import migrate_blob_ids, migrate_snapshot_ids, migrate_commit_ids

    legacy_blob = _write_old_blob(repo, BLOB_A)
    blob_pass = migrate_blob_ids(repo, dry_run=False)

    legacy_snapshot = _write_old_snapshot(repo, {"hello.md": legacy_blob})
    snapshot_pass = migrate_snapshot_ids(repo, blob_pass.id_map, dry_run=False)

    legacy_commit = _write_old_commit(repo, snapshot_id=legacy_snapshot)
    _, legacy_hex = legacy_commit.split(":", 1)
    legacy_file = commits_dir(repo) / "sha256" / f"{legacy_hex}.msgpack"

    migrate_commit_ids(repo, snapshot_pass.id_map, dry_run=False)

    assert legacy_file.exists(), "old commit msgpack must not be deleted"
356
357
def test_migrate_commit_ids_dry_run_writes_nothing(repo: pathlib.Path) -> None:
    """With dry_run=True on all three passes the commit id_map is built but no object is written."""
    from muse.core.migrate import migrate_blob_ids, migrate_snapshot_ids, migrate_commit_ids

    legacy_blob = _write_old_blob(repo, BLOB_A)
    blob_pass = migrate_blob_ids(repo, dry_run=True)

    legacy_snapshot = _write_old_snapshot(repo, {"hello.md": legacy_blob})
    snapshot_pass = migrate_snapshot_ids(repo, blob_pass.id_map, dry_run=True)

    legacy_commit = _write_old_commit(repo, snapshot_id=legacy_snapshot)
    commit_pass = migrate_commit_ids(repo, snapshot_pass.id_map, dry_run=True)

    expected_snapshot = hash_snapshot({"hello.md": hash_blob(BLOB_A)})
    expected_commit = hash_commit(
        parent_ids=[],
        snapshot_id=expected_snapshot,
        message="initial commit",
        committed_at_iso="2026-05-20T16:00:00+00:00",
        author="gabriel",
    )
    assert commit_pass.id_map[legacy_commit] == expected_commit

    would_be_file = object_path(repo, expected_commit)
    assert not would_be_file.exists()