gabriel / muse public
test_unified_object_store.py python
268 lines 10.3 KB
Raw
r"""Unified object store — TDD from first principles.

Every object (commit, snapshot, blob) lives in one store:
    .muse/objects/sha256/<2-hex-prefix>/<remaining-hex>

On-disk format (the same framing Git uses for its objects):
    "<type> <size>\0<payload>"

The full string is hashed to produce the object ID — the type is part of
the object's identity, not a separate framing layer.
"""
12
13 from __future__ import annotations
14
15 import json
16 import pathlib
17
18 import pytest
19
20 from muse.core.ids import hash_blob, hash_snapshot, hash_commit
21 from muse.core.object_store import objects_dir, object_path, write_muse_object, read_muse_object, write_object, read_object
22 from muse.core.commits import (
23 CommitRecord,
24 read_commit,
25 write_commit,
26 )
27 from muse.core.snapshots import (
28 SnapshotRecord,
29 read_snapshot,
30 write_snapshot,
31 )
32
33 _JsonVal = str | int | None | list[str]
34 _DataDict = dict[str, _JsonVal]
35
36 # ---------------------------------------------------------------------------
37 # Fixtures
38 # ---------------------------------------------------------------------------
39
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Return a temporary repository root whose objects directory exists."""
    store_root = objects_dir(tmp_path)
    # Create the directory up front so tests can write objects immediately.
    store_root.mkdir(parents=True, exist_ok=True)
    return tmp_path
44
# Sample blob payload shared by the ``blob_id`` fixture and the blob tests.
BLOB_CONTENT = b"# Hello\n"
46
@pytest.fixture
def blob_id() -> str:
    """Return the ID that ``hash_blob`` computes for ``BLOB_CONTENT``."""
    content_id = hash_blob(BLOB_CONTENT)
    return content_id
50
@pytest.fixture
def snapshot_id(blob_id: str) -> str:
    """Return the ID of a snapshot whose manifest maps ``hello.md`` to the sample blob."""
    manifest = {"hello.md": blob_id}
    return hash_snapshot(manifest)
54
@pytest.fixture
def snapshot_data(blob_id: str, snapshot_id: str) -> _DataDict:
    """Return a snapshot record dict for the single-file ``hello.md`` manifest.

    The same dict is JSON-encoded by the round-trip tests and passed to
    ``SnapshotRecord.from_dict`` by the write test.
    """
    manifest = {"hello.md": blob_id}
    record: _DataDict = {
        "schema_version": 1,
        "snapshot_id": snapshot_id,
        "manifest": manifest,
        "directories": [],
        "created_at": "2026-05-20T16:00:00+00:00",
        "note": "",
    }
    return record
65
@pytest.fixture
def commit_id(snapshot_id: str) -> str:
    """Return the ID ``hash_commit`` computes for a parentless commit of the sample snapshot."""
    no_parents: list[str] = []
    return hash_commit(
        author="gabriel",
        message="initial commit",
        committed_at_iso="2026-05-20T16:00:00+00:00",
        snapshot_id=snapshot_id,
        parent_ids=no_parents,
    )
75
@pytest.fixture
def commit_data(commit_id: str, snapshot_id: str) -> _DataDict:
    """Return a commit record dict for the initial commit.

    The same dict is JSON-encoded by the round-trip tests and passed to
    ``CommitRecord.from_dict`` by the write test.  Key order matches the
    original literal so the encoded payload is unchanged.
    """
    # Fields that take their values from the other fixtures or mirror the
    # arguments used by the ``commit_id`` fixture.
    core: _DataDict = {
        "commit_id": commit_id,
        "branch": "main",
        "snapshot_id": snapshot_id,
        "message": "initial commit",
        "committed_at": "2026-05-20T16:00:00+00:00",
        "parent_commit_id": None,
        "parent2_commit_id": None,
        "author": "gabriel",
    }
    # Remaining record fields, mostly empty placeholder values.
    extras: _DataDict = {
        "metadata": {},
        "structured_delta": None,
        "sem_ver_bump": "none",
        "breaking_changes": [],
        "agent_id": "claude-code",
        "model_id": "claude-sonnet-4-6",
        "toolchain_id": "",
        "prompt_hash": "",
        "signature": "",
        "signer_public_key": "",
        "signer_key_id": "",
        "reviewed_by": [],
        "test_runs": 0,
        "labels": [],
        "status": "",
        "notes": [],
        "score": None,
    }
    return {**core, **extras}
105
106 # ---------------------------------------------------------------------------
107 # Test 0: hash_blob includes type in the hash (Git-idiomatic)
108 # ---------------------------------------------------------------------------
109
def test_hash_blob_includes_type() -> None:
    """``hash_blob`` hashes the ``blob <size>`` header, a NUL byte, then the payload."""
    import hashlib

    data = b"# Hello\n"
    digest = hashlib.sha256()
    # Feeding header and payload separately hashes the same byte stream as
    # hashing their concatenation.
    digest.update(f"blob {len(data)}\0".encode())
    digest.update(data)
    expected = "sha256:" + digest.hexdigest()
    assert hash_blob(data) == expected
116
117 # ---------------------------------------------------------------------------
118 # Test 0b: hash_snapshot includes type in the hash (Git-idiomatic)
119 # ---------------------------------------------------------------------------
120
def test_hash_snapshot_includes_type(blob_id: str) -> None:
    """``hash_snapshot`` hashes a ``snapshot <size>`` header ahead of the canonical manifest."""
    import hashlib
    from muse.core.types import split_id

    manifest = {"hello.md": blob_id}

    # One "<path>NUL<second element of split_id(oid)>" entry per file, sorted.
    entries = []
    for path, oid in manifest.items():
        entries.append(f"{path}\x00{split_id(oid)[1]}")
    entries.sort()

    canonical = "\x00".join(entries).encode()
    digest = hashlib.sha256()
    digest.update(f"snapshot {len(canonical)}\0".encode())
    digest.update(canonical)
    expected = "sha256:" + digest.hexdigest()
    assert hash_snapshot(manifest) == expected
130
131 # ---------------------------------------------------------------------------
132 # Test 0c: hash_commit includes type in the hash (Git-idiomatic)
133 # ---------------------------------------------------------------------------
134
def test_hash_commit_includes_type(snapshot_id: str) -> None:
    """``hash_commit`` hashes a ``commit <size>`` header ahead of the canonical fields."""
    import hashlib
    from muse.core.types import split_id

    message = "initial commit"
    committed_at = "2026-05-20T16:00:00+00:00"
    author = "gabriel"

    # Canonical field order; the empty strings stand for "no parents" and
    # "no signer_public_key".
    fields = ("", split_id(snapshot_id)[1], message, committed_at, author, "")
    canonical = "\x00".join(fields).encode()

    digest = hashlib.sha256()
    digest.update(f"commit {len(canonical)}\0".encode())
    digest.update(canonical)
    expected = "sha256:" + digest.hexdigest()

    actual = hash_commit(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=committed_at,
        author=author,
    )
    assert actual == expected
156
157 # ---------------------------------------------------------------------------
158 # Test 1: blob round-trip
159 # ---------------------------------------------------------------------------
160
def test_write_read_muse_object_blob(repo: pathlib.Path) -> None:
    """A blob written with ``write_muse_object`` reads back with its type and payload."""
    stored_id = write_muse_object(repo, "blob", BLOB_CONTENT)
    kind, body = read_muse_object(repo, stored_id)
    assert kind == "blob"
    assert body == BLOB_CONTENT
166
167 # ---------------------------------------------------------------------------
168 # Test 2: snapshot round-trip
169 # ---------------------------------------------------------------------------
170
def test_write_read_muse_object_snapshot(repo: pathlib.Path, snapshot_data: _DataDict) -> None:
    """A JSON snapshot payload survives a write/read round-trip through the store."""
    encoded = json.dumps(snapshot_data, separators=(",", ":")).encode()
    stored_id = write_muse_object(repo, "snapshot", encoded)

    kind, body = read_muse_object(repo, stored_id)
    assert kind == "snapshot"
    decoded = json.loads(body)
    assert decoded == snapshot_data
177
178 # ---------------------------------------------------------------------------
179 # Test 3: commit round-trip
180 # ---------------------------------------------------------------------------
181
def test_write_read_muse_object_commit(repo: pathlib.Path, commit_data: _DataDict) -> None:
    """A JSON commit payload survives a write/read round-trip through the store."""
    encoded = json.dumps(commit_data, separators=(",", ":")).encode()
    stored_id = write_muse_object(repo, "commit", encoded)

    kind, body = read_muse_object(repo, stored_id)
    assert kind == "commit"
    decoded = json.loads(body)
    assert decoded == commit_data
188
189 # ---------------------------------------------------------------------------
190 # Phase 3 — Test 4: read_commit falls back to objects/sha256/ (muse format)
191 # ---------------------------------------------------------------------------
192
def test_read_commit_falls_back_to_object_store(repo: pathlib.Path, commit_id: str, commit_data: _DataDict) -> None:
    """``read_commit`` loads a commit that exists only as a framed file at ``object_path``."""
    body = json.dumps(commit_data, separators=(",", ":")).encode()
    header = f"commit {len(body)}\0".encode()

    # Write the framed object by hand, bypassing write_commit entirely.
    target = object_path(repo, commit_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(header + body)

    loaded = read_commit(repo, commit_id)
    assert loaded is not None
    assert loaded.commit_id == commit_id
202
203 # ---------------------------------------------------------------------------
204 # Phase 3 — Test 5: write_commit dual-writes to objects/sha256/
205 # ---------------------------------------------------------------------------
206
def test_write_commit_lands_in_object_store(repo: pathlib.Path, commit_id: str, commit_data: _DataDict) -> None:
    """After ``write_commit``, the commit is readable via ``read_muse_object`` under its ID."""
    write_commit(repo, CommitRecord.from_dict(commit_data), skip_parent_check=True)

    stored = read_muse_object(repo, commit_id)
    assert stored is not None
    kind, body = stored
    assert kind == "commit"
    decoded = json.loads(body)
    assert decoded["commit_id"] == commit_id
216
217 # ---------------------------------------------------------------------------
218 # Phase 4 — Test 6: read_snapshot falls back to objects/sha256/
219 # ---------------------------------------------------------------------------
220
def test_read_snapshot_falls_back_to_object_store(repo: pathlib.Path, snapshot_id: str, snapshot_data: _DataDict) -> None:
    """``read_snapshot`` loads a snapshot that exists only as a framed file at ``object_path``."""
    body = json.dumps(snapshot_data, separators=(",", ":")).encode()
    header = f"snapshot {len(body)}\0".encode()

    # Write the framed object by hand, bypassing write_snapshot entirely.
    target = object_path(repo, snapshot_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(header + body)

    loaded = read_snapshot(repo, snapshot_id)
    assert loaded is not None
    assert loaded.snapshot_id == snapshot_id
230
231 # ---------------------------------------------------------------------------
232 # Phase 4 — Test 7: write_snapshot dual-writes to objects/sha256/
233 # ---------------------------------------------------------------------------
234
def test_write_snapshot_lands_in_object_store(repo: pathlib.Path, snapshot_id: str, snapshot_data: _DataDict) -> None:
    """After ``write_snapshot``, the snapshot is readable via ``read_muse_object`` under its ID."""
    write_snapshot(repo, SnapshotRecord.from_dict(snapshot_data))

    stored = read_muse_object(repo, snapshot_id)
    assert stored is not None
    kind, body = stored
    assert kind == "snapshot"
    decoded = json.loads(body)
    assert decoded["snapshot_id"] == snapshot_id
244
245 # ---------------------------------------------------------------------------
246 # Phase 5 — Test 8: hash_blob and write_muse_object produce the same ID
247 # ---------------------------------------------------------------------------
248
def test_blob_id_consistent_with_object_store(repo: pathlib.Path) -> None:
    """``write_muse_object`` returns the same ID that ``hash_blob`` computes for the payload."""
    stored_id = write_muse_object(repo, "blob", BLOB_CONTENT)
    expected_id = hash_blob(BLOB_CONTENT)
    assert stored_id == expected_id
252
253 # ---------------------------------------------------------------------------
254 # Phase 5 — Test 9: write_object accepts hash_blob-derived IDs
255 # ---------------------------------------------------------------------------
256
def test_write_object_accepts_hash_blob_id(repo: pathlib.Path) -> None:
    """``write_object`` must not raise when given an ID produced by ``hash_blob``.

    There is no explicit assertion: the test passes as long as the call returns.
    """
    # NOTE(review): consider also asserting that the object can be read back;
    # the intended contract of write_object is not visible here — confirm first.
    write_object(repo, hash_blob(BLOB_CONTENT), BLOB_CONTENT)
260
261 # ---------------------------------------------------------------------------
262 # Phase 5 — Test 10: read_object strips the muse header
263 # ---------------------------------------------------------------------------
264
def test_read_object_strips_muse_header(repo: pathlib.Path) -> None:
    """``read_object`` returns only the payload of an object stored by ``write_muse_object``."""
    stored_id = write_muse_object(repo, "blob", BLOB_CONTENT)
    assert read_object(repo, stored_id) == BLOB_CONTENT
File History 1 commit