test_unified_object_store.py
python
sha256:d11a87833d5fad6059b7662844bf5448a8911a17cce7a51811f71ad394f248eb
bump to v0.2.0rc13
Human
patch
6 days ago
| 1 | """Unified object store — TDD from first principles. |
| 2 | |
| 3 | Every object (commit, snapshot, blob) lives in one store: |
| 4 | .muse/objects/sha256/<2-hex-prefix>/<remaining-hex> |
| 5 | |
| 6 | On-disk format (idiomatic with Git): |
| 7 | "<type> <size>\0<payload>" |
| 8 | |
| 9 | The full string is hashed to produce the object ID — the type is part of |
| 10 | the object's identity, not a separate framing layer. |
| 11 | """ |
| 12 | |
| 13 | from __future__ import annotations |
| 14 | |
| 15 | import json |
| 16 | import pathlib |
| 17 | |
| 18 | import pytest |
| 19 | |
| 20 | from muse.core.ids import hash_blob, hash_snapshot, hash_commit |
| 21 | from muse.core.object_store import objects_dir, object_path, write_muse_object, read_muse_object, write_object, read_object |
| 22 | from muse.core.commits import ( |
| 23 | CommitRecord, |
| 24 | read_commit, |
| 25 | write_commit, |
| 26 | ) |
| 27 | from muse.core.snapshots import ( |
| 28 | SnapshotRecord, |
| 29 | read_snapshot, |
| 30 | write_snapshot, |
| 31 | ) |
| 32 | |
| 33 | _JsonVal = str | int | None | list[str] |
| 34 | _DataDict = dict[str, _JsonVal] |
| 35 | |
| 36 | # --------------------------------------------------------------------------- |
| 37 | # Fixtures |
| 38 | # --------------------------------------------------------------------------- |
| 39 | |
@pytest.fixture
def repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Return a temporary repo root whose object-store directory exists.

    The directory location is whatever objects_dir() reports for the
    temporary path supplied by pytest.
    """
    store_dir = objects_dir(tmp_path)
    # Create any missing intermediate directories; tolerate a pre-existing one.
    store_dir.mkdir(exist_ok=True, parents=True)
    return tmp_path
| 44 | |
# Raw blob payload used by the blob-related fixtures and tests in this module.
BLOB_CONTENT: bytes = b"# Hello\n"
| 46 | |
@pytest.fixture
def blob_id() -> str:
    """Return the object ID that hash_blob computes for BLOB_CONTENT."""
    computed_id = hash_blob(BLOB_CONTENT)
    return computed_id
| 50 | |
@pytest.fixture
def snapshot_id(blob_id: str) -> str:
    """Return the snapshot ID of a one-entry manifest: hello.md -> blob_id."""
    manifest = {"hello.md": blob_id}
    return hash_snapshot(manifest)
| 54 | |
@pytest.fixture
def snapshot_data(blob_id: str, snapshot_id: str) -> _DataDict:
    """Return a snapshot payload dict for the single-file manifest.

    Keys are inserted in the order schema_version, snapshot_id, manifest,
    directories, created_at, note. A fresh dict is built on every call.
    """
    record: _DataDict = {"schema_version": 1, "snapshot_id": snapshot_id}
    record["manifest"] = {"hello.md": blob_id}
    record["directories"] = []
    record["created_at"] = "2026-05-20T16:00:00+00:00"
    record["note"] = ""
    return record
| 65 | |
@pytest.fixture
def commit_id(snapshot_id: str) -> str:
    """Return the hash_commit ID of a parentless commit on snapshot_id.

    Uses the message "initial commit", author "gabriel" and a fixed
    timestamp so the resulting ID is deterministic.
    """
    no_parents: list[str] = []
    timestamp = "2026-05-20T16:00:00+00:00"
    return hash_commit(
        parent_ids=no_parents,
        snapshot_id=snapshot_id,
        message="initial commit",
        committed_at_iso=timestamp,
        author="gabriel",
    )
| 75 | |
@pytest.fixture
def commit_data(commit_id: str, snapshot_id: str) -> _DataDict:
    """Return a full commit payload dict for the parentless initial commit.

    The payload is assembled from small groups of related keys; the groups
    are merged in sequence so the overall key order is fixed. A fresh dict
    (with fresh list/dict values) is built on every call.
    """
    identity: _DataDict = {
        "commit_id": commit_id,
        "branch": "main",
        "snapshot_id": snapshot_id,
        "message": "initial commit",
        "committed_at": "2026-05-20T16:00:00+00:00",
        "parent_commit_id": None,
        "parent2_commit_id": None,
        "author": "gabriel",
    }
    change_info: _DataDict = {
        "metadata": {},
        "structured_delta": None,
        "sem_ver_bump": "none",
        "breaking_changes": [],
    }
    agent_info: _DataDict = {
        "agent_id": "claude-code",
        "model_id": "claude-sonnet-4-6",
        "toolchain_id": "",
        "prompt_hash": "",
    }
    signing: _DataDict = {
        "signature": "",
        "signer_public_key": "",
        "signer_key_id": "",
    }
    review: _DataDict = {
        "reviewed_by": [],
        "test_runs": 0,
        "labels": [],
        "status": "",
        "notes": [],
        "score": None,
    }
    # Unpacking in this sequence keeps the key insertion order stable.
    return {**identity, **change_info, **agent_info, **signing, **review}
| 105 | |
| 106 | # --------------------------------------------------------------------------- |
| 107 | # Test 0: hash_blob includes type in the hash (Git-idiomatic) |
| 108 | # --------------------------------------------------------------------------- |
| 109 | |
def test_hash_blob_includes_type() -> None:
    """hash_blob must hash the "blob <size><NUL>" header together with the data.

    The expected ID is "sha256:" followed by the hex SHA-256 digest of the
    header bytes concatenated with the payload bytes.
    """
    import hashlib

    payload = b"# Hello\n"
    framed = f"blob {len(payload)}\0".encode() + payload
    digest = hashlib.sha256(framed).hexdigest()
    assert hash_blob(payload) == "sha256:" + digest
| 116 | |
| 117 | # --------------------------------------------------------------------------- |
| 118 | # Test 0b: hash_snapshot includes type in the hash (Git-idiomatic) |
| 119 | # --------------------------------------------------------------------------- |
| 120 | |
def test_hash_snapshot_includes_type(blob_id: str) -> None:
    """hash_snapshot must hash a "snapshot <size><NUL>" header plus the manifest.

    The canonical manifest body is rebuilt here by hand: one
    "<path><NUL><id part>" entry per file, sorted, joined with NUL bytes.
    """
    import hashlib
    from muse.core.types import split_id

    manifest = {"hello.md": blob_id}
    entries = []
    for path, oid in manifest.items():
        # Second element of split_id's result; presumably the bare hex
        # digest without the algorithm prefix (confirm against split_id).
        id_part = split_id(oid)[1]
        entries.append(f"{path}\x00{id_part}")
    entries.sort()
    body = "\x00".join(entries).encode()
    framed = f"snapshot {len(body)}\0".encode() + body
    expected = "sha256:" + hashlib.sha256(framed).hexdigest()
    assert hash_snapshot(manifest) == expected
| 130 | |
| 131 | # --------------------------------------------------------------------------- |
| 132 | # Test 0c: hash_commit includes type in the hash (Git-idiomatic) |
| 133 | # --------------------------------------------------------------------------- |
| 134 | |
def test_hash_commit_includes_type(snapshot_id: str) -> None:
    """hash_commit must hash a "commit <size><NUL>" header plus the commit fields.

    The canonical body is rebuilt here as six NUL-joined fields: parents
    (empty), the snapshot's id part, message, timestamp, author, and the
    signer public key (empty).
    """
    import hashlib
    from muse.core.types import split_id

    message = "initial commit"
    timestamp = "2026-05-20T16:00:00+00:00"
    author = "gabriel"
    fields = ["", split_id(snapshot_id)[1], message, timestamp, author, ""]
    body = "\x00".join(fields).encode()
    framed = f"commit {len(body)}\0".encode() + body
    expected = "sha256:" + hashlib.sha256(framed).hexdigest()

    actual = hash_commit(
        parent_ids=[],
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp,
        author=author,
    )
    assert actual == expected
| 156 | |
| 157 | # --------------------------------------------------------------------------- |
| 158 | # Test 1: blob round-trip |
| 159 | # --------------------------------------------------------------------------- |
| 160 | |
def test_write_read_muse_object_blob(repo: pathlib.Path) -> None:
    """A blob written with write_muse_object reads back with type and bytes intact."""
    stored_id = write_muse_object(repo, "blob", BLOB_CONTENT)
    kind, data = read_muse_object(repo, stored_id)
    assert kind == "blob"
    assert data == BLOB_CONTENT
| 166 | |
| 167 | # --------------------------------------------------------------------------- |
| 168 | # Test 2: snapshot round-trip |
| 169 | # --------------------------------------------------------------------------- |
| 170 | |
def test_write_read_muse_object_snapshot(repo: pathlib.Path, snapshot_data: _DataDict) -> None:
    """A compact-JSON snapshot payload survives a write/read round-trip."""
    encoded = json.dumps(snapshot_data, separators=(",", ":")).encode()
    stored_id = write_muse_object(repo, "snapshot", encoded)
    kind, body = read_muse_object(repo, stored_id)
    assert kind == "snapshot"
    decoded = json.loads(body)
    assert decoded == snapshot_data
| 177 | |
| 178 | # --------------------------------------------------------------------------- |
| 179 | # Test 3: commit round-trip |
| 180 | # --------------------------------------------------------------------------- |
| 181 | |
def test_write_read_muse_object_commit(repo: pathlib.Path, commit_data: _DataDict) -> None:
    """A compact-JSON commit payload survives a write/read round-trip."""
    encoded = json.dumps(commit_data, separators=(",", ":")).encode()
    stored_id = write_muse_object(repo, "commit", encoded)
    kind, body = read_muse_object(repo, stored_id)
    assert kind == "commit"
    decoded = json.loads(body)
    assert decoded == commit_data
| 188 | |
| 189 | # --------------------------------------------------------------------------- |
| 190 | # Phase 3 — Test 4: read_commit falls back to objects/sha256/ (muse format) |
| 191 | # --------------------------------------------------------------------------- |
| 192 | |
def test_read_commit_falls_back_to_object_store(repo: pathlib.Path, commit_id: str, commit_data: _DataDict) -> None:
    """read_commit must load a commit placed by hand at object_path(repo, commit_id).

    The file is written directly as "commit <size><NUL><json>", bypassing
    write_commit, so only the read side is exercised.
    """
    body = json.dumps(commit_data, separators=(",", ":")).encode()
    header = f"commit {len(body)}\0".encode()
    target = object_path(repo, commit_id)
    target.parent.mkdir(exist_ok=True, parents=True)
    target.write_bytes(header + body)

    loaded = read_commit(repo, commit_id)
    assert loaded is not None
    assert loaded.commit_id == commit_id
| 202 | |
| 203 | # --------------------------------------------------------------------------- |
| 204 | # Phase 3 — Test 5: write_commit dual-writes to objects/sha256/ |
| 205 | # --------------------------------------------------------------------------- |
| 206 | |
def test_write_commit_lands_in_object_store(repo: pathlib.Path, commit_id: str, commit_data: _DataDict) -> None:
    """After write_commit, read_muse_object must return the commit under commit_id."""
    write_commit(repo, CommitRecord.from_dict(commit_data), skip_parent_check=True)

    stored = read_muse_object(repo, commit_id)
    assert stored is not None
    kind, body = stored
    assert kind == "commit"
    decoded = json.loads(body)
    assert decoded["commit_id"] == commit_id
| 216 | |
| 217 | # --------------------------------------------------------------------------- |
| 218 | # Phase 4 — Test 6: read_snapshot falls back to objects/sha256/ |
| 219 | # --------------------------------------------------------------------------- |
| 220 | |
def test_read_snapshot_falls_back_to_object_store(repo: pathlib.Path, snapshot_id: str, snapshot_data: _DataDict) -> None:
    """read_snapshot must load a snapshot placed by hand at object_path(repo, snapshot_id).

    The file is written directly as "snapshot <size><NUL><json>", bypassing
    write_snapshot, so only the read side is exercised.
    """
    body = json.dumps(snapshot_data, separators=(",", ":")).encode()
    header = f"snapshot {len(body)}\0".encode()
    target = object_path(repo, snapshot_id)
    target.parent.mkdir(exist_ok=True, parents=True)
    target.write_bytes(header + body)

    loaded = read_snapshot(repo, snapshot_id)
    assert loaded is not None
    assert loaded.snapshot_id == snapshot_id
| 230 | |
| 231 | # --------------------------------------------------------------------------- |
| 232 | # Phase 4 — Test 7: write_snapshot dual-writes to objects/sha256/ |
| 233 | # --------------------------------------------------------------------------- |
| 234 | |
def test_write_snapshot_lands_in_object_store(repo: pathlib.Path, snapshot_id: str, snapshot_data: _DataDict) -> None:
    """After write_snapshot, read_muse_object must return the snapshot under snapshot_id."""
    write_snapshot(repo, SnapshotRecord.from_dict(snapshot_data))

    stored = read_muse_object(repo, snapshot_id)
    assert stored is not None
    kind, body = stored
    assert kind == "snapshot"
    decoded = json.loads(body)
    assert decoded["snapshot_id"] == snapshot_id
| 244 | |
| 245 | # --------------------------------------------------------------------------- |
| 246 | # Phase 5 — Test 8: hash_blob and write_muse_object produce the same ID |
| 247 | # --------------------------------------------------------------------------- |
| 248 | |
def test_blob_id_consistent_with_object_store(repo: pathlib.Path) -> None:
    """The ID returned by write_muse_object for a blob must equal hash_blob's ID."""
    stored_id = write_muse_object(repo, "blob", BLOB_CONTENT)
    expected_id = hash_blob(BLOB_CONTENT)
    assert stored_id == expected_id
| 252 | |
| 253 | # --------------------------------------------------------------------------- |
| 254 | # Phase 5 — Test 9: write_object accepts hash_blob-derived IDs |
| 255 | # --------------------------------------------------------------------------- |
| 256 | |
def test_write_object_accepts_hash_blob_id(repo: pathlib.Path) -> None:
    """write_object must accept a hash_blob-derived ID and actually store the content.

    The call itself must not raise for an ID computed by hash_blob over the
    same bytes. The content is then read back through read_object so the
    test also fails if write_object accepts the ID but stores nothing.
    """
    object_id = hash_blob(BLOB_CONTENT)
    write_object(repo, object_id, BLOB_CONTENT)
    # Without a read-back, a silent no-op in write_object would go unnoticed.
    assert read_object(repo, object_id) == BLOB_CONTENT
| 260 | |
| 261 | # --------------------------------------------------------------------------- |
| 262 | # Phase 5 — Test 10: read_object strips the muse header |
| 263 | # --------------------------------------------------------------------------- |
| 264 | |
def test_read_object_strips_muse_header(repo: pathlib.Path) -> None:
    """read_object must return only the payload of a blob stored via write_muse_object."""
    stored_id = write_muse_object(repo, "blob", BLOB_CONTENT)
    payload = read_object(repo, stored_id)
    assert payload == BLOB_CONTENT
File History
1 commit
sha256:d11a87833d5fad6059b7662844bf5448a8911a17cce7a51811f71ad394f248eb
bump to v0.2.0rc13
Human
patch
6 days ago