"""Unified object store — TDD from first principles. Every object (commit, snapshot, blob) lives in one store: .muse/objects/sha256/<2-hex-prefix>/ On-disk format (idiomatic with Git): " \0" The full string is hashed to produce the object ID — the type is part of the object's identity, not a separate framing layer. """ from __future__ import annotations import json import pathlib import pytest from muse.core.ids import hash_blob, hash_snapshot, hash_commit from muse.core.object_store import objects_dir, object_path, write_muse_object, read_muse_object, write_object, read_object from muse.core.commits import ( CommitRecord, read_commit, write_commit, ) from muse.core.snapshots import ( SnapshotRecord, read_snapshot, write_snapshot, ) _JsonVal = str | int | None | list[str] _DataDict = dict[str, _JsonVal] # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @pytest.fixture def repo(tmp_path: pathlib.Path) -> pathlib.Path: objects_dir(tmp_path).mkdir(parents=True, exist_ok=True) return tmp_path BLOB_CONTENT = b"# Hello\n" @pytest.fixture def blob_id() -> str: return hash_blob(BLOB_CONTENT) @pytest.fixture def snapshot_id(blob_id: str) -> str: return hash_snapshot({"hello.md": blob_id}) @pytest.fixture def snapshot_data(blob_id: str, snapshot_id: str) -> _DataDict: return { "schema_version": 1, "snapshot_id": snapshot_id, "manifest": {"hello.md": blob_id}, "directories": [], "created_at": "2026-05-20T16:00:00+00:00", "note": "", } @pytest.fixture def commit_id(snapshot_id: str) -> str: return hash_commit( parent_ids=[], snapshot_id=snapshot_id, message="initial commit", committed_at_iso="2026-05-20T16:00:00+00:00", author="gabriel", ) @pytest.fixture def commit_data(commit_id: str, snapshot_id: str) -> _DataDict: return { "commit_id": commit_id, "branch": "main", "snapshot_id": snapshot_id, "message": "initial commit", "committed_at": "2026-05-20T16:00:00+00:00", "parent_commit_id": None, "parent2_commit_id": None, "author": "gabriel", "metadata": {}, "structured_delta": None, "sem_ver_bump": "none", "breaking_changes": [], "agent_id": "claude-code", "model_id": "claude-sonnet-4-6", "toolchain_id": "", "prompt_hash": "", "signature": "", "signer_public_key": "", "signer_key_id": "", "reviewed_by": [], "test_runs": 0, "labels": [], "status": "", "notes": [], "score": None, } # --------------------------------------------------------------------------- # Test 0: hash_blob includes type in the hash (Git-idiomatic) # --------------------------------------------------------------------------- def test_hash_blob_includes_type() -> None: import hashlib data = b"# Hello\n" header = f"blob {len(data)}\0".encode() expected = "sha256:" + hashlib.sha256(header + data).hexdigest() assert hash_blob(data) == expected # --------------------------------------------------------------------------- # Test 0b: hash_snapshot includes type in the hash (Git-idiomatic) # --------------------------------------------------------------------------- def test_hash_snapshot_includes_type(blob_id: str) -> None: import hashlib from muse.core.types import split_id manifest = {"hello.md": blob_id} parts = sorted(f"{path}\x00{split_id(oid)[1]}" for path, oid in manifest.items()) canonical = "\x00".join(parts).encode() header = f"snapshot {len(canonical)}\0".encode() expected = "sha256:" + hashlib.sha256(header + canonical).hexdigest() assert hash_snapshot(manifest) == expected # --------------------------------------------------------------------------- # Test 0c: hash_commit includes type in the hash (Git-idiomatic) # --------------------------------------------------------------------------- def test_hash_commit_includes_type(snapshot_id: str) -> None: import hashlib from muse.core.types import split_id parts = [ "", # no parents split_id(snapshot_id)[1], "initial commit", "2026-05-20T16:00:00+00:00", "gabriel", "", # no signer_public_key ] canonical = "\x00".join(parts).encode() header = f"commit {len(canonical)}\0".encode() expected = "sha256:" + hashlib.sha256(header + canonical).hexdigest() assert hash_commit( parent_ids=[], snapshot_id=snapshot_id, message="initial commit", committed_at_iso="2026-05-20T16:00:00+00:00", author="gabriel", ) == expected # --------------------------------------------------------------------------- # Test 1: blob round-trip # --------------------------------------------------------------------------- def test_write_read_muse_object_blob(repo: pathlib.Path) -> None: object_id = write_muse_object(repo, "blob", BLOB_CONTENT) type_str, payload = read_muse_object(repo, object_id) assert type_str == "blob" assert payload == BLOB_CONTENT # --------------------------------------------------------------------------- # Test 2: snapshot round-trip # --------------------------------------------------------------------------- def test_write_read_muse_object_snapshot(repo: pathlib.Path, snapshot_data: _DataDict) -> None: payload = json.dumps(snapshot_data, separators=(",", ":")).encode() object_id = write_muse_object(repo, "snapshot", payload) type_str, raw = read_muse_object(repo, object_id) assert type_str == "snapshot" assert json.loads(raw) == snapshot_data # --------------------------------------------------------------------------- # Test 3: commit round-trip # --------------------------------------------------------------------------- def test_write_read_muse_object_commit(repo: pathlib.Path, commit_data: _DataDict) -> None: payload = json.dumps(commit_data, separators=(",", ":")).encode() object_id = write_muse_object(repo, "commit", payload) type_str, raw = read_muse_object(repo, object_id) assert type_str == "commit" assert json.loads(raw) == commit_data # --------------------------------------------------------------------------- # Phase 3 — Test 4: read_commit falls back to objects/sha256/ (muse format) # --------------------------------------------------------------------------- def test_read_commit_falls_back_to_object_store(repo: pathlib.Path, commit_id: str, commit_data: _DataDict) -> None: payload = json.dumps(commit_data, separators=(",", ":")).encode() obj_path = object_path(repo, commit_id) obj_path.parent.mkdir(parents=True, exist_ok=True) obj_path.write_bytes(f"commit {len(payload)}\0".encode() + payload) record = read_commit(repo, commit_id) assert record is not None assert record.commit_id == commit_id # --------------------------------------------------------------------------- # Phase 3 — Test 5: write_commit dual-writes to objects/sha256/ # --------------------------------------------------------------------------- def test_write_commit_lands_in_object_store(repo: pathlib.Path, commit_id: str, commit_data: _DataDict) -> None: record = CommitRecord.from_dict(commit_data) write_commit(repo, record, skip_parent_check=True) result = read_muse_object(repo, commit_id) assert result is not None type_str, raw = result assert type_str == "commit" assert json.loads(raw)["commit_id"] == commit_id # --------------------------------------------------------------------------- # Phase 4 — Test 6: read_snapshot falls back to objects/sha256/ # --------------------------------------------------------------------------- def test_read_snapshot_falls_back_to_object_store(repo: pathlib.Path, snapshot_id: str, snapshot_data: _DataDict) -> None: payload = json.dumps(snapshot_data, separators=(",", ":")).encode() obj_path = object_path(repo, snapshot_id) obj_path.parent.mkdir(parents=True, exist_ok=True) obj_path.write_bytes(f"snapshot {len(payload)}\0".encode() + payload) record = read_snapshot(repo, snapshot_id) assert record is not None assert record.snapshot_id == snapshot_id # --------------------------------------------------------------------------- # Phase 4 — Test 7: write_snapshot dual-writes to objects/sha256/ # --------------------------------------------------------------------------- def test_write_snapshot_lands_in_object_store(repo: pathlib.Path, snapshot_id: str, snapshot_data: _DataDict) -> None: record = SnapshotRecord.from_dict(snapshot_data) write_snapshot(repo, record) result = read_muse_object(repo, snapshot_id) assert result is not None type_str, raw = result assert type_str == "snapshot" assert json.loads(raw)["snapshot_id"] == snapshot_id # --------------------------------------------------------------------------- # Phase 5 — Test 8: hash_blob and write_muse_object produce the same ID # --------------------------------------------------------------------------- def test_blob_id_consistent_with_object_store(repo: pathlib.Path) -> None: object_id = write_muse_object(repo, "blob", BLOB_CONTENT) assert object_id == hash_blob(BLOB_CONTENT) # --------------------------------------------------------------------------- # Phase 5 — Test 9: write_object accepts hash_blob-derived IDs # --------------------------------------------------------------------------- def test_write_object_accepts_hash_blob_id(repo: pathlib.Path) -> None: object_id = hash_blob(BLOB_CONTENT) write_object(repo, object_id, BLOB_CONTENT) # --------------------------------------------------------------------------- # Phase 5 — Test 10: read_object strips the muse header # --------------------------------------------------------------------------- def test_read_object_strips_muse_header(repo: pathlib.Path) -> None: object_id = write_muse_object(repo, "blob", BLOB_CONTENT) content = read_object(repo, object_id) assert content == BLOB_CONTENT