gabriel / muse public
test_object_store_algo_layout.py python
244 lines 8.7 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Tests: algorithm-directory layout for the object store.
2
3 Canonical layout::
4
5 .muse/objects/sha256/<prefix>/<remainder>
6
7 Also covers ``iter_stored_objects`` — the single canonical walker that
8 replaces the six inline ``iterdir`` loops scattered across gc, maintenance,
9 prune, count_objects, verify_object, and object_store itself.
10 """
11
12 from __future__ import annotations
13
14 import pathlib
15
16 import pytest
17
18 from muse.core.types import blob_id, DEFAULT_HASH_ALGO, split_id
19 from muse.core.object_store import (
20 _object_path_with_fallback,
21 cleanup_stale_object_temps,
22 has_object,
23 iter_stored_objects,
24 object_path,
25 objects_dir,
26 read_object,
27 write_object,
28 )
29 from muse.core.paths import muse_dir
30
31
32 # ---------------------------------------------------------------------------
33 # Helpers
34 # ---------------------------------------------------------------------------
35
36
def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the ``muse_dir`` directory under *tmp_path* and return *tmp_path*.

    The returned path is what the tests below pass as the repository root.
    """
    dot_muse = muse_dir(tmp_path)
    dot_muse.mkdir()
    return tmp_path
40
41
42 # ---------------------------------------------------------------------------
43 # 1. object_path — algo directory
44 # ---------------------------------------------------------------------------
45
46
class TestObjectPathAlgoDirectory:
    """object_path must embed the algorithm as a directory component."""

    def test_path_contains_sha256_directory(self, tmp_path: pathlib.Path) -> None:
        """object_path returns .muse/objects/sha256/<prefix>/<rest>."""
        repo = _repo(tmp_path)
        oid = blob_id(b"hello")
        p = object_path(repo, oid)
        # <algo>/<shard>/<file>: the algo dir is two levels above the file.
        assert p.parent.parent.name == "sha256"

    def test_algo_directory_is_inside_objects(self, tmp_path: pathlib.Path) -> None:
        """sha256/ sits directly under .muse/objects/."""
        repo = _repo(tmp_path)
        oid = blob_id(b"world")
        p = object_path(repo, oid)
        assert p.parent.parent.parent == objects_dir(repo)

    def test_shard_prefix_still_correct(self, tmp_path: pathlib.Path) -> None:
        """The 2-char shard prefix is the first 2 hex chars of the hash."""
        repo = _repo(tmp_path)
        oid = blob_id(b"shard-check")
        p = object_path(repo, oid)
        # Take the hex digest from split_id instead of slicing past a
        # hard-coded "sha256:" prefix, so the expectation does not depend on
        # the literal length of the algorithm tag.
        hex_digest = split_id(oid)[1]
        assert p.parent.name == hex_digest[:2]

    def test_filename_is_remaining_hex(self, tmp_path: pathlib.Path) -> None:
        """Object filename is the last 62 hex chars of the hash."""
        repo = _repo(tmp_path)
        oid = blob_id(b"filename-check")
        p = object_path(repo, oid)
        hex_digest = split_id(oid)[1]
        assert p.name == hex_digest[2:]

    def test_four_char_prefix_still_nested_under_sha256(
        self, tmp_path: pathlib.Path
    ) -> None:
        """prefix_len=4 still places the shard under sha256/."""
        repo = _repo(tmp_path)
        oid = blob_id(b"four-char")
        p = object_path(repo, oid, prefix_len=4)
        hex_digest = split_id(oid)[1]
        assert p.parent.parent.name == DEFAULT_HASH_ALGO
        # With a 4-char shard, the directory takes the first four hex chars
        # and the filename takes whatever is left.
        assert p.parent.name == hex_digest[:4]
        assert p.name == hex_digest[4:]

    def test_write_object_lands_in_sha256_dir(self, tmp_path: pathlib.Path) -> None:
        """write_object places the file under .muse/objects/sha256/."""
        repo = _repo(tmp_path)
        data = b"write-check"
        oid = blob_id(data)
        write_object(repo, oid, data)
        p = object_path(repo, oid)
        assert p.exists()
        assert p.parent.parent.name == "sha256"
101
102
103 # ---------------------------------------------------------------------------
104 # 2. iter_stored_objects — new layout
105 # ---------------------------------------------------------------------------
106
107
class TestIterStoredObjectsNewLayout:
    """iter_stored_objects yields (prefixed_id, path) from the new layout."""

    def test_empty_store_yields_nothing(self, tmp_path: pathlib.Path) -> None:
        """A repository with no written objects yields an empty sequence."""
        repo = _repo(tmp_path)
        assert list(iter_stored_objects(repo)) == []

    def test_yields_written_object(self, tmp_path: pathlib.Path) -> None:
        """A single written object is yielded exactly once, under its own ID."""
        repo = _repo(tmp_path)
        data = b"single object"
        oid = blob_id(data)
        write_object(repo, oid, data)
        results = list(iter_stored_objects(repo))
        assert len(results) == 1
        yielded_id, _ = results[0]
        assert yielded_id == oid

    def test_yielded_path_exists(self, tmp_path: pathlib.Path) -> None:
        """The path half of each yielded pair points at a regular file on disk."""
        repo = _repo(tmp_path)
        oid = blob_id(b"path-exists")
        write_object(repo, oid, b"path-exists")
        _, p = list(iter_stored_objects(repo))[0]
        assert p.exists()
        assert p.is_file()

    def test_yields_all_objects(self, tmp_path: pathlib.Path) -> None:
        """Every written object ID is yielded, and nothing else."""
        repo = _repo(tmp_path)
        written = set()
        for i in range(10):
            data = f"obj-{i}".encode()
            oid = blob_id(data)
            write_object(repo, oid, data)
            written.add(oid)
        yielded = {oid for oid, _ in iter_stored_objects(repo)}
        assert yielded == written

    def test_ids_are_sha256_prefixed(self, tmp_path: pathlib.Path) -> None:
        """All yielded IDs carry the sha256: prefix."""
        repo = _repo(tmp_path)
        write_object(repo, blob_id(b"prefix-check"), b"prefix-check")
        ids = [oid for oid, _ in iter_stored_objects(repo)]
        # Guard against a vacuous pass: with nothing yielded, the prefix
        # check below would hold trivially.
        assert ids
        assert all(oid.startswith("sha256:") for oid in ids)

    def test_no_duplicates(self, tmp_path: pathlib.Path) -> None:
        """Writing the same object twice still yields a single entry."""
        repo = _repo(tmp_path)
        data = b"idempotent"
        oid = blob_id(data)
        write_object(repo, oid, data)
        write_object(repo, oid, data)  # second write is no-op
        results = list(iter_stored_objects(repo))
        assert len(results) == 1

    def test_skips_symlinks(self, tmp_path: pathlib.Path) -> None:
        """Symlinked files inside shard dirs are not yielded."""
        repo = _repo(tmp_path)
        oid = blob_id(b"real")
        write_object(repo, oid, b"real")
        p = object_path(repo, oid)
        # The link name must look like a genuine object filename: all hex and
        # the same length as a real one. A non-hex name would be rejected as a
        # stray file, and this test would pass without ever exercising the
        # symlink check.
        link = p.parent / ("a" * len(p.name))
        link.symlink_to(p)
        ids = [yielded_id for yielded_id, _ in iter_stored_objects(repo)]
        assert ids == [oid]

    def test_skips_non_hex_filenames(self, tmp_path: pathlib.Path) -> None:
        """Stray files (DS_Store, editor temps) with non-hex names are skipped."""
        repo = _repo(tmp_path)
        oid = blob_id(b"real-obj")
        write_object(repo, oid, b"real-obj")
        p = object_path(repo, oid)
        (p.parent / ".DS_Store").write_bytes(b"")
        (p.parent / "editor.tmp").write_bytes(b"")
        results = list(iter_stored_objects(repo))
        assert len(results) == 1
        # The one surviving entry must be the real object, not a stray file.
        assert results[0][0] == oid
183
184
185 # ---------------------------------------------------------------------------
186 # 3. _object_path_with_fallback — shard-prefix fallback only
187 # ---------------------------------------------------------------------------
188
189
class TestFallbackShardPrefix:
    """_object_path_with_fallback handles the 2-char/4-char shard-prefix migration."""

    def test_finds_canonical_layout(self, tmp_path: pathlib.Path) -> None:
        """A written object resolves to an existing path under sha256/."""
        repo = _repo(tmp_path)
        payload = b"canonical find"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        resolved = _object_path_with_fallback(repo, oid)
        assert resolved.exists()
        algo_dir = resolved.parent.parent
        assert algo_dir.name == "sha256"

    def test_has_object_canonical(self, tmp_path: pathlib.Path) -> None:
        """has_object reports an object once it has been written."""
        repo = _repo(tmp_path)
        payload = b"has-object canonical"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        assert has_object(repo, oid)

    def test_read_object_canonical(self, tmp_path: pathlib.Path) -> None:
        """read_object returns the exact bytes that were written."""
        repo = _repo(tmp_path)
        payload = b"read-object canonical"
        oid = blob_id(payload)
        write_object(repo, oid, payload)
        assert read_object(repo, oid) == payload
215
216
217 # ---------------------------------------------------------------------------
218 # 4. cleanup_stale_object_temps
219 # ---------------------------------------------------------------------------
220
221
class TestCleanupStaleTempNewLayout:
    """cleanup_stale_object_temps handles the algo-directory structure."""

    def test_cleanup_finds_temps_in_sha256_shards(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Stale .obj-tmp-* files inside sha256/<shard>/ are cleaned up."""
        # Import locally so this test carries its own dependencies instead of
        # relying on a module-level import placed after the class definition.
        import os
        import time

        repo = _repo(tmp_path)
        # Create a temp file in the new layout shard directory.
        shard = objects_dir(repo) / "sha256" / "ab"
        shard.mkdir(parents=True)
        stale = shard / ".obj-tmp-stale"
        stale.write_bytes(b"stale")
        # Back-date so it exceeds the min-age threshold.
        old_time = time.time() - 120
        os.utime(stale, (old_time, old_time))
        removed = cleanup_stale_object_temps(repo)
        assert removed >= 1
        assert not stale.exists()
242
243
244 import os # noqa: E402 — needed by the last test
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 29 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago