test_object_store_algo_layout.py
file-level
1
files
1
commits
0
hotspots
0
🧊 dead
0
💥 blast risk
| 1 | """Tests: algorithm-directory layout for the object store. |
| 2 | |
| 3 | Canonical layout:: |
| 4 | |
| 5 | .muse/objects/sha256/<prefix>/<remainder> |
| 6 | |
| 7 | Also covers ``iter_stored_objects`` — the single canonical walker that |
| 8 | replaces the six inline ``iterdir`` loops scattered across gc, maintenance, |
| 9 | prune, count_objects, verify_object, and object_store itself. |
| 10 | """ |
| 11 | |
| 12 | from __future__ import annotations |
| 13 | |
| 14 | import pathlib |
| 15 | |
| 16 | import pytest |
| 17 | |
| 18 | from muse.core.types import blob_id, DEFAULT_HASH_ALGO, split_id |
| 19 | from muse.core.object_store import ( |
| 20 | _object_path_with_fallback, |
| 21 | cleanup_stale_object_temps, |
| 22 | has_object, |
| 23 | iter_stored_objects, |
| 24 | object_path, |
| 25 | objects_dir, |
| 26 | read_object, |
| 27 | write_object, |
| 28 | ) |
| 29 | from muse.core.paths import muse_dir |
| 30 | |
| 31 | |
| 32 | # --------------------------------------------------------------------------- |
| 33 | # Helpers |
| 34 | # --------------------------------------------------------------------------- |
| 35 | |
| 36 | |
def _repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create the directory returned by ``muse_dir(tmp_path)`` and return *tmp_path*.

    Every test uses this to obtain a minimal repository root backed by
    pytest's per-test temporary directory.
    """
    repo_meta_dir = muse_dir(tmp_path)
    repo_meta_dir.mkdir()
    return tmp_path
| 40 | |
| 41 | |
| 42 | # --------------------------------------------------------------------------- |
| 43 | # 1. object_path — algo directory |
| 44 | # --------------------------------------------------------------------------- |
| 45 | |
| 46 | |
class TestObjectPathAlgoDirectory:
    """Checks that object_path nests objects under an algorithm directory."""

    def test_path_contains_sha256_directory(self, tmp_path: pathlib.Path) -> None:
        """The object file's grandparent directory is named ``sha256``."""
        root = _repo(tmp_path)
        location = object_path(root, blob_id(b"hello"))
        # Layout is objects/<algo>/<shard>/<file>: two levels up is the algo dir.
        algo_dir = location.parent.parent
        assert algo_dir.name == "sha256"

    def test_algo_directory_is_inside_objects(self, tmp_path: pathlib.Path) -> None:
        """The algorithm directory's parent is exactly ``objects_dir(repo)``."""
        root = _repo(tmp_path)
        location = object_path(root, blob_id(b"world"))
        algo_dir = location.parent.parent
        assert algo_dir.parent == objects_dir(root)

    def test_shard_prefix_still_correct(self, tmp_path: pathlib.Path) -> None:
        """The shard directory is named after the first 2 chars following ``sha256:``."""
        root = _repo(tmp_path)
        oid = blob_id(b"shard-check")
        shard_dir = object_path(root, oid).parent
        # Skip past the "sha256:" tag, then take the two-character shard prefix.
        start = len("sha256:")
        assert shard_dir.name == oid[start:start + 2]

    def test_filename_is_remaining_hex(self, tmp_path: pathlib.Path) -> None:
        """The object filename is whatever follows the 2-char shard prefix."""
        root = _repo(tmp_path)
        oid = blob_id(b"filename-check")
        location = object_path(root, oid)
        # Element 1 of split_id is presumably the bare hex digest — confirm
        # against muse.core.types.
        hex_part = split_id(oid)[1]
        assert location.name == hex_part[2:]

    def test_four_char_prefix_still_nested_under_sha256(
        self, tmp_path: pathlib.Path
    ) -> None:
        """With ``prefix_len=4`` the shard still sits under the algorithm directory."""
        root = _repo(tmp_path)
        oid = blob_id(b"four-char")
        location = object_path(root, oid, prefix_len=4)
        hex_part = split_id(oid)[1]
        shard_dir = location.parent
        assert shard_dir.parent.name == DEFAULT_HASH_ALGO
        assert shard_dir.name == hex_part[:4]
        assert location.name == hex_part[4:]

    def test_write_object_lands_in_sha256_dir(self, tmp_path: pathlib.Path) -> None:
        """After write_object, the file exists at object_path under ``sha256``."""
        root = _repo(tmp_path)
        payload = b"write-check"
        oid = blob_id(payload)
        write_object(root, oid, payload)
        location = object_path(root, oid)
        assert location.exists()
        assert location.parent.parent.name == "sha256"
| 101 | |
| 102 | |
| 103 | # --------------------------------------------------------------------------- |
| 104 | # 2. iter_stored_objects — new layout |
| 105 | # --------------------------------------------------------------------------- |
| 106 | |
| 107 | |
class TestIterStoredObjectsNewLayout:
    """iter_stored_objects yields (prefixed_id, path) from the new layout."""

    def test_empty_store_yields_nothing(self, tmp_path: pathlib.Path) -> None:
        """A repository with no written objects yields no entries."""
        repo = _repo(tmp_path)
        assert list(iter_stored_objects(repo)) == []

    def test_yields_written_object(self, tmp_path: pathlib.Path) -> None:
        """A single written object is yielded once, under its own ID."""
        repo = _repo(tmp_path)
        data = b"single object"
        oid = blob_id(data)
        write_object(repo, oid, data)
        results = list(iter_stored_objects(repo))
        assert len(results) == 1
        yielded_id, _ = results[0]
        assert yielded_id == oid

    def test_yielded_path_exists(self, tmp_path: pathlib.Path) -> None:
        """The yielded path points at an existing regular file."""
        repo = _repo(tmp_path)
        oid = blob_id(b"path-exists")
        write_object(repo, oid, b"path-exists")
        _, p = list(iter_stored_objects(repo))[0]
        assert p.exists()
        assert p.is_file()

    def test_yields_all_objects(self, tmp_path: pathlib.Path) -> None:
        """Every written object ID is yielded, and nothing else."""
        repo = _repo(tmp_path)
        written = set()
        for i in range(10):
            data = f"obj-{i}".encode()
            oid = blob_id(data)
            write_object(repo, oid, data)
            written.add(oid)
        yielded = {oid for oid, _ in iter_stored_objects(repo)}
        assert yielded == written

    def test_ids_are_sha256_prefixed(self, tmp_path: pathlib.Path) -> None:
        """All yielded IDs carry the sha256: prefix."""
        repo = _repo(tmp_path)
        write_object(repo, blob_id(b"prefix-check"), b"prefix-check")
        ids = [oid for oid, _ in iter_stored_objects(repo)]
        # Guard against a vacuous pass: an empty walk would satisfy all().
        assert ids, "expected at least one stored object"
        assert all(oid.startswith("sha256:") for oid in ids)

    def test_no_duplicates(self, tmp_path: pathlib.Path) -> None:
        """Writing the same object twice still yields a single entry."""
        repo = _repo(tmp_path)
        data = b"idempotent"
        oid = blob_id(data)
        write_object(repo, oid, data)
        write_object(repo, oid, data)  # second write is no-op
        results = list(iter_stored_objects(repo))
        assert len(results) == 1

    def test_skips_symlinks(self, tmp_path: pathlib.Path) -> None:
        """Symlinked files inside shard dirs are not yielded."""
        repo = _repo(tmp_path)
        oid = blob_id(b"real")
        write_object(repo, oid, b"real")
        p = object_path(repo, oid)
        # Name the link like a genuine object remainder (same length, all
        # hex, differing only in the last character) so that only the
        # symlink check — not a non-hex filename filter — can exclude it.
        last_char = "1" if p.name[-1] == "0" else "0"
        link = p.parent / (p.name[:-1] + last_char)
        link.symlink_to(p)
        ids = [stored_id for stored_id, _ in iter_stored_objects(repo)]
        assert ids == [oid]

    def test_skips_non_hex_filenames(self, tmp_path: pathlib.Path) -> None:
        """Stray files (DS_Store, editor temps) with non-hex names are skipped."""
        repo = _repo(tmp_path)
        oid = blob_id(b"real-obj")
        write_object(repo, oid, b"real-obj")
        p = object_path(repo, oid)
        (p.parent / ".DS_Store").write_bytes(b"")
        (p.parent / "editor.tmp").write_bytes(b"")
        results = list(iter_stored_objects(repo))
        assert len(results) == 1
| 183 | |
| 184 | |
| 185 | # --------------------------------------------------------------------------- |
| 186 | # 3. _object_path_with_fallback — shard-prefix fallback only |
| 187 | # --------------------------------------------------------------------------- |
| 188 | |
| 189 | |
class TestFallbackShardPrefix:
    """Lookup helpers resolve objects that were written in the canonical layout."""

    def test_finds_canonical_layout(self, tmp_path: pathlib.Path) -> None:
        """_object_path_with_fallback returns an existing path under ``sha256``."""
        root = _repo(tmp_path)
        payload = b"canonical find"
        oid = blob_id(payload)
        write_object(root, oid, payload)
        resolved = _object_path_with_fallback(root, oid)
        assert resolved.exists()
        # objects/<algo>/<shard>/<file>: the algo dir is two levels up.
        algo_dir = resolved.parent.parent
        assert algo_dir.name == "sha256"

    def test_has_object_canonical(self, tmp_path: pathlib.Path) -> None:
        """has_object is truthy for an object that was just written."""
        root = _repo(tmp_path)
        payload = b"has-object canonical"
        oid = blob_id(payload)
        write_object(root, oid, payload)
        assert has_object(root, oid)

    def test_read_object_canonical(self, tmp_path: pathlib.Path) -> None:
        """read_object returns exactly the bytes that were written."""
        root = _repo(tmp_path)
        payload = b"read-object canonical"
        oid = blob_id(payload)
        write_object(root, oid, payload)
        assert read_object(root, oid) == payload
| 215 | |
| 216 | |
| 217 | # --------------------------------------------------------------------------- |
| 218 | # 4. cleanup_stale_object_temps |
| 219 | # --------------------------------------------------------------------------- |
| 220 | |
| 221 | |
class TestCleanupStaleTempNewLayout:
    """cleanup_stale_object_temps handles the algo-directory structure."""

    def test_cleanup_finds_temps_in_sha256_shards(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Stale .obj-tmp-* files inside sha256/<shard>/ are cleaned up."""
        # Imported locally so this test is self-contained and does not
        # depend on where (or whether) the module imports ``os``.
        import os
        import time

        repo = _repo(tmp_path)
        # Create a temp file in the new layout shard directory.
        shard = objects_dir(repo) / "sha256" / "ab"
        shard.mkdir(parents=True)
        stale = shard / ".obj-tmp-stale"
        stale.write_bytes(b"stale")
        # Back-date atime and mtime by two minutes so the file exceeds the
        # min-age threshold.
        old_time = time.time() - 120
        os.utime(stale, (old_time, old_time))
        removed = cleanup_stale_object_temps(repo)
        assert removed >= 1
        assert not stale.exists()
| 242 | |
| 243 | |
| 244 | import os # noqa: E402 — needed by the last test |