test_indices.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
6 days ago
| 1 | """Tests for muse/core/indices.py — optional local index layer. |
| 2 | |
| 3 | Coverage |
| 4 | -------- |
| 5 | SymbolHistoryEntry |
| 6 | - to_dict / from_dict round-trip. |
| 7 | - All six fields preserved. |
| 8 | |
| 9 | symbol_history index |
| 10 | - save_symbol_history writes a valid JSON file. |
| 11 | - load_symbol_history reads it back correctly. |
| 12 | - load returns empty dict when file absent. |
| 13 | - load returns empty dict on corrupt JSON. |
| 14 | - Sorting: entries dict is sorted by address. |
| 15 | - Multiple addresses, multiple events per address. |
| 16 | |
| 17 | hash_occurrence index |
| 18 | - save_hash_occurrence writes a valid JSON file. |
| 19 | - load_hash_occurrence reads it back correctly. |
| 20 | - load returns empty dict when file absent. |
| 21 | - load returns empty dict on corrupt JSON. |
| 22 | - Addresses within each hash entry are sorted. |
| 23 | |
| 24 | index_info |
| 25 | - Reports "absent" for missing indexes. |
| 26 | - Reports "present" + correct entry count for existing indexes. |
| 27 | - Reports "corrupt" for malformed JSON. |
| 28 | - Reports both indexes. |
| 29 | |
| 30 | Schema compliance |
| 31 | - schema_version == __version__. |
| 32 | - updated_at is present and is a non-empty string. |
| 33 | - index field matches the index name. |
| 34 | """ |
| 35 | |
| 36 | import pathlib |
| 37 | |
| 38 | import pytest |
| 39 | |
| 40 | from muse._version import __version__ |
| 41 | from muse.core.indices import ( |
| 42 | HashOccurrenceIndex, |
| 43 | SymbolHistoryEntry, |
| 44 | SymbolHistoryIndex, |
| 45 | index_info, |
| 46 | load_hash_occurrence, |
| 47 | load_symbol_history, |
| 48 | save_hash_occurrence, |
| 49 | save_symbol_history, |
| 50 | ) |
| 51 | from muse.core.paths import indices_dir |
| 52 | |
| 53 | |
| 54 | # --------------------------------------------------------------------------- |
| 55 | # SymbolHistoryEntry |
| 56 | # --------------------------------------------------------------------------- |
| 57 | |
| 58 | |
class TestSymbolHistoryEntry:
    """Serialisation tests for ``SymbolHistoryEntry`` (``to_dict`` / ``from_dict``)."""

    def test_to_dict_from_dict_round_trip(self) -> None:
        """All six fields come back unchanged after ``to_dict`` then ``from_dict``."""
        expected_fields = {
            "commit_id": "abc123",
            "committed_at": "2026-01-01T00:00:00+00:00",
            "op": "insert",
            "content_id": "content_abc",
            "body_hash": "body_hash_xyz",
            "signature_id": "sig_id_pqr",
        }
        original = SymbolHistoryEntry(**expected_fields)
        restored = SymbolHistoryEntry.from_dict(original.to_dict())
        # Compare attribute by attribute so a failure names the offending field.
        for field_name, expected in expected_fields.items():
            assert getattr(restored, field_name) == expected

    def test_all_ops_preserved(self) -> None:
        """Each of the four op strings survives the dict round-trip."""
        operations = ("insert", "delete", "replace", "patch")
        for operation in operations:
            original = SymbolHistoryEntry("c", "t", operation, "cid", "bh", "sig")
            restored = SymbolHistoryEntry.from_dict(original.to_dict())
            assert restored.op == operation
| 82 | |
| 83 | |
| 84 | # --------------------------------------------------------------------------- |
| 85 | # symbol_history index — save / load |
| 86 | # --------------------------------------------------------------------------- |
| 87 | |
| 88 | |
class TestSymbolHistoryIndex:
    """Save / load tests for the ``symbol_history`` index file."""

    def _make_entry(self, op: str = "insert") -> SymbolHistoryEntry:
        """Build an entry with fixed field values; only *op* varies."""
        return SymbolHistoryEntry(
            commit_id="commit1",
            committed_at="2026-01-01T00:00:00+00:00",
            op=op,
            content_id="cid1",
            body_hash="bh1",
            signature_id="sig1",
        )

    def _index_file(self, root: pathlib.Path) -> pathlib.Path:
        """Return the path at which these tests expect the index file."""
        return indices_dir(root) / "symbol_history.json"

    def test_save_creates_file(self, tmp_path: pathlib.Path) -> None:
        """Saving an index creates ``symbol_history.json`` in the indices dir."""
        index: SymbolHistoryIndex = {
            "src/a.py::f": [self._make_entry()],
        }
        save_symbol_history(tmp_path, index)
        assert self._index_file(tmp_path).exists()

    def test_round_trip(self, tmp_path: pathlib.Path) -> None:
        """A saved entry is loaded back under the same address."""
        address = "src/billing.py::compute_total"
        index: SymbolHistoryIndex = {address: [self._make_entry("replace")]}
        save_symbol_history(tmp_path, index)
        loaded = load_symbol_history(tmp_path)
        assert address in loaded
        entries = loaded[address]
        assert len(entries) == 1
        assert entries[0].op == "replace"
        assert entries[0].commit_id == "commit1"

    def test_multiple_addresses(self, tmp_path: pathlib.Path) -> None:
        """Several addresses, one with two events, keep their entry counts."""
        index: SymbolHistoryIndex = {
            "src/a.py::alpha": [self._make_entry("insert")],
            "src/b.py::beta": [
                self._make_entry("insert"),
                self._make_entry("replace"),
            ],
        }
        save_symbol_history(tmp_path, index)
        loaded = load_symbol_history(tmp_path)
        assert len(loaded["src/a.py::alpha"]) == 1
        assert len(loaded["src/b.py::beta"]) == 2

    def test_load_absent_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading when nothing was saved yields an empty dict."""
        assert load_symbol_history(tmp_path) == {}

    def test_load_corrupt_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """An undecodable index file is treated like an absent one."""
        index_file = self._index_file(tmp_path)
        index_file.parent.mkdir(parents=True, exist_ok=True)
        index_file.write_bytes(b"\xff\xfe not valid JSON")
        assert load_symbol_history(tmp_path) == {}

    def test_schema_compliance(self, tmp_path: pathlib.Path) -> None:
        """The on-disk envelope carries version, index name and timestamp."""
        import json as _json

        index: SymbolHistoryIndex = {"x.py::f": [self._make_entry()]}
        save_symbol_history(tmp_path, index)
        raw = _json.loads(self._index_file(tmp_path).read_bytes())
        assert raw["schema_version"] == __version__
        assert raw["index"] == "symbol_history"
        # The documented contract is "non-empty string"; a bare truthiness
        # check would also accept e.g. a number, so check the type explicitly.
        updated_at = raw["updated_at"]
        assert isinstance(updated_at, str)
        assert updated_at
        assert "x.py::f" in raw["entries"]

    def test_empty_index_saved(self, tmp_path: pathlib.Path) -> None:
        """An empty index round-trips to an empty dict."""
        save_symbol_history(tmp_path, {})
        assert load_symbol_history(tmp_path) == {}

    def test_entries_sorted_by_address(self, tmp_path: pathlib.Path) -> None:
        """Addresses appear in sorted order in the serialised ``entries``."""
        import json as _json

        index: SymbolHistoryIndex = {
            "z.py::z": [self._make_entry()],
            "a.py::a": [self._make_entry()],
            "m.py::m": [self._make_entry()],
        }
        save_symbol_history(tmp_path, index)
        raw = _json.loads(self._index_file(tmp_path).read_bytes())
        keys = list(raw["entries"])
        assert keys == sorted(keys)
| 168 | |
| 169 | |
| 170 | # --------------------------------------------------------------------------- |
| 171 | # hash_occurrence index — save / load |
| 172 | # --------------------------------------------------------------------------- |
| 173 | |
| 174 | |
class TestHashOccurrenceIndex:
    """Save / load tests for the ``hash_occurrence`` index file."""

    def _index_file(self, root: pathlib.Path) -> pathlib.Path:
        """Return the path at which these tests expect the index file."""
        return indices_dir(root) / "hash_occurrence.json"

    def test_save_creates_file(self, tmp_path: pathlib.Path) -> None:
        """Saving an index creates ``hash_occurrence.json`` in the indices dir."""
        index: HashOccurrenceIndex = {
            "deadbeef": ["src/a.py::f", "src/b.py::g"],
        }
        save_hash_occurrence(tmp_path, index)
        assert self._index_file(tmp_path).exists()

    def test_round_trip(self, tmp_path: pathlib.Path) -> None:
        """Saved hashes and their addresses are loaded back."""
        index: HashOccurrenceIndex = {
            "abc123": ["src/a.py::f", "src/b.py::g"],
            "def456": ["src/c.py::h"],
        }
        save_hash_occurrence(tmp_path, index)
        loaded = load_hash_occurrence(tmp_path)
        assert "abc123" in loaded
        # Order within a hash is checked separately; compare as a set here.
        assert set(loaded["abc123"]) == {"src/a.py::f", "src/b.py::g"}
        assert loaded["def456"] == ["src/c.py::h"]

    def test_addresses_sorted_within_hash(self, tmp_path: pathlib.Path) -> None:
        """Addresses under one hash are serialised in sorted order."""
        import json as _json

        index: HashOccurrenceIndex = {
            "hash1": ["z.py::z", "a.py::a", "m.py::m"],
        }
        save_hash_occurrence(tmp_path, index)
        raw = _json.loads(self._index_file(tmp_path).read_bytes())
        addrs = raw["entries"]["hash1"]
        assert addrs == sorted(addrs)

    def test_hashes_sorted(self, tmp_path: pathlib.Path) -> None:
        """Hash keys appear in sorted order in the serialised ``entries``."""
        import json as _json

        index: HashOccurrenceIndex = {
            "zzz": ["a.py::f"],
            "aaa": ["b.py::g"],
        }
        save_hash_occurrence(tmp_path, index)
        raw = _json.loads(self._index_file(tmp_path).read_bytes())
        keys = list(raw["entries"])
        assert keys == sorted(keys)

    def test_load_absent_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """Loading when nothing was saved yields an empty dict."""
        assert load_hash_occurrence(tmp_path) == {}

    def test_load_corrupt_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """An undecodable index file is treated like an absent one."""
        index_file = self._index_file(tmp_path)
        index_file.parent.mkdir(parents=True, exist_ok=True)
        index_file.write_bytes(b"\xff\xfe garbage bytes")
        assert load_hash_occurrence(tmp_path) == {}

    def test_schema_compliance(self, tmp_path: pathlib.Path) -> None:
        """The on-disk envelope carries version, index name and timestamp."""
        import json as _json

        save_hash_occurrence(tmp_path, {"h": ["a.py::f"]})
        raw = _json.loads(self._index_file(tmp_path).read_bytes())
        assert raw["schema_version"] == __version__
        assert raw["index"] == "hash_occurrence"
        # The documented contract is "non-empty string"; a bare truthiness
        # check would also accept e.g. a number, so check the type explicitly.
        updated_at = raw["updated_at"]
        assert isinstance(updated_at, str)
        assert updated_at

    def test_empty_index(self, tmp_path: pathlib.Path) -> None:
        """An empty index round-trips to an empty dict."""
        save_hash_occurrence(tmp_path, {})
        assert load_hash_occurrence(tmp_path) == {}
| 236 | |
| 237 | |
| 238 | # --------------------------------------------------------------------------- |
| 239 | # index_info |
| 240 | # --------------------------------------------------------------------------- |
| 241 | |
| 242 | |
class TestIndexInfo:
    """Tests for the per-index status reports returned by ``index_info``."""

    @staticmethod
    def _report_for(root: pathlib.Path, name: str):
        """Return the first ``index_info(root)`` item whose ``"name"`` is *name*."""
        reports = index_info(root)
        return next(report for report in reports if report["name"] == name)

    def test_both_absent(self, tmp_path: pathlib.Path) -> None:
        """With nothing saved, both indexes are listed and marked absent."""
        reports = index_info(tmp_path)
        assert len(reports) == 2
        assert {report["name"] for report in reports} == {
            "symbol_history",
            "hash_occurrence",
        }
        assert all(report["status"] == "absent" for report in reports)

    def test_symbol_history_present(self, tmp_path: pathlib.Path) -> None:
        """A saved symbol_history index is reported present with its entry count."""
        entry = SymbolHistoryEntry("c", "t", "insert", "cid", "bh", "sig")
        save_symbol_history(tmp_path, {"a.py::f": [entry], "b.py::g": [entry]})
        report = self._report_for(tmp_path, "symbol_history")
        assert report["status"] == "present"
        assert report["entries"] == 2

    def test_hash_occurrence_present(self, tmp_path: pathlib.Path) -> None:
        """A saved hash_occurrence index is reported present with its entry count."""
        save_hash_occurrence(tmp_path, {"h1": ["a.py::f"], "h2": ["b.py::g"]})
        report = self._report_for(tmp_path, "hash_occurrence")
        assert report["status"] == "present"
        assert report["entries"] == 2

    def test_corrupt_index_reported(self, tmp_path: pathlib.Path) -> None:
        """An undecodable symbol_history file is reported as corrupt."""
        target_dir = indices_dir(tmp_path)
        target_dir.mkdir(parents=True, exist_ok=True)
        corrupt_file = target_dir / "symbol_history.json"
        corrupt_file.write_bytes(b"\xff\xfe garbage")
        report = self._report_for(tmp_path, "symbol_history")
        assert report["status"] == "corrupt"

    def test_updated_at_present_when_index_exists(self, tmp_path: pathlib.Path) -> None:
        """The report for an existing index has a truthy ``updated_at``."""
        save_hash_occurrence(tmp_path, {"h": ["f.py::x"]})
        report = self._report_for(tmp_path, "hash_occurrence")
        assert report["updated_at"]  # non-empty string
File History
1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
6 days ago