gabriel / muse public
test_cmd_index.py python
717 lines 29.6 KB
Raw
1 """Tests for ``muse code index`` (status / rebuild / purge).
2
3 Coverage layers
4 ---------------
5 Unit
6 _build_symbol_history — empty repo, single commit with ops, manifest
7 cache (blob fetched once per obj_id), missing
8 manifest logged+skipped, no-op commits skipped,
9 SymbolCache consulted before read_object,
10 SymbolCache populated on miss.
11 _build_hash_occurrence — HEAD present, no HEAD (graceful empty), missing
12 manifest logged+empty, imports excluded, trivial
13 (size-1) entries excluded.
14 index_info — present, absent, corrupt states; entries is int.
15 purge_index — deletes existing file, returns False when absent,
16 raises ValueError for unknown name.
17
18 Integration (live repo, CliRunner)
19 status: exit-0, JSON keys + types, absent text, present text.
20 rebuild: exit-0, JSON schema, all counts, text output.
21 rebuild --dry-run: no files written, JSON dry_run=true, counts correct.
22 rebuild --index symbol_history: only that index rebuilt.
23 rebuild --index hash_occurrence: only that index rebuilt.
24 purge: exit-0, JSON schema, present deleted, absent skipped.
25 purge --index <name>: only named index deleted.
26 Invalid --index rejected by argparse (exit non-zero).
27 Missing repo exits non-zero.
28
29 E2E (real symbol changes across commits)
30 After rebuild, status shows both indexes as present with non-zero entries.
31 symbol_history entries reflect commit history (insert recorded).
32 hash_occurrence clusters > 0 when duplicate bodies exist.
33 Rebuild is idempotent: two consecutive rebuilds yield identical JSON.
34 Dry-run counts match a subsequent real rebuild.
35 Purge then status shows absent; rebuild restores present.
36 Purge --index only removes targeted index.
37
38 Stress
39 50-commit repo: rebuild completes, all symbol_history addresses > 0.
40 Blob cache: blob fetched at most once per unique obj_id during rebuild.
41 Large flat file (200 functions): hash_occurrence correct after rebuild.
42 """
43
44 from __future__ import annotations
45
46 type _CountMap = dict[str, int]
47
48 import json
49 import pathlib
50 import textwrap
51 import time
52 from typing import TypedDict
53 from unittest import mock
54
55 import pytest
56 from tests.cli_test_helper import CliRunner
57
58 from muse.cli.commands.index_rebuild import _build_hash_occurrence, _build_symbol_history
59 from muse.core.indices import (
60 KNOWN_INDEX_NAMES,
61 IndexInfoEntry,
62 SymbolHistoryEntry,
63 index_info,
64 purge_index,
65 )
66 from muse.core.symbol_cache import SymbolCache
67 from muse.core.paths import indices_dir
68
69 # ---------------------------------------------------------------------------
70 # Runner
71 # ---------------------------------------------------------------------------
72
# One shared runner drives every CLI invocation in this module.
runner = CliRunner()
# Placeholder passed as the first argument of ``runner.invoke``.
cli = None  # CliRunner always targets muse.cli.app.main
75
76
77 # ---------------------------------------------------------------------------
78 # TypedDicts for JSON schema validation
79 # ---------------------------------------------------------------------------
80
81
82 class _StatusEntry(TypedDict):
83 name: str
84 status: str
85 entries: int
86 updated_at: str | None
87
88
89 class _RebuildPayload(TypedDict, total=False):
90 schema_version: str
91 dry_run: bool
92 rebuilt: list[str]
93 symbol_history_addresses: int
94 symbol_history_events: int
95 hash_occurrence_clusters: int
96 hash_occurrence_addresses: int
97
98
99 class _PurgePayload(TypedDict):
100 schema_version: str
101 purged: list[str]
102 skipped: list[str]
103
104
105 # ---------------------------------------------------------------------------
106 # Helpers
107 # ---------------------------------------------------------------------------
108
109
def _index_path(root: pathlib.Path, name: str) -> pathlib.Path:
    """Return the path of the ``<name>.json`` index file under *root*'s indices dir."""
    filename = f"{name}.json"
    return indices_dir(root) / filename
112
113
def _index_exists(root: pathlib.Path, name: str) -> bool:
    """Report whether the JSON file for index *name* currently exists under *root*."""
    index_file = _index_path(root, name)
    return index_file.exists()
116
117
def _invoke_rebuild_json(extra: list[str] | None = None) -> _RebuildPayload:
    """Run ``code index rebuild --json`` and return the decoded payload.

    *extra* holds additional CLI arguments appended after ``--json``.  A
    non-zero exit code fails the calling test with the captured output.
    """
    argv = ["code", "index", "rebuild", "--json"]
    if extra:
        argv.extend(extra)
    outcome = runner.invoke(cli, argv)
    assert outcome.exit_code == 0, outcome.output
    payload: _RebuildPayload = json.loads(outcome.output)
    return payload
124
125
def _invoke_status_json() -> list[_StatusEntry]:
    """Run ``code index status --json`` and return its ``indexes`` list.

    A non-zero exit code fails the calling test with the captured output.
    """
    outcome = runner.invoke(cli, ["code", "index", "status", "--json"])
    assert outcome.exit_code == 0, outcome.output
    document = json.loads(outcome.output)
    entries: list[_StatusEntry] = document["indexes"]
    return entries
132
133
134 # ---------------------------------------------------------------------------
135 # Fixtures
136 # ---------------------------------------------------------------------------
137
138
@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Initialise a ``code``-domain repository in *tmp_path* and return its root.

    Both the working directory and ``MUSE_REPO_ROOT`` point at the temporary
    directory before ``init`` runs; monkeypatch undoes both after the test.
    """
    monkeypatch.chdir(tmp_path)
    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    init_outcome = runner.invoke(cli, ["init", "--domain", "code"])
    assert init_outcome.exit_code == 0, init_outcome.output
    return tmp_path
146
147
@pytest.fixture
def two_commit_repo(repo: pathlib.Path) -> pathlib.Path:
    """Repo with two commits: v1 has one function, v2 replaces it."""
    first_version = textwrap.dedent("""\
        def compute(items):
            return sum(items)
    """)
    second_version = textwrap.dedent("""\
        def compute(items):
            return sum(items) * 2
    """)
    billing = repo / "billing.py"
    # Write and commit each version in order; every commit must succeed.
    for message, source in (("v1", first_version), ("v2", second_version)):
        billing.write_text(source)
        committed = runner.invoke(cli, ["commit", "-m", message])
        assert committed.exit_code == 0, committed.output
    return repo
165
166
@pytest.fixture
def clone_repo(repo: pathlib.Path) -> pathlib.Path:
    """Repo with two files containing identical body → one hash_occurrence cluster.

    ``a.py`` holds only ``helper``; ``b.py`` holds the same ``helper`` plus an
    unrelated ``other`` function.  Both are committed in a single commit.
    """
    body = "def helper():\n return True\n"
    (repo / "a.py").write_text(body)
    (repo / "b.py").write_text(f"{body}\ndef other():\n pass\n")
    committed = runner.invoke(cli, ["commit", "-m", "clones"])
    # Fail here rather than letting dependent tests run against an empty repo.
    assert committed.exit_code == 0, committed.output
    return repo
175
176
177 # ---------------------------------------------------------------------------
178 # Unit — _build_symbol_history
179 # ---------------------------------------------------------------------------
180
181
class TestBuildSymbolHistory:
    """Unit tests for ``_build_symbol_history``.

    Covers the result shape, handling of missing manifests, de-duplicated
    manifest/blob fetches, and interaction with ``SymbolCache``.
    """

    def test_empty_repo_returns_empty(self, repo: pathlib.Path) -> None:
        """A repo without commits produces an empty dict."""
        idx = _build_symbol_history(repo)
        assert isinstance(idx, dict)
        assert len(idx) == 0

    def test_after_commit_has_entries(self, two_commit_repo: pathlib.Path) -> None:
        """Committed symbol changes produce at least one address."""
        idx = _build_symbol_history(two_commit_repo)
        assert len(idx) > 0

    def test_address_contains_double_colon(self, two_commit_repo: pathlib.Path) -> None:
        """Every address key contains the ``::`` separator."""
        idx = _build_symbol_history(two_commit_repo)
        assert all("::" in addr for addr in idx)

    def test_entries_are_symbol_history_entry(self, two_commit_repo: pathlib.Path) -> None:
        """Every value is a sequence of ``SymbolHistoryEntry`` instances."""
        idx = _build_symbol_history(two_commit_repo)
        for entries in idx.values():
            for e in entries:
                assert isinstance(e, SymbolHistoryEntry)

    def test_missing_manifest_skipped_with_log(
        self, two_commit_repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Commits with missing snapshot manifests are logged and skipped."""
        import logging

        with (
            caplog.at_level(logging.DEBUG, logger="muse.cli.commands.index_rebuild"),
            mock.patch(
                "muse.cli.commands.index_rebuild.get_commit_snapshot_manifest",
                return_value=None,
            ),
        ):
            idx = _build_symbol_history(two_commit_repo)
        # All commits skipped → empty index
        assert len(idx) == 0
        assert any("Missing snapshot manifest" in r.message for r in caplog.records)

    def test_manifest_cache_prevents_double_fetch(self, two_commit_repo: pathlib.Path) -> None:
        """Each unique manifest (commit) is fetched at most once."""
        from muse.core.snapshots import get_commit_snapshot_manifest as original

        call_counts: _CountMap = {}

        # NOTE(review): the manifest is annotated as a path → object-id mapping
        # because that is how this class iterates it; confirm against the
        # project's own manifest type.
        def counting_fetch(root: pathlib.Path, commit_id: str) -> dict[str, str] | None:
            call_counts[commit_id] = call_counts.get(commit_id, 0) + 1
            result: dict[str, str] | None = original(root, commit_id)
            return result

        with mock.patch(
            "muse.cli.commands.index_rebuild.get_commit_snapshot_manifest",
            side_effect=counting_fetch,
        ):
            _build_symbol_history(two_commit_repo)

        # Guard against a vacuous pass: with two commits the patched function
        # must have been reached at least once.
        assert call_counts, "patched get_commit_snapshot_manifest was never called"
        for commit_id, count in call_counts.items():
            assert count == 1, (
                f"Manifest for commit {commit_id[:8]} fetched {count} times — expected 1"
            )

    def test_blob_cache_prevents_double_parse(self, two_commit_repo: pathlib.Path) -> None:
        """Each unique blob (obj_id) is read at most once within a single run."""
        from muse.core.object_store import read_object as original_read

        obj_fetch_count: _CountMap = {}

        def counting_read(root: pathlib.Path, obj_id: str) -> bytes | None:
            obj_fetch_count[obj_id] = obj_fetch_count.get(obj_id, 0) + 1
            result: bytes | None = original_read(root, obj_id)
            return result

        with mock.patch(
            "muse.cli.commands.index_rebuild.read_object",
            side_effect=counting_read,
        ):
            _build_symbol_history(two_commit_repo)

        duplicates = {oid: n for oid, n in obj_fetch_count.items() if n > 1}
        assert not duplicates, (
            f"Blobs fetched more than once: {duplicates} — blob_cache not working"
        )

    def test_symbol_cache_consulted_before_read_object(
        self, two_commit_repo: pathlib.Path
    ) -> None:
        """When SymbolCache has a hit, read_object is never called for that obj_id."""
        from muse.core.commits import get_all_commits
        from muse.core.object_store import read_object as real_read
        from muse.core.snapshots import get_commit_snapshot_manifest
        from muse.plugins.code._query import is_semantic
        from muse.plugins.code.ast_parser import parse_symbols as real_parse

        # Pre-populate a SymbolCache with every blob in every commit's manifest.
        warm_cache = SymbolCache.empty()
        for commit in get_all_commits(two_commit_repo):
            manifest = get_commit_snapshot_manifest(two_commit_repo, commit.commit_id) or {}
            for fp, oid in manifest.items():
                if is_semantic(fp) and warm_cache.get(oid) is None:
                    raw = real_read(two_commit_repo, oid)
                    if raw is not None:
                        warm_cache.put(oid, real_parse(raw, fp))

        read_calls: list[str] = []

        def spy_read(root: pathlib.Path, obj_id: str) -> bytes | None:
            read_calls.append(obj_id)
            result: bytes | None = real_read(root, obj_id)
            return result

        with mock.patch("muse.cli.commands.index_rebuild.read_object", side_effect=spy_read):
            _build_symbol_history(two_commit_repo, symbol_cache=warm_cache)

        assert read_calls == [], (
            f"read_object called {len(read_calls)} times despite warm SymbolCache"
        )

    def test_symbol_cache_populated_on_miss(self, two_commit_repo: pathlib.Path) -> None:
        """A cold SymbolCache is populated during _build_symbol_history."""
        cold_cache = SymbolCache.empty()
        assert cold_cache.size == 0
        _build_symbol_history(two_commit_repo, symbol_cache=cold_cache)
        # Cache should have been populated with at least one entry.
        assert cold_cache.size > 0
307
308
309 # ---------------------------------------------------------------------------
310 # Unit — _build_hash_occurrence
311 # ---------------------------------------------------------------------------
312
313
class TestBuildHashOccurrence:
    """Unit tests for ``_build_hash_occurrence``.

    Each test that sets up history asserts that its commit succeeded, because
    an empty index satisfies the assertions below vacuously.
    """

    def test_no_head_returns_empty(self, repo: pathlib.Path) -> None:
        """No commits → no HEAD ref → gracefully returns empty dict."""
        idx = _build_hash_occurrence(repo)
        assert idx == {}

    def test_single_function_not_a_clone(self, repo: pathlib.Path) -> None:
        """A function that occurs once never appears as a size-1 cluster."""
        (repo / "solo.py").write_text("def unique():\n return 42\n")
        committed = runner.invoke(cli, ["commit", "-m", "solo"])
        assert committed.exit_code == 0, committed.output
        idx = _build_hash_occurrence(repo)
        # unique function appears only once → filtered out
        assert all(len(addrs) > 1 for addrs in idx.values())

    def test_identical_bodies_form_cluster(self, clone_repo: pathlib.Path) -> None:
        """Identical bodies in two files yield at least one cluster."""
        idx = _build_hash_occurrence(clone_repo)
        assert len(idx) > 0
        # every cluster has ≥ 2 members
        assert all(len(addrs) >= 2 for addrs in idx.values())

    def test_imports_excluded(self, repo: pathlib.Path) -> None:
        """No clustered address refers to an import symbol."""
        (repo / "mod.py").write_text("import os\nimport sys\ndef fn():\n return 1\n")
        committed = runner.invoke(cli, ["commit", "-m", "imports"])
        assert committed.exit_code == 0, committed.output
        idx = _build_hash_occurrence(repo)
        for addrs in idx.values():
            for addr in addrs:
                assert "::import::" not in addr

    def test_missing_manifest_returns_empty(self, two_commit_repo: pathlib.Path) -> None:
        """A missing snapshot manifest yields an empty dict."""
        with mock.patch(
            "muse.cli.commands.index_rebuild.get_commit_snapshot_manifest",
            return_value=None,
        ):
            idx = _build_hash_occurrence(two_commit_repo)
        assert idx == {}
348
349
350 # ---------------------------------------------------------------------------
351 # Unit — index_info and purge_index
352 # ---------------------------------------------------------------------------
353
354
class TestIndexInfo:
    """Unit tests for ``index_info`` in the absent, present and corrupt states."""

    def test_absent_before_rebuild(self, repo: pathlib.Path) -> None:
        """Before any rebuild, every known index is reported as absent."""
        report = index_info(repo)
        assert len(report) == len(KNOWN_INDEX_NAMES)
        for item in report:
            assert item["status"] == "absent"

    def test_entries_is_int_not_str(self, repo: pathlib.Path) -> None:
        """``entries`` is an ``int`` for every reported index."""
        for info in index_info(repo):
            count = info["entries"]
            assert isinstance(count, int), (
                f"{info['name']}.entries should be int, got {type(info['entries'])}"
            )

    def test_present_after_rebuild(self, two_commit_repo: pathlib.Path) -> None:
        """After a rebuild, every index is reported as present."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        for item in index_info(two_commit_repo):
            assert item["status"] == "present"

    def test_corrupt_index_reported(self, repo: pathlib.Path) -> None:
        """An index file holding undecodable bytes is reported as corrupt."""
        indices_dir(repo).mkdir(parents=True, exist_ok=True)
        damaged = _index_path(repo, "symbol_history")
        damaged.write_bytes(b"\xff\xfe corrupt garbage")
        report = index_info(repo)
        sym = next(item for item in report if item["name"] == "symbol_history")
        assert sym["status"] == "corrupt"

    def test_updated_at_is_none_when_absent(self, repo: pathlib.Path) -> None:
        """Absent indexes carry ``updated_at`` of ``None``."""
        for item in index_info(repo):
            assert item["updated_at"] is None
386
387
class TestPurgeIndex:
    """Unit tests for ``purge_index`` return values and error handling."""

    def test_purge_existing_returns_true(self, two_commit_repo: pathlib.Path) -> None:
        """Purging an index that exists deletes its file and returns True."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        assert _index_exists(two_commit_repo, "symbol_history")
        deleted = purge_index(two_commit_repo, "symbol_history")
        assert deleted is True
        assert not _index_exists(two_commit_repo, "symbol_history")

    def test_purge_absent_returns_false(self, repo: pathlib.Path) -> None:
        """Purging an index with no file on disk returns False."""
        deleted = purge_index(repo, "hash_occurrence")
        assert deleted is False

    def test_purge_unknown_name_raises(self, repo: pathlib.Path) -> None:
        """An unrecognised index name raises ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown index name"):
            purge_index(repo, "nonexistent_index")
403
404
405 # ---------------------------------------------------------------------------
406 # Integration — CLI runner tests
407 # ---------------------------------------------------------------------------
408
409
class TestIndexStatusCLI:
    """Integration tests for ``muse code index status`` through the CLI runner."""

    def test_exit_zero(self, repo: pathlib.Path) -> None:
        """Plain ``status`` exits with code 0 in an initialised repo."""
        outcome = runner.invoke(cli, ["code", "index", "status"])
        assert outcome.exit_code == 0

    def test_json_is_list(self, repo: pathlib.Path) -> None:
        """``indexes`` is a list with one element per known index."""
        outcome = runner.invoke(cli, ["code", "index", "status", "--json"])
        assert outcome.exit_code == 0
        indexes = json.loads(outcome.output)["indexes"]
        assert isinstance(indexes, list)
        assert len(indexes) == len(KNOWN_INDEX_NAMES)

    def test_json_entry_keys(self, repo: pathlib.Path) -> None:
        """Every status entry carries the four expected keys."""
        required = ("name", "status", "entries", "updated_at")
        for entry in _invoke_status_json():
            for key in required:
                assert key in entry, f"Missing key {key!r} in status entry"

    def test_json_entries_is_int(self, repo: pathlib.Path) -> None:
        """``entries`` decodes to an ``int`` for every index."""
        for entry in _invoke_status_json():
            assert isinstance(entry["entries"], int)

    def test_json_absent_status_before_rebuild(self, repo: pathlib.Path) -> None:
        """Before a rebuild, every index reports ``absent``."""
        statuses = [entry["status"] for entry in _invoke_status_json()]
        assert all(status == "absent" for status in statuses)

    def test_text_contains_hint_to_rebuild(self, repo: pathlib.Path) -> None:
        """Text output points the user at the rebuild command."""
        outcome = runner.invoke(cli, ["code", "index", "status"])
        assert "muse code index rebuild" in outcome.output

    def test_missing_repo_exits_nonzero(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Running in a directory without a repo fails with a non-zero code."""
        monkeypatch.chdir(tmp_path)
        outcome = runner.invoke(cli, ["code", "index", "status"])
        assert outcome.exit_code != 0
446
447
class TestIndexRebuildCLI:
    """Integration tests for ``muse code index rebuild`` through the CLI runner."""

    # All four count keys of the rebuild payload; dry-run and real runs must agree.
    _COUNT_KEYS = (
        "symbol_history_addresses",
        "symbol_history_events",
        "hash_occurrence_clusters",
        "hash_occurrence_addresses",
    )

    def test_exit_zero(self, two_commit_repo: pathlib.Path) -> None:
        """Plain ``rebuild`` exits with code 0."""
        result = runner.invoke(cli, ["code", "index", "rebuild"])
        assert result.exit_code == 0

    def test_json_top_level_keys(self, two_commit_repo: pathlib.Path) -> None:
        """The JSON payload exposes every expected top-level key."""
        data = _invoke_rebuild_json()
        # NOTE(review): this checks "schema" while _RebuildPayload declares
        # "schema_version" — confirm which key the CLI actually emits.
        for key in ("schema", "dry_run", "rebuilt", *self._COUNT_KEYS):
            assert key in data, f"Missing key {key!r}"

    def test_json_dry_run_false_by_default(self, two_commit_repo: pathlib.Path) -> None:
        """Without ``--dry-run`` the payload reports ``dry_run`` as False."""
        data = _invoke_rebuild_json()
        assert data["dry_run"] is False

    def test_json_rebuilt_contains_both(self, two_commit_repo: pathlib.Path) -> None:
        """A default rebuild lists every known index as rebuilt."""
        data = _invoke_rebuild_json()
        assert set(data["rebuilt"]) == set(KNOWN_INDEX_NAMES)

    def test_rebuild_writes_files(self, two_commit_repo: pathlib.Path) -> None:
        """A real rebuild writes both index files."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        assert _index_exists(two_commit_repo, "symbol_history")
        assert _index_exists(two_commit_repo, "hash_occurrence")

    def test_dry_run_no_files_written(self, two_commit_repo: pathlib.Path) -> None:
        """``--dry-run`` succeeds without writing any index file."""
        result = runner.invoke(cli, ["code", "index", "rebuild", "--dry-run"])
        assert result.exit_code == 0
        assert not _index_exists(two_commit_repo, "symbol_history")
        assert not _index_exists(two_commit_repo, "hash_occurrence")

    def test_dry_run_json_flag(self, two_commit_repo: pathlib.Path) -> None:
        """With ``--dry-run`` the payload reports ``dry_run`` as True."""
        data = _invoke_rebuild_json(["--dry-run"])
        assert data["dry_run"] is True

    def test_dry_run_counts_match_real_rebuild(self, two_commit_repo: pathlib.Path) -> None:
        """Dry-run counts equal those of a subsequent real rebuild.

        Compares all four count keys, including ``hash_occurrence_addresses``.
        """
        dry = _invoke_rebuild_json(["--dry-run"])
        real = _invoke_rebuild_json()
        for key in self._COUNT_KEYS:
            assert dry[key] == real[key], f"Mismatch on {key}"

    def test_index_symbol_history_only(self, two_commit_repo: pathlib.Path) -> None:
        """``--index symbol_history`` rebuilds only that index."""
        data = _invoke_rebuild_json(["--index", "symbol_history"])
        assert data["rebuilt"] == ["symbol_history"]
        assert _index_exists(two_commit_repo, "symbol_history")
        assert not _index_exists(two_commit_repo, "hash_occurrence")

    def test_index_hash_occurrence_only(self, two_commit_repo: pathlib.Path) -> None:
        """``--index hash_occurrence`` rebuilds only that index."""
        data = _invoke_rebuild_json(["--index", "hash_occurrence"])
        assert data["rebuilt"] == ["hash_occurrence"]
        assert not _index_exists(two_commit_repo, "symbol_history")
        assert _index_exists(two_commit_repo, "hash_occurrence")

    def test_text_output_no_files_on_dry_run(self, two_commit_repo: pathlib.Path) -> None:
        """Text output of a dry run mentions "dry run" (case-insensitive)."""
        result = runner.invoke(cli, ["code", "index", "rebuild", "--dry-run"])
        assert "dry run" in result.output.lower()

    def test_text_output_rebuild_references_status(self, two_commit_repo: pathlib.Path) -> None:
        """Text output of a rebuild points the user at the status command."""
        result = runner.invoke(cli, ["code", "index", "rebuild"])
        assert "muse code index status" in result.output

    def test_missing_repo_exits_nonzero(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Running in a directory without a repo fails with a non-zero code."""
        monkeypatch.chdir(tmp_path)
        result = runner.invoke(cli, ["code", "index", "rebuild"])
        assert result.exit_code != 0
514
515
class TestIndexPurgeCLI:
    """Integration tests for ``muse code index purge`` through the CLI runner."""

    def test_exit_zero(self, two_commit_repo: pathlib.Path) -> None:
        """Purging after a rebuild exits with code 0."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        outcome = runner.invoke(cli, ["code", "index", "purge"])
        assert outcome.exit_code == 0

    def test_json_schema(self, two_commit_repo: pathlib.Path) -> None:
        """The JSON payload exposes the expected top-level keys."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        outcome = runner.invoke(cli, ["code", "index", "purge", "--json"])
        assert outcome.exit_code == 0
        payload: _PurgePayload = json.loads(outcome.output)
        # NOTE(review): "schema" is checked here while _PurgePayload declares
        # "schema_version" — confirm which key the CLI actually emits.
        for key in ("schema", "purged", "skipped"):
            assert key in payload

    def test_purge_all_deletes_files(self, two_commit_repo: pathlib.Path) -> None:
        """Purging without ``--index`` removes both index files."""
        for subcommand in ("rebuild", "purge"):
            runner.invoke(cli, ["code", "index", subcommand])
        assert not _index_exists(two_commit_repo, "symbol_history")
        assert not _index_exists(two_commit_repo, "hash_occurrence")

    def test_purge_specific_index(self, two_commit_repo: pathlib.Path) -> None:
        """``--index symbol_history`` deletes only that index file."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        purge_argv = ["code", "index", "purge", "--index", "symbol_history", "--json"]
        outcome = runner.invoke(cli, purge_argv)
        payload: _PurgePayload = json.loads(outcome.output)
        assert "symbol_history" in payload["purged"]
        assert not _index_exists(two_commit_repo, "symbol_history")
        assert _index_exists(two_commit_repo, "hash_occurrence")

    def test_purge_absent_shows_skipped(self, repo: pathlib.Path) -> None:
        """With nothing on disk, every known index is listed as skipped."""
        outcome = runner.invoke(cli, ["code", "index", "purge", "--json"])
        payload: _PurgePayload = json.loads(outcome.output)
        assert payload["purged"] == []
        assert set(payload["skipped"]) == set(KNOWN_INDEX_NAMES)

    def test_purge_then_status_absent(self, two_commit_repo: pathlib.Path) -> None:
        """After rebuild + purge, status reports every index as absent."""
        for subcommand in ("rebuild", "purge"):
            runner.invoke(cli, ["code", "index", subcommand])
        entries = _invoke_status_json()
        assert all(entry["status"] == "absent" for entry in entries)

    def test_missing_repo_exits_nonzero(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Running in a directory without a repo fails with a non-zero code."""
        monkeypatch.chdir(tmp_path)
        outcome = runner.invoke(cli, ["code", "index", "purge"])
        assert outcome.exit_code != 0
563
564
565 # ---------------------------------------------------------------------------
566 # E2E — real commit history interactions
567 # ---------------------------------------------------------------------------
568
569
class TestIndexE2E:
    """End-to-end tests that drive rebuild / purge / status against real commits."""

    def test_status_shows_present_after_rebuild(self, two_commit_repo: pathlib.Path) -> None:
        """After a rebuild, status lists every index as present."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        for entry in _invoke_status_json():
            assert entry["status"] == "present", f"{entry['name']} still absent"

    def test_status_entries_nonzero_after_rebuild(self, two_commit_repo: pathlib.Path) -> None:
        """After a rebuild, symbol_history reports a positive entry count."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        entries = _invoke_status_json()
        sym = next(entry for entry in entries if entry["name"] == "symbol_history")
        assert sym["entries"] > 0

    def test_symbol_history_contains_billing_compute(self, two_commit_repo: pathlib.Path) -> None:
        """The history index contains an address for ``billing.py::compute``."""
        addresses = _build_symbol_history(two_commit_repo)
        assert any("billing.py::compute" in address for address in addresses)

    def test_hash_occurrence_cluster_for_clones(self, clone_repo: pathlib.Path) -> None:
        """Duplicate bodies produce at least one hash_occurrence cluster."""
        clusters = _build_hash_occurrence(clone_repo)
        assert len(clusters) > 0

    def test_rebuild_is_idempotent(self, two_commit_repo: pathlib.Path) -> None:
        """Two consecutive rebuilds report the same counts."""
        first = _invoke_rebuild_json()
        second = _invoke_rebuild_json()
        assert first["symbol_history_addresses"] == second["symbol_history_addresses"]
        assert first["symbol_history_events"] == second["symbol_history_events"]
        assert first["hash_occurrence_clusters"] == second["hash_occurrence_clusters"]

    def test_purge_then_rebuild_restores_present(self, two_commit_repo: pathlib.Path) -> None:
        """Rebuild → purge → rebuild leaves every index present."""
        for subcommand in ("rebuild", "purge", "rebuild"):
            runner.invoke(cli, ["code", "index", subcommand])
        for entry in _invoke_status_json():
            assert entry["status"] == "present"

    def test_purge_index_only_removes_targeted(self, two_commit_repo: pathlib.Path) -> None:
        """Purging hash_occurrence leaves symbol_history present."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        runner.invoke(cli, ["code", "index", "purge", "--index", "hash_occurrence"])
        entries = _invoke_status_json()
        sym = next(entry for entry in entries if entry["name"] == "symbol_history")
        ho = next(entry for entry in entries if entry["name"] == "hash_occurrence")
        assert sym["status"] == "present"
        assert ho["status"] == "absent"

    def test_dry_run_counts_match_real_rebuild(self, two_commit_repo: pathlib.Path) -> None:
        """Every count key matches between a dry run and a real rebuild."""
        dry = _invoke_rebuild_json(["--dry-run"])
        real = _invoke_rebuild_json()
        count_keys = (
            "symbol_history_addresses",
            "symbol_history_events",
            "hash_occurrence_clusters",
            "hash_occurrence_addresses",
        )
        for key in count_keys:
            assert dry.get(key) == real.get(key), f"Mismatch on {key}"
621
622
623 # ---------------------------------------------------------------------------
624 # Stress
625 # ---------------------------------------------------------------------------
626
627
class TestIndexStress:
    """Stress tests: many commits, blob-cache behaviour at scale, large files.

    Every setup commit is asserted to succeed so that no test can pass against
    an accidentally empty history.
    """

    def test_50_commit_rebuild_completes(self, repo: pathlib.Path) -> None:
        """50 commits, each changing one function — rebuild must complete."""
        for i in range(50):
            (repo / "worker.py").write_text(f"def work():\n return {i}\n")
            r = runner.invoke(cli, ["commit", "-m", f"v{i}"])
            assert r.exit_code == 0, r.output

        result = runner.invoke(cli, ["code", "index", "rebuild", "--json"])
        assert result.exit_code == 0
        data: _RebuildPayload = json.loads(result.output)
        assert data.get("symbol_history_addresses", 0) > 0

    def test_blob_cache_scales(self, repo: pathlib.Path) -> None:
        """10 commits on 1 file: blob for each version fetched exactly once."""
        from collections import Counter

        from muse.core.object_store import read_object as original_read

        for i in range(10):
            (repo / "target.py").write_text(f"def fn():\n return {i}\n")
            r = runner.invoke(cli, ["commit", "-m", f"v{i}"])
            assert r.exit_code == 0, r.output

        fetch_log: list[str] = []

        def tracked_read(root: pathlib.Path, obj_id: str) -> bytes | None:
            fetch_log.append(obj_id)
            result: bytes | None = original_read(root, obj_id)
            return result

        with mock.patch(
            "muse.cli.commands.index_rebuild.read_object", side_effect=tracked_read
        ):
            _build_symbol_history(repo)

        # Every unique obj_id must appear exactly once; Counter tallies the
        # log in a single pass instead of rescanning it per id.
        for obj_id, times in Counter(fetch_log).items():
            assert times == 1, (
                f"obj_id {obj_id[:8]}… fetched {times} times"
            )

    def test_large_flat_file_hash_occurrence(self, repo: pathlib.Path) -> None:
        """200 unique functions: no hash_occurrence clusters (all distinct bodies)."""
        funcs = "\n\n".join(f"def func_{i}():\n return {i}" for i in range(200))
        (repo / "flat.py").write_text(f"{funcs}\n")
        r = runner.invoke(cli, ["commit", "-m", "flat"])
        # Without this check a failed commit leaves no HEAD, and the empty
        # index below would pass for the wrong reason.
        assert r.exit_code == 0, r.output
        idx = _build_hash_occurrence(repo)
        # All distinct bodies → no clusters
        assert len(idx) == 0

    def test_rebuild_performance(self, repo: pathlib.Path) -> None:
        """20 commits: rebuild must finish within 30 seconds."""
        for i in range(20):
            (repo / "perf.py").write_text(f"def work():\n return {i}\n")
            r = runner.invoke(cli, ["commit", "-m", f"v{i}"])
            assert r.exit_code == 0, r.output

        start = time.monotonic()
        result = runner.invoke(cli, ["code", "index", "rebuild"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0
        assert elapsed < 30.0, f"rebuild took {elapsed:.1f}s — too slow"
689
690
class TestRegisterFlags:
    """Argparse-level checks of the JSON flag registered for ``index rebuild``."""

    @staticmethod
    def _json_out(argv: list[str]) -> bool:
        """Parse *argv* with a fresh parser and return the ``json_out`` value."""
        import argparse
        from muse.cli.commands.index_rebuild import register

        parser = argparse.ArgumentParser()
        register(parser.add_subparsers())
        parsed = parser.parse_args(argv)
        flag: bool = parsed.json_out
        return flag

    def test_json_short_flag(self) -> None:
        """``-j`` sets ``json_out`` to True."""
        assert self._json_out(["index", "rebuild", "-j"]) is True

    def test_json_long_flag(self) -> None:
        """``--json`` sets ``json_out`` to True."""
        assert self._json_out(["index", "rebuild", "--json"]) is True

    def test_default_no_json(self) -> None:
        """Without either flag ``json_out`` is False."""
        assert self._json_out(["index", "rebuild"]) is False
File History 1 commit