gabriel / muse public
test_cmd_index.py python
726 lines 30.0 KB
Raw
1 """Tests for ``muse code index`` (status / rebuild / purge).
2
3 Coverage layers
4 ---------------
5 Unit
6 _build_symbol_history — empty repo, single commit with ops, manifest
7 cache (blob fetched once per obj_id), missing
8 manifest logged+skipped, no-op commits skipped,
9 SymbolCache consulted before read_object,
10 SymbolCache populated on miss.
11 _build_hash_occurrence — HEAD present, no HEAD (graceful empty), missing
12 manifest logged+empty, imports excluded, trivial
13 (size-1) entries excluded.
14 index_info — present, absent, corrupt states; entries is int.
15 purge_index — deletes existing file, returns False when absent,
16 raises ValueError for unknown name.
17
18 Integration (live repo, CliRunner)
19 status: exit-0, JSON keys + types, absent text, present text.
20 rebuild: exit-0, JSON schema, all counts, text output.
21 rebuild --dry-run: no files written, JSON dry_run=true, counts correct.
22 rebuild --index symbol_history: only that index rebuilt.
23 rebuild --index hash_occurrence: only that index rebuilt.
24 purge: exit-0, JSON schema, present deleted, absent skipped.
25 purge --index <name>: only named index deleted.
26 Invalid --index rejected by argparse (exit non-zero).
27 Missing repo exits non-zero.
28
29 E2E (real symbol changes across commits)
30 After rebuild, status shows both indexes as present with non-zero entries.
31 symbol_history entries reflect commit history (insert recorded).
32 hash_occurrence clusters > 0 when duplicate bodies exist.
33 Rebuild is idempotent: two consecutive rebuilds yield identical JSON.
34 Dry-run counts match a subsequent real rebuild.
35 Purge then status shows absent; rebuild restores present.
36 Purge --index only removes targeted index.
37
38 Stress
39 50-commit repo: rebuild completes, all symbol_history addresses > 0.
40 Manifest cache: blob fetched at most once per unique obj_id during rebuild.
41 Large flat file (200 functions): hash_occurrence correct after rebuild.
42 """
43
44 from __future__ import annotations
45
# Alias for the ``{key: call count}`` dictionaries filled by the counting
# spies in the cache tests below.
type _CountMap = dict[str, int]
47
48 import json
49 import pathlib
50 import textwrap
51 import time
52 from typing import TypedDict
53 from unittest import mock
54
55 import pytest
56 from tests.cli_test_helper import CliRunner
57
58 from muse.cli.commands.index_rebuild import _build_hash_occurrence, _build_symbol_history
59 from muse.core.indices import (
60 KNOWN_INDEX_NAMES,
61 IndexInfoEntry,
62 SymbolHistoryEntry,
63 index_info,
64 purge_index,
65 )
66 from muse.core.symbol_cache import SymbolCache
67 from muse.core.paths import indices_dir
68
69 # ---------------------------------------------------------------------------
70 # Runner
71 # ---------------------------------------------------------------------------
72
# Single runner instance shared by every fixture and test in this module.
runner = CliRunner()
# Placeholder passed as the first argument of ``runner.invoke``.
cli = None  # CliRunner always targets muse.cli.app.main
75
76
77 # ---------------------------------------------------------------------------
78 # TypedDicts for JSON schema validation
79 # ---------------------------------------------------------------------------
80
81
class _StatusEntry(TypedDict):
    """One element of the ``indexes`` list parsed from ``status --json`` output."""

    name: str  # compared against "symbol_history" / "hash_occurrence" in the tests
    status: str  # the tests in this module compare against "absent" and "present"
    entries: int
    updated_at: str | None
87
88
class _RebuildPayload(TypedDict, total=False):
    """Shape of the JSON document parsed from ``rebuild --json`` output.

    Declared with ``total=False``, so a type checker treats every key as
    optional.
    """

    # NOTE(review): the rebuild CLI test in this module checks for a "schema"
    # key rather than "schema_version" — confirm which name the command emits.
    schema_version: str
    dry_run: bool
    rebuilt: list[str]
    symbol_history_addresses: int
    symbol_history_events: int
    hash_occurrence_clusters: int
    hash_occurrence_addresses: int
97
98
class _PurgePayload(TypedDict):
    """Shape of the JSON document parsed from ``purge --json`` output."""

    # NOTE(review): the purge CLI test in this module checks for a "schema"
    # key rather than "schema_version" — confirm which name the command emits.
    schema_version: str
    purged: list[str]
    skipped: list[str]
103
104
105 # ---------------------------------------------------------------------------
106 # Helpers
107 # ---------------------------------------------------------------------------
108
109
def _index_path(root: pathlib.Path, name: str) -> pathlib.Path:
    """Return the path of the ``<name>.json`` file inside ``indices_dir(root)``."""
    directory = indices_dir(root)
    return directory / f"{name}.json"
112
113
def _index_exists(root: pathlib.Path, name: str) -> bool:
    """Tell whether the index file for *name* currently exists under *root*."""
    index_file = _index_path(root, name)
    return index_file.exists()
116
117
def _invoke_rebuild_json(extra: list[str] | None = None) -> _RebuildPayload:
    """Run ``code index rebuild --json`` and return the decoded payload.

    Args:
        extra: Optional extra CLI arguments appended after ``--json``.

    Returns:
        The JSON document printed by the command.

    The command must exit with code 0; otherwise the assertion fails and
    carries the command output as its message.
    """
    argv = ["code", "index", "rebuild", "--json", *(extra or [])]
    outcome = runner.invoke(cli, argv)
    assert outcome.exit_code == 0, outcome.output
    payload: _RebuildPayload = json.loads(outcome.output)
    return payload
124
125
def _invoke_status_json() -> list[_StatusEntry]:
    """Run ``code index status --json`` and return its ``indexes`` list.

    The command must exit with code 0; otherwise the assertion fails and
    carries the command output as its message.
    """
    outcome = runner.invoke(cli, ["code", "index", "status", "--json"])
    assert outcome.exit_code == 0, outcome.output
    document = json.loads(outcome.output)
    entries: list[_StatusEntry] = document["indexes"]
    return entries
132
133
134 # ---------------------------------------------------------------------------
135 # Fixtures
136 # ---------------------------------------------------------------------------
137
138
@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Initialise a ``code``-domain repo in *tmp_path* and return its root.

    The working directory and ``MUSE_REPO_ROOT`` are both pointed at the
    temporary directory before ``init`` is invoked.
    """
    root = tmp_path
    monkeypatch.chdir(root)
    monkeypatch.setenv("MUSE_REPO_ROOT", str(root))
    init = runner.invoke(cli, ["init", "--domain", "code"])
    assert init.exit_code == 0, init.output
    return root
146
147
@pytest.fixture
def two_commit_repo(repo: pathlib.Path) -> pathlib.Path:
    """Repo with two commits: v1 has one function, v2 replaces it."""
    target = repo / "billing.py"
    # (commit message, file content) for each successive commit.
    versions = (
        ("v1", textwrap.dedent("""\
            def compute(items):
                return sum(items)
        """)),
        ("v2", textwrap.dedent("""\
            def compute(items):
                return sum(items) * 2
        """)),
    )
    for message, content in versions:
        target.write_text(content)
        runner.invoke(cli, ["code", "add", "."])
        committed = runner.invoke(cli, ["commit", "-m", message])
        assert committed.exit_code == 0, committed.output
    return repo
167
168
@pytest.fixture
def clone_repo(repo: pathlib.Path) -> pathlib.Path:
    """Repo with two files containing identical body → one hash_occurrence cluster.

    ``a.py`` holds only ``helper``; ``b.py`` holds the same ``helper`` text
    followed by an extra ``other`` function. Both files go into a single
    commit, which must succeed.
    """
    body = "def helper():\n return True\n"
    (repo / "a.py").write_text(body)
    (repo / "b.py").write_text(f"{body}\ndef other():\n pass\n")
    runner.invoke(cli, ["code", "add", "."])
    committed = runner.invoke(cli, ["commit", "-m", "clones"])
    # Fail here, like ``two_commit_repo`` does, rather than letting a broken
    # commit show up later as a confusing "no clusters found" failure.
    assert committed.exit_code == 0, committed.output
    return repo
178
179
180 # ---------------------------------------------------------------------------
181 # Unit — _build_symbol_history
182 # ---------------------------------------------------------------------------
183
184
class TestBuildSymbolHistory:
    """Unit tests for ``_build_symbol_history``.

    Covers the empty-repo case, the shape of the returned index, handling of
    missing manifests, and the manifest / blob / ``SymbolCache`` caching paths.
    """

    def test_empty_repo_returns_empty(self, repo: pathlib.Path) -> None:
        """A repo without commits yields an empty dict."""
        idx = _build_symbol_history(repo)
        assert isinstance(idx, dict)
        assert len(idx) == 0

    def test_after_commit_has_entries(self, two_commit_repo: pathlib.Path) -> None:
        """Committed symbol changes produce at least one address."""
        idx = _build_symbol_history(two_commit_repo)
        assert len(idx) > 0

    def test_address_contains_double_colon(self, two_commit_repo: pathlib.Path) -> None:
        """Every key of the index contains ``::``."""
        idx = _build_symbol_history(two_commit_repo)
        assert all("::" in addr for addr in idx)

    def test_entries_are_symbol_history_entry(self, two_commit_repo: pathlib.Path) -> None:
        """Every value in the index is a sequence of ``SymbolHistoryEntry``."""
        idx = _build_symbol_history(two_commit_repo)
        for entries in idx.values():
            for e in entries:
                assert isinstance(e, SymbolHistoryEntry)

    def test_missing_manifest_skipped_with_log(
        self, two_commit_repo: pathlib.Path, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Commits with missing snapshot manifests are logged and skipped."""
        import logging

        with (
            caplog.at_level(logging.DEBUG, logger="muse.cli.commands.index_rebuild"),
            mock.patch(
                "muse.cli.commands.index_rebuild.get_commit_snapshot_manifest",
                return_value=None,
            ),
        ):
            idx = _build_symbol_history(two_commit_repo)
        # All commits skipped → empty index
        assert len(idx) == 0
        assert any("Missing snapshot manifest" in r.message for r in caplog.records)

    def test_manifest_cache_prevents_double_fetch(self, two_commit_repo: pathlib.Path) -> None:
        """Each unique manifest (commit) is fetched at most once."""
        # Plain local import, as the SymbolCache test below does; the real
        # function is captured before the name in index_rebuild is patched.
        from muse.core.snapshots import get_commit_snapshot_manifest as original

        call_counts: _CountMap = {}

        # NOTE(review): ``Manifest`` is not imported in the visible part of
        # this module. The annotations are never evaluated at runtime (lazy
        # annotations), but a type checker will flag them — confirm the
        # intended import.
        def counting_fetch(root: pathlib.Path, commit_id: str) -> Manifest | None:
            call_counts[commit_id] = call_counts.get(commit_id, 0) + 1
            result: Manifest | None = original(root, commit_id)
            return result

        with mock.patch(
            "muse.cli.commands.index_rebuild.get_commit_snapshot_manifest",
            side_effect=counting_fetch,
        ):
            _build_symbol_history(two_commit_repo)

        for commit_id, count in call_counts.items():
            assert count == 1, (
                f"Manifest for commit {commit_id[:8]} fetched {count} times — expected 1"
            )

    def test_blob_cache_prevents_double_parse(self, two_commit_repo: pathlib.Path) -> None:
        """Each unique blob (obj_id) is read at most once within a single run."""
        from muse.core.object_store import read_object as original_read

        obj_fetch_count: _CountMap = {}

        def counting_read(root: pathlib.Path, obj_id: str) -> bytes | None:
            obj_fetch_count[obj_id] = obj_fetch_count.get(obj_id, 0) + 1
            result: bytes | None = original_read(root, obj_id)
            return result

        with mock.patch(
            "muse.cli.commands.index_rebuild.read_object",
            side_effect=counting_read,
        ):
            _build_symbol_history(two_commit_repo)

        duplicates = {oid: n for oid, n in obj_fetch_count.items() if n > 1}
        assert not duplicates, (
            f"Blobs fetched more than once: {duplicates} — blob_cache not working"
        )

    def test_symbol_cache_consulted_before_read_object(
        self, two_commit_repo: pathlib.Path
    ) -> None:
        """When SymbolCache has a hit, read_object is never called for that obj_id."""
        from muse.core.commits import get_all_commits
        from muse.core.object_store import read_object as real_read
        from muse.core.snapshots import get_commit_snapshot_manifest
        from muse.plugins.code._query import is_semantic
        from muse.plugins.code.ast_parser import parse_symbols as real_parse

        # Pre-populate a SymbolCache with every blob in every commit's manifest.
        warm_cache = SymbolCache.empty()
        for commit in get_all_commits(two_commit_repo):
            manifest = get_commit_snapshot_manifest(two_commit_repo, commit.commit_id) or {}
            for fp, oid in manifest.items():
                if is_semantic(fp) and warm_cache.get(oid) is None:
                    raw = real_read(two_commit_repo, oid)
                    if raw is not None:
                        warm_cache.put(oid, real_parse(raw, fp))

        read_calls: list[str] = []

        def spy_read(root: pathlib.Path, obj_id: str) -> bytes | None:
            read_calls.append(obj_id)
            result: bytes | None = real_read(root, obj_id)
            return result

        with mock.patch("muse.cli.commands.index_rebuild.read_object", side_effect=spy_read):
            _build_symbol_history(two_commit_repo, symbol_cache=warm_cache)

        assert read_calls == [], (
            f"read_object called {len(read_calls)} times despite warm SymbolCache"
        )

    def test_symbol_cache_populated_on_miss(self, two_commit_repo: pathlib.Path) -> None:
        """A cold SymbolCache is populated during _build_symbol_history."""
        cold_cache = SymbolCache.empty()
        assert cold_cache.size == 0
        _build_symbol_history(two_commit_repo, symbol_cache=cold_cache)
        # Cache should have been populated with at least one entry.
        assert cold_cache.size > 0
310
311
312 # ---------------------------------------------------------------------------
313 # Unit — _build_hash_occurrence
314 # ---------------------------------------------------------------------------
315
316
class TestBuildHashOccurrence:
    """Unit tests for ``_build_hash_occurrence``."""

    def test_no_head_returns_empty(self, repo: pathlib.Path) -> None:
        """No commits → no HEAD ref → gracefully returns empty dict."""
        idx = _build_hash_occurrence(repo)
        assert idx == {}

    def test_single_function_not_a_clone(self, repo: pathlib.Path) -> None:
        """A function that appears once never forms a cluster."""
        (repo / "solo.py").write_text("def unique():\n return 42\n")
        runner.invoke(cli, ["code", "add", "."])
        committed = runner.invoke(cli, ["commit", "-m", "solo"])
        # With no commit the index is empty and the check below would pass
        # without exercising anything.
        assert committed.exit_code == 0, committed.output
        idx = _build_hash_occurrence(repo)
        # unique function appears only once → filtered out
        assert all(len(addrs) > 1 for addrs in idx.values())

    def test_identical_bodies_form_cluster(self, clone_repo: pathlib.Path) -> None:
        """Two files with the same function text produce at least one cluster."""
        idx = _build_hash_occurrence(clone_repo)
        assert len(idx) > 0
        # every cluster has ≥ 2 members
        assert all(len(addrs) >= 2 for addrs in idx.values())

    def test_imports_excluded(self, repo: pathlib.Path) -> None:
        """No clustered address contains the ``::import::`` marker."""
        (repo / "mod.py").write_text("import os\nimport sys\ndef fn():\n return 1\n")
        runner.invoke(cli, ["code", "add", "."])
        committed = runner.invoke(cli, ["commit", "-m", "imports"])
        # Same guard as above: an empty index would make the loop a no-op.
        assert committed.exit_code == 0, committed.output
        idx = _build_hash_occurrence(repo)
        for addrs in idx.values():
            for addr in addrs:
                assert "::import::" not in addr

    def test_missing_manifest_returns_empty(self, two_commit_repo: pathlib.Path) -> None:
        """A ``None`` manifest yields an empty index."""
        with mock.patch(
            "muse.cli.commands.index_rebuild.get_commit_snapshot_manifest",
            return_value=None,
        ):
            idx = _build_hash_occurrence(two_commit_repo)
        assert idx == {}
353
354
355 # ---------------------------------------------------------------------------
356 # Unit — index_info and purge_index
357 # ---------------------------------------------------------------------------
358
359
class TestIndexInfo:
    """Unit tests for ``index_info`` in the absent, present and corrupt states."""

    def test_absent_before_rebuild(self, repo: pathlib.Path) -> None:
        """One entry per known index, each reported as absent."""
        reports = index_info(repo)
        assert len(reports) == len(KNOWN_INDEX_NAMES)
        for report in reports:
            assert report["status"] == "absent"

    def test_entries_is_int_not_str(self, repo: pathlib.Path) -> None:
        """``entries`` is an ``int`` for every reported index."""
        for report in index_info(repo):
            count = report["entries"]
            assert isinstance(count, int), (
                f"{report['name']}.entries should be int, got {type(report['entries'])}"
            )

    def test_present_after_rebuild(self, two_commit_repo: pathlib.Path) -> None:
        """After a rebuild every index is reported as present."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        for report in index_info(two_commit_repo):
            assert report["status"] == "present"

    def test_corrupt_index_reported(self, repo: pathlib.Path) -> None:
        """An index file holding non-JSON bytes is reported as corrupt."""
        indices_dir(repo).mkdir(parents=True, exist_ok=True)
        broken = _index_path(repo, "symbol_history")
        broken.write_bytes(b"\xff\xfe corrupt garbage")
        reports = index_info(repo)
        sym = next(r for r in reports if r["name"] == "symbol_history")
        assert sym["status"] == "corrupt"

    def test_updated_at_is_none_when_absent(self, repo: pathlib.Path) -> None:
        """``updated_at`` is ``None`` for every index before any rebuild."""
        for report in index_info(repo):
            assert report["updated_at"] is None
391
392
class TestPurgeIndex:
    """Unit tests for ``purge_index``."""

    def test_purge_existing_returns_true(self, two_commit_repo: pathlib.Path) -> None:
        """Purging a built index returns ``True`` and removes its file."""
        root = two_commit_repo
        runner.invoke(cli, ["code", "index", "rebuild"])
        assert _index_exists(root, "symbol_history")
        removed = purge_index(root, "symbol_history")
        assert removed is True
        assert not _index_exists(root, "symbol_history")

    def test_purge_absent_returns_false(self, repo: pathlib.Path) -> None:
        """Purging an index that was never built returns ``False``."""
        removed = purge_index(repo, "hash_occurrence")
        assert removed is False

    def test_purge_unknown_name_raises(self, repo: pathlib.Path) -> None:
        """An unrecognised index name raises ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown index name"):
            purge_index(repo, "nonexistent_index")
408
409
410 # ---------------------------------------------------------------------------
411 # Integration — CLI runner tests
412 # ---------------------------------------------------------------------------
413
414
class TestIndexStatusCLI:
    """Integration tests for ``muse code index status`` through the CLI runner."""

    def test_exit_zero(self, repo: pathlib.Path) -> None:
        """``status`` succeeds on a freshly initialised repo."""
        result = runner.invoke(cli, ["code", "index", "status"])
        assert result.exit_code == 0

    def test_json_is_list(self, repo: pathlib.Path) -> None:
        """``indexes`` is a list with one element per known index."""
        result = runner.invoke(cli, ["code", "index", "status", "--json"])
        assert result.exit_code == 0
        payload = json.loads(result.output)
        data = payload["indexes"]
        assert isinstance(data, list)
        assert len(data) == len(KNOWN_INDEX_NAMES)

    def test_json_entry_keys(self, repo: pathlib.Path) -> None:
        """Every status entry carries the four expected keys."""
        data = _invoke_status_json()
        for entry in data:
            for key in ("name", "status", "entries", "updated_at"):
                assert key in entry, f"Missing key {key!r} in status entry"

    def test_json_entries_is_int(self, repo: pathlib.Path) -> None:
        """``entries`` is decoded as an ``int``."""
        data = _invoke_status_json()
        for entry in data:
            assert isinstance(entry["entries"], int)

    def test_json_absent_status_before_rebuild(self, repo: pathlib.Path) -> None:
        """Before any rebuild every index is reported as absent."""
        data = _invoke_status_json()
        assert all(e["status"] == "absent" for e in data)

    def test_text_contains_hint_to_rebuild(self, repo: pathlib.Path) -> None:
        """The text output mentions the rebuild command."""
        result = runner.invoke(cli, ["code", "index", "status"])
        assert "muse code index rebuild" in result.output

    def test_missing_repo_exits_nonzero(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Outside any repo, ``status`` exits with a non-zero code."""
        monkeypatch.chdir(tmp_path)
        # The ``repo`` fixture sets MUSE_REPO_ROOT, so an inherited value could
        # presumably point the command at a real repo; clear it so this test
        # does not depend on the ambient environment.
        monkeypatch.delenv("MUSE_REPO_ROOT", raising=False)
        result = runner.invoke(cli, ["code", "index", "status"])
        assert result.exit_code != 0
451
452
class TestIndexRebuildCLI:
    """Integration tests for ``muse code index rebuild`` through the CLI runner."""

    def test_exit_zero(self, two_commit_repo: pathlib.Path) -> None:
        """``rebuild`` succeeds on a repo with history."""
        result = runner.invoke(cli, ["code", "index", "rebuild"])
        assert result.exit_code == 0

    def test_json_top_level_keys(self, two_commit_repo: pathlib.Path) -> None:
        """The JSON payload carries every expected top-level key."""
        data = _invoke_rebuild_json()
        for key in ("schema", "dry_run", "rebuilt",
                    "symbol_history_addresses", "symbol_history_events",
                    "hash_occurrence_clusters", "hash_occurrence_addresses"):
            assert key in data, f"Missing key {key!r}"

    def test_json_dry_run_false_by_default(self, two_commit_repo: pathlib.Path) -> None:
        """Without ``--dry-run`` the payload reports ``dry_run`` as ``False``."""
        data = _invoke_rebuild_json()
        assert data["dry_run"] is False

    def test_json_rebuilt_contains_both(self, two_commit_repo: pathlib.Path) -> None:
        """A plain rebuild covers every known index."""
        data = _invoke_rebuild_json()
        assert set(data["rebuilt"]) == set(KNOWN_INDEX_NAMES)

    def test_rebuild_writes_files(self, two_commit_repo: pathlib.Path) -> None:
        """Both index files exist after a rebuild."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        assert _index_exists(two_commit_repo, "symbol_history")
        assert _index_exists(two_commit_repo, "hash_occurrence")

    def test_dry_run_no_files_written(self, two_commit_repo: pathlib.Path) -> None:
        """``--dry-run`` succeeds without creating either index file."""
        result = runner.invoke(cli, ["code", "index", "rebuild", "--dry-run"])
        assert result.exit_code == 0
        assert not _index_exists(two_commit_repo, "symbol_history")
        assert not _index_exists(two_commit_repo, "hash_occurrence")

    def test_dry_run_json_flag(self, two_commit_repo: pathlib.Path) -> None:
        """With ``--dry-run`` the payload reports ``dry_run`` as ``True``."""
        data = _invoke_rebuild_json(["--dry-run"])
        assert data["dry_run"] is True

    def test_dry_run_counts_match_real_rebuild(self, two_commit_repo: pathlib.Path) -> None:
        """A dry run reports the same counts as the real rebuild that follows."""
        dry = _invoke_rebuild_json(["--dry-run"])
        real = _invoke_rebuild_json()
        assert dry["symbol_history_addresses"] == real["symbol_history_addresses"]
        assert dry["symbol_history_events"] == real["symbol_history_events"]
        assert dry["hash_occurrence_clusters"] == real["hash_occurrence_clusters"]
        # The fourth counter was previously left out of this comparison.
        assert dry["hash_occurrence_addresses"] == real["hash_occurrence_addresses"]

    def test_index_symbol_history_only(self, two_commit_repo: pathlib.Path) -> None:
        """``--index symbol_history`` rebuilds only that index."""
        data = _invoke_rebuild_json(["--index", "symbol_history"])
        assert data["rebuilt"] == ["symbol_history"]
        assert _index_exists(two_commit_repo, "symbol_history")
        assert not _index_exists(two_commit_repo, "hash_occurrence")

    def test_index_hash_occurrence_only(self, two_commit_repo: pathlib.Path) -> None:
        """``--index hash_occurrence`` rebuilds only that index."""
        data = _invoke_rebuild_json(["--index", "hash_occurrence"])
        assert data["rebuilt"] == ["hash_occurrence"]
        assert not _index_exists(two_commit_repo, "symbol_history")
        assert _index_exists(two_commit_repo, "hash_occurrence")

    def test_text_output_no_files_on_dry_run(self, two_commit_repo: pathlib.Path) -> None:
        """The text output of a dry run says so."""
        result = runner.invoke(cli, ["code", "index", "rebuild", "--dry-run"])
        assert "dry run" in result.output.lower()

    def test_text_output_rebuild_references_status(self, two_commit_repo: pathlib.Path) -> None:
        """The text output of a rebuild mentions the status command."""
        result = runner.invoke(cli, ["code", "index", "rebuild"])
        assert "muse code index status" in result.output

    def test_missing_repo_exits_nonzero(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Outside any repo, ``rebuild`` exits with a non-zero code."""
        monkeypatch.chdir(tmp_path)
        # The ``repo`` fixture sets MUSE_REPO_ROOT, so an inherited value could
        # presumably point the command at a real repo; clear it so this test
        # does not depend on the ambient environment.
        monkeypatch.delenv("MUSE_REPO_ROOT", raising=False)
        result = runner.invoke(cli, ["code", "index", "rebuild"])
        assert result.exit_code != 0
519
520
class TestIndexPurgeCLI:
    """Integration tests for ``muse code index purge`` through the CLI runner."""

    def test_exit_zero(self, two_commit_repo: pathlib.Path) -> None:
        """``purge`` succeeds after a rebuild."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        result = runner.invoke(cli, ["code", "index", "purge"])
        assert result.exit_code == 0

    def test_json_schema(self, two_commit_repo: pathlib.Path) -> None:
        """The JSON payload carries the expected top-level keys."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        result = runner.invoke(cli, ["code", "index", "purge", "--json"])
        assert result.exit_code == 0
        data: _PurgePayload = json.loads(result.output)
        assert "schema" in data
        assert "purged" in data
        assert "skipped" in data

    def test_purge_all_deletes_files(self, two_commit_repo: pathlib.Path) -> None:
        """A plain purge removes both index files."""
        built = runner.invoke(cli, ["code", "index", "rebuild"])
        assert built.exit_code == 0, built.output
        # Make sure there is something to delete; otherwise the "not exists"
        # checks below would hold even if purge did nothing.
        assert _index_exists(two_commit_repo, "symbol_history")
        assert _index_exists(two_commit_repo, "hash_occurrence")
        runner.invoke(cli, ["code", "index", "purge"])
        assert not _index_exists(two_commit_repo, "symbol_history")
        assert not _index_exists(two_commit_repo, "hash_occurrence")

    def test_purge_specific_index(self, two_commit_repo: pathlib.Path) -> None:
        """``--index symbol_history`` deletes only that index."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        result = runner.invoke(
            cli, ["code", "index", "purge", "--index", "symbol_history", "--json"]
        )
        # Surface the command output instead of an opaque JSON decode error.
        assert result.exit_code == 0, result.output
        data: _PurgePayload = json.loads(result.output)
        assert "symbol_history" in data["purged"]
        assert not _index_exists(two_commit_repo, "symbol_history")
        assert _index_exists(two_commit_repo, "hash_occurrence")

    def test_purge_absent_shows_skipped(self, repo: pathlib.Path) -> None:
        """With nothing built, every known index is listed under ``skipped``."""
        result = runner.invoke(cli, ["code", "index", "purge", "--json"])
        data: _PurgePayload = json.loads(result.output)
        assert data["purged"] == []
        assert set(data["skipped"]) == set(KNOWN_INDEX_NAMES)

    def test_purge_then_status_absent(self, two_commit_repo: pathlib.Path) -> None:
        """After rebuild + purge, status reports every index as absent."""
        built = runner.invoke(cli, ["code", "index", "rebuild"])
        # A failed rebuild would leave the indexes absent from the start and
        # make this test pass without exercising purge.
        assert built.exit_code == 0, built.output
        runner.invoke(cli, ["code", "index", "purge"])
        data = _invoke_status_json()
        assert all(e["status"] == "absent" for e in data)

    def test_missing_repo_exits_nonzero(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Outside any repo, ``purge`` exits with a non-zero code."""
        monkeypatch.chdir(tmp_path)
        # The ``repo`` fixture sets MUSE_REPO_ROOT, so an inherited value could
        # presumably point the command at a real repo; clear it so this test
        # does not depend on the ambient environment.
        monkeypatch.delenv("MUSE_REPO_ROOT", raising=False)
        result = runner.invoke(cli, ["code", "index", "purge"])
        assert result.exit_code != 0
568
569
570 # ---------------------------------------------------------------------------
571 # E2E — real commit history interactions
572 # ---------------------------------------------------------------------------
573
574
class TestIndexE2E:
    """End-to-end checks that combine real commits with rebuild / purge / status."""

    def test_status_shows_present_after_rebuild(self, two_commit_repo: pathlib.Path) -> None:
        """Every index is reported as present once rebuilt."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        for row in _invoke_status_json():
            assert row["status"] == "present", f"{row['name']} still absent"

    def test_status_entries_nonzero_after_rebuild(self, two_commit_repo: pathlib.Path) -> None:
        """``symbol_history`` reports a positive entry count after rebuild."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        rows = _invoke_status_json()
        history = next(row for row in rows if row["name"] == "symbol_history")
        assert history["entries"] > 0

    def test_symbol_history_contains_billing_compute(self, two_commit_repo: pathlib.Path) -> None:
        """The committed ``compute`` function shows up among the addresses."""
        addresses = _build_symbol_history(two_commit_repo)
        assert any("billing.py::compute" in address for address in addresses)

    def test_hash_occurrence_cluster_for_clones(self, clone_repo: pathlib.Path) -> None:
        """Duplicate bodies yield at least one cluster."""
        clusters = _build_hash_occurrence(clone_repo)
        assert len(clusters) > 0

    def test_rebuild_is_idempotent(self, two_commit_repo: pathlib.Path) -> None:
        """Two consecutive rebuilds report the same counts."""
        first = _invoke_rebuild_json()
        second = _invoke_rebuild_json()
        assert first["symbol_history_addresses"] == second["symbol_history_addresses"]
        assert first["symbol_history_events"] == second["symbol_history_events"]
        assert first["hash_occurrence_clusters"] == second["hash_occurrence_clusters"]

    def test_purge_then_rebuild_restores_present(self, two_commit_repo: pathlib.Path) -> None:
        """Rebuild → purge → rebuild leaves every index present."""
        for action in ("rebuild", "purge", "rebuild"):
            runner.invoke(cli, ["code", "index", action])
        for row in _invoke_status_json():
            assert row["status"] == "present"

    def test_purge_index_only_removes_targeted(self, two_commit_repo: pathlib.Path) -> None:
        """Purging ``hash_occurrence`` leaves ``symbol_history`` in place."""
        runner.invoke(cli, ["code", "index", "rebuild"])
        runner.invoke(cli, ["code", "index", "purge", "--index", "hash_occurrence"])
        rows = _invoke_status_json()
        history = next(row for row in rows if row["name"] == "symbol_history")
        occurrence = next(row for row in rows if row["name"] == "hash_occurrence")
        assert history["status"] == "present"
        assert occurrence["status"] == "absent"

    def test_dry_run_counts_match_real_rebuild(self, two_commit_repo: pathlib.Path) -> None:
        """A dry run reports the same four counters as the real rebuild."""
        dry = _invoke_rebuild_json(["--dry-run"])
        real = _invoke_rebuild_json()
        counters = (
            "symbol_history_addresses",
            "symbol_history_events",
            "hash_occurrence_clusters",
            "hash_occurrence_addresses",
        )
        for key in counters:
            assert dry.get(key) == real.get(key), f"Mismatch on {key}"
626
627
628 # ---------------------------------------------------------------------------
629 # Stress
630 # ---------------------------------------------------------------------------
631
632
class TestIndexStress:
    """Larger-history and larger-file scenarios for the index builders."""

    def test_50_commit_rebuild_completes(self, repo: pathlib.Path) -> None:
        """50 commits, each changing one function — rebuild must complete."""
        for i in range(50):
            (repo / "worker.py").write_text(f"def work():\n return {i}\n")
            runner.invoke(cli, ["code", "add", "."])
            r = runner.invoke(cli, ["commit", "-m", f"v{i}"])
            assert r.exit_code == 0, r.output

        result = runner.invoke(cli, ["code", "index", "rebuild", "--json"])
        assert result.exit_code == 0
        data: _RebuildPayload = json.loads(result.output)
        assert data.get("symbol_history_addresses", 0) > 0

    def test_blob_cache_scales(self, repo: pathlib.Path) -> None:
        """10 commits on 1 file: blob for each version fetched exactly once."""
        from collections import Counter

        from muse.core.object_store import read_object as original_read

        for i in range(10):
            (repo / "target.py").write_text(f"def fn():\n return {i}\n")
            runner.invoke(cli, ["code", "add", "."])
            r = runner.invoke(cli, ["commit", "-m", f"v{i}"])
            assert r.exit_code == 0, r.output

        fetch_log: list[str] = []

        def tracked_read(root: pathlib.Path, obj_id: str) -> bytes | None:
            fetch_log.append(obj_id)
            result: bytes | None = original_read(root, obj_id)
            return result

        with mock.patch(
            "muse.cli.commands.index_rebuild.read_object", side_effect=tracked_read
        ):
            _build_symbol_history(repo)

        # Every unique obj_id must appear exactly once. Counter tallies the
        # log in one pass instead of calling list.count() per id.
        for obj_id, times in Counter(fetch_log).items():
            assert times == 1, (
                f"obj_id {obj_id[:8]}… fetched {times} times"
            )

    def test_large_flat_file_hash_occurrence(self, repo: pathlib.Path) -> None:
        """200 unique functions: no hash_occurrence clusters (all distinct bodies)."""
        funcs = "\n\n".join(f"def func_{i}():\n return {i}" for i in range(200))
        (repo / "flat.py").write_text(f"{funcs}\n")
        runner.invoke(cli, ["code", "add", "."])
        r = runner.invoke(cli, ["commit", "-m", "flat"])
        # Without a commit the index is empty and the final assertion would
        # hold without looking at any of the 200 functions.
        assert r.exit_code == 0, r.output
        idx = _build_hash_occurrence(repo)
        # All distinct bodies → no clusters
        assert len(idx) == 0

    def test_rebuild_performance(self, repo: pathlib.Path) -> None:
        """20 commits: rebuild must finish within 30 seconds."""
        for i in range(20):
            (repo / "perf.py").write_text(f"def work():\n return {i}\n")
            runner.invoke(cli, ["code", "add", "."])
            r = runner.invoke(cli, ["commit", "-m", f"v{i}"])
            # Timing a rebuild over missing history would prove nothing.
            assert r.exit_code == 0, r.output

        start = time.monotonic()
        result = runner.invoke(cli, ["code", "index", "rebuild"])
        elapsed = time.monotonic() - start
        assert result.exit_code == 0
        assert elapsed < 30.0, f"rebuild took {elapsed:.1f}s — too slow"
698
699
class TestRegisterFlags:
    """Argparse-level checks of the ``json_out`` flag on ``index rebuild``."""

    def test_json_short_flag(self) -> None:
        """``-j`` sets ``json_out``."""
        import argparse
        from muse.cli.commands.index_rebuild import register

        parser = argparse.ArgumentParser()
        register(parser.add_subparsers())
        parsed = parser.parse_args(["index", "rebuild", "-j"])
        assert parsed.json_out is True

    def test_json_long_flag(self) -> None:
        """``--json`` sets ``json_out``."""
        import argparse
        from muse.cli.commands.index_rebuild import register

        parser = argparse.ArgumentParser()
        register(parser.add_subparsers())
        parsed = parser.parse_args(["index", "rebuild", "--json"])
        assert parsed.json_out is True

    def test_default_no_json(self) -> None:
        """Without either flag ``json_out`` is ``False``."""
        import argparse
        from muse.cli.commands.index_rebuild import register

        parser = argparse.ArgumentParser()
        register(parser.add_subparsers())
        parsed = parser.parse_args(["index", "rebuild"])
        assert parsed.json_out is False
File History 1 commit