gabriel / muse public
test_perf_extreme_code_porcelain.py python
384 lines 13.1 KB
Raw
sha256:c5131d76c6eada02939111fda4aa8e51b0c1456b9983727cfd6be101916de14e merge: pull local/dev — resolve trivial _EXT_MAP symbol con… Sonnet 4.6 patch 12 days ago
1 """Extreme performance tests for Muse code domain porcelain commands.
2
3 Builds a large synthetic repository (100 Python files, 100 commits, ~500
4 symbols per snapshot) and enforces per-command wall-clock budgets. These
5 are intentionally generous: the goal is to catch commands that have O(N²)
6 or worse scaling, not to micro-optimise.
7
8 Tiered budgets
9 --------------
10 Fast (< 5 s): commands that touch only the current snapshot or a small index
11 Medium (< 15 s): commands that walk history but have bounded output
12 Slow (< 45 s): commands that do deep analysis across the full commit graph
13
14 The repo fixture is built once per module (module-scoped) so it is shared
15 across all tests in this file to avoid the dominant cost being fixture creation.
16
17 Note: these tests are marked `perf` — run them explicitly with
18 pytest tests/test_perf_extreme_code_porcelain.py -v -m perf
19 to avoid slowing the standard CI gate.
20 """
21
22 from __future__ import annotations
23
24 import datetime
25 import json
26 import pathlib
27 import time
28 import pytest
29
30 from muse.core.types import fake_id, blob_id
31 from muse.core.object_store import write_object as _write_obj_store
32 from muse.core.paths import heads_dir, muse_dir
33 from tests.cli_test_helper import CliRunner
34
# Passed as the first positional argument to ``runner.invoke`` in ``_run_timed``.
# NOTE(review): presumably the project's CliRunner helper resolves the Muse
# entry point itself and ignores this value — confirm against cli_test_helper.
cli = None
# One shared runner instance used for every timed invocation in this module.
runner = CliRunner()

# ---------------------------------------------------------------------------
# Perf marker — tests can be excluded with `-m "not perf"` on slow CI hosts.
# ---------------------------------------------------------------------------
pytestmark = pytest.mark.perf

# Wall-clock budgets, in seconds, for the fast / medium / slow test tiers.
_FAST_S: float = 5.0
_MEDIUM_S: float = 15.0
_SLOW_S: float = 45.0

# Dimensions of the synthetic repository: number of source files, number of
# commits, and number of functions generated in each source file.
_N_FILES: int = 100
_N_COMMITS: int = 100
_SYMBOLS_PER_FILE: int = 5
50
51
52 # ---------------------------------------------------------------------------
53 # Large repo fixture
54 # ---------------------------------------------------------------------------
55
def _env(root: pathlib.Path) -> dict[str, str]:
    """Return the environment overrides that point a CLI invocation at *root*.

    Args:
        root: Root directory of the repository under test.

    Returns:
        A one-entry mapping with ``MUSE_REPO_ROOT`` set to ``str(root)``.
    """
    # The previous return annotation named ``Manifest``, which this module
    # never imports; the value is a plain str -> str mapping.
    return {"MUSE_REPO_ROOT": str(root)}
58
59
def _store_object(root: pathlib.Path, content: bytes) -> str:
    """Write *content* to the object store under *root* and return its ID.

    The ID is whatever ``blob_id`` computes for the raw bytes; the same ID is
    handed to the object-store writer and returned to the caller.
    """
    object_id = blob_id(content)
    _write_obj_store(root, object_id, content)
    return object_id
64
65
def _make_py_source(file_idx: int, commit_idx: int) -> bytes:
    """Render the synthetic module for one file at one commit, encoded as bytes.

    The text is a header comment naming the file and commit, followed by
    ``_SYMBOLS_PER_FILE`` functions whose return values are derived from the
    file, symbol and commit indices.
    """
    header = f"# file {file_idx} commit {commit_idx}\n"
    function_defs = [
        f"def func_{file_idx}_{sym_idx}():\n"
        f" return {file_idx * 1000 + sym_idx * 100 + commit_idx}\n\n"
        for sym_idx in range(_SYMBOLS_PER_FILE)
    ]
    return (header + "".join(function_defs)).encode()
75
76
@pytest.fixture(scope="module")
def large_repo(tmp_path_factory: pytest.TempPathFactory) -> pathlib.Path:
    """Build the synthetic repository shared by every test in this module.

    Sizes come from the module constants ``_N_FILES`` and ``_N_COMMITS``
    (a docstring is not an f-string, so they are named here, not interpolated).

    Layout:
    - Python source files written under ``src/`` as ``file_NN.py``
    - a linear history on ``main``; each commit rewrites a rotating window of
      10 files, wrapping around at ``_N_FILES``
    - one snapshot per commit holding the manifest accumulated so far, so the
      earliest snapshots contain fewer than ``_N_FILES`` files
    - the ``main`` ref is rewritten after every commit and ends at the last one

    Returns:
        The root directory of the generated repository.
    """
    root = tmp_path_factory.mktemp("large_repo")
    dot_muse = muse_dir(root)
    dot_muse.mkdir()
    repo_id = fake_id("repo")
    (dot_muse / "repo.json").write_text(
        json.dumps({
            "repo_id": repo_id,
            "domain": "code",
            "default_branch": "main",
            "created_at": "2025-01-01T00:00:00+00:00",
        }),
        encoding="utf-8",
    )
    (dot_muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")
    (dot_muse / "refs" / "heads").mkdir(parents=True)
    (dot_muse / "snapshots").mkdir()
    (dot_muse / "commits").mkdir()
    (dot_muse / "objects").mkdir()
    (root / "src").mkdir()

    from muse.core.commits import (
        CommitRecord,
        write_commit,
    )
    from muse.core.snapshots import (
        SnapshotRecord,
        write_snapshot,
    )
    from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id

    # Current manifest: maps file_path → object_id.
    manifest: dict[str, str] = {}
    parent_id: str | None = None
    ref_file = heads_dir(root) / "main"
    # Loop-invariant: ensure the directory holding the branch ref exists once,
    # instead of re-checking it on every commit.
    ref_file.parent.mkdir(parents=True, exist_ok=True)
    first_commit_time = datetime.datetime(2025, 1, 1, tzinfo=datetime.timezone.utc)

    for commit_idx in range(_N_COMMITS):
        # Each commit touches 10 files (rotating window, wrapping at _N_FILES).
        changed_files = [(commit_idx + offset) % _N_FILES for offset in range(10)]
        for file_idx in changed_files:
            src = _make_py_source(file_idx, commit_idx)
            oid = _store_object(root, src)
            rel_path = f"src/file_{file_idx:02d}.py"
            manifest[rel_path] = oid
            # Keep the working tree in step with the committed content.
            (root / rel_path).write_bytes(src)

        # Copies of the manifest are passed on so later mutations in this loop
        # cannot affect what was hashed or recorded for this commit.
        snap_id = compute_snapshot_id(dict(manifest))
        # Commits are spaced one hour apart starting at 2025-01-01 UTC.
        committed_at = first_commit_time + datetime.timedelta(hours=commit_idx)
        msg = f"commit {commit_idx:04d}: rotate {len(changed_files)} files"
        commit_id = compute_commit_id(
            parent_ids=[parent_id] if parent_id else [],
            snapshot_id=snap_id,
            message=msg,
            committed_at_iso=committed_at.isoformat(),
        )
        write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=dict(manifest)))
        write_commit(root, CommitRecord(
            commit_id=commit_id,
            branch="main",
            snapshot_id=snap_id,
            message=msg,
            committed_at=committed_at,
            parent_commit_id=parent_id,
        ))
        # Advance the branch tip so the ref always names the newest commit.
        ref_file.write_text(commit_id, encoding="utf-8")
        parent_id = commit_id

    return root
157
158
159 # ---------------------------------------------------------------------------
160 # Timing helper
161 # ---------------------------------------------------------------------------
162
def _run_timed(root: pathlib.Path, args: list[str], budget_s: float) -> None:
    """Invoke one CLI command against *root* and enforce its wall-clock budget.

    Args:
        root: Repository root, exported to the command via ``_env``.
        args: Command-line arguments passed to the runner.
        budget_s: Maximum allowed wall-clock duration in seconds (exclusive).

    Raises:
        AssertionError: If the invocation recorded an exception, or if it took
            ``budget_s`` seconds or longer.
    """
    # perf_counter is the clock intended for measuring elapsed intervals.
    started = time.perf_counter()
    result = runner.invoke(cli, args, env=_env(root))
    elapsed = time.perf_counter() - started
    # Check for a crash first: when a command both fails and is slow, the
    # exception is the actionable finding and must not be masked by the
    # budget message.
    assert result.exception is None, (
        f"Command raised unexpectedly: {result.exception}\n{result.output[-500:]}"
    )
    assert elapsed < budget_s, (
        f"Command {args[:4]} took {elapsed:.2f}s > budget {budget_s}s on "
        f"the {_N_FILES}-file × {_N_COMMITS}-commit repo"
    )
174
175
176 # ---------------------------------------------------------------------------
177 # Fast-tier tests (< _FAST_S seconds)
178 # ---------------------------------------------------------------------------
179
class TestFastTierPerf:
    """Fast tier: each command must finish within ``_FAST_S`` seconds."""

    def test_symbols_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code symbols``."""
        argv = ["code", "symbols", "--json"]
        _run_timed(large_repo, argv, _FAST_S)

    def test_grep_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code grep`` for the ``func_0`` pattern."""
        argv = ["code", "grep", "func_0", "--json"]
        _run_timed(large_repo, argv, _FAST_S)

    def test_query_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code query`` with a ``kind=function`` filter."""
        argv = ["code", "query", "kind=function", "--json"]
        _run_timed(large_repo, argv, _FAST_S)

    def test_cat_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code cat`` on a single symbol."""
        argv = ["code", "cat", "src/file_00.py::func_0_0", "--json"]
        _run_timed(large_repo, argv, _FAST_S)

    def test_languages_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code languages``."""
        argv = ["code", "languages", "--json"]
        _run_timed(large_repo, argv, _FAST_S)

    def test_api_surface_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code api-surface``."""
        argv = ["code", "api-surface", "--json"]
        _run_timed(large_repo, argv, _FAST_S)

    def test_deps_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code deps`` on a single file."""
        argv = ["code", "deps", "src/file_00.py", "--json"]
        _run_timed(large_repo, argv, _FAST_S)

    def test_impact_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code impact`` on a single symbol."""
        argv = ["code", "impact", "src/file_00.py::func_0_0", "--json"]
        _run_timed(large_repo, argv, _FAST_S)

    def test_breakage_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code breakage``."""
        argv = ["code", "breakage", "--json"]
        _run_timed(large_repo, argv, _FAST_S)
215
216
217 # ---------------------------------------------------------------------------
218 # Medium-tier tests (< _MEDIUM_S seconds)
219 # ---------------------------------------------------------------------------
220
class TestMediumTierPerf:
    """Medium tier: each command must finish within ``_MEDIUM_S`` seconds."""

    def test_hotspots_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code hotspots`` limited to 50 commits."""
        argv = ["code", "hotspots", "--top", "20", "--max-commits", "50", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_stable_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code stable``."""
        argv = ["code", "stable", "--top", "20", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_coupling_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code coupling``."""
        argv = ["code", "coupling", "--top", "20", "--min", "2", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_blast_risk_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code blast-risk`` limited to 30 commits."""
        argv = ["code", "blast-risk", "--top", "10", "--max-commits", "30", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_age_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code age`` on a single symbol."""
        argv = [
            "code", "age", "src/file_00.py::func_0_0",
            "--max-commits", "30", "--json",
        ]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_velocity_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code velocity`` limited to 30 commits."""
        argv = ["code", "velocity", "--top", "10", "--max-commits", "30", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_entangle_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code entangle`` limited to 30 commits."""
        argv = ["code", "entangle", "--top", "10", "--max-commits", "30", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_find_symbol_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code find-symbol`` by name."""
        argv = ["code", "find-symbol", "--name", "func_0_0", "--limit", "50", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_symbol_log_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code symbol-log`` on a single symbol."""
        argv = ["code", "symbol-log", "src/file_00.py::func_0_0", "--max", "30", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_blame_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code blame`` on a single symbol."""
        argv = ["code", "blame", "src/file_00.py::func_0_0", "--max", "30", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_detect_refactor_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code detect-refactor`` limited to 30 commits."""
        argv = ["code", "detect-refactor", "--max-commits", "30", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_compare_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code compare`` between ``HEAD~10`` and ``HEAD``."""
        argv = ["code", "compare", "HEAD~10", "HEAD", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)

    def test_predict_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code predict`` limited to 30 commits."""
        argv = ["code", "predict", "--top", "10", "--max-commits", "30", "--json"]
        _run_timed(large_repo, argv, _MEDIUM_S)
313
314
315 # ---------------------------------------------------------------------------
316 # Slow-tier tests (< _SLOW_S seconds)
317 # ---------------------------------------------------------------------------
318
class TestSlowTierPerf:
    """Slow tier: each command must finish within ``_SLOW_S`` seconds."""

    def test_narrative_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code narrative`` on a single symbol, limited to 50 commits."""
        argv = [
            "code", "narrative", "src/file_00.py::func_0_0",
            "--max-commits", "50", "--json",
        ]
        _run_timed(large_repo, argv, _SLOW_S)

    def test_gravity_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code gravity`` on a single symbol, limited to 30 commits."""
        argv = [
            "code", "gravity", "src/file_00.py::func_0_0",
            "--max-commits", "30", "--json",
        ]
        _run_timed(large_repo, argv, _SLOW_S)

    def test_contract_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code contract`` on a single symbol, limited to 30 commits."""
        argv = [
            "code", "contract", "src/file_00.py::func_0_0",
            "--max-commits", "30", "--json",
        ]
        _run_timed(large_repo, argv, _SLOW_S)

    def test_dead_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code dead`` with ``--workers 4``."""
        argv = ["code", "dead", "--workers", "4", "--json"]
        _run_timed(large_repo, argv, _SLOW_S)

    def test_codemap_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code codemap``."""
        argv = ["code", "codemap", "--top", "30", "--json"]
        _run_timed(large_repo, argv, _SLOW_S)

    def test_clones_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code clones``."""
        argv = ["code", "clones", "--json"]
        _run_timed(large_repo, argv, _SLOW_S)

    def test_semantic_test_coverage_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code semantic-test-coverage`` limited to 30 commits."""
        argv = ["code", "semantic-test-coverage", "--max-commits", "30", "--json"]
        _run_timed(large_repo, argv, _SLOW_S)

    def test_lineage_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code lineage`` on a single symbol."""
        argv = ["code", "lineage", "src/file_00.py::func_0_0", "--json"]
        _run_timed(large_repo, argv, _SLOW_S)

    def test_coverage_perf(self, large_repo: pathlib.Path) -> None:
        """Time ``code coverage`` on a single symbol."""
        argv = ["code", "coverage", "src/file_00.py::func_0_0", "--json"]
        _run_timed(large_repo, argv, _SLOW_S)
File History 5 commits
sha256:c5131d76c6eada02939111fda4aa8e51b0c1456b9983727cfd6be101916de14e merge: pull local/dev — resolve trivial _EXT_MAP symbol con… Sonnet 4.6 patch 12 days ago
sha256:9c33d61749fff814c5226d5386aa2af7064c2c02788594a25fdd709358132eea fix: _PROPOSAL_PREFIX_RESOLVE_LIMIT 200 → 100 to match hub … Sonnet 4.6 19 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago