"""Extreme performance tests for Muse code domain porcelain commands. Builds a large synthetic repository (100 Python files, 100 commits, ~500 symbols per snapshot) and enforces per-command wall-clock budgets. These are intentionally generous: the goal is to catch commands that have O(N²) or worse scaling, not to micro-optimise. Tiered budgets -------------- Fast (< 5 s): commands that touch only the current snapshot or a small index Medium (< 15 s): commands that walk history but have bounded output Slow (< 45 s): commands that do deep analysis across the full commit graph The repo fixture is built once per module (session-scoped) so it is shared across all tests to avoid the dominant cost being fixture creation. Note: these tests are marked `perf` — run them explicitly with pytest tests/test_perf_extreme_code_porcelain.py -v -m perf to avoid slowing the standard CI gate. """ from __future__ import annotations import datetime import json import pathlib import time import pytest from muse.core.types import fake_id, blob_id from muse.core.object_store import write_object as _write_obj_store from muse.core.paths import heads_dir, muse_dir from tests.cli_test_helper import CliRunner cli = None runner = CliRunner() # --------------------------------------------------------------------------- # Perf marker — tests can be excluded with `-m "not perf"` on slow CI hosts. 
# ---------------------------------------------------------------------------

pytestmark = pytest.mark.perf

# Per-tier wall-clock budgets, in seconds.
_FAST_S: float = 5.0
_MEDIUM_S: float = 15.0
_SLOW_S: float = 45.0

# Shape of the synthetic repository.
_N_FILES: int = 100
_N_COMMITS: int = 100
_SYMBOLS_PER_FILE: int = 5
# Size of the rotating window of files rewritten by each commit.
_FILES_PER_COMMIT: int = 10


# ---------------------------------------------------------------------------
# Large repo fixture
# ---------------------------------------------------------------------------


def _env(root: pathlib.Path) -> dict[str, str]:
    """Return the environment overrides that point a command at *root*."""
    return {"MUSE_REPO_ROOT": str(root)}


def _store_object(root: pathlib.Path, content: bytes) -> str:
    """Write *content* into the object store under *root*; return its blob id."""
    oid = blob_id(content)
    _write_obj_store(root, oid, content)
    return oid


def _make_py_source(file_idx: int, commit_idx: int) -> bytes:
    """Generate a unique Python source file with _SYMBOLS_PER_FILE functions.

    Every function returns a constant derived from the file, symbol and commit
    indices, so each (file, commit) pair yields distinct content.
    """
    header = f"# file {file_idx} commit {commit_idx}\n"
    # The single-space body indent is still valid Python.
    functions = [
        f"def func_{file_idx}_{sym_idx}():\n"
        f" return {file_idx * 1000 + sym_idx * 100 + commit_idx}\n\n"
        for sym_idx in range(_SYMBOLS_PER_FILE)
    ]
    return "".join([header, *functions]).encode()


@pytest.fixture(scope="module")
def large_repo(tmp_path_factory: pytest.TempPathFactory) -> pathlib.Path:
    """Build the _N_FILES-file × _N_COMMITS-commit synthetic repo (100 × 100).

    Layout:
    - 100 Python source files (src/file_00.py … src/file_99.py)
    - 100 commits; each commit mutates a rotating subset of files
      (10 per commit), so the manifest grows as files are first touched
    - Total symbols ≈ 100 × 5 × 100 = 50 000 symbol-commit entries in the index

    Returns:
        The repository root directory. The fixture is module-scoped, so the
        same repository is reused by every test in this file.
    """
    root = tmp_path_factory.mktemp("large_repo")
    dot_muse = muse_dir(root)
    dot_muse.mkdir()

    repo_id = fake_id("repo")
    (dot_muse / "repo.json").write_text(
        json.dumps({
            "repo_id": repo_id,
            "domain": "code",
            "default_branch": "main",
            "created_at": "2025-01-01T00:00:00+00:00",
        }),
        encoding="utf-8",
    )
    (dot_muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")
    (dot_muse / "refs" / "heads").mkdir(parents=True)
    (dot_muse / "snapshots").mkdir()
    (dot_muse / "commits").mkdir()
    (dot_muse / "objects").mkdir()
    (root / "src").mkdir()

    from muse.core.commits import (
        CommitRecord,
        write_commit,
    )
    from muse.core.snapshots import (
        SnapshotRecord,
        write_snapshot,
    )
    from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id

    # Current manifest: maps file_path → object_id.
    manifest: dict[str, str] = {}
    parent_id: str | None = None

    ref_file = heads_dir(root) / "main"
    # Loop-invariant: make sure the branch ref's directory exists once up front.
    ref_file.parent.mkdir(parents=True, exist_ok=True)

    # Commits are spaced one hour apart starting from this instant.
    first_commit_at = datetime.datetime(2025, 1, 1, tzinfo=datetime.timezone.utc)

    for commit_idx in range(_N_COMMITS):
        # Each commit touches a rotating window of files, wrapping at _N_FILES.
        changed_files = [
            (commit_idx + offset) % _N_FILES for offset in range(_FILES_PER_COMMIT)
        ]
        for file_idx in changed_files:
            src = _make_py_source(file_idx, commit_idx)
            oid = _store_object(root, src)
            rel_path = f"src/file_{file_idx:02d}.py"
            manifest[rel_path] = oid
            (root / rel_path).write_bytes(src)

        snap_id = compute_snapshot_id(dict(manifest))
        committed_at = first_commit_at + datetime.timedelta(hours=commit_idx)
        msg = f"commit {commit_idx:04d}: rotate {len(changed_files)} files"
        commit_id = compute_commit_id(
            parent_ids=[parent_id] if parent_id else [],
            snapshot_id=snap_id,
            message=msg,
            committed_at_iso=committed_at.isoformat(),
        )

        # Store a copy: `manifest` keeps mutating on later iterations.
        write_snapshot(root, SnapshotRecord(snapshot_id=snap_id, manifest=dict(manifest)))
        write_commit(root, CommitRecord(
            commit_id=commit_id,
            branch="main",
            snapshot_id=snap_id,
            message=msg,
            committed_at=committed_at,
            parent_commit_id=parent_id,
        ))

        # Advance the branch tip to the commit just written.
        ref_file.write_text(commit_id, encoding="utf-8")
        parent_id = commit_id

    return root


# ---------------------------------------------------------------------------
# Timing helper
# ---------------------------------------------------------------------------


def _run_timed(root: pathlib.Path, args: list[str], budget_s: float) -> None:
    """Invoke the CLI with *args* against *root* and enforce *budget_s*.

    Args:
        root: Repository root, exported to the command via MUSE_REPO_ROOT.
        args: Command-line arguments passed to the runner.
        budget_s: Maximum allowed wall-clock time in seconds.

    Raises:
        AssertionError: If the invocation recorded an exception, or if it took
            ``budget_s`` seconds or longer.
    """
    t0 = time.monotonic()
    r = runner.invoke(cli, args, env=_env(root))
    elapsed = time.monotonic() - t0

    # Check for a crash first: the timing of a failed command is meaningless,
    # and a budget failure would otherwise hide the real exception.
    assert r.exception is None, (
        f"Command raised unexpectedly: {r.exception}\n{r.output[-500:]}"
    )
    assert elapsed < budget_s, (
        f"Command {args[:4]} took {elapsed:.2f}s > budget {budget_s}s on "
        f"the {_N_FILES}-file × {_N_COMMITS}-commit repo"
    )


# ---------------------------------------------------------------------------
# Fast-tier tests (< _FAST_S seconds)
# ---------------------------------------------------------------------------


class TestFastTierPerf:
    """Commands that touch only the current snapshot or a small index."""

    def test_symbols_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(large_repo, ["code", "symbols", "--json"], _FAST_S)

    def test_grep_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(large_repo, ["code", "grep", "func_0", "--json"], _FAST_S)

    def test_query_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(large_repo, ["code", "query", "kind=function", "--json"], _FAST_S)

    def test_cat_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "cat", "src/file_00.py::func_0_0", "--json"],
            _FAST_S,
        )

    def test_languages_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(large_repo, ["code", "languages", "--json"], _FAST_S)

    def test_api_surface_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(large_repo, ["code", "api-surface", "--json"], _FAST_S)

    def test_deps_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(large_repo, ["code", "deps", "src/file_00.py", "--json"], _FAST_S)

    def test_impact_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "impact", "src/file_00.py::func_0_0", "--json"],
            _FAST_S,
        )

    def test_breakage_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(large_repo, ["code", "breakage", "--json"], _FAST_S)


# ---------------------------------------------------------------------------
# Medium-tier tests (< _MEDIUM_S seconds)
# ---------------------------------------------------------------------------


class TestMediumTierPerf:
    """Commands that walk history but have bounded output size."""

    def test_hotspots_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "hotspots", "--top", "20", "--max-commits", "50", "--json"],
            _MEDIUM_S,
        )

    def test_stable_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo, ["code", "stable", "--top", "20", "--json"], _MEDIUM_S
        )

    def test_coupling_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "coupling", "--top", "20", "--min", "2", "--json"],
            _MEDIUM_S,
        )

    def test_blast_risk_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "blast-risk", "--top", "10", "--max-commits", "30", "--json"],
            _MEDIUM_S,
        )

    def test_age_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            [
                "code", "age", "src/file_00.py::func_0_0",
                "--max-commits", "30", "--json",
            ],
            _MEDIUM_S,
        )

    def test_velocity_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "velocity", "--top", "10", "--max-commits", "30", "--json"],
            _MEDIUM_S,
        )

    def test_entangle_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "entangle", "--top", "10", "--max-commits", "30", "--json"],
            _MEDIUM_S,
        )

    def test_find_symbol_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "find-symbol", "--name", "func_0_0", "--limit", "50", "--json"],
            _MEDIUM_S,
        )

    def test_symbol_log_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "symbol-log", "src/file_00.py::func_0_0", "--max", "30", "--json"],
            _MEDIUM_S,
        )

    def test_blame_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "blame", "src/file_00.py::func_0_0", "--max", "30", "--json"],
            _MEDIUM_S,
        )

    def test_detect_refactor_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "detect-refactor", "--max-commits", "30", "--json"],
            _MEDIUM_S,
        )

    def test_compare_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo, ["code", "compare", "HEAD~10", "HEAD", "--json"], _MEDIUM_S
        )

    def test_predict_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "predict", "--top", "10", "--max-commits", "30", "--json"],
            _MEDIUM_S,
        )


# ---------------------------------------------------------------------------
# Slow-tier tests (< _SLOW_S seconds)
# ---------------------------------------------------------------------------


class TestSlowTierPerf:
    """Commands that do deep graph analysis or full-history traversal."""

    def test_narrative_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            [
                "code", "narrative", "src/file_00.py::func_0_0",
                "--max-commits", "50", "--json",
            ],
            _SLOW_S,
        )

    def test_gravity_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            [
                "code", "gravity", "src/file_00.py::func_0_0",
                "--max-commits", "30", "--json",
            ],
            _SLOW_S,
        )

    def test_contract_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            [
                "code", "contract", "src/file_00.py::func_0_0",
                "--max-commits", "30", "--json",
            ],
            _SLOW_S,
        )

    def test_dead_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo, ["code", "dead", "--workers", "4", "--json"], _SLOW_S
        )

    def test_codemap_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo, ["code", "codemap", "--top", "30", "--json"], _SLOW_S
        )

    def test_clones_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(large_repo, ["code", "clones", "--json"], _SLOW_S)

    def test_semantic_test_coverage_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "semantic-test-coverage", "--max-commits", "30", "--json"],
            _SLOW_S,
        )

    def test_lineage_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "lineage", "src/file_00.py::func_0_0", "--json"],
            _SLOW_S,
        )

    def test_coverage_perf(self, large_repo: pathlib.Path) -> None:
        _run_timed(
            large_repo,
            ["code", "coverage", "src/file_00.py::func_0_0", "--json"],
            _SLOW_S,
        )