"""Comprehensive tests for ``muse coord shard``. Coverage matrix --------------- Unit ~~~~ * _build_import_edges — language filter, missing objects, stem matching * _connected_components — isolated nodes, simple chain, full graph, cycle * _greedy_partition — single shard, multi-shard balance, more shards than components, empty components, symbol-count weighting verified Integration ~~~~~~~~~~~ * Empty repo (no commits) — exits 1 with "not found" message * --agents 0 — exits 1 with clean error (no traceback) * --agents 257 — exits 1 with clean error * --agents at boundary 1 — accepted * --agents at boundary 256 — accepted * --agents validation fires before require_repo (no .muse dir needed) * --agents 1 with mocked snapshot — text output with "Shard plan" header * --format json — valid JSON with all required schema fields * --json shorthand — same as --format json * --language filter — restricts file selection (language_filter kwarg) * --commit REF — passed through to resolve_commit_ref * No snapshot manifest — exits 0 with "(no semantic files found)" * Text output — header, shard lines, cross-shard edges, elapsed * JSON: commit is the complete commit ID * JSON: total_files and total_symbols present and correct * JSON: duration_ms present and non-negative * JSON: cross_shard_edges correct count * JSON: no-files case still emits valid schema Error shapes ~~~~~~~~~~~~ * --agents out of range: JSON error has {"error": ..., "status": "bad_args"} * --agents out of range: text error uses ❌ prefix on stderr * commit not found: JSON error has {"error": ..., "status": "commit_not_found"} * commit not found: text error uses ❌ prefix on stderr Security ~~~~~~~~ * --language value sanitised in text output (ANSI injection stripped) * --language filter does not traverse filesystem * --commit traversal ref handled gracefully (no crash) * file paths in text output sanitised (ANSI stripped) Stress ~~~~~~ * 100-file mock snapshot partitioned into 8 shards — runs in < 2 s * 500-file 
isolated nodes partitioned into 16 shards — runs in < 2 s * 200-file dense graph (chain) into 4 shards — cross_shard_edges correct * JSON output with 500 shards is a single compact line (no indent) E2E ~~~ * Single file → 1 shard, 0 cross-shard edges * Two disconnected clusters → 2 shards, 0 cross-shard edges * Connected pair split across 2 shards → cross_shard_edges ≥ 1 * shards_created = min(agents, components) """ from __future__ import annotations import io import json import pathlib import sys import time import pytest from unittest.mock import patch, MagicMock from muse.core.types import fake_id from muse.core.paths import muse_dir from tests.cli_test_helper import CliRunner, InvokeResult from muse.cli.commands.shard import _MIN_AGENTS, _MAX_AGENTS runner = CliRunner() cli = None # ── Fixtures ────────────────────────────────────────────────────────────────── @pytest.fixture() def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path: dot_muse = muse_dir(tmp_path) dot_muse.mkdir() (dot_muse / "HEAD").write_text("ref: refs/heads/main\n") (dot_muse / "repo.json").write_text( json.dumps({"repo_id": fake_id("repo"), "name": "test-repo"}) ) monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) return tmp_path # ── Minimal commit stub ─────────────────────────────────────────────────────── def _make_commit_stub(commit_id: str = "a1b2c3d4e5f60000") -> MagicMock: stub = MagicMock() stub.commit_id = commit_id return stub # ── Unit: _build_import_edges ───────────────────────────────────────────────── class TestBuildImportEdges: def test_empty_manifest_returns_no_edges(self, repo: pathlib.Path) -> None: from muse.cli.commands.shard import _build_import_edges edges = _build_import_edges(repo, {}, language_filter=None) assert edges == [] def test_language_filter_excludes_unmatched_files(self, repo: pathlib.Path) -> None: from muse.cli.commands.shard import _build_import_edges manifest = {"src/foo.py": "obj1", "src/bar.ts": "obj2"} # read_object 
returns None → no parse → no edges; language filter prunes ts with patch("muse.cli.commands.shard.read_object", return_value=None): edges = _build_import_edges(repo, manifest, language_filter="Python") assert edges == [] def test_missing_object_skipped_gracefully(self, repo: pathlib.Path) -> None: from muse.cli.commands.shard import _build_import_edges manifest = {"src/foo.py": "nonexistent-oid"} with patch("muse.cli.commands.shard.read_object", return_value=None): edges = _build_import_edges(repo, manifest, language_filter=None) assert edges == [] def test_import_edge_built_from_parsed_symbol(self, repo: pathlib.Path) -> None: from muse.cli.commands.shard import _build_import_edges manifest = {"src/foo.py": "oid-foo", "src/bar.py": "oid-bar"} fake_tree = { "import::bar": { "kind": "import", "qualified_name": "import::bar", "name": "bar", } } with ( patch("muse.cli.commands.shard.read_object", return_value=b"dummy"), patch("muse.cli.commands.shard.parse_symbols", return_value=fake_tree), ): edges = _build_import_edges(repo, manifest, language_filter=None) # (src/foo.py, src/bar.py) or (src/bar.py, src/foo.py) edge expected assert len(edges) >= 1 found = any( ("src/foo.py" in e and "src/bar.py" in e) for e in edges ) assert found # ── Unit: _connected_components ─────────────────────────────────────────────── class TestConnectedComponents: def test_empty_files(self) -> None: from muse.cli.commands.shard import _connected_components result = _connected_components([], []) assert result == [] def test_single_file_no_edges(self) -> None: from muse.cli.commands.shard import _connected_components result = _connected_components(["a.py"], []) assert len(result) == 1 assert result[0] == frozenset({"a.py"}) def test_two_isolated_files(self) -> None: from muse.cli.commands.shard import _connected_components result = _connected_components(["a.py", "b.py"], []) assert len(result) == 2 def test_two_connected_files(self) -> None: from muse.cli.commands.shard import 
_connected_components result = _connected_components(["a.py", "b.py"], [("a.py", "b.py")]) assert len(result) == 1 assert result[0] == frozenset({"a.py", "b.py"}) def test_chain_of_three(self) -> None: from muse.cli.commands.shard import _connected_components files = ["a.py", "b.py", "c.py"] edges = [("a.py", "b.py"), ("b.py", "c.py")] result = _connected_components(files, edges) assert len(result) == 1 assert result[0] == frozenset({"a.py", "b.py", "c.py"}) def test_two_separate_components(self) -> None: from muse.cli.commands.shard import _connected_components files = ["a.py", "b.py", "c.py", "d.py"] edges = [("a.py", "b.py"), ("c.py", "d.py")] result = _connected_components(files, edges) assert len(result) == 2 sizes = sorted(len(c) for c in result) assert sizes == [2, 2] # ── Unit: _greedy_partition ─────────────────────────────────────────────────── class TestGreedyPartition: def test_single_shard_all_in_one(self) -> None: from muse.cli.commands.shard import _greedy_partition comps = [frozenset({"a.py"}), frozenset({"b.py"})] sym_counts = {"a.py": 5, "b.py": 3} result = _greedy_partition(comps, sym_counts, n_shards=1) assert len(result) == 1 assert result[0] == frozenset({"a.py", "b.py"}) def test_balanced_across_shards(self) -> None: from muse.cli.commands.shard import _greedy_partition comps = [frozenset({f"f{i}.py"}) for i in range(4)] sym_counts = {f"f{i}.py": 10 for i in range(4)} result = _greedy_partition(comps, sym_counts, n_shards=2) sizes = [sum(sym_counts[f] for f in s) for s in result] assert sizes[0] == sizes[1] == 20 def test_more_shards_than_components(self) -> None: from muse.cli.commands.shard import _greedy_partition comps = [frozenset({"a.py"})] sym_counts = {"a.py": 2} result = _greedy_partition(comps, sym_counts, n_shards=4) # Only one shard is non-empty non_empty = [s for s in result if s] assert len(non_empty) == 1 def test_empty_components_produces_empty_shards(self) -> None: from muse.cli.commands.shard import _greedy_partition result 
= _greedy_partition([], {}, n_shards=3) assert all(len(s) == 0 for s in result) # ── Integration ─────────────────────────────────────────────────────────────── class TestShardIntegration: def test_empty_repo_no_commits_exits_nonzero(self, repo: pathlib.Path) -> None: """No commits → resolve_commit_ref returns None → exits nonzero with 'not found' message.""" with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None): result = runner.invoke(cli, ["coord", "shard", "--agents", "4"]) assert result.exit_code != 0 assert "not found" in result.stderr.lower() def test_no_manifest_files_exits_0(self, repo: pathlib.Path) -> None: """Commit found but manifest is empty → prints no-semantic-files message.""" commit = _make_commit_stub() with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}), ): result = runner.invoke(cli, ["coord", "shard", "--agents", "4"]) assert result.exit_code == 0 assert "no semantic files found" in result.output def test_agents_zero_exits_nonzero(self, repo: pathlib.Path) -> None: """--agents 0 is invalid → clamp_int raises ValueError → non-zero exit.""" result = runner.invoke(cli, ["coord", "shard", "--agents", "0"]) assert result.exit_code != 0 def test_agents_1_mocked_snapshot_text_output(self, repo: pathlib.Path) -> None: commit = _make_commit_stub("deadbeef00000000") sym_map = {"src/foo.py": {"foo": {}, "bar": {}}} with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={"src/foo.py": "oid1"}), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=[]), ): result = runner.invoke(cli, ["coord", "shard", "--agents", "1"]) assert result.exit_code == 0 assert "Shard plan" in 
result.output assert "deadbeef" in result.output def test_format_json_produces_valid_json(self, repo: pathlib.Path) -> None: commit = _make_commit_stub("cafebabe00000000") sym_map = {"src/a.py": {"x": {}}, "src/b.py": {"y": {}}} manifest = {"src/a.py": "oid1", "src/b.py": "oid2"} with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=[]), ): result = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"]) assert result.exit_code == 0 data = json.loads(result.output.strip()) assert "schema" in data assert "commit" in data assert "agents" in data assert "shards_created" in data assert "cross_shard_edges" in data assert "shards" in data def test_json_shorthand_same_as_json_long(self, repo: pathlib.Path) -> None: commit = _make_commit_stub("00112233aabbccdd") sym_map = {"src/x.py": {"f": {}}} manifest = {"src/x.py": "oid1"} with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=[]), ): r1 = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"]) r2 = runner.invoke(cli, ["coord", "shard", "--agents", "2", "-j"]) assert r1.exit_code == 0 assert r2.exit_code == 0 d1 = json.loads(r1.output.strip()) d2 = json.loads(r2.output.strip()) # duration_ms differs between runs — compare structural fields only for key in ("schema", "commit", "agents", "shards_created", "total_files", "total_symbols", "cross_shard_edges", "shards"): assert d1[key] == d2[key] def test_language_filter_passed_through(self, repo: pathlib.Path) -> None: commit = _make_commit_stub() with 
( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}) as mock_sym, ): runner.invoke(cli, ["coord", "shard", "--agents", "2", "--language", "Python"]) mock_sym.assert_called_once() _, kwargs = mock_sym.call_args assert kwargs.get("language_filter") == "Python" def test_commit_ref_forwarded_to_resolver(self, repo: pathlib.Path) -> None: with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None) as mock_res: runner.invoke(cli, ["coord", "shard", "--commit", "HEAD~3"]) mock_res.assert_called_once() args, _ = mock_res.call_args assert args[2] == "HEAD~3" def test_text_output_contains_cross_shard_edges(self, repo: pathlib.Path) -> None: commit = _make_commit_stub("aabbccdd11223344") sym_map = {"src/a.py": {"f": {}}, "src/b.py": {"g": {}}} manifest = {"src/a.py": "oid1", "src/b.py": "oid2"} with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=[("src/a.py", "src/b.py")]), ): result = runner.invoke(cli, ["coord", "shard", "--agents", "2"]) assert result.exit_code == 0 assert "Cross-shard edges" in result.output # ── Security ────────────────────────────────────────────────────────────────── class TestShardSecurity: def test_language_filter_does_not_open_filesystem(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None: """--language must not cause FS traversal beyond object store.""" commit = _make_commit_stub() with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}), patch("muse.cli.commands.shard.symbols_for_snapshot", 
return_value={}), ): result = runner.invoke(cli, ["coord", "shard", "--language", "../../../etc/passwd"]) assert result.exit_code == 0 assert "no semantic files found" in result.output def test_traversal_commit_ref_handled_gracefully(self, repo: pathlib.Path) -> None: """Malicious --commit ref should not crash the process.""" with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None): result = runner.invoke(cli, ["coord", "shard", "--commit", "../../etc/shadow"]) assert result.exit_code == 0 or result.exit_code != 0 # no crash def test_ansi_in_language_stripped_text_output(self, repo: pathlib.Path) -> None: """ANSI escape in --language value must not appear in text output.""" malicious_lang = "\x1b[31mPython\x1b[0m" commit = _make_commit_stub() with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}), ): result = runner.invoke(cli, ["coord", "shard", "--language", malicious_lang]) assert "\x1b[" not in result.output def test_ansi_in_file_path_stripped_text_output(self, repo: pathlib.Path) -> None: """ANSI escape codes in file paths must be stripped before display.""" commit = _make_commit_stub("deadbeef00000000") malicious_fp = "\x1b[31msrc/malicious.py\x1b[0m" sym_map = {malicious_fp: {"fn": {}}} manifest = {malicious_fp: "oid1"} with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=[]), ): result = runner.invoke(cli, ["coord", "shard", "--agents", "1"]) assert "\x1b[" not in result.output assert "src/malicious.py" in result.output # ── Input validation ────────────────────────────────────────────────────────── class 
TestShardInputValidation: def test_agents_zero_exits_1_clean(self, repo: pathlib.Path) -> None: """--agents 0 must exit 1 with a clean error message, no traceback.""" result = runner.invoke(cli, ["coord", "shard", "--agents", "0"]) assert result.exit_code == 1 assert "Traceback" not in result.output def test_agents_negative_exits_1(self, repo: pathlib.Path) -> None: result = runner.invoke(cli, ["coord", "shard", "--agents", "-1"]) assert result.exit_code == 1 def test_agents_over_max_exits_1(self, repo: pathlib.Path) -> None: result = runner.invoke(cli, ["coord", "shard", "--agents", str(_MAX_AGENTS + 1)]) assert result.exit_code == 1 def test_agents_at_min_accepted(self, repo: pathlib.Path) -> None: commit = _make_commit_stub() with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}), ): result = runner.invoke(cli, ["coord", "shard", "--agents", str(_MIN_AGENTS)]) assert result.exit_code == 0 def test_agents_at_max_accepted(self, repo: pathlib.Path) -> None: commit = _make_commit_stub() with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}), ): result = runner.invoke(cli, ["coord", "shard", "--agents", str(_MAX_AGENTS)]) assert result.exit_code == 0 def test_agents_invalid_json_error_shape(self, repo: pathlib.Path) -> None: """--format json error for --agents out of range must have {error, status}.""" result = runner.invoke(cli, ["coord", "shard", "--agents", "0", "--json"]) assert result.exit_code == 1 data = json.loads(result.output.strip()) assert "error" in data assert data["status"] == "bad_args" def test_agents_invalid_text_uses_tick_prefix(self, repo: pathlib.Path) -> None: result = runner.invoke(cli, ["coord", 
"shard", "--agents", "0"]) assert result.exit_code == 1 assert "❌" in result.stderr def test_agents_invalid_text_no_stdout(self, repo: pathlib.Path) -> None: """Text mode error goes to stderr; stdout must be empty.""" result = runner.invoke(cli, ["coord", "shard", "--agents", "0"]) # CliRunner merges stderr into output — we just check no traceback assert "Traceback" not in result.output def test_commit_not_found_json_error_shape(self, repo: pathlib.Path) -> None: with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None): result = runner.invoke(cli, ["coord", "shard", "--json"]) assert result.exit_code == 1 data = json.loads(result.output.strip()) assert "error" in data assert data["status"] == "commit_not_found" def test_commit_not_found_text_uses_tick_prefix(self, repo: pathlib.Path) -> None: with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None): result = runner.invoke(cli, ["coord", "shard"]) assert result.exit_code == 1 assert "❌" in result.stderr def test_agents_validation_fires_before_repo_lookup(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None: """Invalid --agents exits before trying to open .muse/ (no repo needed).""" monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path)) # no .muse dir result = runner.invoke(cli, ["coord", "shard", "--agents", "0"]) assert result.exit_code == 1 # Must not say "Repository not found" assert "repository" not in result.output.lower() # ── JSON schema: new fields ─────────────────────────────────────────────────── class TestShardJsonSchema: def _base_invoke(self, repo: pathlib.Path, agents: str = "2", extra: list[str] | None = None) -> tuple[InvokeResult, MagicMock]: commit = _make_commit_stub("abcdef1234567890abcdef1234567890") sym_map = {"src/a.py": {"f": {}}, "src/b.py": {"g": {}}} manifest = {"src/a.py": "oid1", "src/b.py": "oid2"} args = ["coord", "shard", "--agents", agents, "--json"] if extra: args.extend(extra) with ( 
patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=[]), ): return runner.invoke(cli, args), commit def test_commit_is_full_id(self, repo: pathlib.Path) -> None: result, commit = self._base_invoke(repo) assert result.exit_code == 0 data = json.loads(result.output.strip()) assert data["commit"] == commit.commit_id assert len(data["commit"]) > 8 def test_total_files_correct(self, repo: pathlib.Path) -> None: result, _ = self._base_invoke(repo) data = json.loads(result.output.strip()) assert data["total_files"] == 2 def test_total_symbols_correct(self, repo: pathlib.Path) -> None: result, _ = self._base_invoke(repo) data = json.loads(result.output.strip()) # sym_map has 1 symbol per file × 2 files assert data["total_symbols"] == 2 def test_duration_ms_present_and_non_negative(self, repo: pathlib.Path) -> None: result, _ = self._base_invoke(repo) data = json.loads(result.output.strip()) assert "duration_ms" in data assert isinstance(data["duration_ms"], float) assert data["duration_ms"] >= 0 def test_json_is_single_line(self, repo: pathlib.Path) -> None: result, _ = self._base_invoke(repo) lines = [ln for ln in result.output.splitlines() if ln.strip()] assert len(lines) == 1, f"JSON output must be one line, got {len(lines)}" def test_all_schema_fields_present(self, repo: pathlib.Path) -> None: result, _ = self._base_invoke(repo) data = json.loads(result.output.strip()) required = { "schema", "commit", "agents", "shards_created", "total_files", "total_symbols", "cross_shard_edges", "shards", "duration_ms", } missing = required - data.keys() assert not missing, f"Missing JSON fields: {missing}" def test_no_files_case_emits_valid_schema(self, repo: pathlib.Path) -> None: """Empty manifest → shards=[], total_files=0, still 
valid JSON schema.""" commit = _make_commit_stub("abcdef1234567890abcdef1234567890") with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}), ): result = runner.invoke(cli, ["coord", "shard", "--agents", "4", "--json"]) assert result.exit_code == 0 data = json.loads(result.output.strip()) assert data["shards"] == [] assert data["total_files"] == 0 assert data["total_symbols"] == 0 assert "duration_ms" in data def test_cross_shard_edges_zero_when_isolated(self, repo: pathlib.Path) -> None: commit = _make_commit_stub() sym_map = {"src/a.py": {"f": {}}, "src/b.py": {"g": {}}} manifest = {"src/a.py": "oid1", "src/b.py": "oid2"} with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=[]), ): result = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"]) data = json.loads(result.output.strip()) assert data["cross_shard_edges"] == 0 def test_shards_created_capped_at_components(self, repo: pathlib.Path) -> None: """shards_created = min(agents, components) — can't exceed file count.""" commit = _make_commit_stub() sym_map = {"src/a.py": {"f": {}}} # 1 file → 1 component manifest = {"src/a.py": "oid1"} with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=[]), ): result = runner.invoke(cli, ["coord", "shard", "--agents", "10", "--json"]) data = json.loads(result.output.strip()) assert 
data["shards_created"] == 1 # ── Unit: _connected_components edge cases ──────────────────────────────────── class TestConnectedComponentsExtra: def test_cycle_resolved_as_single_component(self) -> None: """A → B → C → A cycle must yield one component of 3 files.""" from muse.cli.commands.shard import _connected_components files = ["a.py", "b.py", "c.py"] edges = [("a.py", "b.py"), ("b.py", "c.py"), ("c.py", "a.py")] result = _connected_components(files, edges) assert len(result) == 1 assert result[0] == frozenset({"a.py", "b.py", "c.py"}) def test_star_topology(self) -> None: """Hub → 4 spokes: all connected, 1 component.""" from muse.cli.commands.shard import _connected_components files = ["hub.py", "s1.py", "s2.py", "s3.py", "s4.py"] edges = [(f"s{i}.py", "hub.py") for i in range(1, 5)] result = _connected_components(files, edges) assert len(result) == 1 def test_self_loop_ignored(self) -> None: """A file importing itself produces no cross-edge (target == file_path guard).""" from muse.cli.commands.shard import _connected_components result = _connected_components(["a.py"], [("a.py", "a.py")]) assert len(result) == 1 def test_extra_edge_node_not_in_files_ignored(self) -> None: """An edge referencing a file not in the files list should not crash.""" from muse.cli.commands.shard import _connected_components # "ghost.py" is in the edge but not in files — adj.setdefault handles it result = _connected_components(["a.py"], [("a.py", "ghost.py")]) # a.py is still returned as its own component assert any("a.py" in c for c in result) # ── Unit: _greedy_partition extra ──────────────────────────────────────────── class TestGreedyPartitionExtra: def test_all_files_accounted_for(self) -> None: """Every file in input components appears in exactly one shard.""" from muse.cli.commands.shard import _greedy_partition N = 20 comps = [frozenset({f"f{i}.py"}) for i in range(N)] sym_counts = {f"f{i}.py": i + 1 for i in range(N)} shards = _greedy_partition(comps, sym_counts, 
n_shards=4) all_files = set() for s in shards: assert not (all_files & s), "File appears in more than one shard" all_files |= s expected = {f"f{i}.py" for i in range(N)} assert all_files == expected def test_symbol_count_weighting(self) -> None: """Largest component goes to the first shard (LPT first step).""" from muse.cli.commands.shard import _greedy_partition big = frozenset({"big.py"}) smalls = [frozenset({f"s{i}.py"}) for i in range(3)] sym_counts = {"big.py": 100, "s0.py": 1, "s1.py": 1, "s2.py": 1} shards = _greedy_partition([big] + smalls, sym_counts, n_shards=2) # big.py is in one shard by itself (100 >> 3*1) big_shard = next(s for s in shards if "big.py" in s) assert big_shard == frozenset({"big.py"}) def test_single_large_component_into_many_shards(self) -> None: """One big component split into 4 shards — all files in first shard.""" from muse.cli.commands.shard import _greedy_partition comp = frozenset({"a.py", "b.py", "c.py"}) sym_counts = {"a.py": 10, "b.py": 5, "c.py": 3} shards = _greedy_partition([comp], sym_counts, n_shards=4) non_empty = [s for s in shards if s] assert len(non_empty) == 1 assert non_empty[0] == comp # ── Stress tests ────────────────────────────────────────────────────────────── class TestShardStressExtra: def test_500_isolated_files_16_shards_under_2s(self, repo: pathlib.Path) -> None: """500 isolated files partitioned into 16 shards in < 2 s.""" N = 500 commit = _make_commit_stub() sym_map = {f"src/mod{i}.py": {f"fn_{i}": {}} for i in range(N)} manifest = {fp: f"oid{i}" for i, fp in enumerate(sym_map)} with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=[]), ): t0 = time.monotonic() result = runner.invoke(cli, ["coord", "shard", "--agents", "16", "--json"]) elapsed = 
time.monotonic() - t0 assert result.exit_code == 0 assert elapsed < 2.0, f"500 isolated files took {elapsed:.2f}s" data = json.loads(result.output.strip()) assert data["total_files"] == N assert data["total_symbols"] == N # 1 sym per file def test_200_chain_files_4_shards_cross_edges_correct(self, repo: pathlib.Path) -> None: """Chain graph: mod0→mod1→…→mod199, 4 shards — cross_shard_edges is exact.""" N = 200 commit = _make_commit_stub() sym_map = {f"src/mod{i}.py": {f"fn_{i}": {}} for i in range(N)} manifest = {fp: f"oid{i}" for i, fp in enumerate(sym_map)} # A chain: each file imports the next edges = [(f"src/mod{i}.py", f"src/mod{i+1}.py") for i in range(N - 1)] with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=edges), ): result = runner.invoke(cli, ["coord", "shard", "--agents", "4", "--json"]) assert result.exit_code == 0 data = json.loads(result.output.strip()) # Chain is one big component → 1 shard, no cross-shard edges assert data["shards_created"] == 1 assert data["cross_shard_edges"] == 0 def test_json_compact_with_500_shards(self, repo: pathlib.Path) -> None: """Even with many shards, JSON output is a single compact line.""" N = 500 commit = _make_commit_stub() sym_map = {f"src/mod{i}.py": {f"fn_{i}": {}} for i in range(N)} manifest = {fp: f"oid{i}" for i, fp in enumerate(sym_map)} with ( patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit), patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest), patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map), patch("muse.cli.commands.shard._build_import_edges", return_value=[]), ): result = runner.invoke(cli, ["coord", "shard", "--agents", "256", "--json"]) assert result.exit_code == 0 
# NOTE(review): the next two statements are the tail of the preceding test
# method, whose beginning lies before this chunk; they are preserved verbatim
# and belong at that method's body indentation.
lines = [ln for ln in result.output.splitlines() if ln.strip()]
assert len(lines) == 1


# ── E2E tests ─────────────────────────────────────────────────────────────────


class TestShardE2E:
    """End-to-end runs of ``muse coord shard`` through the CLI runner.

    Commit resolution, manifest lookup, symbol extraction and import-edge
    discovery are all patched, so each test controls the exact graph that the
    command partitions and inspects only the emitted output.
    """

    def test_single_file_one_shard_zero_edges(self, repo: pathlib.Path) -> None:
        """One file with ``--agents 4`` yields one shard and no cross-shard edges."""
        commit = _make_commit_stub()
        sym_map = {"src/only.py": {"fn": {}}}
        manifest = {"src/only.py": "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "4", "--json"])
        assert result.exit_code == 0
        data = json.loads(result.output.strip())
        assert data["shards_created"] == 1
        assert data["cross_shard_edges"] == 0
        assert data["total_files"] == 1

    def test_two_disconnected_clusters_zero_cross_edges(self, repo: pathlib.Path) -> None:
        """Two disconnected clusters into 2 shards → 0 cross-shard edges."""
        commit = _make_commit_stub()
        sym_map = {
            "src/a.py": {"fa": {}},
            "src/b.py": {"fb": {}},  # cluster 1
            "src/c.py": {"fc": {}},
            "src/d.py": {"fd": {}},  # cluster 2
        }
        manifest = {k: f"oid{i}" for i, k in enumerate(sym_map)}
        edges = [("src/a.py", "src/b.py"), ("src/c.py", "src/d.py")]
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=edges),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"])
        assert result.exit_code == 0
        data = json.loads(result.output.strip())
        assert data["shards_created"] == 2
        assert data["cross_shard_edges"] == 0

    def test_connected_pair_forced_into_two_shards_has_edges(self, repo: pathlib.Path) -> None:
        """A connected pair stays in one shard even when ``--agents 2`` is requested.

        The test name is historical: despite "has_edges", the expectation is
        that a→b (one component) is never split, so with the isolated file
        ``c`` filling the second shard there are 0 cross-shard edges.
        """
        commit = _make_commit_stub()
        sym_map = {
            "src/a.py": {"fa": {}},
            "src/b.py": {"fb": {}},
            "src/c.py": {"fc": {}},  # third file so components>1 is possible
        }
        manifest = {k: f"oid{i}" for i, k in enumerate(sym_map)}
        # a→b are connected (1 component), c is isolated (1 component)
        # agents=2 → 2 shards; a+b are same component → same shard → 0 cross edges
        edges = [("src/a.py", "src/b.py")]
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=edges),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"])
        assert result.exit_code == 0
        data = json.loads(result.output.strip())
        # Two components and two agents → two shards; without this check the
        # zero-edge assertion below would also pass if everything landed in a
        # single shard.
        assert data["shards_created"] == 2
        # a+b are in the same component, never split → 0 cross-shard edges
        assert data["cross_shard_edges"] == 0

    def test_text_output_shows_elapsed(self, repo: pathlib.Path) -> None:
        """Text output contains the ``s)`` suffix of the elapsed-time marker."""
        commit = _make_commit_stub("cafebabe00000000")
        sym_map = {"src/x.py": {"f": {}}}
        manifest = {"src/x.py": "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "1"])
        assert result.exit_code == 0
        assert "s)" in result.output

    def test_text_output_perfect_isolation_message(self, repo: pathlib.Path) -> None:
        """When cross_shard_edges == 0, text output says 'Perfect isolation'."""
        commit = _make_commit_stub()
        sym_map = {"src/a.py": {"f": {}}}
        manifest = {"src/a.py": "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "1"])
        assert result.exit_code == 0
        assert "Perfect isolation" in result.output

    def test_symbol_count_sum_equals_total_symbols(self, repo: pathlib.Path) -> None:
        """Sum of shard symbol_counts must equal total_symbols in JSON."""
        commit = _make_commit_stub()
        # File i carries i + 1 symbols, so shard weights are deliberately uneven.
        sym_map = {f"src/f{i}.py": {f"fn{j}": {} for j in range(i + 1)} for i in range(5)}
        manifest = {k: f"oid{i}" for i, k in enumerate(sym_map)}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "3", "--json"])
        assert result.exit_code == 0
        data = json.loads(result.output.strip())
        assert sum(s["symbol_count"] for s in data["shards"]) == data["total_symbols"]


# ── Stress ────────────────────────────────────────────────────────────────────


class TestShardStress:
    """Timing-bounded runs of the shard command over larger mocked snapshots."""

    def test_100_files_8_shards_under_2s(self, repo: pathlib.Path) -> None:
        """100 files into at most 8 shards finishes in under 2 s and keeps every symbol."""
        n_files = 100
        commit = _make_commit_stub()
        # Two symbols per file, so the expected grand total is n_files * 2.
        sym_map = {f"src/mod{i}.py": {f"fn_{i}": {}, f"cls_{i}": {}} for i in range(n_files)}
        manifest = {fp: f"oid{i}" for i, fp in enumerate(sym_map)}
        # Link every fifth module to its successor.
        edges = [(f"src/mod{i}.py", f"src/mod{i+1}.py") for i in range(0, n_files - 1, 5)]
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=edges),
        ):
            # Only the CLI invocation is timed; fixture construction is excluded.
            t0 = time.monotonic()
            result = runner.invoke(cli, ["coord", "shard", "--agents", "8", "--json"])
            elapsed = time.monotonic() - t0
        assert result.exit_code == 0
        assert elapsed < 2.0
        data = json.loads(result.output.strip())
        assert data["shards_created"] <= 8
        assert sum(s["symbol_count"] for s in data["shards"]) == n_files * 2


class TestRegisterFlags:
    """Argparse wiring of the ``--json`` / ``-j`` flag installed by ``shard.register``."""

    @staticmethod
    def _json_out(argv: list[str]) -> bool:
        """Parse *argv* with a fresh parser and return the resulting ``json_out`` value.

        A new ``ArgumentParser`` is built on every call and ``register`` is
        invoked on its subparsers, so no parser state is shared between tests.
        """
        import argparse

        from muse.cli.commands.shard import register

        parser = argparse.ArgumentParser()
        register(parser.add_subparsers())
        return parser.parse_args(argv).json_out

    def test_default_json_out_is_false(self) -> None:
        """Without any flag, ``json_out`` is ``False``."""
        assert self._json_out(["shard"]) is False

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` sets ``json_out`` to ``True``."""
        assert self._json_out(["shard", "--json"]) is True

    def test_j_shorthand_sets_json_out(self) -> None:
        """The ``-j`` shorthand sets ``json_out`` to ``True``."""
        assert self._json_out(["shard", "-j"]) is True