gabriel / muse public
test_cmd_shard.py python
868 lines 42.9 KB
Raw
sha256:1c4b3e3a9a1f300774c3ee662b572a698d5fd405bf765a71e6011a2e9c3eaaaa feat: Muse — version control for the agent era Human 73 days ago
1 """Comprehensive tests for ``muse coord shard``.
2
3 Coverage matrix
4 ---------------
5 Unit
6 ~~~~
7 * _build_import_edges — language filter, missing objects, stem matching
8 * _connected_components — isolated nodes, simple chain, full graph, cycle
9 * _greedy_partition — single shard, multi-shard balance, more shards than
10 components, empty components, symbol-count weighting verified
11
12 Integration
13 ~~~~~~~~~~~
14 * Empty repo (no commits) — exits 1 with "not found" message
15 * --agents 0 — exits 1 with clean error (no traceback)
16 * --agents 257 — exits 1 with clean error
17 * --agents at boundary 1 — accepted
18 * --agents at boundary 256 — accepted
19 * --agents validation fires before require_repo (no .muse dir needed)
20 * --agents 1 with mocked snapshot — text output with "Shard plan" header
21 * --format json — valid JSON with all required schema fields
22 * --json shorthand — same as --format json
23 * --language filter — restricts file selection (language_filter kwarg)
24 * --commit REF — passed through to resolve_commit_ref
25 * No snapshot manifest — exits 0 with "(no semantic files found)"
26 * Text output — header, shard lines, cross-shard edges, elapsed
27 * JSON: full_commit_id is the complete commit ID, not 8 chars
28 * JSON: total_files and total_symbols present and correct
29 * JSON: elapsed_seconds present and non-negative
30 * JSON: cross_shard_edges correct count
31 * JSON: no-files case still emits valid schema
32
33 Error shapes
34 ~~~~~~~~~~~~
35 * --agents out of range: JSON error has {"error": ..., "status": "bad_args"}
36 * --agents out of range: text error uses ❌ prefix on stderr
37 * commit not found: JSON error has {"error": ..., "status": "commit_not_found"}
38 * commit not found: text error uses ❌ prefix on stderr
39
40 Security
41 ~~~~~~~~
42 * --language value sanitised in text output (ANSI injection stripped)
43 * --language filter does not traverse filesystem
44 * --commit traversal ref handled gracefully (no crash)
45 * file paths in text output sanitised (ANSI stripped)
46
47 Stress
48 ~~~~~~
49 * 100-file mock snapshot partitioned into 8 shards — runs in < 2 s
50 * 500-file isolated nodes partitioned into 16 shards — runs in < 2 s
51 * 200-file dense graph (chain) into 4 shards — cross_shard_edges correct
* JSON output for 500 files at the maximum --agents value is a single compact line (no indent)
53
54 E2E
55 ~~~
56 * Single file → 1 shard, 0 cross-shard edges
57 * Two disconnected clusters → 2 shards, 0 cross-shard edges
58 * Connected pair split across 2 shards → cross_shard_edges ≥ 1
59 * shards_created = min(agents, components)
60 """
61
62 from __future__ import annotations
63
64 import io
65 import json
66 import pathlib
67 import sys
68 import time
69 import uuid
70
71 import pytest
72 from unittest.mock import patch, MagicMock
73
74 from tests.cli_test_helper import CliRunner, InvokeResult
75 from muse.cli.commands.shard import _MIN_AGENTS, _MAX_AGENTS
76
# Shared runner instance used for every CLI invocation in this module.
runner = CliRunner()
# NOTE(review): this placeholder is what every runner.invoke() call passes as
# its first argument — presumably the project's CliRunner helper locates the
# Muse CLI on its own; confirm against tests/cli_test_helper.
cli = None
79
80
81 # ── Fixtures ──────────────────────────────────────────────────────────────────
82
83
@pytest.fixture()
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Create a minimal on-disk Muse repository and return its root.

    The repository consists of a ``.muse`` directory containing a ``HEAD``
    file pointing at ``refs/heads/main`` and a ``repo.json`` carrying a
    random ``repo_id``.  ``MUSE_REPO_ROOT`` is set to the repository root for
    the duration of the test.
    """
    dot_muse = tmp_path / ".muse"
    dot_muse.mkdir()

    head_file = dot_muse / "HEAD"
    head_file.write_text("ref: refs/heads/main\n")

    repo_meta = {"repo_id": str(uuid.uuid4()), "name": "test-repo"}
    (dot_muse / "repo.json").write_text(json.dumps(repo_meta))

    monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))
    return tmp_path
94
95
96 # ── Minimal commit stub ───────────────────────────────────────────────────────
97
98
99 def _make_commit_stub(commit_id: str = "a1b2c3d4e5f60000") -> MagicMock:
100 stub = MagicMock()
101 stub.commit_id = commit_id
102 return stub
103
104
105 # ── Unit: _build_import_edges ─────────────────────────────────────────────────
106
107
class TestBuildImportEdges:
    """Unit tests for ``_build_import_edges``."""

    def test_empty_manifest_returns_no_edges(self, repo: pathlib.Path) -> None:
        """An empty manifest yields an empty edge list."""
        from muse.cli.commands.shard import _build_import_edges

        assert _build_import_edges(repo, {}, language_filter=None) == []

    def test_language_filter_excludes_unmatched_files(self, repo: pathlib.Path) -> None:
        """A ``Python`` filter over a mixed manifest with unreadable objects gives no edges."""
        from muse.cli.commands.shard import _build_import_edges

        snapshot = {"src/foo.py": "obj1", "src/bar.ts": "obj2"}
        # read_object returns None → no parse → no edges; language filter prunes ts
        with patch("muse.cli.commands.shard.read_object", return_value=None):
            produced = _build_import_edges(repo, snapshot, language_filter="Python")
        assert produced == []

    def test_missing_object_skipped_gracefully(self, repo: pathlib.Path) -> None:
        """A manifest entry whose object cannot be read contributes no edges."""
        from muse.cli.commands.shard import _build_import_edges

        snapshot = {"src/foo.py": "nonexistent-oid"}
        with patch("muse.cli.commands.shard.read_object", return_value=None):
            produced = _build_import_edges(repo, snapshot, language_filter=None)
        assert produced == []

    def test_import_edge_built_from_parsed_symbol(self, repo: pathlib.Path) -> None:
        """An ``import::bar`` symbol links ``src/foo.py`` and ``src/bar.py``."""
        from muse.cli.commands.shard import _build_import_edges

        snapshot = {"src/foo.py": "oid-foo", "src/bar.py": "oid-bar"}
        import_symbol = {
            "kind": "import",
            "qualified_name": "import::bar",
            "name": "bar",
        }
        parsed_tree = {"import::bar": import_symbol}
        with (
            patch("muse.cli.commands.shard.read_object", return_value=b"dummy"),
            patch("muse.cli.commands.shard.parse_symbols", return_value=parsed_tree),
        ):
            produced = _build_import_edges(repo, snapshot, language_filter=None)

        # (src/foo.py, src/bar.py) or (src/bar.py, src/foo.py) edge expected
        assert len(produced) >= 1
        endpoints = ("src/foo.py", "src/bar.py")
        assert any(all(fp in edge for fp in endpoints) for edge in produced)
150
151
152 # ── Unit: _connected_components ───────────────────────────────────────────────
153
154
class TestConnectedComponents:
    """Unit tests for ``_connected_components`` on small graphs."""

    def test_empty_files(self) -> None:
        """No files and no edges produce no components."""
        from muse.cli.commands.shard import _connected_components

        assert _connected_components([], []) == []

    def test_single_file_no_edges(self) -> None:
        """A lone file forms a single one-element component."""
        from muse.cli.commands.shard import _connected_components

        components = _connected_components(["a.py"], [])
        assert len(components) == 1
        assert components[0] == frozenset({"a.py"})

    def test_two_isolated_files(self) -> None:
        """Two files without edges stay in separate components."""
        from muse.cli.commands.shard import _connected_components

        components = _connected_components(["a.py", "b.py"], [])
        assert len(components) == 2

    def test_two_connected_files(self) -> None:
        """One edge merges both files into a single component."""
        from muse.cli.commands.shard import _connected_components

        nodes = ["a.py", "b.py"]
        links = [("a.py", "b.py")]
        components = _connected_components(nodes, links)
        assert len(components) == 1
        assert components[0] == frozenset({"a.py", "b.py"})

    def test_chain_of_three(self) -> None:
        """A chain a–b–c collapses into one three-file component."""
        from muse.cli.commands.shard import _connected_components

        nodes = ["a.py", "b.py", "c.py"]
        links = [("a.py", "b.py"), ("b.py", "c.py")]
        components = _connected_components(nodes, links)
        assert len(components) == 1
        assert components[0] == frozenset({"a.py", "b.py", "c.py"})

    def test_two_separate_components(self) -> None:
        """Two disjoint pairs yield two components of two files each."""
        from muse.cli.commands.shard import _connected_components

        nodes = ["a.py", "b.py", "c.py", "d.py"]
        links = [("a.py", "b.py"), ("c.py", "d.py")]
        components = _connected_components(nodes, links)
        assert len(components) == 2
        assert sorted(len(group) for group in components) == [2, 2]
194
195
196 # ── Unit: _greedy_partition ───────────────────────────────────────────────────
197
198
class TestGreedyPartition:
    """Unit tests for ``_greedy_partition``."""

    def test_single_shard_all_in_one(self) -> None:
        """With one shard requested, every component lands in that shard."""
        from muse.cli.commands.shard import _greedy_partition

        weights = {"a.py": 5, "b.py": 3}
        components = [frozenset({"a.py"}), frozenset({"b.py"})]
        shards = _greedy_partition(components, weights, n_shards=1)
        assert len(shards) == 1
        assert shards[0] == frozenset({"a.py", "b.py"})

    def test_balanced_across_shards(self) -> None:
        """Four equal-weight components split evenly over two shards."""
        from muse.cli.commands.shard import _greedy_partition

        names = [f"f{i}.py" for i in range(4)]
        components = [frozenset({name}) for name in names]
        weights = dict.fromkeys(names, 10)
        shards = _greedy_partition(components, weights, n_shards=2)
        loads = [sum(weights[fp] for fp in shard) for shard in shards]
        assert loads[0] == loads[1] == 20

    def test_more_shards_than_components(self) -> None:
        """Asking for more shards than components leaves only one shard populated."""
        from muse.cli.commands.shard import _greedy_partition

        shards = _greedy_partition([frozenset({"a.py"})], {"a.py": 2}, n_shards=4)
        # Only one shard is non-empty
        populated = [shard for shard in shards if shard]
        assert len(populated) == 1

    def test_empty_components_produces_empty_shards(self) -> None:
        """With no components, every returned shard is empty."""
        from muse.cli.commands.shard import _greedy_partition

        shards = _greedy_partition([], {}, n_shards=3)
        assert all(len(shard) == 0 for shard in shards)
229
230
231 # ── Integration ───────────────────────────────────────────────────────────────
232
233
class TestShardIntegration:
    """CLI-level tests for ``muse coord shard`` with the storage layer mocked out."""

    def test_empty_repo_no_commits_exits_nonzero(self, repo: pathlib.Path) -> None:
        """An unresolvable commit exits non-zero with a 'not found' message."""
        argv = ["coord", "shard", "--agents", "4"]
        with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None):
            outcome = runner.invoke(cli, argv)
        assert outcome.exit_code != 0
        assert "not found" in outcome.output.lower()

    def test_no_manifest_files_exits_0(self, repo: pathlib.Path) -> None:
        """A resolved commit with an empty manifest prints the no-semantic-files message."""
        stub = _make_commit_stub()
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "4"])
        assert outcome.exit_code == 0
        assert "no semantic files found" in outcome.output

    def test_agents_zero_exits_nonzero(self, repo: pathlib.Path) -> None:
        """``--agents 0`` is rejected with a non-zero exit code."""
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", "0"])
        assert outcome.exit_code != 0

    def test_agents_1_mocked_snapshot_text_output(self, repo: pathlib.Path) -> None:
        """Text output shows the 'Shard plan' header and the short commit id."""
        stub = _make_commit_stub("deadbeef00000000")
        symbols = {"src/foo.py": {"foo": {}, "bar": {}}}
        snapshot = {"src/foo.py": "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=snapshot),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "1"])
        assert outcome.exit_code == 0
        for fragment in ("Shard plan", "deadbeef"):
            assert fragment in outcome.output

    def test_format_json_produces_valid_json(self, repo: pathlib.Path) -> None:
        """``--format json`` emits parseable JSON carrying the core schema keys."""
        stub = _make_commit_stub("cafebabe00000000")
        symbols = {"src/a.py": {"x": {}}, "src/b.py": {"y": {}}}
        snapshot = {"src/a.py": "oid1", "src/b.py": "oid2"}
        argv = ["coord", "shard", "--agents", "2", "--format", "json"]
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=snapshot),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output.strip())
        expected_keys = (
            "schema_version",
            "commit",
            "agents",
            "shards_created",
            "cross_shard_edges",
            "shards",
        )
        for field in expected_keys:
            assert field in payload

    def test_json_shorthand_same_as_format_json(self, repo: pathlib.Path) -> None:
        """``--json`` and ``--format json`` agree on every structural field."""
        stub = _make_commit_stub("00112233aabbccdd")
        symbols = {"src/x.py": {"f": {}}}
        snapshot = {"src/x.py": "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=snapshot),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            long_form = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--format", "json"])
            short_form = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"])
        assert long_form.exit_code == 0
        assert short_form.exit_code == 0
        long_payload = json.loads(long_form.output.strip())
        short_payload = json.loads(short_form.output.strip())
        # elapsed_seconds differs between runs — compare structural fields only
        structural = (
            "schema_version", "commit", "full_commit_id", "agents",
            "shards_created", "total_files", "total_symbols",
            "cross_shard_edges", "shards",
        )
        for field in structural:
            assert long_payload[field] == short_payload[field]

    def test_language_filter_passed_through(self, repo: pathlib.Path) -> None:
        """``--language`` reaches ``symbols_for_snapshot`` as the ``language_filter`` kwarg."""
        stub = _make_commit_stub()
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}) as symbols_mock,
        ):
            runner.invoke(cli, ["coord", "shard", "--agents", "2", "--language", "Python"])
        symbols_mock.assert_called_once()
        passed_kwargs = symbols_mock.call_args[1]
        assert passed_kwargs.get("language_filter") == "Python"

    def test_commit_ref_forwarded_to_resolver(self, repo: pathlib.Path) -> None:
        """``--commit`` is handed to ``resolve_commit_ref`` as its fourth positional argument."""
        with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None) as resolver_mock:
            runner.invoke(cli, ["coord", "shard", "--commit", "HEAD~3"])
        resolver_mock.assert_called_once()
        positional = resolver_mock.call_args[0]
        assert positional[3] == "HEAD~3"

    def test_text_output_contains_cross_shard_edges(self, repo: pathlib.Path) -> None:
        """Text output includes a 'Cross-shard edges' line."""
        stub = _make_commit_stub("aabbccdd11223344")
        symbols = {"src/a.py": {"f": {}}, "src/b.py": {"g": {}}}
        snapshot = {"src/a.py": "oid1", "src/b.py": "oid2"}
        import_edges = [("src/a.py", "src/b.py")]
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=snapshot),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols),
            patch("muse.cli.commands.shard._build_import_edges", return_value=import_edges),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "2"])
        assert outcome.exit_code == 0
        assert "Cross-shard edges" in outcome.output
347
348
349 # ── Security ──────────────────────────────────────────────────────────────────
350
351
class TestShardSecurity:
    """Hostile-input tests for ``muse coord shard``: traversal strings and ANSI escapes."""

    def test_language_filter_does_not_open_filesystem(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """--language must not cause FS traversal beyond object store."""
        commit = _make_commit_stub()
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--language", "../../../etc/passwd"])
        # A path-like language value is treated as an ordinary (non-matching) filter.
        assert result.exit_code == 0
        assert "no semantic files found" in result.output

    def test_traversal_commit_ref_handled_gracefully(self, repo: pathlib.Path) -> None:
        """Malicious --commit ref should not crash the process."""
        with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None):
            result = runner.invoke(cli, ["coord", "shard", "--commit", "../../etc/shadow"])
        # The previous check (`exit_code == 0 or exit_code != 0`) was a tautology
        # and could never fail.  An unresolved ref must be a reported failure,
        # not an unhandled exception.
        assert result.exit_code != 0
        assert "Traceback" not in result.output

    def test_ansi_in_language_stripped_text_output(self, repo: pathlib.Path) -> None:
        """ANSI escape in --language value must not appear in text output."""
        evil_lang = "\x1b[31mPython\x1b[0m"
        commit = _make_commit_stub()
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--language", evil_lang])
        assert "\x1b[" not in result.output

    def test_ansi_in_file_path_stripped_text_output(self, repo: pathlib.Path) -> None:
        """ANSI escape codes in file paths must be stripped before display."""
        commit = _make_commit_stub("deadbeef00000000")
        evil_fp = "\x1b[31msrc/evil.py\x1b[0m"
        sym_map = {evil_fp: {"fn": {}}}
        manifest = {evil_fp: "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "1"])
        # The escape sequences are gone but the readable path remains.
        assert "\x1b[" not in result.output
        assert "src/evil.py" in result.output
398
399
400 # ── Input validation ──────────────────────────────────────────────────────────
401
402
class TestShardInputValidation:
    """Argument-validation and error-shape tests for ``muse coord shard``."""

    def test_agents_zero_exits_1_clean(self, repo: pathlib.Path) -> None:
        """``--agents 0`` exits 1 without printing a traceback."""
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", "0"])
        assert outcome.exit_code == 1
        assert "Traceback" not in outcome.output

    def test_agents_negative_exits_1(self, repo: pathlib.Path) -> None:
        """A negative agent count exits 1."""
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", "-1"])
        assert outcome.exit_code == 1

    def test_agents_over_max_exits_1(self, repo: pathlib.Path) -> None:
        """One more than ``_MAX_AGENTS`` exits 1."""
        too_many = str(_MAX_AGENTS + 1)
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", too_many])
        assert outcome.exit_code == 1

    def test_agents_at_min_accepted(self, repo: pathlib.Path) -> None:
        """The lower bound ``_MIN_AGENTS`` is accepted."""
        stub = _make_commit_stub()
        lowest = str(_MIN_AGENTS)
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", lowest])
        assert outcome.exit_code == 0

    def test_agents_at_max_accepted(self, repo: pathlib.Path) -> None:
        """The upper bound ``_MAX_AGENTS`` is accepted."""
        stub = _make_commit_stub()
        highest = str(_MAX_AGENTS)
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", highest])
        assert outcome.exit_code == 0

    def test_agents_invalid_json_error_shape(self, repo: pathlib.Path) -> None:
        """In JSON mode an out-of-range ``--agents`` yields ``{error, status: bad_args}``."""
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", "0", "--json"])
        assert outcome.exit_code == 1
        payload = json.loads(outcome.output.strip())
        assert "error" in payload
        assert payload["status"] == "bad_args"

    def test_agents_invalid_text_uses_tick_prefix(self, repo: pathlib.Path) -> None:
        """In text mode an out-of-range ``--agents`` error carries the ❌ marker."""
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", "0"])
        assert outcome.exit_code == 1
        assert "❌" in outcome.output

    def test_agents_invalid_text_no_stdout(self, repo: pathlib.Path) -> None:
        """Text-mode rejection of ``--agents 0`` prints no traceback."""
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", "0"])
        # CliRunner merges stderr into output — we just check no traceback
        assert "Traceback" not in outcome.output

    def test_commit_not_found_json_error_shape(self, repo: pathlib.Path) -> None:
        """In JSON mode an unresolved commit yields ``{error, status: commit_not_found}``."""
        with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None):
            outcome = runner.invoke(cli, ["coord", "shard", "--json"])
        assert outcome.exit_code == 1
        payload = json.loads(outcome.output.strip())
        assert "error" in payload
        assert payload["status"] == "commit_not_found"

    def test_commit_not_found_text_uses_tick_prefix(self, repo: pathlib.Path) -> None:
        """In text mode an unresolved commit error carries the ❌ marker."""
        with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None):
            outcome = runner.invoke(cli, ["coord", "shard"])
        assert outcome.exit_code == 1
        assert "❌" in outcome.output

    def test_agents_validation_fires_before_repo_lookup(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """An invalid ``--agents`` fails even when the root has no ``.muse`` directory."""
        monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))  # no .muse dir
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", "0"])
        assert outcome.exit_code == 1
        # Must not say "Repository not found"
        assert "repository" not in outcome.output.lower()
478
479
480 # ── JSON schema: new fields ───────────────────────────────────────────────────
481
482
class TestShardJsonSchema:
    """Checks on the fields of the ``--json`` output of ``muse coord shard``."""

    def _base_invoke(self, repo: pathlib.Path, agents: str = "2", extra: list[str] | None = None) -> tuple[InvokeResult, MagicMock]:
        """Run the command in JSON mode against a fixed two-file snapshot.

        Returns the invocation result together with the commit stub so callers
        can compare output against ``commit.commit_id``.
        """
        stub = _make_commit_stub("abcdef1234567890abcdef1234567890")
        symbols = {"src/a.py": {"f": {}}, "src/b.py": {"g": {}}}
        snapshot = {"src/a.py": "oid1", "src/b.py": "oid2"}
        argv = ["coord", "shard", "--agents", agents, "--json"]
        if extra:
            argv.extend(extra)
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=snapshot),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            outcome = runner.invoke(cli, argv)
            return outcome, stub

    def test_full_commit_id_present_and_full(self, repo: pathlib.Path) -> None:
        """``full_commit_id`` equals the whole commit id and is longer than 8 characters."""
        outcome, stub = self._base_invoke(repo)
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output.strip())
        assert "full_commit_id" in payload
        assert payload["full_commit_id"] == stub.commit_id
        assert len(payload["full_commit_id"]) > 8

    def test_commit_short_is_8_chars(self, repo: pathlib.Path) -> None:
        """``commit`` is the first eight characters of the commit id."""
        outcome, stub = self._base_invoke(repo)
        payload = json.loads(outcome.output.strip())
        assert payload["commit"] == stub.commit_id[:8]

    def test_total_files_correct(self, repo: pathlib.Path) -> None:
        """``total_files`` counts both files of the fixed snapshot."""
        outcome, _ = self._base_invoke(repo)
        payload = json.loads(outcome.output.strip())
        assert payload["total_files"] == 2

    def test_total_symbols_correct(self, repo: pathlib.Path) -> None:
        """``total_symbols`` sums the symbols across the snapshot."""
        outcome, _ = self._base_invoke(repo)
        payload = json.loads(outcome.output.strip())
        # sym_map has 1 symbol per file × 2 files
        assert payload["total_symbols"] == 2

    def test_elapsed_seconds_present_and_non_negative(self, repo: pathlib.Path) -> None:
        """``elapsed_seconds`` is present, a float, and not negative."""
        outcome, _ = self._base_invoke(repo)
        payload = json.loads(outcome.output.strip())
        assert "elapsed_seconds" in payload
        elapsed = payload["elapsed_seconds"]
        assert isinstance(elapsed, float)
        assert elapsed >= 0

    def test_json_is_single_line(self, repo: pathlib.Path) -> None:
        """The JSON document occupies exactly one non-blank output line."""
        outcome, _ = self._base_invoke(repo)
        lines = [ln for ln in outcome.output.splitlines() if ln.strip()]
        assert len(lines) == 1, f"JSON output must be one line, got {len(lines)}"

    def test_all_schema_fields_present(self, repo: pathlib.Path) -> None:
        """Every required schema key appears in the output."""
        outcome, _ = self._base_invoke(repo)
        payload = json.loads(outcome.output.strip())
        required = {
            "schema_version", "commit", "full_commit_id", "agents",
            "shards_created", "total_files", "total_symbols",
            "cross_shard_edges", "shards", "elapsed_seconds",
        }
        missing = required - payload.keys()
        assert not missing, f"Missing JSON fields: {missing}"

    def test_no_files_case_emits_valid_schema(self, repo: pathlib.Path) -> None:
        """An empty manifest still produces JSON with empty shards and zero totals."""
        stub = _make_commit_stub("abcdef1234567890abcdef1234567890")
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "4", "--json"])
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output.strip())
        assert payload["shards"] == []
        assert payload["total_files"] == 0
        assert payload["total_symbols"] == 0
        assert "elapsed_seconds" in payload

    def test_cross_shard_edges_zero_when_isolated(self, repo: pathlib.Path) -> None:
        """Two files with no import edges report zero cross-shard edges."""
        stub = _make_commit_stub()
        symbols = {"src/a.py": {"f": {}}, "src/b.py": {"g": {}}}
        snapshot = {"src/a.py": "oid1", "src/b.py": "oid2"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=snapshot),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"])
        payload = json.loads(outcome.output.strip())
        assert payload["cross_shard_edges"] == 0

    def test_shards_created_capped_at_components(self, repo: pathlib.Path) -> None:
        """With one file and ten agents requested, only one shard is created."""
        stub = _make_commit_stub()
        symbols = {"src/a.py": {"f": {}}}  # 1 file → 1 component
        snapshot = {"src/a.py": "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=snapshot),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "10", "--json"])
        payload = json.loads(outcome.output.strip())
        assert payload["shards_created"] == 1
590
591
592 # ── Unit: _connected_components edge cases ────────────────────────────────────
593
594
class TestConnectedComponentsExtra:
    """Edge-case tests for ``_connected_components``: cycles, stars, odd edges."""

    def test_cycle_resolved_as_single_component(self) -> None:
        """A → B → C → A cycle must yield one component of 3 files."""
        from muse.cli.commands.shard import _connected_components

        nodes = ["a.py", "b.py", "c.py"]
        ring = [("a.py", "b.py"), ("b.py", "c.py"), ("c.py", "a.py")]
        components = _connected_components(nodes, ring)
        assert len(components) == 1
        assert components[0] == frozenset({"a.py", "b.py", "c.py"})

    def test_star_topology(self) -> None:
        """Four spokes each linked to one hub form a single component."""
        from muse.cli.commands.shard import _connected_components

        spokes = [f"s{i}.py" for i in range(1, 5)]
        nodes = ["hub.py", *spokes]
        links = [(spoke, "hub.py") for spoke in spokes]
        components = _connected_components(nodes, links)
        assert len(components) == 1

    def test_self_loop_ignored(self) -> None:
        """A self-referencing edge still leaves the file as exactly one component."""
        from muse.cli.commands.shard import _connected_components

        components = _connected_components(["a.py"], [("a.py", "a.py")])
        assert len(components) == 1

    def test_extra_edge_node_not_in_files_ignored(self) -> None:
        """An edge referencing a file not in the files list should not crash."""
        from muse.cli.commands.shard import _connected_components

        # "ghost.py" is in the edge but not in files — adj.setdefault handles it
        components = _connected_components(["a.py"], [("a.py", "ghost.py")])
        # a.py is still returned as its own component
        assert any("a.py" in group for group in components)
626
627
628 # ── Unit: _greedy_partition extra ────────────────────────────────────────────
629
630
class TestGreedyPartitionExtra:
    """Additional ``_greedy_partition`` tests: coverage, weighting, oversized requests."""

    def test_all_files_accounted_for(self) -> None:
        """Every file in input components appears in exactly one shard."""
        from muse.cli.commands.shard import _greedy_partition

        count = 20
        names = [f"f{i}.py" for i in range(count)]
        components = [frozenset({name}) for name in names]
        weights = {name: idx + 1 for idx, name in enumerate(names)}
        shards = _greedy_partition(components, weights, n_shards=4)

        seen = set()
        for shard in shards:
            assert not (seen & shard), "File appears in more than one shard"
            seen |= shard
        assert seen == set(names)

    def test_symbol_count_weighting(self) -> None:
        """A component far heavier than the rest ends up alone in its shard."""
        from muse.cli.commands.shard import _greedy_partition

        heavy = frozenset({"big.py"})
        light = [frozenset({f"s{i}.py"}) for i in range(3)]
        weights = {"big.py": 100, "s0.py": 1, "s1.py": 1, "s2.py": 1}
        shards = _greedy_partition([heavy, *light], weights, n_shards=2)
        # big.py is in one shard by itself (100 >> 3*1)
        holder = next(shard for shard in shards if "big.py" in shard)
        assert holder == frozenset({"big.py"})

    def test_single_large_component_into_many_shards(self) -> None:
        """A single component stays whole even when four shards are requested."""
        from muse.cli.commands.shard import _greedy_partition

        component = frozenset({"a.py", "b.py", "c.py"})
        weights = {"a.py": 10, "b.py": 5, "c.py": 3}
        shards = _greedy_partition([component], weights, n_shards=4)
        populated = [shard for shard in shards if shard]
        assert len(populated) == 1
        assert populated[0] == component
666
667
668 # ── Stress tests ──────────────────────────────────────────────────────────────
669
670
class TestShardStressExtra:
    """Larger mocked snapshots: timing, chain graphs and compact JSON output."""

    def test_500_isolated_files_16_shards_under_2s(self, repo: pathlib.Path) -> None:
        """500 isolated files partitioned into 16 shards in < 2 s."""
        file_count = 500
        stub = _make_commit_stub()
        symbols = {f"src/mod{i}.py": {f"fn_{i}": {}} for i in range(file_count)}
        snapshot = {path: f"oid{idx}" for idx, path in enumerate(symbols)}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=snapshot),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            started = time.monotonic()
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "16", "--json"])
            elapsed = time.monotonic() - started
        assert outcome.exit_code == 0
        assert elapsed < 2.0, f"500 isolated files took {elapsed:.2f}s"
        payload = json.loads(outcome.output.strip())
        assert payload["total_files"] == file_count
        assert payload["total_symbols"] == file_count  # 1 sym per file

    def test_200_chain_files_4_shards_cross_edges_correct(self, repo: pathlib.Path) -> None:
        """Chain graph: mod0→mod1→…→mod199, 4 shards — cross_shard_edges is exact."""
        file_count = 200
        stub = _make_commit_stub()
        symbols = {f"src/mod{i}.py": {f"fn_{i}": {}} for i in range(file_count)}
        snapshot = {path: f"oid{idx}" for idx, path in enumerate(symbols)}
        # A chain: each file imports the next
        chain = [
            (f"src/mod{i}.py", f"src/mod{i+1}.py")
            for i in range(file_count - 1)
        ]
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=snapshot),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols),
            patch("muse.cli.commands.shard._build_import_edges", return_value=chain),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "4", "--json"])
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output.strip())
        # Chain is one big component → 1 shard, no cross-shard edges
        assert payload["shards_created"] == 1
        assert payload["cross_shard_edges"] == 0

    def test_json_compact_with_500_shards(self, repo: pathlib.Path) -> None:
        """With 500 files and ``--agents 256`` the JSON output is still one line."""
        file_count = 500
        stub = _make_commit_stub()
        symbols = {f"src/mod{i}.py": {f"fn_{i}": {}} for i in range(file_count)}
        snapshot = {path: f"oid{idx}" for idx, path in enumerate(symbols)}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=snapshot),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "256", "--json"])
        assert outcome.exit_code == 0
        non_blank = [ln for ln in outcome.output.splitlines() if ln.strip()]
        assert len(non_blank) == 1
730
731
732 # ── E2E tests ─────────────────────────────────────────────────────────────────
733
734
class TestShardE2E:
    """Command-level checks of ``coord shard`` output on small mocked snapshots.

    Every test patches commit resolution, the snapshot manifest, symbol
    extraction and import-edge construction in ``muse.cli.commands.shard`` and
    then inspects either the JSON payload or the text rendering.
    """

    def test_single_file_one_shard_zero_edges(self, repo: pathlib.Path) -> None:
        """One file with 4 agents requested → 1 shard, 0 cross-shard edges, 1 file."""
        commit = _make_commit_stub()
        sym_map = {"src/only.py": {"fn": {}}}
        manifest = {"src/only.py": "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "4", "--json"])
        # Surface the CLI output when the command fails, instead of a bare "1 != 0".
        assert result.exit_code == 0, result.output
        data = json.loads(result.output.strip())
        assert data["shards_created"] == 1
        assert data["cross_shard_edges"] == 0
        assert data["total_files"] == 1

    def test_two_disconnected_clusters_zero_cross_edges(self, repo: pathlib.Path) -> None:
        """Two disconnected clusters into 2 shards → 0 cross-shard edges."""
        commit = _make_commit_stub()
        sym_map = {
            "src/a.py": {"fa": {}}, "src/b.py": {"fb": {}},  # cluster 1
            "src/c.py": {"fc": {}}, "src/d.py": {"fd": {}},  # cluster 2
        }
        manifest = {k: f"oid{i}" for i, k in enumerate(sym_map)}
        edges = [("src/a.py", "src/b.py"), ("src/c.py", "src/d.py")]
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=edges),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output.strip())
        assert data["shards_created"] == 2
        assert data["cross_shard_edges"] == 0

    def test_connected_pair_forced_into_two_shards_has_edges(self, repo: pathlib.Path) -> None:
        """A→B plus an isolated C with agents=2 → the pair is not split, 0 cross-shard edges.

        Despite the method name, the expected count is zero: a and b form one
        connected component and are expected to land in the same shard.
        """
        commit = _make_commit_stub()
        sym_map = {
            "src/a.py": {"fa": {}}, "src/b.py": {"fb": {}},
            "src/c.py": {"fc": {}},  # third file so components>1 is possible
        }
        manifest = {k: f"oid{i}" for i, k in enumerate(sym_map)}
        # a→b are connected (1 component), c is isolated (1 component)
        # agents=2 → 2 shards; a+b are same component → same shard → 0 cross edges
        edges = [("src/a.py", "src/b.py")]
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=edges),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output.strip())
        # a+b are in the same component, never split → 0 cross-shard edges
        assert data["cross_shard_edges"] == 0

    def test_text_output_shows_elapsed(self, repo: pathlib.Path) -> None:
        """Text output contains the ``s)`` suffix this test uses as its elapsed-time marker."""
        commit = _make_commit_stub("cafebabe00000000")
        sym_map = {"src/x.py": {"f": {}}}
        manifest = {"src/x.py": "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "1"])
        assert result.exit_code == 0, result.output
        assert "s)" in result.output

    def test_text_output_perfect_isolation_message(self, repo: pathlib.Path) -> None:
        """When cross_shard_edges == 0, text output says 'Perfect isolation'."""
        commit = _make_commit_stub()
        sym_map = {"src/a.py": {"f": {}}}
        manifest = {"src/a.py": "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "1"])
        assert result.exit_code == 0, result.output
        assert "Perfect isolation" in result.output

    def test_symbol_count_sum_equals_total_symbols(self, repo: pathlib.Path) -> None:
        """Sum of shard symbol_counts must equal total_symbols in JSON."""
        commit = _make_commit_stub()
        # File i carries i + 1 symbols, so the five files have uneven weights (1..5).
        sym_map = {f"src/f{i}.py": {f"fn{j}": {} for j in range(i + 1)} for i in range(5)}
        manifest = {k: f"oid{i}" for i, k in enumerate(sym_map)}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "3", "--json"])
        assert result.exit_code == 0, result.output
        data = json.loads(result.output.strip())
        assert sum(s["symbol_count"] for s in data["shards"]) == data["total_symbols"]
841
842
843 # ── Stress ────────────────────────────────────────────────────────────────────
844
845
class TestShardStress:
    """Timing guard for a mid-sized, sparsely connected mocked snapshot."""

    def test_100_files_8_shards_under_2s(self, repo: pathlib.Path) -> None:
        """100 two-symbol files with sparse import edges shard for 8 agents in < 2 s."""
        file_total = 100
        paths = [f"src/mod{idx}.py" for idx in range(file_total)]
        symbols_by_file = {
            path: {f"fn_{idx}": {}, f"cls_{idx}": {}} for idx, path in enumerate(paths)
        }
        object_ids = {path: f"oid{idx}" for idx, path in enumerate(paths)}
        # Every fifth module imports its immediate successor: 0→1, 5→6, …, 95→96.
        import_edges = [
            (paths[src], paths[src + 1]) for src in range(0, file_total - 1, 5)
        ]
        stub_commit = _make_commit_stub()

        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub_commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=object_ids),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols_by_file),
            patch("muse.cli.commands.shard._build_import_edges", return_value=import_edges),
        ):
            # Only the CLI invocation itself falls inside the timed window.
            started = time.monotonic()
            result = runner.invoke(cli, ["coord", "shard", "--agents", "8", "--json"])
            elapsed = time.monotonic() - started

        assert result.exit_code == 0
        assert elapsed < 2.0
        payload = json.loads(result.output.strip())
        assert payload["shards_created"] <= 8
        # Two symbols per file, so the per-shard counts must add up to twice the file count.
        symbol_total = sum(shard["symbol_count"] for shard in payload["shards"])
        assert symbol_total == file_total * 2
File History 1 commit
sha256:1c4b3e3a9a1f300774c3ee662b572a698d5fd405bf765a71e6011a2e9c3eaaaa feat: Muse — version control for the agent era Human 73 days ago