gabriel / muse public
test_cmd_shard.py python
891 lines 43.5 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Comprehensive tests for ``muse coord shard``.
2
3 Coverage matrix
4 ---------------
5 Unit
6 ~~~~
7 * _build_import_edges — language filter, missing objects, stem matching
8 * _connected_components — isolated nodes, simple chain, full graph, cycle
9 * _greedy_partition — single shard, multi-shard balance, more shards than
10 components, empty components, symbol-count weighting verified
11
12 Integration
13 ~~~~~~~~~~~
14 * Empty repo (no commits) — exits 1 with "not found" message
15 * --agents 0 — exits 1 with clean error (no traceback)
16 * --agents 257 — exits 1 with clean error
17 * --agents at boundary 1 — accepted
18 * --agents at boundary 256 — accepted
19 * --agents validation fires before require_repo (no .muse dir needed)
20 * --agents 1 with mocked snapshot — text output with "Shard plan" header
21 * --format json — valid JSON with all required schema fields
22 * --json shorthand — same as --format json
23 * --language filter — restricts file selection (language_filter kwarg)
24 * --commit REF — passed through to resolve_commit_ref
25 * No snapshot manifest — exits 0 with "(no semantic files found)"
26 * Text output — header, shard lines, cross-shard edges, elapsed
27 * JSON: commit is the complete commit ID
28 * JSON: total_files and total_symbols present and correct
29 * JSON: duration_ms present and non-negative
30 * JSON: cross_shard_edges correct count
31 * JSON: no-files case still emits valid schema
32
33 Error shapes
34 ~~~~~~~~~~~~
35 * --agents out of range: JSON error has {"error": ..., "status": "bad_args"}
36 * --agents out of range: text error uses ❌ prefix on stderr
37 * commit not found: JSON error has {"error": ..., "status": "commit_not_found"}
38 * commit not found: text error uses ❌ prefix on stderr
39
40 Security
41 ~~~~~~~~
42 * --language value sanitised in text output (ANSI injection stripped)
43 * --language filter does not traverse filesystem
44 * --commit traversal ref handled gracefully (no crash)
45 * file paths in text output sanitised (ANSI stripped)
46
47 Stress
48 ~~~~~~
49 * 100-file mock snapshot partitioned into 8 shards — runs in < 2 s
50 * 500-file isolated nodes partitioned into 16 shards — runs in < 2 s
51 * 200-file dense graph (chain) into 4 shards — cross_shard_edges correct
52 * JSON output for 500 files across 256 shards is a single compact line (no indent)
53
54 E2E
55 ~~~
56 * Single file → 1 shard, 0 cross-shard edges
57 * Two disconnected clusters → 2 shards, 0 cross-shard edges
58 * Connected pair split across 2 shards → cross_shard_edges ≥ 1
59 * shards_created = min(agents, components)
60 """
61
62 from __future__ import annotations
63
64 import io
65 import json
66 import pathlib
67 import sys
68 import time
69 import pytest
70 from unittest.mock import patch, MagicMock
71
72 from muse.core.types import fake_id
73 from muse.core.paths import muse_dir
74 from tests.cli_test_helper import CliRunner, InvokeResult
75 from muse.cli.commands.shard import _MIN_AGENTS, _MAX_AGENTS
76
# Module-wide CLI harness shared by every test below.
# NOTE(review): ``cli`` is handed to ``runner.invoke`` as its first argument
# throughout this file; presumably the project CliRunner does not need a real
# app object there — confirm against tests.cli_test_helper.
cli = None
runner = CliRunner()
79
80
81 # ── Fixtures ──────────────────────────────────────────────────────────────────
82
83
@pytest.fixture()
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Build a minimal repository skeleton under ``tmp_path`` and return its root.

    The directory returned by ``muse_dir(tmp_path)`` is created and populated
    with a ``HEAD`` file pointing at ``refs/heads/main`` and a ``repo.json``
    holding a fake repo id and the name ``test-repo``.  ``MUSE_REPO_ROOT`` is
    set to the temporary directory via ``monkeypatch``.
    """
    root = tmp_path
    meta_dir = muse_dir(root)
    meta_dir.mkdir()

    head_file = meta_dir / "HEAD"
    head_file.write_text("ref: refs/heads/main\n")

    repo_meta = {"repo_id": fake_id("repo"), "name": "test-repo"}
    config_file = meta_dir / "repo.json"
    config_file.write_text(json.dumps(repo_meta))

    monkeypatch.setenv("MUSE_REPO_ROOT", str(root))
    return root
94
95
96 # ── Minimal commit stub ───────────────────────────────────────────────────────
97
98
99 def _make_commit_stub(commit_id: str = "a1b2c3d4e5f60000") -> MagicMock:
100 stub = MagicMock()
101 stub.commit_id = commit_id
102 return stub
103
104
105 # ── Unit: _build_import_edges ─────────────────────────────────────────────────
106
107
class TestBuildImportEdges:
    """Unit tests for ``_build_import_edges``."""

    # Patch targets inside the shard command module.
    _READ_OBJECT = "muse.cli.commands.shard.read_object"
    _PARSE_SYMBOLS = "muse.cli.commands.shard.parse_symbols"

    def test_empty_manifest_returns_no_edges(self, repo: pathlib.Path) -> None:
        """An empty manifest produces an empty edge list."""
        from muse.cli.commands.shard import _build_import_edges as build_edges

        assert build_edges(repo, {}, language_filter=None) == []

    def test_language_filter_excludes_unmatched_files(self, repo: pathlib.Path) -> None:
        """With a Python filter and unreadable objects, no edges come back."""
        from muse.cli.commands.shard import _build_import_edges as build_edges

        snapshot = {"src/foo.py": "obj1", "src/bar.ts": "obj2"}
        # read_object returns None → no parse → no edges; language filter prunes ts
        with patch(self._READ_OBJECT, return_value=None):
            produced = build_edges(repo, snapshot, language_filter="Python")
        assert produced == []

    def test_missing_object_skipped_gracefully(self, repo: pathlib.Path) -> None:
        """A manifest entry whose object cannot be read yields no edges."""
        from muse.cli.commands.shard import _build_import_edges as build_edges

        snapshot = {"src/foo.py": "nonexistent-oid"}
        with patch(self._READ_OBJECT, return_value=None):
            produced = build_edges(repo, snapshot, language_filter=None)
        assert produced == []

    def test_import_edge_built_from_parsed_symbol(self, repo: pathlib.Path) -> None:
        """A parsed ``import bar`` symbol links ``src/foo.py`` and ``src/bar.py``."""
        from muse.cli.commands.shard import _build_import_edges as build_edges

        snapshot = {"src/foo.py": "oid-foo", "src/bar.py": "oid-bar"}
        import_symbol = {
            "kind": "import",
            "qualified_name": "import::bar",
            "name": "bar",
        }
        parsed_tree = {"import::bar": import_symbol}
        with (
            patch(self._READ_OBJECT, return_value=b"dummy"),
            patch(self._PARSE_SYMBOLS, return_value=parsed_tree),
        ):
            produced = build_edges(repo, snapshot, language_filter=None)
        # Direction is not checked: either (foo, bar) or (bar, foo) satisfies the test.
        assert len(produced) >= 1
        assert any(
            ("src/foo.py" in edge and "src/bar.py" in edge) for edge in produced
        )
150
151
152 # ── Unit: _connected_components ───────────────────────────────────────────────
153
154
class TestConnectedComponents:
    """Unit tests for ``_connected_components`` on small hand-built graphs."""

    def test_empty_files(self) -> None:
        """No files and no edges give no components."""
        from muse.cli.commands.shard import _connected_components as components_of

        assert components_of([], []) == []

    def test_single_file_no_edges(self) -> None:
        """A lone file forms a single one-element component."""
        from muse.cli.commands.shard import _connected_components as components_of

        components = components_of(["a.py"], [])
        assert len(components) == 1
        assert components[0] == frozenset({"a.py"})

    def test_two_isolated_files(self) -> None:
        """Two files without edges stay in separate components."""
        from muse.cli.commands.shard import _connected_components as components_of

        components = components_of(["a.py", "b.py"], [])
        assert len(components) == 2

    def test_two_connected_files(self) -> None:
        """One edge merges both files into one component."""
        from muse.cli.commands.shard import _connected_components as components_of

        link = ("a.py", "b.py")
        components = components_of(["a.py", "b.py"], [link])
        assert len(components) == 1
        assert components[0] == frozenset({"a.py", "b.py"})

    def test_chain_of_three(self) -> None:
        """a—b—c collapses into a single three-file component."""
        from muse.cli.commands.shard import _connected_components as components_of

        nodes = ["a.py", "b.py", "c.py"]
        links = [("a.py", "b.py"), ("b.py", "c.py")]
        components = components_of(nodes, links)
        assert len(components) == 1
        assert components[0] == frozenset({"a.py", "b.py", "c.py"})

    def test_two_separate_components(self) -> None:
        """Two disjoint pairs give two components of size two."""
        from muse.cli.commands.shard import _connected_components as components_of

        nodes = ["a.py", "b.py", "c.py", "d.py"]
        links = [("a.py", "b.py"), ("c.py", "d.py")]
        components = components_of(nodes, links)
        assert len(components) == 2
        assert sorted(len(group) for group in components) == [2, 2]
194
195
196 # ── Unit: _greedy_partition ───────────────────────────────────────────────────
197
198
class TestGreedyPartition:
    """Unit tests for ``_greedy_partition``."""

    def test_single_shard_all_in_one(self) -> None:
        """With one shard requested, every component lands in it."""
        from muse.cli.commands.shard import _greedy_partition as partition

        weights = {"a.py": 5, "b.py": 3}
        components = [frozenset({"a.py"}), frozenset({"b.py"})]
        shards = partition(components, weights, n_shards=1)
        assert len(shards) == 1
        assert shards[0] == frozenset({"a.py", "b.py"})

    def test_balanced_across_shards(self) -> None:
        """Four equal-weight singletons over two shards give 20 symbols each."""
        from muse.cli.commands.shard import _greedy_partition as partition

        weights = {f"f{i}.py": 10 for i in range(4)}
        components = [frozenset({f"f{i}.py"}) for i in range(4)]
        shards = partition(components, weights, n_shards=2)
        loads = [sum(weights[path] for path in shard) for shard in shards]
        assert loads[0] == loads[1] == 20

    def test_more_shards_than_components(self) -> None:
        """One component spread over four shards fills exactly one of them."""
        from muse.cli.commands.shard import _greedy_partition as partition

        shards = partition([frozenset({"a.py"})], {"a.py": 2}, n_shards=4)
        # Only one shard is non-empty
        populated = [shard for shard in shards if shard]
        assert len(populated) == 1

    def test_empty_components_produces_empty_shards(self) -> None:
        """No components means every returned shard is empty."""
        from muse.cli.commands.shard import _greedy_partition as partition

        shards = partition([], {}, n_shards=3)
        assert all(len(shard) == 0 for shard in shards)
229
230
231 # ── Integration ───────────────────────────────────────────────────────────────
232
233
class TestShardIntegration:
    """CLI-level tests for ``coord shard`` with commit and snapshot lookups mocked."""

    # Patch targets inside the shard command module.
    _RESOLVE = "muse.cli.commands.shard.resolve_commit_ref"
    _MANIFEST = "muse.cli.commands.shard.get_commit_snapshot_manifest"
    _SYMBOLS = "muse.cli.commands.shard.symbols_for_snapshot"
    _EDGES = "muse.cli.commands.shard._build_import_edges"

    def test_empty_repo_no_commits_exits_nonzero(self, repo: pathlib.Path) -> None:
        """Resolver returns None → non-zero exit and a 'not found' message on stderr."""
        argv = ["coord", "shard", "--agents", "4"]
        with patch(self._RESOLVE, return_value=None):
            outcome = runner.invoke(cli, argv)
        assert outcome.exit_code != 0
        assert "not found" in outcome.stderr.lower()

    def test_no_manifest_files_exits_0(self, repo: pathlib.Path) -> None:
        """A resolved commit with an empty manifest exits 0 with the no-files message."""
        stub = _make_commit_stub()
        argv = ["coord", "shard", "--agents", "4"]
        with (
            patch(self._RESOLVE, return_value=stub),
            patch(self._MANIFEST, return_value={}),
            patch(self._SYMBOLS, return_value={}),
        ):
            outcome = runner.invoke(cli, argv)
        assert outcome.exit_code == 0
        assert "no semantic files found" in outcome.output

    def test_agents_zero_exits_nonzero(self, repo: pathlib.Path) -> None:
        """``--agents 0`` is rejected with a non-zero exit code."""
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", "0"])
        assert outcome.exit_code != 0

    def test_agents_1_mocked_snapshot_text_output(self, repo: pathlib.Path) -> None:
        """Text output for one agent shows the plan header and the commit prefix."""
        stub = _make_commit_stub("deadbeef00000000")
        symbols = {"src/foo.py": {"foo": {}, "bar": {}}}
        snapshot = {"src/foo.py": "oid1"}
        with (
            patch(self._RESOLVE, return_value=stub),
            patch(self._MANIFEST, return_value=snapshot),
            patch(self._SYMBOLS, return_value=symbols),
            patch(self._EDGES, return_value=[]),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "1"])
        assert outcome.exit_code == 0
        assert "Shard plan" in outcome.output
        assert "deadbeef" in outcome.output

    def test_format_json_produces_valid_json(self, repo: pathlib.Path) -> None:
        """``--json`` output parses as JSON and carries the core schema keys."""
        stub = _make_commit_stub("cafebabe00000000")
        symbols = {"src/a.py": {"x": {}}, "src/b.py": {"y": {}}}
        snapshot = {"src/a.py": "oid1", "src/b.py": "oid2"}
        with (
            patch(self._RESOLVE, return_value=stub),
            patch(self._MANIFEST, return_value=snapshot),
            patch(self._SYMBOLS, return_value=symbols),
            patch(self._EDGES, return_value=[]),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"])
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output.strip())
        assert "schema" in payload
        assert "commit" in payload
        assert "agents" in payload
        assert "shards_created" in payload
        assert "cross_shard_edges" in payload
        assert "shards" in payload

    def test_json_shorthand_same_as_json_long(self, repo: pathlib.Path) -> None:
        """``-j`` and ``--json`` produce the same structural fields."""
        stub = _make_commit_stub("00112233aabbccdd")
        symbols = {"src/x.py": {"f": {}}}
        snapshot = {"src/x.py": "oid1"}
        with (
            patch(self._RESOLVE, return_value=stub),
            patch(self._MANIFEST, return_value=snapshot),
            patch(self._SYMBOLS, return_value=symbols),
            patch(self._EDGES, return_value=[]),
        ):
            long_run = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"])
            short_run = runner.invoke(cli, ["coord", "shard", "--agents", "2", "-j"])
        assert long_run.exit_code == 0
        assert short_run.exit_code == 0
        long_payload = json.loads(long_run.output.strip())
        short_payload = json.loads(short_run.output.strip())
        # duration_ms differs between runs — compare structural fields only
        structural_keys = (
            "schema", "commit", "agents",
            "shards_created", "total_files", "total_symbols",
            "cross_shard_edges", "shards",
        )
        for field in structural_keys:
            assert long_payload[field] == short_payload[field]

    def test_language_filter_passed_through(self, repo: pathlib.Path) -> None:
        """``--language Python`` reaches ``symbols_for_snapshot`` as ``language_filter``."""
        stub = _make_commit_stub()
        with (
            patch(self._RESOLVE, return_value=stub),
            patch(self._MANIFEST, return_value={}),
            patch(self._SYMBOLS, return_value={}) as symbols_mock,
        ):
            runner.invoke(cli, ["coord", "shard", "--agents", "2", "--language", "Python"])
        symbols_mock.assert_called_once()
        passed_kwargs = symbols_mock.call_args.kwargs
        assert passed_kwargs.get("language_filter") == "Python"

    def test_commit_ref_forwarded_to_resolver(self, repo: pathlib.Path) -> None:
        """``--commit HEAD~3`` is handed to the resolver as its third positional argument."""
        with patch(self._RESOLVE, return_value=None) as resolver_mock:
            runner.invoke(cli, ["coord", "shard", "--commit", "HEAD~3"])
        resolver_mock.assert_called_once()
        positional = resolver_mock.call_args.args
        assert positional[2] == "HEAD~3"

    def test_text_output_contains_cross_shard_edges(self, repo: pathlib.Path) -> None:
        """Text output includes the "Cross-shard edges" line."""
        stub = _make_commit_stub("aabbccdd11223344")
        symbols = {"src/a.py": {"f": {}}, "src/b.py": {"g": {}}}
        snapshot = {"src/a.py": "oid1", "src/b.py": "oid2"}
        import_edges = [("src/a.py", "src/b.py")]
        with (
            patch(self._RESOLVE, return_value=stub),
            patch(self._MANIFEST, return_value=snapshot),
            patch(self._SYMBOLS, return_value=symbols),
            patch(self._EDGES, return_value=import_edges),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "2"])
        assert outcome.exit_code == 0
        assert "Cross-shard edges" in outcome.output
347
348
349 # ── Security ──────────────────────────────────────────────────────────────────
350
351
class TestShardSecurity:
    """Hostile-input tests for ``coord shard``: traversal strings and ANSI escapes."""

    def test_language_filter_does_not_open_filesystem(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """A path-like ``--language`` value is treated as a plain filter string.

        With an empty manifest the command must still exit 0 and report that
        no semantic files were found.
        """
        commit = _make_commit_stub()
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--language", "../../../etc/passwd"])
        assert result.exit_code == 0
        assert "no semantic files found" in result.output

    def test_traversal_commit_ref_handled_gracefully(self, repo: pathlib.Path) -> None:
        """A path-traversal ``--commit`` ref must fail cleanly rather than crash."""
        with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None):
            result = runner.invoke(cli, ["coord", "shard", "--commit", "../../etc/shadow"])
        # The previous assertion (exit_code == 0 or exit_code != 0) was always
        # true.  The resolver is mocked to find nothing, so the command must
        # report failure, and it must do so without leaking a traceback on
        # either stream.
        assert result.exit_code != 0
        assert "Traceback" not in result.output
        assert "Traceback" not in result.stderr

    def test_ansi_in_language_stripped_text_output(self, repo: pathlib.Path) -> None:
        """ANSI escape in --language value must not appear in text output."""
        malicious_lang = "\x1b[31mPython\x1b[0m"
        commit = _make_commit_stub()
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--language", malicious_lang])
        assert "\x1b[" not in result.output

    def test_ansi_in_file_path_stripped_text_output(self, repo: pathlib.Path) -> None:
        """ANSI escape codes in file paths must be stripped before display."""
        commit = _make_commit_stub("deadbeef00000000")
        malicious_fp = "\x1b[31msrc/malicious.py\x1b[0m"
        sym_map = {malicious_fp: {"fn": {}}}
        manifest = {malicious_fp: "oid1"}
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", "1"])
        # The escape sequences must be gone while the readable path survives.
        assert "\x1b[" not in result.output
        assert "src/malicious.py" in result.output
398
399
400 # ── Input validation ──────────────────────────────────────────────────────────
401
402
class TestShardInputValidation:
    """Argument-validation and error-shape tests for ``coord shard``."""

    @staticmethod
    def _all_text(result: InvokeResult) -> str:
        """Return ``result.output`` and ``result.stderr`` joined into one string.

        Text-mode errors are asserted on ``stderr`` elsewhere in this class,
        so negative checks (no traceback, no repository error) must inspect
        both streams; looking at ``output`` alone could pass without checking
        the stream the error was written to.
        """
        return f"{result.output}\n{result.stderr}"

    def test_agents_zero_exits_1_clean(self, repo: pathlib.Path) -> None:
        """--agents 0 must exit 1 with a clean error message, no traceback."""
        result = runner.invoke(cli, ["coord", "shard", "--agents", "0"])
        assert result.exit_code == 1
        assert "Traceback" not in self._all_text(result)

    def test_agents_negative_exits_1(self, repo: pathlib.Path) -> None:
        """A negative ``--agents`` value exits 1."""
        result = runner.invoke(cli, ["coord", "shard", "--agents", "-1"])
        assert result.exit_code == 1

    def test_agents_over_max_exits_1(self, repo: pathlib.Path) -> None:
        """One above ``_MAX_AGENTS`` exits 1."""
        result = runner.invoke(cli, ["coord", "shard", "--agents", str(_MAX_AGENTS + 1)])
        assert result.exit_code == 1

    def test_agents_at_min_accepted(self, repo: pathlib.Path) -> None:
        """``_MIN_AGENTS`` itself is accepted (exit 0 on an empty snapshot)."""
        commit = _make_commit_stub()
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", str(_MIN_AGENTS)])
        assert result.exit_code == 0

    def test_agents_at_max_accepted(self, repo: pathlib.Path) -> None:
        """``_MAX_AGENTS`` itself is accepted (exit 0 on an empty snapshot)."""
        commit = _make_commit_stub()
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value={}),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value={}),
        ):
            result = runner.invoke(cli, ["coord", "shard", "--agents", str(_MAX_AGENTS)])
        assert result.exit_code == 0

    def test_agents_invalid_json_error_shape(self, repo: pathlib.Path) -> None:
        """JSON-mode error for --agents out of range must have {error, status}."""
        result = runner.invoke(cli, ["coord", "shard", "--agents", "0", "--json"])
        assert result.exit_code == 1
        data = json.loads(result.output.strip())
        assert "error" in data
        assert data["status"] == "bad_args"

    def test_agents_invalid_text_uses_tick_prefix(self, repo: pathlib.Path) -> None:
        """Text-mode error for a bad ``--agents`` carries the ❌ marker on stderr."""
        result = runner.invoke(cli, ["coord", "shard", "--agents", "0"])
        assert result.exit_code == 1
        assert "❌" in result.stderr

    def test_agents_invalid_text_no_stdout(self, repo: pathlib.Path) -> None:
        """Text-mode error for a bad ``--agents`` leaks no traceback on either stream."""
        result = runner.invoke(cli, ["coord", "shard", "--agents", "0"])
        # Whether the runner merges stderr into output is not relied upon:
        # both streams are inspected.
        assert "Traceback" not in self._all_text(result)

    def test_commit_not_found_json_error_shape(self, repo: pathlib.Path) -> None:
        """JSON-mode error for an unresolved commit has status ``commit_not_found``."""
        with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None):
            result = runner.invoke(cli, ["coord", "shard", "--json"])
        assert result.exit_code == 1
        data = json.loads(result.output.strip())
        assert "error" in data
        assert data["status"] == "commit_not_found"

    def test_commit_not_found_text_uses_tick_prefix(self, repo: pathlib.Path) -> None:
        """Text-mode error for an unresolved commit carries the ❌ marker on stderr."""
        with patch("muse.cli.commands.shard.resolve_commit_ref", return_value=None):
            result = runner.invoke(cli, ["coord", "shard"])
        assert result.exit_code == 1
        assert "❌" in result.stderr

    def test_agents_validation_fires_before_repo_lookup(self, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Invalid --agents exits before trying to open .muse/ (no repo needed)."""
        monkeypatch.setenv("MUSE_REPO_ROOT", str(tmp_path))  # no .muse dir
        result = runner.invoke(cli, ["coord", "shard", "--agents", "0"])
        assert result.exit_code == 1
        # Must not say "Repository not found" — on stdout or on stderr.
        assert "repository" not in self._all_text(result).lower()
478
479
480 # ── JSON schema: new fields ───────────────────────────────────────────────────
481
482
class TestShardJsonSchema:
    """Checks on the fields of the ``--json`` payload emitted by ``coord shard``."""

    # Patch targets inside the shard command module.
    _RESOLVE = "muse.cli.commands.shard.resolve_commit_ref"
    _MANIFEST = "muse.cli.commands.shard.get_commit_snapshot_manifest"
    _SYMBOLS = "muse.cli.commands.shard.symbols_for_snapshot"
    _EDGES = "muse.cli.commands.shard._build_import_edges"

    def _base_invoke(self, repo: pathlib.Path, agents: str = "2", extra: list[str] | None = None) -> tuple[InvokeResult, MagicMock]:
        """Run ``coord shard --json`` against a mocked two-file, edge-free snapshot.

        ``extra`` arguments, when given, are appended after ``--json``.
        Returns the invoke result together with the commit stub that the
        resolver was mocked to return.
        """
        stub = _make_commit_stub("abcdef1234567890abcdef1234567890")
        symbols = {"src/a.py": {"f": {}}, "src/b.py": {"g": {}}}
        snapshot = {"src/a.py": "oid1", "src/b.py": "oid2"}
        argv = ["coord", "shard", "--agents", agents, "--json", *(extra or [])]
        with (
            patch(self._RESOLVE, return_value=stub),
            patch(self._MANIFEST, return_value=snapshot),
            patch(self._SYMBOLS, return_value=symbols),
            patch(self._EDGES, return_value=[]),
        ):
            outcome = runner.invoke(cli, argv)
        return outcome, stub

    def test_commit_is_full_id(self, repo: pathlib.Path) -> None:
        """``commit`` equals the stub's full id and is longer than 8 characters."""
        outcome, stub = self._base_invoke(repo)
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output.strip())
        assert payload["commit"] == stub.commit_id
        assert len(payload["commit"]) > 8

    def test_total_files_correct(self, repo: pathlib.Path) -> None:
        """``total_files`` counts both mocked files."""
        outcome, _ = self._base_invoke(repo)
        payload = json.loads(outcome.output.strip())
        assert payload["total_files"] == 2

    def test_total_symbols_correct(self, repo: pathlib.Path) -> None:
        """``total_symbols`` is 2: one symbol in each of the two mocked files."""
        outcome, _ = self._base_invoke(repo)
        payload = json.loads(outcome.output.strip())
        assert payload["total_symbols"] == 2

    def test_duration_ms_present_and_non_negative(self, repo: pathlib.Path) -> None:
        """``duration_ms`` exists, is a float, and is not negative."""
        outcome, _ = self._base_invoke(repo)
        payload = json.loads(outcome.output.strip())
        assert "duration_ms" in payload
        duration = payload["duration_ms"]
        assert isinstance(duration, float)
        assert duration >= 0

    def test_json_is_single_line(self, repo: pathlib.Path) -> None:
        """The JSON payload occupies exactly one non-blank output line."""
        outcome, _ = self._base_invoke(repo)
        lines = [ln for ln in outcome.output.splitlines() if ln.strip()]
        assert len(lines) == 1, f"JSON output must be one line, got {len(lines)}"

    def test_all_schema_fields_present(self, repo: pathlib.Path) -> None:
        """Every required top-level key is present in the payload."""
        outcome, _ = self._base_invoke(repo)
        payload = json.loads(outcome.output.strip())
        required = {
            "schema", "commit", "agents",
            "shards_created", "total_files", "total_symbols",
            "cross_shard_edges", "shards", "duration_ms",
        }
        missing = required - payload.keys()
        assert not missing, f"Missing JSON fields: {missing}"

    def test_no_files_case_emits_valid_schema(self, repo: pathlib.Path) -> None:
        """An empty manifest still yields JSON with empty shards and zero totals."""
        stub = _make_commit_stub("abcdef1234567890abcdef1234567890")
        with (
            patch(self._RESOLVE, return_value=stub),
            patch(self._MANIFEST, return_value={}),
            patch(self._SYMBOLS, return_value={}),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "4", "--json"])
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output.strip())
        assert payload["shards"] == []
        assert payload["total_files"] == 0
        assert payload["total_symbols"] == 0
        assert "duration_ms" in payload

    def test_cross_shard_edges_zero_when_isolated(self, repo: pathlib.Path) -> None:
        """Two files with no import edges report zero cross-shard edges."""
        stub = _make_commit_stub()
        symbols = {"src/a.py": {"f": {}}, "src/b.py": {"g": {}}}
        snapshot = {"src/a.py": "oid1", "src/b.py": "oid2"}
        with (
            patch(self._RESOLVE, return_value=stub),
            patch(self._MANIFEST, return_value=snapshot),
            patch(self._SYMBOLS, return_value=symbols),
            patch(self._EDGES, return_value=[]),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "2", "--json"])
        payload = json.loads(outcome.output.strip())
        assert payload["cross_shard_edges"] == 0

    def test_shards_created_capped_at_components(self, repo: pathlib.Path) -> None:
        """Ten agents over a single-file snapshot still create only one shard."""
        stub = _make_commit_stub()
        symbols = {"src/a.py": {"f": {}}}  # 1 file → 1 component
        snapshot = {"src/a.py": "oid1"}
        with (
            patch(self._RESOLVE, return_value=stub),
            patch(self._MANIFEST, return_value=snapshot),
            patch(self._SYMBOLS, return_value=symbols),
            patch(self._EDGES, return_value=[]),
        ):
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "10", "--json"])
        payload = json.loads(outcome.output.strip())
        assert payload["shards_created"] == 1
584
585
586 # ── Unit: _connected_components edge cases ────────────────────────────────────
587
588
class TestConnectedComponentsExtra:
    """Edge-case graphs for ``_connected_components``: cycles, stars, odd edges."""

    def test_cycle_resolved_as_single_component(self) -> None:
        """A three-file cycle (a→b→c→a) yields one component holding all three."""
        from muse.cli.commands.shard import _connected_components as components_of

        nodes = ["a.py", "b.py", "c.py"]
        links = [("a.py", "b.py"), ("b.py", "c.py"), ("c.py", "a.py")]
        components = components_of(nodes, links)
        assert len(components) == 1
        assert components[0] == frozenset({"a.py", "b.py", "c.py"})

    def test_star_topology(self) -> None:
        """Four spokes each linked to one hub form a single component."""
        from muse.cli.commands.shard import _connected_components as components_of

        nodes = ["hub.py", "s1.py", "s2.py", "s3.py", "s4.py"]
        links = [(f"s{i}.py", "hub.py") for i in range(1, 5)]
        components = components_of(nodes, links)
        assert len(components) == 1

    def test_self_loop_ignored(self) -> None:
        """An edge from a file to itself still leaves exactly one component."""
        from muse.cli.commands.shard import _connected_components as components_of

        self_edge = ("a.py", "a.py")
        components = components_of(["a.py"], [self_edge])
        assert len(components) == 1

    def test_extra_edge_node_not_in_files_ignored(self) -> None:
        """An edge naming a file absent from the file list must not crash."""
        from muse.cli.commands.shard import _connected_components as components_of

        # "ghost.py" appears only in the edge, never in the files list.
        components = components_of(["a.py"], [("a.py", "ghost.py")])
        # a.py must still show up in some returned component.
        assert any("a.py" in group for group in components)
620
621
622 # ── Unit: _greedy_partition extra ────────────────────────────────────────────
623
624
class TestGreedyPartitionExtra:
    """Additional ``_greedy_partition`` checks: coverage, weighting, oversized requests."""

    def test_all_files_accounted_for(self) -> None:
        """Every input file ends up in exactly one shard."""
        from muse.cli.commands.shard import _greedy_partition as partition

        file_count = 20
        names = [f"f{i}.py" for i in range(file_count)]
        components = [frozenset({name}) for name in names]
        weights = {name: i + 1 for i, name in enumerate(names)}
        shards = partition(components, weights, n_shards=4)

        seen = set()
        for shard in shards:
            overlap = seen & shard
            assert not overlap, "File appears in more than one shard"
            seen |= shard
        assert seen == set(names)

    def test_symbol_count_weighting(self) -> None:
        """A 100-symbol file is left alone in its shard next to three 1-symbol files."""
        from muse.cli.commands.shard import _greedy_partition as partition

        heavy = frozenset({"big.py"})
        light = [frozenset({f"s{i}.py"}) for i in range(3)]
        weights = {"big.py": 100, "s0.py": 1, "s1.py": 1, "s2.py": 1}
        shards = partition([heavy] + light, weights, n_shards=2)
        # big.py is in one shard by itself (100 >> 3*1)
        holder = next(shard for shard in shards if "big.py" in shard)
        assert holder == frozenset({"big.py"})

    def test_single_large_component_into_many_shards(self) -> None:
        """One component over four shards stays whole in a single non-empty shard."""
        from muse.cli.commands.shard import _greedy_partition as partition

        component = frozenset({"a.py", "b.py", "c.py"})
        weights = {"a.py": 10, "b.py": 5, "c.py": 3}
        shards = partition([component], weights, n_shards=4)
        populated = [shard for shard in shards if shard]
        assert len(populated) == 1
        assert populated[0] == component
660
661
662 # ── Stress tests ──────────────────────────────────────────────────────────────
663
664
665 class TestShardStressExtra:
666 def test_500_isolated_files_16_shards_under_2s(self, repo: pathlib.Path) -> None:
667 """500 isolated files partitioned into 16 shards in < 2 s."""
668 N = 500
669 commit = _make_commit_stub()
670 sym_map = {f"src/mod{i}.py": {f"fn_{i}": {}} for i in range(N)}
671 manifest = {fp: f"oid{i}" for i, fp in enumerate(sym_map)}
672 with (
673 patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
674 patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
675 patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
676 patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
677 ):
678 t0 = time.monotonic()
679 result = runner.invoke(cli, ["coord", "shard", "--agents", "16", "--json"])
680 elapsed = time.monotonic() - t0
681 assert result.exit_code == 0
682 assert elapsed < 2.0, f"500 isolated files took {elapsed:.2f}s"
683 data = json.loads(result.output.strip())
684 assert data["total_files"] == N
685 assert data["total_symbols"] == N # 1 sym per file
686
def test_200_chain_files_4_shards_cross_edges_correct(self, repo: pathlib.Path) -> None:
    """A 200-file import chain requested with ``--agents 4`` stays in one shard.

    The stubbed edge list links every ``src/mod{i}.py`` to
    ``src/mod{i+1}.py``, so all files form a single connected component.
    The test expects the JSON plan to report exactly one shard and zero
    cross-shard edges.
    """
    file_count = 200
    paths = [f"src/mod{idx}.py" for idx in range(file_count)]
    symbols_by_file = {path: {f"fn_{idx}": {}} for idx, path in enumerate(paths)}
    blob_ids = {path: f"oid{idx}" for idx, path in enumerate(paths)}
    # Pair each file with its successor: mod0→mod1→…→mod199.
    chain_edges = list(zip(paths, paths[1:]))
    stub_commit = _make_commit_stub()

    with (
        patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub_commit),
        patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=blob_ids),
        patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols_by_file),
        patch("muse.cli.commands.shard._build_import_edges", return_value=chain_edges),
    ):
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", "4", "--json"])

    assert outcome.exit_code == 0
    payload = json.loads(outcome.output.strip())
    # One component can never be split, so only a single shard is expected.
    assert payload["shards_created"] == 1
    assert payload["cross_shard_edges"] == 0
707
def test_json_compact_with_500_shards(self, repo: pathlib.Path) -> None:
    """JSON output for a large plan is emitted as a single non-blank line.

    The "500" in the test name is the number of stubbed files; the run
    itself passes ``--agents 256``.  No import edges are stubbed, so every
    file is isolated.
    """
    file_count = 500
    stub_commit = _make_commit_stub()
    symbols_by_file = {
        f"src/mod{idx}.py": {f"fn_{idx}": {}} for idx in range(file_count)
    }
    blob_ids = {path: f"oid{pos}" for pos, path in enumerate(symbols_by_file)}

    with (
        patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub_commit),
        patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=blob_ids),
        patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols_by_file),
        patch("muse.cli.commands.shard._build_import_edges", return_value=[]),
    ):
        outcome = runner.invoke(cli, ["coord", "shard", "--agents", "256", "--json"])

    assert outcome.exit_code == 0
    # Whitespace-only lines are ignored when counting.
    non_blank_count = sum(1 for line in outcome.output.splitlines() if line.strip())
    assert non_blank_count == 1
724
725
726 # ── E2E tests ─────────────────────────────────────────────────────────────────
727
728
class TestShardE2E:
    """End-to-end runs of ``muse coord shard`` against stubbed repository data.

    Every test replaces commit resolution, the snapshot manifest, the symbol
    map and the import-edge builder in ``muse.cli.commands.shard`` with canned
    values, invokes the CLI through ``runner``, and checks the reported plan.
    """

    @staticmethod
    def _shard_output(
        commit: object,
        *,
        manifest: dict[str, str],
        sym_map: dict[str, dict[str, dict]],
        edges: list[tuple[str, str]],
        argv: list[str],
    ) -> str:
        """Run ``coord shard`` with the four data sources patched.

        Args:
            commit: Value returned by the patched ``resolve_commit_ref``.
            manifest: Value returned by the patched
                ``get_commit_snapshot_manifest``.
            sym_map: Value returned by the patched ``symbols_for_snapshot``.
            edges: Value returned by the patched ``_build_import_edges``.
            argv: CLI arguments appended after ``coord shard``.

        Returns:
            The captured output of the invocation.

        Raises:
            AssertionError: If the command exits with a non-zero code.
        """
        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=manifest),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=sym_map),
            patch("muse.cli.commands.shard._build_import_edges", return_value=edges),
        ):
            result = runner.invoke(cli, ["coord", "shard", *argv])
        assert result.exit_code == 0
        return result.output

    def test_single_file_one_shard_zero_edges(self, repo: pathlib.Path) -> None:
        """One file with ``--agents 4`` yields one shard and no cross-shard edges."""
        output = self._shard_output(
            _make_commit_stub(),
            manifest={"src/only.py": "oid1"},
            sym_map={"src/only.py": {"fn": {}}},
            edges=[],
            argv=["--agents", "4", "--json"],
        )
        data = json.loads(output.strip())
        assert data["shards_created"] == 1
        assert data["cross_shard_edges"] == 0
        assert data["total_files"] == 1

    def test_two_disconnected_clusters_zero_cross_edges(self, repo: pathlib.Path) -> None:
        """Two disconnected clusters into 2 shards → 0 cross-shard edges."""
        sym_map = {
            "src/a.py": {"fa": {}}, "src/b.py": {"fb": {}},  # cluster 1
            "src/c.py": {"fc": {}}, "src/d.py": {"fd": {}},  # cluster 2
        }
        output = self._shard_output(
            _make_commit_stub(),
            manifest={k: f"oid{i}" for i, k in enumerate(sym_map)},
            sym_map=sym_map,
            edges=[("src/a.py", "src/b.py"), ("src/c.py", "src/d.py")],
            argv=["--agents", "2", "--json"],
        )
        data = json.loads(output.strip())
        assert data["shards_created"] == 2
        assert data["cross_shard_edges"] == 0

    def test_connected_pair_forced_into_two_shards_has_edges(self, repo: pathlib.Path) -> None:
        """A→B plus an isolated C with agents=2 → 0 cross-shard edges.

        Despite the test name, no cross-shard edge is expected: a and b form
        one connected component and are expected to share a shard, while c is
        its own component.
        """
        sym_map = {
            "src/a.py": {"fa": {}}, "src/b.py": {"fb": {}},
            "src/c.py": {"fc": {}},  # third file so components>1 is possible
        }
        output = self._shard_output(
            _make_commit_stub(),
            manifest={k: f"oid{i}" for i, k in enumerate(sym_map)},
            sym_map=sym_map,
            # a→b are connected (1 component), c is isolated (1 component)
            edges=[("src/a.py", "src/b.py")],
            argv=["--agents", "2", "--json"],
        )
        data = json.loads(output.strip())
        # a+b are in the same component, never split → 0 cross-shard edges
        assert data["cross_shard_edges"] == 0

    def test_text_output_shows_elapsed(self, repo: pathlib.Path) -> None:
        """Text output contains the ``s)`` suffix of the elapsed-time figure."""
        output = self._shard_output(
            _make_commit_stub("cafebabe00000000"),
            manifest={"src/x.py": "oid1"},
            sym_map={"src/x.py": {"f": {}}},
            edges=[],
            argv=["--agents", "1"],
        )
        assert "s)" in output

    def test_text_output_perfect_isolation_message(self, repo: pathlib.Path) -> None:
        """When cross_shard_edges == 0, text output says 'Perfect isolation'."""
        output = self._shard_output(
            _make_commit_stub(),
            manifest={"src/a.py": "oid1"},
            sym_map={"src/a.py": {"f": {}}},
            edges=[],
            argv=["--agents", "1"],
        )
        assert "Perfect isolation" in output

    def test_symbol_count_sum_equals_total_symbols(self, repo: pathlib.Path) -> None:
        """Sum of shard symbol_counts must equal total_symbols in JSON."""
        # File i carries i + 1 symbols, so the per-file weights are 1..5.
        sym_map = {f"src/f{i}.py": {f"fn{j}": {} for j in range(i + 1)} for i in range(5)}
        output = self._shard_output(
            _make_commit_stub(),
            manifest={k: f"oid{i}" for i, k in enumerate(sym_map)},
            sym_map=sym_map,
            edges=[],
            argv=["--agents", "3", "--json"],
        )
        data = json.loads(output.strip())
        assert sum(s["symbol_count"] for s in data["shards"]) == data["total_symbols"]
835
836
837 # ── Stress ────────────────────────────────────────────────────────────────────
838
839
class TestShardStress:
    """Timing guard for sharding a moderately sized, stubbed snapshot."""

    def test_100_files_8_shards_under_2s(self, repo: pathlib.Path) -> None:
        """100 two-symbol files with sparse edges shard into at most 8 shards in < 2 s.

        Only the CLI invocation is timed; building the stub data happens
        before the clock starts.
        """
        file_total = 100
        paths = [f"src/mod{idx}.py" for idx in range(file_total)]
        symbols_by_file = {
            path: {f"fn_{idx}": {}, f"cls_{idx}": {}} for idx, path in enumerate(paths)
        }
        blob_ids = {path: f"oid{idx}" for idx, path in enumerate(paths)}
        # Every fifth file is linked to its immediate successor.
        sparse_edges = [
            (paths[idx], paths[idx + 1]) for idx in range(0, file_total - 1, 5)
        ]
        stub_commit = _make_commit_stub()

        with (
            patch("muse.cli.commands.shard.resolve_commit_ref", return_value=stub_commit),
            patch("muse.cli.commands.shard.get_commit_snapshot_manifest", return_value=blob_ids),
            patch("muse.cli.commands.shard.symbols_for_snapshot", return_value=symbols_by_file),
            patch("muse.cli.commands.shard._build_import_edges", return_value=sparse_edges),
        ):
            started = time.monotonic()
            outcome = runner.invoke(cli, ["coord", "shard", "--agents", "8", "--json"])
            duration = time.monotonic() - started

        assert outcome.exit_code == 0
        assert duration < 2.0
        payload = json.loads(outcome.output.strip())
        assert payload["shards_created"] <= 8
        # Each stubbed file contributes two symbols (one fn_, one cls_).
        symbol_total = sum(shard["symbol_count"] for shard in payload["shards"])
        assert symbol_total == file_total * 2
863
864
class TestRegisterFlags:
    """Argparse-level checks of the ``json_out`` flag set up by ``register``."""

    @staticmethod
    def _assert_json_out(expected: bool, *flags: str) -> None:
        """Parse ``shard`` plus *flags* on a fresh parser and check ``json_out``.

        The imports stay function-scoped, as in the tests they were lifted
        from, so nothing new is loaded at module import time.
        """
        import argparse
        from muse.cli.commands.shard import register

        parser = argparse.ArgumentParser()
        subparsers = parser.add_subparsers()
        register(subparsers)
        namespace = parser.parse_args(["shard", *flags])
        assert namespace.json_out is expected

    def test_default_json_out_is_false(self) -> None:
        """Without any flag, ``json_out`` is ``False``."""
        self._assert_json_out(False)

    def test_json_flag_sets_json_out(self) -> None:
        """``--json`` sets ``json_out`` to ``True``."""
        self._assert_json_out(True, "--json")

    def test_j_shorthand_sets_json_out(self) -> None:
        """The ``-j`` shorthand sets ``json_out`` to ``True``."""
        self._assert_json_out(True, "-j")
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 29 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago