test_cmd.py
python
sha256:84df9126d09aeec0b8f1b908f0b06c10913feec28f3514b382efb1ba6d619385
refactor: rename StructuredMergePlugin to AddressedMergePlu…
Sonnet 4.6
minor
⚠ breaking
24 days ago
| 1 | """muse code test — symbol-graph–driven test selection and execution. |
| 2 | |
| 3 | The most powerful test command ever built for a version control system. |
| 4 | |
| 5 | Traditional test runners are file-aware at best. You run a test file, it |
| 6 | either passes or fails. You change 200 files and hope your CI matrix covers |
| 7 | the right subset. You run the full suite and wait ten minutes. |
| 8 | |
| 9 | ``muse code test`` is different. It knows exactly which symbols changed, |
| 10 | which tests call those symbols (via the committed call graph), and which |
| 11 | tests have historically been flaky. It runs the minimum set of tests needed |
| 12 | to validate your changes — and it prioritises failing tests to surface |
| 13 | problems in seconds, not minutes. |
| 14 | |
| 15 | How it works |
| 16 | ------------ |
| 17 | 1. **Diff** — compare HEAD snapshot symbols against the working tree to find |
| 18 | every modified, added, or deleted symbol. |
| 19 | 2. **Graph** — BFS through the call graph from every test function to find |
| 20 | which tests transitively call each changed symbol. |
| 21 | 3. **Prioritise** — order tests by risk: failure streaks first, flaky tests |
| 22 | second, unknown tests third, slow tests to the front of parallel queues. |
| 23 | 4. **Execute** — run the selected tests as isolated subprocesses with |
| 24 | configurable parallelism and a wall-clock budget. |
| 25 | 5. **Record** — persist pass/fail results to ``.muse/cache/test_history.json`` |
| 26 | for future prioritisation and flaky-test detection. |
| 27 | |
| 28 | Usage:: |
| 29 | |
| 30 | # Run tests for all symbols changed vs HEAD (smart selection, default) |
| 31 | muse code test |
| 32 | |
| 33 | # Run all tests (no selection, equivalent to pytest tests/) |
| 34 | muse code test --all |
| 35 | |
| 36 | # Select tests covering a specific symbol |
| 37 | muse code test --symbol "muse/core/store.py::read_commit" |
| 38 | |
| 39 | # Run a specific file or node ID directly |
| 40 | muse code test tests/test_core_store.py |
| 41 | muse code test "tests/test_core_store.py::TestReadCommit::test_returns_none_on_missing" |
| 42 | |
| 43 | # Control execution |
| 44 | muse code test --workers 4 --timeout 120 |
| 45 | |
| 46 | # Show what would be run without running it |
| 47 | muse code test --dry-run |
| 48 | |
| 49 | # Show historical summary (pass rates, flaky tests) |
| 50 | muse code test --history |
| 51 | muse code test --flaky |
| 52 | |
| 53 | # Run full CI gate suite (.muse/ci.toml) |
| 54 | muse code test --ci |
| 55 | |
| 56 | # Machine-readable output |
| 57 | muse code test --json |
| 58 | |
| 59 | Flags |
| 60 | ----- |
| 61 | ``TARGET [TARGET ...]`` |
| 62 | Optional pytest node IDs or file paths to run directly (bypasses graph |
| 63 | selection). |
| 64 | |
| 65 | ``--all, -a`` |
| 66 | Ignore the working-tree diff; run all discovered tests. |
| 67 | |
| 68 | ``--symbol ADDR, -s ADDR`` |
| 69 | Force-select tests covering the given symbol address |
| 70 | (``"path/to/file.py::Name"``). May be specified multiple times. |
| 71 | |
| 72 | ``--depth N, -d N`` |
| 73 | Call-graph BFS depth for test selection (default 3). |
| 74 | |
| 75 | ``--workers N, -w N`` |
| 76 | Number of parallel subprocess partitions (default 1). |
| 77 | |
| 78 | ``--timeout S`` |
| 79 | Wall-clock budget per partition in seconds (default 0 = unlimited). |
| 80 | |
| 81 | ``--dry-run`` |
| 82 | Print selected tests without executing them. |
| 83 | |
| 84 | ``--no-save`` |
| 85 | Do not persist results to ``.muse/cache/test_history.json``. |
| 86 | |
| 87 | ``--history`` |
| 88 | Print a summary of historical pass/fail rates and exit. |
| 89 | |
| 90 | ``--flaky`` |
| 91 | Print only tests with a history of intermittent failures and exit. |
| 92 | |
| 93 | ``--ci`` |
| 94 | Execute the full CI gate suite from ``.muse/ci.toml`` and exit. |
| 95 | |
| 96 | ``--extra ARGS`` |
| 97 | Extra arguments forwarded verbatim to pytest (e.g. ``-x``, ``-v``, |
| 98 | ``--timeout=30``). |
| 99 | |
| 100 | ``--json, -j`` |
| 101 | Emit a machine-readable JSON result and exit. |
| 102 | """ |
| 103 | |
| 104 | import argparse |
| 105 | import json |
| 106 | import logging |
| 107 | import pathlib |
| 108 | import sys |
| 109 | from collections.abc import Callable |
| 110 | from typing import NotRequired, TypedDict |
| 111 | |
| 112 | from muse.core.ci import CiRunResult, GateResult, load_ci_config, run_ci |
| 113 | from muse.core.envelope import EnvelopeJson, make_envelope |
| 114 | from muse.core.timing import start_timer |
| 115 | from muse.core.repo import require_repo |
| 116 | from muse.core.symbol_cache import load_symbol_cache |
| 117 | from muse.core.types import Manifest |
| 118 | from muse.core.refs import ( |
| 119 | get_head_commit_id, |
| 120 | read_current_branch, |
| 121 | ) |
| 122 | from muse.core.snapshots import get_commit_snapshot_manifest |
| 123 | from muse.core.test_history import ( |
| 124 | HistorySummary, |
| 125 | RunRecord, |
| 126 | CaseRecord, |
| 127 | append_run, |
| 128 | flaky_tests, |
| 129 | iso_now, |
| 130 | load_history, |
| 131 | make_run_id, |
| 132 | prioritize_targets, |
| 133 | summarize, |
| 134 | ) |
| 135 | from muse.core.test_runner import RunConfig, RunResult, CaseResult, run_tests |
| 136 | from muse.core.validation import sanitize_display |
| 137 | from muse.core.test_selection import ( |
| 138 | ChangedSymbol, |
| 139 | SelectionResult, |
| 140 | SelectionTarget, |
| 141 | changed_symbols_from_diff, |
| 142 | select_tests, |
| 143 | ) |
| 144 | |
| 145 | type _HistoryMap = dict[str, "HistorySummary"] |
| 146 | |
| 147 | logger = logging.getLogger(__name__) |
| 148 | |
| 149 | # --------------------------------------------------------------------------- |
| 150 | # JSON output types |
| 151 | # --------------------------------------------------------------------------- |
| 152 | |
| 153 | class _SelectionJson(TypedDict): |
| 154 | """JSON representation of the test-selection phase.""" |
| 155 | |
| 156 | changed_addresses: list[str] |
| 157 | covered_addresses: list[str] |
| 158 | uncovered_addresses: list[str] |
| 159 | coverage_fraction: float |
| 160 | fallback_used: bool |
| 161 | targets: list[str] |
| 162 | |
| 163 | class _RunJson(TypedDict): |
| 164 | """JSON representation of the test execution phase.""" |
| 165 | |
| 166 | run_id: str |
| 167 | exit_code: int |
| 168 | duration_ms: float |
| 169 | total: int |
| 170 | passed: int |
| 171 | failed: int |
| 172 | errored: int |
| 173 | skipped: int |
| 174 | timed_out: bool |
| 175 | json_report_available: bool |
| 176 | |
| 177 | class _TestResultJson(TypedDict): |
| 178 | """Per-test result in JSON output.""" |
| 179 | |
| 180 | node_id: str |
| 181 | outcome: str |
| 182 | duration_ms: float |
| 183 | longrepr: NotRequired[str] |
| 184 | |
| 185 | class _HistoryJson(TypedDict): |
| 186 | """JSON representation of a HistorySummary.""" |
| 187 | |
| 188 | node_id: str |
| 189 | total_runs: int |
| 190 | pass_count: int |
| 191 | fail_count: int |
| 192 | skip_count: int |
| 193 | flaky: bool |
| 194 | avg_duration_ms: float |
| 195 | last_outcome: str | None |
| 196 | last_run_timestamp: str | None |
| 197 | fail_streak: int |
| 198 | |
| 199 | class _CiGateJson(TypedDict): |
| 200 | """JSON representation of a single CI gate result.""" |
| 201 | |
| 202 | name: str |
| 203 | command: list[str] |
| 204 | exit_code: int |
| 205 | duration_ms: float |
| 206 | required: bool |
| 207 | passed: bool |
| 208 | timed_out: bool |
| 209 | stdout: str |
| 210 | stderr: str |
| 211 | warning: NotRequired[str] |
| 212 | |
class _CiJson(TypedDict):
    """Payload emitted under the ``ci`` key: the result of a full CI run."""

    # Overall verdict of the CI run.
    passed: bool
    # Timestamp copied from the CI run result.
    timestamp: str
    # Total duration of the CI run in milliseconds.
    duration_ms: float
    # Per-gate results, in the order the CI run reported them.
    gates: list[_CiGateJson]
| 220 | |
class _FullJson(EnvelopeJson):
    """Top-level JSON document printed by ``muse code test --json``.

    Extends the shared envelope with a mandatory ``mode`` and a set of
    optional sections; which sections appear depends on the mode.
    """

    # Execution mode; this module emits "run", "dry-run", "history" and "ci".
    mode: str
    # Test-selection metadata, set only when graph selection took place.
    selection: NotRequired[_SelectionJson]
    # Execution summary and per-test outcomes (run mode).
    run: NotRequired[_RunJson]
    results: NotRequired[list[_TestResultJson]]
    # Historical summaries (history mode).
    history: NotRequired[list[_HistoryJson]]
    # CI gate results (ci mode).
    ci: NotRequired[_CiJson]
    # Error message slot for failures.
    error: NotRequired[str]
| 231 | |
| 232 | # --------------------------------------------------------------------------- |
| 233 | # Registration |
| 234 | # --------------------------------------------------------------------------- |
| 235 | |
def register(
    subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]",
) -> None:
    """Attach the ``test`` subcommand to *subparsers*.

    The module docstring is used as the long description and :func:`run`
    is installed as the handler via ``set_defaults``.

    Arguments
    ---------
    TARGET
        Optional pytest node IDs or file paths (bypasses graph selection).
    --all, -a
        Run all tests regardless of working-tree diff.
    --symbol ADDR, -s ADDR
        Force-select tests covering this symbol address (repeatable).
    --depth N, -d N
        Call-graph BFS depth for test selection (default 3).
    --workers N, -w N
        Number of parallel subprocess partitions (default 1).
    --timeout S
        Wall-clock budget per partition in seconds (default 0 = unlimited).
    --dry-run, --no-save, --history, --flaky, --ci
        Plain boolean switches; see the per-flag help text below.
    --extra ARGS
        Extra arguments forwarded verbatim to pytest.
    --json, -j
        Emit machine-readable JSON.
    """
    parser = subparsers.add_parser(
        "test",
        help="Symbol-graph–driven test selection and execution.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = parser.add_argument

    # Positional targets and selection controls.
    add(
        "targets",
        nargs="*",
        metavar="TARGET",
        help="Optional pytest node IDs or file paths (bypasses graph selection).",
    )
    add(
        "--all",
        "-a",
        action="store_true",
        dest="run_all",
        help="Run all tests regardless of working-tree diff.",
    )
    add(
        "--symbol",
        "-s",
        action="append",
        dest="symbols",
        default=[],
        metavar="ADDR",
        help="Force-select tests covering this symbol address (repeatable).",
    )

    # Numeric tuning knobs.
    add(
        "--depth",
        "-d",
        type=int,
        default=3,
        metavar="N",
        help="Call-graph BFS depth for test selection (default 3).",
    )
    add(
        "--workers",
        "-w",
        type=int,
        default=1,
        metavar="N",
        help="Parallel subprocess partitions (default 1).",
    )
    add(
        "--timeout",
        type=float,
        default=0.0,
        metavar="S",
        help="Wall-clock budget per partition in seconds (default 0 = unlimited).",
    )

    # Plain boolean switches, registered in the order they appear in --help.
    simple_switches = (
        ("--dry-run", "Print selected tests without executing them."),
        ("--no-save", "Do not persist results to test history."),
        ("--history", "Print historical pass/fail summary and exit."),
        ("--flaky", "Print only flaky tests from history and exit."),
        ("--ci", "Run the full CI gate suite from .muse/ci.toml."),
    )
    for flag, help_text in simple_switches:
        add(flag, action="store_true", help=help_text)

    # Pass-through arguments and output format.
    add(
        "--extra",
        nargs=argparse.REMAINDER,
        default=[],
        metavar="ARGS",
        help="Extra arguments forwarded verbatim to pytest.",
    )
    add(
        "--json",
        "-j",
        action="store_true",
        dest="json_out",
        help="Emit machine-readable JSON.",
    )

    parser.set_defaults(func=run, json_out=False)
| 364 | |
| 365 | # --------------------------------------------------------------------------- |
| 366 | # History display |
| 367 | # --------------------------------------------------------------------------- |
| 368 | |
def _print_history(summaries: _HistoryMap, *, flaky_only: bool) -> None:
    """Print the per-test history table to stdout.

    Rows are ordered by descending ``fail_count``. When *flaky_only* is
    true, only summaries whose ``flaky`` flag is set are shown. If no rows
    remain, a one-line notice is printed instead of the table.
    """
    ranked = sorted(summaries.values(), key=lambda item: item["fail_count"], reverse=True)
    rows = [item for item in ranked if item["flaky"] or not flaky_only]

    if not rows:
        if flaky_only:
            print("No flaky tests found.")
        else:
            print("No test history recorded.")
        return

    header = f"{'NODE ID':<70} {'RUNS':>5} {'PASS':>5} {'FAIL':>5} {'FLAKY':>6} {'AVG ms':>8} {'STREAK':>7}"
    print(header)
    print("─" * len(header))
    for row in rows:
        marker = "✓" if row["flaky"] else ""
        cells = [
            f"{row['node_id']:<70}",
            f"{row['total_runs']:>5}",
            f"{row['pass_count']:>5}",
            f"{row['fail_count']:>5}",
            f"{marker:>6}",
            f"{row['avg_duration_ms']:>8.1f}",
            f"{row['fail_streak']:>7}",
        ]
        print(" ".join(cells))
| 393 | |
def _history_to_json(s: HistorySummary) -> _HistoryJson:
    """Copy the fields of a history summary into its JSON representation."""
    payload: _HistoryJson = {
        "node_id": s["node_id"],
        "total_runs": s["total_runs"],
        "pass_count": s["pass_count"],
        "fail_count": s["fail_count"],
        "skip_count": s["skip_count"],
        "flaky": s["flaky"],
        "avg_duration_ms": s["avg_duration_ms"],
        "last_outcome": s["last_outcome"],
        "last_run_timestamp": s["last_run_timestamp"],
        "fail_streak": s["fail_streak"],
    }
    return payload
| 407 | |
| 408 | # --------------------------------------------------------------------------- |
| 409 | # CI display |
| 410 | # --------------------------------------------------------------------------- |
| 411 | |
def _print_ci_result(result: CiRunResult) -> None:
    """Print a human-readable CI gate report to stdout.

    Each gate gets one status line. For a gate that did not pass, the
    last five lines of its stdout and the last three lines of its stderr
    are printed underneath. The report ends with the overall verdict and
    the total duration in seconds.
    """
    rule = "─" * 72
    print()
    print("CI gate results")
    print(rule)
    for gate in result["gates"]:
        if gate["passed"]:
            icon = "✅"
        elif gate["required"]:
            icon = "❌"
        else:
            # Optional gate that failed: warn rather than fail.
            icon = "⚠️ "
        ms = gate["duration_ms"]
        print(f" {icon} {gate['name']:<40} {ms:>8.0f} ms exit={gate['exit_code']}")
        if gate["passed"]:
            continue
        # Show only the tail of each stream to keep the report short.
        for stream, keep in (("stdout", 5), ("stderr", 3)):
            if gate[stream]:
                for line in gate[stream].strip().splitlines()[-keep:]:
                    print(f" {line}")
    print(rule)
    verdict = "✅ PASSED" if result["passed"] else "❌ FAILED"
    seconds = result["total_duration_ms"] / 1000.0
    print(f" {verdict} ({seconds:.1f} s total)")
    print()
| 433 | |
def _gate_to_json(g: GateResult) -> _CiGateJson:
    """Build the JSON form of one :class:`GateResult`.

    The optional ``warning`` entry is copied only when the gate has one.
    """
    payload: _CiGateJson = {
        "name": g["name"],
        "command": g["command"],
        "exit_code": g["exit_code"],
        "duration_ms": g["duration_ms"],
        "required": g["required"],
        "passed": g["passed"],
        "timed_out": g["timed_out"],
        "stdout": g["stdout"],
        "stderr": g["stderr"],
    }
    if "warning" in g:
        payload["warning"] = g["warning"]
    return payload
| 450 | |
def _ci_to_json(result: CiRunResult) -> _CiJson:
    """Build the JSON form of a :class:`CiRunResult`.

    The run's ``total_duration_ms`` is exposed as ``duration_ms``; every
    gate is converted with :func:`_gate_to_json`.
    """
    payload: _CiJson = {
        "passed": result["passed"],
        "timestamp": result["timestamp"],
        "duration_ms": result["total_duration_ms"],
        "gates": list(map(_gate_to_json, result["gates"])),
    }
    return payload
| 459 | |
| 460 | # --------------------------------------------------------------------------- |
| 461 | # Helpers for run recording |
| 462 | # --------------------------------------------------------------------------- |
| 463 | |
def _run_result_to_record(
    result: RunResult,
    *,
    commit_id: str | None,
    branch: str | None,
    selection: SelectionResult | None,
) -> RunRecord:
    """Turn a :class:`RunResult` into a :class:`RunRecord` for persistence.

    The record is stamped with ``iso_now()`` and carries the given
    *commit_id* and *branch*. When a *selection* is supplied, every case
    matching one of its targets is tagged with a copy of the selection's
    ``covered_addresses``; all other cases get an empty address list.
    """

    def _matches_selection(case_id: str) -> bool:
        # A case matches on an exact node ID, or when a target's file path
        # occurs anywhere inside the case's node ID.
        return any(
            target["node_id"] == case_id or target["file"] in case_id
            for target in selection["test_targets"]
        )

    def _to_case(case: CaseResult) -> CaseRecord:
        addresses: list[str] = []
        if selection is not None and _matches_selection(case["node_id"]):
            addresses = list(selection["covered_addresses"])

        entry = CaseRecord(
            node_id=case["node_id"],
            outcome=case["outcome"],
            duration_ms=case["duration_ms"],
            symbol_addresses=addresses,
        )
        if "longrepr" in case:
            entry["longrepr"] = case["longrepr"]
        return entry

    return RunRecord(
        run_id=result["run_id"],
        timestamp=iso_now(),
        commit_id=commit_id,
        branch=branch,
        results=[_to_case(case) for case in result["results"]],
        total=result["total"],
        passed=result["passed"],
        failed=result["failed"],
        errored=result["errored"],
        skipped=result["skipped"],
    )
| 504 | |
| 505 | # --------------------------------------------------------------------------- |
| 506 | # Main command handler |
| 507 | # --------------------------------------------------------------------------- |
| 508 | |
def run(args: argparse.Namespace) -> None:
    """Run symbol-graph–driven test selection and execution.

    Diffs the working tree against HEAD, follows the call graph to select
    covering tests, prioritises by failure history, and executes with
    configurable parallelism. Supports history inspection, dry-run, and
    full CI gate mode.

    If the working-tree diff cannot be computed, the command falls back to
    full test discovery (as with ``--all``) instead of reporting that
    nothing changed, so a broken diff can never produce a false pass.

    Agent quickstart::

        muse code test --json
        muse code test --all --json
        muse code test --symbol "billing.py::compute_total" --json
        muse code test --dry-run --json
        muse code test --ci --json
        muse code test --history --json

    JSON fields::

        mode          Execution mode: "run", "dry-run", "history", "ci".
        selection     Test-selection metadata (changed + covered addresses).
        run           Execution summary: total, passed, failed, errored, skipped.
        results       Per-test outcome list (node_id, outcome, duration_ms).
        history       Historical summaries per test (pass rate, flaky flag).
        ci            CI gate results (present in --ci mode only).
        error         Config or fatal error message (when non-zero exit).
        muse_version  Muse release that produced this output.
        schema        Envelope schema version (int).
        exit_code     0 on all tests pass, 1 on any test failure.
        duration_ms   Wall-clock milliseconds for the command.
        timestamp     ISO-8601 UTC timestamp of command completion.
        warnings      List of non-fatal advisory messages.

    Exit codes::

        0 All selected tests passed (or dry-run / history mode).
        1 One or more tests failed or errored.
    """
    elapsed = start_timer()
    root = require_repo()
    json_out: bool = args.json_out

    # ── History / flaky mode (read-only, no tests run) ───────────────────
    if args.history or args.flaky:
        records = load_history(root)
        if args.flaky:
            # Only the flaky subset is shown, so the full summary is not needed.
            sums = {s["node_id"]: s for s in flaky_tests(records)}
        else:
            sums = summarize(records)
        if json_out:
            out = _FullJson(
                **make_envelope(elapsed),
                mode="history",
                history=[_history_to_json(s) for s in sums.values()],
            )
            print(json.dumps(out))
        else:
            _print_history(sums, flaky_only=args.flaky)
        return

    # ── CI mode ──────────────────────────────────────────────────────────
    if args.ci:
        try:
            ci_config = load_ci_config(root)
        except ValueError as exc:
            _fatal(str(exc), json_out, elapsed)
            # _fatal exits the process; the return only guards against it
            # being replaced by something that returns.
            return
        ci_result = run_ci(root, ci_config)
        ci_code = 0 if ci_result["passed"] else 1
        if json_out:
            out = _FullJson(
                **make_envelope(elapsed, exit_code=ci_code),
                mode="ci",
                ci=_ci_to_json(ci_result),
            )
            print(json.dumps(out))
        else:
            _print_ci_result(ci_result)
        sys.exit(ci_code)

    # ── Determine what to run ─────────────────────────────────────────────
    explicit_targets: list[str] = list(args.targets or [])
    force_symbols: list[str] = list(args.symbols or [])
    run_all: bool = bool(args.run_all)

    # Try to load the HEAD snapshot for graph-based selection.
    branch: str | None = None
    commit_id: str | None = None
    manifest: Manifest | None = None

    try:
        branch = read_current_branch(root)
        commit_id = get_head_commit_id(root, branch)
        if commit_id:
            manifest = get_commit_snapshot_manifest(root, commit_id)
    except Exception as exc:
        # Best effort: without a manifest the command runs full discovery.
        logger.debug("test_cmd: could not load HEAD manifest: %s", exc)

    selection: SelectionResult | None = None
    final_targets: list[str]

    # Load the symbol cache once and share it across changed_symbols_from_diff
    # and select_tests — avoids double disk reads and double parse_symbols calls.
    shared_sym_cache = load_symbol_cache(root) if manifest is not None else None

    if explicit_targets:
        # User specified exact targets — run them directly.
        final_targets = explicit_targets
    elif run_all or manifest is None:
        # No snapshot or --all flag — discover all tests.
        final_targets = []
    elif force_symbols:
        # Force-select tests covering specific symbols.
        forced_changed: list[ChangedSymbol] = [
            ChangedSymbol(address=addr, change_kind="modified")
            for addr in force_symbols
        ]
        selection = select_tests(
            root,
            forced_changed,
            manifest,
            depth=args.depth,
            cache=shared_sym_cache,
        )
        final_targets = [t["node_id"] for t in selection["test_targets"]]
    else:
        # Default: diff working tree vs HEAD and select covering tests.
        diff_failed = False
        try:
            changed = changed_symbols_from_diff(root, manifest, cache=shared_sym_cache)
        except Exception as exc:
            logger.warning("⚠️ test_cmd: diff failed, falling back to --all: %s", exc)
            changed = []
            diff_failed = True

        if diff_failed:
            # An unknown diff is not "no changes": run full discovery, as
            # the warning above promises, rather than reporting success
            # without running anything.
            final_targets = []
        elif not changed:
            # Nothing changed in the working tree — there is nothing to test.
            # Running the full suite here would silently block for minutes.
            # Use --all to explicitly run every test file.
            if json_out:
                print(json.dumps({**make_envelope(elapsed), "mode": "run", "message": "no changes detected"}))
            else:
                print("\n✅ No changes detected — nothing to test.")
                print(" Use --all to run the full suite explicitly.\n")
            return
        else:
            selection = select_tests(
                root, changed, manifest, depth=args.depth, cache=shared_sym_cache
            )
            final_targets = [t["node_id"] for t in selection["test_targets"]]

    # Re-order targets using historical risk priority.
    if final_targets:
        records_for_priority = load_history(root)
        final_targets = prioritize_targets(final_targets, records_for_priority)

    # ── Dry-run ──────────────────────────────────────────────────────────
    if args.dry_run:
        _print_dry_run(selection, final_targets, json_out, elapsed)
        return

    # ── Execute ──────────────────────────────────────────────────────────
    extra: list[str] = list(args.extra or [])
    config = RunConfig(
        targets=final_targets,
        workers=args.workers,
        timeout_s=args.timeout,
        extra_args=extra,
        env_allowlist=[],
        cwd=root,
        stream_output=not json_out,
    )

    if not json_out:
        _print_pre_run(selection, final_targets)

    # When streaming, pytest writes directly to the terminal so progress_cb
    # dots would interleave badly. Use progress_cb only in captured (json) mode.
    result = run_tests(
        config,
        progress_cb=_progress_cb if json_out else None,
    )

    # ── Persist history ──────────────────────────────────────────────────
    if not args.no_save:
        record = _run_result_to_record(
            result,
            commit_id=commit_id,
            branch=branch,
            selection=selection,
        )
        try:
            append_run(root, record)
        except Exception as exc:
            # History is advisory; failing to save must not fail the run.
            logger.warning("⚠️ test_cmd: failed to save history: %s", exc)

    # Any runner exit code other than 0 or 1 is reported as plain failure (1).
    run_exit = result["exit_code"] if result["exit_code"] in {0, 1} else 1

    # ── Output ───────────────────────────────────────────────────────────
    if json_out:
        sel_json: _SelectionJson | None = None
        if selection is not None:
            sel_json = _SelectionJson(
                changed_addresses=selection["changed_addresses"],
                covered_addresses=selection["covered_addresses"],
                uncovered_addresses=selection["uncovered_addresses"],
                coverage_fraction=selection["coverage_fraction"],
                fallback_used=selection["fallback_used"],
                targets=[t["node_id"] for t in selection["test_targets"]],
            )

        run_json = _RunJson(
            run_id=result["run_id"],
            exit_code=result["exit_code"],
            duration_ms=result["duration_ms"],
            total=result["total"],
            passed=result["passed"],
            failed=result["failed"],
            errored=result["errored"],
            skipped=result["skipped"],
            timed_out=result["timed_out"],
            json_report_available=result["json_report_available"],
        )

        test_results: list[_TestResultJson] = []
        for r in result["results"]:
            tr = _TestResultJson(
                node_id=r["node_id"],
                outcome=r["outcome"],
                duration_ms=r["duration_ms"],
            )
            if "longrepr" in r:
                tr["longrepr"] = r["longrepr"]
            test_results.append(tr)

        out = _FullJson(
            **make_envelope(elapsed, exit_code=run_exit),
            mode="run",
            run=run_json,
            results=test_results,
        )
        if sel_json is not None:
            out["selection"] = sel_json
        print(json.dumps(out))
    else:
        _print_summary(result, selection)

    sys.exit(run_exit)
| 745 | |
| 746 | # --------------------------------------------------------------------------- |
| 747 | # Display helpers |
| 748 | # --------------------------------------------------------------------------- |
| 749 | |
| 750 | def _fatal(msg: str, json_out: bool, elapsed: Callable[[], float] | None = None) -> None: |
| 751 | if json_out: |
| 752 | env = make_envelope(elapsed, exit_code=1) if elapsed is not None else {} |
| 753 | print(json.dumps({**env, "error": msg})) |
| 754 | else: |
| 755 | print(f"❌ {msg}", file=sys.stderr) |
| 756 | sys.exit(1) |
| 757 | |
def _progress_cb(result: CaseResult) -> None:
    """Write a one-character progress marker for *result* to stderr.

    Markers are ``.`` (passed), ``F`` (failed), ``E`` (error) and ``s``
    (skipped); any other outcome is shown as ``?``. Writing to stderr keeps
    stdout reserved for the JSON document in ``--json`` mode.
    """
    markers = {"passed": ".", "failed": "F", "error": "E", "skipped": "s"}
    marker = markers.get(result["outcome"], "?")
    # No newline, flushed immediately so the marker appears as soon as it is written.
    print(marker, end="", flush=True, file=sys.stderr)
| 768 | |
def _print_pre_run(selection: SelectionResult | None, targets: list[str]) -> None:
    """Print a short banner describing what is about to be executed.

    Without a *selection*, the banner reports either the number of
    explicitly specified targets or that the full suite will run. With a
    *selection*, it reports the changed-symbol count, the selected-test
    count and the coverage percentage, lists up to five uncovered
    symbols, and notes when file-name heuristics were used.
    """
    if selection is None:
        if targets:
            print(f"\n🔍 Running {len(targets)} specified target(s)")
        else:
            print("\n🔍 Running full test suite (--all or no HEAD snapshot)")
        print()
        return

    changed_count = len(selection["changed_addresses"])
    target_count = len(targets)
    missing = len(selection["uncovered_addresses"])
    percent = selection["coverage_fraction"] * 100
    print(
        f"\n🔍 Changed symbols: {changed_count} → Selected tests: {target_count} "
        f"(coverage {percent:.0f}%)"
    )
    if missing:
        print(f" ⚠️ {missing} symbol(s) have no covering test:")
        # Cap the listing at five entries and summarise the remainder.
        for address in selection["uncovered_addresses"][:5]:
            print(f" • {sanitize_display(address)}")
        if missing > 5:
            print(f" … and {missing - 5} more")
    if selection["fallback_used"]:
        print(" ℹ️ File-name heuristics used for some targets (graph miss)")
    print()
| 793 | |
def _print_dry_run(
    selection: SelectionResult | None,
    targets: list[str],
    json_out: bool,
    elapsed: Callable[[], float] | None = None,
) -> None:
    """Report what would be executed, without running anything.

    In JSON mode a ``dry-run`` document is printed; it includes a
    ``selection`` section (whose ``targets`` are the given *targets*) only
    when a *selection* exists. In plain mode the pre-run banner is shown
    when a *selection* exists, followed by one ``pytest`` line per target,
    or a single full-discovery line when there are no targets.
    """
    if not json_out:
        if selection is not None:
            _print_pre_run(selection, targets)
        if not targets:
            print("Would run: pytest (full discovery)")
            return
        print("Would run:")
        for node_id in targets:
            print(f" pytest {node_id}")
        return

    selection_payload: _SelectionJson | None = None
    if selection is not None:
        selection_payload = _SelectionJson(
            changed_addresses=selection["changed_addresses"],
            covered_addresses=selection["covered_addresses"],
            uncovered_addresses=selection["uncovered_addresses"],
            coverage_fraction=selection["coverage_fraction"],
            fallback_used=selection["fallback_used"],
            targets=targets,
        )
    # Without a usable timer, the envelope is given a stub that reports 0.0.
    timer = elapsed if callable(elapsed) else lambda: 0.0
    document = _FullJson(**make_envelope(timer), mode="dry-run")
    if selection_payload is not None:
        document["selection"] = selection_payload
    print(json.dumps(document))
| 827 | |
def _print_summary(result: RunResult, selection: SelectionResult | None) -> None:
    """Print the closing report for a finished test run.

    Shows the outcome counters and duration, or a "counts unavailable"
    notice when there is neither a JSON report nor any counted test. A
    timeout warning follows if the run timed out. Finally, up to ten
    changed symbols without covering tests are listed as a reminder.
    """
    print()
    print("─" * 60)
    status = "✅" if result["exit_code"] == 0 else "❌"
    seconds = result["duration_ms"] / 1000.0

    have_counts = result["json_report_available"] or result["total"] > 0
    if not have_counts:
        # Stream mode without pytest-json-report: pytest output went straight
        # to the terminal but was never captured — counts are unavailable.
        # The exit code is still correct.
        print(f"{status} counts unavailable ({seconds:.2f} s)")
        print(
            " ℹ️ Install pytest-json-report for structured counts: "
            "pip install pytest-json-report"
        )
    else:
        segments = [
            f"{status} {result['passed']} passed",
            f"{result['failed']} failed",
            f"{result['errored']} error",
            f"{result['skipped']} skipped",
            f"({seconds:.2f} s)",
        ]
        print(" ".join(segments))

    if result["timed_out"]:
        print(" ⚠️ Run was terminated due to timeout")

    # Remind the user about changed symbols that no test covers.
    gaps = selection["uncovered_addresses"] if selection is not None else []
    if gaps:
        print(f"\n⚠️ Coverage gaps — {len(gaps)} changed symbol(s) have no tests:")
        for address in gaps[:10]:
            print(f" • {sanitize_display(address)}")
        if len(gaps) > 10:
            print(f" … and {len(gaps) - 10} more")
    print()
File History
1 commit
sha256:84df9126d09aeec0b8f1b908f0b06c10913feec28f3514b382efb1ba6d619385
refactor: rename StructuredMergePlugin to AddressedMergePlu…
Sonnet 4.6
minor
⚠
24 days ago