test_core_test_history.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
7 days ago
| 1 | """Tests for muse.core.test_history — persistent test-run history. |
| 2 | |
| 3 | Coverage: |
| 4 | - Unit tests for serialisation (_record_to_dict / _record_from_dict). |
| 5 | - Round-trip tests: save + load round-trips for RunRecord. |
| 6 | - load_history returns empty list when file missing. |
| 7 | - append_run adds one record. |
| 8 | - summarize computes correct counts, flaky flag, and fail_streak. |
| 9 | - flaky_tests returns only flaky tests, sorted by fail_count. |
| 10 | - prioritize_targets puts streaky/flaky tests first. |
| 11 | - Corrupt file handling: load_history returns empty list on corruption. |
| 12 | - iso_now returns a valid ISO 8601 string. |
| 13 | - make_run_id returns a unique sha256:-prefixed run ID. |
| 14 | """ |
| 15 | |
| 16 | from __future__ import annotations |
| 17 | |
| 18 | import pathlib |
| 19 | |
| 20 | import pytest |
| 21 | |
| 22 | from muse.core.paths import muse_dir, test_history_path as _test_history_path |
| 23 | from muse.core.test_history import ( |
| 24 | HistorySummary, |
| 25 | RunRecord, |
| 26 | CaseRecord, |
| 27 | _record_from_dict, |
| 28 | _record_to_dict, |
| 29 | append_run, |
| 30 | flaky_tests, |
| 31 | iso_now, |
| 32 | load_history, |
| 33 | make_run_id, |
| 34 | prioritize_targets, |
| 35 | save_history, |
| 36 | summarize, |
| 37 | ) |
| 38 | |
| 39 | |
| 40 | # --------------------------------------------------------------------------- |
| 41 | # Fixtures |
| 42 | # --------------------------------------------------------------------------- |
| 43 | |
| 44 | |
def _make_record(
    run_id: str = "run-1",
    *,
    passed: int = 2,
    failed: int = 0,
    results: list[CaseRecord] | None = None,
) -> RunRecord:
    """Build a ``RunRecord`` fixture with a fixed timestamp, commit and branch.

    Args:
        run_id: Identifier stored on the record.
        passed: Value stored verbatim in the ``passed`` counter.
        failed: Value stored verbatim in the ``failed`` counter.
        results: Case records for the run. When omitted, two passing cases
            (``test_a`` at 10 ms and ``test_b`` at 20 ms) are generated.

    Returns:
        A record whose ``total`` equals ``len(results)`` and whose
        ``errored`` / ``skipped`` counters are always zero.
    """
    if results is None:
        # (node_id, duration_ms) for the two default passing cases.
        default_cases = (
            ("tests/test_foo.py::test_a", 10.0),
            ("tests/test_foo.py::test_b", 20.0),
        )
        results = [
            CaseRecord(
                node_id=node_id,
                outcome="passed",
                duration_ms=duration_ms,
                symbol_addresses=[],
            )
            for node_id, duration_ms in default_cases
        ]

    case_count = len(results)
    return RunRecord(
        run_id=run_id,
        timestamp="2026-03-26T12:00:00Z",
        commit_id="abc123",
        branch="main",
        results=results,
        total=case_count,
        passed=passed,
        failed=failed,
        errored=0,
        skipped=0,
    )
| 79 | |
| 80 | |
| 81 | # --------------------------------------------------------------------------- |
| 82 | # Unit tests — serialisation |
| 83 | # --------------------------------------------------------------------------- |
| 84 | |
| 85 | |
class TestRecordSerialization:
    """Serialisation checks for ``_record_to_dict`` / ``_record_from_dict``."""

    @staticmethod
    def _via_json(record: RunRecord) -> RunRecord | None:
        """Convert *record* to a dict, pass it through JSON text, and rebuild it.

        The JSON encode/decode step makes sure the intermediate document
        only contains JSON-representable values.
        """
        import json as _json

        encoded = _json.dumps(_record_to_dict(record))
        return _record_from_dict(_json.loads(encoded))

    def test_round_trip(self) -> None:
        """Scalar header fields and the result count survive a round-trip."""
        original = _make_record()
        rebuilt = self._via_json(original)
        assert rebuilt is not None
        assert rebuilt["run_id"] == original["run_id"]
        assert rebuilt["timestamp"] == original["timestamp"]
        assert rebuilt["commit_id"] == original["commit_id"]
        assert rebuilt["branch"] == original["branch"]
        assert rebuilt["total"] == original["total"]
        assert rebuilt["passed"] == original["passed"]
        assert len(rebuilt["results"]) == len(original["results"])

    def test_longrepr_round_trip(self) -> None:
        """The optional ``longrepr`` text on a case is kept by the round-trip."""
        failure_text = "AssertionError: expected 1, got 2"
        failing_case = CaseRecord(
            node_id="tests/test_foo.py::test_fail",
            outcome="failed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        failing_case["longrepr"] = failure_text

        rebuilt = self._via_json(
            _make_record(failed=1, passed=0, results=[failing_case])
        )
        assert rebuilt is not None
        first_case = rebuilt["results"][0]
        assert first_case.get("longrepr") == failure_text

    def test_none_fields_preserved(self) -> None:
        """``commit_id`` and ``branch`` set to None come back as None."""
        original = _make_record()
        original["commit_id"] = None
        original["branch"] = None

        rebuilt = self._via_json(original)
        assert rebuilt is not None
        assert rebuilt["commit_id"] is None
        assert rebuilt["branch"] is None

    def test_invalid_input_returns_none(self) -> None:
        """``_record_from_dict`` yields None for a str, a list, and None."""
        for bogus in ("not a dict", [], None):
            assert _record_from_dict(bogus) is None
| 142 | |
| 143 | |
| 144 | # --------------------------------------------------------------------------- |
| 145 | # I/O tests — load_history / save_history / append_run |
| 146 | # --------------------------------------------------------------------------- |
| 147 | |
| 148 | |
class TestLoadSave:
    """Disk I/O checks for ``load_history``, ``save_history`` and ``append_run``."""

    def test_load_missing_file(self, tmp_path: pathlib.Path) -> None:
        """With the muse directory present but no history file, the result is []."""
        muse_dir(tmp_path).mkdir()
        assert load_history(tmp_path) == []

    def test_save_and_load(self, tmp_path: pathlib.Path) -> None:
        """Two saved records are loaded back in the order they were saved."""
        muse_dir(tmp_path).mkdir()
        first = _make_record("r1")
        second = _make_record("r2", passed=1, failed=1)
        save_history(tmp_path, [first, second])

        reloaded = load_history(tmp_path)
        # One comparison covers both the count and the per-position run ids.
        assert [entry["run_id"] for entry in reloaded] == ["r1", "r2"]

    def test_append_run(self, tmp_path: pathlib.Path) -> None:
        """Appending to a one-record history yields two records, new one last."""
        muse_dir(tmp_path).mkdir()
        save_history(tmp_path, [_make_record("r1")])
        append_run(tmp_path, _make_record("r2"))

        reloaded = load_history(tmp_path)
        assert len(reloaded) == 2
        assert reloaded[-1]["run_id"] == "r2"

    def test_load_corrupt_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A history file holding non-JSON bytes loads as [] without raising."""
        history_file = _test_history_path(tmp_path)
        history_file.parent.mkdir(parents=True, exist_ok=True)
        history_file.write_bytes(b"\xff\xfe garbage bytes that are not valid JSON")
        assert load_history(tmp_path) == []

    def test_atomic_write(self, tmp_path: pathlib.Path) -> None:
        """After ``save_history`` no ``*.tmp`` file is left in the muse directory."""
        store_dir = muse_dir(tmp_path)
        store_dir.mkdir()
        save_history(tmp_path, [_make_record()])

        leftovers = list(muse_dir(tmp_path).glob("*.tmp"))
        assert leftovers == [], "Temp file should be removed after atomic write"
| 190 | |
| 191 | |
| 192 | # --------------------------------------------------------------------------- |
| 193 | # Analytics — summarize |
| 194 | # --------------------------------------------------------------------------- |
| 195 | |
| 196 | |
class TestSummarize:
    """Per-test aggregates produced by ``summarize``."""

    def test_empty_records(self) -> None:
        """An empty history summarises to an empty dict."""
        summaries = summarize([])
        assert summaries == {}

    def test_all_passed(self) -> None:
        """One passing run: a single pass, no failures, not flaky, no streak."""
        node = "tests/test_foo.py::test_a"
        passing_case = CaseRecord(
            node_id=node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        history = [_make_record(passed=1, failed=0, results=[passing_case])]

        entry = summarize(history)[node]
        assert entry["pass_count"] == 1
        assert entry["fail_count"] == 0
        assert entry["flaky"] is False
        assert entry["fail_streak"] == 0
        assert entry["last_outcome"] == "passed"

    def test_all_failed(self) -> None:
        """Three failing runs: three failures, a streak of three, not flaky."""
        node = "tests/test_foo.py::test_a"
        # The same results list is deliberately shared by every run.
        failing_cases = [
            CaseRecord(
                node_id=node,
                outcome="failed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        history = []
        for i in range(3):
            history.append(
                RunRecord(
                    run_id=f"r{i}",
                    timestamp=f"2026-03-{i+1:02d}T00:00:00Z",
                    commit_id=None,
                    branch=None,
                    results=failing_cases,
                    total=1,
                    passed=0,
                    failed=1,
                    errored=0,
                    skipped=0,
                )
            )

        entry = summarize(history)[node]
        assert entry["fail_count"] == 3
        assert entry["pass_count"] == 0
        assert entry["flaky"] is False
        assert entry["fail_streak"] == 3

    def test_flaky_detection(self) -> None:
        """One pass followed by one failure marks the test as flaky."""
        node = "tests/test_foo.py::test_flaky"
        passing_case = CaseRecord(
            node_id=node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        failing_case = CaseRecord(
            node_id=node,
            outcome="failed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        first_run = _make_record("r1", passed=1, failed=0, results=[passing_case])
        second_run = _make_record("r2", passed=0, failed=1, results=[failing_case])

        entry = summarize([first_run, second_run])[node]
        assert entry["flaky"] is True
        assert entry["pass_count"] == 1
        assert entry["fail_count"] == 1

    def test_fail_streak_stops_on_pass(self) -> None:
        """Two failures followed by a pass leave ``fail_streak`` at zero."""
        node = "tests/t.py::test_x"
        failing_cases = [
            CaseRecord(
                node_id=node,
                outcome="failed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        passing_cases = [
            CaseRecord(
                node_id=node,
                outcome="passed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        history = [
            _make_record("r1", passed=0, failed=1, results=failing_cases),
            _make_record("r2", passed=0, failed=1, results=failing_cases),
            _make_record("r3", passed=1, failed=0, results=passing_cases),
        ]

        entry = summarize(history)[node]
        # The latest run (r3) passed, so there is no current streak.
        assert entry["fail_streak"] == 0

    def test_avg_duration_excludes_skipped(self) -> None:
        """A skipped 0 ms run does not pull the 100 ms average down."""
        node = "tests/t.py::test_x"
        timed_case = CaseRecord(
            node_id=node,
            outcome="passed",
            duration_ms=100.0,
            symbol_addresses=[],
        )
        skipped_case = CaseRecord(
            node_id=node,
            outcome="skipped",
            duration_ms=0.0,
            symbol_addresses=[],
        )
        history = [
            _make_record("r1", passed=1, results=[timed_case]),
            _make_record("r2", passed=0, results=[skipped_case]),
        ]

        entry = summarize(history)[node]
        assert entry["avg_duration_ms"] == 100.0
| 327 | |
| 328 | |
| 329 | # --------------------------------------------------------------------------- |
| 330 | # Analytics — flaky_tests |
| 331 | # --------------------------------------------------------------------------- |
| 332 | |
| 333 | |
class TestFlakyTests:
    """Filtering behaviour of ``flaky_tests``."""

    def test_returns_only_flaky(self) -> None:
        """A test that passed then failed is reported; an always-passing one is not."""
        stable_node = "tests/t.py::test_stable"
        flaky_node = "tests/t.py::test_flaky"

        # The stable case object is reused in both runs.
        stable_case = CaseRecord(
            node_id=stable_node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        flaky_when_passing = CaseRecord(
            node_id=flaky_node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        flaky_when_failing = CaseRecord(
            node_id=flaky_node,
            outcome="failed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        history = [
            _make_record("r1", passed=2, results=[stable_case, flaky_when_passing]),
            _make_record(
                "r2", passed=1, failed=1, results=[stable_case, flaky_when_failing]
            ),
        ]

        reported = {summary["node_id"] for summary in flaky_tests(history)}
        assert flaky_node in reported
        assert stable_node not in reported

    def test_empty_returns_empty(self) -> None:
        """An empty history yields an empty list."""
        assert flaky_tests([]) == []
| 366 | |
| 367 | |
| 368 | # --------------------------------------------------------------------------- |
| 369 | # Analytics — prioritize_targets |
| 370 | # --------------------------------------------------------------------------- |
| 371 | |
| 372 | |
class TestPrioritizeTargets:
    """Ordering behaviour of ``prioritize_targets``."""

    def test_unknown_targets_returned_in_some_order(self) -> None:
        """With no history, the same targets come back (order is not checked)."""
        requested = ["tests/t.py::test_a", "tests/t.py::test_b"]
        returned = prioritize_targets(requested, [])
        assert sorted(returned) == sorted(requested)

    def test_streaky_test_comes_first(self) -> None:
        """A target whose only recorded run failed is ordered before a passing one."""
        failing_node = "tests/t.py::test_fail"
        passing_node = "tests/t.py::test_pass"

        failing_case = CaseRecord(
            node_id=failing_node,
            outcome="failed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        passing_case = CaseRecord(
            node_id=passing_node,
            outcome="passed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        history = [
            _make_record("r1", passed=0, failed=1, results=[failing_case]),
            _make_record("r2", passed=1, failed=0, results=[passing_case]),
        ]

        # The passing target is listed first on input so the reorder is observable.
        ordered = prioritize_targets([passing_node, failing_node], history)
        assert ordered[0] == failing_node

    def test_empty_targets(self) -> None:
        """No targets and no history yields an empty list."""
        assert prioritize_targets([], []) == []
| 404 | |
| 405 | |
| 406 | # --------------------------------------------------------------------------- |
| 407 | # Utilities |
| 408 | # --------------------------------------------------------------------------- |
| 409 | |
| 410 | |
class TestUtilities:
    """Format and uniqueness checks for ``iso_now`` and ``make_run_id``."""

    def test_iso_now_format(self) -> None:
        """iso_now returns a second-precision ISO 8601 UTC string."""
        from datetime import datetime

        ts = iso_now()
        assert "T" in ts
        assert ts.endswith("Z")
        assert len(ts) == 20  # "YYYY-MM-DDTHH:MM:SSZ"
        # The three checks above would also accept junk such as "T" * 19 + "Z".
        # strptime raises ValueError unless every field is a valid date/time
        # component in exactly the documented layout.
        datetime.strptime(ts, "%Y-%m-%dT%H:%M:%SZ")

    def test_make_run_id_is_unique(self) -> None:
        """make_run_id returns a different sha256: ID each time."""
        ids = {make_run_id() for _ in range(100)}
        assert len(ids) == 100

    def test_make_run_id_is_sha256(self) -> None:
        """make_run_id returns ``sha256:`` followed by 64 hexadecimal digits."""
        import string

        prefix = "sha256:"
        run_id = make_run_id()
        assert run_id.startswith(prefix), f"expected sha256: prefix, got {run_id!r}"
        assert len(run_id) == 71
        # Prefix and length alone do not prove the payload is a digest; also
        # require that every remaining character is a hex digit.
        digest = run_id[len(prefix):]
        assert all(ch in string.hexdigits for ch in digest), (
            f"expected hex digest after prefix, got {digest!r}"
        )
File History
1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
7 days ago