"""Tests for muse.core.test_history — persistent test-run history.

Coverage:
- Unit tests for serialisation (_record_to_dict / _record_from_dict).
- Round-trip tests: save + load round-trips for RunRecord.
- load_history returns empty list when file missing.
- append_run adds one record.
- summarize computes correct counts, flaky flag, and fail_streak.
- flaky_tests returns only flaky tests, sorted by fail_count.
- prioritize_targets puts streaky/flaky tests first.
- Corrupt file handling: load_history returns empty list on corruption.
- iso_now returns a valid ISO 8601 string.
- make_run_id returns a unique ``sha256:``-prefixed ID.
"""

from __future__ import annotations

import json
import pathlib

import pytest

from muse.core.paths import muse_dir, test_history_path as _test_history_path
from muse.core.test_history import (
    HistorySummary,
    RunRecord,
    CaseRecord,
    _record_from_dict,
    _record_to_dict,
    append_run,
    flaky_tests,
    iso_now,
    load_history,
    make_run_id,
    prioritize_targets,
    save_history,
    summarize,
)


# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


def _make_record(
    run_id: str = "run-1",
    *,
    passed: int = 2,
    failed: int = 0,
    results: list[CaseRecord] | None = None,
) -> RunRecord:
    """Build a RunRecord with fixed metadata for use in tests.

    Args:
        run_id: Identifier stored in the record.
        passed: Value for the ``passed`` counter. It is NOT derived from
            *results*; callers supplying custom results must pass it.
        failed: Value for the ``failed`` counter (same caveat as *passed*).
        results: Case records to embed. When omitted, two passing cases
            from ``tests/test_foo.py`` are used.

    Returns:
        A RunRecord whose ``total`` is ``len(results)`` and whose
        ``errored`` and ``skipped`` counters are zero.
    """
    if results is None:
        results = [
            CaseRecord(
                node_id="tests/test_foo.py::test_a",
                outcome="passed",
                duration_ms=10.0,
                symbol_addresses=[],
            ),
            CaseRecord(
                node_id="tests/test_foo.py::test_b",
                outcome="passed",
                duration_ms=20.0,
                symbol_addresses=[],
            ),
        ]
    return RunRecord(
        run_id=run_id,
        timestamp="2026-03-26T12:00:00Z",
        commit_id="abc123",
        branch="main",
        results=results,
        total=len(results),
        passed=passed,
        failed=failed,
        errored=0,
        skipped=0,
    )


def _json_round_trip(record: RunRecord) -> RunRecord | None:
    """Serialise *record*, push it through real JSON text, and deserialise it.

    Going through ``json.dumps`` / ``json.loads`` (rather than handing the
    dict straight back) makes sure the document only contains JSON-safe
    values, as it would after being written to disk.
    """
    doc = _record_to_dict(record)
    raw_value = json.loads(json.dumps(doc))
    return _record_from_dict(raw_value)


# ---------------------------------------------------------------------------
# Unit tests — serialisation
# ---------------------------------------------------------------------------


class TestRecordSerialization:
    """Unit tests for _record_to_dict / _record_from_dict."""

    def test_round_trip(self) -> None:
        """A RunRecord serialises and deserialises back to an equal value."""
        record = _make_record()
        restored = _json_round_trip(record)
        assert restored is not None
        assert restored["run_id"] == record["run_id"]
        assert restored["timestamp"] == record["timestamp"]
        assert restored["commit_id"] == record["commit_id"]
        assert restored["branch"] == record["branch"]
        assert restored["total"] == record["total"]
        assert restored["passed"] == record["passed"]
        assert len(restored["results"]) == len(record["results"])

    def test_longrepr_round_trip(self) -> None:
        """longrepr is preserved across serialisation."""
        result = CaseRecord(
            node_id="tests/test_foo.py::test_fail",
            outcome="failed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        # longrepr is set after construction, as an extra key on the record.
        result["longrepr"] = "AssertionError: expected 1, got 2"
        record = _make_record(failed=1, passed=0, results=[result])
        restored = _json_round_trip(record)
        assert restored is not None
        restored_result = restored["results"][0]
        assert restored_result.get("longrepr") == "AssertionError: expected 1, got 2"

    def test_none_fields_preserved(self) -> None:
        """commit_id=None and branch=None survive round-trip."""
        record = _make_record()
        record["commit_id"] = None
        record["branch"] = None
        restored = _json_round_trip(record)
        assert restored is not None
        assert restored["commit_id"] is None
        assert restored["branch"] is None

    def test_invalid_input_returns_none(self) -> None:
        """_record_from_dict returns None for non-dict input."""
        assert _record_from_dict("not a dict") is None
        assert _record_from_dict([]) is None
        assert _record_from_dict(None) is None


# ---------------------------------------------------------------------------
# I/O tests — load_history / save_history / append_run
# ---------------------------------------------------------------------------


class TestLoadSave:
    """Persistence tests for load_history, save_history and append_run."""

    def test_load_missing_file(self, tmp_path: pathlib.Path) -> None:
        """With no history file on disk, load_history yields an empty list."""
        muse_dir(tmp_path).mkdir()
        assert load_history(tmp_path) == []

    def test_save_and_load(self, tmp_path: pathlib.Path) -> None:
        """Records written by save_history come back from load_history in order."""
        muse_dir(tmp_path).mkdir()
        first = _make_record("r1")
        second = _make_record("r2", passed=1, failed=1)
        save_history(tmp_path, [first, second])

        history = load_history(tmp_path)
        assert len(history) == 2
        assert history[0]["run_id"] == "r1"
        assert history[1]["run_id"] == "r2"

    def test_append_run(self, tmp_path: pathlib.Path) -> None:
        """append_run extends an existing history by exactly one record."""
        muse_dir(tmp_path).mkdir()
        save_history(tmp_path, [_make_record("r1")])
        append_run(tmp_path, _make_record("r2"))

        history = load_history(tmp_path)
        assert len(history) == 2
        assert history[-1]["run_id"] == "r2"

    def test_load_corrupt_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A history file holding non-JSON bytes loads as [] instead of raising."""
        history_file = _test_history_path(tmp_path)
        history_file.parent.mkdir(parents=True, exist_ok=True)
        history_file.write_bytes(b"\xff\xfe garbage bytes that are not valid JSON")
        assert load_history(tmp_path) == []

    def test_atomic_write(self, tmp_path: pathlib.Path) -> None:
        """After save_history, no ``*.tmp`` file is left in the muse directory."""
        muse_dir(tmp_path).mkdir()
        save_history(tmp_path, [_make_record()])
        leftovers = list(muse_dir(tmp_path).glob("*.tmp"))
        assert leftovers == [], "Temp file should be removed after atomic write"


# ---------------------------------------------------------------------------
# Analytics — summarize
# ---------------------------------------------------------------------------


class TestSummarize:
    """Per-test aggregates computed by summarize."""

    def test_empty_records(self) -> None:
        """No run records in, empty mapping out."""
        assert summarize([]) == {}

    def test_all_passed(self) -> None:
        """A single passing run gives pass_count 1, no failures, no streak."""
        node_id = "tests/test_foo.py::test_a"
        case = CaseRecord(
            node_id=node_id,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        run = _make_record(passed=1, failed=0, results=[case])

        summary = summarize([run])[node_id]
        assert summary["pass_count"] == 1
        assert summary["fail_count"] == 0
        assert summary["flaky"] is False
        assert summary["fail_streak"] == 0
        assert summary["last_outcome"] == "passed"

    def test_all_failed(self) -> None:
        """Three failing runs give fail_count 3 and fail_streak 3, not flaky."""
        node_id = "tests/test_foo.py::test_a"
        # The same results list is shared by all three runs on purpose.
        failing = [
            CaseRecord(
                node_id=node_id,
                outcome="failed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        runs = [
            RunRecord(
                run_id=f"r{i}",
                timestamp=f"2026-03-{i+1:02d}T00:00:00Z",
                commit_id=None,
                branch=None,
                results=failing,
                total=1,
                passed=0,
                failed=1,
                errored=0,
                skipped=0,
            )
            for i in range(3)
        ]

        summary = summarize(runs)[node_id]
        assert summary["fail_count"] == 3
        assert summary["pass_count"] == 0
        assert summary["flaky"] is False
        assert summary["fail_streak"] == 3

    def test_flaky_detection(self) -> None:
        """One pass plus one failure for the same node marks it flaky."""
        node_id = "tests/test_foo.py::test_flaky"
        passing_case = CaseRecord(
            node_id=node_id,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        failing_case = CaseRecord(
            node_id=node_id,
            outcome="failed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        runs = [
            _make_record("r1", passed=1, failed=0, results=[passing_case]),
            _make_record("r2", passed=0, failed=1, results=[failing_case]),
        ]

        summary = summarize(runs)[node_id]
        assert summary["flaky"] is True
        assert summary["pass_count"] == 1
        assert summary["fail_count"] == 1

    def test_fail_streak_stops_on_pass(self) -> None:
        """Two failures followed by a pass leave fail_streak at zero."""
        node_id = "tests/t.py::test_x"
        failing = [
            CaseRecord(
                node_id=node_id,
                outcome="failed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        passing = [
            CaseRecord(
                node_id=node_id,
                outcome="passed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        runs = [
            _make_record("r1", passed=0, failed=1, results=failing),
            _make_record("r2", passed=0, failed=1, results=failing),
            _make_record("r3", passed=1, failed=0, results=passing),
        ]

        summary = summarize(runs)[node_id]
        # The last run in the list passed, so the streak is expected to be 0.
        assert summary["fail_streak"] == 0

    def test_avg_duration_excludes_skipped(self) -> None:
        """A skipped run's 0 ms duration does not drag avg_duration_ms down."""
        node_id = "tests/t.py::test_x"
        timed_case = CaseRecord(
            node_id=node_id,
            outcome="passed",
            duration_ms=100.0,
            symbol_addresses=[],
        )
        skipped_case = CaseRecord(
            node_id=node_id,
            outcome="skipped",
            duration_ms=0.0,
            symbol_addresses=[],
        )
        runs = [
            _make_record("r1", passed=1, results=[timed_case]),
            _make_record("r2", passed=0, results=[skipped_case]),
        ]

        summary = summarize(runs)[node_id]
        assert summary["avg_duration_ms"] == 100.0


# ---------------------------------------------------------------------------
# Analytics — flaky_tests
# ---------------------------------------------------------------------------


class TestFlakyTests:
    """Filtering behaviour of flaky_tests."""

    def test_returns_only_flaky(self) -> None:
        """A node with mixed outcomes is reported; an always-passing one is not."""
        stable_id = "tests/t.py::test_stable"
        flaky_id = "tests/t.py::test_flaky"
        stable_pass = CaseRecord(
            node_id=stable_id,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        flaky_pass = CaseRecord(
            node_id=flaky_id,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        flaky_fail = CaseRecord(
            node_id=flaky_id,
            outcome="failed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        runs = [
            _make_record("r1", passed=2, results=[stable_pass, flaky_pass]),
            _make_record("r2", passed=1, failed=1, results=[stable_pass, flaky_fail]),
        ]

        reported = {entry["node_id"] for entry in flaky_tests(runs)}
        assert flaky_id in reported
        assert stable_id not in reported

    def test_empty_returns_empty(self) -> None:
        """No history means no flaky tests."""
        assert flaky_tests([]) == []


# ---------------------------------------------------------------------------
# Analytics — prioritize_targets
# ---------------------------------------------------------------------------


class TestPrioritizeTargets:
    """Ordering behaviour of prioritize_targets."""

    def test_unknown_targets_returned_in_some_order(self) -> None:
        """Targets absent from history are all returned; their order is not checked."""
        targets = ["tests/t.py::test_a", "tests/t.py::test_b"]
        ordered = prioritize_targets(targets, [])
        assert sorted(ordered) == sorted(targets)

    def test_streaky_test_comes_first(self) -> None:
        """A target whose recorded outcome is a failure outranks a passing one."""
        failing_id = "tests/t.py::test_fail"
        passing_id = "tests/t.py::test_pass"
        failing_case = CaseRecord(
            node_id=failing_id,
            outcome="failed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        passing_case = CaseRecord(
            node_id=passing_id,
            outcome="passed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        runs = [
            _make_record("r1", passed=0, failed=1, results=[failing_case]),
            _make_record("r2", passed=1, failed=0, results=[passing_case]),
        ]

        # The passing target is listed first so the test proves a reorder.
        ordered = prioritize_targets([passing_id, failing_id], runs)
        assert ordered[0] == failing_id

    def test_empty_targets(self) -> None:
        """No targets and no history yields an empty list."""
        assert prioritize_targets([], []) == []


# ---------------------------------------------------------------------------
# Utilities
# ---------------------------------------------------------------------------


class TestUtilities:
    """Format checks for iso_now and make_run_id."""

    def test_iso_now_format(self) -> None:
        """iso_now yields a 20-character string containing 'T' and ending in 'Z'."""
        stamp = iso_now()
        assert "T" in stamp
        assert stamp.endswith("Z")
        assert len(stamp) == len("YYYY-MM-DDTHH:MM:SSZ")

    def test_make_run_id_is_unique(self) -> None:
        """One hundred consecutive make_run_id calls produce 100 distinct IDs."""
        generated = {make_run_id() for _ in range(100)}
        assert len(generated) == 100

    def test_make_run_id_is_sha256(self) -> None:
        """make_run_id output starts with ``sha256:`` and is 71 characters long."""
        run_id = make_run_id()
        assert run_id.startswith("sha256:"), f"expected sha256: prefix, got {run_id!r}"
        # 7-character prefix plus 64 more characters = 71.
        assert len(run_id) == len("sha256:") + 64