gabriel / muse public
test_core_test_history.py python
428 lines 14.7 KB
Raw
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago
1 """Tests for muse.core.test_history — persistent test-run history.
2
3 Coverage:
4 - Unit tests for serialisation (_record_to_dict / _record_from_dict).
5 - Round-trip tests: save + load round-trips for RunRecord.
6 - load_history returns empty list when file missing.
7 - append_run adds one record.
8 - summarize computes correct counts, flaky flag, and fail_streak.
9 - flaky_tests returns only flaky tests, sorted by fail_count.
10 - prioritize_targets puts streaky/flaky tests first.
11 - Corrupt file handling: load_history returns empty list on corruption.
12 - iso_now returns a valid ISO 8601 string.
13 - make_run_id returns a unique sha256:-prefixed ID.
14 """
15
16 from __future__ import annotations
17
18 import pathlib
19
20 import pytest
21
22 from muse.core.paths import muse_dir, test_history_path as _test_history_path
23 from muse.core.test_history import (
24 HistorySummary,
25 RunRecord,
26 CaseRecord,
27 _record_from_dict,
28 _record_to_dict,
29 append_run,
30 flaky_tests,
31 iso_now,
32 load_history,
33 make_run_id,
34 prioritize_targets,
35 save_history,
36 summarize,
37 )
38
39
40 # ---------------------------------------------------------------------------
41 # Fixtures
42 # ---------------------------------------------------------------------------
43
44
def _make_record(
    run_id: str = "run-1",
    *,
    passed: int = 2,
    failed: int = 0,
    results: list[CaseRecord] | None = None,
) -> RunRecord:
    """Build a RunRecord with fixed metadata for use in the tests below.

    When *results* is omitted, two passing cases from ``tests/test_foo.py``
    are used.  ``total`` is always the number of results; *passed* and
    *failed* are stored exactly as given and are not cross-checked against
    the results, and ``errored`` / ``skipped`` are always 0.
    """
    cases = (
        results
        if results is not None
        else [
            CaseRecord(
                node_id=node_id,
                outcome="passed",
                duration_ms=duration_ms,
                symbol_addresses=[],
            )
            for node_id, duration_ms in (
                ("tests/test_foo.py::test_a", 10.0),
                ("tests/test_foo.py::test_b", 20.0),
            )
        ]
    )
    return RunRecord(
        run_id=run_id,
        timestamp="2026-03-26T12:00:00Z",
        commit_id="abc123",
        branch="main",
        results=cases,
        total=len(cases),
        passed=passed,
        failed=failed,
        errored=0,
        skipped=0,
    )
79
80
81 # ---------------------------------------------------------------------------
82 # Unit tests — serialisation
83 # ---------------------------------------------------------------------------
84
85
class TestRecordSerialization:
    """Unit tests for ``_record_to_dict`` / ``_record_from_dict``."""

    @staticmethod
    def _json_round_trip(record: RunRecord) -> RunRecord | None:
        """Serialise *record*, pass it through JSON text, and restore it.

        Encoding to a JSON string and decoding again (instead of handing the
        dict straight back) makes sure the serialised form survives a real
        ``json.dumps`` / ``json.loads`` cycle before it is restored.
        """
        import json as _json

        doc = _record_to_dict(record)
        return _record_from_dict(_json.loads(_json.dumps(doc)))

    def test_round_trip(self) -> None:
        """Every scalar field and the result count survive a round-trip."""
        record = _make_record()
        restored = self._json_round_trip(record)
        assert restored is not None
        assert restored["run_id"] == record["run_id"]
        assert restored["timestamp"] == record["timestamp"]
        assert restored["commit_id"] == record["commit_id"]
        assert restored["branch"] == record["branch"]
        assert restored["total"] == record["total"]
        assert restored["passed"] == record["passed"]
        # The remaining counters are part of the record too; without these
        # checks a serialiser that dropped or zeroed them would go unnoticed.
        assert restored["failed"] == record["failed"]
        assert restored["errored"] == record["errored"]
        assert restored["skipped"] == record["skipped"]
        assert len(restored["results"]) == len(record["results"])

    def test_longrepr_round_trip(self) -> None:
        """longrepr is preserved across serialisation."""
        result = CaseRecord(
            node_id="tests/test_foo.py::test_fail",
            outcome="failed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        # longrepr is set after construction rather than passed to CaseRecord.
        result["longrepr"] = "AssertionError: expected 1, got 2"

        record = _make_record(failed=1, passed=0, results=[result])
        restored = self._json_round_trip(record)
        assert restored is not None
        restored_result = restored["results"][0]
        assert restored_result.get("longrepr") == "AssertionError: expected 1, got 2"

    def test_none_fields_preserved(self) -> None:
        """commit_id=None and branch=None survive round-trip."""
        record = _make_record()
        record["commit_id"] = None
        record["branch"] = None
        restored = self._json_round_trip(record)
        assert restored is not None
        assert restored["commit_id"] is None
        assert restored["branch"] is None

    def test_invalid_input_returns_none(self) -> None:
        """_record_from_dict returns None for non-dict input."""
        assert _record_from_dict("not a dict") is None
        assert _record_from_dict([]) is None
        assert _record_from_dict(None) is None
142
143
144 # ---------------------------------------------------------------------------
145 # I/O tests — load_history / save_history / append_run
146 # ---------------------------------------------------------------------------
147
148
class TestLoadSave:
    """I/O tests for ``load_history``, ``save_history`` and ``append_run``."""

    def test_load_missing_file(self, tmp_path: pathlib.Path) -> None:
        """With only the muse directory present, load_history returns []."""
        muse_dir(tmp_path).mkdir()
        assert load_history(tmp_path) == []

    def test_save_and_load(self, tmp_path: pathlib.Path) -> None:
        """Records written by save_history come back in the same order."""
        muse_dir(tmp_path).mkdir()
        first = _make_record("r1")
        second = _make_record("r2", passed=1, failed=1)
        save_history(tmp_path, [first, second])

        reloaded = load_history(tmp_path)
        assert len(reloaded) == 2
        assert [rec["run_id"] for rec in reloaded] == ["r1", "r2"]

    def test_append_run(self, tmp_path: pathlib.Path) -> None:
        """append_run adds exactly one record after the existing ones."""
        muse_dir(tmp_path).mkdir()
        save_history(tmp_path, [_make_record("r1")])
        append_run(tmp_path, _make_record("r2"))

        reloaded = load_history(tmp_path)
        assert len(reloaded) == 2
        newest = reloaded[-1]
        assert newest["run_id"] == "r2"

    def test_load_corrupt_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A history file holding non-JSON bytes loads as [] without raising."""
        corrupt_file = _test_history_path(tmp_path)
        # Create whatever parent directories the history path needs.
        corrupt_file.parent.mkdir(parents=True, exist_ok=True)
        corrupt_file.write_bytes(b"\xff\xfe garbage bytes that are not valid JSON")
        assert load_history(tmp_path) == []

    def test_atomic_write(self, tmp_path: pathlib.Path) -> None:
        """After save_history, no ``*.tmp`` file is left in the muse directory.

        NOTE(review): this only checks for leftovers; it does not observe the
        write-then-rename sequence itself.
        """
        root = muse_dir(tmp_path)
        root.mkdir()
        save_history(tmp_path, [_make_record()])
        leftovers = list(root.glob("*.tmp"))
        assert leftovers == [], "Temp file should be removed after atomic write"
190
191
192 # ---------------------------------------------------------------------------
193 # Analytics — summarize
194 # ---------------------------------------------------------------------------
195
196
class TestSummarize:
    """Tests for the per-test statistics produced by ``summarize``."""

    def test_empty_records(self) -> None:
        """No run records in, empty dict out."""
        summary = summarize([])
        assert summary == {}

    def test_all_passed(self) -> None:
        """A single passing run yields pass_count 1 and no failure signals."""
        node = "tests/test_foo.py::test_a"
        passing_case = CaseRecord(
            node_id=node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        run = _make_record(passed=1, failed=0, results=[passing_case])

        stats = summarize([run])[node]
        assert stats["pass_count"] == 1
        assert stats["fail_count"] == 0
        assert stats["flaky"] is False
        assert stats["fail_streak"] == 0
        assert stats["last_outcome"] == "passed"

    def test_all_failed(self) -> None:
        """Three failing runs yield fail_count 3 and fail_streak 3, not flaky."""
        node = "tests/test_foo.py::test_a"
        failing_cases = [
            CaseRecord(
                node_id=node,
                outcome="failed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        # Three runs on consecutive days, all sharing the same failing result.
        runs = []
        for index in range(3):
            runs.append(
                RunRecord(
                    run_id=f"r{index}",
                    timestamp=f"2026-03-{index+1:02d}T00:00:00Z",
                    commit_id=None,
                    branch=None,
                    results=failing_cases,
                    total=1,
                    passed=0,
                    failed=1,
                    errored=0,
                    skipped=0,
                )
            )

        stats = summarize(runs)[node]
        assert stats["fail_count"] == 3
        assert stats["pass_count"] == 0
        assert stats["flaky"] is False
        assert stats["fail_streak"] == 3

    def test_flaky_detection(self) -> None:
        """One pass plus one failure of the same test marks it flaky."""
        node = "tests/test_foo.py::test_flaky"
        passed_case = CaseRecord(
            node_id=node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        failed_case = CaseRecord(
            node_id=node,
            outcome="failed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        runs = [
            _make_record("r1", passed=1, failed=0, results=[passed_case]),
            _make_record("r2", passed=0, failed=1, results=[failed_case]),
        ]

        stats = summarize(runs)[node]
        assert stats["flaky"] is True
        assert stats["pass_count"] == 1
        assert stats["fail_count"] == 1

    def test_fail_streak_stops_on_pass(self) -> None:
        """Two failures followed by a pass leave fail_streak at 0."""
        node = "tests/t.py::test_x"
        failing = [
            CaseRecord(
                node_id=node,
                outcome="failed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        passing = [
            CaseRecord(
                node_id=node,
                outcome="passed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        runs = [
            _make_record(run_id, passed=n_passed, failed=n_failed, results=cases)
            for run_id, n_passed, n_failed, cases in (
                ("r1", 0, 1, failing),
                ("r2", 0, 1, failing),
                ("r3", 1, 0, passing),
            )
        ]

        stats = summarize(runs)[node]
        # The last run in the list passed, so the streak is expected to be 0.
        assert stats["fail_streak"] == 0

    def test_avg_duration_excludes_skipped(self) -> None:
        """A skipped run (0 ms) must not pull the average below 100 ms."""
        node = "tests/t.py::test_x"
        ran_case = CaseRecord(
            node_id=node,
            outcome="passed",
            duration_ms=100.0,
            symbol_addresses=[],
        )
        skipped_case = CaseRecord(
            node_id=node,
            outcome="skipped",
            duration_ms=0.0,
            symbol_addresses=[],
        )
        runs = [
            _make_record("r1", passed=1, results=[ran_case]),
            _make_record("r2", passed=0, results=[skipped_case]),
        ]

        stats = summarize(runs)[node]
        assert stats["avg_duration_ms"] == 100.0
327
328
329 # ---------------------------------------------------------------------------
330 # Analytics — flaky_tests
331 # ---------------------------------------------------------------------------
332
333
class TestFlakyTests:
    """Tests for ``flaky_tests``."""

    def test_returns_only_flaky(self) -> None:
        """A test that passed and failed is reported; an always-passing one is not."""
        stable_node = "tests/t.py::test_stable"
        flaky_node = "tests/t.py::test_flaky"
        stable_pass = CaseRecord(
            node_id=stable_node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        flaky_pass = CaseRecord(
            node_id=flaky_node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        flaky_fail = CaseRecord(
            node_id=flaky_node,
            outcome="failed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        # Run 1: both tests pass.  Run 2: only the flaky one fails.
        first_run = _make_record("r1", passed=2, results=[stable_pass, flaky_pass])
        second_run = _make_record(
            "r2", passed=1, failed=1, results=[stable_pass, flaky_fail]
        )

        reported = {entry["node_id"] for entry in flaky_tests([first_run, second_run])}
        assert flaky_node in reported
        assert stable_node not in reported

    def test_empty_returns_empty(self) -> None:
        """An empty history yields an empty list."""
        found = flaky_tests([])
        assert found == []
366
367
368 # ---------------------------------------------------------------------------
369 # Analytics — prioritize_targets
370 # ---------------------------------------------------------------------------
371
372
class TestPrioritizeTargets:
    """Tests for ``prioritize_targets``."""

    def test_unknown_targets_returned_in_some_order(self) -> None:
        """With no history, every target is returned; order is not asserted."""
        requested = ["tests/t.py::test_a", "tests/t.py::test_b"]
        ordered = prioritize_targets(requested, [])
        assert sorted(ordered) == sorted(requested)

    def test_streaky_test_comes_first(self) -> None:
        """A target whose only recorded outcome is a failure is ordered first."""
        failing_node = "tests/t.py::test_fail"
        passing_node = "tests/t.py::test_pass"
        failed_case = CaseRecord(
            node_id=failing_node,
            outcome="failed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        passed_case = CaseRecord(
            node_id=passing_node,
            outcome="passed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        history = [
            _make_record("r1", passed=0, failed=1, results=[failed_case]),
            _make_record("r2", passed=1, failed=0, results=[passed_case]),
        ]

        # The failing test is listed last on purpose so the call must reorder.
        ordered = prioritize_targets([passing_node, failing_node], history)
        assert ordered[0] == "tests/t.py::test_fail"

    def test_empty_targets(self) -> None:
        """No targets and no history yield an empty list."""
        ordered = prioritize_targets([], [])
        assert ordered == []
404
405
406 # ---------------------------------------------------------------------------
407 # Utilities
408 # ---------------------------------------------------------------------------
409
410
class TestUtilities:
    """Tests for the ``iso_now`` and ``make_run_id`` helpers."""

    def test_iso_now_format(self) -> None:
        """iso_now returns a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp string."""
        from datetime import datetime as _datetime

        ts = iso_now()
        assert "T" in ts
        assert ts.endswith("Z")
        assert len(ts) == 20  # "YYYY-MM-DDTHH:MM:SSZ"
        # The shape checks above would also accept garbage such as nineteen
        # "T"s followed by "Z".  strptime raises ValueError unless every
        # field is a real calendar/clock value in the expected position.
        _datetime.strptime(ts, "%Y-%m-%dT%H:%M:%SZ")

    def test_make_run_id_is_unique(self) -> None:
        """make_run_id returns a different sha256: ID each time."""
        ids = {make_run_id() for _ in range(100)}
        assert len(ids) == 100

    def test_make_run_id_is_sha256(self) -> None:
        """make_run_id returns ``sha256:`` followed by 64 hex digits."""
        prefix = "sha256:"
        run_id = make_run_id()
        assert run_id.startswith(prefix), f"expected sha256: prefix, got {run_id!r}"
        # 7-character prefix + 64 hex digits of a SHA-256 digest.
        assert len(run_id) == 71
        digest = run_id[len(prefix):]
        # Length alone does not prove the payload is a hex digest.
        assert all(
            ch in "0123456789abcdefABCDEF" for ch in digest
        ), f"expected hex digest after prefix, got {digest!r}"
File History 1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago