tests/test_core_test_history.py · gabriel/muse

test_core_test_history.py python

428 lines 14.7 KB

sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago

1	"""Tests for muse.core.test_history — persistent test-run history.
2
3	Coverage:
4	- Unit tests for serialisation (_record_to_dict / _record_from_dict).
5	- Round-trip tests: save + load round-trips for RunRecord.
6	- load_history returns empty list when file missing.
7	- append_run adds one record.
8	- summarize computes correct counts, flaky flag, and fail_streak.
9	- flaky_tests returns only flaky tests, sorted by fail_count.
10	- prioritize_targets puts streaky/flaky tests first.
11	- Corrupt file handling: load_history returns empty list on corruption.
12	- iso_now returns a valid ISO 8601 string.
13	- make_run_id returns a unique ``sha256:``-prefixed ID.
14	"""
15
16	from __future__ import annotations
17
18	import pathlib
19
20	import pytest
21
22	from muse.core.paths import muse_dir, test_history_path as _test_history_path
23	from muse.core.test_history import (
24	HistorySummary,
25	RunRecord,
26	CaseRecord,
27	_record_from_dict,
28	_record_to_dict,
29	append_run,
30	flaky_tests,
31	iso_now,
32	load_history,
33	make_run_id,
34	prioritize_targets,
35	save_history,
36	summarize,
37	)
38
39
40	# ---------------------------------------------------------------------------
41	# Fixtures
42	# ---------------------------------------------------------------------------
43
44
def _make_record(
    run_id: str = "run-1",
    *,
    passed: int = 2,
    failed: int = 0,
    results: list[CaseRecord] | None = None,
) -> RunRecord:
    """Build a RunRecord with fixed metadata for the tests in this module.

    When *results* is omitted, two passing cases from ``tests/test_foo.py``
    are used.  ``total`` is always ``len(results)``; ``passed`` and
    ``failed`` are stored exactly as given (they are not derived from
    *results*), and ``errored`` / ``skipped`` are always 0.
    """
    if results is None:
        default_cases = (
            ("tests/test_foo.py::test_a", 10.0),
            ("tests/test_foo.py::test_b", 20.0),
        )
        results = [
            CaseRecord(
                node_id=node_id,
                outcome="passed",
                duration_ms=duration_ms,
                symbol_addresses=[],
            )
            for node_id, duration_ms in default_cases
        ]

    record = RunRecord(
        run_id=run_id,
        timestamp="2026-03-26T12:00:00Z",
        commit_id="abc123",
        branch="main",
        results=results,
        total=len(results),
        passed=passed,
        failed=failed,
        errored=0,
        skipped=0,
    )
    return record
79
80
81	# ---------------------------------------------------------------------------
82	# Unit tests — serialisation
83	# ---------------------------------------------------------------------------
84
85
class TestRecordSerialization:
    """Round-trip checks for ``_record_to_dict`` / ``_record_from_dict``."""

    @staticmethod
    def _via_json(record: RunRecord) -> RunRecord | None:
        """Serialise *record*, pass it through JSON text, and deserialise it."""
        import json

        encoded = json.dumps(_record_to_dict(record))
        return _record_from_dict(json.loads(encoded))

    def test_round_trip(self) -> None:
        """Scalar fields and the number of results survive a JSON round-trip."""
        original = _make_record()
        restored = self._via_json(original)
        assert restored is not None
        assert restored["run_id"] == original["run_id"]
        assert restored["timestamp"] == original["timestamp"]
        assert restored["commit_id"] == original["commit_id"]
        assert restored["branch"] == original["branch"]
        assert restored["total"] == original["total"]
        assert restored["passed"] == original["passed"]
        assert len(restored["results"]) == len(original["results"])

    def test_longrepr_round_trip(self) -> None:
        """A ``longrepr`` set on a case is still there after the round-trip."""
        message = "AssertionError: expected 1, got 2"
        failing_case = CaseRecord(
            node_id="tests/test_foo.py::test_fail",
            outcome="failed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        # Set after construction, exactly as a caller attaching failure text would.
        failing_case["longrepr"] = message

        original = _make_record(failed=1, passed=0, results=[failing_case])
        restored = self._via_json(original)
        assert restored is not None
        first_case = restored["results"][0]
        assert first_case.get("longrepr") == message

    def test_none_fields_preserved(self) -> None:
        """``commit_id`` and ``branch`` set to None come back as None."""
        original = _make_record()
        original["commit_id"] = None
        original["branch"] = None
        restored = self._via_json(original)
        assert restored is not None
        assert restored["commit_id"] is None
        assert restored["branch"] is None

    def test_invalid_input_returns_none(self) -> None:
        """``_record_from_dict`` yields None for a str, a list, and None."""
        invalid_inputs: tuple[object, ...] = ("not a dict", [], None)
        for bogus in invalid_inputs:
            assert _record_from_dict(bogus) is None
142
143
144	# ---------------------------------------------------------------------------
145	# I/O tests — load_history / save_history / append_run
146	# ---------------------------------------------------------------------------
147
148
class TestLoadSave:
    """File-backed behaviour of load_history, save_history and append_run."""

    def test_load_missing_file(self, tmp_path: pathlib.Path) -> None:
        """With the muse dir present but no history file, the result is []."""
        muse_dir(tmp_path).mkdir()
        assert load_history(tmp_path) == []

    def test_save_and_load(self, tmp_path: pathlib.Path) -> None:
        """Two saved records load back in the same order."""
        muse_dir(tmp_path).mkdir()
        first = _make_record("r1")
        second = _make_record("r2", passed=1, failed=1)
        save_history(tmp_path, [first, second])

        run_ids = [entry["run_id"] for entry in load_history(tmp_path)]
        assert run_ids == ["r1", "r2"]

    def test_append_run(self, tmp_path: pathlib.Path) -> None:
        """Appending to a one-record history gives two records, new one last."""
        muse_dir(tmp_path).mkdir()
        save_history(tmp_path, [_make_record("r1")])
        append_run(tmp_path, _make_record("r2"))

        history = load_history(tmp_path)
        assert len(history) == 2
        assert history[-1]["run_id"] == "r2"

    def test_load_corrupt_file_returns_empty(self, tmp_path: pathlib.Path) -> None:
        """A history file holding non-JSON bytes loads as [] without raising."""
        history_file = _test_history_path(tmp_path)
        history_file.parent.mkdir(parents=True, exist_ok=True)
        history_file.write_bytes(b"\xff\xfe garbage bytes that are not valid JSON")
        assert load_history(tmp_path) == []

    def test_atomic_write(self, tmp_path: pathlib.Path) -> None:
        """After save_history returns, no ``*.tmp`` file is left in the muse dir."""
        muse_dir(tmp_path).mkdir()
        save_history(tmp_path, [_make_record()])
        leftovers = list(muse_dir(tmp_path).glob("*.tmp"))
        assert not leftovers, "Temp file should be removed after atomic write"
190
191
192	# ---------------------------------------------------------------------------
193	# Analytics — summarize
194	# ---------------------------------------------------------------------------
195
196
class TestSummarize:
    """Per-test statistics produced by ``summarize``."""

    def test_empty_records(self) -> None:
        """An empty history summarises to an empty dict."""
        summaries = summarize([])
        assert summaries == {}

    def test_all_passed(self) -> None:
        """One passing run: one pass, no failures, no streak, not flaky."""
        node = "tests/test_foo.py::test_a"
        only_case = CaseRecord(
            node_id=node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        run = _make_record(passed=1, failed=0, results=[only_case])

        stats = summarize([run])[node]
        assert stats["pass_count"] == 1
        assert stats["fail_count"] == 0
        assert stats["flaky"] is False
        assert stats["fail_streak"] == 0
        assert stats["last_outcome"] == "passed"

    def test_all_failed(self) -> None:
        """Three failing runs: three failures, streak of three, not flaky."""
        node = "tests/test_foo.py::test_a"
        results = [
            CaseRecord(
                node_id=node,
                outcome="failed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        # Built directly (not via _make_record) so each run gets its own timestamp.
        history = [
            RunRecord(
                run_id=f"r{i}",
                timestamp=f"2026-03-{i+1:02d}T00:00:00Z",
                commit_id=None,
                branch=None,
                results=results,
                total=1,
                passed=0,
                failed=1,
                errored=0,
                skipped=0,
            )
            for i in range(3)
        ]

        stats = summarize(history)[node]
        assert stats["fail_count"] == 3
        assert stats["pass_count"] == 0
        assert stats["flaky"] is False
        assert stats["fail_streak"] == 3

    def test_flaky_detection(self) -> None:
        """A node with one pass and one failure is reported as flaky."""
        node = "tests/test_foo.py::test_flaky"
        passing_case = CaseRecord(
            node_id=node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        failing_case = CaseRecord(
            node_id=node,
            outcome="failed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        history = [
            _make_record("r1", passed=1, failed=0, results=[passing_case]),
            _make_record("r2", passed=0, failed=1, results=[failing_case]),
        ]

        stats = summarize(history)[node]
        assert stats["flaky"] is True
        assert stats["pass_count"] == 1
        assert stats["fail_count"] == 1

    def test_fail_streak_stops_on_pass(self) -> None:
        """Two failures followed by a pass leave ``fail_streak`` at 0."""
        node = "tests/t.py::test_x"
        failing = [
            CaseRecord(
                node_id=node,
                outcome="failed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        passing = [
            CaseRecord(
                node_id=node,
                outcome="passed",
                duration_ms=5.0,
                symbol_addresses=[],
            )
        ]
        history = [
            _make_record("r1", passed=0, failed=1, results=failing),
            _make_record("r2", passed=0, failed=1, results=failing),
            _make_record("r3", passed=1, failed=0, results=passing),
        ]

        stats = summarize(history)[node]
        # The last run in the list passed, so no failure streak is in progress.
        assert stats["fail_streak"] == 0

    def test_avg_duration_excludes_skipped(self) -> None:
        """A skipped run (0 ms) does not pull the 100 ms average down."""
        node = "tests/t.py::test_x"
        timed_case = CaseRecord(
            node_id=node,
            outcome="passed",
            duration_ms=100.0,
            symbol_addresses=[],
        )
        skipped_case = CaseRecord(
            node_id=node,
            outcome="skipped",
            duration_ms=0.0,
            symbol_addresses=[],
        )
        history = [
            _make_record("r1", passed=1, results=[timed_case]),
            _make_record("r2", passed=0, results=[skipped_case]),
        ]

        stats = summarize(history)[node]
        assert stats["avg_duration_ms"] == 100.0
327
328
329	# ---------------------------------------------------------------------------
330	# Analytics — flaky_tests
331	# ---------------------------------------------------------------------------
332
333
class TestFlakyTests:
    """Filtering behaviour of ``flaky_tests``."""

    def test_returns_only_flaky(self) -> None:
        """An always-passing node is left out; a pass-then-fail node is kept."""
        stable_node = "tests/t.py::test_stable"
        flaky_node = "tests/t.py::test_flaky"
        stable_pass = CaseRecord(
            node_id=stable_node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        flaky_pass = CaseRecord(
            node_id=flaky_node,
            outcome="passed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        flaky_fail = CaseRecord(
            node_id=flaky_node,
            outcome="failed",
            duration_ms=10.0,
            symbol_addresses=[],
        )
        history = [
            _make_record("r1", passed=2, results=[stable_pass, flaky_pass]),
            _make_record("r2", passed=1, failed=1, results=[stable_pass, flaky_fail]),
        ]

        reported = {entry["node_id"] for entry in flaky_tests(history)}
        assert flaky_node in reported
        assert stable_node not in reported

    def test_empty_returns_empty(self) -> None:
        """An empty history has no flaky tests."""
        assert flaky_tests([]) == []
366
367
368	# ---------------------------------------------------------------------------
369	# Analytics — prioritize_targets
370	# ---------------------------------------------------------------------------
371
372
class TestPrioritizeTargets:
    """Ordering behaviour of ``prioritize_targets``."""

    def test_unknown_targets_returned_in_some_order(self) -> None:
        """With no history, every target comes back; order is not checked."""
        targets = ["tests/t.py::test_a", "tests/t.py::test_b"]
        ordered = prioritize_targets(targets, [])
        assert sorted(ordered) == sorted(targets)

    def test_streaky_test_comes_first(self) -> None:
        """A node that failed in history is placed ahead of one that passed."""
        failing_node = "tests/t.py::test_fail"
        passing_node = "tests/t.py::test_pass"
        failing_case = CaseRecord(
            node_id=failing_node,
            outcome="failed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        passing_case = CaseRecord(
            node_id=passing_node,
            outcome="passed",
            duration_ms=5.0,
            symbol_addresses=[],
        )
        history = [
            _make_record("r1", passed=0, failed=1, results=[failing_case]),
            _make_record("r2", passed=1, failed=0, results=[passing_case]),
        ]

        # The failing node is listed last on purpose so the test proves reordering.
        ordered = prioritize_targets([passing_node, failing_node], history)
        assert ordered[0] == failing_node

    def test_empty_targets(self) -> None:
        """No targets and no history give an empty list."""
        assert prioritize_targets([], []) == []
404
405
406	# ---------------------------------------------------------------------------
407	# Utilities
408	# ---------------------------------------------------------------------------
409
410
class TestUtilities:
    """Shape checks for the ``iso_now`` and ``make_run_id`` helpers."""

    def test_iso_now_format(self) -> None:
        """``iso_now`` gives a 20-character string with a "T" and a trailing "Z"."""
        stamp = iso_now()
        assert "T" in stamp
        assert stamp.endswith("Z")
        # Same width as "YYYY-MM-DDTHH:MM:SSZ".
        assert len(stamp) == 20

    def test_make_run_id_is_unique(self) -> None:
        """100 consecutive ``make_run_id`` calls produce 100 distinct IDs."""
        generated = [make_run_id() for _ in range(100)]
        assert len(generated) == 100
        assert len(set(generated)) == len(generated)

    def test_make_run_id_is_sha256(self) -> None:
        """``make_run_id`` output starts with ``sha256:`` and is 71 characters."""
        run_id = make_run_id()
        assert run_id.startswith("sha256:"), f"expected sha256: prefix, got {run_id!r}"
        # 7-character prefix plus 64 further characters.
        assert len(run_id) == len("sha256:") + 64

File History 1 commit

sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 7 days ago

function _make_record

class TestRecordSerialization

function test_round_trip

function test_longrepr_round_trip

function test_none_fields_preserved

function test_invalid_input_returns_none

class TestLoadSave

function test_load_missing_file

function test_save_and_load

function test_append_run

function test_load_corrupt_file_returns_empty

function test_atomic_write

class TestSummarize

function test_empty_records

function test_all_passed

function test_all_failed

function test_flaky_detection

function test_fail_streak_stops_on_pass

function test_avg_duration_excludes_skipped

class TestFlakyTests

function test_returns_only_flaky

function test_empty_returns_empty

class TestPrioritizeTargets

function test_unknown_targets_returned_in_some_order

function test_streaky_test_comes_first

function test_empty_targets

class TestUtilities

function test_iso_now_format

function test_make_run_id_is_unique

function test_make_run_id_is_sha256

Path: tests/test_core_test_history.py

Lines: 428

Size: 14.7 KB

Lang: Python

Ref: sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b

Object ID

sha256:3df3c7a70240cbc13cbe14db5e17ed3f06a93755b318e707617da15730b3db99…

Last commit

sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b

fix: try fetch/presign before fetch/mpack to avoi…

7 days ago

Quick links

Blame History