tests/test_snapshot_supercharge.py · gabriel/muse

test_snapshot_supercharge.py python

832 lines 33.2 KB

sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 6 days ago

"""Comprehensive tests for ``muse snapshot`` subcommands.

Covers gaps in the original test_cmd_snapshot.py:

* JSON envelope — duration_ms / exit_code on all four subcommands
* JSON schema completeness — all documented fields, correct types
* Bug regression — sha256: prefix round-trip through _list_all_snapshots /
  _resolve_snapshot (bare-hex stem bug)
* Data integrity — create → export tar.gz/zip → extract → verify file content
* Security — ANSI escape injection in note, symlink planted in the object
  store (``.muse/objects/``) is skipped, path traversal rejected by
  _validate_snapshot_id_prefix / _safe_arcname, zip-slip guard for crafted
  manifest entries
* Text mode — ``snapshot read --text`` output format
* --prefix — files nested under prefix directory inside archive
* Limit validation — limit=0 rejected, limit=1 honoured, limit clamps output
* Idempotency — identical working-tree always produces the same snapshot_id
* List ordering — ``snapshot list`` returns the newest snapshot first
* Empty list envelope — snapshot list --json returns envelope even when empty
* Concurrent stress — N parallel snapshot creates, all independent and valid
* Large file export — single 5 MiB file round-trips correctly
* Default filename — ``snapshot export`` without --output still writes an archive
"""
21
22	from __future__ import annotations
23	from collections.abc import Mapping
24
25	import json
26	import os
27	import pathlib
28	import tarfile
29	import threading
30	import zipfile
31
32	import pytest
33
34	from muse.core.types import short_id, split_id
35	from muse.core.paths import muse_dir, snapshots_dir
36	from tests.cli_test_helper import CliRunner
37
# Placeholder application handle: since the argparse migration CliRunner
# ignores this argument, but every ``runner.invoke`` call below still passes it.
cli: None = None

# One runner instance, used by every helper and test in this module.
runner = CliRunner()
41
42
43	# ---------------------------------------------------------------------------
44	# Shared helpers
45	# ---------------------------------------------------------------------------
46
47
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *path* and return *path*.

    Inside the directory returned by ``muse_dir(path)`` this creates the
    ``commits``, ``snapshots``, ``objects`` and ``refs/heads`` directories,
    a ``HEAD`` file pointing at ``refs/heads/main`` and a ``repo.json`` file.
    """
    meta_root = muse_dir(path)
    subdirs = ("commits", "snapshots", "objects", "refs/heads")
    for subdir in subdirs:
        meta_root.joinpath(subdir).mkdir(parents=True, exist_ok=True)

    head_file = meta_root / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_config = {"repo_id": "snap-supercharge", "domain": "code"}
    config_file = meta_root / "repo.json"
    config_file.write_text(json.dumps(repo_config), encoding="utf-8")
    return path
58
59
60	def _env(repo: pathlib.Path) -> Mapping[str, str]:
61	return {"MUSE_REPO_ROOT": str(repo)}
62
63
64	def _create_files(root: pathlib.Path, count: int = 3) -> list[str]:
65	names: list[str] = []
66	for i in range(count):
67	name = f"file_{i}.txt"
68	(root / name).write_text(f"content-{i}", encoding="utf-8")
69	names.append(name)
70	return names
71
72
def _create_snapshot(root: pathlib.Path, note: str = "") -> Mapping[str, object]:
    """Run ``snapshot create --json`` against *root* and return the decoded JSON.

    A non-empty *note* is forwarded with ``-m``.  The helper itself asserts
    that the command exited with code 0, reporting the command output if not.
    """
    argv = ["snapshot", "create", "--json"]
    if note:
        argv.extend(["-m", note])
    outcome = runner.invoke(cli, argv, env=_env(root))
    assert outcome.exit_code == 0, outcome.output
    return json.loads(outcome.output)
81
82
83	# ---------------------------------------------------------------------------
84	# JSON envelope — duration_ms / exit_code
85	# ---------------------------------------------------------------------------
86
87
class TestJsonEnvelope:
    """Every --json subcommand must include duration_ms and exit_code.

    Each test checks the process exit code before decoding the output, so a
    failing command is reported with its own output rather than as a JSON
    decode error.
    """

    @staticmethod
    def _seed_snapshot(repo: pathlib.Path) -> object:
        """Initialise *repo* with one file, snapshot it and return the snapshot ID."""
        _init_repo(repo)
        _create_files(repo, 1)
        return _create_snapshot(repo)["snapshot_id"]

    @staticmethod
    def _run_json(repo: pathlib.Path, argv: list) -> Mapping[str, object]:
        """Invoke *argv* against *repo*, require exit code 0 and decode the output."""
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, result.output
        return json.loads(result.output)

    @staticmethod
    def _assert_duration_ms(data: Mapping[str, object]) -> None:
        """Assert that *data* carries a non-negative numeric ``duration_ms``."""
        assert "duration_ms" in data
        assert isinstance(data["duration_ms"], (int, float))
        assert data["duration_ms"] >= 0

    # -- snapshot create ----------------------------------------------------

    def test_create_has_duration_ms(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        self._assert_duration_ms(_create_snapshot(tmp_path))

    def test_create_has_exit_code_zero(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        data = _create_snapshot(tmp_path)
        assert data["exit_code"] == 0

    # -- snapshot list ------------------------------------------------------

    def test_list_has_duration_ms(self, tmp_path: pathlib.Path) -> None:
        self._seed_snapshot(tmp_path)
        data = self._run_json(tmp_path, ["snapshot", "list", "--json"])
        self._assert_duration_ms(data)

    def test_list_has_exit_code_zero(self, tmp_path: pathlib.Path) -> None:
        self._seed_snapshot(tmp_path)
        data = self._run_json(tmp_path, ["snapshot", "list", "--json"])
        assert data["exit_code"] == 0

    def test_list_empty_has_envelope(self, tmp_path: pathlib.Path) -> None:
        # No snapshot is created here: the envelope must exist for an empty list.
        _init_repo(tmp_path)
        data = self._run_json(tmp_path, ["snapshot", "list", "--json"])
        assert data["snapshots"] == []
        assert "duration_ms" in data
        assert data["exit_code"] == 0

    # -- snapshot read ------------------------------------------------------

    def test_read_has_duration_ms(self, tmp_path: pathlib.Path) -> None:
        snap_id = self._seed_snapshot(tmp_path)
        data = self._run_json(tmp_path, ["snapshot", "read", snap_id, "--json"])
        self._assert_duration_ms(data)

    def test_read_has_exit_code_zero(self, tmp_path: pathlib.Path) -> None:
        snap_id = self._seed_snapshot(tmp_path)
        data = self._run_json(tmp_path, ["snapshot", "read", snap_id, "--json"])
        assert data["exit_code"] == 0

    # -- snapshot export ----------------------------------------------------

    def test_export_has_duration_ms(self, tmp_path: pathlib.Path) -> None:
        snap_id = self._seed_snapshot(tmp_path)
        out = tmp_path / "out.tar.gz"
        data = self._run_json(
            tmp_path,
            ["snapshot", "export", snap_id, "--output", str(out), "--json"],
        )
        self._assert_duration_ms(data)

    def test_export_has_exit_code_zero(self, tmp_path: pathlib.Path) -> None:
        snap_id = self._seed_snapshot(tmp_path)
        out = tmp_path / "out.tar.gz"
        data = self._run_json(
            tmp_path,
            ["snapshot", "export", snap_id, "--output", str(out), "--json"],
        )
        assert data["exit_code"] == 0
184
185
186	# ---------------------------------------------------------------------------
187	# JSON schema completeness
188	# ---------------------------------------------------------------------------
189
190
class TestJsonSchemaCompleteness:
    """All documented fields must be present with correct types."""

    def test_create_schema(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 2)
        payload = _create_snapshot(tmp_path, note="schema-test")

        expected_types = (
            ("repo_id", str),
            ("snapshot_id", str),
            ("file_count", int),
            ("note", str),
            ("created_at", str),
            ("duration_ms", (int, float)),
            ("exit_code", int),
        )
        for field, kind in expected_types:
            assert isinstance(payload[field], kind)

        assert payload["snapshot_id"].startswith("sha256:")
        assert payload["file_count"] >= 1
        assert payload["note"] == "schema-test"
        # ISO-8601: basic sanity check
        stamp = payload["created_at"]
        assert "T" in stamp or "-" in stamp

    def test_list_schema(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 2)
        _create_snapshot(tmp_path, note="list-schema")

        outcome = runner.invoke(cli, ["snapshot", "list", "--json"], env=_env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)

        assert "snapshots" in payload
        assert isinstance(payload["snapshots"], list)
        for envelope_key in ("duration_ms", "exit_code"):
            assert envelope_key in payload

        first = payload["snapshots"][0]
        assert isinstance(first["snapshot_id"], str)
        assert first["snapshot_id"].startswith("sha256:")
        assert isinstance(first["file_count"], int)
        assert isinstance(first["note"], str)
        assert isinstance(first["created_at"], str)

    def test_read_schema(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 2)
        snap_id = _create_snapshot(tmp_path, note="read-schema")["snapshot_id"]

        argv = ["snapshot", "read", snap_id, "--json"]
        outcome = runner.invoke(cli, argv, env=_env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)

        assert isinstance(payload["snapshot_id"], str)
        assert payload["snapshot_id"].startswith("sha256:")
        assert isinstance(payload["created_at"], str)
        assert isinstance(payload["file_count"], int)
        assert isinstance(payload["note"], str)
        manifest = payload["manifest"]
        assert isinstance(manifest, dict)
        # One manifest entry per counted file.
        assert len(manifest) == payload["file_count"]
        assert isinstance(payload["duration_ms"], (int, float))
        assert isinstance(payload["exit_code"], int)

    def test_export_schema(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 2)
        snap_id = _create_snapshot(tmp_path)["snapshot_id"]
        archive = tmp_path / "schema.tar.gz"

        argv = ["snapshot", "export", snap_id, "--output", str(archive), "--json"]
        outcome = runner.invoke(cli, argv, env=_env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)

        assert isinstance(payload["snapshot_id"], str)
        assert isinstance(payload["output"], str)
        assert payload["format"] in ("tar.gz", "zip")
        assert isinstance(payload["file_count"], int)
        assert isinstance(payload["size_bytes"], int)
        assert payload["size_bytes"] > 0
        assert isinstance(payload["duration_ms"], (int, float))
        assert isinstance(payload["exit_code"], int)

    def test_manifest_keys_are_sorted(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        # Write the files out of alphabetical order so the check is meaningful.
        for filename in ("zzz.txt", "aaa.txt", "mmm.txt"):
            tmp_path.joinpath(filename).write_text(filename, encoding="utf-8")
        snap_id = _create_snapshot(tmp_path)["snapshot_id"]

        argv = ["snapshot", "read", snap_id, "--json"]
        outcome = runner.invoke(cli, argv, env=_env(tmp_path))
        payload = json.loads(outcome.output)
        manifest_keys = list(payload["manifest"])
        assert manifest_keys == sorted(manifest_keys)
280
281
282	# ---------------------------------------------------------------------------
283	# Bug regression — sha256: prefix round-trip
284	# ---------------------------------------------------------------------------
285
286
class TestSha256PrefixRoundTrip:
    """Regression for the bare-hex-stem bug: _list_all_snapshots and
    _resolve_snapshot were passing path.stem (bare hex) to read_snapshot,
    which then compared it against compute_snapshot_id output (sha256: prefixed),
    causing every snapshot to fail content-hash verification and appear missing."""

    def test_list_after_create_returns_snapshot(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 2)
        expected_id = _create_snapshot(tmp_path)["snapshot_id"]

        outcome = runner.invoke(cli, ["snapshot", "list", "--json"], env=_env(tmp_path))
        assert outcome.exit_code == 0
        listed = json.loads(outcome.output)["snapshots"]
        assert expected_id in [entry["snapshot_id"] for entry in listed]

    def test_read_by_full_id_succeeds(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        full_id = _create_snapshot(tmp_path)["snapshot_id"]

        outcome = runner.invoke(cli, ["snapshot", "read", full_id], env=_env(tmp_path))
        assert outcome.exit_code == 0

    def test_bare_hex_prefix_rejected(self, tmp_path: pathlib.Path) -> None:
        """Bare hex prefix (no sha256: type tag) must be rejected at the CLI boundary."""
        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        full_id = _create_snapshot(tmp_path)["snapshot_id"]

        bare_prefix = short_id(full_id, strip=True)
        outcome = runner.invoke(cli, ["snapshot", "read", bare_prefix], env=_env(tmp_path))
        assert outcome.exit_code != 0

    def test_read_by_sha256_prefix_succeeds(self, tmp_path: pathlib.Path) -> None:
        """Full sha256:... ID passed to snapshot read must resolve."""
        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        full_id = _create_snapshot(tmp_path)["snapshot_id"]

        argv = ["snapshot", "read", full_id, "--json"]
        outcome = runner.invoke(cli, argv, env=_env(tmp_path))
        assert outcome.exit_code == 0
        assert json.loads(outcome.output)["snapshot_id"] == full_id

    def test_snapshot_id_in_read_matches_create(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 2)
        created = _create_snapshot(tmp_path)

        argv = ["snapshot", "read", created["snapshot_id"], "--json"]
        outcome = runner.invoke(cli, argv, env=_env(tmp_path))
        read_back = json.loads(outcome.output)
        assert read_back["snapshot_id"] == created["snapshot_id"]
338
339
340	# ---------------------------------------------------------------------------
341	# Data integrity — create → export → verify content
342	# ---------------------------------------------------------------------------
343
344
class TestDataIntegrity:
    """File contents written to archives must match the original source files.

    Every export invocation is checked for a zero exit code, so a failed
    export is reported with the command's own output instead of surfacing
    later as a missing archive or a JSON decode error.
    """

    @staticmethod
    def _seed(repo: pathlib.Path, file_count: int) -> tuple:
        """Initialise *repo*, write *file_count* files, snapshot them.

        Returns ``(file_names, create_payload)``.
        """
        _init_repo(repo)
        names = _create_files(repo, file_count)
        return names, _create_snapshot(repo)

    @staticmethod
    def _run_export(repo: pathlib.Path, argv: list):
        """Invoke an export command and require that it exits with code 0."""
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, result.output
        return result

    def test_tar_gz_content_matches_source(self, tmp_path: pathlib.Path) -> None:
        names, created = self._seed(tmp_path, 3)
        out = tmp_path / "integrity.tar.gz"
        self._run_export(
            tmp_path,
            ["snapshot", "export", created["snapshot_id"], "--output", str(out)],
        )
        assert out.exists()
        with tarfile.open(out, "r:gz") as tar:
            members = {m.name: m for m in tar.getmembers()}
            for name in names:
                # Match on the suffix so an archive prefix directory is tolerated.
                match = [k for k in members if k.endswith(name)]
                assert match, f"{name} not found in archive"
                content = tar.extractfile(members[match[0]])
                assert content is not None
                extracted = content.read().decode("utf-8")
                expected = (tmp_path / name).read_text(encoding="utf-8")
                assert extracted == expected, f"content mismatch for {name}"

    def test_zip_content_matches_source(self, tmp_path: pathlib.Path) -> None:
        names, created = self._seed(tmp_path, 3)
        out = tmp_path / "integrity.zip"
        self._run_export(
            tmp_path,
            [
                "snapshot", "export", created["snapshot_id"],
                "--format", "zip",
                "--output", str(out),
            ],
        )
        assert out.exists()
        with zipfile.ZipFile(out, "r") as zf:
            namelist = zf.namelist()
            for name in names:
                match = [k for k in namelist if k.endswith(name)]
                assert match, f"{name} not found in zip"
                extracted = zf.read(match[0]).decode("utf-8")
                expected = (tmp_path / name).read_text(encoding="utf-8")
                assert extracted == expected, f"content mismatch for {name}"

    def test_export_file_count_matches_snapshot(self, tmp_path: pathlib.Path) -> None:
        _, created = self._seed(tmp_path, 4)
        out = tmp_path / "count.tar.gz"
        result = self._run_export(
            tmp_path,
            ["snapshot", "export", created["snapshot_id"], "--output", str(out), "--json"],
        )
        data = json.loads(result.output)
        assert data["file_count"] == created["file_count"]

    def test_export_size_bytes_matches_disk(self, tmp_path: pathlib.Path) -> None:
        _, created = self._seed(tmp_path, 2)
        out = tmp_path / "size.tar.gz"
        result = self._run_export(
            tmp_path,
            ["snapshot", "export", created["snapshot_id"], "--output", str(out), "--json"],
        )
        data = json.loads(result.output)
        # The reported size must equal what actually landed on disk.
        assert data["size_bytes"] == out.stat().st_size
420
421
422	# ---------------------------------------------------------------------------
423	# Security
424	# ---------------------------------------------------------------------------
425
426
class TestSecurity:
    """Security properties of snapshot commands."""

    def test_ansi_escape_in_note_sanitized_in_text_output(self, tmp_path: pathlib.Path) -> None:
        """ANSI escape sequences in notes must not reach the terminal raw."""
        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        hostile_note = "\x1b[31mred\x1b[0m"
        argv = ["snapshot", "create", "-m", hostile_note]
        outcome = runner.invoke(cli, argv, env=_env(tmp_path))
        assert outcome.exit_code == 0
        # The raw ESC byte must be absent from the text output.
        assert "\x1b" not in outcome.output

    def test_note_appears_sanitized_in_list_text(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        hostile_note = "\x1b[1mBOLD\x1b[0m"
        _create_snapshot(tmp_path, note=hostile_note)
        outcome = runner.invoke(cli, ["snapshot", "list"], env=_env(tmp_path))
        assert outcome.exit_code == 0
        assert "\x1b" not in outcome.output

    def test_symlink_in_objects_dir_is_skipped(self, tmp_path: pathlib.Path) -> None:
        """A symlink inside .muse/objects/ must not be read as a snapshot."""
        from muse.core.paths import objects_dir

        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        legit = _create_snapshot(tmp_path)

        # Unrelated file that the planted symlink will point at.
        decoy = tmp_path / "some_file.txt"
        decoy.write_bytes(b"payload")

        shard = objects_dir(tmp_path) / "sha256" / "de"
        shard.mkdir(parents=True, exist_ok=True)
        planted = shard / "ad".ljust(62, "0")
        try:
            planted.symlink_to(decoy)
        except (OSError, NotImplementedError):
            pytest.skip("symlinks not supported on this platform")

        outcome = runner.invoke(cli, ["snapshot", "list", "--json"], env=_env(tmp_path))
        assert outcome.exit_code == 0
        listed = json.loads(outcome.output)["snapshots"]
        # Only the legitimately created snapshot should appear.
        listed_ids = [entry["snapshot_id"] for entry in listed]
        assert len(listed_ids) == 1
        assert listed_ids[0] == legit["snapshot_id"]

    def test_path_traversal_in_snapshot_id_prefix_is_safe(self, tmp_path: pathlib.Path) -> None:
        """A crafted snapshot_id with ../ must not escape the snapshots dir."""
        _init_repo(tmp_path)
        argv = ["snapshot", "read", "../../etc/passwd"]
        outcome = runner.invoke(cli, argv, env=_env(tmp_path))
        # Must fail gracefully — not crash, not read /etc/passwd.
        assert outcome.exit_code != 0

    def test_safe_arcname_rejects_dotdot_path(self, tmp_path: pathlib.Path) -> None:
        """_safe_arcname must return None for paths with .. segments."""
        from muse.cli.commands.snapshot_cmd import _safe_arcname

        for prefix, rel_path in (("", "../etc/passwd"), ("prefix", "../../secret")):
            assert _safe_arcname(prefix, rel_path) is None

    def test_safe_arcname_rejects_absolute_path(self, tmp_path: pathlib.Path) -> None:
        from muse.cli.commands.snapshot_cmd import _safe_arcname

        for prefix, rel_path in (("", "/etc/passwd"), ("prefix", "/root/.ssh/id_rsa")):
            assert _safe_arcname(prefix, rel_path) is None

    def test_safe_arcname_accepts_normal_path(self, tmp_path: pathlib.Path) -> None:
        from muse.cli.commands.snapshot_cmd import _safe_arcname

        bare = _safe_arcname("", "src/main.py")
        assert bare == "src/main.py"
        nested = _safe_arcname("myproject", "lib/util.py")
        assert nested == "myproject/lib/util.py"

    def test_safe_arcname_rejects_dotdot_in_prefix(self) -> None:
        from muse.cli.commands.snapshot_cmd import _safe_arcname

        escaped = _safe_arcname("../escape", "file.txt")
        assert escaped is None
511
512
513	# ---------------------------------------------------------------------------
514	# Text mode — snapshot read --text
515	# ---------------------------------------------------------------------------
516
517
class TestTextMode:
    """Output of ``snapshot read`` when ``--json`` is not passed."""

    def test_read_text_shows_snapshot_id(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 2)
        target_id = _create_snapshot(tmp_path)["snapshot_id"]

        outcome = runner.invoke(cli, ["snapshot", "read", target_id], env=_env(tmp_path))
        assert outcome.exit_code == 0
        text = outcome.output
        assert "snapshot_id" in text
        assert target_id in text

    def test_read_text_shows_file_list(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 2)
        target_id = _create_snapshot(tmp_path)["snapshot_id"]

        outcome = runner.invoke(cli, ["snapshot", "read", target_id], env=_env(tmp_path))
        assert outcome.exit_code == 0
        lowered = outcome.output.lower()
        assert "file" in lowered or "files" in lowered

    def test_read_text_shows_note_when_set(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        target_id = _create_snapshot(tmp_path, note="my-label")["snapshot_id"]

        outcome = runner.invoke(cli, ["snapshot", "read", target_id], env=_env(tmp_path))
        assert outcome.exit_code == 0
        assert "my-label" in outcome.output

    def test_read_text_is_not_valid_json(self, tmp_path: pathlib.Path) -> None:
        """--text output must not be machine-parseable JSON."""
        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        target_id = _create_snapshot(tmp_path)["snapshot_id"]

        outcome = runner.invoke(cli, ["snapshot", "read", target_id], env=_env(tmp_path))
        assert outcome.exit_code == 0
        with pytest.raises((json.JSONDecodeError, ValueError)):
            json.loads(outcome.output)
565
566
567	# ---------------------------------------------------------------------------
568	# --prefix export
569	# ---------------------------------------------------------------------------
570
571
class TestPrefixExport:
    """Archive member layout produced by ``snapshot export --prefix``.

    Each test requires a successful export and a non-empty archive, because
    ``all(...)`` over an empty member list would pass vacuously.
    """

    @staticmethod
    def _export(repo: pathlib.Path, argv_tail: list) -> None:
        """Snapshot two files in *repo* and export with the extra *argv_tail* args.

        Asserts that the export command exits with code 0.
        """
        _init_repo(repo)
        _create_files(repo, 2)
        snap_id = _create_snapshot(repo)["snapshot_id"]
        result = runner.invoke(
            cli, ["snapshot", "export", snap_id, *argv_tail], env=_env(repo)
        )
        assert result.exit_code == 0, result.output

    def test_tar_gz_files_nested_under_prefix(self, tmp_path: pathlib.Path) -> None:
        out = tmp_path / "prefixed.tar.gz"
        self._export(tmp_path, ["--prefix", "myproject", "--output", str(out)])
        assert out.exists()
        with tarfile.open(out, "r:gz") as tar:
            names = tar.getnames()
        assert names, "archive is empty"
        assert all(n.startswith("myproject/") for n in names), names

    def test_zip_files_nested_under_prefix(self, tmp_path: pathlib.Path) -> None:
        out = tmp_path / "prefixed.zip"
        self._export(
            tmp_path,
            ["--format", "zip", "--prefix", "release", "--output", str(out)],
        )
        assert out.exists()
        with zipfile.ZipFile(out, "r") as zf:
            names = zf.namelist()
        assert names, "archive is empty"
        assert all(n.startswith("release/") for n in names), names

    def test_empty_prefix_uses_flat_layout(self, tmp_path: pathlib.Path) -> None:
        out = tmp_path / "flat.tar.gz"
        self._export(tmp_path, ["--prefix", "", "--output", str(out)])
        assert out.exists()
        with tarfile.open(out, "r:gz") as tar:
            names = tar.getnames()
        assert names, "archive is empty"
        # With an empty prefix no member may be rooted at "/".
        assert all(not n.startswith("/") for n in names), names
625
626
627	# ---------------------------------------------------------------------------
628	# Limit validation
629	# ---------------------------------------------------------------------------
630
631
class TestLimitValidation:
    """Validation and clamping behaviour of ``snapshot list --limit``.

    The clamping tests create snapshots of *different* working trees.
    Snapshotting an unchanged tree repeatedly yields the same snapshot ID, so
    it would leave a single snapshot and let the limit checks pass even if
    ``--limit`` were ignored.
    """

    @staticmethod
    def _create_distinct_snapshots(repo: pathlib.Path, count: int) -> None:
        """Create *count* snapshots, adding a new file before each one."""
        for round_no in range(count):
            (repo / f"round_{round_no}.txt").write_text(f"v{round_no}", encoding="utf-8")
            _create_snapshot(repo)

    def test_limit_zero_rejected(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        result = runner.invoke(
            cli, ["snapshot", "list", "--limit", "0"], env=_env(tmp_path)
        )
        assert result.exit_code != 0

    def test_limit_one_returns_at_most_one(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        self._create_distinct_snapshots(tmp_path, 3)
        result = runner.invoke(
            cli, ["snapshot", "list", "--limit", "1", "--json"], env=_env(tmp_path)
        )
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        # Three snapshots exist, so a limit of one must yield exactly one.
        assert len(data["snapshots"]) == 1

    def test_negative_limit_rejected(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        result = runner.invoke(
            cli, ["snapshot", "list", "--limit", "-1"], env=_env(tmp_path)
        )
        assert result.exit_code != 0

    def test_short_flag_n_respected(self, tmp_path: pathlib.Path) -> None:
        # NOTE(review): despite the test name this passes the long ``--limit``
        # flag; confirm whether a short ``-n`` alias exists and should be
        # exercised here instead.
        _init_repo(tmp_path)
        self._create_distinct_snapshots(tmp_path, 4)
        result = runner.invoke(
            cli, ["snapshot", "list", "--limit", "2", "--json"], env=_env(tmp_path)
        )
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        # Four snapshots exist, so a limit of two must yield exactly two.
        assert len(data["snapshots"]) == 2
670
671
672	# ---------------------------------------------------------------------------
673	# Idempotency — same tree → same snapshot_id
674	# ---------------------------------------------------------------------------
675
676
class TestIdempotency:
    """The snapshot ID is a function of the working-tree content."""

    def test_same_files_same_snapshot_id(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 3)
        id_a = _create_snapshot(tmp_path)["snapshot_id"]
        id_b = _create_snapshot(tmp_path)["snapshot_id"]
        assert id_a == id_b

    def test_different_content_different_snapshot_id(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        _create_files(tmp_path, 2)
        before = _create_snapshot(tmp_path)
        # Rewrite one tracked file so the tree content differs.
        tmp_path.joinpath("file_0.txt").write_text("changed-content", encoding="utf-8")
        after = _create_snapshot(tmp_path)
        assert before["snapshot_id"] != after["snapshot_id"]

    def test_list_shows_only_one_when_idempotent(self, tmp_path: pathlib.Path) -> None:
        """write_snapshot is idempotent — same ID written twice → one file."""
        _init_repo(tmp_path)
        _create_files(tmp_path, 2)
        for _ in range(2):
            _create_snapshot(tmp_path)
        outcome = runner.invoke(cli, ["snapshot", "list", "--json"], env=_env(tmp_path))
        listed = json.loads(outcome.output)["snapshots"]
        # Collapse the listing to its distinct snapshot IDs.
        distinct_ids = {entry["snapshot_id"] for entry in listed}
        assert len(distinct_ids) == 1
705
706
707	# ---------------------------------------------------------------------------
708	# List ordering — newest first
709	# ---------------------------------------------------------------------------
710
711
class TestListOrdering:
    """Ordering of the entries returned by ``snapshot list``."""

    def test_list_newest_first(self, tmp_path: pathlib.Path) -> None:
        """Multiple distinct snapshots must be returned newest-first."""
        _init_repo(tmp_path)
        created_ids: list[str] = []
        for round_no in range(3):
            # A new file per round makes every snapshot distinct.
            tmp_path.joinpath(f"round_{round_no}.txt").write_text(
                f"v{round_no}", encoding="utf-8"
            )
            created_ids.append(_create_snapshot(tmp_path)["snapshot_id"])

        outcome = runner.invoke(cli, ["snapshot", "list", "--json"], env=_env(tmp_path))
        listed = json.loads(outcome.output)["snapshots"]
        listed_ids = [entry["snapshot_id"] for entry in listed]
        # Newest (last created) must appear first.
        assert listed_ids[0] == created_ids[-1]
726
727
728	# ---------------------------------------------------------------------------
729	# Concurrent stress
730	# ---------------------------------------------------------------------------
731
732
class TestConcurrentStress:
    """Parallel ``snapshot create`` invocations against one repository."""

    def test_concurrent_creates_all_succeed(self, tmp_path: pathlib.Path) -> None:
        """N threads creating snapshots concurrently must all succeed."""
        _init_repo(tmp_path)
        _create_files(tmp_path, 5)
        n_threads = 8
        errors: list[str] = []
        results: list[dict] = []
        lock = threading.Lock()

        def _do_create() -> None:
            # An exception raised inside a worker thread never reaches the
            # main thread, so it is recorded here and reported by the
            # ``errors`` assertion below instead of being lost.
            try:
                result = runner.invoke(
                    cli, ["snapshot", "create", "--json"], env=_env(tmp_path)
                )
                if result.exit_code != 0:
                    failure, payload = result.output, None
                else:
                    failure, payload = None, json.loads(result.output)
            except Exception as exc:  # thread boundary: record, do not swallow
                failure, payload = f"{type(exc).__name__}: {exc}", None
            with lock:
                if failure is not None:
                    errors.append(failure)
                else:
                    results.append(payload)

        threads = [threading.Thread(target=_do_create) for _ in range(n_threads)]
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        assert not errors, f"Some creates failed: {errors}"
        assert len(results) == n_threads
        # All results have a valid snapshot_id.
        for r in results:
            assert r["snapshot_id"].startswith("sha256:")
            assert r["exit_code"] == 0
765
766
767	# ---------------------------------------------------------------------------
768	# Large file stress
769	# ---------------------------------------------------------------------------
770
771
class TestLargeFileExport:
    """Round-trip of a single large binary file through create and export."""

    def test_large_file_round_trips_correctly(self, tmp_path: pathlib.Path) -> None:
        """A 5 MiB file must survive create → export → extract unchanged."""
        _init_repo(tmp_path)
        blob = os.urandom(5 * 1024 * 1024)
        tmp_path.joinpath("big.bin").write_bytes(blob)
        snap_id = _create_snapshot(tmp_path)["snapshot_id"]
        archive = tmp_path / "big.tar.gz"

        argv = ["snapshot", "export", snap_id, "--output", str(archive), "--json"]
        outcome = runner.invoke(cli, argv, env=_env(tmp_path))
        assert outcome.exit_code == 0
        report = json.loads(outcome.output)
        assert report["file_count"] >= 1
        assert report["size_bytes"] > 0
        assert archive.exists()

        # Verify archive actually opens.
        assert tarfile.is_tarfile(str(archive))
        with tarfile.open(archive, "r:gz") as tar:
            hits = [
                member for member in tar.getmembers()
                if member.name.endswith("big.bin")
            ]
            assert hits, "big.bin not found in archive"
            stream = tar.extractfile(hits[0])
            assert stream is not None
            assert stream.read() == blob
799
800
801	# ---------------------------------------------------------------------------
802	# Export to default filename
803	# ---------------------------------------------------------------------------
804
805
class TestDefaultFilename:
    """``snapshot export`` behaviour when ``--output`` is omitted or the ID is unknown."""

    def test_export_default_filename_is_short_id_dot_format(self, tmp_path: pathlib.Path) -> None:
        """When --output is omitted, the archive uses <short_id>.<fmt>."""
        _init_repo(tmp_path)
        _create_files(tmp_path, 1)
        created = _create_snapshot(tmp_path)
        snap_id = created["snapshot_id"]
        # Run from tmp_path so the default output lands there.
        orig_dir = pathlib.Path.cwd()
        os.chdir(tmp_path)
        try:
            result = runner.invoke(
                cli, ["snapshot", "export", snap_id, "--json"], env=_env(tmp_path)
            )
        finally:
            os.chdir(orig_dir)
        assert result.exit_code == 0, result.output
        data = json.loads(result.output)
        assert data["output"].endswith(".tar.gz")
        # Anchor the lookup at tmp_path: the cwd has already been restored, so
        # a bare relative path would be resolved against the wrong directory.
        # Joining an absolute ``output`` onto tmp_path yields that absolute
        # path unchanged, so both the relative and absolute cases are covered.
        assert (tmp_path / data["output"]).exists()

    def test_export_not_found_exits_nonzero(self, tmp_path: pathlib.Path) -> None:
        _init_repo(tmp_path)
        result = runner.invoke(
            cli, ["snapshot", "export", "nonexistent"], env=_env(tmp_path)
        )
        assert result.exit_code != 0

File History 1 commit

sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 6 days ago

function _init_repo

function _env

function _create_files

function _create_snapshot

class TestJsonEnvelope

function test_create_has_duration_ms

function test_create_has_exit_code_zero

function test_list_has_duration_ms

function test_list_has_exit_code_zero

function test_list_empty_has_envelope

function test_read_has_duration_ms

function test_read_has_exit_code_zero

function test_export_has_duration_ms

function test_export_has_exit_code_zero

class TestJsonSchemaCompleteness

function test_create_schema

function test_list_schema

function test_read_schema

function test_export_schema

function test_manifest_keys_are_sorted

class TestSha256PrefixRoundTrip

function test_list_after_create_returns_snapshot

function test_read_by_full_id_succeeds

function test_bare_hex_prefix_rejected

function test_read_by_sha256_prefix_succeeds

function test_snapshot_id_in_read_matches_create

class TestDataIntegrity

function test_tar_gz_content_matches_source

function test_zip_content_matches_source

function test_export_file_count_matches_snapshot

function test_export_size_bytes_matches_disk

class TestSecurity

function test_ansi_escape_in_note_sanitized_in_text_output

function test_note_appears_sanitized_in_list_text

function test_symlink_in_objects_dir_is_skipped

function test_path_traversal_in_snapshot_id_prefix_is_safe

function test_safe_arcname_rejects_dotdot_path

function test_safe_arcname_rejects_absolute_path

function test_safe_arcname_accepts_normal_path

function test_safe_arcname_rejects_dotdot_in_prefix

class TestTextMode

function test_read_text_shows_snapshot_id

function test_read_text_shows_file_list

function test_read_text_shows_note_when_set

function test_read_text_is_not_valid_json

class TestPrefixExport

function test_tar_gz_files_nested_under_prefix

function test_zip_files_nested_under_prefix

function test_empty_prefix_uses_flat_layout

class TestLimitValidation

function test_limit_zero_rejected

function test_limit_one_returns_at_most_one

function test_negative_limit_rejected

function test_short_flag_n_respected

class TestIdempotency

function test_same_files_same_snapshot_id

function test_different_content_different_snapshot_id

function test_list_shows_only_one_when_idempotent

class TestListOrdering

function test_list_newest_first

class TestConcurrentStress

function test_concurrent_creates_all_succeed

function _do_create

class TestLargeFileExport

function test_large_file_round_trips_correctly

class TestDefaultFilename

function test_export_default_filename_is_short_id_dot_format

function test_export_not_found_exits_nonzero

Pathtests/test_snapshot_supercharge.py

Lines832

Size33.2 KB

LangPython

Refsha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b

Object ID

sha256:68da4bd0d6e796ccafa1f083e534d70c53fc653d4615dad3998a737db122b9ca…

Last commit

sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b

fix: try fetch/presign before fetch/mpack to avoi…

6 days ago

Quick links

Blame History