gabriel / muse public
test_archive_command.py python
842 lines 33.3 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 22 days ago
1 """Tests for ``muse archive`` — snapshot export command.
2
3 Tiers
4 -----
5 1. Unit — ``_safe_arcname`` and ``_build_entries`` in isolation.
6 2. Integration — store round-trip: write commit/snapshot, build archive, verify contents.
7 3. End-to-End — full CLI invocations via CliRunner.
8 4. Security — zip-slip, tar-slip, null bytes, ``..`` traversal, unsafe prefixes.
9 5. Stress — large manifests, many files, names at path limits.
10 6. Performance — timing assertions on archive creation and list mode.
11 7. Data Integrity — archive contents match snapshot manifest exactly; JSON schema complete.
12 """
13
14 from __future__ import annotations
15
16 import datetime
17 import json
18 import pathlib
19 import tarfile
20 import time
21 import zipfile
22
23 import pytest
24 from tests.cli_test_helper import CliRunner
25
26 cli = None # argparse migration — CliRunner ignores this arg
27
28 from muse.cli.commands.archive import (
29 _FORMAT_CHOICES,
30 _build_entries,
31 _build_tar,
32 _build_zip,
33 _safe_arcname,
34 )
35 from muse.core.object_store import write_object
36 from muse.core.ids import hash_commit, hash_snapshot
37 from muse.core.commits import (
38 CommitRecord,
39 write_commit,
40 )
41 from muse.core.snapshots import (
42 SnapshotRecord,
43 write_snapshot,
44 )
45 from muse.core.types import blob_id, long_id, short_id, split_id, fake_id
46 from muse.core.paths import heads_dir, muse_dir
47
48 runner = CliRunner()
49
50
51 # ---------------------------------------------------------------------------
52 # Fixtures
53 # ---------------------------------------------------------------------------
54
55
@pytest.fixture
def repo(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch) -> pathlib.Path:
    """Create a bare-bones Muse repo in ``tmp_path`` and chdir into it.

    Inside the directory returned by ``muse_dir(tmp_path)`` this writes
    ``repo.json`` and a ``HEAD`` that points at ``refs/heads/main``, then
    creates the empty ``commits``, ``snapshots``, ``objects`` and
    ``refs/heads`` directories.

    Returns:
        ``tmp_path`` — the repo root, which is also the working directory
        for the duration of the test.
    """
    monkeypatch.chdir(tmp_path)

    meta = muse_dir(tmp_path)
    meta.mkdir()

    seed_files = {
        "repo.json": '{"repo_id":"test-repo"}',
        "HEAD": "ref: refs/heads/main",
    }
    for filename, text in seed_files.items():
        (meta / filename).write_text(text)

    for subdir in ("commits", "snapshots", "objects"):
        (meta / subdir).mkdir()
    (meta / "refs" / "heads").mkdir(parents=True)

    return tmp_path
69
70
def _make_commit(
    root: pathlib.Path,
    files: dict[str, bytes],
    message: str = "test commit",
) -> CommitRecord:
    """Store ``files`` as one parentless commit on ``main`` and return it.

    Each file's bytes are written as an object, the resulting path → object-id
    manifest is written as a snapshot, a commit referencing that snapshot is
    written, and the ``main`` head file is overwritten with the commit id.

    Args:
        root: Repository root.
        files: Mapping of relative path → raw file bytes.
        message: Commit message.

    Returns:
        The ``CommitRecord`` that was written.
    """
    # Snapshot and commit share the same fixed timestamp.
    stamp = datetime.datetime(2026, 3, 1, tzinfo=datetime.timezone.utc)
    author = "test-author"

    manifest: dict[str, str] = {
        rel_path: blob_id(content) for rel_path, content in files.items()
    }
    for rel_path, content in files.items():
        write_object(root, manifest[rel_path], content)

    snap_id = hash_snapshot(manifest)
    write_snapshot(
        root,
        SnapshotRecord(
            snapshot_id=snap_id,
            manifest=manifest,
            directories=[],
            created_at=stamp,
            note="",
        ),
    )

    cid = hash_commit(
        parent_ids=[],
        snapshot_id=snap_id,
        message=message,
        committed_at_iso=stamp.isoformat(),
        author=author,
    )
    record = CommitRecord(
        commit_id=cid,
        branch="main",
        snapshot_id=snap_id,
        message=message,
        committed_at=stamp,
        author=author,
        agent_id="test-agent",
        model_id="test-model",
    )
    write_commit(root, record)

    # Point the branch ref at the new commit.
    (heads_dir(root) / "main").write_text(cid)
    return record
123
124
125 # ===========================================================================
126 # 1. Unit tests — _safe_arcname and _build_entries
127 # ===========================================================================
128
129
class TestSafeArcname:
    """Unit checks for ``_safe_arcname(prefix, rel_path)``."""

    def test_simple_path_no_prefix(self) -> None:
        arcname = _safe_arcname("", "src/main.py")
        assert arcname == "src/main.py"

    def test_simple_path_with_prefix(self) -> None:
        arcname = _safe_arcname("myproject", "src/main.py")
        assert arcname == "myproject/src/main.py"

    def test_prefix_trailing_slash_stripped(self) -> None:
        arcname = _safe_arcname("myproject/", "a.py")
        assert arcname == "myproject/a.py"

    def test_empty_rel_path_returns_none(self) -> None:
        arcname = _safe_arcname("", "")
        assert arcname is None

    def test_dot_rel_path_returns_none(self) -> None:
        # PurePosixPath("") → "." — should be rejected
        arcname = _safe_arcname("", ".")
        assert arcname is None

    def test_absolute_rel_path_returns_none(self) -> None:
        arcname = _safe_arcname("", "/etc/passwd")
        assert arcname is None

    def test_dotdot_in_rel_path_returns_none(self) -> None:
        arcname = _safe_arcname("", "../../etc/passwd")
        assert arcname is None

    def test_dotdot_component_in_rel_path_returns_none(self) -> None:
        arcname = _safe_arcname("", "src/../../../etc/passwd")
        assert arcname is None

    def test_dotdot_in_prefix_returns_none(self) -> None:
        arcname = _safe_arcname("../traversal", "a.py")
        assert arcname is None

    def test_null_byte_in_rel_path_returns_none(self) -> None:
        arcname = _safe_arcname("", "a\x00b.py")
        assert arcname is None

    def test_null_byte_in_prefix_returns_none(self) -> None:
        arcname = _safe_arcname("pre\x00fix", "a.py")
        assert arcname is None

    def test_nested_path(self) -> None:
        arcname = _safe_arcname("", "a/b/c/d.txt")
        assert arcname == "a/b/c/d.txt"

    def test_single_filename(self) -> None:
        arcname = _safe_arcname("", "README.md")
        assert arcname == "README.md"

    def test_prefix_with_subdirs(self) -> None:
        arcname = _safe_arcname("proj/v2", "src/app.py")
        assert arcname == "proj/v2/src/app.py"
173
174
class TestBuildEntries:
    """Unit checks for ``_build_entries(root, manifest, prefix)``.

    The tests unpack the return value as ``(entries, skipped)`` and each
    entry as ``(arcname, object id, path)``.
    """

    @staticmethod
    def _committed_manifest(root: pathlib.Path, files: dict[str, bytes]) -> dict[str, str]:
        """Commit ``files`` and return the manifest read back from the store.

        Reading the commit and snapshot back (instead of reusing the in-memory
        records) keeps these tests exercising the on-disk round trip. Both
        lookups are asserted non-``None`` so a store failure is reported here
        rather than as an ``AttributeError`` further down.
        """
        from muse.core.commits import read_commit
        from muse.core.snapshots import read_snapshot

        record = _make_commit(root, files)
        commit = read_commit(root, record.commit_id)
        assert commit is not None
        snap = read_snapshot(root, commit.snapshot_id)
        assert snap is not None
        return snap.manifest

    def test_returns_entries_for_valid_manifest(self, repo: pathlib.Path) -> None:
        manifest = self._committed_manifest(repo, {"a.py": b"hello"})
        entries, skipped = _build_entries(repo, manifest, "")
        assert len(entries) == 1
        assert skipped == []
        arcname, _oid, path = entries[0]
        assert arcname == "a.py"
        assert path.exists()

    def test_skips_missing_objects(self, repo: pathlib.Path) -> None:
        # Fake a manifest entry pointing at a nonexistent object.
        fake_manifest = {"ghost.py": fake_id("ghost-obj")}
        entries, skipped = _build_entries(repo, fake_manifest, "")
        assert entries == []
        assert len(skipped) == 1
        assert "missing" in skipped[0]

    def test_entries_sorted_by_arcname(self, repo: pathlib.Path) -> None:
        manifest = self._committed_manifest(
            repo, {"z.py": b"z", "a.py": b"a", "m.py": b"m"}
        )
        entries, _ = _build_entries(repo, manifest, "")
        names = [e[0] for e in entries]
        assert names == sorted(names)

    def test_prefix_applied_to_arcnames(self, repo: pathlib.Path) -> None:
        manifest = self._committed_manifest(repo, {"src/app.py": b"app"})
        entries, _ = _build_entries(repo, manifest, "myproject")
        assert entries[0][0] == "myproject/src/app.py"
217
218
219 # ===========================================================================
220 # 2. Integration tests — store round-trip + archive contents
221 # ===========================================================================
222
223
class TestTarContents:
    """Integration checks on the members of a generated ``.tar.gz`` archive."""

    def test_tar_contains_all_files(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"aaa", "b.py": b"bbb"})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with tarfile.open(target, "r:gz") as archive:
            members = archive.getnames()
        for expected in ("a.py", "b.py"):
            assert expected in members

    def test_tar_file_contents_match_source(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        payload = b"print('hello')"
        _make_commit(repo, {"hello.py": payload})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with tarfile.open(target, "r:gz") as archive:
            handle = archive.extractfile(archive.getmember("hello.py"))
            assert handle is not None
            assert handle.read() == payload

    def test_tar_prefix_wraps_files(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--prefix", "proj", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with tarfile.open(target, "r:gz") as archive:
            members = archive.getnames()
        assert "proj/a.py" in members
        assert "a.py" not in members

    def test_no_muse_metadata_in_tar(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"src/app.py": b"app"})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with tarfile.open(target, "r:gz") as archive:
            members = archive.getnames()
        # No member name may mention the repo metadata directory.
        assert all(".muse" not in member for member in members)
263
264
class TestZipContents:
    """Integration checks on the members of a generated ``.zip`` archive."""

    def test_zip_contains_all_files(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"x.py": b"x", "y.py": b"y"})
        target = tmp_path / "out.zip"
        argv = ["archive", "--format", "zip", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with zipfile.ZipFile(target) as archive:
            members = archive.namelist()
        for expected in ("x.py", "y.py"):
            assert expected in members

    def test_zip_file_contents_match_source(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        payload = b"hello world"
        _make_commit(repo, {"data.txt": payload})
        target = tmp_path / "out.zip"
        argv = ["archive", "--format", "zip", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with zipfile.ZipFile(target) as archive:
            stored = archive.read("data.txt")
        assert stored == payload

    def test_zip_prefix_wraps_files(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"b.py": b"b"})
        target = tmp_path / "out.zip"
        argv = ["archive", "--format", "zip", "--prefix", "release", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with zipfile.ZipFile(target) as archive:
            members = archive.namelist()
        assert "release/b.py" in members
        assert "b.py" not in members
293
294
295 # ===========================================================================
296 # 3. End-to-End tests — full CLI
297 # ===========================================================================
298
299
class TestDefaultBehavior:
    """End-to-end checks for ``muse archive`` invoked without options."""

    def test_exits_0_with_commit(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        outcome = runner.invoke(cli, ["archive"], catch_exceptions=False)
        assert outcome.exit_code == 0

    def test_default_filename_no_sha256_prefix(self, repo: pathlib.Path) -> None:
        record = _make_commit(repo, {"a.py": b"a"})
        runner.invoke(cli, ["archive"], catch_exceptions=False)
        # Only the second element of split_id is used for the expected filename.
        hex_full = split_id(record.commit_id)[1]
        expected = pathlib.Path(f"{hex_full}.tar.gz")
        assert expected.exists()

    def test_default_filename_has_no_colon(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        runner.invoke(cli, ["archive"], catch_exceptions=False)
        produced = list(pathlib.Path(".").glob("*.tar.gz"))
        assert produced, "no tar.gz file created"
        first = produced[0]
        assert ":" not in first.name

    def test_no_commits_exits_1(self, repo: pathlib.Path) -> None:
        outcome = runner.invoke(cli, ["archive"])
        assert outcome.exit_code != 0

    def test_output_includes_file_count(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a", "b.py": b"b"})
        outcome = runner.invoke(cli, ["archive"], catch_exceptions=False)
        assert "2 file(s)" in outcome.output

    def test_output_includes_commit_short(self, repo: pathlib.Path) -> None:
        record = _make_commit(repo, {"a.py": b"a"})
        outcome = runner.invoke(cli, ["archive"], catch_exceptions=False)
        abbreviated = short_id(record.commit_id, strip=True)
        assert abbreviated in outcome.output
333
334
class TestFormatFlag:
    """End-to-end checks for the ``--format`` / ``-f`` option."""

    def test_zip_format_flag(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        target = tmp_path / "out.zip"
        argv = ["archive", "--format", "zip", "--output", str(target)]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        assert outcome.exit_code == 0
        assert zipfile.is_zipfile(target)

    def test_tgz_short_flag(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "-f", "tar.gz", "--output", str(target)]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        assert outcome.exit_code == 0
        assert tarfile.is_tarfile(target)

    def test_invalid_format_exits_nonzero(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        argv = ["archive", "--format", "rar"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code != 0
360
361
class TestRefFlag:
    """End-to-end checks for the ``--ref`` option."""

    def test_ref_to_branch(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"}, message="on main")
        out = tmp_path / "out.tar.gz"
        result = runner.invoke(
            cli, ["archive", "--ref", "main", "--output", str(out)],
            catch_exceptions=False,
        )
        assert result.exit_code == 0

    def test_ref_to_commit_id(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        """An abbreviated (8-character) commit id is accepted as ``--ref``."""
        c = _make_commit(repo, {"a.py": b"a"})
        out = tmp_path / "out.tar.gz"
        # Let split_id strip the algorithm prefix instead of slicing past a
        # hard-coded "sha256:" literal, so the test does not silently pass a
        # garbage ref if the id prefix ever changes.
        _, hex_full = split_id(c.commit_id)
        short = hex_full[:8]
        result = runner.invoke(
            cli, ["archive", "--ref", short, "--output", str(out)],
            catch_exceptions=False,
        )
        assert result.exit_code == 0

    def test_unknown_ref_exits_1(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        result = runner.invoke(cli, ["archive", "--ref", "no-such-branch"])
        assert result.exit_code != 0
386
387
class TestOutputFlag:
    """End-to-end checks for the ``--output`` / ``-o`` option."""

    def test_custom_output_path(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        target = tmp_path / "release.tar.gz"
        argv = ["archive", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        assert target.exists()

    def test_output_short_flag(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        target = tmp_path / "r.tar.gz"
        argv = ["archive", "-o", str(target)]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        assert outcome.exit_code == 0
        assert target.exists()

    def test_missing_output_dir_exits_1(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        argv = ["archive", "--output", "/nonexistent/dir/out.tar.gz"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code != 0
408
409
class TestListMode:
    """End-to-end checks for ``muse archive --list`` (text and ``--json``)."""

    def test_list_exits_0(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        outcome = runner.invoke(cli, ["archive", "--list"], catch_exceptions=False)
        assert outcome.exit_code == 0

    def test_list_does_not_create_file(self, repo: pathlib.Path) -> None:
        def tarballs() -> set[pathlib.Path]:
            return set(pathlib.Path(".").glob("*.tar.gz"))

        _make_commit(repo, {"a.py": b"a"})
        before = tarballs()
        runner.invoke(cli, ["archive", "--list"], catch_exceptions=False)
        after = tarballs()
        assert before == after

    def test_list_shows_file_paths(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"src/app.py": b"app", "README.md": b"readme"})
        outcome = runner.invoke(cli, ["archive", "--list"], catch_exceptions=False)
        for expected in ("src/app.py", "README.md"):
            assert expected in outcome.output

    def test_list_shows_file_count(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a", "b.py": b"b", "c.py": b"c"})
        outcome = runner.invoke(cli, ["archive", "--list"], catch_exceptions=False)
        assert "3 file(s)" in outcome.output

    def test_list_with_prefix_shows_prefixed_paths(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        argv = ["archive", "--list", "--prefix", "proj"]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        assert "proj/a.py" in outcome.output

    def test_list_json_schema(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        argv = ["archive", "--list", "--json"]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        payload = json.loads(outcome.output)
        top_level_keys = {
            "commit_id", "snapshot_id", "message", "branch", "author",
            "committed_at", "ref", "prefix", "file_count", "entries",
        }
        assert top_level_keys <= payload.keys()
        entries = payload["entries"]
        assert isinstance(entries, list)
        assert {"path", "object_id"} <= entries[0].keys()

    def test_list_json_entry_count_matches(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a", "b.py": b"b"})
        argv = ["archive", "--list", "--json"]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        payload = json.loads(outcome.output)
        assert payload["file_count"] == 2
        assert len(payload["entries"]) == 2
464
465
class TestJsonOutput:
    """End-to-end checks for ``muse archive --json`` when writing an archive."""

    @staticmethod
    def _archive_json(out: pathlib.Path, *extra: str):
        """Invoke ``archive --json [*extra] --output <out>`` and return the result."""
        argv = ["archive", "--json", *extra, "--output", str(out)]
        return runner.invoke(cli, argv, catch_exceptions=False)

    def test_json_exits_0(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        outcome = self._archive_json(tmp_path / "out.tar.gz")
        assert outcome.exit_code == 0

    def test_json_is_valid(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        outcome = self._archive_json(tmp_path / "out.tar.gz")
        payload = json.loads(outcome.output)
        assert isinstance(payload, dict)

    def test_json_has_all_keys(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        outcome = self._archive_json(tmp_path / "out.tar.gz")
        payload = json.loads(outcome.output)
        expected_keys = {
            "path", "format", "file_count", "bytes",
            "commit_id", "snapshot_id", "message", "branch",
            "author", "agent_id", "model_id", "committed_at",
            "ref", "prefix",
        }
        assert expected_keys <= payload.keys()

    def test_json_file_count_correct(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a", "b.py": b"b"})
        outcome = self._archive_json(tmp_path / "out.tar.gz")
        payload = json.loads(outcome.output)
        assert payload["file_count"] == 2

    def test_json_bytes_positive(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"some content here"})
        outcome = self._archive_json(tmp_path / "out.tar.gz")
        payload = json.loads(outcome.output)
        assert payload["bytes"] > 0

    def test_json_commit_id_matches(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        record = _make_commit(repo, {"a.py": b"a"})
        outcome = self._archive_json(tmp_path / "out.tar.gz")
        payload = json.loads(outcome.output)
        assert payload["commit_id"] == record.commit_id

    def test_json_snapshot_id_present(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        record = _make_commit(repo, {"a.py": b"a"})
        outcome = self._archive_json(tmp_path / "out.tar.gz")
        payload = json.loads(outcome.output)
        assert payload["snapshot_id"] == record.snapshot_id

    def test_json_agent_id_and_model_id(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        outcome = self._archive_json(tmp_path / "out.tar.gz")
        payload = json.loads(outcome.output)
        assert payload["agent_id"] == "test-agent"
        assert payload["model_id"] == "test-model"

    def test_json_ref_null_for_head(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        outcome = self._archive_json(tmp_path / "out.tar.gz")
        payload = json.loads(outcome.output)
        assert payload["ref"] is None

    def test_json_ref_set_when_given(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        outcome = self._archive_json(tmp_path / "out.tar.gz", "--ref", "main")
        payload = json.loads(outcome.output)
        assert payload["ref"] == "main"

    def test_json_prefix_field(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        outcome = self._archive_json(tmp_path / "out.tar.gz", "--prefix", "myproj")
        payload = json.loads(outcome.output)
        assert payload["prefix"] == "myproj"

    def test_json_format_field(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        outcome = self._archive_json(tmp_path / "out.zip", "--format", "zip")
        payload = json.loads(outcome.output)
        assert payload["format"] == "zip"
592
593
594 # ===========================================================================
595 # 4. Security tests
596 # ===========================================================================
597
598
class TestSecurity:
    """Path-traversal and injection checks on arcnames, ``--prefix`` and ``--ref``."""

    def test_safe_arcname_blocks_traversal(self) -> None:
        arcname = _safe_arcname("", "../../etc/passwd")
        assert arcname is None

    def test_safe_arcname_blocks_absolute(self) -> None:
        arcname = _safe_arcname("", "/etc/passwd")
        assert arcname is None

    def test_safe_arcname_blocks_null_byte_path(self) -> None:
        arcname = _safe_arcname("", "a\x00b")
        assert arcname is None

    def test_safe_arcname_blocks_null_byte_prefix(self) -> None:
        arcname = _safe_arcname("pre\x00fix", "a.py")
        assert arcname is None

    def test_safe_arcname_blocks_dotdot_prefix(self) -> None:
        arcname = _safe_arcname("../../malicious", "a.py")
        assert arcname is None

    def test_prefix_dotdot_rejected_by_cli(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        argv = ["archive", "--prefix", "../../etc"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code != 0

    def test_prefix_dotdot_error_on_stderr(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        argv = ["archive", "--prefix", "../../etc"]
        outcome = runner.invoke(cli, argv)
        assert "❌" in outcome.stderr

    def test_unknown_ref_does_not_glob(self, repo: pathlib.Path) -> None:
        """A glob metacharacter in --ref must not trigger directory scanning."""
        _make_commit(repo, {"a.py": b"a"})
        argv = ["archive", "--ref", "../../*"]
        outcome = runner.invoke(cli, argv)
        assert outcome.exit_code != 0

    def test_tar_archive_has_no_traversal_paths(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"safe/file.py": b"ok"})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with tarfile.open(target, "r:gz") as archive:
            for member in archive.getnames():
                assert not member.startswith("/")
                assert ".." not in member.split("/")

    def test_zip_archive_has_no_traversal_paths(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"safe/file.py": b"ok"})
        target = tmp_path / "out.zip"
        argv = ["archive", "--format", "zip", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with zipfile.ZipFile(target) as archive:
            for member in archive.namelist():
                assert not member.startswith("/")
                assert ".." not in member.split("/")
651
652
653 # ===========================================================================
654 # 5. Stress tests
655 # ===========================================================================
656
657
class TestStress:
    """Larger inputs: 100-file manifests, deep nesting, and a 512 KiB blob."""

    def test_100_file_manifest_tar(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        modules = {
            f"src/module_{i:03d}.py": f"# module {i}".encode() for i in range(100)
        }
        _make_commit(repo, modules)
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--output", str(target)]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        assert outcome.exit_code == 0
        with tarfile.open(target, "r:gz") as archive:
            member_count = len(archive.getnames())
        assert member_count == 100

    def test_100_file_manifest_zip(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        modules = {
            f"src/module_{i:03d}.py": f"# module {i}".encode() for i in range(100)
        }
        _make_commit(repo, modules)
        target = tmp_path / "out.zip"
        argv = ["archive", "--format", "zip", "--output", str(target)]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        assert outcome.exit_code == 0
        with zipfile.ZipFile(target) as archive:
            member_count = len(archive.namelist())
        assert member_count == 100

    def test_list_mode_100_files(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {f"f_{i:03d}.txt": b"x" for i in range(100)})
        argv = ["archive", "--list", "--json"]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        payload = json.loads(outcome.output)
        assert payload["file_count"] == 100
        assert len(payload["entries"]) == 100

    def test_deeply_nested_paths(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        nested = "a/b/c/d/e/f/deep.py"
        _make_commit(repo, {nested: b"deep"})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with tarfile.open(target, "r:gz") as archive:
            members = archive.getnames()
        assert nested in members

    def test_large_file_content(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        blob = b"x" * (1024 * 512)  # 512 KiB
        _make_commit(repo, {"big.bin": blob})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--output", str(target)]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        assert outcome.exit_code == 0
        with tarfile.open(target, "r:gz") as archive:
            handle = archive.extractfile(archive.getmember("big.bin"))
            assert handle is not None
            assert handle.read() == blob
706
707
708 # ===========================================================================
709 # 6. Performance tests
710 # ===========================================================================
711
712
class TestPerformance:
    """Wall-clock budgets for small archive operations.

    Elapsed time is measured with ``time.perf_counter()``, the clock intended
    for short durations. Each test also asserts a zero exit code, outside the
    timed region, so a command that fails fast cannot satisfy the budget.
    """

    def test_single_file_archive_under_500ms(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        out = tmp_path / "out.tar.gz"
        start = time.perf_counter()
        result = runner.invoke(cli, ["archive", "--output", str(out)], catch_exceptions=False)
        elapsed = time.perf_counter() - start
        assert result.exit_code == 0
        assert elapsed < 0.5, f"single-file archive took {elapsed:.3f}s"

    def test_list_mode_under_300ms(self, repo: pathlib.Path) -> None:
        files = {f"f_{i}.py": b"x" for i in range(20)}
        _make_commit(repo, files)
        start = time.perf_counter()
        result = runner.invoke(cli, ["archive", "--list", "--json"], catch_exceptions=False)
        elapsed = time.perf_counter() - start
        assert result.exit_code == 0
        assert elapsed < 0.3, f"list mode took {elapsed:.3f}s"

    def test_json_output_under_500ms(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        files = {f"f_{i}.py": b"x" for i in range(10)}
        _make_commit(repo, files)
        out = tmp_path / "out.tar.gz"
        start = time.perf_counter()
        result = runner.invoke(cli, ["archive", "--json", "--output", str(out)], catch_exceptions=False)
        elapsed = time.perf_counter() - start
        assert result.exit_code == 0
        assert elapsed < 0.5, f"json archive took {elapsed:.3f}s"
738
739
740 # ===========================================================================
741 # 7. Data Integrity tests
742 # ===========================================================================
743
744
class TestDataIntegrity:
    """Archive contents and JSON metadata must agree with what was committed."""

    def test_archive_contains_exactly_manifest_files(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """Every file in the snapshot manifest appears in the archive, no more."""
        committed = {"a.py": b"a", "b/c.py": b"bc", "d.txt": b"d"}
        _make_commit(repo, committed)
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with tarfile.open(target, "r:gz") as archive:
            members = set(archive.getnames())
        assert members == set(committed)

    def test_file_bytes_match_original(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        payload = b"\x00\x01\x02binary\xff\xfe"
        _make_commit(repo, {"binary.bin": payload})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with tarfile.open(target, "r:gz") as archive:
            handle = archive.extractfile(archive.getmember("binary.bin"))
            assert handle is not None
            assert handle.read() == payload

    def test_zip_bytes_match_original(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        payload = b"exact content"
        _make_commit(repo, {"f.txt": payload})
        target = tmp_path / "out.zip"
        argv = ["archive", "--format", "zip", "--output", str(target)]
        runner.invoke(cli, argv, catch_exceptions=False)
        with zipfile.ZipFile(target) as archive:
            stored = archive.read("f.txt")
        assert stored == payload

    def test_list_entries_match_archive_entries(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        """Files listed by --list match files written to the archive."""
        _make_commit(repo, {"x.py": b"x", "y/z.py": b"yz"})

        listing = runner.invoke(
            cli, ["archive", "--list", "--json"], catch_exceptions=False
        )
        listed_paths = {
            entry["path"] for entry in json.loads(listing.output)["entries"]
        }

        target = tmp_path / "out.tar.gz"
        runner.invoke(cli, ["archive", "--output", str(target)], catch_exceptions=False)
        with tarfile.open(target, "r:gz") as archive:
            archive_paths = set(archive.getnames())

        assert listed_paths == archive_paths

    def test_list_entries_sorted(self, repo: pathlib.Path) -> None:
        _make_commit(repo, {"z.py": b"z", "a.py": b"a", "m.py": b"m"})
        argv = ["archive", "--list", "--json"]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        payload = json.loads(outcome.output)
        listed = [entry["path"] for entry in payload["entries"]]
        assert listed == sorted(listed)

    def test_committed_at_iso8601(self, repo: pathlib.Path, tmp_path: pathlib.Path) -> None:
        _make_commit(repo, {"a.py": b"a"})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--json", "--output", str(target)]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        payload = json.loads(outcome.output)
        # Must parse without error
        parsed = datetime.datetime.fromisoformat(payload["committed_at"])
        assert parsed.tzinfo is not None

    def test_json_path_field_matches_written_file(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        _make_commit(repo, {"a.py": b"a"})
        target = tmp_path / "exact-name.tar.gz"
        argv = ["archive", "--json", "--output", str(target)]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        payload = json.loads(outcome.output)
        assert pathlib.Path(payload["path"]) == target

    def test_json_bytes_matches_file_size(
        self, repo: pathlib.Path, tmp_path: pathlib.Path
    ) -> None:
        _make_commit(repo, {"a.py": b"content here"})
        target = tmp_path / "out.tar.gz"
        argv = ["archive", "--json", "--output", str(target)]
        outcome = runner.invoke(cli, argv, catch_exceptions=False)
        payload = json.loads(outcome.output)
        size_on_disk = target.stat().st_size
        assert payload["bytes"] == size_on_disk

    def test_format_choices_complete(self) -> None:
        supported = {"tar.gz", "zip"}
        assert _FORMAT_CHOICES == supported
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 22 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 23 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 30 days ago