gabriel / muse public

test_format_patch_supercharge.py file-level

at sha256:d · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:4 Merge branch 'dev' into main · gabriel · Jun 17, 2026
1 """Supercharge tests for ``muse format-patch``.
2
3 TDD — sections labelled [RED] contain tests that fail until the feature lands.
4 Sections labelled [GREEN] fill gaps against existing behavior.
5
6 New features under test
7 -----------------------
8 - ``--agent-id <id>`` [RED] embed agent provenance in the patch record
9 - ``--model-id <id>`` [RED] embed model provenance in the patch record
10 - ``--intent <text>`` [RED] embed an intent description in the patch record
11 - ``--no-blobs`` [RED] omit base64 blob content from the patch file
12 - Rename detection [RED] same-oid delete+insert → rename op in files_renamed
13
14 Gap-fill coverage
15 -----------------
16 - Register-flag parser shape
17 - Unit tests for _sem_ver_bump, _breaking_changes, _make_patch_filename, _action_label
18 - Blob content verification (decoded bytes match source)
19 - from/to manifest delta correctness
20 - Initial-commit sentinel (from_snapshot_id = sha256:000…, from_commit_id = "")
21 - Required-objects sorted + sha256: prefix
22 - ops count === files_added + files_modified + files_deleted
23 - Default stdout output is valid JSON
24 - Stress: 50-file commit
25 - Security: path-traversal and ANSI in treeish
26 - Performance: duration_ms plausible
27 """
28 from __future__ import annotations
29 from collections.abc import Mapping
30
31 import argparse
32 import datetime
33 import json
34 import pathlib
35 import time
36
37 import pytest
38
39 from muse.cli.commands.format_patch import (
40 _action_label,
41 _breaking_changes,
42 _build_file_level_ops,
43 _make_patch_filename,
44 _sem_ver_bump,
45 register,
46 )
47 from muse.core.object_store import write_object
48 from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
49 from muse.core.commits import (
50 CommitRecord,
51 write_commit,
52 )
53 from muse.core.snapshots import (
54 SnapshotRecord,
55 write_snapshot,
56 )
57 from tests.cli_test_helper import CliRunner, InvokeResult
58 from muse.core.types import NULL_LONG_ID, blob_id, long_id
59 from muse.core.paths import ref_path, muse_dir
60
61 runner = CliRunner()
62
63
64 # ---------------------------------------------------------------------------
65 # Repo / commit helpers (shared)
66 # ---------------------------------------------------------------------------
67
68
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Create a minimal repository skeleton for *path* and return *path*.

    Inside the directory returned by ``muse_dir(path)`` this makes the
    ``commits``, ``snapshots``, ``objects`` and ``refs/heads`` directories,
    writes a ``HEAD`` file pointing at ``refs/heads/main`` and a ``repo.json``
    with a fixed test ``repo_id`` and the ``code`` domain.
    """
    root = muse_dir(path)
    layout = ("commits", "snapshots", "objects", "refs/heads")
    for name in layout:
        directory = root / name
        directory.mkdir(parents=True, exist_ok=True)

    head_file = root / "HEAD"
    head_file.write_text("ref: refs/heads/main\n")

    repo_meta = {"repo_id": "test-repo", "domain": "code"}
    (root / "repo.json").write_text(json.dumps(repo_meta))
    return path
76
77
def _write_obj(repo: pathlib.Path, content: bytes) -> str:
    """Write *content* into *repo* via ``write_object`` and return its ``blob_id``."""
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
82
83
84 _TS = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
85
86
def _commit(
    repo: pathlib.Path,
    msg: str,
    manifest: dict[str, str],
    *,
    branch: str = "main",
    parent: str | None = None,
) -> str:
    """Record *manifest* as a snapshot plus commit and point *branch* at it.

    Args:
        repo: Repository root passed to the ``muse.core`` writers.
        msg: Commit message.
        manifest: Mapping stored as the snapshot manifest.
        branch: Branch recorded on the commit and whose ref file is updated.
        parent: Optional parent commit id; a falsy value means "no parent".

    Returns:
        The id of the commit that was written.
    """
    snapshot_id = compute_snapshot_id(manifest)
    snapshot = SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest, created_at=_TS)
    write_snapshot(repo, snapshot)

    # The id is derived from the same fields that are stored on the record below.
    parents = [parent] if parent else []
    commit_id = compute_commit_id(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=msg,
        committed_at_iso=_TS.isoformat(),
        author="gabriel",
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=msg,
        committed_at=_TS,
        author="gabriel",
        parent_commit_id=parent,
        parent2_commit_id=None,
    )
    write_commit(repo, record)

    # Advance the branch ref so later lookups of the branch see this commit.
    branch_ref = ref_path(repo, branch)
    branch_ref.parent.mkdir(parents=True, exist_ok=True)
    branch_ref.write_text(commit_id)
    return commit_id
112
113
def _fp(repo: pathlib.Path, *args: str) -> InvokeResult:
    """Run ``format-patch`` with *args* through the shared runner, with ``MUSE_REPO_ROOT`` set to *repo*."""
    argv = ["format-patch", *args]
    environment = {"MUSE_REPO_ROOT": str(repo)}
    return runner.invoke(None, argv, env=environment)
116
117
118 def _json_out(r: InvokeResult) -> Mapping[str, object]:
119 for line in r.output.splitlines():
120 line = line.strip()
121 if line.startswith("{"):
122 return json.loads(line)
123 raise ValueError(f"No JSON line in output:\n{r.output!r}")
124
125
126 # ---------------------------------------------------------------------------
127 # Register flags [GREEN]
128 # ---------------------------------------------------------------------------
129
130
class TestRegisterFlags:
    """Argument-parser shape checks for the ``format-patch`` sub-command."""

    def _parse(self, *args: str) -> argparse.Namespace:
        """Parse ``format-patch *args`` with a fresh parser populated by ``register``."""
        parser = argparse.ArgumentParser()
        subparsers = parser.add_subparsers()
        register(subparsers)
        argv = ["format-patch", *args]
        return parser.parse_args(argv)

    def test_treeish_defaults_to_head(self) -> None:
        assert self._parse().treeish == "HEAD"

    def test_treeish_positional(self) -> None:
        assert self._parse("main").treeish == "main"

    def test_output_dir_flag(self) -> None:
        assert self._parse("--output-dir", "/tmp").output_dir == "/tmp"

    def test_output_dir_short_flag(self) -> None:
        assert self._parse("-o", "/tmp").output_dir == "/tmp"

    def test_json_flag(self) -> None:
        assert self._parse("--json").json_out is True

    def test_json_default_false(self) -> None:
        assert self._parse().json_out is False

    def test_output_dir_default_none(self) -> None:
        assert self._parse().output_dir is None

    # [RED] — the flags exercised below are not implemented yet.
    def test_agent_id_flag(self) -> None:
        assert self._parse("--agent-id", "claude-code").agent_id == "claude-code"

    def test_agent_id_default_empty(self) -> None:
        assert self._parse().agent_id == ""

    def test_model_id_flag(self) -> None:
        parsed = self._parse("--model-id", "claude-sonnet-4-6")
        assert parsed.model_id == "claude-sonnet-4-6"

    def test_model_id_default_empty(self) -> None:
        assert self._parse().model_id == ""

    def test_intent_flag(self) -> None:
        assert self._parse("--intent", "add login flow").intent == "add login flow"

    def test_intent_default_empty(self) -> None:
        assert self._parse().intent == ""

    def test_no_blobs_flag(self) -> None:
        assert self._parse("--no-blobs").no_blobs is True

    def test_no_blobs_default_false(self) -> None:
        assert self._parse().no_blobs is False
200
201
202 # ---------------------------------------------------------------------------
203 # _sem_ver_bump unit tests [GREEN]
204 # ---------------------------------------------------------------------------
205
206
class TestSemVerBump:
    """Unit tests for ``_sem_ver_bump(message, files_added, files_deleted)``."""

    def test_break_prefix_is_major(self) -> None:
        bump = _sem_ver_bump("break: remove old API", [], [])
        assert bump == "major"

    def test_feat_bang_is_major(self) -> None:
        bump = _sem_ver_bump("feat!: overhaul auth", [], [])
        assert bump == "major"

    def test_breaking_change_body_is_major(self) -> None:
        message = "refactor: cleanup\n\nBREAKING CHANGE: old param removed"
        assert _sem_ver_bump(message, [], []) == "major"

    def test_breaking_change_case_insensitive(self) -> None:
        bump = _sem_ver_bump("breaking change in behavior", [], [])
        assert bump == "major"

    def test_feat_prefix_is_minor(self) -> None:
        bump = _sem_ver_bump("feat: add endpoint", [], [])
        assert bump == "minor"

    def test_files_added_is_minor(self) -> None:
        bump = _sem_ver_bump("chore: misc", ["new_file.py"], [])
        assert bump == "minor"

    def test_fix_prefix_is_patch(self) -> None:
        bump = _sem_ver_bump("fix: off-by-one", [], [])
        assert bump == "patch"

    def test_chore_no_additions_is_patch(self) -> None:
        bump = _sem_ver_bump("chore: update deps", [], [])
        assert bump == "patch"

    def test_empty_message_is_patch(self) -> None:
        bump = _sem_ver_bump("", [], [])
        assert bump == "patch"

    def test_files_deleted_alone_is_patch(self) -> None:
        bump = _sem_ver_bump("chore: cleanup", [], ["old.py"])
        assert bump == "patch"

    def test_feat_prefix_beats_files_added(self) -> None:
        # Prefix and added files both point at "minor"; the result stays "minor".
        bump = _sem_ver_bump("feat: add stuff", ["new.py"], [])
        assert bump == "minor"

    def test_break_prefix_beats_files_added(self) -> None:
        bump = _sem_ver_bump("break: remove", ["new.py"], [])
        assert bump == "major"

    def test_result_is_one_of_three_values(self) -> None:
        allowed = ("major", "minor", "patch")
        for message in ["anything", "feat: x", "break: y"]:
            assert _sem_ver_bump(message, [], []) in allowed
249
250
251 # ---------------------------------------------------------------------------
252 # _breaking_changes unit tests [GREEN]
253 # ---------------------------------------------------------------------------
254
255
class TestBreakingChanges:
    """Unit tests for ``_breaking_changes(message)``."""

    def test_empty_message_returns_empty(self) -> None:
        found = _breaking_changes("")
        assert found == []

    def test_no_breaking_change_returns_empty(self) -> None:
        found = _breaking_changes("feat: add endpoint")
        assert found == []

    def test_single_breaking_change(self) -> None:
        message = "refactor: cleanup\n\nBREAKING CHANGE: removed --legacy flag"
        assert _breaking_changes(message) == ["removed --legacy flag"]

    def test_multiple_breaking_changes(self) -> None:
        message = "refactor:\n\nBREAKING CHANGE: first\nBREAKING CHANGE: second"
        assert _breaking_changes(message) == ["first", "second"]

    def test_leading_trailing_whitespace_stripped(self) -> None:
        message = "BREAKING CHANGE: trimmed "
        assert _breaking_changes(message) == ["trimmed"]

    def test_not_at_start_of_line_ignored(self) -> None:
        # The marker appears mid-sentence: the line starts with "This", not with
        # "BREAKING CHANGE:", so it must not be reported as a breaking change.
        message = "This has BREAKING CHANGE: in the middle"
        assert _breaking_changes(message) == []

    def test_returns_list(self) -> None:
        found = _breaking_changes("anything")
        assert isinstance(found, list)
289
290
291 # ---------------------------------------------------------------------------
292 # _make_patch_filename unit tests [GREEN]
293 # ---------------------------------------------------------------------------
294
295
class TestMakePatchFilename:
    """Unit tests for ``_make_patch_filename(subject)``."""

    def test_basic_subject(self) -> None:
        filename = _make_patch_filename("feat: add hello")
        assert filename == "feat-add-hello.mpatch"

    def test_ends_with_mpatch(self) -> None:
        assert _make_patch_filename("anything").endswith(".mpatch")

    def test_empty_subject_returns_patch(self) -> None:
        filename = _make_patch_filename("")
        assert filename == "patch.mpatch"

    def test_slash_replaced(self) -> None:
        assert "/" not in _make_patch_filename("fix/my-bug")

    def test_backslash_replaced(self) -> None:
        assert "\\" not in _make_patch_filename("fix\\my-bug")

    def test_dot_replaced(self) -> None:
        # Only the slug is inspected; the ".mpatch" extension keeps its own dot.
        filename = _make_patch_filename("v1.2.3 release")
        assert "." not in filename.removesuffix(".mpatch")

    def test_long_subject_truncated(self) -> None:
        filename = _make_patch_filename("x" * 100)
        stem = filename.removesuffix(".mpatch")
        assert len(stem) <= 52

    def test_unicode_stripped(self) -> None:
        # Only checks that the ASCII words survive a subject containing non-ASCII text.
        filename = _make_patch_filename("feat: émoji 🚀 add")
        assert "feat" in filename

    def test_whitespace_replaced_with_dash(self) -> None:
        assert " " not in _make_patch_filename("add multiple spaces")

    def test_no_leading_trailing_dashes_in_slug(self) -> None:
        filename = _make_patch_filename(" spaces around ")
        stem = filename.removesuffix(".mpatch")
        assert not stem.startswith("-")
        assert not stem.endswith("-")

    def test_no_consecutive_dashes_in_slug(self) -> None:
        filename = _make_patch_filename("a!!b")
        assert "--" not in filename.removesuffix(".mpatch")
344
345
346 # ---------------------------------------------------------------------------
347 # _action_label unit tests [GREEN]
348 # ---------------------------------------------------------------------------
349
350
class TestActionLabel:
    """Unit tests for the op-name to label mapping in ``_action_label``."""

    def test_insert_is_inserted(self) -> None:
        label = _action_label("insert")
        assert label == "inserted"

    def test_delete_is_deleted(self) -> None:
        label = _action_label("delete")
        assert label == "deleted"

    def test_replace_is_modified(self) -> None:
        label = _action_label("replace")
        assert label == "modified"

    def test_mutate_is_modified(self) -> None:
        label = _action_label("mutate")
        assert label == "modified"

    def test_patch_is_modified(self) -> None:
        label = _action_label("patch")
        assert label == "modified"

    def test_move_is_moved(self) -> None:
        label = _action_label("move")
        assert label == "moved"

    def test_rename_is_renamed(self) -> None:
        label = _action_label("rename")
        assert label == "renamed"

    def test_unknown_defaults_to_modified(self) -> None:
        # Unrecognised, empty and upper-case op names all get the fallback label.
        for op_name in ("frob", "", "UPDATE"):
            assert _action_label(op_name) == "modified"
377
378
379 # ---------------------------------------------------------------------------
380 # _build_file_level_ops — internal unit tests [GREEN + RED for rename]
381 # ---------------------------------------------------------------------------
382
383
class TestBuildFileOps:
    """Unit tests for ``_build_file_level_ops(base, target)``.

    The [GREEN] tests unpack the result with a trailing ``*_`` so they accept any
    number of elements after the first four (ops, added, modified, deleted).
    The [RED] rename tests unpack exactly five values, the fifth being the
    rename mapping, and therefore fail until rename detection lands.
    """

    def test_added_file_in_ops(self) -> None:
        base: dict[str, str] = {}
        target = {"new.py": long_id("a" * 64)}
        ops, _, _, _, *_ = _build_file_level_ops(base, target)
        assert any(op["address"] == "new.py" and op["op"] == "insert" for op in ops)

    def test_deleted_file_in_ops(self) -> None:
        base = {"old.py": long_id("a" * 64)}
        target: dict[str, str] = {}
        ops, _, _, _, *_ = _build_file_level_ops(base, target)
        assert any(op["address"] == "old.py" and op["op"] == "delete" for op in ops)

    def test_modified_file_in_ops(self) -> None:
        oid_a = long_id("a" * 64)
        oid_b = long_id("b" * 64)
        ops, _, _, _, *_ = _build_file_level_ops({"f.py": oid_a}, {"f.py": oid_b})
        assert any(op["address"] == "f.py" and op["op"] == "replace" for op in ops)

    def test_added_list_sorted(self) -> None:
        base: dict[str, str] = {}
        target = {"z.py": long_id("z" * 64), "a.py": long_id("a" * 64)}
        _, added, _, _, *_ = _build_file_level_ops(base, target)
        assert added == sorted(added)

    def test_deleted_list_sorted(self) -> None:
        base = {"z.py": long_id("z" * 64), "a.py": long_id("a" * 64)}
        _, _, _, deleted, *_ = _build_file_level_ops(base, {})
        assert deleted == sorted(deleted)

    def test_modified_list_sorted(self) -> None:
        oid_a = long_id("a" * 64)
        oid_b = long_id("b" * 64)
        base = {"z.py": oid_a, "a.py": oid_a}
        target = {"z.py": oid_b, "a.py": oid_b}
        _, _, modified, _, *_ = _build_file_level_ops(base, target)
        assert modified == sorted(modified)

    def test_unchanged_file_not_in_ops(self) -> None:
        oid = long_id("a" * 64)
        ops, _, _, _, *_ = _build_file_level_ops({"f.py": oid}, {"f.py": oid})
        addresses = [op["address"] for op in ops]
        assert "f.py" not in addresses

    # [RED] rename detection — same oid deleted + added at different path = rename
    def test_rename_detected(self) -> None:
        oid = long_id("a" * 64)
        base = {"old.py": oid}
        target = {"new.py": oid}
        _, _, _, _, renamed = _build_file_level_ops(base, target)
        assert "old.py" in renamed
        assert renamed["old.py"] == "new.py"

    def test_rename_not_in_files_added(self) -> None:
        oid = long_id("a" * 64)
        _, added, _, _, _ = _build_file_level_ops({"old.py": oid}, {"new.py": oid})
        assert "new.py" not in added

    def test_rename_not_in_files_deleted(self) -> None:
        oid = long_id("a" * 64)
        _, _, _, deleted, _ = _build_file_level_ops({"old.py": oid}, {"new.py": oid})
        assert "old.py" not in deleted

    def test_rename_op_present_in_ops(self) -> None:
        oid = long_id("a" * 64)
        ops, _, _, _, _ = _build_file_level_ops({"old.py": oid}, {"new.py": oid})
        rename_ops = [op for op in ops if op.get("op") == "move"]
        assert len(rename_ops) == 1

    def test_rename_op_action_label_is_moved(self) -> None:
        oid = long_id("a" * 64)
        ops, _, _, _, _ = _build_file_level_ops({"old.py": oid}, {"new.py": oid})
        rename_ops = [op for op in ops if op.get("op") == "move"]
        # Guard the index below: a missing move op should fail as an assertion,
        # not surface as an IndexError.
        assert len(rename_ops) == 1
        assert rename_ops[0]["action_label"] == "moved"

    def test_different_oid_not_a_rename(self) -> None:
        oid_a = long_id("a" * 64)
        oid_b = long_id("b" * 64)
        _, added, _, deleted, renamed = _build_file_level_ops(
            {"old.py": oid_a}, {"new.py": oid_b}
        )
        assert not renamed
        assert "old.py" in deleted
        assert "new.py" in added

    def test_empty_renamed_dict_when_no_renames(self) -> None:
        oid_a = long_id("a" * 64)
        oid_b = long_id("b" * 64)
        _, _, _, _, renamed = _build_file_level_ops({"f.py": oid_a}, {"f.py": oid_b})
        assert renamed == {}
476
477
478 # ---------------------------------------------------------------------------
479 # Blob embedding [GREEN]
480 # ---------------------------------------------------------------------------
481
482
class TestBlobEmbedding:
    """CLI tests for the ``blobs`` field of the ``format-patch --json`` record."""

    def test_blobs_field_present(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"hello blob")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json"))
        assert "blobs" in data

    def test_blobs_is_dict(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"hello blob")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json"))
        assert isinstance(data["blobs"], dict)

    def test_blob_key_matches_required_object(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        content = b"blob content"
        oid = _write_obj(repo, content)
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json"))
        assert oid in data["blobs"]

    def test_blob_decodes_to_original_content(self, tmp_path: pathlib.Path) -> None:
        import base64

        repo = _init_repo(tmp_path)
        # NUL and control bytes make sure the round trip is binary-safe.
        content = b"exact bytes\x00\x01\x02"
        oid = _write_obj(repo, content)
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json"))
        decoded = base64.b64decode(data["blobs"][oid])
        assert decoded == content

    def test_blobs_is_base64_valid_string(self, tmp_path: pathlib.Path) -> None:
        import base64

        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"any content")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json"))
        blobs = data["blobs"]
        # Without this guard the loop below would pass vacuously on an empty dict.
        assert blobs, "expected at least one embedded blob"
        for val in blobs.values():
            assert isinstance(val, str)
            # validate=True rejects characters outside the base64 alphabet; the
            # default silently discards them, which would hide malformed output.
            base64.b64decode(val, validate=True)  # must not raise

    def test_unmodified_objects_not_in_blobs(self, tmp_path: pathlib.Path) -> None:
        """The object of a deleted path is not embedded in ``blobs``.

        NOTE(review): despite the test name, only the deleted file's object
        (``b.py``) is checked; nothing is asserted about the unchanged ``a.py``.
        """
        repo = _init_repo(tmp_path)
        oid_a = _write_obj(repo, b"a")
        oid_b = _write_obj(repo, b"b")
        c1 = _commit(repo, "c1", {"a.py": oid_a, "b.py": oid_b})
        oid_c = _write_obj(repo, b"c")
        _commit(repo, "c2", {"a.py": oid_a, "c.py": oid_c}, parent=c1)
        # b.py is gone in c2, so its object is not part of the target side.
        data = _json_out(_fp(repo, "--json"))
        assert oid_b not in data["blobs"]
537
538
539 # ---------------------------------------------------------------------------
540 # Required objects [GREEN]
541 # ---------------------------------------------------------------------------
542
543
class TestRequiredObjects:
    """CLI tests for the ``required_objects`` list of the patch record."""

    def test_all_sha256_prefixed(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json"))
        required = data["required_objects"]
        # An empty list would make the loop below pass without checking anything.
        assert required, "expected required_objects to be non-empty for an initial commit"
        for rid in required:
            assert rid.startswith("sha256:")

    def test_required_objects_is_sorted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid_a = _write_obj(repo, b"aaa")
        oid_b = _write_obj(repo, b"bbb")
        _commit(repo, "init", {"a.py": oid_a, "b.py": oid_b})
        data = _json_out(_fp(repo, "--json"))
        ro = data["required_objects"]
        assert ro == sorted(ro)

    def test_required_objects_subset_of_to_manifest(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"y")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json"))
        to_vals = set(data["to_manifest"].values())
        required = data["required_objects"]
        # Guard against a vacuous pass when nothing is listed.
        assert required, "expected required_objects to be non-empty for an initial commit"
        for rid in required:
            assert rid in to_vals

    def test_required_objects_empty_for_no_change(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"z")
        c1 = _commit(repo, "c1", {"f.py": oid})
        # Second commit reuses the same manifest, so there is nothing new to ship.
        _commit(repo, "c2 no-op", {"f.py": oid}, parent=c1)
        data = _json_out(_fp(repo, "--json"))
        assert data["required_objects"] == []
578
579
580 # ---------------------------------------------------------------------------
581 # Manifest delta correctness [GREEN]
582 # ---------------------------------------------------------------------------
583
584
class TestManifestDelta:
    """CLI tests for the ``from_manifest`` / ``to_manifest`` pair of the patch record."""

    def test_added_path_in_to_manifest(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        new_oid = _write_obj(repo, b"new")
        _commit(repo, "init", {"new.py": new_oid})
        record = _json_out(_fp(repo, "--json"))
        assert "new.py" in record["to_manifest"]

    def test_added_path_not_in_from_manifest(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        new_oid = _write_obj(repo, b"new")
        _commit(repo, "init", {"new.py": new_oid})
        record = _json_out(_fp(repo, "--json"))
        assert "new.py" not in record["from_manifest"]

    def test_deleted_path_in_from_manifest(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        old_oid = _write_obj(repo, b"old")
        first = _commit(repo, "c1", {"old.py": old_oid})
        _commit(repo, "c2", {}, parent=first)
        record = _json_out(_fp(repo, "--json"))
        assert "old.py" in record["from_manifest"]

    def test_deleted_path_not_in_to_manifest(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        old_oid = _write_obj(repo, b"old")
        first = _commit(repo, "c1", {"old.py": old_oid})
        _commit(repo, "c2", {}, parent=first)
        record = _json_out(_fp(repo, "--json"))
        assert "old.py" not in record["to_manifest"]

    def test_modified_path_in_both_manifests(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        before_oid = _write_obj(repo, b"v1")
        first = _commit(repo, "c1", {"f.py": before_oid})
        after_oid = _write_obj(repo, b"v2")
        _commit(repo, "c2", {"f.py": after_oid}, parent=first)
        record = _json_out(_fp(repo, "--json"))
        source_side = record["from_manifest"]
        target_side = record["to_manifest"]
        assert "f.py" in source_side
        assert "f.py" in target_side
        assert source_side["f.py"] != target_side["f.py"]

    def test_unchanged_path_not_in_either_manifest(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        kept_oid = _write_obj(repo, b"keep")
        changed_v1 = _write_obj(repo, b"v1")
        first = _commit(repo, "c1", {"keep.py": kept_oid, "chg.py": changed_v1})
        changed_v2 = _write_obj(repo, b"v2")
        _commit(repo, "c2", {"keep.py": kept_oid, "chg.py": changed_v2}, parent=first)
        record = _json_out(_fp(repo, "--json"))
        # keep.py is identical in both commits, so it must appear on neither side.
        assert "keep.py" not in record["from_manifest"]
        assert "keep.py" not in record["to_manifest"]
637
638
639 # ---------------------------------------------------------------------------
640 # Initial commit sentinel [GREEN]
641 # ---------------------------------------------------------------------------
642
643
class TestInitialCommit:
    """CLI tests for the ``from_*`` fields when the commit has (or lacks) a parent."""

    def test_from_snapshot_id_is_sentinel_for_initial(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        blob = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": blob})
        record = _json_out(_fp(repo, "--json"))
        # A parentless commit reports the NULL_LONG_ID sentinel as its source snapshot.
        assert record["from_snapshot_id"] == NULL_LONG_ID

    def test_from_commit_id_empty_for_initial(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        blob = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": blob})
        record = _json_out(_fp(repo, "--json"))
        assert record["from_commit_id"] == ""

    def test_all_files_in_files_added_for_initial(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        blob_a = _write_obj(repo, b"a")
        blob_b = _write_obj(repo, b"b")
        _commit(repo, "init", {"a.py": blob_a, "b.py": blob_b})
        record = _json_out(_fp(repo, "--json"))
        added = record["files_added"]
        assert "a.py" in added
        assert "b.py" in added
        assert record["files_modified"] == []
        assert record["files_deleted"] == []

    def test_from_snapshot_id_set_for_second_commit(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        blob_v1 = _write_obj(repo, b"v1")
        first = _commit(repo, "c1", {"f.py": blob_v1})
        blob_v2 = _write_obj(repo, b"v2")
        _commit(repo, "c2", {"f.py": blob_v2}, parent=first)
        record = _json_out(_fp(repo, "--json"))
        # With a parent present the source snapshot must be a real id, not the sentinel.
        assert record["from_snapshot_id"] != NULL_LONG_ID
680
681
682 # ---------------------------------------------------------------------------
683 # Agent provenance flags [RED] — --agent-id, --model-id, --intent
684 # ---------------------------------------------------------------------------
685
686
class TestAgentProvenance:
    """[RED] CLI tests for the ``--agent-id``, ``--model-id`` and ``--intent`` flags."""

    def test_agent_id_set_in_output(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json", "--agent-id", "claude-code"))
        assert data["agent_id"] == "claude-code"

    def test_model_id_set_in_output(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json", "--model-id", "claude-sonnet-4-6"))
        assert data["model_id"] == "claude-sonnet-4-6"

    def test_intent_set_in_output(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json", "--intent", "bootstrap project"))
        assert data["intent"] == "bootstrap project"

    def test_agent_id_in_mpatch_file(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        out_dir = tmp_path / "patches"
        out_dir.mkdir()
        r = _fp(repo, "--output-dir", str(out_dir), "--agent-id", "claude-code")
        assert r.exit_code == 0
        # Sort for a deterministic pick and fail with a clear message (not an
        # IndexError) when the command wrote no patch file.
        patch_files = sorted(out_dir.glob("*.mpatch"))
        assert patch_files, f"no .mpatch file written to {out_dir}"
        data = json.loads(patch_files[0].read_bytes())
        assert data["agent_id"] == "claude-code"

    def test_agent_id_affects_patch_id(self, tmp_path: pathlib.Path) -> None:
        """Different agent_id → different patch_id (agent_id is part of canonical JSON)."""
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        pid_no_agent = _json_out(_fp(repo, "--json"))["patch_id"]
        pid_with_agent = _json_out(_fp(repo, "--json", "--agent-id", "claude-code"))["patch_id"]
        assert pid_no_agent != pid_with_agent

    def test_no_agent_flags_leaves_fields_empty(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json"))
        assert data["agent_id"] == ""
        assert data["model_id"] == ""
        assert data["intent"] == ""

    def test_all_provenance_flags_together(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(
            _fp(
                repo,
                "--json",
                "--agent-id", "claude-code",
                "--model-id", "claude-sonnet-4-6",
                "--intent", "add login endpoint",
            )
        )
        assert data["agent_id"] == "claude-code"
        assert data["model_id"] == "claude-sonnet-4-6"
        assert data["intent"] == "add login endpoint"
750
751
752 # ---------------------------------------------------------------------------
753 # --no-blobs flag [RED]
754 # ---------------------------------------------------------------------------
755
756
class TestNoBlobs:
    """[RED] CLI tests for the ``--no-blobs`` flag."""

    def test_no_blobs_empties_blobs_dict(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"blob content here")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json", "--no-blobs"))
        assert data["blobs"] == {}

    def test_no_blobs_preserves_required_objects(self, tmp_path: pathlib.Path) -> None:
        """required_objects still lists what the target needs even without inline blobs."""
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"blob content here")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json", "--no-blobs"))
        assert oid in data["required_objects"]

    def test_no_blobs_in_mpatch_file(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"some bytes")
        _commit(repo, "init", {"f.py": oid})
        out_dir = tmp_path / "patches"
        out_dir.mkdir()
        r = _fp(repo, "--output-dir", str(out_dir), "--no-blobs")
        assert r.exit_code == 0
        # Sort for a deterministic pick and fail with a clear message (not an
        # IndexError) when the command wrote no patch file.
        patch_files = sorted(out_dir.glob("*.mpatch"))
        assert patch_files, f"no .mpatch file written to {out_dir}"
        data = json.loads(patch_files[0].read_bytes())
        assert data["blobs"] == {}

    def test_default_has_blobs(self, tmp_path: pathlib.Path) -> None:
        """Without --no-blobs, blobs are embedded (existing behavior)."""
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"keep me")
        _commit(repo, "init", {"f.py": oid})
        data = _json_out(_fp(repo, "--json"))
        assert len(data["blobs"]) > 0

    def test_no_blobs_output_smaller_than_with_blobs(self, tmp_path: pathlib.Path) -> None:
        """--no-blobs patch should be smaller (no base64 content)."""
        repo = _init_repo(tmp_path)
        content = b"x" * 1024  # 1KB object
        oid = _write_obj(repo, content)
        _commit(repo, "init", {"f.py": oid})
        r_with = _fp(repo, "--json")
        r_no = _fp(repo, "--json", "--no-blobs")
        assert len(r_no.output) < len(r_with.output)
801
802
803 # ---------------------------------------------------------------------------
804 # Rename detection via CLI [RED]
805 # ---------------------------------------------------------------------------
806
807
class TestRenameDetectionCLI:
    """[RED] CLI tests for rename detection in the ``format-patch --json`` record."""

    def _renamed_repo(self, tmp_path: pathlib.Path) -> pathlib.Path:
        """Build a repo whose second commit moves one object from ``old.py`` to ``new.py``."""
        repo = _init_repo(tmp_path)
        shared = _write_obj(repo, b"shared content")
        first = _commit(repo, "c1", {"old.py": shared})
        _commit(repo, "c2 rename", {"new.py": shared}, parent=first)
        return repo

    def _rename_record(self, tmp_path: pathlib.Path) -> Mapping[str, object]:
        """Return the JSON patch record produced for the rename repo."""
        return _json_out(_fp(self._renamed_repo(tmp_path), "--json"))

    def test_rename_in_files_renamed(self, tmp_path: pathlib.Path) -> None:
        record = self._rename_record(tmp_path)
        renames = record["files_renamed"]
        assert "old.py" in renames
        assert renames["old.py"] == "new.py"

    def test_rename_not_in_files_added(self, tmp_path: pathlib.Path) -> None:
        record = self._rename_record(tmp_path)
        assert "new.py" not in record["files_added"]

    def test_rename_not_in_files_deleted(self, tmp_path: pathlib.Path) -> None:
        record = self._rename_record(tmp_path)
        assert "old.py" not in record["files_deleted"]

    def test_genuine_add_and_delete_not_confused_for_rename(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        blob_a = _write_obj(repo, b"content A")
        blob_b = _write_obj(repo, b"content B")
        first = _commit(repo, "c1", {"a.py": blob_a})
        _commit(repo, "c2", {"b.py": blob_b}, parent=first)
        record = _json_out(_fp(repo, "--json"))
        # Different content at the two paths: a plain delete plus add, not a rename.
        assert record["files_renamed"] == {}
        assert "b.py" in record["files_added"]
        assert "a.py" in record["files_deleted"]
844
845
846 # ---------------------------------------------------------------------------
847 # Default stdout output [GREEN]
848 # ---------------------------------------------------------------------------
849
850
class TestDefaultOutput:
    """Output of the command when it is invoked without any flags."""

    def test_default_output_is_valid_json(self, tmp_path: pathlib.Path) -> None:
        """The flag-less invocation succeeds and its output parses as JSON with a ``patch_id``."""
        root = _init_repo(tmp_path)
        _commit(root, "init", {"f.py": _write_obj(root, b"x")})
        result = _fp(root)
        assert result.exit_code == 0
        raw = result.output.strip()
        parsed = json.loads(raw)
        assert "patch_id" in parsed

    def test_default_output_has_patch_id(self, tmp_path: pathlib.Path) -> None:
        """The ``patch_id`` in the default output carries the ``sha256:`` prefix."""
        root = _init_repo(tmp_path)
        _commit(root, "init", {"f.py": _write_obj(root, b"x")})
        result = _fp(root)
        parsed = json.loads(result.output.strip())
        patch_id = parsed["patch_id"]
        assert patch_id.startswith("sha256:")
868
869
870 # ---------------------------------------------------------------------------
871 # ops completeness [GREEN]
872 # ---------------------------------------------------------------------------
873
874
class TestOpsCompleteness:
    """Consistency between the ``ops`` list and the per-category file lists."""

    def test_ops_count_equals_sum_of_file_lists(self, tmp_path: pathlib.Path) -> None:
        """The number of ops equals the combined size of the four file lists."""
        root = _init_repo(tmp_path)
        oid_a = _write_obj(root, b"a")
        oid_b = _write_obj(root, b"b")
        oid_c = _write_obj(root, b"c")
        first = _commit(root, "c1", {"a.py": oid_a, "b.py": oid_b, "c.py": oid_c})
        oid_a_modified = _write_obj(root, b"a-modified")
        # Second commit: c.py deleted, a.py modified, b.py unchanged.
        _commit(root, "c2", {"a.py": oid_a_modified, "b.py": oid_b}, parent=first)
        patch = _json_out(_fp(root, "--json"))
        # Every entry in any of the four lists (renames included) is expected
        # to account for exactly one op.
        categories = ("files_added", "files_modified", "files_deleted", "files_renamed")
        expected_ops = sum(len(patch[key]) for key in categories)
        assert len(patch["ops"]) == expected_ops

    def test_each_op_has_required_fields(self, tmp_path: pathlib.Path) -> None:
        """Every op carries the ``op``, ``address`` and ``action_label`` keys."""
        root = _init_repo(tmp_path)
        oid_a = _write_obj(root, b"a")
        oid_b = _write_obj(root, b"b")
        first = _commit(root, "c1", {"a.py": oid_a})
        oid_a_modified = _write_obj(root, b"a-mod")
        _commit(root, "c2", {"a.py": oid_a_modified, "b.py": oid_b}, parent=first)
        patch = _json_out(_fp(root, "--json"))
        required = ("op", "address", "action_label")
        for entry in patch["ops"]:
            for field in required:
                assert field in entry
907
908
909 # ---------------------------------------------------------------------------
910 # Stress [GREEN]
911 # ---------------------------------------------------------------------------
912
913
class TestStress:
    """Larger commits: 50 added files, and a 40-file base with mixed changes."""

    def test_50_files_added(self, tmp_path: pathlib.Path) -> None:
        """A single commit adding 50 files yields 50 added files and 50 ops."""
        root = _init_repo(tmp_path)
        files = {
            f"src/file_{n:02d}.py": _write_obj(root, f"# file {n}\n".encode() * 10)
            for n in range(50)
        }
        _commit(root, "feat: add 50 files", files)
        result = _fp(root, "--json")
        assert result.exit_code == 0
        patch = _json_out(result)
        assert len(patch["files_added"]) == 50
        assert len(patch["ops"]) == 50

    def test_mixed_50_file_commit(self, tmp_path: pathlib.Path) -> None:
        """Keep 20, modify 10, delete 10 and add 10 files on top of a 40-file commit."""
        root = _init_repo(tmp_path)
        base = {
            f"f{n:02d}.py": _write_obj(root, f"v1-{n}".encode())
            for n in range(40)
        }
        first = _commit(root, "c1", base)

        entries = list(base.items())
        # First 20 paths are carried over untouched.
        follow_up = dict(entries[:20])
        # Next 10 paths get fresh content.
        follow_up.update(
            (path, _write_obj(root, f"v2-{path}".encode()))
            for path, _ in entries[20:30]
        )
        # entries[30:40] are left out, i.e. deleted; ten brand-new paths are added.
        follow_up.update(
            (f"new{n}.py", _write_obj(root, f"new-{n}".encode()))
            for n in range(10)
        )
        _commit(root, "c2 mixed", follow_up, parent=first)

        result = _fp(root, "--json")
        assert result.exit_code == 0
        patch = _json_out(result)
        for key in ("files_added", "files_modified", "files_deleted"):
            assert len(patch[key]) == 10
955
956
957 # ---------------------------------------------------------------------------
958 # Security [GREEN]
959 # ---------------------------------------------------------------------------
960
961
class TestSecurity:
    """Malformed or hostile treeish arguments and error reporting.

    Each rejection test commits a single file and then passes a suspicious
    treeish argument; the command is expected to exit non-zero.
    """

    def test_path_traversal_in_treeish_rejected(self, tmp_path: pathlib.Path) -> None:
        """A treeish shaped like a relative path escape exits non-zero."""
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        r = _fp(repo, "../../etc/passwd", "--json")
        assert r.exit_code != 0

    def test_ansi_escape_in_treeish_rejected(self, tmp_path: pathlib.Path) -> None:
        """A treeish containing ANSI escape sequences exits non-zero."""
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        r = _fp(repo, "\x1b[31mbad\x1b[0m", "--json")
        assert r.exit_code != 0

    def test_very_long_treeish_rejected(self, tmp_path: pathlib.Path) -> None:
        """A 300-character treeish exits non-zero."""
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        r = _fp(repo, "a" * 300, "--json")
        assert r.exit_code != 0

    def test_null_byte_in_treeish_rejected(self, tmp_path: pathlib.Path) -> None:
        """A treeish with an embedded NUL byte exits non-zero."""
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        r = _fp(repo, "main\x00malicious", "--json")
        assert r.exit_code != 0

    def test_error_goes_to_stderr_not_stdout(self, tmp_path: pathlib.Path) -> None:
        """Running against an empty repo fails and reports the failure on stderr."""
        repo = _init_repo(tmp_path)
        r = _fp(repo, "--json")  # empty repo → error
        assert r.exit_code != 0
        # The stderr check must stand on its own: OR-ing it with the exit-code
        # condition asserted just above would make it always true.
        # NOTE(review): despite the test name, stdout is not inspected here.
        stderr = r.stderr
        assert "❌" in stderr or "error" in stderr.lower()

    def test_no_traceback_on_bad_ref(self, tmp_path: pathlib.Path) -> None:
        """An unknown ref never leaks a Python traceback on either stream."""
        repo = _init_repo(tmp_path)
        oid = _write_obj(repo, b"x")
        _commit(repo, "init", {"f.py": oid})
        r = _fp(repo, "no-such-ref", "--json")
        assert "Traceback" not in r.output
        assert "Traceback" not in r.stderr
1004
1005
1006 # ---------------------------------------------------------------------------
1007 # Performance [GREEN]
1008 # ---------------------------------------------------------------------------
1009
1010
class TestPerformance:
    """Sanity bounds on the ``duration_ms`` field of the patch JSON."""

    def test_duration_ms_under_two_seconds(self, tmp_path: pathlib.Path) -> None:
        """A 20-file commit reports a ``duration_ms`` below 2000."""
        root = _init_repo(tmp_path)
        files = {
            f"file_{n}.py": _write_obj(root, f"content {n}".encode() * 50)
            for n in range(20)
        }
        _commit(root, "feat: 20 files", files)
        patch = _json_out(_fp(root, "--json"))
        elapsed_ms = patch["duration_ms"]
        assert elapsed_ms < 2000.0

    def test_duration_ms_non_negative(self, tmp_path: pathlib.Path) -> None:
        """``duration_ms`` is never negative."""
        root = _init_repo(tmp_path)
        _commit(root, "init", {"f.py": _write_obj(root, b"x")})
        patch = _json_out(_fp(root, "--json"))
        elapsed_ms = patch["duration_ms"]
        assert elapsed_ms >= 0.0