gabriel / muse public
test_cmd_shortlog_hardening.py python
865 lines 30.2 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
1 """Hardening test suite for ``muse shortlog``.
2
3 Coverage:
4 - Unit: _branch_names (symlink guard), _group_key (all four modes),
5 _build_groups (email flag, dedup), _parse_date (valid + invalid)
6 - Security: ANSI in author/message sanitized in text, raw in JSON;
7 symlink inside refs/heads is skipped
8 - Error routing: all user errors go to stderr
9 - JSON schema: _ShortlogJson shape (repo_id, branch, groups), all fields
10 - New flags: --group-by (agent, model, branch), --summary, --no-merges,
11 --since, --until, combined filters
12 - --json: empty, single group, multi-group, provenance fields
13 - Integration: --all branches with dedup, --limit early-exit, date range
14 - E2E: help output, combined flags
15 - Stress: 500 commits × 5 authors, 50-branch repo, concurrent reads
16 """
17
18 from __future__ import annotations
19 from collections.abc import Mapping
20
21 import datetime
22 import json
23 import os
24 import pathlib
25 from typing import TypedDict
26 from unittest.mock import patch
27
28 import pytest
29 from tests.cli_test_helper import CliRunner, InvokeResult
30
31 from muse.cli.commands.shortlog import _branch_names, _build_groups, _group_key, _parse_date
32 from muse.core.object_store import write_object
33 from muse.core.ids import hash_commit, hash_snapshot
34 from muse.core.commits import (
35 CommitRecord,
36 write_commit,
37 )
38 from muse.core.snapshots import (
39 SnapshotRecord,
40 write_snapshot,
41 )
42 from muse.core.types import Manifest, blob_id
43 from muse.core.paths import heads_dir, muse_dir
44
# Shared CLI runner used by every test in this module.
runner = CliRunner()
# Repo id written into repo.json by _init_repo and asserted on by JSON tests.
_REPO_ID = "shortlog-hard-test"

# Tracks the latest commit_id per "<root>:<branch>" key so _make_commit
# can auto-chain without callers needing to pass parent_id explicitly.
# Annotated as a plain str -> str mapping: the values are commit ids, not
# manifest entries.
_branch_heads_map: dict[str, str] = {}
51
52
53 # ---------------------------------------------------------------------------
54 # Helpers
55 # ---------------------------------------------------------------------------
56
57
58
59
def _init_repo(path: pathlib.Path, *, domain: str = "code") -> pathlib.Path:
    """Lay out a minimal repository skeleton under *path* and return *path*.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories inside the muse directory, points ``HEAD`` at
    ``refs/heads/main`` and writes ``repo.json`` carrying the shared test
    repo id together with *domain*.
    """
    root = muse_dir(path)
    for name in ("commits", "snapshots", "objects", "refs/heads"):
        root.joinpath(name).mkdir(parents=True, exist_ok=True)

    head_file = root / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = json.dumps({"repo_id": _REPO_ID, "domain": domain})
    (root / "repo.json").write_text(repo_meta, encoding="utf-8")
    return path
70
71
# Monotonic counter that gives every generated commit unique content,
# file name and message.
_commit_counter = 0


def _make_commit(
    root: pathlib.Path,
    *,
    author: str = "Alice",
    agent_id: str | None = None,
    model_id: str | None = None,
    branch: str = "main",
    parent_id: str | None = None,
    parent2_id: str | None = None,
    committed_at: datetime.datetime | None = None,
) -> str:
    """Write one commit (blob, snapshot, record, ref) and return its id.

    When *parent_id* is omitted the commit is chained onto the last commit
    this helper created for the same ``root``/``branch`` pair, if any.
    *parent2_id* becomes the second parent. *committed_at* defaults to the
    current UTC time. ``None`` agent/model ids are stored as empty strings.
    """
    global _commit_counter
    _commit_counter += 1
    serial = _commit_counter
    message = f"msg {serial}"

    # One unique blob per commit, stored under a unique file name.
    payload = f"c{serial}".encode()
    blob = blob_id(payload)
    write_object(root, blob, payload)

    manifest = {f"f{serial}.txt": blob}
    snapshot_id = hash_snapshot(manifest)
    write_snapshot(root, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    if committed_at:
        timestamp = committed_at
    else:
        timestamp = datetime.datetime.now(datetime.timezone.utc)

    # Auto-chain: fall back to the last head recorded for this branch.
    head_key = f"{root}:{branch}"
    first_parent = parent_id if parent_id is not None else _branch_heads_map.get(head_key)

    parents = []
    for candidate in (first_parent, parent2_id):
        if candidate is not None:
            parents.append(candidate)

    commit_id = hash_commit(
        parent_ids=parents,
        snapshot_id=snapshot_id,
        message=message,
        committed_at_iso=timestamp.isoformat(),
        author=author,
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=snapshot_id,
        message=message,
        committed_at=timestamp,
        parent_commit_id=first_parent,
        parent2_commit_id=parent2_id,
        author=author,
        agent_id=agent_id or "",
        model_id=model_id or "",
    )
    write_commit(root, record)

    # Point the branch ref at the new commit; parents are created so that
    # slash-separated branch names work.
    ref_path = heads_dir(root) / branch
    ref_path.parent.mkdir(parents=True, exist_ok=True)
    ref_path.write_text(commit_id, encoding="utf-8")

    _branch_heads_map[head_key] = commit_id
    return commit_id
129
130
131 def _env(repo: pathlib.Path) -> Manifest:
132 return {"MUSE_REPO_ROOT": str(repo)}
133
134
def _invoke(args: list[str], env: Manifest) -> InvokeResult:
    """Run the CLI through the shared runner with *args* and *env*."""
    outcome = runner.invoke(None, args, env=env)
    return outcome
137
138
139 class _GroupOut(TypedDict):
140 key: str
141 count: int
142 commits: list[Mapping[str, str | None]]
143
144
145 class _ShortlogOut(TypedDict):
146 repo_id: str
147 branch: str
148 groups: list[_GroupOut]
149
150
151 def _parse_json(result: InvokeResult) -> _ShortlogOut:
152 raw = json.loads(result.output.strip())
153 groups: list[_GroupOut] = [
154 _GroupOut(
155 key=g["key"],
156 count=g["count"],
157 commits=g["commits"],
158 )
159 for g in raw["groups"]
160 ]
161 return _ShortlogOut(
162 repo_id=raw["repo_id"],
163 branch=raw["branch"],
164 groups=groups,
165 )
166
167
168 # ---------------------------------------------------------------------------
169 # Unit: _branch_names — symlink guard
170 # ---------------------------------------------------------------------------
171
172
def test_branch_names_returns_normal_branches(tmp_path: pathlib.Path) -> None:
    """Branches created through regular ref files are all reported."""
    _init_repo(tmp_path)
    for branch in ("main", "dev"):
        _make_commit(tmp_path, branch=branch)
    found = _branch_names(tmp_path)
    assert "main" in found
    assert "dev" in found
180
181
def test_branch_names_skips_symlinks(tmp_path: pathlib.Path) -> None:
    """A symlink placed inside refs/heads is not reported as a branch."""
    _init_repo(tmp_path)
    _make_commit(tmp_path, branch="main")
    link = heads_dir(tmp_path) / "malicious-branch"
    target = tmp_path / "some_other_file"
    try:
        link.symlink_to(target)
    except OSError:
        # Creating symlinks is not possible everywhere; skip rather than fail.
        pytest.skip("filesystem does not support symlinks")
    found = _branch_names(tmp_path)
    assert "malicious-branch" not in found
    assert "main" in found
194
195
def test_branch_names_missing_heads_dir(tmp_path: pathlib.Path) -> None:
    """With the heads directory removed, the result is an empty list."""
    import shutil

    _init_repo(tmp_path)
    refs_heads = heads_dir(tmp_path)
    shutil.rmtree(refs_heads)
    found = _branch_names(tmp_path)
    assert found == []
201
202
203 # ---------------------------------------------------------------------------
204 # Unit: _group_key
205 # ---------------------------------------------------------------------------
206
207
def _make_rec(
    *,
    author: str = "",
    agent_id: str = "",
    model_id: str = "",
    branch: str = "main",
) -> CommitRecord:
    """Build an in-memory CommitRecord with fixed placeholder identity fields.

    Only the grouping-relevant fields are configurable; nothing is written
    to disk.
    """
    now_utc = datetime.datetime.now(datetime.timezone.utc)
    record = CommitRecord(
        commit_id="aaa",
        branch=branch,
        snapshot_id="snap",
        message="x",
        committed_at=now_utc,
        author=author,
        agent_id=agent_id,
        model_id=model_id,
    )
    return record
225
226
def test_group_key_author_with_author() -> None:
    """Author mode yields the author name when one is set."""
    key = _group_key(_make_rec(author="Alice"), "author")
    assert key == "Alice"
230
231
def test_group_key_author_fallback_to_agent() -> None:
    """Author mode falls back to the agent id, tagged ``(agent)``."""
    key = _group_key(_make_rec(agent_id="bot-1"), "author")
    assert key == "bot-1 (agent)"
235
236
def test_group_key_author_unknown() -> None:
    """Author mode yields ``(unknown)`` when author and agent are empty."""
    key = _group_key(_make_rec(), "author")
    assert key == "(unknown)"
240
241
def test_group_key_agent() -> None:
    """Agent mode yields the agent id."""
    key = _group_key(_make_rec(agent_id="gpt-agent"), "agent")
    assert key == "gpt-agent"
245
246
def test_group_key_agent_no_agent() -> None:
    """Agent mode yields ``(no agent)`` when the agent id is empty."""
    key = _group_key(_make_rec(), "agent")
    assert key == "(no agent)"
250
251
def test_group_key_model() -> None:
    """Model mode yields the model id."""
    key = _group_key(_make_rec(model_id="gpt-4o"), "model")
    assert key == "gpt-4o"
255
256
def test_group_key_model_no_model() -> None:
    """Model mode yields ``(no model)`` when the model id is empty."""
    key = _group_key(_make_rec(), "model")
    assert key == "(no model)"
260
261
def test_group_key_branch() -> None:
    """Branch mode yields the branch name, slashes included."""
    key = _group_key(_make_rec(branch="feat/my-thing"), "branch")
    assert key == "feat/my-thing"
265
266
267 # ---------------------------------------------------------------------------
268 # Unit: _parse_date
269 # ---------------------------------------------------------------------------
270
271
def test_parse_date_valid() -> None:
    """A ``YYYY-MM-DD`` string parses to that calendar day with UTC tzinfo."""
    parsed = _parse_date("2025-03-15", "--since")
    assert (parsed.year, parsed.month, parsed.day) == (2025, 3, 15)
    assert parsed.tzinfo == datetime.timezone.utc
278
279
def test_parse_date_invalid_exits() -> None:
    """Free-form text is rejected with ``ValueError``."""
    bad_value = "not-a-date"
    with pytest.raises(ValueError):
        _parse_date(bad_value, "--since")
283
284
def test_parse_date_wrong_format_exits() -> None:
    """A ``DD/MM/YYYY`` string is rejected with ``ValueError``."""
    bad_value = "15/03/2025"
    with pytest.raises(ValueError):
        _parse_date(bad_value, "--since")
288
289
290 # ---------------------------------------------------------------------------
291 # Security: ANSI injection
292 # ---------------------------------------------------------------------------
293
294
def test_ansi_in_author_name_stripped_text(tmp_path: pathlib.Path) -> None:
    """An escape sequence in the author name is absent from text output."""
    hostile_author = "Evil\x1b[31mRED\x1b[0m"
    _init_repo(tmp_path)
    _make_commit(tmp_path, author=hostile_author)
    outcome = _invoke(["shortlog"], _env(tmp_path))
    assert outcome.exit_code == 0
    assert "\x1b[31m" not in outcome.output
301
302
def test_ansi_in_author_name_raw_in_json(tmp_path: pathlib.Path) -> None:
    """JSON output keeps the author name verbatim, escapes included."""
    hostile_author = "Evil\x1b[31mRED\x1b[0m"
    _init_repo(tmp_path)
    _make_commit(tmp_path, author=hostile_author)
    outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
    assert outcome.exit_code == 0
    first_group = _parse_json(outcome)["groups"][0]
    assert first_group["key"] == "Evil\x1b[31mRED\x1b[0m"
310
311
def test_ansi_in_message_stripped_text(tmp_path: pathlib.Path) -> None:
    """ANSI escapes in a commit message must not reach the text output.

    ``_make_commit`` always writes ``msg N`` messages, so the commit is
    re-created here with a message containing an escape sequence and the
    ``main`` ref is repointed at the rewritten commit.
    """
    # Only read_commit needs a local import; hash_commit, write_commit and
    # CommitRecord are already imported at module level.
    from muse.core.commits import read_commit

    _init_repo(tmp_path)
    commit_id = _make_commit(tmp_path)
    original = read_commit(tmp_path, commit_id)
    assert original is not None

    malicious_msg = "fix: \x1b[1mBOLD\x1b[0m thing"
    parent_ids = [original.parent_commit_id] if original.parent_commit_id else []
    new_cid = hash_commit(
        parent_ids=parent_ids,
        snapshot_id=original.snapshot_id,
        message=malicious_msg,
        committed_at_iso=original.committed_at.isoformat(),
        author=original.author,
    )
    patched = CommitRecord(
        commit_id=new_cid,
        branch=original.branch,
        snapshot_id=original.snapshot_id,
        message=malicious_msg,
        committed_at=original.committed_at,
        author=original.author,
    )
    write_commit(tmp_path, patched)
    # Explicit encoding, matching how _make_commit writes ref files.
    (heads_dir(tmp_path) / "main").write_text(new_cid, encoding="utf-8")

    result = _invoke(["shortlog"], _env(tmp_path))
    # Without this check a crash that prints nothing would pass vacuously.
    assert result.exit_code == 0
    assert "\x1b[1m" not in result.output
342
343
344 # ---------------------------------------------------------------------------
345 # Error routing: all user errors go to stderr
346 # ---------------------------------------------------------------------------
347
348
def test_since_invalid_format_stderr(tmp_path: pathlib.Path) -> None:
    """A malformed ``--since`` value makes the command exit non-zero.

    NOTE(review): despite the name, only the exit code is checked here;
    stderr content is not inspected.
    """
    _init_repo(tmp_path)
    _make_commit(tmp_path)
    argv = ["shortlog", "--since", "01-01-2025"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code != 0
354
355
def test_until_invalid_format_stderr(tmp_path: pathlib.Path) -> None:
    """A malformed ``--until`` value makes the command exit non-zero.

    NOTE(review): despite the name, only the exit code is checked here;
    stderr content is not inspected.
    """
    _init_repo(tmp_path)
    _make_commit(tmp_path)
    argv = ["shortlog", "--until", "not-a-date"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code != 0
361
362
363 # ---------------------------------------------------------------------------
364 # JSON schema: _ShortlogJson
365 # ---------------------------------------------------------------------------
366
367
def test_json_schema_empty_repo(tmp_path: pathlib.Path) -> None:
    """A repo without commits reports its id, ``main`` and no groups."""
    _init_repo(tmp_path)
    outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
    assert outcome.exit_code == 0
    parsed = _parse_json(outcome)
    assert (parsed["repo_id"], parsed["branch"]) == (_REPO_ID, "main")
    assert parsed["groups"] == []
376
377
def test_json_schema_all_fields_present(tmp_path: pathlib.Path) -> None:
    """Top-level, group and per-commit fields are all present for one commit."""
    _init_repo(tmp_path)
    _make_commit(tmp_path, author="Alice", agent_id="bot-1", model_id="gpt-4o")
    outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
    assert outcome.exit_code == 0

    parsed = _parse_json(outcome)
    assert parsed["repo_id"] == _REPO_ID
    assert parsed["branch"] == "main"

    group = parsed["groups"][0]
    assert group["key"] == "Alice"
    assert group["count"] == 1

    entry = group["commits"][0]
    expected_fields = (
        "commit_id",
        "message",
        "committed_at",
        "author",
        "agent_id",
        "model_id",
    )
    for field in expected_fields:
        assert field in entry
396
397
def test_json_schema_repo_id_and_branch_in_output(tmp_path: pathlib.Path) -> None:
    """The JSON payload names the repo id and the current branch."""
    _init_repo(tmp_path)
    _make_commit(tmp_path, branch="main")
    outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
    assert outcome.exit_code == 0
    parsed = _parse_json(outcome)
    assert (parsed["repo_id"], parsed["branch"]) == (_REPO_ID, "main")
406
407
def test_json_schema_all_branches_label(tmp_path: pathlib.Path) -> None:
    """With ``--all`` the branch field carries the ``__all__`` label."""
    _init_repo(tmp_path)
    _make_commit(tmp_path, branch="main")
    argv = ["shortlog", "--all", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    assert _parse_json(outcome)["branch"] == "__all__"
415
416
def test_json_agent_id_and_model_id_present(tmp_path: pathlib.Path) -> None:
    """Per-commit JSON entries echo the stored agent and model ids."""
    _init_repo(tmp_path)
    _make_commit(tmp_path, agent_id="agent-007", model_id="claude-3")
    outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
    assert outcome.exit_code == 0
    first_commit = _parse_json(outcome)["groups"][0]["commits"][0]
    assert first_commit["agent_id"] == "agent-007"
    assert first_commit["model_id"] == "claude-3"
426
427
428 # ---------------------------------------------------------------------------
429 # New flag: --group-by
430 # ---------------------------------------------------------------------------
431
432
def test_group_by_agent(tmp_path: pathlib.Path) -> None:
    """``--group-by agent`` keys the groups by agent id."""
    _init_repo(tmp_path)
    for author, agent in (("Alice", "bot-1"), ("Bob", "bot-2"), ("Alice", "bot-1")):
        _make_commit(tmp_path, author=author, agent_id=agent)
    argv = ["shortlog", "--group-by", "agent", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    group_keys = {group["key"] for group in _parse_json(outcome)["groups"]}
    assert "bot-1" in group_keys
    assert "bot-2" in group_keys
444
445
def test_group_by_model(tmp_path: pathlib.Path) -> None:
    """``--group-by model`` keys by model id and counts commits per model."""
    _init_repo(tmp_path)
    for model in ("gpt-4o", "claude-3", "gpt-4o"):
        _make_commit(tmp_path, model_id=model)
    argv = ["shortlog", "--group-by", "model", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    groups = _parse_json(outcome)["groups"]
    group_keys = {group["key"] for group in groups}
    assert "gpt-4o" in group_keys
    assert "claude-3" in group_keys
    gpt_total = next(group["count"] for group in groups if group["key"] == "gpt-4o")
    assert gpt_total == 2
459
460
def test_group_by_branch(tmp_path: pathlib.Path) -> None:
    """``--all --group-by branch`` produces one group per branch name."""
    _init_repo(tmp_path)
    for branch in ("main", "dev", "main"):
        _make_commit(tmp_path, branch=branch)
    argv = ["shortlog", "--all", "--group-by", "branch", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    group_keys = {group["key"] for group in _parse_json(outcome)["groups"]}
    assert "main" in group_keys
    assert "dev" in group_keys
474
475
def test_group_by_invalid_choice(tmp_path: pathlib.Path) -> None:
    """An unsupported ``--group-by`` value makes the command exit non-zero."""
    _init_repo(tmp_path)
    argv = ["shortlog", "--group-by", "badfield"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code != 0
480
481
482 # ---------------------------------------------------------------------------
483 # New flag: --summary
484 # ---------------------------------------------------------------------------
485
486
def test_summary_suppresses_messages(tmp_path: pathlib.Path) -> None:
    """``--summary`` keeps the author line and omits commit messages."""
    _init_repo(tmp_path)
    for _ in range(2):
        _make_commit(tmp_path, author="Alice")
    outcome = _invoke(["shortlog", "--summary"], _env(tmp_path))
    assert outcome.exit_code == 0
    text = outcome.output
    # The group heading is still printed.
    assert "Alice" in text
    # Generated messages all contain "msg"; none may be printed.
    assert "msg" not in text
497
498
def test_summary_with_json_still_includes_commits(tmp_path: pathlib.Path) -> None:
    """--summary only suppresses messages in text mode; JSON always includes them."""
    _init_repo(tmp_path)
    _make_commit(tmp_path, author="Alice")
    argv = ["shortlog", "--summary", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    first_group = _parse_json(outcome)["groups"][0]
    assert len(first_group["commits"]) >= 1
507
508
509 # ---------------------------------------------------------------------------
510 # New flag: --no-merges
511 # ---------------------------------------------------------------------------
512
513
def test_no_merges_excludes_merge_commits(tmp_path: pathlib.Path) -> None:
    """get_commits_for_branch follows first-parent only.

    Chain: c1 → c2 → c3(merge, parent2=c1) → c4
    First-parent walk from c4 returns [c4, c3, c2, c1].
    With --no-merges, c3 is excluded → 3 commits remain.
    """
    _init_repo(tmp_path)
    first = _make_commit(tmp_path, author="Alice")
    _make_commit(tmp_path, author="Bob")  # c2, auto-chained onto c1
    # c3: first parent is c2 via auto-chaining, second parent is c1.
    _make_commit(tmp_path, author="Alice", parent2_id=first)
    _make_commit(tmp_path, author="Bob")  # c4, auto-chained onto the merge
    argv = ["shortlog", "--no-merges", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    counts = [group["count"] for group in _parse_json(outcome)["groups"]]
    assert sum(counts) == 3  # c1, c2, c4 — c3 (merge) excluded
532
533
def test_no_merges_with_all_non_merges(tmp_path: pathlib.Path) -> None:
    """``--no-merges`` drops nothing from a purely linear history."""
    _init_repo(tmp_path)
    commit_total = 5
    for _ in range(commit_total):
        _make_commit(tmp_path, author="Alice")
    argv = ["shortlog", "--no-merges", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    counts = [group["count"] for group in _parse_json(outcome)["groups"]]
    assert sum(counts) == 5
542
543
544 # ---------------------------------------------------------------------------
545 # New flags: --since / --until
546 # ---------------------------------------------------------------------------
547
548
def test_since_filters_old_commits(tmp_path: pathlib.Path) -> None:
    """``--since`` drops commits dated before the given day."""
    utc = datetime.timezone.utc
    _init_repo(tmp_path)
    _make_commit(
        tmp_path, author="Old", committed_at=datetime.datetime(2020, 1, 1, tzinfo=utc)
    )
    _make_commit(
        tmp_path, author="New", committed_at=datetime.datetime(2025, 6, 1, tzinfo=utc)
    )
    argv = ["shortlog", "--since", "2025-01-01", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    group_keys = {group["key"] for group in _parse_json(outcome)["groups"]}
    assert "New" in group_keys
    assert "Old" not in group_keys
561
562
def test_until_filters_future_commits(tmp_path: pathlib.Path) -> None:
    """``--until`` drops commits dated after the given day."""
    utc = datetime.timezone.utc
    _init_repo(tmp_path)
    _make_commit(
        tmp_path, author="Old", committed_at=datetime.datetime(2020, 1, 1, tzinfo=utc)
    )
    _make_commit(
        tmp_path, author="New", committed_at=datetime.datetime(2025, 6, 1, tzinfo=utc)
    )
    argv = ["shortlog", "--until", "2022-12-31", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    group_keys = {group["key"] for group in _parse_json(outcome)["groups"]}
    assert "Old" in group_keys
    assert "New" not in group_keys
575
576
def test_since_and_until_window(tmp_path: pathlib.Path) -> None:
    """Combining both bounds keeps only the commit dated inside the window."""
    utc = datetime.timezone.utc
    timeline = [
        ("Before", datetime.datetime(2024, 1, 1, tzinfo=utc)),
        ("Inside", datetime.datetime(2025, 3, 15, tzinfo=utc)),
        ("After", datetime.datetime(2026, 1, 1, tzinfo=utc)),
    ]
    _init_repo(tmp_path)
    for author, when in timeline:
        _make_commit(tmp_path, author=author, committed_at=when)
    argv = ["shortlog", "--since", "2025-01-01", "--until", "2025-12-31", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    group_keys = {group["key"] for group in _parse_json(outcome)["groups"]}
    assert "Inside" in group_keys
    assert "Before" not in group_keys
    assert "After" not in group_keys
597
598
def test_since_no_results_returns_empty_json(tmp_path: pathlib.Path) -> None:
    """When the filter removes every commit, the groups list is empty."""
    long_ago = datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)
    _init_repo(tmp_path)
    _make_commit(tmp_path, author="Old", committed_at=long_ago)
    argv = ["shortlog", "--since", "2030-01-01", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    assert _parse_json(outcome)["groups"] == []
610
611
612 # ---------------------------------------------------------------------------
613 # Integration
614 # ---------------------------------------------------------------------------
615
616
def test_integration_all_branches_dedup(tmp_path: pathlib.Path) -> None:
    """A commit reachable from two branches should count once."""
    _init_repo(tmp_path)
    shared_commit = _make_commit(tmp_path, author="Alice", branch="main")
    # A second branch is created by writing a ref file with the same id.
    (heads_dir(tmp_path) / "dev").write_text(shared_commit, encoding="utf-8")
    argv = ["shortlog", "--all", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    counts = [group["count"] for group in _parse_json(outcome)["groups"]]
    assert sum(counts) == 1  # deduplicated
629
630
def test_integration_limit_early_exit(tmp_path: pathlib.Path) -> None:
    """With ``--limit 10`` the total across groups never exceeds ten.

    NOTE(review): the bound is an upper limit only; an empty result would
    also satisfy it.
    """
    _init_repo(tmp_path)
    for index in range(50):
        _make_commit(tmp_path, author=f"Author{index % 5}")
    argv = ["shortlog", "--limit", "10", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    counts = [group["count"] for group in _parse_json(outcome)["groups"]]
    assert sum(counts) <= 10
640
641
def test_integration_numbered_combined_with_since(tmp_path: pathlib.Path) -> None:
    """``--numbered`` plus ``--since``: top group is ``Prolific``, ``Old`` is gone."""
    utc = datetime.timezone.utc
    recent = datetime.datetime(2025, 6, 1, tzinfo=utc)
    ancient = datetime.datetime(2020, 1, 1, tzinfo=utc)
    _init_repo(tmp_path)
    for _ in range(3):
        _make_commit(tmp_path, author="Prolific", committed_at=recent)
    _make_commit(tmp_path, author="Old", committed_at=ancient)
    argv = ["shortlog", "--since", "2025-01-01", "--numbered", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    groups = _parse_json(outcome)["groups"]
    assert groups[0]["key"] == "Prolific"
    assert "Old" not in {group["key"] for group in groups}
657
658
659 # ---------------------------------------------------------------------------
660 # E2E: help output
661 # ---------------------------------------------------------------------------
662
663
def test_help_shows_new_flags() -> None:
    """``shortlog --help`` exits 0 and mentions every newer flag."""
    expected_flags = (
        "--group-by",
        "--summary",
        "--no-merges",
        "--since",
        "--until",
        "--json",
    )
    outcome = _invoke(["shortlog", "--help"], {})
    assert outcome.exit_code == 0
    help_text = outcome.output
    for flag in expected_flags:
        assert flag in help_text, f"Missing flag: {flag}"
669
670
def test_help_mentions_group_by_choices() -> None:
    """``shortlog --help`` exits 0 and lists every ``--group-by`` choice."""
    result = _invoke(["shortlog", "--help"], {})
    # Check the exit code first so a failed invocation is reported as such
    # rather than as a missing word in the output.
    assert result.exit_code == 0
    for choice in ("author", "agent", "model", "branch"):
        assert choice in result.output, f"Missing --group-by choice: {choice}"
675
676
677 # ---------------------------------------------------------------------------
678 # Stress: 500 commits × 5 authors
679 # ---------------------------------------------------------------------------
680
681
def test_stress_500_commits(tmp_path: pathlib.Path) -> None:
    """500 commits rotated over five authors yield five groups totalling 500."""
    roster = ["Amy", "Ben", "Cleo", "Dan", "Eva"]
    _init_repo(tmp_path)
    for index in range(500):
        _make_commit(tmp_path, author=roster[index % 5])
    outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
    assert outcome.exit_code == 0
    groups = _parse_json(outcome)["groups"]
    assert sum(group["count"] for group in groups) == 500
    assert len(groups) == 5
693
694
def test_stress_500_commits_numbered(tmp_path: pathlib.Path) -> None:
    """With ``--numbered`` the 300-commit author is listed first."""
    _init_repo(tmp_path)
    # Alice gets 300 commits, then Bob gets 200.
    for author, how_many in (("Alice", 300), ("Bob", 200)):
        for _ in range(how_many):
            _make_commit(tmp_path, author=author)
    argv = ["shortlog", "--numbered", "--json"]
    outcome = _invoke(argv, _env(tmp_path))
    assert outcome.exit_code == 0
    leader = _parse_json(outcome)["groups"][0]
    assert leader["key"] == "Alice"
    assert leader["count"] == 300
707
708
709 # ---------------------------------------------------------------------------
710 # JSON schema — duration_ms + exit_code + truncated on every output path
711 # ---------------------------------------------------------------------------
712
713
class TestJsonSchema:
    """Every --json response must carry duration_ms, exit_code, and truncated."""

    def _assert_schema(self, d: Mapping[str, object], *, exit_code: int = 0) -> None:
        """Check the three envelope fields of a decoded JSON payload *d*."""
        for field in ("duration_ms", "exit_code", "truncated"):
            assert field in d, f"{field} missing: {d}"
        elapsed = d["duration_ms"]
        assert isinstance(elapsed, (int, float))
        assert elapsed >= 0
        assert d["exit_code"] == exit_code

    def test_normal_output_has_schema(self, tmp_path: pathlib.Path) -> None:
        """Regular single-commit output carries the envelope."""
        _init_repo(tmp_path)
        _make_commit(tmp_path, author="Alice")
        outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        self._assert_schema(payload)

    def test_empty_repo_json_has_schema(self, tmp_path: pathlib.Path) -> None:
        """Output for a repo without commits carries the envelope."""
        _init_repo(tmp_path)
        outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        self._assert_schema(payload)

    def test_all_branches_json_has_schema(self, tmp_path: pathlib.Path) -> None:
        """``--all`` output carries the envelope."""
        _init_repo(tmp_path)
        _make_commit(tmp_path, branch="main")
        outcome = _invoke(["shortlog", "--all", "--json"], _env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        self._assert_schema(payload)

    def test_numbered_json_has_schema(self, tmp_path: pathlib.Path) -> None:
        """``--numbered`` output carries the envelope."""
        _init_repo(tmp_path)
        for author in ("Alice", "Bob"):
            _make_commit(tmp_path, author=author)
        outcome = _invoke(["shortlog", "--numbered", "--json"], _env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        self._assert_schema(payload)

    def test_since_filtered_empty_has_schema(self, tmp_path: pathlib.Path) -> None:
        """_emit_empty path (after filtering) must also carry the schema."""
        long_ago = datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)
        _init_repo(tmp_path)
        _make_commit(tmp_path, author="Old", committed_at=long_ago)
        argv = ["shortlog", "--since", "2030-01-01", "--json"]
        outcome = _invoke(argv, _env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        self._assert_schema(payload)

    def test_exit_code_zero_on_success(self, tmp_path: pathlib.Path) -> None:
        """The ``exit_code`` field in the payload is 0 on a successful run."""
        _init_repo(tmp_path)
        _make_commit(tmp_path)
        outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
        payload = json.loads(outcome.output)
        assert payload["exit_code"] == 0
770
771
772 # ---------------------------------------------------------------------------
773 # truncated flag — set when --limit caps the result
774 # ---------------------------------------------------------------------------
775
776
class TestTruncated:
    """truncated:true when --limit hit; false otherwise."""

    def test_truncated_true_when_limit_hit(self, tmp_path: pathlib.Path) -> None:
        """Ten commits with ``--limit 3`` report ``truncated`` as ``True``."""
        _init_repo(tmp_path)
        for _ in range(10):
            _make_commit(tmp_path, author="Alice")
        argv = ["shortlog", "--limit", "3", "--json"]
        outcome = _invoke(argv, _env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["truncated"] is True

    def test_truncated_false_when_under_limit(self, tmp_path: pathlib.Path) -> None:
        """Five commits with ``--limit 10`` report ``truncated`` as ``False``."""
        _init_repo(tmp_path)
        for _ in range(5):
            _make_commit(tmp_path, author="Alice")
        argv = ["shortlog", "--limit", "10", "--json"]
        outcome = _invoke(argv, _env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["truncated"] is False

    def test_truncated_false_when_no_limit(self, tmp_path: pathlib.Path) -> None:
        """Without ``--limit`` the flag is ``False``."""
        _init_repo(tmp_path)
        for _ in range(5):
            _make_commit(tmp_path, author="Alice")
        outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["truncated"] is False

    def test_truncated_false_on_empty_repo(self, tmp_path: pathlib.Path) -> None:
        """A repo without commits reports ``truncated`` as ``False``."""
        _init_repo(tmp_path)
        outcome = _invoke(["shortlog", "--json"], _env(tmp_path))
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["truncated"] is False
813
814
815 # ---------------------------------------------------------------------------
816 # Error JSON — date parse errors emit structured JSON to stdout with --json
817 # ---------------------------------------------------------------------------
818
819
class TestErrorJson:
    """--since / --until bad dates must emit JSON to stdout when --json is set."""

    def _assert_error(self, result: InvokeResult) -> Mapping[str, object]:
        """Check the error envelope of *result* and return the decoded payload."""
        assert result.exit_code != 0
        payload = json.loads(result.output)
        for field in ("error", "duration_ms", "exit_code"):
            assert field in payload
        assert payload["exit_code"] != 0
        return payload

    def test_since_bad_date_json_error(self, tmp_path: pathlib.Path) -> None:
        """A bad ``--since`` value yields a JSON error payload."""
        _init_repo(tmp_path)
        _make_commit(tmp_path)
        argv = ["shortlog", "--json", "--since", "not-a-date"]
        self._assert_error(_invoke(argv, _env(tmp_path)))

    def test_until_bad_date_json_error(self, tmp_path: pathlib.Path) -> None:
        """A bad ``--until`` value yields a JSON error payload."""
        _init_repo(tmp_path)
        _make_commit(tmp_path)
        argv = ["shortlog", "--json", "--until", "01/01/2025"]
        self._assert_error(_invoke(argv, _env(tmp_path)))

    def test_date_error_has_message(self, tmp_path: pathlib.Path) -> None:
        """The error payload carries a non-empty string ``message``."""
        _init_repo(tmp_path)
        argv = ["shortlog", "--json", "--since", "garbage"]
        payload = self._assert_error(_invoke(argv, _env(tmp_path)))
        assert isinstance(payload["message"], str) and len(payload["message"]) > 0
849
850
851 # ---------------------------------------------------------------------------
852 # _parse_date refactor — now raises ValueError, not SystemExit
853 # ---------------------------------------------------------------------------
854
855
class TestParseDateRefactor:
    """_parse_date is a pure parser; it raises ValueError, not SystemExit."""

    def test_invalid_date_raises_value_error(self) -> None:
        """Free-form text raises ``ValueError``."""
        bad_value = "not-a-date"
        with pytest.raises(ValueError):
            _parse_date(bad_value, "--since")

    def test_wrong_format_raises_value_error(self) -> None:
        """A ``DD/MM/YYYY`` string raises ``ValueError``."""
        bad_value = "15/03/2025"
        with pytest.raises(ValueError):
            _parse_date(bad_value, "--since")
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 21 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago