gabriel / muse public
test_cmd_read_commit.py python
485 lines 19.2 KB
Raw
sha256:ff478cfdcdd4b7fd6de89cb68896601a981f945634463275ec333bd20ca36402 Merge branch 'dev' into main Human 20 days ago
1 """Comprehensive tests for ``muse read-commit``.
2
3 Coverage tiers
4 --------------
5 - Unit: _ALL_FIELDS completeness
6 - Integration: JSON/text format, prefix resolution, --fields filter, parent chain
7 - Security: ANSI in branch/message stripped in text mode; ANSI in commit ID rejected
8 - Stress: 200 sequential reads, --fields on large schema
9 """
10 from __future__ import annotations
11
12 import datetime
13 import json
14 import pathlib
15
16 from muse.core.errors import ExitCode
17 from muse.core.ids import hash_commit, hash_snapshot
18 from muse.core.commits import (
19 CommitRecord,
20 write_commit,
21 )
22 from muse.core.snapshots import (
23 SnapshotRecord,
24 write_snapshot,
25 )
26 from tests.cli_test_helper import CliRunner, InvokeResult
27 from muse.core.types import fake_id, long_id, short_id
28 from muse.core.paths import heads_dir, muse_dir
29
# Shared CLI runner instance; the _rc helper invokes the command through it.
runner = CliRunner()

# Module-level constants so every test uses the same deterministic inputs.
# _SNAP_ID is the snapshot hash of an empty manifest.
_SNAP_ID: str = hash_snapshot({})
# Fixed, timezone-aware (UTC) timestamp used for every snapshot and commit.
_COMMITTED_AT: datetime.datetime = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
35
36
37 # ---------------------------------------------------------------------------
38 # Helpers
39 # ---------------------------------------------------------------------------
40
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Create a minimal repository skeleton under *tmp_path* and return its root.

    The layout written inside ``muse_dir(repo)`` is:

    - the directories ``objects``, ``commits``, ``snapshots`` and ``refs/heads``
    - a ``HEAD`` file pointing at ``refs/heads/main``
    - a ``repo.json`` file with a fixed ``repo_id`` and ``domain``

    ``mkdir`` is called without ``exist_ok``, so calling this twice with the
    same *tmp_path* raises ``FileExistsError``.
    """
    repo = tmp_path / "repo"
    muse = muse_dir(repo)
    for sub in ("objects", "commits", "snapshots", "refs/heads"):
        (muse / sub).mkdir(parents=True)
    # Encoding is spelled out so the files do not depend on the platform's
    # locale, matching the explicit encoding used when the tests write refs.
    (muse / "HEAD").write_text("ref: refs/heads/main", encoding="utf-8")
    (muse / "repo.json").write_text(
        json.dumps({"repo_id": "test-repo", "domain": "code"}),
        encoding="utf-8",
    )
    return repo
49
50
def _commit(
    repo: pathlib.Path,
    *,
    branch: str = "main",
    message: str = "test commit",
    author: str = "tester",
    parent: str | None = None,
    agent_id: str = "",
    model_id: str = "",
) -> str:
    """Store a snapshot plus one commit in *repo*; return the commit's ID.

    The ID comes from ``hash_commit`` over the parent list, the shared
    ``_SNAP_ID``, *message*, the ISO form of ``_COMMITTED_AT`` and *author*.
    *branch*, *agent_id* and *model_id* are stored on the record but are not
    passed to ``hash_commit``.
    """
    # A falsy parent (None or "") means a root commit with no parents.
    lineage: list[str] = [] if not parent else [parent]
    new_id = hash_commit(
        parent_ids=lineage,
        snapshot_id=_SNAP_ID,
        message=message,
        committed_at_iso=_COMMITTED_AT.isoformat(),
        author=author,
    )

    snapshot = SnapshotRecord(
        snapshot_id=_SNAP_ID,
        manifest={},
        created_at=_COMMITTED_AT,
    )
    write_snapshot(repo, snapshot)

    record = CommitRecord(
        commit_id=new_id,
        branch=branch,
        snapshot_id=_SNAP_ID,
        message=message,
        committed_at=_COMMITTED_AT,
        author=author,
        parent_commit_id=parent,
        agent_id=agent_id,
        model_id=model_id,
    )
    write_commit(repo, record)
    return new_id
88
89
def _rc(repo: pathlib.Path, *args: str) -> InvokeResult:
    """Invoke ``read-commit`` with *args*, pointing the CLI at *repo*.

    The repository root is passed through the ``MUSE_REPO_ROOT`` environment
    variable; the result of ``runner.invoke`` is returned unchanged.
    """
    from muse.cli.app import main as cli

    argv = ["read-commit", *args]
    environment = {"MUSE_REPO_ROOT": str(repo)}
    return runner.invoke(cli, argv, env=environment)
97
98
def _rcj(repo: pathlib.Path, *args: str) -> InvokeResult:
    """Call ``_rc`` with ``--json`` inserted ahead of the caller's arguments."""
    json_args = ("--json", *args)
    return _rc(repo, *json_args)
102
103
104 # ---------------------------------------------------------------------------
105 # Unit — _ALL_FIELDS
106 # ---------------------------------------------------------------------------
107
108
class TestAllFields:
    """Checks on the ``_ALL_FIELDS`` set exported by the read-commit command."""

    def test_all_fields_matches_commitdict_annotations(self) -> None:
        """_ALL_FIELDS must be exactly the keys in CommitDict.__annotations__."""
        from muse.cli.commands.read_commit import _ALL_FIELDS
        from muse.core.commits import CommitDict

        annotated = frozenset(CommitDict.__annotations__)
        assert _ALL_FIELDS == annotated

    def test_required_fields_present(self) -> None:
        """A fixed list of field names must all be members of _ALL_FIELDS."""
        from muse.cli.commands.read_commit import _ALL_FIELDS

        required = (
            "commit_id", "branch", "message", "committed_at",
            "agent_id", "model_id", "reviewed_by",
        )
        absent = [name for name in required if name not in _ALL_FIELDS]
        assert not absent
121
122
123 # ---------------------------------------------------------------------------
124 # Integration — JSON format
125 # ---------------------------------------------------------------------------
126
127
class TestJsonFormat:
    """``--json`` output: core fields, provenance, parent links, timestamps."""

    def test_full_schema_returned(self, tmp_path: pathlib.Path) -> None:
        """The payload echoes the commit's ID, message and branch."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, message="hello world")
        outcome = _rcj(repo, commit_id)
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        observed = (payload["commit_id"], payload["message"], payload["branch"])
        assert observed == (commit_id, "hello world", "main")

    def test_json_flag_shorthand(self, tmp_path: pathlib.Path) -> None:
        """Passing ``--json`` directly through ``_rc`` yields a JSON object."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, message="shorthand test")
        outcome = _rc(repo, "--json", commit_id)
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert "commit_id" in payload

    def test_agent_provenance_fields_present(self, tmp_path: pathlib.Path) -> None:
        """agent_id and model_id written on the commit come back unchanged."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, agent_id="my-agent", model_id="claude-4")
        outcome = _rcj(repo, commit_id)
        payload = json.loads(outcome.output)
        assert payload["agent_id"] == "my-agent"
        assert payload["model_id"] == "claude-4"

    def test_parent_commit_id_null_for_root(self, tmp_path: pathlib.Path) -> None:
        """A commit written without a parent reports a null parent_commit_id."""
        repo = _make_repo(tmp_path)
        root_id = _commit(repo, message="root commit")
        payload = json.loads(_rcj(repo, root_id).output)
        assert payload["parent_commit_id"] is None

    def test_parent_commit_id_set_for_child(self, tmp_path: pathlib.Path) -> None:
        """A child commit reports the ID of the parent it was written with."""
        repo = _make_repo(tmp_path)
        parent_id = _commit(repo, message="parent commit")
        child_id = _commit(repo, message="child commit", parent=parent_id)
        payload = json.loads(_rcj(repo, child_id).output)
        assert payload["parent_commit_id"] == parent_id

    def test_committed_at_is_iso8601(self, tmp_path: pathlib.Path) -> None:
        """committed_at must be accepted by ``datetime.fromisoformat``."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, message="iso date test")
        payload = json.loads(_rcj(repo, commit_id).output)
        stamp = payload["committed_at"]
        # Raises if the value is not parseable; the parsed value is unused.
        datetime.datetime.fromisoformat(stamp)

    def test_snapshot_id_in_output(self, tmp_path: pathlib.Path) -> None:
        """snapshot_id has the ``sha256:`` prefix followed by 64 lowercase hex digits."""
        import re

        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, message="snapshot test")
        payload = json.loads(_rcj(repo, commit_id).output)
        matched = re.fullmatch(r"sha256:[0-9a-f]{64}", payload["snapshot_id"])
        assert matched
179
180
181 # ---------------------------------------------------------------------------
182 # Integration — text format
183 # ---------------------------------------------------------------------------
184
185
class TestTextFormat:
    """Default (non-JSON) output: short ID, branch, message, single line."""

    def test_text_format_contains_commit_prefix(self, tmp_path: pathlib.Path) -> None:
        """The short form of the commit ID appears in the text output."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, message="text test")
        outcome = _rc(repo, commit_id)
        assert outcome.exit_code == 0
        assert short_id(commit_id) in outcome.output.strip()

    def test_text_format_contains_branch(self, tmp_path: pathlib.Path) -> None:
        """The branch name appears in the text output."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, branch="main", message="branch test")
        text = _rc(repo, commit_id).output
        assert "main" in text

    def test_text_format_contains_message(self, tmp_path: pathlib.Path) -> None:
        """The commit message appears in the text output."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, message="my commit message")
        text = _rc(repo, commit_id).output
        assert "my commit message" in text

    def test_text_multiline_message_flattened(self, tmp_path: pathlib.Path) -> None:
        """A message containing a newline is still printed on one line."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, message="line one\nline two")
        text = _rc(repo, commit_id).output
        # Newline replaced with space — output stays on one line
        assert "\n" not in text.strip()
        assert "line one" in text
214
215
216 # ---------------------------------------------------------------------------
217 # Integration — prefix resolution
218 # ---------------------------------------------------------------------------
219
220
class TestPrefixResolution:
    """Resolution of full IDs and ``sha256:`` prefixes, plus rejected forms."""

    def test_sha256_short_prefix_resolves(self, tmp_path: pathlib.Path) -> None:
        """sha256:<8-hex> prefix form resolves to the full commit."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, message="prefix resolve test")
        # "sha256:" is 7 characters; keeping 8 hex digits after it gives 15.
        prefix_len = len("sha256:") + 8
        outcome = _rcj(repo, commit_id[:prefix_len])
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["commit_id"] == commit_id

    def test_sha256_full_id_resolves(self, tmp_path: pathlib.Path) -> None:
        """Full sha256:<64-hex> canonical form resolves directly."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, message="full id resolve test")
        outcome = _rcj(repo, commit_id)
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["commit_id"] == commit_id

    def test_ambiguous_prefix_errors(self, tmp_path: pathlib.Path) -> None:
        """A prefix shared by two commits is a user error listing both candidates."""
        repo = _make_repo(tmp_path)
        # "msg 205" and "msg 321" produce commit IDs sharing the "990f" 4-char
        # hex prefix under the unified-object-store formula (author="tester",
        # empty manifest, 2026-01-01T00:00:00+00:00).
        # Verified by precomputation; changing _SNAP_ID, _COMMITTED_AT, or
        # author requires updating these message strings.
        first_id = _commit(repo, message="msg 205")
        second_id = _commit(repo, message="msg 321")
        outcome = _rc(repo, "sha256:990f")
        assert outcome.exit_code == ExitCode.USER_ERROR
        error_doc = json.loads(outcome.stderr)
        assert "ambiguous" in error_doc["error"]
        assert set(error_doc["candidates"]) == {first_id, second_id}

    def test_missing_commit_errors(self, tmp_path: pathlib.Path) -> None:
        """A well-formed ID that is not in the store reports "not found"."""
        repo = _make_repo(tmp_path)
        # Valid canonical ID that doesn't exist in the store
        absent_hex = "dead" + "beef" * 15
        outcome = _rc(repo, long_id(absent_hex))
        assert outcome.exit_code == ExitCode.USER_ERROR
        error_doc = json.loads(outcome.stderr)
        assert "not found" in error_doc["error"]

    def test_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """Bare hex without sha256: prefix is rejected with a clear error."""
        repo = _make_repo(tmp_path)
        bare_hex = "a" * 64
        outcome = _rc(repo, bare_hex)
        assert outcome.exit_code == ExitCode.USER_ERROR
        error_doc = json.loads(outcome.stderr)
        assert "sha256:" in error_doc["error"]

    def test_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """Short bare hex prefix is rejected — sha256:<hex> form required."""
        repo = _make_repo(tmp_path)
        outcome = _rc(repo, "deadbeef")
        assert outcome.exit_code == ExitCode.USER_ERROR
        error_doc = json.loads(outcome.stderr)
        assert "sha256:" in error_doc["error"]

    def test_invalid_commit_id_errors(self, tmp_path: pathlib.Path) -> None:
        """A 64-character argument containing non-hex characters is a user error."""
        repo = _make_repo(tmp_path)
        bogus = "ZZZZ" + "a" * 60
        outcome = _rc(repo, bogus)
        assert outcome.exit_code == ExitCode.USER_ERROR
283
284
class TestSymbolicRefResolution:
    """Resolution of ``HEAD``, branch names and ``HEAD~N`` to commit IDs."""

    def test_head_resolves(self, tmp_path: pathlib.Path) -> None:
        """HEAD resolves to the tip of the current branch."""
        repo = _make_repo(tmp_path)
        tip_id = _commit(repo, branch="main", message="head test")
        # Write branch ref so HEAD resolves
        main_ref = heads_dir(repo) / "main"
        main_ref.write_text(tip_id, encoding="utf-8")
        outcome = _rcj(repo, "HEAD")
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["commit_id"] == tip_id

    def test_branch_name_resolves(self, tmp_path: pathlib.Path) -> None:
        """A branch name resolves to the tip commit of that branch."""
        repo = _make_repo(tmp_path)
        tip_id = _commit(repo, branch="dev", message="branch ref test")
        dev_ref = heads_dir(repo) / "dev"
        dev_ref.write_text(tip_id, encoding="utf-8")
        outcome = _rcj(repo, "dev")
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["commit_id"] == tip_id

    def test_tilde_notation_resolves(self, tmp_path: pathlib.Path) -> None:
        """HEAD~1 resolves to the parent of the HEAD commit."""
        repo = _make_repo(tmp_path)
        older_id = _commit(repo, branch="main", message="parent")
        newer_id = _commit(repo, branch="main", message="child", parent=older_id)
        main_ref = heads_dir(repo) / "main"
        main_ref.write_text(newer_id, encoding="utf-8")
        outcome = _rcj(repo, "HEAD~1")
        assert outcome.exit_code == 0
        payload = json.loads(outcome.output)
        assert payload["commit_id"] == older_id
314
315
316 # ---------------------------------------------------------------------------
317 # Integration — --fields filter
318 # ---------------------------------------------------------------------------
319
320
class TestFieldsFilter:
    """``--fields`` selection: exact key sets, validation errors, whitespace."""

    # Non-commit keys that these tests remove from the JSON payload before
    # comparing what is left against the requested ``--fields`` list.
    _ENVELOPE_KEYS: frozenset[str] = frozenset({
        "duration_ms",
        "exit_code",
        "muse_version",
        "schema",
        "timestamp",
        "warnings",
    })

    @classmethod
    def _commit_keys(cls, data: dict[str, object]) -> set[str]:
        """Return the keys of *data* that are not listed in ``_ENVELOPE_KEYS``."""
        return set(data) - cls._ENVELOPE_KEYS

    def test_single_field(self, tmp_path: pathlib.Path) -> None:
        """Requesting one field returns that field and no other commit field."""
        repo = _make_repo(tmp_path)
        cid = _commit(repo, message="filtered")
        result = _rcj(repo, "--fields", "message", cid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert self._commit_keys(data) == {"message"}
        assert data["message"] == "filtered"

    def test_multiple_fields(self, tmp_path: pathlib.Path) -> None:
        """A comma-separated list returns exactly the listed commit fields."""
        repo = _make_repo(tmp_path)
        cid = _commit(repo, branch="dev", message="multi field test")
        result = _rcj(repo, "--fields", "commit_id,branch,message", cid)
        # Check the exit code first so a failure is not reported as a JSON error.
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert self._commit_keys(data) == {"commit_id", "branch", "message"}
        assert data["commit_id"] == cid
        assert data["branch"] == "dev"

    def test_agent_fields_filter(self, tmp_path: pathlib.Path) -> None:
        """Agents extracting provenance fields should get exactly what they asked for."""
        repo = _make_repo(tmp_path)
        cid = _commit(repo, agent_id="audit-bot", model_id="claude-4")
        result = _rcj(repo, "--fields", "agent_id,model_id,toolchain_id", cid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert self._commit_keys(data) == {"agent_id", "model_id", "toolchain_id"}
        assert data["agent_id"] == "audit-bot"
        assert data["model_id"] == "claude-4"

    def test_unknown_field_errors(self, tmp_path: pathlib.Path) -> None:
        """An unrecognised field name is a user error."""
        repo = _make_repo(tmp_path)
        cid = _commit(repo, message="unknown field test")
        # Run in JSON mode: test_fields_with_text_format_errors shows that
        # --fields without --json is already a user error, so a text-mode
        # invocation could pass here without the field name ever being checked.
        result = _rcj(repo, "--fields", "nonexistent_field", cid)
        assert result.exit_code == ExitCode.USER_ERROR

    def test_fields_with_text_format_errors(self, tmp_path: pathlib.Path) -> None:
        """``--fields`` with a valid field name but without ``--json`` is a user error."""
        repo = _make_repo(tmp_path)
        cid = _commit(repo, message="fields text error test")
        result = _rc(repo, "--fields", "commit_id", cid)
        assert result.exit_code == ExitCode.USER_ERROR

    def test_fields_whitespace_trimmed(self, tmp_path: pathlib.Path) -> None:
        """Spaces around the comma-separated field names are tolerated."""
        repo = _make_repo(tmp_path)
        cid = _commit(repo, message="whitespace trim test")
        result = _rcj(repo, "--fields", " commit_id , message ", cid)
        assert result.exit_code == 0
        data = json.loads(result.output)
        assert "commit_id" in data
        assert "message" in data
374
375
376 # ---------------------------------------------------------------------------
377 # Security
378 # ---------------------------------------------------------------------------
379
380
class TestSecurity:
    """Escape-sequence handling in text output and clean failure on bad input."""

    def test_ansi_in_branch_stripped_in_text(self, tmp_path: pathlib.Path) -> None:
        """An ESC byte stored in the branch name must not reach text output."""
        repo = _make_repo(tmp_path)
        _commit(repo, branch="main")
        # Write a malicious commit directly, bypassing the normal helper.
        # The commit_id must be a real hash of the stored fields for read_commit
        # to pass content-hash verification.
        malicious_message = "test"
        malicious_cid = hash_commit(
            parent_ids=[],
            snapshot_id=_SNAP_ID,
            message=malicious_message,
            committed_at_iso=_COMMITTED_AT.isoformat(),
        )
        malicious_rec = CommitRecord(
            commit_id=malicious_cid,
            branch="\x1b[31mmalicious\x1b[0m",
            snapshot_id=_SNAP_ID,
            message=malicious_message,
            committed_at=_COMMITTED_AT,
        )
        write_commit(repo, malicious_rec)
        result = _rc(repo, malicious_cid)
        assert result.exit_code == 0
        assert "\x1b" not in result.output

    def test_ansi_in_message_stripped_in_text(self, tmp_path: pathlib.Path) -> None:
        """An ESC byte stored in the commit message must not reach text output."""
        repo = _make_repo(tmp_path)
        malicious_snap_id = fake_id("malicious-snap")
        malicious_message = "\x1b[31mmalicious\x1b[0m"
        malicious_committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
        malicious_cid = hash_commit(
            parent_ids=[],
            snapshot_id=malicious_snap_id,
            message=malicious_message,
            committed_at_iso=malicious_committed_at.isoformat(),
        )
        malicious_rec = CommitRecord(
            commit_id=malicious_cid,
            branch="main",
            snapshot_id=malicious_snap_id,
            message=malicious_message,
            committed_at=malicious_committed_at,
        )
        write_commit(repo, malicious_rec)
        result = _rc(repo, malicious_cid)
        assert result.exit_code == 0
        assert "\x1b" not in result.output

    def test_ansi_in_commit_id_rejected(self, tmp_path: pathlib.Path) -> None:
        """A commit-ID argument that starts with an escape sequence is a user error."""
        repo = _make_repo(tmp_path)
        result = _rc(repo, f"\x1b[31m{'a' * 64}")
        assert result.exit_code == ExitCode.USER_ERROR

    def test_no_traceback_on_bad_input(self, tmp_path: pathlib.Path) -> None:
        """Malformed input must not leak a Python traceback on either stream."""
        repo = _make_repo(tmp_path)
        result = _rc(repo, "not-valid")
        assert "Traceback" not in result.output
        # Other tests in this file read error payloads from stderr, so a
        # traceback could appear there without ever showing up in stdout.
        assert "Traceback" not in (result.stderr or "")
440
441
442 # ---------------------------------------------------------------------------
443 # Stress
444 # ---------------------------------------------------------------------------
445
446
class TestStress:
    """Repeated invocations against one commit must keep succeeding."""

    def test_200_sequential_reads(self, tmp_path: pathlib.Path) -> None:
        """200 consecutive JSON reads all exit 0 and return the same message."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, message="stable")
        for attempt in range(200):
            outcome = _rcj(repo, commit_id)
            assert outcome.exit_code == 0, f"failed at iteration {attempt}"
            payload = json.loads(outcome.output)
            assert payload["message"] == "stable"

    def test_fields_filter_200_iterations(self, tmp_path: pathlib.Path) -> None:
        """200 consecutive ``--fields`` reads all exit 0 and return the agent_id."""
        repo = _make_repo(tmp_path)
        commit_id = _commit(repo, agent_id="bot")
        for attempt in range(200):
            outcome = _rcj(repo, "--fields", "commit_id,agent_id", commit_id)
            assert outcome.exit_code == 0, f"failed at iteration {attempt}"
            payload = json.loads(outcome.output)
            assert payload["agent_id"] == "bot"
464
465
class TestRegisterFlags:
    """Argument-parser wiring for the ``-j`` / ``--json`` flag."""

    def _parse(self, *args: str) -> "argparse.Namespace":
        """Parse ``read-commit <fake id> *args`` with a freshly built parser.

        The command's ``register`` function is attached to a new
        ``ArgumentParser`` so only argument parsing is exercised.
        """
        import argparse
        from muse.cli.commands.read_commit import register

        parser = argparse.ArgumentParser()
        subparsers = parser.add_subparsers()
        register(subparsers)
        argv = ["read-commit", fake_id("a"), *args]
        return parser.parse_args(argv)

    def test_json_short_flag(self) -> None:
        """``-j`` sets ``json_out`` to True."""
        parsed = self._parse("-j")
        assert parsed.json_out is True

    def test_json_long_flag(self) -> None:
        """``--json`` sets ``json_out`` to True."""
        parsed = self._parse("--json")
        assert parsed.json_out is True

    def test_default_no_json(self) -> None:
        """With no flag given, ``json_out`` is False."""
        parsed = self._parse()
        assert parsed.json_out is False
File History 1 commit
sha256:ff478cfdcdd4b7fd6de89cb68896601a981f945634463275ec333bd20ca36402 Merge branch 'dev' into main Human 20 days ago