gabriel / muse public
test_hash_object_canonical.py python
331 lines 13.2 KB
Raw
sha256:c5131d76c6eada02939111fda4aa8e51b0c1456b9983727cfd6be101916de14e merge: pull local/dev — resolve trivial _EXT_MAP symbol con… Sonnet 4.6 patch 12 days ago
1 """hash-object: canonical sha256: prefix and agent-ready JSON schema.
2
3 Every object ID emitted by ``muse hash-object`` must carry the ``sha256:``
4 prefix. Bare hex is only acceptable at the disk boundary (the filename
5 on disk). This test suite enforces that invariant and covers the new
6 agent-ready JSON fields.
7
8 Test categories
9 ---------------
TestCanonicalPrefix   — object_id always starts with 'sha256:'
TestStdinWriteFixed   — stdin + --write was broken (bare hex bug); now fixed
TestAgentFields       — duration_ms, exit_code, size_bytes in JSON output
TestTextOutputPrefix  — text format also carries the prefix
TestCrossCheck        — file and stdin produce identical canonical IDs
TestRegisterFlags     — register() wires --json / -j to the json_out dest
15 """
16
17 from __future__ import annotations
18
19 import argparse
20 import json
21 import pathlib
22
23 from muse.core.errors import ExitCode
24 from tests.cli_test_helper import CliRunner, InvokeResult
25 from muse.core.types import blob_id, split_id
26 from muse.core.paths import muse_dir
27 from muse.core.object_store import object_path, read_object
28
29 runner = CliRunner()
30
31
32 # ---------------------------------------------------------------------------
33 # Helpers
34 # ---------------------------------------------------------------------------
35
def _run(
    *args: str,
    stdin: bytes | None = None,
    repo: pathlib.Path | None = None,
) -> InvokeResult:
    """Invoke ``hash-object`` through the module-level CLI test runner.

    Args:
        *args: Command-line arguments appended after ``hash-object``.
        stdin: Bytes passed to the runner as the command's input, or ``None``.
        repo: When given, its string form is passed to the runner in the
            ``MUSE_REPO_ROOT`` environment entry; otherwise the env mapping
            is empty.

    Returns:
        The ``InvokeResult`` returned by ``runner.invoke``.
    """
    # Imported at call time, as the original helper did.
    from muse.cli.app import main as cli

    # Explicit None check: an optional argument should be compared against
    # None rather than relying on the truthiness of a path object.
    env = {"MUSE_REPO_ROOT": str(repo)} if repo is not None else {}
    return runner.invoke(cli, ["hash-object", *args], input=stdin, env=env)
40
41
def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path:
    """Build a minimal repository skeleton under *tmp_path* and return its root.

    The directory returned by ``muse_dir(root)`` receives the ``objects``,
    ``commits``, ``snapshots`` and ``refs/heads`` sub-directories, a ``HEAD``
    file pointing at ``refs/heads/main`` and a ``repo.json`` descriptor.
    """
    root = tmp_path / "repo"
    meta = muse_dir(root)

    layout = ("objects", "commits", "snapshots", "refs/heads")
    for name in layout:
        directory = meta / name
        directory.mkdir(parents=True)

    head_file = meta / "HEAD"
    head_file.write_text("ref: refs/heads/main")

    descriptor = {"repo_id": "test", "domain": "code"}
    config_file = meta / "repo.json"
    config_file.write_text(json.dumps(descriptor))

    return root
52
53
54
55 # ---------------------------------------------------------------------------
56 # TestCanonicalPrefix
57 # ---------------------------------------------------------------------------
58
class TestCanonicalPrefix:
    """JSON ``object_id`` values must carry the ``sha256:`` prefix."""

    def test_file_json_object_id_has_prefix(self, tmp_path: pathlib.Path) -> None:
        """Hashing a file with ``--json`` exits 0 and reports a prefixed ID."""
        target = tmp_path / "f.txt"
        target.write_bytes(b"hello")
        outcome = _run("--json", str(target))
        assert outcome.exit_code == 0
        oid = json.loads(outcome.output)["object_id"]
        assert oid.startswith("sha256:"), (
            f"object_id must start with 'sha256:' — got {oid!r}"
        )

    def test_file_json_object_id_correct_length(self, tmp_path: pathlib.Path) -> None:
        """The prefix (7 characters) plus 64 hex digits gives 71 characters."""
        target = tmp_path / "f.txt"
        target.write_bytes(b"hello")
        outcome = _run("--json", str(target))
        oid = json.loads(outcome.output)["object_id"]
        assert len(oid) == 71

    def test_file_json_object_id_matches_canonical(self, tmp_path: pathlib.Path) -> None:
        """The reported ID equals ``blob_id`` of the file's bytes."""
        payload = b"canonical check"
        target = tmp_path / "f.txt"
        target.write_bytes(payload)
        outcome = _run("--json", str(target))
        parsed = json.loads(outcome.output)
        assert parsed["object_id"] == blob_id(payload)

    def test_stdin_json_object_id_has_prefix(self, tmp_path: pathlib.Path) -> None:
        """Hashing stdin with ``--json`` exits 0 and reports a prefixed ID."""
        outcome = _run("--json", "--stdin", stdin=b"from stdin")
        assert outcome.exit_code == 0
        oid = json.loads(outcome.output)["object_id"]
        assert oid.startswith("sha256:")

    def test_stdin_json_object_id_matches_canonical(self, tmp_path: pathlib.Path) -> None:
        """The ID reported for stdin equals ``blob_id`` of the piped bytes."""
        payload = b"piped data"
        outcome = _run("--json", "--stdin", stdin=payload)
        parsed = json.loads(outcome.output)
        assert parsed["object_id"] == blob_id(payload)

    def test_empty_file_has_prefix(self, tmp_path: pathlib.Path) -> None:
        """An empty file reports the same ID as ``blob_id(b"")``."""
        target = tmp_path / "empty.txt"
        target.write_bytes(b"")
        outcome = _run("--json", str(target))
        parsed = json.loads(outcome.output)
        assert parsed["object_id"] == blob_id(b"")

    def test_empty_stdin_has_prefix(self, tmp_path: pathlib.Path) -> None:
        """Empty stdin reports the same ID as ``blob_id(b"")``."""
        outcome = _run("--json", "--stdin", stdin=b"")
        parsed = json.loads(outcome.output)
        assert parsed["object_id"] == blob_id(b"")

    def test_no_bare_hex_in_json_output(self, tmp_path: pathlib.Path) -> None:
        """The un-prefixed hex part of the ID must not be reported as object_id."""
        payload = b"no bare hex"
        target = tmp_path / "f.txt"
        target.write_bytes(payload)
        outcome = _run("--json", str(target))
        parsed = json.loads(outcome.output)
        # Second element of split_id(), used here as the un-prefixed form.
        hex_only = split_id(blob_id(payload))[1]
        assert parsed["object_id"] != hex_only, (
            "object_id must be 'sha256:<hex>', not bare hex"
        )
123
124
125 # ---------------------------------------------------------------------------
126 # TestStdinWriteFixed
127 # ---------------------------------------------------------------------------
128
class TestStdinWriteFixed:
    """``--stdin --write`` used to pass bare hex to write_object; these tests pin the fix."""

    def test_stdin_write_exits_zero(self, tmp_path: pathlib.Path) -> None:
        """Writing stdin content into a repository exits with code 0."""
        root = _make_repo(tmp_path)
        outcome = _run("--stdin", "--write", stdin=b"store me", repo=root)
        assert outcome.exit_code == 0, f"exit {outcome.exit_code}: {outcome.output}"

    def test_stdin_write_stored_true(self, tmp_path: pathlib.Path) -> None:
        """The first write reports ``stored`` as ``True``."""
        root = _make_repo(tmp_path)
        outcome = _run("--json", "--stdin", "--write", stdin=b"store me", repo=root)
        parsed = json.loads(outcome.output)
        assert parsed["stored"] is True

    def test_stdin_write_object_file_exists(self, tmp_path: pathlib.Path) -> None:
        """The object file exists afterwards and reads back as the input bytes."""
        root = _make_repo(tmp_path)
        payload = b"stdin stored content"
        outcome = _run("--json", "--stdin", "--write", stdin=payload, repo=root)
        oid = json.loads(outcome.output)["object_id"]
        stored_at = object_path(root, oid)
        assert stored_at.exists(), f"object file not found at {stored_at}"
        assert read_object(root, oid) == payload

    def test_stdin_write_object_id_canonical(self, tmp_path: pathlib.Path) -> None:
        """The ID reported by a write equals ``blob_id`` of the input bytes."""
        root = _make_repo(tmp_path)
        payload = b"canonical write"
        outcome = _run("--json", "--stdin", "--write", stdin=payload, repo=root)
        parsed = json.loads(outcome.output)
        assert parsed["object_id"] == blob_id(payload)

    def test_stdin_write_idempotent(self, tmp_path: pathlib.Path) -> None:
        """A second write of the same bytes exits 0 and reports ``stored`` as ``False``."""
        root = _make_repo(tmp_path)
        payload = b"write twice"
        _run("--stdin", "--write", stdin=payload, repo=root)
        second = _run("--json", "--stdin", "--write", stdin=payload, repo=root)
        assert second.exit_code == 0
        parsed = json.loads(second.output)
        assert parsed["stored"] is False
165
166
167 # ---------------------------------------------------------------------------
168 # TestAgentFields
169 # ---------------------------------------------------------------------------
170
class TestAgentFields:
    """The JSON payload must expose ``duration_ms``, ``exit_code`` and ``size_bytes``."""

    @staticmethod
    def _file_json(directory: pathlib.Path, name: str, payload: bytes) -> dict:
        """Write *payload* to ``directory / name``, hash it with ``--json``, return the parsed output."""
        target = directory / name
        target.write_bytes(payload)
        outcome = _run("--json", str(target))
        return json.loads(outcome.output)

    @staticmethod
    def _stdin_json(payload: bytes) -> dict:
        """Hash *payload* via ``--json --stdin`` and return the parsed output."""
        outcome = _run("--json", "--stdin", stdin=payload)
        return json.loads(outcome.output)

    def test_duration_ms_present(self, tmp_path: pathlib.Path) -> None:
        """``duration_ms`` is a key of the file-mode payload."""
        parsed = self._file_json(tmp_path, "f.txt", b"timing")
        assert "duration_ms" in parsed, "JSON must include duration_ms"

    def test_duration_ms_non_negative(self, tmp_path: pathlib.Path) -> None:
        """``duration_ms`` is not negative."""
        parsed = self._file_json(tmp_path, "f.txt", b"timing")
        assert parsed["duration_ms"] >= 0

    def test_exit_code_present(self, tmp_path: pathlib.Path) -> None:
        """``exit_code`` is a key of the file-mode payload."""
        parsed = self._file_json(tmp_path, "f.txt", b"x")
        assert "exit_code" in parsed, "JSON must include exit_code"

    def test_exit_code_zero_on_success(self, tmp_path: pathlib.Path) -> None:
        """``exit_code`` is 0 when hashing an existing file."""
        parsed = self._file_json(tmp_path, "f.txt", b"x")
        assert parsed["exit_code"] == 0

    def test_size_bytes_present(self, tmp_path: pathlib.Path) -> None:
        """``size_bytes`` is a key of the file-mode payload."""
        parsed = self._file_json(tmp_path, "f.txt", b"twelve bytes")
        assert "size_bytes" in parsed, "JSON must include size_bytes"

    def test_size_bytes_correct_for_file(self, tmp_path: pathlib.Path) -> None:
        """``size_bytes`` equals the length of the file's content."""
        payload = b"twelve bytes"
        parsed = self._file_json(tmp_path, "f.txt", payload)
        assert parsed["size_bytes"] == len(payload)

    def test_size_bytes_correct_for_stdin(self, tmp_path: pathlib.Path) -> None:
        """``size_bytes`` equals the length of the stdin content."""
        payload = b"stdin payload"
        parsed = self._stdin_json(payload)
        assert parsed["size_bytes"] == len(payload)

    def test_size_bytes_zero_for_empty(self, tmp_path: pathlib.Path) -> None:
        """``size_bytes`` is 0 for an empty file."""
        parsed = self._file_json(tmp_path, "empty.txt", b"")
        assert parsed["size_bytes"] == 0

    def test_stdin_duration_ms_present(self, tmp_path: pathlib.Path) -> None:
        """``duration_ms`` is a key of the stdin-mode payload."""
        parsed = self._stdin_json(b"x")
        assert "duration_ms" in parsed

    def test_stdin_exit_code_present(self, tmp_path: pathlib.Path) -> None:
        """``exit_code`` is a key of the stdin-mode payload."""
        parsed = self._stdin_json(b"x")
        assert "exit_code" in parsed
229
230
231 # ---------------------------------------------------------------------------
232 # TestTextOutputPrefix
233 # ---------------------------------------------------------------------------
234
class TestTextOutputPrefix:
    """Plain-text output must carry the ``sha256:`` prefix as well."""

    def test_text_file_has_prefix(self, tmp_path: pathlib.Path) -> None:
        """Hashing a file without ``--json`` exits 0 and prints a prefixed ID."""
        target = tmp_path / "f.txt"
        target.write_bytes(b"text output")
        outcome = _run(str(target))
        assert outcome.exit_code == 0
        printed = outcome.output.strip()
        assert printed.startswith("sha256:")

    def test_text_stdin_has_prefix(self, tmp_path: pathlib.Path) -> None:
        """Hashing stdin without ``--json`` prints a prefixed ID."""
        outcome = _run("--stdin", stdin=b"text stdin")
        printed = outcome.output.strip()
        assert printed.startswith("sha256:")

    def test_text_output_is_correct_canonical_id(self, tmp_path: pathlib.Path) -> None:
        """The stripped text output equals ``blob_id`` of the file's bytes."""
        payload = b"text canonical"
        target = tmp_path / "f.txt"
        target.write_bytes(payload)
        outcome = _run(str(target))
        printed = outcome.output.strip()
        assert printed == blob_id(payload)

    def test_text_length_is_71(self, tmp_path: pathlib.Path) -> None:
        """The prefix (7 characters) plus 64 hex digits gives 71 characters."""
        target = tmp_path / "f.txt"
        target.write_bytes(b"length check")
        outcome = _run(str(target))
        printed = outcome.output.strip()
        assert len(printed) == 71
262
263
264 # ---------------------------------------------------------------------------
265 # TestCrossCheck
266 # ---------------------------------------------------------------------------
267
class TestCrossCheck:
    """The file path and the stdin path must report the same ID for the same bytes."""

    def test_file_and_stdin_same_id(self, tmp_path: pathlib.Path) -> None:
        """Hashing identical bytes from a file and from stdin gives equal IDs."""
        payload = b"cross check content"
        target = tmp_path / "f.txt"
        target.write_bytes(payload)

        from_file = _run("--json", str(target))
        file_id = json.loads(from_file.output)["object_id"]

        from_stdin = _run("--json", "--stdin", stdin=payload)
        stdin_id = json.loads(from_stdin.output)["object_id"]

        assert file_id == stdin_id

    def test_write_file_and_stdin_same_id(self, tmp_path: pathlib.Path) -> None:
        """With ``--write`` in a repository, both input paths still report equal IDs."""
        root = _make_repo(tmp_path)
        payload = b"write cross check"
        target = root / "f.txt"
        target.write_bytes(payload)

        from_file = _run("--json", "--write", str(target), repo=root)
        file_id = json.loads(from_file.output)["object_id"]

        from_stdin = _run("--json", "--stdin", "--write", stdin=payload, repo=root)
        stdin_id = json.loads(from_stdin.output)["object_id"]

        assert file_id == stdin_id

    def test_hash_bytes_returns_canonical(self) -> None:
        """``_hash_bytes`` returns a 71-character ID starting with ``sha256:``."""
        from muse.cli.commands.hash_object import _hash_bytes

        digest = _hash_bytes(b"test data")
        assert digest.startswith("sha256:"), (
            f"_hash_bytes must return 'sha256:<hex>', got {digest!r}"
        )
        assert len(digest) == 71
298
299
300 # ---------------------------------------------------------------------------
301 # TestRegisterFlags — argparse-level verification
302 # ---------------------------------------------------------------------------
303
304
class TestRegisterFlags:
    """Argparse-level checks that ``register()`` wires ``--json`` / ``-j`` correctly."""

    def _make_parser(self) -> argparse.ArgumentParser:
        """Return a fresh parser with the ``hash-object`` sub-command registered.

        ``argparse`` comes from the module-level import, so no local re-import
        is needed. The annotation is unquoted because the module enables
        postponed evaluation of annotations.
        """
        # Imported lazily, as the original helper did.
        from muse.cli.commands.hash_object import register

        ap = argparse.ArgumentParser()
        subs = ap.add_subparsers()
        register(subs)
        return ap

    def test_json_flag_long(self) -> None:
        """``--json`` sets ``json_out`` to ``True``."""
        ns = self._make_parser().parse_args(["hash-object", "--stdin", "--json"])
        assert ns.json_out is True

    def test_j_alias(self) -> None:
        """``-j`` sets ``json_out`` to ``True``."""
        ns = self._make_parser().parse_args(["hash-object", "--stdin", "-j"])
        assert ns.json_out is True

    def test_default_is_text(self) -> None:
        """Without the flag, ``json_out`` is ``False``."""
        ns = self._make_parser().parse_args(["hash-object", "--stdin"])
        assert ns.json_out is False

    def test_dest_is_json_out(self) -> None:
        """The namespace has a ``json_out`` attribute and no ``fmt`` attribute."""
        ns = self._make_parser().parse_args(["hash-object", "--stdin", "-j"])
        assert hasattr(ns, "json_out")
        assert not hasattr(ns, "fmt")
File History 5 commits
sha256:c5131d76c6eada02939111fda4aa8e51b0c1456b9983727cfd6be101916de14e merge: pull local/dev — resolve trivial _EXT_MAP symbol con… Sonnet 4.6 patch 12 days ago
sha256:9c33d61749fff814c5226d5386aa2af7064c2c02788594a25fdd709358132eea fix: _PROPOSAL_PREFIX_RESOLVE_LIMIT 200 → 100 to match hub … Sonnet 4.6 19 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago