gabriel / muse public
test_bare_hex_rejection.py python
413 lines 16.7 KB
Raw
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 29 days ago
1 """TDD: bare hex IDs are rejected at every CLI boundary.
2
3 The sha256: prefix is a type tag, not decoration. It tells the system which
4 algorithm produced the hash. Accepting bare hex at CLI boundaries forecloses
5 future algorithm agility — if we ever add blake3: IDs, bare hex becomes
6 fatally ambiguous.
7
8 Architecture note
9 -----------------
10 Enforcement belongs at the CLI outer shell — the hard boundary where untrusted
11 user input enters the system. Internal functions like resolve_commit_ref()
12 operate on already-validated input; they are not the primary enforcement point.
13 Defense-in-depth at the core is a bonus, not the design.
14
15 Rule (always, without exception)
16 ---------------------------------
17 - sha256:<64 lowercase hex> — full ID, accepted everywhere.
18 - sha256:<short prefix> — prefix resolution, accepted.
19 - <bare hex, any length> — REJECTED at the CLI boundary with a clear error.
20
21 The only place bare hex appears is on disk (filenames) — stripped on write,
22 restored on read. Users never see it; agents never pass it.
23
Covered boundaries
------------------
- muse snapshot read <id>
- muse snapshot export <id>
- muse snapshot-diff <ref_a> <ref_b>
- muse verify-commit <id>
- muse read <ref>
30 """
31
32 from __future__ import annotations
33
34 import datetime
35 import json
36 import pathlib
37
38 from muse.core.types import Manifest, blob_id, long_id, short_id
39 from muse.core.object_store import write_object
40 from muse.core.ids import hash_commit, hash_snapshot
41 from muse.core.store import (
42 CommitRecord,
43 SnapshotRecord,
44 write_commit,
45 write_snapshot,
46 )
47 from muse.core.paths import muse_dir, ref_path
48 from tests.cli_test_helper import CliRunner
49
# Placeholder passed as the first argument of every runner.invoke() call below.
# NOTE(review): presumably the project's CliRunner dispatches to the muse CLI
# on its own and ignores this argument — confirm in tests/cli_test_helper.
cli = None
# Single runner instance shared by every test in this module.
runner = CliRunner()
52
53
54 # ---------------------------------------------------------------------------
55 # Helpers
56 # ---------------------------------------------------------------------------
57
# Inputs that deliberately lack the "sha256:" type tag.
_BARE_HEX_FULL = 64 * "a"  # full-length ID (64 hex chars) without the tag
_BARE_HEX_SHORT = "abc123def456"  # 12-char abbreviation without the tag
_INVALID_LOOK = "deadbeef"  # 8-char abbreviation without the tag
61
62
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal repository skeleton under *path* and return *path*.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories inside the directory reported by ``muse_dir(path)``, writes a
    ``HEAD`` file pointing at ``refs/heads/main``, and writes a ``repo.json``
    describing the test repository.
    """
    store_root = muse_dir(path)
    for subdir in ("commits", "snapshots", "objects", "refs/heads"):
        target = store_root / subdir
        target.mkdir(parents=True, exist_ok=True)

    head_file = store_root / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": "bare-hex-test", "domain": "code"}
    meta_file = store_root / "repo.json"
    meta_file.write_text(json.dumps(repo_meta), encoding="utf-8")
    return path
72
73
def _env(repo: pathlib.Path) -> dict[str, str]:
    """Return the environment overrides that point the CLI at *repo*.

    The result is an environment-variable mapping, not a snapshot manifest,
    so it is annotated as a plain ``dict[str, str]``.

    Args:
        repo: Root directory of the test repository.

    Returns:
        A one-entry mapping with ``MUSE_REPO_ROOT`` set to ``str(repo)``.
    """
    return {"MUSE_REPO_ROOT": str(repo)}
76
77
def _obj(repo: pathlib.Path, content: bytes) -> str:
    """Store *content* as an object in *repo* and return its blob ID."""
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
82
83
def _snap(repo: pathlib.Path, manifest: Manifest) -> str:
    """Write a snapshot record for *manifest* into *repo* and return its ID.

    The ID comes from ``hash_snapshot(manifest)``; the record is stamped with
    a fixed creation time (2026-01-01 UTC).
    """
    snapshot_id = hash_snapshot(manifest)
    fixed_time = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    record = SnapshotRecord(
        snapshot_id=snapshot_id,
        manifest=manifest,
        created_at=fixed_time,
    )
    write_snapshot(repo, record)
    return snapshot_id
95
96
def _commit(repo: pathlib.Path, sid: str, branch: str = "main") -> str:
    """Write a parentless commit for snapshot *sid* and return the commit ID.

    The commit uses a fixed message, author and timestamp (2026-01-01 UTC).
    After the record is written, the ref file for *branch* is overwritten
    with the new commit ID.
    """
    message = "test"
    author = "tester"
    when = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)

    commit_id = hash_commit(
        parent_ids=[],
        snapshot_id=sid,
        message=message,
        committed_at_iso=when.isoformat(),
        author=author,
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=sid,
        message=message,
        committed_at=when,
        author=author,
        parent_commit_id=None,
    )
    write_commit(repo, record)

    # Point the branch at the commit that was just written.
    ref_path(repo, branch).write_text(commit_id, encoding="utf-8")
    return commit_id
121
122
def _create_snapshot_and_commit(repo: pathlib.Path) -> tuple[str, str]:
    """Build a single-file snapshot plus a commit on it.

    Returns:
        ``(snapshot_id, commit_id)`` for a snapshot whose only entry is
        ``file.txt`` containing ``b"hello world"``.
    """
    blob = _obj(repo, b"hello world")
    snapshot_id = _snap(repo, {"file.txt": blob})
    commit_id = _commit(repo, snapshot_id)
    return snapshot_id, commit_id
129
130
131 # ---------------------------------------------------------------------------
132 # muse snapshot read — bare hex must be rejected
133 # ---------------------------------------------------------------------------
134
135
class TestSnapshotReadBareHexRejected:
    """``snapshot read`` refuses un-prefixed hex and accepts sha256: IDs."""

    @staticmethod
    def _read(repo: pathlib.Path, ref: str):
        """Run ``snapshot read <ref>`` against *repo* and return the result."""
        return runner.invoke(cli, ["snapshot", "read", ref], env=_env(repo))

    def test_full_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        outcome = self._read(_init_repo(tmp_path), _BARE_HEX_FULL)
        assert outcome.exit_code != 0, "bare full 64-char hex must be rejected"

    def test_short_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        outcome = self._read(_init_repo(tmp_path), _BARE_HEX_SHORT)
        assert outcome.exit_code != 0, "bare short hex must be rejected"

    def test_8_char_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        outcome = self._read(_init_repo(tmp_path), _INVALID_LOOK)
        assert outcome.exit_code != 0, "any bare hex must be rejected"

    def test_error_message_mentions_sha256_prefix(self, tmp_path: pathlib.Path) -> None:
        outcome = self._read(_init_repo(tmp_path), _BARE_HEX_SHORT)
        assert outcome.exit_code != 0
        # stderr is consulted only when the hint is absent from the main output.
        assert "sha256:" in outcome.output.lower() or "sha256:" in (outcome.stderr or "").lower(), (
            "error message must tell the user to use sha256: prefix"
        )

    def test_prefixed_full_id_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._read(repo, snapshot_id)
        assert outcome.exit_code == 0, f"sha256: prefixed full ID must be accepted; got: {outcome.stderr}"

    def test_prefixed_short_id_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        # Abbreviated form of the snapshot ID, as produced by short_id().
        abbreviated = short_id(snapshot_id)
        outcome = self._read(repo, abbreviated)
        assert outcome.exit_code == 0, (
            f"sha256:-prefixed short ID must be accepted; got: {outcome.stderr}"
        )
177
178
179 # ---------------------------------------------------------------------------
180 # muse snapshot export — bare hex must be rejected
181 # ---------------------------------------------------------------------------
182
183
class TestSnapshotExportBareHexRejected:
    """``snapshot export`` refuses un-prefixed hex and accepts sha256: IDs."""

    @staticmethod
    def _export(repo: pathlib.Path, ref: str, archive: pathlib.Path):
        """Run ``snapshot export <ref> --output <archive>`` and return the result."""
        argv = ["snapshot", "export", ref, "--output", str(archive)]
        return runner.invoke(cli, argv, env=_env(repo))

    def test_full_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        outcome = self._export(repo, _BARE_HEX_FULL, tmp_path / "out.tar.gz")
        assert outcome.exit_code != 0, "bare hex must be rejected by snapshot export"

    def test_short_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        outcome = self._export(repo, _BARE_HEX_SHORT, tmp_path / "out.tar.gz")
        assert outcome.exit_code != 0, "short bare hex must be rejected by snapshot export"

    def test_prefixed_id_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._export(repo, snapshot_id, tmp_path / "out.tar.gz")
        assert outcome.exit_code == 0, f"sha256: prefixed ID must be accepted; got: {outcome.stderr}"

    def test_prefixed_short_id_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        abbreviated = short_id(snapshot_id)
        outcome = self._export(repo, abbreviated, tmp_path / "out.tar.gz")
        assert outcome.exit_code == 0, (
            f"sha256:-prefixed short ID must be accepted; got: {outcome.stderr}"
        )
231
232
233 # ---------------------------------------------------------------------------
234 # muse snapshot-diff — bare hex must be rejected for both refs
235 # ---------------------------------------------------------------------------
236
237
class TestSnapshotDiffBareHexRejected:
    """``snapshot-diff`` refuses un-prefixed hex on either side of the diff."""

    @staticmethod
    def _diff(repo: pathlib.Path, ref_a: str, ref_b: str):
        """Run ``snapshot-diff <ref_a> <ref_b>`` against *repo* and return the result."""
        return runner.invoke(cli, ["snapshot-diff", ref_a, ref_b], env=_env(repo))

    @staticmethod
    def _snapshot_pair(
        repo: pathlib.Path,
        first: bytes = b"version_a",
        second: bytes = b"version_b",
    ) -> tuple[str, str]:
        """Store two versions of ``f.txt`` and return their snapshot IDs in order."""
        blob_a = _obj(repo, first)
        blob_b = _obj(repo, second)
        snap_a = _snap(repo, {"f.txt": blob_a})
        snap_b = _snap(repo, {"f.txt": blob_b})
        return snap_a, snap_b

    def test_ref_a_bare_hex_full_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._diff(repo, _BARE_HEX_FULL, snapshot_id)
        assert outcome.exit_code != 0, "bare hex in ref_a position must be rejected"

    def test_ref_b_bare_hex_full_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._diff(repo, snapshot_id, _BARE_HEX_FULL)
        assert outcome.exit_code != 0, "bare hex in ref_b position must be rejected"

    def test_ref_a_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._diff(repo, _BARE_HEX_SHORT, snapshot_id)
        assert outcome.exit_code != 0, "short bare hex in ref_a must be rejected"

    def test_ref_b_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._diff(repo, snapshot_id, _BARE_HEX_SHORT)
        assert outcome.exit_code != 0, "short bare hex in ref_b must be rejected"

    def test_both_prefixed_full_ids_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snap_a, snap_b = self._snapshot_pair(repo)
        outcome = self._diff(repo, snap_a, snap_b)
        assert outcome.exit_code == 0, f"prefixed full IDs must be accepted; got: {outcome.stderr}"

    def test_ref_a_prefixed_short_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snap_a, snap_b = self._snapshot_pair(repo)
        outcome = self._diff(repo, short_id(snap_a), snap_b)
        assert outcome.exit_code == 0, (
            f"sha256:-prefixed short ID in ref_a must be accepted; got: {outcome.stderr}"
        )

    def test_ref_b_prefixed_short_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snap_a, snap_b = self._snapshot_pair(repo)
        outcome = self._diff(repo, snap_a, short_id(snap_b))
        assert outcome.exit_code == 0, (
            f"sha256:-prefixed short ID in ref_b must be accepted; got: {outcome.stderr}"
        )

    def test_both_prefixed_short_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snap_a, snap_b = self._snapshot_pair(repo)
        abbreviated_a = short_id(snap_a)
        abbreviated_b = short_id(snap_b)
        outcome = self._diff(repo, abbreviated_a, abbreviated_b)
        assert outcome.exit_code == 0, (
            f"both sha256:-prefixed short IDs must be accepted; got: {outcome.stderr}"
        )

    def test_branch_name_still_accepted(self, tmp_path: pathlib.Path) -> None:
        """Branch names that are not hex keep resolving as before."""
        repo = _init_repo(tmp_path)
        snap_a, snap_b = self._snapshot_pair(repo, b"v1", b"v2")
        _commit(repo, snap_a, branch="main")
        _commit(repo, snap_b, branch="dev")
        outcome = self._diff(repo, "main", "dev")
        assert outcome.exit_code == 0, f"branch names must still resolve; got: {outcome.stderr}"

    def test_head_still_accepted(self, tmp_path: pathlib.Path) -> None:
        """The symbolic name HEAD keeps resolving as before."""
        repo = _init_repo(tmp_path)
        blob = _obj(repo, b"v1")
        snapshot_id = _snap(repo, {"f.txt": blob})
        _commit(repo, snapshot_id)
        outcome = self._diff(repo, "HEAD", "HEAD")
        assert outcome.exit_code == 0, f"HEAD must still resolve; got: {outcome.stderr}"
339
340
341 # ---------------------------------------------------------------------------
342 # muse verify-commit — bare hex must be rejected
343 # ---------------------------------------------------------------------------
344
345
class TestVerifyCommitBareHexRejected:
    """verify-commit must reject bare 64-char hex commit IDs."""

    def test_bare_64hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """A 64-char hex string without the sha256: tag must fail."""
        repo = _init_repo(tmp_path)
        result = runner.invoke(
            cli, ["verify-commit", _BARE_HEX_FULL], env=_env(repo)
        )
        assert result.exit_code != 0, "bare 64-char hex must be rejected by verify-commit"

    def test_prefixed_id_not_found_is_not_bare_hex_error(self, tmp_path: pathlib.Path) -> None:
        """A sha256:-prefixed ID that doesn't exist should fail with 'not found', not 'bare hex'."""
        repo = _init_repo(tmp_path)
        prefixed = long_id("b" * 64)
        result = runner.invoke(cli, ["verify-commit", prefixed], env=_env(repo))
        output_combined = result.output + (result.stderr or "")
        # The two conditions are asserted separately. The previous combined
        # form ("bare" absent OR exit_code != 0) could never fail, because a
        # missing commit always produces a non-zero exit code.
        assert result.exit_code != 0, "a commit that does not exist must not verify"
        # A correctly prefixed ID must never trigger the bare-hex rejection.
        assert "bare" not in output_combined.lower(), (
            f"sha256:-prefixed ID must not be reported as bare hex; got: {output_combined}"
        )
366
367
368 # ---------------------------------------------------------------------------
369 # muse read — bare hex must be rejected at the CLI boundary
370 # ---------------------------------------------------------------------------
371
372
class TestReadBareHexRejected:
    """muse read must reject bare hex commit refs.

    read uses resolve_commit_ref() — the CLI layer must catch bare hex before
    that function is ever called. resolve_commit_ref() itself is internal and
    is not the enforcement point.
    """

    # Assertion messages name the `read` command, which is what every test
    # here invokes, so a failure points at the right command.

    def test_bare_full_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """A 64-char hex string without the sha256: tag must fail."""
        repo = _init_repo(tmp_path)
        result = runner.invoke(cli, ["read", _BARE_HEX_FULL], env=_env(repo))
        assert result.exit_code != 0, "bare 64-char hex must be rejected by read"

    def test_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """A short hex abbreviation without the sha256: tag must fail."""
        repo = _init_repo(tmp_path)
        result = runner.invoke(cli, ["read", _BARE_HEX_SHORT], env=_env(repo))
        assert result.exit_code != 0, "bare short hex must be rejected by read"

    def test_prefixed_full_id_accepted(self, tmp_path: pathlib.Path) -> None:
        """The commit ID returned by _commit() must be readable."""
        repo = _init_repo(tmp_path)
        oid = _obj(repo, b"content")
        sid = _snap(repo, {"f.txt": oid})
        cid = _commit(repo, sid)
        result = runner.invoke(cli, ["read", cid], env=_env(repo))
        assert result.exit_code == 0, f"sha256:-prefixed full commit ID must be accepted; got: {result.stderr}"

    def test_branch_name_still_accepted(self, tmp_path: pathlib.Path) -> None:
        """A branch name must keep resolving."""
        repo = _init_repo(tmp_path)
        oid = _obj(repo, b"content")
        sid = _snap(repo, {"f.txt": oid})
        _commit(repo, sid, branch="main")
        result = runner.invoke(cli, ["read", "main"], env=_env(repo))
        assert result.exit_code == 0, f"branch name must still resolve via read; got: {result.stderr}"

    def test_head_still_accepted(self, tmp_path: pathlib.Path) -> None:
        """The symbolic name HEAD must keep resolving."""
        repo = _init_repo(tmp_path)
        oid = _obj(repo, b"content")
        sid = _snap(repo, {"f.txt": oid})
        _commit(repo, sid, branch="main")
        result = runner.invoke(cli, ["read", "HEAD"], env=_env(repo))
        assert result.exit_code == 0, f"HEAD must still resolve via read; got: {result.stderr}"
File History 2 commits
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 29 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 29 days ago