gabriel / muse public
test_bare_hex_rejection.py python
415 lines 16.8 KB
Raw
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 6 days ago
1 """TDD: bare hex IDs are rejected at every CLI boundary.
2
3 The sha256: prefix is a type tag, not decoration. It tells the system which
4 algorithm produced the hash. Accepting bare hex at CLI boundaries forecloses
5 future algorithm agility — if we ever add blake3: IDs, bare hex becomes
6 fatally ambiguous.
7
8 Architecture note
9 -----------------
10 Enforcement belongs at the CLI outer shell — the hard boundary where untrusted
11 user input enters the system. Internal functions like resolve_commit_ref()
12 operate on already-validated input; they are not the primary enforcement point.
13 Defense-in-depth at the core is a bonus, not the design.
14
15 Rule (always, without exception)
16 ---------------------------------
17 - sha256:<64 lowercase hex> — full ID, accepted everywhere.
18 - sha256:<short prefix> — prefix resolution, accepted.
19 - <bare hex, any length> — REJECTED at the CLI boundary with a clear error.
20
21 The only place bare hex appears is on disk (filenames) — stripped on write,
22 restored on read. Users never see it; agents never pass it.
23
Covered boundaries
------------------
- muse snapshot read <id>
- muse snapshot export <id>
- muse snapshot-diff <ref_a> <ref_b>
- muse verify-commit <id>
- muse read <ref>
30 """
31
32 from __future__ import annotations
33
34 import datetime
35 import json
36 import pathlib
37
38 from muse.core.types import Manifest, blob_id, long_id, short_id
39 from muse.core.object_store import write_object
40 from muse.core.ids import hash_commit, hash_snapshot
41 from muse.core.commits import (
42 CommitRecord,
43 write_commit,
44 )
45 from muse.core.snapshots import (
46 SnapshotRecord,
47 write_snapshot,
48 )
49 from muse.core.paths import muse_dir, ref_path
50 from tests.cli_test_helper import CliRunner
51
# NOTE(review): ``cli`` is handed to ``runner.invoke`` as its first argument by
# every test in this module, yet it is ``None`` here — presumably the project's
# CliRunner helper locates the muse entry point itself; confirm against
# tests/cli_test_helper.
cli = None
# Single runner instance shared by all tests in this module.
runner = CliRunner()
54
55
56 # ---------------------------------------------------------------------------
57 # Helpers
58 # ---------------------------------------------------------------------------
59
# Inputs the covered commands must reject: hex strings that carry no
# ``sha256:`` type tag.
_BARE_HEX_FULL = "a" * 64  # 64 hex chars, no sha256: prefix
_BARE_HEX_SHORT = "abc123def456"  # 12 hex chars, no sha256: prefix
_INVALID_LOOK = "deadbeef"  # 8 hex chars, no sha256: prefix
63
64
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal muse repository under *path* and return *path*.

    Creates the ``commits``, ``snapshots``, ``objects`` and ``refs/heads``
    directories inside the directory reported by ``muse_dir(path)``, points
    ``HEAD`` at ``refs/heads/main`` and writes a small ``repo.json``.
    """
    root = muse_dir(path)
    subdirs = ("commits", "snapshots", "objects", "refs/heads")
    for name in subdirs:
        target = root / name
        target.mkdir(parents=True, exist_ok=True)

    head_file = root / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": "bare-hex-test", "domain": "code"}
    meta_file = root / "repo.json"
    meta_file.write_text(json.dumps(repo_meta), encoding="utf-8")
    return path
74
75
def _env(repo: pathlib.Path) -> Manifest:
    """Return the environment mapping that points the CLI at *repo*."""
    repo_root = str(repo)
    return dict(MUSE_REPO_ROOT=repo_root)
78
79
def _obj(repo: pathlib.Path, content: bytes) -> str:
    """Store *content* in the repo's object store and return its blob ID."""
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
84
85
def _snap(repo: pathlib.Path, manifest: Manifest) -> str:
    """Write a snapshot record for *manifest* and return its snapshot ID.

    The record uses a fixed creation time (2026-01-01 UTC).
    """
    snapshot_id = hash_snapshot(manifest)
    created = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    record = SnapshotRecord(
        snapshot_id=snapshot_id,
        manifest=manifest,
        created_at=created,
    )
    write_snapshot(repo, record)
    return snapshot_id
97
98
def _commit(repo: pathlib.Path, sid: str, branch: str = "main") -> str:
    """Write a parentless commit for snapshot *sid* and point *branch* at it.

    The commit uses a fixed timestamp (2026-01-01 UTC), message ``"test"`` and
    author ``"tester"``. Returns the commit ID, which is also written to the
    branch's ref file.
    """
    stamp = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    message, author = "test", "tester"

    commit_id = hash_commit(
        parent_ids=[],
        snapshot_id=sid,
        message=message,
        committed_at_iso=stamp.isoformat(),
        author=author,
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=sid,
        message=message,
        committed_at=stamp,
        author=author,
        parent_commit_id=None,
    )
    write_commit(repo, record)

    # Move the branch ref so the branch name (and HEAD, when it points at this
    # branch) resolves to the new commit.
    ref_path(repo, branch).write_text(commit_id, encoding="utf-8")
    return commit_id
123
124
def _create_snapshot_and_commit(repo: pathlib.Path) -> tuple[str, str]:
    """Build a one-file snapshot plus a commit; return (snapshot_id, commit_id)."""
    manifest = {"file.txt": _obj(repo, b"hello world")}
    snapshot_id = _snap(repo, manifest)
    commit_id = _commit(repo, snapshot_id)
    return snapshot_id, commit_id
131
132
133 # ---------------------------------------------------------------------------
134 # muse snapshot read — bare hex must be rejected
135 # ---------------------------------------------------------------------------
136
137
class TestSnapshotReadBareHexRejected:
    """``muse snapshot read`` refuses unprefixed hex and accepts ``sha256:`` IDs."""

    def test_full_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """A 64-character hex string with no prefix exits non-zero."""
        repo = _init_repo(tmp_path)
        argv = ["snapshot", "read", _BARE_HEX_FULL]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "bare full 64-char hex must be rejected"

    def test_short_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """A short hex string with no prefix exits non-zero."""
        repo = _init_repo(tmp_path)
        argv = ["snapshot", "read", _BARE_HEX_SHORT]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "bare short hex must be rejected"

    def test_8_char_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """An 8-character hex string with no prefix exits non-zero."""
        repo = _init_repo(tmp_path)
        argv = ["snapshot", "read", _INVALID_LOOK]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "any bare hex must be rejected"

    def test_error_message_mentions_sha256_prefix(self, tmp_path: pathlib.Path) -> None:
        """The rejection text names the ``sha256:`` prefix on output or stderr."""
        repo = _init_repo(tmp_path)
        argv = ["snapshot", "read", _BARE_HEX_SHORT]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0
        needle = "sha256:"
        found_in_output = needle in result.output.lower()
        # stderr is consulted only when the combined output lacks the hint.
        assert found_in_output or needle in (result.stderr or "").lower(), (
            "error message must tell the user to use sha256: prefix"
        )

    def test_prefixed_full_id_accepted(self, tmp_path: pathlib.Path) -> None:
        """A full ID as returned by the snapshot helper is read successfully."""
        repo = _init_repo(tmp_path)
        snapshot_id, _commit_id = _create_snapshot_and_commit(repo)
        argv = ["snapshot", "read", snapshot_id]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, f"sha256: prefixed full ID must be accepted; got: {result.stderr}"

    def test_prefixed_short_id_accepted(self, tmp_path: pathlib.Path) -> None:
        """The ``short_id()`` form of an existing snapshot ID is read successfully."""
        repo = _init_repo(tmp_path)
        snapshot_id, _commit_id = _create_snapshot_and_commit(repo)
        # short_id() presumably keeps the sha256: tag and truncates the hex
        # part — confirm against muse.core.types.
        abbreviated = short_id(snapshot_id)
        argv = ["snapshot", "read", abbreviated]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, (
            f"sha256:-prefixed short ID must be accepted; got: {result.stderr}"
        )
179
180
181 # ---------------------------------------------------------------------------
182 # muse snapshot export — bare hex must be rejected
183 # ---------------------------------------------------------------------------
184
185
class TestSnapshotExportBareHexRejected:
    """``muse snapshot export`` refuses unprefixed hex and accepts ``sha256:`` IDs."""

    def test_full_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """A 64-character hex string with no prefix exits non-zero."""
        repo = _init_repo(tmp_path)
        archive = tmp_path / "out.tar.gz"
        argv = ["snapshot", "export", _BARE_HEX_FULL, "--output", str(archive)]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "bare hex must be rejected by snapshot export"

    def test_short_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """A short hex string with no prefix exits non-zero."""
        repo = _init_repo(tmp_path)
        archive = tmp_path / "out.tar.gz"
        argv = ["snapshot", "export", _BARE_HEX_SHORT, "--output", str(archive)]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "short bare hex must be rejected by snapshot export"

    def test_prefixed_id_accepted(self, tmp_path: pathlib.Path) -> None:
        """A full ID as returned by the snapshot helper exports successfully."""
        repo = _init_repo(tmp_path)
        snapshot_id, _commit_id = _create_snapshot_and_commit(repo)
        archive = tmp_path / "out.tar.gz"
        argv = ["snapshot", "export", snapshot_id, "--output", str(archive)]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, f"sha256: prefixed ID must be accepted; got: {result.stderr}"

    def test_prefixed_short_id_accepted(self, tmp_path: pathlib.Path) -> None:
        """The ``short_id()`` form of an existing snapshot ID exports successfully."""
        repo = _init_repo(tmp_path)
        snapshot_id, _commit_id = _create_snapshot_and_commit(repo)
        abbreviated = short_id(snapshot_id)
        archive = tmp_path / "out.tar.gz"
        argv = ["snapshot", "export", abbreviated, "--output", str(archive)]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, (
            f"sha256:-prefixed short ID must be accepted; got: {result.stderr}"
        )
233
234
235 # ---------------------------------------------------------------------------
236 # muse snapshot-diff — bare hex must be rejected for both refs
237 # ---------------------------------------------------------------------------
238
239
class TestSnapshotDiffBareHexRejected:
    """``muse snapshot-diff`` refuses unprefixed hex in either ref position.

    Prefixed full IDs, prefixed short IDs, branch names and ``HEAD`` must keep
    resolving.
    """

    @staticmethod
    def _two_snapshots(
        repo: pathlib.Path, first: bytes, second: bytes
    ) -> tuple[str, str]:
        """Store two blobs, snapshot each as ``f.txt``, return both snapshot IDs."""
        oid_first = _obj(repo, first)
        oid_second = _obj(repo, second)
        sid_first = _snap(repo, {"f.txt": oid_first})
        sid_second = _snap(repo, {"f.txt": oid_second})
        return sid_first, sid_second

    def test_ref_a_bare_hex_full_rejected(self, tmp_path: pathlib.Path) -> None:
        """Full bare hex as the first ref exits non-zero."""
        repo = _init_repo(tmp_path)
        snapshot_id, _commit_id = _create_snapshot_and_commit(repo)
        argv = ["snapshot-diff", _BARE_HEX_FULL, snapshot_id]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "bare hex in ref_a position must be rejected"

    def test_ref_b_bare_hex_full_rejected(self, tmp_path: pathlib.Path) -> None:
        """Full bare hex as the second ref exits non-zero."""
        repo = _init_repo(tmp_path)
        snapshot_id, _commit_id = _create_snapshot_and_commit(repo)
        argv = ["snapshot-diff", snapshot_id, _BARE_HEX_FULL]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "bare hex in ref_b position must be rejected"

    def test_ref_a_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """Short bare hex as the first ref exits non-zero."""
        repo = _init_repo(tmp_path)
        snapshot_id, _commit_id = _create_snapshot_and_commit(repo)
        argv = ["snapshot-diff", _BARE_HEX_SHORT, snapshot_id]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "short bare hex in ref_a must be rejected"

    def test_ref_b_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """Short bare hex as the second ref exits non-zero."""
        repo = _init_repo(tmp_path)
        snapshot_id, _commit_id = _create_snapshot_and_commit(repo)
        argv = ["snapshot-diff", snapshot_id, _BARE_HEX_SHORT]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "short bare hex in ref_b must be rejected"

    def test_both_prefixed_full_ids_accepted(self, tmp_path: pathlib.Path) -> None:
        """Two full snapshot IDs diff successfully."""
        repo = _init_repo(tmp_path)
        left, right = self._two_snapshots(repo, b"version_a", b"version_b")
        argv = ["snapshot-diff", left, right]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, f"prefixed full IDs must be accepted; got: {result.stderr}"

    def test_ref_a_prefixed_short_accepted(self, tmp_path: pathlib.Path) -> None:
        """A ``short_id()`` first ref with a full second ref diffs successfully."""
        repo = _init_repo(tmp_path)
        left, right = self._two_snapshots(repo, b"version_a", b"version_b")
        argv = ["snapshot-diff", short_id(left), right]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, (
            f"sha256:-prefixed short ID in ref_a must be accepted; got: {result.stderr}"
        )

    def test_ref_b_prefixed_short_accepted(self, tmp_path: pathlib.Path) -> None:
        """A full first ref with a ``short_id()`` second ref diffs successfully."""
        repo = _init_repo(tmp_path)
        left, right = self._two_snapshots(repo, b"version_a", b"version_b")
        argv = ["snapshot-diff", left, short_id(right)]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, (
            f"sha256:-prefixed short ID in ref_b must be accepted; got: {result.stderr}"
        )

    def test_both_prefixed_short_accepted(self, tmp_path: pathlib.Path) -> None:
        """Two ``short_id()`` refs diff successfully."""
        repo = _init_repo(tmp_path)
        left, right = self._two_snapshots(repo, b"version_a", b"version_b")
        argv = ["snapshot-diff", short_id(left), short_id(right)]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, (
            f"both sha256:-prefixed short IDs must be accepted; got: {result.stderr}"
        )

    def test_branch_name_still_accepted(self, tmp_path: pathlib.Path) -> None:
        """Non-hex branch names must continue to resolve normally."""
        repo = _init_repo(tmp_path)
        left, right = self._two_snapshots(repo, b"v1", b"v2")
        for snapshot_id, branch in ((left, "main"), (right, "dev")):
            _commit(repo, snapshot_id, branch=branch)
        argv = ["snapshot-diff", "main", "dev"]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, f"branch names must still resolve; got: {result.stderr}"

    def test_head_still_accepted(self, tmp_path: pathlib.Path) -> None:
        """HEAD must continue to resolve normally."""
        repo = _init_repo(tmp_path)
        snapshot_id = _snap(repo, {"f.txt": _obj(repo, b"v1")})
        _commit(repo, snapshot_id)
        argv = ["snapshot-diff", "HEAD", "HEAD"]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, f"HEAD must still resolve; got: {result.stderr}"
341
342
343 # ---------------------------------------------------------------------------
344 # muse verify-commit — bare hex must be rejected
345 # ---------------------------------------------------------------------------
346
347
class TestVerifyCommitBareHexRejected:
    """verify-commit must reject bare 64-char hex commit IDs."""

    def test_bare_64hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """A 64-character hex string with no prefix exits non-zero."""
        repo = _init_repo(tmp_path)
        result = runner.invoke(
            cli, ["verify-commit", _BARE_HEX_FULL], env=_env(repo)
        )
        assert result.exit_code != 0, "bare 64-char hex must be rejected by verify-commit"

    def test_prefixed_id_not_found_is_not_bare_hex_error(self, tmp_path: pathlib.Path) -> None:
        """A sha256:-prefixed ID that doesn't exist should fail with 'not found', not 'bare hex'."""
        repo = _init_repo(tmp_path)
        prefixed = long_id("b" * 64)
        result = runner.invoke(cli, ["verify-commit", prefixed], env=_env(repo))
        output_combined = result.output + (result.stderr or "")
        # The commit does not exist in the empty repo, so the command is
        # expected to fail ...
        assert result.exit_code != 0, "verify-commit must fail for a commit that does not exist"
        # ... but not as a bare-hex rejection. This check used to be OR-ed
        # with ``exit_code != 0``, which made it pass unconditionally whenever
        # the command failed for any reason.
        assert "bare" not in output_combined.lower(), (
            f"sha256:-prefixed ID must not be reported as bare hex; got: {output_combined}"
        )
368
369
370 # ---------------------------------------------------------------------------
371 # muse read — bare hex must be rejected at the CLI boundary
372 # ---------------------------------------------------------------------------
373
374
class TestReadBareHexRejected:
    """``muse read`` must refuse bare hex commit refs at the CLI boundary.

    Per the original author's note, the command resolves refs through
    resolve_commit_ref(); the CLI layer is expected to reject bare hex before
    that internal function is reached, since it is not the enforcement point.
    """

    def test_bare_full_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """A 64-character hex string with no prefix exits non-zero."""
        repo = _init_repo(tmp_path)
        argv = ["read", _BARE_HEX_FULL]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "bare 64-char hex must be rejected by show"

    def test_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        """A short hex string with no prefix exits non-zero."""
        repo = _init_repo(tmp_path)
        argv = ["read", _BARE_HEX_SHORT]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code != 0, "bare short hex must be rejected by show"

    def test_prefixed_full_id_accepted(self, tmp_path: pathlib.Path) -> None:
        """A full commit ID as returned by the commit helper is read successfully."""
        repo = _init_repo(tmp_path)
        snapshot_id = _snap(repo, {"f.txt": _obj(repo, b"content")})
        commit_id = _commit(repo, snapshot_id)
        argv = ["read", commit_id]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, f"sha256:-prefixed full commit ID must be accepted; got: {result.stderr}"

    def test_branch_name_still_accepted(self, tmp_path: pathlib.Path) -> None:
        """The branch name ``main`` still resolves."""
        repo = _init_repo(tmp_path)
        snapshot_id = _snap(repo, {"f.txt": _obj(repo, b"content")})
        _commit(repo, snapshot_id, branch="main")
        argv = ["read", "main"]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, f"branch name must still resolve via show; got: {result.stderr}"

    def test_head_still_accepted(self, tmp_path: pathlib.Path) -> None:
        """``HEAD`` still resolves."""
        repo = _init_repo(tmp_path)
        snapshot_id = _snap(repo, {"f.txt": _obj(repo, b"content")})
        _commit(repo, snapshot_id, branch="main")
        argv = ["read", "HEAD"]
        result = runner.invoke(cli, argv, env=_env(repo))
        assert result.exit_code == 0, f"HEAD must still resolve via show; got: {result.stderr}"
File History 1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b fix: try fetch/presign before fetch/mpack to avoid Cloudfla… Sonnet 4.6 patch 6 days ago