test_bare_hex_rejection.py
python
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
6 days ago
| 1 | """TDD: bare hex IDs are rejected at every CLI boundary. |
| 2 | |
| 3 | The sha256: prefix is a type tag, not decoration. It tells the system which |
| 4 | algorithm produced the hash. Accepting bare hex at CLI boundaries forecloses |
| 5 | future algorithm agility — if we ever add blake3: IDs, bare hex becomes |
| 6 | fatally ambiguous. |
| 7 | |
| 8 | Architecture note |
| 9 | ----------------- |
| 10 | Enforcement belongs at the CLI outer shell — the hard boundary where untrusted |
| 11 | user input enters the system. Internal functions like resolve_commit_ref() |
| 12 | operate on already-validated input; they are not the primary enforcement point. |
| 13 | Defense-in-depth at the core is a bonus, not the design. |
| 14 | |
| 15 | Rule (always, without exception) |
| 16 | --------------------------------- |
| 17 | - sha256:<64 lowercase hex> — full ID, accepted everywhere. |
| 18 | - sha256:<short prefix> — prefix resolution, accepted. |
| 19 | - <bare hex, any length> — REJECTED at the CLI boundary with a clear error. |
| 20 | |
| 21 | The only place bare hex appears is on disk (filenames) — stripped on write, |
| 22 | restored on read. Users never see it; agents never pass it. |
| 23 | |
| 24 | Covered boundaries |
| 25 | ------------------ |
| 26 | - muse snapshot read <id> |
| 27 | - muse snapshot export <id> |
| 28 | - muse snapshot-diff <ref_a> <ref_b> |
| 29 | - muse verify-commit <id> |
| 30 | """ |
| 31 | |
| 32 | from __future__ import annotations |
| 33 | |
| 34 | import datetime |
| 35 | import json |
| 36 | import pathlib |
| 37 | |
| 38 | from muse.core.types import Manifest, blob_id, long_id, short_id |
| 39 | from muse.core.object_store import write_object |
| 40 | from muse.core.ids import hash_commit, hash_snapshot |
| 41 | from muse.core.commits import ( |
| 42 | CommitRecord, |
| 43 | write_commit, |
| 44 | ) |
| 45 | from muse.core.snapshots import ( |
| 46 | SnapshotRecord, |
| 47 | write_snapshot, |
| 48 | ) |
| 49 | from muse.core.paths import muse_dir, ref_path |
| 50 | from tests.cli_test_helper import CliRunner |
| 51 | |
# Placeholder passed as the first argument to every runner.invoke() call below.
# NOTE(review): presumably the project's CliRunner helper locates the real CLI
# entry point itself and ignores this argument — confirm in tests/cli_test_helper.
cli = None
# Single runner instance shared by every test in this module.
runner = CliRunner()
| 54 | |
| 55 | |
| 56 | # --------------------------------------------------------------------------- |
| 57 | # Helpers |
| 58 | # --------------------------------------------------------------------------- |
| 59 | |
_BARE_HEX_FULL = "a" * 64  # 64 hex chars, no sha256: prefix
_BARE_HEX_SHORT = "abc123def456"  # 12 hex chars, no sha256: prefix
_INVALID_LOOK = "deadbeef"  # 8 hex chars, no sha256: prefix
| 63 | |
| 64 | |
def _init_repo(path: pathlib.Path) -> pathlib.Path:
    """Create a minimal repository layout for *path* and return *path*.

    Inside the directory returned by ``muse_dir(path)`` this creates the
    ``commits``, ``snapshots``, ``objects`` and ``refs/heads`` directories,
    writes a ``HEAD`` file pointing at ``refs/heads/main``, and writes a
    ``repo.json`` with a fixed repo id and the ``code`` domain.
    """
    root = muse_dir(path)
    subdirs = ("commits", "snapshots", "objects", "refs/heads")
    for name in subdirs:
        root.joinpath(name).mkdir(parents=True, exist_ok=True)

    head_file = root / "HEAD"
    head_file.write_text("ref: refs/heads/main", encoding="utf-8")

    repo_meta = {"repo_id": "bare-hex-test", "domain": "code"}
    repo_json = root / "repo.json"
    repo_json.write_text(json.dumps(repo_meta), encoding="utf-8")
    return path
| 74 | |
| 75 | |
| 76 | def _env(repo: pathlib.Path) -> Manifest: |
| 77 | return {"MUSE_REPO_ROOT": str(repo)} |
| 78 | |
| 79 | |
def _obj(repo: pathlib.Path, content: bytes) -> str:
    """Write *content* via ``write_object`` under its ``blob_id`` and return that ID."""
    object_id = blob_id(content)
    write_object(repo, object_id, content)
    return object_id
| 84 | |
| 85 | |
def _snap(repo: pathlib.Path, manifest: Manifest) -> str:
    """Write a snapshot record for *manifest* and return its snapshot ID.

    The ID comes from ``hash_snapshot(manifest)``; the record's creation time
    is pinned to 2026-01-01 UTC.
    """
    snapshot_id = hash_snapshot(manifest)
    fixed_time = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    record = SnapshotRecord(
        snapshot_id=snapshot_id,
        manifest=manifest,
        created_at=fixed_time,
    )
    write_snapshot(repo, record)
    return snapshot_id
| 97 | |
| 98 | |
def _commit(repo: pathlib.Path, sid: str, branch: str = "main") -> str:
    """Write a parentless commit for snapshot *sid* and point *branch* at it.

    The commit uses a fixed message, author and timestamp (2026-01-01 UTC).
    After the record is written, the commit ID is written to the file returned
    by ``ref_path(repo, branch)``.

    Returns the commit ID produced by ``hash_commit``.
    """
    when = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    message = "test"
    author = "tester"

    commit_id = hash_commit(
        parent_ids=[],
        snapshot_id=sid,
        message=message,
        committed_at_iso=when.isoformat(),
        author=author,
    )
    record = CommitRecord(
        commit_id=commit_id,
        branch=branch,
        snapshot_id=sid,
        message=message,
        committed_at=when,
        author=author,
        parent_commit_id=None,
    )
    write_commit(repo, record)

    # Update the branch ref so the branch name (and HEAD via main) resolves.
    ref_path(repo, branch).write_text(commit_id, encoding="utf-8")
    return commit_id
| 123 | |
| 124 | |
def _create_snapshot_and_commit(repo: pathlib.Path) -> tuple[str, str]:
    """Build a one-file snapshot plus a commit on it; return (snapshot_id, commit_id)."""
    blob = _obj(repo, b"hello world")
    snapshot_id = _snap(repo, {"file.txt": blob})
    commit_id = _commit(repo, snapshot_id)
    return snapshot_id, commit_id
| 131 | |
| 132 | |
| 133 | # --------------------------------------------------------------------------- |
| 134 | # muse snapshot read — bare hex must be rejected |
| 135 | # --------------------------------------------------------------------------- |
| 136 | |
| 137 | |
class TestSnapshotReadBareHexRejected:
    """``snapshot read`` refuses bare hex of any length and accepts sha256:-prefixed IDs."""

    @staticmethod
    def _read(repo: pathlib.Path, ref: str):
        """Run ``snapshot read <ref>`` against *repo* and return the runner result."""
        return runner.invoke(cli, ["snapshot", "read", ref], env=_env(repo))

    def test_full_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        outcome = self._read(_init_repo(tmp_path), _BARE_HEX_FULL)
        assert outcome.exit_code != 0, "bare full 64-char hex must be rejected"

    def test_short_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        outcome = self._read(_init_repo(tmp_path), _BARE_HEX_SHORT)
        assert outcome.exit_code != 0, "bare short hex must be rejected"

    def test_8_char_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        outcome = self._read(_init_repo(tmp_path), _INVALID_LOOK)
        assert outcome.exit_code != 0, "any bare hex must be rejected"

    def test_error_message_mentions_sha256_prefix(self, tmp_path: pathlib.Path) -> None:
        outcome = self._read(_init_repo(tmp_path), _BARE_HEX_SHORT)
        assert outcome.exit_code != 0
        # Check stdout first; only fall back to stderr when stdout lacks the hint.
        mentions_prefix = "sha256:" in outcome.output.lower()
        if not mentions_prefix:
            mentions_prefix = "sha256:" in (outcome.stderr or "").lower()
        assert mentions_prefix, (
            "error message must tell the user to use sha256: prefix"
        )

    def test_prefixed_full_id_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._read(repo, snapshot_id)
        assert outcome.exit_code == 0, f"sha256: prefixed full ID must be accepted; got: {outcome.stderr}"

    def test_prefixed_short_id_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        # short_id() abbreviates the full ID before it is handed to the CLI.
        abbreviated = short_id(snapshot_id)
        outcome = self._read(repo, abbreviated)
        assert outcome.exit_code == 0, (
            f"sha256:-prefixed short ID must be accepted; got: {outcome.stderr}"
        )
| 179 | |
| 180 | |
| 181 | # --------------------------------------------------------------------------- |
| 182 | # muse snapshot export — bare hex must be rejected |
| 183 | # --------------------------------------------------------------------------- |
| 184 | |
| 185 | |
class TestSnapshotExportBareHexRejected:
    """``snapshot export`` refuses bare hex and accepts sha256:-prefixed IDs."""

    @staticmethod
    def _export(repo: pathlib.Path, ref: str, tmp_path: pathlib.Path):
        """Run ``snapshot export <ref> --output <tmp_path>/out.tar.gz`` and return the result."""
        archive = tmp_path / "out.tar.gz"
        argv = ["snapshot", "export", ref, "--output", str(archive)]
        return runner.invoke(cli, argv, env=_env(repo))

    def test_full_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        outcome = self._export(repo, _BARE_HEX_FULL, tmp_path)
        assert outcome.exit_code != 0, "bare hex must be rejected by snapshot export"

    def test_short_bare_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        outcome = self._export(repo, _BARE_HEX_SHORT, tmp_path)
        assert outcome.exit_code != 0, "short bare hex must be rejected by snapshot export"

    def test_prefixed_id_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._export(repo, snapshot_id, tmp_path)
        assert outcome.exit_code == 0, f"sha256: prefixed ID must be accepted; got: {outcome.stderr}"

    def test_prefixed_short_id_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        abbreviated = short_id(snapshot_id)
        outcome = self._export(repo, abbreviated, tmp_path)
        assert outcome.exit_code == 0, (
            f"sha256:-prefixed short ID must be accepted; got: {outcome.stderr}"
        )
| 233 | |
| 234 | |
| 235 | # --------------------------------------------------------------------------- |
| 236 | # muse snapshot-diff — bare hex must be rejected for both refs |
| 237 | # --------------------------------------------------------------------------- |
| 238 | |
| 239 | |
class TestSnapshotDiffBareHexRejected:
    """``snapshot-diff`` refuses bare hex in either position; other ref forms still work."""

    @staticmethod
    def _diff(repo: pathlib.Path, ref_a: str, ref_b: str):
        """Run ``snapshot-diff <ref_a> <ref_b>`` against *repo* and return the result."""
        return runner.invoke(cli, ["snapshot-diff", ref_a, ref_b], env=_env(repo))

    @staticmethod
    def _two_snapshots(
        repo: pathlib.Path,
        content_a: bytes = b"version_a",
        content_b: bytes = b"version_b",
    ) -> tuple[str, str]:
        """Store two blobs and wrap each in a one-file snapshot; return both snapshot IDs."""
        blob_a = _obj(repo, content_a)
        blob_b = _obj(repo, content_b)
        first = _snap(repo, {"f.txt": blob_a})
        second = _snap(repo, {"f.txt": blob_b})
        return first, second

    def test_ref_a_bare_hex_full_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._diff(repo, _BARE_HEX_FULL, snapshot_id)
        assert outcome.exit_code != 0, "bare hex in ref_a position must be rejected"

    def test_ref_b_bare_hex_full_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._diff(repo, snapshot_id, _BARE_HEX_FULL)
        assert outcome.exit_code != 0, "bare hex in ref_b position must be rejected"

    def test_ref_a_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._diff(repo, _BARE_HEX_SHORT, snapshot_id)
        assert outcome.exit_code != 0, "short bare hex in ref_a must be rejected"

    def test_ref_b_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        snapshot_id, _ = _create_snapshot_and_commit(repo)
        outcome = self._diff(repo, snapshot_id, _BARE_HEX_SHORT)
        assert outcome.exit_code != 0, "short bare hex in ref_b must be rejected"

    def test_both_prefixed_full_ids_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        first, second = self._two_snapshots(repo)
        outcome = self._diff(repo, first, second)
        assert outcome.exit_code == 0, f"prefixed full IDs must be accepted; got: {outcome.stderr}"

    def test_ref_a_prefixed_short_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        first, second = self._two_snapshots(repo)
        outcome = self._diff(repo, short_id(first), second)
        assert outcome.exit_code == 0, (
            f"sha256:-prefixed short ID in ref_a must be accepted; got: {outcome.stderr}"
        )

    def test_ref_b_prefixed_short_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        first, second = self._two_snapshots(repo)
        outcome = self._diff(repo, first, short_id(second))
        assert outcome.exit_code == 0, (
            f"sha256:-prefixed short ID in ref_b must be accepted; got: {outcome.stderr}"
        )

    def test_both_prefixed_short_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        first, second = self._two_snapshots(repo)
        abbreviated_a = short_id(first)
        abbreviated_b = short_id(second)
        outcome = self._diff(repo, abbreviated_a, abbreviated_b)
        assert outcome.exit_code == 0, (
            f"both sha256:-prefixed short IDs must be accepted; got: {outcome.stderr}"
        )

    def test_branch_name_still_accepted(self, tmp_path: pathlib.Path) -> None:
        """Branch names that are not hex keep resolving as before."""
        repo = _init_repo(tmp_path)
        first, second = self._two_snapshots(repo, b"v1", b"v2")
        _commit(repo, first, branch="main")
        _commit(repo, second, branch="dev")
        outcome = self._diff(repo, "main", "dev")
        assert outcome.exit_code == 0, f"branch names must still resolve; got: {outcome.stderr}"

    def test_head_still_accepted(self, tmp_path: pathlib.Path) -> None:
        """The symbolic ref HEAD keeps resolving as before."""
        repo = _init_repo(tmp_path)
        blob = _obj(repo, b"v1")
        snapshot_id = _snap(repo, {"f.txt": blob})
        _commit(repo, snapshot_id)
        outcome = self._diff(repo, "HEAD", "HEAD")
        assert outcome.exit_code == 0, f"HEAD must still resolve; got: {outcome.stderr}"
| 341 | |
| 342 | |
| 343 | # --------------------------------------------------------------------------- |
| 344 | # muse verify-commit — bare hex must be rejected |
| 345 | # --------------------------------------------------------------------------- |
| 346 | |
| 347 | |
class TestVerifyCommitBareHexRejected:
    """verify-commit must reject bare 64-char hex commit IDs."""

    def test_bare_64hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        result = runner.invoke(
            cli, ["verify-commit", _BARE_HEX_FULL], env=_env(repo)
        )
        assert result.exit_code != 0, "bare 64-char hex must be rejected by verify-commit"

    def test_prefixed_id_not_found_is_not_bare_hex_error(self, tmp_path: pathlib.Path) -> None:
        """A sha256:-prefixed ID that doesn't exist should fail with 'not found', not 'bare hex'."""
        repo = _init_repo(tmp_path)
        prefixed = long_id("b" * 64)
        result = runner.invoke(cli, ["verify-commit", prefixed], env=_env(repo))
        output_combined = result.output + (result.stderr or "")
        # The two conditions are asserted separately. Joined with `or`, the
        # expected non-zero exit made the assertion true on its own, so the
        # "bare" check never had any effect.
        assert result.exit_code != 0, (
            "verify-commit must fail for a commit ID that does not exist"
        )
        # The input carried the sha256: prefix, so the failure reason must not
        # be a bare-hex rejection.
        assert "bare" not in output_combined.lower(), (
            f"sha256:-prefixed ID must not be reported as bare hex; got: {output_combined}"
        )
| 368 | |
| 369 | |
| 370 | # --------------------------------------------------------------------------- |
| 371 | # muse read — bare hex must be rejected at the CLI boundary |
| 372 | # --------------------------------------------------------------------------- |
| 373 | |
| 374 | |
class TestReadBareHexRejected:
    """muse read must reject bare hex commit refs.

    read uses resolve_commit_ref() — the CLI layer must catch bare hex before
    that function is ever called. resolve_commit_ref() itself is internal and
    is not the enforcement point.
    """

    # Every test here invokes the ``read`` command, so failure messages name
    # ``read`` and point at the command that actually ran.

    def test_bare_full_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        result = runner.invoke(cli, ["read", _BARE_HEX_FULL], env=_env(repo))
        assert result.exit_code != 0, "bare 64-char hex must be rejected by read"

    def test_bare_short_hex_rejected(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        result = runner.invoke(cli, ["read", _BARE_HEX_SHORT], env=_env(repo))
        assert result.exit_code != 0, "bare short hex must be rejected by read"

    def test_prefixed_full_id_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _obj(repo, b"content")
        sid = _snap(repo, {"f.txt": oid})
        cid = _commit(repo, sid)
        result = runner.invoke(cli, ["read", cid], env=_env(repo))
        assert result.exit_code == 0, f"sha256:-prefixed full commit ID must be accepted; got: {result.stderr}"

    def test_branch_name_still_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _obj(repo, b"content")
        sid = _snap(repo, {"f.txt": oid})
        _commit(repo, sid, branch="main")
        result = runner.invoke(cli, ["read", "main"], env=_env(repo))
        assert result.exit_code == 0, f"branch name must still resolve via read; got: {result.stderr}"

    def test_head_still_accepted(self, tmp_path: pathlib.Path) -> None:
        repo = _init_repo(tmp_path)
        oid = _obj(repo, b"content")
        sid = _snap(repo, {"f.txt": oid})
        _commit(repo, sid, branch="main")
        result = runner.invoke(cli, ["read", "HEAD"], env=_env(repo))
        assert result.exit_code == 0, f"HEAD must still resolve via read; got: {result.stderr}"
File History
1 commit
sha256:2eaa5d95f9d9383498e76947410a26e5a3ba23d182f339910c424cf88fad412b
fix: try fetch/presign before fetch/mpack to avoid Cloudfla…
Sonnet 4.6
patch
6 days ago