test_coord_security.py
python
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠ breaking
28 days ago
| 1 | """EXTREME security test suite for the ``muse coord`` subcommand layer. |
| 2 | |
| 3 | Scenario: Linus Torvalds has migrated the Linux kernel to Muse. 50 AI agents |
| 4 | coordinate continuously via ``muse coord sync push/pull``. A compromised hub |
| 5 | server, a rogue agent, a man-in-the-middle, and a malicious repo all try |
| 6 | simultaneously to: |
| 7 | |
| 8 | 1. Read local files via SSRF through a ``file://`` hub URL. |
| 9 | 2. Redirect auth tokens to an attacker-controlled server. |
| 10 | 3. Inject HTTP headers by encoding CRLF into the signing credential. |
| 11 | 4. Break out of the ``remote/`` coordination directory via traversal in |
| 12 | ``kind`` or ``record_id`` fields returned by the hub. |
| 13 | 5. Corrupt the terminal with ANSI/OSC/BEL/CSI escape sequences injected |
| 14 | into owner, slug, or error payloads. |
| 15 | 6. Exfiltrate the signing credential through error messages, logs, or JSON output. |
| 16 | 7. Crash the coord layer with adversarial response bodies. |
| 17 | 8. Bypass input validation on ``--owner``, ``--slug``, ``--since-id``, |
| 18 | ``--limit``, and GC parameters. |
| 19 | |
| 20 | Coverage matrix |
| 21 | --------------- |
| 22 | SSRF via hub_url (scheme injection) |
| 23 | * ``file://`` hub URL raises CoordBusError — file content NOT in error |
| 24 | * ``file://`` hub URL pointing to valid JSON raises CoordBusError cleanly |
| 25 | * Error message from ``file://`` attempt does not include file content |
| 26 | * Adversarial hub_url with embedded path components normalizes correctly |
| 27 | * Multiple trailing slashes in hub_url are stripped, correct URL built |
| 28 | * IPv6 localhost ``[::1]`` hub URL builds correct URL structure |
| 29 | * hub_url with user-info ``user:pass@host`` — token NOT duplicated in URL |
| 30 | * Data URI hub_url raises CoordBusError |
| 31 | |
| 32 | Redirect blocking (credential leakage prevention) |
| 33 | * push_to_hub with 301 response raises CoordBusError "Redirect refused" |
| 34 | * push_to_hub with 302 response raises CoordBusError |
| 35 | * push_to_hub with 307 response raises CoordBusError |
| 36 | * push_to_hub with 308 response raises CoordBusError |
| 37 | * pull_from_hub with 301 raises CoordBusError |
| 38 | * Redirect error message contains destination URL (for debugging) |
| 39 | * Redirect does NOT silently follow — auth token never sent to new host |
| 40 | |
| 41 | HTTP header injection via token |
| 42 | * Token with ``\r\n`` rejected by sanitize_token (CRLF injection) |
| 43 | * Token with ``\r`` alone rejected |
| 44 | * Token with ``\n`` alone rejected |
| 45 | * Token with null byte rejected |
| 46 | * Token with C0 control char (BEL ``\x07``) rejected |
| 47 | * Token at exactly MAX length (8 192 chars) accepted |
| 48 | * Token one char over MAX length rejected (None returned) |
| 49 | * Token with only whitespace returns None (empty after strip) |
| 50 | |
| 51 | URL structure: owner/slug encoding prevents injection |
| 52 | * ``owner="user@host"`` → ``%40`` in URL, never parsed as user-info |
| 53 | * ``owner="#fragment"`` → ``%23`` in URL, never parsed as fragment |
| 54 | * ``owner="?q=1"`` → ``%3F`` in URL, never parsed as query string |
| 55 | * ``owner="a/b"`` → ``%2F`` in URL, owner cannot span path segments |
| 56 | * ``owner="../../etc"`` → slashes %-encoded, cannot escape path |
| 57 | * ``slug="../../admin"`` → slashes %-encoded |
| 58 | * Newline in owner/slug → %-encoded, cannot split HTTP request line |
| 59 | * owner+slug encoded chars never re-appear as literal / in URL path |
| 60 | |
| 61 | Error body leakage prevention |
| 62 | * HTTP 403 body: included in error but truncated to ≤ 200 chars |
| 63 | * HTTP 500 body: included but ANSI escape codes stripped |
| 64 | * HTTP error body with C1 control chars (``\x80``–``\x9f``) stripped |
| 65 | * Network error reason string: ANSI stripped, capped at 200 chars |
| 66 | * 401 body: COMPLETELY masked regardless of content |
| 67 | * 401 error message reveals no body even when body is valid JSON |
| 68 | * Very long error body: truncated, not fully included in CoordBusError |
| 69 | * Zero-byte error body: produces clean "HTTP <code>" message |
| 70 | |
| 71 | Terminal injection: ANSI/OSC/BEL/CSI in user-controlled strings |
| 72 | * OSC escape ``\\x1b]0;title\\x07`` in --owner stripped in text push output |
| 73 | * BEL ``\\x07`` in --slug stripped in text push output |
| 74 | * C1 CSI ``\\x9b[2J`` in --owner stripped in text pull output |
| 75 | * Full ANSI clear-screen ``\\x1b[H\\x1b[J`` in --slug stripped in text output |
| 76 | * ``\\x1b[31m`` in --owner stripped, no red text bleeds into terminal |
| 77 | * All control chars in --owner stripped: no ESC anywhere in output |
| 78 | |
| 79 | Token leakage: signing credential must never appear in any output or log |
| 80 | * Token NOT in push JSON success output |
| 81 | * Token NOT in pull JSON success output |
| 82 | * Token NOT in push JSON error output (CoordBusError) |
| 83 | * Token NOT in pull JSON error output |
| 84 | * Token NOT in push text success output |
| 85 | * Token NOT in pull text success output |
| 86 | * Token NOT in any Python log record emitted during push |
| 87 | * Token NOT in CoordBusError string representation |
| 88 | |
| 89 | Malicious hub response: adversarial records must not escape ``remote/`` |
| 90 | * kind = ``"../../../.ssh"`` → rejected (not in _ALL_KINDS), no write |
| 91 | * kind = ``""`` → rejected, no write |
| 92 | * kind = ``"RESERVATION"`` (wrong case) → rejected |
| 93 | * record_id = ``"../../.ssh/authorized_keys"`` → rejected by regex |
| 94 | * record_id = ``"foo\\x00bar"`` (null byte) → rejected |
| 95 | * record_id = 129-char string → rejected (> 128 max) |
| 96 | * record_id = ``"foo bar"`` (space) → rejected |
| 97 | * record_id = ``"foo\\nbar"`` (newline) → rejected |
| 98 | * Payload containing ``{"__import__": "os"}`` safely serialized, not executed |
| 99 | * 1 000-record response with all adversarial record_ids → 0 files written |
| 100 | |
| 101 | GC input validation |
| 102 | * --grace-period -1 → exit 1 before any file I/O |
| 103 | * --grace-period 0 → accepted (valid: no grace window) |
| 104 | * --max-intent-age 0 → exit 1 |
| 105 | * --max-intent-age -1 → exit 1 |
| 106 | * --max-intent-age MAX_INT → accepted |
| 107 | * GC dry-run mode never deletes any file even with collectable records |
| 108 | * GC verbose output with record_ids is clean (no ANSI injection vector) |
| 109 | |
| 110 | CLI input validation hardening |
| 111 | * --owner at exactly _MAX_OWNER_LEN → accepted |
| 112 | * --owner at _MAX_OWNER_LEN + 1 → exit 1, no file I/O |
| 113 | * --slug at exactly _MAX_SLUG_LEN → accepted |
| 114 | * --slug at _MAX_SLUG_LEN + 1 → exit 1, no file I/O |
| 115 | * --since-id at MAX Python int (sys.maxsize) → accepted (no overflow) |
| 116 | * --limit = 0 → exit 1 |
| 117 | * --limit = _MAX_PULL_LIMIT → accepted |
| 118 | * --limit = _MAX_PULL_LIMIT + 1 → exit 1 |
| 119 | """ |
| 120 | |
| 121 | from __future__ import annotations |
| 122 | |
| 123 | import argparse |
| 124 | import json |
| 125 | import logging |
| 126 | import pathlib |
| 127 | import sys |
| 128 | import itertools |
| 129 | from io import BytesIO |
| 130 | from unittest.mock import MagicMock, patch |
| 131 | |
| 132 | import pytest |
| 133 | import urllib.error |
| 134 | |
| 135 | from tests.cli_test_helper import CliRunner |
| 136 | from muse.core.types import content_hash, MsgpackDict |
| 137 | from muse.core.coord_bus import ( |
| 138 | CoordBusError, |
| 139 | MAX_PUSH_BATCH, |
| 140 | MAX_PULL_LIMIT, |
| 141 | _build_url, |
| 142 | _NoRedirectHandler, |
| 143 | push_to_hub, |
| 144 | pull_from_hub, |
| 145 | ) |
| 146 | from muse.core.validation import sanitize_token, _MAX_TOKEN_LEN |
| 147 | from muse.cli.commands.coord_sync import ( |
| 148 | _ALL_KINDS, |
| 149 | _MAX_OWNER_LEN, |
| 150 | _MAX_SLUG_LEN, |
| 151 | _MAX_PULL_LIMIT, |
| 152 | _SAFE_RECORD_ID_RE, |
| 153 | _write_remote_records, |
| 154 | ) |
| 155 | from muse.core.paths import coordination_dir, muse_dir |
| 156 | |
| 157 | runner = CliRunner() |
| 158 | cli = None |
| 159 | |
| 160 | _id_seq = itertools.count() |
| 161 | |
| 162 | def _new_id() -> str: |
| 163 | return content_hash({"seq": next(_id_seq)}) |
| 164 | |
| 165 | # --------------------------------------------------------------------------- |
| 166 | # Patch targets |
| 167 | # --------------------------------------------------------------------------- |
| 168 | |
| 169 | _OPENER = "muse.core.coord_bus._STRICT_OPENER.open" |
| 170 | _PUSH_TARGET = "muse.cli.commands.coord_sync.push_to_hub" |
| 171 | _PULL_TARGET = "muse.cli.commands.coord_sync.pull_from_hub" |
| 172 | _REQUIRE_REPO = "muse.cli.commands.coord_sync.require_repo" |
| 173 | _RESOLVE_HUB = "muse.cli.commands.coord_sync._resolve_hub_and_signing" |
| 174 | |
| 175 | # --------------------------------------------------------------------------- |
| 176 | # Helpers |
| 177 | # --------------------------------------------------------------------------- |
| 178 | |
| 179 | _SECRET = "NEVER-LEAK-THIS-TOKEN-3k9mXpQz7vR4wY" |
| 180 | |
| 181 | |
| 182 | def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: |
| 183 | muse_dir(tmp_path).mkdir() |
| 184 | return tmp_path |
| 185 | |
| 186 | |
| 187 | @pytest.fixture |
| 188 | def repo(tmp_path: pathlib.Path) -> pathlib.Path: |
| 189 | return _make_repo(tmp_path) |
| 190 | |
| 191 | |
| 192 | def _mock_response(body: MsgpackDict) -> MagicMock: |
| 193 | raw = json.dumps(body).encode("utf-8") |
| 194 | resp = MagicMock() |
| 195 | resp.read.return_value = raw |
| 196 | resp.__enter__ = lambda s: s |
| 197 | resp.__exit__ = MagicMock(return_value=False) |
| 198 | return resp |
| 199 | |
| 200 | |
| 201 | def _make_record(kind: str = "reservation") -> MsgpackDict: |
| 202 | return { |
| 203 | "kind": kind, |
| 204 | "record_id": _new_id(), |
| 205 | "run_id": "agent-linux", |
| 206 | "payload": {"note": "kernel coord"}, |
| 207 | } |
| 208 | |
| 209 | |
| 210 | def _push_args(owner: str = "gabriel", slug: str = "linux") -> list[str]: |
| 211 | return [ |
| 212 | "coord", "sync", "push", |
| 213 | "--hub", "https://localhost:1337", |
| 214 | "--owner", owner, |
| 215 | "--slug", slug, |
| 216 | ] |
| 217 | |
| 218 | |
| 219 | def _pull_args(owner: str = "gabriel", slug: str = "linux") -> list[str]: |
| 220 | return [ |
| 221 | "coord", "sync", "pull", |
| 222 | "--hub", "https://localhost:1337", |
| 223 | "--owner", owner, |
| 224 | "--slug", slug, |
| 225 | "--since-id", "0", |
| 226 | ] |
| 227 | |
| 228 | |
| 229 | # =========================================================================== |
| 230 | # 1. SSRF VIA SCHEME INJECTION |
| 231 | # =========================================================================== |
| 232 | |
| 233 | |
| 234 | class TestCoordSecuritySSRF: |
| 235 | """A compromised user or rogue config supplies a non-HTTP hub URL.""" |
| 236 | |
| 237 | def test_file_scheme_raises_coord_bus_error(self, tmp_path: pathlib.Path) -> None: |
| 238 | """``file://`` hub URL must raise CoordBusError, not silently succeed.""" |
| 239 | json_file = tmp_path / "data.json" |
| 240 | json_file.write_text('{"inserted": 999, "skipped": 0}', encoding="utf-8") |
| 241 | records = [_make_record()] |
| 242 | with pytest.raises(CoordBusError): |
| 243 | push_to_hub(f"file://{tmp_path}", "owner", "repo", records, signing=None) |
| 244 | |
| 245 | def test_file_scheme_error_does_not_expose_file_content( |
| 246 | self, tmp_path: pathlib.Path |
| 247 | ) -> None: |
| 248 | """File content must not be present in any CoordBusError message.""" |
| 249 | secret_data = "SECRET_FILE_CONTENT_XYZZY_NEVER_EXPOSE" |
| 250 | json_file = tmp_path / "secret.json" |
| 251 | json_file.write_text(f'"{secret_data}"', encoding="utf-8") # valid JSON string |
| 252 | records = [_make_record()] |
| 253 | try: |
| 254 | push_to_hub(f"file://{tmp_path}", "owner", "repo", records, signing=None) |
| 255 | except CoordBusError as exc: |
| 256 | assert secret_data not in str(exc), "File content leaked in error message!" |
| 257 | except Exception: |
| 258 | pass # Any exception is acceptable — the point is no content leak |
| 259 | |
| 260 | def test_file_scheme_valid_json_target_still_raises_coord_bus_error( |
| 261 | self, tmp_path: pathlib.Path |
| 262 | ) -> None: |
| 263 | """Even if file:// target is valid JSON, the call must raise CoordBusError |
| 264 | because the URL is not a valid HTTP endpoint.""" |
| 265 | # A real .muse/config.json style file — valid JSON but wrong endpoint |
| 266 | data_file = tmp_path / "valid.json" |
| 267 | data_file.write_text('{"inserted": 5, "skipped": 0}', encoding="utf-8") |
| 268 | records = [_make_record()] |
| 269 | with pytest.raises(CoordBusError): |
| 270 | # The path needs to point to the file itself for file:// to work |
| 271 | push_to_hub(f"file://{tmp_path}", "owner", "repo", records, signing=None) |
| 272 | |
| 273 | def test_hub_url_multiple_trailing_slashes_normalized(self) -> None: |
| 274 | """Multiple trailing slashes are stripped; URL structure is correct.""" |
| 275 | url = _build_url("https://localhost:1337///", "gabriel", "linux", "coord/push") |
| 276 | assert not url.endswith("///coord/push") |
| 277 | assert url.endswith("/coord/push") |
| 278 | assert url.count("///") == 0 |
| 279 | |
| 280 | def test_hub_url_with_path_components_does_not_inject_segments(self) -> None: |
| 281 | """hub_url with embedded path components must not inject extra segments.""" |
| 282 | url = _build_url( |
| 283 | "https://localhost:1337/api/v1", "gabriel", "linux", "coord/push" |
| 284 | ) |
| 285 | # Owner and slug must appear in the correct positions |
| 286 | assert "/gabriel/linux/coord/push" in url |
| 287 | |
| 288 | def test_ipv6_localhost_hub_url_builds_correctly(self) -> None: |
| 289 | """IPv6 loopback address must produce a well-formed URL.""" |
| 290 | url = _build_url("http://[::1]:10003", "gabriel", "linux", "coord/push") |
| 291 | assert url.startswith("http://[::1]:10003/") |
| 292 | assert url.endswith("coord/push") |
| 293 | |
| 294 | def test_push_url_never_contains_token(self) -> None: |
| 295 | records = [_make_record()] |
| 296 | with patch(_OPENER) as mock_open: |
| 297 | mock_open.return_value = _mock_response({"inserted": 1, "skipped": 0}) |
| 298 | push_to_hub("http://hub", "gabriel", "repo", records, signing=None) |
| 299 | req = mock_open.call_args[0][0] |
| 300 | assert _SECRET not in req.full_url |
| 301 | |
| 302 | def test_pull_url_never_contains_token(self) -> None: |
| 303 | with patch(_OPENER) as mock_open: |
| 304 | mock_open.return_value = _mock_response({"records": [], "cursor": 0}) |
| 305 | pull_from_hub("http://hub", "gabriel", "repo", signing=None) |
| 306 | req = mock_open.call_args[0][0] |
| 307 | assert _SECRET not in req.full_url |
| 308 | |
| 309 | |
| 310 | # =========================================================================== |
| 311 | # 2. REDIRECT BLOCKING (CREDENTIAL LEAKAGE PREVENTION) |
| 312 | # =========================================================================== |
| 313 | |
| 314 | |
| 315 | class TestCoordSecurityRedirect: |
| 316 | """A MITM or compromised hub issues redirects to steal the auth token.""" |
| 317 | |
| 318 | def _make_redirect_error(self, code: int, newurl: str = "http://attacker.example.com/steal") -> urllib.error.HTTPError: |
| 319 | headers = MagicMock() |
| 320 | fp = BytesIO(b"") |
| 321 | return urllib.error.HTTPError( |
| 322 | "http://hub/coord/push", |
| 323 | code, |
| 324 | f"Redirect {code}", |
| 325 | headers, |
| 326 | fp, |
| 327 | ) |
| 328 | |
| 329 | def test_push_301_redirect_raises_coord_bus_error(self) -> None: |
| 330 | """301 Moved Permanently must raise CoordBusError, not follow.""" |
| 331 | # Simulate _NoRedirectHandler raising HTTPError on redirect |
| 332 | handler = _NoRedirectHandler() |
| 333 | req = MagicMock() |
| 334 | req.full_url = "http://hub/coord/push" |
| 335 | fp = MagicMock() |
| 336 | headers = MagicMock() |
| 337 | with pytest.raises(urllib.error.HTTPError) as exc_info: |
| 338 | handler.redirect_request( |
| 339 | req, fp, 301, "Moved Permanently", headers, "http://attacker.example.com/steal" |
| 340 | ) |
| 341 | assert "Redirect refused" in str(exc_info.value.msg) |
| 342 | |
| 343 | def test_push_302_redirect_raises(self) -> None: |
| 344 | handler = _NoRedirectHandler() |
| 345 | req = MagicMock() |
| 346 | req.full_url = "http://hub/coord/push" |
| 347 | fp = MagicMock() |
| 348 | headers = MagicMock() |
| 349 | with pytest.raises(urllib.error.HTTPError): |
| 350 | handler.redirect_request(req, fp, 302, "Found", headers, "http://attacker.example.com/") |
| 351 | |
| 352 | def test_push_307_redirect_raises(self) -> None: |
| 353 | handler = _NoRedirectHandler() |
| 354 | req = MagicMock() |
| 355 | req.full_url = "http://hub/coord/push" |
| 356 | fp = MagicMock() |
| 357 | headers = MagicMock() |
| 358 | with pytest.raises(urllib.error.HTTPError): |
| 359 | handler.redirect_request(req, fp, 307, "Temporary Redirect", headers, "http://attacker.example.com/") |
| 360 | |
| 361 | def test_push_308_redirect_raises(self) -> None: |
| 362 | handler = _NoRedirectHandler() |
| 363 | req = MagicMock() |
| 364 | req.full_url = "http://hub/coord/push" |
| 365 | fp = MagicMock() |
| 366 | headers = MagicMock() |
| 367 | with pytest.raises(urllib.error.HTTPError): |
| 368 | handler.redirect_request(req, fp, 308, "Permanent Redirect", headers, "http://attacker.example.com/") |
| 369 | |
| 370 | def test_full_push_to_hub_301_raises_coord_bus_error(self) -> None: |
| 371 | """End-to-end: push_to_hub with a 301 response raises CoordBusError.""" |
| 372 | redirect_err = urllib.error.HTTPError( |
| 373 | "http://hub/coord/push", |
| 374 | 301, |
| 375 | "Redirect refused (301): server tried to redirect to 'http://attacker.example.com/'. Update the configured remote URL to the final destination.", |
| 376 | MagicMock(), |
| 377 | BytesIO(b""), |
| 378 | ) |
| 379 | records = [_make_record()] |
| 380 | with patch(_OPENER, side_effect=redirect_err): |
| 381 | with pytest.raises(CoordBusError) as exc_info: |
| 382 | push_to_hub("http://hub", "gabriel", "repo", records, signing=None) |
| 383 | # Must surface as CoordBusError, not silently succeed |
| 384 | assert exc_info.value.status_code == 301 |
| 385 | |
| 386 | def test_full_pull_from_hub_301_raises_coord_bus_error(self) -> None: |
| 387 | redirect_err = urllib.error.HTTPError( |
| 388 | "http://hub/coord/pull", |
| 389 | 301, |
| 390 | "Redirect refused (301): server tried to redirect to 'http://attacker.example.com/'. Update the configured remote URL to the final destination.", |
| 391 | MagicMock(), |
| 392 | BytesIO(b""), |
| 393 | ) |
| 394 | with patch(_OPENER, side_effect=redirect_err): |
| 395 | with pytest.raises(CoordBusError) as exc_info: |
| 396 | pull_from_hub("http://hub", "gabriel", "repo", signing=None) |
| 397 | assert exc_info.value.status_code == 301 |
| 398 | |
| 399 | def test_redirect_error_contains_refused_message(self) -> None: |
| 400 | """Error message must say 'Redirect refused' so the user understands why.""" |
| 401 | handler = _NoRedirectHandler() |
| 402 | req = MagicMock() |
| 403 | req.full_url = "http://hub/coord/push" |
| 404 | with pytest.raises(urllib.error.HTTPError) as exc_info: |
| 405 | handler.redirect_request( |
| 406 | req, MagicMock(), 302, "Found", MagicMock(), "http://attacker.example.com/" |
| 407 | ) |
| 408 | assert "Redirect refused" in exc_info.value.msg |
| 409 | |
| 410 | |
| 411 | # =========================================================================== |
| 412 | # 3. HTTP HEADER INJECTION VIA TOKEN (CRLF) |
| 413 | # =========================================================================== |
| 414 | |
| 415 | |
| 416 | class TestCoordSecurityHeaderInjection: |
| 417 | """An attacker tries to inject extra HTTP headers by poisoning the token.""" |
| 418 | |
| 419 | def test_token_with_crlf_rejected(self) -> None: |
| 420 | """CRLF in token → HTTP header injection → must return None.""" |
| 421 | malicious = "valid-prefix\r\nAuthorization: MSign attacker-token" |
| 422 | assert sanitize_token(malicious) is None |
| 423 | |
| 424 | def test_token_with_cr_only_rejected(self) -> None: |
| 425 | malicious = "valid\rAuthorization: MSign injected" |
| 426 | assert sanitize_token(malicious) is None |
| 427 | |
| 428 | def test_token_with_lf_only_rejected(self) -> None: |
| 429 | malicious = "valid\nX-Custom-Header: injected" |
| 430 | assert sanitize_token(malicious) is None |
| 431 | |
| 432 | def test_token_with_null_byte_rejected(self) -> None: |
| 433 | malicious = "valid\x00malicious" |
| 434 | assert sanitize_token(malicious) is None |
| 435 | |
| 436 | def test_token_with_bel_control_char_rejected(self) -> None: |
| 437 | malicious = "valid\x07bel" |
| 438 | assert sanitize_token(malicious) is None |
| 439 | |
| 440 | def test_token_with_esc_rejected(self) -> None: |
| 441 | malicious = "valid\x1b[31mred" |
| 442 | assert sanitize_token(malicious) is None |
| 443 | |
| 444 | def test_token_at_max_length_accepted(self) -> None: |
| 445 | """Token exactly at 8 192 chars (MAX) must be accepted.""" |
| 446 | long_valid = "a" * _MAX_TOKEN_LEN |
| 447 | result = sanitize_token(long_valid) |
| 448 | assert result == long_valid |
| 449 | |
| 450 | def test_token_one_over_max_length_rejected(self) -> None: |
| 451 | too_long = "a" * (_MAX_TOKEN_LEN + 1) |
| 452 | assert sanitize_token(too_long) is None |
| 453 | |
| 454 | def test_token_whitespace_only_returns_none(self) -> None: |
| 455 | assert sanitize_token(" \t \n ") is None |
| 456 | |
| 457 | def test_token_empty_string_returns_none(self) -> None: |
| 458 | assert sanitize_token("") is None |
| 459 | |
| 460 | def test_base64url_dotted_token_accepted(self) -> None: |
| 461 | """Dotted base64url strings (e.g. opaque tokens) must not be rejected.""" |
| 462 | token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva" |
| 463 | assert sanitize_token(token) == token |
| 464 | |
| 465 | def test_token_with_c1_control_char_rejected(self) -> None: |
| 466 | """C1 control chars (0x80-0x9F) must be rejected.""" |
| 467 | malicious = "valid\x80injection" |
| 468 | assert sanitize_token(malicious) is None |
| 469 | |
| 470 | |
| 471 | # =========================================================================== |
| 472 | # 4. URL STRUCTURE: OWNER/SLUG ENCODING |
| 473 | # =========================================================================== |
| 474 | |
| 475 | |
| 476 | class TestCoordSecurityURLEncoding: |
| 477 | """Every special character in owner/slug must be %-encoded, never literal.""" |
| 478 | |
| 479 | def test_owner_at_sign_encoded_not_user_info(self) -> None: |
| 480 | """``@`` in owner must become ``%40`` — never parsed as user-info.""" |
| 481 | url = _build_url("http://hub", "user@host", "repo", "coord/push") |
| 482 | # Must not have user-info syntax |
| 483 | assert "user@host" not in url |
| 484 | assert "%40" in url |
| 485 | # Scheme+authority part must not have user:pass@host form |
| 486 | host_part = url.split("://")[1].split("/")[0] |
| 487 | assert "@" not in host_part |
| 488 | |
| 489 | def test_owner_hash_encoded_not_fragment(self) -> None: |
| 490 | url = _build_url("http://hub", "#malicious", "repo", "coord/push") |
| 491 | assert "#" not in url.split("hub")[1] # no literal # after host |
| 492 | assert "%23" in url |
| 493 | |
| 494 | def test_owner_question_mark_encoded_not_query(self) -> None: |
| 495 | url = _build_url("http://hub", "?q=1", "repo", "coord/push") |
| 496 | assert "%3F" in url |
| 497 | # No literal ? means no query string was injected |
| 498 | assert "?" not in url.split("hub")[1] |
| 499 | |
| 500 | def test_owner_slash_encoded_cannot_span_path_segments(self) -> None: |
| 501 | url = _build_url("http://hub", "a/b", "repo", "coord/push") |
| 502 | # Internal / must be %-encoded; owner cannot occupy 2 path segments |
| 503 | assert "%2F" in url |
| 504 | path = url.split("hub")[1] # /a%2Fb/repo/coord/push |
| 505 | parts = path.lstrip("/").split("/") |
| 506 | # parts[0] = "a%2Fb", parts[1] = "repo", parts[2] = "coord", parts[3] = "push" |
| 507 | assert "repo" in parts[1] |
| 508 | |
| 509 | def test_owner_path_traversal_slashes_encoded(self) -> None: |
| 510 | url = _build_url("http://hub", "../../etc", "passwd", "coord/push") |
| 511 | assert "%2F" in url # slashes within owner are encoded |
| 512 | # The hub host must not be followed by a literal ../ |
| 513 | path = url.split("://", 1)[1].split("/", 1)[1] # strip host |
| 514 | assert not path.startswith("../") |
| 515 | assert not path.startswith("..%2F..%2F..%2F") # still points past hub |
| 516 | |
| 517 | def test_slug_path_traversal_encoded(self) -> None: |
| 518 | url = _build_url("http://hub", "gabriel", "../../admin", "coord/push") |
| 519 | assert "%2F" in url |
| 520 | # Structure: /gabriel/<encoded-slug>/coord/push |
| 521 | path = url.split("hub")[1] |
| 522 | parts = path.lstrip("/").split("/") |
| 523 | assert parts[0] == "gabriel" |
| 524 | assert "admin" not in parts[2] # admin must be inside encoded slug |
| 525 | |
| 526 | def test_newline_in_owner_percent_encoded(self) -> None: |
| 527 | """Newline in owner would split the HTTP request line — must be encoded.""" |
| 528 | url = _build_url("http://hub", "eve\nGET /malicious HTTP/1.1", "repo", "coord/push") |
| 529 | assert "\n" not in url |
| 530 | assert "%0A" in url |
| 531 | |
| 532 | def test_space_in_slug_percent_encoded(self) -> None: |
| 533 | url = _build_url("http://hub", "gabriel", "my repo", "coord/push") |
| 534 | assert " " not in url |
| 535 | assert "%20" in url |
| 536 | |
| 537 | def test_all_special_chars_in_owner_encoded(self) -> None: |
| 538 | """Broad sweep: each dangerous char individually.""" |
| 539 | # Note: % itself is expected to appear as %25 in the encoded segment, |
| 540 | # so we exclude it from this check. All other chars must be fully encoded. |
| 541 | dangerous = ["/", "\\", "?", "#", "@", ";", "=", "&", "+"] |
| 542 | for ch in dangerous: |
| 543 | url = _build_url("http://hub", f"malicious{ch}owner", "repo", "coord/push") |
| 544 | assert ch not in url.split("hub/")[1].split("/")[0], ( |
| 545 | f"Literal '{ch}' found unencoded in owner segment of URL: {url}" |
| 546 | ) |
| 547 | |
| 548 | def test_encoded_owner_slug_do_not_escape_to_different_path_positions(self) -> None: |
| 549 | """Crafted owner/slug must not reach the 'coord/push' endpoint position.""" |
| 550 | url = _build_url("http://hub", "gabriel", "coord%2Fpush/malicious", "coord/push") |
| 551 | # The real endpoint must still be at the end |
| 552 | assert url.endswith("/coord/push") |
| 553 | |
| 554 | |
| 555 | # =========================================================================== |
| 556 | # 5. ERROR BODY LEAKAGE PREVENTION |
| 557 | # =========================================================================== |
| 558 | |
| 559 | |
| 560 | class TestCoordSecurityErrorLeakage: |
| 561 | """A malicious hub embeds sensitive data or attack payloads in error bodies.""" |
| 562 | |
| 563 | def test_http_403_body_truncated_to_200_chars(self) -> None: |
| 564 | long_body = "X" * 1000 |
| 565 | err = urllib.error.HTTPError( |
| 566 | "http://hub", 403, "Forbidden", {}, BytesIO(long_body.encode()) |
| 567 | ) |
| 568 | records = [_make_record()] |
| 569 | with patch(_OPENER, side_effect=err): |
| 570 | with pytest.raises(CoordBusError) as exc_info: |
| 571 | push_to_hub("http://hub", "g", "r", records, signing=None) |
| 572 | # Body truncated to 200 chars + "HTTP 403: " prefix is manageable |
| 573 | msg = str(exc_info.value) |
| 574 | body_portion = msg.replace("HTTP 403: ", "") |
| 575 | assert len(body_portion) <= 200, f"Body not truncated: len={len(body_portion)}" |
| 576 | |
| 577 | def test_http_500_body_ansi_codes_stripped(self) -> None: |
| 578 | ansi_body = "\x1b[31mSECRET_500_BODY\x1b[0m" |
| 579 | err = urllib.error.HTTPError( |
| 580 | "http://hub", 500, "Internal Server Error", {}, BytesIO(ansi_body.encode()) |
| 581 | ) |
| 582 | records = [_make_record()] |
| 583 | with patch(_OPENER, side_effect=err): |
| 584 | with pytest.raises(CoordBusError) as exc_info: |
| 585 | push_to_hub("http://hub", "g", "r", records, signing=None) |
| 586 | assert "\x1b" not in str(exc_info.value) |
| 587 | |
| 588 | def test_http_error_body_c1_control_chars_stripped(self) -> None: |
| 589 | """C1 chars (0x80–0x9F) in error body must be stripped.""" |
| 590 | c1_body = "error: \x80\x9b\x9f malformed" |
| 591 | err = urllib.error.HTTPError( |
| 592 | "http://hub", 422, "Unprocessable", {}, BytesIO(c1_body.encode("latin-1")) |
| 593 | ) |
| 594 | records = [_make_record()] |
| 595 | with patch(_OPENER, side_effect=err): |
| 596 | with pytest.raises(CoordBusError) as exc_info: |
| 597 | push_to_hub("http://hub", "g", "r", records, signing=None) |
| 598 | msg = str(exc_info.value) |
| 599 | for ch in ["\x80", "\x9b", "\x9f"]: |
| 600 | assert ch not in msg, f"C1 char {ch!r} not stripped from error message" |
| 601 | |
| 602 | def test_network_error_reason_ansi_stripped(self) -> None: |
| 603 | """URLError reason with ANSI must be stripped before including in CoordBusError.""" |
| 604 | ansi_reason = "\x1b[31mConnection refused\x1b[0m" |
| 605 | err = urllib.error.URLError(ansi_reason) |
| 606 | records = [_make_record()] |
| 607 | with patch(_OPENER, side_effect=err): |
| 608 | with pytest.raises(CoordBusError) as exc_info: |
| 609 | push_to_hub("http://hub", "g", "r", records, signing=None) |
| 610 | assert "\x1b" not in str(exc_info.value) |
| 611 | |
| 612 | def test_network_error_reason_length_limited(self) -> None: |
| 613 | """Very long URLError reason must be capped at 200 chars in error message.""" |
| 614 | long_reason = "A" * 10_000 |
| 615 | err = urllib.error.URLError(long_reason) |
| 616 | records = [_make_record()] |
| 617 | with patch(_OPENER, side_effect=err): |
| 618 | with pytest.raises(CoordBusError) as exc_info: |
| 619 | push_to_hub("http://hub", "g", "r", records, signing=None) |
| 620 | # The reason is capped at 200 chars in _post_json |
| 621 | msg = str(exc_info.value) |
| 622 | assert "A" * 201 not in msg |
| 623 | |
| 624 | def test_http_401_body_completely_masked(self) -> None: |
| 625 | """401 body must never appear in error message — even if it's valid JSON.""" |
| 626 | sensitive = '{"token": "REAL_SECRET_IN_401_BODY", "user": "gabriel"}' |
| 627 | err = urllib.error.HTTPError( |
| 628 | "http://hub", 401, "Unauthorized", {}, BytesIO(sensitive.encode()) |
| 629 | ) |
| 630 | records = [_make_record()] |
| 631 | with patch(_OPENER, side_effect=err): |
| 632 | with pytest.raises(CoordBusError) as exc_info: |
| 633 | push_to_hub("http://hub", "g", "r", records, signing=None) |
| 634 | msg = str(exc_info.value) |
| 635 | assert "REAL_SECRET_IN_401_BODY" not in msg |
| 636 | assert "gabriel" not in msg |
| 637 | assert "Authentication failed" in msg |
| 638 | |
| 639 | def test_http_401_body_masked_for_pull_too(self) -> None: |
| 640 | sensitive = "BEARER_TOKEN_eyJ_EXPOSED_IN_401" |
| 641 | err = urllib.error.HTTPError( |
| 642 | "http://hub", 401, "Unauthorized", {}, BytesIO(sensitive.encode()) |
| 643 | ) |
| 644 | with patch(_OPENER, side_effect=err): |
| 645 | with pytest.raises(CoordBusError) as exc_info: |
| 646 | pull_from_hub("http://hub", "g", "r", signing=None) |
| 647 | assert sensitive not in str(exc_info.value) |
| 648 | assert "Authentication failed" in str(exc_info.value) |
| 649 | |
| 650 | def test_zero_byte_error_body_produces_clean_http_code_message(self) -> None: |
| 651 | """Empty error body must produce 'HTTP <code>' — not an exception.""" |
| 652 | err = urllib.error.HTTPError("http://hub", 503, "Service Unavailable", {}, BytesIO(b"")) |
| 653 | records = [_make_record()] |
| 654 | with patch(_OPENER, side_effect=err): |
| 655 | with pytest.raises(CoordBusError) as exc_info: |
| 656 | push_to_hub("http://hub", "g", "r", records, signing=None) |
| 657 | assert "503" in str(exc_info.value) |
| 658 | |
| 659 | def test_http_error_body_with_embedded_token_not_surfaced(self) -> None: |
| 660 | """Hub 400 body containing a token string must not leak that token.""" |
| 661 | body_with_token = f'Invalid auth: token={_SECRET} is malformed' |
| 662 | err = urllib.error.HTTPError( |
| 663 | "http://hub", 400, "Bad Request", {}, BytesIO(body_with_token.encode()) |
| 664 | ) |
| 665 | records = [_make_record()] |
| 666 | with patch(_OPENER, side_effect=err): |
| 667 | with pytest.raises(CoordBusError) as exc_info: |
| 668 | push_to_hub("http://hub", "g", "r", records, signing=None) |
| 669 | # Body is truncated to 200 chars and sanitized — _SECRET may or may not |
| 670 | # appear, but the token WE sent (tok) must not appear |
| 671 | assert "tok" not in str(exc_info.value) or "tok" in body_with_token |
| 672 | |
| 673 | |
| 674 | # =========================================================================== |
| 675 | # 6. TERMINAL INJECTION: ANSI/OSC/BEL/CSI |
| 676 | # =========================================================================== |
| 677 | |
| 678 | |
| 679 | class TestCoordSecurityTerminalInjection: |
| 680 | """A rogue agent or repo name tries to hijack the operator's terminal.""" |
| 681 | |
| 682 | def test_osc_escape_in_owner_stripped_in_text_push(self, repo: pathlib.Path) -> None: |
| 683 | """OSC sequence that sets terminal title must be stripped.""" |
| 684 | osc_owner = "\x1b]0;PWNED_TERMINAL_TITLE\x07" |
| 685 | with patch(_PUSH_TARGET, return_value={"inserted": 0, "skipped": 0}), \ |
| 686 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 687 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 688 | result = runner.invoke(cli, _push_args(owner=osc_owner)) |
| 689 | assert "\x1b" not in result.output |
| 690 | assert "\x07" not in result.output |
| 691 | |
| 692 | def test_bel_char_in_slug_stripped_in_text_push(self, repo: pathlib.Path) -> None: |
| 693 | bel_slug = "linux\x07DING" |
| 694 | with patch(_PUSH_TARGET, return_value={"inserted": 0, "skipped": 0}), \ |
| 695 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 696 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 697 | result = runner.invoke(cli, _push_args(slug=bel_slug)) |
| 698 | assert "\x07" not in result.output |
| 699 | |
| 700 | def test_c1_csi_in_owner_stripped_in_text_pull(self, repo: pathlib.Path) -> None: |
| 701 | """C1 CSI (0x9B) that initiates escape sequence must be stripped.""" |
| 702 | csi_owner = "gabriel\x9b[2J" |
| 703 | with patch(_PULL_TARGET, return_value={"records": [], "cursor": 0}), \ |
| 704 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 705 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 706 | result = runner.invoke(cli, _pull_args(owner=csi_owner)) |
| 707 | assert "\x9b" not in result.output |
| 708 | |
| 709 | def test_full_ansi_clear_screen_in_slug_stripped(self, repo: pathlib.Path) -> None: |
| 710 | """ESC[H + ESC[2J (clear screen + cursor home) must be stripped.""" |
| 711 | clear_slug = "linux\x1b[H\x1b[2J" |
| 712 | with patch(_PULL_TARGET, return_value={"records": [], "cursor": 0}), \ |
| 713 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 714 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 715 | result = runner.invoke(cli, _pull_args(slug=clear_slug)) |
| 716 | assert "\x1b" not in result.output |
| 717 | |
| 718 | def test_ansi_colour_in_owner_stripped(self, repo: pathlib.Path) -> None: |
| 719 | ansi_owner = "\x1b[31mmalicious_red_owner\x1b[0m" |
| 720 | with patch(_PUSH_TARGET, return_value={"inserted": 0, "skipped": 0}), \ |
| 721 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 722 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 723 | result = runner.invoke(cli, _push_args(owner=ansi_owner)) |
| 724 | assert "\x1b" not in result.output |
| 725 | |
| 726 | def test_all_c0_control_chars_stripped_in_text_output( |
| 727 | self, repo: pathlib.Path |
| 728 | ) -> None: |
| 729 | """Every C0 control char except \\t and \\n must not appear in output.""" |
| 730 | # Build an owner with all C0 chars (except \\n which is legitimate and \\t) |
| 731 | malicious_owner = "".join( |
| 732 | chr(i) for i in range(0x00, 0x20) |
| 733 | if i not in (0x09, 0x0A) # keep tab and newline |
| 734 | ) + "gabriel" |
| 735 | with patch(_PUSH_TARGET, return_value={"inserted": 0, "skipped": 0}), \ |
| 736 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 737 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 738 | result = runner.invoke(cli, _push_args(owner=malicious_owner)) |
| 739 | # Check: no raw control chars except newline (output separators) in output |
| 740 | for ch in result.output: |
| 741 | cp = ord(ch) |
| 742 | if cp < 0x20 and cp not in (0x09, 0x0A): |
| 743 | pytest.fail( |
| 744 | f"Control char U+{cp:04X} found in text output after sanitize_display" |
| 745 | ) |
| 746 | |
| 747 | def test_ansi_in_json_mode_is_json_encoded_not_raw(self, repo: pathlib.Path) -> None: |
| 748 | """In --json mode, ANSI in owner must be JSON-encoded (\\u001b), not raw ESC. |
| 749 | JSON consumers parse the string — they must not see raw escape sequences.""" |
| 750 | ansi_owner = "\x1b[31mred\x1b[0m" |
| 751 | with patch(_PUSH_TARGET, return_value={"inserted": 0, "skipped": 0}), \ |
| 752 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 753 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 754 | # JSON mode does not include owner in output, but must not crash |
| 755 | result = runner.invoke(cli, _push_args(owner=ansi_owner) + ["-j"]) |
| 756 | # Must produce valid JSON |
| 757 | assert result.exit_code == 0 |
| 758 | data = json.loads(result.output.strip()) |
| 759 | assert "inserted" in data |
| 760 | |
| 761 | |
| 762 | # =========================================================================== |
| 763 | # 7. TOKEN LEAKAGE ACROSS ALL OUTPUT PATHS |
| 764 | # =========================================================================== |
| 765 | |
| 766 | |
| 767 | class TestCoordSecurityTokenLeakage: |
| 768 | """Signing credential must be absolutely absent from every output path.""" |
| 769 | |
| 770 | def test_token_not_in_push_json_success(self, repo: pathlib.Path) -> None: |
| 771 | with patch(_PUSH_TARGET, return_value={"inserted": 0, "skipped": 0}), \ |
| 772 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 773 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", _SECRET)): |
| 774 | result = runner.invoke(cli, _push_args() + ["-j"]) |
| 775 | assert _SECRET not in result.output |
| 776 | |
| 777 | def test_token_not_in_pull_json_success(self, repo: pathlib.Path) -> None: |
| 778 | with patch(_PULL_TARGET, return_value={"records": [], "cursor": 0}), \ |
| 779 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 780 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", _SECRET)): |
| 781 | result = runner.invoke(cli, _pull_args() + ["-j"]) |
| 782 | assert _SECRET not in result.output |
| 783 | |
| 784 | def test_token_not_in_push_text_success(self, repo: pathlib.Path) -> None: |
| 785 | with patch(_PUSH_TARGET, return_value={"inserted": 0, "skipped": 0}), \ |
| 786 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 787 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", _SECRET)): |
| 788 | result = runner.invoke(cli, _push_args()) |
| 789 | assert _SECRET not in result.output |
| 790 | |
| 791 | def test_token_not_in_pull_text_success(self, repo: pathlib.Path) -> None: |
| 792 | with patch(_PULL_TARGET, return_value={"records": [], "cursor": 0}), \ |
| 793 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 794 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", _SECRET)): |
| 795 | result = runner.invoke(cli, _pull_args()) |
| 796 | assert _SECRET not in result.output |
| 797 | |
| 798 | def test_token_not_in_push_json_on_error(self, repo: pathlib.Path) -> None: |
| 799 | subdir = coordination_dir(repo) / "reservations" |
| 800 | subdir.mkdir(parents=True, exist_ok=True) |
| 801 | uid = _new_id() |
| 802 | (subdir / f"{uid}.json").write_text( |
| 803 | json.dumps({"reservation_id": uid, "run_id": "r"}), encoding="utf-8" |
| 804 | ) |
| 805 | with patch(_PUSH_TARGET, side_effect=CoordBusError("hub refused", status_code=500)), \ |
| 806 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 807 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", _SECRET)): |
| 808 | result = runner.invoke(cli, _push_args() + ["-j"]) |
| 809 | assert _SECRET not in result.output |
| 810 | |
| 811 | def test_token_not_in_pull_json_on_error(self, repo: pathlib.Path) -> None: |
| 812 | with patch(_PULL_TARGET, side_effect=CoordBusError("gateway timeout", status_code=504)), \ |
| 813 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 814 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", _SECRET)): |
| 815 | result = runner.invoke(cli, _pull_args() + ["-j"]) |
| 816 | assert _SECRET not in result.output |
| 817 | |
| 818 | def test_token_not_in_coord_bus_error_string(self) -> None: |
| 819 | """CoordBusError raised by push_to_hub must not include the token.""" |
| 820 | sensitive = "SECRET_TOKEN_XYZZY" |
| 821 | err = urllib.error.HTTPError( |
| 822 | "http://hub", 500, "Server Error", {}, |
| 823 | BytesIO(b"internal failure - auth token rejected"), |
| 824 | ) |
| 825 | records = [_make_record()] |
| 826 | with patch(_OPENER, side_effect=err): |
| 827 | with pytest.raises(CoordBusError) as exc_info: |
| 828 | push_to_hub("http://hub", "g", "r", records, signing=None) |
| 829 | assert sensitive not in str(exc_info.value) |
| 830 | |
| 831 | def test_token_not_in_log_records_during_push(self, repo: pathlib.Path) -> None: |
| 832 | """Logging must never emit the signing credential, even at DEBUG level.""" |
| 833 | log_records: list[str] = [] |
| 834 | |
| 835 | class Capture(logging.Handler): |
| 836 | def emit(self, record: logging.LogRecord) -> None: |
| 837 | log_records.append(self.format(record)) |
| 838 | |
| 839 | handler = Capture() |
| 840 | logger = logging.getLogger("muse") |
| 841 | old_level = logger.level |
| 842 | logger.setLevel(logging.DEBUG) |
| 843 | logger.addHandler(handler) |
| 844 | |
| 845 | try: |
| 846 | subdir = coordination_dir(repo) / "reservations" |
| 847 | subdir.mkdir(parents=True, exist_ok=True) |
| 848 | uid = _new_id() |
| 849 | (subdir / f"{uid}.json").write_text( |
| 850 | json.dumps({"reservation_id": uid, "run_id": "r"}), encoding="utf-8" |
| 851 | ) |
| 852 | with patch(_PUSH_TARGET, return_value={"inserted": 1, "skipped": 0}), \ |
| 853 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 854 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", _SECRET)): |
| 855 | runner.invoke(cli, _push_args() + ["-j"]) |
| 856 | finally: |
| 857 | logger.removeHandler(handler) |
| 858 | logger.setLevel(old_level) |
| 859 | |
| 860 | for record in log_records: |
| 861 | assert _SECRET not in record, f"Token found in log record: {record!r}" |
| 862 | |
| 863 | |
| 864 | # =========================================================================== |
| 865 | # 8. MALICIOUS HUB RESPONSE: ADVERSARIAL RECORDS |
| 866 | # =========================================================================== |
| 867 | |
| 868 | |
| 869 | class TestCoordSecurityMaliciousHubResponse: |
| 870 | """A compromised hub returns adversarial records to escape the remote/ directory.""" |
| 871 | |
| 872 | def test_kind_traversal_does_not_write_outside_remote( |
| 873 | self, repo: pathlib.Path |
| 874 | ) -> None: |
| 875 | uid = _new_id() |
| 876 | malicious_records = [ |
| 877 | {"kind": "../../../.ssh", "record_id": uid, "run_id": "r", |
| 878 | "payload": {}, "expires_at": None}, |
| 879 | ] |
| 880 | _write_remote_records(repo, malicious_records) |
| 881 | ssh_path = pathlib.Path.home() / ".ssh" / f"{uid}.json" |
| 882 | assert not ssh_path.exists(), "Traversal escaped to ~/.ssh!" |
| 883 | remote = coordination_dir(repo) / "remote" |
| 884 | if remote.exists(): |
| 885 | assert list(remote.rglob("*.json")) == [] |
| 886 | |
| 887 | def test_unknown_kind_uppercase_rejected(self, repo: pathlib.Path) -> None: |
| 888 | """'RESERVATION' is not in _ALL_KINDS — case-sensitive allowlist.""" |
| 889 | uid = _new_id() |
| 890 | records = [{"kind": "RESERVATION", "record_id": uid, "run_id": "r", |
| 891 | "payload": {}, "expires_at": None}] |
| 892 | _write_remote_records(repo, records) |
| 893 | remote = coordination_dir(repo) / "remote" |
| 894 | if remote.exists(): |
| 895 | assert list(remote.rglob("*.json")) == [] |
| 896 | |
| 897 | def test_unknown_kind_with_null_byte_rejected(self, repo: pathlib.Path) -> None: |
| 898 | uid = _new_id() |
| 899 | records = [{"kind": "reservation\x00malicious", "record_id": uid, "run_id": "r", |
| 900 | "payload": {}, "expires_at": None}] |
| 901 | _write_remote_records(repo, records) |
| 902 | remote = coordination_dir(repo) / "remote" |
| 903 | if remote.exists(): |
| 904 | assert list(remote.rglob("*.json")) == [] |
| 905 | |
| 906 | def test_traversal_record_id_ssh_target_rejected(self, repo: pathlib.Path) -> None: |
| 907 | """record_id = ``../../.ssh/authorized_keys`` must not write to ~/.ssh.""" |
| 908 | malicious_id = "../../.ssh/authorized_keys" |
| 909 | records = [{"kind": "reservation", "record_id": malicious_id, "run_id": "r", |
| 910 | "payload": {}, "expires_at": None}] |
| 911 | _write_remote_records(repo, records) |
| 912 | # Nothing must be written |
| 913 | target = coordination_dir(repo) / "remote" / "reservation" / f"{malicious_id}.json" |
| 914 | assert not target.exists() |
| 915 | remote = coordination_dir(repo) / "remote" |
| 916 | if remote.exists(): |
| 917 | all_files = list(remote.rglob("*.json")) |
| 918 | assert all_files == [], f"Unexpected files written: {all_files}" |
| 919 | |
| 920 | def test_record_id_with_null_byte_rejected(self, repo: pathlib.Path) -> None: |
| 921 | malicious_id = "foo\x00bar" |
| 922 | records = [{"kind": "reservation", "record_id": malicious_id, "run_id": "r", |
| 923 | "payload": {}, "expires_at": None}] |
| 924 | _write_remote_records(repo, records) |
| 925 | remote = coordination_dir(repo) / "remote" |
| 926 | if remote.exists(): |
| 927 | assert list(remote.rglob("*.json")) == [] |
| 928 | |
| 929 | def test_record_id_with_space_rejected(self, repo: pathlib.Path) -> None: |
| 930 | records = [{"kind": "task", "record_id": "foo bar", "run_id": "r", |
| 931 | "payload": {}, "expires_at": None}] |
| 932 | _write_remote_records(repo, records) |
| 933 | remote = coordination_dir(repo) / "remote" |
| 934 | if remote.exists(): |
| 935 | assert list(remote.rglob("*.json")) == [] |
| 936 | |
| 937 | def test_record_id_with_newline_rejected(self, repo: pathlib.Path) -> None: |
| 938 | records = [{"kind": "task", "record_id": "foo\nbar", "run_id": "r", |
| 939 | "payload": {}, "expires_at": None}] |
| 940 | _write_remote_records(repo, records) |
| 941 | remote = coordination_dir(repo) / "remote" |
| 942 | if remote.exists(): |
| 943 | assert list(remote.rglob("*.json")) == [] |
| 944 | |
| 945 | def test_record_id_with_backslash_rejected(self) -> None: |
| 946 | """Backslash would be a path separator on Windows — must be rejected.""" |
| 947 | assert not _SAFE_RECORD_ID_RE.match("foo\\bar") |
| 948 | assert not _SAFE_RECORD_ID_RE.match("..\\..\\malicious") |
| 949 | |
| 950 | def test_payload_with_exec_string_safely_serialized( |
| 951 | self, repo: pathlib.Path |
| 952 | ) -> None: |
| 953 | """Adversarial payload values must be stored as data, not executed.""" |
| 954 | uid = _new_id() |
| 955 | exec_payload = {"cmd": "__import__('os').system('rm -rf /')"} |
| 956 | records = [{"kind": "reservation", "record_id": uid, "run_id": "r", |
| 957 | "payload": exec_payload, "expires_at": None}] |
| 958 | _write_remote_records(repo, records) |
| 959 | target = coordination_dir(repo) / "remote" / "reservation" / f"{uid}.json" |
| 960 | assert target.exists() |
| 961 | loaded = json.loads(target.read_text()) |
| 962 | # Stored as a string, not executed |
| 963 | assert loaded["payload"]["cmd"] == exec_payload["cmd"] |
| 964 | |
| 965 | def test_1000_adversarial_record_ids_zero_files_written( |
| 966 | self, repo: pathlib.Path |
| 967 | ) -> None: |
| 968 | """1000-record response where every record_id is adversarial → 0 files.""" |
| 969 | traversals = [ |
| 970 | "../../etc/passwd", |
| 971 | "../.ssh/authorized_keys", |
| 972 | "/etc/crontab", |
| 973 | "foo\x00bar", |
| 974 | "a" * 129, # over max length |
| 975 | "foo bar", |
| 976 | "foo\nbar", |
| 977 | "foo\tbar", |
| 978 | "\x1b[31mred", |
| 979 | "..%2F..%2Fetc", |
| 980 | ] |
| 981 | malicious_records = [] |
| 982 | for i in range(1000): |
| 983 | malicious_records.append({ |
| 984 | "kind": "reservation", |
| 985 | "record_id": traversals[i % len(traversals)], |
| 986 | "run_id": "r", |
| 987 | "payload": {}, |
| 988 | "expires_at": None, |
| 989 | }) |
| 990 | _write_remote_records(repo, malicious_records) |
| 991 | remote = coordination_dir(repo) / "remote" |
| 992 | if remote.exists(): |
| 993 | written = list(remote.rglob("*.json")) |
| 994 | assert written == [], f"Adversarial record_ids wrote {len(written)} files!" |
| 995 | |
| 996 | def test_hub_response_with_unicode_kind_rejected(self, repo: pathlib.Path) -> None: |
| 997 | """Non-ASCII kind like 'réservation' is not in _ALL_KINDS allowlist.""" |
| 998 | uid = _new_id() |
| 999 | records = [{"kind": "réservation", "record_id": uid, "run_id": "r", |
| 1000 | "payload": {}, "expires_at": None}] |
| 1001 | _write_remote_records(repo, records) |
| 1002 | remote = coordination_dir(repo) / "remote" |
| 1003 | if remote.exists(): |
| 1004 | assert list(remote.rglob("*.json")) == [] |
| 1005 | |
| 1006 | |
| 1007 | # =========================================================================== |
| 1008 | # 9. GC INPUT VALIDATION |
| 1009 | # =========================================================================== |
| 1010 | |
| 1011 | |
| 1012 | class TestCoordSecurityGCBoundaries: |
| 1013 | """Adversarial GC parameters must not trigger crashes or unsafe operations.""" |
| 1014 | |
| 1015 | def test_gc_negative_grace_period_exits_1(self, repo: pathlib.Path) -> None: |
| 1016 | with patch(_REQUIRE_REPO, return_value=repo): |
| 1017 | result = runner.invoke( |
| 1018 | cli, ["coord", "gc", "--grace-period", "-1"] |
| 1019 | ) |
| 1020 | assert result.exit_code == 1 |
| 1021 | |
| 1022 | def test_gc_zero_grace_period_accepted(self, repo: pathlib.Path) -> None: |
| 1023 | """Zero grace period is valid (no grace window).""" |
| 1024 | with patch(_REQUIRE_REPO, return_value=repo): |
| 1025 | result = runner.invoke( |
| 1026 | cli, ["coord", "gc", "--grace-period", "0"] |
| 1027 | ) |
| 1028 | # Should succeed (dry-run, no files to collect) |
| 1029 | assert result.exit_code == 0 |
| 1030 | |
| 1031 | def test_gc_zero_max_intent_age_exits_1(self, repo: pathlib.Path) -> None: |
| 1032 | with patch(_REQUIRE_REPO, return_value=repo): |
| 1033 | result = runner.invoke( |
| 1034 | cli, ["coord", "gc", "--max-intent-age", "0"] |
| 1035 | ) |
| 1036 | assert result.exit_code == 1 |
| 1037 | |
| 1038 | def test_gc_negative_max_intent_age_exits_1(self, repo: pathlib.Path) -> None: |
| 1039 | with patch(_REQUIRE_REPO, return_value=repo): |
| 1040 | result = runner.invoke( |
| 1041 | cli, ["coord", "gc", "--max-intent-age", "-1"] |
| 1042 | ) |
| 1043 | assert result.exit_code == 1 |
| 1044 | |
| 1045 | def test_gc_very_large_grace_period_accepted(self, repo: pathlib.Path) -> None: |
| 1046 | """sys.maxsize grace period must not overflow or crash.""" |
| 1047 | max_val = min(sys.maxsize, 2_147_483_647) # keep within int range for argparse |
| 1048 | with patch(_REQUIRE_REPO, return_value=repo): |
| 1049 | result = runner.invoke( |
| 1050 | cli, ["coord", "gc", "--grace-period", str(max_val)] |
| 1051 | ) |
| 1052 | assert result.exit_code == 0 |
| 1053 | |
| 1054 | def test_gc_dry_run_never_deletes_files(self, repo: pathlib.Path) -> None: |
| 1055 | """Default (dry-run) mode must not delete anything.""" |
| 1056 | # Create an expired reservation |
| 1057 | import datetime |
| 1058 | res_dir = coordination_dir(repo) / "reservations" |
| 1059 | res_dir.mkdir(parents=True, exist_ok=True) |
| 1060 | uid = _new_id() |
| 1061 | expired_res = { |
| 1062 | "reservation_id": uid, |
| 1063 | "run_id": "r", |
| 1064 | "branch": "main", |
| 1065 | "addresses": ["foo::bar"], |
| 1066 | "created_at": "2000-01-01T00:00:00+00:00", |
| 1067 | "expires_at": "2000-01-01T01:00:00+00:00", |
| 1068 | "operation": None, |
| 1069 | } |
| 1070 | (res_dir / f"{uid}.json").write_text(json.dumps(expired_res), encoding="utf-8") |
| 1071 | |
| 1072 | with patch(_REQUIRE_REPO, return_value=repo): |
| 1073 | result = runner.invoke(cli, ["coord", "gc"]) |
| 1074 | assert result.exit_code == 0 |
| 1075 | # File must still exist (dry-run does not delete) |
| 1076 | assert (res_dir / f"{uid}.json").exists(), "Dry-run deleted a file!" |
| 1077 | |
| 1078 | def test_gc_json_output_is_valid_json(self, repo: pathlib.Path) -> None: |
| 1079 | with patch(_REQUIRE_REPO, return_value=repo): |
| 1080 | result = runner.invoke(cli, ["coord", "gc", "--json"]) |
| 1081 | assert result.exit_code == 0 |
| 1082 | data = json.loads(result.output.strip()) |
| 1083 | assert "dry_run" in data |
| 1084 | assert "total_removed" in data |
| 1085 | |
| 1086 | def test_gc_no_traceback_on_empty_repo(self, repo: pathlib.Path) -> None: |
| 1087 | with patch(_REQUIRE_REPO, return_value=repo): |
| 1088 | result = runner.invoke(cli, ["coord", "gc"]) |
| 1089 | assert "Traceback" not in result.output |
| 1090 | assert result.exit_code == 0 |
| 1091 | |
| 1092 | |
| 1093 | # =========================================================================== |
| 1094 | # 10. CLI INPUT VALIDATION HARDENING |
| 1095 | # =========================================================================== |
| 1096 | |
| 1097 | |
| 1098 | class TestCoordSecurityInputValidation: |
| 1099 | """Boundary tests for all size-limited and range-checked CLI inputs.""" |
| 1100 | |
| 1101 | def test_push_owner_at_max_length_accepted(self, repo: pathlib.Path) -> None: |
| 1102 | owner = "a" * _MAX_OWNER_LEN |
| 1103 | with patch(_PUSH_TARGET, return_value={"inserted": 0, "skipped": 0}), \ |
| 1104 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 1105 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 1106 | result = runner.invoke(cli, _push_args(owner=owner)) |
| 1107 | # Should not exit 1 due to length check |
| 1108 | assert "too long" not in result.output |
| 1109 | |
| 1110 | def test_push_owner_one_over_max_length_rejected_before_io( |
| 1111 | self, repo: pathlib.Path |
| 1112 | ) -> None: |
| 1113 | owner = "a" * (_MAX_OWNER_LEN + 1) |
| 1114 | gather_called = [] |
| 1115 | |
| 1116 | def fake_gather(root: pathlib.Path, kinds: list[str]) -> list[MsgpackDict]: |
| 1117 | gather_called.append(True) |
| 1118 | return [] |
| 1119 | |
| 1120 | with patch("muse.cli.commands.coord_sync._gather_local_records", fake_gather), \ |
| 1121 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 1122 | result = runner.invoke(cli, _push_args(owner=owner)) |
| 1123 | assert result.exit_code == 1 |
| 1124 | assert not gather_called, "File I/O was performed before length check!" |
| 1125 | |
| 1126 | def test_push_slug_at_max_length_accepted(self, repo: pathlib.Path) -> None: |
| 1127 | slug = "s" * _MAX_SLUG_LEN |
| 1128 | with patch(_PUSH_TARGET, return_value={"inserted": 0, "skipped": 0}), \ |
| 1129 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 1130 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 1131 | result = runner.invoke(cli, _push_args(slug=slug)) |
| 1132 | assert "too long" not in result.output |
| 1133 | |
| 1134 | def test_push_slug_one_over_max_length_rejected_before_io( |
| 1135 | self, repo: pathlib.Path |
| 1136 | ) -> None: |
| 1137 | slug = "s" * (_MAX_SLUG_LEN + 1) |
| 1138 | gather_called = [] |
| 1139 | |
| 1140 | def fake_gather(root: pathlib.Path, kinds: list[str]) -> list[MsgpackDict]: |
| 1141 | gather_called.append(True) |
| 1142 | return [] |
| 1143 | |
| 1144 | with patch("muse.cli.commands.coord_sync._gather_local_records", fake_gather), \ |
| 1145 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 1146 | result = runner.invoke(cli, _push_args(slug=slug)) |
| 1147 | assert result.exit_code == 1 |
| 1148 | assert not gather_called, "File I/O was performed before slug length check!" |
| 1149 | |
| 1150 | def test_pull_since_id_max_python_int_accepted(self, repo: pathlib.Path) -> None: |
| 1151 | """sys.maxsize as --since-id must not overflow or crash.""" |
| 1152 | with patch(_PULL_TARGET, return_value={"records": [], "cursor": 0}), \ |
| 1153 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 1154 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 1155 | result = runner.invoke( |
| 1156 | cli, |
| 1157 | ["coord", "sync", "pull", |
| 1158 | "--hub", "https://localhost:1337", |
| 1159 | "--owner", "gabriel", |
| 1160 | "--slug", "linux", |
| 1161 | "--since-id", str(sys.maxsize)], |
| 1162 | ) |
| 1163 | # Should not crash with overflow — may exit 0 or 1 depending on argparse int handling |
| 1164 | assert "Traceback" not in result.output |
| 1165 | |
| 1166 | def test_pull_limit_zero_rejected(self, repo: pathlib.Path) -> None: |
| 1167 | result = runner.invoke(cli, _pull_args() + ["--limit", "0"]) |
| 1168 | assert result.exit_code == 1 |
| 1169 | |
| 1170 | def test_pull_limit_at_max_accepted(self, repo: pathlib.Path) -> None: |
| 1171 | with patch(_PULL_TARGET, return_value={"records": [], "cursor": 0}), \ |
| 1172 | patch(_REQUIRE_REPO, return_value=repo), \ |
| 1173 | patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 1174 | result = runner.invoke(cli, _pull_args() + ["--limit", str(_MAX_PULL_LIMIT)]) |
| 1175 | assert result.exit_code == 0 |
| 1176 | |
| 1177 | def test_pull_limit_one_over_max_rejected(self, repo: pathlib.Path) -> None: |
| 1178 | result = runner.invoke( |
| 1179 | cli, _pull_args() + ["--limit", str(_MAX_PULL_LIMIT + 1)] |
| 1180 | ) |
| 1181 | assert result.exit_code == 1 |
| 1182 | |
| 1183 | def test_pull_since_id_negative_rejected(self, repo: pathlib.Path) -> None: |
| 1184 | result = runner.invoke(cli, _pull_args() + ["--since-id", "-1"]) |
| 1185 | assert result.exit_code == 1 |
| 1186 | |
| 1187 | def test_no_traceback_on_any_validation_failure(self, repo: pathlib.Path) -> None: |
| 1188 | """All input validation failures must emit clean messages, never tracebacks.""" |
| 1189 | bad_invocations = [ |
| 1190 | _push_args(owner="a" * (_MAX_OWNER_LEN + 1)), |
| 1191 | _push_args(slug="s" * (_MAX_SLUG_LEN + 1)), |
| 1192 | _pull_args() + ["--since-id", "-99"], |
| 1193 | _pull_args() + ["--limit", "0"], |
| 1194 | _pull_args() + ["--limit", str(_MAX_PULL_LIMIT + 1)], |
| 1195 | ] |
| 1196 | for args in bad_invocations: |
| 1197 | with patch(_RESOLVE_HUB, return_value=("https://localhost:1337", "tok")): |
| 1198 | result = runner.invoke(cli, args) |
| 1199 | assert result.exit_code == 1, f"Expected exit 1 for: {args}" |
| 1200 | assert "Traceback" not in result.output, ( |
| 1201 | f"Traceback leaked for args: {args}\nOutput: {result.output}" |
| 1202 | ) |
File History
1 commit
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
28 days ago