gabriel / muse public
test_core_validation.py python
567 lines 18.9 KB
Raw
sha256:248464b6a2f758985cbef90f864fa62c61842be699d975d6e00b6a9509ef919c fix(delta): detect blob-identical file renames for files wi… Sonnet 4.6 patch 24 days ago
1 """Tests for muse.core.validation — all trust-boundary primitives.
2
3 Every function in the validation module operates on untrusted input and must
4 either return a safe value or raise ValueError / TypeError with a descriptive
5 message. These tests verify correctness of the allow-lists, reject-lists, and
6 edge cases for each guard.
7 """
8
9 from __future__ import annotations
10
11 import math
12 import pathlib
13
14 import pytest
15
16 from muse.core.types import fake_id, long_id
17 from muse.core.validation import (
18 MAX_FILE_BYTES,
19 MAX_RESPONSE_BYTES,
20 MAX_SYSEX_BYTES,
21 clamp_int,
22 contain_path,
23 finite_float,
24 sanitize_display,
25 sanitize_glob_prefix,
26 validate_branch_name,
27 validate_domain_name,
28 validate_object_id,
29 validate_ref_id,
30 validate_repo_id,
31 )
32
33
34 # ---------------------------------------------------------------------------
35 # Constants
36 # ---------------------------------------------------------------------------
37
38
class TestConstants:
    """Pin the exported size limits so an accidental change is noticed."""

    def test_max_file_bytes_is_256mb(self) -> None:
        expected = 256 * 1024 * 1024
        assert MAX_FILE_BYTES == expected

    def test_max_response_bytes_is_64mb(self) -> None:
        expected = 64 * 1024 * 1024
        assert MAX_RESPONSE_BYTES == expected

    def test_max_sysex_bytes_is_64kib(self) -> None:
        expected = 65_536
        assert MAX_SYSEX_BYTES == expected
48
49
50 # ---------------------------------------------------------------------------
51 # validate_object_id
52 # ---------------------------------------------------------------------------
53
54
class TestValidateObjectId:
    """Well-formed object IDs pass through validate_object_id; malformed ones raise."""

    # --- accepted IDs ---

    def test_valid_all_zeros(self) -> None:
        candidate = fake_id("zeros")
        validated = validate_object_id(candidate)
        assert validated == candidate

    def test_valid_all_lowercase_hex(self) -> None:
        candidate = fake_id("lowercase")
        validated = validate_object_id(candidate)
        assert validated == candidate

    def test_valid_mixed_hex(self) -> None:
        candidate = fake_id("mixed")
        validated = validate_object_id(candidate)
        assert validated == candidate

    def test_returns_same_string(self) -> None:
        candidate = fake_id("identity")
        # The validator must hand back the very same object, not a copy.
        assert validate_object_id(candidate) is candidate

    # --- rejected IDs ---
    # NOTE(review): some cases below wrap the payload in long_id() and others
    # pass a bare string; confirm each one fails for the reason its name states.

    def test_rejects_uppercase(self) -> None:
        with pytest.raises(ValueError, match="64 lowercase hex"):
            validate_object_id(long_id("A" * 64))

    def test_rejects_63_chars(self) -> None:
        with pytest.raises(ValueError):
            validate_object_id(long_id("a" * 63))

    def test_rejects_65_chars(self) -> None:
        too_long = "a" * 65
        with pytest.raises(ValueError):
            validate_object_id(too_long)

    def test_rejects_empty_string(self) -> None:
        blank = ""
        with pytest.raises(ValueError):
            validate_object_id(blank)

    def test_rejects_non_hex_chars(self) -> None:
        # 'g' lies outside the hexadecimal alphabet.
        not_hex = f"g{'a' * 63}"
        with pytest.raises(ValueError):
            validate_object_id(not_hex)

    def test_rejects_path_traversal_string(self) -> None:
        sneaky = f"../traversal/../path/{'a' * 48}"
        with pytest.raises(ValueError):
            validate_object_id(sneaky)

    def test_rejects_null_byte_in_id(self) -> None:
        nul_filled = "\x00" * 64
        with pytest.raises(ValueError):
            validate_object_id(nul_filled)
103
104
105
106 # ---------------------------------------------------------------------------
107 # validate_ref_id
108 # ---------------------------------------------------------------------------
109
110
class TestValidateRefId:
    """validate_ref_id is expected to enforce the same 64-char hex rule as object IDs."""

    def test_valid_commit_id(self) -> None:
        commit = fake_id("commit")
        validated = validate_ref_id(commit)
        assert validated == commit

    def test_rejects_short_id(self) -> None:
        truncated = "abc123"
        with pytest.raises(ValueError):
            validate_ref_id(truncated)

    def test_rejects_uppercase(self) -> None:
        with pytest.raises(ValueError):
            validate_ref_id(long_id("B" * 64))

    def test_error_message_mentions_ref_id(self) -> None:
        # The message should name the kind of ID so callers can tell guards apart.
        truncated = "short"
        with pytest.raises(ValueError, match="ref ID"):
            validate_ref_id(truncated)
129
130
131 # ---------------------------------------------------------------------------
132 # validate_branch_name
133 # ---------------------------------------------------------------------------
134
135
class TestValidateBranchName:
    """Branch-name guard, Git style.

    Slash-separated names are accepted; backslashes, control characters and
    misplaced dots or slashes are refused.
    """

    # --- valid names ---

    def test_simple_name(self) -> None:
        name = "main"
        assert validate_branch_name(name) == "main"

    def test_dev_branch(self) -> None:
        name = "dev"
        assert validate_branch_name(name) == "dev"

    def test_feature_slash_style(self) -> None:
        name = "feature/my-branch"
        assert validate_branch_name(name) == "feature/my-branch"

    def test_fix_slash_style(self) -> None:
        name = "fix/auth-token-exposure"
        assert validate_branch_name(name) == "fix/auth-token-exposure"

    def test_nested_path(self) -> None:
        name = "feat/v2/core"
        assert validate_branch_name(name) == "feat/v2/core"

    def test_max_length_255(self) -> None:
        longest_allowed = "a" * 255
        assert validate_branch_name(longest_allowed) == longest_allowed

    def test_digits_hyphens_underscores(self) -> None:
        name = "branch-123_test"
        assert validate_branch_name(name) == "branch-123_test"

    # --- rejected names ---

    def test_rejects_empty(self) -> None:
        blank = ""
        with pytest.raises(ValueError, match="must not be empty"):
            validate_branch_name(blank)

    def test_rejects_too_long(self) -> None:
        one_past_limit = "a" * 256
        with pytest.raises(ValueError, match="too long"):
            validate_branch_name(one_past_limit)

    def test_rejects_backslash(self) -> None:
        bad = "malicious\\branch"
        with pytest.raises(ValueError, match="forbidden"):
            validate_branch_name(bad)

    def test_rejects_null_byte(self) -> None:
        bad = "branch\x00name"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_carriage_return(self) -> None:
        bad = "branch\rname"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_linefeed(self) -> None:
        bad = "branch\nname"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_tab(self) -> None:
        bad = "branch\tname"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_leading_dot(self) -> None:
        bad = ".hidden"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_trailing_dot(self) -> None:
        bad = "branch."
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_consecutive_dots(self) -> None:
        bad = "branch..name"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_triple_dot(self) -> None:
        bad = "branch...name"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_consecutive_slashes(self) -> None:
        bad = "feat//branch"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_leading_slash(self) -> None:
        bad = "/branch"
        with pytest.raises(ValueError):
            validate_branch_name(bad)

    def test_rejects_trailing_slash(self) -> None:
        bad = "branch/"
        with pytest.raises(ValueError):
            validate_branch_name(bad)
221
222
223
224 # ---------------------------------------------------------------------------
225 # validate_repo_id
226 # ---------------------------------------------------------------------------
227
228
# Shared well-formed repo ID used by the acceptance and over-length cases.
_VALID_REPO_ID = long_id("ab" * 32)


class TestValidateRepoId:
    """validate_repo_id accepts the shared valid ID and rejects other shapes."""

    def test_valid_sha256_prefixed(self) -> None:
        validated = validate_repo_id(_VALID_REPO_ID)
        assert validated == _VALID_REPO_ID

    def test_rejects_plain_string(self) -> None:
        uuid_like = "664433cd-8e2f-4b76-88ff-16d23570a699"
        with pytest.raises(ValueError, match="sha256:"):
            validate_repo_id(uuid_like)

    def test_rejects_bare_hex(self) -> None:
        # Correct digest characters, but without whatever long_id() adds.
        with pytest.raises(ValueError, match="sha256:"):
            validate_repo_id("ab" * 32)

    def test_rejects_simple_string(self) -> None:
        label = "myrepo"
        with pytest.raises(ValueError, match="sha256:"):
            validate_repo_id(label)

    def test_rejects_empty(self) -> None:
        blank = ""
        with pytest.raises(ValueError, match="must not be empty"):
            validate_repo_id(blank)

    def test_rejects_too_long(self) -> None:
        with pytest.raises(ValueError, match="too long"):
            validate_repo_id(_VALID_REPO_ID + "x" * 200)

    def test_rejects_invalid_hex_char(self) -> None:
        with pytest.raises(ValueError, match="sha256:"):
            validate_repo_id(long_id("g" * 64))

    def test_rejects_null_byte(self) -> None:
        with pytest.raises(ValueError, match="sha256:"):
            validate_repo_id(long_id("a" * 63 + "\x00"))
263
264
265
266 # ---------------------------------------------------------------------------
267 # validate_domain_name
268 # ---------------------------------------------------------------------------
269
270
class TestValidateDomainName:
    """Domain names: lowercase identifiers pass, anything else raises ValueError."""

    # --- accepted names ---

    def test_midi(self) -> None:
        name = "midi"
        assert validate_domain_name(name) == "midi"

    def test_code(self) -> None:
        name = "code"
        assert validate_domain_name(name) == "code"

    def test_scaffold(self) -> None:
        name = "scaffold"
        assert validate_domain_name(name) == "scaffold"

    def test_with_hyphen(self) -> None:
        name = "my-domain"
        assert validate_domain_name(name) == "my-domain"

    def test_with_underscore(self) -> None:
        name = "my_domain"
        assert validate_domain_name(name) == "my_domain"

    def test_with_digits(self) -> None:
        name = "domain2"
        assert validate_domain_name(name) == "domain2"

    # --- rejected names ---

    def test_rejects_empty(self) -> None:
        blank = ""
        with pytest.raises(ValueError):
            validate_domain_name(blank)

    def test_rejects_leading_digit(self) -> None:
        bad = "2domain"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_uppercase(self) -> None:
        bad = "MIDI"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_space(self) -> None:
        bad = "my domain"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_slash(self) -> None:
        bad = "midi/ext"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_dot(self) -> None:
        bad = "midi.ext"
        with pytest.raises(ValueError):
            validate_domain_name(bad)

    def test_rejects_too_long(self) -> None:
        # More than 63 chars (the regex allows a start letter + up to 62 more).
        oversized = f"a{'b' * 63}"
        with pytest.raises(ValueError):
            validate_domain_name(oversized)
318
319
320 # ---------------------------------------------------------------------------
321 # contain_path
322 # ---------------------------------------------------------------------------
323
324
class TestContainPath:
    """contain_path joins a relative path onto a base and refuses any escape.

    Each test works only inside pytest's per-test ``tmp_path`` so nothing is
    written to directories shared with other tests.
    """

    def test_simple_subpath(self, tmp_path: pathlib.Path) -> None:
        result = contain_path(tmp_path, "file.txt")
        assert result == (tmp_path / "file.txt").resolve()

    def test_nested_subpath(self, tmp_path: pathlib.Path) -> None:
        result = contain_path(tmp_path, "sub/dir/file.txt")
        assert result == (tmp_path / "sub" / "dir" / "file.txt").resolve()

    def test_returns_resolved_path(self, tmp_path: pathlib.Path) -> None:
        # A "." segment must not survive in the returned path.
        result = contain_path(tmp_path, "a/./b")
        assert "./" not in str(result)

    def test_rejects_dotdot_traversal(self, tmp_path: pathlib.Path) -> None:
        with pytest.raises(ValueError, match="traversal"):
            contain_path(tmp_path, "../escape")

    def test_rejects_double_dotdot(self, tmp_path: pathlib.Path) -> None:
        with pytest.raises(ValueError):
            contain_path(tmp_path, "sub/../../etc/passwd")

    def test_rejects_absolute_path(self, tmp_path: pathlib.Path) -> None:
        with pytest.raises(ValueError):
            contain_path(tmp_path, "/etc/passwd")

    def test_rejects_empty_rel(self, tmp_path: pathlib.Path) -> None:
        with pytest.raises(ValueError, match="must not be empty"):
            contain_path(tmp_path, "")

    def test_path_equal_to_child_is_fine(self, tmp_path: pathlib.Path) -> None:
        # A path that resolves exactly to a direct child should pass.
        result = contain_path(tmp_path, "direct_child")
        assert result.parent == tmp_path.resolve()

    def test_rejects_symlink_escaping_base(self, tmp_path: pathlib.Path) -> None:
        # Use a nested directory as the base so the "outside" target can still
        # live under tmp_path. Writing into tmp_path.parent would leave a file
        # in a directory shared with every other test in the session.
        base = tmp_path / "base"
        base.mkdir()
        outside = tmp_path / "outside.txt"
        outside.write_text("secret")
        link = base / "link.txt"
        try:
            link.symlink_to(outside)
        except (OSError, NotImplementedError) as exc:
            # Some platforms or accounts cannot create symlinks; that is an
            # environment limitation, not a contain_path failure.
            pytest.skip(f"symlinks are not available here: {exc}")
        # contain_path resolves the path — the symlink target is outside base.
        with pytest.raises(ValueError, match="traversal"):
            contain_path(base, "link.txt")
369
370
371 # ---------------------------------------------------------------------------
372 # sanitize_glob_prefix
373 # ---------------------------------------------------------------------------
374
375
class TestSanitizeGlobPrefix:
    """sanitize_glob_prefix drops glob metacharacters and keeps everything else."""

    def test_clean_prefix_unchanged(self) -> None:
        cleaned = sanitize_glob_prefix("abcdef")
        assert cleaned == "abcdef"

    def test_strips_asterisk(self) -> None:
        cleaned = sanitize_glob_prefix("abc*def")
        assert cleaned == "abcdef"

    def test_strips_question_mark(self) -> None:
        cleaned = sanitize_glob_prefix("abc?def")
        assert cleaned == "abcdef"

    def test_strips_open_bracket(self) -> None:
        cleaned = sanitize_glob_prefix("abc[def")
        assert cleaned == "abcdef"

    def test_strips_close_bracket(self) -> None:
        cleaned = sanitize_glob_prefix("abc]def")
        assert cleaned == "abcdef"

    def test_strips_open_brace(self) -> None:
        cleaned = sanitize_glob_prefix("abc{def")
        assert cleaned == "abcdef"

    def test_strips_close_brace(self) -> None:
        cleaned = sanitize_glob_prefix("abc}def")
        assert cleaned == "abcdef"

    def test_strips_all_metacharacters(self) -> None:
        # The space is not a metacharacter and must survive.
        cleaned = sanitize_glob_prefix("*?[]{} abc")
        assert cleaned == " abc"

    def test_empty_string(self) -> None:
        cleaned = sanitize_glob_prefix("")
        assert cleaned == ""

    def test_hex_prefix_unaffected(self) -> None:
        hex_prefix = "deadbeef01"
        assert sanitize_glob_prefix(hex_prefix) == hex_prefix
407
408
409 # ---------------------------------------------------------------------------
410 # sanitize_display
411 # ---------------------------------------------------------------------------
412
413
class TestSanitizeDisplay:
    """sanitize_display keeps printable text, newlines and tabs; strips other controls."""

    # --- preserved content ---

    def test_clean_ascii_unchanged(self) -> None:
        text = "Hello, World!"
        assert sanitize_display(text) == "Hello, World!"

    def test_newline_preserved(self) -> None:
        text = "line1\nline2"
        assert sanitize_display(text) == text

    def test_tab_preserved(self) -> None:
        text = "col1\tcol2"
        assert sanitize_display(text) == text

    # --- stripped content ---

    def test_strips_ansi_escape_sequence(self) -> None:
        coloured = "\x1b[31mred text\x1b[0m"
        cleaned = sanitize_display(coloured)
        assert "\x1b" not in cleaned
        assert "red text" in cleaned

    def test_strips_bel(self) -> None:
        cleaned = sanitize_display("ring\x07bell")
        assert cleaned == "ringbell"

    def test_strips_null_byte(self) -> None:
        cleaned = sanitize_display("no\x00null")
        assert cleaned == "nonull"

    def test_strips_osc_sequence(self) -> None:
        # Despite the test name, \x9b is the single-byte C1 CSI control (the
        # 8-bit form of ESC [), not OSC, which is ESC ] / \x9d.
        payload = "\x9bmalicious"
        cleaned = sanitize_display(payload)
        assert "\x9b" not in cleaned

    def test_strips_cr(self) -> None:
        cleaned = sanitize_display("text\r")
        assert cleaned == "text"

    def test_strips_vertical_tab(self) -> None:
        cleaned = sanitize_display("text\x0bmore")
        assert cleaned == "textmore"

    def test_strips_form_feed(self) -> None:
        cleaned = sanitize_display("text\x0cmore")
        assert cleaned == "textmore"

    def test_strips_del(self) -> None:
        cleaned = sanitize_display("text\x7fmore")
        assert cleaned == "textmore"

    def test_multiline_message_sanitized(self) -> None:
        message = "commit: \x1b[1mAdd feature\x1b[0m\nSigned-off-by: Alice"
        cleaned = sanitize_display(message)
        assert "\x1b" not in cleaned
        assert "Add feature" in cleaned
        assert "Signed-off-by: Alice" in cleaned

    # --- edge cases ---

    def test_empty_string(self) -> None:
        cleaned = sanitize_display("")
        assert cleaned == ""

    def test_unicode_letters_preserved(self) -> None:
        text = "Héllo Wörld — 日本語"
        assert sanitize_display(text) == text
469
470
471 # ---------------------------------------------------------------------------
472 # clamp_int
473 # ---------------------------------------------------------------------------
474
475
class TestClampInt:
    """clamp_int returns in-range values and raises ValueError outside [lo, hi]."""

    def test_value_in_range_returned_unchanged(self) -> None:
        outcome = clamp_int(5, 1, 10)
        assert outcome == 5

    def test_value_at_lower_bound(self) -> None:
        outcome = clamp_int(1, 1, 10)
        assert outcome == 1

    def test_value_at_upper_bound(self) -> None:
        outcome = clamp_int(10, 1, 10)
        assert outcome == 10

    def test_below_min_raises(self) -> None:
        with pytest.raises(ValueError, match="between"):
            clamp_int(0, 1, 10)

    def test_above_max_raises(self) -> None:
        with pytest.raises(ValueError, match="between"):
            clamp_int(11, 1, 10)

    def test_name_in_error_message(self) -> None:
        # The caller-supplied name must appear in the error text.
        with pytest.raises(ValueError, match="depth"):
            clamp_int(-1, 0, 100, name="depth")

    def test_negative_range(self) -> None:
        outcome = clamp_int(-5, -10, 0)
        assert outcome == -5

    def test_equal_lo_hi(self) -> None:
        outcome = clamp_int(42, 42, 42)
        assert outcome == 42
503
504
505 # ---------------------------------------------------------------------------
506 # finite_float
507 # ---------------------------------------------------------------------------
508
509
class TestFiniteFloat:
    """finite_float returns finite inputs as-is and the fallback for inf or NaN."""

    # --- finite inputs ---

    def test_finite_value_returned_unchanged(self) -> None:
        outcome = finite_float(120.0, 120.0)
        assert outcome == 120.0

    def test_zero_is_finite(self) -> None:
        outcome = finite_float(0.0, 1.0)
        assert outcome == 0.0

    def test_negative_finite_returned(self) -> None:
        outcome = finite_float(-5.5, 0.0)
        assert outcome == -5.5

    def test_large_finite_returned(self) -> None:
        huge = 1e300
        assert finite_float(huge, 0.0) == huge

    # --- non-finite inputs ---

    def test_positive_inf_returns_fallback(self) -> None:
        outcome = finite_float(math.inf, 120.0)
        assert outcome == 120.0

    def test_negative_inf_returns_fallback(self) -> None:
        outcome = finite_float(-math.inf, 120.0)
        assert outcome == 120.0

    def test_nan_returns_fallback(self) -> None:
        outcome = finite_float(math.nan, 120.0)
        assert outcome == 120.0
532
533
534 # ---------------------------------------------------------------------------
535 # Stress: contain_path with many adversarial inputs
536 # ---------------------------------------------------------------------------
537
538
class TestContainPathStress:
    """Fuzz-style checks: feed contain_path many adversarial and benign inputs.

    Adversarial strings must all be rejected; a large batch of ordinary
    relative paths must all be accepted and land under the base directory.
    """

    TRAVERSAL_ATTEMPTS: list[str] = [
        "..",
        "../etc/passwd",
        "../../etc/shadow",
        "sub/../../../etc/passwd",
        "/absolute/path",
        "/",
        "//double-slash",
        # Note: URL-encoded dots (%2e%2e) are NOT traversal from a filesystem
        # perspective — contain_path is a filesystem guard, not an HTTP parser.
        # Null bytes cause an OS-level ValueError, which we also accept.
        "\x00null",
        "sub/\x00null",
    ]

    def test_all_traversal_attempts_rejected(self, tmp_path: pathlib.Path) -> None:
        # Collect every input that slipped through so a failure names the
        # offending strings rather than reporting a bare "DID NOT RAISE".
        accepted: list[str] = []
        for attempt in self.TRAVERSAL_ATTEMPTS:
            try:
                contain_path(tmp_path, attempt)
            except (ValueError, TypeError):
                continue
            accepted.append(attempt)
        assert not accepted, f"contain_path accepted adversarial inputs: {accepted!r}"

    def test_large_number_of_valid_paths_accepted(self, tmp_path: pathlib.Path) -> None:
        base = tmp_path.resolve()  # loop-invariant, so resolve once
        for i in range(200):
            rel = f"subdir/track_{i:04d}.mid"
            result = contain_path(tmp_path, rel)
            # Compare path components, not string prefixes: a sibling such as
            # "<base>-evil" would pass a startswith() check.
            assert base in result.parents, f"{result} is not inside {base}"
File History 1 commit
sha256:248464b6a2f758985cbef90f864fa62c61842be699d975d6e00b6a9509ef919c fix(delta): detect blob-identical file renames for files wi… Sonnet 4.6 patch 24 days ago