gabriel / musehub public

test_url_validation.py file-level

at sha256:3 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:0 fix: fall back to any indexed mpack in read_object_bytes when push mpac… · gabriel · Jun 17, 2026
1 """Tests for checklist 2.1 — URL segment validation.
2
3 Covers:
4 - Unit tests for _SLUG_RE regex (owner/repo slugs)
5 - Unit tests for _BRANCH_RE regex (branch names) and _FILE_PATH_RE regex (file paths)
6 - Integration tests verifying the API rejects malformed segments with a client error (400, 404, or 422)
7 """
8 from __future__ import annotations
9
10 import re
11
12 import pytest
13 from httpx import AsyncClient
14
15 from musehub.api.validation import _SLUG_RE, _BRANCH_RE, _FILE_PATH_RE
16
17
18 # ---------------------------------------------------------------------------
19 # Unit tests — _SLUG_RE
20 # ---------------------------------------------------------------------------
21
class TestSlugRegexRejects:
    """Inputs that ``_SLUG_RE`` must refuse when matched against the whole string."""

    @staticmethod
    def _is_rejected(candidate: str) -> bool:
        """Return True when *candidate* is not a full match for ``_SLUG_RE``."""
        return re.fullmatch(_SLUG_RE, candidate) is None

    def test_rejects_dotdot(self) -> None:
        assert self._is_rejected("..")

    def test_rejects_traversal_path(self) -> None:
        assert self._is_rejected("../../etc/passwd")

    def test_rejects_null_byte(self) -> None:
        assert self._is_rejected("null\x00byte")

    def test_rejects_script_tag(self) -> None:
        assert self._is_rejected("<script>")

    def test_rejects_path_with_dotdot_component(self) -> None:
        # A slash is not part of the slug character set, so this fails
        # regardless of the ".." component.
        assert self._is_rejected("a/../b")

    def test_rejects_empty_string(self) -> None:
        assert self._is_rejected("")

    def test_rejects_101_char_string(self) -> None:
        # The pattern is described as [a-zA-Z0-9][a-zA-Z0-9_.-]{0,99}, i.e. at
        # most 100 characters, so a 101-character slug is one too many.
        oversized = "a" * 101
        assert self._is_rejected(oversized)

    def test_rejects_leading_dot(self) -> None:
        # Slugs must begin with an alphanumeric character; hidden-file style
        # names are therefore invalid.
        assert self._is_rejected(".hidden")

    def test_rejects_leading_hyphen(self) -> None:
        assert self._is_rejected("-bad-start")

    def test_rejects_slash_in_slug(self) -> None:
        # Slashes belong to branch names only, never to slugs.
        assert self._is_rejected("owner/repo")

    def test_rejects_at_sign(self) -> None:
        assert self._is_rejected("@user")

    def test_rejects_space(self) -> None:
        assert self._is_rejected("my repo")
65
66
class TestSlugRegexAccepts:
    """Inputs that ``_SLUG_RE`` must fully match."""

    @staticmethod
    def _is_accepted(candidate: str) -> bool:
        """Return True when *candidate* is a full match for ``_SLUG_RE``."""
        return re.fullmatch(_SLUG_RE, candidate) is not None

    def test_accepts_simple_name(self) -> None:
        assert self._is_accepted("valid-name-123")

    def test_accepts_plain_username(self) -> None:
        assert self._is_accepted("gabriel")

    def test_accepts_name_with_dot_and_underscore(self) -> None:
        assert self._is_accepted("my.repo_name")

    def test_accepts_single_char(self) -> None:
        assert self._is_accepted("a")

    def test_accepts_exactly_100_chars(self) -> None:
        # Upper length boundary: one leading alphanumeric plus 99 more.
        longest_allowed = "a" * 100
        assert self._is_accepted(longest_allowed)

    def test_accepts_numeric_start(self) -> None:
        assert self._is_accepted("1repo")

    def test_accepts_uppercase(self) -> None:
        assert self._is_accepted("MyRepo")
92
93
94 # ---------------------------------------------------------------------------
95 # Unit tests — _BRANCH_RE
96 # ---------------------------------------------------------------------------
97
class TestBranchRegexRejects:
    """Inputs that ``_BRANCH_RE`` must refuse when matched against the whole string."""

    @staticmethod
    def _is_rejected(candidate: str) -> bool:
        """Return True when *candidate* is not a full match for ``_BRANCH_RE``."""
        return re.fullmatch(_BRANCH_RE, candidate) is None

    def test_rejects_dotdot_traversal_component(self) -> None:
        # In "feat/../main" the ".." component begins with "." rather than a
        # character from [a-zA-Z0-9_].
        assert self._is_rejected("feat/../main")

    def test_rejects_leading_dot(self) -> None:
        assert self._is_rejected(".hidden")

    def test_rejects_single_dot_component(self) -> None:
        # In "feat/./foo" the lone "." is not a valid component.
        assert self._is_rejected("feat/./foo")

    def test_rejects_empty_string(self) -> None:
        assert self._is_rejected("")

    def test_rejects_dotdot_standalone(self) -> None:
        assert self._is_rejected("..")

    def test_rejects_leading_hyphen(self) -> None:
        assert self._is_rejected("-bad")

    def test_rejects_trailing_slash(self) -> None:
        assert self._is_rejected("feat/")

    def test_rejects_double_slash(self) -> None:
        assert self._is_rejected("feat//foo")
126
127
class TestBranchRegexAccepts:
    """Inputs that ``_BRANCH_RE`` must fully match."""

    @staticmethod
    def _is_accepted(candidate: str) -> bool:
        """Return True when *candidate* is a full match for ``_BRANCH_RE``."""
        return re.fullmatch(_BRANCH_RE, candidate) is not None

    def test_accepts_main(self) -> None:
        assert self._is_accepted("main")

    def test_accepts_namespaced_feature(self) -> None:
        assert self._is_accepted("feat/my-feature")

    def test_accepts_dev(self) -> None:
        assert self._is_accepted("dev")

    def test_accepts_deep_namespace(self) -> None:
        assert self._is_accepted("feat/scope/my_feature")

    def test_accepts_underscore_start(self) -> None:
        assert self._is_accepted("_internal")

    def test_accepts_numeric_start(self) -> None:
        assert self._is_accepted("1fix")

    def test_accepts_hotfix_namespace(self) -> None:
        assert self._is_accepted("hotfix/urgent-patch-1.2.3")
151
152
153 # ---------------------------------------------------------------------------
154 # Unit tests — _FILE_PATH_RE (bonus: covers the same structural invariants)
155 # ---------------------------------------------------------------------------
156
class TestFilePathRegex:
    """Structural checks for ``_FILE_PATH_RE``: traversal is refused, dotfiles are allowed."""

    @staticmethod
    def _full_match(path: str) -> re.Match[str] | None:
        """Return the full-string match of *path* against ``_FILE_PATH_RE``, or None."""
        return re.fullmatch(_FILE_PATH_RE, path)

    # --- paths that must be refused ------------------------------------

    def test_rejects_dotdot_component(self) -> None:
        assert self._full_match("src/../etc/passwd") is None

    def test_rejects_dotdot_standalone(self) -> None:
        assert self._full_match("..") is None

    def test_rejects_dotdot_segment(self) -> None:
        # NOTE(review): this uses the same input as
        # test_rejects_dotdot_component, so it adds no extra coverage —
        # consider pointing it at a different ".." placement.
        assert self._full_match("src/../etc/passwd") is None

    def test_rejects_dotdot_leading_segment(self) -> None:
        assert self._full_match("../etc/passwd") is None

    def test_rejects_bare_dot(self) -> None:
        assert self._full_match(".") is None

    # --- paths that must be allowed ------------------------------------

    def test_accepts_dotfile_root(self) -> None:
        assert self._full_match(".museignore") is not None

    def test_accepts_dotfile_nested(self) -> None:
        assert self._full_match("config/.museattributes") is not None

    def test_accepts_dotfile_muse_internal(self) -> None:
        assert self._full_match(".muse/code_config.toml") is not None

    def test_accepts_simple_file(self) -> None:
        assert self._full_match("README") is not None

    def test_accepts_nested_path(self) -> None:
        assert self._full_match("src/main.py") is not None

    def test_accepts_deep_path(self) -> None:
        assert self._full_match("musehub/api/routes/wire.py") is not None
192
193
194 # ---------------------------------------------------------------------------
195 # Integration tests — FastAPI enforces SlugParam / BranchParam at 422
196 # ---------------------------------------------------------------------------
197
async def test_dotdot_owner_returns_422(client: AsyncClient) -> None:
    """Request a path built from '..' segments and expect a client-error status.

    Despite the ``422`` in the test name, the assertion accepts any of
    400, 404, or 422.
    """
    rejection_codes = {400, 404, 422}
    reply = await client.get("/..%2F../refs")
    assert reply.status_code in rejection_codes
202
203
async def test_valid_slug_does_not_return_422_on_validation(client: AsyncClient) -> None:
    """Check that well-formed owner/repo slugs are not refused with 422.

    Any other status is acceptable here — for example 404 if the repo does
    not exist — because only slug validation is under test.
    """
    reply = await client.get("/gabriel/valid-repo/refs")
    # A 422 would mean the slug itself failed validation.
    slug_was_rejected = reply.status_code == 422
    assert not slug_was_rejected
212
213
async def test_url_encoded_slash_in_owner_returns_4xx(client: AsyncClient) -> None:
    """Check that a percent-encoded slash in the owner segment is refused.

    Rationale recorded with this test: ``%2F`` is expected to be decoded
    before routing, so 'gabriel%2Fetc' stops being a single owner segment and
    the path no longer fits the route shape.
    NOTE(review): that decoding behaviour is not verifiable from this file.

    The assertion accepts 400, 404, or 422; a success status fails the test.
    """
    rejection_codes = {400, 404, 422}
    reply = await client.get("/gabriel%2Fetc/passwd/refs")
    assert reply.status_code in rejection_codes
223
224
async def test_null_byte_in_owner_returns_4xx(client: AsyncClient) -> None:
    """Check that an owner segment containing ``%00`` is refused with 400, 404, or 422."""
    rejection_codes = {400, 404, 422}
    reply = await client.get("/owner%00name/repo/refs")
    assert reply.status_code in rejection_codes
229
230
async def test_script_tag_in_owner_returns_4xx(client: AsyncClient) -> None:
    """Check that a percent-encoded ``<script>`` owner segment is refused with 400, 404, or 422."""
    rejection_codes = {400, 404, 422}
    reply = await client.get("/%3Cscript%3E/repo/refs")
    assert reply.status_code in rejection_codes
235
236
async def test_dot_only_owner_returns_422(client: AsyncClient) -> None:
    """Check that a lone '.' as the owner segment is refused.

    Despite the ``422`` in the test name, the assertion accepts any of
    400, 404, or 422.
    """
    rejection_codes = {400, 404, 422}
    reply = await client.get("/./repo/refs")
    assert reply.status_code in rejection_codes