test_url_validation.py
file-level
1
files
1
commits
0
hotspots
0
🧊 dead
0
💥 blast risk
| 1 | """Tests for checklist 2.1 — URL segment validation. |
| 2 | |
| 3 | Covers: |
| 4 | - Unit tests for _SLUG_RE regex (owner/repo slugs) |
| 5 | - Unit tests for _BRANCH_RE regex (branch names) and _FILE_PATH_RE regex (file paths) |
| 6 | - Integration tests verifying FastAPI enforces the patterns via 422 |
| 7 | """ |
| 8 | from __future__ import annotations |
| 9 | |
| 10 | import re |
| 11 | |
| 12 | import pytest |
| 13 | from httpx import AsyncClient |
| 14 | |
| 15 | from musehub.api.validation import _SLUG_RE, _BRANCH_RE, _FILE_PATH_RE |
| 16 | |
| 17 | |
| 18 | # --------------------------------------------------------------------------- |
| 19 | # Unit tests — _SLUG_RE |
| 20 | # --------------------------------------------------------------------------- |
| 21 | |
class TestSlugRegexRejects:
    """Inputs that _SLUG_RE must refuse to match in full."""

    @staticmethod
    def _is_rejected(candidate: str) -> bool:
        """Return True when *candidate* is not a full match for _SLUG_RE."""
        return re.fullmatch(_SLUG_RE, candidate) is None

    def test_rejects_dotdot(self) -> None:
        assert self._is_rejected("..")

    def test_rejects_traversal_path(self) -> None:
        assert self._is_rejected("../../etc/passwd")

    def test_rejects_null_byte(self) -> None:
        assert self._is_rejected("null\x00byte")

    def test_rejects_script_tag(self) -> None:
        assert self._is_rejected("<script>")

    def test_rejects_path_with_dotdot_component(self) -> None:
        # A slash is expected to be outside the slug character set entirely.
        assert self._is_rejected("a/../b")

    def test_rejects_empty_string(self) -> None:
        assert self._is_rejected("")

    def test_rejects_101_char_string(self) -> None:
        # The slug pattern is expected to cap length at 100 characters (one
        # leading alphanumeric plus up to 99 more), so 101 must fail.
        assert self._is_rejected("a" * 101)

    def test_rejects_leading_dot(self) -> None:
        # Hidden / dot-file style names: the first character must be
        # alphanumeric.
        assert self._is_rejected(".hidden")

    def test_rejects_leading_hyphen(self) -> None:
        assert self._is_rejected("-bad-start")

    def test_rejects_slash_in_slug(self) -> None:
        # Slashes belong to branch names only, never to slugs.
        assert self._is_rejected("owner/repo")

    def test_rejects_at_sign(self) -> None:
        assert self._is_rejected("@user")

    def test_rejects_space(self) -> None:
        assert self._is_rejected("my repo")
| 65 | |
| 66 | |
class TestSlugRegexAccepts:
    """Inputs that _SLUG_RE must match in full."""

    @staticmethod
    def _is_accepted(candidate: str) -> bool:
        """Return True when *candidate* is a full match for _SLUG_RE."""
        return re.fullmatch(_SLUG_RE, candidate) is not None

    def test_accepts_simple_name(self) -> None:
        assert self._is_accepted("valid-name-123")

    def test_accepts_plain_username(self) -> None:
        assert self._is_accepted("gabriel")

    def test_accepts_name_with_dot_and_underscore(self) -> None:
        assert self._is_accepted("my.repo_name")

    def test_accepts_single_char(self) -> None:
        assert self._is_accepted("a")

    def test_accepts_exactly_100_chars(self) -> None:
        # Upper length boundary: one leading alphanumeric followed by 99 more.
        assert self._is_accepted("a" * 100)

    def test_accepts_numeric_start(self) -> None:
        assert self._is_accepted("1repo")

    def test_accepts_uppercase(self) -> None:
        assert self._is_accepted("MyRepo")
| 92 | |
| 93 | |
| 94 | # --------------------------------------------------------------------------- |
| 95 | # Unit tests — _BRANCH_RE |
| 96 | # --------------------------------------------------------------------------- |
| 97 | |
class TestBranchRegexRejects:
    """Inputs that _BRANCH_RE must refuse to match in full."""

    @staticmethod
    def _is_rejected(candidate: str) -> bool:
        """Return True when *candidate* is not a full match for _BRANCH_RE."""
        return re.fullmatch(_BRANCH_RE, candidate) is None

    def test_rejects_dotdot_traversal_component(self) -> None:
        # The middle ".." component begins with "." rather than an
        # alphanumeric or underscore, so the whole name must fail.
        assert self._is_rejected("feat/../main")

    def test_rejects_leading_dot(self) -> None:
        assert self._is_rejected(".hidden")

    def test_rejects_single_dot_component(self) -> None:
        # A lone "." path component is not a valid branch segment.
        assert self._is_rejected("feat/./foo")

    def test_rejects_empty_string(self) -> None:
        assert self._is_rejected("")

    def test_rejects_dotdot_standalone(self) -> None:
        assert self._is_rejected("..")

    def test_rejects_leading_hyphen(self) -> None:
        assert self._is_rejected("-bad")

    def test_rejects_trailing_slash(self) -> None:
        assert self._is_rejected("feat/")

    def test_rejects_double_slash(self) -> None:
        assert self._is_rejected("feat//foo")
| 126 | |
| 127 | |
class TestBranchRegexAccepts:
    """Inputs that _BRANCH_RE must match in full."""

    @staticmethod
    def _is_accepted(candidate: str) -> bool:
        """Return True when *candidate* is a full match for _BRANCH_RE."""
        return re.fullmatch(_BRANCH_RE, candidate) is not None

    def test_accepts_main(self) -> None:
        assert self._is_accepted("main")

    def test_accepts_namespaced_feature(self) -> None:
        assert self._is_accepted("feat/my-feature")

    def test_accepts_dev(self) -> None:
        assert self._is_accepted("dev")

    def test_accepts_deep_namespace(self) -> None:
        assert self._is_accepted("feat/scope/my_feature")

    def test_accepts_underscore_start(self) -> None:
        assert self._is_accepted("_internal")

    def test_accepts_numeric_start(self) -> None:
        assert self._is_accepted("1fix")

    def test_accepts_hotfix_namespace(self) -> None:
        assert self._is_accepted("hotfix/urgent-patch-1.2.3")
| 151 | |
| 152 | |
| 153 | # --------------------------------------------------------------------------- |
| 154 | # Unit tests — _FILE_PATH_RE (bonus: covers the same structural invariants) |
| 155 | # --------------------------------------------------------------------------- |
| 156 | |
class TestFilePathRegex:
    """Structural checks for _FILE_PATH_RE: traversal is out, dotfiles are in."""

    @staticmethod
    def _fully_matches(path: str) -> bool:
        """Return True when *path* is a full match for _FILE_PATH_RE."""
        return re.fullmatch(_FILE_PATH_RE, path) is not None

    # -- rejected paths ------------------------------------------------------

    def test_rejects_dotdot_component(self) -> None:
        assert not self._fully_matches("src/../etc/passwd")

    def test_rejects_dotdot_standalone(self) -> None:
        assert not self._fully_matches("..")

    def test_rejects_dotdot_segment(self) -> None:
        # NOTE(review): same input as test_rejects_dotdot_component, so this
        # adds no coverage; consider a distinct case or removing one of them.
        assert not self._fully_matches("src/../etc/passwd")

    def test_rejects_dotdot_leading_segment(self) -> None:
        assert not self._fully_matches("../etc/passwd")

    def test_rejects_bare_dot(self) -> None:
        assert not self._fully_matches(".")

    # -- accepted paths ------------------------------------------------------

    def test_accepts_dotfile_root(self) -> None:
        assert self._fully_matches(".museignore")

    def test_accepts_dotfile_nested(self) -> None:
        assert self._fully_matches("config/.museattributes")

    def test_accepts_dotfile_muse_internal(self) -> None:
        assert self._fully_matches(".muse/code_config.toml")

    def test_accepts_simple_file(self) -> None:
        assert self._fully_matches("README")

    def test_accepts_nested_path(self) -> None:
        assert self._fully_matches("src/main.py")

    def test_accepts_deep_path(self) -> None:
        assert self._fully_matches("musehub/api/routes/wire.py")
| 192 | |
| 193 | |
| 194 | # --------------------------------------------------------------------------- |
| 195 | # Integration tests — FastAPI enforces SlugParam / BranchParam at 422 |
| 196 | # --------------------------------------------------------------------------- |
| 197 | |
async def test_dotdot_owner_returns_422(client: AsyncClient) -> None:
    """Request a path made of '..' segments and expect a client-error status.

    Despite the ``422`` in the test name, the assertion passes for any of
    400, 404, or 422.
    """
    rejection_statuses = {400, 404, 422}
    resp = await client.get("/..%2F../refs")
    assert resp.status_code in rejection_statuses
| 202 | |
| 203 | |
async def test_valid_slug_does_not_return_422_on_validation(client: AsyncClient) -> None:
    """Check that well-formed owner/repo slugs are not answered with 422.

    Any other status passes; for example the repo may simply not exist, in
    which case a 404 is acceptable here.
    """
    resp = await client.get("/gabriel/valid-repo/refs")
    # A 422 would mean validation rejected the slug itself.
    slug_was_rejected = resp.status_code == 422
    assert not slug_was_rejected
| 212 | |
| 213 | |
async def test_url_encoded_slash_in_owner_returns_4xx(client: AsyncClient) -> None:
    """Send an owner segment containing ``%2F`` and expect 400, 404, or 422.

    NOTE(review): the intent appears to be that the encoded slash is decoded
    before routing, so the path gains an extra segment and no longer matches
    the route pattern; confirm against the framework's actual behaviour.
    Either way the request must never succeed.
    """
    rejection_statuses = {400, 404, 422}
    resp = await client.get("/gabriel%2Fetc/passwd/refs")
    assert resp.status_code in rejection_statuses
| 223 | |
| 224 | |
async def test_null_byte_in_owner_returns_4xx(client: AsyncClient) -> None:
    """Send an owner segment containing ``%00`` and expect 400, 404, or 422."""
    rejection_statuses = {400, 404, 422}
    resp = await client.get("/owner%00name/repo/refs")
    assert resp.status_code in rejection_statuses
| 229 | |
| 230 | |
async def test_script_tag_in_owner_returns_4xx(client: AsyncClient) -> None:
    """Send a percent-encoded ``<script>`` owner and expect 400, 404, or 422."""
    rejection_statuses = {400, 404, 422}
    resp = await client.get("/%3Cscript%3E/repo/refs")
    assert resp.status_code in rejection_statuses
| 235 | |
| 236 | |
async def test_dot_only_owner_returns_422(client: AsyncClient) -> None:
    """Request ``/./repo/refs`` and expect 400, 404, or 422.

    Despite the ``422`` in the test name, any of the three statuses passes.
    NOTE(review): an HTTP client may normalise the ``.`` segment away before
    sending, in which case this exercises routing rather than slug
    validation; confirm which layer actually rejects the request.
    """
    rejection_statuses = {400, 404, 422}
    resp = await client.get("/./repo/refs")
    assert resp.status_code in rejection_statuses