gabriel / musehub public
test_zlib_object_decompression.py python
334 lines 10.8 KB
Raw
sha256:0997d6250ae6476362f6fe2025af7789f46d03df3e9f34356d5e8ee79b201923 fix(issues): use issue number as pagination cursor, not cre… Sonnet 4.6 patch 8 days ago
"""Tests for zlib-compressed object decompression in raw file serving and README fetching.

Root cause: objects pushed via the old wire path were stored zlib-compressed in R2.
The raw file endpoint and _fetch_readme read bytes directly from storage without
decompressing them, so staging served garbled bytes while localhost (filesystem
backend with repair logic) served clean text.

Fix: detect the zlib magic bytes and decompress before serving — in both
_fetch_readme and raw_file_semantic.
"""
11 from __future__ import annotations
12
13 import zlib
14 from unittest.mock import AsyncMock, MagicMock, patch
15
16 import pytest
17 import pytest_asyncio
18 from httpx import AsyncClient, ASGITransport
19
20 from sqlalchemy.ext.asyncio import AsyncSession
21
22 from musehub.db.musehub_repo_models import MusehubObject
23 from musehub.main import app
24
25
26 # ---------------------------------------------------------------------------
27 # Helpers
28 # ---------------------------------------------------------------------------
29
30 def _zlib_compress(text: str) -> bytes:
31 return zlib.compress(text.encode())
32
33
34 def _raw_bytes(text: str) -> bytes:
35 return text.encode()
36
37
# Shared fixture payload: one README body in its plain-text form, plus the
# zlib-compressed and uncompressed byte encodings of that same text.
README_TEXT: str = "# muse-zsh\n\nOh My ZSH plugin for Muse.\n"
ZLIB_README: bytes = _zlib_compress(README_TEXT)
RAW_README: bytes = _raw_bytes(README_TEXT)
41
42
43 # ---------------------------------------------------------------------------
44 # Unit tests — decompress_if_needed utility
45 # ---------------------------------------------------------------------------
46
def test_decompress_if_needed_passes_plain_text_through() -> None:
    """Uncompressed text bytes must come back exactly as they went in."""
    from musehub.types.compression import decompress_if_needed

    data = b"# plain text README\n"
    assert decompress_if_needed(data) == data
51
52
def test_decompress_if_needed_decompresses_zlib_level_default() -> None:
    """A payload compressed at zlib's default level is inflated to the original bytes."""
    from musehub.types.compression import decompress_if_needed

    compressed = zlib.compress(b"hello world\n")
    assert decompress_if_needed(compressed) == b"hello world\n"
57
58
def test_decompress_if_needed_decompresses_zlib_level_1() -> None:
    """A payload compressed at level 1 is inflated to the original bytes."""
    from musehub.types.compression import decompress_if_needed

    compressed = zlib.compress(b"hello\n", level=1)
    assert decompress_if_needed(compressed) == b"hello\n"
63
64
def test_decompress_if_needed_decompresses_zlib_level_9() -> None:
    """A payload compressed at level 9 is inflated to the original bytes."""
    from musehub.types.compression import decompress_if_needed

    compressed = zlib.compress(b"hello\n", level=9)
    assert decompress_if_needed(compressed) == b"hello\n"
69
70
def test_decompress_if_needed_passes_empty_bytes_through() -> None:
    """Empty input must be returned as empty bytes."""
    from musehub.types.compression import decompress_if_needed

    assert decompress_if_needed(b"") == b""
74
75
def test_decompress_if_needed_passes_binary_non_zlib_through() -> None:
    """Binary data that does not start with a zlib header is returned unchanged."""
    from musehub.types.compression import decompress_if_needed

    # PNG magic bytes — not zlib
    data = b"\x89PNG\r\n\x1a\n" + b"\x00" * 100
    assert decompress_if_needed(data) == data
81
82
def test_decompress_if_needed_handles_truncated_zlib_gracefully() -> None:
    """A zlib header followed by an invalid body must not raise.

    The expected fallback is that the input bytes are handed back untouched.
    """
    from musehub.types.compression import decompress_if_needed

    # zlib magic bytes but truncated — should return original bytes, not raise
    data = b"\x78\x9c\x00"  # valid zlib header, invalid body
    result = decompress_if_needed(data)
    # Pin the fallback itself rather than only the return type: a bare
    # isinstance check would also pass if the helper returned b"" or any
    # other partially-decoded bytes.
    assert result == data
90
91
def test_decompress_if_needed_decompresses_full_readme() -> None:
    """The compressed README fixture inflates and decodes back to README_TEXT."""
    from musehub.types.compression import decompress_if_needed

    inflated = decompress_if_needed(ZLIB_README)
    assert inflated.decode() == README_TEXT
95
96
97 # ---------------------------------------------------------------------------
98 # Unit tests — _fetch_readme decompresses stored objects
99 # ---------------------------------------------------------------------------
100
@pytest.mark.asyncio
async def test_fetch_readme_decompresses_zlib_object(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """_fetch_readme must decode zlib-compressed objects from storage."""
    from musehub.api.routes.musehub._ui_helpers import _fetch_readme

    # Fake DB row whose cached content is the zlib-compressed README.
    # NOTE(review): storage_uri is left empty, presumably so the helper reads
    # content_cache instead of going to a storage backend — confirm against
    # _fetch_readme.
    fake_obj = MagicMock()
    fake_obj.content_cache = ZLIB_README
    fake_obj.storage_uri = ""
    mock_db = MagicMock()
    mock_db.get = AsyncMock(return_value=fake_obj)

    class _Entry:
        """Minimal stand-in for a tree entry: only name and object_id are set."""

        name = "README.md"
        object_id = "abc123"

    result = await _fetch_readme(
        db=mock_db,
        repo_id="repo-1",
        ref="main",
        entries=[_Entry()],  # type: ignore[list-item]
    )
    assert result == README_TEXT
125
126
@pytest.mark.asyncio
async def test_fetch_readme_plain_text_object_unchanged(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """_fetch_readme must pass through already-plain-text objects unchanged."""
    from musehub.api.routes.musehub._ui_helpers import _fetch_readme

    # Fake DB row whose cached content is the uncompressed README bytes.
    fake_obj = MagicMock()
    fake_obj.content_cache = RAW_README
    fake_obj.storage_uri = ""
    mock_db = MagicMock()
    mock_db.get = AsyncMock(return_value=fake_obj)

    class _Entry:
        """Minimal stand-in for a tree entry: only name and object_id are set."""

        name = "README.md"
        object_id = "abc123"

    result = await _fetch_readme(
        db=mock_db,
        repo_id="repo-1",
        ref="main",
        entries=[_Entry()],  # type: ignore[list-item]
    )
    assert result == README_TEXT
151
152
153 # ---------------------------------------------------------------------------
154 # Integration tests — raw_file_semantic endpoint decompresses
155 # ---------------------------------------------------------------------------
156
@pytest.mark.asyncio
async def test_raw_endpoint_decompresses_zlib_object(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """GET /owner/repo/raw/ref/README.md must return plain text even when the
    stored object is zlib-compressed."""
    from collections.abc import AsyncIterator

    from musehub.api.routes.musehub import ui_tree

    db_session.add(
        MusehubObject(
            object_id="deadbeef",
            path="README.md",
            size_bytes=len(ZLIB_README),
            storage_uri="s3://muse-objects/objects/deadbeef",
        )
    )
    await db_session.commit()

    # Stub repo resolution
    monkeypatch.setattr(
        ui_tree,
        "_resolve_repo",
        AsyncMock(return_value=("repo-id-1", MagicMock(), MagicMock())),
    )

    # Stub file metadata lookup
    monkeypatch.setattr(
        ui_tree.musehub_repository,
        "get_file_at_ref",
        AsyncMock(return_value={"object_id": "deadbeef"}),
    )

    # Stub storage: exists=True, stream yields zlib-compressed bytes
    mock_storage = MagicMock()
    mock_storage.exists = AsyncMock(return_value=True)

    async def _compressed_stream(
        object_id: str, chunk_size: int = 65536
    ) -> AsyncIterator[bytes]:
        # Async generator: yields the whole compressed payload as one chunk.
        yield ZLIB_README

    mock_storage.stream = _compressed_stream
    monkeypatch.setattr(ui_tree, "_get_storage_backend", lambda *_: mock_storage)

    resp = await client.get("/gabriel/muse-zsh/raw/main/README.md")
    assert resp.status_code == 200
    assert resp.text == README_TEXT
203
204
@pytest.mark.asyncio
async def test_raw_endpoint_plain_text_object_unchanged(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """GET /owner/repo/raw/ref/README.md must pass through plain text unchanged."""
    from collections.abc import AsyncIterator

    from musehub.api.routes.musehub import ui_tree

    db_session.add(
        MusehubObject(
            object_id="deadbeef",
            path="README.md",
            size_bytes=len(RAW_README),
            storage_uri="s3://muse-objects/objects/deadbeef",
        )
    )
    await db_session.commit()

    # Stub repo resolution
    monkeypatch.setattr(
        ui_tree,
        "_resolve_repo",
        AsyncMock(return_value=("repo-id-1", MagicMock(), MagicMock())),
    )

    # Stub file metadata lookup
    monkeypatch.setattr(
        ui_tree.musehub_repository,
        "get_file_at_ref",
        AsyncMock(return_value={"object_id": "deadbeef"}),
    )

    # Stub storage: exists=True, stream yields the uncompressed bytes
    mock_storage = MagicMock()
    mock_storage.exists = AsyncMock(return_value=True)

    async def _plain_stream(
        object_id: str, chunk_size: int = 65536
    ) -> AsyncIterator[bytes]:
        # Async generator: yields the whole plain payload as one chunk.
        yield RAW_README

    mock_storage.stream = _plain_stream
    monkeypatch.setattr(ui_tree, "_get_storage_backend", lambda *_: mock_storage)

    resp = await client.get("/gabriel/muse-zsh/raw/main/README.md")
    assert resp.status_code == 200
    assert resp.text == README_TEXT
245
246
@pytest.mark.asyncio
async def test_raw_endpoint_zlib_toml_file_decompressed(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Non-README files (e.g. .toml) are also decompressed when zlib-stored."""
    from collections.abc import AsyncIterator

    from musehub.api.routes.musehub import ui_tree

    toml_content = '[workspace]\nversion = 1\n'
    compressed_toml = zlib.compress(toml_content.encode())

    db_session.add(
        MusehubObject(
            object_id="deadbeef",
            path=".museattributes",
            size_bytes=len(compressed_toml),
            storage_uri="s3://muse-objects/objects/deadbeef",
        )
    )
    await db_session.commit()

    # Stub repo resolution
    monkeypatch.setattr(
        ui_tree,
        "_resolve_repo",
        AsyncMock(return_value=("repo-id-1", MagicMock(), MagicMock())),
    )

    # Stub file metadata lookup
    monkeypatch.setattr(
        ui_tree.musehub_repository,
        "get_file_at_ref",
        AsyncMock(return_value={"object_id": "deadbeef"}),
    )

    # Stub storage: exists=True, stream yields the zlib-compressed TOML bytes
    mock_storage = MagicMock()
    mock_storage.exists = AsyncMock(return_value=True)

    async def _compressed_stream(
        object_id: str, chunk_size: int = 65536
    ) -> AsyncIterator[bytes]:
        # Async generator: yields the whole compressed payload as one chunk.
        yield compressed_toml

    mock_storage.stream = _compressed_stream
    monkeypatch.setattr(ui_tree, "_get_storage_backend", lambda *_: mock_storage)

    resp = await client.get("/gabriel/muse-zsh/raw/main/.museattributes")
    assert resp.status_code == 200
    assert resp.text == toml_content
290
291
@pytest.mark.asyncio
async def test_raw_endpoint_binary_file_not_decompressed(
    client: AsyncClient,
    db_session: AsyncSession,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Binary files (PNG etc.) must not be decompressed — their bytes are served raw."""
    from collections.abc import AsyncIterator

    from musehub.api.routes.musehub import ui_tree

    # PNG magic — starts with bytes that are NOT a zlib header
    png_bytes = b"\x89PNG\r\n\x1a\n" + b"\x00" * 50

    db_session.add(
        MusehubObject(
            object_id="deadbeef",
            path="logo.png",
            size_bytes=len(png_bytes),
            storage_uri="s3://muse-objects/objects/deadbeef",
        )
    )
    await db_session.commit()

    # Stub repo resolution
    monkeypatch.setattr(
        ui_tree,
        "_resolve_repo",
        AsyncMock(return_value=("repo-id-1", MagicMock(), MagicMock())),
    )

    # Stub file metadata lookup
    monkeypatch.setattr(
        ui_tree.musehub_repository,
        "get_file_at_ref",
        AsyncMock(return_value={"object_id": "deadbeef"}),
    )

    # Stub storage: exists=True, stream yields the PNG bytes as-is
    mock_storage = MagicMock()
    mock_storage.exists = AsyncMock(return_value=True)

    async def _png_stream(
        object_id: str, chunk_size: int = 65536
    ) -> AsyncIterator[bytes]:
        # Async generator: yields the whole binary payload as one chunk.
        yield png_bytes

    mock_storage.stream = _png_stream
    monkeypatch.setattr(ui_tree, "_get_storage_backend", lambda *_: mock_storage)

    resp = await client.get("/gabriel/muse-zsh/raw/main/logo.png")
    assert resp.status_code == 200
    # Compare raw bytes, not text: the payload is not valid UTF-8.
    assert resp.content == png_bytes
File History 1 commit
sha256:0997d6250ae6476362f6fe2025af7789f46d03df3e9f34356d5e8ee79b201923 fix(issues): use issue number as pagination cursor, not cre… Sonnet 4.6 patch 8 days ago