gabriel / musehub public
snapshots.py python
370 lines 14.2 KB
Raw
sha256:7d6dd8f4a89e2d1fef2d84f6e65feaff51385d382f466766b7f690a22ec18e32 fix: fall back to DB ancestry check when mpack-only fast-fo… Sonnet 4.6 patch 7 days ago
1 """REST API — snapshot endpoints.
2
3 Mounted at /api/repos/{repo_id}/snapshots/...
4
5 Endpoint surface
6 ----------------
7 GET /api/repos/{repo_id}/snapshots
8 List all snapshots for a repo (newest first, paginated).
9 Returns lightweight summaries — no entry data.
10
11 GET /api/repos/{repo_id}/snapshots/{snapshot_id}
12 Full snapshot record: header + all file-tree entries sorted by path.
13 For snapshots with many files use the /entries endpoint with pagination.
14
15 GET /api/repos/{repo_id}/snapshots/{snapshot_id}/entries
16 Paginated file-tree entries for one snapshot.
17 Sets ``X-Snapshot-Entry-Count`` so clients can compute page totals cheaply.
18
19 GET /api/repos/{repo_id}/commits/{commit_id}/snapshot
20 Resolve a commit to its snapshot in one round-trip.
21
22 GET /api/repos/{repo_id}/snapshots/{snapshot_id}/diff
23 File-level diff between two snapshots.
24 Query param: ``base`` — the snapshot_id to compare against.
25
26 POST /api/repos/{repo_id}/snapshots/batch
27 Bulk lookup — resolve up to 100 snapshot IDs without N sequential GETs.
28
29 Security
30 --------
31 All endpoints respect repo visibility. Public repos allow unauthenticated
32 reads; private repos require a valid MSign ``Authorization`` header.
33 Snapshot IDs are content-addressed hashes — there are no sequential IDs to
34 enumerate. Repo membership is verified on every request.
35
36 Agent notes
37 -----------
38 - Pass ``Accept: application/json`` (the default for all these endpoints).
39 - The ``Link`` response header on list endpoints is RFC 8288 — use ``rel="next"``
40 to paginate without parsing URLs.
41 - ``X-Snapshot-Entry-Count`` on the detail endpoint lets you decide whether to
42 paginate entries without a separate COUNT query.
43 - The batch endpoint is the most efficient way to resolve many snapshot IDs —
44 prefer it over parallel single-ID GETs.
45 """
46
47 import logging
48
49 from fastapi import APIRouter, Depends, HTTPException, Query, Request, Response, status
50 from sqlalchemy.ext.asyncio import AsyncSession
51
52 from musehub.api.routes.musehub.pagination import PaginationParams, build_cursor_link_header
53 from musehub.auth.dependencies import TokenClaims, optional_token
54 from musehub.db.database import get_db as get_session
55 from musehub.models.musehub import (
56 RepoResponse,
57 SnapshotBatchRequest,
58 SnapshotDiffResponse,
59 SnapshotEntryListResponse,
60 SnapshotListResponse,
61 SnapshotResponse,
62 SnapshotSummaryResponse,
63 )
64 from musehub.services import musehub_repository
65 from musehub.services import musehub_snapshot as snapshot_svc
66
# Module-level logger named after this module's import path.
# NOTE(review): no handler in this module currently emits a log record.
logger = logging.getLogger(__name__)

# All snapshot routes hang off /api/repos and are grouped under the
# "Snapshots" tag in the generated OpenAPI schema.
router = APIRouter(prefix="/api/repos", tags=["Snapshots"])

# Per-page caps — keep responses bounded for safety.
# NOTE(review): neither cap is referenced by the handlers in this module;
# presumably PaginationParams enforces its own limit — confirm before relying
# on these values.
_MAX_SNAPSHOTS_PER_PAGE = 100
_MAX_ENTRIES_PER_PAGE = 500
74
def _guard(repo: RepoResponse | None, claims: TokenClaims | None, *, repo_id: str) -> None:
    """Reject the request unless the caller is allowed to read *repo*.

    A repo that could not be loaded is always reported as 404 — never 401 —
    so an anonymous caller cannot tell "private" apart from "does not exist"
    and therefore cannot enumerate private repo IDs from status codes.

    A non-public repo requested without any token claims yields 401 with an
    MSign ``WWW-Authenticate`` challenge. Every other combination passes.

    ``repo_id`` is accepted for call-site symmetry but is not read here.

    NOTE(review): only the *presence* of claims is checked for non-public
    repos; this helper does not itself compare the claims against the repo.
    Confirm that membership is enforced elsewhere.
    """
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")

    readable_anonymously = repo.visibility == "public"
    if readable_anonymously or claims is not None:
        return

    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Authentication required to access private repos.",
        headers={"WWW-Authenticate": 'MSign realm="musehub"'},
    )
89
90 # ---------------------------------------------------------------------------
91 # GET /api/repos/{repo_id}/snapshots
92 # ---------------------------------------------------------------------------
93
@router.get(
    "/{repo_id}/snapshots",
    response_model=SnapshotListResponse,
    operation_id="listSnapshots",
    summary="List snapshots for a repo",
)
async def list_snapshots(
    repo_id: str,
    request: Request,
    response: Response,
    pagination: PaginationParams = Depends(PaginationParams),
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> SnapshotListResponse:
    """List snapshot summaries for a repo, newest first, one page at a time.

    Summaries carry ``entry_count``, ``total_size_bytes``, and ``directories``
    but not the file-tree manifest itself; fetch
    ``GET /snapshots/{snapshot_id}`` when the manifest is needed.

    Pagination is cursor-based (keyset over ``created_at``). Feed the
    ``nextCursor`` value of one response into ``?cursor=`` of the next
    request; a null ``nextCursor`` marks the final page.

    HTTP-native clients get the same signal from the
    ``Link: <url>; rel="next"`` response header, which is only present when
    another page exists.
    """
    # Visibility check first: 404 for unknown repos, 401 for private + anonymous.
    _guard(await musehub_repository.get_repo(session, repo_id), claims, repo_id=repo_id)

    page_size = pagination.limit
    listing = await snapshot_svc.list_snapshots(
        session, repo_id, cursor=pagination.cursor, limit=page_size
    )

    following = listing.next_cursor
    if following is None:
        # Last page: no Link header to advertise.
        return listing

    response.headers["Link"] = build_cursor_link_header(request, following, page_size)
    return listing
133
134 # ---------------------------------------------------------------------------
135 # GET /api/repos/{repo_id}/snapshots/{snapshot_id}
136 # ---------------------------------------------------------------------------
137
@router.get(
    "/{repo_id}/snapshots/{snapshot_id}",
    response_model=SnapshotResponse,
    operation_id="getSnapshot",
    summary="Get a snapshot with its full file-tree manifest",
)
async def get_snapshot(
    repo_id: str,
    snapshot_id: str,
    response: Response,
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> SnapshotResponse:
    """Fetch one snapshot together with every entry of its file tree.

    Entries come back sorted alphabetically by path. The entry count is also
    exposed through the ``X-Snapshot-Entry-Count`` response header, so a
    client can decide whether to switch to pagination without parsing the
    body.

    When a repo holds thousands of files, prefer the ``/entries``
    sub-endpoint: it is cursor-paginated and avoids returning the whole
    manifest in a single response.

    A 404 is returned when the snapshot is unknown or is owned by another
    repo.
    """
    _guard(await musehub_repository.get_repo(session, repo_id), claims, repo_id=repo_id)

    record = await snapshot_svc.get_snapshot(session, repo_id, snapshot_id)
    if record is not None:
        response.headers["X-Snapshot-Entry-Count"] = str(record.entry_count)
        return record

    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Snapshot {snapshot_id!r} not found in repo {repo_id!r}",
    )
173
174 # ---------------------------------------------------------------------------
175 # GET /api/repos/{repo_id}/snapshots/{snapshot_id}/entries
176 # ---------------------------------------------------------------------------
177
@router.get(
    "/{repo_id}/snapshots/{snapshot_id}/entries",
    response_model=SnapshotEntryListResponse,
    operation_id="listSnapshotEntries",
    summary="Paginate file-tree entries for a snapshot",
)
async def list_snapshot_entries(
    repo_id: str,
    snapshot_id: str,
    request: Request,
    response: Response,
    pagination: PaginationParams = Depends(PaginationParams),
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> SnapshotEntryListResponse:
    """Return a paginated slice of file-tree entries sorted by path.

    Prefer this endpoint over the full ``GET /snapshots/{snapshot_id}`` when
    the snapshot may contain many files — it never loads the full manifest.

    Cursor-based keyset pagination anchors each page to a stable position in
    the path ordering. Pass ``nextCursor`` from a previous response as
    ``?cursor=`` to advance. A null ``nextCursor`` means this is the last page.

    The ``Link: <url>; rel="next"`` response header (when another page
    exists) and the ``X-Snapshot-Entry-Count`` response header (total
    entries) are set for HTTP-native clients.

    Raises:
        HTTPException: 404 when the repo is unknown or when the snapshot is
            not found in this repo; 401 when the repo is not public and the
            request carries no token claims.
    """
    repo = await musehub_repository.get_repo(session, repo_id)
    _guard(repo, claims, repo_id=repo_id)

    # Resolve the page through the repo-scoped lookup first. The count query
    # below is keyed by snapshot_id alone, so running it before this check
    # would spend a query on snapshots that are unknown or not confirmed to
    # belong to this repo.
    page_result = await snapshot_svc.get_snapshot_entries_page(
        session, repo_id, snapshot_id, cursor=pagination.cursor, limit=pagination.limit
    )
    if page_result is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Snapshot {snapshot_id!r} not found in repo {repo_id!r}",
        )

    # The snapshot is now known to exist in this repo — expose its total
    # entry count so clients can compute page totals.
    entry_count = await snapshot_svc.count_snapshot_entries(session, snapshot_id)
    response.headers["X-Snapshot-Entry-Count"] = str(entry_count)

    if page_result.next_cursor is not None:
        response.headers["Link"] = build_cursor_link_header(
            request, page_result.next_cursor, pagination.limit
        )
    return page_result
226
227 # ---------------------------------------------------------------------------
228 # GET /api/repos/{repo_id}/commits/{commit_id}/snapshot
229 # ---------------------------------------------------------------------------
230
@router.get(
    "/{repo_id}/commits/{commit_id}/snapshot",
    response_model=SnapshotResponse,
    operation_id="getCommitSnapshot",
    summary="Get the snapshot attached to a commit",
)
async def get_commit_snapshot(
    repo_id: str,
    commit_id: str,
    response: Response,
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> SnapshotResponse:
    """Go from a commit ID straight to its full snapshot in a single request.

    This saves the two-step dance of ``GET /commits/{commit_id}`` (to learn
    ``snapshot_id``) followed by ``GET /snapshots/{snapshot_id}``.

    The ``X-Snapshot-Entry-Count`` response header carries the snapshot's
    entry count.

    A 404 is returned when the commit is unknown, is owned by another repo,
    or has no snapshot attached (for example an empty initial commit).
    """
    _guard(await musehub_repository.get_repo(session, repo_id), claims, repo_id=repo_id)

    record = await snapshot_svc.get_snapshot_for_commit(session, repo_id, commit_id)
    if record is not None:
        response.headers["X-Snapshot-Entry-Count"] = str(record.entry_count)
        return record

    # The service does not say which of the three causes applied, so the
    # message lists all of them.
    not_found_detail = (
        f"No snapshot found for commit {commit_id!r} in repo {repo_id!r}. "
        "The commit may not exist, belong to a different repo, or have no "
        "snapshot attached."
    )
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=not_found_detail)
267
268 # ---------------------------------------------------------------------------
269 # GET /api/repos/{repo_id}/snapshots/{snapshot_id}/diff
270 # ---------------------------------------------------------------------------
271
@router.get(
    "/{repo_id}/snapshots/{snapshot_id}/diff",
    response_model=SnapshotDiffResponse,
    operation_id="diffSnapshots",
    summary="File-level diff between two snapshots",
)
async def diff_snapshots(
    repo_id: str,
    snapshot_id: str,
    base: str = Query(
        ...,
        description="snapshot_id of the base (older) snapshot to compare against",
    ),
    include_unchanged: bool = Query(
        False,
        alias="includeUnchanged",
        description=(
            "Emit 'unchanged' entries for files identical in both snapshots. "
            "Off by default — unchanged files dominate large repos."
        ),
    ),
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> SnapshotDiffResponse:
    """Compute a file-level diff from *base* (old) to *snapshot_id* (new).

    Every item in ``changes`` has a ``status`` of
    added|removed|modified|unchanged, the object IDs on both sides, and size
    deltas.

    Comparing a snapshot with itself is rejected with 422. If either snapshot
    cannot be found in the repo the response is 404.

    Agent note: narrow ``changes`` by ``status`` for targeted analysis —
    ``"modified"`` for content changes, ``"added"`` for new files.
    ``bytes_added`` and ``bytes_removed`` report storage deltas without
    walking the full change list.
    """
    _guard(await musehub_repository.get_repo(session, repo_id), claims, repo_id=repo_id)

    # Checked only after the visibility guard, so repo errors take precedence.
    if base == snapshot_id:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail="snapshot_id and base must be different snapshots",
        )

    delta = await snapshot_svc.diff_snapshots(
        session, repo_id, snapshot_id, base, include_unchanged=include_unchanged
    )
    if delta is not None:
        return delta

    missing_detail = (
        f"One or both snapshots not found in repo {repo_id!r}: "
        f"{snapshot_id!r}, {base!r}"
    )
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=missing_detail)
327
328 # ---------------------------------------------------------------------------
329 # POST /api/repos/{repo_id}/snapshots/batch
330 # ---------------------------------------------------------------------------
331
@router.post(
    "/{repo_id}/snapshots/batch",
    response_model=list[SnapshotResponse | SnapshotSummaryResponse],
    operation_id="batchGetSnapshots",
    summary="Bulk-resolve up to 100 snapshot IDs",
)
async def batch_get_snapshots(
    repo_id: str,
    body: SnapshotBatchRequest,
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> list[SnapshotResponse | SnapshotSummaryResponse]:
    """Look up as many as 100 snapshot IDs with a single request.

    IDs that are unknown or that belong to another repo are dropped without
    an error — compare ``len(response)`` with the number of IDs sent when
    completeness matters. Sending more than 100 IDs is rejected with 422.

    With ``include_entries: true`` in the body each result is a full
    ``SnapshotResponse`` (file-tree entries included). Leaving it out, or
    sending ``false``, yields lightweight ``SnapshotSummaryResponse`` objects.

    Agent note: this is the cheapest way to fill a snapshot browser or to
    resolve a list of commit snapshot IDs — one POST beats N parallel GETs.
    """
    _guard(await musehub_repository.get_repo(session, repo_id), claims, repo_id=repo_id)

    requested_ids = body.snapshot_ids
    if len(requested_ids) > 100:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail="snapshot_ids must contain at most 100 entries",
        )

    resolved = await snapshot_svc.batch_get_snapshots(
        session,
        repo_id,
        requested_ids,
        include_entries=body.include_entries,
    )
    return resolved
File History 1 commit
sha256:7d6dd8f4a89e2d1fef2d84f6e65feaff51385d382f466766b7f690a22ec18e32 fix: fall back to DB ancestry check when mpack-only fast-fo… Sonnet 4.6 patch 7 days ago