musehub/api/routes/api/snapshots.py · gabriel/musehub

snapshots.py python

370 lines 14.2 KB

sha256:7d6dd8f4a89e2d1fef2d84f6e65feaff51385d382f466766b7f690a22ec18e32 fix: fall back to DB ancestry check when mpack-only fast-fo… Sonnet 4.6 patch 7 days ago

1	"""REST API — snapshot endpoints.
2
3	Mounted at /api/repos/{repo_id}/snapshots/...
4
5	Endpoint surface
6	----------------
7	GET /api/repos/{repo_id}/snapshots
8	List all snapshots for a repo (newest first, paginated).
9	Returns lightweight summaries — no entry data.
10
11	GET /api/repos/{repo_id}/snapshots/{snapshot_id}
12	Full snapshot record: header + all file-tree entries sorted by path.
13	For snapshots with many files use the /entries endpoint with pagination.
14
15	GET /api/repos/{repo_id}/snapshots/{snapshot_id}/entries
16	Paginated file-tree entries for one snapshot.
17	Sets ``X-Snapshot-Entry-Count`` so clients can compute page totals cheaply.
18
19	GET /api/repos/{repo_id}/commits/{commit_id}/snapshot
20	Resolve a commit to its snapshot in one round-trip.
21
22	GET /api/repos/{repo_id}/snapshots/{snapshot_id}/diff
23	File-level diff between two snapshots.
24	Query param: ``base`` — the snapshot_id to compare against.
25
26	POST /api/repos/{repo_id}/snapshots/batch
27	Bulk lookup — resolve up to 100 snapshot IDs without N sequential GETs.
28
29	Security
30	--------
31	All endpoints respect repo visibility. Public repos allow unauthenticated
32	reads; private repos require a valid MSign ``Authorization`` header.
33	Snapshot IDs are content-addressed hashes — there are no sequential IDs to
34	enumerate. Repo membership is verified on every request.
35
36	Agent notes
37	-----------
38	- Pass ``Accept: application/json`` (the default for all these endpoints).
39	- The ``Link`` response header on list endpoints is RFC 8288 — use ``rel="next"``
40	to paginate without parsing URLs.
41	- ``X-Snapshot-Entry-Count`` on the detail endpoint lets you decide whether to
42	paginate entries without a separate COUNT query.
43	- The batch endpoint is the most efficient way to resolve many snapshot IDs —
44	prefer it over parallel single-ID GETs.
45	"""
46
47	import logging
48
49	from fastapi import APIRouter, Depends, HTTPException, Query, Request, Response, status
50	from sqlalchemy.ext.asyncio import AsyncSession
51
52	from musehub.api.routes.musehub.pagination import PaginationParams, build_cursor_link_header
53	from musehub.auth.dependencies import TokenClaims, optional_token
54	from musehub.db.database import get_db as get_session
55	from musehub.models.musehub import (
56	RepoResponse,
57	SnapshotBatchRequest,
58	SnapshotDiffResponse,
59	SnapshotEntryListResponse,
60	SnapshotListResponse,
61	SnapshotResponse,
62	SnapshotSummaryResponse,
63	)
64	from musehub.services import musehub_repository
65	from musehub.services import musehub_snapshot as snapshot_svc
66
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Router for every snapshot endpoint in this file. All route paths declared
# below are relative to the "/api/repos" prefix and carry the "Snapshots" tag.
router = APIRouter(prefix="/api/repos", tags=["Snapshots"])

# Per-page caps — keep responses bounded for safety.
# NOTE(review): neither constant is referenced by the handlers visible in this
# file — confirm the limits are actually enforced elsewhere (presumably by
# PaginationParams) or wire them in here.
_MAX_SNAPSHOTS_PER_PAGE = 100
_MAX_ENTRIES_PER_PAGE = 500
74
def _guard(repo: RepoResponse | None, claims: TokenClaims | None, *, repo_id: str) -> None:
    """Reject callers that are not allowed to read ``repo``.

    Args:
        repo: The repository record, or ``None`` when the lookup found nothing.
        claims: Token claims of the caller, or ``None`` for anonymous requests.
        repo_id: Identifier the caller asked for. Not read by the checks below.

    Raises:
        HTTPException: 404 when ``repo`` is ``None``; 401 (with an MSign
            ``WWW-Authenticate`` challenge) when the repo is not public and
            the request carries no claims.
    """
    # Existence is tested before authentication, so an unknown repo ID yields
    # 404 regardless of whether the caller presented a token.
    if repo is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Repo not found")

    is_public = repo.visibility == "public"
    is_authenticated = claims is not None
    if is_public or is_authenticated:
        return

    # NOTE(review): any non-None claims pass this check; no per-repo membership
    # test happens here — confirm that is enforced elsewhere if required.
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Authentication required to access private repos.",
        headers={"WWW-Authenticate": 'MSign realm="musehub"'},
    )
89
90	# ---------------------------------------------------------------------------
91	# GET /api/repos/{repo_id}/snapshots
92	# ---------------------------------------------------------------------------
93
@router.get(
    "/{repo_id}/snapshots",
    response_model=SnapshotListResponse,
    operation_id="listSnapshots",
    summary="List snapshots for a repo",
)
async def list_snapshots(
    repo_id: str,
    request: Request,
    response: Response,
    pagination: PaginationParams = Depends(PaginationParams),
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> SnapshotListResponse:
    """List snapshot summaries for a repo, newest first, one page at a time.

    Summaries carry ``entry_count``, ``total_size_bytes``, and ``directories``
    but not the file-tree manifest; fetch ``GET /snapshots/{snapshot_id}``
    when the manifest itself is needed.

    Pagination is cursor-based (keyset over ``created_at``). Feed the
    ``nextCursor`` value of one response into ``?cursor=`` of the next
    request; a null ``nextCursor`` marks the final page.

    When another page exists, the same cursor is also exposed through a
    ``Link: <url>; rel="next"`` response header for HTTP-native clients.
    """
    # Visibility check: raises 404 for an unknown repo, 401 for an anonymous
    # caller on a non-public repo.
    _guard(await musehub_repository.get_repo(session, repo_id), claims, repo_id=repo_id)

    page_limit = pagination.limit
    listing = await snapshot_svc.list_snapshots(
        session, repo_id, cursor=pagination.cursor, limit=page_limit
    )

    # Only advertise a "next" link when the service reports a further page.
    if (following := listing.next_cursor) is not None:
        response.headers["Link"] = build_cursor_link_header(request, following, page_limit)
    return listing
133
134	# ---------------------------------------------------------------------------
135	# GET /api/repos/{repo_id}/snapshots/{snapshot_id}
136	# ---------------------------------------------------------------------------
137
@router.get(
    "/{repo_id}/snapshots/{snapshot_id}",
    response_model=SnapshotResponse,
    operation_id="getSnapshot",
    summary="Get a snapshot with its full file-tree manifest",
)
async def get_snapshot(
    repo_id: str,
    snapshot_id: str,
    response: Response,
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> SnapshotResponse:
    """Fetch one snapshot together with every file-tree entry it contains.

    Entries come back sorted alphabetically by path. The entry count is
    mirrored in the ``X-Snapshot-Entry-Count`` response header, so a client
    can decide whether to paginate without parsing the body.

    For repos with thousands of files, prefer the ``/entries`` sub-endpoint,
    which returns the manifest in cursor-paginated slices instead of one
    large response.

    Responds with 404 when the snapshot does not exist or belongs to a
    different repo.
    """
    # Visibility check: raises 404 for an unknown repo, 401 for an anonymous
    # caller on a non-public repo.
    _guard(await musehub_repository.get_repo(session, repo_id), claims, repo_id=repo_id)

    snapshot = await snapshot_svc.get_snapshot(session, repo_id, snapshot_id)
    if snapshot is not None:
        response.headers["X-Snapshot-Entry-Count"] = str(snapshot.entry_count)
        return snapshot

    # The service returned nothing for this (repo, snapshot) pair.
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail=f"Snapshot {snapshot_id!r} not found in repo {repo_id!r}",
    )
173
174	# ---------------------------------------------------------------------------
175	# GET /api/repos/{repo_id}/snapshots/{snapshot_id}/entries
176	# ---------------------------------------------------------------------------
177
@router.get(
    "/{repo_id}/snapshots/{snapshot_id}/entries",
    response_model=SnapshotEntryListResponse,
    operation_id="listSnapshotEntries",
    summary="Paginate file-tree entries for a snapshot",
)
async def list_snapshot_entries(
    repo_id: str,
    snapshot_id: str,
    request: Request,
    response: Response,
    pagination: PaginationParams = Depends(PaginationParams),
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> SnapshotEntryListResponse:
    """Return a paginated slice of file-tree entries sorted by path.

    Prefer this endpoint over the full ``GET /snapshots/{snapshot_id}`` when
    the snapshot may contain many files — entries are fetched one page at a
    time.

    Cursor-based keyset pagination anchors each page to a stable position in
    the path ordering. Pass ``nextCursor`` from a previous response as
    ``?cursor=`` to advance. A null ``nextCursor`` means this is the last page.

    The ``Link: <url>; rel="next"`` response header and the
    ``X-Snapshot-Entry-Count`` response header (total entries) are set for
    HTTP-native clients.

    Returns 404 when the snapshot is not found in the requested repo.
    """
    repo = await musehub_repository.get_repo(session, repo_id)
    _guard(repo, claims, repo_id=repo_id)

    # Resolve the page first. This lookup is scoped to ``repo_id``, so the 404
    # decision is made before any further query touches the snapshot.
    page_result = await snapshot_svc.get_snapshot_entries_page(
        session, repo_id, snapshot_id, cursor=pagination.cursor, limit=pagination.limit
    )
    if page_result is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Snapshot {snapshot_id!r} not found in repo {repo_id!r}",
        )

    # The count call takes only ``snapshot_id`` (no repo scope), so it runs
    # only after the repo-scoped lookup above has succeeded. Headers set on
    # ``response`` are applied to the successful response regardless of when
    # they are assigned, so the output is the same as counting up front.
    entry_count = await snapshot_svc.count_snapshot_entries(session, snapshot_id)
    response.headers["X-Snapshot-Entry-Count"] = str(entry_count)

    if page_result.next_cursor is not None:
        response.headers["Link"] = build_cursor_link_header(
            request, page_result.next_cursor, pagination.limit
        )
    return page_result
226
227	# ---------------------------------------------------------------------------
228	# GET /api/repos/{repo_id}/commits/{commit_id}/snapshot
229	# ---------------------------------------------------------------------------
230
@router.get(
    "/{repo_id}/commits/{commit_id}/snapshot",
    response_model=SnapshotResponse,
    operation_id="getCommitSnapshot",
    summary="Get the snapshot attached to a commit",
)
async def get_commit_snapshot(
    repo_id: str,
    commit_id: str,
    response: Response,
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> SnapshotResponse:
    """Return the full snapshot attached to a commit in a single request.

    This is the one-call equivalent of reading ``snapshot_id`` from
    ``GET /commits/{commit_id}`` and then calling
    ``GET /snapshots/{snapshot_id}``.

    The ``X-Snapshot-Entry-Count`` response header carries the snapshot's
    entry count.

    Responds with 404 when the commit does not exist, belongs to a different
    repo, or has no snapshot attached (e.g. an empty initial commit).
    """
    # Visibility check: raises 404 for an unknown repo, 401 for an anonymous
    # caller on a non-public repo.
    _guard(await musehub_repository.get_repo(session, repo_id), claims, repo_id=repo_id)

    snapshot = await snapshot_svc.get_snapshot_for_commit(session, repo_id, commit_id)
    if snapshot is not None:
        response.headers["X-Snapshot-Entry-Count"] = str(snapshot.entry_count)
        return snapshot

    # A single 404 covers every "nothing to return" case; the message lists
    # the possible reasons because the service result does not distinguish them.
    not_found_detail = (
        f"No snapshot found for commit {commit_id!r} in repo {repo_id!r}. "
        "The commit may not exist, belong to a different repo, or have no "
        "snapshot attached."
    )
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=not_found_detail)
267
268	# ---------------------------------------------------------------------------
269	# GET /api/repos/{repo_id}/snapshots/{snapshot_id}/diff
270	# ---------------------------------------------------------------------------
271
@router.get(
    "/{repo_id}/snapshots/{snapshot_id}/diff",
    response_model=SnapshotDiffResponse,
    operation_id="diffSnapshots",
    summary="File-level diff between two snapshots",
)
async def diff_snapshots(
    repo_id: str,
    snapshot_id: str,
    base: str = Query(
        ...,
        description="snapshot_id of the base (older) snapshot to compare against",
    ),
    include_unchanged: bool = Query(
        False,
        alias="includeUnchanged",
        description=(
            "Emit 'unchanged' entries for files identical in both snapshots. "
            "Off by default — unchanged files dominate large repos."
        ),
    ),
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> SnapshotDiffResponse:
    """Compare two snapshots file by file: ``snapshot_id`` (new) against ``base`` (old).

    Every item in ``changes`` reports a ``status``
    (added|removed|modified|unchanged), the object IDs on both sides, and the
    size delta.

    Agent note: filter ``changes`` by ``status`` for targeted analysis — for
    example keep only ``"modified"`` to inspect content changes or only
    ``"added"`` to find new files. ``bytes_added`` and ``bytes_removed``
    summarise storage deltas without walking the whole change list.

    Responds with 422 when ``snapshot_id`` and ``base`` are the same ID, and
    with 404 when one or both snapshots are not found in the repo.
    """
    # Visibility check runs before any argument validation below: raises 404
    # for an unknown repo, 401 for an anonymous caller on a non-public repo.
    _guard(await musehub_repository.get_repo(session, repo_id), claims, repo_id=repo_id)

    # A snapshot diffed against itself is rejected outright.
    if base == snapshot_id:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail="snapshot_id and base must be different snapshots",
        )

    delta = await snapshot_svc.diff_snapshots(
        session, repo_id, snapshot_id, base, include_unchanged=include_unchanged
    )
    if delta is not None:
        return delta

    # The service result does not say which side was missing, so name both IDs.
    missing_detail = (
        f"One or both snapshots not found in repo {repo_id!r}: "
        f"{snapshot_id!r}, {base!r}"
    )
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=missing_detail)
327
328	# ---------------------------------------------------------------------------
329	# POST /api/repos/{repo_id}/snapshots/batch
330	# ---------------------------------------------------------------------------
331
@router.post(
    "/{repo_id}/snapshots/batch",
    response_model=list[SnapshotResponse | SnapshotSummaryResponse],
    operation_id="batchGetSnapshots",
    summary="Bulk-resolve up to 100 snapshot IDs",
)
async def batch_get_snapshots(
    repo_id: str,
    body: SnapshotBatchRequest,
    session: AsyncSession = Depends(get_session),
    claims: TokenClaims | None = Depends(optional_token),
) -> list[SnapshotResponse | SnapshotSummaryResponse]:
    """Look up as many as 100 snapshot IDs with a single request.

    IDs that are unknown, or that belong to a different repo, are silently
    left out of the result — compare ``len(response)`` with the number of IDs
    you sent if completeness matters.

    Send ``include_entries: true`` in the request body to receive full
    ``SnapshotResponse`` objects (with file-tree entries). Leave it out or
    send ``false`` to receive lightweight ``SnapshotSummaryResponse`` objects.

    Agent note: this is the most efficient way to populate a snapshot browser
    or resolve a list of commit snapshot IDs — one POST beats N parallel GETs.

    Responds with 422 when more than 100 IDs are supplied.
    """
    # Visibility check runs before the size check below: raises 404 for an
    # unknown repo, 401 for an anonymous caller on a non-public repo.
    _guard(await musehub_repository.get_repo(session, repo_id), claims, repo_id=repo_id)

    requested_ids = body.snapshot_ids
    # Hard cap on batch size; the limit is repeated verbatim in the error text.
    if len(requested_ids) > 100:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
            detail="snapshot_ids must contain at most 100 entries",
        )

    resolved = await snapshot_svc.batch_get_snapshots(
        session,
        repo_id,
        requested_ids,
        include_entries=body.include_entries,
    )
    return resolved

File History 1 commit

sha256:7d6dd8f4a89e2d1fef2d84f6e65feaff51385d382f466766b7f690a22ec18e32 fix: fall back to DB ancestry check when mpack-only fast-fo… Sonnet 4.6 patch 7 days ago

function _guard

async_function list_snapshots

async_function get_snapshot

async_function list_snapshot_entries

async_function get_commit_snapshot

async_function diff_snapshots

async_function batch_get_snapshots

Pathmusehub/api/routes/api/snapshots.py

Lines370

Size14.2 KB

LangPython

Refsha256:7d6dd8f4a89e2d1fef2d84f6e65feaff51385d382f466766b7f690a22ec18e32

Object ID

sha256:8d2a76569663c4984f1427d2d47c7333a2d735e8757cd37423792b1bece989a2…

Last commit

sha256:7d6dd8f4a89e2d1fef2d84f6e65feaff51385d382f466766b7f690a22ec18e32

fix: fall back to DB ancestry check when mpack-on…

7 days ago

Quick links

Blame History