gabriel / muse public
clone.py python
565 lines 20.1 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 23 days ago
1 """muse clone — create a local copy of a remote Muse repository.
2
3 Downloads the complete commit history, snapshots, and objects from a remote
4 MuseHub repository into a new local directory. After cloning:
5
6 - A full ``.muse/`` directory is created with the remote's repo_id and domain.
7 - The ``origin`` remote is configured to point at the source URL.
8 - The default branch is checked out into the working tree.
9
10 Usage
11 -----
12
13 muse clone <url> Clone into a directory named after the last URL segment.
14 muse clone <url> <dir> Clone into a specific directory.
15 muse clone <url> --branch dev Clone and check out 'dev'.
16 muse clone <url> --dry-run Show what would happen without writing anything.
17 muse clone <url> --no-checkout Skip working-tree restore after cloning.
18 muse clone <url> --json Emit a machine-readable JSON result to stdout.
19
20 Auth
21 ----
22
23 Signing identities are read from ``~/.muse/identity.toml`` keyed by hostname.
24 No signing identity is required for public repositories.
25
26 JSON schema (``--json``)
27 ------------------------
28
29 ::
30
31 {
32 "status": "cloned | dry_run | already_exists",
33 "url": "<remote_url>",
34 "directory": "<local_path>",
35 "branch": "<branch_checked_out>",
36 "commits_received": <N>,
37 "blobs_written": <N>,
38 "head": "<sha256> | null",
39 "domain": "<domain>",
40 "dry_run": false
41 }
42
43 Exit codes
44 ----------
45
46 0 — success (including dry-run)
47 1 — user error (target already exists, empty repository, unknown branch)
48 2 — internal / transport error
49 """
50
51 import argparse
52 import json
53 import logging
54 import pathlib
55 import shutil
56 import sys
57 from typing import TYPE_CHECKING, TypedDict
58
59 import time
60
61 from muse._version import __version__ as _SCHEMA_VERSION
62 from muse.core.timing import start_timer
63 from muse.core.envelope import EnvelopeJson, make_envelope
64 from muse.cli.config import get_signing_identity, set_remote, set_remote_head, set_upstream
65 from muse.core.types import content_hash, now_utc_iso
66 from muse.core.paths import muse_dir as _muse_dir, ref_path as _ref_path
67 from muse.core.errors import ExitCode
68 from muse.core.mpack import apply_mpack
69 from muse.core.io import write_text_atomic
70 from muse.core.refs import (
71 write_branch_ref,
72 write_head_branch,
73 )
74 from muse.core.commits import read_commit
75 from muse.core.snapshots import read_snapshot
76 from muse.core.transport import TransportError, make_transport
77 from muse.core.validation import sanitize_display
78 from muse.core.workdir import apply_manifest
79
80 type _RepoMeta = dict[str, str]
81 if TYPE_CHECKING:
82 from muse.core.mpack import ApplyResult
83
84 logger = logging.getLogger(__name__)
85
86 # Canonical set of subdirectories — must match muse init's _INIT_SUBDIRS.
87 _CLONE_SUBDIRS: tuple[str, ...] = (
88 "refs",
89 "refs/heads",
90 "objects",
91 "commits",
92 "snapshots",
93 "tags",
94 "cache",
95 )
96
97 _DEFAULT_CONFIG = """\
98 [user]
99 name = ""
100 email = ""
101
102 [remotes]
103
104 [domain]
105 # Domain-specific configuration keys depend on the active domain.
106 """
107
108 class _CloneJson(EnvelopeJson):
109 """Stable JSON schema emitted by ``muse clone --json``."""
110
111 status: str # "cloned" | "dry_run" | "already_exists" | "partial"
112 url: str
113 directory: str # resolved local path
114 branch: str # branch checked out
115 commits_received: int
116 blobs_written: int
117 skipped_blobs: int # blobs skipped due to integrity failure (0 on clean clone)
118 head: str | None # HEAD commit ID after clone, null on dry-run
119 domain: str
120 dry_run: bool
121 shallow_commits: list[str]
122
123 class _CloneErrorJson(EnvelopeJson):
124 """JSON output for clone transport/network error paths."""
125
126 error: str # "remote_unreachable" | "empty_repository" | "fetch_failed"
127 url: str
128 message: str
129
130 def _infer_dir_name(url: str) -> str:
131 """Derive a safe local directory name from the last non-empty segment of *url*.
132
133 Strips query strings, fragments, and path-traversal components so that a
134 crafted URL like ``http://attacker.example.com/../../../../etc`` cannot escape
135 the current working directory.
136 """
137 # Drop fragment and query before splitting on path separators.
138 stripped = url.split("#")[0].split("?")[0].rstrip("/")
139 last = stripped.rsplit("/", 1)[-1]
140 # pathlib.Path.name always strips leading dots and directory separators,
141 # eliminating traversal attempts like ".." or "../../secret".
142 safe = pathlib.PurePosixPath(last).name
143 return safe if safe and safe not in (".", "..") else "muse-repo"
144
145 def _init_muse_dir(
146 target: pathlib.Path,
147 repo_id: str,
148 domain: str,
149 default_branch: str,
150 ) -> None:
151 """Create the ``.muse/`` directory tree inside *target*.
152
153 Uses the same subdirectory set as ``muse init`` so that every command that
154 relies on the standard layout (tags, objects, etc.) works out of the box.
155 """
156 muse_dir = _muse_dir(target)
157 for subdir in _CLONE_SUBDIRS:
158 (muse_dir / subdir).mkdir(parents=True, exist_ok=True)
159
160 repo_meta: _RepoMeta = {
161 "repo_id": repo_id,
162 "schema_version": _SCHEMA_VERSION,
163 "created_at": now_utc_iso(),
164 "domain": domain,
165 }
166 write_text_atomic(muse_dir / "repo.json", f"{json.dumps(repo_meta)}\n")
167 write_head_branch(muse_dir.parent, default_branch)
168 write_text_atomic(_ref_path(target, default_branch), "")
169 write_text_atomic(muse_dir / "config.toml", _DEFAULT_CONFIG)
170
171 def _restore_working_tree(root: pathlib.Path, commit_id: str) -> None:
172 """Restore the working tree to the snapshot referenced by *commit_id*.
173
174 Logs a warning to stderr (rather than silently returning) if the commit or
175 snapshot cannot be read — this surfaces bugs where apply_mpack did not write
176 the expected objects.
177 """
178 commit = read_commit(root, commit_id)
179 if commit is None:
180 logger.warning(
181 "⚠️ clone: commit %s not found after apply_mpack — working tree not restored",
182 commit_id,
183 )
184 return
185 snap = read_snapshot(root, commit.snapshot_id)
186 if snap is None:
187 logger.warning(
188 "⚠️ clone: snapshot %s not found after apply_mpack — working tree not restored",
189 commit.snapshot_id,
190 )
191 return
192 try:
193 apply_manifest(root, {}, snap.manifest)
194 except RuntimeError as exc:
195 logger.warning(
196 "⚠️ clone: working tree partially restored — %s",
197 exc,
198 )
199
200 def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
201 """Register the ``muse clone`` subcommand and all its flags."""
202 parser = subparsers.add_parser(
203 "clone",
204 help="Create a local copy of a remote Muse repository.",
205 description=__doc__,
206 formatter_class=argparse.RawDescriptionHelpFormatter,
207 )
208 parser.add_argument(
209 "url",
210 help="URL of the remote Muse repository to clone.",
211 )
212 parser.add_argument(
213 "directory",
214 nargs="?",
215 default=None,
216 help=(
217 "Local directory to clone into. "
218 "Defaults to the last path segment of the URL."
219 ),
220 )
221 parser.add_argument(
222 "--branch", "-b",
223 default=None,
224 help="Branch to check out after cloning (default: remote default branch).",
225 )
226 parser.add_argument(
227 "--dry-run", "-n",
228 action="store_true",
229 default=False,
230 dest="dry_run",
231 help=(
232 "Contact the remote and show what would be cloned without writing "
233 "any files or creating any directories."
234 ),
235 )
236 parser.add_argument(
237 "--no-checkout",
238 action="store_true",
239 default=False,
240 dest="no_checkout",
241 help="Skip restoring the working tree after cloning.",
242 )
243 parser.add_argument(
244 "--depth",
245 type=int,
246 default=None,
247 metavar="N",
248 help=(
249 "Shallow clone: fetch only the N most recent commits from each branch tip. "
250 "Writes .muse/shallow with the boundary commit IDs. "
251 "Must be >= 1."
252 ),
253 )
254 parser.add_argument(
255 "--json", "-j",
256 action="store_true",
257 dest="json_out",
258 help="Emit JSON output to stdout.",
259 )
260 parser.set_defaults(func=run)
261
262 def run(args: argparse.Namespace) -> None:
263 """Clone a remote Muse repository into a new local directory.
264
265 Downloads the full commit history, snapshots, objects, and branch heads.
266 Configures ``origin`` remote and upstream tracking. Checks out the default
267 branch unless ``--no-checkout`` is given. On any error after the target
268 directory has been partially created the directory is removed to leave the
269 filesystem clean.
270
271 Agent quickstart
272 ----------------
273 ::
274
275 muse clone https://musehub.ai/gabriel/muse --json
276 muse clone https://musehub.ai/gabriel/muse --branch dev --json
277 muse clone https://musehub.ai/gabriel/muse mydir --dry-run --json
278
279 JSON fields
280 -----------
281 status ``"cloned"``, ``"already_exists"``, or ``"error"``.
282 url The remote URL cloned from.
283 directory Absolute path to the created local directory.
284 branch Branch checked out after cloning.
285 commits_received Number of commits received.
286 blobs_written Number of content blobs written.
287 head Full commit ID at the branch tip; ``null`` on error.
288 domain Repository domain (e.g. ``"code"``).
289 dry_run ``true`` when ``--dry-run`` was passed.
290
291 Exit codes
292 ----------
293 0 Clone completed successfully.
294 1 Target already exists or bad URL.
295 3 Network or internal error during fetch.
296 """
297 elapsed = start_timer()
298 url: str = args.url
299 directory: str | None = args.directory
300 branch: str | None = args.branch
301 dry_run: bool = args.dry_run
302 no_checkout: bool = args.no_checkout
303 json_out: bool = args.json_out
304 depth: int | None = getattr(args, "depth", None)
305
306 if depth is not None and depth < 1:
307 print("❌ --depth must be >= 1 (got 0)", file=sys.stderr)
308 raise SystemExit(ExitCode.USER_ERROR)
309
310 # clone does not need to be inside a Muse repo — it creates a new one.
311 # Resolve the target name and path before any network I/O.
312 target_name = directory or _infer_dir_name(url)
313 target = pathlib.Path.cwd() / target_name
314
315 if dry_run:
316 print("(dry run — no files will be created)", file=sys.stderr)
317
318 if _muse_dir(target).exists():
319 msg = f"❌ '{sanitize_display(str(target))}' is already a Muse repository."
320 print(msg, file=sys.stderr)
321 if json_out:
322 print(json.dumps(_CloneJson(
323 **make_envelope(elapsed, exit_code=ExitCode.USER_ERROR),
324 status="already_exists",
325 url=url,
326 directory=str(target),
327 branch=branch or "",
328 commits_received=0,
329 blobs_written=0,
330 head=None,
331 domain="",
332 dry_run=dry_run,
333 shallow_commits=[],
334 )))
335 raise SystemExit(ExitCode.USER_ERROR)
336
337 signing = get_signing_identity(remote_url=url)
338
339 transport = make_transport(url)
340
341 print(
342 f"Cloning from {sanitize_display(url)} …",
343 file=sys.stderr,
344 )
345 try:
346 info = transport.fetch_remote_info(url, signing=signing)
347 except TransportError as exc:
348 if json_out:
349 print(json.dumps(_CloneErrorJson(
350 **make_envelope(elapsed, exit_code=ExitCode.INTERNAL_ERROR),
351 error="remote_unreachable",
352 url=url,
353 message=str(exc),
354 )))
355 print(f"❌ Cannot reach remote: {exc}", file=sys.stderr)
356 raise SystemExit(ExitCode.INTERNAL_ERROR)
357
358 # Use "code" as the domain fallback — "midi" was the first plugin but is
359 # not the canonical default domain for new repositories.
360 if info["repo_id"]:
361 remote_repo_id = info["repo_id"]
362 else:
363 _cloned_at = now_utc_iso()
364 remote_repo_id = content_hash({"cloned_at": _cloned_at, "url": url})
365 domain = info["domain"] or "code"
366 default_branch = branch or info["default_branch"] or "main"
367
368 if not info["branch_heads"]:
369 if json_out:
370 print(json.dumps(_CloneErrorJson(
371 **make_envelope(elapsed, exit_code=ExitCode.USER_ERROR),
372 error="empty_repository",
373 url=url,
374 message="remote repository has no branches",
375 )))
376 print(
377 "❌ Remote repository has no branches (empty repository).",
378 file=sys.stderr,
379 )
380 raise SystemExit(ExitCode.USER_ERROR)
381
382 default_commit_id = info["branch_heads"].get(default_branch)
383 if default_commit_id is None:
384 # Fall back to the first available branch rather than failing hard —
385 # a user who requests a non-existent branch gets a clear warning.
386 first_branch, default_commit_id = next(iter(info["branch_heads"].items()))
387 print(
388 f" ⚠️ Branch '{sanitize_display(default_branch)}' not found on remote; "
389 f"checking out '{sanitize_display(first_branch)}' instead.",
390 file=sys.stderr,
391 )
392 default_branch = first_branch
393
394 available = sorted(info["branch_heads"])
395 logger.debug(
396 "Remote has %d branch(es): %s",
397 len(available),
398 ", ".join(sanitize_display(b) for b in available),
399 )
400
401 # ── dry-run exits here — no filesystem changes after this point ──────────
402 if dry_run:
403 want_count = len(info["branch_heads"])
404 if json_out:
405 print(json.dumps(_CloneJson(
406 **make_envelope(elapsed),
407 status="dry_run",
408 url=url,
409 directory=str(target),
410 branch=default_branch,
411 commits_received=0,
412 blobs_written=0,
413 head=default_commit_id,
414 domain=domain,
415 dry_run=True,
416 shallow_commits=[],
417 )))
418 else:
419 print(
420 f"Would clone {sanitize_display(url)} → {sanitize_display(str(target))}",
421 file=sys.stderr,
422 )
423 print(
424 f" branch={sanitize_display(default_branch)}, "
425 f"domain={sanitize_display(domain)}, "
426 f"{want_count} branch head(s) to fetch",
427 file=sys.stderr,
428 )
429 return
430
431 # ── real clone ────────────────────────────────────────────────────────────
432 target.mkdir(parents=True, exist_ok=True)
433 try:
434 _init_muse_dir(target, remote_repo_id, domain, default_branch)
435 except OSError as exc:
436 print(
437 f"❌ Failed to create repository at '{sanitize_display(str(target))}': {exc}",
438 file=sys.stderr,
439 )
440 shutil.rmtree(target, ignore_errors=True)
441 raise SystemExit(ExitCode.INTERNAL_ERROR)
442
443 # ── fetch/mpack ───────────────────────────────────────────────────────────
444 # Clone always starts with an empty have list — no local history yet.
445 want = list(info["branch_heads"].values())
446
447 t0 = time.perf_counter()
448 try:
449 fetch_result = transport.fetch_mpack(
450 url, signing,
451 want=want,
452 have=[],
453 )
454 except TransportError as exc:
455 if json_out:
456 print(json.dumps(_CloneErrorJson(
457 **make_envelope(elapsed, exit_code=ExitCode.INTERNAL_ERROR),
458 error="fetch_failed",
459 url=url,
460 message=str(exc),
461 )))
462 print(f"❌ Fetch failed: {exc}", file=sys.stderr)
463 shutil.rmtree(target, ignore_errors=True)
464 raise SystemExit(ExitCode.INTERNAL_ERROR)
465 t_fetch = time.perf_counter() - t0
466
467 fetched_shallow: list[str] = fetch_result.get("shallow_commits") or []
468 _raw_blobs = fetch_result.get("blobs") or []
469 _t_apply0 = time.perf_counter()
470 print(f"[clone] apply_mpack START blobs={len(_raw_blobs)} commits={len(fetch_result['commits'])} snaps={len(fetch_result['snapshots'])}", file=sys.stderr, flush=True)
471 apply_result: ApplyResult = apply_mpack(
472 target,
473 {
474 "commits": fetch_result["commits"],
475 "snapshots": fetch_result["snapshots"],
476 "blobs": _raw_blobs,
477 },
478 shallow_commits=set(fetched_shallow),
479 )
480 _t_apply1 = time.perf_counter()
481 blobs_written: int = apply_result["blobs_written"]
482 blobs_skipped: int = apply_result["blobs_skipped"]
483 print(
484 f"[clone] apply_mpack DONE t={int((_t_apply1-_t_apply0)*1000)}ms"
485 f" blobs_written={blobs_written} blobs_skipped={blobs_skipped}"
486 f" commits_written={apply_result['commits_written']}",
487 file=sys.stderr, flush=True,
488 )
489 print(
490 f"[mpack] fetch/mpack: {t_fetch:.2f}s "
491 f"blobs: {fetch_result['blobs_received']} "
492 f"commits: {apply_result['commits_written']}",
493 file=sys.stderr,
494 )
495
496 # Write branch head refs for every remote branch atomically and record
497 # the remote tracking pointer so future fetches can detect staleness.
498 # Only advance refs for branches whose tip commit landed cleanly.
499 failed = set(apply_result.get("failed_blobs") or [])
500 for b, cid in info["branch_heads"].items():
501 write_branch_ref(target, b, cid)
502 set_remote_head("origin", b, cid, target)
503
504 # Configure origin remote and upstream tracking.
505 set_remote("origin", url, target)
506 set_upstream(default_branch, "origin", target)
507
508 # Write .muse/shallow for shallow clones so future pull/fetch can recognise boundary.
509 if fetched_shallow:
510 shallow_path = _muse_dir(target) / "shallow"
511 write_text_atomic(shallow_path, "\n".join(fetched_shallow) + "\n")
512
513 # Restore working tree unless the caller opted out.
514 if not no_checkout:
515 _t_wt0 = time.perf_counter()
516 print(f"[clone] apply_manifest START (working tree restore)", file=sys.stderr, flush=True)
517 _restore_working_tree(target, default_commit_id)
518 _t_wt1 = time.perf_counter()
519 print(f"[clone] apply_manifest DONE t={int((_t_wt1-_t_wt0)*1000)}ms", file=sys.stderr, flush=True)
520
521 commits_received = apply_result["commits_written"]
522 blobs_failed = len(failed)
523 total_skipped = blobs_skipped + blobs_failed
524 exit_code = ExitCode.PARTIAL if total_skipped > 0 else ExitCode.SUCCESS
525
526 if json_out:
527 print(json.dumps(_CloneJson(
528 **make_envelope(elapsed, exit_code=exit_code),
529 status="partial" if total_skipped > 0 else "cloned",
530 url=url,
531 directory=str(target),
532 branch=default_branch,
533 commits_received=commits_received,
534 blobs_written=blobs_written,
535 skipped_blobs=total_skipped,
536 head=default_commit_id,
537 domain=domain,
538 dry_run=False,
539 shallow_commits=fetched_shallow,
540 )))
541 else:
542 if total_skipped > 0:
543 print(
544 f"⚠️ Cloned into '{sanitize_display(target_name)}' with {total_skipped} skipped blob(s) — "
545 f"{commits_received} commit(s), {blobs_written} blob(s) written, "
546 f"domain={sanitize_display(domain)}, "
547 f"branch={sanitize_display(default_branch)} ({default_commit_id})",
548 file=sys.stderr,
549 )
550 else:
551 print(
552 f"✅ Cloned into '{sanitize_display(target_name)}' — "
553 f"{commits_received} commit(s), {blobs_written} blob(s), "
554 f"domain={sanitize_display(domain)}, "
555 f"branch={sanitize_display(default_branch)} ({default_commit_id})",
556 file=sys.stderr,
557 )
558
559 logger.info(
560 "✅ clone: %s → %s commits=%d blobs=%d skipped=%d",
561 url, target, commits_received, blobs_written, total_skipped,
562 )
563
564 if exit_code != ExitCode.SUCCESS:
565 raise SystemExit(exit_code)
File History 5 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 23 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 24 days ago
sha256:0313c134f0ef4518a9c3a0ec359ffdc42546dc720010730374edfe0857caf7ef rename: delta_add → delta_upsert across wire format, source… Sonnet 4.6 minor 25 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 31 days ago