workspace.py
python
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
22 days ago
| 1 | """Workspace management — compose multiple Muse repositories. |
| 2 | |
| 3 | A *workspace* is a collection of related Muse repositories that are developed |
| 4 | together. Think of a film score that references a sound library, a machine |
| 5 | learning pipeline that includes a dataset repo, or a multi-service codebase |
| 6 | where each service lives in its own Muse repo. |
| 7 | |
| 8 | Design |
| 9 | ------ |
| 10 | Workspaces are distinct from worktrees: |
| 11 | |
| 12 | - A **worktree** is one checkout of *one* repo with *one* ``.muse/`` store. |
| 13 | - A **workspace** is an envelope that *links* multiple separate repos together. |
| 14 | |
| 15 | The workspace manifest lives at ``.muse/workspace.toml``:: |
| 16 | |
| 17 | [[members]] |
| 18 | name = "core" |
| 19 | url = "https://musehub.ai/acme/core" |
| 20 | path = "repos/core" # relative to workspace root |
| 21 | branch = "main" # pinned branch |
| 22 | |
| 23 | [[members]] |
| 24 | name = "dataset" |
| 25 | url = "https://musehub.ai/acme/dataset" |
| 26 | path = "repos/dataset" |
| 27 | branch = "v2" |
| 28 | |
| 29 | Agent workflow |
| 30 | -------------- |
| 31 | Each member repo is a fully independent Muse repository. Agents can commit |
| 32 | to member repos independently and the workspace provides a unified status view |
| 33 | and one-shot sync. |
| 34 | |
| 35 | ``muse workspace sync`` walks all members and runs ``muse fetch`` + ``muse pull`` |
| 36 | so the workspace root always has the latest HEAD for every pinned branch. |
| 37 | ``muse workspace sync --workers 8`` parallelises across members. |
| 38 | |
| 39 | Security model |
| 40 | -------------- |
| 41 | - Manifest size is capped at ``_MAX_MANIFEST_BYTES`` before reading. |
| 42 | - The manifest file and its parent directory are checked for symlinks before |
| 43 | any read or write to prevent path-traversal attacks. |
| 44 | - All free-form string fields (name, url, path, branch) are TOML-escaped |
| 45 | before serialisation to prevent injection via crafted values. |
| 46 | - Member ``path`` values are validated to resolve *within* the workspace root. |
| 47 | - Member ``url`` values are checked for null bytes and for a valid scheme (https, |
| 48 | http, or local path); they reach ``subprocess`` as separate argv elements, never via a shell. |
| 49 | """ |
| 50 | |
| 51 | import concurrent.futures |
| 52 | import logging |
| 53 | import pathlib |
| 54 | import subprocess |
| 55 | from dataclasses import dataclass |
| 56 | from typing import TypedDict |
| 57 | |
| 58 | from muse.core.paths import muse_dir as _muse_dir, workspace_toml_path as _workspace_toml_path, shelf_json_path as _shelf_json_path |
| 59 | from muse.core.types import load_json_file |
| 60 | from muse.core.refs import iter_branch_refs |
| 61 | |
| 62 | logger = logging.getLogger(__name__) |
| 63 | |
| 64 | |
| 65 | # 1 MiB — a manifest with 1 000 members at ~200 bytes each is ~200 KiB. |
| 66 | _MAX_MANIFEST_BYTES = 1 * 1024 * 1024 |
| 67 | |
| 68 | # Allowed URL schemes for member repositories. |
| 69 | _ALLOWED_SCHEMES = frozenset({"https", "http"}) |
| 70 | |
| 71 | # --------------------------------------------------------------------------- |
| 72 | # Types |
| 73 | # --------------------------------------------------------------------------- |
| 74 | |
class WorkspaceMemberDict(TypedDict):
    """A single ``[[members]]`` table from the workspace manifest.

    Every value is a plain string, stored exactly as it appears in
    ``workspace.toml``.
    """

    name: str    # short identifier of the member
    url: str     # remote URL or local path of the member repository
    path: str    # checkout location, relative to the workspace root
    branch: str  # branch the member is pinned to
| 82 | |
class WorkspaceManifestDict(TypedDict):
    """In-memory form of the whole workspace manifest."""

    # One entry per ``[[members]]`` table, kept in file order.
    members: list[WorkspaceMemberDict]
| 87 | |
| 88 | @dataclass |
| 89 | class WorkspaceMemberStatus: |
| 90 | """Runtime status of one workspace member. |
| 91 | |
| 92 | ``branch`` is the configured tracking branch from *workspace.toml* — the |
| 93 | branch this member is *supposed* to be on according to the manifest. |
| 94 | |
| 95 | ``actual_branch`` is the branch currently checked out in the working |
| 96 | directory (read from ``HEAD``). When it differs from ``branch`` the member |
| 97 | is checked out somewhere unexpected; agents should surface this discrepancy. |
| 98 | |
| 99 | ``head_commit`` is the commit that ``HEAD`` currently resolves to — i.e. |
| 100 | the actual checked-out commit, not the tip of the configured branch. |
| 101 | |
| 102 | ``shelf_count`` is the number of shelved changesets (0 = nothing on the shelf). |
| 103 | |
| 104 | ``feature_branches`` lists every local branch that is not ``main`` or |
| 105 | ``dev`` — short-lived task/feat/bugfix branches that have not been cleaned |
| 106 | up yet. |
| 107 | """ |
| 108 | |
| 109 | name: str |
| 110 | path: pathlib.Path |
| 111 | branch: str # configured tracking branch from workspace.toml |
| 112 | url: str |
| 113 | present: bool |
| 114 | head_commit: str | None # actual HEAD commit (what HEAD resolves to) |
| 115 | dirty: bool |
| 116 | actual_branch: str | None # currently checked-out branch |
| 117 | shelf_count: int # number of shelved changesets |
| 118 | feature_branches: list[str] # local branches other than main / dev |
| 119 | |
class WorkspaceSyncResult(TypedDict):
    """Outcome of syncing a single workspace member.

    ``status`` takes one of the values ``'cloned'``, ``'pulled'``,
    ``'skipped'``, or ``'error: <message>'``.
    """

    name: str    # member the result belongs to
    status: str  # outcome string, see the class docstring
| 129 | |
| 130 | # --------------------------------------------------------------------------- |
| 131 | # TOML helpers |
| 132 | # --------------------------------------------------------------------------- |
| 133 | |
| 134 | def _toml_escape(value: str) -> str: |
| 135 | """Escape *value* for safe embedding inside a TOML double-quoted string. |
| 136 | |
| 137 | TOML basic strings forbid unescaped backslash, double-quote, and control |
| 138 | characters (newline, carriage-return, tab, etc.). All are escaped here so |
| 139 | that crafted values like ``core"\\nname = "injected`` or values containing |
| 140 | literal newlines cannot break the manifest structure. |
| 141 | """ |
| 142 | return ( |
| 143 | value |
| 144 | .replace("\\", "\\\\") |
| 145 | .replace('"', '\\"') |
| 146 | .replace("\n", "\\n") |
| 147 | .replace("\r", "\\r") |
| 148 | .replace("\t", "\\t") |
| 149 | .replace("\x00", "\\u0000") |
| 150 | ) |
| 151 | |
| 152 | # --------------------------------------------------------------------------- |
| 153 | # Paths |
| 154 | # --------------------------------------------------------------------------- |
| 155 | |
def _workspace_path(repo_root: pathlib.Path) -> pathlib.Path:
    """Return the manifest path for *repo_root* by delegating to ``workspace_toml_path``."""
    manifest_path = _workspace_toml_path(repo_root)
    return manifest_path
| 158 | |
def find_workspace_root(start: pathlib.Path | None = None) -> pathlib.Path | None:
    """Locate the nearest enclosing directory that holds a workspace manifest.

    The search begins at *start* (the current working directory when omitted)
    and climbs through every parent. The first directory for which
    ``.muse/workspace.toml`` exists is returned; ``None`` means no workspace
    was found.

    This mirrors ``find_repo_root()`` so that workspace commands resolve the
    correct manifest regardless of CWD or ``-C`` flag usage.
    """
    origin = (start or pathlib.Path.cwd()).resolve()
    search_order = (origin, *origin.parents)
    return next(
        (folder for folder in search_order if _workspace_toml_path(folder).exists()),
        None,
    )
| 171 | |
def require_workspace_root(start: pathlib.Path | None = None) -> pathlib.Path:
    """Return the workspace root, or terminate with a clear error message.

    When no workspace encloses *start*, an explanation is printed to stderr
    and ``SystemExit`` is raised with ``ExitCode.REPO_NOT_FOUND``.
    """
    from muse.core.errors import ExitCode

    found = find_workspace_root(start)
    if found is not None:
        return found

    import sys

    message = (
        "❌ Not inside a Muse workspace.\n"
        " No .muse/workspace.toml found in this directory or any parent."
    )
    print(message, file=sys.stderr)
    raise SystemExit(ExitCode.REPO_NOT_FOUND)
| 185 | |
| 186 | # --------------------------------------------------------------------------- |
| 187 | # Persistence |
| 188 | # --------------------------------------------------------------------------- |
| 189 | |
def _load_manifest(repo_root: pathlib.Path) -> WorkspaceManifestDict | None:
    """Read and parse the workspace manifest under *repo_root*.

    Security guards applied before any read:

    - Symlink check: a symlink at the manifest path could redirect reads to
      sensitive files outside the repo.
    - Size cap (``_MAX_MANIFEST_BYTES``): a corrupt or tampered manifest cannot
      exhaust memory.

    Returns:
        The parsed manifest, or ``None`` when the file is missing, is a
        symlink, exceeds the size cap, or cannot be read or parsed. Every
        failure other than "missing" is logged as a warning.
    """
    import tomllib

    path = _workspace_path(repo_root)
    if not path.exists():
        return None
    if path.is_symlink():
        logger.warning(
            "⚠️ Workspace manifest is a symlink — ignoring to prevent path traversal"
        )
        return None
    try:
        size = path.stat().st_size
        if size > _MAX_MANIFEST_BYTES:
            logger.warning(
                "⚠️ Workspace manifest is %.1f MiB — exceeds cap of %d MiB; ignoring",
                size / (1024 * 1024),
                _MAX_MANIFEST_BYTES // (1024 * 1024),
            )
            return None
        raw = tomllib.loads(path.read_text(encoding="utf-8"))
    except Exception as exc:
        # Deliberately broad: any I/O, decoding, or TOML error means
        # "unreadable manifest" and must never crash a workspace command.
        logger.warning("⚠️ Could not read workspace manifest: %s", exc)
        return None

    entries = raw.get("members", [])
    if not isinstance(entries, list):
        # e.g. ``members = 5`` or a plain ``[members]`` table. Iterating such
        # a value would either raise or silently walk keys/characters.
        logger.warning(
            "⚠️ Workspace manifest 'members' is %s, expected an array of tables; "
            "treating it as empty",
            type(entries).__name__,
        )
        entries = []

    # Non-table items are skipped; missing keys fall back to defaults.
    members: list[WorkspaceMemberDict] = [
        WorkspaceMemberDict(
            name=str(m.get("name", "")),
            url=str(m.get("url", "")),
            path=str(m.get("path", "")),
            branch=str(m.get("branch", "main")),
        )
        for m in entries
        if isinstance(m, dict)
    ]
    return WorkspaceManifestDict(members=members)
| 236 | |
def _save_manifest(repo_root: pathlib.Path, manifest: WorkspaceManifestDict) -> None:
    """Write the manifest atomically.

    The content is written to a uniquely named temporary file next to the
    manifest and then moved over it with ``os.replace``, so readers never see
    a half-written file.

    Security guards:

    - The manifest file and its parent directory are checked for symlinks
      before writing to prevent path-traversal via a planted symlink. A
      dangling symlink at the manifest path is refused as well.
    - The temporary file is created with ``O_CREAT | O_EXCL`` under an
      unpredictable name, so a file or symlink planted at the temp path
      cannot redirect the write, and concurrent writers never share one.
    - All string values are TOML-escaped to prevent injection.

    Raises:
        OSError: If a symlink guard trips or the write/rename fails. The
            temporary file is removed before the error propagates.
    """
    import os

    path = _workspace_path(repo_root)
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)

    if parent.is_symlink():
        raise OSError(f"Refusing to write manifest — parent directory is a symlink: {parent}")
    if path.is_symlink():
        raise OSError(f"Refusing to write manifest — file is a symlink: {path}")

    lines: list[str] = []
    for m in manifest["members"]:
        lines.extend(
            (
                "[[members]]",
                f'name = "{_toml_escape(m["name"])}"',
                f'url = "{_toml_escape(m["url"])}"',
                f'path = "{_toml_escape(m["path"])}"',
                f'branch = "{_toml_escape(m["branch"])}"',
                "",
            )
        )
    payload = "\n".join(lines)

    tmp = path.with_name(f"{path.name}.{os.getpid()}.{os.urandom(8).hex()}.tmp")
    # Mode 0o666 is filtered through the process umask, which matches the
    # permissions a plain ``write_text`` would have produced.
    fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o666)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(payload)
        os.replace(tmp, path)
    except BaseException:
        # Never leave a stray temp file behind, whatever interrupted us.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
| 266 | |
| 267 | # --------------------------------------------------------------------------- |
| 268 | # Validation helpers |
| 269 | # --------------------------------------------------------------------------- |
| 270 | |
| 271 | def _validate_member_name(name: str) -> None: |
| 272 | """Raise ``ValueError`` if *name* is not a safe workspace member name. |
| 273 | |
| 274 | Allowed: alphanumerics, hyphens, underscores, dots. No slashes, nulls, |
| 275 | or shell metacharacters. Must be 1–64 characters. |
| 276 | """ |
| 277 | import re |
| 278 | if not name or len(name) > 64: |
| 279 | raise ValueError(f"Member name must be 1–64 characters, got {len(name)!r}.") |
| 280 | if not re.fullmatch(r"[A-Za-z0-9._-]+", name): |
| 281 | raise ValueError( |
| 282 | f"Member name {name!r} contains invalid characters. " |
| 283 | "Use only alphanumerics, hyphens, underscores, and dots." |
| 284 | ) |
| 285 | |
| 286 | def _validate_member_url(url: str) -> None: |
| 287 | """Raise ``ValueError`` if *url* is not a safe member URL or local path. |
| 288 | |
| 289 | Accepted forms: |
| 290 | - ``https://`` or ``http://`` — remote MuseHub URL. |
| 291 | - An absolute local path (no scheme). |
| 292 | - A relative local path (no scheme). |
| 293 | |
| 294 | Rejected: |
| 295 | - Null bytes in the URL string. |
| 296 | - ``file://`` — use a bare path instead. |
| 297 | - Any other scheme (``ftp://``, ``ssh://``, etc.). |
| 298 | """ |
| 299 | import urllib.parse |
| 300 | if "\x00" in url: |
| 301 | raise ValueError("Member URL must not contain null bytes.") |
| 302 | parsed = urllib.parse.urlparse(url) |
| 303 | if parsed.scheme and parsed.scheme not in _ALLOWED_SCHEMES: |
| 304 | raise ValueError( |
| 305 | f"Member URL scheme {parsed.scheme!r} is not allowed. " |
| 306 | "Use https://, http://, or a bare filesystem path." |
| 307 | ) |
| 308 | |
| 309 | def _validate_member_path(repo_root: pathlib.Path, relative_path: str) -> None: |
| 310 | """Raise ``ValueError`` if *relative_path* escapes the workspace root. |
| 311 | |
| 312 | Path components like ``../../etc`` would let a crafted manifest point |
| 313 | members at arbitrary directories. We resolve the candidate path and |
| 314 | confirm it sits within *repo_root*. |
| 315 | """ |
| 316 | if "\x00" in relative_path: |
| 317 | raise ValueError("Member path must not contain null bytes.") |
| 318 | candidate = (repo_root / relative_path).resolve() |
| 319 | try: |
| 320 | candidate.relative_to(repo_root.resolve()) |
| 321 | except ValueError: |
| 322 | raise ValueError( |
| 323 | f"Member path {relative_path!r} resolves outside the workspace root." |
| 324 | ) |
| 325 | |
| 326 | # --------------------------------------------------------------------------- |
| 327 | # Public API |
| 328 | # --------------------------------------------------------------------------- |
| 329 | |
def add_workspace_member(
    repo_root: pathlib.Path,
    name: str,
    url: str,
    path: str = "",
    branch: str = "main",
) -> None:
    """Register a new member repository in the workspace manifest.

    Args:
        repo_root: The workspace root (where ``.muse/`` lives).
        name: Short identifier for this member (alphanumeric, hyphens,
            underscores, dots; max 64 chars).
        url: Remote URL (https/http) or local path to the member repo.
        path: Relative checkout path inside the workspace (default:
            ``repos/<name>``). Must not escape the workspace root.
        branch: Branch to track (default: ``main``).

    Raises:
        ValueError: If name is invalid, URL scheme is disallowed, path escapes
            the workspace root, a member with the same name exists, or a
            manifest file exists but cannot be read (it is never overwritten
            blindly).
    """
    from muse.core.validation import validate_branch_name

    _validate_member_name(name)
    _validate_member_url(url)
    validate_branch_name(branch)

    effective_path = path or f"repos/{name}"
    _validate_member_path(repo_root, effective_path)

    manifest = _load_manifest(repo_root)
    if manifest is None:
        # ``None`` means either "no manifest yet" or "manifest unreadable"
        # (corrupt, oversized, symlinked). Only the first case may start from
        # an empty manifest; otherwise every existing member would be
        # silently discarded on save.
        manifest_path = _workspace_path(repo_root)
        if manifest_path.exists() or manifest_path.is_symlink():
            raise ValueError(
                f"Workspace manifest {manifest_path} exists but could not be read; "
                "refusing to overwrite it."
            )
        manifest = WorkspaceManifestDict(members=[])

    if any(m["name"] == name for m in manifest["members"]):
        raise ValueError(f"Workspace member '{name}' already exists.")

    manifest["members"].append(
        WorkspaceMemberDict(
            name=name,
            url=url,
            path=effective_path,
            branch=branch,
        )
    )
    _save_manifest(repo_root, manifest)
| 375 | |
def update_workspace_member(
    repo_root: pathlib.Path,
    name: str,
    url: str | None = None,
    path: str | None = None,
    branch: str | None = None,
) -> None:
    """Change the URL, path, and/or branch of an already registered member.

    Arguments left as ``None`` keep their current value. New values are
    validated before the manifest is read, so an invalid value is reported
    even when the member does not exist.

    Args:
        repo_root: The workspace root.
        name: Member name to update.
        url: New URL (or ``None`` to keep current).
        path: New relative checkout path (or ``None`` to keep current).
        branch: New branch to track (or ``None`` to keep current).

    Raises:
        ValueError: If the member does not exist or any new value is invalid.
    """
    from muse.core.validation import validate_branch_name

    if url is not None:
        _validate_member_url(url)
    if branch is not None:
        validate_branch_name(branch)
    if path is not None:
        _validate_member_path(repo_root, path)

    manifest = _load_manifest(repo_root)
    entries = [] if manifest is None else manifest["members"]
    # Only the first entry carrying this name is touched.
    target = next((entry for entry in entries if entry["name"] == name), None)
    if target is None:
        raise ValueError(f"Workspace member '{name}' not found.")

    if url is not None:
        target["url"] = url
    if path is not None:
        target["path"] = path
    if branch is not None:
        target["branch"] = branch
    _save_manifest(repo_root, manifest)
| 420 | |
def remove_workspace_member(repo_root: pathlib.Path, name: str) -> None:
    """Drop the member called *name* from the workspace manifest.

    Only the registration is removed; the member's directory on disk is
    **not** deleted.

    Raises:
        ValueError: If there is no manifest or no member with that name.
    """
    manifest = _load_manifest(repo_root)
    if manifest is None:
        raise ValueError("No workspace manifest found.")

    current = manifest["members"]
    remaining = [entry for entry in current if entry["name"] != name]
    if len(remaining) == len(current):
        # Nothing was filtered out, so the name was never registered.
        raise ValueError(f"Workspace member '{name}' not found.")

    manifest["members"] = remaining
    _save_manifest(repo_root, manifest)
| 438 | |
def get_workspace_member(
    repo_root: pathlib.Path,
    name: str,
) -> WorkspaceMemberStatus:
    """Look up one member by *name* and return its runtime status.

    Raises:
        ValueError: If there is no manifest or no member with that name is
            registered.
    """
    manifest = _load_manifest(repo_root)
    if manifest is None:
        raise ValueError("No workspace manifest found.")

    entry = next((m for m in manifest["members"] if m["name"] == name), None)
    if entry is None:
        raise ValueError(f"Workspace member '{name}' not found.")
    return _member_status(repo_root, entry)
| 455 | |
def _member_status(repo_root: pathlib.Path, m: WorkspaceMemberDict) -> WorkspaceMemberStatus:
    """Collect the runtime status of the manifest entry *m*.

    A member counts as present when both its checkout directory and its
    ``.muse`` directory exist. For present members three independent,
    best-effort probes run; a failing probe is logged at debug level and
    leaves its fields at their defaults:

    1. ``muse status --json`` supplies dirty flag, checked-out branch, and
       ``HEAD`` commit.
    2. ``.muse/shelf.json`` supplies the shelf count.
    3. The branch refs supply the list of branches other than main / dev.
    """
    import json as _json

    checkout = repo_root / m["path"]
    is_present = checkout.exists() and _muse_dir(checkout).exists()

    head: str | None = None
    is_dirty = False
    current_branch: str | None = None
    shelved = 0
    extra_branches: list[str] = []

    if not is_present:
        return WorkspaceMemberStatus(
            name=m["name"],
            path=checkout,
            branch=m["branch"],
            url=m["url"],
            present=is_present,
            head_commit=head,
            dirty=is_dirty,
            actual_branch=current_branch,
            shelf_count=shelved,
            feature_branches=extra_branches,
        )

    # Probe 1: a single subprocess yields dirty flag, branch, and HEAD commit.
    try:
        proc = subprocess.run(
            ["muse", "status", "--json"],
            capture_output=True,
            text=True,
            cwd=str(checkout),
            timeout=10,
        )
        if proc.returncode == 0:
            payload = _json.loads(proc.stdout)
            is_dirty = bool(payload.get("dirty", False))
            current_branch = payload.get("branch") or None
            head = payload.get("head_commit") or None
    except Exception as exc:
        logger.debug("Could not read status for member %r: %s", m["name"], exc)

    # Probe 2: the shelf is read straight from .muse/shelf.json, no subprocess.
    try:
        shelf_path = _muse_dir(checkout) / "shelf.json"
        if shelf_path.is_file():
            shelf_entries = load_json_file(shelf_path)
            if isinstance(shelf_entries, list):
                shelved = len(shelf_entries)
    except Exception as exc:
        logger.debug("Could not read shelf count for member %r: %s", m["name"], exc)

    # Probe 3: feature branches come from the refs, again pure file I/O.
    try:
        long_lived = {"main", "dev"}
        extra_branches = sorted(
            ref_name
            for ref_name, _ in iter_branch_refs(checkout)
            if ref_name not in long_lived
        )
    except Exception as exc:
        logger.debug("Could not read branches for member %r: %s", m["name"], exc)

    return WorkspaceMemberStatus(
        name=m["name"],
        path=checkout,
        branch=m["branch"],
        url=m["url"],
        present=is_present,
        head_commit=head,
        dirty=is_dirty,
        actual_branch=current_branch,
        shelf_count=shelved,
        feature_branches=extra_branches,
    )
| 518 | |
def list_workspace_members(repo_root: pathlib.Path) -> list[WorkspaceMemberStatus]:
    """Return status for every workspace member.

    Each member's status requires one ``muse status`` subprocess call plus
    several file-I/O reads. Members are processed concurrently via a thread
    pool whose size is capped, so a large manifest cannot spawn hundreds of
    threads and subprocesses at once.

    Results are returned in the same order as the manifest. A member whose
    status cannot be computed in the pool is logged as a warning and left
    out of the result.

    Returns:
        One :class:`WorkspaceMemberStatus` per member; an empty list when no
        manifest can be loaded.
    """
    # Upper bound on simultaneous ``muse status`` subprocesses.
    max_status_workers = 32

    manifest = _load_manifest(repo_root)
    if manifest is None:
        return []
    members = manifest["members"]
    if len(members) <= 1:
        return [_member_status(repo_root, m) for m in members]

    statuses: list[WorkspaceMemberStatus] = []
    pool_size = min(len(members), max_status_workers)
    with concurrent.futures.ThreadPoolExecutor(max_workers=pool_size) as pool:
        futures = [pool.submit(_member_status, repo_root, m) for m in members]
        # Collecting in submission order keeps manifest order without any
        # index bookkeeping; the work itself still runs in parallel.
        for member, future in zip(members, futures):
            try:
                statuses.append(future.result())
            except Exception as exc:
                logger.warning(
                    "⚠️ Could not read status for member %r: %s",
                    member["name"], exc,
                )
    return statuses
| 552 | |
def _run_sync_command(
    name: str,
    action: str,
    argv: list[str],
    *,
    cwd: str | None,
    timeout: int,
    success_status: str,
) -> WorkspaceSyncResult:
    """Run one ``muse`` sync subprocess and translate its outcome into a result."""
    try:
        proc = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            cwd=cwd,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        err = f"timed out after {timeout}s"
    except OSError as exc:
        # e.g. the ``muse`` executable is missing or the cwd vanished.
        err = str(exc)[:200]
    else:
        if proc.returncode == 0:
            return WorkspaceSyncResult(name=name, status=success_status)
        # Fall back so the reported error is never an empty string.
        err = (
            proc.stderr.strip()
            or proc.stdout.strip()
            or f"exit code {proc.returncode}"
        )[:200]
    logger.warning("⚠️ %s failed for member %r: %s", action, name, err)
    return WorkspaceSyncResult(name=name, status=f"error: {err}")


def sync_workspace_member(
    repo_root: pathlib.Path,
    member: WorkspaceMemberDict,
    dry_run: bool = False,
) -> WorkspaceSyncResult:
    """Clone or pull the latest state for one workspace member.

    A member whose checkout (or its ``.muse`` directory) is missing is cloned
    from ``member["url"]``; otherwise ``member["branch"]`` is pulled inside
    the existing checkout.

    Args:
        repo_root: The workspace root.
        member: Manifest entry to sync.
        dry_run: Report the action that would be taken without running it.

    Returns:
        A :class:`WorkspaceSyncResult` dict with ``name`` and ``status``.
        ``status`` is one of ``'cloned'``, ``'pulled'``, ``'skipped'``
        (dry-run), or ``'error: <message>'``. Timeouts, a missing ``muse``
        executable, and invalid manifest values are reported as
        ``'error: …'`` rather than raised.

    Security: the manifest may have been edited by hand, so ``path`` is
    re-validated here (it must stay inside the workspace root) and ``url`` is
    re-validated before cloning. ``url`` and ``branch`` are passed as separate
    list elements to ``subprocess.run`` (never via the shell), so shell
    injection is not possible. Size of error output is capped at 200 chars.
    """
    name = member["name"]

    try:
        _validate_member_path(repo_root, member["path"])
    except ValueError as exc:
        logger.warning("⚠️ Refusing to sync member %r: %s", name, exc)
        return WorkspaceSyncResult(name=name, status=f"error: {exc}")

    member_path = repo_root / member["path"]
    needs_clone = not member_path.exists() or not _muse_dir(member_path).exists()

    if dry_run:
        action = "clone" if needs_clone else "pull"
        return WorkspaceSyncResult(name=name, status=f"skipped (dry-run would {action})")

    if needs_clone:
        try:
            _validate_member_url(member["url"])
            member_path.parent.mkdir(parents=True, exist_ok=True)
        except (ValueError, OSError) as exc:
            err = str(exc)[:200]
            logger.warning("⚠️ Clone failed for member %r: %s", name, err)
            return WorkspaceSyncResult(name=name, status=f"error: {err}")
        return _run_sync_command(
            name,
            "Clone",
            ["muse", "clone", member["url"], str(member_path)],
            cwd=None,
            timeout=300,
            success_status="cloned",
        )

    return _run_sync_command(
        name,
        "Pull",
        ["muse", "pull", "--branch", member["branch"]],
        cwd=str(member_path),
        timeout=120,
        success_status="pulled",
    )
| 601 | |
def sync_workspace(
    repo_root: pathlib.Path,
    member_name: str | None = None,
    dry_run: bool = False,
    workers: int = 1,
) -> list[WorkspaceSyncResult]:
    """Sync all (or one named) workspace members.

    Args:
        repo_root: The workspace root.
        member_name: Sync only this member (default: all).
        dry_run: Show what would happen without doing it.
        workers: Number of parallel sync workers (default: 1 — sequential).
            Set to > 1 to parallelise across members.

    Returns:
        List of :class:`WorkspaceSyncResult` dicts, one per member synced,
        always in manifest order regardless of ``workers``. The list is empty
        when no manifest can be loaded or *member_name* matches no member.
        An unexpected exception while syncing one member becomes an
        ``'error: …'`` result for that member and does not stop the others.
    """
    manifest = _load_manifest(repo_root)
    if manifest is None:
        return []

    targets = (
        [m for m in manifest["members"] if m["name"] == member_name]
        if member_name is not None
        else manifest["members"]
    )

    def _attempt(m: WorkspaceMemberDict) -> WorkspaceSyncResult:
        # Shared by both execution modes so failure handling is identical.
        try:
            return sync_workspace_member(repo_root, m, dry_run=dry_run)
        except Exception as exc:
            logger.warning("⚠️ Unexpected sync error for member %r: %s", m["name"], exc)
            return WorkspaceSyncResult(name=m["name"], status=f"error: {exc}")

    if workers <= 1 or len(targets) <= 1:
        return [_attempt(m) for m in targets]

    effective_workers = min(workers, len(targets))
    with concurrent.futures.ThreadPoolExecutor(max_workers=effective_workers) as pool:
        # ``map`` yields results in input order, i.e. manifest order.
        return list(pool.map(_attempt, targets))
File History
4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
22 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e
fix: rename objects→blobs in push client and all stale test…
Sonnet 4.6
patch
24 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
30 days ago