gabriel / muse public
workspace.py python
647 lines 23.6 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 22 days ago
1 """Workspace management — compose multiple Muse repositories.
2
3 A *workspace* is a collection of related Muse repositories that are developed
4 together. Think of a film score that references a sound library, a machine
5 learning pipeline that includes a dataset repo, or a multi-service codebase
6 where each service lives in its own Muse repo.
7
8 Design
9 ------
10 Workspaces are distinct from worktrees:
11
12 - A **worktree** is one checkout of *one* repo with *one* ``.muse/`` store.
13 - A **workspace** is an envelope that *links* multiple separate repos together.
14
15 The workspace manifest lives at ``.muse/workspace.toml``::
16
17 [[members]]
18 name = "core"
19 url = "https://musehub.ai/acme/core"
20 path = "repos/core" # relative to workspace root
21 branch = "main" # pinned branch
22
23 [[members]]
24 name = "dataset"
25 url = "https://musehub.ai/acme/dataset"
26 path = "repos/dataset"
27 branch = "v2"
28
29 Agent workflow
30 --------------
31 Each member repo is a fully independent Muse repository. Agents can commit
32 to member repos independently and the workspace provides a unified status view
33 and one-shot sync.
34
35 ``muse workspace sync`` walks all members and runs ``muse fetch`` + ``muse pull``
36 so the workspace root always has the latest HEAD for every pinned branch.
37 ``muse workspace sync --workers 8`` parallelises across members.
38
39 Security model
40 --------------
41 - Manifest size is capped at ``_MAX_MANIFEST_BYTES`` before reading.
42 - The manifest file and its parent directory are checked for symlinks before
43 any read or write to prevent path-traversal attacks.
44 - All free-form string fields (name, url, path, branch) are TOML-escaped
45 before serialisation to prevent injection via crafted values.
46 - Member ``path`` values are validated to resolve *within* the workspace root.
- Member ``url`` values are checked for an allowed scheme (https, http, or a
  scheme-less local path) and are handed to ``subprocess`` as discrete argv
  elements, never through a shell.
49 """
50
51 import concurrent.futures
52 import logging
53 import pathlib
54 import subprocess
55 from dataclasses import dataclass
56 from typing import TypedDict
57
58 from muse.core.paths import muse_dir as _muse_dir, workspace_toml_path as _workspace_toml_path, shelf_json_path as _shelf_json_path
59 from muse.core.types import load_json_file
60 from muse.core.refs import iter_branch_refs
61
# Module-level logger used for all warnings and debug output in this file.
logger = logging.getLogger(__name__)


# Upper bound on the manifest size accepted by ``_load_manifest``.
# 1 MiB — a manifest with 1 000 members at ~200 bytes each is ~200 KiB.
_MAX_MANIFEST_BYTES = 1 * 1024 * 1024

# Allowed URL schemes for member repositories (consulted by
# ``_validate_member_url``; scheme-less values are treated as local paths).
_ALLOWED_SCHEMES = frozenset({"https", "http"})
70
71 # ---------------------------------------------------------------------------
72 # Types
73 # ---------------------------------------------------------------------------
74
class WorkspaceMemberDict(TypedDict):
    """One ``[[members]]`` entry in the workspace manifest.

    All four fields are plain strings, exactly as they are written to and
    read from ``workspace.toml``.
    """

    name: str  # member identifier; members are looked up by exact match
    url: str  # clone source passed to ``muse clone``
    path: str  # checkout location, joined onto the workspace root
    branch: str  # tracked branch passed to ``muse pull --branch``
82
class WorkspaceManifestDict(TypedDict):
    """Top-level workspace manifest: the parsed form of ``workspace.toml``."""

    members: list[WorkspaceMemberDict]  # entries in manifest order
87
@dataclass
class WorkspaceMemberStatus:
    """Runtime status of one workspace member.

    ``name``, ``branch`` and ``url`` are copied from the manifest entry;
    ``path`` is the manifest path joined onto the workspace root.

    ``branch`` is the configured tracking branch from *workspace.toml* — the
    branch this member is *supposed* to be on according to the manifest.

    ``present`` is true when the checkout directory and its Muse store both
    exist on disk. When it is false every other runtime field keeps its
    default (``None``, ``False``, ``0`` or an empty list).

    ``actual_branch`` is the branch currently checked out in the working
    directory, as reported by ``muse status --json``. When it differs from
    ``branch`` the member is checked out somewhere unexpected; agents should
    surface this discrepancy.

    ``head_commit`` is the commit that ``HEAD`` currently resolves to — i.e.
    the actual checked-out commit, not the tip of the configured branch.

    ``dirty`` mirrors the ``dirty`` flag reported by ``muse status --json``.

    ``shelf_count`` is the number of shelved changesets (0 = nothing on the
    shelf).

    ``feature_branches`` lists every local branch that is not ``main`` or
    ``dev`` — short-lived task/feat/bugfix branches that have not been cleaned
    up yet. The list is sorted.
    """

    name: str
    path: pathlib.Path
    branch: str  # configured tracking branch from workspace.toml
    url: str
    present: bool
    head_commit: str | None  # actual HEAD commit (what HEAD resolves to)
    dirty: bool
    actual_branch: str | None  # currently checked-out branch
    shelf_count: int  # number of shelved changesets
    feature_branches: list[str]  # local branches other than main / dev
119
class WorkspaceSyncResult(TypedDict):
    """Result of syncing one workspace member.

    ``status`` is one of ``'cloned'``, ``'pulled'``,
    ``'skipped (dry-run would clone)'`` / ``'skipped (dry-run would pull)'``,
    or ``'error: <message>'``.
    """

    name: str  # member name copied from the manifest entry
    status: str  # outcome string, see the class docstring
129
130 # ---------------------------------------------------------------------------
131 # TOML helpers
132 # ---------------------------------------------------------------------------
133
134 def _toml_escape(value: str) -> str:
135 """Escape *value* for safe embedding inside a TOML double-quoted string.
136
137 TOML basic strings forbid unescaped backslash, double-quote, and control
138 characters (newline, carriage-return, tab, etc.). All are escaped here so
139 that crafted values like ``core"\\nname = "injected`` or values containing
140 literal newlines cannot break the manifest structure.
141 """
142 return (
143 value
144 .replace("\\", "\\\\")
145 .replace('"', '\\"')
146 .replace("\n", "\\n")
147 .replace("\r", "\\r")
148 .replace("\t", "\\t")
149 .replace("\x00", "\\u0000")
150 )
151
152 # ---------------------------------------------------------------------------
153 # Paths
154 # ---------------------------------------------------------------------------
155
def _workspace_path(repo_root: pathlib.Path) -> pathlib.Path:
    """Return the manifest path for *repo_root*.

    Thin wrapper that delegates to ``workspace_toml_path`` from
    ``muse.core.paths``.
    """
    return _workspace_toml_path(repo_root)
158
def find_workspace_root(start: pathlib.Path | None = None) -> pathlib.Path | None:
    """Locate the nearest enclosing workspace root.

    The search begins at *start* (the current working directory when omitted),
    resolved to an absolute path, and moves outwards through each parent
    directory. The first directory whose workspace manifest exists is
    returned; ``None`` means no directory on the way up has one.
    """
    anchor = (start if start else pathlib.Path.cwd()).resolve()
    candidates = [anchor, *anchor.parents]
    return next(
        (folder for folder in candidates if _workspace_toml_path(folder).exists()),
        None,
    )
171
def require_workspace_root(start: pathlib.Path | None = None) -> pathlib.Path:
    """Return the enclosing workspace root, or terminate the process.

    When no workspace is found an explanation is printed to stderr and
    ``SystemExit`` is raised with ``ExitCode.REPO_NOT_FOUND``.
    """
    from muse.core.errors import ExitCode

    located = find_workspace_root(start)
    if located is not None:
        return located

    import sys

    message = (
        "❌ Not inside a Muse workspace.\n"
        " No .muse/workspace.toml found in this directory or any parent."
    )
    print(message, file=sys.stderr)
    raise SystemExit(ExitCode.REPO_NOT_FOUND)
185
186 # ---------------------------------------------------------------------------
187 # Persistence
188 # ---------------------------------------------------------------------------
189
def _load_manifest(repo_root: pathlib.Path) -> WorkspaceManifestDict | None:
    """Read and parse the workspace manifest.

    Security guards applied before any read:

    - Symlink check: a symlink at the manifest path could redirect reads to
      sensitive files outside the repo.
    - Size cap (``_MAX_MANIFEST_BYTES``): a corrupt or tampered manifest cannot
      exhaust memory.

    Args:
        repo_root: The workspace root.

    Returns:
        The parsed manifest, or ``None`` when the manifest is missing, is a
        symlink, exceeds the size cap, cannot be read or parsed, or has a
        ``members`` value that is not a list. Every case except "missing"
        logs a warning. Entries inside ``members`` that are not tables are
        skipped; missing fields default to ``""`` (``"main"`` for ``branch``).
    """
    import tomllib

    path = _workspace_path(repo_root)
    if not path.exists():
        return None
    if path.is_symlink():
        logger.warning(
            "⚠️ Workspace manifest is a symlink — ignoring to prevent path traversal"
        )
        return None
    try:
        size = path.stat().st_size
        if size > _MAX_MANIFEST_BYTES:
            logger.warning(
                "⚠️ Workspace manifest is %.1f MiB — exceeds cap of %d MiB; ignoring",
                size / (1024 * 1024),
                _MAX_MANIFEST_BYTES // (1024 * 1024),
            )
            return None
        raw = tomllib.loads(path.read_text(encoding="utf-8"))
    except Exception as exc:
        logger.warning("⚠️ Could not read workspace manifest: %s", exc)
        return None

    entries = raw.get("members", [])
    # ``members = 5`` or ``members = "x"`` is valid TOML but not a manifest;
    # iterating such a value would raise or yield garbage, so reject it here.
    if not isinstance(entries, list):
        logger.warning(
            "⚠️ Could not read workspace manifest: %s",
            "'members' must be an array of tables",
        )
        return None

    members: list[WorkspaceMemberDict] = [
        WorkspaceMemberDict(
            name=str(entry.get("name", "")),
            url=str(entry.get("url", "")),
            path=str(entry.get("path", "")),
            branch=str(entry.get("branch", "main")),
        )
        for entry in entries
        if isinstance(entry, dict)
    ]
    return WorkspaceManifestDict(members=members)
236
def _save_manifest(repo_root: pathlib.Path, manifest: WorkspaceManifestDict) -> None:
    """Write the manifest via a temporary file followed by a rename.

    Security guards:

    - The manifest file, its parent directory, and the temporary file are
      checked for symlinks before writing. ``write_text`` follows symlinks, so
      a planted link at the temporary path would otherwise redirect the write
      and then be renamed into place as the manifest.
    - All string values are TOML-escaped to prevent injection.

    Args:
        repo_root: The workspace root.
        manifest: The manifest to serialise.

    Raises:
        OSError: If one of the symlink guards trips, or the write or rename
            fails. On a failed write the temporary file is removed
            (best-effort) before the error propagates.
    """
    path = _workspace_path(repo_root)
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)

    if parent.is_symlink():
        raise OSError(f"Refusing to write manifest — parent directory is a symlink: {parent}")
    if path.exists() and path.is_symlink():
        raise OSError(f"Refusing to write manifest — file is a symlink: {path}")

    tmp = path.with_suffix(".tmp")
    if tmp.is_symlink():
        raise OSError(f"Refusing to write manifest — temporary file is a symlink: {tmp}")

    lines: list[str] = []
    for m in manifest["members"]:
        lines.extend(
            (
                "[[members]]",
                f'name = "{_toml_escape(m["name"])}"',
                f'url = "{_toml_escape(m["url"])}"',
                f'path = "{_toml_escape(m["path"])}"',
                f'branch = "{_toml_escape(m["branch"])}"',
                "",
            )
        )

    try:
        tmp.write_text("\n".join(lines), encoding="utf-8")
        tmp.replace(path)
    except OSError:
        # Do not leave a half-written temporary file behind; the original
        # error is the one worth reporting, so cleanup failures are ignored.
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            pass
        raise
266
267 # ---------------------------------------------------------------------------
268 # Validation helpers
269 # ---------------------------------------------------------------------------
270
def _validate_member_name(name: str) -> None:
    """Raise ``ValueError`` if *name* is not a safe workspace member name.

    Allowed: ASCII alphanumerics, hyphens, underscores, dots. No slashes,
    nulls, or shell metacharacters. Must be 1–64 characters.

    The names ``.`` and ``..`` are rejected even though they consist of
    allowed characters: the default checkout path is ``repos/<name>``, so
    they would point a member at ``repos/`` or at the workspace root itself.

    Args:
        name: Candidate member name.

    Raises:
        ValueError: If the name is empty, too long, contains a disallowed
            character, or is ``.`` / ``..``.
    """
    import re

    if not name or len(name) > 64:
        raise ValueError(f"Member name must be 1–64 characters, got {len(name)!r}.")
    if not re.fullmatch(r"[A-Za-z0-9._-]+", name):
        raise ValueError(
            f"Member name {name!r} contains invalid characters. "
            "Use only alphanumerics, hyphens, underscores, and dots."
        )
    if name in {".", ".."}:
        raise ValueError(
            f"Member name {name!r} is reserved; '.' and '..' are path components, not names."
        )
285
def _validate_member_url(url: str) -> None:
    """Raise ``ValueError`` if *url* is not a safe member URL or local path.

    Accepted forms:
    - ``https://host/...`` or ``http://host/...`` — remote MuseHub URL.
    - An absolute local path (no scheme), including Windows drive paths such
      as ``C:\\repos\\core``.
    - A relative local path (no scheme).

    Rejected:
    - Null bytes in the URL string.
    - Empty or whitespace-only values.
    - Values starting with ``-``: the URL is passed to ``muse clone`` as a
      positional argument, where a leading dash would read as an option.
    - ``file://`` — use a bare path instead.
    - Any other scheme (``ftp://``, ``ssh://``, etc.).
    - ``http``/``https`` URLs without a host.

    Args:
        url: Candidate URL or filesystem path.

    Raises:
        ValueError: If the value falls into one of the rejected categories.
    """
    import urllib.parse

    if "\x00" in url:
        raise ValueError("Member URL must not contain null bytes.")
    if not url.strip():
        raise ValueError("Member URL must not be empty.")
    if url.startswith("-"):
        raise ValueError(
            f"Member URL {url!r} must not start with '-'; "
            "prefix a local path with './' instead."
        )

    parsed = urllib.parse.urlparse(url)
    scheme = parsed.scheme
    # urlparse reports the drive letter of "C:\\repo" or "C:/repo" as a
    # one-character scheme; that is a local path, not a URL.
    if not scheme or (len(scheme) == 1 and url[1:2] == ":"):
        return
    if scheme not in _ALLOWED_SCHEMES:
        raise ValueError(
            f"Member URL scheme {parsed.scheme!r} is not allowed. "
            "Use https://, http://, or a bare filesystem path."
        )
    if not parsed.netloc:
        raise ValueError(f"Member URL {url!r} has no host.")
308
def _validate_member_path(repo_root: pathlib.Path, relative_path: str) -> None:
    """Ensure *relative_path* stays inside the workspace root.

    The path is joined onto *repo_root* and resolved, so ``..`` components
    and absolute paths are judged by where they actually land. A resolved
    location that is not *repo_root* or one of its descendants is refused,
    as is any value containing a null byte.

    Raises:
        ValueError: If the path contains a null byte or resolves outside
            *repo_root*.
    """
    if "\x00" in relative_path:
        raise ValueError("Member path must not contain null bytes.")

    resolved_root = repo_root.resolve()
    resolved_target = (repo_root / relative_path).resolve()
    try:
        resolved_target.relative_to(resolved_root)
    except ValueError:
        raise ValueError(
            f"Member path {relative_path!r} resolves outside the workspace root."
        )
325
326 # ---------------------------------------------------------------------------
327 # Public API
328 # ---------------------------------------------------------------------------
329
def add_workspace_member(
    repo_root: pathlib.Path,
    name: str,
    url: str,
    path: str = "",
    branch: str = "main",
) -> None:
    """Register a new member repository in the workspace manifest.

    Args:
        repo_root: The workspace root (where ``.muse/`` lives).
        name: Short identifier for this member (alphanumeric, hyphens,
            underscores, dots; max 64 chars).
        url: Remote URL (https/http) or local path to the member repo.
        path: Relative checkout path inside the workspace (default:
            ``repos/<name>``). Must not escape the workspace root.
        branch: Branch to track (default: ``main``).

    Raises:
        ValueError: If name is invalid, URL scheme is disallowed, path escapes
            the workspace root, a member with the same name exists, or an
            existing manifest file cannot be read (it is left untouched
            rather than overwritten).
    """
    from muse.core.validation import validate_branch_name

    _validate_member_name(name)
    _validate_member_url(url)
    validate_branch_name(branch)

    effective_path = path or f"repos/{name}"
    _validate_member_path(repo_root, effective_path)

    manifest = _load_manifest(repo_root)
    if manifest is None:
        # ``None`` means either "no manifest yet" or "manifest present but
        # unreadable". Starting from an empty manifest in the second case
        # would silently discard every existing member on save. A symlinked
        # manifest is left to ``_save_manifest``, which refuses it.
        manifest_file = _workspace_path(repo_root)
        if manifest_file.exists() and not manifest_file.is_symlink():
            raise ValueError(
                "Workspace manifest exists but could not be read; "
                "refusing to overwrite it."
            )
        manifest = WorkspaceManifestDict(members=[])

    if any(m["name"] == name for m in manifest["members"]):
        raise ValueError(f"Workspace member '{name}' already exists.")

    manifest["members"].append(
        WorkspaceMemberDict(
            name=name,
            url=url,
            path=effective_path,
            branch=branch,
        )
    )
    _save_manifest(repo_root, manifest)
375
def update_workspace_member(
    repo_root: pathlib.Path,
    name: str,
    url: str | None = None,
    path: str | None = None,
    branch: str | None = None,
) -> None:
    """Change the URL, path, and/or branch of a registered member.

    Arguments left as ``None`` keep their current value. New values are
    validated before the manifest is read, and the manifest is rewritten
    once the member has been found.

    Args:
        repo_root: The workspace root.
        name: Name of the member to modify.
        url: Replacement URL, or ``None`` to leave it alone.
        path: Replacement relative checkout path, or ``None``.
        branch: Replacement tracked branch, or ``None``.

    Raises:
        ValueError: If a supplied value fails validation or no member is
            called *name*.
    """
    from muse.core.validation import validate_branch_name

    if url is not None:
        _validate_member_url(url)
    if branch is not None:
        validate_branch_name(branch)
    if path is not None:
        _validate_member_path(repo_root, path)

    manifest = _load_manifest(repo_root)
    registered = manifest["members"] if manifest is not None else []
    target = next((entry for entry in registered if entry["name"] == name), None)
    if target is None:
        raise ValueError(f"Workspace member '{name}' not found.")

    for field, replacement in (("url", url), ("path", path), ("branch", branch)):
        if replacement is not None:
            target[field] = replacement
    _save_manifest(repo_root, manifest)
420
def remove_workspace_member(repo_root: pathlib.Path, name: str) -> None:
    """Drop the member called *name* from the workspace manifest.

    Only the manifest entry is removed; the member's checkout directory is
    left on disk.

    Raises:
        ValueError: If there is no readable manifest, or no member is called
            *name*.
    """
    manifest = _load_manifest(repo_root)
    if manifest is None:
        raise ValueError("No workspace manifest found.")

    current = manifest["members"]
    survivors = [entry for entry in current if entry["name"] != name]
    if len(survivors) == len(current):
        raise ValueError(f"Workspace member '{name}' not found.")

    manifest["members"] = survivors
    _save_manifest(repo_root, manifest)
438
def get_workspace_member(
    repo_root: pathlib.Path,
    name: str,
) -> WorkspaceMemberStatus:
    """Look up one member by name and report its current status.

    Raises:
        ValueError: If there is no readable manifest, or no member is called
            *name*.
    """
    manifest = _load_manifest(repo_root)
    if manifest is None:
        raise ValueError("No workspace manifest found.")

    entry = next((m for m in manifest["members"] if m["name"] == name), None)
    if entry is None:
        raise ValueError(f"Workspace member '{name}' not found.")
    return _member_status(repo_root, entry)
455
def _member_status(repo_root: pathlib.Path, m: WorkspaceMemberDict) -> WorkspaceMemberStatus:
    """Collect the runtime status of a single manifest entry.

    A member counts as present when both its checkout directory and the Muse
    store inside it exist. For a present member three independent,
    best-effort probes run; a failing probe is logged at debug level and
    leaves its fields at their defaults:

    1. ``muse status --json`` (10 s timeout) for dirty flag, branch and HEAD.
    2. ``shelf.json`` inside the store, for the number of shelved changesets.
    3. The branch refs, for local branches other than ``main`` / ``dev``.
    """
    import json as _json

    checkout = repo_root / m["path"]
    is_present = checkout.exists() and _muse_dir(checkout).exists()

    commit: str | None = None
    is_dirty = False
    current_branch: str | None = None
    shelved = 0
    extra_branches: list[str] = []

    if is_present:
        # Probe 1: a single subprocess yields dirty flag, branch and HEAD.
        try:
            proc = subprocess.run(
                ["muse", "status", "--json"],
                cwd=str(checkout),
                capture_output=True,
                text=True,
                timeout=10,
            )
            if proc.returncode == 0:
                payload = _json.loads(proc.stdout)
                is_dirty = bool(payload.get("dirty", False))
                current_branch = payload.get("branch") or None
                commit = payload.get("head_commit") or None
        except Exception as exc:
            logger.debug("Could not read status for member %r: %s", m["name"], exc)

        # Probe 2: the shelf is a JSON list stored on disk.
        try:
            shelf_path = _muse_dir(checkout) / "shelf.json"
            entries = load_json_file(shelf_path) if shelf_path.is_file() else None
            if isinstance(entries, list):
                shelved = len(entries)
        except Exception as exc:
            logger.debug("Could not read shelf count for member %r: %s", m["name"], exc)

        # Probe 3: every branch ref that is not one of the long-lived ones.
        try:
            long_lived = {"main", "dev"}
            extra_branches = sorted(
                branch
                for branch, _ in iter_branch_refs(checkout)
                if branch not in long_lived
            )
        except Exception as exc:
            logger.debug("Could not read branches for member %r: %s", m["name"], exc)

    return WorkspaceMemberStatus(
        name=m["name"],
        path=checkout,
        branch=m["branch"],
        url=m["url"],
        present=is_present,
        head_commit=commit,
        dirty=is_dirty,
        actual_branch=current_branch,
        shelf_count=shelved,
        feature_branches=extra_branches,
    )
518
def list_workspace_members(repo_root: pathlib.Path) -> list[WorkspaceMemberStatus]:
    """Return status for every workspace member.

    Each member's status requires one ``muse status`` subprocess call plus
    several file-I/O reads. Members are processed concurrently via a thread
    pool, so wall time is far below the sum across all members.

    The pool is capped at 32 threads. Each worker spawns a subprocess, so one
    thread per member would start hundreds of processes at once for a large
    manifest.

    Results are returned in the same order as the manifest. A member whose
    status collection raises is logged and left out of the result.

    Args:
        repo_root: The workspace root.

    Returns:
        One ``WorkspaceMemberStatus`` per member, or an empty list when no
        manifest can be loaded.
    """
    manifest = _load_manifest(repo_root)
    if manifest is None:
        return []
    members = manifest["members"]
    if len(members) <= 1:
        return [_member_status(repo_root, m) for m in members]

    max_threads = min(len(members), 32)
    results: list[WorkspaceMemberStatus | None] = [None] * len(members)
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as pool:
        future_to_idx = {
            pool.submit(_member_status, repo_root, m): i
            for i, m in enumerate(members)
        }
        for future in concurrent.futures.as_completed(future_to_idx):
            idx = future_to_idx[future]
            try:
                # Filling by index restores manifest order.
                results[idx] = future.result()
            except Exception as exc:
                logger.warning(
                    "⚠️ Could not read status for member %r: %s",
                    members[idx]["name"], exc,
                )
    return [r for r in results if r is not None]
552
def sync_workspace_member(
    repo_root: pathlib.Path,
    member: WorkspaceMemberDict,
    dry_run: bool = False,
) -> WorkspaceSyncResult:
    """Clone or pull the latest state for one workspace member.

    A member whose checkout directory or Muse store is missing is cloned
    (300 s timeout); otherwise its configured branch is pulled (120 s
    timeout).

    Args:
        repo_root: The workspace root.
        member: The manifest entry to sync.
        dry_run: Report what would be done without running any command.

    Returns:
        A :class:`WorkspaceSyncResult` dict with ``name`` and ``status``.
        ``status`` is one of ``'cloned'``, ``'pulled'``,
        ``'skipped (dry-run would clone|pull)'``, or ``'error: <message>'``.
        Failures to launch ``muse``, timeouts, and filesystem errors are
        reported as ``'error: …'`` rather than raised.

    Security: the member path is re-validated against the workspace root
    because manifest entries may have been edited by hand and so never passed
    through ``add_workspace_member``. ``url`` and ``branch`` are passed as
    separate list elements to ``subprocess.run`` (never via the shell), so
    shell injection is not possible. Size of error output is capped at 200
    chars.
    """
    name = member["name"]

    try:
        _validate_member_path(repo_root, member["path"])
    except ValueError as exc:
        err = str(exc)[:200]
        logger.warning("⚠️ Refusing to sync member %r: %s", name, err)
        return WorkspaceSyncResult(name=name, status=f"error: {err}")

    member_path = repo_root / member["path"]
    needs_clone = not member_path.exists() or not _muse_dir(member_path).exists()
    action = "clone" if needs_clone else "pull"

    if dry_run:
        return WorkspaceSyncResult(name=name, status=f"skipped (dry-run would {action})")

    if needs_clone:
        command = ["muse", "clone", member["url"], str(member_path)]
        cwd: str | None = None
        timeout = 300
        done = "cloned"
    else:
        command = ["muse", "pull", "--branch", member["branch"]]
        cwd = str(member_path)
        timeout = 120
        done = "pulled"

    try:
        if needs_clone:
            member_path.parent.mkdir(parents=True, exist_ok=True)
        result = subprocess.run(
            command,
            capture_output=True,
            text=True,
            cwd=cwd,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        err = f"muse {action} timed out after {timeout}s"
    except OSError as exc:
        # Covers a missing ``muse`` executable as well as mkdir/cwd failures.
        err = str(exc)[:200]
    else:
        if result.returncode == 0:
            return WorkspaceSyncResult(name=name, status=done)
        err = result.stderr.strip()[:200]

    logger.warning("⚠️ %s failed for member %r: %s", action.capitalize(), name, err)
    return WorkspaceSyncResult(name=name, status=f"error: {err}")
601
def sync_workspace(
    repo_root: pathlib.Path,
    member_name: str | None = None,
    dry_run: bool = False,
    workers: int = 1,
) -> list[WorkspaceSyncResult]:
    """Sync all (or one named) workspace members.

    Args:
        repo_root: The workspace root.
        member_name: Sync only this member (default: all).
        dry_run: Show what would happen without doing it.
        workers: Number of parallel sync workers (default: 1 — sequential).
            Set to > 1 to parallelise across members.

    Returns:
        List of :class:`WorkspaceSyncResult` dicts, one per member synced, in
        manifest order whether or not the sync ran in parallel. The list is
        empty when no manifest can be loaded or *member_name* matches no
        member. An unexpected exception while syncing one member becomes an
        ``'error: …'`` result for that member in both modes, so one failure
        never aborts the rest of the run.
    """
    manifest = _load_manifest(repo_root)
    if manifest is None:
        return []

    targets = (
        [m for m in manifest["members"] if m["name"] == member_name]
        if member_name is not None
        else manifest["members"]
    )

    def _sync_one(m: WorkspaceMemberDict) -> WorkspaceSyncResult:
        # Shared by both execution modes so error handling is identical.
        try:
            return sync_workspace_member(repo_root, m, dry_run=dry_run)
        except Exception as exc:
            name = m["name"]
            logger.warning("⚠️ Unexpected sync error for member %r: %s", name, exc)
            return WorkspaceSyncResult(name=name, status=f"error: {exc}")

    if workers <= 1 or len(targets) <= 1:
        return [_sync_one(m) for m in targets]

    effective_workers = min(workers, len(targets))
    with concurrent.futures.ThreadPoolExecutor(max_workers=effective_workers) as pool:
        # Executor.map yields results in submission order, not completion order.
        return list(pool.map(_sync_one, targets))
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 22 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 24 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 30 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 30 days ago