repo.py
python
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
20 days ago
| 1 | """Repository detection utilities for the Muse CLI. |
| 2 | |
| 3 | Walking up the directory tree to locate a ``.muse/`` directory is the |
| 4 | single most-called internal primitive. Every subcommand uses it. Keeping |
| 5 | the semantics clear (``None`` on miss, never raises) makes callers simpler |
| 6 | and test isolation easier (``MUSE_REPO_ROOT`` env-var override). |
| 7 | |
| 8 | :func:`read_repo_id` is the canonical way to read the repository ID from |
| 9 | ``.muse/repo.json``. It replaces 73 copy-pasted ``_read_repo_id`` functions |
| 10 | that had diverged into four different error-handling behavioural variants. |
| 11 | The canonical implementation uses ``REPO_NOT_FOUND`` on a missing file and |
| 12 | ``INTERNAL_ERROR`` on a malformed file — errors go through the logger, not |
| 13 | stderr, so commands that want to emit a user-facing message can catch the |
| 14 | ``SystemExit`` and print their own context. |
| 15 | |
| 16 | :func:`require_repo` performs a **startup GC sweep** on every invocation, |
| 17 | removing stale temp files left by a prior SIGKILL. The temp-file families |
| 18 | listed below are covered, along with every other prefix in ``_MUSE_TEMP_PREFIXES``: |
| 19 | |
| 20 | * ``.obj-tmp-*`` / ``.restore-tmp-*`` — object-store shard directories |
| 21 | (delegated to :func:`muse.core.object_store.cleanup_stale_object_temps`) |
| 22 | * ``.muse-tmp-*`` — created by :func:`~muse.core.store.write_text_atomic` |
| 23 | and :func:`~muse.core.store._write_msgpack_atomic` in ``.muse/`` and its |
| 24 | subdirectories (branches, tags, releases, …) |
| 25 | * ``.stat_cache_*.tmp`` — created by :class:`~muse.core.stat_cache.StatCache` |
| 26 | in ``.muse/`` |
| 27 | """ |
| 28 | |
| 29 | import datetime |
| 30 | import logging |
| 31 | import os |
| 32 | import pathlib |
| 33 | import sys |
| 34 | |
| 35 | from muse.core.paths import muse_dir as _muse_dir, repo_json_path as _repo_json_path |
| 36 | from muse.core.types import load_json_file |
| 37 | from muse.core.errors import ExitCode, UntrustedRepositoryError |
| 38 | from muse.core.validation import assert_not_symlink |
| 39 | |
# Module-level logger named after this module (``__name__``); the helpers
# below use it for diagnostics instead of writing to stderr directly.
logger = logging.getLogger(__name__)
| 41 | |
# Subdirectories of .muse/ (excluding objects/) that can hold stale temps.
# objects/ is handled separately by cleanup_stale_object_temps.
#
# Entries are paths relative to .muse/; the empty string stands for .muse/
# itself.  _cleanup_muse_dir_temps lists each directory non-recursively, so a
# nested location such as "refs/heads" has to be named explicitly here.
_MUSE_SWEEP_DIRS: tuple[str, ...] = (
    "",              # .muse/ root itself (HEAD, stat_cache, config, merge-state, etc.)
    "branches",      # write_branch_ref
    "commits",       # legacy — kept for stale-temp sweep on pre-migration repos
    "snapshots",     # legacy — kept for stale-temp sweep on pre-migration repos
    "tags",          # write_tag via write_json_atomic
    "releases",      # write_release via write_json_atomic
    "refs/heads",    # write_branch_ref alternative path
    "code",          # code-domain index writes
    "indices",       # index writes
    "coordination",  # create_intent / create_reservation
    "worktrees",     # _save_meta
    "shelf",         # shelf writes
    "cache",         # recomputable JSON caches (symbol, callgraph, stat, etc.)
    "logs",          # log writes
    "remotes",       # remote config writes
)
| 61 | |
# File-name prefixes that identify stale temps within the above directories.
#   .muse-tmp-:       write_text_atomic (tags, releases, branches, refs)
#   .stat_cache_:     StatCache.save (mkstemp, suffix .tmp)
#   .symbols_:        SymbolCache.save (mkstemp, suffix .tmp)
#   .callgraph_:      CallGraphCache.save (mkstemp, suffix .tmp)
#   .implicit_edges_: ImplicitEdgeCache.save (mkstemp, suffix .tmp)
#   .invariants_:     _InvariantFileCache.save (mkstemp, suffix .tmp)
#
# NOTE: _cleanup_muse_dir_temps matches on these prefixes only; the ".tmp"
# suffix mentioned above is not checked by the sweep.
_MUSE_TEMP_PREFIXES: tuple[str, ...] = (
    ".muse-tmp-",
    ".stat_cache_",
    ".symbols_",
    ".callgraph_",
    ".implicit_edges_",
    ".invariants_",
)
| 77 | |
def _cleanup_muse_dir_temps(muse_dir: pathlib.Path) -> int:
    """Remove stale temp files left in ``.muse/`` by crashed writers.

    Each directory named in :data:`_MUSE_SWEEP_DIRS` is listed
    non-recursively, and every regular file whose name starts with one of
    :data:`_MUSE_TEMP_PREFIXES` is unlinked.  Only that fixed directory set
    is visited, so the object store (handled separately) and files outside
    ``.muse/`` are never touched.

    The sweep is best-effort: a directory that cannot be listed, or a file
    that cannot be removed, is logged at warning level and skipped rather
    than aborting the calling command.

    Args:
        muse_dir: Path to the ``.muse/`` directory.

    Returns:
        Number of stale temp files removed.
    """
    if not muse_dir.is_dir():
        return 0
    removed = 0
    for subdir in _MUSE_SWEEP_DIRS:
        target = muse_dir / subdir if subdir else muse_dir
        # Skip symlinked subdirectories — never delete files inside an
        # attacker-controlled location that was swapped in via symlink.
        if not target.is_dir() or target.is_symlink():
            continue
        try:
            # Materialise the listing up front: it keeps the failure point
            # inside this ``try`` and avoids unlinking while still iterating.
            entries = list(target.iterdir())
        except OSError as exc:
            logger.warning(
                "⚠️ Could not list %s for stale temps: %s", target, exc
            )
            continue
        for entry in entries:
            # Cheap name test first; ``is_file()`` costs a stat call.
            if not entry.name.startswith(_MUSE_TEMP_PREFIXES):
                continue
            if not entry.is_file():
                continue
            try:
                entry.unlink()
                removed += 1
                logger.warning(
                    "⚠️ Removed stale muse temp %s (left by prior crash)", entry
                )
            except OSError as exc:
                logger.warning(
                    "⚠️ Could not remove stale temp %s: %s", entry, exc
                )
    return removed
| 116 | |
# Paths (relative to .muse/) that _verify_muse_dir_integrity passes to
# assert_not_symlink on every require_repo call.
_CRITICAL_MUSE_DIRS: tuple[str, ...] = (
    "objects",
    "refs",
    "refs/heads",
    "tags",
)
| 123 | |
def _verify_muse_dir_integrity(muse_dir: pathlib.Path) -> None:
    """Assert that critical ``.muse/`` subdirectories are real directories.

    Any of these being a symlink would redirect writes to an attacker-
    controlled location.  Called by :func:`require_repo` on every invocation
    so the check runs at the trust boundary, before any store operation.

    A path that is entirely absent is skipped (first use, not an attack).  A
    *dangling* symlink is not treated as absent: it is still handed to
    ``assert_not_symlink`` so it can be rejected.

    Args:
        muse_dir: Absolute path to the ``.muse/`` directory.

    Raises:
        SystemExit(1): If ``assert_not_symlink`` rejects any critical
            subdirectory (it signals that by raising ``ValueError``).
    """
    for subname in _CRITICAL_MUSE_DIRS:
        candidate = muse_dir / subname
        # ``Path.exists()`` follows symlinks and is False for a broken link,
        # so test ``is_symlink()`` first; otherwise a dangling symlink would
        # slip past the check as "not yet created".
        if not candidate.is_symlink() and not candidate.exists():
            continue  # not yet created — first-use, not an attack
        try:
            assert_not_symlink(candidate, label=f".muse/{subname}")
        except ValueError as exc:
            logger.error("❌ %s", exc)
            raise SystemExit(1) from exc
| 146 | |
def _startup_gc(repo_root: pathlib.Path) -> None:
    """Run both stale-temp sweeps for the repository at *repo_root*.

    Invoked by :func:`require_repo` so leftovers from an interrupted command
    are cleared before the current command touches the store.  Two passes
    run, in this order:

    1. :func:`muse.core.object_store.cleanup_stale_object_temps`, called with
       the repository root, for the object-store temps.
    2. :func:`_cleanup_muse_dir_temps`, called with the ``.muse/`` directory,
       for every file-name prefix listed in :data:`_MUSE_TEMP_PREFIXES`.

    Args:
        repo_root: The repository root directory (parent of ``.muse/``).
    """
    # Function-scope import, as in the original implementation.
    from muse.core import object_store

    object_store.cleanup_stale_object_temps(repo_root)
    store_dir = _muse_dir(repo_root)
    _cleanup_muse_dir_temps(store_dir)
| 168 | |
def _resolve_worktree_pointer(pointer_path: pathlib.Path) -> pathlib.Path | None:
    """Read a ``.muse`` worktree pointer file and return the main repo root.

    The file must contain a line of the form::

        musestore: /absolute/path/to/main/.muse

    The declared store path is rejected when it is empty, contains control
    characters, is longer than 4096 characters, is itself a symbolic link,
    does not resolve to a directory, or resolves back to the worktree's own
    directory.

    Args:
        pointer_path: Path to the ``.muse`` pointer *file* in the worktree.

    Returns:
        The parent of the ``.muse/`` store (i.e. the main repo root), or
        ``None`` on any read, parse or validation failure.  Never raises.
    """
    try:
        text = pointer_path.read_text(encoding="utf-8", errors="strict").strip()
    except Exception as exc:
        logger.debug("Could not read worktree pointer %s: %s", pointer_path, exc)
        return None

    prefix = "musestore: "
    if not text.startswith(prefix):
        logger.debug("Worktree pointer %s has unexpected format: %r", pointer_path, text[:80])
        return None

    raw_store = text[len(prefix):].strip()
    if not raw_store:
        return None
    if any(ord(c) < 0x20 or ord(c) == 0x7F for c in raw_store):
        logger.warning("⚠️ Worktree pointer %s contains control characters — ignoring", pointer_path)
        return None
    if len(raw_store) > 4096:
        logger.warning("⚠️ Worktree pointer %s path too long — ignoring", pointer_path)
        return None

    declared_store = pathlib.Path(raw_store)
    try:
        # The symlink test must run on the path as written: ``resolve()``
        # follows symlinks, so its result never reports ``is_symlink()``.
        if declared_store.is_symlink():
            logger.warning(
                "⚠️ Worktree pointer %s targets a symlinked store — ignoring",
                pointer_path,
            )
            return None
        store_path = declared_store.resolve()
        worktree_dir = pointer_path.parent.resolve()
    except (OSError, RuntimeError) as exc:
        # ``resolve()`` can fail (e.g. on a symlink loop); honour the
        # never-raises contract by treating that as an invalid pointer.
        logger.debug("Could not resolve worktree pointer %s: %s", pointer_path, exc)
        return None

    if not store_path.is_dir():
        logger.debug("Worktree pointer %s → %s is not a valid store dir", pointer_path, store_path)
        return None

    repo_root = store_path.parent
    # Loop guard: resolved root must not be the worktree directory itself.
    if repo_root == worktree_dir:
        logger.warning("⚠️ Worktree pointer %s loops back to its own directory", pointer_path)
        return None

    return repo_root
| 213 | |
def _is_repo_trusted(repo_root: pathlib.Path) -> bool:
    """Return ``True`` if *repo_root* is in the caller's trust list.

    Checks two sources (in order):
    1. ``MUSE_SAFE_DIRS`` environment variable — colon-separated paths.
    2. ``~/.muse/config.toml`` ``[security] safe_dirs`` list, read through
       :func:`muse.cli.config.get_global_safe_dirs`.

    Both sides of every comparison are passed through
    :meth:`pathlib.Path.resolve`, so entries match regardless of symlinks or
    relative spelling.

    Root (uid == 0) is always trusted.

    Args:
        repo_root: The repository root directory to look up.

    Returns:
        ``True`` when trusted, ``False`` otherwise.  Any failure while
        consulting the global config is logged at debug level and treated as
        "not trusted" (fail closed).
    """
    if os.getuid() == 0:
        return True

    # Resolve once; every candidate below is compared against this path.
    target = repo_root.resolve()

    # 1. MUSE_SAFE_DIRS env var (colon-separated, for Docker/CI).  Blank
    #    segments (including an unset or empty variable) are skipped.
    for raw_dir in os.environ.get("MUSE_SAFE_DIRS", "").split(":"):
        entry = raw_dir.strip()
        if entry and pathlib.Path(entry).resolve() == target:
            return True

    # 2. ~/.muse/config.toml [security] safe_dirs.
    try:
        from muse.cli.config import get_global_safe_dirs

        for safe_path in get_global_safe_dirs():
            if pathlib.Path(safe_path).resolve() == target:
                return True
    except Exception as exc:  # noqa: BLE001
        # Deliberately broad: a broken or missing config must never grant
        # trust or crash the command — but leave a trace for debugging.
        logger.debug("Could not consult global safe_dirs for %s: %s", target, exc)

    return False
| 245 | |
def _check_repo_ownership(repo_root: pathlib.Path) -> None:
    """Refuse repositories whose ``.muse/`` belongs to a different user.

    A CVE-2022-24765–style guard: the ``.muse/`` directory has to be owned by
    the calling user unless the repository has been explicitly trusted.

    The check passes silently when any of the following holds:
      - the current uid is 0;
      - ``.muse/`` cannot be stat'ed (a missing store is reported elsewhere);
      - the owner uid equals the current uid;
      - :func:`_is_repo_trusted` accepts *repo_root*.

    Args:
        repo_root: The repository root directory (parent of ``.muse/``).

    Raises:
        UntrustedRepositoryError: When the owner uid differs from the current
            uid and the path is not in the trust list.
    """
    me = os.getuid()
    if me == 0:
        return  # root bypass

    store = _muse_dir(repo_root)
    try:
        owner = store.stat().st_uid
    except OSError:
        # Nothing to compare against; other code deals with a missing .muse/.
        return

    # Only a foreign owner that is also absent from the trust list is an error.
    if owner != me and not _is_repo_trusted(repo_root):
        raise UntrustedRepositoryError(
            path=str(repo_root),
            owner_uid=owner,
            current_uid=me,
        )
| 288 | |
def find_repo_root(start: pathlib.Path | None = None) -> pathlib.Path | None:
    """Locate the Muse repository root for *start* (default ``Path.cwd()``).

    Discovery walks from the resolved start directory up through each parent
    and returns the first directory holding a real (non-symlink) ``.muse/``
    directory.  A regular ``.muse`` *file* is treated as a linked-worktree
    pointer and resolved with :func:`_resolve_worktree_pointer`; an unusable
    pointer is skipped and the walk continues upward.

    A miss is reported by returning ``None`` rather than raising, so callers
    decide how to react.  The one exception is the ownership check below.

    ``MUSE_REPO_ROOT`` override (handy in tests to avoid ``os.chdir``):
      - When set to a non-blank value it replaces the walk entirely: the
        result is that path or ``None``, never a parent directory.
      - Empty or whitespace-only values are ignored and the walk runs.
      - Values containing control characters, or longer than 4096
        characters, are rejected with a warning (returns ``None``).
      - A symlinked or missing ``.muse/`` under the override returns ``None``.

    Ownership check (CVE-2022-24765 equivalent):
      - Every root about to be returned is first passed to
        :func:`_check_repo_ownership`.

    Raises:
        UntrustedRepositoryError: Propagated from
            :func:`_check_repo_ownership` when ``.muse/`` is owned by another
            user and the path is not trusted.
    """
    override = os.environ.get("MUSE_REPO_ROOT")
    if override is not None:
        requested = override.strip()
        if requested:
            # Control characters suggest a crafted value, not a real path.
            if any(ch < " " or ch == "\x7f" for ch in requested):
                logger.warning(
                    "⚠️ MUSE_REPO_ROOT contains control characters — ignoring for safety"
                )
                return None
            # Longer than PATH_MAX (4096 on Linux/macOS): refuse outright.
            if len(requested) > 4096:
                logger.warning(
                    "⚠️ MUSE_REPO_ROOT is too long (%d chars) — ignoring for safety",
                    len(requested),
                )
                return None
            forced_root = pathlib.Path(requested).resolve()
            logger.debug("⚠️ MUSE_REPO_ROOT override active: %s", forced_root)
            forced_store = _muse_dir(forced_root)
            # The override gets the same symlink rejection as the walk.
            if not forced_store.is_dir() or forced_store.is_symlink():
                return None
            _check_repo_ownership(forced_root)
            return forced_root
        # Blank override: fall through to the directory walk.
        logger.debug("MUSE_REPO_ROOT is empty or whitespace — ignoring, using cwd walk")

    origin = (start or pathlib.Path.cwd()).resolve()
    for directory in (origin, *origin.parents):
        marker = _muse_dir(directory)
        # A symlinked .muse (directory or file) would redirect later writes
        # outside the repo root — never accept it, just keep climbing.
        if marker.is_symlink():
            continue
        if marker.is_dir():
            _check_repo_ownership(directory)
            return directory
        # Linked worktree: .muse is a file containing "musestore: /path/to/.muse"
        if marker.is_file():
            main_root = _resolve_worktree_pointer(marker)
            if main_root is not None:
                _check_repo_ownership(main_root)
                return main_root
    return None
| 362 | |
# Two-line message that require_repo prints to stderr when find_repo_root
# returns None.
_NOT_A_REPO_MSG = (
    'fatal: not a muse repository (or any parent up to mount point /)\n'
    'Run "muse init" to initialize a new repository.'
)
| 367 | |
def require_repo(start: pathlib.Path | None = None) -> pathlib.Path:
    """Return the repo root, or exit with ``ExitCode.REPO_NOT_FOUND``.

    Thin wrapper over :func:`find_repo_root` for command callbacks that must
    run inside a Muse repository.  On a miss the not-a-repository message is
    printed to stderr before exiting.

    On a hit, two steps run before the root is handed back:

    1. :func:`_verify_muse_dir_integrity` checks the ``.muse/`` directory.
    2. :func:`_startup_gc` sweeps stale temp files left by an interrupted
       earlier command.

    Args:
        start: Directory to begin the search from; forwarded unchanged to
            :func:`find_repo_root`.

    Returns:
        The repository root directory.

    Raises:
        SystemExit: With ``ExitCode.REPO_NOT_FOUND`` when no repository is
            found; may also propagate from the integrity check.
    """
    located = find_repo_root(start)
    if located is not None:
        _verify_muse_dir_integrity(_muse_dir(located))
        _startup_gc(located)
        return located

    print(_NOT_A_REPO_MSG, file=sys.stderr)
    raise SystemExit(ExitCode.REPO_NOT_FOUND)
| 389 | |
#: Public alias: bound to the same function object as :func:`require_repo`.
require_repo_root = require_repo
| 392 | |
def parse_date_arg(value: str, flag: str) -> datetime.datetime:
    """Turn a CLI date/datetime string into a UTC-aware ``datetime``.

    Two layouts are recognised, tried in this order:
    ``YYYY-MM-DDTHH:MM:SS`` and ``YYYY-MM-DD``.  The parsed value is stamped
    with UTC as its timezone (no conversion is applied).  When neither layout
    matches, an error naming *flag* is printed to stderr and the process
    exits with status 1.

    Args:
        value: The raw string value from the CLI argument.
        flag: The flag name (e.g. ``--since``) used in the error message.

    Returns:
        A UTC-aware :class:`datetime.datetime`.

    Raises:
        SystemExit(1): when *value* matches neither recognised layout.
    """
    layouts = ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d")
    for layout in layouts:
        try:
            naive = datetime.datetime.strptime(value, layout)
        except ValueError:
            continue
        break
    else:
        # No layout matched: report the offending flag and bail out.
        print(
            f"❌ Invalid date for {flag}: {value!r}"
            " — expected YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS",
            file=sys.stderr,
        )
        raise SystemExit(1)
    return naive.replace(tzinfo=datetime.timezone.utc)
| 426 | |
def read_repo_id(repo_root: pathlib.Path) -> str:
    """Return the ``repo_id`` stored in ``.muse/repo.json``.

    This is the single shared reader for the repository ID.  Failures are
    reported through the module logger at debug level and then raised as
    ``SystemExit``, leaving any user-facing message to the caller.

    Args:
        repo_root: The repository root directory (parent of ``.muse/``).

    Returns:
        The ``repo_id`` value as a ``str``.

    Raises:
        SystemExit(REPO_NOT_FOUND): when ``.muse/repo.json`` does not exist.
        SystemExit(INTERNAL_ERROR): when ``load_json_file`` returns ``None``
            or the loaded data has no string ``repo_id`` entry.
    """
    manifest = _repo_json_path(repo_root)
    if not manifest.exists():
        logger.debug(".muse/repo.json not found")
        raise SystemExit(ExitCode.REPO_NOT_FOUND)

    payload = load_json_file(manifest)
    repo_id = None if payload is None else payload.get("repo_id")
    if not isinstance(repo_id, str):
        logger.debug(".muse/repo.json malformed")
        raise SystemExit(ExitCode.INTERNAL_ERROR)
    return str(repo_id)
File History
4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2
fix: remove commit_exists filter from have anchors — server…
Sonnet 4.6
patch
20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e
fix: rename objects→blobs in push client and all stale test…
Sonnet 4.6
patch
22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a
fix: repair four test failures from post-migration audit
Sonnet 4.6
patch
28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf
fix: unified object store migration — idempotent writes, JS…
Sonnet 4.6
minor
⚠
29 days ago