gabriel / muse public
checkout_symbol.py python
459 lines 17.6 KB
Raw
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
1 """muse code checkout-symbol — restore a historical version of a specific symbol.
2
3 Extracts a single named symbol from a historical committed snapshot and writes
4 it back into the current working-tree file, replacing the current version of
5 that symbol.
6
7 This is a **surgical** operation: only the target symbol's lines change.
8 All surrounding code — other symbols, comments, imports, blank lines outside
9 the symbol boundary — is left untouched.
10
11 Why this matters
12 ----------------
13 Git's ``checkout`` restores entire files. If you need to roll back a single
14 function while keeping everything else current, you need to manually cherry-
15 pick lines. ``muse code checkout-symbol`` does this atomically against Muse's
16 content-addressed symbol index.
17
18 No-op detection: if the symbol body at the target commit is identical to the
19 current working-tree version (same body hash), the file is not written and
20 ``"changed": false`` is reported.
21
22 Security note: the file path component of ADDRESS is validated via
23 ``contain_path()`` before any disk access. Paths that escape the repo root
24 (e.g. ``../../etc/passwd::foo``) are rejected with exit 1.
25
26 Usage::
27
28 muse code checkout-symbol "src/billing.py::compute_invoice_total" --commit HEAD~3
29 muse code checkout-symbol "src/auth.py::validate_token" --commit abc12345 --dry-run
30 muse code checkout-symbol "src/billing.py::compute_invoice_total" --commit HEAD~3 --json
31
32 Output (without --dry-run)::
33
34 Restoring: src/billing.py::compute_invoice_total
35 from commit: abc12345 (2026-02-15)
36 lines 42–67 → replaced with 31 historical line(s)
37 ✅ Written to src/billing.py
38
39 Output (no-op — symbol already matches)::
40
41 ✅ src/billing.py::compute_invoice_total already matches commit abc12345 — nothing to do.
42
43 Output (with --dry-run)::
44
45 Dry run — no files will be written.
46
47 Restoring: src/billing.py::compute_invoice_total
48 from commit: abc12345 (2026-02-15)
49
50 --- current
51 +++ historical
52 @@ -42,26 +42,20 @@
53 def compute_invoice_total(...):
54 - ...current body...
55 + ...historical body...
56
57 JSON output (``--json``)::
58
59 {
60 "schema_version": "0.1.5",
61 "address": "src/billing.py::compute_invoice_total",
62 "file": "src/billing.py",
63 "branch": "main",
64 "restored_from": "abc12345",
65 "dry_run": false,
66 "changed": true,
67 "appended": false,
68 "current_start": 42,
69 "current_end": 67,
70 "historical_line_count": 31,
71 "diff_lines": []
72 }
73
74 Flags:
75
76 ``--commit, -c REF``
77 Required. Commit to restore from.
78
79 ``--dry-run``
80 Print the diff without writing anything.
81
82 ``--json``
83 Emit result as JSON for agent consumption. In dry-run mode the JSON
84 includes a ``diff_lines`` list so agents can inspect what would change.
85 """
86
87 import argparse
88 import difflib
89 import json
90 import logging
91 import pathlib
92
93 from typing import TypedDict
94
95 from muse.core.envelope import EnvelopeJson, make_envelope
96 from muse.core.errors import ExitCode
97 from muse.core.object_store import read_object
98 from muse.core.repo import require_repo
99 from muse.core.refs import read_current_branch
100 from muse.core.commits import resolve_commit_ref
101 from muse.core.snapshots import get_commit_snapshot_manifest
102 from muse.core.timing import start_timer
103 from muse.core.validation import contain_path, sanitize_display
104 from muse.plugins.code.ast_parser import SymbolRecord, SymbolTree, parse_symbols
105
106 logger = logging.getLogger(__name__)
107
108 # ---------------------------------------------------------------------------
109 # Data types
110 # ---------------------------------------------------------------------------
111
class _CheckoutSymbolOutputJson(EnvelopeJson, total=False):
    """JSON payload printed by ``muse code checkout-symbol --json``.

    ``run`` builds one of these on each of its three exits (no-op, dry-run,
    write).  The class is declared ``total=False``, so every key is optional
    at the type level; which keys are actually emitted depends on the path
    (see ``verified`` / ``verified_preview`` below).

    Envelope keys are inherited from ``EnvelopeJson`` and filled in by
    ``make_envelope``; both live in ``muse.core.envelope`` and are not
    visible from this file (the original note lists them as muse_version,
    schema, exit_code, duration_ms, timestamp, warnings — confirm there).

    Fields
    ------
    address                 The symbol address that was requested.
    file                    Repo-relative file path (the part of the address
                            before ``::``).
    branch                  Current branch name.
    restored_from           Commit ID of the source snapshot; ``run`` passes
                            ``commit.commit_id`` through unmodified.
    dry_run                 True when ``--dry-run`` was passed.
    changed                 False when the body hashes already match; True
                            otherwise.
    appended                True when the symbol was absent from the working
                            tree and was appended rather than spliced.
    current_start           First line of the current working-tree symbol
                            (0 if appended).
    current_end             Last line of the current working-tree symbol
                            (0 if appended).
    historical_line_count   Number of lines in the restored body.
    diff_lines              Unified-diff lines; populated only in dry-run
                            mode, an empty list on the other two paths.
    verified                Emitted on the no-op path (always True) and on
                            the write path (True when the symbol is found at
                            the requested address in the written content).
    verified_preview        Emitted on the dry-run path only; True when the
                            symbol would be found at the requested address in
                            the patched content.
    """

    address: str  # full "path::symbol" address as given on the command line
    file: str  # path component of the address
    branch: str  # branch returned by read_current_branch
    restored_from: str  # commit.commit_id of the resolved ref
    dry_run: bool  # mirrors the --dry-run flag
    changed: bool  # False only when the body hashes are equal
    appended: bool  # True when no current symbol record was found
    current_start: int  # 1-indexed; 0 when appended
    current_end: int  # 1-indexed, inclusive; 0 when appended
    historical_line_count: int  # len() of the extracted historical lines
    diff_lines: list[str]  # unified diff, dry-run only
    verified: bool  # no-op and write paths
    verified_preview: bool  # dry-run path
150
151 # ---------------------------------------------------------------------------
152 # Internal helpers
153 # ---------------------------------------------------------------------------
154
155 def _extract_lines(source: bytes, lineno: int, end_lineno: int) -> list[str]:
156 """Extract lines *lineno*..*end_lineno* (1-indexed, inclusive).
157
158 Returns an empty list and logs a warning if the range is out of bounds
159 rather than silently truncating.
160 """
161 all_lines = source.decode("utf-8", errors="replace").splitlines(keepends=True)
162 total = len(all_lines)
163 if lineno < 1 or end_lineno < lineno or end_lineno > total:
164 logger.warning(
165 "Line range %d–%d out of bounds for %d-line source — returning empty",
166 lineno, end_lineno, total,
167 )
168 return []
169 return all_lines[lineno - 1:end_lineno]
170
def _find_symbol_in_source(source: bytes, file_rel: str, address: str) -> SymbolRecord | None:
    """Look up *address* among the symbols parsed from *source*.

    *source* is handed to ``parse_symbols`` together with *file_rel*.  The
    repo-relative path (not an absolute one) must be used so that the keys of
    the resulting tree have the same shape as the caller's address, e.g.
    ``"src/a.py::fn"``.

    Returns the matching record, or ``None`` when the tree has no entry for
    *address*.
    """
    return parse_symbols(source, file_rel).get(address)
181
182 # ---------------------------------------------------------------------------
183 # CLI registration and entry point
184 # ---------------------------------------------------------------------------
185
def register(subparsers: "argparse._SubParsersAction[argparse.ArgumentParser]") -> None:
    """Attach the ``checkout-symbol`` subcommand to *subparsers*.

    The module docstring doubles as the long description (rendered verbatim
    via ``RawDescriptionHelpFormatter``) and ``run`` is installed as the
    handler through ``set_defaults(func=...)``.
    """
    cmd = subparsers.add_parser(
        "checkout-symbol",
        help="Restore a historical version of a specific symbol into the working tree.",
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # (flags, keyword options) in the order they are registered.
    argument_specs = (
        (
            ("address",),
            {
                "metavar": "ADDRESS",
                "help": 'Symbol address, e.g. "src/billing.py::compute_invoice_total".',
            },
        ),
        (
            ("--commit", "-c"),
            {
                "required": True,
                "metavar": "REF",
                "dest": "ref",
                "help": "Commit to restore the symbol from (required).",
            },
        ),
        (
            ("--dry-run",),
            {
                "action": "store_true",
                "dest": "dry_run",
                "help": "Print the diff without writing anything.",
            },
        ),
        (
            ("--json", "-j"),
            {
                "action": "store_true",
                "dest": "json_out",
                "help": "Emit result as JSON for agent consumption.",
            },
        ),
    )
    for flags, options in argument_specs:
        cmd.add_argument(*flags, **options)

    cmd.set_defaults(func=run)
211
def run(args: argparse.Namespace) -> None:
    """Restore a historical version of a specific symbol into the working tree.

    Extracts the symbol's lines from the file as stored in the given commit
    and splices them over the symbol's current line range in the working-tree
    file.  Lines outside that range are carried over unchanged.  When the
    symbol is not found in the working tree (or the file does not exist) the
    historical lines are appended after a single ``"\\n"`` separator.  If the
    body hashes already match and ``--dry-run`` was not given, nothing is
    written and ``changed=false`` is reported.

    Agent quickstart
    ----------------
    ::

        muse code checkout-symbol "src/billing.py::compute_tax" --commit HEAD~5 --json
        muse code checkout-symbol "src/billing.py::compute_tax" --commit HEAD~5 --dry-run --json

    JSON fields (in addition to the envelope produced by ``make_envelope``)
    ----------------------------------------------------------------------
    address                 The symbol address that was operated on.
    file                    Repo-relative file path that was (or would be) modified.
    branch                  Current branch name.
    restored_from           ``commit_id`` of the commit the symbol came from.
    dry_run                 ``true`` when ``--dry-run`` was passed.
    changed                 ``false`` when the body hashes already match.
    appended                ``true`` when the symbol was appended, not spliced.
    current_start/_end      Current line range of the symbol (0/0 if appended).
    historical_line_count   Number of restored lines.
    diff_lines              Unified diff (dry-run only, otherwise ``[]``).
    verified                No-op and write paths: symbol found after restore.
    verified_preview        Dry-run path: symbol would be found after restore.

    Args:
        args: Parsed CLI namespace with ``address``, ``ref``, ``dry_run`` and
            ``json_out`` (see ``register``).

    Raises:
        SystemExit: with ``ExitCode.USER_ERROR`` for a malformed address, a
            path escaping the repo, an unknown ref, a file or blob missing
            from the snapshot, or a symbol missing from the commit; with
            ``ExitCode.INTERNAL_ERROR`` when the snapshot yields no lines for
            the symbol or the working-tree file cannot be written.  The
            numeric values live in ``muse.core.errors``.  Being outside a
            repository is presumably reported by ``require_repo``.
    """
    elapsed = start_timer()
    address: str = args.address
    ref: str = args.ref
    dry_run: bool = args.dry_run
    json_out: bool = args.json_out

    root = require_repo()

    branch = read_current_branch(root)

    if "::" not in address:
        logger.error("ADDRESS must be a symbol address like 'src/billing.py::func'.")
        raise SystemExit(ExitCode.USER_ERROR)

    file_rel, _sym_name = address.split("::", 1)

    # Validate the file path stays inside the repo root before any disk access.
    try:
        contain_path(root, file_rel)
    except ValueError as exc:
        logger.error("%s", exc)
        raise SystemExit(ExitCode.USER_ERROR) from exc

    commit = resolve_commit_ref(root, branch, ref)
    if commit is None:
        logger.error("Commit %r not found.", ref)
        raise SystemExit(ExitCode.USER_ERROR)

    # ------------------------------------------------------------------
    # Load the historical blob and locate the symbol inside it.
    # ------------------------------------------------------------------
    manifest = get_commit_snapshot_manifest(root, commit.commit_id) or {}
    obj_id = manifest.get(file_rel)
    if obj_id is None:
        logger.error("'%s' is not in snapshot %s.", file_rel, commit.commit_id)
        raise SystemExit(ExitCode.USER_ERROR)

    historical_raw = read_object(root, obj_id)
    if historical_raw is None:
        logger.error("Blob %s missing from object store.", obj_id)
        raise SystemExit(ExitCode.USER_ERROR)

    hist_rec = _find_symbol_in_source(historical_raw, file_rel, address)
    if hist_rec is None:
        logger.error("Symbol '%s' not found in commit %s.", address, commit.commit_id)
        raise SystemExit(ExitCode.USER_ERROR)

    historical_lines = _extract_lines(
        historical_raw, hist_rec["lineno"], hist_rec["end_lineno"]
    )
    # Guard against a corrupted snapshot producing empty content.  Writing
    # an empty replacement would silently delete the symbol — hard to debug.
    if not historical_lines:
        logger.error(
            "Symbol '%s' at commit %s produced no extractable lines — "
            "snapshot may be corrupted. Aborting without writing.",
            address, commit.commit_id,
        )
        raise SystemExit(ExitCode.INTERNAL_ERROR)

    # ------------------------------------------------------------------
    # Load the current working-tree file.
    # Read once — reuse bytes for both line-list and symbol lookup.
    # ------------------------------------------------------------------
    working_file = root / file_rel
    if working_file.exists():
        current_raw = working_file.read_bytes()
        # Split on the raw bytes: ``bytes.splitlines`` breaks only at \n, \r
        # and \r\n.  ``str.splitlines`` would also break at form feed,
        # vertical tab, U+2028, ..., which do not end a line for Python's
        # tokenizer and would shift the list away from the record's
        # ``lineno``/``end_lineno``.
        current_lines = [
            raw.decode("utf-8", errors="replace")
            for raw in current_raw.splitlines(keepends=True)
        ]
        # Pass file_rel (not the absolute path) so parse_symbols builds
        # addresses matching the address format used throughout Muse.
        cur_rec = _find_symbol_in_source(current_raw, file_rel, address)
    else:
        current_raw = b""
        current_lines = []
        cur_rec = None

    # ------------------------------------------------------------------
    # No-op detection: bail out early when the bodies already match.
    # Only skip when NOT in dry-run mode — a dry-run explicitly requests
    # "show me what would happen," so it must go through the full diff
    # path even when the result would be unchanged.  Skipping early in
    # dry-run mode would omit verified_preview from the JSON.
    # ------------------------------------------------------------------
    already_current = (
        cur_rec is not None
        and cur_rec["body_hash"] == hist_rec["body_hash"]
    )
    # ``cur_rec is not None`` is implied by ``already_current``; repeating it
    # narrows the type without relying on ``assert`` (stripped under -O).
    if cur_rec is not None and already_current and not dry_run:
        if json_out:
            print(json.dumps(_CheckoutSymbolOutputJson(
                **make_envelope(elapsed),
                address=address,
                file=file_rel,
                branch=branch,
                restored_from=commit.commit_id,
                dry_run=False,
                changed=False,
                appended=False,
                current_start=cur_rec["lineno"],
                current_end=cur_rec["end_lineno"],
                historical_line_count=len(historical_lines),
                diff_lines=[],
                verified=True,
            )))
        else:
            # Sanitised like the other human-readable paths: the address is
            # raw command-line input.
            print(
                f"✅ {sanitize_display(address)} already matches commit {commit.commit_id}"
                " — nothing to do."
            )
        return

    # ------------------------------------------------------------------
    # Compute the patched file content.
    # ------------------------------------------------------------------
    appended = cur_rec is None

    if cur_rec is not None:
        cur_start, cur_end = cur_rec["lineno"], cur_rec["end_lineno"]
        following = current_lines[cur_end:]
        replacement = list(historical_lines)
        # If the symbol was the last thing in the historical file and that
        # file had no trailing newline, the final restored line is
        # unterminated.  Spliced in front of more code it would fuse with the
        # next line, so terminate it.  (historical_lines is non-empty here.)
        if following and not replacement[-1].endswith(("\n", "\r")):
            replacement[-1] += "\n"
        new_lines = current_lines[:cur_start - 1] + replacement + following
    else:
        cur_start = cur_end = 0
        new_lines = current_lines + ["\n"] + historical_lines

    # ------------------------------------------------------------------
    # Dry run — report without writing.
    # ------------------------------------------------------------------
    if dry_run:
        diff_lines = list(difflib.unified_diff(
            current_lines,
            new_lines,
            fromfile="current",
            tofile="historical",
            lineterm="",
        ))
        # Preview whether the symbol would still be found at its address in
        # the patched content — lets agents detect malformed output before
        # committing to a write.
        verified_preview = _find_symbol_in_source(
            "".join(new_lines).encode("utf-8"), file_rel, address
        ) is not None
        if json_out:
            print(json.dumps(_CheckoutSymbolOutputJson(
                **make_envelope(elapsed),
                address=address,
                file=file_rel,
                branch=branch,
                restored_from=commit.commit_id,
                dry_run=True,
                changed=not already_current,
                appended=appended,
                current_start=cur_start,
                current_end=cur_end,
                historical_line_count=len(historical_lines),
                diff_lines=diff_lines,
                verified_preview=verified_preview,
            )))
        else:
            print("Dry run — no files will be written.\n")
            print(f"Restoring: {sanitize_display(address)}")
            print(f" from commit: {commit.commit_id} ({commit.committed_at.date()})")
            if not verified_preview:
                print(" ⚠️ Warning: result would not be parseable at this address.")
            # With lineterm="" the ---/+++/@@ headers carry no newline while
            # the content lines keep theirs, so a plain "".join() would run
            # the headers together.  Terminate whatever is not yet terminated.
            rendered = "".join(
                line if line.endswith(("\n", "\r")) else f"{line}\n"
                for line in diff_lines
            )
            print(f"\n{rendered}", end="")
        return

    # ------------------------------------------------------------------
    # Write the patched file.
    # ------------------------------------------------------------------
    # Encode once and reuse the bytes for the post-write check.  write_bytes
    # is used instead of write_text because text mode applies the platform's
    # newline translation, which on Windows rewrites every line ending in the
    # file ("\r\n" would become "\r\r\n") — not a surgical edit.
    written_bytes = "".join(new_lines).encode("utf-8")
    try:
        working_file.write_bytes(written_bytes)
    except OSError as exc:
        # NOTE(review): this command documents exit code 3 for write
        # failures; INTERNAL_ERROR is presumably that code — confirm against
        # muse.core.errors.ExitCode.
        logger.error("Could not write '%s': %s", file_rel, exc)
        raise SystemExit(ExitCode.INTERNAL_ERROR) from exc

    # Post-write verification: confirm the symbol is found at its address in
    # the content we just wrote.  Uses the in-memory bytes to avoid a second
    # disk read.
    verified = _find_symbol_in_source(written_bytes, file_rel, address) is not None
    if not verified:
        logger.warning(
            "Post-write verification failed: '%s' not found after restore. "
            "The file was written but the symbol may not be at the expected "
            "address. Inspect with: muse code symbols --file %s",
            address, file_rel,
        )

    if json_out:
        print(json.dumps(_CheckoutSymbolOutputJson(
            **make_envelope(elapsed),
            address=address,
            file=file_rel,
            branch=branch,
            restored_from=commit.commit_id,
            dry_run=False,
            changed=True,
            appended=appended,
            current_start=cur_start,
            current_end=cur_end,
            historical_line_count=len(historical_lines),
            diff_lines=[],
            verified=verified,
        )))
    else:
        print(f"Restoring: {sanitize_display(address)}")
        print(f" from commit: {commit.commit_id} ({commit.committed_at.date()})")
        if not appended:
            print(
                f" lines {cur_start}–{cur_end} → replaced with "
                f"{len(historical_lines)} historical line(s)"
            )
        else:
            print(" symbol not found in working tree — appending at end of file")
        if verified:
            print(f"✅ Written to {file_rel}")
        else:
            print(f"⚠️ Written to {file_rel} — post-write verification failed")
File History 4 commits
sha256:81ae324db5ad375fbfe4834c6fcb378312cafad3cc92dec5d3e5c427306621a2 fix: remove commit_exists filter from have anchors — server… Sonnet 4.6 patch 20 days ago
sha256:36c3cb3e76619d4c30a6d9bf81b5ec4ff148e30dcfed913e3114ca7b43b81c7e fix: rename objects→blobs in push client and all stale test… Sonnet 4.6 patch 22 days ago
sha256:c06a9b9b9fee26c68ea725b44d54b2c0a171301ce9de746d5b656617b4463a9a fix: repair four test failures from post-migration audit Sonnet 4.6 patch 28 days ago
sha256:1900655993c83c4107067375548a7be823e471d2515830842f1a12cba4bd3cdf fix: unified object store migration — idempotent writes, JS… Sonnet 4.6 minor 28 days ago