"""Persistent call-graph cache — eliminates re-parsing on every CLI invocation. Architecture ------------ ``build_forward_graph`` currently re-reads every Python blob from the object store and re-parses every AST on each invocation: ~15 s for a 778-file repo. The result is fully determined by the file content — the same bytes always produce the same caller→callee mapping. ``CallGraphCache`` exploits this by persisting a **per-file subgraph** keyed by the SHA-256 of the file bytes (``object_id`` from the manifest): key: SHA-256 hex digest of raw Python file bytes (``object_id``) value: ``dict[caller_address, frozenset[callee_bare_name]]`` — the portion of the ``ForwardGraph`` contributed by this file Storing the per-file subgraph (not just a flattened set of names) preserves full per-address granularity so the warm path produces an identical ``ForwardGraph`` to the cold path. Storage ------- ``.muse/cache/callgraph.json``:: { "version": 2, "entries": { "": { "file.py::caller_fn": ["callee_a", "callee_b"], "file.py::leaf_fn": [] } } } Inner lists are sorted for deterministic output. On load they are converted back to frozensets. Writes are atomic (Pattern A): ``mkstemp`` gives each writer a unique temp file so two concurrent saves cannot interleave bytes; ``os.replace`` is the atomic rename. Pruning ------- Use :meth:`CallGraphCache.prune` from ``muse gc`` to remove entries whose object IDs are no longer reachable in the object store. Typical lifecycle inside ``build_forward_graph``:: cache = load_callgraph_cache(root) for file_path, obj_id in manifest.items(): subgraph = cache.get(obj_id) if subgraph is not None: graph.update(subgraph) # warm: no parse, no ast walk else: subgraph = _parse_file_subgraph(root, file_path, obj_id) cache.put(obj_id, subgraph) graph.update(subgraph) cache.save() """ import pathlib from muse.core.cache_base import MsgpackCache, _RawCacheMap _CACHE_VERSION = 2 _CACHE_FILENAME = "callgraph.json" # Type alias: per-file portion of the forward call graph _Subgraph = dict[str, frozenset[str]] class CallGraphCache(MsgpackCache): """Persistent JSON cache mapping object_id → per-file forward subgraph. Subgraph type: ``{caller_address: frozenset[callee_bare_name]}``. Inherits load/save/get/put/prune/size/empty from :class:`MsgpackCache` (Pattern A — mkstemp + replace). Typical lifecycle inside ``build_forward_graph``:: cache = CallGraphCache.load(muse_dir) for file_path, object_id in manifest.items(): subgraph = cache.get(object_id) if subgraph is None: subgraph = _parse_file(root, file_path, object_id) cache.put(object_id, subgraph) graph.update(subgraph) cache.save() Attributes ---------- _cache_dir : pathlib.Path | None Absolute path to ``.muse/cache/``. ``None`` for in-memory-only instances (``empty()``). ``save()`` is a no-op when ``None``. _dirty : bool Set to ``True`` by ``put()`` and ``prune()`` when entries change. Reset to ``False`` by a successful ``save()``. """ _CACHE_FILENAME = "callgraph.json" _CACHE_VERSION = 2 _TEMP_PREFIX = ".callgraph_" @classmethod def _deserialize_entries(cls, raw: _RawCacheMap) -> _RawCacheMap: """Validate and convert raw JSON entries to typed subgraph entries. Each value must be a dict of address → list[str]; lists are converted to frozensets. Invalid entries are skipped. """ entries: dict[str, _Subgraph] = {} for obj_id, subgraph_raw in raw.items(): if not isinstance(obj_id, str): continue if not isinstance(subgraph_raw, dict): continue subgraph: _Subgraph = {} valid = True for addr, callee_list in subgraph_raw.items(): if not isinstance(addr, str): valid = False break if not isinstance(callee_list, list): valid = False break if not all(isinstance(n, str) for n in callee_list): valid = False break subgraph[addr] = frozenset(callee_list) if valid: entries[obj_id] = subgraph return entries def _serialize_entries(self) -> _RawCacheMap: """Convert frozenset callees to sorted lists for JSON serialisation.""" return { obj_id: {addr: sorted(callees) for addr, callees in subgraph.items()} for obj_id, subgraph in self._entries.items() } # Type-narrowing overrides def get(self, object_id: str) -> _Subgraph | None: """Return the cached per-file subgraph for *object_id*, or ``None`` on miss.""" return self._entries.get(object_id) # type: ignore[return-value] def put(self, object_id: str, subgraph: _Subgraph) -> None: """Store *subgraph* under *object_id* and mark the cache dirty.""" super().put(object_id, subgraph) def prune(self, live_ids: set[str]) -> None: """Remove entries whose object IDs are not in *live_ids*. Call this from ``muse gc`` after identifying all reachable object IDs. """ super().prune(live_ids) def load_callgraph_cache(root: pathlib.Path) -> CallGraphCache: """Convenience loader: return a ``CallGraphCache`` for a repository root. Returns ``CallGraphCache.empty()`` when *root* has no ``.muse`` directory so callers never need to guard against a missing repo. """ return CallGraphCache.from_root(root) # type: ignore[return-value]