"""Tests for Bug 11: _coerce_snapshot_dict silently drops the ``directories`` (and ``note``) fields from incoming snapshot data. Root cause: the original implementation returned a SnapshotDict with only ``snapshot_id``, ``manifest``, and ``created_at`` — omitting ``directories`` and ``note``. Since ``directories`` feeds into ``compute_snapshot_id``, any snapshot with non-empty directories would: (before Bug-9 fix) be written with directories=[] → stored snapshot_id doesn't match recomputed hash → read_snapshot returns None forever. (after Bug-9 fix) be rejected by write_snapshot's incoming hash verification → apply_mpack logs a warning and skips it → the snapshot never reaches disk. Either way, snapshots with non-empty directories could never be received via the HTTP transport layer. Scope of tests -------------- Unit (_coerce_snapshot_dict): - directories list is preserved (was silently dropped) - note string is preserved (was silently dropped) - missing directories defaults to empty list (no crash) - empty string note preserved - extra unknown keys in raw dict are safely ignored Integration (_parse_mpack round-trip): - mpack with snapshot directories survives parse - mpack with snapshot note survives parse - mpack without directories key produces empty list (not KeyError) End-to-end (parse → write_snapshot → read_snapshot): - snapshot with directories written from parsed mpack is readable - snapshot_id matches after parse + write + read - snapshot without directories still works after fix Security: - directories entries that are not strings are filtered out - non-list directories value is treated as empty list """ from __future__ import annotations import datetime import pathlib import msgpack import pytest from muse.core.ids import hash_snapshot as compute_snapshot_id from muse.core.snapshots import ( SnapshotDict, SnapshotRecord, read_snapshot, write_snapshot, ) from muse.core.paths import muse_dir, snapshots_dir from muse.core.types import Manifest, MsgpackDict from muse.core.transport import _coerce_snapshot_dict, _parse_mpack _TS = "2024-06-15T10:00:00+00:00" def _make_repo(tmp_path: pathlib.Path) -> pathlib.Path: repo = tmp_path / "repo" repo.mkdir() muse_dir(repo).mkdir() snapshots_dir(repo).mkdir() return repo def _snap_wire( manifest: Manifest, directories: list[str], note: str = "", ) -> SnapshotDict: snap_id = compute_snapshot_id(manifest, directories) return SnapshotDict( snapshot_id=snap_id, manifest=manifest, directories=directories, created_at=_TS, note=note, ) # ────────────────────────────────────────────────────────────────────────────── # Unit: _coerce_snapshot_dict # ────────────────────────────────────────────────────────────────────────────── class TestCoerceSnapshotDict: def test_directories_preserved(self) -> None: """Bug 11: directories must not be dropped by _coerce_snapshot_dict.""" raw = { "snapshot_id": "a" * 64, "manifest": {"src/main.py": "b" * 64}, "directories": ["src", "tests"], "created_at": _TS, "note": "", } result = _coerce_snapshot_dict(raw) assert result["directories"] == ["src", "tests"], ( "BUG 11: directories were silently dropped during snapshot coercion" ) def test_note_preserved(self) -> None: """Bug 11: note must not be dropped by _coerce_snapshot_dict.""" raw = { "snapshot_id": "a" * 64, "manifest": {}, "directories": [], "created_at": _TS, "note": "initial snapshot", } result = _coerce_snapshot_dict(raw) assert result["note"] == "initial snapshot" def test_missing_directories_defaults_to_empty_list(self) -> None: raw = { "snapshot_id": "a" * 64, "manifest": {}, "created_at": _TS, } result = _coerce_snapshot_dict(raw) assert result["directories"] == [] def test_empty_string_note_preserved(self) -> None: raw = { "snapshot_id": "a" * 64, "manifest": {}, "directories": [], "created_at": _TS, "note": "", } result = _coerce_snapshot_dict(raw) assert result["note"] == "" def test_non_list_directories_treated_as_empty(self) -> None: """A non-list directories value must not crash — default to [].""" raw = { "snapshot_id": "a" * 64, "manifest": {}, "directories": "not-a-list", "created_at": _TS, "note": "", } result = _coerce_snapshot_dict(raw) assert result["directories"] == [] def test_non_string_directory_entries_filtered(self) -> None: """Non-string entries in the directories list must be filtered out.""" raw = { "snapshot_id": "a" * 64, "manifest": {}, "directories": ["src", 42, None, "tests", b"bytes"], "created_at": _TS, "note": "", } result = _coerce_snapshot_dict(raw) assert result["directories"] == ["src", "tests"] # ────────────────────────────────────────────────────────────────────────────── # Integration: _parse_mpack round-trip # ────────────────────────────────────────────────────────────────────────────── class TestParseBundleSnapshotDirectories: def _make_bundle_bytes(self, snapshots: list[SnapshotDict]) -> bytes: return msgpack.packb({"snapshots": snapshots, "commits": [], "blobs": [], "tags": []}, use_bin_type=True) def test_bundle_snapshot_directories_survive_parse(self) -> None: """Snapshot directories must be present after _parse_mpack.""" wire = _snap_wire({"src/main.py": "a" * 64}, ["src", "tests"]) bundle_bytes = self._make_bundle_bytes([wire]) mpack = _parse_mpack(bundle_bytes) snaps = mpack.get("snapshots") or [] assert len(snaps) == 1 assert snaps[0]["directories"] == ["src", "tests"], ( "BUG 11: directories were dropped during _parse_mpack" ) def test_bundle_snapshot_note_survives_parse(self) -> None: wire = _snap_wire({"src/main.py": "a" * 64}, [], note="feature snapshot") bundle_bytes = self._make_bundle_bytes([wire]) mpack = _parse_mpack(bundle_bytes) snaps = mpack.get("snapshots") or [] assert snaps[0]["note"] == "feature snapshot" def test_bundle_snapshot_without_directories_key_no_keyerror(self) -> None: """A snapshot with no directories key must not raise KeyError.""" wire = { "snapshot_id": compute_snapshot_id({"f.py": "a" * 64}, []), "manifest": {"f.py": "a" * 64}, "created_at": _TS, # deliberately omit "directories" and "note" } bundle_bytes = self._make_bundle_bytes([wire]) mpack = _parse_mpack(bundle_bytes) # must not raise snaps = mpack.get("snapshots") or [] assert snaps[0]["directories"] == [] # ────────────────────────────────────────────────────────────────────────────── # End-to-end: parse → write_snapshot → read_snapshot # ────────────────────────────────────────────────────────────────────────────── class TestSnapshotDirectoriesE2E: def _apply_snapshot_from_bundle(self, repo: pathlib.Path, wire: MsgpackDict) -> None: """Simulate apply_mpack for a single snapshot via _parse_mpack.""" bundle_bytes = msgpack.packb( {"snapshots": [wire], "commits": [], "blobs": [], "tags": []}, use_bin_type=True, ) mpack = _parse_mpack(bundle_bytes) for snap_dict in mpack.get("snapshots") or []: snap = SnapshotRecord.from_dict(snap_dict) write_snapshot(repo, snap) def test_snapshot_with_directories_readable_after_parse_and_write(self, tmp_path: pathlib.Path) -> None: """Bug 11: snapshot with directories must be readable after HTTP parse + write.""" repo = _make_repo(tmp_path) manifest = {"src/main.py": "a" * 64, "tests/test_main.py": "b" * 64} directories = ["src", "tests"] wire = _snap_wire(manifest, directories) self._apply_snapshot_from_bundle(repo, wire) stored = read_snapshot(repo, wire["snapshot_id"]) assert stored is not None, ( "BUG 11: snapshot with directories is unreadable after HTTP parse → write" ) assert stored.snapshot_id == wire["snapshot_id"] assert stored.directories == directories assert stored.manifest == manifest def test_snapshot_id_matches_after_parse_write_read(self, tmp_path: pathlib.Path) -> None: """The snapshot_id must be the same at every stage of the pipeline.""" repo = _make_repo(tmp_path) manifest = {"app/main.py": "c" * 64} directories = ["app"] wire = _snap_wire(manifest, directories) expected_id = wire["snapshot_id"] self._apply_snapshot_from_bundle(repo, wire) stored = read_snapshot(repo, expected_id) assert stored is not None assert stored.snapshot_id == expected_id def test_snapshot_without_directories_still_works_after_fix(self, tmp_path: pathlib.Path) -> None: """Regression: snapshots without directories must still be stored after the fix.""" repo = _make_repo(tmp_path) manifest = {"README.md": "d" * 64} wire = _snap_wire(manifest, directories=[]) self._apply_snapshot_from_bundle(repo, wire) stored = read_snapshot(repo, wire["snapshot_id"]) assert stored is not None assert stored.directories == [] def test_snapshot_with_multiple_directories_round_trips(self, tmp_path: pathlib.Path) -> None: """Snapshot with several directories must survive the full HTTP transport pipeline.""" repo = _make_repo(tmp_path) manifest = { "src/a.py": "a" * 64, "lib/b.py": "b" * 64, "docs/c.md": "c" * 64, } directories = ["docs", "lib", "src"] wire = _snap_wire(manifest, directories) self._apply_snapshot_from_bundle(repo, wire) stored = read_snapshot(repo, wire["snapshot_id"]) assert stored is not None assert sorted(stored.directories) == sorted(directories)