"""Phase 3.4 — MPack build and apply at scale.

Target metrics (measured on a 2024 MacBook Pro M4, macOS 15):

  build_mpack (10 000 objects × 4 KiB):  < 60 s   [@slow]
  apply_mpack (10 000 objects × 4 KiB):  < 60 s   [@slow]
  verify-pack (10 000 objects × 4 KiB): < 120 s  [@slow] (in practice much faster)
  collect_object_ids (500 objects):     < 1 s    (no blob reads)

Edges verified beyond the plan:

  a. ``build_mpack`` loads ALL blob bytes simultaneously — peak RSS ≈ 2× blob total.
  b. ``have=`` filter correctly reduces both commit count and blob payload.
  c. ``MAX_PACK_OBJECTS`` applies to total_items (commits + snapshots + objects),
     not per-type — an mpack within per-type limits can still be rejected.
  d. Oversized object (> MAX_OBJECT_WRITE_BYTES) is silently skipped, not raised —
     caller sees objects_skipped++ but no error; documented behaviour.
  e. ``verify-pack --stat`` is purely structural — no SHA-256, near-instant.
  f. Duplicate OID dedup in ``apply_mpack`` — each OID written at most once.
  g. Round-trip integrity: build → msgpack-serialize → apply → all objects present.
"""

from __future__ import annotations

import datetime
import pathlib
import sys
import tempfile
import time
import tracemalloc
from collections.abc import Iterator
from unittest.mock import patch

import msgpack
import pytest

from muse.core.ids import hash_commit as compute_commit_id, hash_snapshot as compute_snapshot_id
from muse.core.mpack import (
    MAX_OBJECT_WRITE_BYTES,
    MAX_PACK_OBJECTS,
    ObjectPayload,
    MPack,
    apply_mpack,
    build_mpack,
    collect_object_ids,
)
from muse.core.object_store import write_object
from muse.core.paths import config_toml_path, muse_dir
from muse.core.store import (
    CommitRecord,
    MAX_PACK_MSGPACK_BYTES,
    SnapshotRecord,
    write_branch_ref,
    write_commit,
    write_snapshot,
)
from muse.core.types import Manifest, blob_id

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _make_repo(tmp: pathlib.Path) -> pathlib.Path:
    """Lay out a minimal source repository under *tmp* and return *tmp*.

    Inside the directory returned by ``muse_dir(tmp)`` this creates the
    ``commits``/``snapshots``/``objects`` stores, ``refs/heads``, a
    ``repo.json``, a ``HEAD`` pointing at ``refs/heads/main`` and an empty
    ``config.toml``.
    """
    tmp.mkdir(parents=True, exist_ok=True)
    dot_muse = muse_dir(tmp)
    # Deliberately not exist_ok: a pre-existing repo dir should fail loudly.
    dot_muse.mkdir()

    for store in ("commits", "snapshots", "objects"):
        dot_muse.joinpath(store).mkdir()
    dot_muse.joinpath("refs", "heads").mkdir(parents=True)

    text_files = {
        "repo.json": '{"repo_id":"bench","owner":"bench"}',
        "HEAD": "ref: refs/heads/main\n",
        "config.toml": "",
    }
    for name, text in text_files.items():
        dot_muse.joinpath(name).write_text(text)
    return tmp


def _fresh_repo(tmp: pathlib.Path) -> pathlib.Path:
    """Lay out an empty destination repository under *tmp* and return *tmp*.

    Only ``repo.json`` and the ``commits``/``snapshots``/``objects`` stores
    are created inside ``muse_dir(tmp)`` — no refs, ``HEAD`` or config.
    """
    tmp.mkdir(parents=True, exist_ok=True)
    dot_muse = muse_dir(tmp)
    # Deliberately not exist_ok: a pre-existing repo dir should fail loudly.
    dot_muse.mkdir()
    dot_muse.joinpath("repo.json").write_text('{"repo_id":"dst"}')
    for store in ("commits", "snapshots", "objects"):
        dot_muse.joinpath(store).mkdir()
    return tmp


def _populate(
    repo: pathlib.Path,
    n_commits: int = 10,
    n_unique_objects: int = 10,
    blob_size: int = 4096,
    branch: str = "main",
    start: int = 0,
) -> tuple[str, dict[str, str]]:
    """Store *n_unique_objects* blobs, one snapshot and a linear commit chain.

    Blob contents and commit messages are numbered from *start*, so calls
    with different *start* values produce different blobs and commits.
    Every commit in the chain points at the same snapshot; the first commit
    has no parent.  *branch* is finally pointed at the last commit written.

    Returns ``(tip_commit_id, manifest)`` where *manifest* maps each
    generated file path to its object ID.  With ``n_commits == 0`` the tip
    is the empty string.
    """
    manifest: Manifest = {}
    filler = b"x" * blob_size
    for index in range(n_unique_objects):
        payload = f"obj-{index + start:08d}-".encode() + filler
        object_id = blob_id(payload)
        write_object(repo, object_id, payload)
        manifest[f"file_{index:04d}.py"] = object_id

    snapshot_id = compute_snapshot_id(manifest)
    write_snapshot(repo, SnapshotRecord(snapshot_id=snapshot_id, manifest=manifest))

    # All commits share one fixed timestamp; only message and parent vary.
    committed_at = datetime.datetime(2026, 1, 1, tzinfo=datetime.timezone.utc)
    committed_at_iso = committed_at.isoformat()

    previous: str | None = None
    for number in range(start, start + n_commits):
        message = f"c{number:07d}"
        commit_id = compute_commit_id(
            parent_ids=[previous] if previous else [],
            snapshot_id=snapshot_id,
            message=message,
            committed_at_iso=committed_at_iso,
            author="bench",
        )
        write_commit(
            repo,
            CommitRecord(
                commit_id=commit_id,
                branch=branch,
                snapshot_id=snapshot_id,
                message=message,
                committed_at=committed_at,
                parent_commit_id=previous,
                parent2_commit_id=None,
                author="bench",
                metadata={},
                structured_delta=None,
                sem_ver_bump="none",
                breaking_changes=[],
                agent_id="",
                model_id="",
                toolchain_id="",
                prompt_hash="",
                signature="",
                signer_key_id="",
            ),
        )
        previous = commit_id

    tip = "" if previous is None else previous
    write_branch_ref(repo, branch, tip)
    return tip, manifest


# ---------------------------------------------------------------------------
# Phase 3.4.1 — build_mpack throughput
# ---------------------------------------------------------------------------


class TestBuildMPackThroughput:
    """build_mpack must sustain ≥ 2 000 objects/sec in the object-read loop.

    build_mpack's hot path is ``read_object`` for each unique blob.  At the
    Phase 3.1 floor of 2 000 objects/sec, 100 000 objects take ~50 s, within
    the 60 s target.  The fast test covers 1 000 objects and asserts the rate
    directly; the slow test covers 10 000 objects and proves on-disk timing.
    """

    # Minimum acceptable build rate (objects per second) for the fast test.
    _MIN_OBJECTS_PER_SEC = 2_000

    def test_build_mpack_1k_objects_rate(self, tmp_path: pathlib.Path) -> None:
        """build_mpack on 1 000 objects must achieve ≥ 2 000 objects/sec."""
        repo = _make_repo(tmp_path)
        N = 1_000
        tip, _ = _populate(repo, n_commits=50, n_unique_objects=N)

        # Only the build call is timed; repository population is excluded.
        t0 = time.perf_counter()
        mpack = build_mpack(repo, [tip])
        elapsed = time.perf_counter() - t0

        assert len(mpack["objects"]) == N, (
            f"Expected {N} objects in mpack, got {len(mpack['objects'])}"
        )
        rate = N / elapsed
        assert rate >= self._MIN_OBJECTS_PER_SEC, (
            f"build_mpack throughput {rate:.0f} objects/sec < {self._MIN_OBJECTS_PER_SEC} minimum. "
            f"({N} objects took {elapsed:.2f}s.)"
        )

    def test_build_mpack_have_filter_excludes_base(
        self, tmp_path: pathlib.Path
    ) -> None:
        """build_mpack with have=[base_tip] sends only delta commits and delta blobs."""
        repo = _make_repo(tmp_path)
        base_tip, base_blobs = _populate(repo, n_commits=50, n_unique_objects=100, start=0)

        # A second, independent chain with fresh objects (different start offset).
        delta_tip, delta_blobs = _populate(
            repo, n_commits=20, n_unique_objects=50, start=1000
        )
        # Join the two chains: a merge commit whose parents are delta_tip and
        # base_tip, reusing the delta snapshot.  Without it base_tip would not
        # be reachable from the wanted tip and have= would have nothing to cut.
        ts = datetime.datetime(2026, 1, 2, tzinfo=datetime.timezone.utc)
        sid = compute_snapshot_id(delta_blobs)
        chained_cid = compute_commit_id(
            parent_ids=[delta_tip, base_tip],
            snapshot_id=sid,
            message="merge",
            committed_at_iso=ts.isoformat(),
            author="bench",
        )
        chained = CommitRecord(
            commit_id=chained_cid,
            branch="main",
            snapshot_id=sid,
            message="merge",
            committed_at=ts,
            parent_commit_id=delta_tip,
            parent2_commit_id=base_tip,
            author="bench",
            metadata={},
            structured_delta=None,
            sem_ver_bump="none",
            breaking_changes=[],
            agent_id="",
            model_id="",
            toolchain_id="",
            prompt_hash="",
            signature="",
            signer_key_id="",
        )
        write_commit(repo, chained)
        write_branch_ref(repo, "main", chained_cid)

        # Full mpack (no have).
        full_mpack = build_mpack(repo, [chained_cid])
        # Delta mpack: receiver already has base history.
        delta_mpack = build_mpack(repo, [chained_cid], have=[base_tip])

        assert len(delta_mpack["commits"]) < len(full_mpack["commits"]), (
            "have= filter must reduce commit count"
        )
        full_oids = {o["object_id"] for o in full_mpack["objects"]}
        delta_oids = {o["object_id"] for o in delta_mpack["objects"]}
        assert delta_oids.issubset(full_oids), "delta mpack must be a subset of full mpack"
        # The subset check alone passes even when nothing is filtered, so
        # verify explicitly that no base-only blob is shipped: the only
        # snapshot referenced by the remaining commits is the delta snapshot.
        base_oids = set(base_blobs.values())
        leaked = delta_oids & base_oids
        assert not leaked, (
            f"have= filter must exclude base objects; {len(leaked)} base object(s) "
            "were included in the delta mpack"
        )

    @pytest.mark.slow
    def test_build_mpack_10k_objects_under_60s(
        self, tmp_path: pathlib.Path
    ) -> None:
        """build_mpack on 10 000 objects must complete in < 60 s.

        This extrapolates to 100 000 objects at the same rate: 100k / 2000 ≈ 50 s,
        within the plan target of 60 s.
        """
        repo = _make_repo(tmp_path)
        N = 10_000
        tip, _ = _populate(repo, n_commits=100, n_unique_objects=N)

        # Only the build call is timed; repository population is excluded.
        t0 = time.perf_counter()
        mpack = build_mpack(repo, [tip])
        elapsed = time.perf_counter() - t0

        assert len(mpack["objects"]) == N
        assert elapsed < 60.0, (
            f"build_mpack({N} objects) took {elapsed:.1f}s — target < 60 s. "
            f"Rate: {N/elapsed:.0f} objects/sec."
        )


class TestCollectObjectIdsThroughput:
    """collect_object_ids must be significantly faster than build_mpack.

    It performs the same BFS + manifest traversal but skips reading blob bytes.
    The result feeds client-side deduplication so only missing objects are sent.
    """

    @staticmethod
    def _write_merge_commit(
        repo: pathlib.Path,
        first_parent: str,
        second_parent: str,
        manifest: dict[str, str],
    ) -> str:
        """Write a two-parent commit on ``main`` and return its commit ID.

        The commit points at the snapshot whose ID is
        ``compute_snapshot_id(manifest)``; that snapshot is expected to have
        been written already (``_populate`` does so).  ``main`` is moved to
        the new commit.
        """
        ts = datetime.datetime(2026, 1, 2, tzinfo=datetime.timezone.utc)
        sid = compute_snapshot_id(manifest)
        cid = compute_commit_id(
            parent_ids=[first_parent, second_parent],
            snapshot_id=sid,
            message="merge",
            committed_at_iso=ts.isoformat(),
            author="bench",
        )
        write_commit(
            repo,
            CommitRecord(
                commit_id=cid,
                branch="main",
                snapshot_id=sid,
                message="merge",
                committed_at=ts,
                parent_commit_id=first_parent,
                parent2_commit_id=second_parent,
                author="bench",
                metadata={},
                structured_delta=None,
                sem_ver_bump="none",
                breaking_changes=[],
                agent_id="",
                model_id="",
                toolchain_id="",
                prompt_hash="",
                signature="",
                signer_key_id="",
            ),
        )
        write_branch_ref(repo, "main", cid)
        return cid

    def test_collect_object_ids_no_blob_reads(
        self, tmp_path: pathlib.Path
    ) -> None:
        """collect_object_ids on 500 objects must be < 1 s (no blob reads)."""
        repo = _make_repo(tmp_path)
        N = 500
        tip, _ = _populate(repo, n_commits=50, n_unique_objects=N)

        t0 = time.perf_counter()
        oids = collect_object_ids(repo, [tip])
        elapsed = time.perf_counter() - t0

        assert len(oids) == N, f"Expected {N} OIDs, got {len(oids)}"
        assert elapsed < 1.0, (
            f"collect_object_ids({N}) took {elapsed:.3f}s — expected < 1 s. "
            "It must not read blob bytes; only BFS + manifest traversal."
        )

    def test_collect_object_ids_faster_than_build_mpack(
        self, tmp_path: pathlib.Path
    ) -> None:
        """collect_object_ids must be faster than build_mpack for the same input."""
        repo = _make_repo(tmp_path)
        N = 200
        tip, _ = _populate(repo, n_commits=20, n_unique_objects=N)

        # Single-shot timings: collect runs first, build second.
        collect_start = time.perf_counter()
        oids = collect_object_ids(repo, [tip])
        t_collect = time.perf_counter() - collect_start

        build_start = time.perf_counter()
        mpack = build_mpack(repo, [tip])
        t_build = time.perf_counter() - build_start

        assert len(oids) == len(mpack["objects"]) == N
        assert t_collect <= t_build, (
            f"collect_object_ids ({t_collect:.3f}s) must be ≤ build_mpack ({t_build:.3f}s) — "
            "collect skips blob reads, build reads every byte."
        )

    def test_collect_object_ids_have_excludes_ancestors(
        self, tmp_path: pathlib.Path
    ) -> None:
        """collect_object_ids with have= returns only new object IDs."""
        repo = _make_repo(tmp_path)
        base_tip, base_blobs = _populate(repo, n_commits=10, n_unique_objects=50, start=0)
        delta_tip, delta_blobs = _populate(repo, n_commits=5, n_unique_objects=30, start=100)

        # _populate always starts a parentless chain, so base_tip is NOT an
        # ancestor of delta_tip.  Join the chains with a merge commit so that
        # have=[base_tip] actually has history to exclude.
        merge_tip = self._write_merge_commit(repo, delta_tip, base_tip, delta_blobs)

        all_oids = collect_object_ids(repo, [merge_tip])
        delta_oids = collect_object_ids(repo, [merge_tip], have=[base_tip])

        base_set = set(base_blobs.values())
        delta_set = set(delta_blobs.values())

        # Without have=, both the base and the delta objects are reachable.
        assert (base_set | delta_set).issubset(set(all_oids)), (
            "Unfiltered traversal must reach both base and delta objects"
        )
        # With have=, only the 30 new objects remain.
        assert len(delta_oids) == len(delta_set), (
            f"Expected {len(delta_set)} delta OIDs, got {len(delta_oids)}"
        )
        assert set(delta_oids) == delta_set, (
            "have= must exclude every base object and keep every delta object"
        )


# ---------------------------------------------------------------------------
# Phase 3.4.2 — apply_mpack throughput
# ---------------------------------------------------------------------------


class TestApplyMPackThroughput:
    """apply_mpack must sustain ≥ 1 500 objects/sec in the object-write loop.

    fsync is mocked: the test measures the mpack unpacking + hash-verify +
    mkstemp + fchmod + os.replace pipeline without OS I/O latency.  Durability
    ordering is verified by test_integrity_I2_fsync.py.
    """

    # Minimum acceptable apply rate (objects per second) for the fast test.
    _MIN_OBJECTS_PER_SEC: int = 1_500

    @pytest.fixture(autouse=True)
    def no_fsync(self) -> Iterator[None]:
        """Mock out all fsync calls so the test measures algorithmic throughput.

        This is a generator (yield) fixture: the patches stay active for the
        duration of each test in this class and are undone afterwards.
        """
        with patch("muse.core.object_store._fsync_fd", return_value=None), \
             patch("muse.core.store.os.fsync", return_value=None), \
             patch("muse.core.store.fcntl.fcntl", return_value=0):
            yield

    @pytest.mark.perf
    def test_apply_mpack_1k_objects_rate(self, tmp_path: pathlib.Path) -> None:
        """apply_mpack of a 1 000-object mpack must achieve ≥ _MIN_OBJECTS_PER_SEC."""
        src = _make_repo(tmp_path / "src")
        N = 1_000
        tip, _ = _populate(src, n_commits=50, n_unique_objects=N)
        mpack = build_mpack(src, [tip])
        assert len(mpack["objects"]) == N

        dst = _fresh_repo(tmp_path / "dst")

        # Only the apply call is timed; building the mpack is excluded.
        t0 = time.perf_counter()
        result = apply_mpack(dst, mpack)
        elapsed = time.perf_counter() - t0

        assert result["objects_written"] == N
        rate = N / elapsed
        assert rate >= self._MIN_OBJECTS_PER_SEC, (
            f"apply_mpack throughput {rate:.0f} objects/sec < {self._MIN_OBJECTS_PER_SEC} minimum. "
            f"({N} objects took {elapsed:.2f}s.)"
        )

    def test_apply_mpack_idempotent_second_apply_skips_all(
        self, tmp_path: pathlib.Path
    ) -> None:
        """Applying the same mpack twice: second apply must skip every object."""
        src = _make_repo(tmp_path / "src")
        tip, _ = _populate(src, n_commits=20, n_unique_objects=100)
        mpack = build_mpack(src, [tip])

        dst = _fresh_repo(tmp_path / "dst")
        r1 = apply_mpack(dst, mpack)
        r2 = apply_mpack(dst, mpack)

        assert r1["objects_written"] == 100
        assert r2["objects_written"] == 0
        assert r2["objects_skipped"] == 100, (
            f"Expected 100 skipped on second apply, got {r2['objects_skipped']}"
        )

    @pytest.mark.slow
    def test_apply_mpack_10k_objects_under_60s(
        self, tmp_path: pathlib.Path
    ) -> None:
        """apply_mpack of a 10 000-object mpack must complete in < 60 s."""
        src = _make_repo(tmp_path / "src")
        N = 10_000
        tip, _ = _populate(src, n_commits=100, n_unique_objects=N)
        mpack = build_mpack(src, [tip])

        dst = _fresh_repo(tmp_path / "dst")

        # Only the apply call is timed; building the mpack is excluded.
        t0 = time.perf_counter()
        result = apply_mpack(dst, mpack)
        elapsed = time.perf_counter() - t0

        assert result["objects_written"] == N
        assert elapsed < 60.0, (
            f"apply_mpack({N} objects) took {elapsed:.1f}s — target < 60 s. "
            f"Rate: {N/elapsed:.0f} objects/sec."
        )


# ---------------------------------------------------------------------------
# Phase 3.4.3 — verify-pack
# ---------------------------------------------------------------------------


class TestVerifyPackIntegrity:
    """verify-pack must detect hash mismatches and work fast in --stat mode."""

    @staticmethod
    def _dump_pack(target: pathlib.Path, pack: "MPack") -> pathlib.Path:
        """Serialise *pack* with msgpack into *target* and return *target*."""
        target.write_bytes(msgpack.packb(pack, use_bin_type=True))
        return target

    def test_verify_pack_stat_returns_counts(
        self, tmp_path: pathlib.Path
    ) -> None:
        """verify-pack --stat must report the object and commit counts of the pack."""
        import json

        from tests.cli_test_helper import CliRunner

        repo = _make_repo(tmp_path)
        head, _ = _populate(repo, n_commits=20, n_unique_objects=50)
        pack_path = self._dump_pack(tmp_path / "pack.muse", build_mpack(repo, [head]))
        config_toml_path(repo).write_text("")

        argv = ["verify-pack", "--stat", "--no-local", "--json"]
        argv += ["--file", str(pack_path)]
        outcome = CliRunner().invoke(None, argv, env={"MUSE_REPO_ROOT": str(repo)})

        assert outcome.exit_code == 0, f"verify-pack --stat failed: {outcome.output}"
        report = json.loads(outcome.output)
        assert report["objects"] == 50
        assert report["commits"] == 20

    def test_verify_pack_detects_hash_mismatch(
        self, tmp_path: pathlib.Path
    ) -> None:
        """verify-pack must flag an object whose content hash does not match its ID."""
        import json

        from tests.cli_test_helper import CliRunner

        repo = _make_repo(tmp_path)
        head, _ = _populate(repo, n_commits=5, n_unique_objects=10)
        genuine = build_mpack(repo, [head])

        # Keep the declared ID of the first object but swap in bogus content.
        first, *others = genuine["objects"]
        forged_obj: ObjectPayload = {
            "object_id": first["object_id"],
            "content": b"TAMPERED_CONTENT",
        }
        forged: MPack = {
            "commits": genuine["commits"],
            "snapshots": genuine["snapshots"],
            "objects": [forged_obj] + others,
            "summary": genuine.get("summary", {}),
            "meta": genuine.get("meta", {}),
        }
        pack_path = self._dump_pack(tmp_path / "tampered.muse", forged)
        config_toml_path(repo).write_text("")

        argv = ["verify-pack", "--no-local", "--json"]
        argv += ["--file", str(pack_path)]
        outcome = CliRunner().invoke(None, argv, env={"MUSE_REPO_ROOT": str(repo)})

        assert outcome.exit_code != 0, "verify-pack must exit non-zero when hash mismatches"
        report = json.loads(outcome.output)
        assert report["all_ok"] is False
        mismatch_reported = any(
            "hash mismatch" in failure["error"] for failure in report["failures"]
        )
        assert mismatch_reported, (
            f"Expected 'hash mismatch' in failures: {report['failures']}"
        )

    @pytest.mark.slow
    def test_verify_pack_10k_objects_under_120s(
        self, tmp_path: pathlib.Path
    ) -> None:
        """verify-pack of a 10 000-object mpack must complete in < 120 s.

        SHA-256 on M4 Silicon processes ~3 GiB/s; 10k × 4 KiB = 40 MiB → < 1 s.
        The 120 s ceiling catches pathological I/O or per-object overhead.
        """
        import json

        from tests.cli_test_helper import CliRunner

        repo = _make_repo(tmp_path)
        N = 10_000
        head, _ = _populate(repo, n_commits=100, n_unique_objects=N)
        pack_path = self._dump_pack(tmp_path / "pack10k.muse", build_mpack(repo, [head]))
        config_toml_path(repo).write_text("")

        argv = ["verify-pack", "--no-local", "--json"]
        argv += ["--file", str(pack_path)]
        env = {"MUSE_REPO_ROOT": str(repo)}

        # The timed region covers runner construction and the CLI invocation.
        started = time.perf_counter()
        outcome = CliRunner().invoke(None, argv, env=env)
        elapsed = time.perf_counter() - started

        assert outcome.exit_code == 0, f"verify-pack failed: {outcome.output[:200]}"
        report = json.loads(outcome.output)
        assert report["all_ok"] is True
        assert report["objects_checked"] == N
        assert elapsed < 120.0, (
            f"verify-pack({N} objects) took {elapsed:.1f}s — target < 120 s."
        )


# ---------------------------------------------------------------------------
# Phase 3.4.4 — cap and guard enforcement
# ---------------------------------------------------------------------------


class TestMPackCapEnforcement:
    """MPack-bomb and size-cap guards must fire correctly."""

    def test_apply_mpack_rejects_mpack_exceeding_max_pack_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """apply_mpack raises ValueError when total_items > MAX_PACK_OBJECTS.

        MAX_PACK_OBJECTS counts commits + snapshots + objects combined — not
        per-type.  An mpack with MAX_PACK_OBJECTS + 1 total items is rejected.
        """
        repo = _fresh_repo(tmp_path)
        oversized: MPack = {
            "commits": [{}] * (MAX_PACK_OBJECTS + 1),
            "snapshots": [],
            "objects": [],
        }
        with pytest.raises(ValueError, match="Pack rejected"):
            apply_mpack(repo, oversized)

    def test_apply_mpack_accepts_mpack_at_exact_cap(
        self, tmp_path: pathlib.Path
    ) -> None:
        """apply_mpack does NOT raise when total_items == MAX_PACK_OBJECTS.

        Items are malformed (empty dicts) so they are skipped as bad entries,
        but the cap check must pass.
        """
        repo = _fresh_repo(tmp_path)
        at_cap: MPack = {
            "commits": [{}] * MAX_PACK_OBJECTS,
            "snapshots": [],
            "objects": [],
        }
        # Must not raise ValueError for the cap — skips malformed entries instead.
        result = apply_mpack(repo, at_cap)
        # Each empty-dict commit is missing commit_id and snapshot_id, so every
        # one is skipped by the essential-field guard added to apply_mpack.
        assert result["commits_written"] == 0, (
            "All malformed empty-dict commits must be skipped, not written"
        )

    def test_apply_mpack_total_items_cap_is_cross_type(
        self, tmp_path: pathlib.Path
    ) -> None:
        """MAX_PACK_OBJECTS applies across commits+snapshots+objects, not per-type.

        The item counts are derived from MAX_PACK_OBJECTS so the test stays
        valid if the cap changes: commits + objects == MAX_PACK_OBJECTS, and a
        single snapshot pushes the total to MAX_PACK_OBJECTS + 1 → rejected.
        (With a cap of 100 000 that is 20 000 commits + 80 000 objects + 1.)
        """
        repo = _fresh_repo(tmp_path)
        n_commits = MAX_PACK_OBJECTS // 5
        n_objects = MAX_PACK_OBJECTS - n_commits
        n_snapshots = 1
        # Sanity: no single type exceeds the cap; only the combined total does.
        assert max(n_commits, n_objects, n_snapshots) <= MAX_PACK_OBJECTS
        assert n_commits + n_objects + n_snapshots == MAX_PACK_OBJECTS + 1

        cross_type: MPack = {
            "commits": [{}] * n_commits,
            "snapshots": [{}] * n_snapshots,
            "objects": [{}] * n_objects,
        }
        with pytest.raises(ValueError, match="Pack rejected"):
            apply_mpack(repo, cross_type)

    def test_apply_mpack_oversized_object_is_skipped_not_raised(
        self, tmp_path: pathlib.Path
    ) -> None:
        """An object exceeding MAX_OBJECT_WRITE_BYTES is silently skipped.

        This is documented behaviour: the per-object cap logs a warning and
        increments the loop counter rather than raising an exception, so the
        rest of the mpack is still applied.
        """
        from muse.core.object_store import has_object

        repo = _fresh_repo(tmp_path)
        good_data = b"x" * 64
        good_oid = blob_id(good_data)
        # A genuinely oversized payload with a correct ID, so the size cap is
        # the only reason it can be refused.
        huge_data = b"z" * (MAX_OBJECT_WRITE_BYTES + 1)
        huge_oid = blob_id(huge_data)
        mpack: MPack = {
            "commits": [],
            "snapshots": [],
            "objects": [
                ObjectPayload(object_id=good_oid, content=good_data),
                ObjectPayload(object_id=huge_oid, content=huge_data),
            ],
        }
        result = apply_mpack(repo, mpack)
        # Good object written; oversized object skipped.
        assert result["objects_written"] == 1, (
            f"Expected 1 object written (the good one), got {result['objects_written']}"
        )
        assert has_object(repo, good_oid), (
            "The valid object must still be written when a sibling is oversized"
        )
        # Oversized object must NOT be in the store.
        assert not has_object(repo, huge_oid), (
            "Oversized object must be rejected and not written to store"
        )

    def test_apply_mpack_deduplicates_repeated_oid(
        self, tmp_path: pathlib.Path
    ) -> None:
        """apply_mpack writes a repeated OID only once (dedup via seen_object_ids)."""
        repo = _fresh_repo(tmp_path)
        data = b"deduplicate-me" * 100
        oid = blob_id(data)
        REPEAT = 50
        mpack: MPack = {
            "commits": [],
            "snapshots": [],
            "objects": [ObjectPayload(object_id=oid, content=data)] * REPEAT,
        }
        result = apply_mpack(repo, mpack)
        # First occurrence written; remaining 49 skipped.
        assert result["objects_written"] == 1, (
            f"Expected 1 write for {REPEAT} identical OIDs, got {result['objects_written']}"
        )
        assert result["objects_skipped"] == REPEAT - 1, (
            f"Expected {REPEAT - 1} skipped, got {result['objects_skipped']}"
        )

    def test_apply_mpack_empty_mpack_is_noop(
        self, tmp_path: pathlib.Path
    ) -> None:
        """apply_mpack on an mpack with no items returns all-zero counts."""
        repo = _fresh_repo(tmp_path)
        empty: MPack = {"commits": [], "snapshots": [], "objects": []}
        result = apply_mpack(repo, empty)
        assert result["commits_written"] == 0
        assert result["snapshots_written"] == 0
        assert result["objects_written"] == 0
        assert result["objects_skipped"] == 0

    def test_have_equals_want_produces_empty_mpack(
        self, tmp_path: pathlib.Path
    ) -> None:
        """build_mpack with have=[tip] where tip is also in want returns empty mpack."""
        repo = _make_repo(tmp_path)
        tip, _ = _populate(repo, n_commits=10, n_unique_objects=20)

        mpack = build_mpack(repo, [tip], have=[tip])

        assert mpack["commits"] == [], (
            "When have contains the want tip, BFS should yield 0 commits"
        )
        assert mpack["objects"] == [], (
            "Empty commit set must produce empty object list"
        )


# ---------------------------------------------------------------------------
# Phase 3.4.5 — memory ceiling
# ---------------------------------------------------------------------------


class TestMPackMemoryCeiling:
    """build_mpack and apply_mpack peak memory must be proportional to blob payload.

    build_mpack holds ALL object bytes in-memory simultaneously — this is a
    known architectural property, not a bug.  The tests confirm:
      1. Peak allocation ≈ total blob bytes (not 10× or 100×).
      2. build_mpack does not accumulate unbounded intermediate structures.

    The figure measured is the peak of Python-level allocations traced by
    ``tracemalloc``, used here as a proxy for RSS.  Tracing is always stopped
    in a ``finally`` block so that a failing call cannot leave tracemalloc
    enabled for later tests, which would distort their timings.
    """

    def test_build_mpack_peak_rss_proportional_to_blob_total(
        self, tmp_path: pathlib.Path
    ) -> None:
        """build_mpack peak allocation is ≤ 3× the total blob payload size."""
        repo = _make_repo(tmp_path)
        N = 500
        BLOB_SZ = 4096  # 4 KiB
        tip, _ = _populate(repo, n_commits=50, n_unique_objects=N, blob_size=BLOB_SZ)
        blob_total_mib = N * BLOB_SZ / (1024 * 1024)

        tracemalloc.start()
        try:
            # Drop anything traced so far so the peak reflects build_mpack only.
            tracemalloc.clear_traces()
            mpack = build_mpack(repo, [tip])
            _, peak_bytes = tracemalloc.get_traced_memory()
        finally:
            tracemalloc.stop()

        peak_mib = peak_bytes / (1024 * 1024)
        ceiling_mib = blob_total_mib * 3  # generous: blobs + msgpack + overhead
        assert len(mpack["objects"]) == N
        assert peak_mib <= ceiling_mib, (
            f"build_mpack peak {peak_mib:.1f} MiB exceeds 3× blob total "
            f"({ceiling_mib:.1f} MiB for {N} × {BLOB_SZ//1024} KiB objects). "
            "build_mpack must not accumulate more than the object bytes themselves."
        )

    def test_apply_mpack_peak_rss_under_64_mib_for_small_objects(
        self, tmp_path: pathlib.Path
    ) -> None:
        """apply_mpack of 500 × 4 KiB objects stays under 64 MiB."""
        src = _make_repo(tmp_path / "src")
        tip, _ = _populate(src, n_commits=50, n_unique_objects=500, blob_size=4096)
        mpack = build_mpack(src, [tip])

        dst = _fresh_repo(tmp_path / "dst")

        tracemalloc.start()
        try:
            # Drop anything traced so far so the peak reflects apply_mpack only.
            tracemalloc.clear_traces()
            apply_mpack(dst, mpack)
            _, peak_bytes = tracemalloc.get_traced_memory()
        finally:
            tracemalloc.stop()

        peak_mib = peak_bytes / (1024 * 1024)
        assert peak_mib <= 64, (
            f"apply_mpack(500 × 4 KiB) peak {peak_mib:.1f} MiB — expected ≤ 64 MiB."
        )


# ---------------------------------------------------------------------------
# Phase 3.4.6 — round-trip integrity
# ---------------------------------------------------------------------------


class TestMPackRoundTrip:
    """End-to-end round-trip: build_mpack → msgpack serialize → apply_mpack → verify."""

    def test_roundtrip_all_objects_restored(
        self, tmp_path: pathlib.Path
    ) -> None:
        """build_mpack → msgpack → apply_mpack round-trip: all objects present on dst."""
        from muse.core.object_store import has_object
        from muse.core.store import safe_unpackb

        src = _make_repo(tmp_path / "src")
        N_OBJECTS = 200
        N_COMMITS = 30
        tip, blobs = _populate(src, n_commits=N_COMMITS, n_unique_objects=N_OBJECTS)
        mpack = build_mpack(src, [tip])

        # Serialize (simulates wire transfer).
        raw = msgpack.packb(mpack, use_bin_type=True)

        # Re-hydrate using safe_unpackb (the same path as unpack-objects).
        restored_dict = safe_unpackb(
            raw,
            context="roundtrip",
            max_bytes=MAX_PACK_MSGPACK_BYTES,
            allow_binary=True,
        )
        assert isinstance(restored_dict, dict)

        # Rebuild typed object payloads, dropping any entry whose shape is
        # not {object_id: str, content: bytes-like}.
        objects: list[ObjectPayload] = []
        for item in restored_dict.get("objects") or []:
            if not isinstance(item, dict):
                continue
            oid = item.get("object_id", "")
            content = item.get("content", b"")
            if isinstance(oid, str) and isinstance(content, (bytes, bytearray)):
                objects.append(ObjectPayload(object_id=oid, content=bytes(content)))

        hydrated: MPack = {
            "commits": [c for c in (restored_dict.get("commits") or []) if isinstance(c, dict)],
            "snapshots": [s for s in (restored_dict.get("snapshots") or []) if isinstance(s, dict)],
            "objects": objects,
        }
        dst = _fresh_repo(tmp_path / "dst")
        result = apply_mpack(dst, hydrated)

        # Every source object must be present on the destination.
        missing = [oid for oid in blobs.values() if not has_object(dst, oid)]
        assert not missing, (
            f"{len(missing)}/{N_OBJECTS} objects missing after round-trip: "
            f"{missing[:3]}"
        )
        assert result["commits_written"] == N_COMMITS
        assert result["objects_written"] == N_OBJECTS

    def test_roundtrip_msgpack_size_within_max_pack_bytes(
        self, tmp_path: pathlib.Path
    ) -> None:
        """The serialised mpack for 1 000 × 4 KiB objects must be < MAX_PACK_MSGPACK_BYTES."""
        src = _make_repo(tmp_path)
        N = 1_000
        tip, _ = _populate(src, n_commits=50, n_unique_objects=N, blob_size=4096)
        mpack = build_mpack(src, [tip])
        raw = msgpack.packb(mpack, use_bin_type=True)

        limit = MAX_PACK_MSGPACK_BYTES
        assert len(raw) < limit, (
            f"MPack for {N} × 4 KiB objects is {len(raw):,} bytes — "
            f"exceeds MAX_PACK_MSGPACK_BYTES ({limit:,} bytes / "
            f"{limit // 1024 // 1024} MiB)."
        )