"""Unit tests for muse.core.compression — zlib (Tier 1) and delta (Tier 2).""" from __future__ import annotations import zlib import pytest from muse.core.compression import ( _COPY_THRESHOLD, apply_delta, compress_zlib, compute_delta, decompress_zlib, ) # --------------------------------------------------------------------------- # Tier 1 — zlib round-trip # --------------------------------------------------------------------------- class TestZlib: def test_round_trip_source_code(self) -> None: """Compressing and decompressing Python source returns identical bytes.""" data = b"def foo(x):\n return x * 2\n" * 100 assert decompress_zlib(compress_zlib(data)) == data def test_round_trip_binary(self) -> None: """Binary data round-trips correctly.""" data = bytes(range(256)) * 128 assert decompress_zlib(compress_zlib(data)) == data def test_round_trip_empty(self) -> None: """Empty bytes compress and decompress cleanly.""" assert decompress_zlib(compress_zlib(b"")) == b"" def test_compress_reduces_size_for_text(self) -> None: """Repetitive text compresses to fewer bytes.""" data = b"hello world " * 1000 assert len(compress_zlib(data)) < len(data) def test_decompress_corrupt_raises(self) -> None: """Corrupt input to decompress_zlib raises zlib.error.""" with pytest.raises(zlib.error): decompress_zlib(b"not zlib data") def test_compress_output_is_valid_zlib(self) -> None: """compress_zlib output is valid zlib (decompressible by the stdlib directly).""" data = b"test payload" compressed = compress_zlib(data) assert zlib.decompress(compressed) == data # --------------------------------------------------------------------------- # Tier 2 — delta round-trip # --------------------------------------------------------------------------- class TestDelta: def test_round_trip_identical_content(self) -> None: """Delta of a file against itself reconstructs the original.""" data = b"unchanged content" * 50 delta = compute_delta(data, data) assert apply_delta(data, delta) == data def test_round_trip_small_edit(self) -> None: """A one-byte change in a large file round-trips correctly.""" base = b"the quick brown fox jumps over the lazy dog" * 100 target = base[:500] + b"X" + base[501:] delta = compute_delta(base, target) assert apply_delta(base, delta) == target def test_round_trip_append(self) -> None: """Appending bytes to a file round-trips correctly.""" base = b"existing content\n" * 20 target = base + b"new line appended\n" delta = compute_delta(base, target) assert apply_delta(base, delta) == target def test_round_trip_prepend(self) -> None: """Prepending bytes to a file round-trips correctly.""" base = b"existing content\n" * 20 target = b"new header\n" + base delta = compute_delta(base, target) assert apply_delta(base, delta) == target def test_apply_delta_pure_data_instruction(self) -> None: """apply_delta correctly reconstructs target from a pure DATA instruction stream. compute_delta raises ValueError when DATA-only deltas aren't smaller than plain zlib (completely different content has no COPY opportunities and DATA framing adds overhead). We verify the decoder directly with a hand-built stream. """ import struct target = b"completely different from base " * 20 stream = b"\x01" + struct.pack(">I", len(target)) + target delta = zlib.compress(stream, level=1) base = b"unrelated source material " * 20 assert apply_delta(base, delta) == target def test_round_trip_empty_base(self) -> None: """Empty base produces a pure DATA delta that reconstructs target.""" base = b"" target = b"new file content" delta = compute_delta(base, target) assert apply_delta(base, delta) == target def test_apply_delta_empty_target(self) -> None: """apply_delta with an empty instruction stream produces empty bytes. An empty target has no delta instructions (nothing to emit), so the compressed stream is just zlib(b"") — the same size as compress_zlib(b""). compute_delta raises ValueError for this case (not profitable). We verify the decoder directly: an empty instruction stream reconstructs b"". """ delta = zlib.compress(b"", level=1) base = b"some existing content" assert apply_delta(base, delta) == b"" def test_round_trip_source_file(self) -> None: """Simulated source-file edit: add a function at the end.""" base = ( b"def foo():\n pass\n\n" b"def bar():\n return 1\n\n" ) * 30 target = base + b"def baz():\n return 2\n" delta = compute_delta(base, target) assert apply_delta(base, delta) == target def test_delta_smaller_than_zlib_for_small_edit(self) -> None: """Delta should be smaller than plain zlib for a small edit in a large file.""" base = b"stable content\n" * 500 target = base[:1000] + b"changed line\n" + base[1013:] delta = compute_delta(base, target) plain = compress_zlib(target) assert len(delta) < len(plain), ( f"Expected delta ({len(delta)}) < zlib ({len(plain)})" ) def test_unprofitable_delta_raises_value_error(self) -> None: """compute_delta raises ValueError when delta >= zlib(target).""" # Completely random-looking data has no copy opportunities and the # overhead of the delta format makes it larger than plain zlib. import os base = os.urandom(64) target = os.urandom(64) with pytest.raises(ValueError, match="not profitable"): compute_delta(base, target) def test_apply_delta_corrupt_raises(self) -> None: """apply_delta raises zlib.error on corrupt compressed input.""" with pytest.raises(zlib.error): apply_delta(b"base", b"not zlib") def test_apply_delta_unknown_instruction_raises(self) -> None: """apply_delta raises ValueError on an unknown instruction byte.""" import struct # Craft a stream with an invalid instruction byte (0x99). bad_stream = zlib.compress(b"\x99" + struct.pack(">I", 0)) with pytest.raises(ValueError, match="unknown delta instruction type"): apply_delta(b"base", bad_stream) def test_copy_threshold_respected(self) -> None: """Only runs of >= _COPY_THRESHOLD bytes produce COPY instructions. Small data (< _COPY_THRESHOLD bytes) has no 32-byte hash windows, so compute_delta emits DATA and the delta is not profitable (overhead > savings). We verify the decoder handles a hand-built mixed COPY+DATA stream where a short tail that was below threshold is encoded as DATA. """ import struct # Large stable prefix — would produce COPY in a real delta. stable = b"the quick brown fox jumps over the lazy dog\n" * 40 # 1760 bytes # Short unique tail shorter than _COPY_THRESHOLD. short_tail = b"Z" * (_COPY_THRESHOLD - 1) # 31 bytes base = stable + short_tail target = stable + short_tail # same — round-trip check # Build a hand-crafted delta: # COPY the entire base (COPY instruction is valid since base == target) # …but simulate the DATA path by encoding just the tail as DATA. # Build as: COPY(0, len(stable)) then DATA(short_tail) copy_instr = b"\x00" + struct.pack(">II", 0, len(stable)) data_instr = b"\x01" + struct.pack(">I", len(short_tail)) + short_tail stream = copy_instr + data_instr delta = zlib.compress(stream, level=1) result = apply_delta(base, delta) assert result == target def test_large_file_delta_round_trip(self) -> None: """1 MB file with a small edit round-trips correctly.""" base = (b"the quick brown fox\n") * 50_000 # ~1 MB change_pos = 250_000 target = base[:change_pos] + b"changed" + base[change_pos + 7 :] delta = compute_delta(base, target) assert apply_delta(base, delta) == target def test_midi_like_binary_round_trip(self) -> None: """MIDI-like binary payload round-trips correctly.""" # Simulate a MIDI file: fixed header + variable event stream. header = bytes([0x4D, 0x54, 0x68, 0x64, 0x00, 0x00, 0x00, 0x06]) base = header + bytes(range(256)) * 40 # Simulate a one-byte velocity change somewhere in the middle. target = bytearray(base) target[100] = (target[100] + 1) % 256 target_bytes = bytes(target) delta = compute_delta(base, target_bytes) assert apply_delta(base, delta) == target_bytes