"""TDD — merge commit_id must be consistent with stored author/signer fields. Issue #36 --------- MuseHub server compute_commit_id formula diverged from published muse clients. Root cause ---------- merge_proposal() computes the merge commit_id by calling: merge_commit_id = compute_commit_id( parent_ids, merged_snapshot_id, merge_message, committed_at.isoformat() ) …which defaults author="" and signer_public_key="". It then stores the commit with author="musehub-server". The Muse CLI verifies commit integrity via _verify_commit_id(): recomputed = compute_commit_id( parent_ids, snapshot_id, message, committed_at, author=record.author, # "musehub-server" signer_public_key=record.signer_public_key or "", ) Because the CLI uses the STORED author when re-deriving the ID, the recomputed hash never matches the stored commit_id. Every server-created merge commit fails client-side integrity verification on pull. Fix --- Pass author="musehub-server" explicitly to compute_commit_id so the hash covers the same fields the client will use during verification. Tests ----- P1 Unit — direct parity check: hub and CLI formulas agree for all optional fields including author and signer_public_key. P2 Unit — author mismatch reproducer: commit_id computed with author="" differs from commit_id computed with author="musehub-server". Documents the root cause of the bug. P3 Unit — fix check: commit_id computed with author="musehub-server" matches what the CLI formula produces with the same author. P4 E2E — proposal merge produces a commit whose commit_id is consistent with its stored author and signer_public_key fields. This is the regression guard: if compute_commit_id loses the author again, this test fails. """ from __future__ import annotations import sys from datetime import datetime, timezone from pathlib import Path import pytest from collections.abc import Mapping from httpx import AsyncClient from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession sys.path.insert(0, str(Path.home() / "ecosystem" / "muse")) from muse.core.snapshot import compute_commit_id as cli_compute_commit_id from muse.core.types import fake_id from musehub.core.genesis import compute_branch_id from musehub.db.musehub_repo_models import MusehubBranch, MusehubCommit, MusehubCommitRef from musehub.muse_cli.snapshot import compute_commit_id as hub_compute_commit_id from musehub.muse_cli.snapshot import compute_snapshot_id from tests.factories import create_repo # --------------------------------------------------------------------------- # Shared fixtures # --------------------------------------------------------------------------- _TIMESTAMP = "2026-01-01T00:00:00+00:00" _MESSAGE = "feat: parity check" _PARENT_IDS: list[str] = [] def _snap_id() -> str: return compute_snapshot_id({"README.md": fake_id("readme")}) # --------------------------------------------------------------------------- # P1 — hub and CLI agree for all optional fields # --------------------------------------------------------------------------- def test_p1_hub_and_cli_agree_with_author_and_signer() -> None: """P1: hub and CLI compute_commit_id produce identical output for all fields.""" snap = _snap_id() hub = hub_compute_commit_id( _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP, author="musehub-server", signer_public_key="", ) cli = cli_compute_commit_id( _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP, author="musehub-server", signer_public_key="", ) assert hub == cli, f"hub={hub!r} ≠ cli={cli!r}" # --------------------------------------------------------------------------- # P2 — author mismatch reproducer (documents the root-cause) # --------------------------------------------------------------------------- def test_p2_author_mismatch_produces_different_ids() -> None: """P2: commit_id with author='' differs from author='musehub-server'. This reproduces the exact failure mode of issue #36: the server computed commit_id with author="" but stored author="musehub-server". Any verification that re-derives commit_id using the stored author would get a different hash. """ snap = _snap_id() id_empty_author = hub_compute_commit_id( _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP, author="" ) id_server_author = hub_compute_commit_id( _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP, author="musehub-server" ) assert id_empty_author != id_server_author, ( "Expected different hashes for author='' vs author='musehub-server'; " "the formula must include the author field to produce distinct IDs." ) # --------------------------------------------------------------------------- # P3 — fix check: explicit author produces correct parity # --------------------------------------------------------------------------- def test_p3_author_included_in_hash() -> None: """P3: the author field is covered by the hash, so stored author must match what was passed to compute_commit_id at creation time. The fix: merge_proposal() passes merger_handle as author to compute_commit_id and also stores it in the commit row. Both sides use the same variable. """ snap = _snap_id() stored_author = "gabriel" id_stored = hub_compute_commit_id( _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP, author=stored_author ) id_verified = cli_compute_commit_id( _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP, author=stored_author ) assert id_stored == id_verified, ( f"commit_id mismatch: stored={id_stored!r} verified={id_verified!r}\n" "The server must pass the same author to compute_commit_id that it stores." ) # --------------------------------------------------------------------------- # P4 — E2E: proposal merge commit is self-consistent # --------------------------------------------------------------------------- async def _push_branch( db: AsyncSession, repo_id: str, branch_name: str, ) -> str: commit_id = fake_id(f"{repo_id}-{branch_name}") db.add(MusehubCommit( commit_id=commit_id, branch=branch_name, parent_ids=[], message=f"Initial commit on {branch_name}", author="gabriel", timestamp=datetime.now(tz=timezone.utc), )) db.add(MusehubCommitRef(repo_id=repo_id, commit_id=commit_id)) db.add(MusehubBranch( branch_id=compute_branch_id(repo_id, branch_name), repo_id=repo_id, name=branch_name, head_commit_id=commit_id, )) await db.commit() return commit_id @pytest.mark.asyncio async def test_p4_merge_commit_id_consistent_with_stored_fields( client: AsyncClient, auth_headers: Mapping[str, str], db_session: AsyncSession, ) -> None: """P4: after a proposal merge, the stored commit_id matches what the CLI formula would compute using the stored author and signer_public_key. RED before fix: server computes commit_id with author="" but stores author="musehub-server" → mismatch. GREEN after fix: server passes author="musehub-server" to compute_commit_id. """ # Create a repo via API so auth wiring is correct. resp = await client.post( "/api/repos", json={"name": "p4-parity-repo", "owner": "testuser", "initialize": False}, headers=auth_headers, ) assert resp.status_code == 201, resp.text repo_id = resp.json()["repoId"] await _push_branch(db_session, repo_id, "main") await _push_branch(db_session, repo_id, "feat/p4") p_resp = await client.post( f"/api/repos/{repo_id}/proposals", json={"title": "P4 parity test", "fromBranch": "feat/p4", "toBranch": "main"}, headers=auth_headers, ) assert p_resp.status_code == 201, p_resp.text proposal_id = p_resp.json()["proposalId"] merge_resp = await client.post( f"/api/repos/{repo_id}/proposals/{proposal_id}/merge", json={"mergeStrategy": "merge_commit"}, headers=auth_headers, ) assert merge_resp.status_code == 200, merge_resp.text merge_commit_id = merge_resp.json()["mergeCommitId"] assert merge_commit_id is not None # Read the stored merge commit row. row = (await db_session.execute( select(MusehubCommit).where(MusehubCommit.commit_id == merge_commit_id) )).scalar_one() # Re-derive commit_id the same way the Muse CLI does in _verify_commit_id. parent_ids: list[str] = list(row.parent_ids or []) recomputed = cli_compute_commit_id( parent_ids=parent_ids, snapshot_id=row.snapshot_id or "", message=row.message or "", committed_at_iso=row.timestamp.isoformat() if row.timestamp else "", author=row.author or "", signer_public_key=row.signer_public_key or "", ) assert recomputed == merge_commit_id, ( f"Merge commit_id is NOT self-consistent.\n" f" stored commit_id : {merge_commit_id}\n" f" cli recomputed : {recomputed}\n" f" stored author : {row.author!r}\n" f" stored signer_pk : {(row.signer_public_key or '')[:20]!r}\n" "Fix: pass author and signer_public_key to compute_commit_id in " "musehub/services/musehub_proposals.py." )