gabriel / musehub public
test_merge_commit_id_parity.py python
257 lines 9.2 KB
Raw
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor ⚠ breaking 20 days ago
1 """TDD — merge commit_id must be consistent with stored author/signer fields.
2
3 Issue #36
4 ---------
5 MuseHub server compute_commit_id formula diverged from published muse clients.
6
7 Root cause
8 ----------
9 merge_proposal() computes the merge commit_id by calling:
10
11 merge_commit_id = compute_commit_id(
12 parent_ids, merged_snapshot_id, merge_message, committed_at.isoformat()
13 )
14
15 …which defaults author="" and signer_public_key="".
16
17 It then stores the commit with author="musehub-server".
18
19 The Muse CLI verifies commit integrity via _verify_commit_id():
20
21 recomputed = compute_commit_id(
22 parent_ids, snapshot_id, message, committed_at,
23 author=record.author, # "musehub-server"
24 signer_public_key=record.signer_public_key or "",
25 )
26
27 Because the CLI uses the STORED author when re-deriving the ID, the
28 recomputed hash never matches the stored commit_id. Every server-created
29 merge commit fails client-side integrity verification on pull.
30
31 Fix
32 ---
33 Pass author="musehub-server" explicitly to compute_commit_id so the hash
34 covers the same fields the client will use during verification.
35
36 Tests
37 -----
38 P1 Unit — direct parity check: hub and CLI formulas agree for all optional
39 fields including author and signer_public_key.
40
41 P2 Unit — author mismatch reproducer: commit_id computed with author=""
42 differs from commit_id computed with author="musehub-server".
43 Documents the root cause of the bug.
44
45 P3 Unit — fix check: commit_id computed with author="musehub-server" matches
46 what the CLI formula produces with the same author.
47
48 P4 E2E — proposal merge produces a commit whose commit_id is consistent with
49 its stored author and signer_public_key fields. This is the regression
50 guard: if compute_commit_id loses the author again, this test fails.
51 """
52 from __future__ import annotations
53
54 import sys
55 from datetime import datetime, timezone
56 from pathlib import Path
57
58 import pytest
59 from collections.abc import Mapping
60 from httpx import AsyncClient
61 from sqlalchemy import select
62 from sqlalchemy.ext.asyncio import AsyncSession
63
64 sys.path.insert(0, str(Path.home() / "ecosystem" / "muse"))
65 from muse.core.snapshot import compute_commit_id as cli_compute_commit_id
66 from muse.core.types import fake_id
67
68 from musehub.core.genesis import compute_branch_id
69 from musehub.db.musehub_repo_models import MusehubBranch, MusehubCommit, MusehubCommitRef
70 from musehub.muse_cli.snapshot import compute_commit_id as hub_compute_commit_id
71 from musehub.muse_cli.snapshot import compute_snapshot_id
72 from tests.factories import create_repo
73
74 # ---------------------------------------------------------------------------
75 # Shared fixtures
76 # ---------------------------------------------------------------------------
77
78 _TIMESTAMP = "2026-01-01T00:00:00+00:00"
79 _MESSAGE = "feat: parity check"
80 _PARENT_IDS: list[str] = []
81
82
83 def _snap_id() -> str:
84 return compute_snapshot_id({"README.md": fake_id("readme")})
85
86
87 # ---------------------------------------------------------------------------
88 # P1 — hub and CLI agree for all optional fields
89 # ---------------------------------------------------------------------------
90
91
92 def test_p1_hub_and_cli_agree_with_author_and_signer() -> None:
93 """P1: hub and CLI compute_commit_id produce identical output for all fields."""
94 snap = _snap_id()
95 hub = hub_compute_commit_id(
96 _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP,
97 author="musehub-server",
98 signer_public_key="",
99 )
100 cli = cli_compute_commit_id(
101 _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP,
102 author="musehub-server",
103 signer_public_key="",
104 )
105 assert hub == cli, f"hub={hub!r} ≠ cli={cli!r}"
106
107
108 # ---------------------------------------------------------------------------
109 # P2 — author mismatch reproducer (documents the root-cause)
110 # ---------------------------------------------------------------------------
111
112
113 def test_p2_author_mismatch_produces_different_ids() -> None:
114 """P2: commit_id with author='' differs from author='musehub-server'.
115
116 This reproduces the exact failure mode of issue #36: the server computed
117 commit_id with author="" but stored author="musehub-server". Any
118 verification that re-derives commit_id using the stored author would get
119 a different hash.
120 """
121 snap = _snap_id()
122 id_empty_author = hub_compute_commit_id(
123 _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP, author=""
124 )
125 id_server_author = hub_compute_commit_id(
126 _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP, author="musehub-server"
127 )
128 assert id_empty_author != id_server_author, (
129 "Expected different hashes for author='' vs author='musehub-server'; "
130 "the formula must include the author field to produce distinct IDs."
131 )
132
133
134 # ---------------------------------------------------------------------------
135 # P3 — fix check: explicit author produces correct parity
136 # ---------------------------------------------------------------------------
137
138
139 def test_p3_author_included_in_hash() -> None:
140 """P3: the author field is covered by the hash, so stored author must match
141 what was passed to compute_commit_id at creation time.
142
143 The fix: merge_proposal() passes merger_handle as author to compute_commit_id
144 and also stores it in the commit row. Both sides use the same variable.
145 """
146 snap = _snap_id()
147 stored_author = "gabriel"
148
149 id_stored = hub_compute_commit_id(
150 _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP, author=stored_author
151 )
152 id_verified = cli_compute_commit_id(
153 _PARENT_IDS, snap, _MESSAGE, _TIMESTAMP, author=stored_author
154 )
155 assert id_stored == id_verified, (
156 f"commit_id mismatch: stored={id_stored!r} verified={id_verified!r}\n"
157 "The server must pass the same author to compute_commit_id that it stores."
158 )
159
160
161 # ---------------------------------------------------------------------------
162 # P4 — E2E: proposal merge commit is self-consistent
163 # ---------------------------------------------------------------------------
164
165
166 async def _push_branch(
167 db: AsyncSession,
168 repo_id: str,
169 branch_name: str,
170 ) -> str:
171 commit_id = fake_id(f"{repo_id}-{branch_name}")
172 db.add(MusehubCommit(
173 commit_id=commit_id,
174 branch=branch_name,
175 parent_ids=[],
176 message=f"Initial commit on {branch_name}",
177 author="gabriel",
178 timestamp=datetime.now(tz=timezone.utc),
179 ))
180 db.add(MusehubCommitRef(repo_id=repo_id, commit_id=commit_id))
181 db.add(MusehubBranch(
182 branch_id=compute_branch_id(repo_id, branch_name),
183 repo_id=repo_id,
184 name=branch_name,
185 head_commit_id=commit_id,
186 ))
187 await db.commit()
188 return commit_id
189
190
191 @pytest.mark.asyncio
192 async def test_p4_merge_commit_id_consistent_with_stored_fields(
193 client: AsyncClient,
194 auth_headers: Mapping[str, str],
195 db_session: AsyncSession,
196 ) -> None:
197 """P4: after a proposal merge, the stored commit_id matches what the CLI
198 formula would compute using the stored author and signer_public_key.
199
200 RED before fix: server computes commit_id with author="" but stores
201 author="musehub-server" → mismatch.
202 GREEN after fix: server passes author="musehub-server" to compute_commit_id.
203 """
204 # Create a repo via API so auth wiring is correct.
205 resp = await client.post(
206 "/api/repos",
207 json={"name": "p4-parity-repo", "owner": "testuser", "initialize": False},
208 headers=auth_headers,
209 )
210 assert resp.status_code == 201, resp.text
211 repo_id = resp.json()["repoId"]
212
213 await _push_branch(db_session, repo_id, "main")
214 await _push_branch(db_session, repo_id, "feat/p4")
215
216 p_resp = await client.post(
217 f"/api/repos/{repo_id}/proposals",
218 json={"title": "P4 parity test", "fromBranch": "feat/p4", "toBranch": "main"},
219 headers=auth_headers,
220 )
221 assert p_resp.status_code == 201, p_resp.text
222 proposal_id = p_resp.json()["proposalId"]
223
224 merge_resp = await client.post(
225 f"/api/repos/{repo_id}/proposals/{proposal_id}/merge",
226 json={"mergeStrategy": "merge_commit"},
227 headers=auth_headers,
228 )
229 assert merge_resp.status_code == 200, merge_resp.text
230 merge_commit_id = merge_resp.json()["mergeCommitId"]
231 assert merge_commit_id is not None
232
233 # Read the stored merge commit row.
234 row = (await db_session.execute(
235 select(MusehubCommit).where(MusehubCommit.commit_id == merge_commit_id)
236 )).scalar_one()
237
238 # Re-derive commit_id the same way the Muse CLI does in _verify_commit_id.
239 parent_ids: list[str] = list(row.parent_ids or [])
240 recomputed = cli_compute_commit_id(
241 parent_ids=parent_ids,
242 snapshot_id=row.snapshot_id or "",
243 message=row.message or "",
244 committed_at_iso=row.timestamp.isoformat() if row.timestamp else "",
245 author=row.author or "",
246 signer_public_key=row.signer_public_key or "",
247 )
248
249 assert recomputed == merge_commit_id, (
250 f"Merge commit_id is NOT self-consistent.\n"
251 f" stored commit_id : {merge_commit_id}\n"
252 f" cli recomputed : {recomputed}\n"
253 f" stored author : {row.author!r}\n"
254 f" stored signer_pk : {(row.signer_public_key or '')[:20]!r}\n"
255 "Fix: pass author and signer_public_key to compute_commit_id in "
256 "musehub/services/musehub_proposals.py."
257 )
File History 1 commit
sha256:ef10830ce231e0a20efcb0e2586cb879471247e916616e6fdd0d51df459e2595 fix: typing audit — 0 violations, 0 untyped defs across all… Sonnet 4.6 minor 20 days ago