gabriel / musehub public

musehub_context.py file-level

at sha256:3 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:0 fix: fall back to any indexed mpack in read_object_bytes when push mpac… · gabriel · Jun 17, 2026
1 """MuseHub agent context aggregation service.
2
3 This is the canonical read-path for AI composition agents. ``build_agent_context``
4 aggregates musical state, commit history, analysis highlights, open proposals, and open
5 issues for a given repo ref into a single ``AgentContextResponse``.
6
7 Design notes
8 ------------
9 - **Read-only**: this service never writes to the DB.
10 - **Deterministic**: for the same repo_id + resolved ref, the output is always
11 identical, making it safe to cache.
12 - **Depth-aware**: ``brief`` returns minimal data for tight context windows;
13 ``standard`` returns a full briefing; ``verbose`` adds all bodies/history.
14 - **Analysis stubs**: per-dimension analysis (key, groove, harmony) is currently
15 None — these require Storpheus MIDI integration. The schema is fully defined so
16 agents can handle None gracefully today and receive populated values once that
17 integration lands.
18 """
19
20 import logging
21 from datetime import datetime, timezone
22
23 from sqlalchemy import desc, select
24 from sqlalchemy.ext.asyncio import AsyncSession
25
26 from musehub.db.musehub_repo_models import MusehubBranch, MusehubCommit, MusehubCommitRef, MusehubObject, MusehubRepo
27 from musehub.db.musehub_social_models import MusehubIssue, MusehubProposal
28 from musehub.types.json_types import IntDict
29 from musehub.models.musehub_context import (
30 ActiveProposalContext,
31 AgentContextResponse,
32 AnalysisSummaryContext,
33 ContextDepth,
34 HistoryEntryContext,
35 MusicalStateContext,
36 OpenIssueContext,
37 )
38
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Depth configuration
# ---------------------------------------------------------------------------

# Maximum number of history entries fetched at each context depth.
_HISTORY_LIMIT: IntDict = {
    ContextDepth.brief: 3,
    ContextDepth.standard: 10,
    ContextDepth.verbose: 50,
}

# Whether open-proposal bodies are included at each depth; when False the
# body field is sent as an empty string.
_INCLUDE_PROPOSAL_BODY: dict[ContextDepth, bool] = {
    ContextDepth.brief: False,
    ContextDepth.standard: True,
    ContextDepth.verbose: True,
}

# Whether open-issue bodies are included at each depth; only ``verbose``
# carries them, every other depth sends an empty string.
_INCLUDE_ISSUE_BODY: dict[ContextDepth, bool] = {
    ContextDepth.brief: False,
    ContextDepth.standard: False,
    ContextDepth.verbose: True,
}
62
63
64 # ---------------------------------------------------------------------------
65 # Internal helpers
66 # ---------------------------------------------------------------------------
67
68
69 def _utc_iso(dt: datetime) -> str:
70 """Return a UTC ISO-8601 string from a datetime (naive or aware)."""
71 if dt.tzinfo is None:
72 dt = dt.replace(tzinfo=timezone.utc)
73 return dt.isoformat()
74
75
76 def _extract_tracks_from_snapshot(snapshot: MusehubObject | None) -> list[str]:
77 """Not applicable in MuseHub context — snapshots are binary objects.
78
79 Track names in the MuseHub context come from commit message heuristics and
80 branch-level metadata. This stub returns an empty list until commit-level
81 manifest tracking is added to MusehubCommit.
82 """
83 return []
84
85
async def _resolve_ref_to_commit(
    session: AsyncSession,
    repo_id: str,
    ref: str,
) -> MusehubCommit | None:
    """Look up the MusehubCommit row that a ref points at.

    The ref is tried as a branch name first and as a commit ID second:

    1. A branch of this repo named ``ref`` that has a head commit ID wins;
       the commit fetched by that ID is returned.
    2. Otherwise ``ref`` is treated as a commit ID, accepted only when a
       ``MusehubCommitRef`` row exists for ``(repo_id, ref)``.
    3. If neither applies, the result is None.

    Args:
        session: Async DB session used for all lookups.
        repo_id: ID of the repo the ref must belong to.
        ref: Branch name or exact commit ID.

    Returns:
        The resolved commit row, or None when nothing matches.
    """
    # Branch names are the common case, so they are checked before commit IDs.
    branch_query = select(MusehubBranch).where(
        MusehubBranch.repo_id == repo_id,
        MusehubBranch.name == ref,
    )
    branch_result = await session.execute(branch_query)
    matched_branch = branch_result.scalars().first()
    if matched_branch is not None and matched_branch.head_commit_id is not None:
        return await session.get(MusehubCommit, matched_branch.head_commit_id)

    # The ref-table row proves the commit ID belongs to this repo before the
    # commit itself is loaded.
    membership = await session.get(MusehubCommitRef, (repo_id, ref))
    if membership is not None:
        return await session.get(MusehubCommit, ref)
    return None
113
114
async def _get_latest_commit(
    session: AsyncSession,
    repo_id: str,
) -> MusehubCommit | None:
    """Fetch the commit with the newest timestamp among the repo's commits.

    Commits are tied to the repo through their ``MusehubCommitRef`` rows.

    Args:
        session: Async DB session used for the query.
        repo_id: ID of the repo whose commits are searched.

    Returns:
        The newest commit row, or None when the repo has no commits.
    """
    repo_commits = select(MusehubCommit).join(
        MusehubCommitRef,
        MusehubCommitRef.commit_id == MusehubCommit.commit_id,
    )
    newest_first = (
        repo_commits.where(MusehubCommitRef.repo_id == repo_id)
        .order_by(desc(MusehubCommit.timestamp))
        .limit(1)
    )
    result = await session.execute(newest_first)
    return result.scalars().first()
128
129
async def _build_history(
    session: AsyncSession,
    repo_id: str,
    head_commit: MusehubCommit,
    limit: int,
) -> list[HistoryEntryContext]:
    """Collect the repo's most recent commits, newest first, minus the head.

    The head commit is left out because the caller reports it as the current
    ref. Commits are selected by repo and ordered by timestamp instead of
    following parent links; timestamp order is only an approximation of the
    commit graph order.

    Args:
        session: Async DB session used for the query.
        repo_id: ID of the repo whose commits are listed.
        head_commit: Commit to exclude from the listing.
        limit: Maximum number of entries to return.

    Returns:
        Up to ``limit`` history entries, each with an empty ``active_tracks``.
    """
    query = (
        select(MusehubCommit)
        .join(MusehubCommitRef, MusehubCommitRef.commit_id == MusehubCommit.commit_id)
        .where(
            MusehubCommitRef.repo_id == repo_id,
            MusehubCommit.commit_id != head_commit.commit_id,
        )
        .order_by(desc(MusehubCommit.timestamp))
        .limit(limit)
    )
    result = await session.execute(query)

    entries: list[HistoryEntryContext] = []
    for commit in result.scalars().all():
        entries.append(
            HistoryEntryContext(
                commit_id=commit.commit_id,
                message=commit.message,
                author=commit.author,
                timestamp=_utc_iso(commit.timestamp),
                active_tracks=[],
            )
        )
    return entries
165
166
async def _get_open_proposals(
    session: AsyncSession,
    repo_id: str,
    include_body: bool,
) -> list[ActiveProposalContext]:
    """List every proposal of the repo whose state is "open", oldest first.

    Args:
        session: Async DB session used for the query.
        repo_id: ID of the repo whose proposals are listed.
        include_body: When False, each entry's body is an empty string and
            the stored body is not read.

    Returns:
        One context entry per open proposal, ordered by creation time.
    """
    open_filter = (
        MusehubProposal.repo_id == repo_id,
        MusehubProposal.state == "open",
    )
    query = select(MusehubProposal).where(*open_filter).order_by(MusehubProposal.created_at)
    result = await session.execute(query)

    proposals: list[ActiveProposalContext] = []
    for proposal in result.scalars().all():
        proposals.append(
            ActiveProposalContext(
                proposal_id=proposal.proposal_id,
                title=proposal.title,
                from_branch=proposal.from_branch,
                to_branch=proposal.to_branch,
                state=proposal.state,
                body=proposal.body if include_body else "",
            )
        )
    return proposals
193
194
async def _get_open_issues(
    session: AsyncSession,
    repo_id: str,
    include_body: bool,
) -> list[OpenIssueContext]:
    """List every issue of the repo whose state is "open", by issue number.

    Args:
        session: Async DB session used for the query.
        repo_id: ID of the repo whose issues are listed.
        include_body: When False, each entry's body is an empty string and
            the stored body is not read.

    Returns:
        One context entry per open issue in ascending number order. Labels
        are copied into a new list; a missing label value becomes ``[]``.
    """
    open_filter = (
        MusehubIssue.repo_id == repo_id,
        MusehubIssue.state == "open",
    )
    query = select(MusehubIssue).where(*open_filter).order_by(MusehubIssue.number)
    result = await session.execute(query)

    issues: list[OpenIssueContext] = []
    for issue in result.scalars().all():
        issues.append(
            OpenIssueContext(
                issue_id=issue.issue_id,
                number=issue.number,
                title=issue.title,
                labels=list(issue.labels or []),
                body=issue.body if include_body else "",
            )
        )
    return issues
220
221
def _generate_suggestions(
    musical_state: MusicalStateContext,
    open_issues: list[OpenIssueContext],
    active_proposals: list[ActiveProposalContext],
    depth: ContextDepth,
) -> list[str]:
    """Produce rule-based next-step suggestions from the current context.

    The rules are deterministic and are checked in this order, each adding at
    most one suggestion:

    - no active tracks in the musical state,
    - at least one open issue (the first one is referenced),
    - at least one open proposal (the first one is referenced).

    Args:
        musical_state: Current musical state; only ``active_tracks`` is read.
        open_issues: Open issues of the repo, in the caller's order.
        active_proposals: Open proposals of the repo, in the caller's order.
        depth: Requested depth; caps the number of suggestions returned.

    Returns:
        The suggestions, truncated to 2 at ``brief`` depth and to 4 at
        ``standard`` depth; any other depth returns all of them.
    """
    tips: list[str] = []

    if not musical_state.active_tracks:
        tips.append(
            "No files found in the latest commit snapshot. Push a commit with source files."
        )

    if open_issues:
        first_issue = open_issues[0]
        tips.append(
            f"Address open issue #{first_issue.number}: '{first_issue.title}'. "
            "This may describe a compositional problem to fix before the next section."
        )

    if active_proposals:
        first_proposal = active_proposals[0]
        tips.append(
            f"Review proposal '{first_proposal.title}' ({first_proposal.from_branch} → {first_proposal.to_branch}). "
            "Merge or close it before branching for the next section."
        )

    # Pick the cap for the requested depth; None means "no cap".
    if depth == ContextDepth.brief:
        cap = 2
    elif depth == ContextDepth.standard:
        cap = 4
    else:
        cap = None

    return tips if cap is None else tips[:cap]
262
263
264 # ---------------------------------------------------------------------------
265 # Public API
266 # ---------------------------------------------------------------------------
267
268
async def build_agent_context(
    session: AsyncSession,
    *,
    repo_id: str,
    ref: str = "HEAD",
    depth: ContextDepth = ContextDepth.standard,
) -> AgentContextResponse | None:
    """Assemble the agent context document for a MuseHub repo at a ref.

    Args:
        session: Open async DB session; this function only issues reads.
        repo_id: The MuseHub repo ID.
        ref: Branch name or commit ID. The default ``"HEAD"`` selects the
            repo's most recent commit by timestamp.
        depth: Selects how much data is gathered: the history limit comes
            from ``_HISTORY_LIMIT``, and proposal/issue bodies are included
            according to ``_INCLUDE_PROPOSAL_BODY`` / ``_INCLUDE_ISSUE_BODY``.
            It also caps the number of suggestions.

    Returns:
        An ``AgentContextResponse`` when the repo exists and the ref resolves
        to a commit. None when the repo is missing or no commit is found;
        callers are expected to surface None as HTTP 404.
    """
    if await session.get(MusehubRepo, repo_id) is None:
        return None

    # "HEAD" means the newest commit of the repo; anything else is resolved
    # as a branch name or commit ID.
    wants_head = ref == "HEAD"
    if wants_head:
        head_commit = await _get_latest_commit(session, repo_id)
    else:
        head_commit = await _resolve_ref_to_commit(session, repo_id, ref)

    if head_commit is None:
        logger.warning("⚠️ No commit found for repo %s ref %s", repo_id, ref)
        return None

    # For "HEAD" the reported ref is the branch recorded on the commit itself.
    resolved_ref = head_commit.branch if wants_head else ref

    # The sections are fetched one after another on the same session.
    history = await _build_history(
        session, repo_id, head_commit, _HISTORY_LIMIT[depth]
    )
    active_proposals = await _get_open_proposals(
        session, repo_id, _INCLUDE_PROPOSAL_BODY[depth]
    )
    open_issues = await _get_open_issues(
        session, repo_id, _INCLUDE_ISSUE_BODY[depth]
    )

    # Track and analysis data are not populated here: the state starts with
    # no active tracks and the analysis summary uses its defaults.
    musical_state = MusicalStateContext(active_tracks=[])
    analysis = AnalysisSummaryContext()

    suggestions = _generate_suggestions(
        musical_state, open_issues, active_proposals, depth
    )

    logger.info(
        "✅ Agent context built for repo %s ref %s (depth=%s, history=%d, proposals=%d, issues=%d)",
        repo_id,
        resolved_ref,
        depth,
        len(history),
        len(active_proposals),
        len(open_issues),
    )

    return AgentContextResponse(
        repo_id=repo_id,
        ref=resolved_ref,
        depth=depth,
        musical_state=musical_state,
        history=history,
        analysis=analysis,
        active_proposals=active_proposals,
        open_issues=open_issues,
        suggestions=suggestions,
    )
345 )