musehub_context.py
file-level
1
files
1
commits
0
hotspots
0
🧊 dead
0
💥 blast risk
| 1 | """MuseHub agent context aggregation service. |
| 2 | |
| 3 | This is the canonical read-path for AI composition agents. ``build_agent_context`` |
| 4 | aggregates musical state, commit history, analysis highlights, open proposals, and open |
| 5 | issues for a given repo ref into a single ``AgentContextResponse``. |
| 6 | |
| 7 | Design notes |
| 8 | ------------ |
| 9 | - **Read-only**: this service never writes to the DB. |
| 10 | - **Deterministic**: for the same repo_id + resolved ref, the output is always |
| 11 | identical, making it safe to cache. |
| 12 | - **Depth-aware**: ``brief`` returns minimal data for tight context windows; |
| 13 | ``standard`` returns a full briefing; ``verbose`` adds all bodies/history. |
| 14 | - **Analysis stubs**: per-dimension analysis (key, groove, harmony) is currently |
| 15 | None — these require Storpheus MIDI integration. The schema is fully defined so |
| 16 | agents can handle None gracefully today and receive populated values once that |
| 17 | integration lands. |
| 18 | """ |
| 19 | |
| 20 | import logging |
| 21 | from datetime import datetime, timezone |
| 22 | |
| 23 | from sqlalchemy import desc, select |
| 24 | from sqlalchemy.ext.asyncio import AsyncSession |
| 25 | |
| 26 | from musehub.db.musehub_repo_models import MusehubBranch, MusehubCommit, MusehubCommitRef, MusehubObject, MusehubRepo |
| 27 | from musehub.db.musehub_social_models import MusehubIssue, MusehubProposal |
| 28 | from musehub.types.json_types import IntDict |
| 29 | from musehub.models.musehub_context import ( |
| 30 | ActiveProposalContext, |
| 31 | AgentContextResponse, |
| 32 | AnalysisSummaryContext, |
| 33 | ContextDepth, |
| 34 | HistoryEntryContext, |
| 35 | MusicalStateContext, |
| 36 | OpenIssueContext, |
| 37 | ) |
| 38 | |
| 39 | logger = logging.getLogger(__name__) |
| 40 | |
| 41 | # --------------------------------------------------------------------------- |
| 42 | # Depth configuration |
| 43 | # --------------------------------------------------------------------------- |
| 44 | |
# Maximum number of history entries requested from ``_build_history`` at each
# depth (the head commit itself is not counted; it is excluded from history).
_HISTORY_LIMIT: IntDict = {
    ContextDepth.brief: 3,
    ContextDepth.standard: 10,
    ContextDepth.verbose: 50,
}

# Whether open-proposal bodies are returned at each depth. When False the
# ``body`` field is filled with an empty string instead of the stored text.
_INCLUDE_PROPOSAL_BODY: dict[ContextDepth, bool] = {
    ContextDepth.brief: False,
    ContextDepth.standard: True,
    ContextDepth.verbose: True,
}

# Whether open-issue bodies are returned at each depth. Only ``verbose``
# includes them; the other depths get an empty string.
_INCLUDE_ISSUE_BODY: dict[ContextDepth, bool] = {
    ContextDepth.brief: False,
    ContextDepth.standard: False,
    ContextDepth.verbose: True,
}
| 62 | |
| 63 | |
| 64 | # --------------------------------------------------------------------------- |
| 65 | # Internal helpers |
| 66 | # --------------------------------------------------------------------------- |
| 67 | |
| 68 | |
def _utc_iso(dt: datetime) -> str:
    """Return a UTC ISO-8601 string from a datetime (naive or aware).

    Args:
        dt: The datetime to format. A naive value is taken to already be
            expressed in UTC; an aware value is converted to UTC.

    Returns:
        The ``isoformat()`` rendering of the UTC-normalised datetime, so the
        result always carries a ``+00:00`` offset.
    """
    if dt.tzinfo is None or dt.utcoffset() is None:
        # Naive value: attach the UTC label without shifting the wall-clock time.
        return dt.replace(tzinfo=timezone.utc).isoformat()
    # Aware value: it may be in any zone, so shift it to UTC. Without this an
    # aware non-UTC datetime would be emitted with its own offset.
    return dt.astimezone(timezone.utc).isoformat()
| 74 | |
| 75 | |
def _extract_tracks_from_snapshot(snapshot: MusehubObject | None) -> list[str]:
    """Placeholder track extractor that currently yields no track names.

    The ``snapshot`` argument is accepted but never inspected. Per the
    original design note, snapshots are binary objects in the MuseHub
    context, and track names are expected to come from commit-message
    heuristics and branch-level metadata until commit-level manifest
    tracking is added to MusehubCommit.

    Args:
        snapshot: The snapshot object, or None. Ignored.

    Returns:
        A new empty list on every call.
    """
    no_tracks: list[str] = []
    return no_tracks
| 84 | |
| 85 | |
async def _resolve_ref_to_commit(
    session: AsyncSession,
    repo_id: str,
    ref: str,
) -> MusehubCommit | None:
    """Look up the commit that *ref* names inside the repo *repo_id*.

    Lookup order:
      1. A branch of this repo named ``ref`` that has a head commit ID:
         the commit row for that head is fetched and returned as-is (which
         may be None if no such row exists; no fallback is attempted).
      2. Otherwise ``ref`` is treated as an exact commit ID. It is accepted
         only when a ``MusehubCommitRef`` row exists for ``(repo_id, ref)``,
         which ties the commit to this repo.
      3. Otherwise None.

    Args:
        session: Async DB session used for the lookups.
        repo_id: ID of the repo the ref must belong to.
        ref: Branch name or commit ID.

    Returns:
        The matching ``MusehubCommit`` row, or None when nothing matches.
    """
    branch_query = select(MusehubBranch).where(
        MusehubBranch.repo_id == repo_id,
        MusehubBranch.name == ref,
    )
    branch_result = await session.execute(branch_query)
    matched_branch = branch_result.scalars().first()

    if matched_branch is None or matched_branch.head_commit_id is None:
        # No usable branch: treat the ref as a commit ID, but only if the
        # ref table says that commit belongs to this repo.
        membership = await session.get(MusehubCommitRef, (repo_id, ref))
        if membership is None:
            return None
        return await session.get(MusehubCommit, ref)

    # Branch names are the common case: hand back the branch's head commit.
    return await session.get(MusehubCommit, matched_branch.head_commit_id)
| 113 | |
| 114 | |
async def _get_latest_commit(
    session: AsyncSession,
    repo_id: str,
) -> MusehubCommit | None:
    """Return the most-recent commit for any branch in the repo, or None.

    Commits are tied to the repo through ``MusehubCommitRef`` and ranked by
    timestamp, newest first. ``commit_id`` is used as a secondary sort key so
    that commits sharing a timestamp always resolve to the same row; with a
    timestamp-only ordering the ``LIMIT 1`` winner among ties is unspecified,
    which would make HEAD resolution non-repeatable.

    Args:
        session: Async DB session used for the query.
        repo_id: ID of the repo whose newest commit is wanted.

    Returns:
        The newest ``MusehubCommit`` row, or None if the repo has no commits.
    """
    newest_first = (
        select(MusehubCommit)
        .join(MusehubCommitRef, MusehubCommitRef.commit_id == MusehubCommit.commit_id)
        .where(MusehubCommitRef.repo_id == repo_id)
        # The secondary key is arbitrary but stable; it only matters on ties.
        .order_by(desc(MusehubCommit.timestamp), desc(MusehubCommit.commit_id))
        .limit(1)
    )
    return (await session.execute(newest_first)).scalars().first()
| 128 | |
| 129 | |
async def _build_history(
    session: AsyncSession,
    repo_id: str,
    head_commit: MusehubCommit,
    limit: int,
) -> list[HistoryEntryContext]:
    """Return up to *limit* recent commits for the repo (newest-first).

    The head commit itself is excluded — it is surfaced as the current ref.
    We query by repo and timestamp rather than walking parent links, because
    MusehubCommit parent_ids are a JSONB list and graph traversal would
    require N+1 queries. Timestamp ordering is an approximation; in practice
    it matches the commit graph order for sequential workflows.

    ``commit_id`` is used as a secondary sort key so that commits sharing a
    timestamp come back in a stable order. With a timestamp-only ordering
    the rows that survive the ``LIMIT`` (and their relative order) are
    unspecified on ties.

    NOTE(review): the filter only removes the head commit, so when
    ``head_commit`` is not the newest commit in the repo, commits newer than
    it are still returned. Confirm whether that is intended.

    Args:
        session: Async DB session used for the query.
        repo_id: ID of the repo whose commits are listed.
        head_commit: The commit the context is built for; left out of the result.
        limit: Maximum number of entries to return.

    Returns:
        History entries, newest first. ``active_tracks`` is always an empty
        list here.
    """
    stmt = (
        select(MusehubCommit)
        .join(MusehubCommitRef, MusehubCommitRef.commit_id == MusehubCommit.commit_id)
        .where(
            MusehubCommitRef.repo_id == repo_id,
            MusehubCommit.commit_id != head_commit.commit_id,
        )
        # The secondary key is arbitrary but stable; it only matters on ties.
        .order_by(desc(MusehubCommit.timestamp), desc(MusehubCommit.commit_id))
        .limit(limit)
    )
    rows = (await session.execute(stmt)).scalars().all()
    return [
        HistoryEntryContext(
            commit_id=row.commit_id,
            message=row.message,
            author=row.author,
            timestamp=_utc_iso(row.timestamp),
            active_tracks=[],
        )
        for row in rows
    ]
| 165 | |
| 166 | |
async def _get_open_proposals(
    session: AsyncSession,
    repo_id: str,
    include_body: bool,
) -> list[ActiveProposalContext]:
    """Fetch every proposal in the repo whose state is ``"open"``.

    Results are ordered by ``created_at`` ascending.

    Args:
        session: Async DB session used for the query.
        repo_id: ID of the repo whose proposals are listed.
        include_body: When False, each entry's ``body`` is an empty string
            instead of the stored proposal body.

    Returns:
        One ``ActiveProposalContext`` per open proposal.
    """
    is_open_in_repo = (
        MusehubProposal.repo_id == repo_id,
        MusehubProposal.state == "open",
    )
    query = select(MusehubProposal).where(*is_open_in_repo).order_by(MusehubProposal.created_at)
    result = await session.execute(query)

    contexts: list[ActiveProposalContext] = []
    for proposal in result.scalars().all():
        if include_body:
            body_text = proposal.body
        else:
            body_text = ""
        contexts.append(
            ActiveProposalContext(
                proposal_id=proposal.proposal_id,
                title=proposal.title,
                from_branch=proposal.from_branch,
                to_branch=proposal.to_branch,
                state=proposal.state,
                body=body_text,
            )
        )
    return contexts
| 193 | |
| 194 | |
async def _get_open_issues(
    session: AsyncSession,
    repo_id: str,
    include_body: bool,
) -> list[OpenIssueContext]:
    """Fetch every issue in the repo whose state is ``"open"``.

    Results are ordered by issue ``number`` ascending.

    Args:
        session: Async DB session used for the query.
        repo_id: ID of the repo whose issues are listed.
        include_body: When False, each entry's ``body`` is an empty string
            instead of the stored issue body.

    Returns:
        One ``OpenIssueContext`` per open issue. ``labels`` is always a
        fresh list; a missing/empty labels value becomes ``[]``.
    """

    def _to_context(issue: MusehubIssue) -> OpenIssueContext:
        # Copy labels so the response never aliases the ORM row's value.
        label_copy = list(issue.labels or [])
        return OpenIssueContext(
            issue_id=issue.issue_id,
            number=issue.number,
            title=issue.title,
            labels=label_copy,
            body=issue.body if include_body else "",
        )

    query = select(MusehubIssue).where(
        MusehubIssue.repo_id == repo_id,
        MusehubIssue.state == "open",
    )
    query = query.order_by(MusehubIssue.number)
    result = await session.execute(query)
    return [_to_context(issue) for issue in result.scalars().all()]
| 220 | |
| 221 | |
def _generate_suggestions(
    musical_state: MusicalStateContext,
    open_issues: list[OpenIssueContext],
    active_proposals: list[ActiveProposalContext],
    depth: ContextDepth,
) -> list[str]:
    """Produce rule-based next-step suggestions from the current context.

    The rules are fixed and evaluated in this order, each contributing at
    most one suggestion:
      1. ``musical_state.active_tracks`` is empty → prompt to push files.
      2. There is at least one open issue → point at the first one.
      3. There is at least one open proposal → point at the first one.

    Args:
        musical_state: Current musical state; only ``active_tracks`` is read.
        open_issues: Open issues; only the first entry is used.
        active_proposals: Open proposals; only the first entry is used.
        depth: ``brief`` caps the result at 2 entries and ``standard`` at 4;
            any other depth returns everything.

    Returns:
        The suggestion strings, in rule order, after the depth cap.
    """
    ideas: list[str] = []

    if not musical_state.active_tracks:
        ideas.append(
            "No files found in the latest commit snapshot. Push a commit with source files."
        )

    if open_issues:
        first_issue = open_issues[0]
        ideas.append(
            f"Address open issue #{first_issue.number}: '{first_issue.title}'. "
            "This may describe a compositional problem to fix before the next section."
        )

    if active_proposals:
        first_proposal = active_proposals[0]
        ideas.append(
            f"Review proposal '{first_proposal.title}' ({first_proposal.from_branch} → {first_proposal.to_branch}). "
            "Merge or close it before branching for the next section."
        )

    # Apply the per-depth cap; depths without a cap get the full list back.
    if depth == ContextDepth.brief:
        cap = 2
    elif depth == ContextDepth.standard:
        cap = 4
    else:
        return ideas
    return ideas[:cap]
| 262 | |
| 263 | |
| 264 | # --------------------------------------------------------------------------- |
| 265 | # Public API |
| 266 | # --------------------------------------------------------------------------- |
| 267 | |
| 268 | |
async def build_agent_context(
    session: AsyncSession,
    *,
    repo_id: str,
    ref: str = "HEAD",
    depth: ContextDepth = ContextDepth.standard,
) -> AgentContextResponse | None:
    """Assemble the agent context document for a MuseHub repo at a given ref.

    Args:
        session: Open async DB session. Only reads are issued here.
        repo_id: The MuseHub repo ID.
        ref: Branch name or commit ID. The literal ``"HEAD"`` (the default)
            selects the repo's latest commit, and the response's ``ref`` is
            then that commit's ``branch`` value.
        depth: Controls how much data is returned:
            - ``brief``    — 3 history entries, no proposal/issue bodies
            - ``standard`` — 10 history entries, proposal bodies only
            - ``verbose``  — 50 history entries, proposal and issue bodies

    Returns:
        An ``AgentContextResponse``, or ``None`` when the repo does not exist
        or no commit can be resolved for ``ref`` (including a repo with no
        commits). The caller should surface None as HTTP 404.
    """
    if await session.get(MusehubRepo, repo_id) is None:
        return None

    # Resolve ref → commit
    wants_head = ref == "HEAD"
    head_commit = (
        await _get_latest_commit(session, repo_id)
        if wants_head
        else await _resolve_ref_to_commit(session, repo_id, ref)
    )
    if head_commit is None:
        logger.warning("⚠️ No commit found for repo %s ref %s", repo_id, ref)
        return None

    resolved_ref = head_commit.branch if wants_head else ref

    # Depth-dependent knobs, looked up once before any section is fetched.
    max_history = _HISTORY_LIMIT[depth]
    with_proposal_body = _INCLUDE_PROPOSAL_BODY[depth]
    with_issue_body = _INCLUDE_ISSUE_BODY[depth]

    # The sections are fetched one after another on the shared session.
    # NOTE(review): SQLAlchemy documents that a single AsyncSession is not
    # meant to be used from concurrent tasks, so switching to asyncio.gather
    # would need a session per query — confirm before parallelising.
    history = await _build_history(session, repo_id, head_commit, max_history)
    active_proposals = await _get_open_proposals(session, repo_id, with_proposal_body)
    open_issues = await _get_open_issues(session, repo_id, with_issue_body)

    # Track and analysis data are not populated yet: the state carries an
    # empty track list and the analysis summary is built with its defaults.
    musical_state = MusicalStateContext(active_tracks=[])
    analysis = AnalysisSummaryContext()

    suggestions = _generate_suggestions(musical_state, open_issues, active_proposals, depth)

    logger.info(
        "✅ Agent context built for repo %s ref %s (depth=%s, history=%d, proposals=%d, issues=%d)",
        repo_id,
        resolved_ref,
        depth,
        len(history),
        len(active_proposals),
        len(open_issues),
    )

    return AgentContextResponse(
        repo_id=repo_id,
        ref=resolved_ref,
        depth=depth,
        musical_state=musical_state,
        history=history,
        analysis=analysis,
        active_proposals=active_proposals,
        open_issues=open_issues,
        suggestions=suggestions,
    )