proposal-enrich-llm.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
2 days ago
| 1 | /** |
| 2 | * Shared proposal Enrich: LLM prompt, parse, validate/normalize (SPEC-aligned metadata). |
| 3 | * Used by self-hosted hub/server.mjs and hub/gateway/proposal-enrich-hosted.mjs. |
| 4 | */ |
| 5 | |
| 6 | import { normalizeSlug, normalizeTags } from './vault.mjs'; |
| 7 | |
/**
 * Version number of the Enrich response schema. It is interpolated into the
 * system prompt by buildEnrichMessages as the expected "enrich_version" value.
 */
export const ENRICH_VERSION = 2;

/**
 * Keys the model may place inside suggested_frontmatter (SPEC §2.1 + §2.3).
 * Used twice in this module: listed (sorted) in the system prompt, and applied
 * as the allow-list in validateAndNormalizeSuggestedFrontmatter, where any key
 * not in this set is dropped.
 */
export const SUGGESTED_FRONTMATTER_KEYS = new Set([
  'title',
  'project',
  'tags',
  'date',
  'updated',
  'source',
  'source_id',
  'intent',
  'follows',
  'causal_chain_id',
  'entity',
  'episode_id',
  'summarizes',
  'summarizes_range',
  'state_snapshot',
]);
| 28 | |
// Key prefixes rejected by isForbiddenKey; compared against the lowercased key.
const FORBIDDEN_KEY_PREFIXES = ['knowtation_'];
// Exact key names rejected by isForbiddenKey; compared against the lowercased key.
const FORBIDDEN_KEYS = new Set([
  'author_kind',
  'network',
  'wallet_address',
  'tx_hash',
  'payment_status',
  'kind', // approval_log etc.
]);
| 38 | |
// Size limits applied to model output in this module.
// Summary text is clamped to this many characters in parseEnrichModelOutput.
const MAX_SUMMARY_CHARS = 8000;
// Maximum number of suggested labels kept (also quoted to the model in the system prompt).
const MAX_LABELS = 8;
// Each suggested label is truncated to this many characters.
const MAX_LABEL_LEN = 64;
// Clamp for date, updated, source, source_id and summarizes_range values.
const MAX_SCALAR_CHARS = 512;
// Clamp for the suggested title.
const MAX_TITLE_CHARS = 500;
// Clamp for the suggested intent.
const MAX_INTENT_CHARS = 2000;
// Paths with more than this many non-empty segments are rejected by isSafeVaultPathLike.
const MAX_PATH_SEGMENTS = 20;
// Serialized suggested_frontmatter longer than this is replaced by an empty object.
const MAX_JSON_OUTPUT_CHARS = 14000;
| 47 | |
/**
 * Decide whether a frontmatter key must never be accepted from model output.
 * Anything that is not a non-empty string counts as forbidden. Otherwise the
 * key is lowercased and checked against the exact-name set and the prefix list.
 * @param {unknown} k
 * @returns {boolean} true when the key is forbidden
 */
function isForbiddenKey(k) {
  const usable = typeof k === 'string' && k.length > 0;
  if (!usable) return true;
  const folded = k.toLowerCase();
  return FORBIDDEN_KEYS.has(folded) || FORBIDDEN_KEY_PREFIXES.some((prefix) => folded.startsWith(prefix));
}
| 57 | |
/**
 * Check that a string looks like a vault-relative path that cannot escape the vault.
 *
 * Backslashes are treated as forward slashes before checking. A value is rejected when it:
 * - is not a string, or is empty after trimming;
 * - contains a NUL character;
 * - is absolute (leading slash, or a Windows drive prefix such as "C:");
 * - contains ".." anywhere, or has a "." segment;
 * - has more than MAX_PATH_SEGMENTS non-empty segments.
 *
 * @param {string} s
 * @returns {boolean} true when the value passes every check
 */
function isSafeVaultPathLike(s) {
  if (typeof s !== 'string') return false;
  const normalized = s.trim().replace(/\\/g, '/');
  if (!normalized || normalized.includes('\0')) return false;
  if (normalized.startsWith('/')) return false;
  // A drive-qualified path ("C:/x" or "c:x") is not vault-relative on Windows hosts.
  if (/^[a-z]:/i.test(normalized)) return false;
  // Deliberately broad: any ".." substring is refused, so no separate ".." segment test is needed.
  if (normalized.includes('..')) return false;
  const segments = normalized.split('/').filter(Boolean);
  if (segments.length > MAX_PATH_SEGMENTS) return false;
  return !segments.includes('.');
}
| 74 | |
/**
 * Trim a string and cap its length at `max` UTF-16 code units.
 *
 * Non-string input yields ''. When the cut would fall between the two halves of
 * a surrogate pair, the dangling high surrogate is dropped as well, so the result
 * never ends in half of a character (it may then be one unit shorter than `max`).
 *
 * @param {unknown} s
 * @param {number} max
 * @returns {string}
 */
function clampStr(s, max) {
  if (typeof s !== 'string') return '';
  const trimmed = s.trim();
  if (trimmed.length <= max) return trimmed;
  let end = max;
  // charCodeAt yields NaN for out-of-range indexes, so max <= 0 falls through unchanged.
  const lastKept = trimmed.charCodeAt(end - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  return trimmed.slice(0, end);
}
| 80 | |
/**
 * Build the system and user prompt strings for a proposal Enrich request.
 *
 * The system prompt describes the JSON reply schema, quoting ENRICH_VERSION,
 * MAX_LABELS and the alphabetically sorted allow-list of frontmatter keys.
 * The user prompt carries the note path, the intent ('—' when absent) and the
 * body truncated to `opts.bodyMaxChars` (default 12000) characters.
 *
 * @param {{ path?: string, intent?: string, body?: string }} input
 * @param {{ bodyMaxChars?: number }} [opts]
 * @returns {{ system: string, user: string }}
 */
export function buildEnrichMessages(input, opts = {}) {
  const bodyLimit = opts.bodyMaxChars ?? 12_000;
  const { path: rawPath, intent: rawIntent, body: rawBody } = input;
  // Absent path/body become empty strings; an absent intent is shown as an em dash.
  const notePath = rawPath == null ? '' : String(rawPath);
  const noteIntent = rawIntent == null ? '—' : String(rawIntent);
  const noteBody = rawBody == null ? '' : String(rawBody).slice(0, bodyLimit);
  const allowList = Array.from(SUGGESTED_FRONTMATTER_KEYS).sort().join(', ');

  const system = `Reply with ONLY valid JSON (no markdown fences). Schema:
{
"enrich_version": ${ENRICH_VERSION},
"summary": "one short paragraph describing the proposed change",
"suggested_labels": ["short-tag", ...],
"suggested_frontmatter": { ... optional; only keys from this allow-list: ${allowList} }
}
Rules:
- suggested_labels: at most ${MAX_LABELS} strings; lowercase slugs (a-z, 0-9, hyphen).
- suggested_frontmatter: only use keys from the allow-list. Prefer returning every field that is clearly grounded in the note path, intent, headings, body text, or explicit references. Omit only fields that are genuinely unsupported by the content.
- For project, causal_chain_id, episode_id, entity (if string), tags: use slug form (lowercase, hyphens).
- entity may be a string or array of strings (each normalized as slug).
- follows and summarizes may be a vault-relative path string or array of such paths (e.g. inbox/note.md).
- state_snapshot must be boolean if present.
- Do NOT include knowtation_* keys, author_kind, or blockchain fields (network, wallet_address, tx_hash, payment_status).`;

  const user = `Path: ${notePath}
Intent: ${noteIntent}

Extract metadata from the actual content, not just from any existing frontmatter-like wording.

Field guidance:
- title: infer a strong note title from the heading/body when possible.
- project: infer from the note path, named initiative, or clearly repeated project framing.
- tags: include concrete topic tags from the body, not generic filler.
- date / updated: include only if the document states a concrete date or time anchor.
- source / source_id: include only if the text names a source system, document, URL, export id, ticket id, etc.
- intent: infer the operational purpose if the body makes it clearer than the provided intent.
- follows: include prior note paths only when the body/path clearly references an earlier note or continuation.
- causal_chain_id: infer when the document describes a named chain of events, incident thread, rollout, investigation, or dependency chain.
- entity: extract the main people, teams, companies, products, systems, repos, or domains discussed.
- episode_id: infer when this note belongs to a clearly bounded episode, milestone, launch, sprint, outage, or test run.
- summarizes / summarizes_range: infer only when the note explicitly summarizes another note, folder, period, or range.
- state_snapshot: true only when the note is clearly capturing status/state at a moment in time.

Prioritize temporal, causal, entity, and relationship metadata when the content supports it. Do not stop at title/project/tags if richer grounded metadata is present.
---
${noteBody}`;

  return { system, user };
}
| 131 | |
/**
 * Parse the raw text returned by the model into the Enrich result shape.
 *
 * The text is trimmed, an optional surrounding Markdown code fence (with or
 * without a "json" tag) is removed, and the remainder is parsed as JSON. Parsing
 * is best-effort: when the text is not a JSON object, `parseOk` is false, the
 * summary falls back to the trimmed raw text and the other fields keep their
 * defaults (enrich_version 1, no labels, empty frontmatter).
 *
 * Frontmatter entries are copied without validation; callers are expected to
 * pass them through validateAndNormalizeSuggestedFrontmatter.
 *
 * @param {string} rawText
 * @returns {{
 *   enrich_version: number,
 *   summary: string,
 *   suggested_labels: string[],
 *   suggested_frontmatter: Record<string, unknown>,
 *   parseOk: boolean,
 * }}
 */
export function parseEnrichModelOutput(rawText) {
  const raw = rawText != null ? String(rawText) : '';
  // Trim first: the opening-fence pattern is anchored to the start of the string,
  // so leading whitespace before the fence would otherwise prevent stripping it.
  const trimmed = raw.trim();
  let summary = trimmed;
  const suggested_labels = [];
  /** @type {Record<string, unknown>} */
  const suggested_frontmatter = {};
  let enrich_version = 1;
  let parseOk = false;
  try {
    const cleaned = trimmed.replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/m, '').trim();
    const j = JSON.parse(cleaned);
    if (j && typeof j === 'object' && !Array.isArray(j)) {
      parseOk = true;
      if (typeof j.summary === 'string') summary = j.summary;
      if (typeof j.enrich_version === 'number' && Number.isFinite(j.enrich_version)) {
        enrich_version = j.enrich_version;
      }
      if (Array.isArray(j.suggested_labels)) {
        for (const x of j.suggested_labels) {
          suggested_labels.push(String(x));
        }
      }
      const sf = j.suggested_frontmatter;
      if (sf && typeof sf === 'object' && !Array.isArray(sf)) {
        for (const [k, v] of Object.entries(sf)) {
          // JSON.parse creates "__proto__" as an own key; assigning it with [] would
          // invoke the prototype setter and rewire the result object's prototype.
          if (k === '__proto__') continue;
          suggested_frontmatter[k] = v;
        }
      }
    }
  } catch (_) {
    /* Not JSON: keep summary = trimmed raw text and parseOk = false. */
  }
  return {
    enrich_version,
    summary: clampStr(summary, MAX_SUMMARY_CHARS),
    suggested_labels,
    suggested_frontmatter,
    parseOk,
  };
}
| 182 | |
/**
 * Normalize labels for storage (slug-like tags).
 *
 * Every entry is stringified and passed through normalizeTags; empty results are
 * dropped. Labels are then truncated to MAX_LABEL_LEN characters, de-duplicated
 * and capped at MAX_LABELS entries. Truncation happens before de-duplication so
 * two long labels that share the same truncated form cannot both be returned.
 *
 * @param {string[]} labels
 * @returns {string[]} empty when `labels` is not an array
 */
export function normalizeSuggestedLabels(labels) {
  if (!Array.isArray(labels)) return [];
  const normalized = normalizeTags(labels.map((x) => String(x))).filter(Boolean);
  const clipped = normalized.map((t) => (t.length > MAX_LABEL_LEN ? t.slice(0, MAX_LABEL_LEN) : t));
  return [...new Set(clipped)].slice(0, MAX_LABELS);
}
| 192 | |
/** Maximum number of entries kept in list-valued fields (tags, entity, follows, summarizes). */
const ENRICH_MAX_LIST_ITEMS = 32;

/**
 * Convert a model-supplied value to text, accepting only strings and finite numbers.
 * Objects, arrays, booleans, null and undefined yield '' so they are never stored
 * as stringified artefacts such as "[object Object]" or "null".
 * @param {unknown} val
 * @returns {string}
 */
function enrichScalarText(val) {
  if (typeof val === 'string') return val;
  if (typeof val === 'number' && Number.isFinite(val)) return String(val);
  return '';
}

/**
 * Validate and normalize the model's suggested_frontmatter object.
 *
 * Only keys in SUGGESTED_FRONTMATTER_KEYS that are not forbidden are considered;
 * everything else is dropped. Per key:
 * - tags: a string or array, normalized with normalizeTags, empty entries removed.
 * - entity: a string or array, each normalized with normalizeSlug; a single result
 *   is stored as a string, several as an array.
 * - project, causal_chain_id, episode_id: normalized with normalizeSlug.
 * - follows, summarizes: a path or array of paths; entries failing
 *   isSafeVaultPathLike are dropped and backslashes become forward slashes.
 * - state_snapshot: a boolean, or the strings 'true' / 'false'.
 * - title, intent, date, updated, source, source_id, summarizes_range: trimmed
 *   text clamped to the matching MAX_* limit.
 * Values that are not strings or finite numbers are ignored rather than
 * stringified; fields that end up empty are omitted from the result.
 *
 * @param {Record<string, unknown>} raw
 * @returns {Record<string, unknown>} empty when `raw` is not a plain (non-array) object
 */
export function validateAndNormalizeSuggestedFrontmatter(raw) {
  if (!raw || typeof raw !== 'object' || Array.isArray(raw)) return {};
  /** @type {Record<string, unknown>} */
  const out = {};
  for (const [key, val] of Object.entries(raw)) {
    // The allow-list already excludes forbidden keys; the second check is defence in depth.
    if (!SUGGESTED_FRONTMATTER_KEYS.has(key) || isForbiddenKey(key)) continue;
    switch (key) {
      case 'tags': {
        const candidates = (Array.isArray(val) ? val : [val]).map(enrichScalarText).filter(Boolean);
        // Filter after normalizing as well, matching normalizeSuggestedLabels.
        const tags = normalizeTags(candidates).filter(Boolean);
        if (tags.length) out.tags = tags.slice(0, ENRICH_MAX_LIST_ITEMS);
        break;
      }
      case 'entity': {
        if (Array.isArray(val)) {
          const ents = val
            .map(enrichScalarText)
            .filter(Boolean)
            .map((text) => normalizeSlug(text))
            .filter(Boolean)
            .slice(0, ENRICH_MAX_LIST_ITEMS);
          if (ents.length) out.entity = ents.length === 1 ? ents[0] : ents;
        } else {
          const text = enrichScalarText(val);
          if (text.trim()) {
            const slug = normalizeSlug(text);
            if (slug) out.entity = slug;
          }
        }
        break;
      }
      case 'project':
      case 'causal_chain_id':
      case 'episode_id': {
        const slug = normalizeSlug(enrichScalarText(val));
        if (slug) out[key] = slug;
        break;
      }
      case 'follows':
      case 'summarizes': {
        if (Array.isArray(val)) {
          const paths = [];
          for (const entry of val) {
            const candidate = enrichScalarText(entry).trim();
            if (isSafeVaultPathLike(candidate)) paths.push(candidate.replace(/\\/g, '/'));
          }
          if (paths.length) out[key] = paths.length === 1 ? paths[0] : paths.slice(0, ENRICH_MAX_LIST_ITEMS);
        } else {
          const candidate = enrichScalarText(val).trim();
          if (isSafeVaultPathLike(candidate)) out[key] = candidate.replace(/\\/g, '/');
        }
        break;
      }
      case 'state_snapshot': {
        if (val === true || val === false) out.state_snapshot = val;
        else if (val === 'true' || val === 'false') out.state_snapshot = val === 'true';
        break;
      }
      case 'title': {
        const text = clampStr(enrichScalarText(val), MAX_TITLE_CHARS);
        if (text) out.title = text;
        break;
      }
      case 'intent': {
        const text = clampStr(enrichScalarText(val), MAX_INTENT_CHARS);
        if (text) out.intent = text;
        break;
      }
      case 'date':
      case 'updated':
      case 'source':
      case 'source_id':
      case 'summarizes_range': {
        const text = clampStr(enrichScalarText(val), MAX_SCALAR_CHARS);
        if (text) out[key] = text;
        break;
      }
      // No default: an allow-listed key without a rule above is ignored.
    }
  }
  return out;
}
| 259 | |
/**
 * Run the whole post-LLM pipeline on the model's raw reply: parse it, normalize
 * the labels, validate the frontmatter, and discard the frontmatter entirely
 * when its JSON form is longer than MAX_JSON_OUTPUT_CHARS (or cannot be serialized).
 * @param {string} rawText
 * @returns {{
 *   enrich_version: number,
 *   summary: string,
 *   suggested_labels: string[],
 *   suggested_frontmatter: Record<string, unknown>,
 *   parseOk: boolean,
 * }}
 */
export function validateAndNormalizeEnrichResult(rawText) {
  const {
    enrich_version,
    summary,
    suggested_labels: rawLabels,
    suggested_frontmatter: rawFrontmatter,
    parseOk,
  } = parseEnrichModelOutput(rawText);
  const labels = normalizeSuggestedLabels(rawLabels);
  const frontmatter = validateAndNormalizeSuggestedFrontmatter(rawFrontmatter);

  let withinBudget;
  try {
    withinBudget = JSON.stringify(frontmatter).length <= MAX_JSON_OUTPUT_CHARS;
  } catch (_) {
    // Unserializable frontmatter is treated the same as oversized frontmatter.
    withinBudget = false;
  }

  return {
    enrich_version,
    summary,
    suggested_labels: labels,
    suggested_frontmatter: withinBudget ? frontmatter : {},
    parseOk,
  };
}
| 286 | |
/**
 * Serialize normalized frontmatter for canister / API (bounded).
 * Returns the JSON text of `fm` (null/undefined count as an empty object), or
 * the literal '{}' when the text is longer than MAX_JSON_OUTPUT_CHARS or the
 * value cannot be serialized.
 * @param {Record<string, unknown>} fm
 * @returns {string}
 */
export function serializeSuggestedFrontmatterJson(fm) {
  const EMPTY_JSON = '{}';
  try {
    const encoded = JSON.stringify(fm ?? {});
    // Reading .length stays inside the try: stringify can return undefined for some inputs.
    return encoded.length <= MAX_JSON_OUTPUT_CHARS ? encoded : EMPTY_JSON;
  } catch (_) {
    return EMPTY_JSON;
  }
}
| 300 | |
// Re-export the serialized-frontmatter size limit under a descriptive public name.
export { MAX_JSON_OUTPUT_CHARS as ENRICH_SUGGESTED_FRONTMATTER_MAX_JSON_CHARS };
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
2 days ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
2 days ago