validate-deepinfra-enrich.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | #!/usr/bin/env node |
| 2 | /** |
| 3 | * DeepInfra Enrich validation harness. |
| 4 | * |
| 5 | * Purpose: before flipping the hosted Hub gateway from OpenAI to DeepInfra, |
| 6 | * verify that the chosen DeepInfra chat model returns JSON that |
| 7 | * `validateAndNormalizeEnrichResult` can parse for a representative spread |
| 8 | * of proposal-shaped inputs. Fail fast (exit 1) if any sample fails to parse, |
| 9 | * since downstream the canister will store empty `suggested_frontmatter` JSON |
| 10 | * and reviewers lose the metadata-suggestion benefit. |
| 11 | * |
| 12 | * Usage (Track A — flip explicit provider): |
| 13 | * |
| 14 | * DEEPINFRA_API_KEY=di-... \ |
| 15 | * KNOWTATION_CHAT_PROVIDER=deepinfra \ |
| 16 | * DEEPINFRA_CHAT_MODEL=Qwen/Qwen2.5-72B-Instruct \ |
| 17 | * node scripts/validate-deepinfra-enrich.mjs |
| 18 | * |
| 19 | * Usage (Track B — control: re-run against current OpenAI to compare): |
| 20 | * |
| 21 | * OPENAI_API_KEY=sk-... \ |
| 22 | * KNOWTATION_CHAT_PROVIDER=openai \ |
| 23 | * node scripts/validate-deepinfra-enrich.mjs |
| 24 | * |
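| 25 | * Exit code 0 = at least --pass-threshold samples passed (default 10, i.e. every built-in sample); |
| 26 | * 1 = fewer passed (LLM call failed, output failed to parse, summary was empty, or fields fell outside the SPEC §2 allow-list); 2 = no API key set or the harness crashed. |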
| 27 | * |
| 28 | * Privacy: each sample body below is synthetic. No vault data is sent. |
| 29 | * If you want to stress-test against real proposal bodies, pass |
| 30 | * --vault-sample <path-to-md> [--vault-sample <path>...] |
| 31 | * (paths are read with fs.readFileSync — they never leave your machine until |
| 32 | * the LLM call goes out, which you have already approved by setting the API key). |
| 33 | */ |
| 34 | |
| 35 | import fs from 'node:fs'; |
| 36 | import path from 'node:path'; |
| 37 | import { fileURLToPath } from 'node:url'; |
| 38 | import { completeChat } from '../lib/llm-complete.mjs'; |
| 39 | import { |
| 40 | buildEnrichMessages, |
| 41 | validateAndNormalizeEnrichResult, |
| 42 | SUGGESTED_FRONTMATTER_KEYS, |
| 43 | } from '../lib/proposal-enrich-llm.mjs'; |
| 44 | |
// Directory that contains this script; ES modules have no built-in __dirname.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

/**
 * Built-in, fully synthetic proposal inputs fed to the Enrich prompt.
 * Each entry pairs a human-readable label with the `{ path, intent, body }`
 * object handed to `buildEnrichMessages`.
 *
 * @type {Array<{ label: string, input: { path: string, intent: string, body: string } }>}
 */
const SAMPLES = (() => {
  // Assemble one sample record from its four fields.
  const toSample = (label, notePath, intent, body) => ({
    label,
    input: { path: notePath, intent, body },
  });
  // Glue sentence fragments into a single-line body.
  const inline = (...parts) => parts.join('');
  // Glue rows into a newline-separated body.
  const multiline = (...rows) => rows.join('\n');

  return [
    toSample(
      'short-paragraph',
      'projects/example/inbox/quick-thought.md',
      'capture a quick observation about onboarding friction',
      inline(
        'Users on the trial plan are dropping off after the first invite step. ',
        'It seems related to the email confirmation delay. We should add a resend button.',
      ),
    ),
    toSample(
      'long-technical',
      'projects/example/research/auth-rotation-2026-04.md',
      'research note on JWT rotation strategy',
      multiline(
        '# JWT rotation review',
        '',
        '## Background',
        'Hosted Hub currently issues 24h JWTs (HUB_JWT_EXPIRY default). Rotation is manual.',
        '',
        '## Options considered',
        '1. Sliding 7d refresh token + 1h access token (industry standard).',
        '2. Stateless 24h JWT with mandatory client re-login (current).',
        '3. Server-side revocation list (cost: cache miss + DB hit).',
        '',
        '## Decision',
        'Move to option 1 in Q3 2026. Track in causal-chain `auth-rotation-2026`.',
        '',
        '## Risks',
        '- Refresh-token theft via XSS (mitigate with httpOnly cookies).',
        '- Logout requires server-side denylist (1h TTL bounded).',
      ),
    ),
    toSample(
      'with-project-frontmatter-wording',
      'projects/born-free/outlines/landing-hero-2026-05.md',
      'outline for the May 2026 landing hero refresh',
      inline(
        'Project: Born Free. Audience: families and adventurers who want community-owned property access. ',
        'Hero CTA: claim Experience Key with code BORNFREE100. Tone: warm, partner-focused, never paid-influencer.',
      ),
    ),
    toSample(
      'bulleted-only',
      'projects/example/playbooks/launch-checklist.md',
      'pre-launch operational checklist',
      multiline(
        '- Confirm DEEPINFRA_API_KEY rotated in Netlify',
        '- Run Enrich validation harness (this script)',
        '- Smoke test create proposal in staging',
        '- Watch error rate dashboard for 1h',
        '- If ok, flip production env vars',
      ),
    ),
    toSample(
      'code-blocks',
      'projects/knowtation/research/embedding-dim-table.md',
      'reference for embedding dimensions per provider',
      multiline(
        'Common embedding dimensions:',
        '',
        '```',
        'openai/text-embedding-3-small 1536',
        'openai/text-embedding-3-large 3072',
        'voyage/voyage-4-lite 1024',
        'deepinfra/bge-large-en-v1.5 1024',
        'deepinfra/Qwen3-Embedding-8B 4096',
        '```',
      ),
    ),
    toSample(
      'date-references',
      'projects/example/decisions/2026-04-15-llm-provider.md',
      'decision record for switching chat provider',
      inline(
        'On 2026-04-15 we decided to flip hosted Hub chat from OpenAI to DeepInfra. ',
        'Effective date: 2026-05-01 after staging validation. Updated: 2026-04-30. ',
        'Source: docs/NEXT-SESSION-HUB-LLM-COST-ROUTING.md.',
      ),
    ),
    toSample(
      'named-entities',
      'projects/born-free/research/competitive-snapshot-2026-04.md',
      'competitive snapshot of community-owned travel platforms',
      inline(
        'Competitors: Kibbo (community RV access), Inspirato (luxury subscription), DAOhaus governance template. ',
        'Differentiators for Born Free: credits renew for life, DAO governance, member NFT, partner not promoter.',
      ),
    ),
    toSample(
      'causal-chain',
      'projects/example/incidents/2026-04-22-hint-timeout.md',
      'post-mortem for review-hints timeout incident',
      inline(
        'Incident chain `hosted-hint-timeout-2026-q2` follows from earlier note ',
        'projects/example/incidents/2026-04-10-canister-cold-start.md. ',
        'Root cause: extra canister GET inside the 18s race. Fixed by merging client body into the hints job.',
      ),
    ),
    toSample(
      'edge-empty-intent',
      'projects/example/inbox/random.md',
      '',
      'one line note',
    ),
    toSample(
      'structured-table',
      'projects/example/research/provider-cost-2026-04.md',
      'cost comparison table',
      multiline(
        '| Provider | Chat (per 1M tok) | Embed (per 1M tok) |',
        '|-----------|-------------------|---------------------|',
        '| OpenAI | 0.15 | 0.02 |',
        '| DeepInfra | 0.05 | 0.005 |',
        '| Voyage | n/a | 0.05 |',
      ),
    ),
  ];
})();
| 181 | |
/**
 * Build extra samples from real Markdown files (the `--vault-sample` paths).
 *
 * For every readable file the sample body is the full file content, the
 * label is `vault:<basename>`, and the sample path is the given path with
 * everything up to and including the first `/vault/` segment collapsed to
 * `vault/`. The intent is taken from the first line that starts with
 * `intent:`; when no such line exists a fixed placeholder is used.
 * Unreadable files are reported on stderr and skipped.
 *
 * @param {string[]} filePaths - Paths of Markdown files to load.
 * @returns {Array<{ label: string, input: { path: string, intent: string, body: string } }>}
 */
function loadVaultSamples(filePaths) {
  const intentLine = /^intent:\s*(.+)$/m;
  const loaded = [];
  for (const filePath of filePaths) {
    let content = '';
    try {
      content = fs.readFileSync(filePath, 'utf8');
    } catch (err) {
      console.error(`[skip] cannot read ${filePath}: ${err.message}`);
      continue;
    }
    const found = intentLine.exec(content);
    let intent = 'imported vault sample';
    if (found) {
      intent = found[1].trim();
    }
    const label = `vault:${path.basename(filePath)}`;
    const samplePath = filePath.replace(/^.*?\/vault\//, 'vault/');
    loaded.push({
      label,
      input: { path: samplePath, intent, body: content },
    });
  }
  return loaded;
}
| 208 | |
/**
 * Parse the harness command-line flags.
 *
 * Recognized flags (each consumes the following argument as its value):
 *   --vault-sample <path>    add a Markdown file to the sample set (repeatable)
 *   --pass-threshold <int>   minimum number of passing samples required
 *
 * Unrecognized arguments, and flags with no value after them, are ignored.
 * A threshold that does not parse as an integer falls back to the default;
 * an explicit `0` is honored rather than being treated as "missing".
 *
 * @param {string[]} argv - Full argument vector (e.g. `process.argv`); parsing starts at index 2.
 * @returns {{ vaultSamples: string[], passThreshold: number }}
 */
function parseArgs(argv) {
  const DEFAULT_PASS_THRESHOLD = 10;
  const out = { vaultSamples: [], passThreshold: DEFAULT_PASS_THRESHOLD };
  for (let i = 2; i < argv.length; i++) {
    const flag = argv[i];
    const value = argv[i + 1];
    const hasValue = Boolean(value);
    if (flag === '--vault-sample' && hasValue) {
      out.vaultSamples.push(value);
      i += 1; // the value has been consumed
    } else if (flag === '--pass-threshold' && hasValue) {
      const parsed = Number.parseInt(value, 10);
      // Only fall back on NaN so that "0" is not mistaken for "unset".
      out.passThreshold = Number.isNaN(parsed) ? DEFAULT_PASS_THRESHOLD : parsed;
      i += 1; // the value has been consumed
    }
  }
  return out;
}
| 220 | |
/**
 * List the keys of a suggested-frontmatter object in sorted order.
 *
 * @param {*} fm - Candidate frontmatter value.
 * @returns {string[]} Sorted own enumerable keys, or an empty array when
 *   `fm` is falsy or not an object.
 */
function summarizeFrontmatterKeys(fm) {
  const isObjectLike = Boolean(fm) && typeof fm === 'object';
  if (isObjectLike) {
    const keys = Object.keys(fm);
    keys.sort();
    return keys;
  }
  return [];
}
| 225 | |
/**
 * Check that every key of a suggested-frontmatter object is in
 * `SUGGESTED_FRONTMATTER_KEYS`.
 *
 * @param {*} fm - Candidate frontmatter value.
 * @returns {boolean} `true` when `fm` is falsy, not an object, or has only
 *   allow-listed keys; `false` as soon as one key is not allow-listed.
 */
function isSubsetOfAllowList(fm) {
  // Nothing to validate when there is no object to inspect.
  if (fm === null || typeof fm !== 'object') return true;
  return Object.keys(fm).every((key) => SUGGESTED_FRONTMATTER_KEYS.has(key));
}
| 233 | |
/**
 * Run a single sample through the chat model and grade the response.
 *
 * The sample input is turned into system/user messages, sent via
 * `completeChat`, and the raw reply is passed to
 * `validateAndNormalizeEnrichResult`. A sample passes only when the result
 * reports `parseOk`, its suggested frontmatter keys are all allow-listed,
 * and its summary is a non-blank string.
 *
 * A failed LLM call is reported as a failed sample (with `reason`) rather
 * than thrown. A non-string reply or a non-string summary is likewise
 * reported as a failed sample instead of crashing the whole run.
 *
 * @param {{ label: string, input: { path: string, intent: string, body: string } }} sample
 * @returns {Promise<object>} Per-sample report; always has `label`, `ok`
 *   and `ms`. LLM-call failures add `reason`; graded samples add `parseOk`,
 *   `allowListOk`, `summaryOk`, `summaryLen`, `labels`, `fmKeys` and
 *   `rawSnippet` (first 200 characters of the reply).
 */
async function runOne(sample) {
  const { system, user } = buildEnrichMessages(sample.input);
  const t0 = Date.now();
  let raw;
  try {
    raw = await completeChat(
      { llm: {} },
      { system, user, maxTokens: 800 },
    );
  } catch (e) {
    return {
      label: sample.label,
      ok: false,
      reason: `LLM call failed: ${e?.message || String(e)}`,
      ms: Date.now() - t0,
    };
  }
  const ms = Date.now() - t0;
  const result = validateAndNormalizeEnrichResult(raw);
  const allowListOk = isSubsetOfAllowList(result.suggested_frontmatter);
  const summaryIsString = typeof result.summary === 'string';
  const summaryOk = summaryIsString && result.summary.trim().length > 0;
  const ok = result.parseOk && allowListOk && summaryOk;
  return {
    label: sample.label,
    ok,
    parseOk: result.parseOk,
    allowListOk,
    summaryOk,
    // Guarded the same way as summaryOk: a missing summary is length 0, not a crash.
    summaryLen: summaryIsString ? result.summary.length : 0,
    // Callers read `.length`, so always hand back an array.
    labels: Array.isArray(result.suggested_labels) ? result.suggested_labels : [],
    fmKeys: summarizeFrontmatterKeys(result.suggested_frontmatter),
    ms,
    // The provider may return a non-string (e.g. undefined); coerce before slicing.
    rawSnippet: String(raw ?? '').slice(0, 200),
  };
}
| 269 | |
/**
 * Harness entry point: print the run configuration, grade every sample
 * sequentially, and exit with a status that reflects the outcome.
 *
 * Exit codes:
 *   0 - the number of passing samples reached `--pass-threshold`
 *   1 - fewer samples passed than the threshold
 *   2 - none of the DeepInfra / OpenAI / Anthropic API keys is set
 *
 * @returns {Promise<void>} Never resolves normally; the process exits first.
 */
async function main() {
  const args = parseArgs(process.argv);
  // Load vault files up front so the banner can report how many were actually
  // read (unreadable paths are skipped by loadVaultSamples).
  const vaultSamples = loadVaultSamples(args.vaultSamples);
  const samples = [...SAMPLES, ...vaultSamples];

  const provider = String(process.env.KNOWTATION_CHAT_PROVIDER || '').toLowerCase();
  const hasDeepinfra = Boolean(process.env.DEEPINFRA_API_KEY);
  const hasOpenai = Boolean(process.env.OPENAI_API_KEY);
  const hasAnthropic = Boolean(process.env.ANTHROPIC_API_KEY);

  console.log('--- DeepInfra Enrich validation harness ---');
  console.log(`KNOWTATION_CHAT_PROVIDER=${provider || '(unset)'}`);
  console.log(
    `Keys: deepinfra=${hasDeepinfra ? 'set' : '(unset)'} ` +
      `openai=${hasOpenai ? 'set' : '(unset)'} ` +
      `anthropic=${hasAnthropic ? 'set' : '(unset)'}`,
  );
  console.log(`DEEPINFRA_CHAT_MODEL=${process.env.DEEPINFRA_CHAT_MODEL || '(default Qwen/Qwen2.5-72B-Instruct)'}`);
  console.log(`Samples: ${samples.length} (built-in: ${SAMPLES.length}, vault: ${vaultSamples.length})`);
  console.log(`Pass threshold: ${args.passThreshold}/${samples.length}`);
  console.log('');

  if (!hasDeepinfra && !hasOpenai && !hasAnthropic) {
    console.error(
      'No LLM key set. Configure DEEPINFRA_API_KEY (recommended) or OPENAI_API_KEY/ANTHROPIC_API_KEY for a control run.',
    );
    process.exit(2);
  }

  // Samples run one at a time so each progress line pairs with its result.
  const results = [];
  for (const s of samples) {
    process.stdout.write(`[${s.label}] ... `);
    const r = await runOne(s);
    results.push(r);
    if (r.ok) {
      console.log(
        `ok parseOk=${r.parseOk} summaryLen=${r.summaryLen} labels=${(r.labels || []).length} ` +
          `fmKeys=[${(r.fmKeys || []).join(',')}] ${r.ms}ms`,
      );
    } else {
      console.log(`FAIL ${r.reason || ''}`);
      console.log(
        ` parseOk=${r.parseOk} allowListOk=${r.allowListOk} summaryOk=${r.summaryOk} ` +
          `fmKeys=[${(r.fmKeys || []).join(',')}] raw="${r.rawSnippet || ''}"`,
      );
    }
  }

  const passed = results.filter((r) => r.ok).length;
  const total = results.length;
  console.log('');
  console.log(`--- ${passed}/${total} samples passed ---`);

  if (passed < args.passThreshold) {
    console.error(
      `FAIL: only ${passed}/${total} samples passed (threshold ${args.passThreshold}). ` +
        'Do NOT flip production yet. Try a stronger model (Qwen/Qwen2.5-72B-Instruct) ' +
        'or tighten the system prompt before promoting.',
    );
    process.exit(1);
  }
  console.log('PASS: production flip is safe for the tested model + prompt.');
  process.exit(0);
}
| 333 | |
// Start the harness; any rejection that escapes main() is reported on stderr
// and mapped to exit code 2.
main().catch((err) => {
  const detail = err.message || String(err);
  console.error(`harness crashed: ${detail}`);
  process.exit(2);
});
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
1 day ago