companion-runtime-manager-e2e.test.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
1 day ago
| 1 | /** |
| 2 | * Tier 3 — END-TO-END: lib/companion-runtime-manager.mjs |
| 3 | * |
| 4 | * Realistic simulated request lifecycles using stub adapters that mirror what Phase 5 |
| 5 | * will inject. Tests the full "download → verify → start → health → serve → drain → stop" |
| 6 | * cycle, including failure branches (integrity failure, health-check failure, runtime crash, |
| 7 | * resource exhaustion mid-session). |
| 8 | * |
| 9 | * No real child_process, no real fetch, no real filesystem — stubs only. |
| 10 | * |
| 11 | * Reference: docs/COMPANION-APP-PHASE-4-RUNTIME-MANAGER.md §5 (Phase 5 obligations). |
| 12 | */ |
| 13 | |
| 14 | import { describe, it } from 'node:test'; |
| 15 | import assert from 'node:assert/strict'; |
| 16 | import crypto from 'node:crypto'; |
| 17 | |
| 18 | import { |
| 19 | RUNTIME_MANAGER_REASONS, |
| 20 | LIFECYCLE_STATES, |
| 21 | LIFECYCLE_EVENTS, |
| 22 | createIntegrityAccumulator, |
| 23 | verifyModelBytes, |
| 24 | createLifecycleState, |
| 25 | transitionLifecycle, |
| 26 | canServeInference, |
| 27 | createAdmissionState, |
| 28 | evaluateAdmission, |
| 29 | recordInFlight, |
| 30 | recordCompletion, |
| 31 | createResourceLimits, |
| 32 | evaluateResourceLimits, |
| 33 | evaluateRuntimeRequest, |
| 34 | } from '../lib/companion-runtime-manager.mjs'; |
| 35 | |
/**
 * Compute the SHA-256 digest of `data` as a lowercase hex string.
 *
 * @param {string|Buffer|Uint8Array} data - Content to hash.
 * @returns {string} 64-character hexadecimal digest.
 */
function makeDigest(data) {
  const hasher = crypto.createHash('sha256');
  hasher.update(data);
  return hasher.digest('hex');
}
| 39 | |
// Source-URL allow-list passed to the integrity accumulator, plus a model URL
// built from the allow-listed prefix so the two cannot drift apart.
const ALLOWED_URLS = ['https://models.example.com/'];
const VALID_URL = `${ALLOWED_URLS[0]}model.gguf`;
| 42 | |
| 43 | // ── Stub adapter factory (mirrors RuntimeAdapterFns from Phase 4 §7) ──────── |
| 44 | |
/**
 * Build an in-memory stand-in for the runtime adapter: no process is spawned,
 * nothing is fetched, and no filesystem is touched.
 *
 * @param {object} [options]
 * @param {boolean} [options.healthShouldPass=true] - Value `healthCheck()` resolves to.
 * @param {object|null} [options.resourceObs=null] - Observation `statResources()` resolves to;
 *   when nullish, a low-usage default (1e9 RAM bytes, 0 VRAM bytes, 10% CPU) is used.
 * @returns {{spawn: Function, download: Function, healthCheck: Function, statResources: Function}}
 *   Adapter whose members are all async.
 */
function makeStubAdapter({ healthShouldPass = true, resourceObs = null } = {}) {
  const reportedUsage = resourceObs ?? { ramBytes: 1e9, vramBytes: 0, cpuPercent: 10 };

  // Fake process handle with a no-op kill.
  const spawn = async () => ({ pid: 12345, kill: async () => {} });

  // "Downloads" by handing the caller-supplied data to the chunk callback in one piece.
  const download = async (url, onChunk, data) => {
    onChunk(data);
  };

  const healthCheck = async () => healthShouldPass;
  const statResources = async () => reportedUsage;

  return { spawn, download, healthCheck, statResources };
}
| 54 | |
| 55 | // ── Simulated Phase 5 lifecycle orchestrator (pure logic only) ─────────────── |
| 56 | |
/**
 * Drive one simulated runtime session through
 * verify → start → health → serve → drain → stop, using only the injected adapter
 * and the pure runtime-manager helpers.
 *
 * Every lifecycle transition is checked. A refused transition ends the session
 * with a `{ success: false, phase, reason }` result instead of continuing with an
 * undefined lifecycle state.
 *
 * @param {object} params
 * @param {Buffer|Uint8Array|string} params.modelData - Model payload; its `length` is
 *   used as the expected size and it is handed to `adapter.download` as the data to deliver.
 * @param {string} params.expectedDigest - Digest the integrity accumulator must match.
 * @param {object} params.adapter - Must provide async `download`, `healthCheck`
 *   and `statResources`.
 * @param {number} [params.maxInFlight=4] - Forwarded to `createAdmissionState`.
 * @param {number} [params.queueBound=8] - Forwarded to `createAdmissionState`.
 * @param {object|null} [params.limits=null] - Resource limits; when nullish,
 *   8e9 RAM bytes / 4e9 VRAM bytes / 80% CPU are used.
 * @returns {Promise<{success: true, finalState: *} | {success: false, phase: string, reason: *}>}
 *   `phase` is one of 'integrity', 'start', 'health', 'not_ready', 'admission',
 *   'drain' or 'stop'.
 */
async function simulatePhase5Session({
  modelData,
  expectedDigest,
  adapter,
  maxInFlight = 4,
  queueBound = 8,
  limits = null,
}) {
  const resourceLimits = limits
    ?? createResourceLimits({ maxRamBytes: 8e9, maxVramBytes: 4e9, maxCpuPercent: 80 });

  // 1. Integrity verification: stream the "download" through the accumulator.
  const acc = createIntegrityAccumulator({
    expectedDigest,
    expectedSizeBytes: modelData.length,
    sourceUrl: VALID_URL,
    allowedSourceUrls: ALLOWED_URLS,
  });
  await adapter.download(VALID_URL, (chunk) => acc.update(chunk), modelData);
  const integrityVerdict = acc.finalize();
  if (!integrityVerdict.ok) {
    // Nothing past this point runs for an unverified model.
    return { success: false, phase: 'integrity', reason: integrityVerdict.reason };
  }

  // 2. Start. Real Phase 5 would call adapter.spawn here; this simulation only
  //    performs the lifecycle transition.
  let lifecycle = createLifecycleState();
  let tr = transitionLifecycle(lifecycle, LIFECYCLE_EVENTS.START);
  if (!tr.ok) return { success: false, phase: 'start', reason: tr.reason };
  lifecycle = tr.newState;

  // 3. Health check against a placeholder process handle.
  const healthy = await adapter.healthCheck({ pid: 0, kill: async () => {} });
  const healthEvent = healthy ? LIFECYCLE_EVENTS.HEALTH_OK : LIFECYCLE_EVENTS.HEALTH_FAIL;
  tr = transitionLifecycle(lifecycle, healthEvent);
  if (!tr.ok) return { success: false, phase: 'health', reason: tr.reason };
  lifecycle = tr.newState;

  if (!canServeInference(lifecycle)) {
    return { success: false, phase: 'not_ready', reason: RUNTIME_MANAGER_REASONS.NOT_READY };
  }

  // 4. Serve a single inference request.
  let admissionState = createAdmissionState({ maxInFlight, queueBound });
  const obs = await adapter.statResources();
  const decision = evaluateRuntimeRequest({
    lifecycleState: lifecycle,
    admissionState,
    resourceObservation: obs,
    resourceLimits,
  });
  if (!decision.ok) {
    return { success: false, phase: 'admission', reason: decision.reason };
  }
  admissionState = recordInFlight(admissionState);
  // (simulate inference work)
  admissionState = recordCompletion(admissionState);

  // 5. Drain, then stop. Checked like the earlier transitions so a refused
  //    transition is reported rather than masked or surfaced as a TypeError.
  tr = transitionLifecycle(lifecycle, LIFECYCLE_EVENTS.DRAIN);
  if (!tr.ok) return { success: false, phase: 'drain', reason: tr.reason };
  lifecycle = tr.newState;

  tr = transitionLifecycle(lifecycle, LIFECYCLE_EVENTS.STOPPED);
  if (!tr.ok) return { success: false, phase: 'stop', reason: tr.reason };
  lifecycle = tr.newState;

  return { success: true, finalState: lifecycle.state };
}
| 121 | |
| 122 | // ── E2E tests ──────────────────────────────────────────────────────────────── |
| 123 | |
describe('E2E: happy path — download → verify → start → serve → drain', () => {
  it('completes full session with valid model', async () => {
    // Arrange: a payload whose expected digest is computed from the payload itself,
    // served by an adapter whose health check passes.
    const payload = Buffer.from('a trustworthy model binary - valid and correct');
    const session = {
      modelData: payload,
      expectedDigest: makeDigest(payload),
      adapter: makeStubAdapter({ healthShouldPass: true }),
    };

    // Act
    const outcome = await simulatePhase5Session(session);

    // Assert: the whole cycle ran and the lifecycle ended in the stopped state.
    assert.equal(outcome.success, true, `Expected success, got: ${JSON.stringify(outcome)}`);
    assert.equal(outcome.finalState, LIFECYCLE_STATES.STOPPED);
  });
});
| 135 | |
describe('E2E: integrity failure — tampered model refused before execution', () => {
  it('rejects session when model digest is wrong', async () => {
    const modelData = Buffer.from('tampered model binary with wrong content');
    const wrongDigest = 'a'.repeat(64); // not the digest of modelData
    const adapter = makeStubAdapter({ healthShouldPass: true });

    const result = await simulatePhase5Session({ modelData, expectedDigest: wrongDigest, adapter });
    assert.equal(result.success, false);
    assert.equal(result.phase, 'integrity');
    assert.equal(result.reason, RUNTIME_MANAGER_REASONS.DIGEST_MISMATCH);
  });

  it('lifecycle remains stopped when integrity fails (no execution path)', async () => {
    // Count every adapter call that belongs to the post-verification part of the
    // session, so the test actually observes that a tampered model never reaches it.
    const calls = { spawn: 0, healthCheck: 0, statResources: 0 };
    const stub = makeStubAdapter({ healthShouldPass: true });
    const adapter = { ...stub };
    for (const method of Object.keys(calls)) {
      adapter[method] = async (...args) => {
        calls[method] += 1;
        return stub[method](...args);
      };
    }

    const modelData = Buffer.from('tampered model binary with wrong content');
    const result = await simulatePhase5Session({
      modelData,
      expectedDigest: 'a'.repeat(64),
      adapter,
    });
    assert.equal(result.success, false);
    assert.equal(result.phase, 'integrity');
    assert.deepEqual(calls, { spawn: 0, healthCheck: 0, statResources: 0 });

    // Baseline: a lifecycle that never received START is stopped and cannot serve.
    const lifecycle = createLifecycleState();
    assert.equal(canServeInference(lifecycle), false);
    assert.equal(lifecycle.state, LIFECYCLE_STATES.STOPPED);
  });
});
| 156 | |
describe('E2E: health-check failure — runtime fails to start', () => {
  it('session ends with not_ready when health check fails', async () => {
    const modelData = Buffer.from('good model data, but runtime fails to start');
    const digest = makeDigest(modelData);
    const adapter = makeStubAdapter({ healthShouldPass: false });

    const result = await simulatePhase5Session({ modelData, expectedDigest: digest, adapter });
    assert.equal(result.success, false);
    assert.equal(result.phase, 'not_ready');
    // Pin the reason too, as the other failure-branch tests do.
    assert.equal(result.reason, RUNTIME_MANAGER_REASONS.NOT_READY);
  });

  it('lifecycle is in stopped state after health_fail', () => {
    // Check each transition result so a refused transition fails here with a clear
    // message rather than as a TypeError on an undefined state further down.
    const started = transitionLifecycle(createLifecycleState(), LIFECYCLE_EVENTS.START);
    assert.ok(started.ok, 'START transition should be accepted');

    const failed = transitionLifecycle(started.newState, LIFECYCLE_EVENTS.HEALTH_FAIL);
    assert.ok(failed.ok, 'HEALTH_FAIL transition should be accepted');

    const lifecycle = failed.newState;
    assert.equal(lifecycle.state, LIFECYCLE_STATES.STOPPED);
    assert.equal(canServeInference(lifecycle), false);
  });
});
| 176 | |
describe('E2E: resource exhaustion mid-session', () => {
  it('inference rejected when RAM spikes over limit', async () => {
    const payload = Buffer.from('valid model with resource spike');

    // The adapter reports 10e9 bytes of RAM against an 8e9-byte cap.
    const spikingAdapter = makeStubAdapter({
      healthShouldPass: true,
      resourceObs: { ramBytes: 10e9, vramBytes: 0, cpuPercent: 10 },
    });
    const caps = createResourceLimits({ maxRamBytes: 8e9, maxVramBytes: 4e9, maxCpuPercent: 80 });

    const { success, phase, reason } = await simulatePhase5Session({
      modelData: payload,
      expectedDigest: makeDigest(payload),
      adapter: spikingAdapter,
      limits: caps,
    });

    // The session is refused at the admission step, not earlier.
    assert.equal(success, false);
    assert.equal(phase, 'admission');
    assert.equal(reason, RUNTIME_MANAGER_REASONS.RAM_OVER_LIMIT);
  });
});
| 194 | |
describe('E2E: concurrent request lifecycle', () => {
  it('multiple sequential requests cycle in-flight counter correctly', async () => {
    // Bring a fresh lifecycle through START and HEALTH_OK.
    const starting = transitionLifecycle(createLifecycleState(), LIFECYCLE_EVENTS.START).newState;
    const ready = transitionLifecycle(starting, LIFECYCLE_EVENTS.HEALTH_OK).newState;

    const capacity = 3;
    let slots = createAdmissionState({ maxInFlight: capacity, queueBound: 6 });
    const resourceLimits = createResourceLimits({ maxRamBytes: 8e9, maxVramBytes: 4e9, maxCpuPercent: 80 });
    const resourceObservation = { ramBytes: 1e9, vramBytes: 0, cpuPercent: 10 };

    // Evaluates a request against whatever `slots` currently holds.
    const tryAdmit = () => evaluateRuntimeRequest({
      lifecycleState: ready,
      admissionState: slots,
      resourceObservation,
      resourceLimits,
    });

    // Fill every in-flight slot.
    for (let i = 0; i < capacity; i += 1) {
      assert.equal(tryAdmit().ok, true, `request ${i} should be admitted`);
      slots = recordInFlight(slots);
    }
    assert.equal(slots.inFlight, capacity);

    // One more request must be turned away as AT_CAPACITY.
    const overflow = tryAdmit();
    assert.equal(overflow.ok, false);
    assert.equal(overflow.reason, RUNTIME_MANAGER_REASONS.AT_CAPACITY);

    // Finish all in-flight requests.
    for (let done = 0; done < capacity; done += 1) {
      slots = recordCompletion(slots);
    }
    assert.equal(slots.inFlight, 0);

    // With the slots free again, admission succeeds.
    assert.equal(tryAdmit().ok, true);
  });
});
| 229 | |
describe('E2E: draining rejects new inference', () => {
  it('requests rejected in draining state', () => {
    // Fold START → HEALTH_OK → DRAIN over a fresh lifecycle.
    const draining = [
      LIFECYCLE_EVENTS.START,
      LIFECYCLE_EVENTS.HEALTH_OK,
      LIFECYCLE_EVENTS.DRAIN,
    ].reduce(
      (current, event) => transitionLifecycle(current, event).newState,
      createLifecycleState(),
    );
    assert.equal(draining.state, LIFECYCLE_STATES.DRAINING);

    // Capacity and resources are well within bounds, so only the lifecycle
    // state can be the cause of the rejection.
    const verdict = evaluateRuntimeRequest({
      lifecycleState: draining,
      admissionState: createAdmissionState({ maxInFlight: 4, queueBound: 8 }),
      resourceObservation: { ramBytes: 1e9, vramBytes: 0, cpuPercent: 10 },
      resourceLimits: createResourceLimits({ maxRamBytes: 8e9, maxVramBytes: 4e9, maxCpuPercent: 80 }),
    });

    assert.equal(verdict.ok, false);
    assert.equal(verdict.reason, RUNTIME_MANAGER_REASONS.NOT_READY);
  });
});
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
1 day ago