companion-runtime-manager.mjs file-level

at sha256:3 · View file ↗ · Intel ↗

History
1 files
1 commits
0 hotspots
0 🧊 dead
0 💥 blast risk
sha256:4 fix(security): pin patched transitive deps to clear Dependabot moderate… · aaronrene · Jun 11, 2026
1 /**
2 * Companion App — Runtime Manager DECISION CORE.
3 *
4 * Phase 4 of the Companion App build plan (feat/companion-app).
5 * See docs/COMPANION-APP-PHASE-4-RUNTIME-MANAGER.md for the accepted design, the adversarial
6 * threat model, and the Phase 5 obligations to spawn the real runtime and perform the real
7 * verified download behind the shared bind gate.
8 *
9 * WHAT THIS MODULE IS
10 * The companion app (Phase 5+) bundles a local AI inference runtime (Ollama / llama.cpp).
11 * This module is the DECISION CORE for managing that runtime's lifecycle:
12 * - Supply-chain integrity: verify a downloaded model file before it is ever executed.
13 * - Lifecycle state machine: stopped → starting → ready → draining → stopped.
14 * - Backpressure / concurrency admission: queue bound, max-in-flight.
15 * - Resource-limit policy: RAM/VRAM/CPU ceilings; reject when over.
16 *
17 * DESIGN CONSTRAINTS (read before modifying — these are security invariants):
18 * - PURE. No I/O, no process.env reads, no child_process, no network, no filesystem,
19 * no logging, no clock reads. Every input is passed explicitly. The actual spawn of
20 * Ollama/llama.cpp, the real model download over TLS, and OS resource probing are
21 * deferred to Phase 5 via the INJECTED adapter interface (RuntimeAdapterFns).
22 * - FAIL-CLOSED. Any missing, malformed, ambiguous, or unrecognised input → DENY.
23 * There is no fail-open branch anywhere in this module.
24 * - NO AMBIENT AUTHORITY. The module imports no vault, canister, keychain, or auth module.
25 * The injected adapter interface is typed to model-lifecycle operations only.
26 * - NO SECRET IN OUTPUT. Reason codes are fixed constants. No model path, download URL,
27 * binary path, SHA-256 digest value, or access token ever appears in a reason string,
28 * a return value, or a thrown error.
29 * - SUPPLY-CHAIN INTEGRITY. A model file MUST pass SHA-256 digest + size verification
30 * via the integrity accumulator BEFORE canServeInference returns true for the first time.
31 * Phase 5 is responsible for calling finalize() and gating execution on { ok: true }.
32 *
33 * Hard constraint from docs/COMPANION-APP-DESIGN-AND-AUTHORIZATION-GATE.md §4 item 6:
34 * "No ambient authority. The endpoint exposes only model inference; it never exposes vault
35 * read/write, the canister client, or the stored JWT."
36 *
37 * Gate §12 Phase 4 obligations (remaining for Phase 5):
38 * - Spawn Ollama/llama.cpp (real child_process) after integrity is verified.
39 * - Perform the real model download over TLS using the injected download adapter.
40 * - Call the OS resource probe (injected stat adapter) to supply ResourceObservation.
41 * - Run the health-check loop and call transitionLifecycle with health_ok/health_fail.
42 * - Set companionAvailable=true in LaneCapabilities ONLY after lifecycle reaches 'ready'.
43 * - Wire the Phase 2 loopback guard BEFORE any model work (Phase 2 boundary stays intact).
44 */
45
46 import crypto from 'node:crypto';
47
48 // ─────────────────────────────────────────────────────────────────────────────
49 // §1 — Reason codes (frozen constants; never derived from input)
50 // ─────────────────────────────────────────────────────────────────────────────
51
/**
 * Closed vocabulary of reason codes shared by every decision function in this module.
 *
 * Any `reason` field in a verdict or decision is one of these values. Each code is a fixed
 * literal — never assembled from a model path, URL, digest, or other caller-controlled
 * input — so a reason can be surfaced without leaking anything sensitive.
 *
 * Every key is the upper-cased form of its value (e.g. `SIZE_MISMATCH: 'size_mismatch'`).
 * The values are listed once, grouped by the gate that emits them, and the keys are derived
 * from them so the two cannot drift apart.
 * @readonly
 */
export const RUNTIME_MANAGER_REASONS = Object.freeze(
  Object.fromEntries(
    [
      // Integrity
      'ok',
      'malformed_spec',
      'source_not_allowed',
      'scheme_not_allowed',
      'size_mismatch',
      'digest_mismatch',
      'accumulator_finalized',
      'accumulator_aborted',

      // Lifecycle
      'invalid_transition',
      'not_ready',
      'unknown_event',
      'unknown_state',

      // Admission
      'malformed_admission_state',
      'at_capacity',
      'queue_full',
      'no_in_flight_to_complete',

      // Resource limits
      'malformed_limits',
      'malformed_observation',
      'ram_over_limit',
      'vram_over_limit',
      'cpu_over_limit',

      // Top-level request gate
      'malformed_request_params',
    ].map((code) => [code.toUpperCase(), code]),
  ),
);
91
92 // ─────────────────────────────────────────────────────────────────────────────
93 // §2 — Supply-chain integrity verification
94 // ─────────────────────────────────────────────────────────────────────────────
95
/**
 * Protocols a model download URL may use, written in `URL.protocol` form (with the
 * trailing colon).
 *
 * Only HTTPS is listed, so a spec naming a plain-HTTP source is refused when the spec is
 * validated — before any download is attempted. A misconfigured registry therefore cannot
 * quietly serve models over a cleartext channel.
 * @type {ReadonlySet<string>}
 */
export const ALLOWED_SOURCE_SCHEMES = new Set().add('https:');
104
/**
 * Number of characters in a hex-encoded SHA-256 digest: 32 digest bytes, two hex
 * characters per byte.
 * @type {number}
 */
export const SHA256_HEX_LENGTH = 32 * 2;
110
/**
 * Decide whether a model download URL is acceptable: it must parse as a URL, use an allowed
 * scheme (HTTPS only), and fall under one of the caller-supplied allowlisted base URLs.
 *
 * Fail-closed: a non-string or empty URL, an unparseable URL, a non-HTTPS scheme, a missing
 * or empty allowlist, or an allowlist miss all deny. The URL itself is never copied into the
 * returned reason.
 *
 * Allowlist matching requires BOTH of the following for at least one entry:
 *  1. Textual: the lower-cased URL starts with the lower-cased entry (one trailing slash
 *     stripped). This is the historical rule; keeping it guarantees this function never
 *     accepts a URL the earlier prefix-only check would have rejected.
 *  2. Structural: the entry parses as a URL with the same origin (scheme + host + port) as
 *     the candidate, and the candidate's normalised path equals the entry's path or lies
 *     beneath it on a `/` segment boundary.
 *
 * The structural rule closes bypasses that a raw prefix match lets through, e.g. for the
 * entry `https://example.com/org/`:
 *  - `https://example.com.evil.net/...`    (host suffix appended after the prefix)
 *  - `https://example.com@evil.net/...`    (prefix parsed as userinfo, real host differs)
 *  - `https://example.com/org-evil/...`    (sibling path sharing the prefix)
 *  - `https://example.com/org/../x/...`    (dot-segments escaping the base path)
 *
 * Path comparison stays case-insensitive, as the textual rule always was.
 *
 * @param {unknown} url - The model download URL to validate.
 * @param {unknown} allowedUrls - Explicit allowlist of permitted base URLs (string[]).
 *   Non-string, empty, or unparseable entries are skipped.
 * @returns {{ ok: boolean, reason: string }}
 */
export function validateSourceUrl(url, allowedUrls) {
  if (typeof url !== 'string' || url.length === 0) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
  }
  if (!Array.isArray(allowedUrls) || allowedUrls.length === 0) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.SOURCE_NOT_ALLOWED };
  }

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
  }
  if (!ALLOWED_SOURCE_SCHEMES.has(parsed.protocol)) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.SCHEME_NOT_ALLOWED };
  }

  const candidateText = url.toLowerCase();
  // `pathname` is already normalised by the URL parser, so `..` segments are resolved
  // before the containment check below.
  const candidatePath = parsed.pathname.toLowerCase();

  const matched = allowedUrls.some((allowed) => {
    if (typeof allowed !== 'string' || allowed.length === 0) return false;

    // 1. Textual prefix rule (historical behaviour, kept so acceptance only ever narrows).
    if (!candidateText.startsWith(allowed.toLowerCase().replace(/\/$/, ''))) return false;

    // 2. Structural rule: same origin, and path contained on a segment boundary.
    let base;
    try {
      base = new URL(allowed);
    } catch {
      return false; // an entry that is not itself a URL can never authorise anything
    }
    if (base.origin !== parsed.origin) return false;

    // A root entry ("/") becomes "" here, so every path on that origin is contained.
    const basePath = base.pathname.toLowerCase().replace(/\/+$/, '');
    return candidatePath === basePath || candidatePath.startsWith(`${basePath}/`);
  });

  if (!matched) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.SOURCE_NOT_ALLOWED };
  }
  return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
}
150
/**
 * Check the shape of a model integrity spec without touching any file or network.
 * Intended for model-spec registration time, so malformed registry entries are caught early.
 *
 * The digest must be a string of exactly SHA256_HEX_LENGTH lowercase hex characters and the
 * size must be a positive integer. Anything else yields MALFORMED_SPEC.
 *
 * @param {unknown} expectedDigest - Lowercase SHA-256 hex string (64 chars exactly).
 * @param {unknown} expectedSizeBytes - Positive integer byte count for the model file.
 * @returns {{ ok: boolean, reason: string }}
 */
export function validateIntegritySpec(expectedDigest, expectedSizeBytes) {
  const digestLooksValid =
    typeof expectedDigest === 'string' &&
    expectedDigest.length === SHA256_HEX_LENGTH &&
    /^[0-9a-f]{64}$/.test(expectedDigest);
  if (!digestLooksValid) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
  }

  const sizeLooksValid = Number.isInteger(expectedSizeBytes) && expectedSizeBytes > 0;
  if (!sizeLooksValid) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
  }

  return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
}
175
176 /**
177 * @typedef {{ ok: boolean, reason: string }} IntegrityVerdict
178 */
179
/**
 * Build a single-use, streaming integrity checker for one model download.
 *
 * Every chunk passed to `update(chunk)` is fed into a SHA-256 hash and counted. After the
 * stream ends, `finalize()` compares the byte count (exact equality) and the digest
 * (constant-time comparison, so timing cannot reveal the expected digest) against the spec.
 *
 * SECURITY INVARIANTS:
 *   - `finalize()` MUST be called and return { ok: true } before the model is executed.
 *   - The accumulator is single-use. Once finalized or aborted, `update()` is a silent no-op
 *     and `finalize()` returns a fixed failure reason (ACCUMULATOR_FINALIZED or
 *     ACCUMULATOR_ABORTED; aborted takes precedence).
 *   - A chunk that is not a Uint8Array/Buffer aborts the accumulator.
 *   - No expected digest, source URL, or partial digest ever appears in a returned reason;
 *     reasons are fixed RUNTIME_MANAGER_REASONS constants.
 *
 * PHASE 5 OBLIGATION — the download adapter MUST:
 *   1. Create the accumulator BEFORE starting the download.
 *   2. Feed every received byte to `update()` (no skipping, no out-of-order).
 *   3. Call `finalize()` after the download stream ends.
 *   4. If `finalize().ok` is false, delete the downloaded file and REFUSE to transition
 *      the lifecycle out of 'starting' (call `transitionLifecycle(state, 'health_fail')`).
 *   5. Only if `finalize().ok` is true may Phase 5 proceed to the health-check round-trip.
 *
 * @param {{
 *   expectedDigest: string,
 *   expectedSizeBytes: number,
 *   sourceUrl: string,
 *   allowedSourceUrls: string[],
 * }} params
 * @returns {{
 *   update: (chunk: Uint8Array) => void,
 *   finalize: () => IntegrityVerdict,
 *   getReceivedBytes: () => number,
 *   abort: () => void,
 * }}
 * @throws {TypeError} when the spec or source URL fails validation (fail at creation time).
 *   The message carries only the fixed reason code.
 */
export function createIntegrityAccumulator({ expectedDigest, expectedSizeBytes, sourceUrl, allowedSourceUrls }) {
  const specVerdict = validateIntegritySpec(expectedDigest, expectedSizeBytes);
  if (!specVerdict.ok) {
    throw new TypeError(`createIntegrityAccumulator: ${specVerdict.reason}`);
  }
  const sourceVerdict = validateSourceUrl(sourceUrl, allowedSourceUrls);
  if (!sourceVerdict.ok) {
    throw new TypeError(`createIntegrityAccumulator: ${sourceVerdict.reason}`);
  }

  // The accumulator is in exactly one phase at a time. abort() wins from any phase.
  const OPEN = 'open';
  const SEALED = 'sealed';
  const ABORTED = 'aborted';
  let phase = OPEN;

  const streamHash = crypto.createHash('sha256');
  let byteCount = 0;

  const deny = (reason) => ({ ok: false, reason });
  // Re-hash a hex string into a fixed 32-byte buffer so timingSafeEqual always receives
  // equal-length inputs.
  const fingerprint = (hexText) => crypto.createHash('sha256').update(hexText, 'utf8').digest();

  return {
    /**
     * Feed the next chunk of downloaded bytes, in stream order.
     * Silently ignored once the accumulator is finalized or aborted.
     * @param {Uint8Array} chunk
     */
    update(chunk) {
      if (phase !== OPEN) return;
      const isByteChunk = chunk instanceof Uint8Array || Buffer.isBuffer(chunk);
      if (!isByteChunk) {
        phase = ABORTED;
        return;
      }
      streamHash.update(chunk);
      byteCount += chunk.length;
    },

    /**
     * Seal the accumulator and return the verdict. ok=true only when the received byte
     * count equals expectedSizeBytes AND the SHA-256 digest equals expectedDigest.
     * Sealing happens regardless of the outcome; a later call returns
     * ACCUMULATOR_FINALIZED.
     * @returns {IntegrityVerdict}
     */
    finalize() {
      if (phase === ABORTED) return deny(RUNTIME_MANAGER_REASONS.ACCUMULATOR_ABORTED);
      if (phase === SEALED) return deny(RUNTIME_MANAGER_REASONS.ACCUMULATOR_FINALIZED);
      phase = SEALED;

      // Cheap size comparison first; it carries no timing-oracle concern.
      if (byteCount !== expectedSizeBytes) {
        return deny(RUNTIME_MANAGER_REASONS.SIZE_MISMATCH);
      }

      const actual = fingerprint(streamHash.digest('hex')); // digest('hex') is lowercase, 64 chars
      const wanted = fingerprint(expectedDigest);
      return crypto.timingSafeEqual(actual, wanted)
        ? { ok: true, reason: RUNTIME_MANAGER_REASONS.OK }
        : deny(RUNTIME_MANAGER_REASONS.DIGEST_MISMATCH);
    },

    /**
     * Bytes accepted so far (for progress reporting by Phase 5).
     * @returns {number}
     */
    getReceivedBytes() {
      return byteCount;
    },

    /**
     * Abort (e.g. download cancelled or failed mid-stream).
     * Afterwards finalize() returns ACCUMULATOR_ABORTED.
     */
    abort() {
      phase = ABORTED;
    },
  };
}
296
/**
 * One-shot integrity check for a model file that is already fully in memory.
 * Meant for small models and tests; large models should be verified with
 * createIntegrityAccumulator so the file is streamed rather than loaded into RAM.
 *
 * Checks run in this order, and the first failure is returned: source URL, integrity spec,
 * `fileData` type, byte length, SHA-256 digest (constant-time comparison).
 *
 * SECURITY: the file data is never returned or logged; only the verdict { ok, reason }.
 *
 * @param {{
 *   fileData: Uint8Array,
 *   expectedDigest: string,
 *   expectedSizeBytes: number,
 *   sourceUrl: string,
 *   allowedSourceUrls: string[],
 * }} params
 * @returns {IntegrityVerdict}
 */
export function verifyModelBytes({ fileData, expectedDigest, expectedSizeBytes, sourceUrl, allowedSourceUrls }) {
  const deny = (reason) => ({ ok: false, reason });

  const sourceVerdict = validateSourceUrl(sourceUrl, allowedSourceUrls);
  if (!sourceVerdict.ok) return deny(sourceVerdict.reason);

  const specVerdict = validateIntegritySpec(expectedDigest, expectedSizeBytes);
  if (!specVerdict.ok) return deny(specVerdict.reason);

  const isByteArray = fileData instanceof Uint8Array || Buffer.isBuffer(fileData);
  if (!isByteArray) return deny(RUNTIME_MANAGER_REASONS.MALFORMED_SPEC);

  if (fileData.length !== expectedSizeBytes) {
    return deny(RUNTIME_MANAGER_REASONS.SIZE_MISMATCH);
  }

  // Re-hash both hex strings into 32-byte buffers so timingSafeEqual receives
  // equal-length inputs.
  const actualHex = crypto.createHash('sha256').update(fileData).digest('hex');
  const [actual, wanted] = [actualHex, expectedDigest].map((hexText) =>
    crypto.createHash('sha256').update(hexText, 'utf8').digest(),
  );
  if (!crypto.timingSafeEqual(actual, wanted)) {
    return deny(RUNTIME_MANAGER_REASONS.DIGEST_MISMATCH);
  }

  return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
}
337
338 // ─────────────────────────────────────────────────────────────────────────────
339 // §3 — Lifecycle state machine
340 // ─────────────────────────────────────────────────────────────────────────────
341
/**
 * The lifecycle states of the bundled runtime process, keyed by the upper-cased state name.
 * Inference is served ONLY in the 'ready' state (canServeInference enforces this).
 * @readonly
 */
export const LIFECYCLE_STATES = Object.freeze(
  Object.fromEntries(
    ['stopped', 'starting', 'ready', 'draining'].map((name) => [name.toUpperCase(), name]),
  ),
);
353
/**
 * Names of the signals that drive lifecycle state transitions.
 * Each entry notes the single transition it triggers.
 * @readonly
 */
export const LIFECYCLE_EVENTS = Object.freeze({
  // Begin cold-start: stopped → starting.
  START: 'start',
  // Health check passed after cold-start: starting → ready.
  HEALTH_OK: 'health_ok',
  // Health check failed during cold-start: starting → stopped.
  HEALTH_FAIL: 'health_fail',
  // Begin graceful drain: ready → draining.
  DRAIN: 'drain',
  // Drain complete / process exited: draining → stopped.
  STOPPED: 'stopped',
});
370
371 /**
372 * @typedef {{ state: string }} LifecycleState
373 */
374
/**
 * The complete transition table: Map<fromState, Map<event, toState>>.
 * A (fromState, event) pair that is absent from this table is an invalid transition and is
 * rejected (fail-closed) by transitionLifecycle.
 *
 * Written as flat [from, event, to] rows so the whole state machine reads as one list, then
 * folded into the nested lookup maps.
 *
 * @type {ReadonlyMap<string, ReadonlyMap<string, string>>}
 */
const LIFECYCLE_TRANSITIONS = [
  [LIFECYCLE_STATES.STOPPED, LIFECYCLE_EVENTS.START, LIFECYCLE_STATES.STARTING],
  [LIFECYCLE_STATES.STARTING, LIFECYCLE_EVENTS.HEALTH_OK, LIFECYCLE_STATES.READY],
  [LIFECYCLE_STATES.STARTING, LIFECYCLE_EVENTS.HEALTH_FAIL, LIFECYCLE_STATES.STOPPED],
  [LIFECYCLE_STATES.READY, LIFECYCLE_EVENTS.DRAIN, LIFECYCLE_STATES.DRAINING],
  [LIFECYCLE_STATES.DRAINING, LIFECYCLE_EVENTS.STOPPED, LIFECYCLE_STATES.STOPPED],
].reduce((table, [fromState, event, toState]) => {
  if (!table.has(fromState)) {
    table.set(fromState, new Map());
  }
  table.get(fromState).set(event, toState);
  return table;
}, new Map());
402
/**
 * Produce a fresh lifecycle state object. The runtime always begins in 'stopped'.
 * @returns {LifecycleState}
 */
export function createLifecycleState() {
  const { STOPPED: initial } = LIFECYCLE_STATES;
  return { state: initial };
}
410
/**
 * Apply a lifecycle event to a lifecycle state.
 *
 * Pure: `currentState` is never mutated; success returns a brand-new state object.
 * Fail-closed: every rejection returns { ok: false, reason } —
 *   - malformed `currentState` (not an object, or `state` not a string) → UNKNOWN_STATE,
 *     with `newState` reset to a fresh 'stopped' state;
 *   - non-string or empty `event` → UNKNOWN_EVENT;
 *   - a state string with no entry in the transition table → UNKNOWN_STATE;
 *   - an event not permitted from the current state → INVALID_TRANSITION.
 *   In the last three cases `newState` is the unchanged `currentState`.
 *
 * SECURITY: this machine is what keeps inference from being served while the runtime is
 * starting, draining, or stopped. canServeInference tests
 * `state.state === LIFECYCLE_STATES.READY`, and the only route to 'ready' through this
 * function is a health_ok received while 'starting'.
 *
 * @param {LifecycleState} currentState
 * @param {string} event - One of LIFECYCLE_EVENTS values.
 * @returns {{ ok: boolean, newState: LifecycleState, reason?: string }}
 */
export function transitionLifecycle(currentState, event) {
  const reject = (newState, reason) => ({ ok: false, newState, reason });

  const stateIsWellFormed =
    Boolean(currentState) &&
    typeof currentState === 'object' &&
    typeof currentState.state === 'string';
  if (!stateIsWellFormed) {
    return reject(createLifecycleState(), RUNTIME_MANAGER_REASONS.UNKNOWN_STATE);
  }

  const eventIsWellFormed = typeof event === 'string' && event.length > 0;
  if (!eventIsWellFormed) {
    return reject(currentState, RUNTIME_MANAGER_REASONS.UNKNOWN_EVENT);
  }

  const outgoing = LIFECYCLE_TRANSITIONS.get(currentState.state);
  if (!outgoing) {
    return reject(currentState, RUNTIME_MANAGER_REASONS.UNKNOWN_STATE);
  }
  if (!outgoing.has(event)) {
    return reject(currentState, RUNTIME_MANAGER_REASONS.INVALID_TRANSITION);
  }

  return { ok: true, newState: { state: outgoing.get(event) } };
}
444
/**
 * Report whether the runtime may serve inference: true ONLY for an object whose `state`
 * is exactly 'ready'. Anything else — including null, primitives, and objects in any other
 * state — yields false.
 *
 * SECURITY INVARIANT: inference callers MUST call this before routing to the runtime.
 * The check is deliberately a single strict comparison so that a malformed state object
 * cannot produce a truthy result.
 *
 * @param {LifecycleState} lifecycleState
 * @returns {boolean}
 */
export function canServeInference(lifecycleState) {
  const isObject = typeof lifecycleState === 'object' && lifecycleState !== null;
  return isObject && lifecycleState.state === LIFECYCLE_STATES.READY;
}
459
460 // ─────────────────────────────────────────────────────────────────────────────
461 // §4 — Backpressure / concurrency admission
462 // ─────────────────────────────────────────────────────────────────────────────
463
464 /**
465 * @typedef {Object} AdmissionState
466 * @property {number} maxInFlight - Maximum concurrent inference requests allowed.
467 * @property {number} queueBound - Maximum requests that may be queued (pending admission).
468 * @property {number} inFlight - Current count of admitted (in-progress) requests.
469 * @property {number} queued - Current count of queued (pending) requests.
470 */
471
/**
 * Build an empty admission state (nothing in flight, nothing queued) for the given limits.
 *
 * Fail-closed: both limits must be positive integers, otherwise a TypeError is thrown.
 *
 * @param {{ maxInFlight: number, queueBound: number }} params
 * @returns {AdmissionState}
 * @throws {TypeError} on invalid parameters.
 */
export function createAdmissionState({ maxInFlight, queueBound }) {
  const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;

  if (!isPositiveInteger(maxInFlight)) {
    throw new TypeError('createAdmissionState: maxInFlight must be a positive integer');
  }
  if (!isPositiveInteger(queueBound)) {
    throw new TypeError('createAdmissionState: queueBound must be a positive integer');
  }

  const emptyCounters = { inFlight: 0, queued: 0 };
  return { maxInFlight, queueBound, ...emptyCounters };
}
490
/**
 * Decide whether a new inference request may run now, must wait, or must be refused.
 *
 * Returns:
 *   { ok: true,  reason: 'ok' }                        — an in-flight slot is free; proceed.
 *   { ok: false, reason: 'at_capacity' }               — all slots busy, queue has room.
 *   { ok: false, reason: 'queue_full' }                — slots and queue are both full.
 *   { ok: false, reason: 'malformed_admission_state' } — the state failed validation.
 *
 * Callers interpret at_capacity as "enqueue and wait" and queue_full as "return busy to
 * caller." Phase 5 orchestrates the queue and calls recordInFlight when a slot opens.
 *
 * Fail-closed: the state must be an object whose limits are positive integers and whose
 * counters are non-negative integers; otherwise the request is denied with
 * MALFORMED_ADMISSION_STATE, since capacity cannot be proven.
 *
 * @param {AdmissionState} state
 * @returns {{ ok: boolean, reason: string }}
 */
export function evaluateAdmission(state) {
  const isIntegerAtLeast = (value, minimum) => Number.isInteger(value) && value >= minimum;

  const wellFormed =
    Boolean(state) &&
    typeof state === 'object' &&
    isIntegerAtLeast(state.maxInFlight, 1) &&
    isIntegerAtLeast(state.queueBound, 1) &&
    isIntegerAtLeast(state.inFlight, 0) &&
    isIntegerAtLeast(state.queued, 0);
  if (!wellFormed) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_ADMISSION_STATE };
  }

  const slotFree = state.inFlight < state.maxInFlight;
  if (slotFree) {
    return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
  }

  const queueHasRoom = state.queued < state.queueBound;
  return {
    ok: false,
    reason: queueHasRoom ? RUNTIME_MANAGER_REASONS.AT_CAPACITY : RUNTIME_MANAGER_REASONS.QUEUE_FULL,
  };
}
529
/**
 * Account for one request being granted a concurrency slot.
 * Pure: the input is left untouched and a NEW admission state with `inFlight + 1` is
 * returned.
 *
 * Phase 5 calls this when it is about to dispatch the request to the runtime, and is
 * responsible for calling recordCompletion once that request finishes.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} on malformed state (not an object, or `inFlight` not an integer).
 */
export function recordInFlight(state) {
  const wellFormed =
    state !== null && typeof state === 'object' && Number.isInteger(state.inFlight);
  if (!wellFormed) {
    throw new TypeError('recordInFlight: state is malformed');
  }
  const { inFlight } = state;
  return { ...state, inFlight: inFlight + 1 };
}
547
/**
 * Account for one in-flight request finishing, which frees its concurrency slot.
 * Pure: the input is left untouched and a NEW admission state with `inFlight - 1` is
 * returned.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} on malformed state, or when there is no in-flight request to
 *   complete (`inFlight` is zero or negative).
 */
export function recordCompletion(state) {
  const wellFormed =
    state !== null && typeof state === 'object' && Number.isInteger(state.inFlight);
  if (!wellFormed) {
    throw new TypeError('recordCompletion: state is malformed');
  }
  const { inFlight } = state;
  // inFlight is an integer here, so "< 1" is the same as "<= 0".
  if (inFlight < 1) {
    throw new TypeError('recordCompletion: no in-flight requests to complete');
  }
  return { ...state, inFlight: inFlight - 1 };
}
565
/**
 * Account for one request joining the wait queue (it has not yet been admitted to
 * in-flight).
 * Pure: the input is left untouched and a NEW admission state with `queued + 1` is returned.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} on malformed state (not an object, or `queued` not an integer).
 */
export function recordQueued(state) {
  const wellFormed =
    state !== null && typeof state === 'object' && Number.isInteger(state.queued);
  if (!wellFormed) {
    throw new TypeError('recordQueued: state is malformed');
  }
  const { queued } = state;
  return { ...state, queued: queued + 1 };
}
580
/**
 * Account for one request leaving the wait queue, whether it was admitted or cancelled.
 * Pure: the input is left untouched and a NEW admission state with `queued - 1` is returned.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} on malformed state, or when there is no queued request to dequeue
 *   (`queued` is zero or negative).
 */
export function recordDequeued(state) {
  const wellFormed =
    state !== null && typeof state === 'object' && Number.isInteger(state.queued);
  if (!wellFormed) {
    throw new TypeError('recordDequeued: state is malformed');
  }
  const { queued } = state;
  // queued is an integer here, so "< 1" is the same as "<= 0".
  if (queued < 1) {
    throw new TypeError('recordDequeued: no queued requests to dequeue');
  }
  return { ...state, queued: queued - 1 };
}
598
599 // ─────────────────────────────────────────────────────────────────────────────
600 // §5 — Resource-limit policy
601 // ─────────────────────────────────────────────────────────────────────────────
602
603 /**
604 * @typedef {Object} ResourceLimits
605 * @property {number} maxRamBytes - Maximum RAM usage in bytes (> 0).
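606 * @property {number} maxVramBytes - Maximum VRAM usage in bytes (> 0 and finite; Infinity is rejected by createResourceLimits and evaluateResourceLimits).
607 * @property {number} maxCpuPercent - Maximum CPU usage percent; createResourceLimits requires a value in (0, 100] (0 excluded, 100 allowed).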
608 */
609
610 /**
611 * @typedef {Object} ResourceObservation
612 * @property {number} ramBytes - Current RAM used by the runtime process in bytes.
613 * @property {number} vramBytes - Current VRAM used (0 if no GPU).
614 * @property {number} cpuPercent - Current CPU percent (0–100).
615 */
616
/**
 * Validate a set of resource ceilings and return them as a ResourceLimits object.
 *
 * Fail-closed: `maxRamBytes` and `maxVramBytes` must be positive finite numbers (Infinity
 * and NaN are rejected), and `maxCpuPercent` must lie in (0, 100].
 *
 * @param {{ maxRamBytes: number, maxVramBytes: number, maxCpuPercent: number }} params
 * @returns {ResourceLimits}
 * @throws {TypeError} on invalid parameters.
 */
export function createResourceLimits({ maxRamBytes, maxVramBytes, maxCpuPercent }) {
  const isPositiveFinite = (value) => Number.isFinite(value) && value > 0;

  if (!isPositiveFinite(maxRamBytes)) {
    throw new TypeError('createResourceLimits: maxRamBytes must be a positive finite number');
  }
  if (!isPositiveFinite(maxVramBytes)) {
    throw new TypeError('createResourceLimits: maxVramBytes must be a positive finite number');
  }

  const cpuInRange = isPositiveFinite(maxCpuPercent) && maxCpuPercent <= 100;
  if (!cpuInRange) {
    throw new TypeError('createResourceLimits: maxCpuPercent must be in (0, 100]');
  }

  return { maxRamBytes, maxVramBytes, maxCpuPercent };
}
637
/**
 * Decide whether a resource observation is within the configured ceilings.
 * Reports the FIRST violation found, checking RAM, then VRAM, then CPU; otherwise ok.
 * A value exactly equal to its ceiling is within the limit.
 *
 * Fail-closed:
 *   - `limits` must be an object whose `maxRamBytes` and `maxVramBytes` are positive finite
 *     numbers and whose `maxCpuPercent` lies in (0, 100] — the same rules
 *     createResourceLimits enforces. A hand-built limits object with `maxCpuPercent` above
 *     100 is rejected rather than being allowed to loosen the CPU gate.
 *     Violations → MALFORMED_LIMITS.
 *   - `observation` must be an object whose three fields are non-negative finite numbers.
 *     Violations → MALFORMED_OBSERVATION.
 *   Limits are validated before the observation.
 *
 * The numeric values of the observation are NEVER included in the returned reason.
 *
 * @param {ResourceObservation} observation - Current runtime resource usage (from Phase 5 stat adapter).
 * @param {ResourceLimits} limits - Configured ceilings.
 * @returns {{ ok: boolean, reason: string }}
 */
export function evaluateResourceLimits(observation, limits) {
  if (
    !limits ||
    typeof limits !== 'object' ||
    !Number.isFinite(limits.maxRamBytes) || limits.maxRamBytes <= 0 ||
    !Number.isFinite(limits.maxVramBytes) || limits.maxVramBytes <= 0 ||
    !Number.isFinite(limits.maxCpuPercent) || limits.maxCpuPercent <= 0 ||
    // Keep in step with createResourceLimits: a ceiling above 100% is malformed.
    limits.maxCpuPercent > 100
  ) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_LIMITS };
  }
  if (
    !observation ||
    typeof observation !== 'object' ||
    !Number.isFinite(observation.ramBytes) || observation.ramBytes < 0 ||
    !Number.isFinite(observation.vramBytes) || observation.vramBytes < 0 ||
    !Number.isFinite(observation.cpuPercent) || observation.cpuPercent < 0
  ) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_OBSERVATION };
  }

  if (observation.ramBytes > limits.maxRamBytes) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.RAM_OVER_LIMIT };
  }
  if (observation.vramBytes > limits.maxVramBytes) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.VRAM_OVER_LIMIT };
  }
  if (observation.cpuPercent > limits.maxCpuPercent) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.CPU_OVER_LIMIT };
  }

  return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
}
681
682 // ─────────────────────────────────────────────────────────────────────────────
683 // §6 — Top-level runtime request gate
684 // ─────────────────────────────────────────────────────────────────────────────
685
686 /**
687 * @typedef {Object} RuntimeDecision
688 * @property {boolean} ok - true only when all gates pass (inference may proceed).
689 * @property {string} reason - A RUNTIME_MANAGER_REASONS constant. Never a secret.
690 */
691
/**
 * Single entry point that decides whether one inference request may reach the bundled
 * runtime.
 *
 * Gates run in this order and the first refusal is returned:
 *   1. Lifecycle gate — the runtime must be 'ready'; otherwise NOT_READY.
 *   2. Admission gate — in-flight and queue bounds; the reason from evaluateAdmission is
 *      passed through (e.g. AT_CAPACITY / QUEUE_FULL).
 *   3. Resource-limit gate — RAM/VRAM/CPU ceilings; the reason from evaluateResourceLimits
 *      is passed through.
 *
 * ALL THREE gates must pass for the request to be allowed.
 * Fail-closed: a missing `params` object, any missing (falsy) field, or any unexpected
 * exception yields MALFORMED_REQUEST_PARAMS.
 *
 * SECURITY PROPERTIES:
 *   - Inference is NEVER allowed in a non-ready lifecycle state.
 *   - Backpressure trips at the exact configured bound.
 *   - Resource limits are enforced BEFORE the request reaches the runtime, bounding OOM risk.
 *   - No secret, path, URL, or numeric observation value appears in any reason string.
 *   - No side effects: the caller must call recordInFlight on the admission state when it
 *     decides to proceed (the decision is kept separate from state mutation).
 *
 * @param {{
 *   lifecycleState: LifecycleState,
 *   admissionState: AdmissionState,
 *   resourceObservation: ResourceObservation,
 *   resourceLimits: ResourceLimits,
 * }} params
 * @returns {RuntimeDecision}
 */
export function evaluateRuntimeRequest(params) {
  const deny = (reason) => ({ ok: false, reason });

  try {
    const { lifecycleState, admissionState, resourceObservation, resourceLimits } = params ?? {};

    const everyInputPresent = [lifecycleState, admissionState, resourceObservation, resourceLimits]
      .every(Boolean);
    if (!everyInputPresent) {
      return deny(RUNTIME_MANAGER_REASONS.MALFORMED_REQUEST_PARAMS);
    }

    // Gate 1: lifecycle.
    if (!canServeInference(lifecycleState)) {
      return deny(RUNTIME_MANAGER_REASONS.NOT_READY);
    }

    // Gate 2: admission.
    const admissionVerdict = evaluateAdmission(admissionState);
    if (!admissionVerdict.ok) {
      return deny(admissionVerdict.reason);
    }

    // Gate 3: resource limits.
    const resourceVerdict = evaluateResourceLimits(resourceObservation, resourceLimits);
    if (!resourceVerdict.ok) {
      return deny(resourceVerdict.reason);
    }

    return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
  } catch {
    // Defense in depth: an unexpected error must never carry input data outward.
    return deny(RUNTIME_MANAGER_REASONS.MALFORMED_REQUEST_PARAMS);
  }
}
750
751 // ─────────────────────────────────────────────────────────────────────────────
752 // §7 — Injected adapter interface (type documentation only; no implementation)
753 // ─────────────────────────────────────────────────────────────────────────────
754
755 /**
756 * The adapter interface that Phase 5 MUST supply to connect the decision core to the real
757 * Ollama/llama.cpp runtime. The pure module in this file imports NONE of these β€” they are
758 * passed explicitly by Phase 5's binding layer.
759 *
760 * SECURITY INVARIANT: the adapter must not expose vault, canister, keychain, or JWT handles.
761 * It is scoped exclusively to model-lifecycle operations (spawn, download, health, resource probe).
762 *
763 * @typedef {Object} RuntimeAdapterFns
764 * @property {(opts: SpawnOpts) => Promise<SpawnHandle>} spawn
765 * Spawn the Ollama/llama.cpp process. Must bind to 127.0.0.1 only (Phase 2 §4.5).
766 * Called ONLY after integrity verification passes (finalize().ok === true).
767 * @property {(url: string, onChunk: (chunk: Uint8Array) => void) => Promise<void>} download
768 * Download a model file over TLS, calling onChunk for each received chunk.
769 * The URL MUST be one that passed validateSourceUrl. Phase 5 feeds chunks to the
770 * integrity accumulator via the onChunk callback.
771 * @property {(handle: SpawnHandle) => Promise<boolean>} healthCheck
772 * Return true if the runtime responds correctly to a health probe (OpenAI-compat
773 * GET /v1/models or Ollama GET /api/tags). Phase 5 drives the health-check retry loop
774 * and calls transitionLifecycle(state, HEALTH_OK | HEALTH_FAIL).
775 * @property {() => Promise<ResourceObservation>} statResources
776 * Return the current RAM/VRAM/CPU usage for the runtime process. Called before each
777 * inference request; result is passed to evaluateResourceLimits.
778 */
779
780 /**
781 * @typedef {Object} SpawnOpts
782 * @property {string} binaryPath - Absolute path to the Ollama/llama.cpp binary.
783 * @property {string} modelPath - Absolute path to the verified model file.
784 * @property {number} port - Ephemeral port allocated by Phase 5 (non-predictable).
785 * @property {number} maxRamBytes - Memory ceiling to pass to the runtime's CLI flags.
786 */
787
788 /**
789 * @typedef {Object} SpawnHandle
790 * @property {number} pid - Process ID of the spawned runtime.
791 * @property {() => Promise<void>} kill - Gracefully shut down the runtime.
792 */