aaronrene/knowtation — blame/sha256:3/companion-runtime-manager.mjs — MuseHub

aaronrene / knowtation public

1 files

1 commits

0 hotspots

0 🧊 dead

0 💥 blast risk

sha256:4 fix(security): pin patched transitive deps to clear Dependabot moderate… · aaronrene · Jun 11, 2026

1	/**
2	* Companion App — Runtime Manager DECISION CORE.
3	*
4	* Phase 4 of the Companion App build plan (feat/companion-app).
5	* See docs/COMPANION-APP-PHASE-4-RUNTIME-MANAGER.md for the accepted design, the adversarial
6	* threat model, and the Phase 5 obligations to spawn the real runtime and perform the real
7	* verified download behind the shared bind gate.
8	*
9	* WHAT THIS MODULE IS
10	* The companion app (Phase 5+) bundles a local AI inference runtime (Ollama / llama.cpp).
11	* This module is the DECISION CORE for managing that runtime's lifecycle:
12	* - Supply-chain integrity: verify a downloaded model file before it is ever executed.
13	* - Lifecycle state machine: stopped → starting → ready → draining → stopped.
14	* - Backpressure / concurrency admission: queue bound, max-in-flight.
15	* - Resource-limit policy: RAM/VRAM/CPU ceilings; reject when over.
16	*
17	* DESIGN CONSTRAINTS (read before modifying — these are security invariants):
18	* - PURE. No I/O, no process.env reads, no child_process, no network, no filesystem,
19	* no logging, no clock reads. Every input is passed explicitly. The actual spawn of
20	* Ollama/llama.cpp, the real model download over TLS, and OS resource probing are
21	* deferred to Phase 5 via the INJECTED adapter interface (RuntimeAdapterFns).
22	* - FAIL-CLOSED. Any missing, malformed, ambiguous, or unrecognised input → DENY.
23	* There is no fail-open branch anywhere in this module.
24	* - NO AMBIENT AUTHORITY. The module imports no vault, canister, keychain, or auth module.
25	* The injected adapter interface is typed to model-lifecycle operations only.
26	* - NO SECRET IN OUTPUT. Reason codes are fixed constants. No model path, download URL,
27	* binary path, SHA-256 digest value, or access token ever appears in a reason string,
28	* a return value, or a thrown error.
29	* - SUPPLY-CHAIN INTEGRITY. A model file MUST pass SHA-256 digest + size verification
30	* via the integrity accumulator BEFORE canServeInference returns true for the first time.
31	* Phase 5 is responsible for calling finalize() and gating execution on { ok: true }.
32	*
33	* Hard constraint from docs/COMPANION-APP-DESIGN-AND-AUTHORIZATION-GATE.md §4 item 6:
34	* "No ambient authority. The endpoint exposes only model inference; it never exposes vault
35	* read/write, the canister client, or the stored JWT."
36	*
37	* Gate §12 Phase 4 obligations (remaining for Phase 5):
38	* - Spawn Ollama/llama.cpp (real child_process) after integrity is verified.
39	* - Perform the real model download over TLS using the injected download adapter.
40	* - Call the OS resource probe (injected stat adapter) to supply ResourceObservation.
41	* - Run the health-check loop and call transitionLifecycle with health_ok/health_fail.
42	* - Set companionAvailable=true in LaneCapabilities ONLY after lifecycle reaches 'ready'.
43	* - Wire the Phase 2 loopback guard BEFORE any model work (Phase 2 boundary stays intact).
44	*/
45
46	import crypto from 'node:crypto';
47
48	// ─────────────────────────────────────────────────────────────────────────────
49	// §1 — Reason codes (frozen constants; never derived from input)
50	// ─────────────────────────────────────────────────────────────────────────────
51
/**
 * Closed vocabulary of reason codes used by the decision functions in this module.
 *
 * Every value is a fixed string literal and the table is frozen, so a `reason` can never
 * carry a model path, URL, digest, or any other caller-supplied value.
 * @readonly
 */
export const RUNTIME_MANAGER_REASONS = Object.freeze({
  // ── Supply-chain integrity (§2) ──
  OK: 'ok',
  MALFORMED_SPEC: 'malformed_spec',
  SOURCE_NOT_ALLOWED: 'source_not_allowed',
  SCHEME_NOT_ALLOWED: 'scheme_not_allowed',
  SIZE_MISMATCH: 'size_mismatch',
  DIGEST_MISMATCH: 'digest_mismatch',
  ACCUMULATOR_FINALIZED: 'accumulator_finalized',
  ACCUMULATOR_ABORTED: 'accumulator_aborted',

  // ── Lifecycle state machine (§3) ──
  INVALID_TRANSITION: 'invalid_transition',
  NOT_READY: 'not_ready',
  UNKNOWN_EVENT: 'unknown_event',
  UNKNOWN_STATE: 'unknown_state',

  // ── Backpressure / admission (§4) ──
  MALFORMED_ADMISSION_STATE: 'malformed_admission_state',
  AT_CAPACITY: 'at_capacity',
  QUEUE_FULL: 'queue_full',
  NO_IN_FLIGHT_TO_COMPLETE: 'no_in_flight_to_complete',

  // ── Resource-limit policy (§5) ──
  MALFORMED_LIMITS: 'malformed_limits',
  MALFORMED_OBSERVATION: 'malformed_observation',
  RAM_OVER_LIMIT: 'ram_over_limit',
  VRAM_OVER_LIMIT: 'vram_over_limit',
  CPU_OVER_LIMIT: 'cpu_over_limit',

  // ── Top-level request gate (§6) ──
  MALFORMED_REQUEST_PARAMS: 'malformed_request_params',
});
91
92	// ─────────────────────────────────────────────────────────────────────────────
93	// §2 — Supply-chain integrity verification
94	// ─────────────────────────────────────────────────────────────────────────────
95
/**
 * URL schemes permitted as model download sources: HTTPS only.
 *
 * HTTP is banned at spec-validation time (not at download time), so a misconfigured
 * registry cannot silently serve a model over a cleartext channel.
 *
 * The set is sealed: `add`, `delete`, and `clear` are shadowed on the instance with a
 * throwing function and the object is frozen. A plain exported `Set` is mutable, and
 * `Object.freeze` alone does not stop `Set.prototype.add`, so without this an importer
 * could widen the allowlist with `ALLOWED_SOURCE_SCHEMES.add('http:')`.
 * NOTE: `Set.prototype.add.call(ALLOWED_SOURCE_SCHEMES, …)` can still reach the
 * internal slot; this guards direct method calls only.
 * @type {ReadonlySet<string>}
 */
export const ALLOWED_SOURCE_SCHEMES = (() => {
  const schemes = new Set(['https:']);
  const refuseMutation = () => {
    throw new TypeError('ALLOWED_SOURCE_SCHEMES is read-only');
  };
  for (const mutator of ['add', 'delete', 'clear']) {
    // defineProperty defaults: non-writable, non-enumerable, non-configurable.
    Object.defineProperty(schemes, mutator, { value: refuseMutation });
  }
  return Object.freeze(schemes);
})();
104
/**
 * Number of characters in a SHA-256 digest written as hex:
 * 32 digest bytes × 2 hex characters per byte.
 * @type {number}
 */
export const SHA256_HEX_LENGTH = 32 * 2;
110
/**
 * Validate that a model download URL (a) parses as a URL, (b) uses an allowed scheme
 * (HTTPS only), (c) carries no embedded credentials, and (d) falls under one of the
 * caller-supplied allowlisted base URLs.
 *
 * Allowlist matching is done on PARSED URLs, never on raw strings:
 * - the origin (scheme + host + port) must be exactly equal to the base URL's origin, so
 *   `https://models.example.com.evil.test/`, `https://models.example.com@evil.test/` and
 *   `https://models.example.com:8443/` do not match a base of `https://models.example.com`;
 * - the path must equal the base path or extend it at a `/` boundary, so a base of
 *   `/models` matches `/models/a.gguf` but not `/models-evil/a.gguf`. Dot segments are
 *   resolved by the URL parser before comparison, so `/models/../private` does not match;
 * - path comparison is case-insensitive and ignores trailing slashes on the base;
 * - query string and fragment of the base entry are ignored.
 *
 * Fail-closed: any invalid URL, non-HTTPS scheme, embedded credentials, empty allowlist,
 * unparsable allowlist entry, or allowlist miss → deny.
 * The source URL itself is NEVER copied into the returned reason string.
 *
 * @param {unknown} url - The model download URL to validate.
 * @param {unknown} allowedUrls - Explicit allowlist of permitted base URLs (string[]).
 * @returns {{ ok: boolean, reason: string }}
 */
export function validateSourceUrl(url, allowedUrls) {
  const deny = (reason) => ({ ok: false, reason });

  if (typeof url !== 'string' || url.length === 0) {
    return deny(RUNTIME_MANAGER_REASONS.MALFORMED_SPEC);
  }
  if (!Array.isArray(allowedUrls) || allowedUrls.length === 0) {
    return deny(RUNTIME_MANAGER_REASONS.SOURCE_NOT_ALLOWED);
  }

  let candidate;
  try {
    candidate = new URL(url);
  } catch {
    return deny(RUNTIME_MANAGER_REASONS.MALFORMED_SPEC);
  }
  if (!ALLOWED_SOURCE_SCHEMES.has(candidate.protocol)) {
    return deny(RUNTIME_MANAGER_REASONS.SCHEME_NOT_ALLOWED);
  }
  // Credentials in the authority are never needed for a public model source and are a
  // classic host-confusion vector; the previous raw-prefix match also rejected them.
  if (candidate.username !== '' || candidate.password !== '') {
    return deny(RUNTIME_MANAGER_REASONS.SOURCE_NOT_ALLOWED);
  }

  const candidatePath = candidate.pathname.toLowerCase();
  const matched = allowedUrls.some((allowed) => {
    if (typeof allowed !== 'string' || allowed.length === 0) return false;
    let base;
    try {
      base = new URL(allowed);
    } catch {
      return false; // an unparsable allowlist entry can never authorise anything
    }
    if (base.origin !== candidate.origin) return false;

    // '' means the base is the site root and therefore covers every path on the origin.
    const basePath = base.pathname.toLowerCase().replace(/\/+$/, '');
    return (
      basePath === '' ||
      candidatePath === basePath ||
      candidatePath.startsWith(`${basePath}/`)
    );
  });

  if (!matched) {
    return deny(RUNTIME_MANAGER_REASONS.SOURCE_NOT_ALLOWED);
  }
  return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
}
150
/**
 * Check the shape of a model integrity spec without doing any I/O.
 *
 * The digest must be a string of exactly SHA256_HEX_LENGTH lowercase hex characters and
 * the size must be a positive integer. Both failures map to the same MALFORMED_SPEC
 * reason, so the verdict never reveals which field was wrong or what it contained.
 *
 * @param {unknown} expectedDigest - Lowercase SHA-256 hex string (64 chars exactly).
 * @param {unknown} expectedSizeBytes - Positive integer byte count for the model file.
 * @returns {{ ok: boolean, reason: string }}
 */
export function validateIntegritySpec(expectedDigest, expectedSizeBytes) {
  const digestIsWellFormed =
    typeof expectedDigest === 'string' &&
    expectedDigest.length === SHA256_HEX_LENGTH &&
    /^[0-9a-f]{64}$/.test(expectedDigest);

  const sizeIsWellFormed = Number.isInteger(expectedSizeBytes) && expectedSizeBytes > 0;

  return digestIsWellFormed && sizeIsWellFormed
    ? { ok: true, reason: RUNTIME_MANAGER_REASONS.OK }
    : { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
}
175
176	/**
177	* @typedef {{ ok: boolean, reason: string }} IntegrityVerdict
178	*/
179
180	/**
181	* Create a streaming integrity accumulator for a model download.
182	*
183	* The accumulator feeds every downloaded byte into a SHA-256 hash and tracks received byte
184	* count. Call `update(chunk)` for each received chunk, then `finalize()` after the download
185	* completes. `finalize()` uses constant-time comparison for the digest (preventing a timing
186	* oracle on the expected digest) and an exact numeric equality check for size.
187	*
188	* SECURITY INVARIANTS:
189	* - `finalize()` MUST be called (and return { ok: true }) before the model is executed.
190	* - Once `finalize()` is called (or the accumulator is aborted), further calls to
191	* `update()` and `finalize()` return a fixed failure reason — the accumulator is a
192	* single-use object.
193	* - Neither the expected digest, the source URL, nor any computed partial digest appears
194	* in any returned reason string (the reasons are fixed RUNTIME_MANAGER_REASONS constants).
195	*
196	* PHASE 5 OBLIGATION:
197	* Phase 5's download adapter MUST:
198	* 1. Create the accumulator BEFORE starting the download.
199	* 2. Feed every received byte to `update()` (no skipping, no out-of-order).
200	* 3. Call `finalize()` after the download stream ends.
201	* 4. If `finalize().ok` is false, delete the downloaded file and REFUSE to transition
202	* the lifecycle out of 'starting' (call `transitionLifecycle(state, 'health_fail')`).
203	* 5. Only if `finalize().ok` is true may Phase 5 proceed to the health-check round-trip.
204	*
205	* @param {{
206	* expectedDigest: string,
207	* expectedSizeBytes: number,
208	* sourceUrl: string,
209	* allowedSourceUrls: string[],
210	* }} params
211	* @returns {{
212	* update: (chunk: Uint8Array) => void,
213	* finalize: () => IntegrityVerdict,
214	* getReceivedBytes: () => number,
215	* abort: () => void,
216	* }}
217	* @throws {TypeError} when the spec or source URL fails validation (fail at creation time).
218	*/
219	export function createIntegrityAccumulator({ expectedDigest, expectedSizeBytes, sourceUrl, allowedSourceUrls }) {
220	const specCheck = validateIntegritySpec(expectedDigest, expectedSizeBytes);
221	if (!specCheck.ok) throw new TypeError(`createIntegrityAccumulator: ${specCheck.reason}`);
222
223	const srcCheck = validateSourceUrl(sourceUrl, allowedSourceUrls);
224	if (!srcCheck.ok) throw new TypeError(`createIntegrityAccumulator: ${srcCheck.reason}`);
225
226	const hasher = crypto.createHash('sha256');
227	let receivedBytes = 0;
228	let finalized = false;
229	let aborted = false;
230
231	return {
232	/**
233	* Feed a chunk of downloaded bytes into the accumulator.
234	* Must be called in order, for every byte, with no skipping.
235	* @param {Uint8Array} chunk
236	*/
237	update(chunk) {
238	if (finalized) return; // silently ignore — finalize already called
239	if (aborted) return; // silently ignore — already aborted
240	if (!(chunk instanceof Uint8Array) && !Buffer.isBuffer(chunk)) {
241	aborted = true;
242	return;
243	}
244	hasher.update(chunk);
245	receivedBytes += chunk.length;
246	},
247
248	/**
249	* Finalize the integrity check. Returns an IntegrityVerdict with ok=true only when
250	* the received byte count matches expectedSizeBytes AND the SHA-256 digest matches
251	* expectedDigest (constant-time comparison).
252	*
253	* After finalize() is called (regardless of result), the accumulator is sealed — further
254	* updates are no-ops and further finalize() calls return ACCUMULATOR_FINALIZED.
255	* @returns {IntegrityVerdict}
256	*/
257	finalize() {
258	if (aborted) return { ok: false, reason: RUNTIME_MANAGER_REASONS.ACCUMULATOR_ABORTED };
259	if (finalized) return { ok: false, reason: RUNTIME_MANAGER_REASONS.ACCUMULATOR_FINALIZED };
260	finalized = true;
261
262	// Size check first (cheap, no timing oracle concern).
263	if (receivedBytes !== expectedSizeBytes) {
264	return { ok: false, reason: RUNTIME_MANAGER_REASONS.SIZE_MISMATCH };
265	}
266
267	// Digest check — constant-time to prevent a timing oracle on the expected digest.
268	// Hash both sides to equal-length 32-byte buffers before timingSafeEqual.
269	const computedHex = hasher.digest('hex'); // lowercase, 64 chars
270	const da = crypto.createHash('sha256').update(computedHex, 'utf8').digest();
271	const db = crypto.createHash('sha256').update(expectedDigest, 'utf8').digest();
272	if (!crypto.timingSafeEqual(da, db)) {
273	return { ok: false, reason: RUNTIME_MANAGER_REASONS.DIGEST_MISMATCH };
274	}
275
276	return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
277	},
278
279	/**
280	* Return the number of bytes received so far (for progress reporting by Phase 5).
281	* @returns {number}
282	*/
283	getReceivedBytes() {
284	return receivedBytes;
285	},
286
287	/**
288	* Abort the accumulator (e.g., download cancelled or error mid-stream).
289	* After abort(), finalize() returns ACCUMULATOR_ABORTED.
290	*/
291	abort() {
292	aborted = true;
293	},
294	};
295	}
296
297	/**
298	* Verify an already-downloaded model file held entirely in memory.
299	* Suitable for small models or testing. For large models, Phase 5 should use
300	* createIntegrityAccumulator with streaming to avoid loading the full file into RAM.
301	*
302	* SECURITY: the file data is never returned or logged; only the verdict { ok, reason }.
303	*
304	* @param {{
305	* fileData: Uint8Array,
306	* expectedDigest: string,
307	* expectedSizeBytes: number,
308	* sourceUrl: string,
309	* allowedSourceUrls: string[],
310	* }} params
311	* @returns {IntegrityVerdict}
312	*/
313	export function verifyModelBytes({ fileData, expectedDigest, expectedSizeBytes, sourceUrl, allowedSourceUrls }) {
314	const srcCheck = validateSourceUrl(sourceUrl, allowedSourceUrls);
315	if (!srcCheck.ok) return { ok: false, reason: srcCheck.reason };
316
317	const specCheck = validateIntegritySpec(expectedDigest, expectedSizeBytes);
318	if (!specCheck.ok) return { ok: false, reason: specCheck.reason };
319
320	if (!(fileData instanceof Uint8Array) && !Buffer.isBuffer(fileData)) {
321	return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
322	}
323
324	if (fileData.length !== expectedSizeBytes) {
325	return { ok: false, reason: RUNTIME_MANAGER_REASONS.SIZE_MISMATCH };
326	}
327
328	const computedHex = crypto.createHash('sha256').update(fileData).digest('hex');
329	const da = crypto.createHash('sha256').update(computedHex, 'utf8').digest();
330	const db = crypto.createHash('sha256').update(expectedDigest, 'utf8').digest();
331	if (!crypto.timingSafeEqual(da, db)) {
332	return { ok: false, reason: RUNTIME_MANAGER_REASONS.DIGEST_MISMATCH };
333	}
334
335	return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
336	}
337
338	// ─────────────────────────────────────────────────────────────────────────────
339	// §3 — Lifecycle state machine
340	// ─────────────────────────────────────────────────────────────────────────────
341
/**
 * The four lifecycle states of the bundled runtime process, as a frozen lookup table.
 * canServeInference treats 'ready' as the only state in which inference may be served.
 * @readonly
 */
export const LIFECYCLE_STATES = Object.freeze({
  STOPPED: 'stopped', // no runtime process
  STARTING: 'starting', // cold-start in progress, health not yet confirmed
  READY: 'ready', // the only state that serves inference
  DRAINING: 'draining', // graceful shutdown in progress
});
353
/**
 * Names of the events that drive lifecycle transitions, as a frozen lookup table.
 * The transition each event is meant to trigger is noted beside it.
 * @readonly
 */
export const LIFECYCLE_EVENTS = Object.freeze({
  /** Begin cold-start: stopped → starting. */
  START: 'start',
  /** Health-check passed after cold-start: starting → ready. */
  HEALTH_OK: 'health_ok',
  /** Health-check failed during cold-start: starting → stopped. */
  HEALTH_FAIL: 'health_fail',
  /** Begin graceful drain: ready → draining. */
  DRAIN: 'drain',
  /** Drain complete / process exited: draining → stopped. */
  STOPPED: 'stopped',
});
370
371	/**
372	* @typedef {{ state: string }} LifecycleState
373	*/
374
/**
 * Transition table for the lifecycle state machine: a Map from each source state to a
 * Map of (event → destination state).
 *
 * Any (state, event) pair that is absent from this table is an invalid transition, which
 * transitionLifecycle reports as a failure (fail-closed).
 *
 * @type {ReadonlyMap<string, ReadonlyMap<string, string>>}
 */
const LIFECYCLE_TRANSITIONS = (() => {
  const S = LIFECYCLE_STATES;
  const E = LIFECYCLE_EVENTS;
  const table = new Map();

  // stopped --start--> starting
  table.set(S.STOPPED, new Map([[E.START, S.STARTING]]));

  // starting --health_ok--> ready, starting --health_fail--> stopped
  table.set(
    S.STARTING,
    new Map([
      [E.HEALTH_OK, S.READY],
      [E.HEALTH_FAIL, S.STOPPED],
    ]),
  );

  // ready --drain--> draining
  table.set(S.READY, new Map([[E.DRAIN, S.DRAINING]]));

  // draining --stopped--> stopped
  table.set(S.DRAINING, new Map([[E.STOPPED, S.STOPPED]]));

  return table;
})();
402
/**
 * Produce a fresh lifecycle state object for a runtime that has not been started.
 * The initial state is always 'stopped'; each call returns a new object.
 * @returns {LifecycleState}
 */
export function createLifecycleState() {
  const initial = LIFECYCLE_STATES.STOPPED;
  return { state: initial };
}
410
/**
 * Apply one lifecycle event to a lifecycle state and report the outcome.
 *
 * Pure: `currentState` is never mutated. On success a new `{ state }` object is returned
 * and the result has no `reason` field.
 *
 * Fail-closed outcomes (all with `ok: false`):
 * - `currentState` is not an object with a string `state` → UNKNOWN_STATE, and `newState`
 *   is a fresh 'stopped' state from createLifecycleState().
 * - `event` is not a non-empty string → UNKNOWN_EVENT, `newState` is the input state.
 * - `currentState.state` has no entry in the transition table → UNKNOWN_STATE.
 * - the event is not valid from the current state → INVALID_TRANSITION.
 *
 * SECURITY: canServeInference only admits the 'ready' state, and the transition table
 * makes `health_ok` from 'starting' the only way to reach it.
 *
 * @param {LifecycleState} currentState
 * @param {string} event - One of LIFECYCLE_EVENTS values.
 * @returns {{ ok: boolean, newState: LifecycleState, reason?: string }}
 */
export function transitionLifecycle(currentState, event) {
  const reject = (newState, reason) => ({ ok: false, newState, reason });

  const stateIsWellFormed =
    Boolean(currentState) &&
    typeof currentState === 'object' &&
    typeof currentState.state === 'string';
  if (!stateIsWellFormed) {
    return reject(createLifecycleState(), RUNTIME_MANAGER_REASONS.UNKNOWN_STATE);
  }

  const eventIsWellFormed = typeof event === 'string' && event !== '';
  if (!eventIsWellFormed) {
    return reject(currentState, RUNTIME_MANAGER_REASONS.UNKNOWN_EVENT);
  }

  const outgoing = LIFECYCLE_TRANSITIONS.get(currentState.state);
  if (!outgoing) {
    return reject(currentState, RUNTIME_MANAGER_REASONS.UNKNOWN_STATE);
  }

  return outgoing.has(event)
    ? { ok: true, newState: { state: outgoing.get(event) } }
    : reject(currentState, RUNTIME_MANAGER_REASONS.INVALID_TRANSITION);
}
444
/**
 * Report whether the runtime may serve inference right now.
 *
 * The answer is `true` only for a non-null object whose `state` property is exactly
 * LIFECYCLE_STATES.READY; every other input (null, undefined, primitives, functions,
 * objects in any other state) yields `false`.
 *
 * SECURITY INVARIANT: inference callers MUST consult this function before routing a
 * request to the runtime. It is a single strict-equality expression so a malformed state
 * object cannot produce a merely "truthy" result.
 *
 * @param {LifecycleState} lifecycleState
 * @returns {boolean}
 */
export function canServeInference(lifecycleState) {
  return (
    typeof lifecycleState === 'object' &&
    lifecycleState !== null &&
    lifecycleState.state === LIFECYCLE_STATES.READY
  );
}
459
460	// ─────────────────────────────────────────────────────────────────────────────
461	// §4 — Backpressure / concurrency admission
462	// ─────────────────────────────────────────────────────────────────────────────
463
464	/**
465	* @typedef {Object} AdmissionState
466	* @property {number} maxInFlight - Maximum concurrent inference requests allowed.
467	* @property {number} queueBound - Maximum requests that may be queued (pending admission).
468	* @property {number} inFlight - Current count of admitted (in-progress) requests.
469	* @property {number} queued - Current count of queued (pending) requests.
470	*/
471
/**
 * Build an empty admission state (nothing in flight, nothing queued) for the given
 * concurrency limits.
 *
 * Fail-closed: both limits must be positive integers. `maxInFlight` is checked first.
 *
 * @param {{ maxInFlight: number, queueBound: number }} params
 * @returns {AdmissionState}
 * @throws {TypeError} on invalid parameters.
 */
export function createAdmissionState({ maxInFlight, queueBound }) {
  const limits = [
    ['maxInFlight', maxInFlight],
    ['queueBound', queueBound],
  ];
  for (const [label, value] of limits) {
    const isPositiveInteger = Number.isInteger(value) && value > 0;
    if (!isPositiveInteger) {
      throw new TypeError(`createAdmissionState: ${label} must be a positive integer`);
    }
  }
  return { maxInFlight, queueBound, inFlight: 0, queued: 0 };
}
490
/**
 * Decide whether a new inference request can run now, must wait, or must be refused.
 *
 * Verdicts:
 *   { ok: true,  reason: 'ok' }                        — an in-flight slot is free.
 *   { ok: false, reason: 'at_capacity' }               — no free slot, but the queue has room.
 *   { ok: false, reason: 'queue_full' }                — no free slot and the queue is full.
 *   { ok: false, reason: 'malformed_admission_state' } — the state failed validation.
 *
 * Callers interpret at_capacity as "enqueue and wait" and queue_full as "return busy to
 * caller." Phase 5 orchestrates the queue and calls recordInFlight when a slot opens.
 *
 * Fail-closed: the state must be an object whose maxInFlight and queueBound are positive
 * integers and whose inFlight and queued are non-negative integers; anything else is
 * rejected because capacity cannot be proven.
 *
 * @param {AdmissionState} state
 * @returns {{ ok: boolean, reason: string }}
 */
export function evaluateAdmission(state) {
  const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;
  const isCount = (value) => Number.isInteger(value) && value >= 0;

  const stateIsWellFormed =
    Boolean(state) &&
    typeof state === 'object' &&
    isPositiveInteger(state.maxInFlight) &&
    isPositiveInteger(state.queueBound) &&
    isCount(state.inFlight) &&
    isCount(state.queued);
  if (!stateIsWellFormed) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_ADMISSION_STATE };
  }

  const hasFreeSlot = state.inFlight < state.maxInFlight;
  if (hasFreeSlot) {
    return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
  }

  const queueHasRoom = state.queued < state.queueBound;
  return {
    ok: false,
    reason: queueHasRoom ? RUNTIME_MANAGER_REASONS.AT_CAPACITY : RUNTIME_MANAGER_REASONS.QUEUE_FULL,
  };
}
529
/**
 * Return a copy of the admission state with one more request counted as in flight.
 * Pure: the input object is not modified.
 *
 * Only the shape of `state.inFlight` is validated here; this function does not compare
 * the new count against `maxInFlight`. Phase 5 calls it when it is about to dispatch a
 * request and is responsible for the matching recordCompletion call.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} on malformed state.
 */
export function recordInFlight(state) {
  const stateIsUsable =
    state !== null && typeof state === 'object' && Number.isInteger(state.inFlight);
  if (!stateIsUsable) {
    throw new TypeError('recordInFlight: state is malformed');
  }
  const next = { ...state };
  next.inFlight = state.inFlight + 1;
  return next;
}
547
/**
 * Return a copy of the admission state with one in-flight request released.
 * Pure: the input object is not modified.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} on malformed state or attempt to complete with no in-flight requests.
 */
export function recordCompletion(state) {
  const stateIsUsable =
    state !== null && typeof state === 'object' && Number.isInteger(state.inFlight);
  if (!stateIsUsable) {
    throw new TypeError('recordCompletion: state is malformed');
  }
  // Refuse to drive the counter below zero: there is nothing to complete.
  const hasInFlight = state.inFlight > 0;
  if (!hasInFlight) {
    throw new TypeError('recordCompletion: no in-flight requests to complete');
  }
  const next = { ...state };
  next.inFlight = state.inFlight - 1;
  return next;
}
565
/**
 * Return a copy of the admission state with one more request counted as queued
 * (waiting, not yet in flight). Pure: the input object is not modified.
 *
 * Only the shape of `state.queued` is validated here; this function does not compare the
 * new count against `queueBound`.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} on malformed state.
 */
export function recordQueued(state) {
  const stateIsUsable =
    state !== null && typeof state === 'object' && Number.isInteger(state.queued);
  if (!stateIsUsable) {
    throw new TypeError('recordQueued: state is malformed');
  }
  const next = { ...state };
  next.queued = state.queued + 1;
  return next;
}
580
/**
 * Return a copy of the admission state with one queued request removed, whether it was
 * admitted or cancelled. Pure: the input object is not modified.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} on malformed state or attempt to dequeue with no queued requests.
 */
export function recordDequeued(state) {
  const stateIsUsable =
    state !== null && typeof state === 'object' && Number.isInteger(state.queued);
  if (!stateIsUsable) {
    throw new TypeError('recordDequeued: state is malformed');
  }
  // Refuse to drive the counter below zero: the queue is already empty.
  const hasQueued = state.queued > 0;
  if (!hasQueued) {
    throw new TypeError('recordDequeued: no queued requests to dequeue');
  }
  const next = { ...state };
  next.queued = state.queued - 1;
  return next;
}
598
599	// ─────────────────────────────────────────────────────────────────────────────
600	// §5 — Resource-limit policy
601	// ─────────────────────────────────────────────────────────────────────────────
602
603	/**
604	* @typedef {Object} ResourceLimits
605	* @property {number} maxRamBytes - Maximum RAM usage in bytes (> 0).
606	* @property {number} maxVramBytes - Maximum VRAM usage in bytes (finite and > 0; Infinity is rejected, so without a GPU pass any positive finite ceiling).
607	* @property {number} maxCpuPercent - Maximum CPU usage percent, in the range (0, 100] (100 itself is allowed).
608	*/
609
610	/**
611	* @typedef {Object} ResourceObservation
612	* @property {number} ramBytes - Current RAM used by the runtime process in bytes.
613	* @property {number} vramBytes - Current VRAM used (0 if no GPU).
614	* @property {number} cpuPercent - Current CPU percent (0–100).
615	*/
616
/**
 * Validate a set of resource ceilings and return them as a ResourceLimits object.
 *
 * Fail-closed: `maxRamBytes` and `maxVramBytes` must be positive finite numbers, and
 * `maxCpuPercent` must be a finite number in (0, 100]. Fields are checked in the order
 * RAM, VRAM, CPU and the first invalid one throws.
 *
 * @param {{ maxRamBytes: number, maxVramBytes: number, maxCpuPercent: number }} params
 * @returns {ResourceLimits}
 * @throws {TypeError} on invalid parameters.
 */
export function createResourceLimits({ maxRamBytes, maxVramBytes, maxCpuPercent }) {
  const isPositiveFinite = (value) => Number.isFinite(value) && value > 0;

  const byteCeilings = [
    ['maxRamBytes', maxRamBytes],
    ['maxVramBytes', maxVramBytes],
  ];
  for (const [field, value] of byteCeilings) {
    if (!isPositiveFinite(value)) {
      throw new TypeError(`createResourceLimits: ${field} must be a positive finite number`);
    }
  }

  const cpuIsInRange = isPositiveFinite(maxCpuPercent) && maxCpuPercent <= 100;
  if (!cpuIsInRange) {
    throw new TypeError('createResourceLimits: maxCpuPercent must be in (0, 100]');
  }

  return { maxRamBytes, maxVramBytes, maxCpuPercent };
}
637
/**
 * Evaluate whether the current resource observation is within the configured limits.
 * Returns the FIRST limit violation found (RAM before VRAM before CPU) or ok.
 *
 * Limits are validated with the same rules createResourceLimits enforces: maxRamBytes and
 * maxVramBytes must be positive finite numbers and maxCpuPercent must be in (0, 100].
 * A hand-built limits object with maxCpuPercent above 100 is therefore MALFORMED_LIMITS
 * rather than a ceiling that silently disables the CPU gate.
 *
 * Observation fields must be finite and non-negative. Limits are validated before the
 * observation, so when both are malformed the verdict is MALFORMED_LIMITS.
 *
 * Fail-closed: malformed limits or observation → MALFORMED_LIMITS / MALFORMED_OBSERVATION.
 * The actual numeric values of the observation are NEVER returned in the reason string.
 *
 * @param {ResourceObservation} observation - Current runtime resource usage (from Phase 5 stat adapter).
 * @param {ResourceLimits} limits - Configured ceilings.
 * @returns {{ ok: boolean, reason: string }}
 */
export function evaluateResourceLimits(observation, limits) {
  const deny = (reason) => ({ ok: false, reason });
  const isPositiveFinite = (value) => Number.isFinite(value) && value > 0;
  const isNonNegativeFinite = (value) => Number.isFinite(value) && value >= 0;

  const limitsAreValid =
    Boolean(limits) &&
    typeof limits === 'object' &&
    isPositiveFinite(limits.maxRamBytes) &&
    isPositiveFinite(limits.maxVramBytes) &&
    isPositiveFinite(limits.maxCpuPercent) &&
    limits.maxCpuPercent <= 100; // same upper bound as createResourceLimits
  if (!limitsAreValid) {
    return deny(RUNTIME_MANAGER_REASONS.MALFORMED_LIMITS);
  }

  const observationIsValid =
    Boolean(observation) &&
    typeof observation === 'object' &&
    isNonNegativeFinite(observation.ramBytes) &&
    isNonNegativeFinite(observation.vramBytes) &&
    isNonNegativeFinite(observation.cpuPercent);
  if (!observationIsValid) {
    return deny(RUNTIME_MANAGER_REASONS.MALFORMED_OBSERVATION);
  }

  // Strictly-greater comparisons: usage exactly at a ceiling is still within limits.
  if (observation.ramBytes > limits.maxRamBytes) {
    return deny(RUNTIME_MANAGER_REASONS.RAM_OVER_LIMIT);
  }
  if (observation.vramBytes > limits.maxVramBytes) {
    return deny(RUNTIME_MANAGER_REASONS.VRAM_OVER_LIMIT);
  }
  if (observation.cpuPercent > limits.maxCpuPercent) {
    return deny(RUNTIME_MANAGER_REASONS.CPU_OVER_LIMIT);
  }

  return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
}
681
682	// ─────────────────────────────────────────────────────────────────────────────
683	// §6 — Top-level runtime request gate
684	// ─────────────────────────────────────────────────────────────────────────────
685
686	/**
687	* @typedef {Object} RuntimeDecision
688	* @property {boolean} ok - true only when all gates pass (inference may proceed).
689	* @property {string} reason - A RUNTIME_MANAGER_REASONS constant. Never a secret.
690	*/
691
692	/**
693	* Top-level admission decision for a single inference request against the bundled runtime.
694	*
695	* Checks, in order:
696	* 1. Lifecycle gate: runtime must be in 'ready' state. Non-ready → NOT_READY.
697	* 2. Admission gate: in-flight concurrency and queue bounds. AT_CAPACITY / QUEUE_FULL.
698	* 3. Resource-limit gate: RAM/VRAM/CPU ceilings. Over limit → reject.
699	*
700	* ALL THREE gates must pass for the request to be allowed.
701	* Fail-closed: malformed parameters → MALFORMED_REQUEST_PARAMS.
702	*
703	* SECURITY PROPERTIES:
704	* - Inference is NEVER allowed in a non-ready lifecycle state (no timing window between
705	* states that could allow a request through a transitional state).
706	* - Backpressure trips at the exact configured bound — no overflow possible.
707	* - Resource limits are enforced BEFORE the request reaches the runtime, bounding OOM risk.
708	* - No secret, path, URL, or numeric observation value appears in any reason string.
709	* - This function has no side effects — the caller must call recordInFlight on the admission
710	* state when it decides to proceed (pure: decision is separated from state mutation).
711	*
712	* @param {{
713	* lifecycleState: LifecycleState,
714	* admissionState: AdmissionState,
715	* resourceObservation: ResourceObservation,
716	* resourceLimits: ResourceLimits,
717	* }} params
718	* @returns {RuntimeDecision}
719	*/
720	export function evaluateRuntimeRequest(params) {
721	try {
722	const { lifecycleState, admissionState, resourceObservation, resourceLimits } = params ?? {};
723	if (!lifecycleState \|\| !admissionState \|\| !resourceObservation \|\| !resourceLimits) {
724	return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_REQUEST_PARAMS };
725	}
726
727	// 1. Lifecycle gate.
728	if (!canServeInference(lifecycleState)) {
729	return { ok: false, reason: RUNTIME_MANAGER_REASONS.NOT_READY };
730	}
731
732	// 2. Admission gate.
733	const admission = evaluateAdmission(admissionState);
734	if (!admission.ok) {
735	return { ok: false, reason: admission.reason };
736	}
737
738	// 3. Resource-limit gate.
739	const resources = evaluateResourceLimits(resourceObservation, resourceLimits);
740	if (!resources.ok) {
741	return { ok: false, reason: resources.reason };
742	}
743
744	return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
745	} catch {
746	// Defense in depth: never let an unexpected error carry input data outward.
747	return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_REQUEST_PARAMS };
748	}
749	}
750
751	// ─────────────────────────────────────────────────────────────────────────────
752	// §7 — Injected adapter interface (type documentation only; no implementation)
753	// ─────────────────────────────────────────────────────────────────────────────
754
755	/**
756	* The adapter interface that Phase 5 MUST supply to connect the decision core to the real
757	* Ollama/llama.cpp runtime. The pure module in this file imports NONE of these — they are
758	* passed explicitly by Phase 5's binding layer.
759	*
760	* SECURITY INVARIANT: the adapter must not expose vault, canister, keychain, or JWT handles.
761	* It is scoped exclusively to model-lifecycle operations (spawn, download, health, resource probe).
762	*
763	* @typedef {Object} RuntimeAdapterFns
764	* @property {(opts: SpawnOpts) => Promise<SpawnHandle>} spawn
765	* Spawn the Ollama/llama.cpp process. Must bind to 127.0.0.1 only (Phase 2 §4.5).
766	* Called ONLY after integrity verification passes (finalize().ok === true).
767	* @property {(url: string, onChunk: (chunk: Uint8Array) => void) => Promise<void>} download
768	* Download a model file over TLS, calling onChunk for each received chunk.
769	* The URL MUST be one that passed validateSourceUrl. Phase 5 feeds chunks to the
770	* integrity accumulator via the onChunk callback.
771	* @property {(handle: SpawnHandle) => Promise<boolean>} healthCheck
772	* Return true if the runtime responds correctly to a health probe (OpenAI-compat
773	* GET /v1/models or Ollama GET /api/tags). Phase 5 drives the health-check retry loop
774	* and calls transitionLifecycle(state, HEALTH_OK | HEALTH_FAIL).
775	* @property {() => Promise<ResourceObservation>} statResources
776	* Return the current RAM/VRAM/CPU usage for the runtime process. Called before each
777	* inference request; result is passed to evaluateResourceLimits.
778	*/
779
780	/**
781	* @typedef {Object} SpawnOpts
782	* @property {string} binaryPath - Absolute path to the Ollama/llama.cpp binary.
783	* @property {string} modelPath - Absolute path to the verified model file.
784	* @property {number} port - Ephemeral port allocated by Phase 5 (non-predictable).
785	* @property {number} maxRamBytes - Memory ceiling to pass to the runtime's CLI flags.
786	*/
787
788	/**
789	* @typedef {Object} SpawnHandle
790	* @property {number} pid - Process ID of the spawned runtime.
791	* @property {() => Promise<void>} kill - Gracefully shut down the runtime.
792	*/