companion-runtime-manager.mjs
file-level
1
files
1
commits
0
hotspots
0
π§ dead
0
π₯ blast risk
| 1 | /** |
| 2 | * Companion App — Runtime Manager DECISION CORE. |
| 3 | * |
| 4 | * Phase 4 of the Companion App build plan (feat/companion-app). |
| 5 | * See docs/COMPANION-APP-PHASE-4-RUNTIME-MANAGER.md for the accepted design, the adversarial |
| 6 | * threat model, and the Phase 5 obligations to spawn the real runtime and perform the real |
| 7 | * verified download behind the shared bind gate. |
| 8 | * |
| 9 | * WHAT THIS MODULE IS |
| 10 | * The companion app (Phase 5+) bundles a local AI inference runtime (Ollama / llama.cpp). |
| 11 | * This module is the DECISION CORE for managing that runtime's lifecycle: |
| 12 | * - Supply-chain integrity: verify a downloaded model file before it is ever executed. |
| 13 | * - Lifecycle state machine: stopped → starting → ready → draining → stopped. |
| 14 | * - Backpressure / concurrency admission: queue bound, max-in-flight. |
| 15 | * - Resource-limit policy: RAM/VRAM/CPU ceilings; reject when over. |
| 16 | * |
| 17 | * DESIGN CONSTRAINTS (read before modifying — these are security invariants): |
| 18 | * - PURE. No I/O, no process.env reads, no child_process, no network, no filesystem, |
| 19 | * no logging, no clock reads. Every input is passed explicitly. The actual spawn of |
| 20 | * Ollama/llama.cpp, the real model download over TLS, and OS resource probing are |
| 21 | * deferred to Phase 5 via the INJECTED adapter interface (RuntimeAdapterFns). |
| 22 | * - FAIL-CLOSED. Any missing, malformed, ambiguous, or unrecognised input → DENY. |
| 23 | * There is no fail-open branch anywhere in this module. |
| 24 | * - NO AMBIENT AUTHORITY. The module imports no vault, canister, keychain, or auth module. |
| 25 | * The injected adapter interface is typed to model-lifecycle operations only. |
| 26 | * - NO SECRET IN OUTPUT. Reason codes are fixed constants. No model path, download URL, |
| 27 | * binary path, SHA-256 digest value, or access token ever appears in a reason string, |
| 28 | * a return value, or a thrown error. |
| 29 | * - SUPPLY-CHAIN INTEGRITY. A model file MUST pass SHA-256 digest + size verification |
| 30 | * via the integrity accumulator BEFORE canServeInference returns true for the first time. |
| 31 | * Phase 5 is responsible for calling finalize() and gating execution on { ok: true }. |
| 32 | * |
| 33 | * Hard constraint from docs/COMPANION-APP-DESIGN-AND-AUTHORIZATION-GATE.md §4 item 6: |
| 34 | * "No ambient authority. The endpoint exposes only model inference; it never exposes vault |
| 35 | * read/write, the canister client, or the stored JWT." |
| 36 | * |
| 37 | * Gate §12 Phase 4 obligations (remaining for Phase 5): |
| 38 | * - Spawn Ollama/llama.cpp (real child_process) after integrity is verified. |
| 39 | * - Perform the real model download over TLS using the injected download adapter. |
| 40 | * - Call the OS resource probe (injected stat adapter) to supply ResourceObservation. |
| 41 | * - Run the health-check loop and call transitionLifecycle with health_ok/health_fail. |
| 42 | * - Set companionAvailable=true in LaneCapabilities ONLY after lifecycle reaches 'ready'. |
| 43 | * - Wire the Phase 2 loopback guard BEFORE any model work (Phase 2 boundary stays intact). |
| 44 | */ |
| 45 | |
| 46 | import crypto from 'node:crypto'; |
| 47 | |
| 48 | // ───────────────────────────────────────────────────────────────────────────── |
| 49 | // §1 — Reason codes (frozen constants; never derived from input) |
| 50 | // ───────────────────────────────────────────────────────────────────────────── |
| 51 | |
/**
 * Closed vocabulary of reason codes used by the decision functions in this module.
 *
 * Every value is a hard-coded string literal, so a reason can never carry a model path,
 * URL, digest, or any other caller-controlled value. The object is frozen with
 * Object.freeze, so properties cannot be added, removed, or reassigned at runtime.
 * @readonly
 */
export const RUNTIME_MANAGER_REASONS = Object.freeze({
  // Integrity: spec/source validation and download verification outcomes.
  OK: 'ok',
  MALFORMED_SPEC: 'malformed_spec',
  SOURCE_NOT_ALLOWED: 'source_not_allowed',
  SCHEME_NOT_ALLOWED: 'scheme_not_allowed',
  SIZE_MISMATCH: 'size_mismatch',
  DIGEST_MISMATCH: 'digest_mismatch',
  ACCUMULATOR_FINALIZED: 'accumulator_finalized',
  ACCUMULATOR_ABORTED: 'accumulator_aborted',

  // Lifecycle: state-machine rejections.
  INVALID_TRANSITION: 'invalid_transition',
  NOT_READY: 'not_ready',
  UNKNOWN_EVENT: 'unknown_event',
  UNKNOWN_STATE: 'unknown_state',

  // Admission: concurrency / queue-bound outcomes.
  MALFORMED_ADMISSION_STATE: 'malformed_admission_state',
  AT_CAPACITY: 'at_capacity',
  QUEUE_FULL: 'queue_full',
  NO_IN_FLIGHT_TO_COMPLETE: 'no_in_flight_to_complete',

  // Resource limits: validation failures and ceiling violations.
  MALFORMED_LIMITS: 'malformed_limits',
  MALFORMED_OBSERVATION: 'malformed_observation',
  RAM_OVER_LIMIT: 'ram_over_limit',
  VRAM_OVER_LIMIT: 'vram_over_limit',
  CPU_OVER_LIMIT: 'cpu_over_limit',

  // Top-level request gate.
  MALFORMED_REQUEST_PARAMS: 'malformed_request_params',
});
| 91 | |
| 92 | // ───────────────────────────────────────────────────────────────────────────── |
| 93 | // §2 — Supply-chain integrity verification |
| 94 | // ───────────────────────────────────────────────────────────────────────────── |
| 95 | |
/**
 * URL schemes permitted as model download sources, in the `URL.protocol` form
 * (lowercase, with the trailing colon).
 *
 * Only `https:` is listed, so a model spec naming a cleartext `http:` source is rejected
 * when the spec is validated rather than when the download starts.
 *
 * NOTE(review): the `ReadonlySet` annotation is compile-time only. This is an ordinary
 * exported `Set`, so any importer can still call `.add()` / `.delete()` on it at runtime.
 * @type {ReadonlySet<string>}
 */
export const ALLOWED_SOURCE_SCHEMES = new Set(['https:']);
| 104 | |
/**
 * Number of characters in a SHA-256 digest written as hexadecimal
 * (32 digest bytes × 2 hex characters per byte = 64).
 * @type {number}
 */
export const SHA256_HEX_LENGTH = 64;
| 110 | |
/**
 * Report whether an already-parsed source URL falls under a single allowlist entry.
 *
 * Matching is structural rather than textual:
 *   - The origin (scheme + host + port, as normalised by the URL parser) must be EQUAL.
 *     A string prefix test is not sufficient: "https://models.example.com" is a textual
 *     prefix of "https://models.example.com.evil.net/", "https://models.example.com:8443/"
 *     and "https://models.example.com@evil.net/", none of which are the allowed host.
 *   - The entry's path must equal the candidate path or be a whole-segment prefix of it,
 *     so "/models" admits "/models/a.gguf" but not "/models-evil/a.gguf". Because the
 *     parsed pathname is used, dot-segments ("/models/../x") are already resolved.
 *   - Path comparison is case-insensitive, matching the previous lowercase comparison.
 *   - An entry that carries a query string or fragment cannot act as a base prefix; it
 *     matches only a candidate whose full normalised URL is identical (case-insensitive).
 *
 * @param {URL} candidate - Parsed source URL (scheme already checked by the caller).
 * @param {unknown} allowed - One allowlist entry; anything but a parseable URL string never matches.
 * @returns {boolean}
 */
function sourceUrlIsUnderAllowedBase(candidate, allowed) {
  if (typeof allowed !== 'string' || allowed.length === 0) return false;

  let base;
  try {
    base = new URL(allowed);
  } catch {
    return false; // an unparseable allowlist entry can never grant access (fail-closed)
  }

  if (base.origin !== candidate.origin) return false;

  if (base.search !== '' || base.hash !== '') {
    return base.href.toLowerCase() === candidate.href.toLowerCase();
  }

  // Strip trailing slashes so "/models" and "/models/" describe the same base; a bare
  // origin ("/") becomes "" and therefore admits every path on that origin.
  const basePath = base.pathname.toLowerCase().replace(/\/+$/, '');
  const candidatePath = candidate.pathname.toLowerCase();
  return candidatePath === basePath || candidatePath.startsWith(`${basePath}/`);
}

/**
 * Validate that a download source URL is (a) a well-formed URL, (b) uses an allowed scheme
 * (HTTPS only), and (c) falls under one of the caller-supplied allowlist base URLs.
 *
 * Fail-closed: a non-string or empty URL, a URL containing whitespace or control characters,
 * an unparseable URL, a non-HTTPS scheme, a missing/empty allowlist, or an allowlist miss
 * all deny. The source URL is never copied into the returned reason.
 *
 * Allowlist entries are base URLs: the origin must match exactly and the path is matched on
 * segment boundaries (see sourceUrlIsUnderAllowedBase).
 *
 * @param {unknown} url - The model download URL to validate.
 * @param {unknown} allowedUrls - Explicit allowlist of permitted base URLs (string[]).
 * @returns {{ ok: boolean, reason: string }}
 */
export function validateSourceUrl(url, allowedUrls) {
  if (typeof url !== 'string' || url.length === 0) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
  }
  if (!Array.isArray(allowedUrls) || allowedUrls.length === 0) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.SOURCE_NOT_ALLOWED };
  }
  // The URL parser silently strips leading/trailing spaces and embedded tabs/newlines, so
  // the string validated here could differ from the string a downloader later uses.
  // Reject such input outright instead of validating a cleaned-up variant of it.
  if (/[\u0000-\u0020\u007f]/.test(url)) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
  }

  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
  }
  if (!ALLOWED_SOURCE_SCHEMES.has(parsed.protocol)) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.SCHEME_NOT_ALLOWED };
  }

  const matched = allowedUrls.some((allowed) => sourceUrlIsUnderAllowedBase(parsed, allowed));
  if (!matched) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.SOURCE_NOT_ALLOWED };
  }
  return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
}
| 150 | |
/**
 * Check the shape of a model integrity spec without doing any I/O.
 *
 * The digest must be a string of exactly SHA256_HEX_LENGTH characters drawn from
 * lowercase hex digits; the size must be an integer greater than zero. Either failure
 * yields MALFORMED_SPEC. Intended to run when a model spec is registered, so malformed
 * registry entries are caught early.
 *
 * @param {unknown} expectedDigest - Lowercase SHA-256 hex string (64 chars exactly).
 * @param {unknown} expectedSizeBytes - Positive integer byte count for the model file.
 * @returns {{ ok: boolean, reason: string }}
 */
export function validateIntegritySpec(expectedDigest, expectedSizeBytes) {
  const digestWellFormed =
    typeof expectedDigest === 'string' &&
    expectedDigest.length === SHA256_HEX_LENGTH &&
    /^[0-9a-f]{64}$/.test(expectedDigest);
  if (!digestWellFormed) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
  }

  const sizeWellFormed = Number.isInteger(expectedSizeBytes) && expectedSizeBytes > 0;
  if (!sizeWellFormed) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
  }

  return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
}
| 175 | |
| 176 | /** |
| 177 | * @typedef {{ ok: boolean, reason: string }} IntegrityVerdict |
| 178 | */ |
| 179 | |
/**
 * Build a single-use, streaming integrity checker for one model download.
 *
 * The spec (digest + size) and the source URL are validated up front; if either check
 * fails a TypeError is thrown whose message contains only the fixed reason code. The
 * returned object then hashes every chunk it is given with SHA-256 and counts the bytes.
 *
 * Behaviour of the returned methods:
 *   - update(chunk): hashes and counts the chunk. A chunk that is neither a Uint8Array
 *     nor a Buffer aborts the accumulator. After finalize() or an abort, update() is a
 *     silent no-op (it returns nothing in every case).
 *   - finalize(): returns ACCUMULATOR_ABORTED if aborted, ACCUMULATOR_FINALIZED if already
 *     finalized; otherwise seals the accumulator and returns SIZE_MISMATCH,
 *     DIGEST_MISMATCH, or OK. The size is compared first; the digest comparison goes
 *     through crypto.timingSafeEqual over SHA-256 hashes of the two hex strings, which
 *     guarantees equal-length inputs.
 *   - getReceivedBytes(): the running byte count.
 *   - abort(): marks the accumulator aborted.
 *
 * Caller contract (Phase 5 download adapter): create the accumulator before the download
 * starts, feed every byte in order, call finalize() when the stream ends, and execute the
 * model only if that verdict is { ok: true }.
 *
 * @param {{
 *   expectedDigest: string,
 *   expectedSizeBytes: number,
 *   sourceUrl: string,
 *   allowedSourceUrls: string[],
 * }} params
 * @returns {{
 *   update: (chunk: Uint8Array) => void,
 *   finalize: () => IntegrityVerdict,
 *   getReceivedBytes: () => number,
 *   abort: () => void,
 * }}
 * @throws {TypeError} when the spec or source URL fails validation (fail at creation time).
 */
export function createIntegrityAccumulator({ expectedDigest, expectedSizeBytes, sourceUrl, allowedSourceUrls }) {
  const specVerdict = validateIntegritySpec(expectedDigest, expectedSizeBytes);
  if (!specVerdict.ok) {
    throw new TypeError(`createIntegrityAccumulator: ${specVerdict.reason}`);
  }

  const sourceVerdict = validateSourceUrl(sourceUrl, allowedSourceUrls);
  if (!sourceVerdict.ok) {
    throw new TypeError(`createIntegrityAccumulator: ${sourceVerdict.reason}`);
  }

  const runningHash = crypto.createHash('sha256');
  let byteCount = 0;
  let sealed = false; // set once finalize() has run
  let poisoned = false; // set by abort() or by a non-byte chunk

  /** Hash and count one chunk; ignored once the accumulator is sealed or aborted. */
  function update(chunk) {
    if (sealed || poisoned) return;
    const isByteChunk = chunk instanceof Uint8Array || Buffer.isBuffer(chunk);
    if (!isByteChunk) {
      poisoned = true;
      return;
    }
    runningHash.update(chunk);
    byteCount += chunk.length;
  }

  /** Seal the accumulator and report whether size and digest both match the spec. */
  function finalize() {
    if (poisoned) {
      return { ok: false, reason: RUNTIME_MANAGER_REASONS.ACCUMULATOR_ABORTED };
    }
    if (sealed) {
      return { ok: false, reason: RUNTIME_MANAGER_REASONS.ACCUMULATOR_FINALIZED };
    }
    sealed = true;

    // Cheap check first; a wrong byte count needs no digest work.
    if (byteCount !== expectedSizeBytes) {
      return { ok: false, reason: RUNTIME_MANAGER_REASONS.SIZE_MISMATCH };
    }

    // Re-hash both hex strings so timingSafeEqual always sees two 32-byte buffers.
    const actualHex = runningHash.digest('hex');
    const actual = crypto.createHash('sha256').update(actualHex, 'utf8').digest();
    const expected = crypto.createHash('sha256').update(expectedDigest, 'utf8').digest();
    return crypto.timingSafeEqual(actual, expected)
      ? { ok: true, reason: RUNTIME_MANAGER_REASONS.OK }
      : { ok: false, reason: RUNTIME_MANAGER_REASONS.DIGEST_MISMATCH };
  }

  /** Bytes accepted so far (for progress reporting). */
  function getReceivedBytes() {
    return byteCount;
  }

  /** Mark the accumulator aborted; a later finalize() reports ACCUMULATOR_ABORTED. */
  function abort() {
    poisoned = true;
  }

  return { update, finalize, getReceivedBytes, abort };
}
| 296 | |
/**
 * One-shot integrity check for a model file that is already fully in memory.
 *
 * Checks run in this order and the first failure is returned: source URL allowlist,
 * integrity spec shape, `fileData` being a Uint8Array/Buffer (else MALFORMED_SPEC),
 * byte length, then SHA-256 digest (compared via crypto.timingSafeEqual over hashes of
 * the two hex strings). Only the verdict is returned; the file data is not.
 *
 * Suitable for small models or tests; large downloads should stream through
 * createIntegrityAccumulator instead of holding the whole file in RAM.
 *
 * @param {{
 *   fileData: Uint8Array,
 *   expectedDigest: string,
 *   expectedSizeBytes: number,
 *   sourceUrl: string,
 *   allowedSourceUrls: string[],
 * }} params
 * @returns {IntegrityVerdict}
 */
export function verifyModelBytes({ fileData, expectedDigest, expectedSizeBytes, sourceUrl, allowedSourceUrls }) {
  const sourceVerdict = validateSourceUrl(sourceUrl, allowedSourceUrls);
  if (!sourceVerdict.ok) {
    return { ok: false, reason: sourceVerdict.reason };
  }

  const specVerdict = validateIntegritySpec(expectedDigest, expectedSizeBytes);
  if (!specVerdict.ok) {
    return { ok: false, reason: specVerdict.reason };
  }

  const isByteData = fileData instanceof Uint8Array || Buffer.isBuffer(fileData);
  if (!isByteData) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_SPEC };
  }

  if (fileData.length !== expectedSizeBytes) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.SIZE_MISMATCH };
  }

  // Re-hash both hex strings so timingSafeEqual always sees two 32-byte buffers.
  const actualHex = crypto.createHash('sha256').update(fileData).digest('hex');
  const actual = crypto.createHash('sha256').update(actualHex, 'utf8').digest();
  const expected = crypto.createHash('sha256').update(expectedDigest, 'utf8').digest();

  return crypto.timingSafeEqual(actual, expected)
    ? { ok: true, reason: RUNTIME_MANAGER_REASONS.OK }
    : { ok: false, reason: RUNTIME_MANAGER_REASONS.DIGEST_MISMATCH };
}
| 337 | |
| 338 | // ───────────────────────────────────────────────────────────────────────────── |
| 339 | // §3 — Lifecycle state machine |
| 340 | // ───────────────────────────────────────────────────────────────────────────── |
| 341 | |
/**
 * The four lifecycle states of the bundled runtime process, as a frozen name → string map.
 * canServeInference returns true only for the READY value.
 * @readonly
 */
export const LIFECYCLE_STATES = Object.freeze({
  STOPPED: 'stopped',
  STARTING: 'starting',
  READY: 'ready',
  DRAINING: 'draining',
});
| 353 | |
/**
 * Event names accepted by transitionLifecycle, as a frozen name → string map.
 * Note that the STOPPED event shares its string value ('stopped') with the STOPPED state.
 * @readonly
 */
export const LIFECYCLE_EVENTS = Object.freeze({
  /** Signal: begin cold-start. stopped → starting. */
  START: 'start',
  /** Signal: health-check passed after cold-start. starting → ready. */
  HEALTH_OK: 'health_ok',
  /** Signal: health-check failed during cold-start. starting → stopped. */
  HEALTH_FAIL: 'health_fail',
  /** Signal: begin graceful drain. ready → draining. */
  DRAIN: 'drain',
  /** Signal: drain complete / process exited. draining → stopped. */
  STOPPED: 'stopped',
});
| 370 | |
| 371 | /** |
| 372 | * @typedef {{ state: string }} LifecycleState |
| 373 | */ |
| 374 | |
/**
 * Transition table: Map<fromState, Map<event, toState>>.
 *
 * Built from a flat list of (from, event, to) edges. Any (fromState, event) pair that is
 * absent from the table is an invalid transition, so new edges must be added here
 * explicitly.
 *
 * @type {ReadonlyMap<string, ReadonlyMap<string, string>>}
 */
const LIFECYCLE_TRANSITIONS = (() => {
  const edges = [
    [LIFECYCLE_STATES.STOPPED, LIFECYCLE_EVENTS.START, LIFECYCLE_STATES.STARTING],
    [LIFECYCLE_STATES.STARTING, LIFECYCLE_EVENTS.HEALTH_OK, LIFECYCLE_STATES.READY],
    [LIFECYCLE_STATES.STARTING, LIFECYCLE_EVENTS.HEALTH_FAIL, LIFECYCLE_STATES.STOPPED],
    [LIFECYCLE_STATES.READY, LIFECYCLE_EVENTS.DRAIN, LIFECYCLE_STATES.DRAINING],
    [LIFECYCLE_STATES.DRAINING, LIFECYCLE_EVENTS.STOPPED, LIFECYCLE_STATES.STOPPED],
  ];

  const table = new Map();
  for (const [from, event, to] of edges) {
    if (!table.has(from)) {
      table.set(from, new Map());
    }
    table.get(from).set(event, to);
  }
  return table;
})();
| 402 | |
/**
 * Produce a fresh lifecycle state object in the initial 'stopped' state.
 * A new object is returned on every call.
 * @returns {LifecycleState}
 */
export function createLifecycleState() {
  const initial = { state: LIFECYCLE_STATES.STOPPED };
  return initial;
}
| 410 | |
/**
 * Apply one lifecycle event to a lifecycle state.
 *
 * The input state is never mutated. On success the result is
 * `{ ok: true, newState }` with a freshly created state object and no `reason`.
 * On failure the result is `{ ok: false, newState, reason }`:
 *   - state missing, not an object, or without a string `state` field → UNKNOWN_STATE,
 *     and `newState` is a fresh 'stopped' state;
 *   - event not a non-empty string → UNKNOWN_EVENT, `newState` is the input state;
 *   - state string not in the transition table → UNKNOWN_STATE, `newState` is the input state;
 *   - event not allowed from that state → INVALID_TRANSITION, `newState` is the input state.
 *
 * Per the transition table, the only edge into 'ready' is health_ok from 'starting'.
 *
 * @param {LifecycleState} currentState
 * @param {string} event - One of LIFECYCLE_EVENTS values.
 * @returns {{ ok: boolean, newState: LifecycleState, reason?: string }}
 */
export function transitionLifecycle(currentState, event) {
  const stateWellFormed =
    Boolean(currentState) &&
    typeof currentState === 'object' &&
    typeof currentState.state === 'string';
  if (!stateWellFormed) {
    // Nothing trustworthy to hand back, so fall back to the initial (stopped) state.
    return {
      ok: false,
      newState: createLifecycleState(),
      reason: RUNTIME_MANAGER_REASONS.UNKNOWN_STATE,
    };
  }

  const eventWellFormed = typeof event === 'string' && event.length > 0;
  if (!eventWellFormed) {
    return { ok: false, newState: currentState, reason: RUNTIME_MANAGER_REASONS.UNKNOWN_EVENT };
  }

  const outgoing = LIFECYCLE_TRANSITIONS.get(currentState.state);
  if (outgoing === undefined) {
    return { ok: false, newState: currentState, reason: RUNTIME_MANAGER_REASONS.UNKNOWN_STATE };
  }

  if (!outgoing.has(event)) {
    return { ok: false, newState: currentState, reason: RUNTIME_MANAGER_REASONS.INVALID_TRANSITION };
  }

  return { ok: true, newState: { state: outgoing.get(event) } };
}
| 444 | |
/**
 * Inference gate: true only for an object whose `state` is exactly 'ready'.
 *
 * Anything else — null, undefined, a non-object, or an object in any other state —
 * yields false. Inference callers are expected to consult this before routing a request
 * to the runtime.
 *
 * @param {LifecycleState} lifecycleState
 * @returns {boolean}
 */
export function canServeInference(lifecycleState) {
  return (
    Boolean(lifecycleState) &&
    typeof lifecycleState === 'object' &&
    lifecycleState.state === LIFECYCLE_STATES.READY
  );
}
| 459 | |
| 460 | // ───────────────────────────────────────────────────────────────────────────── |
| 461 | // §4 — Backpressure / concurrency admission |
| 462 | // ───────────────────────────────────────────────────────────────────────────── |
| 463 | |
| 464 | /** |
| 465 | * @typedef {Object} AdmissionState |
| 466 | * @property {number} maxInFlight - Maximum concurrent inference requests allowed. |
| 467 | * @property {number} queueBound - Maximum requests that may be queued (pending admission). |
| 468 | * @property {number} inFlight - Current count of admitted (in-progress) requests. |
| 469 | * @property {number} queued - Current count of queued (pending) requests. |
| 470 | */ |
| 471 | |
/**
 * Build an empty admission state (nothing in flight, nothing queued) for the given limits.
 *
 * Both limits must be integers greater than zero; `maxInFlight` is checked first.
 *
 * @param {{ maxInFlight: number, queueBound: number }} params
 * @returns {AdmissionState}
 * @throws {TypeError} on invalid parameters.
 */
export function createAdmissionState({ maxInFlight, queueBound }) {
  const maxInFlightValid = Number.isInteger(maxInFlight) && maxInFlight > 0;
  if (!maxInFlightValid) {
    throw new TypeError('createAdmissionState: maxInFlight must be a positive integer');
  }

  const queueBoundValid = Number.isInteger(queueBound) && queueBound > 0;
  if (!queueBoundValid) {
    throw new TypeError('createAdmissionState: queueBound must be a positive integer');
  }

  return { maxInFlight, queueBound, inFlight: 0, queued: 0 };
}
| 490 | |
/**
 * Decide whether a new inference request can run now, must wait, or must be refused.
 *
 * Returns:
 *   { ok: true,  reason: 'ok' }                        — an in-flight slot is free.
 *   { ok: false, reason: 'at_capacity' }               — no slot free, but the queue has room.
 *   { ok: false, reason: 'queue_full' }                — no slot free and the queue is full.
 *   { ok: false, reason: 'malformed_admission_state' } — the state failed validation.
 *
 * The state is well-formed when it is an object whose `maxInFlight` and `queueBound` are
 * integers > 0 and whose `inFlight` and `queued` are integers >= 0. This function only
 * decides; it does not modify the state.
 *
 * Callers interpret at_capacity as "enqueue and wait" and queue_full as "return busy".
 *
 * @param {AdmissionState} state
 * @returns {{ ok: boolean, reason: string }}
 */
export function evaluateAdmission(state) {
  const wellFormed =
    Boolean(state) &&
    typeof state === 'object' &&
    Number.isInteger(state.maxInFlight) && state.maxInFlight > 0 &&
    Number.isInteger(state.queueBound) && state.queueBound > 0 &&
    Number.isInteger(state.inFlight) && state.inFlight >= 0 &&
    Number.isInteger(state.queued) && state.queued >= 0;
  if (!wellFormed) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_ADMISSION_STATE };
  }

  const slotFree = state.inFlight < state.maxInFlight;
  if (slotFree) {
    return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
  }

  const queueHasRoom = state.queued < state.queueBound;
  return {
    ok: false,
    reason: queueHasRoom ? RUNTIME_MANAGER_REASONS.AT_CAPACITY : RUNTIME_MANAGER_REASONS.QUEUE_FULL,
  };
}
| 529 | |
/**
 * Record that a request has been granted an in-flight concurrency slot.
 * Returns a NEW admission state (the input is not mutated).
 *
 * The concurrency bound is enforced here as well as in evaluateAdmission: the call throws
 * when every slot is already taken, so a caller that skips the admission check cannot push
 * `inFlight` past `maxInFlight`. This mirrors the underflow guard in recordCompletion.
 *
 * The caller must pair each successful call with recordCompletion when the request ends.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} when the state is malformed (`inFlight` not a non-negative integer,
 *   or `maxInFlight` not a positive integer) or when no in-flight slot is free.
 */
export function recordInFlight(state) {
  if (
    !state ||
    typeof state !== 'object' ||
    !Number.isInteger(state.inFlight) || state.inFlight < 0 ||
    !Number.isInteger(state.maxInFlight) || state.maxInFlight <= 0
  ) {
    throw new TypeError('recordInFlight: state is malformed');
  }
  if (state.inFlight >= state.maxInFlight) {
    throw new TypeError('recordInFlight: no free in-flight slot');
  }
  return { ...state, inFlight: state.inFlight + 1 };
}
| 547 | |
/**
 * Release one in-flight slot after a request finishes.
 * Returns a NEW admission state with `inFlight` decremented; the input is left untouched.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} when the state is not an object with an integer `inFlight`, or when
 *   `inFlight` is zero or negative (nothing to complete).
 */
export function recordCompletion(state) {
  const wellFormed =
    Boolean(state) && typeof state === 'object' && Number.isInteger(state.inFlight);
  if (!wellFormed) {
    throw new TypeError('recordCompletion: state is malformed');
  }

  const nothingInFlight = state.inFlight < 1;
  if (nothingInFlight) {
    throw new TypeError('recordCompletion: no in-flight requests to complete');
  }

  const remaining = state.inFlight - 1;
  return { ...state, inFlight: remaining };
}
| 565 | |
/**
 * Record that a request has been placed in the wait queue (not yet admitted to in-flight).
 * Returns a NEW admission state (the input is not mutated).
 *
 * The queue bound is enforced here as well as in evaluateAdmission: the call throws when
 * the queue is already full, so a caller that skips the admission check cannot push
 * `queued` past `queueBound`. This mirrors the underflow guard in recordDequeued.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} when the state is malformed (`queued` not a non-negative integer,
 *   or `queueBound` not a positive integer) or when the queue is full.
 */
export function recordQueued(state) {
  if (
    !state ||
    typeof state !== 'object' ||
    !Number.isInteger(state.queued) || state.queued < 0 ||
    !Number.isInteger(state.queueBound) || state.queueBound <= 0
  ) {
    throw new TypeError('recordQueued: state is malformed');
  }
  if (state.queued >= state.queueBound) {
    throw new TypeError('recordQueued: queue is full');
  }
  return { ...state, queued: state.queued + 1 };
}
| 580 | |
/**
 * Remove one request from the wait queue (because it was admitted or cancelled).
 * Returns a NEW admission state with `queued` decremented; the input is left untouched.
 *
 * @param {AdmissionState} state
 * @returns {AdmissionState}
 * @throws {TypeError} when the state is not an object with an integer `queued`, or when
 *   `queued` is zero or negative (nothing to dequeue).
 */
export function recordDequeued(state) {
  const wellFormed =
    Boolean(state) && typeof state === 'object' && Number.isInteger(state.queued);
  if (!wellFormed) {
    throw new TypeError('recordDequeued: state is malformed');
  }

  const queueEmpty = state.queued < 1;
  if (queueEmpty) {
    throw new TypeError('recordDequeued: no queued requests to dequeue');
  }

  const remaining = state.queued - 1;
  return { ...state, queued: remaining };
}
| 598 | |
| 599 | // ───────────────────────────────────────────────────────────────────────────── |
| 600 | // §5 — Resource-limit policy |
| 601 | // ───────────────────────────────────────────────────────────────────────────── |
| 602 | |
| 603 | /** |
| 604 | * @typedef {Object} ResourceLimits |
| 605 | * @property {number} maxRamBytes - Maximum RAM usage in bytes (> 0). |
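| 606 | * @property {number} maxVramBytes - Maximum VRAM usage in bytes (> 0 and finite; Infinity is rejected by validation, so use a large finite ceiling if there is no GPU). |
| 607 | * @property {number} maxCpuPercent - Maximum CPU usage percent, in the range (0, 100]; an observation above this value is over limit. |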
| 608 | */ |
| 609 | |
| 610 | /** |
| 611 | * @typedef {Object} ResourceObservation |
| 612 | * @property {number} ramBytes - Current RAM used by the runtime process in bytes. |
| 613 | * @property {number} vramBytes - Current VRAM used (0 if no GPU). |
| 614 | * @property {number} cpuPercent - Current CPU percent (0–100). |
| 615 | */ |
| 616 | |
/**
 * Validate a set of resource ceilings and return them as a ResourceLimits object.
 *
 * `maxRamBytes` and `maxVramBytes` must be finite numbers greater than zero (Infinity and
 * NaN are rejected); `maxCpuPercent` must be finite and in the range (0, 100]. Fields are
 * checked in the order RAM, VRAM, CPU and the first failure throws.
 *
 * @param {{ maxRamBytes: number, maxVramBytes: number, maxCpuPercent: number }} params
 * @returns {ResourceLimits}
 * @throws {TypeError} on invalid parameters.
 */
export function createResourceLimits({ maxRamBytes, maxVramBytes, maxCpuPercent }) {
  const ramValid = Number.isFinite(maxRamBytes) && maxRamBytes > 0;
  if (!ramValid) {
    throw new TypeError('createResourceLimits: maxRamBytes must be a positive finite number');
  }

  const vramValid = Number.isFinite(maxVramBytes) && maxVramBytes > 0;
  if (!vramValid) {
    throw new TypeError('createResourceLimits: maxVramBytes must be a positive finite number');
  }

  const cpuValid = Number.isFinite(maxCpuPercent) && maxCpuPercent > 0 && maxCpuPercent <= 100;
  if (!cpuValid) {
    throw new TypeError('createResourceLimits: maxCpuPercent must be in (0, 100]');
  }

  return { maxRamBytes, maxVramBytes, maxCpuPercent };
}
| 637 | |
/**
 * Evaluate whether the current resource observation is within the configured limits.
 * Returns the FIRST violation found (RAM, then VRAM, then CPU) or ok.
 *
 * Limits are validated before the observation, to the same rules createResourceLimits
 * enforces: RAM and VRAM ceilings finite and > 0, CPU ceiling finite and in (0, 100].
 * A limits object built by hand with an out-of-range CPU ceiling is therefore treated as
 * malformed rather than as a ceiling no observation in the documented range can exceed.
 *
 * Fail-closed: malformed limits → MALFORMED_LIMITS; malformed observation (any field not
 * a finite number >= 0) → MALFORMED_OBSERVATION. A value equal to its ceiling is within
 * limits. Observed numeric values never appear in the returned reason.
 *
 * @param {ResourceObservation} observation - Current runtime resource usage (from Phase 5 stat adapter).
 * @param {ResourceLimits} limits - Configured ceilings.
 * @returns {{ ok: boolean, reason: string }}
 */
export function evaluateResourceLimits(observation, limits) {
  if (
    !limits ||
    typeof limits !== 'object' ||
    !Number.isFinite(limits.maxRamBytes) || limits.maxRamBytes <= 0 ||
    !Number.isFinite(limits.maxVramBytes) || limits.maxVramBytes <= 0 ||
    !Number.isFinite(limits.maxCpuPercent) || limits.maxCpuPercent <= 0 ||
    limits.maxCpuPercent > 100 // same upper bound createResourceLimits enforces
  ) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_LIMITS };
  }
  if (
    !observation ||
    typeof observation !== 'object' ||
    !Number.isFinite(observation.ramBytes) || observation.ramBytes < 0 ||
    !Number.isFinite(observation.vramBytes) || observation.vramBytes < 0 ||
    !Number.isFinite(observation.cpuPercent) || observation.cpuPercent < 0
  ) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.MALFORMED_OBSERVATION };
  }

  if (observation.ramBytes > limits.maxRamBytes) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.RAM_OVER_LIMIT };
  }
  if (observation.vramBytes > limits.maxVramBytes) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.VRAM_OVER_LIMIT };
  }
  if (observation.cpuPercent > limits.maxCpuPercent) {
    return { ok: false, reason: RUNTIME_MANAGER_REASONS.CPU_OVER_LIMIT };
  }

  return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
}
| 681 | |
| 682 | // ───────────────────────────────────────────────────────────────────────────── |
| 683 | // §6 — Top-level runtime request gate |
| 684 | // ───────────────────────────────────────────────────────────────────────────── |
| 685 | |
| 686 | /** |
| 687 | * @typedef {Object} RuntimeDecision |
| 688 | * @property {boolean} ok - true only when all gates pass (inference may proceed). |
| 689 | * @property {string} reason - A RUNTIME_MANAGER_REASONS constant. Never a secret. |
| 690 | */ |
| 691 | |
/**
 * Single entry point that decides whether one inference request may be sent to the
 * bundled runtime.
 *
 * The request passes through three gates in this order, and the first gate that refuses
 * determines the result:
 *   1. Lifecycle gate — `canServeInference(lifecycleState)` must be truthy, else NOT_READY.
 *   2. Admission gate — `evaluateAdmission(admissionState)`; its rejection reason is
 *      passed through unchanged.
 *   3. Resource gate  — `evaluateResourceLimits(resourceObservation, resourceLimits)`; its
 *      rejection reason is passed through unchanged.
 *
 * Fail-closed behaviour:
 *   - A nullish `params`, or any of the four fields missing/falsy → MALFORMED_REQUEST_PARAMS.
 *   - Any exception thrown while evaluating (e.g. by a hostile getter on the input) is
 *     swallowed and also reported as MALFORMED_REQUEST_PARAMS, so no input data can leak
 *     through an error message.
 *
 * The function only produces a decision; it does not update the admission state. The caller
 * is expected to record the in-flight request itself once it decides to proceed.
 *
 * @param {{
 *   lifecycleState: LifecycleState,
 *   admissionState: AdmissionState,
 *   resourceObservation: ResourceObservation,
 *   resourceLimits: ResourceLimits,
 * }} params
 * @returns {RuntimeDecision} `ok` is true only when all three gates pass.
 */
export function evaluateRuntimeRequest(params) {
  // Every refusal has the same shape; only the reason constant varies.
  const deny = (reason) => ({ ok: false, reason });

  try {
    const {
      lifecycleState: lifecycle,
      admissionState: admission,
      resourceObservation: usage,
      resourceLimits: ceilings,
    } = params ?? {};

    const allPresent = Boolean(lifecycle && admission && usage && ceilings);
    if (!allPresent) {
      return deny(RUNTIME_MANAGER_REASONS.MALFORMED_REQUEST_PARAMS);
    }

    // Gate 1: lifecycle.
    const servable = canServeInference(lifecycle);
    if (!servable) {
      return deny(RUNTIME_MANAGER_REASONS.NOT_READY);
    }

    // Gate 2: admission (concurrency / queue bounds).
    const admissionVerdict = evaluateAdmission(admission);
    if (!admissionVerdict.ok) {
      return deny(admissionVerdict.reason);
    }

    // Gate 3: resource ceilings.
    const resourceVerdict = evaluateResourceLimits(usage, ceilings);
    if (!resourceVerdict.ok) {
      return deny(resourceVerdict.reason);
    }

    return { ok: true, reason: RUNTIME_MANAGER_REASONS.OK };
  } catch {
    // Defense in depth: an unexpected throw becomes a fixed-reason denial.
    return deny(RUNTIME_MANAGER_REASONS.MALFORMED_REQUEST_PARAMS);
  }
}
| 750 | |
| 751 | // ───────────────────────────────────────────────────────────────────────────── |
| 752 | // §7 — Injected adapter interface (type documentation only; no implementation) |
| 753 | // ───────────────────────────────────────────────────────────────────────────── |
| 754 | |
| 755 | /** |
| 756 | * The adapter interface that Phase 5 MUST supply to connect the decision core to the real |
| 757 | * Ollama/llama.cpp runtime. The pure module in this file imports NONE of these β they are |
| 758 | * passed explicitly by Phase 5's binding layer. |
| 759 | * |
| 760 | * SECURITY INVARIANT: the adapter must not expose vault, canister, keychain, or JWT handles. |
| 761 | * It is scoped exclusively to model-lifecycle operations (spawn, download, health, resource probe). |
| 762 | * |
| 763 | * @typedef {Object} RuntimeAdapterFns |
| 764 | * @property {(opts: SpawnOpts) => Promise<SpawnHandle>} spawn |
| 765 | * Spawn the Ollama/llama.cpp process. Must bind to 127.0.0.1 only (Phase 2 §4.5). |
| 766 | * Called ONLY after integrity verification passes (finalize().ok === true). |
| 767 | * @property {(url: string, onChunk: (chunk: Uint8Array) => void) => Promise<void>} download |
| 768 | * Download a model file over TLS, calling onChunk for each received chunk. |
| 769 | * The URL MUST be one that passed validateSourceUrl. Phase 5 feeds chunks to the |
| 770 | * integrity accumulator via the onChunk callback. |
| 771 | * @property {(handle: SpawnHandle) => Promise<boolean>} healthCheck |
| 772 | * Return true if the runtime responds correctly to a health probe (OpenAI-compat |
| 773 | * GET /v1/models or Ollama GET /api/tags). Phase 5 drives the health-check retry loop |
| 774 | * and calls transitionLifecycle(state, HEALTH_OK | HEALTH_FAIL). |
| 775 | * @property {() => Promise<ResourceObservation>} statResources |
| 776 | * Return the current RAM/VRAM/CPU usage for the runtime process. Called before each |
| 777 | * inference request; result is passed to evaluateResourceLimits. |
| 778 | */ |
| 779 | |
| 780 | /** |
| 781 | * @typedef {Object} SpawnOpts |
| 782 | * @property {string} binaryPath - Absolute path to the Ollama/llama.cpp binary. |
| 783 | * @property {string} modelPath - Absolute path to the verified model file. |
| 784 | * @property {number} port - Ephemeral port allocated by Phase 5 (non-predictable). |
| 785 | * @property {number} maxRamBytes - Memory ceiling to pass to the runtime's CLI flags. |
| 786 | */ |
| 787 | |
| 788 | /** |
| 789 | * @typedef {Object} SpawnHandle |
| 790 | * @property {number} pid - Process ID of the spawned runtime. |
| 791 | * @property {() => Promise<void>} kill - Gracefully shut down the runtime. |
| 792 | */ |