transcribe.mjs
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠ breaking
2 days ago
| 1 | /** |
| 2 | * Transcription: audio/video → text. Phase 7. |
| 3 | * Provider: OpenAI Whisper (OPENAI_API_KEY required). |
| 4 | * Optional: ffmpeg transcodes files over 25MB when enabled and ffmpeg is available. |
| 5 | */ |
| 6 | |
| 7 | import fs from 'fs'; |
| 8 | import path from 'path'; |
| 9 | import { readTranscriptionYaml } from './config.mjs'; |
| 10 | import { getRepoRoot } from './repo-root.mjs'; |
| 11 | |
/** URL that transcription requests are POSTed to. */
const WHISPER_URL = 'https://api.openai.com/v1/audio/transcriptions';

/**
 * Upper bound, in bytes, on a single upload to the OpenAI transcription endpoint.
 * Taken from the API docs and consistent with observed 413 responses.
 */
export const WHISPER_MAX_FILE_BYTES = 25 * 1024 ** 2;

/** User-facing advice for getting a recording under the upload limit. */
const FFMPEG_HINT = 'Install ffmpeg (https://ffmpeg.org/download.html) and ensure it is on PATH, or set FFMPEG_PATH, so Knowtation can compress oversized files automatically. Or export a smaller MP3/M4A, use a shorter clip, or import an existing transcript as Markdown.';

/** Lower-case file extensions (with leading dot) accepted for upload. */
const SUPPORTED_EXT = new Set(
  ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm'].map((name) => `.${name}`)
);
| 22 | |
/**
 * Build (but do not throw) the error describing a file that exceeds the upload limit.
 * The message always states the file size in MB with one decimal place; the trailing
 * advice depends on whether automatic compression was enabled.
 *
 * @param {boolean} transcodeEnabled - Truthy selects the ffmpeg hint; falsy explains that compression is disabled.
 * @param {number} sizeBytes - Size of the offending file in bytes.
 * @returns {Error}
 */
function oversizeError(transcodeEnabled, sizeBytes) {
  const BYTES_PER_MB = 1024 * 1024;
  const sizeLabel = (sizeBytes / BYTES_PER_MB).toFixed(1);
  const prefix = `File is ${sizeLabel}MB; OpenAI Whisper accepts at most 25MB per request.`;
  // Only the selected branch is evaluated, mirroring the original if/return structure.
  const advice = transcodeEnabled
    ? FFMPEG_HINT
    : 'Automatic compression is disabled (transcription.transcode_oversized: false or KNOWTATION_TRANSCODE_OVERSIZED=0). Export a smaller MP3/M4A, use a shorter clip, or import an existing transcript as Markdown.';
  return new Error(`${prefix} ${advice}`);
}
| 38 | |
/**
 * Decide whether oversized files should be compressed automatically.
 *
 * Precedence, first match wins:
 *   1. a boolean `options.transcodeOversized`;
 *   2. KNOWTATION_TRANSCODE_OVERSIZED set to '0'/'false' or '1'/'true';
 *   3. `transcode_oversized` from the transcription YAML (anything but `false` enables);
 *   4. enabled, if the YAML cannot be read.
 *
 * @param {{ transcodeOversized?: boolean }} options
 * @returns {boolean}
 */
function resolveTranscodeOversized(options) {
  const { transcodeOversized } = options;
  if (typeof transcodeOversized === 'boolean') {
    return transcodeOversized;
  }

  switch (process.env.KNOWTATION_TRANSCODE_OVERSIZED) {
    case '0':
    case 'false':
      return false;
    case '1':
    case 'true':
      return true;
    default:
      // Unset or unrecognised value: fall through to the YAML config.
      break;
  }

  try {
    const yamlSettings = readTranscriptionYaml(getRepoRoot());
    return yamlSettings.transcode_oversized !== false;
  } catch (_) {
    // Any failure while reading the config leaves compression enabled.
    return true;
  }
}
| 55 | |
/**
 * Transcribe an audio or video file to text via the OpenAI transcription endpoint.
 *
 * A file larger than WHISPER_MAX_FILE_BYTES is handed to `transcodeUnderWhisperLimit`
 * (loaded lazily from ./ffmpeg-whisper-transcode.mjs) when automatic transcoding is
 * enabled; the cleanup callback it returns is always invoked before this function settles.
 *
 * @param {string} filePath - Absolute or cwd-relative path to audio/video file
 * @param {{ apiKey?: string, model?: string, transcodeOversized?: boolean }} options
 *   `apiKey` falls back to OPENAI_API_KEY; an empty/missing `model` falls back to the
 *   transcription YAML `model`, then to 'whisper-1'.
 * @returns {Promise<{ text: string, transcoded?: boolean }>} Trimmed transcript;
 *   `transcoded: true` is present only when a compressed copy was uploaded.
 * @throws {Error} If the path is not a regular file, the extension is unsupported, the
 *   file is too large and cannot be compressed, no API key is available, or the HTTP
 *   request does not succeed.
 */
export async function transcribe(filePath, options = {}) {
  const absPath = path.isAbsolute(filePath) ? filePath : path.resolve(process.cwd(), filePath);

  // Stat once and reuse the result for both the "is a file" check and the size check,
  // so the two decisions are made about the same snapshot of the file.
  let st = null;
  try {
    st = fs.statSync(absPath);
  } catch (_) {
    // Any stat failure (missing path, unreadable parent, ...) is reported as "not found".
    st = null;
  }
  if (!st || !st.isFile()) {
    throw new Error(`File not found: ${filePath}`);
  }

  const ext = path.extname(absPath).toLowerCase();
  if (!SUPPORTED_EXT.has(ext)) {
    throw new Error(
      `Unsupported format: ${ext}. Use mp3, mp4, mpeg, mpga, m4a, wav, or webm.`
    );
  }

  const transcodeEnabled = resolveTranscodeOversized(options);

  let pathForUpload = absPath;
  let transcoded = false;
  /** @type {(() => void) | null} */
  let cleanupTemp = null;

  try {
    if (st.size > WHISPER_MAX_FILE_BYTES) {
      if (!transcodeEnabled) {
        throw oversizeError(false, st.size);
      }
      // Loaded on demand so the transcoder module is only needed for oversized input.
      const { transcodeUnderWhisperLimit } = await import('./ffmpeg-whisper-transcode.mjs');
      const result = await transcodeUnderWhisperLimit(absPath, WHISPER_MAX_FILE_BYTES);
      if (!result) {
        throw oversizeError(true, st.size);
      }
      pathForUpload = result.path;
      transcoded = true;
      cleanupTemp = result.cleanup;
      // Re-check: the transcoder's output is not trusted to be under the limit.
      const transcodedStat = fs.statSync(pathForUpload);
      if (transcodedStat.size > WHISPER_MAX_FILE_BYTES) {
        throw new Error(
          `After compression the file is still over 25MB. Split the recording or reduce length. ${FFMPEG_HINT}`
        );
      }
    }

    // NOTE(review): the key is validated after the size/transcode checks on purpose,
    // preserving the existing order in which errors are reported to callers.
    const apiKey = options.apiKey ?? process.env.OPENAI_API_KEY;
    if (!apiKey) {
      throw new Error(
        'OPENAI_API_KEY is required for transcription. Set it in the environment or config.'
      );
    }

    let model = options.model;
    if (model == null || model === '') {
      try {
        model = readTranscriptionYaml(getRepoRoot()).model || 'whisper-1';
      } catch (_) {
        // Config unreadable: use the default model rather than failing the request.
        model = 'whisper-1';
      }
    }

    // Async read: the payload can be up to 25MB, so avoid blocking the event loop.
    const fileBytes = await fs.promises.readFile(pathForUpload);
    const form = new FormData();
    form.append('file', new Blob([fileBytes]), path.basename(pathForUpload));
    form.append('model', model);

    const res = await fetch(WHISPER_URL, {
      method: 'POST',
      headers: { Authorization: `Bearer ${apiKey}` },
      body: form,
    });

    if (!res.ok) {
      const err = await res.text();
      throw new Error(`Transcription failed: ${res.status} ${res.statusText} - ${err}`);
    }

    const data = await res.json();
    // Tolerate a null body or a non-string `text` instead of throwing a TypeError.
    const text = typeof data?.text === 'string' ? data.text.trim() : '';
    return transcoded ? { text, transcoded: true } : { text };
  } finally {
    if (typeof cleanupTemp === 'function') {
      try {
        cleanupTemp();
      } catch (_) {
        // Best-effort cleanup: a failure here must not mask the real result or error.
      }
    }
  }
}
File History
2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd
feat(calendar): enforce agent context tiers in retrieval AP…
Human
minor
⚠
2 days ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6
docs: accept Calendar Events v0 spec with Phase 0 security …
Human
2 days ago