notebooklm.mjs
121 lines 4.6 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
2 * NotebookLM import. Accepts (1) a folder of markdown files (e.g. from Google takeout or Apify export),
3 * or (2) a JSON file with an array of sources/conversations. One note per file or per entry.
4 * Frontmatter: source: notebooklm, source_id from filename or entry id.
5 */
6
7 import fs from 'fs';
8 import path from 'path';
9 import { writeNote } from '../write.mjs';
10 import { parseFrontmatterAndBody, normalizeSlug } from '../vault.mjs';
11
/**
 * Entry point for the NotebookLM importer.
 *
 * `input` is resolved against the current working directory when it is not
 * absolute. A regular file is only accepted when its name ends in `.json`, in
 * which case the JSON importer handles it. Otherwise `input` is treated as a
 * folder that is searched recursively for `.md` files; each file becomes one
 * note at the same relative path under `ctx.outputBase`.
 *
 * Frontmatter already present in a file is kept, with `source`, `source_id`,
 * `date` and (when provided) `project` / `tags` written over it. Keys whose
 * value is undefined, null or the empty string are dropped before writing.
 * Nothing is written when `ctx.dryRun` is set, but the result is still reported.
 *
 * @param {string} input - Path to folder of .md files or to a .json export
 * @param {{ vaultPath: string, outputBase: string, project?: string, tags: string[], dryRun: boolean }} ctx
 * @returns {Promise<{ imported: { path: string, source_id?: string }[], count: number }>}
 * @throws {Error} When the input does not exist, is a non-JSON file, or the folder holds no .md files.
 */
export async function importNotebookLM(input, ctx) {
  const { vaultPath, outputBase, project, tags, dryRun } = ctx;

  const absInput = path.isAbsolute(input) ? input : path.resolve(process.cwd(), input);
  if (!fs.existsSync(absInput)) {
    throw new Error(`Input not found: ${input}. Use a folder of markdown files or a NotebookLM export JSON.`);
  }

  // A single file is only valid when it is a JSON export.
  if (fs.statSync(absInput).isFile()) {
    if (!absInput.endsWith('.json')) {
      throw new Error('NotebookLM import expects a folder of .md files or a .json export file.');
    }
    return importNotebookLMJson(absInput, ctx);
  }

  const markdownFiles = [];
  walkMarkdown(absInput, absInput, '', markdownFiles);
  if (markdownFiles.length === 0) {
    throw new Error('No .md files found in folder. Export NotebookLM sources to markdown (e.g. Google takeout or Apify), then pass the folder.');
  }

  const today = new Date().toISOString().slice(0, 10);
  const hasValue = ([, value]) => value !== undefined && value !== null && value !== '';
  const imported = [];

  for (const file of markdownFiles) {
    const text = fs.readFileSync(file.fullPath, 'utf8');
    const { frontmatter, body } = parseFrontmatterAndBody(text);

    // Keep only the leading 10 characters of an existing date; otherwise use today.
    let date = today;
    if (frontmatter.date) {
      date = String(frontmatter.date).slice(0, 10) || today;
    }

    const outputRel = path.join(outputBase, file.relPath).replace(/\\/g, '/');
    const sourceId = frontmatter.source_id || path.basename(file.relPath, '.md');

    // Later keys win, so the importer's fields override what the file declared.
    const merged = {
      ...frontmatter,
      source: 'notebooklm',
      source_id: sourceId,
      date,
      ...(project && { project: normalizeSlug(project) }),
      ...(tags.length && { tags }),
    };

    if (!dryRun) {
      const cleaned = Object.fromEntries(Object.entries(merged).filter(hasValue));
      writeNote(vaultPath, outputRel, { body, frontmatter: cleaned });
    }

    imported.push({ path: outputRel, source_id: sourceId });
  }

  return { imported, count: imported.length };
}
66
/**
 * Convert a raw `created_at` / `date` value into the 10-character date used in
 * frontmatter. Numbers are passed to `new Date()`; a number that does not
 * produce a valid Date yields `fallback` instead of letting `toISOString()`
 * throw. Anything else is stringified and cut to its first 10 characters.
 *
 * @param {unknown} value - Raw date value taken from the entry.
 * @param {string} fallback - Date to use when a numeric value is not a valid timestamp.
 * @returns {string}
 */
function notebookLmEntryDate(value, fallback) {
  if (typeof value === 'number') {
    const parsed = new Date(value);
    return Number.isNaN(parsed.getTime()) ? fallback : parsed.toISOString().slice(0, 10);
  }
  return String(value).slice(0, 10);
}

/**
 * Import a NotebookLM JSON export, producing one note per entry.
 *
 * The file must hold either a top-level array or an object carrying the
 * entries under `sources`, `conversations` or `notes` (the first truthy one is
 * used). An object with none of those keys imports nothing.
 *
 * Per entry:
 * - body: `content`, `text`, `markdown` or `body`, else the entry serialized as JSON;
 * - source id: `id`, `source_id` or `name`, else `notebooklm-<index>`;
 * - file name: the source id with characters outside `[a-zA-Z0-9._-]` replaced
 *   by `_`, cut to 60 characters, plus `.md`. When two entries in the same run
 *   map to the same name (compared case-insensitively), later ones get a
 *   `-2`, `-3`, ... suffix so they do not overwrite each other.
 * - `null` entries carry no content and are skipped.
 *
 * Nothing is written when `ctx.dryRun` is set, but the result is still reported.
 *
 * @param {string} jsonPath - Path to the .json export file.
 * @param {{ vaultPath: string, outputBase: string, project?: string, tags: string[], dryRun: boolean }} ctx
 * @returns {Promise<{ imported: { path: string, source_id?: string }[], count: number }>}
 * @throws {Error} When the file is not valid JSON or does not contain an entry array.
 */
async function importNotebookLMJson(jsonPath, ctx) {
  const { vaultPath, outputBase, project, tags, dryRun } = ctx;
  const raw = fs.readFileSync(jsonPath, 'utf8');
  let data;
  try {
    data = JSON.parse(raw);
  } catch (e) {
    throw new Error(`Invalid JSON: ${e.message}`, { cause: e });
  }

  const formatError = 'JSON must be an array or have sources/conversations/notes array.';
  // `null` and primitives are valid JSON but can never hold entries; reject them
  // with the format error instead of failing on a property access.
  if (data === null || typeof data !== 'object') {
    throw new Error(formatError);
  }
  const entries = Array.isArray(data) ? data : (data.sources || data.conversations || data.notes || []);
  if (!Array.isArray(entries)) {
    throw new Error(formatError);
  }

  const imported = [];
  const now = new Date().toISOString().slice(0, 10);
  const hasTags = Array.isArray(tags) && tags.length > 0;
  // Lower-cased file names already handed out in this run.
  const usedNames = new Set();

  for (let i = 0; i < entries.length; i++) {
    const e = entries[i];
    if (e === null || e === undefined) {
      continue;
    }

    const body = e.content || e.text || e.markdown || e.body || JSON.stringify(e);
    const sourceId = e.id || e.source_id || e.name || `notebooklm-${i}`;

    const baseName = String(sourceId).replace(/[^a-zA-Z0-9._-]/g, '_').slice(0, 60);
    let safeName = `${baseName}.md`;
    // Sanitizing and truncating can map different ids to one name; pick a free one.
    for (let n = 2; usedNames.has(safeName.toLowerCase()); n++) {
      safeName = `${baseName}-${n}.md`;
    }
    usedNames.add(safeName.toLowerCase());

    const outputRel = path.join(outputBase, safeName).replace(/\\/g, '/');
    const d = notebookLmEntryDate(e.created_at || e.date || now, now);

    const frontmatter = {
      source: 'notebooklm',
      source_id: String(sourceId).slice(0, 128),
      date: d,
      ...(e.title ? { title: e.title } : {}),
      ...(project ? { project: normalizeSlug(project) } : {}),
      ...(hasTags ? { tags } : {}),
    };

    if (!dryRun) {
      writeNote(vaultPath, outputRel, { body: String(body).trim(), frontmatter });
    }
    imported.push({ path: outputRel, source_id: frontmatter.source_id });
  }

  return { imported, count: imported.length };
}
110
/**
 * Recursively collect every file whose name ends in `.md` below `dir`.
 *
 * Results are appended to `out` in the order `fs.readdirSync` reports them,
 * descending into each directory as soon as it is encountered.
 *
 * @param {string} rootDir - Passed through unchanged to recursive calls; not otherwise read here.
 * @param {string} dir - Directory to scan on this call.
 * @param {string} relDir - '/'-separated path of `dir` relative to the scan root ('' at the top).
 * @param {{ fullPath: string, relPath: string }[]} out - Accumulator that is mutated in place.
 * @returns {void}
 */
function walkMarkdown(rootDir, dir, relDir, out) {
  for (const dirent of fs.readdirSync(dir, { withFileTypes: true })) {
    const childPath = path.join(dir, dirent.name);
    const childRel = relDir ? `${relDir}/${dirent.name}` : dirent.name;

    if (dirent.isDirectory()) {
      walkMarkdown(rootDir, childPath, childRel, out);
      continue;
    }
    if (!dirent.name.endsWith('.md')) {
      continue;
    }
    out.push({ fullPath: childPath, relPath: childRel.replace(/\\/g, '/') });
  }
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago