lib/importers/chatgpt.mjs · aaronrene/knowtation — MuseHub

aaronrene / knowtation public

chatgpt.mjs

127 lines 4.2 KB

Raw

sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago

/**
 * ChatGPT (OpenAI) export importer. Parses conversations.json from an extracted export folder
 * (the export ZIP must be extracted first; file inputs are rejected).
 * One note per conversation; frontmatter: source: chatgpt, source_id, date, title.
 */
5
6	import fs from 'fs';
7	import path from 'path';
8	import { writeNote } from '../write.mjs';
9	import { normalizeSlug } from '../vault.mjs';
10
/**
 * Import a ChatGPT (OpenAI) data export into the vault, one note per conversation.
 *
 * The input folder is searched recursively for `conversations.json`. Each conversation
 * with a non-empty transcript becomes a note under `outputBase`, named after its
 * sanitized title. Conversations whose sanitized titles collide within one run get a
 * numeric suffix (`_2`, `_3`, ...) so that no note silently overwrites another.
 *
 * @param {string} input - Path to the extracted export folder containing conversations.json.
 *   Relative paths are resolved against process.cwd(). A file path (e.g. the unextracted ZIP) is rejected.
 * @param {{ vaultPath: string, outputBase: string, project?: string, tags: string[], dryRun: boolean }} ctx
 *   When `dryRun` is true nothing is written; the returned list still describes what would be imported.
 * @returns {Promise<{ imported: { path: string, source_id?: string }[], count: number }>}
 * @throws {Error} If the input does not exist, is a file, contains no conversations.json,
 *   or conversations.json is not valid JSON.
 */
export async function importChatGPT(input, ctx) {
  const { vaultPath, outputBase, project, tags, dryRun } = ctx;
  const absInput = path.isAbsolute(input) ? input : path.resolve(process.cwd(), input);
  if (!fs.existsSync(absInput)) {
    throw new Error(`Input not found: ${input}`);
  }

  if (fs.statSync(absInput).isFile()) {
    throw new Error('ChatGPT export must be a folder. Extract the OpenAI export ZIP first, then pass the folder path.');
  }

  const conversationsPath = findConversationsJson(absInput);
  if (!conversationsPath) {
    throw new Error('conversations.json not found in input. Export from ChatGPT: Settings → Data Controls → Export Data.');
  }

  const raw = fs.readFileSync(conversationsPath, 'utf8');
  let data;
  try {
    data = JSON.parse(raw);
  } catch (e) {
    throw new Error(`Invalid conversations.json: ${e.message}`, { cause: e });
  }

  // Accept either a top-level array or an object wrapping `conversations`.
  // `data` may legitimately be null (valid JSON), so guard before property access.
  let conversations = [];
  if (Array.isArray(data)) {
    conversations = data;
  } else if (data && data.conversations && typeof data.conversations === 'object') {
    conversations = Object.values(data.conversations);
  }

  const toOutputRel = (baseName) => path.join(outputBase, `${baseName}.md`).replace(/\\/g, '/');

  const imported = [];
  // Lower-cased paths already produced in this run; lower-casing also catches
  // collisions on case-insensitive filesystems.
  const usedPaths = new Set();

  for (let i = 0; i < conversations.length; i++) {
    const conv = conversations[i];
    if (!conv || typeof conv !== 'object') continue;

    const title = conv.title || `Conversation ${i + 1}`;
    const mapping = conv.mapping || {};
    const body = buildTranscript(mapping);
    if (!body.trim()) continue;

    const convId = conv.id || Object.keys(mapping)[0] || `conv-${i}`;
    const sourceId = `chatgpt_${String(convId).replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 64)}`;
    const date = extractDate(conv);
    // String() guards against a non-string title in malformed exports.
    const safeTitle = String(title).replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 60) || `chatgpt-${i}`;

    // Disambiguate duplicate titles instead of overwriting the earlier note.
    let outputRel = toOutputRel(safeTitle);
    for (let n = 2; usedPaths.has(outputRel.toLowerCase()); n++) {
      outputRel = toOutputRel(`${safeTitle}_${n}`);
    }
    usedPaths.add(outputRel.toLowerCase());

    const frontmatter = {
      source: 'chatgpt',
      source_id: sourceId,
      date,
      title,
      ...(project && { project: normalizeSlug(project) }),
      ...(tags?.length ? { tags } : {}),
    };

    if (!dryRun) {
      writeNote(vaultPath, outputRel, { body, frontmatter });
    }
    imported.push({ path: outputRel, source_id: sourceId });
  }

  return { imported, count: imported.length };
}
83
/**
 * Locate `conversations.json` at or below `dir`.
 *
 * Performs a depth-first, pre-order search: a directory's own `conversations.json`
 * is checked before any of its subdirectories, and subdirectories are visited in the
 * order returned by `fs.readdirSync`.
 *
 * @param {string} dir - Directory to start searching from.
 * @returns {string | null} Path of the first regular file named conversations.json, or null if none is found.
 */
function findConversationsJson(dir) {
  const pending = [dir];

  while (pending.length > 0) {
    const current = pending.pop();
    const candidate = path.join(current, 'conversations.json');
    if (fs.existsSync(candidate) && fs.statSync(candidate).isFile()) {
      return candidate;
    }

    const subdirs = fs
      .readdirSync(current, { withFileTypes: true })
      .filter((entry) => entry.isDirectory())
      .map((entry) => path.join(current, entry.name));

    // Push in reverse so the first subdirectory is popped (and fully explored) first.
    for (let idx = subdirs.length - 1; idx >= 0; idx--) {
      pending.push(subdirs[idx]);
    }
  }

  return null;
}
96
/**
 * Build a plain-text transcript from a conversation's `mapping`.
 *
 * Every node with a message contributes one `role:\ntext` section; sections are ordered
 * by the message's `create_time` ascending (a missing create_time sorts as 0) and
 * separated by a blank line. All non-blank string entries of `content.parts` are kept
 * (joined with a newline); non-string parts are skipped. Messages with no text are omitted.
 *
 * @param {Record<string, { message?: { content?: { parts?: unknown[] }, author?: { role?: string }, create_time?: number }, children?: string[] }>} mapping
 * @returns {string} The transcript, or an empty string when no message has text.
 */
function buildTranscript(mapping) {
  const sections = [];

  for (const info of Object.values(mapping || {})) {
    const msg = info?.message;
    if (!msg) continue;

    const rawParts = msg.content?.parts;
    if (!Array.isArray(rawParts)) continue;

    // Keep every textual part: the first part may be a non-text object,
    // and a message may carry more than one text part.
    const textParts = rawParts.filter((part) => typeof part === 'string' && part.trim());
    if (textParts.length === 0) continue;

    sections.push({
      role: msg.author?.role || 'unknown',
      text: textParts.join('\n'),
      create_time: msg.create_time,
    });
  }

  sections.sort((a, b) => (a.create_time || 0) - (b.create_time || 0));
  return sections.map((s) => `${s.role}:\n${s.text}`).join('\n\n');
}
119
/**
 * Derive the note date (YYYY-MM-DD, UTC) from a conversation record.
 *
 * Reads `create_time`, falling back to `created`. Numeric values (or numeric strings)
 * are treated as seconds since the Unix epoch; other strings are parsed as date strings.
 * When the timestamp is missing or cannot be interpreted, today's date is returned
 * instead of throwing.
 *
 * @param {{ create_time?: number | string, created?: number | string }} conv
 * @returns {string} Date formatted as YYYY-MM-DD.
 */
function extractDate(conv) {
  const rawTime = conv?.create_time || conv?.created;
  if (rawTime) {
    const seconds = Number(rawTime);
    const parsed = Number.isFinite(seconds) ? new Date(seconds * 1000) : new Date(rawTime);
    // toISOString() throws RangeError on an Invalid Date, so validate first.
    if (!Number.isNaN(parsed.getTime())) {
      return parsed.toISOString().slice(0, 10);
    }
  }
  return new Date().toISOString().slice(0, 10);
}

File History 2 commits

sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ 1 day ago

sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago