chatgpt.mjs
127 lines 4.2 KB
Raw
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor ⚠ breaking 1 day ago
1 /**
 * ChatGPT (OpenAI) export importer. Parses conversations.json from an extracted export folder (extract the ZIP first).
3 * One note per conversation; frontmatter: source: chatgpt, source_id, date, title.
4 */
5
6 import fs from 'fs';
7 import path from 'path';
8 import { writeNote } from '../write.mjs';
9 import { normalizeSlug } from '../vault.mjs';
10
/**
 * Import a ChatGPT (OpenAI) data export into the vault, writing one note per conversation.
 *
 * conversations.json is located by searching `input` recursively. Conversations whose
 * transcript is empty are skipped. Output file names are derived from the conversation
 * title; when several conversations in the same run sanitize to the same name, later ones
 * get a numeric suffix (`-2`, `-3`, …) so they do not overwrite each other.
 *
 * @param {string} input - Path to the extracted export folder containing conversations.json.
 *   Relative paths are resolved against the current working directory. A file path
 *   (e.g. the un-extracted ZIP) is rejected.
 * @param {{ vaultPath: string, outputBase: string, project?: string, tags: string[], dryRun: boolean }} ctx
 *   When `ctx.dryRun` is true nothing is written; the would-be outputs are still returned.
 * @returns {Promise<{ imported: { path: string, source_id?: string }[], count: number }>}
 * @throws {Error} If the input does not exist, is a file, contains no conversations.json,
 *   or conversations.json is not valid JSON.
 */
export async function importChatGPT(input, ctx) {
  const { vaultPath, outputBase, project, tags, dryRun } = ctx;
  const absInput = path.isAbsolute(input) ? input : path.resolve(process.cwd(), input);
  if (!fs.existsSync(absInput)) {
    throw new Error(`Input not found: ${input}`);
  }

  if (fs.statSync(absInput).isFile()) {
    throw new Error('ChatGPT export must be a folder. Extract the OpenAI export ZIP first, then pass the folder path.');
  }

  const conversationsPath = findConversationsJson(absInput);
  if (!conversationsPath) {
    throw new Error('conversations.json not found in input. Export from ChatGPT: Settings → Data Controls → Export Data.');
  }

  const raw = fs.readFileSync(conversationsPath, 'utf8');
  let data;
  try {
    data = JSON.parse(raw);
  } catch (e) {
    throw new Error(`Invalid conversations.json: ${e.message}`, { cause: e });
  }

  // Accept either a top-level array or an object with a `conversations` collection.
  // `data` may legally be null (JSON `null`), so guard before touching its properties.
  let conversations = [];
  if (Array.isArray(data)) {
    conversations = data;
  } else if (data && data.conversations && typeof data.conversations === 'object') {
    conversations = Object.values(data.conversations);
  }
  if (!conversations.length) {
    return { imported: [], count: 0 };
  }

  const tagList = Array.isArray(tags) ? tags : [];
  const imported = [];
  // File stems already handed out in this run, lower-cased so that names differing only
  // in case do not collide on case-insensitive filesystems.
  const usedStems = new Set();

  for (let i = 0; i < conversations.length; i++) {
    const conv = conversations[i];
    // Tolerate malformed entries (null, strings, numbers) instead of aborting the import.
    if (!conv || typeof conv !== 'object') continue;

    // Coerce to string so a non-string title cannot break the sanitising step below.
    const title = conv.title ? String(conv.title) : `Conversation ${i + 1}`;
    const mapping = conv.mapping || {};
    const body = buildTranscript(mapping);
    if (!body.trim()) continue;

    const convId = conv.id || Object.keys(mapping)[0] || `conv-${i}`;
    const sourceId = `chatgpt_${String(convId).replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 64)}`;
    const date = extractDate(conv);
    const safeTitle = title.replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, 60) || `chatgpt-${i}`;

    // Distinct conversations frequently share a sanitized title (identical titles, or
    // titles made only of characters outside [a-zA-Z0-9_-]); suffix to avoid overwriting.
    let fileStem = safeTitle;
    for (let n = 2; usedStems.has(fileStem.toLowerCase()); n++) {
      fileStem = `${safeTitle}-${n}`;
    }
    usedStems.add(fileStem.toLowerCase());

    const outputRel = path.join(outputBase, `${fileStem}.md`).replace(/\\/g, '/');

    const frontmatter = {
      source: 'chatgpt',
      source_id: sourceId,
      date,
      title,
      ...(project && { project: normalizeSlug(project) }),
      ...(tagList.length > 0 && { tags: tagList }),
    };

    if (!dryRun) {
      writeNote(vaultPath, outputRel, { body, frontmatter });
    }
    imported.push({ path: outputRel, source_id: sourceId });
  }

  return { imported, count: imported.length };
}
83
/**
 * Locate conversations.json at or below `dir`.
 * The directory itself is checked first; otherwise each sub-directory is searched
 * depth-first in the order returned by readdirSync.
 * @param {string} dir - Directory to search.
 * @returns {string | null} Path to the first conversations.json found, or null if none exists.
 */
function findConversationsJson(dir) {
  const candidate = path.join(dir, 'conversations.json');
  const isRegularFile = fs.existsSync(candidate) && fs.statSync(candidate).isFile();
  if (isRegularFile) {
    return candidate;
  }

  const subdirectories = fs
    .readdirSync(dir, { withFileTypes: true })
    .filter((entry) => entry.isDirectory());

  for (const sub of subdirectories) {
    const hit = findConversationsJson(path.join(dir, sub.name));
    if (hit) {
      return hit;
    }
  }
  return null;
}
96
/**
 * Build a Markdown transcript from a conversation's `mapping` object.
 *
 * Every node with a message contributes one `**role:**` section containing all of the
 * message's non-blank string parts (non-string parts are ignored). Sections are ordered
 * by `message.create_time` ascending; messages without a timestamp are treated as time 0
 * and therefore come first. The parent/children links are not consulted.
 *
 * @param {Record<string, { message?: { content?: { parts?: unknown[] }, author?: { role?: string }, create_time?: number }, children?: string[] }>} mapping
 * @returns {string} The transcript, or an empty string when no message has text.
 */
function buildTranscript(mapping) {
  const messages = [];

  for (const node of Object.values(mapping ?? {})) {
    const msg = node?.message;
    if (!msg) continue;

    const rawParts = msg.content?.parts;
    if (!Array.isArray(rawParts)) continue;

    // A message can carry several parts, and not all of them are strings; keep every
    // textual part rather than only the first one.
    const text = rawParts
      .filter((part) => typeof part === 'string' && part.trim())
      .join('\n\n');
    if (!text) continue;

    messages.push({
      role: msg.author?.role || 'unknown',
      text,
      create_time: msg.create_time,
    });
  }

  messages.sort((a, b) => (a.create_time || 0) - (b.create_time || 0));
  return messages.map((m) => `**${m.role}:**\n${m.text}`).join('\n\n');
}
119
/**
 * Derive a `YYYY-MM-DD` (UTC) date for a conversation.
 *
 * Reads `conv.create_time`, falling back to `conv.created`. Numbers and numeric strings
 * are interpreted as seconds since the Unix epoch; any other string is handed to the
 * Date parser. When no usable timestamp is present, today's date is returned.
 *
 * @param {{ create_time?: number | string, created?: number | string }} conv
 * @returns {string} Date in `YYYY-MM-DD` form.
 */
function extractDate(conv) {
  const rawTime = conv?.create_time || conv?.created;

  let parsed = null;
  if (typeof rawTime === 'number') {
    parsed = new Date(rawTime * 1000);
  } else if (typeof rawTime === 'string' && rawTime.trim()) {
    const seconds = Number(rawTime);
    parsed = Number.isFinite(seconds) ? new Date(seconds * 1000) : new Date(rawTime);
  }

  // toISOString() throws RangeError on an Invalid Date, so verify before formatting.
  if (parsed && !Number.isNaN(parsed.getTime())) {
    return parsed.toISOString().slice(0, 10);
  }
  return new Date().toISOString().slice(0, 10);
}
File History 2 commits
sha256:65ccb454656ea5acdea0a10e559b78bcde1eb6ff753ecc2911bc99d1c3d7cadd feat(calendar): enforce agent context tiers in retrieval AP… Human minor 1 day ago
sha256:9103f98c89257ed2b01c237cea895dabb3e85ea337dccb1161c175e4422355b6 docs: accept Calendar Events v0 spec with Phase 0 security … Human 1 day ago