trilium/src/routes/api/import.js

368 lines
11 KiB
JavaScript
Raw Normal View History

2017-12-03 12:41:18 +08:00
"use strict";
const Attribute = require('../../entities/attribute');
const Link = require('../../entities/link');
2018-04-01 23:42:12 +08:00
const repository = require('../../services/repository');
2018-09-03 18:05:44 +08:00
const log = require('../../services/log');
const utils = require('../../services/utils');
const enex = require('../../services/import/enex');
const noteService = require('../../services/notes');
const Branch = require('../../entities/branch');
const tar = require('tar-stream');
const stream = require('stream');
const path = require('path');
2018-05-30 08:32:13 +08:00
const parseString = require('xml2js').parseString;
2018-09-03 15:40:22 +08:00
const commonmark = require('commonmark');
2018-05-30 08:32:13 +08:00
/**
 * Route handler: imports an uploaded file underneath an existing note.
 *
 * The importer is chosen from the (case-insensitive) extension of the uploaded
 * file's original name: .tar, .opml, .md and .enex are supported.
 *
 * @param req - request object; reads req.params.parentNoteId and req.file
 *              (req.file.originalname is used to detect the format)
 * @returns whatever the selected importer returns, or a [statusCode, message]
 *          pair when no file was uploaded (400), the parent note does not
 *          exist (404) or the extension is not supported (400)
 */
async function importToBranch(req) {
    const parentNoteId = req.params.parentNoteId;
    const file = req.file;

    if (!file) {
        return [400, "No file has been uploaded"];
    }

    const parentNote = await repository.getNote(parentNoteId);

    if (!parentNote) {
        return [404, `Note ${parentNoteId} doesn't exist.`];
    }

    const extension = path.extname(file.originalname).toLowerCase();

    if (extension === '.tar') {
        return await importTar(file, parentNote);
    }
    else if (extension === '.opml') {
        return await importOpml(file, parentNote);
    }
    else if (extension === '.md') {
        return await importMarkdown(file, parentNote);
    }
    else if (extension === '.enex') {
        return await enex.importEnex(file, parentNote);
    }
    else {
        // list every format handled above so the message does not mislead the user
        return [400, `Unrecognized extension ${extension}, must be .tar, .opml, .md or .enex`];
    }
}
/**
 * Converts plain text into HTML paragraphs, one <p> element per line.
 * Line breaks may be \r\n, \r or \n. Falsy input yields an empty string.
 *
 * @param text - plain text (may be empty or undefined)
 * @returns HTML string
 */
function toHtml(text) {
    if (!text) {
        return '';
    }

    const lines = text.split(/\r\n|\r|\n/);

    return lines
        .map(line => '<p>' + line + '</p>')
        .join('');
}
/**
 * Creates a note for one parsed OPML <outline> element and then, depth-first,
 * for each of its nested <outline> children.
 *
 * The "title" attribute becomes the note title and the "text" attribute
 * (converted by toHtml) becomes the note content.
 *
 * @param outline - parsed outline element (attributes in outline.$, children in outline.outline)
 * @param parentNoteId - id of the note under which the new note is created
 * @returns the note created for this outline element
 */
async function importOutline(outline, parentNoteId) {
    const attrs = outline.$;
    const content = toHtml(attrs.text);

    const created = await noteService.createNote(parentNoteId, attrs.title, content);
    const note = created.note;

    const childOutlines = outline.outline || [];

    for (const child of childOutlines) {
        await importOutline(child, note.noteId);
    }

    return note;
}
2018-11-05 07:06:17 +08:00
/**
 * Imports an OPML (version 1.0 or 1.1) file: every top-level <outline> in the
 * document body becomes a note under parentNote, with nested outlines imported
 * recursively by importOutline.
 *
 * @param file - uploaded file; file.buffer holds the XML
 * @param parentNote - note under which the outlines are imported
 * @returns the first created top-level note (it gets activated after import),
 *          null when the document contains no outlines, or [400, message] when
 *          the document is not OPML or has an unsupported version
 * @throws rejects with the parser error when the XML cannot be parsed
 */
async function importOpml(file, parentNote) {
    const xml = await new Promise(function(resolve, reject) {
        parseString(file.buffer, function (err, result) {
            if (err) {
                reject(err);
            }
            else {
                resolve(result);
            }
        });
    });

    // well-formed XML is not necessarily OPML - check the root before touching it
    const opml = xml ? xml.opml : null;

    if (!opml) {
        return [400, 'Invalid OPML file, <opml> root element is missing.'];
    }

    // the root element has no "$" at all when it carries no attributes
    const version = opml.$ ? opml.$.version : undefined;

    if (version !== '1.0' && version !== '1.1') {
        return [400, 'Unsupported OPML version ' + (version || '(missing)') + ', 1.0 or 1.1 expected instead.'];
    }

    // <body> may be missing or empty, both mean there is nothing to import
    const body = opml.body ? opml.body[0] : null;
    const outlines = (body && body.outline) || [];

    let returnNote = null;

    for (const outline of outlines) {
        const note = await importOutline(outline, parentNote.noteId);

        // first created note will be activated after import
        returnNote = returnNote || note;
    }

    return returnNote;
}
/**
 * Imports a tar archive as a subtree under parentNote.
 *
 * Notes are imported first (importNotes); attributes and links collected along
 * the way are saved afterwards, because only then is it known which target
 * notes were really created. Relations and links whose target lies outside of
 * the archive are dropped.
 *
 * @param file - uploaded file; passed to parseImportFile
 * @param parentNote - note under which the archive content is imported
 * @returns the value returned by importNotes (the first created top-level note)
 */
async function importTar(file, parentNote) {
    const files = await parseImportFile(file);

    const ctx = {
        // maps from original noteId (in tar file) to newly generated noteId;
        // prototype-less so that ids such as "constructor" or "__proto__" coming
        // from the archive can't resolve to inherited Object properties
        noteIdMap: Object.create(null),
        // new noteIds of notes which were actually created (not just referenced)
        createdNoteIds: [],
        attributes: [],
        links: [],
        reader: new commonmark.Parser(),
        writer: new commonmark.HtmlRenderer()
    };

    ctx.getNewNoteId = function(origNoteId) {
        // in case the original noteId is missing or empty. This probably shouldn't happen,
        // but still good to have this precaution
        if (typeof origNoteId !== 'string' || !origNoteId.trim()) {
            return "";
        }

        if (!ctx.noteIdMap[origNoteId]) {
            ctx.noteIdMap[origNoteId] = utils.newEntityId();
        }

        return ctx.noteIdMap[origNoteId];
    };

    const note = await importNotes(ctx, files, parentNote.noteId);

    // we save attributes and links after importing notes because we need to check that target noteIds
    // have been really created (relation/links with targets outside of the export are not created)
    const createdNoteIds = new Set(ctx.createdNoteIds);

    for (const attr of ctx.attributes) {
        if (attr.type === 'relation') {
            attr.value = ctx.getNewNoteId(attr.value);

            if (!createdNoteIds.has(attr.value)) {
                // relation targets note outside of the export
                continue;
            }
        }

        await new Attribute(attr).save();
    }

    for (const link of ctx.links) {
        link.targetNoteId = ctx.getNewNoteId(link.targetNoteId);

        if (!createdNoteIds.has(link.targetNoteId)) {
            // link targets note outside of the export
            continue;
        }

        await new Link(link).save();
    }

    return note;
}
2017-12-03 12:41:18 +08:00
/**
 * Splits a tar entry name into the note path (extension stripped) and the kind
 * of content the entry carries, derived from the extension:
 * ".dat" -> "data", ".md" -> "markdown", ".meta" -> "meta".
 *
 * For any other extension an error is logged, the name is returned unchanged
 * and key is undefined.
 *
 * @param name - entry name from the tar header
 * @returns {{name: string, key: (string|undefined)}}
 */
function getFileName(name) {
    const suffixToKey = [
        [".dat", "data"],
        [".md", "markdown"],
        [".meta", "meta"]
    ];

    for (const [suffix, key] of suffixToKey) {
        if (name.endsWith(suffix)) {
            return {
                name: name.slice(0, name.length - suffix.length),
                key
            };
        }
    }

    log.error("Unknown file type in import: " + name);

    return {name, key: undefined};
}
2017-12-03 12:41:18 +08:00
/**
 * Reads the uploaded tar archive (file.buffer) and turns its entries into a
 * tree of plain objects.
 *
 * All entries sharing the same path without extension (e.g. "a/b.dat",
 * "a/b.meta" and the directory "a/b/") are merged into one object:
 *   {name, children, data?, markdown?, meta?, directory?}
 * where data/markdown/directory are Buffers with the raw entry content and
 * meta is the parsed JSON of the ".meta" entry.
 *
 * @param file - uploaded file; file.buffer holds the tar archive
 * @returns {Promise<Array>} top-level objects of the tree
 * @throws rejects when the tar stream reports an error or a ".meta" entry is not valid JSON
 */
async function parseImportFile(file) {
    // Map instead of a plain object: keys are paths taken from the archive and
    // must never collide with inherited Object properties
    const fileMap = new Map();
    const files = [];

    // Returns the tree object for the given path, creating it (and any missing
    // ancestors) on demand so that archives without explicit directory entries,
    // or with children listed before their parent, don't crash the import.
    function getOrCreateEntry(name) {
        let entry = fileMap.get(name);

        if (!entry) {
            entry = {
                name: path.basename(name),
                children: []
            };

            fileMap.set(name, entry);

            const parentFileName = path.dirname(name);

            // dirname() returns its argument for a filesystem root, stop there
            if (parentFileName && parentFileName !== '.' && parentFileName !== name) {
                getOrCreateEntry(parentFileName).children.push(entry);
            }
            else {
                files.push(entry);
            }
        }

        return entry;
    }

    return new Promise((resolve, reject) => {
        const extract = tar.extract();

        // header is the tar header
        // entryStream is the content body (might be an empty stream)
        // next must be called when we are done with this entry
        extract.on('entry', function(header, entryStream, next) {
            let name, key;

            if (header.type === 'file') {
                ({name, key} = getFileName(header.name));
            }
            else if (header.type === 'directory') {
                // directory entries in tar often end with directory separator
                name = (header.name.endsWith("/") || header.name.endsWith("\\")) ? header.name.substr(0, header.name.length - 1) : header.name;
                key = 'directory';
            }
            else {
                log.error("Unrecognized tar entry: " + JSON.stringify(header));

                // the entry still has to be drained and acknowledged, otherwise
                // the extraction stalls and the import never finishes
                entryStream.on('end', next);
                entryStream.resume();
                return;
            }

            const entry = getOrCreateEntry(name);
            const chunks = [];

            entryStream.on("data", function (chunk) {
                chunks.push(chunk);
            });

            entryStream.on('end', function() {
                const content = Buffer.concat(chunks);

                if (key === "meta") {
                    try {
                        entry.meta = JSON.parse(content.toString("UTF-8"));
                    }
                    catch (e) {
                        // fail the import instead of throwing from inside an event handler
                        reject(new Error("Can't parse meta file " + header.name + ": " + e.message));
                        return;
                    }
                }
                else if (key) {
                    // entries of unknown type have no key, their content is ignored
                    entry[key] = content;
                }

                next(); // ready for next entry
            });

            entryStream.resume(); // just auto drain the stream
        });

        extract.on('finish', function() {
            resolve(files);
        });

        extract.on('error', reject);

        const bufferStream = new stream.PassThrough();
        bufferStream.end(file.buffer);
        bufferStream.pipe(extract);
    });
}
2018-09-03 19:17:29 +08:00
/**
 * Recursively creates notes for a list of parsed archive entries.
 *
 * - Entries without meta data become plain text notes; content is taken from
 *   the data entry or rendered from the markdown entry.
 * - Entries with meta.clone only get a new Branch placing the (separately
 *   imported) note under parentNoteId.
 * - Other entries with meta data are created with a freshly mapped noteId;
 *   their attributes and links are queued on ctx so the caller can save them
 *   once all notes exist.
 *
 * @param ctx - import context: getNewNoteId(), createdNoteIds, attributes, links, reader, writer
 * @param files - entries to import (each with name, children and optional data/markdown/meta)
 * @param parentNoteId - id of the note under which the entries are created
 * @returns the first note created on this level, or null when none was created
 * @throws Error when an entry's meta data has a version other than 1
 */
async function importNotes(ctx, files, parentNoteId) {
    let returnNote = null;

    for (const file of files) {
        let note;

        if (!file.meta) {
            let content = '';

            if (file.data) {
                content = file.data.toString("UTF-8");
            }
            else if (file.markdown) {
                const parsed = ctx.reader.parse(file.markdown.toString("UTF-8"));
                content = ctx.writer.render(parsed);
            }

            note = (await noteService.createNote(parentNoteId, file.name, content, {
                type: 'text',
                mime: 'text/html'
            })).note;
        }
        else {
            if (file.meta.version !== 1) {
                throw new Error("Can't read meta data version " + file.meta.version);
            }

            if (file.meta.clone) {
                await new Branch({
                    parentNoteId: parentNoteId,
                    noteId: ctx.getNewNoteId(file.meta.noteId),
                    prefix: file.meta.prefix,
                    isExpanded: !!file.meta.isExpanded
                }).save();

                // a clone has no note of its own - carry on with the remaining siblings
                // (returning here would silently drop them)
                continue;
            }

            let content = file.data;

            if (file.meta.type !== 'file' && file.meta.type !== 'image') {
                // the data entry may be missing, treat it as empty content
                content = file.data ? file.data.toString("UTF-8") : '';

                // this will replace all internal links (<a> and <img>) inside the body
                // links pointing outside the export will be broken and changed (ctx.getNewNoteId() will still assign new noteId)
                for (const link of file.meta.links || []) {
                    if (!link.targetNoteId) {
                        continue;
                    }

                    // literal replacement: the id comes from the archive and must not be interpreted as a regexp
                    content = content.split(link.targetNoteId).join(ctx.getNewNoteId(link.targetNoteId));
                }
            }

            note = (await noteService.createNote(parentNoteId, file.meta.title, content, {
                noteId: ctx.getNewNoteId(file.meta.noteId),
                type: file.meta.type,
                mime: file.meta.mime,
                prefix: file.meta.prefix
            })).note;

            ctx.createdNoteIds.push(note.noteId);

            for (const attribute of file.meta.attributes || []) {
                ctx.attributes.push({
                    noteId: note.noteId,
                    type: attribute.type,
                    name: attribute.name,
                    value: attribute.value,
                    isInheritable: attribute.isInheritable,
                    position: attribute.position
                });
            }

            for (const link of file.meta.links || []) {
                ctx.links.push({
                    noteId: note.noteId,
                    type: link.type,
                    targetNoteId: link.targetNoteId
                });
            }
        }

        // first created note will be activated after import
        returnNote = returnNote || note;

        if (file.children.length > 0) {
            await importNotes(ctx, file.children, note.noteId);
        }
    }

    return returnNote;
}
2018-11-05 07:06:17 +08:00
/**
 * Imports a single markdown file as one text note under parentNote.
 * The markdown is rendered to HTML with commonmark; the note title is the
 * original file name without its three-character ".md" extension.
 *
 * @param file - uploaded file (file.buffer with the markdown, file.originalname)
 * @param parentNote - note under which the new note is created
 * @returns the created note
 */
async function importMarkdown(file, parentNote) {
    const markdown = file.buffer.toString("UTF-8");

    const parser = new commonmark.Parser();
    const renderer = new commonmark.HtmlRenderer();
    const html = renderer.render(parser.parse(markdown));

    // drop the trailing ".md"
    const fileName = file.originalname;
    const title = fileName.substring(0, fileName.length - 3);

    const options = {
        type: 'text',
        mime: 'text/html'
    };

    const result = await noteService.createNote(parentNote.noteId, title, html, options);

    return result.note;
}
2018-03-31 03:34:07 +08:00
// Only the upload entry point is exported.
module.exports = { importToBranch };