zip import refactoring

This commit is contained in:
zadam 2022-12-26 10:38:31 +01:00
parent acda37e334
commit 45b94ecaeb
2 changed files with 84 additions and 93 deletions

View file

@ -1,20 +1,18 @@
const sqlInit = require("./sql_init");
const cls = require("./cls");
const zipImport = require("../services/import/zip");
const TaskContext = require("./task_context");
const becca = require("../becca/becca");
const beccaLoader = require("../becca/becca_loader");
const fs = require("fs").promises;
const HELP_FILE_PATH = '/home/adam/Downloads/Help1.zip';
sqlInit.dbReady.then(() => {
beccaLoader.beccaLoaded.then(() => {
cls.init(async () => {
const helpRoot = becca.getNote("_help");
const taskContext = new TaskContext('no-progress-reporting', null, {});
const data = await fs.readFile(HELP_FILE_PATH, "binary");
console.log("BUGGER LENGTH", data.length);
await zipImport.importZip(taskContext, Buffer.from(data, 'binary'), helpRoot);
});
});

View file

@ -235,6 +235,84 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
return targetNoteId;
}
/**
 * Rewrites the HTML of an imported text note so that it is usable in the target instance.
 *
 * Steps, in order:
 *  1. an H1 whose text equals the note title is dropped, any other H1 is demoted to H2
 *  2. the markup is passed through htmlSanitizer.sanitize()
 *  3. the <html>...<body> prefix and </body>...</html> suffix are stripped
 *  4. relative src/href URLs are rewritten to image API URLs / note links
 *  5. data-note-path attributes and includeNoteLink targets are switched to the new note IDs
 *
 * @param {string} content - HTML content of the imported file
 * @param {string} noteTitle - title of the note, compared against H1 text
 * @param {string} filePath - path of the file inside the archive, used to resolve relative URLs
 * @param {object|null|undefined} noteMeta - metadata of the note (may be missing); only its
 *                                           `attributes` array is read here
 * @returns {string} processed HTML
 */
function processNoteContent(content, noteTitle, filePath, noteMeta) {
    function isUrlAbsolute(url) {
        return /^(?:[a-z]+:)?\/\//i.test(url);
    }

    // data:, mailto: and tel: URIs have no "//" part, so isUrlAbsolute() does not recognize them,
    // yet they never point to a file inside the archive and must be kept untouched
    function hasOpaqueScheme(url) {
        return /^(?:data|mailto|tel):/i.test(url);
    }

    content = content.replace(/<h1>([^<]*)<\/h1>/gi, (match, text) => {
        if (noteTitle.trim() === text.trim()) {
            return ""; // remove whole H1 tag
        } else {
            return `<h2>${text}</h2>`;
        }
    });

    content = htmlSanitizer.sanitize(content);

    content = content.replace(/<html.*<body[^>]*>/gis, "");
    content = content.replace(/<\/body>.*<\/html>/gis, "");

    content = content.replace(/src="([^"]*)"/g, (match, url) => {
        try {
            url = decodeURIComponent(url);
        } catch (e) {
            log.error(`Cannot parse image URL '${url}', keeping original (${e}).`);
            return match;
        }

        if (isUrlAbsolute(url) || url.startsWith("/") || hasOpaqueScheme(url)) {
            return match;
        }

        const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);

        return `src="api/images/${targetNoteId}/${path.basename(url)}"`;
    });

    content = content.replace(/href="([^"]*)"/g, (match, url) => {
        try {
            url = decodeURIComponent(url);
        } catch (e) {
            log.error(`Cannot parse link URL '${url}', keeping original (${e}).`);
            return match;
        }

        if (url.startsWith('#') || isUrlAbsolute(url) || hasOpaqueScheme(url)) {
            return match;
        }

        const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);

        return `href="#root/${targetNoteId}"`;
    });

    content = content.replace(/data-note-path="([^"]*)"/g, (match, notePath) => {
        const noteId = notePath.split("/").pop();

        if (!noteId) {
            // nothing to remap (empty path or trailing slash), keep the attribute as it is
            return match;
        }

        if (noteId === 'root' || noteId.startsWith("_")) { // named noteIds stay identical across instances
            return `data-note-path="root/${noteId}"`;
        }

        // Fall back to getNewNoteId() (the helper already used for includeNoteLink targets below)
        // so that a not-yet-mapped ID never ends up as the literal "root/undefined".
        // NOTE(review): assumes getNewNoteId() returns the new ID for an original ID - confirm.
        const targetNoteId = noteIdMap[noteId] || getNewNoteId(noteId);

        return `data-note-path="root/${targetNoteId}"`;
    });

    if (noteMeta) {
        const includeNoteLinks = (noteMeta.attributes || [])
            .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink');

        for (const link of includeNoteLinks) {
            if (!link.value) {
                // an empty search string would match between every character of the content
                continue;
            }

            // literal replacement - the value comes from the imported metadata and must not be
            // interpreted as a regular expression
            content = content.split(link.value).join(getNewNoteId(link.value));
        }
    }

    return content;
}
function saveNote(filePath, content) {
const {parentNoteMeta, noteMeta} = getMeta(filePath);
@ -280,81 +358,7 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
const noteTitle = utils.getNoteTitle(filePath, taskContext.data.replaceUnderscoresWithSpaces, noteMeta);
if (type === 'text') {
function isUrlAbsolute(url) {
return /^(?:[a-z]+:)?\/\//i.test(url);
}
content = content.replace(/<h1>([^<]*)<\/h1>/gi, (match, text) => {
if (noteTitle.trim() === text.trim()) {
return ""; // remove whole H1 tag
}
else {
return `<h2>${text}</h2>`;
}
});
content = htmlSanitizer.sanitize(content);
content = content.replace(/<html.*<body[^>]*>/gis, "");
content = content.replace(/<\/body>.*<\/html>/gis, "");
content = content.replace(/src="([^"]*)"/g, (match, url) => {
try {
url = decodeURIComponent(url);
} catch (e) {
log.error(`Cannot parse image URL '${url}', keeping original (${e}).`);
return `src="${url}"`;
}
if (isUrlAbsolute(url) || url.startsWith("/")) {
return match;
}
const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);
return `src="api/images/${targetNoteId}/${path.basename(url)}"`;
});
content = content.replace(/href="([^"]*)"/g, (match, url) => {
try {
url = decodeURIComponent(url);
} catch (e) {
log.error(`Cannot parse link URL '${url}', keeping original (${e}).`);
return `href="${url}"`;
}
if (url.startsWith('#') || isUrlAbsolute(url)) {
return match;
}
const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);
return `href="#root/${targetNoteId}"`;
});
content = content.replace(/data-note-path="([^"]*)"/g, (match, notePath) => {
const noteId = notePath.split("/").pop();
let targetNoteId;
if (noteId === 'root' || noteId.startsWith("_")) { // named noteIds stay identical across instances
targetNoteId = noteId;
} else {
targetNoteId = noteIdMap[noteId];
}
return `data-note-path="root/${targetNoteId}"`;
});
if (noteMeta) {
const includeNoteLinks = (noteMeta.attributes || [])
.filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink');
for (const link of includeNoteLinks) {
// no need to escape the regexp find string since it's a noteId which doesn't contain any special characters
content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value));
}
}
content = processNoteContent(content, noteTitle, filePath, noteMeta);
}
if (type === 'relationMap' && noteMeta) {
@ -368,17 +372,6 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
}
}
if (type === 'text' && noteMeta) {
const includeNoteLinks = (noteMeta.attributes || [])
.filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink');
// this will replace relation map links
for (const link of includeNoteLinks) {
// no need to escape the regexp find string since it's a noteId which doesn't contain any special characters
content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value));
}
}
let note = becca.getNote(noteId);
const isProtected = importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable();
@ -523,7 +516,7 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
if (!metaFile) {
// if there's no meta file then the notes are created based on the order in that zip file but that
// is usually quite random so we sort the notes in the way they would appear in the file manager
// is usually quite random, so we sort the notes in the way they would appear in the file manager
treeService.sortNotes(noteId, 'title', false, true);
}
@ -533,11 +526,11 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
// we're saving attributes and links only now so that all relation and link target notes
// are already in the database (we don't want to have "broken" relations, not even transitionally)
for (const attr of attributes) {
if (attr.type !== 'relation' || attr.value in createdNoteIds) {
if (attr.type !== 'relation' || attr.value in becca.notes) {
new Attribute(attr).save();
}
else {
log.info(`Relation not imported since target note doesn't exist: ${JSON.stringify(attr)}`);
log.info(`Relation not imported since the target note doesn't exist: ${JSON.stringify(attr)}`);
}
}