trilium/static/js/html2notecase.js

/**
 * Converts editor HTML into the NoteCase representation: plain text plus separate lists of
 * formatting marks, links and embedded images, each addressed by its offset into that text.
 *
 * Results are written onto `note`:
 *  - `note.formatting`, `note.links` and `note.images` are replaced with fresh arrays
 *  - `note.detail.note_text` receives the text with all markup removed
 *
 * Relies on a `tags` lookup (tag id -> literal HTML tag string) which is defined outside
 * of this function. Tags that are neither in `tags`, nor an inline image, nor an anchor
 * are stripped from the text.
 *
 * @param {string} contents - HTML to convert
 * @param {object} note - note object; `note.detail.note_id` is read, the fields listed above are written
 */
function html2notecase(contents, note) {
    const anchorRegex = /^<a[^>]+?href="([^"]+?)"[^>]+?>([^<]+?)<\/a>/;
    const urlRegex = /^(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|]/i;
    // captures the image subtype (png, jpg, jpeg, ...) and the base64 payload up to the closing quote
    const inlineImageRegex = /data:image\/([^;"]+);base64,([^"]*)"/;

    // remove any possible extra newlines which might be inserted - all relevant new lines should be only in <br> and <p>
    // (global flag is required, otherwise only the first line break would be removed)
    contents = contents.replace(/\r\n|\r|\n/g, '');

    contents = contents.replace(/<br \/>/g, '\n');
    contents = contents.replace(/<br>/g, '\n');
    contents = contents.replace(/<\/p>/g, '\n');
    contents = contents.replace(/<p>/g, '');
    contents = contents.replace(/&nbsp;/g, ' ');

    note.formatting = [];
    note.links = [];
    note.images = [];

    const noteId = note.detail.note_id;

    let index = 0;

    while (index < contents.length) {
        if (contents[index] !== '<') {
            // plain text - auto-linkify a URL starting exactly at this position
            const linkMatch = urlRegex.exec(contents.slice(index));

            if (linkMatch === null) {
                index++;
                continue;
            }

            note.links.push({
                note_id: noteId,
                note_offset: index,
                target_url: linkMatch[0],
                lnk_text: linkMatch[0]
            });

            index += linkMatch[0].length;
            continue;
        }

        const rest = contents.slice(index);
        const endOfTag = rest.indexOf('>');

        if (endOfTag === -1) {
            // A lone '<' which never closes can't be a tag. Keep it as literal text and move on;
            // without advancing here the loop would never terminate.
            console.log("Can't find the end of the tag");

            index++;
            continue;
        }

        const curTag = rest.slice(0, endOfTag + 1);

        // 1) formatting tag known to notecase - record it and cut it out of the text
        let formattingFound = false;

        for (const tagId in tags) {
            const tag = tags[tagId];

            if (contents.slice(index, index + tag.length) === tag) {
                note.formatting.push({
                    note_id: noteId,
                    note_offset: index,
                    fmt_tag: tagId,
                    fmt_color: '',
                    fmt_font: '',
                    fmt_value: 100
                });

                contents = contents.slice(0, index) + contents.slice(index + tag.length);

                formattingFound = true;
                break;
            }
        }

        if (formattingFound) {
            // index stays put - the text following the removed tag now starts here
            continue;
        }

        // 2) inline (data URI) image - record the payload and cut the whole tag out
        if (curTag.slice(0, 4) === "<img") {
            const imageMatch = inlineImageRegex.exec(curTag);

            if (imageMatch !== null) {
                note.images.push({
                    note_id: noteId,
                    note_offset: index,
                    is_png: imageMatch[1] === "png",
                    image_data: imageMatch[2]
                });

                contents = contents.slice(0, index) + contents.slice(index + curTag.length);
                continue;
            }
        }

        // 3) anchor - record the link and keep only its text
        const anchorMatch = anchorRegex.exec(rest);

        if (anchorMatch !== null) {
            const linkText = anchorMatch[2];

            note.links.push({
                note_id: noteId,
                note_offset: index,
                target_url: anchorMatch[1],
                lnk_text: linkText
            });

            contents = contents.slice(0, index) + linkText + contents.slice(index + anchorMatch[0].length);

            // Skip over the link text: it can't contain tags (the regex excludes '<') and rescanning it
            // would register a duplicate auto-link whenever the text itself is a URL.
            index += linkText.length;
            continue;
        }

        // 4) anything else is not representable in notecase - strip the tag
        contents = contents.slice(0, index) + contents.slice(index + curTag.length);
    }

    note.detail.note_text = contents;
}
reorganization of sources 2017-06-12 04:04:07 +08:00			`function html2notecase(contents, note) {`
possible fix to "double new lines" 2017-08-24 08:35:47 +08:00			`// remove any possible extra newlines which might be inserted - all relevant new lines should be only in <br> and <p>`
			`contents = contents.replace(/(?:\r\n\|\r\|\n)/, '');`

reorganization of sources 2017-06-12 04:04:07 +08:00			`contents = contents.replace(/<br \/>/g, '\n');`
			`contents = contents.replace(/<br>/g, '\n');`
			`contents = contents.replace(/<\/p>/g, '\n');`
			`contents = contents.replace(/<p>/g, '');`
various small usability improvements 2017-08-14 09:42:10 +08:00			`contents = contents.replace(/ /g, ' ');`
reorganization of sources 2017-06-12 04:04:07 +08:00
			`let index = 0;`

			`note.formatting = [];`
			`note.links = [];`
			`note.images = [];`

			`while (index < contents.length) {`
auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`let curContent = contents.substr(index);`
HTML tags which are not converted to notecase formattings are stripped - that way notecase webapp doesn't save stuff not readable by notecase desktop. Also bugfix in parsing links 2017-08-14 10:35:04 +08:00
auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`if (contents[index] === '<') {`
			`let found = false;`
reorganization of sources 2017-06-12 04:04:07 +08:00			`let endOfTag = curContent.indexOf('>');`

auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`if (endOfTag === -1) {`
reorganization of sources 2017-06-12 04:04:07 +08:00			`console.log("Can't find the end of the tag");`
			`}`

			`let curTag = curContent.substr(0, endOfTag + 1);`

			`//console.log(contents);`

			`for (tagId in tags) {`
			`let tag = tags[tagId];`

auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`if (contents.substr(index, tag.length) === tag) {`
reorganization of sources 2017-06-12 04:04:07 +08:00			`found = true;`
			`// if (tagMap.get(index) == undefined) {`
			`// tagMap.get(index) = [];`
			`// }`

			`// tagMap.get(index).push(key);`

			`note.formatting.push({`
			`note_id: note.detail.note_id,`
			`note_offset: index,`
			`fmt_tag: tagId,`
			`fmt_color: '',`
			`fmt_font: '',`
			`fmt_value: 100`
			`});`

			`contents = contents.substr(0, index) + contents.substr(index + tag.length);`

			`break;`
			`}`
			`}`

auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`if (curTag.substr(0, 4) === "<img") {`
reorganization of sources 2017-06-12 04:04:07 +08:00			`//console.log("Found img tag");`

			`let dataImagePos = curTag.indexOf('data:image/');`

auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`if (dataImagePos !== -1) {`
reorganization of sources 2017-06-12 04:04:07 +08:00			`let imageType = curTag.substr(dataImagePos + 11, 3);`

			`//console.log("image type: " + imageType);`

			`let dataStart = curTag.substr(dataImagePos + 22);`

			`let endOfDataPos = dataStart.indexOf('"');`

auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`if (endOfDataPos !== -1) {`
reorganization of sources 2017-06-12 04:04:07 +08:00			`//console.log("Found the end of image data");`

			`let imageData = dataStart.substr(0, endOfDataPos);`

			`note.images.push({`
			`note_id: note.detail.note_id,`
			`note_offset: index,`
auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`is_png: imageType === "png",`
reorganization of sources 2017-06-12 04:04:07 +08:00			`image_data: imageData`
			`});`

			`contents = contents.substr(0, index) + contents.substr(index + curTag.length);`

			`//console.log("Parsed image: " + imageData.substr(0, 100));`

			`found = true;`
			`}`
			`}`
			`}`

HTML tags which are not converted to notecase formattings are stripped - that way notecase webapp doesn't save stuff not readable by notecase desktop. Also bugfix in parsing links 2017-08-14 10:35:04 +08:00			`let match = /^<a[^>]+?href="([^"]+?)"[^>]+?>([^<]+?)<\/a>/.exec(curContent);`
reorganization of sources 2017-06-12 04:04:07 +08:00
auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`if (match !== null) {`
reorganization of sources 2017-06-12 04:04:07 +08:00			`note.links.push({`
			`note_id: note.detail.note_id,`
			`note_offset: index,`
			`target_url: match[1],`
			`lnk_text: match[2]`
			`});`

			`//console.log("Found link with text: " + match[2] + ", targetting: " + match[1]);`

			`contents = contents.substr(0, index) + match[2] + contents.substr(index + match[0].length);`

			`found = true;`
			`}`

			`// let imageRegex = /<img[^>]+src="data:image\/(jpg\|png);base64,([^>\"]+)"[^>]+>/;`

			`// console.log("Testing for image: " + curTag.substr(0, 100));`
			`// console.log("End of image: " + curTag.substr(curTag.length - 100));`

			`// let match = imageRegex.exec(curTag);`

			`// if (match != null) {`

			`// }`

HTML tags which are not converted to notecase formattings are stripped - that way notecase webapp doesn't save stuff not readable by notecase desktop. Also bugfix in parsing links 2017-08-14 10:35:04 +08:00			`if (!found) {`
			`contents = contents.substr(0, index) + contents.substr(index + endOfTag + 1);`
			`}`
			`}`
			`else {`
small simplification in parsing 2017-08-23 08:19:46 +08:00			`let linkMatch = /^(https?\|ftp\|file):\/\/[-A-Z0-9+&@#\/%?=~_\|!:,.;]*[-A-Z0-9+&@#\/%=~_\|]/i.exec(curContent);`
auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00
			`if (linkMatch !== null) {`
			`note.links.push({`
			`note_id: note.detail.note_id,`
			`note_offset: index,`
small simplification in parsing 2017-08-23 08:19:46 +08:00			`target_url: linkMatch[0],`
			`lnk_text: linkMatch[0]`
auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`});`

small simplification in parsing 2017-08-23 08:19:46 +08:00			`console.log(linkMatch[0]);`
			`console.log(linkMatch[0].length);`

			`index += linkMatch[0].length;`
auto linkification (plus some refactorings to silent pycharm warnings) 2017-08-22 09:31:23 +08:00			`}`
			`else {`
			`index++;`
			`}`
reorganization of sources 2017-06-12 04:04:07 +08:00			`}`
			`}`

			`note.detail.note_text = contents;`
			`}`