/**
 * https://github.com/mixmark-io/turndown
 * v7.2.0 modified by SnappyMail to be ES2020
 */

const TurndownService = (() => {

	/**
	 * Builds a string consisting of `character` repeated `count` times.
	 *
	 * @param {String} character
	 * @param {Number} count
	 * @returns {String}
	 */
	const repeat = (character, count) => String(character).repeat(count);

	/** Removes every newline at the start of `string`. */
	const trimLeadingNewlines = string => string.replace(/^\n*/, '');

	/** Removes every newline at the end of `string`. */
	const trimTrailingNewlines = string => {
		// avoid match-at-end regexp bottleneck, see #370
		let indexEnd = string.length;
		while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--;
		return string.substring(0, indexEnd);
	};

	/** Upper-case node names treated as block-level elements. */
	const blockElements = [
		'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
		'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
		'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
		'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
		'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
		'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
	];

	const isBlock = node => is(node, blockElements);

	/** Upper-case node names of elements that never have content. */
	const voidElements = [
		'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
		'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
	];

	const isVoid = node => is(node, voidElements);

	const hasVoid = node => has(node, voidElements);

	/** Upper-case node names that must not be dropped even when they hold no text. */
	const meaningfulWhenBlankElements = [
		'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
		'AUDIO', 'VIDEO'
	];

	const isMeaningfulWhenBlank = node => is(node, meaningfulWhenBlankElements);

	const hasMeaningfulWhenBlank = node => has(node, meaningfulWhenBlankElements);

	/** True when the node's own `nodeName` is listed in `tagNames`. */
	const is = (node, tagNames) => tagNames.includes(node.nodeName);

	/**
	 * Truthy when the node has at least one descendant whose tag is listed in
	 * `tagNames`. Nodes without `getElementsByTagName` yield a falsy value.
	 */
	const has = (node, tagNames) =>
		(
			node.getElementsByTagName &&
			tagNames.some(tagName => node.getElementsByTagName(tagName).length)
		);

	/**
	 * Default conversion rules. Each rule has a `filter` (tag name, array of tag
	 * names, or predicate) and a `replacement(content, node, options)` function
	 * that returns the Markdown for a matching node. A rule may also define
	 * `append()`, which is called once after conversion (see postProcess).
	 */
	const rules = {

		paragraph: {
			filter: 'p',

			replacement: content => '\n\n' + content + '\n\n'
		},

		lineBreak: {
			filter: 'br',

			replacement: (content, node, options) => options.br + '\n'
		},

		heading: {
			filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],

			replacement: (content, node, options) => {
				const hLevel = Number(node.nodeName.charAt(1));

				// Setext syntax only exists for the first two heading levels.
				if (options.headingStyle === 'setext' && hLevel < 3) {
					const underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
					return '\n\n' + content + '\n' + underline + '\n\n';
				}
				return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n';
			}
		},

		blockquote: {
			filter: 'blockquote',

			replacement: (content) => {
				content = content.replace(/^\n+|\n+$/g, '');
				content = content.replace(/^/gm, '> ');
				return '\n\n' + content + '\n\n';
			}
		},

		list: {
			filter: ['ul', 'ol'],

			replacement: (content, node) => {
				const parent = node.parentNode;
				// A list that closes a list item is nested: keep it tight to its parent.
				if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
					return '\n' + content;
				}
				return '\n\n' + content + '\n\n';
			}
		},

		listItem: {
			filter: 'li',

			replacement: (content, node, options) => {
				const parent = node.parentNode;
				let prefix = options.bulletListMarker + ' ';
				if (parent.nodeName === 'OL') {
					const start = parent.getAttribute('start');
					const index = Array.prototype.indexOf.call(parent.children, node);
					prefix = (start ? Number(start) + index : index + 1) + '. ';
				}
				// Continuation lines must start at the item's content column,
				// otherwise nested lists and paragraphs fall out of the item.
				const indent = ' '.repeat(prefix.length);
				content = content
					.replace(/^\n+/, '') // remove leading newlines
					.replace(/\n+$/, '\n') // replace trailing newlines with just a single one
					.replace(/\n/gm, '\n' + indent); // indent
				return (
					prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
				);
			}
		},

		indentedCodeBlock: {
			filter: (node, options) =>
				(
					options.codeBlockStyle === 'indented' &&
					node.nodeName === 'PRE' &&
					node.firstChild &&
					node.firstChild.nodeName === 'CODE'
				)
			,

			// An indented code block needs four leading spaces on every line.
			replacement: (content, node) =>
				(
					'\n\n    ' +
					node.firstChild.textContent.replace(/\n/g, '\n    ') +
					'\n\n'
				)
		},

		fencedCodeBlock: {
			filter: (node, options) =>
				(
					options.codeBlockStyle === 'fenced' &&
					node.nodeName === 'PRE' &&
					node.firstChild &&
					node.firstChild.nodeName === 'CODE'
				)
			,

			replacement: (content, node, options) => {
				const className = node.firstChild.getAttribute('class') || '';
				const language = (className.match(/language-(\S+)/) || [null, ''])[1];
				const code = node.firstChild.textContent;

				const fenceChar = options.fence.charAt(0);
				let fenceSize = 3;
				const fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');

				// The fence must be longer than any fence-like run inside the code.
				let match;
				while ((match = fenceInCodeRegex.exec(code))) {
					if (match[0].length >= fenceSize) {
						fenceSize = match[0].length + 1;
					}
				}

				const fence = repeat(fenceChar, fenceSize);

				return (
					'\n\n' + fence + language + '\n' +
					code.replace(/\n$/, '') +
					'\n' + fence + '\n\n'
				);
			}
		},

		horizontalRule: {
			filter: 'hr',

			replacement: (content, node, options) => '\n\n' + options.hr + '\n\n'
		},

		inlineLink: {
			filter: (node, options) =>
				(
					options.linkStyle === 'inlined' &&
					node.nodeName === 'A' &&
					node.getAttribute('href')
				)
			,

			replacement: (content, node) => {
				let href = node.getAttribute('href');
				if (href) href = href.replace(/([()])/g, '\\$1');
				let title = cleanAttribute(node.getAttribute('title'));
				if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
				return '[' + content + '](' + href + title + ')';
			}
		},

		referenceLink: {
			filter: (node, options) =>
				(
					options.linkStyle === 'referenced' &&
					node.nodeName === 'A' &&
					node.getAttribute('href')
				)
			,

			// Must stay a method (not an arrow function): it reads `this.references`.
			replacement(content, node, options) {
				const href = node.getAttribute('href');
				let title = cleanAttribute(node.getAttribute('title'));
				if (title) title = ' "' + title + '"';
				let replacement;
				let reference;

				switch (options.linkReferenceStyle) {
					case 'collapsed':
						replacement = '[' + content + '][]';
						reference = '[' + content + ']: ' + href + title;
						break;
					case 'shortcut':
						replacement = '[' + content + ']';
						reference = '[' + content + ']: ' + href + title;
						break;
					default: {
						const id = this.references.length + 1;
						replacement = '[' + content + '][' + id + ']';
						reference = '[' + id + ']: ' + href + title;
					}
				}

				this.references.push(reference);
				return replacement;
			},

			// Link definitions collected while converting; flushed by append().
			references: [],

			append() {
				let references = '';
				if (this.references.length) {
					references = '\n\n' + this.references.join('\n') + '\n\n';
					this.references = []; // Reset references
				}
				return references;
			}
		},

		emphasis: {
			filter: ['em', 'i'],

			replacement: (content, node, options) => {
				if (!content.trim()) return '';
				return options.emDelimiter + content + options.emDelimiter;
			}
		},

		strong: {
			filter: ['strong', 'b'],

			replacement: (content, node, options) => {
				if (!content.trim()) return '';
				return options.strongDelimiter + content + options.strongDelimiter;
			}
		},

		code: {
			filter: (node) => {
				const hasSiblings = node.previousSibling || node.nextSibling;
				// A lone <code> inside <pre> is handled by the code block rules.
				const isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;

				return node.nodeName === 'CODE' && !isCodeBlock;
			},

			replacement: (content) => {
				if (!content) return '';
				content = content.replace(/\r?\n|\r/g, ' ');

				const extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
				let delimiter = '`';
				const matches = content.match(/`+/gm) || [];
				// Grow the delimiter until it differs from every backtick run in the content.
				while (matches.includes(delimiter)) delimiter = delimiter + '`';

				return delimiter + extraSpace + content + extraSpace + delimiter;
			}
		},

		image: {
			filter: 'img',

			replacement: (content, node) => {
				const alt = cleanAttribute(node.getAttribute('alt'));
				const src = node.getAttribute('src') || '';
				const title = cleanAttribute(node.getAttribute('title'));
				const titlePart = title ? ' "' + title + '"' : '';
				return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : '';
			}
		},

		style: {
			filter: 'style',
			replacement: () => ''
		}
	};

	/**
	 * Collapses each run of newlines (plus following whitespace) in an attribute
	 * value into a single newline. Missing attributes become ''.
	 */
	const cleanAttribute = (attribute) => attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : '';

	/**
	 * Manages a collection of rules used to convert HTML to Markdown
	 */
	class Rules
	{
		/**
		 * @param {Object} options Service options; reads `rules`, `blankReplacement`,
		 *   `keepReplacement` and `defaultReplacement`.
		 */
		constructor(options) {
			this.options = options;
			this._keep = [];
			this._remove = [];

			this.blankRule = {
				replacement: options.blankReplacement
			};

			this.keepReplacement = options.keepReplacement;

			this.defaultRule = {
				replacement: options.defaultReplacement
			};

			this.array = [];
			for (const key in options.rules) this.array.push(options.rules[key]);
		}

		/** Registers a rule ahead of all existing ones. `key` is not stored. */
		add(key, rule) {
			this.array.unshift(rule);
		}

		/** Registers a filter whose matching nodes are rendered with keepReplacement. */
		keep(filter) {
			this._keep.unshift({
				filter: filter,
				replacement: this.keepReplacement
			});
		}

		/** Registers a filter whose matching nodes are replaced by an empty string. */
		remove(filter) {
			this._remove.unshift({
				filter: filter,
				replacement: () => ''
			});
		}

		/**
		 * Picks the rule for a node: the blank rule for blank nodes, otherwise the
		 * first match among regular, keep and remove rules, else the default rule.
		 */
		forNode(node) {
			if (node.isBlank) return this.blankRule;
			return findRule(this.array, node, this.options)
				|| findRule(this._keep, node, this.options)
				|| findRule(this._remove, node, this.options)
				|| this.defaultRule;
		}

		/** Calls `fn(rule, index)` for every regular rule. */
		forEach(fn) {
			for (let i = 0; i < this.array.length; i++) fn(this.array[i], i);
		}
	}

	/** Returns the first rule in `ruleList` whose filter matches, or undefined. */
	const findRule = (ruleList, node, options) =>
		ruleList.find(rule => filterValue(rule, node, options));

	/**
	 * Evaluates a rule's filter against a node.
	 *
	 * @throws {TypeError} when the filter is not a string, array or function
	 */
	const filterValue = (rule, node, options) => {
		const filter = rule.filter;
		if (typeof filter === 'string') {
			return filter === node.nodeName.toLowerCase();
		}
		if (Array.isArray(filter)) {
			return filter.includes(node.nodeName.toLowerCase());
		}
		if (typeof filter === 'function') {
			return !!filter.call(rule, node, options);
		}
		throw new TypeError('`filter` needs to be a string, array, or function');
	};

	/**
	 * The collapseWhitespace function is adapted from collapse-whitespace
	 * by Luc Thevenard.
	 *
	 * The MIT License (MIT)
	 *
	 * Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
	 *
	 * Permission is hereby granted, free of charge, to any person obtaining a copy
	 * of this software and associated documentation files (the "Software"), to deal
	 * in the Software without restriction, including without limitation the rights
	 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	 * copies of the Software, and to permit persons to whom the Software is
	 * furnished to do so, subject to the following conditions:
	 *
	 * The above copyright notice and this permission notice shall be included in
	 * all copies or substantial portions of the Software.
	 *
	 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	 * THE SOFTWARE.
	 */

	/**
	 * collapseWhitespace(options) removes extraneous whitespace from the given
	 * element, modifying its text nodes in place.
	 *
	 * @param {Object} options `element`, `isBlock`, `isVoid` and optional `isPre`
	 */
	const collapseWhitespace = (options) => {
		// These locals intentionally shadow the module-level predicates.
		const { element, isBlock, isVoid } = options;
		const isPre = options.isPre || ((node) => node.nodeName === 'PRE');

		if (!element.firstChild || isPre(element)) return;

		let prevText = null;
		let keepLeadingWs = false;

		let prev = null;
		let node = next(prev, element, isPre);

		while (node !== element) {
			if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
				let text = node.data.replace(/[ \r\n\t]+/g, ' ');

				if ((!prevText || / $/.test(prevText.data)) &&
					!keepLeadingWs && text[0] === ' ') {
					text = text.slice(1);
				}

				// `text` might be empty at this point.
				if (!text) {
					node = remove(node);
					continue;
				}

				node.data = text;

				prevText = node;
			} else if (node.nodeType === 1) { // Node.ELEMENT_NODE
				if (isBlock(node) || node.nodeName === 'BR') {
					if (prevText) {
						prevText.data = prevText.data.replace(/ $/, '');
					}

					prevText = null;
					keepLeadingWs = false;
				} else if (isVoid(node) || isPre(node)) {
					// Avoid trimming space around non-block, non-BR void elements and inline PRE.
					prevText = null;
					keepLeadingWs = true;
				} else if (prevText) {
					// Drop protection if set previously.
					keepLeadingWs = false;
				}
			} else {
				node = remove(node);
				continue;
			}

			const nextNode = next(prev, node, isPre);
			prev = node;
			node = nextNode;
		}

		if (prevText) {
			prevText.data = prevText.data.replace(/ $/, '');
			if (!prevText.data) {
				remove(prevText);
			}
		}
	};

	/**
	 * remove(node) removes the given node from the DOM and returns the
	 * next node in the sequence.
	 *
	 * @param {Node} node
	 * @return {Node} node
	 */
	const remove = (node) => {
		const following = node.nextSibling || node.parentNode;

		node.parentNode.removeChild(node);

		return following;
	};

	/**
	 * next(prev, current, isPre) returns the next node in the sequence, given the
	 * current and previous nodes.
	 *
	 * @param {Node} prev
	 * @param {Node} current
	 * @param {Function} isPre
	 * @return {Node}
	 */
	const next = (prev, current, isPre) => {
		// Coming back up from a child, or skipping preformatted content: do not descend.
		if ((prev && prev.parentNode === current) || isPre(current)) {
			return current.nextSibling || current.parentNode;
		}

		return current.firstChild || current.nextSibling || current.parentNode;
	};

	/*
	 * Parsing HTML strings
	 */

	/**
	 * Produces the root node to convert: parses string input, or clones node
	 * input, then collapses its whitespace.
	 *
	 * @param {String|Node} input
	 * @param {Object} options Reads `preformattedCode`
	 * @returns {Node}
	 */
	const RootNode = (input, options) => {
		let root;
		if (typeof input === 'string') {
			const doc = htmlParser().parseFromString(
				// DOM parsers arrange elements in the <head> and <body>.
				// Wrapping in a custom element ensures elements are reliably arranged in
				// a single element.
				'<x-turndown id="turndown-root">' + input + '</x-turndown>',
				'text/html'
			);
			root = doc.getElementById('turndown-root');
		} else {
			root = input.cloneNode(true);
		}
		collapseWhitespace({
			element: root,
			isBlock: isBlock,
			isVoid: isVoid,
			isPre: options.preformattedCode ? isPreOrCode : null
		});

		return root;
	};

	let _htmlParser;
	/** Returns a DOMParser, created on first use and reused afterwards. */
	const htmlParser = () => {
		_htmlParser = _htmlParser || new DOMParser();
		return _htmlParser;
	};

	const isPreOrCode = (node) => node.nodeName === 'PRE' || node.nodeName === 'CODE';

	/**
	 * Annotates a DOM node in place with the flags the converter relies on
	 * (`isBlock`, `isCode`, `isBlank`, `flankingWhitespace`) and returns it.
	 * Named `decorateNode` so it does not shadow the DOM global `Node`.
	 */
	const decorateNode = (node, options) => {
		node.isBlock = isBlock(node);
		node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode;
		node.isBlank = isBlank(node);
		node.flankingWhitespace = flankingWhitespace(node, options);
		return node;
	};

	/**
	 * A node is blank when it has only whitespace text and neither it nor any
	 * descendant is a void or meaningful-when-blank element.
	 */
	const isBlank = (node) =>
		(
			!isVoid(node) &&
			!isMeaningfulWhenBlank(node) &&
			/^\s*$/.test(node.textContent) &&
			!hasVoid(node) &&
			!hasMeaningfulWhenBlank(node)
		)
	;

	/**
	 * Computes the whitespace to emit before and after an inline node's
	 * replacement. Block nodes (and code nodes when `preformattedCode` is set)
	 * get none.
	 *
	 * @returns {{leading: String, trailing: String}}
	 */
	const flankingWhitespace = (node, options) => {
		if (node.isBlock || (options.preformattedCode && node.isCode)) {
			return { leading: '', trailing: '' };
		}

		const edges = edgeWhitespace(node.textContent);

		// abandon leading ASCII WS if left-flanked by ASCII WS
		if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
			edges.leading = edges.leadingNonAscii;
		}

		// abandon trailing ASCII WS if right-flanked by ASCII WS
		if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
			edges.trailing = edges.trailingNonAscii;
		}

		return { leading: edges.leading, trailing: edges.trailing };
	};

	/** Splits the leading and trailing whitespace of a string into ASCII and non-ASCII parts. */
	const edgeWhitespace = (string) => {
		const m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/);
		return {
			leading: m[1], // whole string for whitespace-only strings
			leadingAscii: m[2],
			leadingNonAscii: m[3],
			trailing: m[4], // empty for whitespace-only strings
			trailingNonAscii: m[5],
			trailingAscii: m[6]
		};
	};

	/**
	 * Tells whether the sibling on the given side ('left' or anything else for
	 * right) puts a space directly next to the node.
	 *
	 * @returns {Boolean}
	 */
	const isFlankedByWhitespace = (side, node, options) => {
		let sibling;
		let regExp;

		if (side === 'left') {
			sibling = node.previousSibling;
			regExp = / $/;
		} else {
			sibling = node.nextSibling;
			regExp = /^ /;
		}

		if (!sibling) return false;

		if (sibling.nodeType === 3) {
			return regExp.test(sibling.nodeValue);
		}
		if (options.preformattedCode && sibling.nodeName === 'CODE') {
			return false;
		}
		if (sibling.nodeType === 1 && !isBlock(sibling)) {
			return regExp.test(sibling.textContent);
		}
		return false;
	};

	const reduce = Array.prototype.reduce;

	/** [pattern, replacement] pairs applied in order by TurndownService#escape. */
	const escapes = [
		[/\\/g, '\\\\'],
		[/\*/g, '\\*'],
		[/^-/g, '\\-'],
		[/^\+ /g, '\\+ '],
		[/^(=+)/g, '\\$1'],
		[/^(#{1,6}) /g, '\\$1 '],
		[/`/g, '\\`'],
		[/^~~~/g, '\\~~~'],
		[/\[/g, '\\['],
		[/\]/g, '\\]'],
		[/^>/g, '\\>'],
		[/_/g, '\\_'],
		[/^(\d+)\. /g, '$1\\. ']
	];

	class TurndownService
	{
		/**
		 * @param {Object} [options] Overrides for the defaults listed below
		 */
		constructor(options) {
			this.options = Object.assign({
				rules: rules,
				headingStyle: 'setext',
				hr: '* * *',
				bulletListMarker: '*',
				codeBlockStyle: 'indented',
				fence: '```',
				emDelimiter: '_',
				strongDelimiter: '**',
				linkStyle: 'inlined',
				linkReferenceStyle: 'full',
				// Two trailing spaces: the minimum for a Markdown hard line break.
				br: '  ',
				preformattedCode: false,
				blankReplacement: (content, node) => node.isBlock ? '\n\n' : '',
				keepReplacement: (content, node) => node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML,
				defaultReplacement: (content, node) => node.isBlock ? '\n\n' + content + '\n\n' : content
			}, options);
			this.rules = new Rules(this.options);
		}

		/**
		 * The entry point for converting a string or DOM node to Markdown
		 * @public
		 * @param {String|HTMLElement} input The string or DOM node to convert
		 * @returns A Markdown representation of the input
		 * @type String
		 * @throws {TypeError} when input is neither a string nor an
		 *   element/document/fragment node
		 */
		turndown(input) {
			if (!canConvert(input)) {
				throw new TypeError(
					input + ' is not a string, or an element/document/fragment node.'
				);
			}

			if (input === '') return '';

			const output = this.process(RootNode(input, this.options));
			return this.postProcess(output);
		}

		/**
		 * Add one or more plugins
		 * @public
		 * @param {Function|Array} plugin The plugin or array of plugins to add
		 * @returns The Turndown instance for chaining
		 * @type Object
		 * @throws {TypeError} when plugin is neither a function nor an array
		 */
		use(plugin) {
			if (Array.isArray(plugin)) {
				for (let i = 0; i < plugin.length; i++) this.use(plugin[i]);
			} else if (typeof plugin === 'function') {
				plugin(this);
			} else {
				throw new TypeError('plugin must be a Function or an Array of Functions');
			}
			return this;
		}

		/**
		 * Adds a rule
		 * @public
		 * @param {String} key The unique key of the rule
		 * @param {Object} rule The rule
		 * @returns The Turndown instance for chaining
		 * @type Object
		 */
		addRule(key, rule) {
			this.rules.add(key, rule);
			return this;
		}

		/**
		 * Keep a node (as HTML) that matches the filter
		 * @public
		 * @param {String|Array|Function} filter Selects the nodes to keep
		 * @returns The Turndown instance for chaining
		 * @type Object
		 */
		keep(filter) {
			this.rules.keep(filter);
			return this;
		}

		/**
		 * Remove a node that matches the filter
		 * @public
		 * @param {String|Array|Function} filter Selects the nodes to remove
		 * @returns The Turndown instance for chaining
		 * @type Object
		 */
		remove(filter) {
			this.rules.remove(filter);
			return this;
		}

		/**
		 * Escapes Markdown syntax
		 * @public
		 * @param {String} string The string to escape
		 * @returns A string with Markdown syntax escaped
		 * @type String
		 */
		escape(string) {
			return escapes.reduce((accumulator, escape) => accumulator.replace(escape[0], escape[1]), string);
		}

		/**
		 * Reduces a DOM node down to its Markdown string equivalent
		 * @private
		 * @param {HTMLElement} parentNode The node to convert
		 * @returns A Markdown representation of the node
		 * @type String
		 */
		process(parentNode) {
			return reduce.call(parentNode.childNodes, (output, node) => {
				node = decorateNode(node, this.options);

				let replacement = '';
				if (node.nodeType === 3) {
					// Text inside code is emitted verbatim; everything else is escaped.
					replacement = node.isCode ? node.nodeValue : this.escape(node.nodeValue);
				} else if (node.nodeType === 1) {
					replacement = this.replacementForNode(node);
				}

				return join(output, replacement);
			}, '');
		}

		/**
		 * Appends strings as each rule requires and trims the output
		 * @private
		 * @param {String} output The conversion output
		 * @returns A trimmed version of the output
		 * @type String
		 */
		postProcess(output) {
			this.rules.forEach((rule) => {
				if (typeof rule.append === 'function') {
					output = join(output, rule.append(this.options));
				}
			});

			// Leading spaces are kept on purpose; only tabs and newlines are stripped there.
			return output.replace(/^[\t\r\n]+/, '').replace(/\s+$/, '');
		}

		/**
		 * Converts an element node to its Markdown equivalent
		 * @private
		 * @param {HTMLElement} node The node to convert
		 * @returns A Markdown representation of the node
		 * @type String
		 */
		replacementForNode(node) {
			const rule = this.rules.forNode(node);
			let content = this.process(node);
			const whitespace = node.flankingWhitespace;
			if (whitespace.leading || whitespace.trailing) content = content.trim();
			return (
				whitespace.leading +
				rule.replacement(content, node, this.options) +
				whitespace.trailing
			);
		}
	}

	/**
	 * Joins replacement to the current output with appropriate number of new lines
	 * @private
	 * @param {String} output The current conversion output
	 * @param {String} replacement The string to append to the output
	 * @returns Joined output
	 * @type String
	 */
	const join = (output, replacement) => {
		const s1 = trimTrailingNewlines(output);
		const s2 = trimLeadingNewlines(replacement);
		// Keep the larger of the two newline runs, capped at two.
		const nls = Math.max(output.length - s1.length, replacement.length - s2.length);
		const separator = '\n\n'.substring(0, nls);

		return s1 + separator + s2;
	};

	/**
	 * Determines whether an input can be converted
	 * @private
	 * @param {String|HTMLElement} input The candidate input
	 * @returns true for strings and for nodes whose nodeType is 1 (element),
	 *   9 (document) or 11 (document fragment); false otherwise
	 * @type Boolean
	 */
	const canConvert = (input) =>
		(
			input != null && (
				typeof input === 'string' ||
				[1, 9, 11].includes(input.nodeType)
			)
		)
	;

	return TurndownService;

})();