Added turndown.js for #1604

This commit is contained in:
the-djmaze 2024-08-21 01:46:31 +02:00
parent 113003f485
commit 0dda102a05

861
vendors/turndown/turndown.js vendored Normal file
View file

@ -0,0 +1,861 @@
/**
* https://github.com/mixmark-io/turndown
* v7.2.0 modified by SnappyMail to be ES2020
*/
const TurndownService = (() => {
'use strict';
const repeat = (character, count) => Array(count + 1).join(character),
trimLeadingNewlines = string => string.replace(/^\n*/, ''),
trimTrailingNewlines = string => {
// avoid match-at-end regexp bottleneck, see #370
var indexEnd = string.length;
while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--;
return string.substring(0, indexEnd)
},
blockElements = [
'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS',
'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE',
'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER',
'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES',
'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD',
'TFOOT', 'TH', 'THEAD', 'TR', 'UL'
],
isBlock = node => is(node, blockElements),
voidElements = [
'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR'
],
isVoid = node => is(node, voidElements),
hasVoid = node => has(node, voidElements),
meaningfulWhenBlankElements = [
'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT',
'AUDIO', 'VIDEO'
],
isMeaningfulWhenBlank = node => is(node, meaningfulWhenBlankElements),
hasMeaningfulWhenBlank = node => has(node, meaningfulWhenBlankElements),
is = (node, tagNames) => tagNames.indexOf(node.nodeName) >= 0,
has = (node, tagNames) =>
(
node.getElementsByTagName &&
tagNames.some(tagName => node.getElementsByTagName(tagName).length)
),
rules = {
paragraph: {
filter: 'p',
replacement: content => '\n\n' + content + '\n\n'
},
lineBreak: {
filter: 'br',
replacement: (content, node, options) => options.br + '\n'
},
heading: {
filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
replacement: (content, node, options) => {
var hLevel = Number(node.nodeName.charAt(1));
if (options.headingStyle === 'setext' && hLevel < 3) {
var underline = repeat((hLevel === 1 ? '=' : '-'), content.length);
return '\n\n' + content + '\n' + underline + '\n\n'
}
return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n'
}
},
blockquote: {
filter: 'blockquote',
replacement: (content) => {
content = content.replace(/^\n+|\n+$/g, '');
content = content.replace(/^/gm, '> ');
return '\n\n' + content + '\n\n'
}
},
list: {
filter: ['ul', 'ol'],
replacement: (content, node) => {
var parent = node.parentNode;
if (parent.nodeName === 'LI' && parent.lastElementChild === node) {
return '\n' + content
}
return '\n\n' + content + '\n\n'
}
},
listItem: {
filter: 'li',
replacement: (content, node, options) => {
content = content
.replace(/^\n+/, '') // remove leading newlines
.replace(/\n+$/, '\n') // replace trailing newlines with just a single one
.replace(/\n/gm, '\n '); // indent
var prefix = options.bulletListMarker + ' ';
var parent = node.parentNode;
if (parent.nodeName === 'OL') {
var start = parent.getAttribute('start');
var index = Array.prototype.indexOf.call(parent.children, node);
prefix = (start ? Number(start) + index : index + 1) + '. ';
}
return (
prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
)
}
},
indentedCodeBlock: {
filter: (node, options) =>
(
options.codeBlockStyle === 'indented' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
,
replacement: (content, node) =>
(
'\n\n ' +
node.firstChild.textContent.replace(/\n/g, '\n ') +
'\n\n'
)
},
fencedCodeBlock: {
filter: (node, options) =>
(
options.codeBlockStyle === 'fenced' &&
node.nodeName === 'PRE' &&
node.firstChild &&
node.firstChild.nodeName === 'CODE'
)
,
replacement: (content, node, options) => {
var className = node.firstChild.getAttribute('class') || '';
var language = (className.match(/language-(\S+)/) || [null, ''])[1];
var code = node.firstChild.textContent;
var fenceChar = options.fence.charAt(0);
var fenceSize = 3;
var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
var match;
while ((match = fenceInCodeRegex.exec(code))) {
if (match[0].length >= fenceSize) {
fenceSize = match[0].length + 1;
}
}
var fence = repeat(fenceChar, fenceSize);
return (
'\n\n' + fence + language + '\n' +
code.replace(/\n$/, '') +
'\n' + fence + '\n\n'
)
}
},
horizontalRule: {
filter: 'hr',
replacement: (content, node, options) => '\n\n' + options.hr + '\n\n'
},
inlineLink: {
filter: (node, options) =>
(
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
,
replacement: (content, node) => {
var href = node.getAttribute('href');
if (href) href = href.replace(/([()])/g, '\\$1');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title.replace(/"/g, '\\"') + '"';
return '[' + content + '](' + href + title + ')'
}
},
referenceLink: {
filter: (node, options) =>
(
options.linkStyle === 'referenced' &&
node.nodeName === 'A' &&
node.getAttribute('href')
)
,
replacement: function (content, node, options) {
var href = node.getAttribute('href');
var title = cleanAttribute(node.getAttribute('title'));
if (title) title = ' "' + title + '"';
var replacement;
var reference;
switch (options.linkReferenceStyle) {
case 'collapsed':
replacement = '[' + content + '][]';
reference = '[' + content + ']: ' + href + title;
break
case 'shortcut':
replacement = '[' + content + ']';
reference = '[' + content + ']: ' + href + title;
break
default:
var id = this.references.length + 1;
replacement = '[' + content + '][' + id + ']';
reference = '[' + id + ']: ' + href + title;
}
this.references.push(reference);
return replacement
},
references: [],
append: function () {
var references = '';
if (this.references.length) {
references = '\n\n' + this.references.join('\n') + '\n\n';
this.references = []; // Reset references
}
return references
}
},
emphasis: {
filter: ['em', 'i'],
replacement: (content, node, options) => {
if (!content.trim()) return ''
return options.emDelimiter + content + options.emDelimiter
}
},
strong: {
filter: ['strong', 'b'],
replacement: (content, node, options) => {
if (!content.trim()) return ''
return options.strongDelimiter + content + options.strongDelimiter
}
},
code: {
filter: (node) => {
var hasSiblings = node.previousSibling || node.nextSibling;
var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings;
return node.nodeName === 'CODE' && !isCodeBlock
},
replacement: (content) => {
if (!content) return ''
content = content.replace(/\r?\n|\r/g, ' ');
var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : '';
var delimiter = '`';
var matches = content.match(/`+/gm) || [];
while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`';
return delimiter + extraSpace + content + extraSpace + delimiter
}
},
image: {
filter: 'img',
replacement: (content, node) => {
var alt = cleanAttribute(node.getAttribute('alt'));
var src = node.getAttribute('src') || '';
var title = cleanAttribute(node.getAttribute('title'));
var titlePart = title ? ' "' + title + '"' : '';
return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
}
}
},
cleanAttribute = (attribute) => attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : '';
/**
* Manages a collection of rules used to convert HTML to Markdown
*/
class Rules
{
constructor(options) {
this.options = options;
this._keep = [];
this._remove = [];
this.blankRule = {
replacement: options.blankReplacement
};
this.keepReplacement = options.keepReplacement;
this.defaultRule = {
replacement: options.defaultReplacement
};
this.array = [];
for (var key in options.rules) this.array.push(options.rules[key]);
}
add(key, rule) {
this.array.unshift(rule);
}
keep(filter) {
this._keep.unshift({
filter: filter,
replacement: this.keepReplacement
});
}
remove(filter) {
this._remove.unshift({
filter: filter,
replacement: () => ''
});
}
forNode(node) {
if (node.isBlank) return this.blankRule
return findRule(this.array, node, this.options)
|| findRule(this._keep, node, this.options)
|| findRule(this._remove, node, this.options)
|| this.defaultRule
}
forEach(fn) {
for (var i = 0; i < this.array.length; i++) fn(this.array[i], i);
}
}
const findRule = (rules, node, options) => {
for (var i = 0; i < rules.length; i++) {
var rule = rules[i];
if (filterValue(rule, node, options)) return rule
}
return void 0
},
filterValue = (rule, node, options) => {
var filter = rule.filter;
if (typeof filter === 'string') {
return (filter === node.nodeName.toLowerCase())
} else if (Array.isArray(filter)) {
return (filter.indexOf(node.nodeName.toLowerCase()) > -1)
} else if (typeof filter === 'function') {
return !!filter.call(rule, node, options)
}
throw new TypeError('`filter` needs to be a string, array, or function')
},
/**
* The collapseWhitespace function is adapted from collapse-whitespace
* by Luc Thevenard.
*
* The MIT License (MIT)
*
* Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* collapseWhitespace(options) removes extraneous whitespace from an the given element.
*
* @param {Object} options
*/
collapseWhitespace = (options) => {
var element = options.element;
var isBlock = options.isBlock;
var isVoid = options.isVoid;
var isPre = options.isPre || ((node) => node.nodeName === 'PRE');
if (!element.firstChild || isPre(element)) return
var prevText = null;
var keepLeadingWs = false;
var prev = null;
var node = next(prev, element, isPre);
while (node !== element) {
if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
var text = node.data.replace(/[ \r\n\t]+/g, ' ');
if ((!prevText || / $/.test(prevText.data)) &&
!keepLeadingWs && text[0] === ' ') {
text = text.substr(1);
}
// `text` might be empty at this point.
if (!text) {
node = remove(node);
continue
}
node.data = text;
prevText = node;
} else if (node.nodeType === 1) { // Node.ELEMENT_NODE
if (isBlock(node) || node.nodeName === 'BR') {
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
}
prevText = null;
keepLeadingWs = false;
} else if (isVoid(node) || isPre(node)) {
// Avoid trimming space around non-block, non-BR void elements and inline PRE.
prevText = null;
keepLeadingWs = true;
} else if (prevText) {
// Drop protection if set previously.
keepLeadingWs = false;
}
} else {
node = remove(node);
continue
}
var nextNode = next(prev, node, isPre);
prev = node;
node = nextNode;
}
if (prevText) {
prevText.data = prevText.data.replace(/ $/, '');
if (!prevText.data) {
remove(prevText);
}
}
},
/**
* remove(node) removes the given node from the DOM and returns the
* next node in the sequence.
*
* @param {Node} node
* @return {Node} node
*/
remove = (node) => {
var next = node.nextSibling || node.parentNode;
node.parentNode.removeChild(node);
return next
},
/**
* next(prev, current, isPre) returns the next node in the sequence, given the
* current and previous nodes.
*
* @param {Node} prev
* @param {Node} current
* @param {Function} isPre
* @return {Node}
*/
next = (prev, current, isPre) => {
if ((prev && prev.parentNode === current) || isPre(current)) {
return current.nextSibling || current.parentNode
}
return current.firstChild || current.nextSibling || current.parentNode
},
/*
* Parsing HTML strings
*/
RootNode = (input, options) => {
var root;
if (typeof input === 'string') {
var doc = htmlParser().parseFromString(
// DOM parsers arrange elements in the <head> and <body>.
// Wrapping in a custom element ensures elements are reliably arranged in
// a single element.
'<x-turndown id="turndown-root">' + input + '</x-turndown>',
'text/html'
);
root = doc.getElementById('turndown-root');
} else {
root = input.cloneNode(true);
}
collapseWhitespace({
element: root,
isBlock: isBlock,
isVoid: isVoid,
isPre: options.preformattedCode ? isPreOrCode : null
});
return root
};
var _htmlParser;
const htmlParser = () => {
_htmlParser = _htmlParser || new DOMParser();
return _htmlParser
},
isPreOrCode = (node) => node.nodeName === 'PRE' || node.nodeName === 'CODE',
Node = (node, options) => {
node.isBlock = isBlock(node);
node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode;
node.isBlank = isBlank(node);
node.flankingWhitespace = flankingWhitespace(node, options);
return node
},
isBlank = (node) =>
(
!isVoid(node) &&
!isMeaningfulWhenBlank(node) &&
/^\s*$/i.test(node.textContent) &&
!hasVoid(node) &&
!hasMeaningfulWhenBlank(node)
)
,
flankingWhitespace = (node, options) => {
if (node.isBlock || (options.preformattedCode && node.isCode)) {
return { leading: '', trailing: '' }
}
var edges = edgeWhitespace(node.textContent);
// abandon leading ASCII WS if left-flanked by ASCII WS
if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) {
edges.leading = edges.leadingNonAscii;
}
// abandon trailing ASCII WS if right-flanked by ASCII WS
if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) {
edges.trailing = edges.trailingNonAscii;
}
return { leading: edges.leading, trailing: edges.trailing }
},
edgeWhitespace = (string) => {
var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/);
return {
leading: m[1], // whole string for whitespace-only strings
leadingAscii: m[2],
leadingNonAscii: m[3],
trailing: m[4], // empty for whitespace-only strings
trailingNonAscii: m[5],
trailingAscii: m[6]
}
},
isFlankedByWhitespace = (side, node, options) => {
var sibling;
var regExp;
var isFlanked;
if (side === 'left') {
sibling = node.previousSibling;
regExp = / $/;
} else {
sibling = node.nextSibling;
regExp = /^ /;
}
if (sibling) {
if (sibling.nodeType === 3) {
isFlanked = regExp.test(sibling.nodeValue);
} else if (options.preformattedCode && sibling.nodeName === 'CODE') {
isFlanked = false;
} else if (sibling.nodeType === 1 && !isBlock(sibling)) {
isFlanked = regExp.test(sibling.textContent);
}
}
return isFlanked
},
reduce = Array.prototype.reduce,
escapes = [
[/\\/g, '\\\\'],
[/\*/g, '\\*'],
[/^-/g, '\\-'],
[/^\+ /g, '\\+ '],
[/^(=+)/g, '\\$1'],
[/^(#{1,6}) /g, '\\$1 '],
[/`/g, '\\`'],
[/^~~~/g, '\\~~~'],
[/\[/g, '\\['],
[/\]/g, '\\]'],
[/^>/g, '\\>'],
[/_/g, '\\_'],
[/^(\d+)\. /g, '$1\\. ']
];
class TurndownService
{
constructor(options) {
if (!(this instanceof TurndownService)) return new TurndownService(options)
this.options = Object.assign({
rules: rules,
headingStyle: 'setext',
hr: '* * *',
bulletListMarker: '*',
codeBlockStyle: 'indented',
fence: '```',
emDelimiter: '_',
strongDelimiter: '**',
linkStyle: 'inlined',
linkReferenceStyle: 'full',
br: ' ',
preformattedCode: false,
blankReplacement: (content, node) => node.isBlock ? '\n\n' : '',
keepReplacement: (content, node) => node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML,
defaultReplacement: (content, node) => node.isBlock ? '\n\n' + content + '\n\n' : content
}, options);
this.rules = new Rules(this.options);
}
/**
* The entry point for converting a string or DOM node to Markdown
* @public
* @param {String|HTMLElement} input The string or DOM node to convert
* @returns A Markdown representation of the input
* @type String
*/
turndown(input) {
if (!canConvert(input)) {
throw new TypeError(
input + ' is not a string, or an element/document/fragment node.'
)
}
if (input === '') return ''
var output = process.call(this, new RootNode(input, this.options));
return postProcess.call(this, output)
}
/**
* Add one or more plugins
* @public
* @param {Function|Array} plugin The plugin or array of plugins to add
* @returns The Turndown instance for chaining
* @type Object
*/
use(plugin) {
if (Array.isArray(plugin)) {
for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
} else if (typeof plugin === 'function') {
plugin(this);
} else {
throw new TypeError('plugin must be a Function or an Array of Functions')
}
return this
}
/**
* Adds a rule
* @public
* @param {String} key The unique key of the rule
* @param {Object} rule The rule
* @returns The Turndown instance for chaining
* @type Object
*/
addRule(key, rule) {
this.rules.add(key, rule);
return this
}
/**
* Keep a node (as HTML) that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
keep(filter) {
this.rules.keep(filter);
return this
}
/**
* Remove a node that matches the filter
* @public
* @param {String|Array|Function} filter The unique key of the rule
* @returns The Turndown instance for chaining
* @type Object
*/
remove(filter) {
this.rules.remove(filter);
return this
}
/**
* Escapes Markdown syntax
* @public
* @param {String} string The string to escape
* @returns A string with Markdown syntax escaped
* @type String
*/
escape(string) {
return escapes.reduce((accumulator, escape) => accumulator.replace(escape[0], escape[1]), string)
}
}
/**
* Reduces a DOM node down to its Markdown string equivalent
* @private
* @param {HTMLElement} parentNode The node to convert
* @returns A Markdown representation of the node
* @type String
*/
const process = (parentNode) => {
var self = this;
return reduce.call(parentNode.childNodes, (output, node) => {
node = new Node(node, self.options);
var replacement = '';
if (node.nodeType === 3) {
replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue);
} else if (node.nodeType === 1) {
replacement = replacementForNode.call(self, node);
}
return join(output, replacement)
}, '')
},
/**
* Appends strings as each rule requires and trims the output
* @private
* @param {String} output The conversion output
* @returns A trimmed version of the ouput
* @type String
*/
postProcess = (output) => {
var self = this;
this.rules.forEach((rule) => {
if (typeof rule.append === 'function') {
output = join(output, rule.append(self.options));
}
});
return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '')
},
/**
* Converts an element node to its Markdown equivalent
* @private
* @param {HTMLElement} node The node to convert
* @returns A Markdown representation of the node
* @type String
*/
replacementForNode = (node) => {
var rule = this.rules.forNode(node);
var content = process.call(this, node);
var whitespace = node.flankingWhitespace;
if (whitespace.leading || whitespace.trailing) content = content.trim();
return (
whitespace.leading +
rule.replacement(content, node, this.options) +
whitespace.trailing
)
},
/**
* Joins replacement to the current output with appropriate number of new lines
* @private
* @param {String} output The current conversion output
* @param {String} replacement The string to append to the output
* @returns Joined output
* @type String
*/
join = (output, replacement) => {
var s1 = trimTrailingNewlines(output);
var s2 = trimLeadingNewlines(replacement);
var nls = Math.max(output.length - s1.length, replacement.length - s2.length);
var separator = '\n\n'.substring(0, nls);
return s1 + separator + s2
},
/**
* Determines whether an input can be converted
* @private
* @param {String|HTMLElement} input Describe this parameter
* @returns Describe what it returns
* @type String|Object|Array|Boolean|Number
*/
canConvert = (input) =>
(
input != null && (
typeof input === 'string' ||
(input.nodeType && (
input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11
))
)
)
;
return TurndownService;
})();