/** * https://github.com/mixmark-io/turndown * v7.2.0 modified by SnappyMail to be ES2020 */ const TurndownService = (() => { const repeat = (character, count) => Array(count + 1).join(character), trimLeadingNewlines = string => string.replace(/^\n*/, ''), trimTrailingNewlines = string => { // avoid match-at-end regexp bottleneck, see #370 var indexEnd = string.length; while (indexEnd > 0 && string[indexEnd - 1] === '\n') indexEnd--; return string.substring(0, indexEnd) }, blockElements = [ 'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO', 'BLOCKQUOTE', 'BODY', 'CANVAS', 'CENTER', 'DD', 'DIR', 'DIV', 'DL', 'DT', 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'FRAMESET', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER', 'HGROUP', 'HR', 'HTML', 'ISINDEX', 'LI', 'MAIN', 'MENU', 'NAV', 'NOFRAMES', 'NOSCRIPT', 'OL', 'OUTPUT', 'P', 'PRE', 'SECTION', 'TABLE', 'TBODY', 'TD', 'TFOOT', 'TH', 'THEAD', 'TR', 'UL' ], isBlock = node => is(node, blockElements), voidElements = [ 'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT', 'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR' ], isVoid = node => is(node, voidElements), hasVoid = node => has(node, voidElements), meaningfulWhenBlankElements = [ 'A', 'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD', 'IFRAME', 'SCRIPT', 'AUDIO', 'VIDEO' ], isMeaningfulWhenBlank = node => is(node, meaningfulWhenBlankElements), hasMeaningfulWhenBlank = node => has(node, meaningfulWhenBlankElements), is = (node, tagNames) => tagNames.indexOf(node.nodeName) >= 0, has = (node, tagNames) => ( node.getElementsByTagName && tagNames.some(tagName => node.getElementsByTagName(tagName).length) ), rules = { paragraph: { filter: 'p', replacement: content => '\n\n' + content + '\n\n' }, lineBreak: { filter: 'br', replacement: (content, node, options) => options.br + '\n' }, heading: { filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], replacement: (content, node, options) => { var hLevel = Number(node.nodeName.charAt(1)); if (options.headingStyle === 'setext' && hLevel < 3) { var underline = repeat((hLevel === 1 ? '=' : '-'), content.length); return '\n\n' + content + '\n' + underline + '\n\n' } return '\n\n' + repeat('#', hLevel) + ' ' + content + '\n\n' } }, blockquote: { filter: 'blockquote', replacement: (content) => { content = content.replace(/^\n+|\n+$/g, ''); content = content.replace(/^/gm, '> '); return '\n\n' + content + '\n\n' } }, list: { filter: ['ul', 'ol'], replacement: (content, node) => { var parent = node.parentNode; if (parent.nodeName === 'LI' && parent.lastElementChild === node) { return '\n' + content } return '\n\n' + content + '\n\n' } }, listItem: { filter: 'li', replacement: (content, node, options) => { content = content .replace(/^\n+/, '') // remove leading newlines .replace(/\n+$/, '\n') // replace trailing newlines with just a single one .replace(/\n/gm, '\n '); // indent var prefix = options.bulletListMarker + ' '; var parent = node.parentNode; if (parent.nodeName === 'OL') { var start = parent.getAttribute('start'); var index = Array.prototype.indexOf.call(parent.children, node); prefix = (start ? Number(start) + index : index + 1) + '. '; } return ( prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '') ) } }, indentedCodeBlock: { filter: (node, options) => ( options.codeBlockStyle === 'indented' && node.nodeName === 'PRE' && node.firstChild && node.firstChild.nodeName === 'CODE' ) , replacement: (content, node) => ( '\n\n ' + node.firstChild.textContent.replace(/\n/g, '\n ') + '\n\n' ) }, fencedCodeBlock: { filter: (node, options) => ( options.codeBlockStyle === 'fenced' && node.nodeName === 'PRE' && node.firstChild && node.firstChild.nodeName === 'CODE' ) , replacement: (content, node, options) => { var className = node.firstChild.getAttribute('class') || ''; var language = (className.match(/language-(\S+)/) || [null, ''])[1]; var code = node.firstChild.textContent; var fenceChar = options.fence.charAt(0); var fenceSize = 3; var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm'); var match; while ((match = fenceInCodeRegex.exec(code))) { if (match[0].length >= fenceSize) { fenceSize = match[0].length + 1; } } var fence = repeat(fenceChar, fenceSize); return ( '\n\n' + fence + language + '\n' + code.replace(/\n$/, '') + '\n' + fence + '\n\n' ) } }, horizontalRule: { filter: 'hr', replacement: (content, node, options) => '\n\n' + options.hr + '\n\n' }, inlineLink: { filter: (node, options) => ( options.linkStyle === 'inlined' && node.nodeName === 'A' && node.getAttribute('href') ) , replacement: (content, node) => { var href = node.getAttribute('href'); if (href) href = href.replace(/([()])/g, '\\$1'); var title = cleanAttribute(node.getAttribute('title')); if (title) title = ' "' + title.replace(/"/g, '\\"') + '"'; return '[' + content + '](' + href + title + ')' } }, referenceLink: { filter: (node, options) => ( options.linkStyle === 'referenced' && node.nodeName === 'A' && node.getAttribute('href') ) , replacement(content, node, options) { var href = node.getAttribute('href'); var title = cleanAttribute(node.getAttribute('title')); if (title) title = ' "' + title + '"'; var replacement; var reference; switch (options.linkReferenceStyle) { case 'collapsed': replacement = '[' + content + '][]'; reference = '[' + content + ']: ' + href + title; break case 'shortcut': replacement = '[' + content + ']'; reference = '[' + content + ']: ' + href + title; break default: var id = this.references.length + 1; replacement = '[' + content + '][' + id + ']'; reference = '[' + id + ']: ' + href + title; } this.references.push(reference); return replacement }, references: [], append() { var references = ''; if (this.references.length) { references = '\n\n' + this.references.join('\n') + '\n\n'; this.references = []; // Reset references } return references } }, emphasis: { filter: ['em', 'i'], replacement: (content, node, options) => { if (!content.trim()) return '' return options.emDelimiter + content + options.emDelimiter } }, strong: { filter: ['strong', 'b'], replacement: (content, node, options) => { if (!content.trim()) return '' return options.strongDelimiter + content + options.strongDelimiter } }, code: { filter: (node) => { var hasSiblings = node.previousSibling || node.nextSibling; var isCodeBlock = node.parentNode.nodeName === 'PRE' && !hasSiblings; return node.nodeName === 'CODE' && !isCodeBlock }, replacement: (content) => { if (!content) return '' content = content.replace(/\r?\n|\r/g, ' '); var extraSpace = /^`|^ .*?[^ ].* $|`$/.test(content) ? ' ' : ''; var delimiter = '`'; var matches = content.match(/`+/gm) || []; while (matches.indexOf(delimiter) !== -1) delimiter = delimiter + '`'; return delimiter + extraSpace + content + extraSpace + delimiter } }, image: { filter: 'img', replacement: (content, node) => { var alt = cleanAttribute(node.getAttribute('alt')); var src = node.getAttribute('src') || ''; var title = cleanAttribute(node.getAttribute('title')); var titlePart = title ? ' "' + title + '"' : ''; return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : '' } }, style: { filter: 'style', replacement: () => '' } }, cleanAttribute = (attribute) => attribute ? attribute.replace(/(\n+\s*)+/g, '\n') : ''; /** * Manages a collection of rules used to convert HTML to Markdown */ class Rules { constructor(options) { this.options = options; this._keep = []; this._remove = []; this.blankRule = { replacement: options.blankReplacement }; this.keepReplacement = options.keepReplacement; this.defaultRule = { replacement: options.defaultReplacement }; this.array = []; for (var key in options.rules) this.array.push(options.rules[key]); } add(key, rule) { this.array.unshift(rule); } keep(filter) { this._keep.unshift({ filter: filter, replacement: this.keepReplacement }); } remove(filter) { this._remove.unshift({ filter: filter, replacement: () => '' }); } forNode(node) { if (node.isBlank) return this.blankRule return findRule(this.array, node, this.options) || findRule(this._keep, node, this.options) || findRule(this._remove, node, this.options) || this.defaultRule } forEach(fn) { for (var i = 0; i < this.array.length; i++) fn(this.array[i], i); } } const findRule = (rules, node, options) => { for (var i = 0; i < rules.length; i++) { var rule = rules[i]; if (filterValue(rule, node, options)) return rule } return void 0 }, filterValue = (rule, node, options) => { var filter = rule.filter; if (typeof filter === 'string') { return (filter === node.nodeName.toLowerCase()) } else if (Array.isArray(filter)) { return (filter.indexOf(node.nodeName.toLowerCase()) > -1) } else if (typeof filter === 'function') { return !!filter.call(rule, node, options) } throw new TypeError('`filter` needs to be a string, array, or function') }, /** * The collapseWhitespace function is adapted from collapse-whitespace * by Luc Thevenard. * * The MIT License (MIT) * * Copyright (c) 2014 Luc Thevenard * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ /** * collapseWhitespace(options) removes extraneous whitespace from an the given element. * * @param {Object} options */ collapseWhitespace = (options) => { var element = options.element; var isBlock = options.isBlock; var isVoid = options.isVoid; var isPre = options.isPre || ((node) => node.nodeName === 'PRE'); if (!element.firstChild || isPre(element)) return var prevText = null; var keepLeadingWs = false; var prev = null; var node = next(prev, element, isPre); while (node !== element) { if (node.nodeType === 3 || node.nodeType === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE var text = node.data.replace(/[ \r\n\t]+/g, ' '); if ((!prevText || / $/.test(prevText.data)) && !keepLeadingWs && text[0] === ' ') { text = text.substr(1); } // `text` might be empty at this point. if (!text) { node = remove(node); continue } node.data = text; prevText = node; } else if (node.nodeType === 1) { // Node.ELEMENT_NODE if (isBlock(node) || node.nodeName === 'BR') { if (prevText) { prevText.data = prevText.data.replace(/ $/, ''); } prevText = null; keepLeadingWs = false; } else if (isVoid(node) || isPre(node)) { // Avoid trimming space around non-block, non-BR void elements and inline PRE. prevText = null; keepLeadingWs = true; } else if (prevText) { // Drop protection if set previously. keepLeadingWs = false; } } else { node = remove(node); continue } var nextNode = next(prev, node, isPre); prev = node; node = nextNode; } if (prevText) { prevText.data = prevText.data.replace(/ $/, ''); if (!prevText.data) { remove(prevText); } } }, /** * remove(node) removes the given node from the DOM and returns the * next node in the sequence. * * @param {Node} node * @return {Node} node */ remove = (node) => { var next = node.nextSibling || node.parentNode; node.parentNode.removeChild(node); return next }, /** * next(prev, current, isPre) returns the next node in the sequence, given the * current and previous nodes. * * @param {Node} prev * @param {Node} current * @param {Function} isPre * @return {Node} */ next = (prev, current, isPre) => { if ((prev && prev.parentNode === current) || isPre(current)) { return current.nextSibling || current.parentNode } return current.firstChild || current.nextSibling || current.parentNode }, /* * Parsing HTML strings */ RootNode = (input, options) => { var root; if (typeof input === 'string') { var doc = htmlParser().parseFromString( // DOM parsers arrange elements in the and . // Wrapping in a custom element ensures elements are reliably arranged in // a single element. '' + input + '', 'text/html' ); root = doc.getElementById('turndown-root'); } else { root = input.cloneNode(true); } collapseWhitespace({ element: root, isBlock: isBlock, isVoid: isVoid, isPre: options.preformattedCode ? isPreOrCode : null }); return root }; var _htmlParser; const htmlParser = () => { _htmlParser = _htmlParser || new DOMParser(); return _htmlParser }, isPreOrCode = (node) => node.nodeName === 'PRE' || node.nodeName === 'CODE', Node = (node, options) => { node.isBlock = isBlock(node); node.isCode = node.nodeName === 'CODE' || node.parentNode.isCode; node.isBlank = isBlank(node); node.flankingWhitespace = flankingWhitespace(node, options); return node }, isBlank = (node) => ( !isVoid(node) && !isMeaningfulWhenBlank(node) && /^\s*$/i.test(node.textContent) && !hasVoid(node) && !hasMeaningfulWhenBlank(node) ) , flankingWhitespace = (node, options) => { if (node.isBlock || (options.preformattedCode && node.isCode)) { return { leading: '', trailing: '' } } var edges = edgeWhitespace(node.textContent); // abandon leading ASCII WS if left-flanked by ASCII WS if (edges.leadingAscii && isFlankedByWhitespace('left', node, options)) { edges.leading = edges.leadingNonAscii; } // abandon trailing ASCII WS if right-flanked by ASCII WS if (edges.trailingAscii && isFlankedByWhitespace('right', node, options)) { edges.trailing = edges.trailingNonAscii; } return { leading: edges.leading, trailing: edges.trailing } }, edgeWhitespace = (string) => { var m = string.match(/^(([ \t\r\n]*)(\s*))(?:(?=\S)[\s\S]*\S)?((\s*?)([ \t\r\n]*))$/); return { leading: m[1], // whole string for whitespace-only strings leadingAscii: m[2], leadingNonAscii: m[3], trailing: m[4], // empty for whitespace-only strings trailingNonAscii: m[5], trailingAscii: m[6] } }, isFlankedByWhitespace = (side, node, options) => { var sibling; var regExp; var isFlanked; if (side === 'left') { sibling = node.previousSibling; regExp = / $/; } else { sibling = node.nextSibling; regExp = /^ /; } if (sibling) { if (sibling.nodeType === 3) { isFlanked = regExp.test(sibling.nodeValue); } else if (options.preformattedCode && sibling.nodeName === 'CODE') { isFlanked = false; } else if (sibling.nodeType === 1 && !isBlock(sibling)) { isFlanked = regExp.test(sibling.textContent); } } return isFlanked }, reduce = Array.prototype.reduce, escapes = [ [/\\/g, '\\\\'], [/\*/g, '\\*'], [/^-/g, '\\-'], [/^\+ /g, '\\+ '], [/^(=+)/g, '\\$1'], [/^(#{1,6}) /g, '\\$1 '], [/`/g, '\\`'], [/^~~~/g, '\\~~~'], [/\[/g, '\\['], [/\]/g, '\\]'], [/^>/g, '\\>'], [/_/g, '\\_'], [/^(\d+)\. /g, '$1\\. '] ]; class TurndownService { constructor(options) { this.options = Object.assign({ rules: rules, headingStyle: 'setext', hr: '* * *', bulletListMarker: '*', codeBlockStyle: 'indented', fence: '```', emDelimiter: '_', strongDelimiter: '**', linkStyle: 'inlined', linkReferenceStyle: 'full', br: ' ', preformattedCode: false, blankReplacement: (content, node) => node.isBlock ? '\n\n' : '', keepReplacement: (content, node) => node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML, defaultReplacement: (content, node) => node.isBlock ? '\n\n' + content + '\n\n' : content }, options); this.rules = new Rules(this.options); } /** * The entry point for converting a string or DOM node to Markdown * @public * @param {String|HTMLElement} input The string or DOM node to convert * @returns A Markdown representation of the input * @type String */ turndown(input) { if (!canConvert(input)) { throw new TypeError( input + ' is not a string, or an element/document/fragment node.' ) } if (input === '') return '' var output = this.process(RootNode(input, this.options)); return this.postProcess(output) } /** * Add one or more plugins * @public * @param {Function|Array} plugin The plugin or array of plugins to add * @returns The Turndown instance for chaining * @type Object */ use(plugin) { if (Array.isArray(plugin)) { for (var i = 0; i < plugin.length; i++) this.use(plugin[i]); } else if (typeof plugin === 'function') { plugin(this); } else { throw new TypeError('plugin must be a Function or an Array of Functions') } return this } /** * Adds a rule * @public * @param {String} key The unique key of the rule * @param {Object} rule The rule * @returns The Turndown instance for chaining * @type Object */ addRule(key, rule) { this.rules.add(key, rule); return this } /** * Keep a node (as HTML) that matches the filter * @public * @param {String|Array|Function} filter The unique key of the rule * @returns The Turndown instance for chaining * @type Object */ keep(filter) { this.rules.keep(filter); return this } /** * Remove a node that matches the filter * @public * @param {String|Array|Function} filter The unique key of the rule * @returns The Turndown instance for chaining * @type Object */ remove(filter) { this.rules.remove(filter); return this } /** * Escapes Markdown syntax * @public * @param {String} string The string to escape * @returns A string with Markdown syntax escaped * @type String */ escape(string) { return escapes.reduce((accumulator, escape) => accumulator.replace(escape[0], escape[1]), string) } /** * Reduces a DOM node down to its Markdown string equivalent * @private * @param {HTMLElement} parentNode The node to convert * @returns A Markdown representation of the node * @type String */ process(parentNode) { var self = this; return reduce.call(parentNode.childNodes, (output, node) => { node = Node(node, self.options); var replacement = ''; if (node.nodeType === 3) { replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue); } else if (node.nodeType === 1) { replacement = self.replacementForNode(node); } return join(output, replacement) }, '') } /** * Appends strings as each rule requires and trims the output * @private * @param {String} output The conversion output * @returns A trimmed version of the ouput * @type String */ postProcess(output) { var self = this; this.rules.forEach((rule) => { if (typeof rule.append === 'function') { output = join(output, rule.append(self.options)); } }); return output.replace(/^[\t\r\n]+/, '').replace(/[\t\r\n\s]+$/, '') } /** * Converts an element node to its Markdown equivalent * @private * @param {HTMLElement} node The node to convert * @returns A Markdown representation of the node * @type String */ replacementForNode(node) { var rule = this.rules.forNode(node); var content = this.process(node); var whitespace = node.flankingWhitespace; if (whitespace.leading || whitespace.trailing) content = content.trim(); return ( whitespace.leading + rule.replacement(content, node, this.options) + whitespace.trailing ) } } /** * Joins replacement to the current output with appropriate number of new lines * @private * @param {String} output The current conversion output * @param {String} replacement The string to append to the output * @returns Joined output * @type String */ const join = (output, replacement) => { var s1 = trimTrailingNewlines(output); var s2 = trimLeadingNewlines(replacement); var nls = Math.max(output.length - s1.length, replacement.length - s2.length); var separator = '\n\n'.substring(0, nls); return s1 + separator + s2 }, /** * Determines whether an input can be converted * @private * @param {String|HTMLElement} input Describe this parameter * @returns Describe what it returns * @type String|Object|Array|Boolean|Number */ canConvert = (input) => ( input != null && ( typeof input === 'string' || (input.nodeType && ( input.nodeType === 1 || input.nodeType === 9 || input.nodeType === 11 )) ) ) ; return TurndownService; })();