From 8944e10ae9b93eaf0d98cbc1a886cccf770c8f74 Mon Sep 17 00:00:00 2001 From: Ben Gotow Date: Tue, 26 Apr 2016 19:03:43 -0700 Subject: [PATCH] fix(autolinker): Don't match emails that are also URLs #2071 --- .../message-list/lib/autolinker.es6 | 47 +++++++++++++++---- .../both-email-and-url-in.html | 4 ++ .../both-email-and-url-out.html | 4 ++ src/regexp-utils.coffee | 2 +- 4 files changed, 46 insertions(+), 11 deletions(-) create mode 100644 internal_packages/message-list/spec/autolinker-fixtures/both-email-and-url-in.html create mode 100644 internal_packages/message-list/spec/autolinker-fixtures/both-email-and-url-out.html diff --git a/internal_packages/message-list/lib/autolinker.es6 b/internal_packages/message-list/lib/autolinker.es6 index 0c1284927..064d52c1e 100644 --- a/internal_packages/message-list/lib/autolinker.es6 +++ b/internal_packages/message-list/lib/autolinker.es6 @@ -1,5 +1,14 @@ import {RegExpUtils, DOMUtils} from 'nylas-exports'; +function _matchesAnyRegexp(text, regexps) { + for (const excludeRegexp of regexps) { + if (excludeRegexp.test(text)) { + return true; + } + } + return false; +} + function _runOnTextNode(node, matchers) { if (node.parentElement) { const withinScript = node.parentElement.tagName === "SCRIPT"; @@ -12,20 +21,34 @@ function _runOnTextNode(node, matchers) { if (node.textContent.trim().length < 4) { return; } - for (const [prefix, regex] of matchers) { + + let longest = null; + let longestLength = null; + for (const [prefix, regex, options = {}] of matchers) { regex.lastIndex = 0; const match = regex.exec(node.textContent); if (match !== null) { - const href = `${prefix}${match[0]}`; - const range = document.createRange(); - range.setStart(node, match.index); - range.setEnd(node, match.index + match[0].length); - const aTag = DOMUtils.wrap(range, 'A'); - aTag.href = href; - aTag.title = href; - return; + if (options.exclude && _matchesAnyRegexp(match[0], options.exclude)) { + continue; + } + if (match[0].length > 
longestLength) { + longest = [prefix, match]; + longestLength = match[0].length; + } } } + + if (longest) { + const [prefix, match] = longest; + const href = `${prefix}${match[0]}`; + const range = document.createRange(); + range.setStart(node, match.index); + range.setEnd(node, match.index + match[0].length); + const aTag = DOMUtils.wrap(range, 'A'); + aTag.href = href; + aTag.title = href; + return; + } } export function autolink(doc, {async} = {}) { @@ -33,7 +56,11 @@ export function autolink(doc, {async} = {}) { // and ensure anything with an href has a title attribute. const textWalker = document.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT); const matchers = [ - ['mailto:', RegExpUtils.emailRegex()], + ['mailto:', RegExpUtils.emailRegex(), { + // Technically, gmail.com/bengotow@gmail.com is an email address. After + // matching, manually exclude any email that follows the .*[/?].*@ pattern. + exclude: [/\..*[\/|\?].*\@/], + }], ['tel:', RegExpUtils.phoneRegex()], ['', RegExpUtils.urlRegex({matchEntireString: false})], ]; diff --git a/internal_packages/message-list/spec/autolinker-fixtures/both-email-and-url-in.html b/internal_packages/message-list/spec/autolinker-fixtures/both-email-and-url-in.html new file mode 100644 index 000000000..87e893b88 --- /dev/null +++ b/internal_packages/message-list/spec/autolinker-fixtures/both-email-and-url-in.html @@ -0,0 +1,4 @@ +To test this, send https://www.google.com/search?q=test@example.com or gmail.com?q=bengotow@gmail.com +to yourself from a client that allows plaintext or html editing. + +What about gmail.com/bengotow@gmail.com - Oh man you're asking for trouble. 
diff --git a/internal_packages/message-list/spec/autolinker-fixtures/both-email-and-url-out.html b/internal_packages/message-list/spec/autolinker-fixtures/both-email-and-url-out.html new file mode 100644 index 000000000..51d7ebf17 --- /dev/null +++ b/internal_packages/message-list/spec/autolinker-fixtures/both-email-and-url-out.html @@ -0,0 +1,4 @@ +To test this, send https://www.google.com/search?q=test@example.com or gmail.com?q=bengotow@gmail.com +to yourself from a client that allows plaintext or html editing. + +What about gmail.com/bengotow@gmail.com - Oh man you're asking for trouble. diff --git a/src/regexp-utils.coffee b/src/regexp-utils.coffee index 4f3e744ff..3e0f7aba7 100644 --- a/src/regexp-utils.coffee +++ b/src/regexp-utils.coffee @@ -14,7 +14,7 @@ RegExpUtils = # See http://tools.ietf.org/html/rfc5322#section-3.4 and # https://tools.ietf.org/html/rfc6531 and # https://en.wikipedia.org/wiki/Email_address#Local_part - emailRegex: -> new RegExp(/([a-z.A-Z0-9!#$%&'*+\-/=?^_`{|}~;:]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,63})/g) + emailRegex: -> new RegExp(/([a-z.A-Z0-9!#$%&'*+\-/=?^_`{|}~;]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,63})/g) # http://stackoverflow.com/questions/16631571/javascript-regular-expression-detect-all-the-phone-number-from-the-page-source # http://www.regexpal.com/?fam=94521