fix(autolinker): Don't match emails that are also URLs #2071

This commit is contained in:
Ben Gotow 2016-04-26 19:03:43 -07:00
parent c5a77fa854
commit 8944e10ae9
4 changed files with 46 additions and 11 deletions

View file

@ -1,5 +1,14 @@
import {RegExpUtils, DOMUtils} from 'nylas-exports';
/**
 * Reports whether `text` is matched by at least one of the given regular
 * expressions.
 *
 * @param {string} text - The string to test.
 * @param {Iterable<RegExp>} regexps - Patterns to try, in iteration order.
 * @returns {boolean} `true` as soon as one pattern matches, otherwise `false`
 *   (including when `regexps` is empty).
 */
function _matchesAnyRegexp(text, regexps) {
  for (const excludeRegexp of regexps) {
    // `test` on a global or sticky regexp resumes from `lastIndex`, so a
    // pattern object reused across calls could otherwise miss a real match.
    excludeRegexp.lastIndex = 0;
    if (excludeRegexp.test(text)) {
      return true;
    }
  }
  return false;
}
function _runOnTextNode(node, matchers) {
if (node.parentElement) {
const withinScript = node.parentElement.tagName === "SCRIPT";
@ -12,20 +21,34 @@ function _runOnTextNode(node, matchers) {
if (node.textContent.trim().length < 4) {
return;
}
for (const [prefix, regex] of matchers) {
let longest = null;
let longestLength = null;
for (const [prefix, regex, options = {}] of matchers) {
regex.lastIndex = 0;
const match = regex.exec(node.textContent);
if (match !== null) {
const href = `${prefix}${match[0]}`;
const range = document.createRange();
range.setStart(node, match.index);
range.setEnd(node, match.index + match[0].length);
const aTag = DOMUtils.wrap(range, 'A');
aTag.href = href;
aTag.title = href;
return;
if (options.exclude && _matchesAnyRegexp(match[0], options.exclude)) {
continue;
}
if (match[0].length > longestLength) {
longest = [prefix, match];
longestLength = match[0].length;
}
}
}
if (longest) {
const [prefix, match] = longest;
const href = `${prefix}${match[0]}`;
const range = document.createRange();
range.setStart(node, match.index);
range.setEnd(node, match.index + match[0].length);
const aTag = DOMUtils.wrap(range, 'A');
aTag.href = href;
aTag.title = href;
return;
}
}
export function autolink(doc, {async} = {}) {
@ -33,7 +56,11 @@ export function autolink(doc, {async} = {}) {
// and ensure anything with an href has a title attribute.
const textWalker = document.createTreeWalker(doc.body, NodeFilter.SHOW_TEXT);
const matchers = [
['mailto:', RegExpUtils.emailRegex()],
['mailto:', RegExpUtils.emailRegex(), {
// Technically, gmail.com/bengotow@gmail.com is an email address. After
// matching, manually exclude any email that follows the .*[/?].*@ pattern.
exclude: [/\..*[\/|\?].*\@/],
}],
['tel:', RegExpUtils.phoneRegex()],
['', RegExpUtils.urlRegex({matchEntireString: false})],
];

View file

@ -0,0 +1,4 @@
To test this, send https://www.google.com/search?q=test@example.com or gmail.com?q=bengotow@gmail.com
to yourself from a client that allows plaintext or html editing.
What about gmail.com/bengotow@gmail.com - Oh man you're asking for trouble.

View file

@ -0,0 +1,4 @@
To test this, send <a href="https://www.google.com/search?q=test@example.com" title="https://www.google.com/search?q=test@example.com">https://www.google.com/search?q=test@example.com</a> or <a href="gmail.com?q=bengotow@gmail.com" title="gmail.com?q=bengotow@gmail.com">gmail.com?q=bengotow@gmail.com</a>
to yourself from a client that allows plaintext or html editing.
What about <a href="gmail.com/bengotow@gmail.com" title="gmail.com/bengotow@gmail.com">gmail.com/bengotow@gmail.com</a> - Oh man you're asking for trouble.

View file

@ -14,7 +14,7 @@ RegExpUtils =
# See http://tools.ietf.org/html/rfc5322#section-3.4 and
# https://tools.ietf.org/html/rfc6531 and
# https://en.wikipedia.org/wiki/Email_address#Local_part
emailRegex: -> new RegExp(/([a-z.A-Z0-9!#$%&'*+\-/=?^_`{|}~;:]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,63})/g)
emailRegex: -> new RegExp(/([a-z.A-Z0-9!#$%&'*+\-/=?^_`{|}~;]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,63})/g)
# http://stackoverflow.com/questions/16631571/javascript-regular-expression-detect-all-the-phone-number-from-the-page-source
# http://www.regexpal.com/?fam=94521