From cc145132da5ed17bee1436403802e9a046f319bb Mon Sep 17 00:00:00 2001 From: Mehdi Rejraji Date: Thu, 3 Mar 2016 15:30:24 +0100 Subject: [PATCH] Updated Regex to exclude only mailto: links The rationale is that this covers all of the cases while still performing well. Tracking a non-standard URL is not really a user-experience problem, since it is embedded in a tag. --- src/regexp-utils.coffee | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/regexp-utils.coffee b/src/regexp-utils.coffee index f7456d446..cc4ad4c3a 100644 --- a/src/regexp-utils.coffee +++ b/src/regexp-utils.coffee @@ -34,15 +34,17 @@ RegExpUtils = # 5. the closing tag linkTagRegex: -> new RegExp(/()([\s\S]*?)(<\/a>)/gim) - # Test cases: https://regex101.com/r/cK0zD8/2 - # Catches link tags containing a valid URL using the Gruber Regex. + # Test cases: https://regex101.com/r/cK0zD8/3 + # Catches link tags whose href is: + # - Non empty + # - Not a mailto: link # Returns the following capturing groups: # 1. start of the opening a tag to href=" - # 2. The contents of the href without quotes if it's a valid URL + # 2. The contents of the href without quotes # 3. the rest of the opening a tag # 4. the contents of the a tag # 5. the closing tag - urlLinkTagRegex: -> new RegExp(/(]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))(['"].*?>)([\s\S]*?)(<\/a>)/gim) + urlLinkTagRegex: -> new RegExp(/()([\s\S]*?)(<\/a>)/gim) # https://regex101.com/r/zG7aW4/3 imageTagRegex: -> /]*src="([^"]*)"[^>]*>/g