Match only valid URLs for link tracking

Until now, link-tracking rewrote every <a> tag into a tracking link, regardless of its href attribute. This commit only creates tracking links for hrefs that contain a valid URL. I added a new regex (urlLinkTagRegex) instead of rewriting linkTagRegex because linkTagRegex is also used by message-list. Test cases: https://regex101.com/r/jD5zC7/3 (linkTagRegex) vs https://regex101.com/r/cK0zD8/2 (urlLinkTagRegex). Resolves: #1525
2025-09-08 13:44:53 +08:00 · 2016-03-01 22:23:26 +01:00 · 2016-03-01 22:23:26 +01:00 · 19d3c416fc
commit 19d3c416fc
parent 4900df4768
3 changed files with 13 additions and 3 deletions
--- a/internal_packages/link-tracking/lib/link-tracking-composer-extension.es6
+++ b/internal_packages/link-tracking/lib/link-tracking-composer-extension.es6
@ -27,7 +27,7 @@ export default class LinkTrackingComposerExtension extends ComposerExtension {
      const messageUid = uuid.v4().replace(/-/g, "");

      // loop through all <a href> elements, replace with redirect links and save mappings
-      draftBody.unquoted = draftBody.unquoted.replace(RegExpUtils.linkTagRegex(), (match, prefix, url, suffix, content, closingTag) => {
+      draftBody.unquoted = draftBody.unquoted.replace(RegExpUtils.urlLinkTagRegex(), (match, prefix, url, suffix, content, closingTag) => {
        const encoded = encodeURIComponent(url);
        // the links param is an index of the link array.
        const redirectUrl = `${PLUGIN_URL}/link/${draft.accountId}/${messageUid}/${links.length}?redirect=${encoded}`;
--- a/internal_packages/link-tracking/lib/link-tracking-message-extension.es6
+++ b/internal_packages/link-tracking/lib/link-tracking-message-extension.es6
@ -10,7 +10,7 @@ export default class LinkTrackingMessageExtension extends MessageViewExtension {
      links[link.redirect_url] = link
    }

-    message.body = message.body.replace(RegExpUtils.linkTagRegex(), (match, openTagPrefix, aTagHref, openTagSuffix, content, closingTag) => {
+    message.body = message.body.replace(RegExpUtils.urlLinkTagRegex(), (match, openTagPrefix, aTagHref, openTagSuffix, content, closingTag) => {
      if (links[aTagHref]) {
        const openTag = openTagPrefix + aTagHref + openTagSuffix
        let title;
--- a/src/regexp-utils.coffee
+++ b/src/regexp-utils.coffee
@ -26,7 +26,7 @@ RegExpUtils =
  urlRegex: -> new RegExp(/^\b((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))$/)

  # Test cases: https://regex101.com/r/jD5zC7/2
-  # Retruns the following capturing groups:
+  # Returns the following capturing groups:
  # 1. start of the opening a tag to href="
  # 2. The contents of the href without quotes
  # 3. the rest of the opening a tag
@ -34,6 +34,16 @@ RegExpUtils =
  # 5. the closing tag
  linkTagRegex: -> new RegExp(/(<a.*?href\s*?=\s*?['"])(.*?)(['"].*?>)([\s\S]*?)(<\/a>)/gim)

+  # Test cases: https://regex101.com/r/cK0zD8/2
+  # Catches link tags whose href is a valid URL per the Gruber Regex (its inner groups are non-capturing).
+  # Returns exactly the following capturing groups, in the same order as linkTagRegex:
+  # 1. start of the opening a tag to href="
+  # 2. The contents of the href without quotes if it's a valid URL
+  # 3. the rest of the opening a tag
+  # 4. the contents of the a tag
+  # 5. the closing tag
+  urlLinkTagRegex: -> new RegExp(/(<a.*?href\s*?=\s*?['"])((?:https?:\/\/|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\))+(?:\((?:[^\s()<>]+|(?:\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))(['"].*?>)([\s\S]*?)(<\/a>)/gim)
+
  # https://regex101.com/r/zG7aW4/3
  imageTagRegex: -> /<img\s+[^>]*src="([^"]*)"[^>]*>/g