fix(snippet-parsing): Don't add extraneous spaces after text format tags

Summary: This was leading us to put funny things like 'Nylas !' in some snippets that used tags like <i> and <b> for text formatting. This is probs a teeny little bit slower than the previous version since it invokes a callback on a lot more nodes, but we can't really fix this issue without knowledge of the preceding tag name. Test Plan: unit test included!! Reviewers: evan, jackie Reviewed By: jackie Differential Revision: https://phab.nylas.com/D3553
2025-01-11 02:30:21 +08:00 · 2016-12-22 17:08:18 -08:00 · 2016-12-22 17:08:18 -08:00 · de1b67287c
commit de1b67287c
parent 99899be7c2
3 changed files with 25 additions and 10 deletions
--- a/packages/local-sync/spec/fixtures/MessageFactory/extractSnippet/finimize.txt
+++ b/packages/local-sync/spec/fixtures/MessageFactory/extractSnippet/finimize.txt
@ -1 +1 @@
-Finance for our generation . Hi Christine, here's the news you need to know for December 13th . Reading
+Finance for our generation. Hi Christine, here's the news you need to know for December 13th. Reading
--- a/packages/local-sync/spec/message-factory-spec.js
+++ b/packages/local-sync/spec/message-factory-spec.js
@ -73,6 +73,15 @@ const snippetTestCases = [{
  plainBody: null,
  htmlBody: '<p>Unicorns are<!-- an HTML comment! -->native to the</p>',
  snippet: 'Unicorns are native to the',
+}, {
+  purpose: "don't add extraneous spaces after text format markup",
+  plainBody: null,
+  htmlBody: `
+  <td style="padding: 0px 10px">
+            Hey there, <b>Nylas</b>!<br>
+            You have a new follower on Product Hunt.
+          </td>`,
+  snippet: 'Hey there, Nylas! You have a new follower on Product Hunt.',
 },
 ]

--- a/packages/local-sync/src/shared/message-factory.js
+++ b/packages/local-sync/src/shared/message-factory.js
@ -42,6 +42,7 @@ function extractSnippet(plainBody, htmlBody) {
  if (htmlBody) {
    const doc = new DOMParser().parseFromString(htmlBody, 'text/html')
    const skipTags = new Set(['TITLE', 'SCRIPT', 'STYLE', 'IMG']);
+    const noSpaceTags = new Set(['B', 'I', 'STRONG', 'EM', 'SPAN']);

    const treeWalker = document.createTreeWalker(doc, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT, (node) => {
      if (skipTags.has(node.tagName)) {
@ -53,23 +54,28 @@ function extractSnippet(plainBody, htmlBody) {
        if (nodeValue) {
          return NodeFilter.FILTER_ACCEPT;
        }
+        return NodeFilter.FILTER_SKIP;
      }
-      return NodeFilter.FILTER_SKIP;
+      return NodeFilter.FILTER_ACCEPT;
    });

    let extractedText = "";
+    let lastNodeTag = "";
    while (treeWalker.nextNode()) {
-      // TODO: there may be some elements we don't want to add a space between
-      if (extractedText) {
-        extractedText += " ";
-      }
-      extractedText += treeWalker.currentNode.nodeValue;
-      if (extractedText.length > SNIPPET_MAX_SIZE) {
-        break;
+      if (treeWalker.currentNode.nodeType === Node.ELEMENT_NODE) {
+        lastNodeTag = treeWalker.currentNode.nodeName;
+      } else {
+        if (extractedText && !noSpaceTags.has(lastNodeTag)) {
+          extractedText += " ";
+        }
+        extractedText += treeWalker.currentNode.nodeValue;
+        if (extractedText.length > SNIPPET_MAX_SIZE) {
+          break;
+        }
      }
    }

-    snippetText = extractedText;
+    snippetText = extractedText.trim();
  }

  // clean up and trim snippet