From de1b67287c66f1fdf91707e226e2394211f194f0 Mon Sep 17 00:00:00 2001 From: Christine Spang Date: Thu, 22 Dec 2016 17:08:18 -0800 Subject: [PATCH] fix(snippet-parsing): Don't add extraneous spaces after text format tags Summary: This was leading us to put funny things like 'Nylas !' in some snippets that used tags like and for text formatting. This is probs a teeny little bit slower than the previous version since it invokes a callback on a lot more nodes, but we can't really fix this issue without knowledge of the preceding tag name. Test Plan: unit test included!! Reviewers: evan, jackie Reviewed By: jackie Differential Revision: https://phab.nylas.com/D3553 --- .../extractSnippet/finimize.txt | 2 +- .../local-sync/spec/message-factory-spec.js | 9 +++++++ .../local-sync/src/shared/message-factory.js | 24 ++++++++++++------- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/packages/local-sync/spec/fixtures/MessageFactory/extractSnippet/finimize.txt b/packages/local-sync/spec/fixtures/MessageFactory/extractSnippet/finimize.txt index 3c75ab2d4..57b264695 100644 --- a/packages/local-sync/spec/fixtures/MessageFactory/extractSnippet/finimize.txt +++ b/packages/local-sync/spec/fixtures/MessageFactory/extractSnippet/finimize.txt @@ -1 +1 @@ -Finance for our generation . Hi Christine, here's the news you need to know for December 13th . Reading +Finance for our generation. Hi Christine, here's the news you need to know for December 13th. Reading diff --git a/packages/local-sync/spec/message-factory-spec.js b/packages/local-sync/spec/message-factory-spec.js index 674ec0c05..2839a7eb2 100644 --- a/packages/local-sync/spec/message-factory-spec.js +++ b/packages/local-sync/spec/message-factory-spec.js @@ -73,6 +73,15 @@ const snippetTestCases = [{ plainBody: null, htmlBody: '

Unicorns arenative to the

', snippet: 'Unicorns are native to the', +}, { + purpose: "don't add extraneous spaces after text format markup", + plainBody: null, + htmlBody: ` + + Hey there, Nylas!
+ You have a new follower on Product Hunt. + `, + snippet: 'Hey there, Nylas! You have a new follower on Product Hunt.', }, ] diff --git a/packages/local-sync/src/shared/message-factory.js b/packages/local-sync/src/shared/message-factory.js index d5a20ba8f..8e276a7de 100644 --- a/packages/local-sync/src/shared/message-factory.js +++ b/packages/local-sync/src/shared/message-factory.js @@ -42,6 +42,7 @@ function extractSnippet(plainBody, htmlBody) { if (htmlBody) { const doc = new DOMParser().parseFromString(htmlBody, 'text/html') const skipTags = new Set(['TITLE', 'SCRIPT', 'STYLE', 'IMG']); + const noSpaceTags = new Set(['B', 'I', 'STRONG', 'EM', 'SPAN']); const treeWalker = document.createTreeWalker(doc, NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_TEXT, (node) => { if (skipTags.has(node.tagName)) { @@ -53,23 +54,28 @@ function extractSnippet(plainBody, htmlBody) { if (nodeValue) { return NodeFilter.FILTER_ACCEPT; } + return NodeFilter.FILTER_SKIP; } - return NodeFilter.FILTER_SKIP; + return NodeFilter.FILTER_ACCEPT; }); let extractedText = ""; + let lastNodeTag = ""; while (treeWalker.nextNode()) { - // TODO: there may be some elements we don't want to add a space between - if (extractedText) { - extractedText += " "; - } - extractedText += treeWalker.currentNode.nodeValue; - if (extractedText.length > SNIPPET_MAX_SIZE) { - break; + if (treeWalker.currentNode.nodeType === Node.ELEMENT_NODE) { + lastNodeTag = treeWalker.currentNode.nodeName; + } else { + if (extractedText && !noSpaceTags.has(lastNodeTag)) { + extractedText += " "; + } + extractedText += treeWalker.currentNode.nodeValue; + if (extractedText.length > SNIPPET_MAX_SIZE) { + break; + } } } - snippetText = extractedText; + snippetText = extractedText.trim(); } // clean up and trim snippet