From 2d19f073d97d03a190533819831e3a14f4542219 Mon Sep 17 00:00:00 2001
From: zadam
Date: Sat, 2 Mar 2024 07:13:02 +0100
Subject: [PATCH] fix searching fulltext with tags, closes #4661

---
 .../expressions/note_content_fulltext.js      | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/services/search/expressions/note_content_fulltext.js b/src/services/search/expressions/note_content_fulltext.js
index 86b08c469..1b607b308 100644
--- a/src/services/search/expressions/note_content_fulltext.js
+++ b/src/services/search/expressions/note_content_fulltext.js
@@ -111,11 +111,7 @@ class NoteContentFulltextExp extends Expression {
 
         if (type === 'text' && mime === 'text/html') {
             if (!this.raw && content.length < 20000) { // striptags is slow for very large notes
-                // allow link to preserve URLs: https://github.com/zadam/trilium/issues/2412
-                content = striptags(content, ['a'], ' ');
-
-                // at least the closing tag can be easily stripped
-                content = content.replace(/<\/a>/ig, "");
+                content = this.stripTags(content);
             }
 
             content = content.replace(/&nbsp;/g, ' ');
@@ -123,6 +119,23 @@ class NoteContentFulltextExp extends Expression {
 
         return content.trim();
     }
+
+    stripTags(content) {
+        // we want to allow link to preserve URLs: https://github.com/zadam/trilium/issues/2412
+        // we want to insert space in place of block tags (because they imply text separation)
+        // but we don't want to insert text for typical formatting inline tags which can occur within one word
+        const linkTag = 'a';
+        const inlineFormattingTags = ['b', 'strong', 'em', 'i', 'span', 'big', 'small', 'font', 'sub', 'sup'];
+
+        // replace tags which imply text separation with a space
+        content = striptags(content, [linkTag, ...inlineFormattingTags], ' ');
+
+        // replace the inline formatting tags (but not links) without a space
+        content = striptags(content, [linkTag], '');
+
+        // at least the closing link tag can be easily stripped
+        return content.replace(/<\/a>/ig, "");
+    }
 }
 
 module.exports = NoteContentFulltextExp;