full text search should look into link URLs as well, closes #2412

This commit is contained in:
zadam 2021-12-06 20:54:37 +01:00
parent 9d18bebb13
commit 263b7a84bb
2 changed files with 38 additions and 18 deletions

View file

@ -8,6 +8,7 @@ const protectedSessionService = require('../../protected_session');
const striptags = require('striptags'); const striptags = require('striptags');
const utils = require("../../utils"); const utils = require("../../utils");
// FIXME: create common subclass with NoteContentUnprotectedFulltextExp to avoid duplication
class NoteContentProtectedFulltextExp extends Expression { class NoteContentProtectedFulltextExp extends Expression {
constructor(operator, tokens, raw) { constructor(operator, tokens, raw) {
super(); super();
@ -46,15 +47,7 @@ class NoteContentProtectedFulltextExp extends Expression {
continue; continue;
} }
content = utils.normalize(content); content = this.preprocessContent(content, type, mime);
if (type === 'text' && mime === 'text/html') {
if (!this.raw && content.length < 20000) { // striptags is slow for very large notes
content = striptags(content);
}
content = content.replace(/&nbsp;/g, ' ');
}
if (!this.tokens.find(token => !content.includes(token))) { if (!this.tokens.find(token => !content.includes(token))) {
resultNoteSet.add(becca.notes[noteId]); resultNoteSet.add(becca.notes[noteId]);
@ -63,6 +56,23 @@ class NoteContentProtectedFulltextExp extends Expression {
return resultNoteSet; return resultNoteSet;
} }
preprocessContent(content, type, mime) {
content = utils.normalize(content.toString());
if (type === 'text' && mime === 'text/html') {
if (!this.raw && content.length < 20000) { // striptags is slow for very large notes
// allow link to preserve URLs: https://github.com/zadam/trilium/issues/2412
content = striptags(content, ['a']);
// at least the closing tag can be easily stripped
content = content.replace(/<\/a>/ig, "");
}
content = content.replace(/&nbsp;/g, ' ');
}
return content;
}
} }
module.exports = NoteContentProtectedFulltextExp; module.exports = NoteContentProtectedFulltextExp;

View file

@ -6,6 +6,7 @@ const becca = require('../../../becca/becca');
const striptags = require('striptags'); const striptags = require('striptags');
const utils = require("../../utils"); const utils = require("../../utils");
// FIXME: create common subclass with NoteContentProtectedFulltextExp to avoid duplication
class NoteContentUnprotectedFulltextExp extends Expression { class NoteContentUnprotectedFulltextExp extends Expression {
constructor(operator, tokens, raw) { constructor(operator, tokens, raw) {
super(); super();
@ -32,15 +33,7 @@ class NoteContentUnprotectedFulltextExp extends Expression {
continue; continue;
} }
content = utils.normalize(content.toString()); content = this.preprocessContent(content, type, mime);
if (type === 'text' && mime === 'text/html') {
if (!this.raw && content.length < 20000) { // striptags is slow for very large notes
content = striptags(content);
}
content = content.replace(/&nbsp;/g, ' ');
}
if (!this.tokens.find(token => !content.includes(token))) { if (!this.tokens.find(token => !content.includes(token))) {
resultNoteSet.add(becca.notes[noteId]); resultNoteSet.add(becca.notes[noteId]);
@ -49,6 +42,23 @@ class NoteContentUnprotectedFulltextExp extends Expression {
return resultNoteSet; return resultNoteSet;
} }
preprocessContent(content, type, mime) {
content = utils.normalize(content.toString());
if (type === 'text' && mime === 'text/html') {
if (!this.raw && content.length < 20000) { // striptags is slow for very large notes
// allow link to preserve URLs: https://github.com/zadam/trilium/issues/2412
content = striptags(content, ['a']);
// at least the closing tag can be easily stripped
content = content.replace(/<\/a>/ig, "");
}
content = content.replace(/&nbsp;/g, ' ');
}
return content;
}
} }
module.exports = NoteContentUnprotectedFulltextExp; module.exports = NoteContentUnprotectedFulltextExp;