From 13b9f5231cf4ef18f7292378009b5f2928a94279 Mon Sep 17 00:00:00 2001 From: zadam Date: Wed, 16 Sep 2020 17:34:48 +0200 Subject: [PATCH] similar note tweaks --- bin/build-linux-x64.sh | 2 + bin/build-mac-x64.sh | 2 + bin/build-win-x64.sh | 2 + bin/copy-trilium.sh | 1 + src/public/app/widgets/similar_notes.js | 2 +- src/services/note_cache/similarity.js | 66 +++++++++++++++++-------- 6 files changed, 54 insertions(+), 21 deletions(-) diff --git a/bin/build-linux-x64.sh b/bin/build-linux-x64.sh index 97ef3044a..064105492 100755 --- a/bin/build-linux-x64.sh +++ b/bin/build-linux-x64.sh @@ -13,6 +13,8 @@ echo "Copying required linux-x64 binaries" cp -r bin/better-sqlite3/linux-desktop-better_sqlite3.node $SRC_DIR/node_modules/better-sqlite3/build/Release/better_sqlite3.node +echo "Packaging linux x64 electron build" + ./node_modules/.bin/electron-packager $SRC_DIR --asar --out=dist --executable-name=trilium --platform=linux --arch=x64 --overwrite BUILD_DIR=./dist/trilium-linux-x64 diff --git a/bin/build-mac-x64.sh b/bin/build-mac-x64.sh index fbdc230d8..7178cf612 100755 --- a/bin/build-mac-x64.sh +++ b/bin/build-mac-x64.sh @@ -13,6 +13,8 @@ cp -r bin/better-sqlite3/mac-better_sqlite3.node $SRC_DIR/node_modules/better-sq rm -r $SRC_DIR/src/public/app-dist/*.mobile.* +echo "Packaging mac x64 electron build" + ./node_modules/.bin/electron-packager $SRC_DIR --asar --out=dist --executable-name=trilium --platform=darwin --arch=x64 --overwrite --icon=images/app-icons/mac/icon.icns BUILD_DIR=./dist/trilium-mac-x64 diff --git a/bin/build-win-x64.sh b/bin/build-win-x64.sh index 98717f8a8..172015767 100755 --- a/bin/build-win-x64.sh +++ b/bin/build-win-x64.sh @@ -13,6 +13,8 @@ cp -r bin/better-sqlite3/win-better_sqlite3.node $SRC_DIR/node_modules/better-sq rm -r $SRC_DIR/src/public/app-dist/*.mobile.* +echo "Packaging windows x64 electron build" + ./node_modules/.bin/electron-packager $SRC_DIR --asar --out=dist --executable-name=trilium --platform=win32 --arch=x64 --overwrite --icon=images/app-icons/win/icon.ico BUILD_DIR=./dist/trilium-windows-x64 diff --git a/bin/copy-trilium.sh b/bin/copy-trilium.sh index cc5a6342c..578d8b989 100755 --- a/bin/copy-trilium.sh +++ b/bin/copy-trilium.sh @@ -31,6 +31,7 @@ cp webpack-* $DIR/ # cleanup of useless files in dependencies rm -r $DIR/node_modules/image-q/demo +rm -r $DIR/node_modules/better-sqlite3/Release rm -r $DIR/node_modules/better-sqlite3/deps/sqlite3.tar.gz rm -r $DIR/node_modules/@jimp/plugin-print/fonts rm -r $DIR/node_modules/jimp/browser diff --git a/src/public/app/widgets/similar_notes.js b/src/public/app/widgets/similar_notes.js index 703ce5c34..6ebc042f6 100644 --- a/src/public/app/widgets/similar_notes.js +++ b/src/public/app/widgets/similar_notes.js @@ -148,7 +148,7 @@ export default class SimilarNotesWidget extends TabAwareWidget { } const $item = (await linkService.createNoteLink(similarNote.notePath.join("/"))) - .css("font-size", 24 * (1 - 1 / (similarNote.score - 1))); + .css("font-size", 24 * (1 - 1 / (similarNote.score))); $list.append($item); } diff --git a/src/services/note_cache/similarity.js b/src/services/note_cache/similarity.js index a0756a629..3bc47890d 100644 --- a/src/services/note_cache/similarity.js +++ b/src/services/note_cache/similarity.js @@ -9,6 +9,13 @@ const IGNORED_ATTR_NAMES = [ "relationmaplink" ]; +function filterLabelValue(value) { + return value + .replace(/https?:\/\//i, "") + .replace(/www\./i, "") + .replace(/(\.net|\.com|\.org|\.info|\.edu)/i, ""); +} + /** * @param {Note} note */ @@ -20,7 +27,7 @@ function buildRewardMap(note) { return; } - for (const word of text.toLowerCase().split(/\W+/)) { + for (const word of splitToWords(text)) { if (word) { map[word] = map[word] || 0; @@ -58,7 +65,7 @@ function buildRewardMap(note) { addToRewardMap(attr.name, reward); } - addToRewardMap(attr.value, reward); + addToRewardMap(filterLabelValue(attr.value), reward); } return map; @@ -67,7 +74,7 @@ function buildRewardMap(note) { const mimeCache = {}; function trimMime(mime) { - if (!mime) { + if (!mime || mime === 'text/html') { return; } @@ -86,6 +93,7 @@ function trimMime(mime) { } mimeCache[mime] = str; + mimeCache[mime] = str; } return mimeCache[mime]; @@ -104,7 +112,24 @@ function buildDateLimits(baseNote) { const wordCache = {}; -function findSimilarNotes(noteId) { +function splitToWords(text) { + let words = wordCache[text]; + + if (!words) { + wordCache[text] = words = text.toLowerCase().split(/\W+/); + + for (const idx in words) { + // special case for english plurals + if (words[idx].endsWith("s")) { + words[idx] = words[idx].substr(0, words[idx] - 1); + } + } + } + + return words; +} + +async function findSimilarNotes(noteId) { const results = []; let i = 0; @@ -124,16 +149,14 @@ function findSimilarNotes(noteId) { return 0; } - let words = wordCache[text]; - - if (!words) { - words = wordCache[text] = text.toLowerCase().split(/\W+/); - } - let counter = 0; - for (const word of words) { - counter += rewardMap[word] * factor || 0; + // when the title is very long then weight of each individual word should be lower + // also pretty important in e.g. long URLs in label values + const lengthPenalization = 1 / Math.pow(text.length, 0.3); + + for (const word of splitToWords(text)) { + counter += rewardMap[word] * factor * lengthPenalization || 0; } return counter; @@ -146,11 +169,11 @@ function findSimilarNotes(noteId) { for (const parentNote of note.parents) { if (!ancestorNoteIds.has(parentNote.noteId)) { if (parentNote.isDecrypted) { - score += gatherRewards(parentNote.title, 0.5); + score += gatherRewards(parentNote.title, 0.3); } for (const branch of parentNote.parentBranches) { - score += gatherRewards(branch.prefix, 0.5) + score += gatherRewards(branch.prefix, 0.3) + gatherAncestorRewards(branch.parentNote); } } @@ -163,8 +186,7 @@ function findSimilarNotes(noteId) { } function computeScore(candidateNote) { - let score = gatherRewards(candidateNote.type) - + gatherRewards(trimMime(candidateNote.mime)) + let score = gatherRewards(trimMime(candidateNote.mime)) + gatherAncestorRewards(candidateNote); if (candidateNote.isDecrypted) { @@ -183,6 +205,10 @@ function findSimilarNotes(noteId) { score += gatherRewards(attr.value); } + if (candidateNote.type === baseNote.type) { + score += 0.2; + } + /** * We want to improve standing of notes which have been created in similar time to each other since * there's a good chance they are related. @@ -195,7 +221,7 @@ function findSimilarNotes(noteId) { if (utcDateCreated >= dateLimits.minDate && utcDateCreated <= dateLimits.maxDate && utcDateCreated < dateLimits.minExcludedDate && utcDateCreated > dateLimits.maxExcludedDate) { - score += 3; + score += 1; } return score; @@ -208,7 +234,7 @@ function findSimilarNotes(noteId) { let score = computeScore(candidateNote); - if (score >= 4) { + if (score >= 1) { const notePath = noteCacheService.getSomePath(candidateNote); // this takes care of note hoisting @@ -217,7 +243,7 @@ function findSimilarNotes(noteId) { } if (noteCacheService.isNotePathArchived(notePath)) { - score -= 1; // archived penalization + score -= 0.5; // archived penalization } results.push({score, notePath, noteId: candidateNote.noteId}); @@ -226,7 +252,7 @@ function findSimilarNotes(noteId) { i++; if (i % 1000 === 0) { - //await setImmediatePromise(); + await setImmediatePromise(); } }