similar note tweaks

This commit is contained in:
zadam 2020-09-16 17:34:48 +02:00
parent 71ed24344c
commit 13b9f5231c
6 changed files with 54 additions and 21 deletions

View file

@ -13,6 +13,8 @@ echo "Copying required linux-x64 binaries"
cp -r bin/better-sqlite3/linux-desktop-better_sqlite3.node $SRC_DIR/node_modules/better-sqlite3/build/Release/better_sqlite3.node
echo "Packaging linux x64 electron build"
./node_modules/.bin/electron-packager $SRC_DIR --asar --out=dist --executable-name=trilium --platform=linux --arch=x64 --overwrite
BUILD_DIR=./dist/trilium-linux-x64

View file

@ -13,6 +13,8 @@ cp -r bin/better-sqlite3/mac-better_sqlite3.node $SRC_DIR/node_modules/better-sq
rm -r $SRC_DIR/src/public/app-dist/*.mobile.*
echo "Packaging mac x64 electron build"
./node_modules/.bin/electron-packager $SRC_DIR --asar --out=dist --executable-name=trilium --platform=darwin --arch=x64 --overwrite --icon=images/app-icons/mac/icon.icns
BUILD_DIR=./dist/trilium-mac-x64

View file

@ -13,6 +13,8 @@ cp -r bin/better-sqlite3/win-better_sqlite3.node $SRC_DIR/node_modules/better-sq
rm -r $SRC_DIR/src/public/app-dist/*.mobile.*
echo "Packaging windows x64 electron build"
./node_modules/.bin/electron-packager $SRC_DIR --asar --out=dist --executable-name=trilium --platform=win32 --arch=x64 --overwrite --icon=images/app-icons/win/icon.ico
BUILD_DIR=./dist/trilium-windows-x64

View file

@ -31,6 +31,7 @@ cp webpack-* $DIR/
# cleanup of useless files in dependencies
rm -r $DIR/node_modules/image-q/demo
rm -r $DIR/node_modules/better-sqlite3/Release
rm -r $DIR/node_modules/better-sqlite3/deps/sqlite3.tar.gz
rm -r $DIR/node_modules/@jimp/plugin-print/fonts
rm -r $DIR/node_modules/jimp/browser

View file

@ -148,7 +148,7 @@ export default class SimilarNotesWidget extends TabAwareWidget {
}
const $item = (await linkService.createNoteLink(similarNote.notePath.join("/")))
.css("font-size", 24 * (1 - 1 / (similarNote.score - 1)));
.css("font-size", 24 * (1 - 1 / (similarNote.score)));
$list.append($item);
}

View file

@ -9,6 +9,13 @@ const IGNORED_ATTR_NAMES = [
"relationmaplink"
];
/**
 * Removes common URL noise from a label value: the "http://" / "https://"
 * scheme, a "www." prefix and one of a few well-known top-level domain
 * suffixes. Matching is case-insensitive.
 *
 * Each pattern is applied once, in order, and removes only its first match.
 *
 * @param {string} value - label value to clean up
 * @returns {string} the value with the matched fragments removed
 */
function filterLabelValue(value) {
    const noisePatterns = [
        /https?:\/\//i,
        /www\./i,
        /(\.net|\.com|\.org|\.info|\.edu)/i
    ];

    let filtered = value;

    for (const pattern of noisePatterns) {
        // no "g" flag on the patterns - only the first occurrence is dropped
        filtered = filtered.replace(pattern, "");
    }

    return filtered;
}
/**
* @param {Note} note
*/
@ -20,7 +27,7 @@ function buildRewardMap(note) {
return;
}
for (const word of text.toLowerCase().split(/\W+/)) {
for (const word of splitToWords(text)) {
if (word) {
map[word] = map[word] || 0;
@ -58,7 +65,7 @@ function buildRewardMap(note) {
addToRewardMap(attr.name, reward);
}
addToRewardMap(attr.value, reward);
addToRewardMap(filterLabelValue(attr.value), reward);
}
return map;
@ -67,7 +74,7 @@ function buildRewardMap(note) {
const mimeCache = {};
function trimMime(mime) {
if (!mime) {
if (!mime || mime === 'text/html') {
return;
}
@ -86,6 +93,7 @@ function trimMime(mime) {
}
mimeCache[mime] = str;
mimeCache[mime] = str;
}
return mimeCache[mime];
@ -104,7 +112,24 @@ function buildDateLimits(baseNote) {
const wordCache = {};
function findSimilarNotes(noteId) {
/**
 * Splits text into lower-cased words (separated by runs of non-word
 * characters) and strips a single trailing "s" from each word so that simple
 * english plurals map to the same word as their singular form.
 *
 * Results are memoized in wordCache keyed by the input text, so repeated calls
 * with the same text return the same array instance.
 *
 * The returned array may contain empty strings (e.g. when the text starts or
 * ends with a separator, or when a word is just "s").
 *
 * @param {string} text - non-empty text to split
 * @returns {string[]} normalized words
 */
function splitToWords(text) {
    let words = wordCache[text];

    if (!words) {
        words = text.toLowerCase().split(/\W+/).map(word =>
            // special case for english plurals - drop only the final "s";
            // slice(0, -1) avoids computing the end index by hand
            word.endsWith("s") ? word.slice(0, -1) : word
        );

        wordCache[text] = words;
    }

    return words;
}
async function findSimilarNotes(noteId) {
const results = [];
let i = 0;
@ -124,16 +149,14 @@ function findSimilarNotes(noteId) {
return 0;
}
let words = wordCache[text];
if (!words) {
words = wordCache[text] = text.toLowerCase().split(/\W+/);
}
let counter = 0;
for (const word of words) {
counter += rewardMap[word] * factor || 0;
// when the title is very long then weight of each individual word should be lower
// also pretty important in e.g. long URLs in label values
const lengthPenalization = 1 / Math.pow(text.length, 0.3);
for (const word of splitToWords(text)) {
counter += rewardMap[word] * factor * lengthPenalization || 0;
}
return counter;
@ -146,11 +169,11 @@ function findSimilarNotes(noteId) {
for (const parentNote of note.parents) {
if (!ancestorNoteIds.has(parentNote.noteId)) {
if (parentNote.isDecrypted) {
score += gatherRewards(parentNote.title, 0.5);
score += gatherRewards(parentNote.title, 0.3);
}
for (const branch of parentNote.parentBranches) {
score += gatherRewards(branch.prefix, 0.5)
score += gatherRewards(branch.prefix, 0.3)
+ gatherAncestorRewards(branch.parentNote);
}
}
@ -163,8 +186,7 @@ function findSimilarNotes(noteId) {
}
function computeScore(candidateNote) {
let score = gatherRewards(candidateNote.type)
+ gatherRewards(trimMime(candidateNote.mime))
let score = gatherRewards(trimMime(candidateNote.mime))
+ gatherAncestorRewards(candidateNote);
if (candidateNote.isDecrypted) {
@ -183,6 +205,10 @@ function findSimilarNotes(noteId) {
score += gatherRewards(attr.value);
}
if (candidateNote.type === baseNote.type) {
score += 0.2;
}
/**
* We want to improve standing of notes which have been created in similar time to each other since
* there's a good chance they are related.
@ -195,7 +221,7 @@ function findSimilarNotes(noteId) {
if (utcDateCreated >= dateLimits.minDate && utcDateCreated <= dateLimits.maxDate
&& utcDateCreated < dateLimits.minExcludedDate && utcDateCreated > dateLimits.maxExcludedDate) {
score += 3;
score += 1;
}
return score;
@ -208,7 +234,7 @@ function findSimilarNotes(noteId) {
let score = computeScore(candidateNote);
if (score >= 4) {
if (score >= 1) {
const notePath = noteCacheService.getSomePath(candidateNote);
// this takes care of note hoisting
@ -217,7 +243,7 @@ function findSimilarNotes(noteId) {
}
if (noteCacheService.isNotePathArchived(notePath)) {
score -= 1; // archived penalization
score -= 0.5; // archived penalization
}
results.push({score, notePath, noteId: candidateNote.noteId});
@ -226,7 +252,7 @@ function findSimilarNotes(noteId) {
i++;
if (i % 1000 === 0) {
//await setImmediatePromise();
await setImmediatePromise();
}
}