mirror of
https://github.com/zadam/trilium.git
synced 2025-01-16 03:58:53 +08:00
Improvements to similar notes - now using Dice's coefficient for better results
This commit is contained in:
parent
0e867a995f
commit
55356963dd
5 changed files with 104 additions and 19 deletions
5
package-lock.json
generated
5
package-lock.json
generated
|
@ -12215,6 +12215,11 @@
|
||||||
"resolved": "https://registry.npmjs.org/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz",
|
"resolved": "https://registry.npmjs.org/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz",
|
||||||
"integrity": "sha1-J5siXfHVgrH1TmWt3UNS4Y+qBxM="
|
"integrity": "sha1-J5siXfHVgrH1TmWt3UNS4Y+qBxM="
|
||||||
},
|
},
|
||||||
|
"string-similarity": {
|
||||||
|
"version": "3.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-3.0.0.tgz",
|
||||||
|
"integrity": "sha512-7kS7LyTp56OqOI2BDWQNVnLX/rCxIQn+/5M0op1WV6P8Xx6TZNdajpuqQdiJ7Xx+p1C5CsWMvdiBp9ApMhxzEQ=="
|
||||||
|
},
|
||||||
"string-width": {
|
"string-width": {
|
||||||
"version": "1.0.2",
|
"version": "1.0.2",
|
||||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz",
|
||||||
|
|
|
@ -70,6 +70,7 @@
|
||||||
"simple-node-logger": "18.12.23",
|
"simple-node-logger": "18.12.23",
|
||||||
"sqlite": "3.0.3",
|
"sqlite": "3.0.3",
|
||||||
"sqlite3": "4.1.0",
|
"sqlite3": "4.1.0",
|
||||||
|
"string-similarity": "^3.0.0",
|
||||||
"tar-stream": "2.1.0",
|
"tar-stream": "2.1.0",
|
||||||
"turndown": "5.0.3",
|
"turndown": "5.0.3",
|
||||||
"unescape": "1.0.1",
|
"unescape": "1.0.1",
|
||||||
|
|
|
@ -2,6 +2,7 @@ import StandardWidget from "./standard_widget.js";
|
||||||
import linkService from "../services/link.js";
|
import linkService from "../services/link.js";
|
||||||
import server from "../services/server.js";
|
import server from "../services/server.js";
|
||||||
import treeCache from "../services/tree_cache.js";
|
import treeCache from "../services/tree_cache.js";
|
||||||
|
import treeUtils from "../services/tree_utils.js";
|
||||||
|
|
||||||
class SimilarNotesWidget extends StandardWidget {
|
class SimilarNotesWidget extends StandardWidget {
|
||||||
getWidgetTitle() { return "Similar notes"; }
|
getWidgetTitle() { return "Similar notes"; }
|
||||||
|
@ -9,20 +10,23 @@ class SimilarNotesWidget extends StandardWidget {
|
||||||
getMaxHeight() { return "200px"; }
|
getMaxHeight() { return "200px"; }
|
||||||
|
|
||||||
async doRenderBody() {
|
async doRenderBody() {
|
||||||
const similarNoteIds = await server.get('similar_notes/' + this.ctx.note.noteId);
|
const similarNotes = await server.get('similar_notes/' + this.ctx.note.noteId);
|
||||||
|
|
||||||
if (similarNoteIds.length === 0) {
|
if (similarNotes.length === 0) {
|
||||||
this.$body.text("No similar notes found ...");
|
this.$body.text("No similar notes found ...");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
await treeCache.getNotes(similarNoteIds); // preload all at once
|
await treeCache.getNotes(similarNotes.map(note => note.noteId)); // preload all at once
|
||||||
|
|
||||||
const $list = $("<ul>");
|
const $list = $('<ul style="padding-left: 20px;">');
|
||||||
|
|
||||||
|
for (const similarNote of similarNotes) {
|
||||||
|
similarNote.notePath.pop(); // remove last noteId since it's already in the link
|
||||||
|
|
||||||
for (const similarNoteId of similarNoteIds) {
|
|
||||||
const $item = $("<li>")
|
const $item = $("<li>")
|
||||||
.append(await linkService.createNoteLink(similarNoteId));
|
.append(await linkService.createNoteLink(similarNote.noteId))
|
||||||
|
.append($("<small>").text(" (" + await treeUtils.getNotePathTitle(similarNote.notePath.join("/")) + ")"));
|
||||||
|
|
||||||
$list.append($item);
|
$list.append($item);
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,11 +12,14 @@ async function getSimilarNotes(req) {
|
||||||
return [404, `Note ${noteId} not found.`];
|
return [404, `Note ${noteId} not found.`];
|
||||||
}
|
}
|
||||||
|
|
||||||
const results = await noteCacheService.findNotes(note.title);
|
const start = new Date();
|
||||||
|
|
||||||
|
const results = await noteCacheService.findSimilarNotes(note.title);
|
||||||
|
|
||||||
|
console.log("Similar note took: " + (Date.now() - start.getTime()) + "ms");
|
||||||
|
|
||||||
return results
|
return results
|
||||||
.map(r => r.noteId)
|
.filter(note => note.noteId !== noteId);
|
||||||
.filter(similarNoteId => similarNoteId !== noteId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
|
@ -5,6 +5,7 @@ const repository = require('./repository');
|
||||||
const protectedSessionService = require('./protected_session');
|
const protectedSessionService = require('./protected_session');
|
||||||
const utils = require('./utils');
|
const utils = require('./utils');
|
||||||
const hoistedNoteService = require('./hoisted_note');
|
const hoistedNoteService = require('./hoisted_note');
|
||||||
|
const stringSimilarity = require('string-similarity');
|
||||||
|
|
||||||
let loaded = false;
|
let loaded = false;
|
||||||
let noteTitles = {};
|
let noteTitles = {};
|
||||||
|
@ -37,6 +38,10 @@ async function load() {
|
||||||
await loadProtectedNotes();
|
await loadProtectedNotes();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (const noteId in childToParent) {
|
||||||
|
resortChildToParent(noteId);
|
||||||
|
}
|
||||||
|
|
||||||
loaded = true;
|
loaded = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,11 +166,27 @@ async function findNotes(query) {
|
||||||
return apiResults;
|
return apiResults;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a note path should be treated as archived.
 *
 * The last element of the path is the note itself; every earlier element is
 * handled as one of its ancestors.
 *
 * @param {string[]} notePath - note IDs, with the target note as the last element
 * @returns {boolean} true if the target note has any entry in `archived`, or if
 *                    any ancestor has an entry equal to 1; false otherwise
 */
function isArchived(notePath) {
    const ownNoteId = notePath[notePath.length - 1];

    // the note itself is archived directly - any defined value counts here
    if (archived[ownNoteId] !== undefined) {
        return true;
    }

    // ancestors only count when their value is exactly 1
    // (presumably 1 marks the archived flag as inheritable - confirm where `archived` is populated)
    const ancestorIds = notePath.slice(0, -1);

    return ancestorIds.some(ancestorId => archived[ancestorId] === 1);
}
|
||||||
|
|
||||||
function search(noteId, tokens, path, results) {
|
function search(noteId, tokens, path, results) {
|
||||||
if (tokens.length === 0) {
|
if (tokens.length === 0) {
|
||||||
const retPath = getSomePath(noteId, path);
|
const retPath = getSomePath(noteId, path);
|
||||||
|
|
||||||
if (retPath) {
|
if (retPath && !isArchived(retPath)) {
|
||||||
const thisNoteId = retPath[retPath.length - 1];
|
const thisNoteId = retPath[retPath.length - 1];
|
||||||
const thisParentNoteId = retPath[retPath.length - 2];
|
const thisParentNoteId = retPath[retPath.length - 2];
|
||||||
|
|
||||||
|
@ -262,7 +283,13 @@ function getNoteTitleForPath(path) {
|
||||||
return titles.join(' / ');
|
return titles.join(' / ');
|
||||||
}
|
}
|
||||||
|
|
||||||
function getSomePath(noteId, path) {
|
/**
|
||||||
|
* Returns notePath for noteId from cache. Note hoisting is respected.
|
||||||
|
* Archived notes are also returned, but non-archived paths are preferred if available
|
||||||
|
* - this means that an archived path is returned only if there's no non-archived path
|
||||||
|
* - you can check whether returned path is archived using isArchived()
|
||||||
|
*/
|
||||||
|
function getSomePath(noteId, path = []) {
|
||||||
if (noteId === 'root') {
|
if (noteId === 'root') {
|
||||||
path.push(noteId);
|
path.push(noteId);
|
||||||
path.reverse();
|
path.reverse();
|
||||||
|
@ -280,11 +307,6 @@ function getSomePath(noteId, path) {
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const parentNoteId of parents) {
|
for (const parentNoteId of parents) {
|
||||||
// archived applies here only if inheritable
|
|
||||||
if (archived[parentNoteId] === 1) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const retPath = getSomePath(parentNoteId, path.concat([noteId]));
|
const retPath = getSomePath(parentNoteId, path.concat([noteId]));
|
||||||
|
|
||||||
if (retPath) {
|
if (retPath) {
|
||||||
|
@ -296,9 +318,9 @@ function getSomePath(noteId, path) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function getNotePath(noteId) {
|
function getNotePath(noteId) {
|
||||||
const retPath = getSomePath(noteId, []);
|
const retPath = getSomePath(noteId);
|
||||||
|
|
||||||
if (retPath) {
|
if (retPath && !isArchived(retPath)) {
|
||||||
const noteTitle = getNoteTitleForPath(retPath);
|
const noteTitle = getNoteTitleForPath(retPath);
|
||||||
const parentNoteId = childToParent[noteId][0];
|
const parentNoteId = childToParent[noteId][0];
|
||||||
|
|
||||||
|
@ -311,6 +333,43 @@ function getNotePath(noteId) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Compares two texts and, when they are similar enough, appends a result entry
 * for the given note to `results`.
 *
 * @param {string} text1 - first text passed to stringSimilarity.compareTwoStrings
 * @param {string} text2 - second text passed to stringSimilarity.compareTwoStrings
 * @param {string} noteId - ID of the note that text2 belongs to
 * @param {Array<{coeff: number, notePath: string[], noteId: string}>} results -
 *        accumulator which is mutated in place; nothing is returned
 */
function evaluateSimilarity(text1, text2, noteId, results) {
    const rawCoeff = stringSimilarity.compareTwoStrings(text1, text2);

    // only scores strictly above the 0.4 threshold are considered at all
    if (!(rawCoeff > 0.4)) {
        return;
    }

    const notePath = getSomePath(noteId);

    // this takes care of note hoisting - no path means the note is skipped
    if (!notePath) {
        return;
    }

    // archived penalization - applied after the threshold check, so the stored
    // coefficient may end up below 0.4
    const coeff = isArchived(notePath) ? rawCoeff - 0.2 : rawCoeff;

    results.push({coeff, notePath, noteId});
}
|
||||||
|
|
||||||
|
/**
 * Finds notes whose title is similar to the given title.
 *
 * Every title in `noteTitles` is evaluated; titles in `protectedNoteTitles` are
 * evaluated as well, but only while a protected session is available.
 *
 * @param {string} title - title to compare all known note titles against
 * @returns {Array<{coeff: number, notePath: string[], noteId: string}>} at most 50
 *          entries (as produced by evaluateSimilarity) ordered by descending coefficient
 */
function findSimilarNotes(title) {
    const results = [];

    for (const noteId in noteTitles) {
        evaluateSimilarity(title, noteTitles[noteId], noteId, results);
    }

    if (protectedSessionService.isProtectedSessionAvailable()) {
        for (const noteId in protectedNoteTitles) {
            evaluateSimilarity(title, protectedNoteTitles[noteId], noteId, results);
        }
    }

    // A numeric difference is a consistent comparator: it returns 0 for equal
    // coefficients, so ties keep a well-defined order. The previous
    // "a > b ? -1 : 1" form returned 1 for ties in both argument orders.
    results.sort((a, b) => b.coeff - a.coeff);

    // slice() already handles arrays shorter than the limit
    return results.slice(0, 50);
}
|
||||||
|
|
||||||
eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED, eventService.ENTITY_SYNCED], async ({entityName, entity}) => {
|
eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED, eventService.ENTITY_SYNCED], async ({entityName, entity}) => {
|
||||||
// note that entity can also be just POJO without methods if coming from sync
|
// note that entity can also be just POJO without methods if coming from sync
|
||||||
|
|
||||||
|
@ -355,6 +414,8 @@ eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED
|
||||||
}
|
}
|
||||||
|
|
||||||
childToParent[branch.noteId].push(branch.parentNoteId);
|
childToParent[branch.noteId].push(branch.parentNoteId);
|
||||||
|
resortChildToParent(branch.noteId);
|
||||||
|
|
||||||
childParentToBranchId[branch.noteId + '-' + branch.parentNoteId] = branch.branchId;
|
childParentToBranchId[branch.noteId + '-' + branch.parentNoteId] = branch.branchId;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -376,6 +437,16 @@ eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * Sorts the parents recorded for the given note so that parents which are not
 * archived come first and parents with archived[parentNoteId] === 1 come last.
 * This is done so that non-archived paths are always explored first when
 * searching for a note path.
 *
 * The array in childToParent[noteId] is sorted in place. Nothing happens when
 * there is no entry for noteId.
 *
 * @param {string} noteId - note whose list of parents should be reordered
 */
function resortChildToParent(noteId) {
    const parentNoteIds = childToParent[noteId];

    if (!parentNoteIds) {
        return;
    }

    const archivedRank = parentNoteId => archived[parentNoteId] === 1 ? 1 : 0;

    // The comparator has to take both arguments into account and return 0 for
    // equally ranked parents. The previous one only looked at `a`, which made it
    // inconsistent. With a stable sort (ES2019+), parents of the same rank also
    // keep their existing relative order.
    parentNoteIds.sort((a, b) => archivedRank(a) - archivedRank(b));
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param noteId
|
* @param noteId
|
||||||
* @returns {boolean} - true if note exists (is not deleted) and is not archived.
|
* @returns {boolean} - true if note exists (is not deleted) and is not archived.
|
||||||
|
@ -399,5 +470,6 @@ module.exports = {
|
||||||
getNotePath,
|
getNotePath,
|
||||||
getNoteTitleForPath,
|
getNoteTitleForPath,
|
||||||
isAvailable,
|
isAvailable,
|
||||||
load
|
load,
|
||||||
|
findSimilarNotes
|
||||||
};
|
};
|
Loading…
Reference in a new issue