mirror of
https://github.com/zadam/trilium.git
synced 2025-01-16 03:58:53 +08:00
Improvements to similar notes - now using Dice's coefficient for better results
This commit is contained in:
parent
0e867a995f
commit
55356963dd
5 changed files with 104 additions and 19 deletions
5
package-lock.json
generated
5
package-lock.json
generated
|
@ -12215,6 +12215,11 @@
|
|||
"resolved": "https://registry.npmjs.org/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz",
|
||||
"integrity": "sha1-J5siXfHVgrH1TmWt3UNS4Y+qBxM="
|
||||
},
|
||||
"string-similarity": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-3.0.0.tgz",
|
||||
"integrity": "sha512-7kS7LyTp56OqOI2BDWQNVnLX/rCxIQn+/5M0op1WV6P8Xx6TZNdajpuqQdiJ7Xx+p1C5CsWMvdiBp9ApMhxzEQ=="
|
||||
},
|
||||
"string-width": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz",
|
||||
|
|
|
@ -70,6 +70,7 @@
|
|||
"simple-node-logger": "18.12.23",
|
||||
"sqlite": "3.0.3",
|
||||
"sqlite3": "4.1.0",
|
||||
"string-similarity": "^3.0.0",
|
||||
"tar-stream": "2.1.0",
|
||||
"turndown": "5.0.3",
|
||||
"unescape": "1.0.1",
|
||||
|
|
|
@ -2,6 +2,7 @@ import StandardWidget from "./standard_widget.js";
|
|||
import linkService from "../services/link.js";
|
||||
import server from "../services/server.js";
|
||||
import treeCache from "../services/tree_cache.js";
|
||||
import treeUtils from "../services/tree_utils.js";
|
||||
|
||||
class SimilarNotesWidget extends StandardWidget {
|
||||
getWidgetTitle() { return "Similar notes"; }
|
||||
|
@ -9,20 +10,23 @@ class SimilarNotesWidget extends StandardWidget {
|
|||
getMaxHeight() { return "200px"; }
|
||||
|
||||
async doRenderBody() {
|
||||
const similarNoteIds = await server.get('similar_notes/' + this.ctx.note.noteId);
|
||||
const similarNotes = await server.get('similar_notes/' + this.ctx.note.noteId);
|
||||
|
||||
if (similarNoteIds.length === 0) {
|
||||
if (similarNotes.length === 0) {
|
||||
this.$body.text("No similar notes found ...");
|
||||
return;
|
||||
}
|
||||
|
||||
await treeCache.getNotes(similarNoteIds); // preload all at once
|
||||
await treeCache.getNotes(similarNotes.map(note => note.noteId)); // preload all at once
|
||||
|
||||
const $list = $("<ul>");
|
||||
const $list = $('<ul style="padding-left: 20px;">');
|
||||
|
||||
for (const similarNote of similarNotes) {
|
||||
similarNote.notePath.pop(); // remove last noteId since it's already in the link
|
||||
|
||||
for (const similarNoteId of similarNoteIds) {
|
||||
const $item = $("<li>")
|
||||
.append(await linkService.createNoteLink(similarNoteId));
|
||||
.append(await linkService.createNoteLink(similarNote.noteId))
|
||||
.append($("<small>").text(" (" + await treeUtils.getNotePathTitle(similarNote.notePath.join("/")) + ")"));
|
||||
|
||||
$list.append($item);
|
||||
}
|
||||
|
|
|
@ -12,11 +12,14 @@ async function getSimilarNotes(req) {
|
|||
return [404, `Note ${noteId} not found.`];
|
||||
}
|
||||
|
||||
const results = await noteCacheService.findNotes(note.title);
|
||||
const start = new Date();
|
||||
|
||||
const results = await noteCacheService.findSimilarNotes(note.title);
|
||||
|
||||
console.log("Similar note took: " + (Date.now() - start.getTime()) + "ms");
|
||||
|
||||
return results
|
||||
.map(r => r.noteId)
|
||||
.filter(similarNoteId => similarNoteId !== noteId);
|
||||
.filter(note => note.noteId !== noteId);
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
|
|
@ -5,6 +5,7 @@ const repository = require('./repository');
|
|||
const protectedSessionService = require('./protected_session');
|
||||
const utils = require('./utils');
|
||||
const hoistedNoteService = require('./hoisted_note');
|
||||
const stringSimilarity = require('string-similarity');
|
||||
|
||||
let loaded = false;
|
||||
let noteTitles = {};
|
||||
|
@ -37,6 +38,10 @@ async function load() {
|
|||
await loadProtectedNotes();
|
||||
}
|
||||
|
||||
for (const noteId in childToParent) {
|
||||
resortChildToParent(noteId);
|
||||
}
|
||||
|
||||
loaded = true;
|
||||
}
|
||||
|
||||
|
@ -161,11 +166,27 @@ async function findNotes(query) {
|
|||
return apiResults;
|
||||
}
|
||||
|
||||
/**
 * Decides whether the given note path should be treated as archived.
 *
 * The last element of the path is the note itself: any defined entry in the
 * `archived` map marks it as archived. Every preceding element is an ancestor:
 * an ancestor only archives the path when its entry equals 1 (the inheritable
 * form of the flag).
 *
 * @param {string[]} notePath - note IDs ordered from the top of the path down to the note itself
 * @returns {boolean} true if the path is archived, false otherwise
 */
function isArchived(notePath) {
    const ownNoteId = notePath[notePath.length - 1];

    // the note is archived directly - the flag value does not matter here
    if (archived[ownNoteId] !== undefined) {
        return true;
    }

    // ancestors count only when their archived flag is inheritable (=== 1)
    const ancestorIds = notePath.slice(0, -1);

    return ancestorIds.some(ancestorId => archived[ancestorId] === 1);
}
|
||||
|
||||
function search(noteId, tokens, path, results) {
|
||||
if (tokens.length === 0) {
|
||||
const retPath = getSomePath(noteId, path);
|
||||
|
||||
if (retPath) {
|
||||
if (retPath && !isArchived(retPath)) {
|
||||
const thisNoteId = retPath[retPath.length - 1];
|
||||
const thisParentNoteId = retPath[retPath.length - 2];
|
||||
|
||||
|
@ -262,7 +283,13 @@ function getNoteTitleForPath(path) {
|
|||
return titles.join(' / ');
|
||||
}
|
||||
|
||||
function getSomePath(noteId, path) {
|
||||
/**
|
||||
* Returns notePath for noteId from cache. Note hoisting is respected.
|
||||
* Archived notes are also returned, but non-archived paths are preferred if available
|
||||
* - this means that an archived path is returned only if there's no non-archived path
|
||||
* - you can check whether returned path is archived using isArchived()
|
||||
*/
|
||||
function getSomePath(noteId, path = []) {
|
||||
if (noteId === 'root') {
|
||||
path.push(noteId);
|
||||
path.reverse();
|
||||
|
@ -280,11 +307,6 @@ function getSomePath(noteId, path) {
|
|||
}
|
||||
|
||||
for (const parentNoteId of parents) {
|
||||
// archived applies here only if inheritable
|
||||
if (archived[parentNoteId] === 1) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const retPath = getSomePath(parentNoteId, path.concat([noteId]));
|
||||
|
||||
if (retPath) {
|
||||
|
@ -296,9 +318,9 @@ function getSomePath(noteId, path) {
|
|||
}
|
||||
|
||||
function getNotePath(noteId) {
|
||||
const retPath = getSomePath(noteId, []);
|
||||
const retPath = getSomePath(noteId);
|
||||
|
||||
if (retPath) {
|
||||
if (retPath && !isArchived(retPath)) {
|
||||
const noteTitle = getNoteTitleForPath(retPath);
|
||||
const parentNoteId = childToParent[noteId][0];
|
||||
|
||||
|
@ -311,6 +333,43 @@ function getNotePath(noteId) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Scores how similar two texts are and, when they are similar enough, records
 * the note as a candidate in `results`.
 *
 * The score comes from `stringSimilarity.compareTwoStrings()`. Candidates whose
 * raw score is not above 0.4 are ignored. Notes lying on an archived path are
 * kept but penalized by 0.2, so they rank lower (the penalty is applied after
 * the threshold check, so the stored coefficient may end up below 0.4).
 *
 * @param {string} text1 - reference text (the title being searched for)
 * @param {string} text2 - candidate text (title of the note identified by noteId)
 * @param {string} noteId - ID of the candidate note
 * @param {Array<{coeff: number, notePath: string[], noteId: string}>} results - accumulator, mutated in place
 */
function evaluateSimilarity(text1, text2, noteId, results) {
    const rawCoeff = stringSimilarity.compareTwoStrings(text1, text2);

    // written as a negated "greater than" so a NaN score is rejected as well
    if (!(rawCoeff > 0.4)) {
        return;
    }

    const notePath = getSomePath(noteId);

    // no path available - per the original note, this takes care of note hoisting
    if (!notePath) {
        return;
    }

    // archived penalization
    const coeff = isArchived(notePath) ? rawCoeff - 0.2 : rawCoeff;

    results.push({coeff, notePath, noteId});
}
|
||||
|
||||
/**
 * Finds notes whose titles are similar to the given title.
 *
 * Every entry of `noteTitles` is evaluated; entries of `protectedNoteTitles`
 * are evaluated only while a protected session is available. Scoring and
 * filtering are delegated to `evaluateSimilarity()`, which appends matching
 * candidates to the result list.
 *
 * @param {string} title - title to compare all known note titles against
 * @returns {Array<{coeff: number, notePath: string[], noteId: string}>}
 *          at most 50 candidates, ordered from the highest coefficient to the lowest
 */
function findSimilarNotes(title) {
    const maxResults = 50;
    const results = [];

    for (const noteId in noteTitles) {
        evaluateSimilarity(title, noteTitles[noteId], noteId, results);
    }

    if (protectedSessionService.isProtectedSessionAvailable()) {
        for (const noteId in protectedNoteTitles) {
            evaluateSimilarity(title, protectedNoteTitles[noteId], noteId, results);
        }
    }

    // Numeric comparator returns 0 for equal coefficients, which keeps the
    // comparison consistent and lets ties retain their insertion order.
    results.sort((a, b) => b.coeff - a.coeff);

    // slice() already handles arrays shorter than the limit
    return results.slice(0, maxResults);
}
|
||||
|
||||
eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED, eventService.ENTITY_SYNCED], async ({entityName, entity}) => {
|
||||
// note that entity can also be just POJO without methods if coming from sync
|
||||
|
||||
|
@ -355,6 +414,8 @@ eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED
|
|||
}
|
||||
|
||||
childToParent[branch.noteId].push(branch.parentNoteId);
|
||||
resortChildToParent(branch.noteId);
|
||||
|
||||
childParentToBranchId[branch.noteId + '-' + branch.parentNoteId] = branch.branchId;
|
||||
}
|
||||
}
|
||||
|
@ -376,6 +437,16 @@ eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED
|
|||
}
|
||||
});
|
||||
|
||||
/**
 * Reorders the parents of the given note so that parents which are not
 * inheritably archived come first and inheritably archived ones (flag === 1
 * in the `archived` map) come last. This is done so that non-archived paths
 * are always explored first when searching for a note path.
 *
 * The relative order inside each of the two groups is preserved. The parent
 * list in `childToParent` is sorted in place; nothing happens when the note
 * has no parent list.
 *
 * @param {string} noteId - ID of the note whose parent list should be reordered
 */
function resortChildToParent(noteId) {
    const parentNoteIds = childToParent[noteId];

    if (!parentNoteIds) {
        return;
    }

    const archivedRank = parentNoteId => archived[parentNoteId] === 1 ? 1 : 0;

    // The comparator must look at both operands and return 0 for equal ranks,
    // otherwise the resulting order is implementation-dependent.
    parentNoteIds.sort((a, b) => archivedRank(a) - archivedRank(b));
}
|
||||
|
||||
/**
|
||||
* @param noteId
|
||||
* @returns {boolean} - true if note exists (is not deleted) and is not archived.
|
||||
|
@ -399,5 +470,6 @@ module.exports = {
|
|||
getNotePath,
|
||||
getNoteTitleForPath,
|
||||
isAvailable,
|
||||
load
|
||||
load,
|
||||
findSimilarNotes
|
||||
};
|
Loading…
Reference in a new issue