Improvements to similar notes - now using Dice's coefficient for better results

This commit is contained in:
zadam 2019-09-01 11:33:45 +02:00
parent 0e867a995f
commit 55356963dd
5 changed files with 104 additions and 19 deletions

5
package-lock.json generated
View file

@ -12215,6 +12215,11 @@
"resolved": "https://registry.npmjs.org/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz",
"integrity": "sha1-J5siXfHVgrH1TmWt3UNS4Y+qBxM="
},
"string-similarity": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-3.0.0.tgz",
"integrity": "sha512-7kS7LyTp56OqOI2BDWQNVnLX/rCxIQn+/5M0op1WV6P8Xx6TZNdajpuqQdiJ7Xx+p1C5CsWMvdiBp9ApMhxzEQ=="
},
"string-width": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz",

View file

@ -70,6 +70,7 @@
"simple-node-logger": "18.12.23",
"sqlite": "3.0.3",
"sqlite3": "4.1.0",
"string-similarity": "^3.0.0",
"tar-stream": "2.1.0",
"turndown": "5.0.3",
"unescape": "1.0.1",

View file

@ -2,6 +2,7 @@ import StandardWidget from "./standard_widget.js";
import linkService from "../services/link.js";
import server from "../services/server.js";
import treeCache from "../services/tree_cache.js";
import treeUtils from "../services/tree_utils.js";
class SimilarNotesWidget extends StandardWidget {
getWidgetTitle() { return "Similar notes"; }
@ -9,20 +10,23 @@ class SimilarNotesWidget extends StandardWidget {
getMaxHeight() { return "200px"; }
async doRenderBody() {
const similarNoteIds = await server.get('similar_notes/' + this.ctx.note.noteId);
const similarNotes = await server.get('similar_notes/' + this.ctx.note.noteId);
if (similarNoteIds.length === 0) {
if (similarNotes.length === 0) {
this.$body.text("No similar notes found ...");
return;
}
await treeCache.getNotes(similarNoteIds); // preload all at once
await treeCache.getNotes(similarNotes.map(note => note.noteId)); // preload all at once
const $list = $("<ul>");
const $list = $('<ul style="padding-left: 20px;">');
for (const similarNote of similarNotes) {
similarNote.notePath.pop(); // remove last noteId since it's already in the link
for (const similarNoteId of similarNoteIds) {
const $item = $("<li>")
.append(await linkService.createNoteLink(similarNoteId));
.append(await linkService.createNoteLink(similarNote.noteId))
.append($("<small>").text(" (" + await treeUtils.getNotePathTitle(similarNote.notePath.join("/")) + ")"));
$list.append($item);
}

View file

@ -12,11 +12,14 @@ async function getSimilarNotes(req) {
return [404, `Note ${noteId} not found.`];
}
const results = await noteCacheService.findNotes(note.title);
const start = new Date();
const results = await noteCacheService.findSimilarNotes(note.title);
console.log("Similar note took: " + (Date.now() - start.getTime()) + "ms");
return results
.map(r => r.noteId)
.filter(similarNoteId => similarNoteId !== noteId);
.filter(note => note.noteId !== noteId);
}
module.exports = {

View file

@ -5,6 +5,7 @@ const repository = require('./repository');
const protectedSessionService = require('./protected_session');
const utils = require('./utils');
const hoistedNoteService = require('./hoisted_note');
const stringSimilarity = require('string-similarity');
let loaded = false;
let noteTitles = {};
@ -37,6 +38,10 @@ async function load() {
await loadProtectedNotes();
}
for (const noteId in childToParent) {
resortChildToParent(noteId);
}
loaded = true;
}
@ -161,11 +166,27 @@ async function findNotes(query) {
return apiResults;
}
/**
 * Tells whether the given note path should be treated as archived.
 *
 * The path counts as archived when either:
 *  - the note at the end of the path has any entry in `archived`, or
 *  - one of the preceding (ancestor) notes has `archived` value 1
 *    (going through parents, so the archived flag must be inheritable).
 *
 * @param {string[]} notePath - note IDs, the last one being the note itself
 * @returns {boolean} true if the path is archived
 */
function isArchived(notePath) {
    const ownNoteId = notePath[notePath.length - 1];
    const ancestorNoteIds = notePath.slice(0, -1);

    return archived[ownNoteId] !== undefined
        || ancestorNoteIds.some(ancestorNoteId => archived[ancestorNoteId] === 1);
}
function search(noteId, tokens, path, results) {
if (tokens.length === 0) {
const retPath = getSomePath(noteId, path);
if (retPath) {
if (retPath && !isArchived(retPath)) {
const thisNoteId = retPath[retPath.length - 1];
const thisParentNoteId = retPath[retPath.length - 2];
@ -262,7 +283,13 @@ function getNoteTitleForPath(path) {
return titles.join(' / ');
}
function getSomePath(noteId, path) {
/**
* Returns notePath for noteId from cache. Note hoisting is respected.
* Archived notes are also returned, but non-archived paths are preferred if available
* - this means that an archived path is returned only if there's no non-archived path
* - you can check whether returned path is archived using isArchived()
*/
function getSomePath(noteId, path = []) {
if (noteId === 'root') {
path.push(noteId);
path.reverse();
@ -280,11 +307,6 @@ function getSomePath(noteId, path) {
}
for (const parentNoteId of parents) {
// archived applies here only if inheritable
if (archived[parentNoteId] === 1) {
continue;
}
const retPath = getSomePath(parentNoteId, path.concat([noteId]));
if (retPath) {
@ -296,9 +318,9 @@ function getSomePath(noteId, path) {
}
function getNotePath(noteId) {
const retPath = getSomePath(noteId, []);
const retPath = getSomePath(noteId);
if (retPath) {
if (retPath && !isArchived(retPath)) {
const noteTitle = getNoteTitleForPath(retPath);
const parentNoteId = childToParent[noteId][0];
@ -311,6 +333,43 @@ function getNotePath(noteId) {
}
}
/**
 * Compares two texts and, if they are similar enough, appends a result entry for the note.
 *
 * The similarity coefficient comes from stringSimilarity.compareTwoStrings(). Only coefficients
 * above 0.4 are considered. Notes for which getSomePath() yields no path are skipped, and notes
 * on an archived path have their coefficient lowered by 0.2.
 *
 * @param {string} text1 - reference text
 * @param {string} text2 - candidate text (title of the note identified by noteId)
 * @param {string} noteId - ID of the candidate note
 * @param {Array} results - output array; receives {coeff, notePath, noteId} objects
 */
function evaluateSimilarity(text1, text2, noteId, results) {
    const similarity = stringSimilarity.compareTwoStrings(text1, text2);

    if (!(similarity > 0.4)) {
        return;
    }

    const notePath = getSomePath(noteId);

    // this takes care of note hoisting
    if (!notePath) {
        return;
    }

    // archived penalization
    const coeff = isArchived(notePath) ? similarity - 0.2 : similarity;

    results.push({coeff, notePath, noteId});
}
/**
 * Finds notes whose title is similar to the given title.
 *
 * Every title in `noteTitles` is evaluated; titles in `protectedNoteTitles` are evaluated
 * only when a protected session is available. Matching is delegated to evaluateSimilarity(),
 * which pushes the accepted candidates into the result list.
 *
 * @param {string} title - title to compare the cached note titles against
 * @returns {Array} at most 50 result objects, ordered by descending `coeff`
 */
function findSimilarNotes(title) {
    const MAX_RESULTS = 50;
    const results = [];

    for (const [noteId, noteTitle] of Object.entries(noteTitles)) {
        evaluateSimilarity(title, noteTitle, noteId, results);
    }

    if (protectedSessionService.isProtectedSessionAvailable()) {
        for (const [noteId, noteTitle] of Object.entries(protectedNoteTitles)) {
            evaluateSimilarity(title, noteTitle, noteId, results);
        }
    }

    // The comparator must return 0 for equal coefficients; a comparator that never reports
    // ties is inconsistent and makes the resulting order implementation-defined.
    results.sort((a, b) => b.coeff - a.coeff);

    // slice() already copes with lists shorter than the limit
    return results.slice(0, MAX_RESULTS);
}
eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED, eventService.ENTITY_SYNCED], async ({entityName, entity}) => {
// note that entity can also be just POJO without methods if coming from sync
@ -355,6 +414,8 @@ eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED
}
childToParent[branch.noteId].push(branch.parentNoteId);
resortChildToParent(branch.noteId);
childParentToBranchId[branch.noteId + '-' + branch.parentNoteId] = branch.branchId;
}
}
@ -376,6 +437,16 @@ eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED
}
});
/**
 * Reorders (in place) the cached list of parents of the given note so that parents whose
 * `archived` value is 1 are moved to the end and all other parents come first.
 * This is done so that non-archived paths are always explored first when searching for a note path.
 *
 * Does nothing when the note has no entry in `childToParent`.
 *
 * @param {string} noteId - ID of the note whose parent list should be reordered
 */
function resortChildToParent(noteId) {
    const parentNoteIds = childToParent[noteId];

    if (!parentNoteIds) {
        return;
    }

    const archivedRank = parentNoteId => archived[parentNoteId] === 1 ? 1 : 0;

    // Both operands have to be ranked: a comparator looking only at `a` is inconsistent
    // (it never returns 0 and can claim both a < b and b < a), so the outcome of sort()
    // would depend on the engine's algorithm.
    parentNoteIds.sort((a, b) => archivedRank(a) - archivedRank(b));
}
/**
* @param noteId
* @returns {boolean} - true if note exists (is not deleted) and is not archived.
@ -399,5 +470,6 @@ module.exports = {
getNotePath,
getNoteTitleForPath,
isAvailable,
load
load,
findSimilarNotes
};