diff --git a/package-lock.json b/package-lock.json
index 8098ac4a3..5eed89d9e 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -12215,6 +12215,11 @@
"resolved": "https://registry.npmjs.org/strict-uri-encode/-/strict-uri-encode-1.1.0.tgz",
"integrity": "sha1-J5siXfHVgrH1TmWt3UNS4Y+qBxM="
},
+ "string-similarity": {
+ "version": "3.0.0",
+ "resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-3.0.0.tgz",
+ "integrity": "sha512-7kS7LyTp56OqOI2BDWQNVnLX/rCxIQn+/5M0op1WV6P8Xx6TZNdajpuqQdiJ7Xx+p1C5CsWMvdiBp9ApMhxzEQ=="
+ },
"string-width": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz",
diff --git a/package.json b/package.json
index 678d0e73d..052af115f 100644
--- a/package.json
+++ b/package.json
@@ -70,6 +70,7 @@
"simple-node-logger": "18.12.23",
"sqlite": "3.0.3",
"sqlite3": "4.1.0",
+ "string-similarity": "^3.0.0",
"tar-stream": "2.1.0",
"turndown": "5.0.3",
"unescape": "1.0.1",
diff --git a/src/public/javascripts/widgets/similar_notes.js b/src/public/javascripts/widgets/similar_notes.js
index 2492f9f4a..dae891045 100644
--- a/src/public/javascripts/widgets/similar_notes.js
+++ b/src/public/javascripts/widgets/similar_notes.js
@@ -2,6 +2,7 @@ import StandardWidget from "./standard_widget.js";
import linkService from "../services/link.js";
import server from "../services/server.js";
import treeCache from "../services/tree_cache.js";
+import treeUtils from "../services/tree_utils.js";
class SimilarNotesWidget extends StandardWidget {
getWidgetTitle() { return "Similar notes"; }
@@ -9,20 +10,23 @@ class SimilarNotesWidget extends StandardWidget {
getMaxHeight() { return "200px"; }
async doRenderBody() {
- const similarNoteIds = await server.get('similar_notes/' + this.ctx.note.noteId);
+ const similarNotes = await server.get('similar_notes/' + this.ctx.note.noteId);
- if (similarNoteIds.length === 0) {
+ if (similarNotes.length === 0) {
this.$body.text("No similar notes found ...");
return;
}
- await treeCache.getNotes(similarNoteIds); // preload all at once
+ await treeCache.getNotes(similarNotes.map(note => note.noteId)); // preload all at once
- const $list = $("
");
+ const $list = $('');
+
+ for (const similarNote of similarNotes) {
+ similarNote.notePath.pop(); // remove last noteId since it's already in the link
- for (const similarNoteId of similarNoteIds) {
const $item = $("- ")
- .append(await linkService.createNoteLink(similarNoteId));
+ .append(await linkService.createNoteLink(similarNote.noteId))
+ .append($("").text(" (" + await treeUtils.getNotePathTitle(similarNote.notePath.join("/")) + ")"));
$list.append($item);
}
diff --git a/src/routes/api/similar_notes.js b/src/routes/api/similar_notes.js
index dd072cf98..b84589e6f 100644
--- a/src/routes/api/similar_notes.js
+++ b/src/routes/api/similar_notes.js
@@ -12,11 +12,14 @@ async function getSimilarNotes(req) {
return [404, `Note ${noteId} not found.`];
}
- const results = await noteCacheService.findNotes(note.title);
+ const start = new Date();
+
+ const results = await noteCacheService.findSimilarNotes(note.title);
+
+ console.log("Similar note took: " + (Date.now() - start.getTime()) + "ms");
return results
- .map(r => r.noteId)
- .filter(similarNoteId => similarNoteId !== noteId);
+ .filter(note => note.noteId !== noteId);
}
module.exports = {
diff --git a/src/services/note_cache.js b/src/services/note_cache.js
index 59f249f41..ba088e33c 100644
--- a/src/services/note_cache.js
+++ b/src/services/note_cache.js
@@ -5,6 +5,7 @@ const repository = require('./repository');
const protectedSessionService = require('./protected_session');
const utils = require('./utils');
const hoistedNoteService = require('./hoisted_note');
+const stringSimilarity = require('string-similarity');
let loaded = false;
let noteTitles = {};
@@ -37,6 +38,10 @@ async function load() {
await loadProtectedNotes();
}
+ for (const noteId in childToParent) {
+ resortChildToParent(noteId);
+ }
+
loaded = true;
}
@@ -161,11 +166,27 @@ async function findNotes(query) {
return apiResults;
}
+function isArchived(notePath) {
+ // if the note is archived directly
+ if (archived[notePath[notePath.length - 1]] !== undefined) {
+ return true;
+ }
+
+ for (let i = 0; i < notePath.length - 1; i++) {
+ // this is going through parents so archived must be inheritable
+ if (archived[notePath[i]] === 1) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
function search(noteId, tokens, path, results) {
if (tokens.length === 0) {
const retPath = getSomePath(noteId, path);
- if (retPath) {
+ if (retPath && !isArchived(retPath)) {
const thisNoteId = retPath[retPath.length - 1];
const thisParentNoteId = retPath[retPath.length - 2];
@@ -262,7 +283,13 @@ function getNoteTitleForPath(path) {
return titles.join(' / ');
}
-function getSomePath(noteId, path) {
+/**
+ * Returns notePath for noteId from cache. Note hoisting is respected.
+ * Archived notes are also returned, but non-archived paths are preferred if available
+ * - this means that an archived path is returned only if there's no non-archived path
+ * - you can check whether returned path is archived using isArchived()
+ */
+function getSomePath(noteId, path = []) {
if (noteId === 'root') {
path.push(noteId);
path.reverse();
@@ -280,11 +307,6 @@ function getSomePath(noteId, path) {
}
for (const parentNoteId of parents) {
- // archived applies here only if inheritable
- if (archived[parentNoteId] === 1) {
- continue;
- }
-
const retPath = getSomePath(parentNoteId, path.concat([noteId]));
if (retPath) {
@@ -296,9 +318,9 @@ function getSomePath(noteId, path) {
}
function getNotePath(noteId) {
- const retPath = getSomePath(noteId, []);
+ const retPath = getSomePath(noteId);
- if (retPath) {
+ if (retPath && !isArchived(retPath)) {
const noteTitle = getNoteTitleForPath(retPath);
const parentNoteId = childToParent[noteId][0];
@@ -311,6 +333,43 @@ function getNotePath(noteId) {
}
}
+function evaluateSimilarity(text1, text2, noteId, results) {
+ let coeff = stringSimilarity.compareTwoStrings(text1, text2);
+
+ if (coeff > 0.4) {
+ const notePath = getSomePath(noteId);
+
+ // this takes care of note hoisting
+ if (!notePath) {
+ return;
+ }
+
+ if (isArchived(notePath)) {
+ coeff -= 0.2; // archived penalization
+ }
+
+ results.push({coeff, notePath, noteId});
+ }
+}
+
+function findSimilarNotes(title) {
+ const results = [];
+
+ for (const noteId in noteTitles) {
+ evaluateSimilarity(title, noteTitles[noteId], noteId, results);
+ }
+
+ if (protectedSessionService.isProtectedSessionAvailable()) {
+ for (const noteId in protectedNoteTitles) {
+ evaluateSimilarity(title, protectedNoteTitles[noteId], noteId, results);
+ }
+ }
+
+ results.sort((a, b) => a.coeff > b.coeff ? -1 : 1);
+
+ return results.length > 50 ? results.slice(0, 50) : results;
+}
+
eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED, eventService.ENTITY_SYNCED], async ({entityName, entity}) => {
// note that entity can also be just POJO without methods if coming from sync
@@ -355,6 +414,8 @@ eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED
}
childToParent[branch.noteId].push(branch.parentNoteId);
+ resortChildToParent(branch.noteId);
+
childParentToBranchId[branch.noteId + '-' + branch.parentNoteId] = branch.branchId;
}
}
@@ -376,6 +437,16 @@ eventService.subscribe([eventService.ENTITY_CHANGED, eventService.ENTITY_DELETED
}
});
+// will sort the childs so that non-archived are first and archived at the end
+// this is done so that non-archived paths are always explored as first when searching for note path
+function resortChildToParent(noteId) {
+ if (!childToParent[noteId]) {
+ return;
+ }
+
+ childToParent[noteId].sort((a, b) => archived[a] === 1 ? 1 : -1);
+}
+
/**
* @param noteId
* @returns {boolean} - true if note exists (is not deleted) and is not archived.
@@ -399,5 +470,6 @@ module.exports = {
getNotePath,
getNoteTitleForPath,
isAvailable,
- load
+ load,
+ findSimilarNotes
};
\ No newline at end of file