mirror of
https://github.com/monkeytypegame/monkeytype.git
synced 2024-09-20 15:26:15 +08:00
Improve quote search experience (#2644) by Bruception
* Initial * Improve * Fix comment * Add stemming and idf * Remove normalization * Move idf calc outside
This commit is contained in:
parent
1622f37efd
commit
538fb9d385
42
frontend/package-lock.json
generated
42
frontend/package-lock.json
generated
|
@ -14,12 +14,14 @@
|
|||
"chartjs-plugin-annotation": "^0.5.7",
|
||||
"chartjs-plugin-trendline": "^0.2.2",
|
||||
"crypto-browserify": "^3.12.0",
|
||||
"damerau-levenshtein": "1.0.8",
|
||||
"dom-to-image": "^2.6.0",
|
||||
"firebase": "^8.4.2",
|
||||
"gulp-replace": "^1.1.3",
|
||||
"howler": "^2.2.1",
|
||||
"moment-timezone": "^0.5.33",
|
||||
"node-object-hash": "2.3.10",
|
||||
"stemmer": "2.0.0",
|
||||
"tinycolor2": "^1.4.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
@ -27,6 +29,7 @@
|
|||
"@babel/plugin-transform-modules-commonjs": "^7.16.8",
|
||||
"@babel/plugin-transform-runtime": "^7.17.0",
|
||||
"@babel/preset-env": "^7.16.11",
|
||||
"@types/damerau-levenshtein": "1.0.0",
|
||||
"@types/grecaptcha": "^3.0.3",
|
||||
"@types/howler": "^2.2.5",
|
||||
"@types/jquery": "^3.5.13",
|
||||
|
@ -2236,6 +2239,12 @@
|
|||
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
|
||||
"integrity": "sha1-p3c2C1s5oaLlEG+OhY8v0tBgxXA="
|
||||
},
|
||||
"node_modules/@types/damerau-levenshtein": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/damerau-levenshtein/-/damerau-levenshtein-1.0.0.tgz",
|
||||
"integrity": "sha512-8XQ1jJHlOl6HjZ3/fU9Yrm/14jxM4gXVezPWiwkyiG0GnYROsI6wdh8DwKccAFGDNiNYBooTZkRXVe4du6plKA==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/eslint": {
|
||||
"version": "8.4.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-8.4.1.tgz",
|
||||
|
@ -4058,6 +4067,11 @@
|
|||
"type": "^1.0.1"
|
||||
}
|
||||
},
|
||||
"node_modules/damerau-levenshtein": {
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.8.tgz",
|
||||
"integrity": "sha512-sdQSFB7+llfUcQHUQO3+B8ERRj0Oa4w9POWMI/puGtuf7gFywGmkaLCElnudfTiKZV+NvHqL0ifzdrI8Ro7ESA=="
|
||||
},
|
||||
"node_modules/dart-sass": {
|
||||
"version": "1.25.0",
|
||||
"resolved": "https://registry.npmjs.org/dart-sass/-/dart-sass-1.25.0.tgz",
|
||||
|
@ -10652,6 +10666,18 @@
|
|||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/stemmer": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/stemmer/-/stemmer-2.0.0.tgz",
|
||||
"integrity": "sha512-0YS2oMdTZ/wAWUHMMpf7AAJ8Gm6dHXyHddJ0zCu2DIfOfIbdwqAm1bbk4+Vti6gxNIcOrnm5jAP7vYTzQDvc5A==",
|
||||
"bin": {
|
||||
"stemmer": "cli.js"
|
||||
},
|
||||
"funding": {
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/stream-browserify": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-3.0.0.tgz",
|
||||
|
@ -13742,6 +13768,12 @@
|
|||
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.0.tgz",
|
||||
"integrity": "sha1-p3c2C1s5oaLlEG+OhY8v0tBgxXA="
|
||||
},
|
||||
"@types/damerau-levenshtein": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/damerau-levenshtein/-/damerau-levenshtein-1.0.0.tgz",
|
||||
"integrity": "sha512-8XQ1jJHlOl6HjZ3/fU9Yrm/14jxM4gXVezPWiwkyiG0GnYROsI6wdh8DwKccAFGDNiNYBooTZkRXVe4du6plKA==",
|
||||
"dev": true
|
||||
},
|
||||
"@types/eslint": {
|
||||
"version": "8.4.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-8.4.1.tgz",
|
||||
|
@ -15235,6 +15267,11 @@
|
|||
"type": "^1.0.1"
|
||||
}
|
||||
},
|
||||
"damerau-levenshtein": {
|
||||
"version": "1.0.8",
|
||||
"resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.8.tgz",
|
||||
"integrity": "sha512-sdQSFB7+llfUcQHUQO3+B8ERRj0Oa4w9POWMI/puGtuf7gFywGmkaLCElnudfTiKZV+NvHqL0ifzdrI8Ro7ESA=="
|
||||
},
|
||||
"dart-sass": {
|
||||
"version": "1.25.0",
|
||||
"resolved": "https://registry.npmjs.org/dart-sass/-/dart-sass-1.25.0.tgz",
|
||||
|
@ -20387,6 +20424,11 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"stemmer": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/stemmer/-/stemmer-2.0.0.tgz",
|
||||
"integrity": "sha512-0YS2oMdTZ/wAWUHMMpf7AAJ8Gm6dHXyHddJ0zCu2DIfOfIbdwqAm1bbk4+Vti6gxNIcOrnm5jAP7vYTzQDvc5A=="
|
||||
},
|
||||
"stream-browserify": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/stream-browserify/-/stream-browserify-3.0.0.tgz",
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
"@babel/plugin-transform-modules-commonjs": "^7.16.8",
|
||||
"@babel/plugin-transform-runtime": "^7.17.0",
|
||||
"@babel/preset-env": "^7.16.11",
|
||||
"@types/damerau-levenshtein": "1.0.0",
|
||||
"@types/grecaptcha": "^3.0.3",
|
||||
"@types/howler": "^2.2.5",
|
||||
"@types/jquery": "^3.5.13",
|
||||
|
@ -56,12 +57,14 @@
|
|||
"chartjs-plugin-annotation": "^0.5.7",
|
||||
"chartjs-plugin-trendline": "^0.2.2",
|
||||
"crypto-browserify": "^3.12.0",
|
||||
"damerau-levenshtein": "1.0.8",
|
||||
"dom-to-image": "^2.6.0",
|
||||
"firebase": "^8.4.2",
|
||||
"gulp-replace": "^1.1.3",
|
||||
"howler": "^2.2.1",
|
||||
"moment-timezone": "^0.5.33",
|
||||
"node-object-hash": "2.3.10",
|
||||
"stemmer": "2.0.0",
|
||||
"tinycolor2": "^1.4.2"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,6 +7,12 @@ import * as QuoteSubmitPopup from "./quote-submit-popup";
|
|||
import * as QuoteApprovePopup from "./quote-approve-popup";
|
||||
import * as QuoteReportPopup from "./quote-report-popup";
|
||||
import * as Misc from "../misc";
|
||||
import {
|
||||
buildSearchService,
|
||||
SearchService,
|
||||
TextExtractor,
|
||||
} from "../utils/search-service";
|
||||
import { debounce } from "../utils/debounce";
|
||||
|
||||
export let selectedId = 1;
|
||||
|
||||
|
@ -14,39 +20,65 @@ export function setSelectedId(val: number): void {
|
|||
selectedId = val;
|
||||
}
|
||||
|
||||
const searchServiceCache: Record<string, SearchService<any>> = {};
|
||||
|
||||
/**
 * Returns the search service registered for `language`, building it on first
 * use and storing it in `searchServiceCache`.
 *
 * Once a service is cached for a language, `data` and `textExtractor` are
 * ignored on later calls: the cached service is returned as-is.
 *
 * @param language - Cache key identifying the document set.
 * @param data - Documents to index when no cached service exists yet.
 * @param textExtractor - Produces the searchable text of one document.
 * @returns The cached or newly built search service.
 */
function getSearchService<T>(
  language: string,
  data: T[],
  textExtractor: TextExtractor<T>
): SearchService<T> {
  if (!(language in searchServiceCache)) {
    // First request for this language: build the index once and remember it.
    searchServiceCache[language] = buildSearchService<T>(data, textExtractor);
  }

  return searchServiceCache[language];
}
|
||||
|
||||
/**
 * Wraps each word of `text` that starts with one of the matched terms in a
 * `<span class="highlight">` element.
 *
 * Matching is a case-sensitive prefix test, so the term "the" also highlights
 * "them". Separator characters are kept, so the result is `text` with spans
 * added and nothing else changed.
 *
 * @param text - Text to decorate; it is inserted into the markup unescaped.
 * @param matchedText - Terms to highlight; an empty list returns `text` as-is.
 * @returns `text` with matching words wrapped in highlight spans.
 */
function highlightMatches(text: string, matchedText: string[]): string {
  if (matchedText.length === 0) {
    return text;
  }

  // Zero-width split before and after every punctuation/whitespace character:
  // each separator becomes its own piece and concatenating all pieces restores
  // the original text.
  const pieces = text.split(
    /(?=[.,'"/#!$%^&*;:{}=\-_`~()\s])|(?<=[.,'"/#!$%^&*;:{}=\-_`~()\s])/g
  );

  let highlighted = "";
  for (const piece of pieces) {
    const matchedPrefix = matchedText.find((term) => piece.startsWith(term));

    if (matchedPrefix) {
      highlighted += `<span class="highlight">${piece}</span>`;
    } else {
      highlighted += piece;
    }
  }

  return highlighted;
}
|
||||
|
||||
async function updateResults(searchText: string): Promise<void> {
|
||||
const quotes = await Misc.getQuotes(Config.language);
|
||||
const reg = new RegExp(searchText, "i");
|
||||
const found: MonkeyTypes.Quote[] = [];
|
||||
quotes.quotes.forEach((quote) => {
|
||||
const quoteText = quote["text"].replace(/[.,'"/#!$%^&*;:{}=\-_`~()]/g, "");
|
||||
const test1 = reg.test(quoteText);
|
||||
if (test1) {
|
||||
found.push(quote);
|
||||
const { quotes } = await Misc.getQuotes(Config.language);
|
||||
|
||||
const quoteSearchService = getSearchService<MonkeyTypes.Quote>(
|
||||
Config.language,
|
||||
quotes,
|
||||
(quote: MonkeyTypes.Quote) => {
|
||||
return `${quote.text} ${quote.id} ${quote.source}`;
|
||||
}
|
||||
});
|
||||
quotes.quotes.forEach((quote) => {
|
||||
const quoteSource = quote["source"].replace(
|
||||
/[.,'"/#!$%^&*;:{}=\-_`~()]/g,
|
||||
""
|
||||
);
|
||||
const quoteId = quote["id"];
|
||||
const test2 = reg.test(quoteSource);
|
||||
const test3 = reg.test(quoteId.toString());
|
||||
if ((test2 || test3) && found.filter((q) => q.id == quote.id).length == 0) {
|
||||
found.push(quote);
|
||||
}
|
||||
});
|
||||
);
|
||||
const { results: matches, matchedQueryTerms } =
|
||||
quoteSearchService.query(searchText);
|
||||
|
||||
$("#quoteSearchResults").remove();
|
||||
$("#quoteSearchPopup").append(
|
||||
'<div class="quoteSearchResults" id="quoteSearchResults"></div>'
|
||||
);
|
||||
const resultsList = $("#quoteSearchResults");
|
||||
let resultListLength = 0;
|
||||
|
||||
const resultsList = $("#quoteSearchResults");
|
||||
const isNotAuthed = !firebase.auth().currentUser;
|
||||
|
||||
found.forEach(async (quote) => {
|
||||
const quotesToShow = searchText === "" ? quotes : matches;
|
||||
|
||||
quotesToShow.slice(0, 100).forEach((quote) => {
|
||||
let lengthDesc;
|
||||
if (quote.length < 101) {
|
||||
lengthDesc = "short";
|
||||
|
@ -57,15 +89,21 @@ async function updateResults(searchText: string): Promise<void> {
|
|||
} else {
|
||||
lengthDesc = "thicc";
|
||||
}
|
||||
if (resultListLength++ < 100) {
|
||||
resultsList.append(`
|
||||
resultsList.append(`
|
||||
<div class="searchResult" id="${quote.id}">
|
||||
<div class="text">${quote.text}</div>
|
||||
<div class="id"><div class="sub">id</div><span class="quote-id">${
|
||||
quote.id
|
||||
}</span></div>
|
||||
<div class="text">${highlightMatches(
|
||||
quote.text,
|
||||
matchedQueryTerms
|
||||
)}</div>
|
||||
<div class="id"><div class="sub">id</div><span class="quote-id">${highlightMatches(
|
||||
quote.id.toString(),
|
||||
matchedQueryTerms
|
||||
)}</span></div>
|
||||
<div class="length"><div class="sub">length</div>${lengthDesc}</div>
|
||||
<div class="source"><div class="sub">source</div>${quote.source}</div>
|
||||
<div class="source"><div class="sub">source</div>${highlightMatches(
|
||||
quote.source,
|
||||
matchedQueryTerms
|
||||
)}</div>
|
||||
<div class="icon-button report ${
|
||||
isNotAuthed && "hidden"
|
||||
}" aria-label="Report quote" data-balloon-pos="left">
|
||||
|
@ -73,15 +111,14 @@ async function updateResults(searchText: string): Promise<void> {
|
|||
</div>
|
||||
</div>
|
||||
`);
|
||||
}
|
||||
});
|
||||
if (found.length > 100) {
|
||||
if (quotesToShow.length > 100) {
|
||||
$("#extraResults").html(
|
||||
found.length +
|
||||
quotesToShow.length +
|
||||
" results <span style='opacity: 0.5'>(only showing 100)</span>"
|
||||
);
|
||||
} else {
|
||||
$("#extraResults").html(found.length + " results");
|
||||
$("#extraResults").html(quotesToShow.length + " results");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -158,17 +195,14 @@ export function apply(val: number): boolean {
|
|||
return ret;
|
||||
}
|
||||
|
||||
$("#quoteSearchPopup .searchBox").keydown((e) => {
|
||||
if (e.code == "Escape") return;
|
||||
setTimeout(() => {
|
||||
let searchText = (<HTMLInputElement>document.getElementById("searchBox"))
|
||||
.value;
|
||||
searchText = searchText
|
||||
.replace(/[.,'"/#!$%^&*;:{}=\-_`~()]/g, "")
|
||||
.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
|
||||
const debouncedSearch = debounce(updateResults);
|
||||
|
||||
updateResults(searchText);
|
||||
}, 0.1); //arbitrarily v. small time as it's only to allow text to input before searching
|
||||
$("#quoteSearchPopup .searchBox").on("keyup", (e) => {
|
||||
if (e.code === "Escape") return;
|
||||
|
||||
const searchText = (<HTMLInputElement>document.getElementById("searchBox"))
|
||||
.value;
|
||||
debouncedSearch(searchText);
|
||||
});
|
||||
|
||||
$("#quoteSearchPopupWrapper").click((e) => {
|
||||
|
@ -218,17 +252,3 @@ $(document).keydown((event) => {
|
|||
event.preventDefault();
|
||||
}
|
||||
});
|
||||
|
||||
// $("#quoteSearchPopup input").keypress((e) => {
|
||||
// if (e.keyCode == 13) {
|
||||
// if (!isNaN(document.getElementById("searchBox").value)) {
|
||||
// apply();
|
||||
// } else {
|
||||
// let results = document.getElementsByClassName("searchResult");
|
||||
// if (results.length > 0) {
|
||||
// selectedId = parseInt(results[0].getAttribute("id"));
|
||||
// apply(selectedId);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// });
|
||||
|
|
8
frontend/src/scripts/utils/debounce.ts
Normal file
8
frontend/src/scripts/utils/debounce.ts
Normal file
|
@ -0,0 +1,8 @@
|
|||
/**
 * Creates a debounced wrapper around `fn`.
 *
 * Every call to the wrapper cancels the invocation scheduled by the previous
 * call and schedules a new one `ms` milliseconds later. `fn` therefore runs
 * only once calls have stopped for `ms` milliseconds, with the `this` value
 * and arguments of the most recent call.
 *
 * @param fn - Function to invoke after the quiet period.
 * @param ms - Quiet period in milliseconds (defaults to 250).
 * @returns The debounced wrapper. The wrapper itself returns nothing.
 */
export function debounce(fn: any, ms = 250): any {
  let pendingTimer: ReturnType<typeof setTimeout>;

  function debounced(this: any, ...callArgs: any[]): void {
    // Drop the invocation that is still waiting before scheduling this one.
    clearTimeout(pendingTimer);

    const invoke = (): void => {
      fn.apply(this, callArgs);
    };
    pendingTimer = setTimeout(invoke, ms);
  }

  return debounced;
}
|
155
frontend/src/scripts/utils/search-service.ts
Normal file
155
frontend/src/scripts/utils/search-service.ts
Normal file
|
@ -0,0 +1,155 @@
|
|||
import { stemmer } from "stemmer";
|
||||
import levenshtein from "damerau-levenshtein";
|
||||
|
||||
export interface SearchService<T> {
|
||||
query: (query: string) => SearchResult<T>;
|
||||
}
|
||||
|
||||
interface SearchServiceOptions {
|
||||
fuzzyMatchSensitivity: number;
|
||||
scoreForSimilarMatch: number;
|
||||
scoreForExactMatch: number;
|
||||
}
|
||||
|
||||
interface InternalDocument {
|
||||
id: number;
|
||||
}
|
||||
|
||||
interface ReverseIndex {
|
||||
[key: string]: Set<InternalDocument>;
|
||||
}
|
||||
|
||||
interface TokenMap {
|
||||
[key: string]: Set<string>;
|
||||
}
|
||||
|
||||
interface SearchResult<T> {
|
||||
results: T[];
|
||||
matchedQueryTerms: string[];
|
||||
}
|
||||
|
||||
export type TextExtractor<T> = (document: T) => string;
|
||||
|
||||
const DEFAULT_OPTIONS: SearchServiceOptions = {
|
||||
fuzzyMatchSensitivity: 0.2, // Value between 0-1. Higher = more tolerant to spelling mistakes, too high and you get nonsense.
|
||||
scoreForSimilarMatch: 0.5, // When ranking results, the score a match gets for having a token that is similar to a search token.
|
||||
scoreForExactMatch: 1, // When ranking results, the score a match gets for having an exact match with a token in the search query.
|
||||
};
|
||||
|
||||
/**
 * Computes the inverse document frequency of a term as
 * `log10(numberOfDocuments / numberOfDocumentsWithTerm)`.
 *
 * The fewer documents contain the term, the larger the result; a term present
 * in every document yields 0.
 *
 * @param numberOfDocuments - Total number of indexed documents.
 * @param numberOfDocumentsWithTerm - Number of documents containing the term.
 * @returns The IDF weight, or 0 when no document contains the term.
 */
function inverseDocumentFrequency(
  numberOfDocuments: number,
  numberOfDocumentsWithTerm: number
): number {
  // A term that appears nowhere gets no weight instead of dividing by zero.
  return numberOfDocumentsWithTerm === 0
    ? 0
    : Math.log10(numberOfDocuments / numberOfDocumentsWithTerm);
}
|
||||
|
||||
/**
 * Splits `text` into word tokens for indexing and querying.
 *
 * A token is a maximal run of Unicode letters, combining marks and numbers;
 * everything else (whitespace, punctuation, underscores, symbols) is a
 * separator. For pure-ASCII input this yields the same tokens as the previous
 * `[a-zA-Z0-9]+` pattern, while accented and non-Latin words are now kept
 * whole instead of being dropped or cut at the first non-ASCII character.
 *
 * @param text - Text to tokenize.
 * @returns The tokens in order of appearance; an empty array when there are none.
 */
function tokenize(text: string): string[] {
  // \p{M} keeps combining marks attached to their base letter, so words in
  // scripts that rely on them are not split mid-word.
  return text.match(/[\p{L}\p{M}\p{N}]+/gu) ?? [];
}
|
||||
|
||||
/**
 * Builds an in-memory search service over `documents`.
 *
 * Indexing: the searchable text of every document is tokenized and each token
 * is stemmed. Two maps are kept per stem: the set of documents containing it
 * (the reverse index) and the set of original, unstemmed tokens that produced
 * it (used to report which words matched).
 *
 * Querying: the query is tokenized and stemmed the same way. Every indexed
 * stem that equals a query stem, or whose similarity to it is at least
 * `1 - options.fuzzyMatchSensitivity`, adds a score to each document that
 * contains it, weighted by the stem's inverse document frequency. Documents
 * are returned by descending total score.
 *
 * @param documents - Documents to index; results reference these same objects.
 * @param getSearchableText - Produces the text to index for one document.
 * @param options - Scoring and fuzziness settings (defaults to `DEFAULT_OPTIONS`).
 * @returns A service whose `query` method returns the ranked documents and the
 *   original tokens that matched. A query with no tokens returns empty results.
 */
export const buildSearchService = <T>(
  documents: T[],
  getSearchableText: TextExtractor<T>,
  options: SearchServiceOptions = DEFAULT_OPTIONS
): SearchService<T> => {
  // Null-prototype objects: stems are arbitrary words taken from the corpus.
  // On a plain `{}` the `in` checks below also see inherited keys, so a stem
  // such as "constructor" would be reported as present and the `.add` call
  // that follows would throw, aborting the whole index build.
  const reverseIndex: ReverseIndex = Object.create(null);
  const normalizedTokenToOriginal: TokenMap = Object.create(null);

  documents.forEach((document, documentIndex) => {
    const rawTokens = tokenize(getSearchableText(document));

    // One shared object per document, so the Sets below hold a document only
    // once no matter how many times a stem occurs in it.
    const internalDocument: InternalDocument = {
      id: documentIndex,
    };

    rawTokens.forEach((token) => {
      const stemmedToken = stemmer(token);

      if (!(stemmedToken in normalizedTokenToOriginal)) {
        normalizedTokenToOriginal[stemmedToken] = new Set<string>();
      }
      normalizedTokenToOriginal[stemmedToken].add(token);

      if (!(stemmedToken in reverseIndex)) {
        reverseIndex[stemmedToken] = new Set<InternalDocument>();
      }
      reverseIndex[stemmedToken].add(internalDocument);
    });
  });

  const tokenSet = Object.keys(reverseIndex);

  const query = (searchQuery: string): SearchResult<T> => {
    const searchResult: SearchResult<T> = {
      results: [],
      matchedQueryTerms: [],
    };

    // A Set, so a stem repeated in the query is only scored once.
    const normalizedSearchQuery = new Set<string>(
      tokenize(searchQuery).map((token) => stemmer(token))
    );
    if (normalizedSearchQuery.size === 0) {
      return searchResult;
    }

    // Document index -> accumulated score.
    const results = new Map<number, number>();
    const matchedTokens = new Set<string>();

    normalizedSearchQuery.forEach((searchToken) => {
      tokenSet.forEach((token) => {
        const { similarity } = levenshtein(searchToken, token);

        const matchesSearchToken = token === searchToken;
        const isSimilar = similarity >= 1 - options.fuzzyMatchSensitivity;

        if (!matchesSearchToken && !isSimilar) {
          return;
        }

        const documentMatches = reverseIndex[token];

        const idf = inverseDocumentFrequency(
          documents.length,
          documentMatches.size
        );

        // The two scores are additive. NOTE(review): an exact match presumably
        // also has maximal similarity and so earns both - confirm against the
        // damerau-levenshtein documentation.
        const scoreForExactMatch = matchesSearchToken
          ? options.scoreForExactMatch
          : 0;
        const scoreForSimilarity = isSimilar ? options.scoreForSimilarMatch : 0;

        // Same for every document containing this stem, so computed once here.
        const scoreForToken = (scoreForExactMatch + scoreForSimilarity) * idf;

        documentMatches.forEach((document) => {
          const currentScore = results.get(document.id) ?? 0;
          results.set(document.id, currentScore + scoreForToken);
        });

        // Report the words as they appear in the documents, not their stems.
        normalizedTokenToOriginal[token].forEach((originalToken) => {
          matchedTokens.add(originalToken);
        });
      });
    });

    const orderedResults = [...results]
      .sort((match1, match2) => {
        return match2[1] - match1[1];
      })
      .map((match) => documents[match[0]]);

    searchResult.results = orderedResults;
    searchResult.matchedQueryTerms = [...matchedTokens];

    return searchResult;
  };

  return {
    query,
  };
};
|
|
@ -412,6 +412,10 @@
|
|||
}
|
||||
|
||||
#quoteSearchPopupWrapper {
|
||||
.highlight {
|
||||
color: var(--main-color);
|
||||
}
|
||||
|
||||
#quoteSearchPopup {
|
||||
background: var(--bg-color);
|
||||
border-radius: var(--roundness);
|
||||
|
|
Loading…
Reference in a new issue