Fix tokenizer (#2652)

Bruce Berrios 2022-03-07 09:32:59 -05:00 committed by GitHub
parent 244b35e071
commit ff0ee93fe4
2 changed files with 2 additions and 2 deletions


@@ -42,7 +42,7 @@ function highlightMatches(text: string, matchedText: string[]): string {
     return text;
   }
   const words = text.split(
-    /(?=[.,'"/#!$%^&*;:{}=\-_`~()\s])|(?<=[.,'"/#!$%^&*;:{}=\-_`~()\s])/g
+    /(?=[.,"/#!$%^&*;:{}=\-_`~()\s])|(?<=[.,"/#!$%^&*;:{}=\-_`~()\s])/g
   );
   const normalizedWords = words.map((word) => {
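
Net effect of this first hunk: the apostrophe is dropped from the word-boundary character class, so contractions are no longer torn apart when matches are highlighted, and the highlighter's boundaries stay consistent with what the tokenizer below now treats as part of a word. A minimal standalone sketch of the before/after behavior (oldBoundary/newBoundary are illustrative names, not identifiers from the codebase):

// Illustrative sketch: the two boundary regexes from the hunk above, applied directly.
const oldBoundary = /(?=[.,'"/#!$%^&*;:{}=\-_`~()\s])|(?<=[.,'"/#!$%^&*;:{}=\-_`~()\s])/g;
const newBoundary = /(?=[.,"/#!$%^&*;:{}=\-_`~()\s])|(?<=[.,"/#!$%^&*;:{}=\-_`~()\s])/g;

"don't stop".split(oldBoundary); // ["don", "'", "t", " ", "stop"] -- contraction split into pieces
"don't stop".split(newBoundary); // ["don't", " ", "stop"]         -- contraction kept whole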


@@ -48,7 +48,7 @@ function inverseDocumentFrequency(
 }
 
 function tokenize(text: string): string[] {
-  return text.match(/[a-zA-Z0-9]+/g) || [];
+  return text.match(/[^\\\][.,"/#!?$%^&*;:{}=\-_`~()\s]+/g) || [];
 }
 
 export const buildSearchService = <T>(
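
Net effect of this second hunk: tokenize previously kept only ASCII alphanumeric runs, so accented letters, apostrophes, and non-Latin characters split words apart; the new negated character class instead keeps any run that is not punctuation or whitespace. A minimal comparison sketch (tokenizeOld/tokenizeNew are illustrative stand-ins for the before/after versions of tokenize):

// Illustrative stand-ins for the before/after tokenize regexes from the hunk above.
const tokenizeOld = (text: string): string[] => text.match(/[a-zA-Z0-9]+/g) || [];
const tokenizeNew = (text: string): string[] =>
  text.match(/[^\\\][.,"/#!?$%^&*;:{}=\-_`~()\s]+/g) || [];

tokenizeOld("café au lait, s'il vous plaît"); // ["caf", "au", "lait", "s", "il", "vous", "pla", "t"]
tokenizeNew("café au lait, s'il vous plaît"); // ["café", "au", "lait", "s'il", "vous", "plaît"]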