diff --git a/frontend/__tests__/test/lazy-mode.spec.ts b/frontend/__tests__/test/lazy-mode.spec.ts index 1609719c6..6b0259fb9 100644 --- a/frontend/__tests__/test/lazy-mode.spec.ts +++ b/frontend/__tests__/test/lazy-mode.spec.ts @@ -6,6 +6,12 @@ let germanAccents = [ ["ü", "ue"], ] as [string, string][]; +let multicharAccents = [ + ["a", "bc"], + ["de", "f"], + ["gh", "ij"], +] as [string, string][]; + describe("lazy-mode", () => { describe("replaceAccents", () => { it("should replace common accents", () => { @@ -38,5 +44,23 @@ describe("lazy-mode", () => { expect(result).toBe("aesse"); }); }); + describe("multicharacter accents", () => { + it("should correctly replace multicharacter accents", () => { + const tests = [ + { input: "a", expected: "bc" }, + { input: "aa", expected: "bcbc" }, + { input: "de", expected: "f" }, + { input: "dede", expected: "ff" }, + { input: "gh", expected: "ij" }, + { input: "ghgh", expected: "ijij" }, + { input: "abcdefgh", expected: "bcbcffij" }, + ]; + + tests.forEach(({ input, expected }) => { + const result = replaceAccents(input, multicharAccents); + expect(result).toBe(expected); + }); + }); + }); }); }); diff --git a/frontend/src/ts/test/lazy-mode.ts b/frontend/src/ts/test/lazy-mode.ts index 90e94680d..a957b5920 100644 --- a/frontend/src/ts/test/lazy-mode.ts +++ b/frontend/src/ts/test/lazy-mode.ts @@ -49,14 +49,21 @@ const accentsMap = new Map( export type Accents = [string, string][]; function findAccent( - char: string, + wordSlice: string, additionalAccents?: Accents -): string | undefined { - const lookup = char.toLowerCase(); +): [string, string] | undefined { + const lookup = wordSlice.toLowerCase(); - const found = additionalAccents?.find((rule) => rule[0].includes(lookup)); + const found = additionalAccents?.find((rule) => lookup.startsWith(rule[0])); - return found !== undefined ? found[1] : accentsMap.get(lookup); + const common = accentsMap.get(lookup[0] as string); + + const commonFound = + common !== undefined + ? ([lookup[0], common] as [string, string]) + : undefined; + + return found !== undefined ? found : commonFound; } export function replaceAccents( @@ -68,19 +75,24 @@ export function replaceAccents( const cases = [...word].map((it, i) => it === uppercased[i]); const newWordArray: string[] = []; + let offset = 0; for (let i = 0; i < word.length; i++) { - const char = word[i] as string; - const isUpperCase = cases[i]; - const accent = findAccent(char, additionalAccents); + const index = i + offset; + if (index >= word.length) break; + const wordSlice = word.slice(index); + const caseSlice = cases.slice(index); + const accent = findAccent(wordSlice, additionalAccents); if (accent !== undefined) { - if (isUpperCase) { - newWordArray.push(accent.substring(0, 1).toUpperCase()); - newWordArray.push(accent.substring(1)); - } else { - newWordArray.push(accent); + for (let j = 0; j < accent[1].length; j++) { + const char = accent[1][j] as string; + const isUpperCase = caseSlice[j] ?? false; + newWordArray.push(isUpperCase ? char.toUpperCase() : char); } + offset += accent[0].length - 1; } else { + const char = word[index] as string; + const isUpperCase = cases[index]; newWordArray.push(isUpperCase ? char.toUpperCase() : char); } } diff --git a/frontend/static/languages/yiddish.json b/frontend/static/languages/yiddish.json index 369b43283..04270cfc4 100644 --- a/frontend/static/languages/yiddish.json +++ b/frontend/static/languages/yiddish.json @@ -2,8 +2,25 @@ "name": "yiddish", "rightToLeft": true, "ligatures": true, - "noLazyMode": true, "bcp47": "yi", + "additionalAccents": [ + ["אַ", "א"], + ["אָ", "א"], + ["בּ", "ב"], + ["בֿ", "ב"], + ["וּ", "ו"], + ["וֹ", "ו"], + ["יִ", "י"], + ["כּ", "כ"], + ["פּ", "פ"], + ["פֿ", "פ"], + ["שׂ", "ש"], + ["תּ", "ת"], + ["ײַ", "יי"], + ["ײ", "יי"], + ["ױ", "וי"], + ["װ", "וו"] + ], "words": [ "קאַווע", "אויפּס",