impr(lazy mode): support replacing 2 characters with one

Also adds lazy mode to Yiddish. Closes #6321.
2025-12-18 06:00:18 +08:00 · 2025-06-22 10:49:05 +02:00 · 2025-06-22 10:49:05 +02:00 · ced4b6e162
commit ced4b6e162
parent 5ca47e116b
3 changed files with 67 additions and 14 deletions
--- a/frontend/tests/test/lazy-mode.spec.ts
+++ b/frontend/tests/test/lazy-mode.spec.ts
@ -6,6 +6,12 @@ let germanAccents = [
  ["ü", "ue"],
 ] as [string, string][];

+let multicharAccents = [
+  ["a", "bc"],
+  ["de", "f"],
+  ["gh", "ij"],
+] as [string, string][];
+
 describe("lazy-mode", () => {
  describe("replaceAccents", () => {
    it("should replace common accents", () => {
@ -38,5 +44,23 @@ describe("lazy-mode", () => {
        expect(result).toBe("aesse");
      });
    });
+    describe("multicharacter accents", () => {
+      it("should correctly replace multicharacter accents", () => {
+        const tests = [
+          { input: "a", expected: "bc" },
+          { input: "aa", expected: "bcbc" },
+          { input: "de", expected: "f" },
+          { input: "dede", expected: "ff" },
+          { input: "gh", expected: "ij" },
+          { input: "ghgh", expected: "ijij" },
+          { input: "abcdefgh", expected: "bcbcffij" },
+        ];
+
+        tests.forEach(({ input, expected }) => {
+          const result = replaceAccents(input, multicharAccents);
+          expect(result).toBe(expected);
+        });
+      });
+    });
  });
 });
--- a/frontend/src/ts/test/lazy-mode.ts
+++ b/frontend/src/ts/test/lazy-mode.ts
@ -49,14 +49,21 @@ const accentsMap = new Map<string, string>(
 export type Accents = [string, string][];

 function findAccent(
-  char: string,
+  wordSlice: string,
  additionalAccents?: Accents
-): string | undefined {
-  const lookup = char.toLowerCase();
+): [string, string] | undefined {
+  const lookup = wordSlice.toLowerCase();

-  const found = additionalAccents?.find((rule) => rule[0].includes(lookup));
+  const found = additionalAccents?.find((rule) => lookup.startsWith(rule[0]));

-  return found !== undefined ? found[1] : accentsMap.get(lookup);
+  const common = accentsMap.get(lookup[0] as string);
+
+  const commonFound =
+    common !== undefined
+      ? ([lookup[0], common] as [string, string])
+      : undefined;
+
+  return found !== undefined ? found : commonFound;
 }

 export function replaceAccents(
@ -68,19 +75,24 @@ export function replaceAccents(
  const cases = [...word].map((it, i) => it === uppercased[i]);
  const newWordArray: string[] = [];

+  let offset = 0;
  for (let i = 0; i < word.length; i++) {
-    const char = word[i] as string;
-    const isUpperCase = cases[i];
-    const accent = findAccent(char, additionalAccents);
+    const index = i + offset;
+    if (index >= word.length) break;
+    const wordSlice = word.slice(index);
+    const caseSlice = cases.slice(index);
+    const accent = findAccent(wordSlice, additionalAccents);

    if (accent !== undefined) {
-      if (isUpperCase) {
-        newWordArray.push(accent.substring(0, 1).toUpperCase());
-        newWordArray.push(accent.substring(1));
-      } else {
-        newWordArray.push(accent);
+      for (let j = 0; j < accent[1].length; j++) {
+        const char = accent[1][j] as string;
+        const isUpperCase = caseSlice[j] ?? false;
+        newWordArray.push(isUpperCase ? char.toUpperCase() : char);
      }
+      offset += accent[0].length - 1;
    } else {
+      const char = word[index] as string;
+      const isUpperCase = cases[index];
      newWordArray.push(isUpperCase ? char.toUpperCase() : char);
    }
  }
--- a/frontend/static/languages/yiddish.json
+++ b/frontend/static/languages/yiddish.json
@ -2,8 +2,25 @@
  "name": "yiddish",
  "rightToLeft": true,
  "ligatures": true,
-  "noLazyMode": true,
  "bcp47": "yi",
+  "additionalAccents": [
+    ["אַ", "א"],
+    ["אָ", "א"],
+    ["בּ", "ב"],
+    ["בֿ", "ב"],
+    ["וּ", "ו"],
+    ["וֹ", "ו"],
+    ["יִ", "י"],
+    ["כּ", "כ"],
+    ["פּ", "פ"],
+    ["פֿ", "פ"],
+    ["שׂ", "ש"],
+    ["תּ", "ת"],
+    ["ײַ", "יי"],
+    ["ײ", "יי"],
+    ["ױ", "וי"],
+    ["װ", "וו"]
+  ],
  "words": [
    "קאַווע",
    "אויפּס",