impr(caret): handle mixed language direction (@byseif21) (#6695)

### Description

Enhances the caret positioning logic to support mixed text directions (LTR and RTL) on a per-word basis. It introduces a new `hasRTLCharacters` utility function to detect RTL characters in individual words, allowing the caret to adjust dynamically based on each word's direction rather than relying solely on the language's default direction.

#### Notes

* Tested: no effect on normal single-direction languages.
* Tape mode handling is not included.
* Related: #6694, #6666

---------

Co-authored-by: Jack <jack@monkeytype.com>
2025-11-07 19:38:09 +08:00 · 2025-09-04 13:36:59 +03:00 · 2025-09-04 13:36:59 +03:00 · 01d8363e19
commit 01d8363e19
parent 64473e4e69
5 changed files with 332 additions and 11 deletions
--- a/frontend/tests/utils/strings.spec.ts
+++ b/frontend/tests/utils/strings.spec.ts
@ -1,4 +1,4 @@
-import { describe, it, expect } from "vitest";
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
 import * as Strings from "../../src/ts/utils/strings";

 describe("string utils", () => {
@ -66,4 +66,239 @@ describe("string utils", () => {
      }
    );
  });
+
+  /**
+   * Table-driven checks for the internal `hasRTLCharacters` helper, reached
+   * through `Strings.__testing`. Each row is `[expected, word, description]`;
+   * the description is only used to build the generated test title.
+   */
+  describe("hasRTLCharacters", () => {
+    it.each([
+      // LTR characters should return false
+      [false, "hello", "basic Latin text"],
+      [false, "world123", "Latin text with numbers"],
+      [false, "test!", "Latin text with punctuation"],
+      [false, "ABC", "uppercase Latin text"],
+      [false, "", "empty string"],
+      [false, "123", "numbers only"],
+      [false, "!@#$%", "punctuation and symbols only"],
+      [false, "   ", "whitespace only"],
+
+      // Common LTR scripts
+      [false, "Здравствуй", "Cyrillic text"],
+      // the word must actually contain an accented letter to exercise this case
+      [false, "Français", "Latin with accents"],
+      [false, "Καλημέρα", "Greek text"],
+      [false, "こんにちは", "Japanese Hiragana"],
+      [false, "你好", "Chinese characters"],
+      [false, "안녕하세요", "Korean text"],
+
+      // RTL characters should return true - Arabic
+      [true, "مرحبا", "Arabic text"],
+      [true, "السلام", "Arabic phrase"],
+      [true, "العربية", "Arabic word"],
+      [true, "٠١٢٣٤٥٦٧٨٩", "Arabic-Indic digits"],
+
+      // RTL characters should return true - Hebrew
+      [true, "שלום", "Hebrew text"],
+      [true, "עברית", "Hebrew word"],
+      [true, "ברוך", "Hebrew name"],
+
+      // RTL characters should return true - Persian/Farsi
+      [true, "سلام", "Persian text"],
+      [true, "فارسی", "Persian word"],
+
+      // Mixed content (should return true if ANY RTL characters are present)
+      [true, "hello مرحبا", "mixed LTR and Arabic"],
+      [true, "123 שלום", "numbers and Hebrew"],
+      [true, "test سلام!", "Latin, Persian, and punctuation"],
+      [true, "مرحبا123", "Arabic with numbers"],
+      [true, "hello؟", "Latin with Arabic punctuation"],
+
+      // Edge cases with various Unicode ranges
+      [false, "𝕳𝖊𝖑𝖑𝖔", "mathematical bold text (LTR)"],
+      [false, "🌍🌎🌏", "emoji"],
+    ] as const)(
+      "should return %s for word '%s' (%s)",
+      (expected: boolean, word: string, _description: string) => {
+        expect(Strings.__testing.hasRTLCharacters(word)).toBe(expected);
+      }
+    );
+  });
+
+  /**
+   * Tests for `getWordDirection(word, languageRTL)`.
+   * Rows of the table are `[expected, word, languageRTL, description]`; the
+   * description is only used to build the generated test title.
+   */
+  describe("getWordDirection", () => {
+    // start every test with an empty direction cache so cached results
+    // cannot leak from one case into the next
+    beforeEach(() => {
+      Strings.clearWordDirectionCache();
+    });
+
+    it.each([
+      // Basic functionality - should use hasRTLCharacters result when word has core content
+      [false, "hello", false, "LTR word in LTR language"],
+      [
+        false,
+        "hello",
+        true,
+        "LTR word in RTL language (word direction overrides language)",
+      ],
+      [
+        true,
+        "مرحبا",
+        false,
+        "RTL word in LTR language (word direction overrides language)",
+      ],
+      [true, "مرحبا", true, "RTL word in RTL language"],
+
+      // Punctuation stripping behavior
+      [false, "hello!", false, "LTR word with trailing punctuation"],
+      [false, "!hello", false, "LTR word with leading punctuation"],
+      [false, "!hello!", false, "LTR word with surrounding punctuation"],
+      [true, "مرحبا؟", false, "RTL word with trailing punctuation"],
+      [true, "؟مرحبا", false, "RTL word with leading punctuation"],
+      [true, "؟مرحبا؟", false, "RTL word with surrounding punctuation"],
+
+      // Fallback to language direction for empty/neutral content
+      [false, "", false, "empty string falls back to LTR language"],
+      [true, "", true, "empty string falls back to RTL language"],
+      [false, "!!!", false, "punctuation only falls back to LTR language"],
+      [true, "!!!", true, "punctuation only falls back to RTL language"],
+      [false, "   ", false, "whitespace only falls back to LTR language"],
+      [true, "   ", true, "whitespace only falls back to RTL language"],
+
+      // Numbers behavior (numbers are neutral, follow hasRTLCharacters detection)
+      [false, "123", false, "regular digits are not RTL"],
+      [false, "123", true, "regular digits are not RTL regardless of language"],
+      [true, "١٢٣", false, "Arabic-Indic digits are detected as RTL"],
+      [true, "١٢٣", true, "Arabic-Indic digits are detected as RTL"],
+    ] as const)(
+      "should return %s for word '%s' with languageRTL=%s (%s)",
+      (
+        expected: boolean,
+        word: string,
+        languageRTL: boolean,
+        _description: string
+      ) => {
+        expect(Strings.getWordDirection(word, languageRTL)).toBe(expected);
+      }
+    );
+
+    it("should return languageRTL for undefined word", () => {
+      expect(Strings.getWordDirection(undefined, false)).toBe(false);
+      expect(Strings.getWordDirection(undefined, true)).toBe(true);
+    });
+
+    // The spies below are installed on Map.prototype, so they record
+    // get/set/clear calls made on any Map instance while a test runs. The
+    // assertions therefore match on the specific key/value arguments that
+    // getWordDirection is expected to pass to its cache.
+    describe("caching", () => {
+      let mapGetSpy: ReturnType<typeof vi.spyOn>;
+      let mapSetSpy: ReturnType<typeof vi.spyOn>;
+      let mapClearSpy: ReturnType<typeof vi.spyOn>;
+
+      beforeEach(() => {
+        mapGetSpy = vi.spyOn(Map.prototype, "get");
+        mapSetSpy = vi.spyOn(Map.prototype, "set");
+        mapClearSpy = vi.spyOn(Map.prototype, "clear");
+      });
+
+      // restore the real Map methods so the spies do not affect other suites
+      afterEach(() => {
+        mapGetSpy.mockRestore();
+        mapSetSpy.mockRestore();
+        mapClearSpy.mockRestore();
+      });
+
+      it("should use cache for repeated calls", () => {
+        // First call should cache the result (cache miss)
+        const result1 = Strings.getWordDirection("hello", false);
+        expect(result1).toBe(false);
+        expect(mapSetSpy).toHaveBeenCalledWith("hello", false);
+
+        // Reset spies to check second call
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        // Second call should use cache (cache hit)
+        const result2 = Strings.getWordDirection("hello", false);
+        expect(result2).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("hello");
+        expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again
+
+        // Cache should work regardless of language direction for same word
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        const result3 = Strings.getWordDirection("hello", true);
+        expect(result3).toBe(false); // Still false because "hello" is LTR regardless of language
+        expect(mapGetSpy).toHaveBeenCalledWith("hello");
+        expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again
+      });
+
+      it("should cache based on core word without punctuation", () => {
+        // First call should cache the result for core "hello"
+        const result1 = Strings.getWordDirection("hello", false);
+        expect(result1).toBe(false);
+        expect(mapSetSpy).toHaveBeenCalledWith("hello", false);
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        // These should all use the same cache entry since they have the same core
+        const result2 = Strings.getWordDirection("hello!", false);
+        expect(result2).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("hello");
+        expect(mapSetSpy).not.toHaveBeenCalled();
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        const result3 = Strings.getWordDirection("!hello", false);
+        expect(result3).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("hello");
+        expect(mapSetSpy).not.toHaveBeenCalled();
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        const result4 = Strings.getWordDirection("!hello!", false);
+        expect(result4).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("hello");
+        expect(mapSetSpy).not.toHaveBeenCalled();
+      });
+
+      it("should handle cache clearing", () => {
+        // Cache a result
+        Strings.getWordDirection("test", false);
+        expect(mapSetSpy).toHaveBeenCalledWith("test", false);
+
+        // Clear cache
+        Strings.clearWordDirectionCache();
+        expect(mapClearSpy).toHaveBeenCalled();
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+        mapClearSpy.mockClear();
+
+        // Should work normally after cache clear (cache miss again)
+        const result = Strings.getWordDirection("test", false);
+        expect(result).toBe(false);
+        expect(mapSetSpy).toHaveBeenCalledWith("test", false);
+      });
+
+      it("should demonstrate cache miss vs cache hit behavior", () => {
+        // Test cache miss - first time seeing this word
+        const result1 = Strings.getWordDirection("unique", false);
+        expect(result1).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("unique");
+        expect(mapSetSpy).toHaveBeenCalledWith("unique", false);
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        // Test cache hit - same word again
+        const result2 = Strings.getWordDirection("unique", false);
+        expect(result2).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("unique");
+        expect(mapSetSpy).not.toHaveBeenCalled(); // No cache set on hit
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        // Test cache miss - different word
+        const result3 = Strings.getWordDirection("different", false);
+        expect(result3).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("different");
+        expect(mapSetSpy).toHaveBeenCalledWith("different", false);
+      });
+    });
+  });
 });
--- a/frontend/src/ts/test/caret.ts
+++ b/frontend/src/ts/test/caret.ts
@ -6,7 +6,7 @@ import * as TestState from "../test/test-state";
 import * as TestWords from "./test-words";
 import { prefersReducedMotion } from "../utils/misc";
 import { convertRemToPixels } from "../utils/numbers";
-import { splitIntoCharacters } from "../utils/strings";
+import { splitIntoCharacters, getWordDirection } from "../utils/strings";
 import { safeNumber } from "@monkeytype/util/numbers";
 import { subscribe } from "../observables/config-event";

@ -59,11 +59,18 @@ function getTargetPositionLeft(
  currentWordNodeList: NodeListOf<HTMLElement>,
  fullWidthCaretWidth: number,
  wordLen: number,
-  inputLen: number
+  inputLen: number,
+  currentWord?: string
 ): number {
  const invisibleExtraLetters = Config.blindMode || Config.hideExtraLetters;
  let result = 0;

+  // use the current word's own direction; this falls back to the language
+  // direction when the word is undefined, empty, or only punctuation/whitespace
+  const isWordRightToLeft = getWordDirection(
+    currentWord,
+    isLanguageRightToLeft
+  );
+
  if (Config.tapeMode === "off") {
    let positionOffsetToWord = 0;

@ -71,7 +78,7 @@ function getTargetPositionLeft(
    const lastWordLetter = currentWordNodeList[wordLen - 1];
    const lastInputLetter = currentWordNodeList[inputLen - 1];

-    if (isLanguageRightToLeft) {
+    if (isWordRightToLeft) {
      if (inputLen <= wordLen && currentLetter) {
        // at word beginning in zen mode both lengths are 0, but currentLetter is defined "_"
        positionOffsetToWord =
@ -104,13 +111,13 @@ function getTargetPositionLeft(
      $(document.querySelector("#wordsWrapper") as HTMLElement).width() ?? 0;
    const tapeMargin =
      wordsWrapperWidth *
-      (isLanguageRightToLeft
+      (isWordRightToLeft
        ? 1 - Config.tapeMargin / 100
        : Config.tapeMargin / 100);

    result =
      tapeMargin -
-      (fullWidthCaret && isLanguageRightToLeft ? fullWidthCaretWidth : 0);
+      (fullWidthCaret && isWordRightToLeft ? fullWidthCaretWidth : 0);

    if (Config.tapeMode === "word" && inputLen > 0) {
      let currentWordWidth = 0;
@ -125,7 +132,7 @@ function getTargetPositionLeft(
      // if current letter has zero width move the caret to previous positive width letter
      if ($(currentWordNodeList[inputLen] as Element).outerWidth(true) === 0)
        currentWordWidth -= lastPositiveLetterWidth;
-      if (isLanguageRightToLeft) currentWordWidth *= -1;
+      if (isWordRightToLeft) currentWordWidth *= -1;
      result += currentWordWidth;
    }
  }
@ -211,6 +218,12 @@ export async function updatePosition(noAnim = false): Promise<void> {
    currentWordNodeList
  );

+  // in zen mode, use the input content to determine word direction
+  const currentWordForDirection =
+    Config.mode === "zen"
+      ? TestInput.input.current
+      : TestWords.words.getCurrent();
+
  const letterPosLeft = getTargetPositionLeft(
    fullWidthCaret,
    isLanguageRightToLeft,
@ -218,7 +231,8 @@ export async function updatePosition(noAnim = false): Promise<void> {
    currentWordNodeList,
    letterWidth,
    wordLen,
-    inputLen
+    inputLen,
+    currentWordForDirection
  );
  const newLeft = letterPosLeft - (fullWidthCaret ? 0 : caretWidth / 2);

--- a/frontend/src/ts/test/pace-caret.ts
+++ b/frontend/src/ts/test/pace-caret.ts
@ -9,6 +9,7 @@ import * as TestState from "./test-state";
 import * as ConfigEvent from "../observables/config-event";
 import { convertRemToPixels } from "../utils/numbers";
 import { getActiveFunboxes } from "./funbox/list";
+import { getWordDirection } from "../utils/strings";

 type Settings = {
  wpm: number;
@ -53,12 +54,19 @@ async function resetCaretPosition(): Promise<void> {
  const currentLanguage = await JSONData.getCurrentLanguage(Config.language);
  const isLanguageRightToLeft = currentLanguage.rightToLeft;

+  const currentWord = TestWords.words.get(settings?.currentWordIndex ?? 0);
+
+  const isWordRightToLeft = getWordDirection(
+    currentWord,
+    isLanguageRightToLeft ?? false
+  );
+
  caret.stop(true, true).animate(
    {
      top: firstLetter.offsetTop - firstLetterHeight / 4,
      left:
        firstLetter.offsetLeft +
-        (isLanguageRightToLeft ? firstLetter.offsetWidth : 0),
+        (isWordRightToLeft ? firstLetter.offsetWidth : 0),
    },
    0,
    "linear"
@ -231,6 +239,12 @@ export async function update(expectedStepEnd: number): Promise<void> {
      );
      const isLanguageRightToLeft = currentLanguage.rightToLeft;

+      const currentWord = TestWords.words.get(settings.currentWordIndex);
+
+      const isWordRightToLeft = getWordDirection(
+        currentWord,
+        isLanguageRightToLeft ?? false
+      );
      newTop =
        word.offsetTop +
        currentLetter.offsetTop -
@ -240,13 +254,13 @@ export async function update(expectedStepEnd: number): Promise<void> {
          word.offsetLeft +
          currentLetter.offsetLeft -
          caretWidth / 2 +
-          (isLanguageRightToLeft ? currentLetterWidth : 0);
+          (isWordRightToLeft ? currentLetterWidth : 0);
      } else {
        newLeft =
          word.offsetLeft +
          currentLetter.offsetLeft -
          caretWidth / 2 +
-          (isLanguageRightToLeft ? 0 : currentLetterWidth);
+          (isWordRightToLeft ? 0 : currentLetterWidth);
      }
      caret.removeClass("hidden");
    } catch (e) {
--- a/frontend/src/ts/test/test-logic.ts
+++ b/frontend/src/ts/test/test-logic.ts
@ -162,6 +162,8 @@ export function restart(options = {} as RestartOptions): void {
  };

  options = { ...defaultOptions, ...options };
+  Strings.clearWordDirectionCache();
+
  const animationTime = options.noAnim ? 0 : Misc.applyReducedMotion(125);

  const noQuit = isFunboxActive("no_quit");
--- a/frontend/src/ts/utils/strings.ts
+++ b/frontend/src/ts/utils/strings.ts
@ -184,3 +184,59 @@ export function replaceControlCharacters(textToClear: string): string {

  return textToClear;
 }
+
+/**
+ * Matches a single character that belongs to a right-to-left script:
+ * - U+0590-U+08FF: Hebrew, Arabic, Syriac, Arabic Supplement, Thaana, NKo,
+ *   Samaritan, Mandaic, Syriac Supplement and Arabic Extended-A/B
+ * - U+FB1D-U+FDFF: Hebrew presentation forms and Arabic Presentation Forms-A
+ * - U+FE70-U+FEFC: Arabic Presentation Forms-B; the range stops before U+FEFF
+ *   because that code point is the byte order mark, not a letter
+ * Kept at module scope so the pattern is created once instead of per call.
+ */
+const rtlCharacterPattern = /[\u0590-\u08FF\uFB1D-\uFDFF\uFE70-\uFEFC]/;
+
+/**
+ * Detect if a word contains RTL (Right-to-Left) characters.
+ * This is for test scenarios where individual words may have different directions.
+ * A single RTL character anywhere in the word is enough for a positive result.
+ * @param word the word to check for RTL characters
+ * @returns true if the word contains RTL characters, false otherwise
+ */
+function hasRTLCharacters(word: string): boolean {
+  // an empty (or, at runtime, missing) word has nothing to inspect
+  if (!word) return false;
+
+  return rtlCharacterPattern.test(word);
+}
+
+/**
+ * Cache for word direction to avoid repeated calculations per word.
+ * Keyed by the stripped core of the word; the value is true when that core
+ * was detected as right-to-left. The map is never reassigned, only emptied
+ * through clearWordDirectionCache, so it is declared const.
+ */
+const wordDirectionCache = new Map<string, boolean>();
+
+/**
+ * Remove every cached word direction so later lookups are computed again.
+ */
+export function clearWordDirectionCache(): void {
+  wordDirectionCache.clear();
+}
+
+/**
+ * Resolve the direction of a single word.
+ * Results are memoised in wordDirectionCache, keyed by the word with its
+ * leading and trailing punctuation, symbols and whitespace removed.
+ * @param word the word to inspect; may be undefined when there is no current word
+ * @param languageRTL direction of the active language, used as the fallback
+ * @returns true for right-to-left, false for left-to-right
+ */
+export function getWordDirection(
+  word: string | undefined,
+  languageRTL: boolean
+): boolean {
+  // no word at all: the language decides
+  if (word === undefined || word === "") return languageRTL;
+
+  // Trim punctuation, symbols and whitespace from both ends so attached
+  // opposite-direction marks such as "word؟" or "،word" cannot flip the result
+  const strippedWord = word.replace(
+    /^[\p{P}\p{S}\s]+|[\p{P}\p{S}\s]+$/gu,
+    ""
+  );
+
+  // only neutral characters were present: the language decides
+  if (strippedWord === "") return languageRTL;
+
+  // look up by the stripped form so "word" and "word؟" share one entry
+  let isRightToLeft = wordDirectionCache.get(strippedWord);
+  if (isRightToLeft === undefined) {
+    isRightToLeft = hasRTLCharacters(strippedWord);
+    wordDirectionCache.set(strippedWord, isRightToLeft);
+  }
+
+  return isRightToLeft;
+}
+
+// Module-private helpers exposed only so unit tests can call them directly
+export const __testing = { hasRTLCharacters };