From 01d8363e19e45516a6a3853aeda73bed5e6b92f9 Mon Sep 17 00:00:00 2001
From: Seif Soliman <byseif21@gmail.com>
Date: Thu, 4 Sep 2025 13:36:59 +0300
Subject: [PATCH] impr(caret): handle mixed language direction (@byseif21)
 (#6695)

### Description
enhances the caret positioning logic to support mixed language
directions (LTR and RTL) within words. It introduces a new
hasRTLCharacters utility function to detect RTL characters in individual
words, allowing the caret to adjust dynamically based on word-specific
direction rather than relying solely on the language's default direction

#### notes:
* tested no affect to the normal single direction.
* no tap mode handle included
 * related #6694 #6666

---------

Co-authored-by: Jack <jack@monkeytype.com>
---
 frontend/__tests__/utils/strings.spec.ts | 237 ++++++++++++++++++++++-
 frontend/src/ts/test/caret.ts            |  28 ++-
 frontend/src/ts/test/pace-caret.ts       |  20 +-
 frontend/src/ts/test/test-logic.ts       |   2 +
 frontend/src/ts/utils/strings.ts         |  56 ++++++
 5 files changed, 332 insertions(+), 11 deletions(-)

diff --git a/frontend/__tests__/utils/strings.spec.ts b/frontend/__tests__/utils/strings.spec.ts
index bed750bd1..eaa0cd72f 100644
--- a/frontend/__tests__/utils/strings.spec.ts
+++ b/frontend/__tests__/utils/strings.spec.ts
@@ -1,4 +1,4 @@
-import { describe, it, expect } from "vitest";
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
 import * as Strings from "../../src/ts/utils/strings";
 
 describe("string utils", () => {
@@ -66,4 +66,239 @@ describe("string utils", () => {
       }
     );
   });
+
+  describe("hasRTLCharacters", () => {
+    it.each([
+      // LTR characters should return false
+      [false, "hello", "basic Latin text"],
+      [false, "world123", "Latin text with numbers"],
+      [false, "test!", "Latin text with punctuation"],
+      [false, "ABC", "uppercase Latin text"],
+      [false, "", "empty string"],
+      [false, "123", "numbers only"],
+      [false, "!@#$%", "punctuation and symbols only"],
+      [false, "   ", "whitespace only"],
+
+      // Common LTR scripts
+      [false, "Здравствуй", "Cyrillic text"],
+      [false, "Bonjour", "Latin with accents"],
+      [false, "Καλημέρα", "Greek text"],
+      [false, "こんにちは", "Japanese Hiragana"],
+      [false, "你好", "Chinese characters"],
+      [false, "안녕하세요", "Korean text"],
+
+      // RTL characters should return true - Arabic
+      [true, "مرحبا", "Arabic text"],
+      [true, "السلام", "Arabic phrase"],
+      [true, "العربية", "Arabic word"],
+      [true, "٠١٢٣٤٥٦٧٨٩", "Arabic-Indic digits"],
+
+      // RTL characters should return true - Hebrew
+      [true, "שלום", "Hebrew text"],
+      [true, "עברית", "Hebrew word"],
+      [true, "ברוך", "Hebrew name"],
+
+      // RTL characters should return true - Persian/Farsi
+      [true, "سلام", "Persian text"],
+      [true, "فارسی", "Persian word"],
+
+      // Mixed content (should return true if ANY RTL characters are present)
+      [true, "hello مرحبا", "mixed LTR and Arabic"],
+      [true, "123 שלום", "numbers and Hebrew"],
+      [true, "test سلام!", "Latin, Persian, and punctuation"],
+      [true, "مرحبا123", "Arabic with numbers"],
+      [true, "hello؟", "Latin with Arabic punctuation"],
+
+      // Edge cases with various Unicode ranges
+      [false, "𝕳𝖊𝖑𝖑𝖔", "mathematical bold text (LTR)"],
+      [false, "🌍🌎🌏", "emoji"],
+    ] as const)(
+      "should return %s for word '%s' (%s)",
+      (expected: boolean, word: string, _description: string) => {
+        expect(Strings.__testing.hasRTLCharacters(word)).toBe(expected);
+      }
+    );
+  });
+
+  describe("getWordDirection", () => {
+    beforeEach(() => {
+      Strings.clearWordDirectionCache();
+    });
+
+    it.each([
+      // Basic functionality - should use hasRTLCharacters result when word has core content
+      [false, "hello", false, "LTR word in LTR language"],
+      [
+        false,
+        "hello",
+        true,
+        "LTR word in RTL language (word direction overrides language)",
+      ],
+      [
+        true,
+        "مرحبا",
+        false,
+        "RTL word in LTR language (word direction overrides language)",
+      ],
+      [true, "مرحبا", true, "RTL word in RTL language"],
+
+      // Punctuation stripping behavior
+      [false, "hello!", false, "LTR word with trailing punctuation"],
+      [false, "!hello", false, "LTR word with leading punctuation"],
+      [false, "!hello!", false, "LTR word with surrounding punctuation"],
+      [true, "مرحبا؟", false, "RTL word with trailing punctuation"],
+      [true, "؟مرحبا", false, "RTL word with leading punctuation"],
+      [true, "؟مرحبا؟", false, "RTL word with surrounding punctuation"],
+
+      // Fallback to language direction for empty/neutral content
+      [false, "", false, "empty string falls back to LTR language"],
+      [true, "", true, "empty string falls back to RTL language"],
+      [false, "!!!", false, "punctuation only falls back to LTR language"],
+      [true, "!!!", true, "punctuation only falls back to RTL language"],
+      [false, "   ", false, "whitespace only falls back to LTR language"],
+      [true, "   ", true, "whitespace only falls back to RTL language"],
+
+      // Numbers behavior (numbers are neutral, follow hasRTLCharacters detection)
+      [false, "123", false, "regular digits are not RTL"],
+      [false, "123", true, "regular digits are not RTL regardless of language"],
+      [true, "١٢٣", false, "Arabic-Indic digits are detected as RTL"],
+      [true, "١٢٣", true, "Arabic-Indic digits are detected as RTL"],
+    ] as const)(
+      "should return %s for word '%s' with languageRTL=%s (%s)",
+      (
+        expected: boolean,
+        word: string,
+        languageRTL: boolean,
+        _description: string
+      ) => {
+        expect(Strings.getWordDirection(word, languageRTL)).toBe(expected);
+      }
+    );
+
+    it("should return languageRTL for undefined word", () => {
+      expect(Strings.getWordDirection(undefined, false)).toBe(false);
+      expect(Strings.getWordDirection(undefined, true)).toBe(true);
+    });
+
+    describe("caching", () => {
+      let mapGetSpy: ReturnType<typeof vi.spyOn>;
+      let mapSetSpy: ReturnType<typeof vi.spyOn>;
+      let mapClearSpy: ReturnType<typeof vi.spyOn>;
+
+      beforeEach(() => {
+        mapGetSpy = vi.spyOn(Map.prototype, "get");
+        mapSetSpy = vi.spyOn(Map.prototype, "set");
+        mapClearSpy = vi.spyOn(Map.prototype, "clear");
+      });
+
+      afterEach(() => {
+        mapGetSpy.mockRestore();
+        mapSetSpy.mockRestore();
+        mapClearSpy.mockRestore();
+      });
+
+      it("should use cache for repeated calls", () => {
+        // First call should cache the result (cache miss)
+        const result1 = Strings.getWordDirection("hello", false);
+        expect(result1).toBe(false);
+        expect(mapSetSpy).toHaveBeenCalledWith("hello", false);
+
+        // Reset spies to check second call
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        // Second call should use cache (cache hit)
+        const result2 = Strings.getWordDirection("hello", false);
+        expect(result2).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("hello");
+        expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again
+
+        // Cache should work regardless of language direction for same word
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        const result3 = Strings.getWordDirection("hello", true);
+        expect(result3).toBe(false); // Still false because "hello" is LTR regardless of language
+        expect(mapGetSpy).toHaveBeenCalledWith("hello");
+        expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again
+      });
+
+      it("should cache based on core word without punctuation", () => {
+        // First call should cache the result for core "hello"
+        const result1 = Strings.getWordDirection("hello", false);
+        expect(result1).toBe(false);
+        expect(mapSetSpy).toHaveBeenCalledWith("hello", false);
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        // These should all use the same cache entry since they have the same core
+        const result2 = Strings.getWordDirection("hello!", false);
+        expect(result2).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("hello");
+        expect(mapSetSpy).not.toHaveBeenCalled();
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        const result3 = Strings.getWordDirection("!hello", false);
+        expect(result3).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("hello");
+        expect(mapSetSpy).not.toHaveBeenCalled();
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        const result4 = Strings.getWordDirection("!hello!", false);
+        expect(result4).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("hello");
+        expect(mapSetSpy).not.toHaveBeenCalled();
+      });
+
+      it("should handle cache clearing", () => {
+        // Cache a result
+        Strings.getWordDirection("test", false);
+        expect(mapSetSpy).toHaveBeenCalledWith("test", false);
+
+        // Clear cache
+        Strings.clearWordDirectionCache();
+        expect(mapClearSpy).toHaveBeenCalled();
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+        mapClearSpy.mockClear();
+
+        // Should work normally after cache clear (cache miss again)
+        const result = Strings.getWordDirection("test", false);
+        expect(result).toBe(false);
+        expect(mapSetSpy).toHaveBeenCalledWith("test", false);
+      });
+
+      it("should demonstrate cache miss vs cache hit behavior", () => {
+        // Test cache miss - first time seeing this word
+        const result1 = Strings.getWordDirection("unique", false);
+        expect(result1).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("unique");
+        expect(mapSetSpy).toHaveBeenCalledWith("unique", false);
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        // Test cache hit - same word again
+        const result2 = Strings.getWordDirection("unique", false);
+        expect(result2).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("unique");
+        expect(mapSetSpy).not.toHaveBeenCalled(); // No cache set on hit
+
+        mapGetSpy.mockClear();
+        mapSetSpy.mockClear();
+
+        // Test cache miss - different word
+        const result3 = Strings.getWordDirection("different", false);
+        expect(result3).toBe(false);
+        expect(mapGetSpy).toHaveBeenCalledWith("different");
+        expect(mapSetSpy).toHaveBeenCalledWith("different", false);
+      });
+    });
+  });
 });
diff --git a/frontend/src/ts/test/caret.ts b/frontend/src/ts/test/caret.ts
index 0fcca46f1..df292f0d4 100644
--- a/frontend/src/ts/test/caret.ts
+++ b/frontend/src/ts/test/caret.ts
@@ -6,7 +6,7 @@ import * as TestState from "../test/test-state";
 import * as TestWords from "./test-words";
 import { prefersReducedMotion } from "../utils/misc";
 import { convertRemToPixels } from "../utils/numbers";
-import { splitIntoCharacters } from "../utils/strings";
+import { splitIntoCharacters, getWordDirection } from "../utils/strings";
 import { safeNumber } from "@monkeytype/util/numbers";
 import { subscribe } from "../observables/config-event";
 
@@ -59,11 +59,18 @@ function getTargetPositionLeft(
   currentWordNodeList: NodeListOf<HTMLElement>,
   fullWidthCaretWidth: number,
   wordLen: number,
-  inputLen: number
+  inputLen: number,
+  currentWord?: string
 ): number {
   const invisibleExtraLetters = Config.blindMode || Config.hideExtraLetters;
   let result = 0;
 
+  // use word-specific direction if available and different from language direction
+  const isWordRightToLeft = getWordDirection(
+    currentWord,
+    isLanguageRightToLeft
+  );
+
   if (Config.tapeMode === "off") {
     let positionOffsetToWord = 0;
 
@@ -71,7 +78,7 @@ function getTargetPositionLeft(
     const lastWordLetter = currentWordNodeList[wordLen - 1];
     const lastInputLetter = currentWordNodeList[inputLen - 1];
 
-    if (isLanguageRightToLeft) {
+    if (isWordRightToLeft) {
       if (inputLen <= wordLen && currentLetter) {
         // at word beginning in zen mode both lengths are 0, but currentLetter is defined "_"
         positionOffsetToWord =
@@ -104,13 +111,13 @@ function getTargetPositionLeft(
       $(document.querySelector("#wordsWrapper") as HTMLElement).width() ?? 0;
     const tapeMargin =
       wordsWrapperWidth *
-      (isLanguageRightToLeft
+      (isWordRightToLeft
         ? 1 - Config.tapeMargin / 100
         : Config.tapeMargin / 100);
 
     result =
       tapeMargin -
-      (fullWidthCaret && isLanguageRightToLeft ? fullWidthCaretWidth : 0);
+      (fullWidthCaret && isWordRightToLeft ? fullWidthCaretWidth : 0);
 
     if (Config.tapeMode === "word" && inputLen > 0) {
       let currentWordWidth = 0;
@@ -125,7 +132,7 @@ function getTargetPositionLeft(
       // if current letter has zero width move the caret to previous positive width letter
       if ($(currentWordNodeList[inputLen] as Element).outerWidth(true) === 0)
         currentWordWidth -= lastPositiveLetterWidth;
-      if (isLanguageRightToLeft) currentWordWidth *= -1;
+      if (isWordRightToLeft) currentWordWidth *= -1;
       result += currentWordWidth;
     }
   }
@@ -211,6 +218,12 @@ export async function updatePosition(noAnim = false): Promise<void> {
     currentWordNodeList
   );
 
+  // in zen mode, use the input content to determine word direction
+  const currentWordForDirection =
+    Config.mode === "zen"
+      ? TestInput.input.current
+      : TestWords.words.getCurrent();
+
   const letterPosLeft = getTargetPositionLeft(
     fullWidthCaret,
     isLanguageRightToLeft,
@@ -218,7 +231,8 @@ export async function updatePosition(noAnim = false): Promise<void> {
     currentWordNodeList,
     letterWidth,
     wordLen,
-    inputLen
+    inputLen,
+    currentWordForDirection
   );
   const newLeft = letterPosLeft - (fullWidthCaret ? 0 : caretWidth / 2);
 
diff --git a/frontend/src/ts/test/pace-caret.ts b/frontend/src/ts/test/pace-caret.ts
index a57ca1867..7b01633b2 100644
--- a/frontend/src/ts/test/pace-caret.ts
+++ b/frontend/src/ts/test/pace-caret.ts
@@ -9,6 +9,7 @@ import * as TestState from "./test-state";
 import * as ConfigEvent from "../observables/config-event";
 import { convertRemToPixels } from "../utils/numbers";
 import { getActiveFunboxes } from "./funbox/list";
+import { getWordDirection } from "../utils/strings";
 
 type Settings = {
   wpm: number;
@@ -53,12 +54,19 @@ async function resetCaretPosition(): Promise<void> {
   const currentLanguage = await JSONData.getCurrentLanguage(Config.language);
   const isLanguageRightToLeft = currentLanguage.rightToLeft;
 
+  const currentWord = TestWords.words.get(settings?.currentWordIndex ?? 0);
+
+  const isWordRightToLeft = getWordDirection(
+    currentWord,
+    isLanguageRightToLeft ?? false
+  );
+
   caret.stop(true, true).animate(
     {
       top: firstLetter.offsetTop - firstLetterHeight / 4,
       left:
         firstLetter.offsetLeft +
-        (isLanguageRightToLeft ? firstLetter.offsetWidth : 0),
+        (isWordRightToLeft ? firstLetter.offsetWidth : 0),
     },
     0,
     "linear"
@@ -231,6 +239,12 @@ export async function update(expectedStepEnd: number): Promise<void> {
       );
       const isLanguageRightToLeft = currentLanguage.rightToLeft;
 
+      const currentWord = TestWords.words.get(settings.currentWordIndex);
+
+      const isWordRightToLeft = getWordDirection(
+        currentWord,
+        isLanguageRightToLeft ?? false
+      );
       newTop =
         word.offsetTop +
         currentLetter.offsetTop -
@@ -240,13 +254,13 @@ export async function update(expectedStepEnd: number): Promise<void> {
           word.offsetLeft +
           currentLetter.offsetLeft -
           caretWidth / 2 +
-          (isLanguageRightToLeft ? currentLetterWidth : 0);
+          (isWordRightToLeft ? currentLetterWidth : 0);
       } else {
         newLeft =
           word.offsetLeft +
           currentLetter.offsetLeft -
           caretWidth / 2 +
-          (isLanguageRightToLeft ? 0 : currentLetterWidth);
+          (isWordRightToLeft ? 0 : currentLetterWidth);
       }
       caret.removeClass("hidden");
     } catch (e) {
diff --git a/frontend/src/ts/test/test-logic.ts b/frontend/src/ts/test/test-logic.ts
index d74a77222..d0735c6ae 100644
--- a/frontend/src/ts/test/test-logic.ts
+++ b/frontend/src/ts/test/test-logic.ts
@@ -162,6 +162,8 @@ export function restart(options = {} as RestartOptions): void {
   };
 
   options = { ...defaultOptions, ...options };
+  Strings.clearWordDirectionCache();
+
   const animationTime = options.noAnim ? 0 : Misc.applyReducedMotion(125);
 
   const noQuit = isFunboxActive("no_quit");
diff --git a/frontend/src/ts/utils/strings.ts b/frontend/src/ts/utils/strings.ts
index dc72f3712..669b2503e 100644
--- a/frontend/src/ts/utils/strings.ts
+++ b/frontend/src/ts/utils/strings.ts
@@ -184,3 +184,59 @@ export function replaceControlCharacters(textToClear: string): string {
 
   return textToClear;
 }
+
+/**
+ * Detect if a word contains RTL (Right-to-Left) characters.
+ * This is for test scenarios where individual words may have different directions.
+ * Uses a simple regex pattern that covers all common RTL scripts.
+ * @param word the word to check for RTL characters
+ * @returns true if the word contains RTL characters, false otherwise
+ */
+function hasRTLCharacters(word: string): boolean {
+  if (!word || word.length === 0) {
+    return false;
+  }
+
+  // This covers Arabic, Farsi, Urdu, and other RTL scripts
+  const rtlPattern =
+    /[\u0590-\u05FF\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]/;
+
+  return rtlPattern.test(word);
+}
+
+/**
+ * Cache for word direction to avoid repeated calculations per word
+ * Keyed by the stripped core of the word; can be manually cleared when needed
+ */
+let wordDirectionCache: Map<string, boolean> = new Map();
+
+export function clearWordDirectionCache(): void {
+  wordDirectionCache.clear();
+}
+
+export function getWordDirection(
+  word: string | undefined,
+  languageRTL: boolean
+): boolean {
+  if (word === undefined || word.length === 0) return languageRTL;
+
+  // Strip leading/trailing punctuation and whitespace so attached opposite-direction
+  // punctuation like "word؟" or "،word" doesn't flip the direction detection
+  // and if only punctuation/symbols/whitespace, use main language direction
+  const core = word.replace(/^[\p{P}\p{S}\s]+|[\p{P}\p{S}\s]+$/gu, "");
+  if (core.length === 0) return languageRTL;
+
+  // cache by core to handle variants like "word" vs "word؟"
+  const cached = wordDirectionCache.get(core);
+  if (cached !== undefined) return cached;
+
+  const result = hasRTLCharacters(core);
+  wordDirectionCache.set(core, result);
+
+  return result;
+}
+
+// Export testing utilities for unit tests
+export const __testing = {
+  hasRTLCharacters,
+};