From 01d8363e19e45516a6a3853aeda73bed5e6b92f9 Mon Sep 17 00:00:00 2001 From: Seif Soliman Date: Thu, 4 Sep 2025 13:36:59 +0300 Subject: [PATCH] impr(caret): handle mixed language direction (@byseif21) (#6695) ### Description enhances the caret positioning logic to support mixed language directions (LTR and RTL) within words. It introduces a new hasRTLCharacters utility function to detect RTL characters in individual words, allowing the caret to adjust dynamically based on word-specific direction rather than relying solely on the language's default direction #### notes: * tested no affect to the normal single direction. * no tap mode handle included * related #6694 #6666 --------- Co-authored-by: Jack --- frontend/__tests__/utils/strings.spec.ts | 237 ++++++++++++++++++++++- frontend/src/ts/test/caret.ts | 28 ++- frontend/src/ts/test/pace-caret.ts | 20 +- frontend/src/ts/test/test-logic.ts | 2 + frontend/src/ts/utils/strings.ts | 56 ++++++ 5 files changed, 332 insertions(+), 11 deletions(-) diff --git a/frontend/__tests__/utils/strings.spec.ts b/frontend/__tests__/utils/strings.spec.ts index bed750bd1..eaa0cd72f 100644 --- a/frontend/__tests__/utils/strings.spec.ts +++ b/frontend/__tests__/utils/strings.spec.ts @@ -1,4 +1,4 @@ -import { describe, it, expect } from "vitest"; +import { describe, it, expect, beforeEach, afterEach, vi } from "vitest"; import * as Strings from "../../src/ts/utils/strings"; describe("string utils", () => { @@ -66,4 +66,239 @@ describe("string utils", () => { } ); }); + + describe("hasRTLCharacters", () => { + it.each([ + // LTR characters should return false + [false, "hello", "basic Latin text"], + [false, "world123", "Latin text with numbers"], + [false, "test!", "Latin text with punctuation"], + [false, "ABC", "uppercase Latin text"], + [false, "", "empty string"], + [false, "123", "numbers only"], + [false, "!@#$%", "punctuation and symbols only"], + [false, " ", "whitespace only"], + + // Common LTR scripts + [false, "Здравствуй", "Cyrillic text"], + [false, "Bonjour", "Latin with accents"], + [false, "Καλημέρα", "Greek text"], + [false, "こんにちは", "Japanese Hiragana"], + [false, "你好", "Chinese characters"], + [false, "안녕하세요", "Korean text"], + + // RTL characters should return true - Arabic + [true, "مرحبا", "Arabic text"], + [true, "السلام", "Arabic phrase"], + [true, "العربية", "Arabic word"], + [true, "٠١٢٣٤٥٦٧٨٩", "Arabic-Indic digits"], + + // RTL characters should return true - Hebrew + [true, "שלום", "Hebrew text"], + [true, "עברית", "Hebrew word"], + [true, "ברוך", "Hebrew name"], + + // RTL characters should return true - Persian/Farsi + [true, "سلام", "Persian text"], + [true, "فارسی", "Persian word"], + + // Mixed content (should return true if ANY RTL characters are present) + [true, "hello مرحبا", "mixed LTR and Arabic"], + [true, "123 שלום", "numbers and Hebrew"], + [true, "test سلام!", "Latin, Persian, and punctuation"], + [true, "مرحبا123", "Arabic with numbers"], + [true, "hello؟", "Latin with Arabic punctuation"], + + // Edge cases with various Unicode ranges + [false, "𝕳𝖊𝖑𝖑𝖔", "mathematical bold text (LTR)"], + [false, "🌍🌎🌏", "emoji"], + ] as const)( + "should return %s for word '%s' (%s)", + (expected: boolean, word: string, _description: string) => { + expect(Strings.__testing.hasRTLCharacters(word)).toBe(expected); + } + ); + }); + + describe("getWordDirection", () => { + beforeEach(() => { + Strings.clearWordDirectionCache(); + }); + + it.each([ + // Basic functionality - should use hasRTLCharacters result when word has core content + [false, "hello", false, "LTR word in LTR language"], + [ + false, + "hello", + true, + "LTR word in RTL language (word direction overrides language)", + ], + [ + true, + "مرحبا", + false, + "RTL word in LTR language (word direction overrides language)", + ], + [true, "مرحبا", true, "RTL word in RTL language"], + + // Punctuation stripping behavior + [false, "hello!", false, "LTR word with trailing punctuation"], + [false, "!hello", false, "LTR word with leading punctuation"], + [false, "!hello!", false, "LTR word with surrounding punctuation"], + [true, "مرحبا؟", false, "RTL word with trailing punctuation"], + [true, "؟مرحبا", false, "RTL word with leading punctuation"], + [true, "؟مرحبا؟", false, "RTL word with surrounding punctuation"], + + // Fallback to language direction for empty/neutral content + [false, "", false, "empty string falls back to LTR language"], + [true, "", true, "empty string falls back to RTL language"], + [false, "!!!", false, "punctuation only falls back to LTR language"], + [true, "!!!", true, "punctuation only falls back to RTL language"], + [false, " ", false, "whitespace only falls back to LTR language"], + [true, " ", true, "whitespace only falls back to RTL language"], + + // Numbers behavior (numbers are neutral, follow hasRTLCharacters detection) + [false, "123", false, "regular digits are not RTL"], + [false, "123", true, "regular digits are not RTL regardless of language"], + [true, "١٢٣", false, "Arabic-Indic digits are detected as RTL"], + [true, "١٢٣", true, "Arabic-Indic digits are detected as RTL"], + ] as const)( + "should return %s for word '%s' with languageRTL=%s (%s)", + ( + expected: boolean, + word: string, + languageRTL: boolean, + _description: string + ) => { + expect(Strings.getWordDirection(word, languageRTL)).toBe(expected); + } + ); + + it("should return languageRTL for undefined word", () => { + expect(Strings.getWordDirection(undefined, false)).toBe(false); + expect(Strings.getWordDirection(undefined, true)).toBe(true); + }); + + describe("caching", () => { + let mapGetSpy: ReturnType; + let mapSetSpy: ReturnType; + let mapClearSpy: ReturnType; + + beforeEach(() => { + mapGetSpy = vi.spyOn(Map.prototype, "get"); + mapSetSpy = vi.spyOn(Map.prototype, "set"); + mapClearSpy = vi.spyOn(Map.prototype, "clear"); + }); + + afterEach(() => { + mapGetSpy.mockRestore(); + mapSetSpy.mockRestore(); + mapClearSpy.mockRestore(); + }); + + it("should use cache for repeated calls", () => { + // First call should cache the result (cache miss) + const result1 = Strings.getWordDirection("hello", false); + expect(result1).toBe(false); + expect(mapSetSpy).toHaveBeenCalledWith("hello", false); + + // Reset spies to check second call + mapGetSpy.mockClear(); + mapSetSpy.mockClear(); + + // Second call should use cache (cache hit) + const result2 = Strings.getWordDirection("hello", false); + expect(result2).toBe(false); + expect(mapGetSpy).toHaveBeenCalledWith("hello"); + expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again + + // Cache should work regardless of language direction for same word + mapGetSpy.mockClear(); + mapSetSpy.mockClear(); + + const result3 = Strings.getWordDirection("hello", true); + expect(result3).toBe(false); // Still false because "hello" is LTR regardless of language + expect(mapGetSpy).toHaveBeenCalledWith("hello"); + expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again + }); + + it("should cache based on core word without punctuation", () => { + // First call should cache the result for core "hello" + const result1 = Strings.getWordDirection("hello", false); + expect(result1).toBe(false); + expect(mapSetSpy).toHaveBeenCalledWith("hello", false); + + mapGetSpy.mockClear(); + mapSetSpy.mockClear(); + + // These should all use the same cache entry since they have the same core + const result2 = Strings.getWordDirection("hello!", false); + expect(result2).toBe(false); + expect(mapGetSpy).toHaveBeenCalledWith("hello"); + expect(mapSetSpy).not.toHaveBeenCalled(); + + mapGetSpy.mockClear(); + mapSetSpy.mockClear(); + + const result3 = Strings.getWordDirection("!hello", false); + expect(result3).toBe(false); + expect(mapGetSpy).toHaveBeenCalledWith("hello"); + expect(mapSetSpy).not.toHaveBeenCalled(); + + mapGetSpy.mockClear(); + mapSetSpy.mockClear(); + + const result4 = Strings.getWordDirection("!hello!", false); + expect(result4).toBe(false); + expect(mapGetSpy).toHaveBeenCalledWith("hello"); + expect(mapSetSpy).not.toHaveBeenCalled(); + }); + + it("should handle cache clearing", () => { + // Cache a result + Strings.getWordDirection("test", false); + expect(mapSetSpy).toHaveBeenCalledWith("test", false); + + // Clear cache + Strings.clearWordDirectionCache(); + expect(mapClearSpy).toHaveBeenCalled(); + + mapGetSpy.mockClear(); + mapSetSpy.mockClear(); + mapClearSpy.mockClear(); + + // Should work normally after cache clear (cache miss again) + const result = Strings.getWordDirection("test", false); + expect(result).toBe(false); + expect(mapSetSpy).toHaveBeenCalledWith("test", false); + }); + + it("should demonstrate cache miss vs cache hit behavior", () => { + // Test cache miss - first time seeing this word + const result1 = Strings.getWordDirection("unique", false); + expect(result1).toBe(false); + expect(mapGetSpy).toHaveBeenCalledWith("unique"); + expect(mapSetSpy).toHaveBeenCalledWith("unique", false); + + mapGetSpy.mockClear(); + mapSetSpy.mockClear(); + + // Test cache hit - same word again + const result2 = Strings.getWordDirection("unique", false); + expect(result2).toBe(false); + expect(mapGetSpy).toHaveBeenCalledWith("unique"); + expect(mapSetSpy).not.toHaveBeenCalled(); // No cache set on hit + + mapGetSpy.mockClear(); + mapSetSpy.mockClear(); + + // Test cache miss - different word + const result3 = Strings.getWordDirection("different", false); + expect(result3).toBe(false); + expect(mapGetSpy).toHaveBeenCalledWith("different"); + expect(mapSetSpy).toHaveBeenCalledWith("different", false); + }); + }); + }); }); diff --git a/frontend/src/ts/test/caret.ts b/frontend/src/ts/test/caret.ts index 0fcca46f1..df292f0d4 100644 --- a/frontend/src/ts/test/caret.ts +++ b/frontend/src/ts/test/caret.ts @@ -6,7 +6,7 @@ import * as TestState from "../test/test-state"; import * as TestWords from "./test-words"; import { prefersReducedMotion } from "../utils/misc"; import { convertRemToPixels } from "../utils/numbers"; -import { splitIntoCharacters } from "../utils/strings"; +import { splitIntoCharacters, getWordDirection } from "../utils/strings"; import { safeNumber } from "@monkeytype/util/numbers"; import { subscribe } from "../observables/config-event"; @@ -59,11 +59,18 @@ function getTargetPositionLeft( currentWordNodeList: NodeListOf, fullWidthCaretWidth: number, wordLen: number, - inputLen: number + inputLen: number, + currentWord?: string ): number { const invisibleExtraLetters = Config.blindMode || Config.hideExtraLetters; let result = 0; + // use word-specific direction if available and different from language direction + const isWordRightToLeft = getWordDirection( + currentWord, + isLanguageRightToLeft + ); + if (Config.tapeMode === "off") { let positionOffsetToWord = 0; @@ -71,7 +78,7 @@ function getTargetPositionLeft( const lastWordLetter = currentWordNodeList[wordLen - 1]; const lastInputLetter = currentWordNodeList[inputLen - 1]; - if (isLanguageRightToLeft) { + if (isWordRightToLeft) { if (inputLen <= wordLen && currentLetter) { // at word beginning in zen mode both lengths are 0, but currentLetter is defined "_" positionOffsetToWord = @@ -104,13 +111,13 @@ function getTargetPositionLeft( $(document.querySelector("#wordsWrapper") as HTMLElement).width() ?? 0; const tapeMargin = wordsWrapperWidth * - (isLanguageRightToLeft + (isWordRightToLeft ? 1 - Config.tapeMargin / 100 : Config.tapeMargin / 100); result = tapeMargin - - (fullWidthCaret && isLanguageRightToLeft ? fullWidthCaretWidth : 0); + (fullWidthCaret && isWordRightToLeft ? fullWidthCaretWidth : 0); if (Config.tapeMode === "word" && inputLen > 0) { let currentWordWidth = 0; @@ -125,7 +132,7 @@ function getTargetPositionLeft( // if current letter has zero width move the caret to previous positive width letter if ($(currentWordNodeList[inputLen] as Element).outerWidth(true) === 0) currentWordWidth -= lastPositiveLetterWidth; - if (isLanguageRightToLeft) currentWordWidth *= -1; + if (isWordRightToLeft) currentWordWidth *= -1; result += currentWordWidth; } } @@ -211,6 +218,12 @@ export async function updatePosition(noAnim = false): Promise { currentWordNodeList ); + // in zen mode, use the input content to determine word direction + const currentWordForDirection = + Config.mode === "zen" + ? TestInput.input.current + : TestWords.words.getCurrent(); + const letterPosLeft = getTargetPositionLeft( fullWidthCaret, isLanguageRightToLeft, @@ -218,7 +231,8 @@ export async function updatePosition(noAnim = false): Promise { currentWordNodeList, letterWidth, wordLen, - inputLen + inputLen, + currentWordForDirection ); const newLeft = letterPosLeft - (fullWidthCaret ? 0 : caretWidth / 2); diff --git a/frontend/src/ts/test/pace-caret.ts b/frontend/src/ts/test/pace-caret.ts index a57ca1867..7b01633b2 100644 --- a/frontend/src/ts/test/pace-caret.ts +++ b/frontend/src/ts/test/pace-caret.ts @@ -9,6 +9,7 @@ import * as TestState from "./test-state"; import * as ConfigEvent from "../observables/config-event"; import { convertRemToPixels } from "../utils/numbers"; import { getActiveFunboxes } from "./funbox/list"; +import { getWordDirection } from "../utils/strings"; type Settings = { wpm: number; @@ -53,12 +54,19 @@ async function resetCaretPosition(): Promise { const currentLanguage = await JSONData.getCurrentLanguage(Config.language); const isLanguageRightToLeft = currentLanguage.rightToLeft; + const currentWord = TestWords.words.get(settings?.currentWordIndex ?? 0); + + const isWordRightToLeft = getWordDirection( + currentWord, + isLanguageRightToLeft ?? false + ); + caret.stop(true, true).animate( { top: firstLetter.offsetTop - firstLetterHeight / 4, left: firstLetter.offsetLeft + - (isLanguageRightToLeft ? firstLetter.offsetWidth : 0), + (isWordRightToLeft ? firstLetter.offsetWidth : 0), }, 0, "linear" @@ -231,6 +239,12 @@ export async function update(expectedStepEnd: number): Promise { ); const isLanguageRightToLeft = currentLanguage.rightToLeft; + const currentWord = TestWords.words.get(settings.currentWordIndex); + + const isWordRightToLeft = getWordDirection( + currentWord, + isLanguageRightToLeft ?? false + ); newTop = word.offsetTop + currentLetter.offsetTop - @@ -240,13 +254,13 @@ export async function update(expectedStepEnd: number): Promise { word.offsetLeft + currentLetter.offsetLeft - caretWidth / 2 + - (isLanguageRightToLeft ? currentLetterWidth : 0); + (isWordRightToLeft ? currentLetterWidth : 0); } else { newLeft = word.offsetLeft + currentLetter.offsetLeft - caretWidth / 2 + - (isLanguageRightToLeft ? 0 : currentLetterWidth); + (isWordRightToLeft ? 0 : currentLetterWidth); } caret.removeClass("hidden"); } catch (e) { diff --git a/frontend/src/ts/test/test-logic.ts b/frontend/src/ts/test/test-logic.ts index d74a77222..d0735c6ae 100644 --- a/frontend/src/ts/test/test-logic.ts +++ b/frontend/src/ts/test/test-logic.ts @@ -162,6 +162,8 @@ export function restart(options = {} as RestartOptions): void { }; options = { ...defaultOptions, ...options }; + Strings.clearWordDirectionCache(); + const animationTime = options.noAnim ? 0 : Misc.applyReducedMotion(125); const noQuit = isFunboxActive("no_quit"); diff --git a/frontend/src/ts/utils/strings.ts b/frontend/src/ts/utils/strings.ts index dc72f3712..669b2503e 100644 --- a/frontend/src/ts/utils/strings.ts +++ b/frontend/src/ts/utils/strings.ts @@ -184,3 +184,59 @@ export function replaceControlCharacters(textToClear: string): string { return textToClear; } + +/** + * Detect if a word contains RTL (Right-to-Left) characters. + * This is for test scenarios where individual words may have different directions. + * Uses a simple regex pattern that covers all common RTL scripts. + * @param word the word to check for RTL characters + * @returns true if the word contains RTL characters, false otherwise + */ +function hasRTLCharacters(word: string): boolean { + if (!word || word.length === 0) { + return false; + } + + // This covers Arabic, Farsi, Urdu, and other RTL scripts + const rtlPattern = + /[\u0590-\u05FF\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]/; + + return rtlPattern.test(word); +} + +/** + * Cache for word direction to avoid repeated calculations per word + * Keyed by the stripped core of the word; can be manually cleared when needed + */ +let wordDirectionCache: Map = new Map(); + +export function clearWordDirectionCache(): void { + wordDirectionCache.clear(); +} + +export function getWordDirection( + word: string | undefined, + languageRTL: boolean +): boolean { + if (word === undefined || word.length === 0) return languageRTL; + + // Strip leading/trailing punctuation and whitespace so attached opposite-direction + // punctuation like "word؟" or "،word" doesn't flip the direction detection + // and if only punctuation/symbols/whitespace, use main language direction + const core = word.replace(/^[\p{P}\p{S}\s]+|[\p{P}\p{S}\s]+$/gu, ""); + if (core.length === 0) return languageRTL; + + // cache by core to handle variants like "word" vs "word؟" + const cached = wordDirectionCache.get(core); + if (cached !== undefined) return cached; + + const result = hasRTLCharacters(core); + wordDirectionCache.set(core, result); + + return result; +} + +// Export testing utilities for unit tests +export const __testing = { + hasRTLCharacters, +};