impr(caret): handle mixed language direction (@byseif21) (#6695)

### Description
Enhances the caret positioning logic to support mixed language
directions (LTR and RTL) within words. It introduces a new
hasRTLCharacters utility function to detect RTL characters in individual
words, allowing the caret to adjust dynamically based on word-specific
direction rather than relying solely on the language's default direction.

#### notes:
* tested: no effect on normal single-direction languages.
* no tap mode handle included 
 * related #6694 #6666

---------

Co-authored-by: Jack <jack@monkeytype.com>
This commit is contained in:
Seif Soliman 2025-09-04 13:36:59 +03:00 committed by GitHub
parent 64473e4e69
commit 01d8363e19
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 332 additions and 11 deletions

View file

@ -1,4 +1,4 @@
import { describe, it, expect } from "vitest";
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
import * as Strings from "../../src/ts/utils/strings";
describe("string utils", () => {
@ -66,4 +66,239 @@ describe("string utils", () => {
}
);
});
describe("hasRTLCharacters", () => {
  // each row: [expected result, input word, human-readable description]
  it.each([
    // LTR characters should return false
    [false, "hello", "basic Latin text"],
    [false, "world123", "Latin text with numbers"],
    [false, "test!", "Latin text with punctuation"],
    [false, "ABC", "uppercase Latin text"],
    [false, "", "empty string"],
    [false, "123", "numbers only"],
    [false, "!@#$%", "punctuation and symbols only"],
    [false, " ", "whitespace only"],
    // Common LTR scripts
    [false, "Здравствуй", "Cyrillic text"],
    // the sample must actually contain accented letters to exercise this case
    [false, "Français", "Latin with accents"],
    [false, "Καλημέρα", "Greek text"],
    [false, "こんにちは", "Japanese Hiragana"],
    [false, "你好", "Chinese characters"],
    [false, "안녕하세요", "Korean text"],
    // RTL characters should return true - Arabic
    [true, "مرحبا", "Arabic text"],
    [true, "السلام", "Arabic phrase"],
    [true, "العربية", "Arabic word"],
    // these digits live inside the Arabic block, so they count as RTL
    [true, "٠١٢٣٤٥٦٧٨٩", "Arabic-Indic digits"],
    // RTL characters should return true - Hebrew
    [true, "שלום", "Hebrew text"],
    [true, "עברית", "Hebrew word"],
    [true, "ברוך", "Hebrew name"],
    // RTL characters should return true - Persian/Farsi
    [true, "سلام", "Persian text"],
    [true, "فارسی", "Persian word"],
    // Mixed content (should return true if ANY RTL characters are present)
    [true, "hello مرحبا", "mixed LTR and Arabic"],
    [true, "123 שלום", "numbers and Hebrew"],
    [true, "test سلام!", "Latin, Persian, and punctuation"],
    [true, "مرحبا123", "Arabic with numbers"],
    [true, "hello؟", "Latin with Arabic punctuation"],
    // Edge cases with various Unicode ranges (astral code points, i.e. surrogate pairs)
    [false, "𝕳𝖊𝖑𝖑𝖔", "mathematical bold Fraktur text (LTR)"],
    [false, "🌍🌎🌏", "emoji"],
  ] as const)(
    "should return %s for word '%s' (%s)",
    (expected: boolean, word: string, _description: string) => {
      expect(Strings.__testing.hasRTLCharacters(word)).toBe(expected);
    }
  );
});
// Tests for getWordDirection: per-word direction detection, fallback to the
// language direction for empty/neutral words, and the result cache.
describe("getWordDirection", () => {
// start every test (including the nested caching tests) with an empty cache
// so results stored by an earlier case cannot leak into the next one
beforeEach(() => {
Strings.clearWordDirectionCache();
});
// each row: [expected result, word, languageRTL, description]
it.each([
// Basic functionality - should use hasRTLCharacters result when word has core content
[false, "hello", false, "LTR word in LTR language"],
[
false,
"hello",
true,
"LTR word in RTL language (word direction overrides language)",
],
[
true,
"مرحبا",
false,
"RTL word in LTR language (word direction overrides language)",
],
[true, "مرحبا", true, "RTL word in RTL language"],
// Punctuation stripping behavior
[false, "hello!", false, "LTR word with trailing punctuation"],
[false, "!hello", false, "LTR word with leading punctuation"],
[false, "!hello!", false, "LTR word with surrounding punctuation"],
[true, "مرحبا؟", false, "RTL word with trailing punctuation"],
[true, "؟مرحبا", false, "RTL word with leading punctuation"],
[true, "؟مرحبا؟", false, "RTL word with surrounding punctuation"],
// Fallback to language direction for empty/neutral content
[false, "", false, "empty string falls back to LTR language"],
[true, "", true, "empty string falls back to RTL language"],
[false, "!!!", false, "punctuation only falls back to LTR language"],
[true, "!!!", true, "punctuation only falls back to RTL language"],
[false, " ", false, "whitespace only falls back to LTR language"],
[true, " ", true, "whitespace only falls back to RTL language"],
// Numbers behavior (numbers are neutral, follow hasRTLCharacters detection)
[false, "123", false, "regular digits are not RTL"],
[false, "123", true, "regular digits are not RTL regardless of language"],
[true, "١٢٣", false, "Arabic-Indic digits are detected as RTL"],
[true, "١٢٣", true, "Arabic-Indic digits are detected as RTL"],
] as const)(
"should return %s for word '%s' with languageRTL=%s (%s)",
(
expected: boolean,
word: string,
languageRTL: boolean,
_description: string
) => {
expect(Strings.getWordDirection(word, languageRTL)).toBe(expected);
}
);
// an undefined word carries no direction of its own, so the language direction is returned
it("should return languageRTL for undefined word", () => {
expect(Strings.getWordDirection(undefined, false)).toBe(false);
expect(Strings.getWordDirection(undefined, true)).toBe(true);
});
// The cache is a Map, so its traffic is observed through spies on
// Map.prototype rather than by reaching into the module's private state.
describe("caching", () => {
let mapGetSpy: ReturnType<typeof vi.spyOn>;
let mapSetSpy: ReturnType<typeof vi.spyOn>;
let mapClearSpy: ReturnType<typeof vi.spyOn>;
beforeEach(() => {
mapGetSpy = vi.spyOn(Map.prototype, "get");
mapSetSpy = vi.spyOn(Map.prototype, "set");
mapClearSpy = vi.spyOn(Map.prototype, "clear");
});
// restore the real Map methods so the spies do not affect other tests
afterEach(() => {
mapGetSpy.mockRestore();
mapSetSpy.mockRestore();
mapClearSpy.mockRestore();
});
it("should use cache for repeated calls", () => {
// First call should cache the result (cache miss)
const result1 = Strings.getWordDirection("hello", false);
expect(result1).toBe(false);
expect(mapSetSpy).toHaveBeenCalledWith("hello", false);
// Reset spies to check second call
mapGetSpy.mockClear();
mapSetSpy.mockClear();
// Second call should use cache (cache hit)
const result2 = Strings.getWordDirection("hello", false);
expect(result2).toBe(false);
expect(mapGetSpy).toHaveBeenCalledWith("hello");
expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again
// Cache should work regardless of language direction for same word
mapGetSpy.mockClear();
mapSetSpy.mockClear();
const result3 = Strings.getWordDirection("hello", true);
expect(result3).toBe(false); // Still false because "hello" is LTR regardless of language
expect(mapGetSpy).toHaveBeenCalledWith("hello");
expect(mapSetSpy).not.toHaveBeenCalled(); // Should not set again
});
it("should cache based on core word without punctuation", () => {
// First call should cache the result for core "hello"
const result1 = Strings.getWordDirection("hello", false);
expect(result1).toBe(false);
expect(mapSetSpy).toHaveBeenCalledWith("hello", false);
mapGetSpy.mockClear();
mapSetSpy.mockClear();
// These should all use the same cache entry since they have the same core
const result2 = Strings.getWordDirection("hello!", false);
expect(result2).toBe(false);
expect(mapGetSpy).toHaveBeenCalledWith("hello");
expect(mapSetSpy).not.toHaveBeenCalled();
mapGetSpy.mockClear();
mapSetSpy.mockClear();
const result3 = Strings.getWordDirection("!hello", false);
expect(result3).toBe(false);
expect(mapGetSpy).toHaveBeenCalledWith("hello");
expect(mapSetSpy).not.toHaveBeenCalled();
mapGetSpy.mockClear();
mapSetSpy.mockClear();
const result4 = Strings.getWordDirection("!hello!", false);
expect(result4).toBe(false);
expect(mapGetSpy).toHaveBeenCalledWith("hello");
expect(mapSetSpy).not.toHaveBeenCalled();
});
it("should handle cache clearing", () => {
// Cache a result
Strings.getWordDirection("test", false);
expect(mapSetSpy).toHaveBeenCalledWith("test", false);
// Clear cache
Strings.clearWordDirectionCache();
expect(mapClearSpy).toHaveBeenCalled();
mapGetSpy.mockClear();
mapSetSpy.mockClear();
mapClearSpy.mockClear();
// Should work normally after cache clear (cache miss again)
const result = Strings.getWordDirection("test", false);
expect(result).toBe(false);
// a fresh set proves the earlier entry was really dropped by the clear
expect(mapSetSpy).toHaveBeenCalledWith("test", false);
});
it("should demonstrate cache miss vs cache hit behavior", () => {
// Test cache miss - first time seeing this word
const result1 = Strings.getWordDirection("unique", false);
expect(result1).toBe(false);
expect(mapGetSpy).toHaveBeenCalledWith("unique");
expect(mapSetSpy).toHaveBeenCalledWith("unique", false);
mapGetSpy.mockClear();
mapSetSpy.mockClear();
// Test cache hit - same word again
const result2 = Strings.getWordDirection("unique", false);
expect(result2).toBe(false);
expect(mapGetSpy).toHaveBeenCalledWith("unique");
expect(mapSetSpy).not.toHaveBeenCalled(); // No cache set on hit
mapGetSpy.mockClear();
mapSetSpy.mockClear();
// Test cache miss - different word
const result3 = Strings.getWordDirection("different", false);
expect(result3).toBe(false);
expect(mapGetSpy).toHaveBeenCalledWith("different");
expect(mapSetSpy).toHaveBeenCalledWith("different", false);
});
});
});
});

View file

@ -6,7 +6,7 @@ import * as TestState from "../test/test-state";
import * as TestWords from "./test-words";
import { prefersReducedMotion } from "../utils/misc";
import { convertRemToPixels } from "../utils/numbers";
import { splitIntoCharacters } from "../utils/strings";
import { splitIntoCharacters, getWordDirection } from "../utils/strings";
import { safeNumber } from "@monkeytype/util/numbers";
import { subscribe } from "../observables/config-event";
@ -59,11 +59,18 @@ function getTargetPositionLeft(
currentWordNodeList: NodeListOf<HTMLElement>,
fullWidthCaretWidth: number,
wordLen: number,
inputLen: number
inputLen: number,
currentWord?: string
): number {
const invisibleExtraLetters = Config.blindMode || Config.hideExtraLetters;
let result = 0;
// use word-specific direction if available and different from language direction
const isWordRightToLeft = getWordDirection(
currentWord,
isLanguageRightToLeft
);
if (Config.tapeMode === "off") {
let positionOffsetToWord = 0;
@ -71,7 +78,7 @@ function getTargetPositionLeft(
const lastWordLetter = currentWordNodeList[wordLen - 1];
const lastInputLetter = currentWordNodeList[inputLen - 1];
if (isLanguageRightToLeft) {
if (isWordRightToLeft) {
if (inputLen <= wordLen && currentLetter) {
// at word beginning in zen mode both lengths are 0, but currentLetter is defined "_"
positionOffsetToWord =
@ -104,13 +111,13 @@ function getTargetPositionLeft(
$(document.querySelector("#wordsWrapper") as HTMLElement).width() ?? 0;
const tapeMargin =
wordsWrapperWidth *
(isLanguageRightToLeft
(isWordRightToLeft
? 1 - Config.tapeMargin / 100
: Config.tapeMargin / 100);
result =
tapeMargin -
(fullWidthCaret && isLanguageRightToLeft ? fullWidthCaretWidth : 0);
(fullWidthCaret && isWordRightToLeft ? fullWidthCaretWidth : 0);
if (Config.tapeMode === "word" && inputLen > 0) {
let currentWordWidth = 0;
@ -125,7 +132,7 @@ function getTargetPositionLeft(
// if current letter has zero width move the caret to previous positive width letter
if ($(currentWordNodeList[inputLen] as Element).outerWidth(true) === 0)
currentWordWidth -= lastPositiveLetterWidth;
if (isLanguageRightToLeft) currentWordWidth *= -1;
if (isWordRightToLeft) currentWordWidth *= -1;
result += currentWordWidth;
}
}
@ -211,6 +218,12 @@ export async function updatePosition(noAnim = false): Promise<void> {
currentWordNodeList
);
// in zen mode, use the input content to determine word direction
const currentWordForDirection =
Config.mode === "zen"
? TestInput.input.current
: TestWords.words.getCurrent();
const letterPosLeft = getTargetPositionLeft(
fullWidthCaret,
isLanguageRightToLeft,
@ -218,7 +231,8 @@ export async function updatePosition(noAnim = false): Promise<void> {
currentWordNodeList,
letterWidth,
wordLen,
inputLen
inputLen,
currentWordForDirection
);
const newLeft = letterPosLeft - (fullWidthCaret ? 0 : caretWidth / 2);

View file

@ -9,6 +9,7 @@ import * as TestState from "./test-state";
import * as ConfigEvent from "../observables/config-event";
import { convertRemToPixels } from "../utils/numbers";
import { getActiveFunboxes } from "./funbox/list";
import { getWordDirection } from "../utils/strings";
type Settings = {
wpm: number;
@ -53,12 +54,19 @@ async function resetCaretPosition(): Promise<void> {
const currentLanguage = await JSONData.getCurrentLanguage(Config.language);
const isLanguageRightToLeft = currentLanguage.rightToLeft;
const currentWord = TestWords.words.get(settings?.currentWordIndex ?? 0);
const isWordRightToLeft = getWordDirection(
currentWord,
isLanguageRightToLeft ?? false
);
caret.stop(true, true).animate(
{
top: firstLetter.offsetTop - firstLetterHeight / 4,
left:
firstLetter.offsetLeft +
(isLanguageRightToLeft ? firstLetter.offsetWidth : 0),
(isWordRightToLeft ? firstLetter.offsetWidth : 0),
},
0,
"linear"
@ -231,6 +239,12 @@ export async function update(expectedStepEnd: number): Promise<void> {
);
const isLanguageRightToLeft = currentLanguage.rightToLeft;
const currentWord = TestWords.words.get(settings.currentWordIndex);
const isWordRightToLeft = getWordDirection(
currentWord,
isLanguageRightToLeft ?? false
);
newTop =
word.offsetTop +
currentLetter.offsetTop -
@ -240,13 +254,13 @@ export async function update(expectedStepEnd: number): Promise<void> {
word.offsetLeft +
currentLetter.offsetLeft -
caretWidth / 2 +
(isLanguageRightToLeft ? currentLetterWidth : 0);
(isWordRightToLeft ? currentLetterWidth : 0);
} else {
newLeft =
word.offsetLeft +
currentLetter.offsetLeft -
caretWidth / 2 +
(isLanguageRightToLeft ? 0 : currentLetterWidth);
(isWordRightToLeft ? 0 : currentLetterWidth);
}
caret.removeClass("hidden");
} catch (e) {

View file

@ -162,6 +162,8 @@ export function restart(options = {} as RestartOptions): void {
};
options = { ...defaultOptions, ...options };
Strings.clearWordDirectionCache();
const animationTime = options.noAnim ? 0 : Misc.applyReducedMotion(125);
const noQuit = isFunboxActive("no_quit");

View file

@ -184,3 +184,59 @@ export function replaceControlCharacters(textToClear: string): string {
return textToClear;
}
/**
 * Detect if a word contains RTL (Right-to-Left) characters.
 * This is for test scenarios where individual words may have different directions.
 *
 * A character counts as RTL when it lies in one of these BMP ranges:
 * - U+0590–U+08FF: Hebrew, Arabic, Syriac, Arabic Supplement, Thaana, NKo,
 *   Samaritan, Mandaic, Syriac Supplement and Arabic Extended-B/A
 * - U+FB1D–U+FDFF: Hebrew presentation forms and Arabic Presentation Forms-A
 * - U+FE70–U+FEFC: Arabic Presentation Forms-B (stops short of U+FEFF, the
 *   byte order mark, which is not an RTL character)
 *
 * Arabic-Indic digits (U+0660–U+0669) sit inside the Arabic block and are
 * therefore reported as RTL.
 * @param word the word to check for RTL characters
 * @returns true if the word contains RTL characters, false otherwise
 */
function hasRTLCharacters(word: string): boolean {
  // also guards against null/undefined slipping in from untyped callers
  if (!word) {
    return false;
  }

  // no `g` flag, so `test` keeps no state between calls
  const rtlPattern = /[\u0590-\u08FF\uFB1D-\uFDFF\uFE70-\uFEFC]/;
  return rtlPattern.test(word);
}
/**
 * Cache for word direction to avoid repeated calculations per word.
 * Keyed by the stripped core of the word; the value is true when that core
 * was detected as RTL and false otherwise.
 * The map is only ever emptied in place, never replaced, hence `const`.
 */
const wordDirectionCache = new Map<string, boolean>();

/**
 * Remove every cached word direction so subsequent lookups are recomputed.
 */
export function clearWordDirectionCache(): void {
  wordDirectionCache.clear();
}
/**
 * Decide whether a single word should be treated as right-to-left.
 * The word's own characters win over the language setting; the language
 * direction is only used when the word offers nothing to judge by.
 * @param word the word to inspect; may be undefined when no word is available
 * @param languageRTL direction of the active language, used as the fallback
 * @returns true when the word is RTL, false when it is LTR
 */
export function getWordDirection(
  word: string | undefined,
  languageRTL: boolean
): boolean {
  if (word === undefined || word === "") {
    return languageRTL;
  }

  // Trim punctuation, symbols and whitespace off both ends so that attached
  // opposite-direction marks (e.g. "word؟" or "،word") cannot flip the result.
  const strippedWord = word.replace(/^[\p{P}\p{S}\s]+|[\p{P}\p{S}\s]+$/gu, "");

  // nothing but neutral characters: defer to the language direction
  if (strippedWord === "") {
    return languageRTL;
  }

  // the cache is keyed by the stripped form, so "word" and "word؟" share an entry
  const knownDirection = wordDirectionCache.get(strippedWord);
  if (knownDirection === undefined) {
    const detectedDirection = hasRTLCharacters(strippedWord);
    wordDirectionCache.set(strippedWord, detectedDirection);
    return detectedDirection;
  }
  return knownDirection;
}
/**
 * Module-private helpers exposed only so unit tests can call them directly.
 */
export const __testing = { hasRTLCharacters };