mirror of
https://github.com/monkeytypegame/monkeytype.git
synced 2025-10-06 05:26:54 +08:00
fix: handling of characters outside the BMP (@fehmer) (#5911)
Handle multi-byte characters outside the [basic multilingual plane](https://en.wikipedia.org/wiki/Plane_(Unicode)) correctly. Fixes #5906
This commit is contained in:
parent
6bf1cb8672
commit
f9409e3fcf
4 changed files with 54 additions and 21 deletions
12
frontend/__tests__/utils/strings.spec.ts
Normal file
12
frontend/__tests__/utils/strings.spec.ts
Normal file
|
@ -0,0 +1,12 @@
|
|||
import * as Strings from "../../src/ts/utils/strings";
|
||||
|
||||
// Unit tests for the string helpers exported by src/ts/utils/strings.
describe("string utils", () => {
  describe("splitIntoCharacters", () => {
    it("splits regular characters", () => {
      expect(Strings.splitIntoCharacters("abc")).toEqual(["a", "b", "c"]);
    });

    it("splits characters outside of the bmp", () => {
      // A character outside the BMP occupies two UTF-16 code units, so
      // indexing the string directly would cut it in half.
      expect(Strings.splitIntoCharacters("t𐑩e")).toEqual(["t", "𐑩", "e"]);
    });

    it("returns an empty array for an empty string", () => {
      expect(Strings.splitIntoCharacters("")).toEqual([]);
    });
  });
});
|
|
@ -6,6 +6,7 @@ import * as TestState from "../test/test-state";
|
|||
import * as TestWords from "./test-words";
|
||||
import { prefersReducedMotion } from "../utils/misc";
|
||||
import { convertRemToPixels } from "../utils/numbers";
|
||||
import { splitIntoCharacters } from "../utils/strings";
|
||||
|
||||
export let caretAnimating = true;
|
||||
const caret = document.querySelector("#caret") as HTMLElement;
|
||||
|
@ -133,8 +134,8 @@ export async function updatePosition(noAnim = false): Promise<void> {
|
|||
Config.caretStyle
|
||||
);
|
||||
|
||||
let wordLen = TestWords.words.getCurrent().length;
|
||||
const inputLen = TestInput.input.current.length;
|
||||
let wordLen = splitIntoCharacters(TestWords.words.getCurrent()).length;
|
||||
const inputLen = splitIntoCharacters(TestInput.input.current).length;
|
||||
if (Config.mode === "zen") wordLen = inputLen;
|
||||
const activeWordEl = document?.querySelector("#words .active") as HTMLElement;
|
||||
//insert temporary character so the caret will work in zen mode
|
||||
|
|
|
@ -41,13 +41,14 @@ function createHintsHtml(
|
|||
activeWordLetters: NodeListOf<Element>,
|
||||
inputWord: string
|
||||
): string {
|
||||
const inputChars = Strings.splitIntoCharacters(inputWord);
|
||||
let hintsHtml = "";
|
||||
for (const adjacentLetters of incorrectLtrIndices) {
|
||||
for (const indx of adjacentLetters) {
|
||||
const blockLeft = (activeWordLetters[indx] as HTMLElement).offsetLeft;
|
||||
const blockWidth = (activeWordLetters[indx] as HTMLElement).offsetWidth;
|
||||
const blockIndices = `[${indx}]`;
|
||||
const blockChars = inputWord[indx];
|
||||
const blockChars = inputChars[indx];
|
||||
|
||||
hintsHtml +=
|
||||
`<hint data-length=1 data-chars-index=${blockIndices}` +
|
||||
|
@ -332,16 +333,17 @@ function getWordHTML(word: string): string {
|
|||
const funbox = FunboxList.get(Config.funbox).find(
|
||||
(f) => f.functions?.getWordHtml
|
||||
);
|
||||
for (let c = 0; c < word.length; c++) {
|
||||
const chars = Strings.splitIntoCharacters(word);
|
||||
for (const char of chars) {
|
||||
if (funbox?.functions?.getWordHtml) {
|
||||
retval += funbox.functions.getWordHtml(word.charAt(c), true);
|
||||
} else if (word.charAt(c) === "\t") {
|
||||
retval += funbox.functions.getWordHtml(char, true);
|
||||
} else if (char === "\t") {
|
||||
retval += `<letter class='tabChar'><i class="fas fa-long-arrow-alt-right fa-fw"></i></letter>`;
|
||||
} else if (word.charAt(c) === "\n") {
|
||||
} else if (char === "\n") {
|
||||
newlineafter = true;
|
||||
retval += `<letter class='nlChar'><i class="fas fa-level-down-alt fa-rotate-90 fa-fw"></i></letter>`;
|
||||
} else {
|
||||
retval += "<letter>" + word.charAt(c) + "</letter>";
|
||||
retval += "<letter>" + char + "</letter>";
|
||||
}
|
||||
}
|
||||
retval += "</div>";
|
||||
|
@ -833,10 +835,12 @@ export async function updateActiveWordLetters(
|
|||
(f) => f.functions?.getWordHtml
|
||||
);
|
||||
|
||||
for (let i = 0; i < input.length; i++) {
|
||||
const charCorrect = currentWord[i] === input[i];
|
||||
const inputChars = Strings.splitIntoCharacters(input);
|
||||
const currentWordChars = Strings.splitIntoCharacters(currentWord);
|
||||
for (let i = 0; i < inputChars.length; i++) {
|
||||
const charCorrect = currentWordChars[i] === inputChars[i];
|
||||
|
||||
let currentLetter = currentWord[i] as string;
|
||||
let currentLetter = currentWordChars[i] as string;
|
||||
let tabChar = "";
|
||||
let nlChar = "";
|
||||
if (funbox?.functions?.getWordHtml) {
|
||||
|
@ -862,13 +866,13 @@ export async function updateActiveWordLetters(
|
|||
) {
|
||||
ret += `<letter class="dead">${
|
||||
Config.indicateTypos === "replace"
|
||||
? input[i] === " "
|
||||
? inputChars[i] === " "
|
||||
? "_"
|
||||
: input[i]
|
||||
: inputChars[i]
|
||||
: currentLetter
|
||||
}</letter>`;
|
||||
} else if (currentLetter === undefined) {
|
||||
let letter = input[i];
|
||||
let letter = inputChars[i];
|
||||
if (letter === " " || letter === "\t" || letter === "\n") {
|
||||
letter = "_";
|
||||
}
|
||||
|
@ -877,9 +881,9 @@ export async function updateActiveWordLetters(
|
|||
ret +=
|
||||
`<letter class="incorrect ${tabChar}${nlChar}">` +
|
||||
(Config.indicateTypos === "replace"
|
||||
? input[i] === " "
|
||||
? inputChars[i] === " "
|
||||
? "_"
|
||||
: input[i]
|
||||
: inputChars[i]
|
||||
: currentLetter) +
|
||||
"</letter>";
|
||||
if (Config.indicateTypos === "below") {
|
||||
|
@ -893,15 +897,16 @@ export async function updateActiveWordLetters(
|
|||
}
|
||||
}
|
||||
|
||||
for (let i = input.length; i < currentWord.length; i++) {
|
||||
for (let i = inputChars.length; i < currentWordChars.length; i++) {
|
||||
const currentLetter = currentWordChars[i];
|
||||
if (funbox?.functions?.getWordHtml) {
|
||||
ret += funbox.functions.getWordHtml(currentWord[i] as string, true);
|
||||
} else if (currentWord[i] === "\t") {
|
||||
ret += funbox.functions.getWordHtml(currentLetter as string, true);
|
||||
} else if (currentLetter === "\t") {
|
||||
ret += `<letter class='tabChar'><i class="fas fa-long-arrow-alt-right fa-fw"></i></letter>`;
|
||||
} else if (currentWord[i] === "\n") {
|
||||
} else if (currentLetter === "\n") {
|
||||
ret += `<letter class='nlChar'><i class="fas fa-level-down-alt fa-rotate-90 fa-fw"></i></letter>`;
|
||||
} else {
|
||||
ret += `<letter>` + currentWord[i] + "</letter>";
|
||||
ret += `<letter>` + currentLetter + "</letter>";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -149,3 +149,18 @@ export function cleanTypographySymbols(textToClean: string): string {
|
|||
(char) => specials[char as keyof typeof specials] || ""
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Split a string into characters. This supports characters outside of the
 * [Basic Multilingual Plane](https://en.wikipedia.org/wiki/Plane_(Unicode)),
 * which JavaScript strings store as a surrogate pair (two UTF-16 code units).
 * Using `string.length` and `string[index]` does not work for them, because
 * both operate on code units and would cut such a character in half.
 *
 * The split is per Unicode code point, not per grapheme cluster: a base
 * letter followed by a combining mark still yields two entries.
 *
 * @param s string to be tokenized into characters
 * @returns array of characters, one entry per code point; empty for `""`
 */
export function splitIntoCharacters(s: string): string[] {
  // The string iterator walks code points, so a surrogate pair stays together.
  return Array.from(s);
}
|
||||
|
|
Loading…
Add table
Reference in a new issue