fix: non-breaking space behaving like a character

This commit is contained in:
Miodec 2025-11-23 18:16:24 +01:00
parent 595ff27766
commit f86f253561
2 changed files with 23 additions and 18 deletions

View file

@ -474,14 +474,14 @@ describe("string utils", () => {
["\u2003", 0x2003, "em space", true],
["\u2009", 0x2009, "thin space", true],
["\u3000", 0x3000, "ideographic space", true],
["\u00A0", 0x00a0, "non-breaking space", true],
["\u2007", 0x2007, "figure space", true],
["\u2008", 0x2008, "punctuation space", true],
["\u200A", 0x200a, "hair space", true],
["\u200B", 0x200b, "zero-width space", true],
// Should return false for other characters
["\t", 0x0009, "tab", false],
["\u00A0", 0x00a0, "non-breaking space", false],
["\u2007", 0x2007, "figure space", false],
["\u2008", 0x2008, "punctuation space", false],
["\u200A", 0x200a, "hair space", false],
["\u200B", 0x200b, "zero-width space", false],
["a", 0x0061, "letter a", false],
["A", 0x0041, "letter A", false],
["1", 0x0031, "digit 1", false],

View file

@ -336,19 +336,24 @@ export function isSpace(char: string): boolean {
const codePoint = char.codePointAt(0);
if (codePoint === undefined) return false;
// Directly typable spaces:
// U+0020 - Regular space (spacebar)
// U+2002 - En space (Option+Space on Mac)
// U+2003 - Em space (Option+Shift+Space on Mac)
// U+2009 - Thin space (various input methods)
// U+3000 - Ideographic space (CJK input methods)
return (
codePoint === 0x0020 ||
codePoint === 0x2002 ||
codePoint === 0x2003 ||
codePoint === 0x2009 ||
codePoint === 0x3000
);
const spaces = new Set([
0x0020, // Regular space (spacebar)
0x2002, // En space (Option+Space on Mac)
0x2003, // Em space (Option+Shift+Space on Mac)
0x2009, // Thin space (various input methods)
0x3000, // Ideographic space (CJK input methods)
0x00a0, // Non-breaking space (Alt+0160 on Windows, Option+Space on Mac)
0x1680, // Ogham space mark (rare, but included for completeness)
0x202f, // Narrow no-break space (various input methods)
0xfeff, // Zero width no-break space (various input methods)
0x2007, // Figure space (various input methods)
0x2008, // Punctuation space (various input methods)
0x2004, // Three-per-em space (various input methods)
0x200a, // Hair space (various input methods)
0x200b, // Zero width space (various input methods)
]);
return spaces.has(codePoint);
}
// Export testing utilities for unit tests