From 35d8264c2b92f9aa8bac74c89ecf08620527d254 Mon Sep 17 00:00:00 2001 From: lionel-rowe Date: Tue, 14 May 2024 18:34:41 +0800 Subject: [PATCH] fix(text-to-binary): return valid UTF-8 results for non-ASCII text --- locales/en.yml | 4 +- .../text-to-binary/text-to-binary.e2e.spec.ts | 8 ++- .../text-to-binary.models.test.ts | 54 ++++++++++----- .../text-to-binary/text-to-binary.models.ts | 23 +++---- src/tools/text-to-binary/text-to-binary.vue | 66 ++++++++++++++----- 5 files changed, 105 insertions(+), 50 deletions(-) diff --git a/locales/en.yml b/locales/en.yml index d09d435a..44ed351e 100644 --- a/locales/en.yml +++ b/locales/en.yml @@ -389,5 +389,5 @@ tools: description: Encode text to URL-encoded format (also known as "percent-encoded"), or decode from it. text-to-binary: - title: Text to ASCII binary - description: Convert text to its ASCII binary representation and vice-versa. + title: Text to UTF-8 binary + description: Convert text to its UTF-8 binary representation and vice-versa. diff --git a/src/tools/text-to-binary/text-to-binary.e2e.spec.ts b/src/tools/text-to-binary/text-to-binary.e2e.spec.ts index 2b4e4313..edeb634e 100644 --- a/src/tools/text-to-binary/text-to-binary.e2e.spec.ts +++ b/src/tools/text-to-binary/text-to-binary.e2e.spec.ts @@ -1,12 +1,12 @@ import { expect, test } from '@playwright/test'; -test.describe('Tool - Text to ASCII binary', () => { +test.describe('Tool - Text to UTF-8 binary', () => { test.beforeEach(async ({ page }) => { await page.goto('/text-to-binary'); }); test('Has correct title', async ({ page }) => { - await expect(page).toHaveTitle('Text to ASCII binary - IT Tools'); + await expect(page).toHaveTitle('Text to UTF-8 binary - IT Tools'); }); test('Text to binary conversion', async ({ page }) => { @@ -17,7 +17,9 @@ test.describe('Tool - Text to ASCII binary', () => { }); test('Binary to text conversion', async ({ page }) => { - await page.getByTestId('binary-to-text-input').fill('01101001 01110100 00101101 01110100 01101111 01101111 01101100 01110011'); + await page + .getByTestId('binary-to-text-input') + .fill('01101001 01110100 00101101 01110100 01101111 01101111 01101100 01110011'); const text = await page.getByTestId('binary-to-text-output').inputValue(); expect(text).toEqual('it-tools'); diff --git a/src/tools/text-to-binary/text-to-binary.models.test.ts b/src/tools/text-to-binary/text-to-binary.models.test.ts index e4269b50..41ca64b0 100644 --- a/src/tools/text-to-binary/text-to-binary.models.test.ts +++ b/src/tools/text-to-binary/text-to-binary.models.test.ts @@ -1,32 +1,56 @@ import { describe, expect, it } from 'vitest'; -import { convertAsciiBinaryToText, convertTextToAsciiBinary } from './text-to-binary.models'; +import { convertTextToUtf8Binary, convertUtf8BinaryToText } from './text-to-binary.models'; describe('text-to-binary', () => { - describe('convertTextToAsciiBinary', () => { - it('a text string is converted to its ascii binary representation', () => { - expect(convertTextToAsciiBinary('A')).toBe('01000001'); - expect(convertTextToAsciiBinary('hello')).toBe('01101000 01100101 01101100 01101100 01101111'); - expect(convertTextToAsciiBinary('')).toBe(''); + const utf8Tests = [ + { text: '文字', binary: '11100110 10010110 10000111 11100101 10101101 10010111' }, + { text: '💩', binary: '11110000 10011111 10010010 10101001' }, + ]; + + describe('convertTextToUtf8Binary', () => { + it('a text string is converted to its UTF-8 binary representation', () => { + expect(convertTextToUtf8Binary('A')).toBe('01000001'); + expect(convertTextToUtf8Binary('hello')).toBe('01101000 01100101 01101100 01101100 01101111'); + expect(convertTextToUtf8Binary('')).toBe(''); }); it('the separator between octets can be changed', () => { - expect(convertTextToAsciiBinary('hello', { separator: '' })).toBe('0110100001100101011011000110110001101111'); + expect(convertTextToUtf8Binary('hello', { separator: '' })).toBe('0110100001100101011011000110110001101111'); + expect(convertTextToUtf8Binary('hello', { separator: '-' })).toBe('01101000-01100101-01101100-01101100-01101111'); + }); + it('works with non-ASCII input', () => { + for (const { text, binary } of utf8Tests) { + const converted = convertTextToUtf8Binary(text); + expect(converted).toBe(binary); + } }); }); - describe('convertAsciiBinaryToText', () => { + describe('convertUtf8BinaryToText', () => { it('an ascii binary string is converted to its text representation', () => { - expect(convertAsciiBinaryToText('01101000 01100101 01101100 01101100 01101111')).toBe('hello'); - expect(convertAsciiBinaryToText('01000001')).toBe('A'); - expect(convertTextToAsciiBinary('')).toBe(''); + expect(convertUtf8BinaryToText('01101000 01100101 01101100 01101100 01101111')).toBe('hello'); + expect(convertUtf8BinaryToText('01000001')).toBe('A'); + expect(convertTextToUtf8Binary('')).toBe(''); }); it('the given binary string is cleaned before conversion', () => { - expect(convertAsciiBinaryToText(' 01000 001garbage')).toBe('A'); + expect(convertUtf8BinaryToText(' 01000 001garbage')).toBe('A'); }); - it('throws an error if the given binary string as no complete octet', () => { - expect(() => convertAsciiBinaryToText('010000011')).toThrow('Invalid binary string'); - expect(() => convertAsciiBinaryToText('1')).toThrow('Invalid binary string'); + it('throws an error if the given binary string is not an integer number of complete octets', () => { + expect(() => convertUtf8BinaryToText('010000011')).toThrow('Invalid binary string'); + expect(() => convertUtf8BinaryToText('010000011 010000011')).toThrow('Invalid binary string'); + expect(() => convertUtf8BinaryToText('1')).toThrow('Invalid binary string'); + }); + + it('throws an error if the given binary string is not valid UTF-8', () => { + expect(() => convertUtf8BinaryToText('11111111')).toThrow(); + }); + + it('works with non-ASCII input', () => { + for (const { text, binary } of utf8Tests) { + const reverted = convertUtf8BinaryToText(binary); + expect(reverted).toBe(text); + } }); }); }); diff --git a/src/tools/text-to-binary/text-to-binary.models.ts b/src/tools/text-to-binary/text-to-binary.models.ts index ad9699af..de3feddf 100644 --- a/src/tools/text-to-binary/text-to-binary.models.ts +++ b/src/tools/text-to-binary/text-to-binary.models.ts @@ -1,22 +1,19 @@ -export { convertTextToAsciiBinary, convertAsciiBinaryToText }; +export { convertTextToUtf8Binary, convertUtf8BinaryToText }; -function convertTextToAsciiBinary(text: string, { separator = ' ' }: { separator?: string } = {}): string { - return text - .split('') - .map(char => char.charCodeAt(0).toString(2).padStart(8, '0')) - .join(separator); +function convertTextToUtf8Binary(text: string, { separator = ' ' }: { separator?: string } = {}): string { + return [...new TextEncoder().encode(text)].map(x => x.toString(2).padStart(8, '0')).join(separator); } -function convertAsciiBinaryToText(binary: string): string { - const cleanBinary = binary.replace(/[^01]/g, ''); +function convertUtf8BinaryToText(binary: string): string { + const cleanBinary = binary.replace(/[^01]+/g, ''); if (cleanBinary.length % 8) { throw new Error('Invalid binary string'); } - return cleanBinary - .split(/(\d{8})/) - .filter(Boolean) - .map(binary => String.fromCharCode(Number.parseInt(binary, 2))) - .join(''); + return new TextDecoder(undefined, { fatal: true }).decode( + Uint8Array.from({ length: cleanBinary.length / 8 }, (_, i) => + Number.parseInt(cleanBinary.slice(i * 8, (i + 1) * 8), 2), + ), + ); } diff --git a/src/tools/text-to-binary/text-to-binary.vue b/src/tools/text-to-binary/text-to-binary.vue index 37aa9bea..359f7545 100644 --- a/src/tools/text-to-binary/text-to-binary.vue +++ b/src/tools/text-to-binary/text-to-binary.vue @@ -1,42 +1,74 @@