From 85806f5e0345414ff619bcba143115fec3adcf89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20K=C5=82osko?= Date: Thu, 11 Feb 2021 22:02:07 +0100 Subject: [PATCH] Make the Delta behavior consistent for client and server (#26) --- lib/live_book/delta.ex | 23 ---------- lib/live_book/js_interop.ex | 70 ++++++++++++++++++++++++++++++ lib/live_book/session/data.ex | 4 +- test/live_book/delta_test.exs | 26 ----------- test/live_book/js_interop_test.exs | 46 ++++++++++++++++++++ 5 files changed, 118 insertions(+), 51 deletions(-) create mode 100644 lib/live_book/js_interop.ex create mode 100644 test/live_book/js_interop_test.exs diff --git a/lib/live_book/delta.ex b/lib/live_book/delta.ex index cbaa50ccd..2ed8b8231 100644 --- a/lib/live_book/delta.ex +++ b/lib/live_book/delta.ex @@ -121,29 +121,6 @@ defmodule LiveBook.Delta do end end - @doc """ - Returns the result of applying `delta` to `string`. - """ - @spec apply_to_string(t(), String.t()) :: String.t() - def apply_to_string(delta, string) do - do_apply_to_string(delta.ops, string) - end - - defp do_apply_to_string([], string), do: string - - defp do_apply_to_string([{:retain, n} | ops], string) do - {left, right} = String.split_at(string, n) - left <> do_apply_to_string(ops, right) - end - - defp do_apply_to_string([{:insert, inserted} | ops], string) do - inserted <> do_apply_to_string(ops, string) - end - - defp do_apply_to_string([{:delete, n} | ops], string) do - do_apply_to_string(ops, String.slice(string, n..-1)) - end - @doc """ Converts the given delta to a compact representation, suitable for sending over the network. diff --git a/lib/live_book/js_interop.ex b/lib/live_book/js_interop.ex new file mode 100644 index 000000000..0c33f0311 --- /dev/null +++ b/lib/live_book/js_interop.ex @@ -0,0 +1,70 @@ +defmodule LiveBook.JSInterop do + @moduledoc false + + alias LiveBook.Delta + + @doc """ + Returns the result of applying `delta` to `string`. + + The delta operation lenghts (retain, delete) are treated + such that they match the JavaScript strings behavior. + + JavaScript uses UTF-16 encoding, in which every character is stored + as either one or two 16-bit code units. JS treats the number of units + as string length and this also impacts position-based functions like `String.slice`. + To match this behavior we first convert normal UTF-8 string + into a list of UTF-16 code points, then apply the delta to this list + and finally convert back to a UTF-8 string. + """ + @spec apply_delta_to_string(Delta.t(), String.t()) :: String.t() + def apply_delta_to_string(delta, string) do + code_units = string_to_utf16_code_units(string) + + delta.ops + |> apply_to_code_units(code_units) + |> utf16_code_units_to_string() + end + + defp apply_to_code_units([], code_units), do: code_units + + defp apply_to_code_units([{:retain, n} | ops], code_units) do + {left, right} = Enum.split(code_units, n) + left ++ apply_to_code_units(ops, right) + end + + defp apply_to_code_units([{:insert, inserted} | ops], code_units) do + string_to_utf16_code_units(inserted) ++ apply_to_code_units(ops, code_units) + end + + defp apply_to_code_units([{:delete, n} | ops], code_units) do + apply_to_code_units(ops, Enum.slice(code_units, n..-1)) + end + + # --- + + defp string_to_utf16_code_units(string) do + string + |> :unicode.characters_to_binary(:utf8, :utf16) + |> utf16_binary_to_code_units([]) + |> Enum.reverse() + end + + defp utf16_binary_to_code_units(<<>>, code_units), do: code_units + + defp utf16_binary_to_code_units(<>, code_units) do + utf16_binary_to_code_units(rest, [code_unit | code_units]) + end + + defp utf16_code_units_to_string(code_units) do + code_units + |> Enum.reverse() + |> code_units_to_utf16_binary(<<>>) + |> :unicode.characters_to_binary(:utf16, :utf8) + end + + defp code_units_to_utf16_binary([], utf16_binary), do: utf16_binary + + defp code_units_to_utf16_binary([code_unit | code_units], utf16_binary) do + code_units_to_utf16_binary(code_units, <>) + end +end diff --git a/lib/live_book/session/data.ex b/lib/live_book/session/data.ex index a6e2053bf..23887023d 100644 --- a/lib/live_book/session/data.ex +++ b/lib/live_book/session/data.ex @@ -24,7 +24,7 @@ defmodule LiveBook.Session.Data do :runtime ] - alias LiveBook.{Notebook, Evaluator, Delta, Runtime} + alias LiveBook.{Notebook, Evaluator, Delta, Runtime, JSInterop} alias LiveBook.Notebook.{Cell, Section} @type t :: %__MODULE__{ @@ -464,7 +464,7 @@ defmodule LiveBook.Session.Data do Delta.transform(delta_ahead, transformed_new_delta, :left) end) - new_source = Delta.apply_to_string(transformed_new_delta, cell.source) + new_source = JSInterop.apply_delta_to_string(transformed_new_delta, cell.source) data_actions |> set!(notebook: Notebook.update_cell(data.notebook, cell.id, &%{&1 | source: new_source})) diff --git a/test/live_book/delta_test.exs b/test/live_book/delta_test.exs index bd050faf0..c6aa36a5b 100644 --- a/test/live_book/delta_test.exs +++ b/test/live_book/delta_test.exs @@ -43,30 +43,4 @@ defmodule LiveBook.DeltaTest do assert Delta.append(delta, op) == %Delta{ops: [insert: "cats", delete: 2]} end end - - describe "apply_to_string/2" do - test "prepend" do - string = "cats" - delta = Delta.new() |> Delta.insert("fat ") - assert Delta.apply_to_string(delta, string) == "fat cats" - end - - test "insert in the middle" do - string = "cats" - delta = Delta.new() |> Delta.retain(3) |> Delta.insert("'") - assert Delta.apply_to_string(delta, string) == "cat's" - end - - test "delete" do - string = "cats" - delta = Delta.new() |> Delta.retain(1) |> Delta.delete(2) - assert Delta.apply_to_string(delta, string) == "cs" - end - - test "replace" do - string = "cats" - delta = Delta.new() |> Delta.retain(1) |> Delta.delete(2) |> Delta.insert("ar") - assert Delta.apply_to_string(delta, string) == "cars" - end - end end diff --git a/test/live_book/js_interop_test.exs b/test/live_book/js_interop_test.exs new file mode 100644 index 000000000..d582cea52 --- /dev/null +++ b/test/live_book/js_interop_test.exs @@ -0,0 +1,46 @@ +defmodule LiveBook.JSInteropTest do + use ExUnit.Case, async: true + + alias LiveBook.{JSInterop, Delta} + + describe "apply_delta_to_string/2" do + test "prepend" do + string = "cats" + delta = Delta.new() |> Delta.insert("fat ") + assert JSInterop.apply_delta_to_string(delta, string) == "fat cats" + end + + test "insert in the middle" do + string = "cats" + delta = Delta.new() |> Delta.retain(3) |> Delta.insert("'") + assert JSInterop.apply_delta_to_string(delta, string) == "cat's" + end + + test "delete" do + string = "cats" + delta = Delta.new() |> Delta.retain(1) |> Delta.delete(2) + assert JSInterop.apply_delta_to_string(delta, string) == "cs" + end + + test "replace" do + string = "cats" + delta = Delta.new() |> Delta.retain(1) |> Delta.delete(2) |> Delta.insert("ar") + assert JSInterop.apply_delta_to_string(delta, string) == "cars" + end + + test "retain skips the given number UTF-16 code units" do + # 🚀 consists of 2 UTF-16 code units, so JavaScript assumes "🚀".length is 2 + string = "🚀 cats" + # Skip the emoji (2 code unit) and the space (1 code unit) + delta = Delta.new() |> Delta.retain(3) |> Delta.insert("my ") + assert JSInterop.apply_delta_to_string(delta, string) == "🚀 my cats" + end + + test "delete removes the given number UTF-16 code units" do + # 🚀 consists of 2 UTF-16 code units, so JavaScript assumes "🚀".length is 2 + string = "🚀 cats" + delta = Delta.new() |> Delta.delete(2) + assert JSInterop.apply_delta_to_string(delta, string) == " cats" + end + end +end