mirror of
https://github.com/livebook-dev/livebook.git
synced 2024-09-20 10:05:57 +08:00
Show a message when a smart cell crashes and allow restarts (#1381)
This commit is contained in:
parent
e35d026422
commit
ab9ad0369c
|
@ -409,6 +409,10 @@ defprotocol Livebook.Runtime do
|
|||
|
||||
* `{:runtime_smart_cell_started, ref, %{js_view: js_view(), source: String.t()}}`
|
||||
|
||||
In case of an unexpected failure it should also send
|
||||
|
||||
* `{:runtime_smart_cell_down, ref}`
|
||||
|
||||
## Communication
|
||||
|
||||
Apart from the regular JS view communication, the cell sends updates
|
||||
|
|
|
@ -229,7 +229,8 @@ defmodule Livebook.Runtime.ErlDist.RuntimeServer do
|
|||
{:noreply,
|
||||
state
|
||||
|> handle_down_evaluator(message)
|
||||
|> handle_down_scan_binding(message)}
|
||||
|> handle_down_scan_binding(message)
|
||||
|> handle_down_smart_cell(message)}
|
||||
end
|
||||
|
||||
def handle_info({:evaluation_finished, locator}, state) do
|
||||
|
@ -275,13 +276,25 @@ defmodule Livebook.Runtime.ErlDist.RuntimeServer do
|
|||
end
|
||||
|
||||
defp handle_down_scan_binding(state, {:DOWN, monitor_ref, :process, _, _}) do
|
||||
Enum.find_value(state.smart_cells, fn
|
||||
{ref, %{scan_binding_monitor_ref: ^monitor_ref}} -> ref
|
||||
_ -> nil
|
||||
end)
|
||||
state.smart_cells
|
||||
|> Enum.find(fn {_ref, info} -> info.scan_binding_monitor_ref == monitor_ref end)
|
||||
|> case do
|
||||
{ref, _info} -> finish_scan_binding(ref, state)
|
||||
nil -> state
|
||||
ref -> finish_scan_binding(ref, state)
|
||||
end
|
||||
end
|
||||
|
||||
defp handle_down_smart_cell(state, {:DOWN, monitor_ref, :process, _, _}) do
|
||||
state.smart_cells
|
||||
|> Enum.find(fn {_ref, info} -> info.monitor_ref == monitor_ref end)
|
||||
|> case do
|
||||
{ref, _info} ->
|
||||
send(state.owner, {:runtime_smart_cell_down, ref})
|
||||
{_, state} = pop_in(state.smart_cells[ref])
|
||||
state
|
||||
|
||||
nil ->
|
||||
state
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -412,6 +425,7 @@ defmodule Livebook.Runtime.ErlDist.RuntimeServer do
|
|||
|
||||
info = %{
|
||||
pid: pid,
|
||||
monitor_ref: Process.monitor(pid),
|
||||
scan_binding: scan_binding,
|
||||
base_locator: base_locator,
|
||||
scan_binding_pending: false,
|
||||
|
|
|
@ -338,7 +338,15 @@ defmodule Livebook.Session do
|
|||
end
|
||||
|
||||
@doc """
|
||||
Sends cell convertion request to the server.
|
||||
Sends cell recover request to the server.
|
||||
"""
|
||||
@spec recover_smart_cell(pid(), Cell.id()) :: :ok
|
||||
def recover_smart_cell(pid, cell_id) do
|
||||
GenServer.cast(pid, {:recover_smart_cell, self(), cell_id})
|
||||
end
|
||||
|
||||
@doc """
|
||||
Sends cell conversion request to the server.
|
||||
"""
|
||||
@spec convert_smart_cell(pid(), Cell.id()) :: :ok
|
||||
def convert_smart_cell(pid, cell_id) do
|
||||
|
@ -794,6 +802,12 @@ defmodule Livebook.Session do
|
|||
{:noreply, handle_operation(state, operation)}
|
||||
end
|
||||
|
||||
def handle_cast({:recover_smart_cell, client_pid, cell_id}, state) do
|
||||
client_id = client_id(state, client_pid)
|
||||
operation = {:recover_smart_cell, client_id, cell_id}
|
||||
{:noreply, handle_operation(state, operation)}
|
||||
end
|
||||
|
||||
def handle_cast({:convert_smart_cell, client_pid, cell_id}, state) do
|
||||
client_id = client_id(state, client_pid)
|
||||
|
||||
|
@ -1068,6 +1082,11 @@ defmodule Livebook.Session do
|
|||
end
|
||||
end
|
||||
|
||||
def handle_info({:runtime_smart_cell_down, id}, state) do
|
||||
operation = {:smart_cell_down, @client_id, id}
|
||||
{:noreply, handle_operation(state, operation)}
|
||||
end
|
||||
|
||||
def handle_info({:runtime_smart_cell_update, id, attrs, source, info}, state) do
|
||||
case Notebook.fetch_cell_and_section(state.data.notebook, id) do
|
||||
{:ok, cell, _section} ->
|
||||
|
|
|
@ -112,7 +112,7 @@ defmodule Livebook.Session.Data do
|
|||
@type cell_evaluation_validity :: :fresh | :evaluated | :stale | :aborted
|
||||
@type cell_evaluation_status :: :ready | :queued | :evaluating
|
||||
|
||||
@type smart_cell_status :: :dead | :starting | :started
|
||||
@type smart_cell_status :: :dead | :starting | :started | :down
|
||||
|
||||
@type input_id :: String.t()
|
||||
|
||||
|
@ -175,6 +175,8 @@ defmodule Livebook.Session.Data do
|
|||
Cell.Smart.editor() | nil}
|
||||
| {:update_smart_cell, client_id(), Cell.id(), Cell.Smart.attrs(), Delta.t(),
|
||||
reevaluate :: boolean()}
|
||||
| {:smart_cell_down, client_id(), Cell.id()}
|
||||
| {:recover_smart_cell, client_id(), Cell.id()}
|
||||
| {:erase_outputs, client_id()}
|
||||
| {:set_notebook_name, client_id(), String.t()}
|
||||
| {:set_section_name, client_id(), Section.id(), String.t()}
|
||||
|
@ -586,6 +588,31 @@ defmodule Livebook.Session.Data do
|
|||
end
|
||||
end
|
||||
|
||||
def apply_operation(data, {:smart_cell_down, _client_id, id}) do
|
||||
with {:ok, %Cell.Smart{} = cell, _section} <-
|
||||
Notebook.fetch_cell_and_section(data.notebook, id) do
|
||||
data
|
||||
|> with_actions()
|
||||
|> smart_cell_down(cell)
|
||||
|> wrap_ok()
|
||||
else
|
||||
_ -> :error
|
||||
end
|
||||
end
|
||||
|
||||
def apply_operation(data, {:recover_smart_cell, _client_id, cell_id}) do
|
||||
with {:ok, %Cell.Smart{} = cell, section} <-
|
||||
Notebook.fetch_cell_and_section(data.notebook, cell_id),
|
||||
:down <- data.cell_infos[cell_id].status do
|
||||
data
|
||||
|> with_actions()
|
||||
|> recover_smart_cell(cell, section)
|
||||
|> wrap_ok()
|
||||
else
|
||||
_ -> :error
|
||||
end
|
||||
end
|
||||
|
||||
def apply_operation(data, {:erase_outputs, _client_id}) do
|
||||
data
|
||||
|> with_actions()
|
||||
|
@ -819,7 +846,7 @@ defmodule Livebook.Session.Data do
|
|||
info = data.cell_infos[cell.id]
|
||||
|
||||
data_actions =
|
||||
if is_struct(cell, Cell.Smart) and info.status != :dead do
|
||||
if is_struct(cell, Cell.Smart) and info.status in [:started, :starting] do
|
||||
add_action(data_actions, {:stop_smart_cell, cell})
|
||||
else
|
||||
data_actions
|
||||
|
@ -1290,6 +1317,11 @@ defmodule Livebook.Session.Data do
|
|||
|> add_action({:broadcast_delta, client_id, updated_cell, :primary, delta})
|
||||
end
|
||||
|
||||
defp smart_cell_down(data_actions, cell) do
|
||||
data_actions
|
||||
|> update_cell_info!(cell.id, &%{&1 | status: :down})
|
||||
end
|
||||
|
||||
defp maybe_queue_updated_smart_cell({data, _} = data_actions, cell, section, reevaluate) do
|
||||
info = data.cell_infos[cell.id]
|
||||
|
||||
|
@ -1305,6 +1337,14 @@ defmodule Livebook.Session.Data do
|
|||
end
|
||||
end
|
||||
|
||||
defp recover_smart_cell({data, _} = data_actions, cell, section) do
|
||||
if Runtime.connected?(data.runtime) do
|
||||
start_smart_cell(data_actions, cell, section)
|
||||
else
|
||||
data_actions
|
||||
end
|
||||
end
|
||||
|
||||
defp erase_outputs({data, _} = data_actions) do
|
||||
data_actions
|
||||
|> clear_all_evaluation()
|
||||
|
@ -1499,15 +1539,19 @@ defmodule Livebook.Session.Data do
|
|||
cells_ready_to_start = Enum.filter(dead_cells, fn {cell, _} -> cell.kind in kinds end)
|
||||
|
||||
reduce(data_actions, cells_ready_to_start, fn data_actions, {cell, section} ->
|
||||
data_actions
|
||||
|> update_cell_info!(cell.id, &%{&1 | status: :starting})
|
||||
|> add_action({:start_smart_cell, cell, section})
|
||||
start_smart_cell(data_actions, cell, section)
|
||||
end)
|
||||
else
|
||||
data_actions
|
||||
end
|
||||
end
|
||||
|
||||
defp start_smart_cell(data_actions, cell, section) do
|
||||
data_actions
|
||||
|> update_cell_info!(cell.id, &%{&1 | status: :starting})
|
||||
|> add_action({:start_smart_cell, cell, section})
|
||||
end
|
||||
|
||||
defp dead_smart_cells_with_section(data) do
|
||||
for section <- Notebook.all_sections(data.notebook),
|
||||
%Cell.Smart{} = cell <- section.cells,
|
||||
|
@ -1583,7 +1627,7 @@ defmodule Livebook.Session.Data do
|
|||
defp update_smart_cell_bases({data, _} = data_actions, prev_data) do
|
||||
alive_smart_cell_ids =
|
||||
for {%Cell.Smart{} = cell, _} <- Notebook.cells_with_section(data.notebook),
|
||||
data.cell_infos[cell.id].status != :dead,
|
||||
data.cell_infos[cell.id].status in [:started, :starting],
|
||||
into: MapSet.new(),
|
||||
do: cell.id
|
||||
|
||||
|
|
|
@ -906,6 +906,13 @@ defmodule LivebookWeb.SessionLive do
|
|||
{:noreply, socket}
|
||||
end
|
||||
|
||||
def handle_event("recover_smart_cell", %{"cell_id" => cell_id}, socket) do
|
||||
assert_policy!(socket, :read)
|
||||
Session.recover_smart_cell(socket.assigns.session.pid, cell_id)
|
||||
|
||||
{:noreply, socket}
|
||||
end
|
||||
|
||||
def handle_event("convert_smart_cell", %{"cell_id" => cell_id}, socket) do
|
||||
assert_policy!(socket, :edit)
|
||||
Session.convert_smart_cell(socket.assigns.session.pid, cell_id)
|
||||
|
|
|
@ -210,6 +210,18 @@ defmodule LivebookWeb.SessionLive.CellComponent do
|
|||
Run the notebook setup to show the contents of this Smart cell.
|
||||
<% end %>
|
||||
</div>
|
||||
<% :down -> %>
|
||||
<div class="info-box flex justify-between items-center">
|
||||
<span>
|
||||
The Smart cell crashed unexpectedly, this is most likely a bug.
|
||||
</span>
|
||||
<button
|
||||
class="button-base button-gray"
|
||||
phx-click={JS.push("recover_smart_cell", value: %{cell_id: @cell_view.id})}
|
||||
>
|
||||
Restart Smart cell
|
||||
</button>
|
||||
</div>
|
||||
<% :starting -> %>
|
||||
<div class="delay-200">
|
||||
<.content_skeleton empty={false} />
|
||||
|
|
|
@ -250,6 +250,14 @@ defmodule Livebook.Runtime.ErlDist.RuntimeServerTest do
|
|||
assert_receive {:runtime_smart_cell_started, "ref", %{js_view: %{}, source: "source"}}
|
||||
end
|
||||
|
||||
@tag opts: @opts
|
||||
test "notifies runtime owner when a smart cell goes down", %{pid: pid} do
|
||||
RuntimeServer.start_smart_cell(pid, "dumb", "ref", %{}, {:c1, nil})
|
||||
assert_receive {:runtime_smart_cell_started, "ref", %{js_view: %{pid: pid}}}
|
||||
Process.exit(pid, :crashed)
|
||||
assert_receive {:runtime_smart_cell_down, "ref"}
|
||||
end
|
||||
|
||||
@tag opts: @opts
|
||||
test "once started scans binding and sends the result to the cell server", %{pid: pid} do
|
||||
RuntimeServer.start_smart_cell(pid, "dumb", "ref", %{}, {:c1, nil})
|
||||
|
|
|
@ -2735,6 +2735,56 @@ defmodule Livebook.Session.DataTest do
|
|||
end
|
||||
end
|
||||
|
||||
describe "apply_operation/2 given :smart_cell_down" do
|
||||
test "updates the smart cell status" do
|
||||
data =
|
||||
data_after_operations!([
|
||||
{:insert_section, @cid, 0, "s1"},
|
||||
{:set_runtime, @cid, connected_noop_runtime()},
|
||||
{:set_smart_cell_definitions, @cid, @smart_cell_definitions},
|
||||
{:insert_cell, @cid, "s1", 0, :smart, "c1", %{kind: "text"}}
|
||||
])
|
||||
|
||||
operation = {:smart_cell_down, @cid, "c1"}
|
||||
|
||||
assert {:ok, %{cell_infos: %{"c1" => %{status: :down}}}, _actions} =
|
||||
Data.apply_operation(data, operation)
|
||||
end
|
||||
end
|
||||
|
||||
describe "apply_operation/2 given :recover_smart_cell" do
|
||||
test "returns an error if the smart cell is not down" do
|
||||
data =
|
||||
data_after_operations!([
|
||||
{:insert_section, @cid, 0, "s1"},
|
||||
{:set_runtime, @cid, connected_noop_runtime()},
|
||||
{:set_smart_cell_definitions, @cid, @smart_cell_definitions},
|
||||
{:insert_cell, @cid, "s1", 0, :smart, "c1", %{kind: "text"}}
|
||||
])
|
||||
|
||||
operation = {:recover_smart_cell, @cid, "c1"}
|
||||
|
||||
assert :error = Data.apply_operation(data, operation)
|
||||
end
|
||||
|
||||
test "marks the smart cell as starting if there is a runtime" do
|
||||
data =
|
||||
data_after_operations!([
|
||||
{:insert_section, @cid, 0, "s1"},
|
||||
{:set_runtime, @cid, connected_noop_runtime()},
|
||||
{:set_smart_cell_definitions, @cid, @smart_cell_definitions},
|
||||
{:insert_cell, @cid, "s1", 0, :smart, "c1", %{kind: "text"}},
|
||||
{:smart_cell_down, @cid, "c1"}
|
||||
])
|
||||
|
||||
operation = {:recover_smart_cell, @cid, "c1"}
|
||||
|
||||
assert {:ok, %{cell_infos: %{"c1" => %{status: :starting}}},
|
||||
[{:start_smart_cell, %{id: "c1"}, %{id: "s1"}}]} =
|
||||
Data.apply_operation(data, operation)
|
||||
end
|
||||
end
|
||||
|
||||
describe "apply_operation/2 given :erase_outputs" do
|
||||
test "clears all sections evaluation and queues" do
|
||||
data =
|
||||
|
|
|
@ -105,6 +105,38 @@ defmodule Livebook.SessionTest do
|
|||
end
|
||||
end
|
||||
|
||||
describe "recover_smart_cell/2" do
|
||||
test "sends a recover operations to subscribers and starts the smart cell" do
|
||||
smart_cell = %{Notebook.Cell.new(:smart) | kind: "text", source: "content"}
|
||||
section = %{Notebook.Section.new() | cells: [smart_cell]}
|
||||
notebook = %{Notebook.new() | sections: [section]}
|
||||
|
||||
session = start_session(notebook: notebook)
|
||||
|
||||
send(
|
||||
session.pid,
|
||||
{:runtime_smart_cell_definitions, [%{kind: "text", name: "Text", requirement: nil}]}
|
||||
)
|
||||
|
||||
send(
|
||||
session.pid,
|
||||
{:runtime_smart_cell_started, smart_cell.id,
|
||||
%{source: "content!", js_view: %{}, editor: nil}}
|
||||
)
|
||||
|
||||
send(session.pid, {:runtime_smart_cell_down, smart_cell.id})
|
||||
|
||||
Session.subscribe(session.id)
|
||||
|
||||
Session.recover_smart_cell(session.pid, smart_cell.id)
|
||||
|
||||
cell_id = smart_cell.id
|
||||
|
||||
assert_receive {:operation, {:recover_smart_cell, _client_id, ^cell_id}}
|
||||
assert_receive {:operation, {:smart_cell_started, _, ^cell_id, _, _, _}}
|
||||
end
|
||||
end
|
||||
|
||||
describe "convert_smart_cell/2" do
|
||||
test "sends a delete and insert operations to subscribers" do
|
||||
smart_cell = %{Notebook.Cell.new(:smart) | kind: "text", source: "content"}
|
||||
|
|
Loading…
Reference in a new issue