Allow disconnecting Fly runtime during initialization (#2776)

This commit is contained in:
Jonatan Kłosko 2024-09-09 16:27:12 +02:00 committed by GitHub
parent 85ae48ee3c
commit 2eb5963efe
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 98 additions and 22 deletions

View file

@ -188,6 +188,22 @@ defmodule Livebook.FlyAPI do
%{id: machine["id"], private_ip: machine["private_ip"]} %{id: machine["id"], private_ip: machine["private_ip"]}
end end
@doc """
Deletes the given machine.
"""
@spec delete_machine(String.t(), String.t(), String.t()) :: :ok | {:error, error}
def delete_machine(token, app_name, machine_id) do
params = %{force: true}
with {:ok, _data} <-
flaps_request(token, "/v1/apps/#{app_name}/machines/#{machine_id}",
method: :delete,
params: params
) do
:ok
end
end
@doc """ @doc """
Waits for the machine to start. Waits for the machine to start.
""" """

View file

@ -806,13 +806,20 @@ defprotocol Livebook.Runtime do
The `runtime` should be the struct updated with all information The `runtime` should be the struct updated with all information
necessary for further communication. necessary for further communication.
In case the initialization is a particularly involved process, the In case the initialization is a particularly involved, the process
process may send updates to the caller: may send updates to the caller:
* `{:runtime_connect_info, pid, info}` * `{:runtime_connect_info, pid, info}`
Where `info` is a few word text describing the current initialization Where `info` is a few word text describing the current initialization
step. step.
If the caller decides to abort the initialization, they can forecefully
kill the process. The runtime resources should already be tolerant
to abrupt Livebook termination and autodestroy through monitoring
and timeouts. However, when the initialization process gets killed,
it may be desirable to eagerly remove the resources it has already
allocated, which can be achieved with an additional watcher process.
""" """
@spec connect(t()) :: pid() @spec connect(t()) :: pid()
def connect(runtime) def connect(runtime)

View file

@ -116,6 +116,14 @@ defmodule Livebook.Runtime.Fly do
|> :erlang.term_to_binary() |> :erlang.term_to_binary()
|> Base.encode64() |> Base.encode64()
parent = self()
{:ok, watcher_pid} =
DynamicSupervisor.start_child(
Livebook.RuntimeSupervisor,
{Task, fn -> watcher(parent, config) end}
)
with :ok <- with :ok <-
(if config.volume_id && runtime.previous_machine_id do (if config.volume_id && runtime.previous_machine_id do
with_log(caller, "await resources", fn -> with_log(caller, "await resources", fn ->
@ -128,6 +136,7 @@ defmodule Livebook.Runtime.Fly do
with_log(caller, "create machine", fn -> with_log(caller, "create machine", fn ->
create_machine(config, runtime_data) create_machine(config, runtime_data)
end), end),
_ <- send(watcher_pid, {:machine_created, machine_id}),
child_node <- :"#{node_base}@#{machine_id}.vm.#{config.app_name}.internal", child_node <- :"#{node_base}@#{machine_id}.vm.#{config.app_name}.internal",
{:ok, proxy_port} <- {:ok, proxy_port} <-
with_log(caller, "start proxy", fn -> with_log(caller, "start proxy", fn ->
@ -151,6 +160,8 @@ defmodule Livebook.Runtime.Fly do
send(primary_pid, :node_initialized) send(primary_pid, :node_initialized)
send(watcher_pid, :done)
runtime = %{runtime | node: child_node, server_pid: server_pid, machine_id: machine_id} runtime = %{runtime | node: child_node, server_pid: server_pid, machine_id: machine_id}
send(caller, {:runtime_connect_done, self(), {:ok, runtime}}) send(caller, {:runtime_connect_done, self(), {:ok, runtime}})
@ -172,6 +183,29 @@ defmodule Livebook.Runtime.Fly do
{:noreply, state} {:noreply, state}
end end
defp watcher(parent, config) do
ref = Process.monitor(parent)
watcher_loop(%{ref: ref, config: config, machine_id: nil})
end
defp watcher_loop(state) do
receive do
{:DOWN, ref, :process, _pid, _reason} when ref == state.ref ->
# If the parent process is killed, we try to eagerly free the
# created resources
if machine_id = state.machine_id do
config = state.config
_ = Livebook.FlyAPI.delete_machine(config.token, config.app_name, machine_id)
end
{:machine_created, machine_id} ->
watcher_loop(%{state | machine_id: machine_id})
:done ->
:ok
end
end
defp create_machine(config, runtime_data) do defp create_machine(config, runtime_data) do
base_image = Enum.find(Livebook.Config.docker_images(), &(&1.tag == config.docker_tag)) base_image = Enum.find(Livebook.Config.docker_images(), &(&1.tag == config.docker_tag))
image = "ghcr.io/livebook-dev/livebook:#{base_image.tag}" image = "ghcr.io/livebook-dev/livebook:#{base_image.tag}"

View file

@ -2424,9 +2424,15 @@ defmodule Livebook.Session do
end end
defp handle_action(state, {:disconnect_runtime, runtime}) do defp handle_action(state, {:disconnect_runtime, runtime}) do
Runtime.disconnect(runtime) if state.runtime_connect do
state = %{state | runtime_monitor_ref: nil} Process.demonitor(state.runtime_connect.ref, [:flush])
after_runtime_disconnected(state) Process.exit(state.runtime_connect.pid, :kill)
%{state | runtime_connect: nil}
else
Runtime.disconnect(runtime)
state = %{state | runtime_monitor_ref: nil}
after_runtime_disconnected(state)
end
end end
defp handle_action(state, {:start_evaluation, cell, section, evaluation_opts}) do defp handle_action(state, {:start_evaluation, cell, section, evaluation_opts}) do

View file

@ -921,7 +921,7 @@ defmodule Livebook.Session.Data do
end end
def apply_operation(data, {:disconnect_runtime, _client_id}) do def apply_operation(data, {:disconnect_runtime, _client_id}) do
with :connected <- data.runtime_status do with true <- data.runtime_status in [:connecting, :connected] do
data data
|> with_actions() |> with_actions()
|> disconnect_runtime() |> disconnect_runtime()

View file

@ -122,16 +122,27 @@ defmodule LivebookWeb.SessionLive.FlyRuntimeComponent do
/> />
<div class="mt-8"> <div class="mt-8">
<.button <div class="flex gap-2">
phx-click="init" <.button
phx-target={@myself} phx-click="init"
disabled={ phx-target={@myself}
@runtime_status == :connecting or not @specs_changeset.valid? or disabled={
volume_errors(@volume_id, @volumes, @region) != [] @runtime_status == :connecting or not @specs_changeset.valid? or
} volume_errors(@volume_id, @volumes, @region) != []
> }
<%= label(@app_name, @runtime, @runtime_status) %> >
</.button> <%= label(@app_name, @runtime, @runtime_status) %>
</.button>
<.button
:if={@runtime_status == :connecting}
color="red"
outlined
phx-click="disconnect"
phx-target={@myself}
>
Disconnect
</.button>
</div>
<div <div
:if={reconnecting?(@app_name, @runtime) && @runtime_connect_info} :if={reconnecting?(@app_name, @runtime) && @runtime_connect_info}
class="mt-4 scroll-mb-8" class="mt-4 scroll-mb-8"
@ -582,6 +593,11 @@ defmodule LivebookWeb.SessionLive.FlyRuntimeComponent do
{:noreply, socket} {:noreply, socket}
end end
def handle_event("disconnect", %{}, socket) do
Session.disconnect_runtime(socket.assigns.session.pid)
{:noreply, socket}
end
def handle_event("open_save_config", %{}, socket) do def handle_event("open_save_config", %{}, socket) do
changeset = config_secret_changeset(socket, %{name: @config_secret_prefix}) changeset = config_secret_changeset(socket, %{name: @config_secret_prefix})
save_config = %{changeset: changeset, inflight: false, error: false} save_config = %{changeset: changeset, inflight: false, error: false}

View file

@ -676,7 +676,7 @@ defmodule LivebookWeb.SessionLive.Render do
</.button> </.button>
<.button <.button
:if={@data_view.runtime_status == :connected} :if={@data_view.runtime_status in [:connected, :connecting]}
color="red" color="red"
outlined outlined
type="button" type="button"

View file

@ -3940,11 +3940,8 @@ defmodule Livebook.Session.DataTest do
end end
describe "apply_operation/2 given :disconnect_runtime" do describe "apply_operation/2 given :disconnect_runtime" do
test "returns an error if the runtime is not connected" do test "returns an error if the runtime is disconnected" do
data = data = Data.new()
data_after_operations!([
{:connect_runtime, @cid}
])
operation = {:disconnect_runtime, @cid} operation = {:disconnect_runtime, @cid}