Wait for Fly machine to start before attempting to connect (#2712)

This commit is contained in:
Jonatan Kłosko 2024-07-17 08:21:56 +02:00 committed by GitHub
parent 7858466ab5
commit 06279bc98f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 35 additions and 0 deletions

View file

@ -188,6 +188,21 @@ defmodule Livebook.FlyAPI do
%{id: machine["id"], private_ip: machine["private_ip"]}
end
@doc """
Blocks until the given machine reaches the `started` state.

Sends a request to the machine's `wait` endpoint, asking for the
`started` state. Returns `:ok` when the request succeeds; any other
result of the request is returned unchanged.
"""
@spec await_machine_started(String.t(), String.t(), String.t()) :: :ok | {:error, error}
def await_machine_started(token, app_name, machine_id) do
  wait_path = "/v1/apps/#{app_name}/machines/#{machine_id}/wait"

  # NOTE(review): the endpoint `timeout` is presumably in seconds and
  # `receive_timeout` in milliseconds, which would leave the HTTP client
  # waiting longer than the server-side wait - confirm against the
  # Fly API and the HTTP client used by flaps_request/3.
  request_opts = [
    params: %{state: "started", timeout: 60},
    receive_timeout: 90_000,
    retry: false
  ]

  case flaps_request(token, wait_path, request_opts) do
    {:ok, _data} -> :ok
    other -> other
  end
end
defp flaps_request(token, path, opts \\ []) do
opts =
[base_url: @flaps_url, url: path, auth: {:bearer, token}]

View file

@ -123,6 +123,10 @@ defmodule Livebook.Runtime.Fly do
with_log(caller, "start proxy", fn ->
start_fly_proxy(config.app_name, machine_ip, local_port, remote_port, config.token)
end),
:ok <-
with_log(caller, "machine starting", fn ->
await_machine_started(config, machine_id)
end),
:ok <-
with_log(caller, "connect to node", fn ->
connect_loop(child_node, 40, 250)
@ -203,6 +207,21 @@ defmodule Livebook.Runtime.Fly do
end
end
# Waits for the machine identified by `machine_id` to start, using the
# token and app name from `config`.
#
# Returns `:ok` on success, or `{:error, message}` where `message` is a
# human-readable string describing the failure.
defp await_machine_started(config, machine_id) do
  case Livebook.FlyAPI.await_machine_started(config.token, config.app_name, machine_id) do
    :ok ->
      :ok

    # A 408 status is reported as a timeout, with a hint on where to
    # look for the cause. This clause must come before the generic one
    # below, which would otherwise also match such an error.
    {:error, %{status: 408}} ->
      {:error,
       "timed out while waiting for the machine to start. See the app" <>
         " logs in the Fly.io dashboard to determine the reason"}

    # Any other error carrying a message is surfaced with that message.
    {:error, %{message: message}} ->
      {:error, "failed while waiting for the machine to start, reason: #{message}"}
  end
end
# Clause matched when the second argument is 0: gives up and returns an
# error without using the node or the interval.
defp connect_loop(_node, 0, _interval),
  do: {:error, "could not establish connection with the node"}

View file

@ -26,6 +26,7 @@ defmodule Livebook.Runtime.FlyTest do
assert_receive {:runtime_connect_info, ^pid, "create machine"}, @assert_receive_timeout
assert_receive {:runtime_connect_info, ^pid, "start proxy"}, @assert_receive_timeout
assert_receive {:runtime_connect_info, ^pid, "machine starting"}, @assert_receive_timeout
assert_receive {:runtime_connect_info, ^pid, "connect to node"}, @assert_receive_timeout
assert_receive {:runtime_connect_info, ^pid, "initialize node"}, @assert_receive_timeout
assert_receive {:runtime_connect_done, ^pid, {:ok, runtime}}, @assert_receive_timeout