mirror of
https://github.com/livebook-dev/livebook.git
synced 2024-09-20 10:05:57 +08:00
Merge standalone init module into Elixir runtime
This commit is contained in:
parent
6d7f416f18
commit
a28f5de36d
|
@ -8,8 +8,6 @@ defmodule Livebook.Runtime.ElixirStandalone do
|
|||
# stay in the system when the session or the entire Livebook
|
||||
# terminates.
|
||||
|
||||
import Livebook.Runtime.StandaloneInit
|
||||
|
||||
alias Livebook.Utils
|
||||
|
||||
@type t :: %__MODULE__{
|
||||
|
@ -50,7 +48,7 @@ defmodule Livebook.Runtime.ElixirStandalone do
|
|||
|
||||
with {:ok, elixir_path} <- find_elixir_executable(),
|
||||
port = start_elixir_node(elixir_path, child_node),
|
||||
{:ok, server_pid} <- parent_init_sequence(child_node, port, init_opts: init_opts) do
|
||||
{:ok, server_pid} <- parent_init_sequence(child_node, port, init_opts) do
|
||||
runtime = %{runtime | node: child_node, server_pid: server_pid}
|
||||
{:ok, runtime}
|
||||
else
|
||||
|
@ -59,6 +57,13 @@ defmodule Livebook.Runtime.ElixirStandalone do
|
|||
end)
|
||||
end
|
||||
|
||||
defp find_elixir_executable() do
|
||||
case System.find_executable("elixir") do
|
||||
nil -> {:error, "no Elixir executable found in PATH"}
|
||||
path -> {:ok, path}
|
||||
end
|
||||
end
|
||||
|
||||
defp start_elixir_node(elixir_path, node_name) do
|
||||
# Here we create a port to start the system process in a non-blocking way.
|
||||
Port.open({:spawn_executable, elixir_path}, [
|
||||
|
@ -71,6 +76,125 @@ defmodule Livebook.Runtime.ElixirStandalone do
|
|||
args: elixir_flags(node_name)
|
||||
])
|
||||
end
|
||||
|
||||
# ---
|
||||
#
|
||||
# Once the new node is spawned we need to establish a connection,
|
||||
# initialize it and make sure it correctly reacts to the parent node terminating.
|
||||
#
|
||||
# The procedure goes as follows:
|
||||
#
|
||||
# 1. The child sends {:node_initialized, ref} message to the parent
|
||||
# to communicate it's ready for initialization.
|
||||
#
|
||||
# 2. The parent initializes the child node - loads necessary modules,
|
||||
# starts the NodeManager process and a single RuntimeServer process.
|
||||
#
|
||||
# 3. The parent sends {:node_initialized, ref} message back to the child,
|
||||
# to communicate successful initialization.
|
||||
#
|
||||
# 4. The child starts monitoring the NodeManager process and freezes
|
||||
# until the NodeManager process terminates. The NodeManager process
|
||||
# serves as the leading remote process and represents the node from now on.
|
||||
#
|
||||
# The nodes either successfully go through this flow or return an error,
|
||||
# either if the other node dies or is not responding for too long.
|
||||
#
|
||||
# ---
|
||||
|
||||
defp parent_init_sequence(child_node, port, init_opts) do
|
||||
port_ref = Port.monitor(port)
|
||||
|
||||
loop = fn loop ->
|
||||
receive do
|
||||
{:node_started, init_ref, ^child_node, child_port, primary_pid} ->
|
||||
Port.demonitor(port_ref)
|
||||
Livebook.EPMD.update_child_node(child_node, child_port)
|
||||
server_pid = Livebook.Runtime.ErlDist.initialize(child_node, init_opts)
|
||||
|
||||
send(primary_pid, {:node_initialized, init_ref})
|
||||
{:ok, server_pid}
|
||||
|
||||
{^port, {:data, _output}} ->
|
||||
loop.(loop)
|
||||
|
||||
{:DOWN, ^port_ref, :port, _object, reason} ->
|
||||
{:error,
|
||||
"Elixir terminated unexpectedly, please check your logs for errors. Reason: #{inspect(reason)}"}
|
||||
after
|
||||
# Use a longer timeout to account for longer child node startup.
|
||||
30_000 ->
|
||||
{:error, "connection timed out"}
|
||||
end
|
||||
end
|
||||
|
||||
loop.(loop)
|
||||
end
|
||||
|
||||
# Note Windows does not handle escaped quotes and newlines the same way as Unix,
|
||||
# so the string cannot have constructs newlines nor strings. That's why we pass
|
||||
# the parent node name as ARGV and write the code avoiding newlines.
|
||||
#
|
||||
# This boot script must be kept in sync with Livebook.EPMD.
|
||||
#
|
||||
# Also note that we explicitly halt, just in case `System.no_halt(true)` is
|
||||
# called within the runtime.
|
||||
@child_node_eval_string """
|
||||
{:ok, [[mode, node]]} = :init.get_argument(:livebook_current);\
|
||||
{:ok, _} = :net_kernel.start(List.to_atom(node), %{name_domain: List.to_atom(mode)});\
|
||||
{:ok, [[parent_node, _port]]} = :init.get_argument(:livebook_parent);\
|
||||
dist_port = :persistent_term.get(:livebook_dist_port, 0);\
|
||||
init_ref = make_ref();\
|
||||
parent_process = {node(), List.to_atom(parent_node)};\
|
||||
send(parent_process, {:node_started, init_ref, node(), dist_port, self()});\
|
||||
receive do {:node_initialized, ^init_ref} ->\
|
||||
manager_ref = Process.monitor(Livebook.Runtime.ErlDist.NodeManager);\
|
||||
receive do {:DOWN, ^manager_ref, :process, _object, _reason} -> :ok end;\
|
||||
after 10_000 ->\
|
||||
:timeout;\
|
||||
end;\
|
||||
System.halt()\
|
||||
"""
|
||||
|
||||
if @child_node_eval_string =~ "\n" do
|
||||
raise "invalid @child_node_eval_string, newline found: #{inspect(@child_node_eval_string)}"
|
||||
end
|
||||
|
||||
defp elixir_flags(node_name) do
|
||||
parent_name = node()
|
||||
parent_port = Livebook.EPMD.dist_port()
|
||||
|
||||
mode = if Livebook.Config.longname(), do: :longnames, else: :shortnames
|
||||
|
||||
epmdless_flags =
|
||||
if parent_port != 0 do
|
||||
"-epmd_module Elixir.Livebook.EPMD -start_epmd false -erl_epmd_port 0 "
|
||||
else
|
||||
""
|
||||
end
|
||||
|
||||
[
|
||||
"--erl",
|
||||
# Minimize schedulers busy wait threshold,
|
||||
# so that they go to sleep immediately after evaluation.
|
||||
# Increase the default stack for dirty io threads (cuda requires it).
|
||||
# Enable ANSI escape codes as we handle them with HTML.
|
||||
# Disable stdin, so that the system process never tries to read terminal input.
|
||||
"+sbwt none +sbwtdcpu none +sbwtdio none +sssdio 128 -elixir ansi_enabled true -noinput " <>
|
||||
epmdless_flags <>
|
||||
"-livebook_parent #{parent_name} #{parent_port} -livebook_current #{mode} #{node_name}",
|
||||
# Add the location of Livebook.EPMD
|
||||
"-pa",
|
||||
Application.app_dir(:livebook, "priv/epmd"),
|
||||
# Make the node hidden, so it doesn't automatically join the cluster
|
||||
"--hidden",
|
||||
# Use the cookie in Livebook
|
||||
"--cookie",
|
||||
Atom.to_string(Node.get_cookie()),
|
||||
"--eval",
|
||||
@child_node_eval_string
|
||||
]
|
||||
end
|
||||
end
|
||||
|
||||
defimpl Livebook.Runtime, for: Livebook.Runtime.ElixirStandalone do
|
||||
|
|
|
@ -1,151 +0,0 @@
|
|||
defmodule Livebook.Runtime.StandaloneInit do
|
||||
# TODO: Move logic inside ElixirStandalone module.
|
||||
# Generic functionality related to starting and setting up
|
||||
# a new Elixir system process. It's used by ElixirStandalone.
|
||||
|
||||
@doc """
|
||||
Tries locating Elixir executable in PATH.
|
||||
"""
|
||||
@spec find_elixir_executable() :: {:ok, String.t()} | {:error, String.t()}
|
||||
def find_elixir_executable() do
|
||||
case System.find_executable("elixir") do
|
||||
nil -> {:error, "no Elixir executable found in PATH"}
|
||||
path -> {:ok, path}
|
||||
end
|
||||
end
|
||||
|
||||
# ---
|
||||
#
|
||||
# Once the new node is spawned we need to establish a connection,
|
||||
# initialize it and make sure it correctly reacts to the parent node terminating.
|
||||
#
|
||||
# The procedure goes as follows:
|
||||
#
|
||||
# 1. The child sends {:node_initialized, ref} message to the parent
|
||||
# to communicate it's ready for initialization.
|
||||
#
|
||||
# 2. The parent initializes the child node - loads necessary modules,
|
||||
# starts the NodeManager process and a single RuntimeServer process.
|
||||
#
|
||||
# 3. The parent sends {:node_initialized, ref} message back to the child,
|
||||
# to communicate successful initialization.
|
||||
#
|
||||
# 4. The child starts monitoring the NodeManager process and freezes
|
||||
# until the NodeManager process terminates. The NodeManager process
|
||||
# serves as the leading remote process and represents the node from now on.
|
||||
#
|
||||
# The nodes either successfully go through this flow or return an error,
|
||||
# either if the other node dies or is not responding for too long.
|
||||
#
|
||||
# ---
|
||||
|
||||
@doc """
|
||||
Performs the parent side of the initialization contract.
|
||||
|
||||
Should be called by the initializing process on the parent node.
|
||||
|
||||
## Options
|
||||
|
||||
* `:init_opts` - see `Livebook.Runtime.ErlDist.initialize/2`
|
||||
|
||||
"""
|
||||
@spec parent_init_sequence(node(), port(), keyword()) :: {:ok, pid()} | {:error, String.t()}
|
||||
def parent_init_sequence(child_node, port, opts \\ []) do
|
||||
port_ref = Port.monitor(port)
|
||||
|
||||
loop = fn loop ->
|
||||
receive do
|
||||
{:node_started, init_ref, ^child_node, child_port, primary_pid} ->
|
||||
Port.demonitor(port_ref)
|
||||
Livebook.EPMD.update_child_node(child_node, child_port)
|
||||
server_pid = Livebook.Runtime.ErlDist.initialize(child_node, opts[:init_opts] || [])
|
||||
|
||||
send(primary_pid, {:node_initialized, init_ref})
|
||||
|
||||
{:ok, server_pid}
|
||||
|
||||
{^port, {:data, _output}} ->
|
||||
loop.(loop)
|
||||
|
||||
{:DOWN, ^port_ref, :port, _object, reason} ->
|
||||
{:error,
|
||||
"Elixir terminated unexpectedly, please check your logs for errors. Reason: #{inspect(reason)}"}
|
||||
after
|
||||
# Use a longer timeout to account for longer child node startup.
|
||||
30_000 ->
|
||||
{:error, "connection timed out"}
|
||||
end
|
||||
end
|
||||
|
||||
loop.(loop)
|
||||
end
|
||||
|
||||
# Note Windows does not handle escaped quotes and newlines the same way as Unix,
|
||||
# so the string cannot have constructs newlines nor strings. That's why we pass
|
||||
# the parent node name as ARGV and write the code avoiding newlines.
|
||||
#
|
||||
# This boot script must be kept in sync with Livebook.EPMD.
|
||||
#
|
||||
# Also note that we explicitly halt, just in case `System.no_halt(true)` is
|
||||
# called within the runtime.
|
||||
@child_node_eval_string """
|
||||
{:ok, [[mode, node]]} = :init.get_argument(:livebook_current);\
|
||||
{:ok, _} = :net_kernel.start(List.to_atom(node), %{name_domain: List.to_atom(mode)});\
|
||||
{:ok, [[parent_node, _port]]} = :init.get_argument(:livebook_parent);\
|
||||
dist_port = :persistent_term.get(:livebook_dist_port, 0);\
|
||||
init_ref = make_ref();\
|
||||
parent_process = {node(), List.to_atom(parent_node)};\
|
||||
send(parent_process, {:node_started, init_ref, node(), dist_port, self()});\
|
||||
receive do {:node_initialized, ^init_ref} ->\
|
||||
manager_ref = Process.monitor(Livebook.Runtime.ErlDist.NodeManager);\
|
||||
receive do {:DOWN, ^manager_ref, :process, _object, _reason} -> :ok end;\
|
||||
after 10_000 ->\
|
||||
:timeout;\
|
||||
end;\
|
||||
System.halt()\
|
||||
"""
|
||||
|
||||
if @child_node_eval_string =~ "\n" do
|
||||
raise "invalid @child_node_eval_string, newline found: #{inspect(@child_node_eval_string)}"
|
||||
end
|
||||
|
||||
@doc """
|
||||
A list of common flags used for spawned Elixir runtimes.
|
||||
"""
|
||||
@spec elixir_flags(node()) :: list()
|
||||
def elixir_flags(node_name) do
|
||||
parent_name = node()
|
||||
parent_port = Livebook.EPMD.dist_port()
|
||||
|
||||
mode = if Livebook.Config.longname(), do: :longnames, else: :shortnames
|
||||
|
||||
epmdless_flags =
|
||||
if parent_port != 0 do
|
||||
"-epmd_module Elixir.Livebook.EPMD -start_epmd false -erl_epmd_port 0 "
|
||||
else
|
||||
""
|
||||
end
|
||||
|
||||
[
|
||||
"--erl",
|
||||
# Minimize schedulers busy wait threshold,
|
||||
# so that they go to sleep immediately after evaluation.
|
||||
# Increase the default stack for dirty io threads (cuda requires it).
|
||||
# Enable ANSI escape codes as we handle them with HTML.
|
||||
# Disable stdin, so that the system process never tries to read terminal input.
|
||||
"+sbwt none +sbwtdcpu none +sbwtdio none +sssdio 128 -elixir ansi_enabled true -noinput " <>
|
||||
epmdless_flags <>
|
||||
"-livebook_parent #{parent_name} #{parent_port} -livebook_current #{mode} #{node_name}",
|
||||
# Add the location of Livebook.EPMD
|
||||
"-pa",
|
||||
Application.app_dir(:livebook, "priv/epmd"),
|
||||
# Make the node hidden, so it doesn't automatically join the cluster
|
||||
"--hidden",
|
||||
# Use the cookie in Livebook
|
||||
"--cookie",
|
||||
Atom.to_string(Node.get_cookie()),
|
||||
"--eval",
|
||||
@child_node_eval_string
|
||||
]
|
||||
end
|
||||
end
|
Loading…
Reference in a new issue