livebook/lib/livebook/runtime/elixir_standalone.ex
2024-05-20 17:04:04 -03:00

330 lines
10 KiB
Elixir

defmodule Livebook.Runtime.ElixirStandalone do
defstruct [:node, :server_pid]
# A runtime backed by a standalone Elixir node managed by Livebook.
#
# Livebook is responsible for starting and terminating the node.
# Most importantly we have to make sure the started node doesn't
# stay in the system when the session or the entire Livebook
# terminates.
alias Livebook.Utils
@type t :: %__MODULE__{
node: node() | nil,
server_pid: pid() | nil
}
@doc """
Returns a new runtime instance.
"""
@spec new() :: t()
def new() do
%__MODULE__{}
end
@doc """
Starts a new Elixir node (a system process) and initializes it with
Livebook-specific modules and processes.
If no process calls `Runtime.take_ownership/1` for a period of time,
the node automatically terminates. Whoever takes the ownersihp,
becomes the owner and as soon as it terminates, the node terminates
as well. The node may also be terminated by calling `Runtime.disconnect/1`.
Note: to start the node it is required that `elixir` is a recognised
executable within the system.
"""
@spec connect(t()) :: {:ok, t()} | {:error, String.t()}
def connect(runtime) do
child_node = Livebook.EPMD.random_child_node()
Utils.temporarily_register(self(), child_node, fn ->
init_opts = [
runtime_server_opts: [
extra_smart_cell_definitions: Livebook.Runtime.Definitions.smart_cell_definitions()
]
]
with {:ok, elixir_path} <- find_elixir_executable(),
port = start_elixir_node(elixir_path, child_node),
{:ok, server_pid} <- parent_init_sequence(child_node, port, init_opts) do
runtime = %{runtime | node: child_node, server_pid: server_pid}
{:ok, runtime}
else
{:error, error} -> {:error, error}
end
end)
end
defp find_elixir_executable() do
case System.find_executable("elixir") do
nil -> {:error, "no Elixir executable found in PATH"}
path -> {:ok, path}
end
end
defp start_elixir_node(elixir_path, node_name) do
# Here we create a port to start the system process in a non-blocking way.
Port.open({:spawn_executable, elixir_path}, [
:binary,
# We don't communicate with the system process via stdio,
# contrarily, we want any non-captured output to go directly
# to the terminal
:nouse_stdio,
:hide,
args: elixir_flags(node_name)
])
end
# ---
#
# Once the new node is spawned we need to establish a connection,
# initialize it and make sure it correctly reacts to the parent node terminating.
#
# The procedure goes as follows:
#
# 1. The child sends {:node_initialized, ref} message to the parent
# to communicate it's ready for initialization.
#
# 2. The parent initializes the child node - loads necessary modules,
# starts the NodeManager process and a single RuntimeServer process.
#
# 3. The parent sends {:node_initialized, ref} message back to the child,
# to communicate successful initialization.
#
# 4. The child starts monitoring the NodeManager process and freezes
# until the NodeManager process terminates. The NodeManager process
# serves as the leading remote process and represents the node from now on.
#
# The nodes either successfully go through this flow or return an error,
# either if the other node dies or is not responding for too long.
#
# ---
defp parent_init_sequence(child_node, port, init_opts) do
port_ref = Port.monitor(port)
loop = fn loop ->
receive do
{:node_started, init_ref, ^child_node, child_port, primary_pid} ->
Port.demonitor(port_ref)
Livebook.EPMD.update_child_node(child_node, child_port)
server_pid = Livebook.Runtime.ErlDist.initialize(child_node, init_opts)
send(primary_pid, {:node_initialized, init_ref})
{:ok, server_pid}
{^port, {:data, _output}} ->
loop.(loop)
{:DOWN, ^port_ref, :port, _object, reason} ->
{:error,
"Elixir terminated unexpectedly, please check your logs for errors. Reason: #{inspect(reason)}"}
after
# Use a longer timeout to account for longer child node startup.
30_000 ->
{:error, "connection timed out"}
end
end
loop.(loop)
end
# Note Windows does not handle escaped quotes and newlines the same way as Unix,
# so the string cannot have constructs newlines nor strings. That's why we pass
# the parent node name as ARGV and write the code avoiding newlines.
#
# This boot script must be kept in sync with Livebook.EPMD.
#
# Also note that we explicitly halt, just in case `System.no_halt(true)` is
# called within the runtime.
@child_node_eval_string """
{:ok, [[mode, node]]} = :init.get_argument(:livebook_current);\
{:ok, _} = :net_kernel.start(List.to_atom(node), %{name_domain: List.to_atom(mode)});\
{:ok, [[parent_node, _port]]} = :init.get_argument(:livebook_parent);\
dist_port = :persistent_term.get(:livebook_dist_port, 0);\
init_ref = make_ref();\
parent_process = {node(), List.to_atom(parent_node)};\
send(parent_process, {:node_started, init_ref, node(), dist_port, self()});\
receive do {:node_initialized, ^init_ref} ->\
manager_ref = Process.monitor(Livebook.Runtime.ErlDist.NodeManager);\
receive do {:DOWN, ^manager_ref, :process, _object, _reason} -> :ok end;\
after 10_000 ->\
:timeout;\
end;\
System.halt()\
"""
if @child_node_eval_string =~ "\n" do
raise "invalid @child_node_eval_string, newline found: #{inspect(@child_node_eval_string)}"
end
defp elixir_flags(node_name) do
parent_name = node()
parent_port = Livebook.EPMD.dist_port()
mode = if Livebook.Config.longname(), do: :longnames, else: :shortnames
epmdless_flags =
if parent_port != 0 do
"-epmd_module Elixir.Livebook.EPMD -start_epmd false -erl_epmd_port 0 "
else
""
end
[
"--erl",
# Minimize schedulers busy wait threshold,
# so that they go to sleep immediately after evaluation.
# Increase the default stack for dirty io threads (cuda requires it).
# Enable ANSI escape codes as we handle them with HTML.
# Disable stdin, so that the system process never tries to read terminal input.
"+sbwt none +sbwtdcpu none +sbwtdio none +sssdio 128 -elixir ansi_enabled true -noinput " <>
epmdless_flags <>
"-livebook_parent #{parent_name} #{parent_port} -livebook_current #{mode} #{node_name}",
# Add the location of Livebook.EPMD
"-pa",
Application.app_dir(:livebook, "priv/epmd"),
# Make the node hidden, so it doesn't automatically join the cluster
"--hidden",
# Use the cookie in Livebook
"--cookie",
Atom.to_string(Node.get_cookie()),
"--eval",
@child_node_eval_string
]
end
end
defimpl Livebook.Runtime, for: Livebook.Runtime.ElixirStandalone do
alias Livebook.Runtime.ErlDist.RuntimeServer
def describe(runtime) do
[{"Type", "Elixir standalone"}] ++
if connected?(runtime) do
[{"Node name", Atom.to_string(runtime.node)}]
else
[]
end
end
def connect(runtime) do
Livebook.Runtime.ElixirStandalone.connect(runtime)
end
def connected?(runtime) do
runtime.server_pid != nil
end
def take_ownership(runtime, opts \\ []) do
RuntimeServer.attach(runtime.server_pid, self(), opts)
Process.monitor(runtime.server_pid)
end
def disconnect(runtime) do
:ok = RuntimeServer.stop(runtime.server_pid)
{:ok, %{runtime | node: nil, server_pid: nil}}
end
def duplicate(_runtime) do
Livebook.Runtime.ElixirStandalone.new()
end
def evaluate_code(runtime, language, code, locator, parent_locators, opts \\ []) do
RuntimeServer.evaluate_code(
runtime.server_pid,
language,
code,
locator,
parent_locators,
opts
)
end
def forget_evaluation(runtime, locator) do
RuntimeServer.forget_evaluation(runtime.server_pid, locator)
end
def drop_container(runtime, container_ref) do
RuntimeServer.drop_container(runtime.server_pid, container_ref)
end
def handle_intellisense(runtime, send_to, request, parent_locators, node) do
RuntimeServer.handle_intellisense(runtime.server_pid, send_to, request, parent_locators, node)
end
def read_file(runtime, path) do
RuntimeServer.read_file(runtime.server_pid, path)
end
def transfer_file(runtime, path, file_id, callback) do
RuntimeServer.transfer_file(runtime.server_pid, path, file_id, callback)
end
def relabel_file(runtime, file_id, new_file_id) do
RuntimeServer.relabel_file(runtime.server_pid, file_id, new_file_id)
end
def revoke_file(runtime, file_id) do
RuntimeServer.revoke_file(runtime.server_pid, file_id)
end
def start_smart_cell(runtime, kind, ref, attrs, parent_locators) do
RuntimeServer.start_smart_cell(runtime.server_pid, kind, ref, attrs, parent_locators)
end
def set_smart_cell_parent_locators(runtime, ref, parent_locators) do
RuntimeServer.set_smart_cell_parent_locators(runtime.server_pid, ref, parent_locators)
end
def stop_smart_cell(runtime, ref) do
RuntimeServer.stop_smart_cell(runtime.server_pid, ref)
end
def fixed_dependencies?(_runtime), do: false
def add_dependencies(_runtime, code, dependencies) do
Livebook.Runtime.Dependencies.add_dependencies(code, dependencies)
end
def has_dependencies?(runtime, dependencies) do
RuntimeServer.has_dependencies?(runtime.server_pid, dependencies)
end
def snippet_definitions(_runtime) do
Livebook.Runtime.Definitions.snippet_definitions()
end
def search_packages(_runtime, send_to, search) do
Livebook.Runtime.Dependencies.search_packages_on_hex(send_to, search)
end
def disable_dependencies_cache(runtime) do
RuntimeServer.disable_dependencies_cache(runtime.server_pid)
end
def put_system_envs(runtime, envs) do
RuntimeServer.put_system_envs(runtime.server_pid, envs)
end
def delete_system_envs(runtime, names) do
RuntimeServer.delete_system_envs(runtime.server_pid, names)
end
def restore_transient_state(runtime, transient_state) do
RuntimeServer.restore_transient_state(runtime.server_pid, transient_state)
end
def register_clients(runtime, clients) do
RuntimeServer.register_clients(runtime.server_pid, clients)
end
def unregister_clients(runtime, client_ids) do
RuntimeServer.unregister_clients(runtime.server_pid, client_ids)
end
def fetch_proxy_handler(runtime) do
RuntimeServer.fetch_proxy_handler(runtime.server_pid)
end
end