mirror of
https://github.com/livebook-dev/livebook.git
synced 2025-09-06 04:54:29 +08:00
Support audio transcription on drag and drop (#2227)
This commit is contained in:
parent
f25d6663d8
commit
f8581c225d
2 changed files with 55 additions and 34 deletions
|
@ -16,8 +16,9 @@ config :logger, :console,
|
||||||
# Use Jason for JSON parsing in Phoenix
|
# Use Jason for JSON parsing in Phoenix
|
||||||
config :phoenix, :json_library, Jason
|
config :phoenix, :json_library, Jason
|
||||||
|
|
||||||
# Add mime type to upload notebooks with `Phoenix.LiveView.Upload`
|
# Additional mime types
|
||||||
config :mime, :types, %{
|
config :mime, :types, %{
|
||||||
|
"audio/m4a" => ["m4a"],
|
||||||
"text/plain" => ["livemd"]
|
"text/plain" => ["livemd"]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,12 +31,12 @@ defmodule Livebook.Runtime.Definitions do
|
||||||
|
|
||||||
kino_bumblebee = %{
|
kino_bumblebee = %{
|
||||||
name: "kino_bumblebee",
|
name: "kino_bumblebee",
|
||||||
dependency: %{dep: {:kino_bumblebee, "~> 0.3.0"}, config: []}
|
dependency: %{dep: {:kino_bumblebee, github: "livebook-dev/kino_bumblebee"}, config: []}
|
||||||
}
|
}
|
||||||
|
|
||||||
exla = %{
|
exla = %{
|
||||||
name: "exla",
|
name: "exla",
|
||||||
dependency: %{dep: {:exla, "~> 0.5.1"}, config: [nx: [default_backend: EXLA.Backend]]}
|
dependency: %{dep: {:exla, ">= 0.0.0"}, config: [nx: [default_backend: EXLA.Backend]]}
|
||||||
}
|
}
|
||||||
|
|
||||||
torchx = %{
|
torchx = %{
|
||||||
|
@ -338,40 +338,60 @@ defmodule Livebook.Runtime.Definitions do
|
||||||
type: :file_action,
|
type: :file_action,
|
||||||
file_types: ["audio/*"],
|
file_types: ["audio/*"],
|
||||||
description: "Transcribe speech",
|
description: "Transcribe speech",
|
||||||
source: """
|
source:
|
||||||
# To explore more models, see "+ Smart" > "Neural Network task"
|
|
||||||
|
|
||||||
{:ok, model_info} = Bumblebee.load_model({:hf, "openai/whisper-tiny"})
|
|
||||||
{:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"})
|
|
||||||
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"})
|
|
||||||
{:ok, generation_config} = Bumblebee.load_generation_config({:hf, "openai/whisper-tiny"})
|
|
||||||
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)
|
|
||||||
|
|
||||||
#{if windows? do
|
|
||||||
"""
|
"""
|
||||||
serving = Bumblebee.Audio.speech_to_text_whisper(model_info, featurizer, tokenizer, generation_config,
|
# To explore more models, see "+ Smart" > "Neural Network task"
|
||||||
chunk_num_seconds: 30,
|
|
||||||
timestamps: :segments,
|
|
||||||
compile: [batch_size: 4]
|
|
||||||
)\
|
|
||||||
"""
|
|
||||||
else
|
|
||||||
"""
|
|
||||||
serving =
|
|
||||||
Bumblebee.Audio.speech_to_text_whisper(model_info, featurizer, tokenizer, generation_config,
|
|
||||||
chunk_num_seconds: 30,
|
|
||||||
timestamps: :segments,
|
|
||||||
compile: [batch_size: 4],
|
|
||||||
defn_options: [compiler: EXLA]
|
|
||||||
)\
|
|
||||||
"""
|
|
||||||
end}
|
|
||||||
|
|
||||||
path = Kino.FS.file_path("{{NAME}}")
|
{:ok, model_info} = Bumblebee.load_model({:hf, "openai/whisper-tiny"})
|
||||||
output = Nx.Serving.run(serving, {:file, path})
|
{:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"})
|
||||||
|
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"})
|
||||||
|
{:ok, generation_config} = Bumblebee.load_generation_config({:hf, "openai/whisper-tiny"})
|
||||||
|
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)
|
||||||
|
|
||||||
# output.chunks |> Enum.map_join(& &1.text) |> String.trim()\
|
""" <>
|
||||||
""",
|
if windows? do
|
||||||
|
"""
|
||||||
|
serving =
|
||||||
|
Bumblebee.Audio.speech_to_text_whisper(model_info, featurizer, tokenizer, generation_config,
|
||||||
|
chunk_num_seconds: 30,
|
||||||
|
timestamps: :segments,
|
||||||
|
stream: true,
|
||||||
|
compile: [batch_size: 4]
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
else
|
||||||
|
"""
|
||||||
|
serving =
|
||||||
|
Bumblebee.Audio.speech_to_text_whisper(model_info, featurizer, tokenizer, generation_config,
|
||||||
|
chunk_num_seconds: 30,
|
||||||
|
timestamps: :segments,
|
||||||
|
stream: true,
|
||||||
|
compile: [batch_size: 4],
|
||||||
|
defn_options: [compiler: EXLA]
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
end <>
|
||||||
|
~S"""
|
||||||
|
|
||||||
|
path = Kino.FS.file_path("{{NAME}}")
|
||||||
|
Kino.render(Kino.Text.new("(Start of transcription)", chunk: true))
|
||||||
|
|
||||||
|
for chunk <- Nx.Serving.run(serving, {:file, path}) do
|
||||||
|
[start_mark, end_mark] =
|
||||||
|
for seconds <- [chunk.start_timestamp_seconds, chunk.end_timestamp_seconds] do
|
||||||
|
seconds
|
||||||
|
|> round()
|
||||||
|
|> Time.from_seconds_after_midnight()
|
||||||
|
|> Time.to_string()
|
||||||
|
end
|
||||||
|
|
||||||
|
text = "\n#{start_mark}-#{end_mark}: #{chunk.text}"
|
||||||
|
Kino.render(Kino.Text.new(text, chunk: true))
|
||||||
|
end
|
||||||
|
|
||||||
|
Kino.render(Kino.Text.new("\n(End of transcription)", chunk: true))
|
||||||
|
:ok
|
||||||
|
""",
|
||||||
packages: [kino_bumblebee, nx_backend_package]
|
packages: [kino_bumblebee, nx_backend_package]
|
||||||
},
|
},
|
||||||
%{
|
%{
|
||||||
|
|
Loading…
Add table
Reference in a new issue