mirror of
https://github.com/livebook-dev/livebook.git
synced 2025-09-04 12:04:20 +08:00
Support audio transcription on drag and drop (#2227)
This commit is contained in:
parent
f25d6663d8
commit
f8581c225d
2 changed files with 55 additions and 34 deletions
|
@ -16,8 +16,9 @@ config :logger, :console,
|
|||
# Use Jason for JSON parsing in Phoenix
|
||||
config :phoenix, :json_library, Jason
|
||||
|
||||
# Add mime type to upload notebooks with `Phoenix.LiveView.Upload`
|
||||
# Additional mime types
|
||||
config :mime, :types, %{
|
||||
"audio/m4a" => ["m4a"],
|
||||
"text/plain" => ["livemd"]
|
||||
}
|
||||
|
||||
|
|
|
@ -31,12 +31,12 @@ defmodule Livebook.Runtime.Definitions do
|
|||
|
||||
kino_bumblebee = %{
|
||||
name: "kino_bumblebee",
|
||||
dependency: %{dep: {:kino_bumblebee, "~> 0.3.0"}, config: []}
|
||||
dependency: %{dep: {:kino_bumblebee, github: "livebook-dev/kino_bumblebee"}, config: []}
|
||||
}
|
||||
|
||||
exla = %{
|
||||
name: "exla",
|
||||
dependency: %{dep: {:exla, "~> 0.5.1"}, config: [nx: [default_backend: EXLA.Backend]]}
|
||||
dependency: %{dep: {:exla, ">= 0.0.0"}, config: [nx: [default_backend: EXLA.Backend]]}
|
||||
}
|
||||
|
||||
torchx = %{
|
||||
|
@ -338,40 +338,60 @@ defmodule Livebook.Runtime.Definitions do
|
|||
type: :file_action,
|
||||
file_types: ["audio/*"],
|
||||
description: "Transcribe speech",
|
||||
source: """
|
||||
# To explore more models, see "+ Smart" > "Neural Network task"
|
||||
|
||||
{:ok, model_info} = Bumblebee.load_model({:hf, "openai/whisper-tiny"})
|
||||
{:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"})
|
||||
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"})
|
||||
{:ok, generation_config} = Bumblebee.load_generation_config({:hf, "openai/whisper-tiny"})
|
||||
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)
|
||||
|
||||
#{if windows? do
|
||||
source:
|
||||
"""
|
||||
serving = Bumblebee.Audio.speech_to_text_whisper(model_info, featurizer, tokenizer, generation_config,
|
||||
chunk_num_seconds: 30,
|
||||
timestamps: :segments,
|
||||
compile: [batch_size: 4]
|
||||
)\
|
||||
"""
|
||||
else
|
||||
"""
|
||||
serving =
|
||||
Bumblebee.Audio.speech_to_text_whisper(model_info, featurizer, tokenizer, generation_config,
|
||||
chunk_num_seconds: 30,
|
||||
timestamps: :segments,
|
||||
compile: [batch_size: 4],
|
||||
defn_options: [compiler: EXLA]
|
||||
)\
|
||||
"""
|
||||
end}
|
||||
# To explore more models, see "+ Smart" > "Neural Network task"
|
||||
|
||||
path = Kino.FS.file_path("{{NAME}}")
|
||||
output = Nx.Serving.run(serving, {:file, path})
|
||||
{:ok, model_info} = Bumblebee.load_model({:hf, "openai/whisper-tiny"})
|
||||
{:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"})
|
||||
{:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"})
|
||||
{:ok, generation_config} = Bumblebee.load_generation_config({:hf, "openai/whisper-tiny"})
|
||||
generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100)
|
||||
|
||||
# output.chunks |> Enum.map_join(& &1.text) |> String.trim()\
|
||||
""",
|
||||
""" <>
|
||||
if windows? do
|
||||
"""
|
||||
serving =
|
||||
Bumblebee.Audio.speech_to_text_whisper(model_info, featurizer, tokenizer, generation_config,
|
||||
chunk_num_seconds: 30,
|
||||
timestamps: :segments,
|
||||
stream: true,
|
||||
compile: [batch_size: 4]
|
||||
)
|
||||
"""
|
||||
else
|
||||
"""
|
||||
serving =
|
||||
Bumblebee.Audio.speech_to_text_whisper(model_info, featurizer, tokenizer, generation_config,
|
||||
chunk_num_seconds: 30,
|
||||
timestamps: :segments,
|
||||
stream: true,
|
||||
compile: [batch_size: 4],
|
||||
defn_options: [compiler: EXLA]
|
||||
)
|
||||
"""
|
||||
end <>
|
||||
~S"""
|
||||
|
||||
path = Kino.FS.file_path("{{NAME}}")
|
||||
Kino.render(Kino.Text.new("(Start of transcription)", chunk: true))
|
||||
|
||||
for chunk <- Nx.Serving.run(serving, {:file, path}) do
|
||||
[start_mark, end_mark] =
|
||||
for seconds <- [chunk.start_timestamp_seconds, chunk.end_timestamp_seconds] do
|
||||
seconds
|
||||
|> round()
|
||||
|> Time.from_seconds_after_midnight()
|
||||
|> Time.to_string()
|
||||
end
|
||||
|
||||
text = "\n#{start_mark}-#{end_mark}: #{chunk.text}"
|
||||
Kino.render(Kino.Text.new(text, chunk: true))
|
||||
end
|
||||
|
||||
Kino.render(Kino.Text.new("\n(End of transcription)", chunk: true))
|
||||
:ok
|
||||
""",
|
||||
packages: [kino_bumblebee, nx_backend_package]
|
||||
},
|
||||
%{
|
||||
|
|
Loading…
Add table
Reference in a new issue