found an example that just works
This commit is contained in:
283
candle-wasm-examples-whisper/src/app.rs
Normal file
283
candle-wasm-examples-whisper/src/app.rs
Normal file
@@ -0,0 +1,283 @@
|
||||
use crate::console_log;
|
||||
use crate::worker::{ModelData, Segment, Worker, WorkerInput, WorkerOutput};
|
||||
use js_sys::Date;
|
||||
use wasm_bindgen::prelude::*;
|
||||
use wasm_bindgen_futures::JsFuture;
|
||||
use yew::{html, Component, Context, Html};
|
||||
use yew_agent::{Bridge, Bridged};
|
||||
|
||||
const SAMPLE_NAMES: [&str; 6] = [
|
||||
"audios/samples_jfk.wav",
|
||||
"audios/samples_a13.wav",
|
||||
"audios/samples_gb0.wav",
|
||||
"audios/samples_gb1.wav",
|
||||
"audios/samples_hp0.wav",
|
||||
"audios/samples_mm0.wav",
|
||||
];
|
||||
|
||||
async fn fetch_url(url: &str) -> Result<Vec<u8>, JsValue> {
|
||||
use web_sys::{Request, RequestCache, RequestInit, RequestMode, Response};
|
||||
let window = web_sys::window().ok_or("window")?;
|
||||
let mut opts = RequestInit::new();
|
||||
let opts = opts
|
||||
.method("GET")
|
||||
.mode(RequestMode::Cors)
|
||||
.cache(RequestCache::NoCache);
|
||||
|
||||
let request = Request::new_with_str_and_init(url, opts)?;
|
||||
|
||||
let resp_value = JsFuture::from(window.fetch_with_request(&request)).await?;
|
||||
|
||||
// `resp_value` is a `Response` object.
|
||||
assert!(resp_value.is_instance_of::<Response>());
|
||||
let resp: Response = resp_value.dyn_into()?;
|
||||
let data = JsFuture::from(resp.blob()?).await?;
|
||||
let blob = web_sys::Blob::from(data);
|
||||
let array_buffer = JsFuture::from(blob.array_buffer()).await?;
|
||||
let data = js_sys::Uint8Array::new(&array_buffer).to_vec();
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
pub enum Msg {
|
||||
Run(usize),
|
||||
UpdateStatus(String),
|
||||
SetDecoder(ModelData),
|
||||
WorkerInMsg(WorkerInput),
|
||||
WorkerOutMsg(Result<WorkerOutput, String>),
|
||||
}
|
||||
|
||||
pub struct CurrentDecode {
|
||||
start_time: Option<f64>,
|
||||
}
|
||||
|
||||
pub struct App {
|
||||
status: String,
|
||||
loaded: bool,
|
||||
segments: Vec<Segment>,
|
||||
current_decode: Option<CurrentDecode>,
|
||||
worker: Box<dyn Bridge<Worker>>,
|
||||
}
|
||||
|
||||
async fn model_data_load() -> Result<ModelData, JsValue> {
|
||||
let quantized = false;
|
||||
let is_multilingual = false;
|
||||
|
||||
let (tokenizer, mel_filters, weights, config) = if quantized {
|
||||
console_log!("loading quantized weights");
|
||||
let tokenizer = fetch_url("quantized/tokenizer-tiny-en.json").await?;
|
||||
let mel_filters = fetch_url("mel_filters.safetensors").await?;
|
||||
let weights = fetch_url("quantized/model-tiny-en-q80.gguf").await?;
|
||||
let config = fetch_url("quantized/config-tiny-en.json").await?;
|
||||
(tokenizer, mel_filters, weights, config)
|
||||
} else {
|
||||
console_log!("loading float weights");
|
||||
if is_multilingual {
|
||||
let mel_filters = fetch_url("mel_filters.safetensors").await?;
|
||||
let tokenizer = fetch_url("whisper-tiny/tokenizer.json").await?;
|
||||
let weights = fetch_url("whisper-tiny/model.safetensors").await?;
|
||||
let config = fetch_url("whisper-tiny/config.json").await?;
|
||||
(tokenizer, mel_filters, weights, config)
|
||||
} else {
|
||||
let mel_filters = fetch_url("mel_filters.safetensors").await?;
|
||||
let tokenizer = fetch_url("whisper-tiny.en/tokenizer.json").await?;
|
||||
let weights = fetch_url("whisper-tiny.en/model.safetensors").await?;
|
||||
let config = fetch_url("whisper-tiny.en/config.json").await?;
|
||||
(tokenizer, mel_filters, weights, config)
|
||||
}
|
||||
};
|
||||
|
||||
let timestamps = true;
|
||||
let _task = Some("transcribe".to_string());
|
||||
console_log!("{}", weights.len());
|
||||
Ok(ModelData {
|
||||
tokenizer,
|
||||
mel_filters,
|
||||
weights,
|
||||
config,
|
||||
quantized,
|
||||
timestamps,
|
||||
task: None,
|
||||
is_multilingual,
|
||||
language: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn performance_now() -> Option<f64> {
|
||||
let window = web_sys::window()?;
|
||||
let performance = window.performance()?;
|
||||
Some(performance.now() / 1000.)
|
||||
}
|
||||
|
||||
impl Component for App {
|
||||
type Message = Msg;
|
||||
type Properties = ();
|
||||
|
||||
fn create(ctx: &Context<Self>) -> Self {
|
||||
let status = "loading weights".to_string();
|
||||
let cb = {
|
||||
let link = ctx.link().clone();
|
||||
move |e| link.send_message(Self::Message::WorkerOutMsg(e))
|
||||
};
|
||||
let worker = Worker::bridge(std::rc::Rc::new(cb));
|
||||
Self {
|
||||
status,
|
||||
segments: vec![],
|
||||
current_decode: None,
|
||||
worker,
|
||||
loaded: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn rendered(&mut self, ctx: &Context<Self>, first_render: bool) {
|
||||
if first_render {
|
||||
ctx.link().send_future(async {
|
||||
match model_data_load().await {
|
||||
Err(err) => {
|
||||
let status = format!("{err:?}");
|
||||
Msg::UpdateStatus(status)
|
||||
}
|
||||
Ok(model_data) => Msg::SetDecoder(model_data),
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fn update(&mut self, ctx: &Context<Self>, msg: Self::Message) -> bool {
|
||||
match msg {
|
||||
Msg::SetDecoder(md) => {
|
||||
self.status = "weights loaded successfully!".to_string();
|
||||
self.loaded = true;
|
||||
console_log!("loaded weights");
|
||||
self.worker.send(WorkerInput::ModelData(md));
|
||||
true
|
||||
}
|
||||
Msg::Run(sample_index) => {
|
||||
let sample = SAMPLE_NAMES[sample_index];
|
||||
if self.current_decode.is_some() {
|
||||
self.status = "already decoding some sample at the moment".to_string()
|
||||
} else {
|
||||
let start_time = performance_now();
|
||||
self.current_decode = Some(CurrentDecode { start_time });
|
||||
self.status = format!("decoding {sample}");
|
||||
self.segments.clear();
|
||||
ctx.link().send_future(async move {
|
||||
match fetch_url(sample).await {
|
||||
Err(err) => {
|
||||
let output = Err(format!("decoding error: {err:?}"));
|
||||
// Mimic a worker output to so as to release current_decode
|
||||
Msg::WorkerOutMsg(output)
|
||||
}
|
||||
Ok(wav_bytes) => {
|
||||
Msg::WorkerInMsg(WorkerInput::DecodeTask { wav_bytes })
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
//
|
||||
true
|
||||
}
|
||||
Msg::WorkerOutMsg(output) => {
|
||||
let dt = self.current_decode.as_ref().and_then(|current_decode| {
|
||||
current_decode.start_time.and_then(|start_time| {
|
||||
performance_now().map(|stop_time| stop_time - start_time)
|
||||
})
|
||||
});
|
||||
self.current_decode = None;
|
||||
match output {
|
||||
Ok(WorkerOutput::WeightsLoaded) => self.status = "weights loaded!".to_string(),
|
||||
Ok(WorkerOutput::Decoded(segments)) => {
|
||||
self.status = match dt {
|
||||
None => "decoding succeeded!".to_string(),
|
||||
Some(dt) => format!("decoding succeeded in {:.2}s", dt),
|
||||
};
|
||||
self.segments = segments;
|
||||
}
|
||||
Err(err) => {
|
||||
self.status = format!("decoding error {err:?}");
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
Msg::WorkerInMsg(inp) => {
|
||||
self.worker.send(inp);
|
||||
true
|
||||
}
|
||||
Msg::UpdateStatus(status) => {
|
||||
self.status = status;
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn view(&self, ctx: &Context<Self>) -> Html {
|
||||
html! {
|
||||
<div>
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>{"Sample"}</th>
|
||||
<th></th>
|
||||
<th></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{
|
||||
SAMPLE_NAMES.iter().enumerate().map(|(i, name)| { html! {
|
||||
<tr>
|
||||
<th>{name}</th>
|
||||
<th><audio controls=true src={format!("./{name}")}></audio></th>
|
||||
{ if self.loaded {
|
||||
html!(<th><button class="button" onclick={ctx.link().callback(move |_| Msg::Run(i))}> { "run" }</button></th>)
|
||||
}else{html!()}
|
||||
}
|
||||
</tr>
|
||||
}
|
||||
}).collect::<Html>()
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
<h2>
|
||||
{&self.status}
|
||||
</h2>
|
||||
{
|
||||
if !self.loaded{
|
||||
html! { <progress id="progress-bar" aria-label="loading weights…"></progress> }
|
||||
} else if self.current_decode.is_some() {
|
||||
html! { <progress id="progress-bar" aria-label="decoding…"></progress> }
|
||||
} else { html!{
|
||||
<blockquote>
|
||||
<p>
|
||||
{
|
||||
self.segments.iter().map(|segment| { html! {
|
||||
<>
|
||||
<i>
|
||||
{
|
||||
format!("{:.2}s-{:.2}s: (avg-logprob: {:.4}, no-speech-prob: {:.4})",
|
||||
segment.start,
|
||||
segment.start + segment.duration,
|
||||
segment.dr.avg_logprob,
|
||||
segment.dr.no_speech_prob,
|
||||
)
|
||||
}
|
||||
</i>
|
||||
<br/ >
|
||||
{&segment.dr.text}
|
||||
<br/ >
|
||||
</>
|
||||
} }).collect::<Html>()
|
||||
}
|
||||
</p>
|
||||
</blockquote>
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Display the current date and time the page was rendered
|
||||
<p class="footer">
|
||||
{ "Rendered: " }
|
||||
{ String::from(Date::new_0().to_string()) }
|
||||
</p>
|
||||
</div>
|
||||
}
|
||||
}
|
||||
}
|
||||
215
candle-wasm-examples-whisper/src/audio.rs
Normal file
215
candle-wasm-examples-whisper/src/audio.rs
Normal file
@@ -0,0 +1,215 @@
|
||||
// Audio processing code, adapted from whisper.cpp
|
||||
// https://github.com/ggerganov/whisper.cpp
|
||||
use super::worker;
|
||||
|
||||
pub trait Float: num_traits::Float + num_traits::FloatConst + num_traits::NumAssign {}
|
||||
|
||||
impl Float for f32 {}
|
||||
impl Float for f64 {}
|
||||
|
||||
// https://github.com/ggerganov/whisper.cpp/blob/4774d2feb01a772a15de81ffc34b34a1f294f020/whisper.cpp#L2357
|
||||
fn fft<T: Float>(inp: &[T]) -> Vec<T> {
|
||||
let n = inp.len();
|
||||
let zero = T::zero();
|
||||
if n == 1 {
|
||||
return vec![inp[0], zero];
|
||||
}
|
||||
if n % 2 == 1 {
|
||||
return dft(inp);
|
||||
}
|
||||
let mut out = vec![zero; n * 2];
|
||||
|
||||
let mut even = Vec::with_capacity(n / 2);
|
||||
let mut odd = Vec::with_capacity(n / 2);
|
||||
|
||||
for (i, &inp) in inp.iter().enumerate() {
|
||||
if i % 2 == 0 {
|
||||
even.push(inp)
|
||||
} else {
|
||||
odd.push(inp);
|
||||
}
|
||||
}
|
||||
|
||||
let even_fft = fft(&even);
|
||||
let odd_fft = fft(&odd);
|
||||
|
||||
let two_pi = T::PI() + T::PI();
|
||||
let n_t = T::from(n).unwrap();
|
||||
for k in 0..n / 2 {
|
||||
let k_t = T::from(k).unwrap();
|
||||
let theta = two_pi * k_t / n_t;
|
||||
let re = theta.cos();
|
||||
let im = -theta.sin();
|
||||
|
||||
let re_odd = odd_fft[2 * k];
|
||||
let im_odd = odd_fft[2 * k + 1];
|
||||
|
||||
out[2 * k] = even_fft[2 * k] + re * re_odd - im * im_odd;
|
||||
out[2 * k + 1] = even_fft[2 * k + 1] + re * im_odd + im * re_odd;
|
||||
|
||||
out[2 * (k + n / 2)] = even_fft[2 * k] - re * re_odd + im * im_odd;
|
||||
out[2 * (k + n / 2) + 1] = even_fft[2 * k + 1] - re * im_odd - im * re_odd;
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
// https://github.com/ggerganov/whisper.cpp/blob/4774d2feb01a772a15de81ffc34b34a1f294f020/whisper.cpp#L2337
|
||||
fn dft<T: Float>(inp: &[T]) -> Vec<T> {
|
||||
let zero = T::zero();
|
||||
let n = inp.len();
|
||||
let two_pi = T::PI() + T::PI();
|
||||
|
||||
let mut out = Vec::with_capacity(2 * n);
|
||||
let n_t = T::from(n).unwrap();
|
||||
for k in 0..n {
|
||||
let k_t = T::from(k).unwrap();
|
||||
let mut re = zero;
|
||||
let mut im = zero;
|
||||
|
||||
for (j, &inp) in inp.iter().enumerate() {
|
||||
let j_t = T::from(j).unwrap();
|
||||
let angle = two_pi * k_t * j_t / n_t;
|
||||
re += inp * angle.cos();
|
||||
im -= inp * angle.sin();
|
||||
}
|
||||
|
||||
out.push(re);
|
||||
out.push(im);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
// https://github.com/ggerganov/whisper.cpp/blob/4774d2feb01a772a15de81ffc34b34a1f294f020/whisper.cpp#L2414
|
||||
fn log_mel_spectrogram_w<T: Float>(
|
||||
ith: usize,
|
||||
hann: &[T],
|
||||
samples: &[T],
|
||||
filters: &[T],
|
||||
fft_size: usize,
|
||||
fft_step: usize,
|
||||
speed_up: bool,
|
||||
n_len: usize,
|
||||
n_mel: usize,
|
||||
n_threads: usize,
|
||||
) -> Vec<T> {
|
||||
let n_fft = if speed_up {
|
||||
1 + fft_size / 4
|
||||
} else {
|
||||
1 + fft_size / 2
|
||||
};
|
||||
|
||||
let zero = T::zero();
|
||||
let half = T::from(0.5).unwrap();
|
||||
let mut fft_in = vec![zero; fft_size];
|
||||
let mut mel = vec![zero; n_len * n_mel];
|
||||
|
||||
for i in (ith..n_len).step_by(n_threads) {
|
||||
let offset = i * fft_step;
|
||||
|
||||
// apply Hanning window
|
||||
for j in 0..fft_size {
|
||||
fft_in[j] = if offset + j < samples.len() {
|
||||
hann[j] * samples[offset + j]
|
||||
} else {
|
||||
zero
|
||||
}
|
||||
}
|
||||
|
||||
// FFT -> mag^2
|
||||
let mut fft_out: Vec<T> = fft(&fft_in);
|
||||
|
||||
for j in 0..fft_size {
|
||||
fft_out[j] = fft_out[2 * j] * fft_out[2 * j] + fft_out[2 * j + 1] * fft_out[2 * j + 1];
|
||||
}
|
||||
for j in 1..fft_size / 2 {
|
||||
let v = fft_out[fft_size - j];
|
||||
fft_out[j] += v;
|
||||
}
|
||||
|
||||
if speed_up {
|
||||
// scale down in the frequency domain results in a speed up in the time domain
|
||||
for j in 0..n_fft {
|
||||
fft_out[j] = half * (fft_out[2 * j] + fft_out[2 * j + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
// mel spectrogram
|
||||
for j in 0..n_mel {
|
||||
let mut sum = zero;
|
||||
for k in 0..n_fft {
|
||||
sum += fft_out[k] * filters[j * n_fft + k];
|
||||
}
|
||||
mel[j * n_len + i] = T::max(sum, T::from(1e-10).unwrap()).log10();
|
||||
}
|
||||
}
|
||||
mel
|
||||
}
|
||||
|
||||
fn log_mel_spectrogram_<T: Float + std::fmt::Display>(
|
||||
samples: &[T],
|
||||
filters: &[T],
|
||||
fft_size: usize,
|
||||
fft_step: usize,
|
||||
n_mel: usize,
|
||||
speed_up: bool,
|
||||
) -> Vec<T> {
|
||||
let zero = T::zero();
|
||||
let two_pi = T::PI() + T::PI();
|
||||
let half = T::from(0.5).unwrap();
|
||||
let one = T::from(1.0).unwrap();
|
||||
let four = T::from(4.0).unwrap();
|
||||
let fft_size_t = T::from(fft_size).unwrap();
|
||||
|
||||
let hann: Vec<T> = (0..fft_size)
|
||||
.map(|i| half * (one - ((two_pi * T::from(i).unwrap()) / fft_size_t).cos()))
|
||||
.collect();
|
||||
let n_len = samples.len() / fft_step;
|
||||
|
||||
// pad audio with at least one extra chunk of zeros
|
||||
let pad = 100 * worker::m::CHUNK_LENGTH / 2;
|
||||
let n_len = if n_len % pad != 0 {
|
||||
(n_len / pad + 1) * pad
|
||||
} else {
|
||||
n_len
|
||||
};
|
||||
let n_len = n_len + pad;
|
||||
let samples = {
|
||||
let mut samples_padded = samples.to_vec();
|
||||
let to_add = n_len * fft_step - samples.len();
|
||||
samples_padded.extend(std::iter::repeat(zero).take(to_add));
|
||||
samples_padded
|
||||
};
|
||||
|
||||
// Use a single thread for now.
|
||||
let mut mel = log_mel_spectrogram_w(
|
||||
0, &hann, &samples, filters, fft_size, fft_step, speed_up, n_len, n_mel, 1,
|
||||
);
|
||||
let mmax = mel
|
||||
.iter()
|
||||
.max_by(|&u, &v| u.partial_cmp(v).unwrap_or(std::cmp::Ordering::Greater))
|
||||
.copied()
|
||||
.unwrap_or(zero)
|
||||
- T::from(8).unwrap();
|
||||
for m in mel.iter_mut() {
|
||||
let v = T::max(*m, mmax);
|
||||
*m = v / four + one
|
||||
}
|
||||
mel
|
||||
}
|
||||
|
||||
pub fn pcm_to_mel<T: Float + std::fmt::Display>(
|
||||
cfg: &worker::m::Config,
|
||||
samples: &[T],
|
||||
filters: &[T],
|
||||
) -> anyhow::Result<Vec<T>> {
|
||||
let mel = log_mel_spectrogram_(
|
||||
samples,
|
||||
filters,
|
||||
worker::m::N_FFT,
|
||||
worker::m::HOP_LENGTH,
|
||||
cfg.num_mel_bins,
|
||||
false,
|
||||
);
|
||||
Ok(mel)
|
||||
}
|
||||
4
candle-wasm-examples-whisper/src/bin/app.rs
Normal file
4
candle-wasm-examples-whisper/src/bin/app.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
fn main() {
|
||||
wasm_logger::init(wasm_logger::Config::new(log::Level::Trace));
|
||||
yew::Renderer::<candle_wasm_example_whisper::App>::new().render();
|
||||
}
|
||||
53
candle-wasm-examples-whisper/src/bin/m.rs
Normal file
53
candle-wasm-examples-whisper/src/bin/m.rs
Normal file
@@ -0,0 +1,53 @@
|
||||
use candle_wasm_example_whisper::worker::{Decoder as D, ModelData};
|
||||
use wasm_bindgen::prelude::*;
|
||||
|
||||
#[wasm_bindgen]
|
||||
pub struct Decoder {
|
||||
decoder: D,
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
impl Decoder {
|
||||
#[wasm_bindgen(constructor)]
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
weights: Vec<u8>,
|
||||
tokenizer: Vec<u8>,
|
||||
mel_filters: Vec<u8>,
|
||||
config: Vec<u8>,
|
||||
quantized: bool,
|
||||
is_multilingual: bool,
|
||||
timestamps: bool,
|
||||
task: Option<String>,
|
||||
language: Option<String>,
|
||||
) -> Result<Decoder, JsError> {
|
||||
let decoder = D::load(ModelData {
|
||||
tokenizer,
|
||||
mel_filters,
|
||||
config,
|
||||
quantized,
|
||||
weights,
|
||||
is_multilingual,
|
||||
timestamps,
|
||||
task,
|
||||
language,
|
||||
});
|
||||
|
||||
match decoder {
|
||||
Ok(decoder) => Ok(Self { decoder }),
|
||||
Err(e) => Err(JsError::new(&e.to_string())),
|
||||
}
|
||||
}
|
||||
|
||||
#[wasm_bindgen]
|
||||
pub fn decode(&mut self, wav_input: Vec<u8>) -> Result<String, JsError> {
|
||||
let segments = self
|
||||
.decoder
|
||||
.convert_and_run(&wav_input)
|
||||
.map_err(|e| JsError::new(&e.to_string()))?;
|
||||
let json = serde_json::to_string(&segments)?;
|
||||
Ok(json)
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {}
|
||||
4
candle-wasm-examples-whisper/src/bin/worker.rs
Normal file
4
candle-wasm-examples-whisper/src/bin/worker.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
use yew_agent::PublicWorker;
|
||||
fn main() {
|
||||
candle_wasm_example_whisper::Worker::register();
|
||||
}
|
||||
101
candle-wasm-examples-whisper/src/languages.rs
Normal file
101
candle-wasm-examples-whisper/src/languages.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
pub const LANGUAGES: [(&str, &str); 99] = [
|
||||
("en", "english"),
|
||||
("zh", "chinese"),
|
||||
("de", "german"),
|
||||
("es", "spanish"),
|
||||
("ru", "russian"),
|
||||
("ko", "korean"),
|
||||
("fr", "french"),
|
||||
("ja", "japanese"),
|
||||
("pt", "portuguese"),
|
||||
("tr", "turkish"),
|
||||
("pl", "polish"),
|
||||
("ca", "catalan"),
|
||||
("nl", "dutch"),
|
||||
("ar", "arabic"),
|
||||
("sv", "swedish"),
|
||||
("it", "italian"),
|
||||
("id", "indonesian"),
|
||||
("hi", "hindi"),
|
||||
("fi", "finnish"),
|
||||
("vi", "vietnamese"),
|
||||
("he", "hebrew"),
|
||||
("uk", "ukrainian"),
|
||||
("el", "greek"),
|
||||
("ms", "malay"),
|
||||
("cs", "czech"),
|
||||
("ro", "romanian"),
|
||||
("da", "danish"),
|
||||
("hu", "hungarian"),
|
||||
("ta", "tamil"),
|
||||
("no", "norwegian"),
|
||||
("th", "thai"),
|
||||
("ur", "urdu"),
|
||||
("hr", "croatian"),
|
||||
("bg", "bulgarian"),
|
||||
("lt", "lithuanian"),
|
||||
("la", "latin"),
|
||||
("mi", "maori"),
|
||||
("ml", "malayalam"),
|
||||
("cy", "welsh"),
|
||||
("sk", "slovak"),
|
||||
("te", "telugu"),
|
||||
("fa", "persian"),
|
||||
("lv", "latvian"),
|
||||
("bn", "bengali"),
|
||||
("sr", "serbian"),
|
||||
("az", "azerbaijani"),
|
||||
("sl", "slovenian"),
|
||||
("kn", "kannada"),
|
||||
("et", "estonian"),
|
||||
("mk", "macedonian"),
|
||||
("br", "breton"),
|
||||
("eu", "basque"),
|
||||
("is", "icelandic"),
|
||||
("hy", "armenian"),
|
||||
("ne", "nepali"),
|
||||
("mn", "mongolian"),
|
||||
("bs", "bosnian"),
|
||||
("kk", "kazakh"),
|
||||
("sq", "albanian"),
|
||||
("sw", "swahili"),
|
||||
("gl", "galician"),
|
||||
("mr", "marathi"),
|
||||
("pa", "punjabi"),
|
||||
("si", "sinhala"),
|
||||
("km", "khmer"),
|
||||
("sn", "shona"),
|
||||
("yo", "yoruba"),
|
||||
("so", "somali"),
|
||||
("af", "afrikaans"),
|
||||
("oc", "occitan"),
|
||||
("ka", "georgian"),
|
||||
("be", "belarusian"),
|
||||
("tg", "tajik"),
|
||||
("sd", "sindhi"),
|
||||
("gu", "gujarati"),
|
||||
("am", "amharic"),
|
||||
("yi", "yiddish"),
|
||||
("lo", "lao"),
|
||||
("uz", "uzbek"),
|
||||
("fo", "faroese"),
|
||||
("ht", "haitian creole"),
|
||||
("ps", "pashto"),
|
||||
("tk", "turkmen"),
|
||||
("nn", "nynorsk"),
|
||||
("mt", "maltese"),
|
||||
("sa", "sanskrit"),
|
||||
("lb", "luxembourgish"),
|
||||
("my", "myanmar"),
|
||||
("bo", "tibetan"),
|
||||
("tl", "tagalog"),
|
||||
("mg", "malagasy"),
|
||||
("as", "assamese"),
|
||||
("tt", "tatar"),
|
||||
("haw", "hawaiian"),
|
||||
("ln", "lingala"),
|
||||
("ha", "hausa"),
|
||||
("ba", "bashkir"),
|
||||
("jw", "javanese"),
|
||||
("su", "sundanese"),
|
||||
];
|
||||
29
candle-wasm-examples-whisper/src/lib.rs
Normal file
29
candle-wasm-examples-whisper/src/lib.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
pub const WITH_TIMER: bool = true;
|
||||
|
||||
struct Timer {
|
||||
label: &'static str,
|
||||
}
|
||||
|
||||
// impl Timer {
|
||||
// fn new(label: &'static str) -> Self {
|
||||
// if WITH_TIMER {
|
||||
// web_sys::console::time_with_label(label);
|
||||
// }
|
||||
// Self { label }
|
||||
// }
|
||||
// }
|
||||
|
||||
impl Drop for Timer {
|
||||
fn drop(&mut self) {
|
||||
if WITH_TIMER {
|
||||
web_sys::console::time_end_with_label(self.label)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod app;
|
||||
mod audio;
|
||||
pub mod languages;
|
||||
pub mod worker;
|
||||
pub use app::App;
|
||||
pub use worker::Worker;
|
||||
492
candle-wasm-examples-whisper/src/worker.rs
Normal file
492
candle-wasm-examples-whisper/src/worker.rs
Normal file
@@ -0,0 +1,492 @@
|
||||
use crate::languages::LANGUAGES;
|
||||
use anyhow::Error as E;
|
||||
use candle::{safetensors::Load, DType, Device, IndexOp, Tensor, D};
|
||||
use candle_nn::{ops::softmax, VarBuilder};
|
||||
pub use candle_transformers::models::whisper::{self as m, Config};
|
||||
use rand::{distributions::Distribution, rngs::StdRng, SeedableRng};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokenizers::Tokenizer;
|
||||
use wasm_bindgen::prelude::*;
|
||||
use yew_agent::{HandlerId, Public, WorkerLink};
|
||||
|
||||
#[wasm_bindgen]
|
||||
extern "C" {
|
||||
// Use `js_namespace` here to bind `console.log(..)` instead of just
|
||||
// `log(..)`
|
||||
#[wasm_bindgen(js_namespace = console)]
|
||||
pub fn log(s: &str);
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! console_log {
|
||||
// Note that this is using the `log` function imported above during
|
||||
// `bare_bones`
|
||||
($($t:tt)*) => ($crate::worker::log(&format_args!($($t)*).to_string()))
|
||||
}
|
||||
|
||||
pub const DTYPE: DType = DType::F32;
|
||||
|
||||
pub enum Model {
|
||||
Normal(m::model::Whisper),
|
||||
Quantized(m::quantized_model::Whisper),
|
||||
}
|
||||
|
||||
// Maybe we should use some traits rather than doing the dispatch for all these.
|
||||
impl Model {
|
||||
pub fn config(&self) -> &Config {
|
||||
match self {
|
||||
Self::Normal(m) => &m.config,
|
||||
Self::Quantized(m) => &m.config,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encoder_forward(&mut self, x: &Tensor, flush: bool) -> candle::Result<Tensor> {
|
||||
match self {
|
||||
Self::Normal(m) => m.encoder.forward(x, flush),
|
||||
Self::Quantized(m) => m.encoder.forward(x, flush),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decoder_forward(
|
||||
&mut self,
|
||||
x: &Tensor,
|
||||
xa: &Tensor,
|
||||
flush: bool,
|
||||
) -> candle::Result<Tensor> {
|
||||
match self {
|
||||
Self::Normal(m) => m.decoder.forward(x, xa, flush),
|
||||
Self::Quantized(m) => m.decoder.forward(x, xa, flush),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn decoder_final_linear(&self, x: &Tensor) -> candle::Result<Tensor> {
|
||||
match self {
|
||||
Self::Normal(m) => m.decoder.final_linear(x),
|
||||
Self::Quantized(m) => m.decoder.final_linear(x),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DecodingResult {
|
||||
pub tokens: Vec<u32>,
|
||||
pub text: String,
|
||||
pub avg_logprob: f64,
|
||||
pub no_speech_prob: f64,
|
||||
temperature: f64,
|
||||
compression_ratio: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Segment {
|
||||
pub start: f64,
|
||||
pub duration: f64,
|
||||
pub dr: DecodingResult,
|
||||
}
|
||||
|
||||
pub struct Decoder {
|
||||
model: Model,
|
||||
rng: rand::rngs::StdRng,
|
||||
task: Option<Task>,
|
||||
language: Option<String>,
|
||||
is_multilingual: bool,
|
||||
mel_filters: Vec<f32>,
|
||||
timestamps: bool,
|
||||
tokenizer: Tokenizer,
|
||||
suppress_tokens: Tensor,
|
||||
sot_token: u32,
|
||||
transcribe_token: u32,
|
||||
translate_token: u32,
|
||||
eot_token: u32,
|
||||
no_speech_token: u32,
|
||||
no_timestamps_token: u32,
|
||||
}
|
||||
|
||||
impl Decoder {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn new(
|
||||
model: Model,
|
||||
tokenizer: Tokenizer,
|
||||
mel_filters: Vec<f32>,
|
||||
device: &Device,
|
||||
task: Option<Task>,
|
||||
language: Option<String>,
|
||||
is_multilingual: bool,
|
||||
timestamps: bool,
|
||||
) -> anyhow::Result<Self> {
|
||||
let suppress_tokens: Vec<f32> = (0..model.config().vocab_size as u32)
|
||||
.map(|i| {
|
||||
if model.config().suppress_tokens.contains(&i) {
|
||||
f32::NEG_INFINITY
|
||||
} else {
|
||||
0f32
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let no_timestamps_token = token_id(&tokenizer, m::NO_TIMESTAMPS_TOKEN)?;
|
||||
let suppress_tokens = Tensor::new(suppress_tokens.as_slice(), device)?;
|
||||
let sot_token = token_id(&tokenizer, m::SOT_TOKEN)?;
|
||||
let transcribe_token = token_id(&tokenizer, m::TRANSCRIBE_TOKEN)?;
|
||||
let translate_token = token_id(&tokenizer, m::TRANSLATE_TOKEN)?;
|
||||
let eot_token = token_id(&tokenizer, m::EOT_TOKEN)?;
|
||||
let no_speech_token = m::NO_SPEECH_TOKENS
|
||||
.iter()
|
||||
.find_map(|token| token_id(&tokenizer, token).ok());
|
||||
let no_speech_token = match no_speech_token {
|
||||
None => anyhow::bail!("unable to find any non-speech token"),
|
||||
Some(n) => n,
|
||||
};
|
||||
let seed = 299792458;
|
||||
Ok(Self {
|
||||
model,
|
||||
rng: StdRng::seed_from_u64(seed),
|
||||
tokenizer,
|
||||
mel_filters,
|
||||
task,
|
||||
timestamps,
|
||||
language,
|
||||
is_multilingual,
|
||||
suppress_tokens,
|
||||
sot_token,
|
||||
transcribe_token,
|
||||
translate_token,
|
||||
eot_token,
|
||||
no_speech_token,
|
||||
no_timestamps_token,
|
||||
})
|
||||
}
|
||||
|
||||
fn decode(&mut self, mel: &Tensor, t: f64) -> anyhow::Result<DecodingResult> {
|
||||
let model = &mut self.model;
|
||||
let language_token = match (self.is_multilingual, &self.language) {
|
||||
(true, None) => Some(detect_language(model, &self.tokenizer, mel)?),
|
||||
(false, None) => None,
|
||||
(true, Some(language)) => {
|
||||
match token_id(&self.tokenizer, &format!("<|{:?}|>", self.language)) {
|
||||
Ok(token_id) => Some(token_id),
|
||||
Err(_) => anyhow::bail!("language {language} is not supported"),
|
||||
}
|
||||
}
|
||||
(false, Some(_)) => {
|
||||
anyhow::bail!("a language cannot be set for non-multilingual models")
|
||||
}
|
||||
};
|
||||
|
||||
let audio_features = model.encoder_forward(mel, true)?;
|
||||
println!("audio features: {:?}", audio_features.dims());
|
||||
let sample_len = model.config().max_target_positions / 2;
|
||||
let mut sum_logprob = 0f64;
|
||||
let mut no_speech_prob = f64::NAN;
|
||||
let mut tokens = vec![self.sot_token];
|
||||
if let Some(language_token) = language_token {
|
||||
tokens.push(language_token);
|
||||
}
|
||||
match self.task {
|
||||
None | Some(Task::Transcribe) => tokens.push(self.transcribe_token),
|
||||
Some(Task::Translate) => tokens.push(self.translate_token),
|
||||
}
|
||||
if !self.timestamps {
|
||||
tokens.push(self.no_timestamps_token);
|
||||
}
|
||||
for i in 0..sample_len {
|
||||
let tokens_t = Tensor::new(tokens.as_slice(), mel.device())?;
|
||||
|
||||
// The model expects a batch dim but this inference loop does not handle
|
||||
// it so we add it at this point.
|
||||
let tokens_t = tokens_t.unsqueeze(0)?;
|
||||
let ys = model.decoder_forward(&tokens_t, &audio_features, i == 0)?;
|
||||
|
||||
// Extract the no speech probability on the first iteration by looking at the first
|
||||
// token logits and the probability for the according token.
|
||||
if i == 0 {
|
||||
let logits = model.decoder_final_linear(&ys.i(..1)?)?.i(0)?.i(0)?;
|
||||
no_speech_prob = softmax(&logits, 0)?
|
||||
.i(self.no_speech_token as usize)?
|
||||
.to_scalar::<f32>()? as f64;
|
||||
}
|
||||
|
||||
let (_, seq_len, _) = ys.dims3()?;
|
||||
let logits = model
|
||||
.decoder_final_linear(&ys.i((..1, seq_len - 1..))?)?
|
||||
.i(0)?
|
||||
.i(0)?;
|
||||
// TODO: Besides suppress tokens, we should apply the heuristics from
|
||||
// ApplyTimestampRules, i.e.:
|
||||
// - Timestamps come in pairs, except before EOT.
|
||||
// - Timestamps should be non-decreasing.
|
||||
// - If the sum of the probabilities of timestamps is higher than any other tokens,
|
||||
// only consider timestamps when sampling.
|
||||
// https://github.com/openai/whisper/blob/e8622f9afc4eba139bf796c210f5c01081000472/whisper/decoding.py#L439
|
||||
let logits = logits.broadcast_add(&self.suppress_tokens)?;
|
||||
let next_token = if t > 0f64 {
|
||||
let prs = softmax(&(&logits / t)?, 0)?;
|
||||
let logits_v: Vec<f32> = prs.to_vec1()?;
|
||||
let distr = rand::distributions::WeightedIndex::new(&logits_v)?;
|
||||
distr.sample(&mut self.rng) as u32
|
||||
} else {
|
||||
let logits_v: Vec<f32> = logits.to_vec1()?;
|
||||
logits_v
|
||||
.iter()
|
||||
.enumerate()
|
||||
.max_by(|(_, u), (_, v)| u.total_cmp(v))
|
||||
.map(|(i, _)| i as u32)
|
||||
.unwrap()
|
||||
};
|
||||
tokens.push(next_token);
|
||||
let prob = softmax(&logits, candle::D::Minus1)?
|
||||
.i(next_token as usize)?
|
||||
.to_scalar::<f32>()? as f64;
|
||||
if next_token == self.eot_token || tokens.len() > model.config().max_target_positions {
|
||||
break;
|
||||
}
|
||||
sum_logprob += prob.ln();
|
||||
}
|
||||
let text = self.tokenizer.decode(&tokens, true).map_err(E::msg)?;
|
||||
let avg_logprob = sum_logprob / tokens.len() as f64;
|
||||
|
||||
Ok(DecodingResult {
|
||||
tokens,
|
||||
text,
|
||||
avg_logprob,
|
||||
no_speech_prob,
|
||||
temperature: t,
|
||||
compression_ratio: f64::NAN,
|
||||
})
|
||||
}
|
||||
|
||||
fn decode_with_fallback(&mut self, segment: &Tensor) -> anyhow::Result<DecodingResult> {
|
||||
for (i, &t) in m::TEMPERATURES.iter().enumerate() {
|
||||
let dr: Result<DecodingResult, _> = self.decode(segment, t);
|
||||
if i == m::TEMPERATURES.len() - 1 {
|
||||
return dr;
|
||||
}
|
||||
// On errors, we try again with a different temperature.
|
||||
match dr {
|
||||
Ok(dr) => {
|
||||
let needs_fallback = dr.compression_ratio > m::COMPRESSION_RATIO_THRESHOLD
|
||||
|| dr.avg_logprob < m::LOGPROB_THRESHOLD;
|
||||
if !needs_fallback || dr.no_speech_prob > m::NO_SPEECH_THRESHOLD {
|
||||
return Ok(dr);
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
console_log!("Error running at {t}: {err}")
|
||||
}
|
||||
}
|
||||
}
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
fn run(&mut self, mel: &Tensor) -> anyhow::Result<Vec<Segment>> {
|
||||
let (_, _, content_frames) = mel.dims3()?;
|
||||
let mut seek = 0;
|
||||
let mut segments = vec![];
|
||||
while seek < content_frames {
|
||||
let time_offset = (seek * m::HOP_LENGTH) as f64 / m::SAMPLE_RATE as f64;
|
||||
let segment_size = usize::min(content_frames - seek, m::N_FRAMES);
|
||||
let mel_segment = mel.narrow(2, seek, segment_size)?;
|
||||
let segment_duration = (segment_size * m::HOP_LENGTH) as f64 / m::SAMPLE_RATE as f64;
|
||||
let dr = self.decode_with_fallback(&mel_segment)?;
|
||||
seek += segment_size;
|
||||
if dr.no_speech_prob > m::NO_SPEECH_THRESHOLD && dr.avg_logprob < m::LOGPROB_THRESHOLD {
|
||||
console_log!("no speech detected, skipping {seek} {dr:?}");
|
||||
continue;
|
||||
}
|
||||
let segment = Segment {
|
||||
start: time_offset,
|
||||
duration: segment_duration,
|
||||
dr,
|
||||
};
|
||||
console_log!("{seek}: {segment:?}");
|
||||
segments.push(segment)
|
||||
}
|
||||
Ok(segments)
|
||||
}
|
||||
|
||||
pub fn load(md: ModelData) -> anyhow::Result<Self> {
|
||||
let device = Device::Cpu;
|
||||
let tokenizer = Tokenizer::from_bytes(&md.tokenizer).map_err(E::msg)?;
|
||||
|
||||
let mel_filters = safetensors::tensor::SafeTensors::deserialize(&md.mel_filters)?;
|
||||
let mel_filters = mel_filters.tensor("mel_80")?.load(&device)?;
|
||||
console_log!("loaded mel filters {:?}", mel_filters.shape());
|
||||
let mel_filters = mel_filters.flatten_all()?.to_vec1::<f32>()?;
|
||||
let config: Config = serde_json::from_slice(&md.config)?;
|
||||
let model = if md.quantized {
|
||||
let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf_buffer(
|
||||
&md.weights,
|
||||
)?;
|
||||
Model::Quantized(m::quantized_model::Whisper::load(&vb, config)?)
|
||||
} else {
|
||||
let vb = VarBuilder::from_buffered_safetensors(md.weights, m::DTYPE, &device)?;
|
||||
Model::Normal(m::model::Whisper::load(&vb, config)?)
|
||||
};
|
||||
console_log!("done loading model");
|
||||
|
||||
let task = match md.task.as_deref() {
|
||||
Some("translate") => Some(Task::Translate),
|
||||
_ => Some(Task::Transcribe),
|
||||
};
|
||||
|
||||
let decoder = Self::new(
|
||||
model,
|
||||
tokenizer,
|
||||
mel_filters,
|
||||
&device,
|
||||
task,
|
||||
md.language,
|
||||
md.is_multilingual,
|
||||
md.timestamps,
|
||||
)?;
|
||||
Ok(decoder)
|
||||
}
|
||||
|
||||
pub fn convert_and_run(&mut self, wav_input: &[u8]) -> anyhow::Result<Vec<Segment>> {
|
||||
let device = Device::Cpu;
|
||||
let mut wav_input = std::io::Cursor::new(wav_input);
|
||||
let (header, data) = wav::read(&mut wav_input)?;
|
||||
console_log!("loaded wav data: {header:?}");
|
||||
if header.sampling_rate != m::SAMPLE_RATE as u32 {
|
||||
anyhow::bail!("wav file must have a {} sampling rate", m::SAMPLE_RATE);
|
||||
}
|
||||
let data = data.as_sixteen().expect("expected 16 bit wav file");
|
||||
let pcm_data: Vec<_> = data[..data.len() / header.channel_count as usize]
|
||||
.iter()
|
||||
.map(|v| *v as f32 / 32768.)
|
||||
.collect();
|
||||
console_log!("pcm data loaded {}", pcm_data.len());
|
||||
let mel = crate::audio::pcm_to_mel(self.model.config(), &pcm_data, &self.mel_filters)?;
|
||||
let mel_len = mel.len();
|
||||
let n_mels = self.model.config().num_mel_bins;
|
||||
let mel = Tensor::from_vec(mel, (1, n_mels, mel_len / n_mels), &device)?;
|
||||
console_log!("loaded mel: {:?}", mel.dims());
|
||||
let segments = self.run(&mel)?;
|
||||
Ok(segments)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the token id for the selected language.
|
||||
pub fn detect_language(model: &mut Model, tokenizer: &Tokenizer, mel: &Tensor) -> Result<u32, E> {
|
||||
console_log!("detecting language");
|
||||
let (_bsize, _, seq_len) = mel.dims3()?;
|
||||
let mel = mel.narrow(
|
||||
2,
|
||||
0,
|
||||
usize::min(seq_len, model.config().max_source_positions),
|
||||
)?;
|
||||
let device = mel.device();
|
||||
|
||||
let language_token_ids = LANGUAGES
|
||||
.iter()
|
||||
.map(|(t, _)| token_id(tokenizer, &format!("<|{t}|>")))
|
||||
.map(|e| e.map_err(E::msg))
|
||||
.collect::<Result<Vec<_>, E>>()?;
|
||||
|
||||
let sot_token = token_id(tokenizer, m::SOT_TOKEN)?;
|
||||
let audio_features = model.encoder_forward(&mel, true)?;
|
||||
let tokens = Tensor::new(&[[sot_token]], device)?;
|
||||
let language_token_ids = Tensor::new(language_token_ids.as_slice(), device)?;
|
||||
let ys = model.decoder_forward(&tokens, &audio_features, true)?;
|
||||
let logits = model.decoder_final_linear(&ys.i(..1)?)?.i(0)?.i(0)?;
|
||||
let logits = logits.index_select(&language_token_ids, 0)?;
|
||||
let probs = candle_nn::ops::softmax(&logits, D::Minus1)?;
|
||||
let probs = probs.to_vec1::<f32>()?;
|
||||
let mut probs = LANGUAGES.iter().zip(probs.iter()).collect::<Vec<_>>();
|
||||
probs.sort_by(|(_, p1), (_, p2)| p2.total_cmp(p1));
|
||||
for ((_, language), p) in probs.iter().take(5) {
|
||||
println!("{language}: {p}")
|
||||
}
|
||||
let token = &format!("<|{}|>", probs[0].0 .0);
|
||||
let language = token_id(tokenizer, token)?;
|
||||
console_log!("detected language: {language} {token}");
|
||||
Ok(language)
|
||||
}
|
||||
pub fn token_id(tokenizer: &Tokenizer, token: &str) -> candle::Result<u32> {
|
||||
match tokenizer.token_to_id(token) {
|
||||
None => candle::bail!("no token-id for {token}"),
|
||||
Some(id) => Ok(id),
|
||||
}
|
||||
}
|
||||
#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
|
||||
pub enum Task {
|
||||
Transcribe,
|
||||
Translate,
|
||||
}
|
||||
|
||||
// Communication to the worker happens through bincode, the model weights and configs are fetched
|
||||
// on the main thread and transferred via the following structure.
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ModelData {
|
||||
pub weights: Vec<u8>,
|
||||
pub tokenizer: Vec<u8>,
|
||||
pub mel_filters: Vec<u8>,
|
||||
pub config: Vec<u8>,
|
||||
pub quantized: bool,
|
||||
pub timestamps: bool,
|
||||
pub is_multilingual: bool,
|
||||
pub language: Option<String>,
|
||||
pub task: Option<String>,
|
||||
}
|
||||
|
||||
pub struct Worker {
|
||||
link: WorkerLink<Self>,
|
||||
decoder: Option<Decoder>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub enum WorkerInput {
|
||||
ModelData(ModelData),
|
||||
DecodeTask { wav_bytes: Vec<u8> },
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub enum WorkerOutput {
|
||||
Decoded(Vec<Segment>),
|
||||
WeightsLoaded,
|
||||
}
|
||||
|
||||
impl yew_agent::Worker for Worker {
|
||||
type Input = WorkerInput;
|
||||
type Message = ();
|
||||
type Output = Result<WorkerOutput, String>;
|
||||
type Reach = Public<Self>;
|
||||
|
||||
fn create(link: WorkerLink<Self>) -> Self {
|
||||
Self {
|
||||
link,
|
||||
decoder: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn update(&mut self, _msg: Self::Message) {
|
||||
// no messaging
|
||||
}
|
||||
|
||||
fn handle_input(&mut self, msg: Self::Input, id: HandlerId) {
|
||||
let output = match msg {
|
||||
WorkerInput::ModelData(md) => match Decoder::load(md) {
|
||||
Ok(decoder) => {
|
||||
self.decoder = Some(decoder);
|
||||
Ok(WorkerOutput::WeightsLoaded)
|
||||
}
|
||||
Err(err) => Err(format!("model creation error {err:?}")),
|
||||
},
|
||||
WorkerInput::DecodeTask { wav_bytes } => match &mut self.decoder {
|
||||
None => Err("model has not been set".to_string()),
|
||||
Some(decoder) => decoder
|
||||
.convert_and_run(&wav_bytes)
|
||||
.map(WorkerOutput::Decoded)
|
||||
.map_err(|e| e.to_string()),
|
||||
},
|
||||
};
|
||||
self.link.respond(id, output);
|
||||
}
|
||||
|
||||
fn name_of_resource() -> &'static str {
|
||||
"worker.js"
|
||||
}
|
||||
|
||||
fn resource_path_is_relative() -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user