From de099d99174d490785033ffcc4b84f53ab08c07f Mon Sep 17 00:00:00 2001 From: Bel LaPointe <153096461+breel-render@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:34:22 -0500 Subject: [PATCH] but it is soooo sloooow --- rust-whisper-lib/src/lib.rs | 38 +++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/rust-whisper-lib/src/lib.rs b/rust-whisper-lib/src/lib.rs index cfade81..2ba6ade 100644 --- a/rust-whisper-lib/src/lib.rs +++ b/rust-whisper-lib/src/lib.rs @@ -42,18 +42,10 @@ pub fn wav(flags: Flags, handler_fn: F, wav_path: String) where F: FnMut(Resu flags.stream_tail, handler_fn, ).unwrap(); - let (header, data) = wav::read( - &mut std::fs::File::open(wav_path).expect("failed to open $WAV"), - ).expect("failed to decode $WAV"); - assert!(header.channel_count == 1); - assert!(header.sampling_rate == 16_000); - let data16 = data.as_sixteen().expect("wav is not 32bit floats"); - let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16); - - w.transcribe(&audio_data); + w.transcribe(&f32_from_wav_file(&wav_path).unwrap()) } -fn f32_from_wav_file(path: String) -> Result, String> { +fn f32_from_wav_file(path: &String) -> Result, String> { let f = std::fs::File::open(path); if let Some(err) = f.as_ref().err() { return Err(format!("failed to open wav file: {}", err)); @@ -267,10 +259,18 @@ impl Engine { } struct Engine2 { + model: rwhisper::Whisper, } fn new_engine2(model_path: Option, model_buffer: Option>, threads: i32) -> Result { - Ok(Engine2{}) + match rwhisper::WhisperBuilder::default() + .with_cpu(true) + .with_language(Some(rwhisper::WhisperLanguage::English)) + .with_source(rwhisper::WhisperSource::TinyEn) + .build() { + Ok(model) => Ok(Engine2{model: model}), + Err(msg) => Err(format!("failed to create model: {}", msg)), + } } impl Engine2 { @@ -279,15 +279,15 @@ impl Engine2 { } fn _transcribe(&self, data: &Vec) -> Result { - let model = rwhisper::WhisperBuilder::default() - .with_cpu(true) - .with_language(Some(rwhisper::WhisperLanguage::English)) - .with_source(rwhisper::WhisperSource::TinyEn) - .build().unwrap(); let buffer = rodio::buffer::SamplesBuffer::new(1, 16_000, data.clone()); + let stream = self.model.transcribe(buffer); + if stream.as_ref().is_err() { + return Err(format!("failed to start transcribing: {}", stream.err().unwrap())); + } + let stream = stream.unwrap(); + let future = async { let mut w: Vec = vec![]; - let mut stream = model.transcribe(buffer).unwrap(); stream.write_to(&mut w).await.unwrap(); w }; @@ -412,8 +412,10 @@ mod tests { None, 4, ).expect("failed to make new engine2"); - let data = f32_from_wav_file("../gitea-whisper-rs/sys/whisper.cpp/bindings/go/samples/jfk.wav".to_string()).expect("failed to read jfk.wav"); + let data = f32_from_wav_file(&"../gitea-whisper-rs/sys/whisper.cpp/bindings/go/samples/jfk.wav".to_string()).expect("failed to read jfk.wav"); + let start = std::time::Instant::now(); let result = engine_2.transcribe(&data).expect("failed to transcribe"); + println!("rwhisper = {}s", start.elapsed().as_secs_f32()); assert_eq!(" And so my fellow American asked not what your country can do for you, ask what you can do for your country.".to_string(), result.to_string()); } }