|
|
|
|
@@ -41,15 +41,29 @@ pub fn wav<F>(flags: Flags, handler_fn: F, wav_path: String) where F: FnMut(Resu
|
|
|
|
|
flags.stream_tail,
|
|
|
|
|
handler_fn,
|
|
|
|
|
).unwrap();
|
|
|
|
|
let (header, data) = wav::read(
|
|
|
|
|
&mut std::fs::File::open(wav_path).expect("failed to open $WAV"),
|
|
|
|
|
).expect("failed to decode $WAV");
|
|
|
|
|
assert!(header.channel_count == 1);
|
|
|
|
|
assert!(header.sampling_rate == 16_000);
|
|
|
|
|
let data16 = data.as_sixteen().expect("wav is not 32bit floats");
|
|
|
|
|
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
|
|
|
|
|
w.transcribe(&f32_from_wav_file(&wav_path).unwrap())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
w.transcribe(&audio_data);
|
|
|
|
|
fn f32_from_wav_file(path: &String) -> Result<Vec<f32>, String> {
|
|
|
|
|
let f = std::fs::File::open(path);
|
|
|
|
|
if let Some(err) = f.as_ref().err() {
|
|
|
|
|
return Err(format!("failed to open wav file: {}", err));
|
|
|
|
|
}
|
|
|
|
|
let wav_read = wav::read(&mut f.unwrap());
|
|
|
|
|
if let Some(err) = wav_read.as_ref().err() {
|
|
|
|
|
return Err(format!("failed to parse wav file: {}", err));
|
|
|
|
|
}
|
|
|
|
|
let (header, data) = wav_read.unwrap();
|
|
|
|
|
if header.channel_count != 1 {
|
|
|
|
|
return Err("!= 1 channel".to_string());
|
|
|
|
|
}
|
|
|
|
|
if header.sampling_rate != 16_000 {
|
|
|
|
|
return Err("!= 16_000 hz".to_string());
|
|
|
|
|
}
|
|
|
|
|
match data.as_sixteen() {
|
|
|
|
|
Some(data16) => Ok(whisper_rs::convert_integer_to_float_audio(&data16)),
|
|
|
|
|
None => Err(format!("couldnt translate wav to 16s")),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
pub fn channel<F>(flags: Flags, handler_fn: F, stream: std::sync::mpsc::Receiver<Vec<f32>>) where F: FnMut(Result<Transcribed, String>) + Send + 'static {
|
|
|
|
|
@@ -320,7 +334,7 @@ mod tests {
|
|
|
|
|
use super::*;
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_transcribe_tiny_jfk_wav() {
|
|
|
|
|
fn test_transcribe_tiny_jfk_wav_whisper_rs() {
|
|
|
|
|
wav(
|
|
|
|
|
Flags {
|
|
|
|
|
model_path: None,
|
|
|
|
|
|