37 lines
1.4 KiB
Rust
37 lines
1.4 KiB
Rust
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy};
|
|
|
|
fn main() {
|
|
let mut ctx = WhisperContext::new("../models/ggml-tiny.en.bin").expect("failed to load model");
|
|
|
|
// create a params object
|
|
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
|
|
params.set_n_threads(4);
|
|
params.set_translate(false);
|
|
params.set_language(Some("en"));
|
|
params.set_print_special(false);
|
|
params.set_print_progress(false);
|
|
params.set_print_realtime(false);
|
|
params.set_print_timestamps(false);
|
|
|
|
// assume we have a buffer of audio data
|
|
// here we'll make a fake one, floating point samples, 32 bit, 16KHz, mono
|
|
//let audio_data = vec![0_f32; 16000 * 2];
|
|
let (header, data) = wav::read(&mut std::fs::File::open("../git.d/samples/jfk.wav").expect("failed to open .wav")).expect("failed to decode .wav");
|
|
assert!(header.channel_count == 1);
|
|
assert!(header.sampling_rate == 16000);
|
|
let data16 = data.as_sixteen().expect("wav is not 32bit floats");
|
|
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
|
|
|
|
// now we can run the model
|
|
ctx.full(params, &audio_data[..])
|
|
.expect("failed to run model");
|
|
|
|
// fetch the results
|
|
let num_segments = ctx.full_n_segments();
|
|
for i in 0..num_segments {
|
|
let segment = ctx.full_get_segment_text(i).expect("failed to get segment");
|
|
print!("{} ", segment);
|
|
}
|
|
println!("");
|
|
}
|