From 1009c4230e2739380d34f410a88d2b08043c2f34 Mon Sep 17 00:00:00 2001 From: Bel LaPointe Date: Tue, 28 Nov 2023 22:13:05 -0700 Subject: [PATCH] env variable ify --- rust-whisper.d/src/main.rs | 55 +++++++++++++------------------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/rust-whisper.d/src/main.rs b/rust-whisper.d/src/main.rs index 9a4a66e..ca3686e 100644 --- a/rust-whisper.d/src/main.rs +++ b/rust-whisper.d/src/main.rs @@ -2,14 +2,24 @@ use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError}; use cpal::traits::{HostTrait, DeviceTrait, StreamTrait}; use signal_hook::{iterator::Signals, consts::signal::SIGINT}; use std::time::{Duration, Instant}; -use std::fs; -use byteorder::WriteBytesExt; fn main() { let w = new_whisper( - std::env::var("MODEL").unwrap_or(String::from("../models/ggml-tiny.en.bin")), - std::env::var("P").unwrap_or(String::from("8")).parse::().expect("$P must be a number"), + std::env::var("MODEL") + .unwrap_or(String::from("../models/ggml-tiny.en.bin")), + std::env::var("P") + .unwrap_or(String::from("8")) + .parse::().expect("$P must be a number"), ).unwrap(); + let stream_churn = std::env::var("STREAM_CHURN") + .unwrap_or(String::from("0.8")) + .parse::().expect("$STREAM_CHURN must be a number"); + let stream_step = Duration::new( + std::env::var("STREAM_STEP") + .unwrap_or(String::from("5")) + .parse::().expect("$STREAM_STEP must be a number"), + 0, + ); match std::env::var("WAV") { Ok(wav) => { @@ -27,42 +37,17 @@ fn main() { Err(_) => { let host = cpal::default_host(); let device = host.default_input_device().unwrap(); - let output_device = host.default_output_device().unwrap(); - let cfg = device.supported_input_configs() .unwrap() .filter(|x| x.sample_format() == cpal::SampleFormat::F32) .nth(0) .unwrap() .with_max_sample_rate(); + let channels = cfg.channels(); let downsample_ratio = cfg.sample_rate().0 as f32 / 16000.0; - - let output_cfg = output_device.supported_output_configs() - .unwrap() - .filter(|x| x.sample_format() == cpal::SampleFormat::F32) - .filter(|x| x.channels() == 2) - .nth(0) - .unwrap() - .with_max_sample_rate(); - eprintln!("trying output with {} / {:?}", output_device.name().unwrap(), output_cfg); - let mut buffer = vec![]; let mut last = Instant::now(); - let five_seconds = Duration::new(5, 0); - device.build_output_stream( - &output_cfg.into(), - move |data: &mut [f32], _: &cpal::OutputCallbackInfo| { - for i in data.iter_mut() { - *i = cpal::Sample::EQUILIBRIUM; - } - // TODO - }, - move |err| { - eprintln!("output error: {}", err) - }, - None, - ); //.unwrap().play().unwrap(); let stream = device.build_input_stream( &cfg.clone().into(), move |data: &[f32], _: &cpal::InputCallbackInfo| { @@ -76,15 +61,11 @@ fn main() { downsampled_data.push(mono_data[upsampled as usize]); } downsampled_data.iter().for_each(|x| buffer.push(*x)); - if Instant::now() - last > five_seconds { - let mut f = fs::File::create("/tmp/transcribed.pcm").unwrap(); - for i in &buffer { - f.write_f32::(*i).unwrap(); - } + if Instant::now() - last > stream_step { let result = w.transcribe(&buffer).unwrap(); - println!("({} from {:?} and downsampled {} * {} ({} -> {})) {}", buffer.len(), cfg, channels, downsample_ratio, data.len(), downsampled_data.len(), result); + println!("{}", result); - let retain = buffer.len() - buffer.len() / 10; + let retain = buffer.len() - (buffer.len() as f32 * stream_churn) as usize; for i in retain..buffer.len() { buffer[i - retain] = buffer[i] }