successfully confirmed audio is ok with sox -r 44100 -t f32 /tmp/transcribed.pcm --default-device

master
Bel LaPointe 2023-11-28 21:22:26 -07:00
parent 54964ec59b
commit 4ef419e6c0
3 changed files with 18 additions and 3 deletions

View File

@@ -130,6 +130,12 @@ version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]] [[package]]
name = "bytes" name = "bytes"
version = "1.5.0" version = "1.5.0"
@@ -713,6 +719,7 @@ checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
name = "rust-whisper" name = "rust-whisper"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"byteorder",
"cpal", "cpal",
"signal-hook", "signal-hook",
"tokio", "tokio",

View File

@@ -11,3 +11,4 @@ wav = "1"
tokio = "1.27" tokio = "1.27"
cpal = "0.15.2" cpal = "0.15.2"
signal-hook = "0.3.17" signal-hook = "0.3.17"
byteorder = "1.5.0"

View File

@@ -2,6 +2,9 @@ use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
use cpal::traits::{HostTrait, DeviceTrait, StreamTrait}; use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
use signal_hook::{iterator::Signals, consts::signal::SIGINT}; use signal_hook::{iterator::Signals, consts::signal::SIGINT};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use std::fs;
use std::io::Write;
use byteorder::WriteBytesExt;
fn main() { fn main() {
let w = new_whisper( let w = new_whisper(
@@ -59,9 +62,9 @@ fn main() {
eprintln!("output error: {}", err) eprintln!("output error: {}", err)
}, },
None, None,
).unwrap().play().unwrap(); ); //.unwrap().play().unwrap();
let stream = device.build_input_stream( let stream = device.build_input_stream(
&cfg.into(), &cfg.clone().into(),
move |data: &[f32], _: &cpal::InputCallbackInfo| { move |data: &[f32], _: &cpal::InputCallbackInfo| {
data.iter() data.iter()
.map(|x| *x) .map(|x| *x)
@@ -69,7 +72,11 @@ fn main() {
.for_each(|x| buffer.push(x)); .for_each(|x| buffer.push(x));
if Instant::now() - last > five_seconds { if Instant::now() - last > five_seconds {
let result = w.transcribe(&buffer).unwrap(); let result = w.transcribe(&buffer).unwrap();
println!("({}) {}", buffer.len(), result); println!("({} from {:?}) {}", buffer.len(), cfg, result);
let mut f = fs::File::create("/tmp/transcribed.pcm").unwrap();
for i in &buffer {
f.write_f32::<byteorder::LittleEndian>(*i).unwrap();
}
let retain = buffer.len() - buffer.len() / 10; let retain = buffer.len() - buffer.len() / 10;
for i in retain..buffer.len() { for i in retain..buffer.len() {