From fffea2ddf03b80b8d3abbeb0e398d698a9c96d04 Mon Sep 17 00:00:00 2001 From: Bel LaPointe <153096461+breel-render@users.noreply.github.com> Date: Wed, 10 Sep 2025 11:20:01 -0600 Subject: [PATCH] no render mac --- .gitmodules | 2 +- rust-whisper-baked/src/main.rs | 408 ++++++++++++++++++--------------- 2 files changed, 229 insertions(+), 181 deletions(-) diff --git a/.gitmodules b/.gitmodules index 1ec5cad..2d40593 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ -[submodule "rust-whisper.d/gitea-whisper-rs"] +[submodule "gitea-whisper-rs"] path = gitea-whisper-rs url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git diff --git a/rust-whisper-baked/src/main.rs b/rust-whisper-baked/src/main.rs index ea9ca68..f22dc30 100644 --- a/rust-whisper-baked/src/main.rs +++ b/rust-whisper-baked/src/main.rs @@ -1,236 +1,284 @@ -use rust_whisper_lib; -use rust_whisper_baked_lib; use clap::Parser; use listen_lib; +use rust_whisper_baked_lib; +use rust_whisper_lib; use std::thread; fn main() { - let flags = rust_whisper_lib::Flags::parse(); - match flags.wav.clone() { - Some(_) => wav_channel(flags), - None => channel(flags), - }; + let flags = rust_whisper_lib::Flags::parse(); + match flags.wav.clone() { + Some(_) => wav_channel(flags), + None => channel(flags), + }; } fn wav_channel(flags: rust_whisper_lib::Flags) { - let mut w = new_destutterer(); - rust_whisper_baked_lib::wav_channel( - flags.clone(), - move |result: Result| { - match result { - Ok(transcribed) => { - let s = w.step(transcribed.to_string()); - println!("{}", s); - }, - Err(msg) => { eprintln!("error: {}", msg); }, - }; - }, - ); + let mut w = new_destutterer(); + rust_whisper_baked_lib::wav_channel( + flags.clone(), + move |result: Result| { + match result { + Ok(transcribed) => { + let s = w.step(transcribed.to_string()); + println!("{}", s); + } + Err(msg) => { + eprintln!("error: {}", msg); + } + }; + }, + ); } fn wav(flags: rust_whisper_lib::Flags, _path: String) { - let mut w = new_destutterer(); - rust_whisper_baked_lib::wav(flags, - move |result: Result| { - match result { - Ok(transcribed) => { - let s = w.step(transcribed.to_string()); - println!("{}", s); - }, - Err(msg) => { eprintln!("error: {}", msg); }, - }; - }, - ); + let mut w = new_destutterer(); + rust_whisper_baked_lib::wav( + flags, + move |result: Result| { + match result { + Ok(transcribed) => { + let s = w.step(transcribed.to_string()); + println!("{}", s); + } + Err(msg) => { + eprintln!("error: {}", msg); + } + }; + }, + ); } fn channel(flags: rust_whisper_lib::Flags) { - let (send, recv) = std::sync::mpsc::sync_channel(100); + let (send, recv) = std::sync::mpsc::sync_channel(100); - eprintln!("rust whisper baked lib channel..."); - thread::spawn(move || { - let mut w = new_destutterer(); - rust_whisper_baked_lib::channel( - flags.clone(), - move |result: Result| { - match result { - Ok(transcribed) => { - let s = w.step(transcribed.to_string()); - println!("{}", s); - }, - Err(msg) => { eprintln!("error: {}", msg); }, - }; - }, - recv, - ); - }); + eprintln!("rust whisper baked lib channel..."); + thread::spawn(move || { + let mut w = new_destutterer(); + rust_whisper_baked_lib::channel( + flags.clone(), + move |result: Result| { + match result { + Ok(transcribed) => { + let s = w.step(transcribed.to_string()); + println!("{}", s); + } + Err(msg) => { + eprintln!("error: {}", msg); + } + }; + }, + recv, + ); + }); - eprintln!("listen lib main..."); - let flags = rust_whisper_lib::Flags::parse(); - match flags.stream_device { - Some(device_name) => { - eprintln!("with device ({}) '{}'", device_name.len(), &device_name); - if device_name.len() == 0 { - let mut i = 0; - for device in listen_lib::devices() { - eprintln!("[{}] {}", i, device); - i += 1; + eprintln!("listen lib main..."); + let flags = rust_whisper_lib::Flags::parse(); + match flags.stream_device { + Some(device_name) => { + eprintln!("with device ({}) '{}'", device_name.len(), &device_name); + if device_name.len() == 0 { + let mut i = 0; + for device in listen_lib::devices() { + eprintln!("[{}] {}", i, device); + i += 1; + } + eprintln!("found {} devices", i); + } else { + listen_lib::main_with( + |data| { + send.send(data).unwrap(); + }, + device_name, + ); } - eprintln!("found {} devices", i); - } else { - listen_lib::main_with(|data| { - send.send(data).unwrap(); - }, device_name); - } - }, - None => { - eprintln!("without any device"); - listen_lib::main(|data| { - send.send(data).unwrap(); - }); - } - } - eprintln!("/listen lib main..."); + } + None => { + eprintln!("without any device"); + listen_lib::main(|data| { + send.send(data).unwrap(); + }); + } + } + eprintln!("/listen lib main..."); } struct Destutterer { - prev: Words, + prev: Words, } fn new_destutterer() -> Destutterer { - Destutterer{prev: new_words()} + Destutterer { prev: new_words() } } impl Destutterer { - fn step(&mut self, next: String) -> String { - if next.len() == 0 { - return next; - } + fn step(&mut self, next: String) -> String { + if next.len() == 0 { + return next; + } - let next_words = Words::from_string(next.clone()); - let mut n = self.prev.comparable_len().clamp(0, next_words.comparable_len()); - //println!("n={} prev='{:?}' next='{:?}'", n, self.prev.to_comparable_words(), next_words.to_comparable_words()); - while n > 0 { - let (prev_s, _) = self.prev.last_n_comparable_to_string(n); - let (next_s, next_idx) = next_words.first_n_comparable_to_string(n); - if prev_s == next_s { - self.prev = next_words; - return self.prev.skip(next_idx+1).to_string(); - } - n -= 1; - } - self.prev = next_words; - self.prev.to_string() - } + let next_words = Words::from_string(next.clone()); + let mut n = self + .prev + .comparable_len() + .clamp(0, next_words.comparable_len()); + //println!("n={} prev='{:?}' next='{:?}'", n, self.prev.to_comparable_words(), next_words.to_comparable_words()); + while n > 0 { + let (prev_s, _) = self.prev.last_n_comparable_to_string(n); + let (next_s, next_idx) = next_words.first_n_comparable_to_string(n); + if prev_s == next_s { + self.prev = next_words; + return self.prev.skip(next_idx + 1).to_string(); + } + n -= 1; + } + self.prev = next_words; + self.prev.to_string() + } } #[derive(Clone, Debug)] struct Words { - raw: Vec, + raw: Vec, } fn new_words() -> Words { - Words{raw: vec![]} + Words { raw: vec![] } } impl Words { - fn from_string(s: String) -> Words { - let mut result = Words{raw: vec![]}; - for word in s.split(" ") { - let word = word.trim(); - if word.len() > 0 { - result.raw.push(word.to_string()); - } - } - result - } + fn from_string(s: String) -> Words { + let mut result = Words { raw: vec![] }; + for word in s.split(" ") { + let word = word.trim(); + if word.len() > 0 { + result.raw.push(word.to_string()); + } + } + result + } - fn skip(&self, n: usize) -> Words { - Words{ - raw: self.raw.iter().skip(n).map(|x| x.clone()).collect(), - } - } + fn skip(&self, n: usize) -> Words { + Words { + raw: self.raw.iter().skip(n).map(|x| x.clone()).collect(), + } + } - fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) { - let v = self.to_comparable_words(); - let v = v[(v.len() - n).clamp(0, v.len())..].to_vec(); - return (v.iter().map(|x| x.s.clone().unwrap()).collect::>().join(" "), v[0].idx) - } + fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) { + let v = self.to_comparable_words(); + let v = v[(v.len() - n).clamp(0, v.len())..].to_vec(); + return ( + v.iter() + .map(|x| x.s.clone().unwrap()) + .collect::>() + .join(" "), + v[0].idx, + ); + } - fn first_n_comparable_to_string(&self, n: usize) -> (String, usize){ - let v = self.to_comparable_words(); - let v = v[0..n.clamp(0, v.len())].to_vec(); - return (v.iter().map(|x| x.s.clone().unwrap()).collect::>().join(" "), v[v.len()-1].idx) - } + fn first_n_comparable_to_string(&self, n: usize) -> (String, usize) { + let v = self.to_comparable_words(); + let v = v[0..n.clamp(0, v.len())].to_vec(); + return ( + v.iter() + .map(|x| x.s.clone().unwrap()) + .collect::>() + .join(" "), + v[v.len() - 1].idx, + ); + } - fn comparable_len(&self) -> usize { - self.to_comparable_words().len() - } + fn comparable_len(&self) -> usize { + self.to_comparable_words().len() + } - fn to_comparable_words(&self) -> Vec { - self.to_words().iter() - .filter(|x| x.s.is_some()) - .map(|x| x.clone()) - .collect() - } + fn to_comparable_words(&self) -> Vec { + self.to_words() + .iter() + .filter(|x| x.s.is_some()) + .map(|x| x.clone()) + .collect() + } - fn to_words(&self) -> Vec { - let skips = stop_words::get("en"); - let stemmer = rust_stemmers::Stemmer::create(rust_stemmers::Algorithm::English); - let strs = self.raw.iter() - .map(|w| w.to_lowercase()) - .map(|w| w.chars().filter(|c| c.is_ascii_alphanumeric()).collect::()) - .map(|w| stemmer.stem(&w).into_owned()) - .collect::>(); - let mut result = vec![]; - for i in 0..strs.len() { - result.push(Word{ - s: match skips.contains(&strs[i]) { - true => None, - false => Some(strs[i].clone()), - }, - idx: i as usize, - }); - } - result - } + fn to_words(&self) -> Vec { + let skips = stop_words::get("en"); + let stemmer = rust_stemmers::Stemmer::create(rust_stemmers::Algorithm::English); + let strs = self + .raw + .iter() + .map(|w| w.to_lowercase()) + .map(|w| { + w.chars() + .filter(|c| c.is_ascii_alphanumeric()) + .collect::() + }) + .map(|w| stemmer.stem(&w).into_owned()) + .collect::>(); + let mut result = vec![]; + for i in 0..strs.len() { + result.push(Word { + s: match skips.contains(&strs[i]) { + true => None, + false => Some(strs[i].clone()), + }, + idx: i as usize, + }); + } + result + } - fn to_string(&self) -> String { - self.raw.iter() - .map(|x| x.clone()) - .collect::>() - .join(" ") - } + fn to_string(&self) -> String { + self.raw + .iter() + .map(|x| x.clone()) + .collect::>() + .join(" ") + } } #[derive(Debug, Clone)] struct Word { - s: Option, - idx: usize, + s: Option, + idx: usize, } #[cfg(test)] mod tests { - use super::*; + use super::*; - #[test] - fn test_destutterer_stop_words() { - let mut w = new_destutterer(); - assert_eq!("welcome to the internet".to_string(), w.step("welcome to the internet".to_string())); - assert_eq!("have a look around".to_string(), w.step("welcome to the a internet; have a look around".to_string())); - } + #[test] + fn test_destutterer_stop_words() { + let mut w = new_destutterer(); + assert_eq!( + "welcome to the internet".to_string(), + w.step("welcome to the internet".to_string()) + ); + assert_eq!( + "have a look around".to_string(), + w.step("welcome to the a internet; have a look around".to_string()) + ); + } - #[test] - fn test_destutterer_punctuation() { - let mut w = new_destutterer(); - assert_eq!("cat, dog. cow? moose!".to_string(), w.step("cat, dog. cow? moose!".to_string())); - assert_eq!("elephant! fez gator".to_string(), w.step("moose, elephant! fez gator".to_string())); - assert_eq!("hij".to_string(), w.step("fez gator hij".to_string())); - } + #[test] + fn test_destutterer_punctuation() { + let mut w = new_destutterer(); + assert_eq!( + "cat, dog. cow? moose!".to_string(), + w.step("cat, dog. cow? moose!".to_string()) + ); + assert_eq!( + "elephant! fez gator".to_string(), + w.step("moose, elephant! fez gator".to_string()) + ); + assert_eq!("hij".to_string(), w.step("fez gator hij".to_string())); + } - #[test] - fn test_destutterer_basic() { - let mut w = new_destutterer(); - assert_eq!("cat dog cow".to_string(), w.step(" cat dog cow ".to_string())); - assert_eq!("moose".to_string(), w.step(" dog cow moose ".to_string())); - } + #[test] + fn test_destutterer_basic() { + let mut w = new_destutterer(); + assert_eq!( + "cat dog cow".to_string(), + w.step(" cat dog cow ".to_string()) + ); + assert_eq!("moose".to_string(), w.step(" dog cow moose ".to_string())); + } }