no render mac

master
Bel LaPointe 2025-09-10 11:20:01 -06:00
parent 12dbf12299
commit fffea2ddf0
2 changed files with 229 additions and 181 deletions

2
.gitmodules vendored
View File

@ -1,3 +1,3 @@
[submodule "rust-whisper.d/gitea-whisper-rs"] [submodule "gitea-whisper-rs"]
path = gitea-whisper-rs path = gitea-whisper-rs
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git

View File

@ -1,236 +1,284 @@
use rust_whisper_lib;
use rust_whisper_baked_lib;
use clap::Parser; use clap::Parser;
use listen_lib; use listen_lib;
use rust_whisper_baked_lib;
use rust_whisper_lib;
use std::thread; use std::thread;
fn main() { fn main() {
let flags = rust_whisper_lib::Flags::parse(); let flags = rust_whisper_lib::Flags::parse();
match flags.wav.clone() { match flags.wav.clone() {
Some(_) => wav_channel(flags), Some(_) => wav_channel(flags),
None => channel(flags), None => channel(flags),
}; };
} }
fn wav_channel(flags: rust_whisper_lib::Flags) { fn wav_channel(flags: rust_whisper_lib::Flags) {
let mut w = new_destutterer(); let mut w = new_destutterer();
rust_whisper_baked_lib::wav_channel( rust_whisper_baked_lib::wav_channel(
flags.clone(), flags.clone(),
move |result: Result<rust_whisper_lib::Transcribed, String>| { move |result: Result<rust_whisper_lib::Transcribed, String>| {
match result { match result {
Ok(transcribed) => { Ok(transcribed) => {
let s = w.step(transcribed.to_string()); let s = w.step(transcribed.to_string());
println!("{}", s); println!("{}", s);
}, }
Err(msg) => { eprintln!("error: {}", msg); }, Err(msg) => {
}; eprintln!("error: {}", msg);
}, }
); };
},
);
} }
fn wav(flags: rust_whisper_lib::Flags, _path: String) { fn wav(flags: rust_whisper_lib::Flags, _path: String) {
let mut w = new_destutterer(); let mut w = new_destutterer();
rust_whisper_baked_lib::wav(flags, rust_whisper_baked_lib::wav(
move |result: Result<rust_whisper_lib::Transcribed, String>| { flags,
match result { move |result: Result<rust_whisper_lib::Transcribed, String>| {
Ok(transcribed) => { match result {
let s = w.step(transcribed.to_string()); Ok(transcribed) => {
println!("{}", s); let s = w.step(transcribed.to_string());
}, println!("{}", s);
Err(msg) => { eprintln!("error: {}", msg); }, }
}; Err(msg) => {
}, eprintln!("error: {}", msg);
); }
};
},
);
} }
fn channel(flags: rust_whisper_lib::Flags) { fn channel(flags: rust_whisper_lib::Flags) {
let (send, recv) = std::sync::mpsc::sync_channel(100); let (send, recv) = std::sync::mpsc::sync_channel(100);
eprintln!("rust whisper baked lib channel..."); eprintln!("rust whisper baked lib channel...");
thread::spawn(move || { thread::spawn(move || {
let mut w = new_destutterer(); let mut w = new_destutterer();
rust_whisper_baked_lib::channel( rust_whisper_baked_lib::channel(
flags.clone(), flags.clone(),
move |result: Result<rust_whisper_lib::Transcribed, String>| { move |result: Result<rust_whisper_lib::Transcribed, String>| {
match result { match result {
Ok(transcribed) => { Ok(transcribed) => {
let s = w.step(transcribed.to_string()); let s = w.step(transcribed.to_string());
println!("{}", s); println!("{}", s);
}, }
Err(msg) => { eprintln!("error: {}", msg); }, Err(msg) => {
}; eprintln!("error: {}", msg);
}, }
recv, };
); },
}); recv,
);
});
eprintln!("listen lib main..."); eprintln!("listen lib main...");
let flags = rust_whisper_lib::Flags::parse(); let flags = rust_whisper_lib::Flags::parse();
match flags.stream_device { match flags.stream_device {
Some(device_name) => { Some(device_name) => {
eprintln!("with device ({}) '{}'", device_name.len(), &device_name); eprintln!("with device ({}) '{}'", device_name.len(), &device_name);
if device_name.len() == 0 { if device_name.len() == 0 {
let mut i = 0; let mut i = 0;
for device in listen_lib::devices() { for device in listen_lib::devices() {
eprintln!("[{}] {}", i, device); eprintln!("[{}] {}", i, device);
i += 1; i += 1;
}
eprintln!("found {} devices", i);
} else {
listen_lib::main_with(
|data| {
send.send(data).unwrap();
},
device_name,
);
} }
eprintln!("found {} devices", i); }
} else { None => {
listen_lib::main_with(|data| { eprintln!("without any device");
send.send(data).unwrap(); listen_lib::main(|data| {
}, device_name); send.send(data).unwrap();
} });
}, }
None => { }
eprintln!("without any device"); eprintln!("/listen lib main...");
listen_lib::main(|data| {
send.send(data).unwrap();
});
}
}
eprintln!("/listen lib main...");
} }
struct Destutterer { struct Destutterer {
prev: Words, prev: Words,
} }
fn new_destutterer() -> Destutterer { fn new_destutterer() -> Destutterer {
Destutterer{prev: new_words()} Destutterer { prev: new_words() }
} }
impl Destutterer { impl Destutterer {
fn step(&mut self, next: String) -> String { fn step(&mut self, next: String) -> String {
if next.len() == 0 { if next.len() == 0 {
return next; return next;
} }
let next_words = Words::from_string(next.clone()); let next_words = Words::from_string(next.clone());
let mut n = self.prev.comparable_len().clamp(0, next_words.comparable_len()); let mut n = self
//println!("n={} prev='{:?}' next='{:?}'", n, self.prev.to_comparable_words(), next_words.to_comparable_words()); .prev
while n > 0 { .comparable_len()
let (prev_s, _) = self.prev.last_n_comparable_to_string(n); .clamp(0, next_words.comparable_len());
let (next_s, next_idx) = next_words.first_n_comparable_to_string(n); //println!("n={} prev='{:?}' next='{:?}'", n, self.prev.to_comparable_words(), next_words.to_comparable_words());
if prev_s == next_s { while n > 0 {
self.prev = next_words; let (prev_s, _) = self.prev.last_n_comparable_to_string(n);
return self.prev.skip(next_idx+1).to_string(); let (next_s, next_idx) = next_words.first_n_comparable_to_string(n);
} if prev_s == next_s {
n -= 1; self.prev = next_words;
} return self.prev.skip(next_idx + 1).to_string();
self.prev = next_words; }
self.prev.to_string() n -= 1;
} }
self.prev = next_words;
self.prev.to_string()
}
} }
/// A chunk of text held as its individual words.
#[derive(Clone, Debug)]
struct Words {
    /// The words as they appeared in the text, in order.
    raw: Vec<String>,
}
fn new_words() -> Words { fn new_words() -> Words {
Words{raw: vec![]} Words { raw: vec![] }
} }
impl Words { impl Words {
fn from_string(s: String) -> Words { fn from_string(s: String) -> Words {
let mut result = Words{raw: vec![]}; let mut result = Words { raw: vec![] };
for word in s.split(" ") { for word in s.split(" ") {
let word = word.trim(); let word = word.trim();
if word.len() > 0 { if word.len() > 0 {
result.raw.push(word.to_string()); result.raw.push(word.to_string());
} }
} }
result result
} }
fn skip(&self, n: usize) -> Words { fn skip(&self, n: usize) -> Words {
Words{ Words {
raw: self.raw.iter().skip(n).map(|x| x.clone()).collect(), raw: self.raw.iter().skip(n).map(|x| x.clone()).collect(),
} }
} }
fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) { fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) {
let v = self.to_comparable_words(); let v = self.to_comparable_words();
let v = v[(v.len() - n).clamp(0, v.len())..].to_vec(); let v = v[(v.len() - n).clamp(0, v.len())..].to_vec();
return (v.iter().map(|x| x.s.clone().unwrap()).collect::<Vec<String>>().join(" "), v[0].idx) return (
} v.iter()
.map(|x| x.s.clone().unwrap())
.collect::<Vec<String>>()
.join(" "),
v[0].idx,
);
}
fn first_n_comparable_to_string(&self, n: usize) -> (String, usize){ fn first_n_comparable_to_string(&self, n: usize) -> (String, usize) {
let v = self.to_comparable_words(); let v = self.to_comparable_words();
let v = v[0..n.clamp(0, v.len())].to_vec(); let v = v[0..n.clamp(0, v.len())].to_vec();
return (v.iter().map(|x| x.s.clone().unwrap()).collect::<Vec<String>>().join(" "), v[v.len()-1].idx) return (
} v.iter()
.map(|x| x.s.clone().unwrap())
.collect::<Vec<String>>()
.join(" "),
v[v.len() - 1].idx,
);
}
fn comparable_len(&self) -> usize { fn comparable_len(&self) -> usize {
self.to_comparable_words().len() self.to_comparable_words().len()
} }
fn to_comparable_words(&self) -> Vec<Word> { fn to_comparable_words(&self) -> Vec<Word> {
self.to_words().iter() self.to_words()
.filter(|x| x.s.is_some()) .iter()
.map(|x| x.clone()) .filter(|x| x.s.is_some())
.collect() .map(|x| x.clone())
} .collect()
}
fn to_words(&self) -> Vec<Word> { fn to_words(&self) -> Vec<Word> {
let skips = stop_words::get("en"); let skips = stop_words::get("en");
let stemmer = rust_stemmers::Stemmer::create(rust_stemmers::Algorithm::English); let stemmer = rust_stemmers::Stemmer::create(rust_stemmers::Algorithm::English);
let strs = self.raw.iter() let strs = self
.map(|w| w.to_lowercase()) .raw
.map(|w| w.chars().filter(|c| c.is_ascii_alphanumeric()).collect::<String>()) .iter()
.map(|w| stemmer.stem(&w).into_owned()) .map(|w| w.to_lowercase())
.collect::<Vec<String>>(); .map(|w| {
let mut result = vec![]; w.chars()
for i in 0..strs.len() { .filter(|c| c.is_ascii_alphanumeric())
result.push(Word{ .collect::<String>()
s: match skips.contains(&strs[i]) { })
true => None, .map(|w| stemmer.stem(&w).into_owned())
false => Some(strs[i].clone()), .collect::<Vec<String>>();
}, let mut result = vec![];
idx: i as usize, for i in 0..strs.len() {
}); result.push(Word {
} s: match skips.contains(&strs[i]) {
result true => None,
} false => Some(strs[i].clone()),
},
idx: i as usize,
});
}
result
}
fn to_string(&self) -> String { fn to_string(&self) -> String {
self.raw.iter() self.raw
.map(|x| x.clone()) .iter()
.collect::<Vec<String>>() .map(|x| x.clone())
.join(" ") .collect::<Vec<String>>()
} .join(" ")
}
} }
/// One word of a `Words`, in normalised form.
#[derive(Debug, Clone)]
struct Word {
    /// Normalised text, or `None` when the word was discarded as a stop word.
    s: Option<String>,
    /// Position of the word in the owning `Words::raw`.
    idx: usize,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds each `(input, expected)` pair through a single destutterer, in
    /// order, and checks every output.
    fn assert_steps(steps: &[(&str, &str)]) {
        let mut w = new_destutterer();
        for (input, expected) in steps {
            assert_eq!(expected.to_string(), w.step(input.to_string()));
        }
    }

    #[test]
    fn test_destutterer_stop_words() {
        assert_steps(&[
            ("welcome to the internet", "welcome to the internet"),
            (
                "welcome to the a internet; have a look around",
                "have a look around",
            ),
        ]);
    }

    #[test]
    fn test_destutterer_punctuation() {
        assert_steps(&[
            ("cat, dog. cow? moose!", "cat, dog. cow? moose!"),
            ("moose, elephant! fez gator", "elephant! fez gator"),
            ("fez gator hij", "hij"),
        ]);
    }

    #[test]
    fn test_destutterer_basic() {
        assert_steps(&[
            (" cat dog cow ", "cat dog cow"),
            (" dog cow moose ", "moose"),
        ]);
    }
}