no render mac
parent
12dbf12299
commit
fffea2ddf0
|
|
@ -1,3 +1,3 @@
|
||||||
[submodule "rust-whisper.d/gitea-whisper-rs"]
|
[submodule "gitea-whisper-rs"]
|
||||||
path = gitea-whisper-rs
|
path = gitea-whisper-rs
|
||||||
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git
|
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git
|
||||||
|
|
|
||||||
|
|
@ -1,236 +1,284 @@
|
||||||
use rust_whisper_lib;
|
|
||||||
use rust_whisper_baked_lib;
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use listen_lib;
|
use listen_lib;
|
||||||
|
use rust_whisper_baked_lib;
|
||||||
|
use rust_whisper_lib;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let flags = rust_whisper_lib::Flags::parse();
|
let flags = rust_whisper_lib::Flags::parse();
|
||||||
match flags.wav.clone() {
|
match flags.wav.clone() {
|
||||||
Some(_) => wav_channel(flags),
|
Some(_) => wav_channel(flags),
|
||||||
None => channel(flags),
|
None => channel(flags),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
fn wav_channel(flags: rust_whisper_lib::Flags) {
|
fn wav_channel(flags: rust_whisper_lib::Flags) {
|
||||||
let mut w = new_destutterer();
|
let mut w = new_destutterer();
|
||||||
rust_whisper_baked_lib::wav_channel(
|
rust_whisper_baked_lib::wav_channel(
|
||||||
flags.clone(),
|
flags.clone(),
|
||||||
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
||||||
match result {
|
match result {
|
||||||
Ok(transcribed) => {
|
Ok(transcribed) => {
|
||||||
let s = w.step(transcribed.to_string());
|
let s = w.step(transcribed.to_string());
|
||||||
println!("{}", s);
|
println!("{}", s);
|
||||||
},
|
}
|
||||||
Err(msg) => { eprintln!("error: {}", msg); },
|
Err(msg) => {
|
||||||
};
|
eprintln!("error: {}", msg);
|
||||||
},
|
}
|
||||||
);
|
};
|
||||||
|
},
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn wav(flags: rust_whisper_lib::Flags, _path: String) {
|
fn wav(flags: rust_whisper_lib::Flags, _path: String) {
|
||||||
let mut w = new_destutterer();
|
let mut w = new_destutterer();
|
||||||
rust_whisper_baked_lib::wav(flags,
|
rust_whisper_baked_lib::wav(
|
||||||
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
flags,
|
||||||
match result {
|
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
||||||
Ok(transcribed) => {
|
match result {
|
||||||
let s = w.step(transcribed.to_string());
|
Ok(transcribed) => {
|
||||||
println!("{}", s);
|
let s = w.step(transcribed.to_string());
|
||||||
},
|
println!("{}", s);
|
||||||
Err(msg) => { eprintln!("error: {}", msg); },
|
}
|
||||||
};
|
Err(msg) => {
|
||||||
},
|
eprintln!("error: {}", msg);
|
||||||
);
|
}
|
||||||
|
};
|
||||||
|
},
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn channel(flags: rust_whisper_lib::Flags) {
|
fn channel(flags: rust_whisper_lib::Flags) {
|
||||||
let (send, recv) = std::sync::mpsc::sync_channel(100);
|
let (send, recv) = std::sync::mpsc::sync_channel(100);
|
||||||
|
|
||||||
eprintln!("rust whisper baked lib channel...");
|
eprintln!("rust whisper baked lib channel...");
|
||||||
thread::spawn(move || {
|
thread::spawn(move || {
|
||||||
let mut w = new_destutterer();
|
let mut w = new_destutterer();
|
||||||
rust_whisper_baked_lib::channel(
|
rust_whisper_baked_lib::channel(
|
||||||
flags.clone(),
|
flags.clone(),
|
||||||
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
||||||
match result {
|
match result {
|
||||||
Ok(transcribed) => {
|
Ok(transcribed) => {
|
||||||
let s = w.step(transcribed.to_string());
|
let s = w.step(transcribed.to_string());
|
||||||
println!("{}", s);
|
println!("{}", s);
|
||||||
},
|
}
|
||||||
Err(msg) => { eprintln!("error: {}", msg); },
|
Err(msg) => {
|
||||||
};
|
eprintln!("error: {}", msg);
|
||||||
},
|
}
|
||||||
recv,
|
};
|
||||||
);
|
},
|
||||||
});
|
recv,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
eprintln!("listen lib main...");
|
eprintln!("listen lib main...");
|
||||||
let flags = rust_whisper_lib::Flags::parse();
|
let flags = rust_whisper_lib::Flags::parse();
|
||||||
match flags.stream_device {
|
match flags.stream_device {
|
||||||
Some(device_name) => {
|
Some(device_name) => {
|
||||||
eprintln!("with device ({}) '{}'", device_name.len(), &device_name);
|
eprintln!("with device ({}) '{}'", device_name.len(), &device_name);
|
||||||
if device_name.len() == 0 {
|
if device_name.len() == 0 {
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
for device in listen_lib::devices() {
|
for device in listen_lib::devices() {
|
||||||
eprintln!("[{}] {}", i, device);
|
eprintln!("[{}] {}", i, device);
|
||||||
i += 1;
|
i += 1;
|
||||||
|
}
|
||||||
|
eprintln!("found {} devices", i);
|
||||||
|
} else {
|
||||||
|
listen_lib::main_with(
|
||||||
|
|data| {
|
||||||
|
send.send(data).unwrap();
|
||||||
|
},
|
||||||
|
device_name,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
eprintln!("found {} devices", i);
|
}
|
||||||
} else {
|
None => {
|
||||||
listen_lib::main_with(|data| {
|
eprintln!("without any device");
|
||||||
send.send(data).unwrap();
|
listen_lib::main(|data| {
|
||||||
}, device_name);
|
send.send(data).unwrap();
|
||||||
}
|
});
|
||||||
},
|
}
|
||||||
None => {
|
}
|
||||||
eprintln!("without any device");
|
eprintln!("/listen lib main...");
|
||||||
listen_lib::main(|data| {
|
|
||||||
send.send(data).unwrap();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
eprintln!("/listen lib main...");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Destutterer {
|
struct Destutterer {
|
||||||
prev: Words,
|
prev: Words,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new_destutterer() -> Destutterer {
|
fn new_destutterer() -> Destutterer {
|
||||||
Destutterer{prev: new_words()}
|
Destutterer { prev: new_words() }
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Destutterer {
|
impl Destutterer {
|
||||||
fn step(&mut self, next: String) -> String {
|
fn step(&mut self, next: String) -> String {
|
||||||
if next.len() == 0 {
|
if next.len() == 0 {
|
||||||
return next;
|
return next;
|
||||||
}
|
}
|
||||||
|
|
||||||
let next_words = Words::from_string(next.clone());
|
let next_words = Words::from_string(next.clone());
|
||||||
let mut n = self.prev.comparable_len().clamp(0, next_words.comparable_len());
|
let mut n = self
|
||||||
//println!("n={} prev='{:?}' next='{:?}'", n, self.prev.to_comparable_words(), next_words.to_comparable_words());
|
.prev
|
||||||
while n > 0 {
|
.comparable_len()
|
||||||
let (prev_s, _) = self.prev.last_n_comparable_to_string(n);
|
.clamp(0, next_words.comparable_len());
|
||||||
let (next_s, next_idx) = next_words.first_n_comparable_to_string(n);
|
//println!("n={} prev='{:?}' next='{:?}'", n, self.prev.to_comparable_words(), next_words.to_comparable_words());
|
||||||
if prev_s == next_s {
|
while n > 0 {
|
||||||
self.prev = next_words;
|
let (prev_s, _) = self.prev.last_n_comparable_to_string(n);
|
||||||
return self.prev.skip(next_idx+1).to_string();
|
let (next_s, next_idx) = next_words.first_n_comparable_to_string(n);
|
||||||
}
|
if prev_s == next_s {
|
||||||
n -= 1;
|
self.prev = next_words;
|
||||||
}
|
return self.prev.skip(next_idx + 1).to_string();
|
||||||
self.prev = next_words;
|
}
|
||||||
self.prev.to_string()
|
n -= 1;
|
||||||
}
|
}
|
||||||
|
self.prev = next_words;
|
||||||
|
self.prev.to_string()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
struct Words {
|
struct Words {
|
||||||
raw: Vec<String>,
|
raw: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new_words() -> Words {
|
fn new_words() -> Words {
|
||||||
Words{raw: vec![]}
|
Words { raw: vec![] }
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Words {
|
impl Words {
|
||||||
fn from_string(s: String) -> Words {
|
fn from_string(s: String) -> Words {
|
||||||
let mut result = Words{raw: vec![]};
|
let mut result = Words { raw: vec![] };
|
||||||
for word in s.split(" ") {
|
for word in s.split(" ") {
|
||||||
let word = word.trim();
|
let word = word.trim();
|
||||||
if word.len() > 0 {
|
if word.len() > 0 {
|
||||||
result.raw.push(word.to_string());
|
result.raw.push(word.to_string());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
fn skip(&self, n: usize) -> Words {
|
fn skip(&self, n: usize) -> Words {
|
||||||
Words{
|
Words {
|
||||||
raw: self.raw.iter().skip(n).map(|x| x.clone()).collect(),
|
raw: self.raw.iter().skip(n).map(|x| x.clone()).collect(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) {
|
fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) {
|
||||||
let v = self.to_comparable_words();
|
let v = self.to_comparable_words();
|
||||||
let v = v[(v.len() - n).clamp(0, v.len())..].to_vec();
|
let v = v[(v.len() - n).clamp(0, v.len())..].to_vec();
|
||||||
return (v.iter().map(|x| x.s.clone().unwrap()).collect::<Vec<String>>().join(" "), v[0].idx)
|
return (
|
||||||
}
|
v.iter()
|
||||||
|
.map(|x| x.s.clone().unwrap())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(" "),
|
||||||
|
v[0].idx,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
fn first_n_comparable_to_string(&self, n: usize) -> (String, usize){
|
fn first_n_comparable_to_string(&self, n: usize) -> (String, usize) {
|
||||||
let v = self.to_comparable_words();
|
let v = self.to_comparable_words();
|
||||||
let v = v[0..n.clamp(0, v.len())].to_vec();
|
let v = v[0..n.clamp(0, v.len())].to_vec();
|
||||||
return (v.iter().map(|x| x.s.clone().unwrap()).collect::<Vec<String>>().join(" "), v[v.len()-1].idx)
|
return (
|
||||||
}
|
v.iter()
|
||||||
|
.map(|x| x.s.clone().unwrap())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(" "),
|
||||||
|
v[v.len() - 1].idx,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
fn comparable_len(&self) -> usize {
|
fn comparable_len(&self) -> usize {
|
||||||
self.to_comparable_words().len()
|
self.to_comparable_words().len()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn to_comparable_words(&self) -> Vec<Word> {
|
fn to_comparable_words(&self) -> Vec<Word> {
|
||||||
self.to_words().iter()
|
self.to_words()
|
||||||
.filter(|x| x.s.is_some())
|
.iter()
|
||||||
.map(|x| x.clone())
|
.filter(|x| x.s.is_some())
|
||||||
.collect()
|
.map(|x| x.clone())
|
||||||
}
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
fn to_words(&self) -> Vec<Word> {
|
fn to_words(&self) -> Vec<Word> {
|
||||||
let skips = stop_words::get("en");
|
let skips = stop_words::get("en");
|
||||||
let stemmer = rust_stemmers::Stemmer::create(rust_stemmers::Algorithm::English);
|
let stemmer = rust_stemmers::Stemmer::create(rust_stemmers::Algorithm::English);
|
||||||
let strs = self.raw.iter()
|
let strs = self
|
||||||
.map(|w| w.to_lowercase())
|
.raw
|
||||||
.map(|w| w.chars().filter(|c| c.is_ascii_alphanumeric()).collect::<String>())
|
.iter()
|
||||||
.map(|w| stemmer.stem(&w).into_owned())
|
.map(|w| w.to_lowercase())
|
||||||
.collect::<Vec<String>>();
|
.map(|w| {
|
||||||
let mut result = vec![];
|
w.chars()
|
||||||
for i in 0..strs.len() {
|
.filter(|c| c.is_ascii_alphanumeric())
|
||||||
result.push(Word{
|
.collect::<String>()
|
||||||
s: match skips.contains(&strs[i]) {
|
})
|
||||||
true => None,
|
.map(|w| stemmer.stem(&w).into_owned())
|
||||||
false => Some(strs[i].clone()),
|
.collect::<Vec<String>>();
|
||||||
},
|
let mut result = vec![];
|
||||||
idx: i as usize,
|
for i in 0..strs.len() {
|
||||||
});
|
result.push(Word {
|
||||||
}
|
s: match skips.contains(&strs[i]) {
|
||||||
result
|
true => None,
|
||||||
}
|
false => Some(strs[i].clone()),
|
||||||
|
},
|
||||||
|
idx: i as usize,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
fn to_string(&self) -> String {
|
fn to_string(&self) -> String {
|
||||||
self.raw.iter()
|
self.raw
|
||||||
.map(|x| x.clone())
|
.iter()
|
||||||
.collect::<Vec<String>>()
|
.map(|x| x.clone())
|
||||||
.join(" ")
|
.collect::<Vec<String>>()
|
||||||
}
|
.join(" ")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
struct Word {
|
struct Word {
|
||||||
s: Option<String>,
|
s: Option<String>,
|
||||||
idx: usize,
|
idx: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_destutterer_stop_words() {
|
fn test_destutterer_stop_words() {
|
||||||
let mut w = new_destutterer();
|
let mut w = new_destutterer();
|
||||||
assert_eq!("welcome to the internet".to_string(), w.step("welcome to the internet".to_string()));
|
assert_eq!(
|
||||||
assert_eq!("have a look around".to_string(), w.step("welcome to the a internet; have a look around".to_string()));
|
"welcome to the internet".to_string(),
|
||||||
}
|
w.step("welcome to the internet".to_string())
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
"have a look around".to_string(),
|
||||||
|
w.step("welcome to the a internet; have a look around".to_string())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_destutterer_punctuation() {
|
fn test_destutterer_punctuation() {
|
||||||
let mut w = new_destutterer();
|
let mut w = new_destutterer();
|
||||||
assert_eq!("cat, dog. cow? moose!".to_string(), w.step("cat, dog. cow? moose!".to_string()));
|
assert_eq!(
|
||||||
assert_eq!("elephant! fez gator".to_string(), w.step("moose, elephant! fez gator".to_string()));
|
"cat, dog. cow? moose!".to_string(),
|
||||||
assert_eq!("hij".to_string(), w.step("fez gator hij".to_string()));
|
w.step("cat, dog. cow? moose!".to_string())
|
||||||
}
|
);
|
||||||
|
assert_eq!(
|
||||||
|
"elephant! fez gator".to_string(),
|
||||||
|
w.step("moose, elephant! fez gator".to_string())
|
||||||
|
);
|
||||||
|
assert_eq!("hij".to_string(), w.step("fez gator hij".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_destutterer_basic() {
|
fn test_destutterer_basic() {
|
||||||
let mut w = new_destutterer();
|
let mut w = new_destutterer();
|
||||||
assert_eq!("cat dog cow".to_string(), w.step(" cat dog cow ".to_string()));
|
assert_eq!(
|
||||||
assert_eq!("moose".to_string(), w.step(" dog cow moose ".to_string()));
|
"cat dog cow".to_string(),
|
||||||
}
|
w.step(" cat dog cow ".to_string())
|
||||||
|
);
|
||||||
|
assert_eq!("moose".to_string(), w.step(" dog cow moose ".to_string()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue