9 Commits

Author SHA1 Message Date
Bel LaPointe
b08e055dac todo 2024-01-02 18:23:03 -07:00
Bel LaPointe
9d993cfc8a update destutterer to do punctuation-free words 2024-01-02 18:20:46 -07:00
Bel LaPointe
f4f8ea429a merge 2024-01-02 17:51:29 -07:00
Bel LaPointe
38bea3735f todo 2024-01-02 17:51:14 -07:00
Bel LaPointe
1c48026690 need to overlap without ANY punctuation, which i can do by breaking into words 2024-01-02 17:49:47 -07:00
Bel LaPointe
a57312786a gr 2024-01-02 17:48:17 -07:00
Bel LaPointe
55e3bf0a26 update defaults 2024-01-02 17:47:00 -07:00
Bel LaPointe
743c8c5f67 time cargo run -- --wav $HOME/Downloads/41A6C472-6E4D-4953-9A90-2497D2DAD8C9.wav --stream-step 30 --stream-retain 25 --stream-{head,tail}=1 2> /dev/null 2024-01-02 16:45:04 -07:00
Bel LaPointe
d32f7a4c40 destutterer doesnt drop stutter for prev 2024-01-02 16:36:39 -07:00
4 changed files with 72 additions and 43 deletions

View File

@@ -24,6 +24,7 @@ pub fn wav_channel<F>(
handler_fn: F
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
flags.model_path = None;
flags.model_buffer = Some(include_bytes!("../../models/ggml-distil-medium.en.bin").to_vec());
flags.model_buffer = Some(include_bytes!("../../models/ggml-base.en.bin").to_vec());
rust_whisper_lib::wav_channel(flags, handler_fn);
}

View File

@@ -84,52 +84,65 @@ fn channel(flags: rust_whisper_lib::Flags) {
}
// State carried across successive `Destutterer::step` calls.
// NOTE(review): this hunk is rendered without +/- markers, so both revisions of the
// field appear together; presumably `prev` is the removed field and `prevs` is its
// replacement — confirm against the real file.
struct Destutterer {
// Previous chunk kept as a single string; read by the `match &self.prev` code path.
prev: Option<String>,
// Previous chunk kept as a list of words; read by the `self.prevs` code path.
prevs: Vec<Word>,
}
/// Builds a `Destutterer` that has not yet recorded any previous chunk.
// NOTE(review): the two constructor expressions below come from different revisions
// of the struct (one per field); only one of them can exist in the real file.
fn new_destutterer() -> Destutterer {
// Initial state for the `prev` field: no earlier chunk.
Destutterer{prev: None}
// Initial state for the `prevs` field: an empty word list.
Destutterer{prevs: vec![]}
}
// NOTE(review): this span is a rendered diff hunk without +/- markers. Lines that use
// `self.prev` / `prev` (string slicing) and lines that use `self.prevs` / `nexts`
// (word lists) presumably belong to the old and new revision respectively, so the
// text below is not compilable as written — confirm against the real file.
impl Destutterer {
/// Takes the next chunk of text and returns it with the leading part that
/// repeats the end of the previously seen chunk removed.
///
/// The input is trimmed first; an input that is empty after trimming is
/// returned as-is and the stored state is left untouched.
fn step(&mut self, next: String) -> String {
let next = next.trim().to_string();
if next.len() == 0 {
return next;
}
// --- string-based path: state lives in `self.prev` ---
match &self.prev {
None => {
// First chunk: remember it and return it whole.
self.prev = Some(next.clone());
next
},
Some(prev) => {
// Peel every trailing '?' and '.' off the chunk so they do not take part
// in the overlap comparison.
let without_trailing_punctuation = {
let mut next = next.clone();
while next.ends_with("?") || next.ends_with(".") {
next = next[..next.len()-1].to_string();
}
next
};
// Keep the peeled characters so they can be appended to the result again.
let trailing_punctuation = next[without_trailing_punctuation.len() ..].to_string();
let next = without_trailing_punctuation;
let next = {
// Longest candidate overlap, counted in bytes of the two strings.
let mut n = prev.len().clamp(0, next.len());
// --- word-based path: state lives in `self.prevs` ---
let nexts = Word::from_string(next.clone());
// Longest candidate overlap, counted in words.
let mut n = self.prevs.len().clamp(0, nexts.len());
// Shrink `n` until the last `n` units of the previous chunk equal the
// first `n` units of the new one; `n == 0` means no overlap was found.
while n > 0 {
// String-based comparison of raw slices.
if prev[prev.len() - n..] == next[..n] {
// Word-based comparison on the punctuation-free rendering of each side.
let prev_s = Word::to_comparable_string(self.prevs[self.prevs.len() - n..].to_vec());
let next_s = Word::to_comparable_string(nexts[..n].to_vec());
if prev_s == next_s {
break;
}
n -= 1;
}
// String-based path: drop the overlapping prefix.
next[n..].to_string()
};
// String-based path: the whole chunk was a repeat, so return an empty string.
if next.len() == 0 {
return "".to_string();
// Word-based path: remember the full new chunk, then return only the words
// after the overlap, with their original punctuation.
self.prevs = nexts.clone();
Word::to_string(nexts[n..].to_vec())
}
// String-based path: store the punctuation-stripped remainder and give the
// trailing punctuation back to the caller.
self.prev = Some(next.clone());
next + &trailing_punctuation
},
}
/// One whitespace-delimited token of a transcript, stored exactly as it
/// appeared in the text (punctuation and letter case included).
#[derive(Clone, Debug, PartialEq, Eq)]
struct Word {
    raw: String,
}

impl Word {
    /// Splits `s` into words.
    ///
    /// Any run of Unicode whitespace (spaces, tabs, newlines, ...) separates
    /// two words, and leading/trailing whitespace is ignored, so the result
    /// never contains an empty word.
    fn from_string(s: String) -> Vec<Word> {
        s.split_whitespace()
            .map(|token| Word { raw: token.to_string() })
            .collect()
    }

    /// Renders `v` for overlap comparison: every word is reduced to its
    /// alphanumeric characters and the words are joined with single spaces.
    ///
    /// The filter is Unicode-aware, so letters and digits outside ASCII are
    /// kept; with an ASCII-only filter, distinct non-ASCII words would all
    /// collapse to the same key and compare equal. A word made only of
    /// punctuation becomes an empty string but still occupies its position.
    fn to_comparable_string(v: Vec<Word>) -> String {
        v.iter()
            .map(|word| {
                word.raw
                    .chars()
                    .filter(|c| c.is_alphanumeric())
                    .collect::<String>()
            })
            .collect::<Vec<String>>()
            .join(" ")
    }

    /// Joins the untouched raw text of every word in `v` with single spaces.
    fn to_string(v: Vec<Word>) -> String {
        v.iter()
            .map(|word| word.raw.as_str())
            .collect::<Vec<&str>>()
            .join(" ")
    }
}
#[cfg(test)]
@@ -137,11 +150,19 @@ mod tests {
use super::*;
// NOTE(review): rendered diff hunk without +/- markers — two function headers and
// expectations from two revisions appear together; presumably `test_destutterer` is
// the old name and `test_destutterer_punctuation` the new one. Confirm against the
// real file.
#[test]
fn test_destutterer() {
fn test_destutterer_punctuation() {
let mut w = new_destutterer();
// The first chunk is returned whole.
assert_eq!("abcde".to_string(), w.step("abcde".to_string()));
// These two expectations assume overlap is found inside a single unspaced token
// ("cde" / "fg" shared with the previous chunk).
assert_eq!("fg".to_string(), w.step("cdefg".to_string()));
assert_eq!("hij".to_string(), w.step("fghij".to_string()));
// This expectation assumes an unspaced token is only ever dropped as a whole, so
// "fghij" comes back unchanged.
assert_eq!("fghij".to_string(), w.step("fghij".to_string()));
assert_eq!("a, b. c? d!".to_string(), w.step("a, b. c? d!".to_string()));
// "d," overlaps the earlier "d!" even though the punctuation differs; the words
// that are kept retain their own punctuation.
assert_eq!("e! f g".to_string(), w.step("d, e! f g".to_string()));
assert_eq!("hij".to_string(), w.step("f g hij".to_string()));
}
#[test]
fn test_destutterer_letters() {
    // Each pair is (chunk fed to `step`, text expected back). The chunks are fed
    // in order to one shared destutterer, so every expectation depends on the
    // chunk before it.
    let cases = [
        // Nothing seen yet: the chunk is returned whole.
        ("a b c d e", "a b c d e"),
        // Leading whitespace is ignored and the repeated "c d e" is dropped.
        (" c d e f g", "f g"),
        // Trailing whitespace is ignored and the repeated "f g" is dropped.
        ("f g h i j ", "h i j"),
        // No prefix of this chunk matches the end of the previous one.
        ("a g h i j", "a g h i j"),
    ];

    let mut destutterer = new_destutterer();
    for &(chunk, expected) in cases.iter() {
        assert_eq!(expected.to_string(), destutterer.step(chunk.to_string()));
    }
}
}

View File

@@ -14,13 +14,13 @@ pub struct Flags {
#[arg(long, default_value = "8")]
pub threads: i32,
#[arg(long, default_value = "5")]
#[arg(long, default_value = "30")]
pub stream_step: u64,
#[arg(long, default_value = "0.6")]
#[arg(long, default_value = "28.0")]
pub stream_retain: f32,
#[arg(long, default_value = "0.3")]
#[arg(long, default_value = "0.1")]
pub stream_head: f32,
#[arg(long, default_value = "0.3")]
#[arg(long, default_value = "0.1")]
pub stream_tail: f32,
#[arg(long, default_value = "false")]

7
todo.yaml Executable file
View File

@@ -0,0 +1,7 @@
todo:
- overlap without stop words
- rust-whisper warn when transcription time ~ input time
scheduled: []
done:
- todo: need to overlap without ANY punctuation, which i can do by breaking into words
ts: Tue Jan 2 18:23:00 MST 2024