diff --git a/rust-whisper-baked/src/main.rs b/rust-whisper-baked/src/main.rs index cf9cd9f..b22553b 100644 --- a/rust-whisper-baked/src/main.rs +++ b/rust-whisper-baked/src/main.rs @@ -88,7 +88,7 @@ struct Destutterer { } fn new_destutterer() -> Destutterer { - Destutterer{prevs: vec![]} + Destutterer{prev: new_words()} } impl Destutterer { @@ -98,19 +98,18 @@ impl Destutterer { } let next_words = Words::from_string(next.clone()); - let mut n = self.prevs.len().clamp(0, next_words.len()); + let mut n = self.prev.to_comparable_words().len().clamp(0, next_words.to_comparable_words().len()); while n > 0 { - let prev_s, _ = self.prevs.last_n_comparable_to_string(n); - let next_s, _ = next_words.first_n_comparable_to_string(n); - eprintln!("prevs => '{}'", &prev_s); - eprintln!("nexts => '{}'", &next_s); + let (prev_s, _) = self.prev.last_n_comparable_to_string(n); + let (next_s, next_idx) = next_words.first_n_comparable_to_string(n); if prev_s == next_s { - break; + self.prev = next_words; + return self.prev.skip(next_idx+1).to_string(); } n -= 1; } - self.prevs = next_words; - Word::to_string(nexts[n..].to_vec()) + self.prev = next_words; + self.prev.to_string() } } @@ -119,6 +118,10 @@ struct Words { raw: Vec, } +fn new_words() -> Words { + Words{raw: vec![]} +} + impl Words { fn from_string(s: String) -> Words { let mut result = Words{raw: vec![]}; @@ -131,16 +134,22 @@ impl Words { result } + fn skip(&self, n: usize) -> Words { + Words{ + raw: self.raw.iter().skip(n).map(|x| x.clone()).collect(), + } + } + fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) { let v = self.to_comparable_words(); - v = v[(v.len() - n).clamp(0, v.len())..].to_vec(); - v.iter().map(|x| x.s).collect().join(" "), v[v.len()-1].idx + let v = v[(v.len() - n).clamp(0, v.len())..].to_vec(); + return (v.iter().map(|x| x.s.clone().unwrap()).collect::>().join(" "), v[0].idx) } fn first_n_comparable_to_string(&self, n: usize) -> (String, usize){ let v = self.to_comparable_words(); - v = v[0..n.clamp(0, v.len())].to_vec(); - v.iter().map(|x| x.s).collect().join(" "), v[0].idx + let v = v[0..n.clamp(0, v.len())].to_vec(); + return (v.iter().map(|x| x.s.clone().unwrap()).collect::>().join(" "), v[v.len()-1].idx) } fn comparable_len(&self) -> usize { @@ -148,7 +157,7 @@ impl Words { } fn to_comparable_words(&self) -> Vec { - self.to_words().iter().filter(|x| x.s.is_some()).collect() + self.to_words().iter().filter(|x| x.s.is_some()).map(|x| x.clone()).collect() } fn to_words(&self) -> Vec { @@ -162,7 +171,7 @@ impl Words { result.push(Word{ s: match skips.contains(&strs[i]) { true => None, - false => Some(strs[i]), + false => Some(strs[i].clone()), }, idx: i as usize, }); @@ -178,7 +187,7 @@ impl Words { } } -#[derive(Debug)] +#[derive(Debug, Clone)] struct Word { s: Option, idx: usize, @@ -198,17 +207,15 @@ mod tests { #[test] fn test_destutterer_punctuation() { let mut w = new_destutterer(); - assert_eq!("a, b. c? d!".to_string(), w.step("a, b. c? d!".to_string())); - assert_eq!("e! f g".to_string(), w.step("d, e! f g".to_string())); - assert_eq!("hij".to_string(), w.step("f g hij".to_string())); + assert_eq!("cat, dog. cow? moose!".to_string(), w.step("cat, dog. cow? moose!".to_string())); + assert_eq!("elephant! fez gator".to_string(), w.step("moose, elephant! fez gator".to_string())); + assert_eq!("hij".to_string(), w.step("fez gator hij".to_string())); } #[test] - fn test_destutterer_letters() { + fn test_destutterer_basic() { let mut w = new_destutterer(); - assert_eq!("a b c d e".to_string(), w.step("a b c d e".to_string())); - assert_eq!("f g".to_string(), w.step(" c d e f g".to_string())); - assert_eq!("h i j".to_string(), w.step("f g h i j ".to_string())); - assert_eq!("a g h i j".to_string(), w.step("a g h i j".to_string())); + assert_eq!("cat dog cow".to_string(), w.step(" cat dog cow ".to_string())); + assert_eq!("moose".to_string(), w.step(" dog cow moose ".to_string())); } }