tod
parent
fb7892b52b
commit
26595396cf
|
|
@ -84,7 +84,7 @@ fn channel(flags: rust_whisper_lib::Flags) {
|
|||
}
|
||||
|
||||
struct Destutterer {
|
||||
prevs: Vec<Word>,
|
||||
prev: Words,
|
||||
}
|
||||
|
||||
fn new_destutterer() -> Destutterer {
|
||||
|
|
@ -97,59 +97,89 @@ impl Destutterer {
|
|||
return next;
|
||||
}
|
||||
|
||||
let nexts = Word::from_string(next.clone());
|
||||
|
||||
let mut n = self.prevs.len().clamp(0, nexts.len());
|
||||
let next_words = Words::from_string(next.clone());
|
||||
let mut n = self.prevs.len().clamp(0, next_words.len());
|
||||
while n > 0 {
|
||||
let prev_s = Word::to_comparable_string(self.prevs, self.prevs.len(), n);
|
||||
eprintln!("prevs: {:?} => '{}'", self.prevs[self.prevs.len() - n..].to_vec(), &prev_s);
|
||||
let next_s = Word::to_comparable_string(nexts, 0, n); // TODO indexes skip stop words
|
||||
eprintln!("nexts: {:?} => '{}'", nexts[..n].to_vec(), &next_s);
|
||||
let prev_s = (self.prevs.len()-n..self.prevs.len()).map(|i| self.prevs.comparable_string(i)).collect().join(" ");
|
||||
let next_s = (0-n).map(|i| next_words.comparable_string(i)).collect().join(" ");
|
||||
eprintln!("prevs => '{}'", &prev_s);
|
||||
eprintln!("nexts => '{}'", &next_s);
|
||||
if prev_s == next_s {
|
||||
break;
|
||||
}
|
||||
n -= 1;
|
||||
}
|
||||
self.prevs = nexts.clone();
|
||||
self.prevs = next_words;
|
||||
Word::to_string(nexts[n..].to_vec())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct Word {
|
||||
raw: String,
|
||||
struct Words {
|
||||
raw: Vec<String>,
|
||||
}
|
||||
|
||||
impl Word {
|
||||
fn from_string(s: String) -> Vec<Word> {
|
||||
let mut result = vec![];
|
||||
impl Words {
|
||||
fn from_string(s: String) -> Words {
|
||||
let mut result = Words{raw: vec![]};
|
||||
for word in s.split(" ") {
|
||||
let word = word.trim();
|
||||
if word.len() > 0 {
|
||||
result.push(Word{raw: word.to_string()});
|
||||
result.raw.push(word.to_string());
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn to_comparable_string(v: Vec<Word>) -> String {
|
||||
let skips = stop_words::get("en");
|
||||
v.iter()
|
||||
.map(|w| w.raw.to_lowercase())
|
||||
.filter(|word| !skips.contains(word))
|
||||
.map(|word| word.chars().filter(|c| c.is_ascii_alphanumeric()).collect::<String>())
|
||||
.collect::<Vec<String>>()
|
||||
.join(" ")
|
||||
fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) {
|
||||
TODO
|
||||
}
|
||||
|
||||
fn to_string(v: Vec<Word>) -> String {
|
||||
v.iter()
|
||||
.map(|x| x.raw.clone())
|
||||
fn first_n_comparable_to_string(&self, n: usize) -> (String, usize){
|
||||
TODO
|
||||
}
|
||||
|
||||
fn comparable_len(&self) -> usize {
|
||||
self.to_comparable_words().len()
|
||||
}
|
||||
|
||||
/// Returns the entries of `to_words` whose normalized text `s` is present
/// (`Some`), in their original order.
fn to_comparable_words(&self) -> Vec<Word> {
    // Consume the freshly built vector: iterating by reference would yield
    // `&Word`, which cannot be collected into `Vec<Word>`.
    self.to_words()
        .into_iter()
        .filter(|word| word.s.is_some())
        .collect()
}
|
||||
|
||||
fn to_words(&self) -> Vec<Word> {
|
||||
let skips = stop_words::get("en");
|
||||
let strs = self.raw.iter()
|
||||
.map(|w| w.to_lowercase())
|
||||
.map(|word| word.chars().filter(|c| c.is_ascii_alphanumeric()).collect::<String>())
|
||||
.collect::<Vec<String>>();
|
||||
let mut result = vec![];
|
||||
for i in 0..strs.len() {
|
||||
result.push(Word{
|
||||
s: match skips.contains(&strs[i]) {
|
||||
true => None,
|
||||
false => Some(strs[i]),
|
||||
},
|
||||
idx: i as usize,
|
||||
});
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
fn to_string(&self) -> String {
|
||||
self.raw.iter()
|
||||
.map(|x| x.clone())
|
||||
.collect::<Vec<String>>()
|
||||
.join(" ")
|
||||
}
|
||||
}
|
||||
|
||||
/// A single word produced by `Words::to_words`.
#[derive(Debug)]
struct Word {
    /// Normalized text of the word, or `None` when `to_words` found the
    /// normalized text in the stop-word list.
    s: Option<String>,
    /// Position of the word within the raw word list it was built from.
    idx: usize,
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
|||
Loading…
Reference in New Issue