|
|
|
|
@@ -88,7 +88,7 @@ struct Destutterer {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn new_destutterer() -> Destutterer {
|
|
|
|
|
Destutterer{prevs: vec![]}
|
|
|
|
|
Destutterer{prev: new_words()}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Destutterer {
|
|
|
|
|
@@ -98,19 +98,19 @@ impl Destutterer {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let next_words = Words::from_string(next.clone());
|
|
|
|
|
let mut n = self.prevs.len().clamp(0, next_words.len());
|
|
|
|
|
let mut n = self.prev.to_comparable_words().len().clamp(0, next_words.to_comparable_words().len());
|
|
|
|
|
//println!("n={} prev='{:?}' next='{:?}'", n, self.prev.to_comparable_words(), next_words.to_comparable_words());
|
|
|
|
|
while n > 0 {
|
|
|
|
|
let prev_s, _ = self.prevs.last_n_comparable_to_string(n);
|
|
|
|
|
let next_s, _ = next_words.first_n_comparable_to_string(n);
|
|
|
|
|
eprintln!("prevs => '{}'", &prev_s);
|
|
|
|
|
eprintln!("nexts => '{}'", &next_s);
|
|
|
|
|
let (prev_s, _) = self.prev.last_n_comparable_to_string(n);
|
|
|
|
|
let (next_s, next_idx) = next_words.first_n_comparable_to_string(n);
|
|
|
|
|
if prev_s == next_s {
|
|
|
|
|
break;
|
|
|
|
|
self.prev = next_words;
|
|
|
|
|
return self.prev.skip(next_idx+1).to_string();
|
|
|
|
|
}
|
|
|
|
|
n -= 1;
|
|
|
|
|
}
|
|
|
|
|
self.prevs = next_words;
|
|
|
|
|
Word::to_string(nexts[n..].to_vec())
|
|
|
|
|
self.prev = next_words;
|
|
|
|
|
self.prev.to_string()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -119,6 +119,10 @@ struct Words {
|
|
|
|
|
raw: Vec<String>,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn new_words() -> Words {
|
|
|
|
|
Words{raw: vec![]}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Words {
|
|
|
|
|
fn from_string(s: String) -> Words {
|
|
|
|
|
let mut result = Words{raw: vec![]};
|
|
|
|
|
@@ -131,16 +135,22 @@ impl Words {
|
|
|
|
|
result
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn skip(&self, n: usize) -> Words {
|
|
|
|
|
Words{
|
|
|
|
|
raw: self.raw.iter().skip(n).map(|x| x.clone()).collect(),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) {
|
|
|
|
|
let v = self.to_comparable_words();
|
|
|
|
|
v = v[(v.len() - n).clamp(0, v.len())..].to_vec();
|
|
|
|
|
v.iter().map(|x| x.s).collect().join(" "), v[v.len()-1].idx
|
|
|
|
|
let v = v[(v.len() - n).clamp(0, v.len())..].to_vec();
|
|
|
|
|
return (v.iter().map(|x| x.s.clone().unwrap()).collect::<Vec<String>>().join(" "), v[0].idx)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn first_n_comparable_to_string(&self, n: usize) -> (String, usize){
|
|
|
|
|
let v = self.to_comparable_words();
|
|
|
|
|
v = v[0..n.clamp(0, v.len())].to_vec();
|
|
|
|
|
v.iter().map(|x| x.s).collect().join(" "), v[0].idx
|
|
|
|
|
let v = v[0..n.clamp(0, v.len())].to_vec();
|
|
|
|
|
return (v.iter().map(|x| x.s.clone().unwrap()).collect::<Vec<String>>().join(" "), v[v.len()-1].idx)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn comparable_len(&self) -> usize {
|
|
|
|
|
@@ -148,7 +158,7 @@ impl Words {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns the words from `to_words()` whose `s` is `Some`, in their original order.
fn to_comparable_words(&self) -> Vec<Word> {
    // `to_words()` returns an owned `Vec`, so filter it by value instead of cloning each word.
    self.to_words()
        .into_iter()
        .filter(|word| word.s.is_some())
        .collect()
}
|
|
|
|
|
|
|
|
|
|
fn to_words(&self) -> Vec<Word> {
|
|
|
|
|
@@ -162,7 +172,7 @@ impl Words {
|
|
|
|
|
result.push(Word{
|
|
|
|
|
s: match skips.contains(&strs[i]) {
|
|
|
|
|
true => None,
|
|
|
|
|
false => Some(strs[i]),
|
|
|
|
|
false => Some(strs[i].clone()),
|
|
|
|
|
},
|
|
|
|
|
idx: i as usize,
|
|
|
|
|
});
|
|
|
|
|
@@ -178,7 +188,7 @@ impl Words {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
|
struct Word {
|
|
|
|
|
s: Option<String>,
|
|
|
|
|
idx: usize,
|
|
|
|
|
@@ -198,17 +208,15 @@ mod tests {
|
|
|
|
|
/// Feeds three inputs with punctuation through one `Destutterer` and checks
/// the text returned by each `step` call.
#[test]
fn test_destutterer_punctuation() {
    let mut w = new_destutterer();
    // First input: nothing has been seen before, so it comes back unchanged.
    assert_eq!("cat, dog. cow? moose!".to_string(), w.step("cat, dog. cow? moose!".to_string()));
    // "moose," repeats the end of the previous input ("moose!") apart from punctuation.
    assert_eq!("elephant! fez gator".to_string(), w.step("moose, elephant! fez gator".to_string()));
    // "fez gator" repeats the end of the previous input.
    assert_eq!("hij".to_string(), w.step("fez gator hij".to_string()));
}
|
|
|
|
|
|
|
|
|
|
/// Feeds two whitespace-padded inputs through one `Destutterer` and checks
/// the text returned by each `step` call.
#[test]
fn test_destutterer_basic() {
    let mut w = new_destutterer();
    // First input: expected back without the surrounding spaces.
    assert_eq!("cat dog cow".to_string(), w.step(" cat dog cow ".to_string()));
    // "dog cow" repeats the end of the previous input, so only "moose" is expected.
    assert_eq!("moose".to_string(), w.step(" dog cow moose ".to_string()));
}
|
|
|
|
|
}
|
|
|
|
|
|