stem words for destuttering
parent
4c80247ab9
commit
5fdc60e32c
|
|
@ -13,6 +13,7 @@ if ! which rust-whisper-baked; then
|
|||
fi >&2
|
||||
|
||||
cat <<EOF
|
||||
rust-whisper-baked --stream-device pulse_monitor --stream-step 16 --stream-retain 8 --stream-{head,tail}=0.25 2> /dev/null
|
||||
rust-whisper-baked --stream-device 'BlackHole 2ch' --stream-step 30 --stream-retain 1 --stream-{head,tail}=0.25 --threads 9 2> /dev/null
|
||||
| tee -a "$HOME/Sync/drawful/DnD/bdoob/__log.d/$(date +%Y.%m.%d).transcript.txt"
|
||||
| tee -a "$HOME/Sync/drawful/DnD/nessira.d/_log.d/$(date +%Y.%m.%d).transcript.txt"
|
||||
|
|
|
|||
|
|
@ -763,12 +763,23 @@ version = "1.0.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
|
||||
|
||||
[[package]]
|
||||
name = "rust-stemmers"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-whisper-baked"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"listen-lib",
|
||||
"rust-stemmers",
|
||||
"rust-whisper-baked-lib",
|
||||
"rust-whisper-lib",
|
||||
"stop-words",
|
||||
|
|
|
|||
|
|
@ -11,3 +11,4 @@ rust-whisper-baked-lib = { path = "../rust-whisper-baked-lib" }
|
|||
listen-lib = { path = "../listen-lib" }
|
||||
clap = { version = "4.4.10", features = ["derive"] }
|
||||
stop-words = "0.8.0"
|
||||
rust-stemmers = "1.2.0"
|
||||
|
|
|
|||
|
|
@ -167,14 +167,19 @@ impl Words {
|
|||
}
|
||||
|
||||
fn to_comparable_words(&self) -> Vec<Word> {
|
||||
self.to_words().iter().filter(|x| x.s.is_some()).map(|x| x.clone()).collect()
|
||||
self.to_words().iter()
|
||||
.filter(|x| x.s.is_some())
|
||||
.map(|x| x.clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn to_words(&self) -> Vec<Word> {
|
||||
let skips = stop_words::get("en");
|
||||
let stemmer = rust_stemmers::Stemmer::create(rust_stemmers::Algorithm::English);
|
||||
let strs = self.raw.iter()
|
||||
.map(|w| w.to_lowercase())
|
||||
.map(|word| word.chars().filter(|c| c.is_ascii_alphanumeric()).collect::<String>())
|
||||
.map(|w| w.chars().filter(|c| c.is_ascii_alphanumeric()).collect::<String>())
|
||||
.map(|w| stemmer.stem(&w).into_owned())
|
||||
.collect::<Vec<String>>();
|
||||
let mut result = vec![];
|
||||
for i in 0..strs.len() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue