Compare commits
28 Commits
| Author | SHA1 | Date |
|---|---|---|
|
|
fffea2ddf0 | |
|
|
12dbf12299 | |
|
|
f04a55590f | |
|
|
2254afcbfb | |
|
|
5fdc60e32c | |
|
|
4c80247ab9 | |
|
|
53e675b9a0 | |
|
|
9780c6f2ef | |
|
|
7f902af26f | |
|
|
9bc009996c | |
|
|
cbc8a4f9fd | |
|
|
a8c8140d18 | |
|
|
5bc3209070 | |
|
|
8b5c18e65e | |
|
|
ec47d8142a | |
|
|
03659164ba | |
|
|
709dd1dba3 | |
|
|
26595396cf | |
|
|
fb7892b52b | |
|
|
b08e055dac | |
|
|
9d993cfc8a | |
|
|
f4f8ea429a | |
|
|
38bea3735f | |
|
|
1c48026690 | |
|
|
a57312786a | |
|
|
55e3bf0a26 | |
|
|
743c8c5f67 | |
|
|
d32f7a4c40 |
|
|
@ -1,3 +1,3 @@
|
||||||
[submodule "rust-whisper.d/gitea-whisper-rs"]
|
[submodule "gitea-whisper-rs"]
|
||||||
path = gitea-whisper-rs
|
path = gitea-whisper-rs
|
||||||
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git
|
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ if ! which rust-whisper-baked; then
|
||||||
fi >&2
|
fi >&2
|
||||||
|
|
||||||
cat <<EOF
|
cat <<EOF
|
||||||
|
rust-whisper-baked --stream-device pulse_monitor --stream-step 16 --stream-retain 8 --stream-{head,tail}=0.25 2> /dev/null
|
||||||
rust-whisper-baked --stream-device 'BlackHole 2ch' --stream-step 30 --stream-retain 1 --stream-{head,tail}=0.25 --threads 9 2> /dev/null
|
rust-whisper-baked --stream-device 'BlackHole 2ch' --stream-step 30 --stream-retain 1 --stream-{head,tail}=0.25 --threads 9 2> /dev/null
|
||||||
| tee -a "$HOME/Sync/drawful/DnD/bdoob/__log.d/$(date +%Y.%m.%d).transcript.txt"
|
| tee -a "$HOME/Sync/drawful/DnD/bdoob/__log.d/$(date +%Y.%m.%d).transcript.txt"
|
||||||
| tee -a "$HOME/Sync/drawful/DnD/nessira.d/_log.d/$(date +%Y.%m.%d).transcript.txt"
|
| tee -a "$HOME/Sync/drawful/DnD/nessira.d/_log.d/$(date +%Y.%m.%d).transcript.txt"
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,11 @@ pub fn devices() -> Vec<String> {
|
||||||
fn _devices() -> Result<Vec<cpal::Device>, String> {
|
fn _devices() -> Result<Vec<cpal::Device>, String> {
|
||||||
match cpal::default_host().devices() {
|
match cpal::default_host().devices() {
|
||||||
Ok(devices) => Ok(devices.filter(|device| {
|
Ok(devices) => Ok(devices.filter(|device| {
|
||||||
device.supported_input_configs().unwrap().count() > 0
|
let input_configs = device.supported_input_configs();
|
||||||
|
if !input_configs.is_ok() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
input_configs.unwrap().count() > 0
|
||||||
}).collect()),
|
}).collect()),
|
||||||
Err(msg) => Err(format!("failed to get devices: {}", msg)),
|
Err(msg) => Err(format!("failed to get devices: {}", msg)),
|
||||||
}
|
}
|
||||||
|
|
@ -92,13 +96,22 @@ impl Listener {
|
||||||
filter(|device| device.name().unwrap() == self.device_name).
|
filter(|device| device.name().unwrap() == self.device_name).
|
||||||
collect::<Vec<_>>();
|
collect::<Vec<_>>();
|
||||||
let device = devices.first().unwrap();
|
let device = devices.first().unwrap();
|
||||||
let cfg = device.supported_input_configs()
|
let mut sample_rate = 15_500;
|
||||||
|
let mut cfgs: Vec<_> = device.supported_input_configs()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
|
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
|
||||||
.filter(|x| x.min_sample_rate() >= cpal::SampleRate(15_500))
|
.filter(|x| x.min_sample_rate() >= cpal::SampleRate(sample_rate))
|
||||||
.nth(0)
|
.collect();
|
||||||
.unwrap()
|
while cfgs.len() == 0 && sample_rate > 0 {
|
||||||
.with_max_sample_rate();
|
sample_rate /= 2;
|
||||||
|
cfgs = device.supported_input_configs()
|
||||||
|
.unwrap()
|
||||||
|
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
|
||||||
|
.filter(|x| x.min_sample_rate() >= cpal::SampleRate(sample_rate))
|
||||||
|
.collect();
|
||||||
|
}
|
||||||
|
assert!(cfgs.len() > 0);
|
||||||
|
let cfg = cfgs[0].clone().with_max_sample_rate();
|
||||||
|
|
||||||
let downsample_ratio = cfg.channels() as f32 * (cfg.sample_rate().0 as f32 / 16_000.0);
|
let downsample_ratio = cfg.channels() as f32 * (cfg.sample_rate().0 as f32 / 16_000.0);
|
||||||
let stream = device.build_input_stream(
|
let stream = device.build_input_stream(
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ pub fn channel<F>(
|
||||||
stream: std::sync::mpsc::Receiver<Vec<f32>>,
|
stream: std::sync::mpsc::Receiver<Vec<f32>>,
|
||||||
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
|
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
|
||||||
flags.model_path = None;
|
flags.model_path = None;
|
||||||
flags.model_buffer = Some(include_bytes!("../../models/ggml-tiny.en.bin").to_vec());
|
flags.model_buffer = Some(get_fast());
|
||||||
rust_whisper_lib::channel(flags.clone(), handler_fn, stream);
|
rust_whisper_lib::channel(flags.clone(), handler_fn, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -15,7 +15,7 @@ pub fn wav<F>(
|
||||||
handler_fn: F
|
handler_fn: F
|
||||||
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
|
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
|
||||||
flags.model_path = None;
|
flags.model_path = None;
|
||||||
flags.model_buffer = Some(include_bytes!("../../models/ggml-distil-medium.en.bin").to_vec());
|
flags.model_buffer = Some(get_good());
|
||||||
rust_whisper_lib::wav(flags.clone(), handler_fn, flags.wav.unwrap());
|
rust_whisper_lib::wav(flags.clone(), handler_fn, flags.wav.unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -24,10 +24,18 @@ pub fn wav_channel<F>(
|
||||||
handler_fn: F
|
handler_fn: F
|
||||||
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
|
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
|
||||||
flags.model_path = None;
|
flags.model_path = None;
|
||||||
flags.model_buffer = Some(include_bytes!("../../models/ggml-base.en.bin").to_vec());
|
flags.model_buffer = Some(get_good());
|
||||||
rust_whisper_lib::wav_channel(flags, handler_fn);
|
rust_whisper_lib::wav_channel(flags, handler_fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn f32_from_wav_file(path: &String) -> Result<Vec<f32>, String> {
|
pub fn f32_from_wav_file(path: &String) -> Result<Vec<f32>, String> {
|
||||||
rust_whisper_lib::f32_from_wav_file(path)
|
rust_whisper_lib::f32_from_wav_file(path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn get_fast() -> Vec<u8> {
|
||||||
|
include_bytes!("../../models/ggml-small.en.bin").to_vec()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_good() -> Vec<u8> {
|
||||||
|
include_bytes!("../../models/ggml-distil-medium.en.bin").to_vec()
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -383,6 +383,12 @@ dependencies = [
|
||||||
"hashbrown",
|
"hashbrown",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itoa"
|
||||||
|
version = "1.0.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jni"
|
name = "jni"
|
||||||
version = "0.19.0"
|
version = "0.19.0"
|
||||||
|
|
@ -757,14 +763,26 @@ version = "1.0.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
|
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rust-stemmers"
|
||||||
|
version = "1.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54"
|
||||||
|
dependencies = [
|
||||||
|
"serde",
|
||||||
|
"serde_derive",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rust-whisper-baked"
|
name = "rust-whisper-baked"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"listen-lib",
|
"listen-lib",
|
||||||
|
"rust-stemmers",
|
||||||
"rust-whisper-baked-lib",
|
"rust-whisper-baked-lib",
|
||||||
"rust-whisper-lib",
|
"rust-whisper-lib",
|
||||||
|
"stop-words",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -802,6 +820,12 @@ dependencies = [
|
||||||
"windows-sys",
|
"windows-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ryu"
|
||||||
|
version = "1.0.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "same-file"
|
name = "same-file"
|
||||||
version = "1.0.6"
|
version = "1.0.6"
|
||||||
|
|
@ -817,6 +841,37 @@ version = "1.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde"
|
||||||
|
version = "1.0.193"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
|
||||||
|
dependencies = [
|
||||||
|
"serde_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_derive"
|
||||||
|
version = "1.0.193"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.41",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "serde_json"
|
||||||
|
version = "1.0.109"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cb0652c533506ad7a2e353cce269330d6afd8bdfb6d75e0ace5b35aacbd7b9e9"
|
||||||
|
dependencies = [
|
||||||
|
"itoa",
|
||||||
|
"ryu",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "shlex"
|
name = "shlex"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
|
|
@ -848,6 +903,15 @@ version = "1.11.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970"
|
checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "stop-words"
|
||||||
|
version = "0.8.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8500024d809de02ecbf998472b7bed3c4fca380df2be68917f6a473bdb28ddcc"
|
||||||
|
dependencies = [
|
||||||
|
"serde_json",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.10.0"
|
version = "0.10.0"
|
||||||
|
|
|
||||||
|
|
@ -10,3 +10,5 @@ rust-whisper-lib = { path = "../rust-whisper-lib" }
|
||||||
rust-whisper-baked-lib = { path = "../rust-whisper-baked-lib" }
|
rust-whisper-baked-lib = { path = "../rust-whisper-baked-lib" }
|
||||||
listen-lib = { path = "../listen-lib" }
|
listen-lib = { path = "../listen-lib" }
|
||||||
clap = { version = "4.4.10", features = ["derive"] }
|
clap = { version = "4.4.10", features = ["derive"] }
|
||||||
|
stop-words = "0.8.0"
|
||||||
|
rust-stemmers = "1.2.0"
|
||||||
|
|
|
||||||
|
|
@ -1,147 +1,284 @@
|
||||||
use rust_whisper_lib;
|
|
||||||
use rust_whisper_baked_lib;
|
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use listen_lib;
|
use listen_lib;
|
||||||
|
use rust_whisper_baked_lib;
|
||||||
|
use rust_whisper_lib;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let flags = rust_whisper_lib::Flags::parse();
|
let flags = rust_whisper_lib::Flags::parse();
|
||||||
match flags.wav.clone() {
|
match flags.wav.clone() {
|
||||||
Some(_) => wav_channel(flags),
|
Some(_) => wav_channel(flags),
|
||||||
None => channel(flags),
|
None => channel(flags),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
fn wav_channel(flags: rust_whisper_lib::Flags) {
|
fn wav_channel(flags: rust_whisper_lib::Flags) {
|
||||||
let mut w = new_destutterer();
|
let mut w = new_destutterer();
|
||||||
rust_whisper_baked_lib::wav_channel(
|
rust_whisper_baked_lib::wav_channel(
|
||||||
flags.clone(),
|
flags.clone(),
|
||||||
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
||||||
match result {
|
match result {
|
||||||
Ok(transcribed) => {
|
Ok(transcribed) => {
|
||||||
let s = w.step(transcribed.to_string());
|
let s = w.step(transcribed.to_string());
|
||||||
println!("{}", s);
|
println!("{}", s);
|
||||||
},
|
}
|
||||||
Err(msg) => { eprintln!("error: {}", msg); },
|
Err(msg) => {
|
||||||
};
|
eprintln!("error: {}", msg);
|
||||||
},
|
}
|
||||||
);
|
};
|
||||||
|
},
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn wav(flags: rust_whisper_lib::Flags, _path: String) {
|
fn wav(flags: rust_whisper_lib::Flags, _path: String) {
|
||||||
let mut w = new_destutterer();
|
let mut w = new_destutterer();
|
||||||
rust_whisper_baked_lib::wav(flags,
|
rust_whisper_baked_lib::wav(
|
||||||
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
flags,
|
||||||
match result {
|
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
||||||
Ok(transcribed) => {
|
match result {
|
||||||
let s = w.step(transcribed.to_string());
|
Ok(transcribed) => {
|
||||||
println!("{}", s);
|
let s = w.step(transcribed.to_string());
|
||||||
},
|
println!("{}", s);
|
||||||
Err(msg) => { eprintln!("error: {}", msg); },
|
}
|
||||||
};
|
Err(msg) => {
|
||||||
},
|
eprintln!("error: {}", msg);
|
||||||
);
|
}
|
||||||
|
};
|
||||||
|
},
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn channel(flags: rust_whisper_lib::Flags) {
|
fn channel(flags: rust_whisper_lib::Flags) {
|
||||||
let (send, recv) = std::sync::mpsc::sync_channel(100);
|
let (send, recv) = std::sync::mpsc::sync_channel(100);
|
||||||
|
|
||||||
eprintln!("rust whisper baked lib channel...");
|
eprintln!("rust whisper baked lib channel...");
|
||||||
thread::spawn(move || {
|
thread::spawn(move || {
|
||||||
rust_whisper_baked_lib::channel(
|
let mut w = new_destutterer();
|
||||||
flags.clone(),
|
rust_whisper_baked_lib::channel(
|
||||||
|result: Result<rust_whisper_lib::Transcribed, String>| {
|
flags.clone(),
|
||||||
match result {
|
move |result: Result<rust_whisper_lib::Transcribed, String>| {
|
||||||
Ok(transcribed) => { println!("{}", transcribed.to_string()); },
|
match result {
|
||||||
Err(msg) => { eprintln!("error: {}", msg); },
|
Ok(transcribed) => {
|
||||||
};
|
let s = w.step(transcribed.to_string());
|
||||||
},
|
println!("{}", s);
|
||||||
recv,
|
}
|
||||||
);
|
Err(msg) => {
|
||||||
});
|
eprintln!("error: {}", msg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
},
|
||||||
|
recv,
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
eprintln!("listen lib main...");
|
eprintln!("listen lib main...");
|
||||||
let flags = rust_whisper_lib::Flags::parse();
|
let flags = rust_whisper_lib::Flags::parse();
|
||||||
match flags.stream_device {
|
match flags.stream_device {
|
||||||
Some(device_name) => {
|
Some(device_name) => {
|
||||||
if device_name == "" {
|
eprintln!("with device ({}) '{}'", device_name.len(), &device_name);
|
||||||
for device in listen_lib::devices() {
|
if device_name.len() == 0 {
|
||||||
eprintln!("{}", device);
|
let mut i = 0;
|
||||||
|
for device in listen_lib::devices() {
|
||||||
|
eprintln!("[{}] {}", i, device);
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
eprintln!("found {} devices", i);
|
||||||
|
} else {
|
||||||
|
listen_lib::main_with(
|
||||||
|
|data| {
|
||||||
|
send.send(data).unwrap();
|
||||||
|
},
|
||||||
|
device_name,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
listen_lib::main_with(|data| {
|
None => {
|
||||||
send.send(data).unwrap();
|
eprintln!("without any device");
|
||||||
}, device_name);
|
listen_lib::main(|data| {
|
||||||
}
|
send.send(data).unwrap();
|
||||||
},
|
});
|
||||||
None => {
|
}
|
||||||
listen_lib::main(|data| {
|
}
|
||||||
send.send(data).unwrap();
|
eprintln!("/listen lib main...");
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
eprintln!("/listen lib main...");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Destutterer {
|
struct Destutterer {
|
||||||
prev: Option<String>,
|
prev: Words,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new_destutterer() -> Destutterer {
|
fn new_destutterer() -> Destutterer {
|
||||||
Destutterer{prev: None}
|
Destutterer { prev: new_words() }
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Destutterer {
|
impl Destutterer {
|
||||||
fn step(&mut self, next: String) -> String {
|
fn step(&mut self, next: String) -> String {
|
||||||
let next = next.trim().to_string();
|
if next.len() == 0 {
|
||||||
if next.len() == 0 {
|
return next;
|
||||||
return next;
|
}
|
||||||
}
|
|
||||||
match &self.prev {
|
let next_words = Words::from_string(next.clone());
|
||||||
None => {
|
let mut n = self
|
||||||
self.prev = Some(next.clone());
|
.prev
|
||||||
next
|
.comparable_len()
|
||||||
},
|
.clamp(0, next_words.comparable_len());
|
||||||
Some(prev) => {
|
//println!("n={} prev='{:?}' next='{:?}'", n, self.prev.to_comparable_words(), next_words.to_comparable_words());
|
||||||
let without_trailing_punctuation = {
|
while n > 0 {
|
||||||
let mut next = next.clone();
|
let (prev_s, _) = self.prev.last_n_comparable_to_string(n);
|
||||||
while next.ends_with("?") || next.ends_with(".") {
|
let (next_s, next_idx) = next_words.first_n_comparable_to_string(n);
|
||||||
next = next[..next.len()-1].to_string();
|
if prev_s == next_s {
|
||||||
}
|
self.prev = next_words;
|
||||||
next
|
return self.prev.skip(next_idx + 1).to_string();
|
||||||
};
|
|
||||||
let trailing_punctuation = next[without_trailing_punctuation.len() ..].to_string();
|
|
||||||
let next = without_trailing_punctuation;
|
|
||||||
let next = {
|
|
||||||
let mut n = prev.len().clamp(0, next.len());
|
|
||||||
while n > 0 {
|
|
||||||
if prev[prev.len() - n..] == next[..n] {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
n -= 1;
|
|
||||||
}
|
|
||||||
next[n..].to_string()
|
|
||||||
};
|
|
||||||
if next.len() == 0 {
|
|
||||||
return "".to_string();
|
|
||||||
}
|
}
|
||||||
self.prev = Some(next.clone());
|
n -= 1;
|
||||||
next + &trailing_punctuation
|
}
|
||||||
},
|
self.prev = next_words;
|
||||||
}
|
self.prev.to_string()
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
struct Words {
|
||||||
|
raw: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_words() -> Words {
|
||||||
|
Words { raw: vec![] }
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Words {
|
||||||
|
fn from_string(s: String) -> Words {
|
||||||
|
let mut result = Words { raw: vec![] };
|
||||||
|
for word in s.split(" ") {
|
||||||
|
let word = word.trim();
|
||||||
|
if word.len() > 0 {
|
||||||
|
result.raw.push(word.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
fn skip(&self, n: usize) -> Words {
|
||||||
|
Words {
|
||||||
|
raw: self.raw.iter().skip(n).map(|x| x.clone()).collect(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) {
|
||||||
|
let v = self.to_comparable_words();
|
||||||
|
let v = v[(v.len() - n).clamp(0, v.len())..].to_vec();
|
||||||
|
return (
|
||||||
|
v.iter()
|
||||||
|
.map(|x| x.s.clone().unwrap())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(" "),
|
||||||
|
v[0].idx,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn first_n_comparable_to_string(&self, n: usize) -> (String, usize) {
|
||||||
|
let v = self.to_comparable_words();
|
||||||
|
let v = v[0..n.clamp(0, v.len())].to_vec();
|
||||||
|
return (
|
||||||
|
v.iter()
|
||||||
|
.map(|x| x.s.clone().unwrap())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(" "),
|
||||||
|
v[v.len() - 1].idx,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn comparable_len(&self) -> usize {
|
||||||
|
self.to_comparable_words().len()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_comparable_words(&self) -> Vec<Word> {
|
||||||
|
self.to_words()
|
||||||
|
.iter()
|
||||||
|
.filter(|x| x.s.is_some())
|
||||||
|
.map(|x| x.clone())
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_words(&self) -> Vec<Word> {
|
||||||
|
let skips = stop_words::get("en");
|
||||||
|
let stemmer = rust_stemmers::Stemmer::create(rust_stemmers::Algorithm::English);
|
||||||
|
let strs = self
|
||||||
|
.raw
|
||||||
|
.iter()
|
||||||
|
.map(|w| w.to_lowercase())
|
||||||
|
.map(|w| {
|
||||||
|
w.chars()
|
||||||
|
.filter(|c| c.is_ascii_alphanumeric())
|
||||||
|
.collect::<String>()
|
||||||
|
})
|
||||||
|
.map(|w| stemmer.stem(&w).into_owned())
|
||||||
|
.collect::<Vec<String>>();
|
||||||
|
let mut result = vec![];
|
||||||
|
for i in 0..strs.len() {
|
||||||
|
result.push(Word {
|
||||||
|
s: match skips.contains(&strs[i]) {
|
||||||
|
true => None,
|
||||||
|
false => Some(strs[i].clone()),
|
||||||
|
},
|
||||||
|
idx: i as usize,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
self.raw
|
||||||
|
.iter()
|
||||||
|
.map(|x| x.clone())
|
||||||
|
.collect::<Vec<String>>()
|
||||||
|
.join(" ")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct Word {
|
||||||
|
s: Option<String>,
|
||||||
|
idx: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_destutterer() {
|
fn test_destutterer_stop_words() {
|
||||||
let mut w = new_destutterer();
|
let mut w = new_destutterer();
|
||||||
assert_eq!("abcde".to_string(), w.step("abcde".to_string()));
|
assert_eq!(
|
||||||
assert_eq!("fg".to_string(), w.step("cdefg".to_string()));
|
"welcome to the internet".to_string(),
|
||||||
assert_eq!("hij".to_string(), w.step("fghij".to_string()));
|
w.step("welcome to the internet".to_string())
|
||||||
assert_eq!("fghij".to_string(), w.step("fghij".to_string()));
|
);
|
||||||
}
|
assert_eq!(
|
||||||
|
"have a look around".to_string(),
|
||||||
|
w.step("welcome to the a internet; have a look around".to_string())
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_destutterer_punctuation() {
|
||||||
|
let mut w = new_destutterer();
|
||||||
|
assert_eq!(
|
||||||
|
"cat, dog. cow? moose!".to_string(),
|
||||||
|
w.step("cat, dog. cow? moose!".to_string())
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
"elephant! fez gator".to_string(),
|
||||||
|
w.step("moose, elephant! fez gator".to_string())
|
||||||
|
);
|
||||||
|
assert_eq!("hij".to_string(), w.step("fez gator hij".to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_destutterer_basic() {
|
||||||
|
let mut w = new_destutterer();
|
||||||
|
assert_eq!(
|
||||||
|
"cat dog cow".to_string(),
|
||||||
|
w.step(" cat dog cow ".to_string())
|
||||||
|
);
|
||||||
|
assert_eq!("moose".to_string(), w.step(" dog cow moose ".to_string()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,13 +14,13 @@ pub struct Flags {
|
||||||
#[arg(long, default_value = "8")]
|
#[arg(long, default_value = "8")]
|
||||||
pub threads: i32,
|
pub threads: i32,
|
||||||
|
|
||||||
#[arg(long, default_value = "5")]
|
#[arg(long, default_value = "8")]
|
||||||
pub stream_step: u64,
|
pub stream_step: u64,
|
||||||
#[arg(long, default_value = "0.6")]
|
#[arg(long, default_value = "4.0")]
|
||||||
pub stream_retain: f32,
|
pub stream_retain: f32,
|
||||||
#[arg(long, default_value = "0.3")]
|
#[arg(long, default_value = "2.0")]
|
||||||
pub stream_head: f32,
|
pub stream_head: f32,
|
||||||
#[arg(long, default_value = "0.3")]
|
#[arg(long, default_value = "0.0")]
|
||||||
pub stream_tail: f32,
|
pub stream_tail: f32,
|
||||||
|
|
||||||
#[arg(long, default_value = "false")]
|
#[arg(long, default_value = "false")]
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
todo:
|
||||||
|
- wav to subtitles
|
||||||
|
- compound words like checkmark vs check mark should destutter
|
||||||
|
- whisper trims outside silence so head and tail never get hit
|
||||||
|
- split on silence-ish instead of duration
|
||||||
|
- rust-whisper warn when transcription time ~ input time
|
||||||
|
scheduled: []
|
||||||
|
done:
|
||||||
|
- todo: need to overlap without ANY punctuation, which I can do by breaking into words
|
||||||
|
ts: Tue Jan 2 13:23:00 EST 2024
|
||||||
|
- todo: overlap without stop words
|
||||||
|
ts: Wed Jan 3 03:22:14 EST 2024
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 11 KiB |
|
|
@ -0,0 +1,100 @@
|
||||||
|
// This example is not going to build in this folder.
|
||||||
|
// You need to copy this code into your project and add the dependencies whisper_rs and hound in your cargo.toml
|
||||||
|
|
||||||
|
use hound;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::Write;
|
||||||
|
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};
|
||||||
|
|
||||||
|
/// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout.
|
||||||
|
fn main() -> Result<(), &'static str> {
|
||||||
|
let args: Vec<String> = std::env::args().collect();
|
||||||
|
|
||||||
|
// Load a context and model.
|
||||||
|
let ctx = WhisperContext::new(&args[1])
|
||||||
|
.expect("failed to load model");
|
||||||
|
// Create a state
|
||||||
|
let mut state = ctx.create_state().expect("failed to create key");
|
||||||
|
|
||||||
|
// Create a params object for running the model.
|
||||||
|
// The number of past samples to consider defaults to 0.
|
||||||
|
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
|
||||||
|
|
||||||
|
// Edit params as needed.
|
||||||
|
// Set the number of threads to use to 1.
|
||||||
|
//params.set_n_threads(1);
|
||||||
|
// Enable translation.
|
||||||
|
params.set_translate(true);
|
||||||
|
// Set the target language for translation to English.
|
||||||
|
params.set_language(Some("en"));
|
||||||
|
// Disable anything that prints to stdout.
|
||||||
|
params.set_print_special(false);
|
||||||
|
params.set_print_progress(false);
|
||||||
|
params.set_print_realtime(false);
|
||||||
|
params.set_print_timestamps(false);
|
||||||
|
|
||||||
|
// Open the audio file.
|
||||||
|
let mut reader = hound::WavReader::open(&args[2]).expect("failed to open file");
|
||||||
|
#[allow(unused_variables)]
|
||||||
|
let hound::WavSpec {
|
||||||
|
channels,
|
||||||
|
sample_rate,
|
||||||
|
bits_per_sample,
|
||||||
|
..
|
||||||
|
} = reader.spec();
|
||||||
|
|
||||||
|
// Convert the audio to floating point samples.
|
||||||
|
let mut audio = whisper_rs::convert_integer_to_float_audio(
|
||||||
|
&reader
|
||||||
|
.samples::<i16>()
|
||||||
|
.map(|s| s.expect("invalid sample"))
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
);
|
||||||
|
|
||||||
|
// Convert audio to 16KHz mono f32 samples, as required by the model.
|
||||||
|
// These utilities are provided for convenience, but can be replaced with custom conversion logic.
|
||||||
|
// SIMD variants of these functions are also available on nightly Rust (see the docs).
|
||||||
|
if channels == 2 {
|
||||||
|
audio = whisper_rs::convert_stereo_to_mono_audio(&audio)?;
|
||||||
|
} else if channels != 1 {
|
||||||
|
panic!(">2 channels unsupported");
|
||||||
|
}
|
||||||
|
|
||||||
|
if sample_rate != 16000 {
|
||||||
|
panic!("sample rate must be 16KHz");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the model.
|
||||||
|
state.full(params, &audio[..]).expect("failed to run model");
|
||||||
|
|
||||||
|
// Create a file to write the transcript to.
|
||||||
|
let mut file = File::create("transcript.txt").expect("failed to create file");
|
||||||
|
|
||||||
|
// Iterate through the segments of the transcript.
|
||||||
|
let num_segments = state
|
||||||
|
.full_n_segments()
|
||||||
|
.expect("failed to get number of segments");
|
||||||
|
for i in 0..num_segments {
|
||||||
|
// Get the transcribed text and timestamps for the current segment.
|
||||||
|
let segment = state
|
||||||
|
.full_get_segment_text(i)
|
||||||
|
.expect("failed to get segment");
|
||||||
|
let start_timestamp = state
|
||||||
|
.full_get_segment_t0(i)
|
||||||
|
.expect("failed to get start timestamp");
|
||||||
|
let end_timestamp = state
|
||||||
|
.full_get_segment_t1(i)
|
||||||
|
.expect("failed to get end timestamp");
|
||||||
|
|
||||||
|
// Print the segment to stdout.
|
||||||
|
println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
|
||||||
|
|
||||||
|
// Format the segment information as a string.
|
||||||
|
let line = format!("[{} - {}]: {}\n", start_timestamp, end_timestamp, segment);
|
||||||
|
|
||||||
|
// Write the segment information to the file.
|
||||||
|
file.write_all(line.as_bytes())
|
||||||
|
.expect("failed to write to file");
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,66 @@
|
||||||
|
#! /bin/bash
|
||||||
|
|
||||||
|
main() {
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
input_wav="$(realpath "$1")"
|
||||||
|
model="$(realpath "${2:-../models/ggml-small.en.bin}")"
|
||||||
|
already_transcribed="${3:-false}"
|
||||||
|
|
||||||
|
sanitized_wav="${input_wav%.*}.mono-16khz.wav"
|
||||||
|
ffmpeg -y -i "$input_wav" -ac 1 -ar 16k "$sanitized_wav"
|
||||||
|
|
||||||
|
if ! $already_transcribed; then
|
||||||
|
pushd "$(dirname "$(realpath "$BASH_SOURCE")")"
|
||||||
|
cd ../gitea-whisper-rs/
|
||||||
|
cargo run --example wav_subtitles -- "$model" "$sanitized_wav"
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
out_to_srt ../gitea-whisper-rs/transcript.txt > "${input_wav%.*}.srt"
|
||||||
|
|
||||||
|
ffmpeg -y \
|
||||||
|
-loop 1 -i sc.jpg \
|
||||||
|
-i "$input_wav" \
|
||||||
|
-i "${input_wav%.*}.srt" \
|
||||||
|
-c:v libx264 \
|
||||||
|
-tune stillimage \
|
||||||
|
-pix_fmt yuv420p -shortest \
|
||||||
|
"${input_wav%.*}.mkv"
|
||||||
|
|
||||||
|
ls "${input_wav%.*}.mkv"
|
||||||
|
}
|
||||||
|
|
||||||
|
out_to_srt() {
|
||||||
|
cs_to_ts() {
|
||||||
|
echo "$1" | awk '{
|
||||||
|
printf "%02d:%02d:%02d,000",
|
||||||
|
int(($1/100.0)/60/60),
|
||||||
|
int(($1/100.0)/60%60),
|
||||||
|
int(($1/100.0)%60)
|
||||||
|
}'
|
||||||
|
}
|
||||||
|
|
||||||
|
cat "$1" \
|
||||||
|
| (
|
||||||
|
i=0
|
||||||
|
while read -r line; do
|
||||||
|
((i+=1))
|
||||||
|
echo "$i"
|
||||||
|
echo "$(cs_to_ts "$(
|
||||||
|
echo "${line%%:] *}" \
|
||||||
|
| tr -d '[' \
|
||||||
|
| awk '{print $1}'
|
||||||
|
)") --> $(cs_to_ts "$(
|
||||||
|
echo "${line%%:] *}" \
|
||||||
|
| tr -d '[' \
|
||||||
|
| awk '{print $3}'
|
||||||
|
)")"
|
||||||
|
echo "${line#*: }"
|
||||||
|
echo
|
||||||
|
done
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
if [ "$0" == "$BASH_SOURCE" ]; then
|
||||||
|
main "$@"
|
||||||
|
fi
|
||||||
Loading…
Reference in New Issue