39 Commits

Author SHA1 Message Date
Bel LaPointe
fffea2ddf0 no render mac 2025-09-10 11:20:01 -06:00
Bel LaPointe
12dbf12299 k 2024-09-21 21:33:40 -04:00
Bel LaPointe
f04a55590f fixed 2024-09-21 21:33:40 -04:00
Bel LaPointe
2254afcbfb wav to mkv with subtitles scripting 2024-09-21 21:33:40 -04:00
bel
5fdc60e32c stem words for destuttering 2024-01-03 20:40:35 -07:00
bel
4c80247ab9 accept lower sample rates if 16k not avail 2024-01-03 17:18:07 -07:00
bel
53e675b9a0 no panic on unusable mic 2024-01-03 17:09:27 -07:00
Bel LaPointe
9780c6f2ef todo 2024-01-03 08:50:59 -07:00
Bel LaPointe
7f902af26f default update 2024-01-03 08:40:13 -07:00
Bel LaPointe
9bc009996c oop 2024-01-03 08:38:24 -07:00
Bel LaPointe
cbc8a4f9fd cargo run -- --stream-step 8 --stream-retain 4 --stream-head=2 --stream-tail=0 2> /dev/null 2024-01-03 08:37:27 -07:00
Bel LaPointe
a8c8140d18 functionize at least 2024-01-03 08:28:22 -07:00
Bel LaPointe
5bc3209070 x=2; cargo run -- --wav $HOME/Downloads/41A6C472-6E4D-4953-9A90-2497D2DAD8C9.wav --stream-step $((x*4)) --stream-retain $((x*2)) --stream-{head,tail}=$((x)) 2> /dev/null 2024-01-03 08:22:45 -07:00
Bel LaPointe
8b5c18e65e todo 2024-01-03 08:22:15 -07:00
Bel LaPointe
ec47d8142a destutter with stopwords impl 2024-01-03 07:54:21 -07:00
bel
03659164ba wip 2024-01-02 21:14:02 -07:00
bel
709dd1dba3 tod 2024-01-02 21:12:33 -07:00
bel
26595396cf tod 2024-01-02 21:01:01 -07:00
Bel LaPointe
fb7892b52b todo 2024-01-02 18:38:13 -07:00
Bel LaPointe
b08e055dac todo 2024-01-02 18:23:03 -07:00
Bel LaPointe
9d993cfc8a update destutterer to do punctuation-free words 2024-01-02 18:20:46 -07:00
Bel LaPointe
f4f8ea429a merge 2024-01-02 17:51:29 -07:00
Bel LaPointe
38bea3735f todo 2024-01-02 17:51:14 -07:00
Bel LaPointe
1c48026690 need to overlap without ANY puctuation, which i can do by breaking into words 2024-01-02 17:49:47 -07:00
Bel LaPointe
a57312786a gr 2024-01-02 17:48:17 -07:00
Bel LaPointe
55e3bf0a26 update defaults 2024-01-02 17:47:00 -07:00
Bel LaPointe
743c8c5f67 time cargo run -- --wav $HOME/Downloads/41A6C472-6E4D-4953-9A90-2497D2DAD8C9.wav --stream-step 30 --stream-retain 25 --stream-{head,tail}=1 2> /dev/null 2024-01-02 16:45:04 -07:00
Bel LaPointe
d32f7a4c40 destutterer doesnt drop stutter for prev 2024-01-02 16:36:39 -07:00
Bel LaPointe
d94cbd6927 baked-lib wav_channel to base 2024-01-02 16:30:07 -07:00
Bel LaPointe
7a5db3b2ac no callback on empty str 2024-01-02 16:29:58 -07:00
Bel LaPointe
d0dc9571d7 rust-whisper-baked listen also destutters 2024-01-02 16:12:07 -07:00
Bel LaPointe
6082f7e446 rust-whisper-baked de-stutters wav_channel output 2024-01-02 16:10:11 -07:00
Bel LaPointe
dd6f980266 rust-whisper-baked with --wav to wav_channel 2024-01-02 14:18:37 -07:00
Bel LaPointe
601fe517d7 rust-whisper-baked-lib::wav_channel() 2024-01-02 14:17:57 -07:00
Bel LaPointe
cd339de334 rust-whisper-lib::wav_channel() 2024-01-02 14:15:53 -07:00
bel
393100973c baked streams even wav files 2024-01-01 20:56:53 -07:00
bel
97c025f04d rust-whisper-baked works with WAV 2024-01-01 19:06:14 -07:00
bel
871efd9b8c drop unused 2024-01-01 18:41:49 -07:00
Bel LaPointe
86bb1769f3 del unused 2023-12-21 22:30:43 -05:00
14 changed files with 632 additions and 530 deletions

2
.gitmodules vendored
View File

@@ -1,3 +1,3 @@
[submodule "rust-whisper.d/gitea-whisper-rs"]
[submodule "gitea-whisper-rs"]
path = gitea-whisper-rs
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git

View File

@@ -13,6 +13,7 @@ if ! which rust-whisper-baked; then
fi >&2
cat <<EOF
rust-whisper-baked --stream-device pulse_monitor --stream-step 16 --stream-retain 8 --stream-{head,tail}=0.25 2> /dev/null
rust-whisper-baked --stream-device 'BlackHole 2ch' --stream-step 30 --stream-retain 1 --stream-{head,tail}=0.25 --threads 9 2> /dev/null
| tee -a "$HOME/Sync/drawful/DnD/bdoob/__log.d/$(date +%Y.%m.%d).transcript.txt"
| tee -a "$HOME/Sync/drawful/DnD/nessira.d/_log.d/$(date +%Y.%m.%d).transcript.txt"

View File

@@ -25,7 +25,11 @@ pub fn devices() -> Vec<String> {
fn _devices() -> Result<Vec<cpal::Device>, String> {
match cpal::default_host().devices() {
Ok(devices) => Ok(devices.filter(|device| {
device.supported_input_configs().unwrap().count() > 0
let input_configs = device.supported_input_configs();
if !input_configs.is_ok() {
return false;
}
input_configs.unwrap().count() > 0
}).collect()),
Err(msg) => Err(format!("failed to get devices: {}", msg)),
}
@@ -92,13 +96,22 @@ impl Listener {
filter(|device| device.name().unwrap() == self.device_name).
collect::<Vec<_>>();
let device = devices.first().unwrap();
let cfg = device.supported_input_configs()
let mut sample_rate = 15_500;
let mut cfgs: Vec<_> = device.supported_input_configs()
.unwrap()
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
.filter(|x| x.min_sample_rate() >= cpal::SampleRate(15_500))
.nth(0)
.unwrap()
.with_max_sample_rate();
.filter(|x| x.min_sample_rate() >= cpal::SampleRate(sample_rate))
.collect();
while cfgs.len() == 0 && sample_rate > 0 {
sample_rate /= 2;
cfgs = device.supported_input_configs()
.unwrap()
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
.filter(|x| x.min_sample_rate() >= cpal::SampleRate(sample_rate))
.collect();
}
assert!(cfgs.len() > 0);
let cfg = cfgs[0].clone().with_max_sample_rate();
let downsample_ratio = cfg.channels() as f32 * (cfg.sample_rate().0 as f32 / 16_000.0);
let stream = device.build_input_stream(

View File

@@ -2,21 +2,6 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "addr2line"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aho-corasick"
version = "1.1.2"
@@ -26,21 +11,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.5"
@@ -89,27 +59,6 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "backtrace"
version = "0.3.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]]
name = "bindgen"
version = "0.68.1"
@@ -139,18 +88,6 @@ version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
[[package]]
name = "bumpalo"
version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cc"
version = "1.0.83"
@@ -175,20 +112,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-targets 0.48.5",
]
[[package]]
name = "clang-sys"
version = "1.6.1"
@@ -255,12 +178,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "core-foundation-sys"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]]
name = "either"
version = "1.9.0"
@@ -283,12 +200,6 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "gimli"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "glob"
version = "0.3.1"
@@ -310,38 +221,6 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "iana-time-zone"
version = "0.1.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "js-sys"
version = "0.3.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@@ -394,15 +273,6 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
[[package]]
name = "nom"
version = "7.1.3"
@@ -413,24 +283,6 @@ dependencies = [
"minimal-lexical",
]
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
]
[[package]]
name = "object"
version = "0.32.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.19.0"
@@ -443,12 +295,6 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "pin-project-lite"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
[[package]]
name = "prettyplease"
version = "0.2.15"
@@ -523,20 +369,11 @@ dependencies = [
name = "rust-whisper-lib"
version = "0.1.0"
dependencies = [
"byteorder",
"chrono",
"clap",
"tokio",
"wav",
"whisper-rs",
]
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustc-hash"
version = "1.1.0"
@@ -579,16 +416,6 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.35.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104"
dependencies = [
"backtrace",
"pin-project-lite",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
@@ -601,60 +428,6 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "wasm-bindgen"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f"
[[package]]
name = "wav"
version = "1.0.0"
@@ -715,37 +488,13 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
version = "0.51.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.0",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
"windows-targets",
]
[[package]]
@@ -754,93 +503,51 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
dependencies = [
"windows_aarch64_gnullvm 0.52.0",
"windows_aarch64_msvc 0.52.0",
"windows_i686_gnu 0.52.0",
"windows_i686_msvc 0.52.0",
"windows_x86_64_gnu 0.52.0",
"windows_x86_64_gnullvm 0.52.0",
"windows_x86_64_msvc 0.52.0",
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.0"

View File

@@ -6,7 +6,7 @@ pub fn channel<F>(
stream: std::sync::mpsc::Receiver<Vec<f32>>,
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
flags.model_path = None;
flags.model_buffer = Some(include_bytes!("../../models/ggml-tiny.en.bin").to_vec());
flags.model_buffer = Some(get_fast());
rust_whisper_lib::channel(flags.clone(), handler_fn, stream);
}
@@ -15,7 +15,27 @@ pub fn wav<F>(
handler_fn: F
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
flags.model_path = None;
flags.model_buffer = Some(include_bytes!("../../models/ggml-distil-medium.en.bin").to_vec());
flags.model_buffer = Some(get_good());
rust_whisper_lib::wav(flags.clone(), handler_fn, flags.wav.unwrap());
}
/// Runs `rust_whisper_lib::wav_channel` using the embedded "good" model.
///
/// `flags.model_path` is cleared and `flags.model_buffer` is replaced with the
/// bytes returned by `get_good()` before the flags and `handler_fn` are handed
/// to `rust_whisper_lib::wav_channel`.
pub fn wav_channel<F>(
    mut flags: rust_whisper_lib::Flags,
    handler_fn: F,
) where
    F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static,
{
    // Always use the embedded model: set the buffer and drop any path on the flags.
    flags.model_buffer = Some(get_good());
    flags.model_path = None;
    rust_whisper_lib::wav_channel(flags, handler_fn);
}
/// Forwards `path` to `rust_whisper_lib::f32_from_wav_file` and returns its
/// result unchanged.
// NOTE(review): `&String` is kept to match the wrapped function's call here;
// `&str` would be more general if the wrapped function accepts it — confirm.
pub fn f32_from_wav_file(path: &String) -> Result<Vec<f32>, String> {
rust_whisper_lib::f32_from_wav_file(path)
}
/// Returns an owned copy of the `ggml-small.en.bin` model bytes that are
/// embedded into the binary at compile time.
fn get_fast() -> Vec<u8> {
    let model: &'static [u8] = include_bytes!("../../models/ggml-small.en.bin");
    model.to_vec()
}
/// Returns an owned copy of the `ggml-distil-medium.en.bin` model bytes that
/// are embedded into the binary at compile time.
fn get_good() -> Vec<u8> {
    let model: &'static [u8] = include_bytes!("../../models/ggml-distil-medium.en.bin");
    model.to_vec()
}

View File

@@ -2,21 +2,6 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "addr2line"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aho-corasick"
version = "1.1.2"
@@ -48,21 +33,6 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.5"
@@ -117,21 +87,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "backtrace"
version = "0.3.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]]
name = "bindgen"
version = "0.68.1"
@@ -193,12 +148,6 @@ version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bytes"
version = "1.5.0"
@@ -236,20 +185,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-targets 0.48.5",
]
[[package]]
name = "clang-sys"
version = "1.6.1"
@@ -411,12 +346,6 @@ version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "gimli"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "glob"
version = "0.3.1"
@@ -444,29 +373,6 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "iana-time-zone"
version = "0.1.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "indexmap"
version = "2.1.0"
@@ -477,6 +383,12 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "itoa"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
[[package]]
name = "jni"
version = "0.19.0"
@@ -608,15 +520,6 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
[[package]]
name = "ndk"
version = "0.7.0"
@@ -708,15 +611,6 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "object"
version = "0.32.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
dependencies = [
"memchr",
]
[[package]]
name = "oboe"
version = "0.5.0"
@@ -775,12 +669,6 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "pin-project-lite"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
[[package]]
name = "pkg-config"
version = "0.3.27"
@@ -875,14 +763,26 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
[[package]]
name = "rust-stemmers"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54"
dependencies = [
"serde",
"serde_derive",
]
[[package]]
name = "rust-whisper-baked"
version = "0.1.0"
dependencies = [
"clap",
"listen-lib",
"rust-stemmers",
"rust-whisper-baked-lib",
"rust-whisper-lib",
"stop-words",
]
[[package]]
@@ -896,20 +796,11 @@ dependencies = [
name = "rust-whisper-lib"
version = "0.1.0"
dependencies = [
"byteorder",
"chrono",
"clap",
"tokio",
"wav",
"whisper-rs",
]
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustc-hash"
version = "1.1.0"
@@ -929,6 +820,12 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "ryu"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
[[package]]
name = "same-file"
version = "1.0.6"
@@ -944,6 +841,37 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.193"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.193"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
]
[[package]]
name = "serde_json"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb0652c533506ad7a2e353cce269330d6afd8bdfb6d75e0ace5b35aacbd7b9e9"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "shlex"
version = "1.2.0"
@@ -975,6 +903,15 @@ version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970"
[[package]]
name = "stop-words"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8500024d809de02ecbf998472b7bed3c4fca380df2be68917f6a473bdb28ddcc"
dependencies = [
"serde_json",
]
[[package]]
name = "strsim"
version = "0.10.0"
@@ -1023,16 +960,6 @@ dependencies = [
"syn 2.0.41",
]
[[package]]
name = "tokio"
version = "1.35.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104"
dependencies = [
"backtrace",
"pin-project-lite",
]
[[package]]
name = "toml_datetime"
version = "0.6.5"
@@ -1226,15 +1153,6 @@ dependencies = [
"windows-targets 0.42.2",
]
[[package]]
name = "windows-core"
version = "0.51.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"

View File

@@ -10,3 +10,5 @@ rust-whisper-lib = { path = "../rust-whisper-lib" }
rust-whisper-baked-lib = { path = "../rust-whisper-baked-lib" }
listen-lib = { path = "../listen-lib" }
clap = { version = "4.4.10", features = ["derive"] }
stop-words = "0.8.0"
rust-stemmers = "1.2.0"

View File

@@ -1,46 +1,284 @@
use rust_whisper_lib;
use rust_whisper_baked_lib;
use clap::Parser;
use listen_lib;
use rust_whisper_baked_lib;
use rust_whisper_lib;
use std::thread;
fn main() {
let flags = rust_whisper_lib::Flags::parse();
let (send, recv) = std::sync::mpsc::sync_channel(100);
eprintln!("rust whisper baked lib channel...");
thread::spawn(move || {
rust_whisper_baked_lib::channel(
flags.clone(),
|result: Result<rust_whisper_lib::Transcribed, String>| {
match result {
Ok(transcribed) => { println!("{}", transcribed.to_string()); },
Err(msg) => { eprintln!("error: {}", msg); },
};
},
recv,
);
});
eprintln!("listen lib main...");
let flags = rust_whisper_lib::Flags::parse();
match flags.stream_device {
Some(device_name) => {
if device_name == "" {
for device in listen_lib::devices() {
eprintln!("{}", device);
}
} else {
listen_lib::main_with(|data| {
send.send(data).unwrap();
}, device_name);
}
},
None => {
listen_lib::main(|data| {
send.send(data).unwrap();
});
}
}
eprintln!("/listen lib main...");
let flags = rust_whisper_lib::Flags::parse();
match flags.wav.clone() {
Some(_) => wav_channel(flags),
None => channel(flags),
};
}
/// Runs `rust_whisper_baked_lib::wav_channel` with a handler that passes each
/// successful transcription through a `Destutterer` and prints the result to
/// stdout; errors are printed to stderr.
fn wav_channel(flags: rust_whisper_lib::Flags) {
    let mut destutterer = new_destutterer();
    let on_result = move |result: Result<rust_whisper_lib::Transcribed, String>| {
        // Report failures and bail out early so the happy path stays flat.
        let transcribed = match result {
            Ok(transcribed) => transcribed,
            Err(msg) => {
                eprintln!("error: {}", msg);
                return;
            }
        };
        let text = destutterer.step(transcribed.to_string());
        println!("{}", text);
    };
    rust_whisper_baked_lib::wav_channel(flags.clone(), on_result);
}
/// Runs `rust_whisper_baked_lib::wav` with a handler that passes each
/// successful transcription through a `Destutterer` and prints the result to
/// stdout; errors are printed to stderr.
///
/// `_path` is accepted but not used by this function.
fn wav(flags: rust_whisper_lib::Flags, _path: String) {
    let mut destutterer = new_destutterer();
    let handler = move |result: Result<rust_whisper_lib::Transcribed, String>| match result {
        Err(msg) => eprintln!("error: {}", msg),
        Ok(transcribed) => {
            let text = destutterer.step(transcribed.to_string());
            println!("{}", text);
        }
    };
    rust_whisper_baked_lib::wav(flags, handler);
}
/// Streams audio from an input device into the transcriber and prints the
/// destuttered transcription to stdout.
///
/// A worker thread runs `rust_whisper_baked_lib::channel`, reading audio
/// buffers from a bounded channel (capacity 100). The calling thread feeds that
/// channel from `listen_lib`:
///
/// * `flags.stream_device == Some("")` — only lists the available devices on
///   stderr;
/// * `flags.stream_device == Some(name)` — listens on the named device;
/// * `flags.stream_device == None` — listens via `listen_lib::main`.
///
/// # Panics
///
/// Panics if an audio buffer cannot be sent to the worker thread (for example
/// because the worker has already stopped and dropped the receiver).
fn channel(flags: rust_whisper_lib::Flags) {
    // `flags` is moved into the worker thread below, so take a copy of the one
    // field this thread still needs instead of parsing the command line again.
    let stream_device = flags.stream_device.clone();
    let (send, recv) = std::sync::mpsc::sync_channel(100);

    eprintln!("rust whisper baked lib channel...");
    thread::spawn(move || {
        let mut w = new_destutterer();
        rust_whisper_baked_lib::channel(
            flags,
            move |result: Result<rust_whisper_lib::Transcribed, String>| {
                match result {
                    Ok(transcribed) => {
                        let s = w.step(transcribed.to_string());
                        println!("{}", s);
                    }
                    Err(msg) => {
                        eprintln!("error: {}", msg);
                    }
                };
            },
            recv,
        );
    });

    eprintln!("listen lib main...");
    match stream_device {
        Some(device_name) => {
            eprintln!("with device ({}) '{}'", device_name.len(), &device_name);
            if device_name.is_empty() {
                // An empty device name means "show me what is available".
                let devices = listen_lib::devices();
                for (i, device) in devices.iter().enumerate() {
                    eprintln!("[{}] {}", i, device);
                }
                eprintln!("found {} devices", devices.len());
            } else {
                listen_lib::main_with(
                    |data| {
                        send.send(data).unwrap();
                    },
                    device_name,
                );
            }
        }
        None => {
            eprintln!("without any device");
            listen_lib::main(|data| {
                send.send(data).unwrap();
            });
        }
    }
    eprintln!("/listen lib main...");
}
/// Removes words from the start of each new piece of text when they repeat the
/// end of the previously seen piece.
struct Destutterer {
    /// The most recent non-empty input, kept for comparison with the next one.
    prev: Words,
}

/// Creates a `Destutterer` with no previous input.
fn new_destutterer() -> Destutterer {
    Destutterer { prev: new_words() }
}

impl Destutterer {
    /// Returns `next` with its repeated leading words removed.
    ///
    /// The comparable words of the previous input and of `next` are compared:
    /// for the largest `n` such that the last `n` comparable words of the
    /// previous input equal the first `n` comparable words of `next`, every raw
    /// word of `next` up to and including the `n`-th comparable one is dropped
    /// and the rest is returned, joined by single spaces. If there is no such
    /// overlap, all words of `next` are returned, joined by single spaces.
    ///
    /// `next` becomes the new previous input, except when it is empty, in which
    /// case it is returned unchanged and the previous input is kept.
    fn step(&mut self, next: String) -> String {
        if next.is_empty() {
            return next;
        }
        let next_words = Words::from_string(next);

        // Derive the comparable words once; doing this inside the loop would
        // redo the whole normalization on every iteration.
        let prev_cmp = self.prev.to_comparable_words();
        let next_cmp = next_words.to_comparable_words();

        // Try the longest possible overlap first, then shrink.
        let mut n = prev_cmp.len().min(next_cmp.len());
        while n > 0 {
            let prev_tail = &prev_cmp[prev_cmp.len() - n..];
            let next_head = &next_cmp[..n];
            let overlaps = prev_tail
                .iter()
                .map(|w| &w.s)
                .eq(next_head.iter().map(|w| &w.s));
            if overlaps {
                // Index (into the raw words) of the last overlapping word.
                let next_idx = next_head[n - 1].idx;
                self.prev = next_words;
                return self.prev.skip(next_idx + 1).to_string();
            }
            n -= 1;
        }

        self.prev = next_words;
        self.prev.to_string()
    }
}
#[derive(Clone, Debug)]
struct Words {
raw: Vec<String>,
}
fn new_words() -> Words {
Words { raw: vec![] }
}
impl Words {
    /// Splits `s` on spaces, trimming each piece and dropping empty ones.
    fn from_string(s: String) -> Words {
        Words {
            raw: s
                .split(' ')
                .map(str::trim)
                .filter(|word| !word.is_empty())
                .map(str::to_string)
                .collect(),
        }
    }

    /// Returns a copy without the first `n` raw words (empty if `n` exceeds the length).
    fn skip(&self, n: usize) -> Words {
        Words {
            raw: self.raw.iter().skip(n).cloned().collect(),
        }
    }

    /// Joins the last `n` comparable words with single spaces.
    ///
    /// The second element is the raw index of the first word of that
    /// selection. When `n` exceeds the number of comparable words, all of them
    /// are used; an empty selection yields `("", 0)` instead of panicking.
    fn last_n_comparable_to_string(&self, n: usize) -> (String, usize) {
        let v = self.to_comparable_words();
        // saturating_sub: a plain `len - n` underflows when n > len.
        let tail = &v[v.len().saturating_sub(n)..];
        (
            Self::join_comparable(tail),
            tail.first().map_or(0, |w| w.idx),
        )
    }

    /// Joins the first `n` comparable words with single spaces.
    ///
    /// The second element is the raw index of the last word of that
    /// selection. When `n` exceeds the number of comparable words, all of them
    /// are used; an empty selection yields `("", 0)` instead of panicking.
    fn first_n_comparable_to_string(&self, n: usize) -> (String, usize) {
        let v = self.to_comparable_words();
        let head = &v[..n.min(v.len())];
        (
            Self::join_comparable(head),
            head.last().map_or(0, |w| w.idx),
        )
    }

    /// Number of words that take part in overlap comparisons.
    fn comparable_len(&self) -> usize {
        self.to_comparable_words().len()
    }

    /// The normalized words that take part in overlap comparisons, in order.
    fn to_comparable_words(&self) -> Vec<Word> {
        self.to_words()
            .into_iter()
            .filter(|w| w.s.is_some())
            .collect()
    }

    /// Normalizes every raw word: lowercase, keep only ASCII letters and
    /// digits, then apply the English stemmer.
    ///
    /// A word gets `s: None` (not comparable) when its normalized form is an
    /// English stop word, or when nothing is left after normalization - a
    /// punctuation-only token such as `-` must not count as a word, otherwise
    /// it blocks a match or matches any other punctuation-only token.
    fn to_words(&self) -> Vec<Word> {
        let skips = stop_words::get("en");
        let stemmer = rust_stemmers::Stemmer::create(rust_stemmers::Algorithm::English);
        self.raw
            .iter()
            .enumerate()
            .map(|(idx, raw)| {
                let cleaned = raw
                    .to_lowercase()
                    .chars()
                    .filter(|c| c.is_ascii_alphanumeric())
                    .collect::<String>();
                let stem = stemmer.stem(&cleaned).into_owned();
                let s = if stem.is_empty() || skips.contains(&stem) {
                    None
                } else {
                    Some(stem)
                };
                Word { s, idx }
            })
            .collect()
    }

    /// Joins the raw words with single spaces.
    fn to_string(&self) -> String {
        self.raw.join(" ")
    }

    /// Joins the normalized text of `words` with single spaces, ignoring
    /// entries that have none.
    fn join_comparable(words: &[Word]) -> String {
        words
            .iter()
            .filter_map(|w| w.s.as_deref())
            .collect::<Vec<&str>>()
            .join(" ")
    }
}
/// One word of a `Words` value in the normalized form used for comparisons.
#[derive(Debug, Clone)]
struct Word {
// Normalized text of the word; `None` when the word is left out of
// comparisons (for example because it is an English stop word).
s: Option<String>,
// Position of the originating word in `Words::raw`.
idx: usize,
}
// Unit tests for `Destutterer::step`. Each test feeds consecutive pieces of
// text and checks that only the part not already seen is returned.
#[cfg(test)]
mod tests {
use super::*;
// The second text has an extra stop word ("a") and a trailing ";" inside the
// repeated part; the repetition must still be found and removed.
#[test]
fn test_destutterer_stop_words() {
let mut w = new_destutterer();
assert_eq!(
"welcome to the internet".to_string(),
w.step("welcome to the internet".to_string())
);
assert_eq!(
"have a look around".to_string(),
w.step("welcome to the a internet; have a look around".to_string())
);
}
// Punctuation attached to a word ("moose!" vs "moose,") must not prevent a
// match, and the punctuation of the words that are kept is preserved.
#[test]
fn test_destutterer_punctuation() {
let mut w = new_destutterer();
assert_eq!(
"cat, dog. cow? moose!".to_string(),
w.step("cat, dog. cow? moose!".to_string())
);
assert_eq!(
"elephant! fez gator".to_string(),
w.step("moose, elephant! fez gator".to_string())
);
assert_eq!("hij".to_string(), w.step("fez gator hij".to_string()));
}
// Plain overlap; leading and trailing spaces of the input are not part of
// the output.
#[test]
fn test_destutterer_basic() {
let mut w = new_destutterer();
assert_eq!(
"cat dog cow".to_string(),
w.step(" cat dog cow ".to_string())
);
assert_eq!("moose".to_string(), w.step(" dog cow moose ".to_string()));
}
}

View File

@@ -14,13 +14,13 @@ pub struct Flags {
#[arg(long, default_value = "8")]
pub threads: i32,
#[arg(long, default_value = "5")]
#[arg(long, default_value = "8")]
pub stream_step: u64,
#[arg(long, default_value = "0.6")]
#[arg(long, default_value = "4.0")]
pub stream_retain: f32,
#[arg(long, default_value = "0.3")]
#[arg(long, default_value = "2.0")]
pub stream_head: f32,
#[arg(long, default_value = "0.3")]
#[arg(long, default_value = "0.0")]
pub stream_tail: f32,
#[arg(long, default_value = "false")]
@@ -44,7 +44,7 @@ pub fn wav<F>(flags: Flags, handler_fn: F, wav_path: String) where F: FnMut(Resu
w.transcribe(&f32_from_wav_file(&wav_path).unwrap())
}
fn f32_from_wav_file(path: &String) -> Result<Vec<f32>, String> {
pub fn f32_from_wav_file(path: &String) -> Result<Vec<f32>, String> {
let f = std::fs::File::open(path);
if let Some(err) = f.as_ref().err() {
return Err(format!("failed to open wav file: {}", err));
@@ -72,7 +72,30 @@ fn f32_from_wav_file(path: &String) -> Result<Vec<f32>, String> {
}
}
pub fn channel<F>(flags: Flags, handler_fn: F, stream: std::sync::mpsc::Receiver<Vec<f32>>) where F: FnMut(Result<Transcribed, String>) + Send + 'static {
/// Transcribes the wav file named by `flags.wav` as if it were streamed.
///
/// The decoded audio is cut into at most 100 consecutive chunks that together
/// cover every sample, and those chunks are fed to `channel_and_close`. The
/// call returns once the close callback has signalled completion.
///
/// # Panics
///
/// Panics when `flags.wav` is `None`, when the file cannot be decoded, or when
/// the completion signal cannot be sent or received.
pub fn wav_channel<F>(flags: Flags, handler_fn: F) where F: FnMut(Result<Transcribed, String>) + Send + 'static {
    const MAX_CHUNKS: usize = 100;

    let path = flags.wav.as_ref().expect("wav_channel requires flags.wav");
    let audio = f32_from_wav_file(path).expect("failed to load wav file");

    // Ceiling division so that MAX_CHUNKS chunks always cover the whole
    // input. (Flooring by 99 dropped samples on short inputs.) `chunks`
    // rejects a size of zero, hence the `max(1)` for empty audio.
    let chunk_len = ((audio.len() + MAX_CHUNKS - 1) / MAX_CHUNKS).max(1);
    let chunks: Vec<Vec<f32>> = audio.chunks(chunk_len).map(|c| c.to_vec()).collect();

    let (fin_send, fin_recv) = std::sync::mpsc::sync_channel::<Option<i32>>(1);
    channel_and_close(flags, handler_fn, chunks, move || {
        fin_send.send(None).unwrap();
    });
    if let Err(x) = fin_recv.recv() {
        panic!("failed to receive: {}", x);
    }
}
/// Runs `channel_and_close` on `stream` with a close callback that does nothing.
///
/// `stream` may be anything that iterates over `Vec<f32>` audio chunks.
pub fn channel<F, I>(flags: Flags, handler_fn: F, stream: I) where F: FnMut(Result<Transcribed, String>) + Send + 'static, I: IntoIterator<Item = Vec<f32>> {
channel_and_close(flags, handler_fn, stream, || {});
}
fn channel_and_close<F, I, G>(flags: Flags, handler_fn: F, stream: I, mut close_fn: G) where F: FnMut(Result<Transcribed, String>) + Send + 'static, I: IntoIterator<Item = Vec<f32>>, G: FnMut() + Send + 'static {
let w = new_service(
flags.model_path,
flags.model_buffer,
@@ -87,7 +110,7 @@ pub fn channel<F>(flags: Flags, handler_fn: F, stream: std::sync::mpsc::Receiver
false => {},
};
let mut buffer = vec![];
for data in stream.iter() {
for data in stream {
data.iter().for_each(|x| buffer.push(*x));
if buffer.len() >= (flags.stream_step * 16_000) as usize {
w.transcribe_async(&buffer).unwrap();
@@ -112,6 +135,10 @@ pub fn channel<F>(flags: Flags, handler_fn: F, stream: std::sync::mpsc::Receiver
buffer.truncate(stream_retain);
}
}
if buffer.len() > 0 {
w.transcribe(&buffer);
}
close_fn();
}
struct Service {
@@ -203,7 +230,9 @@ impl Impl {
let result = whispered
.after(&(self.stream_head * 100.0))
.before(&(self.stream_tail * 100.0));
(self.handler_fn.as_mut().unwrap())(Ok(result));
if result.to_string().trim().len() > 0 {
(self.handler_fn.as_mut().unwrap())(Ok(result));
}
}
fn on_error(&mut self, msg: String) {
@@ -263,19 +292,6 @@ impl Engine {
}
}
struct Engine2 {
}
fn new_engine_2() -> Result<Engine2, String> {
Ok(Engine2{})
}
impl Engine2 {
fn transcribe(&self, data: &Vec<f32>) -> Result<Transcribed, String> {
Err(format!("not impl"))
}
}
struct ATranscribe {
data: Vec<f32>,
ack: Option<std::sync::mpsc::SyncSender<bool>>,
@@ -352,6 +368,28 @@ impl Transcribed {
mod tests {
use super::*;
// Runs the JFK sample through `wav_channel` with the tiny English model
// embedded at compile time; every result handed to the handler must be `Ok`
// and equal to the full expected sentence.
#[test]
fn test_transcribe_tiny_jfk_wav_whisper_rs_wav_channel() {
wav_channel(
Flags {
model_path: None,
model_buffer: Some(include_bytes!("../../models/ggml-tiny.en.bin").to_vec()),
threads: 8,
// Step of 30 with no retain/head/tail.
stream_step: 30,
stream_retain: 0.0,
stream_head: 0.0,
stream_tail: 0.0,
wav: Some("../gitea-whisper-rs/sys/whisper.cpp/bindings/go/samples/jfk.wav".to_string()),
debug: false,
stream_device: None,
},
move | result | {
assert!(result.is_ok());
assert_eq!(result.unwrap().to_string(), " And so my fellow Americans ask not what your country can do for you ask what you can do for your country.");
},
);
}
#[test]
fn test_transcribe_tiny_jfk_wav_whisper_rs() {
wav(
@@ -374,16 +412,4 @@ mod tests {
"../gitea-whisper-rs/sys/whisper.cpp/bindings/go/samples/jfk.wav".to_string(),
);
}
#[test]
fn test_transcribe_tiny_jfk_wav_candle() {
let wav_path = "../gitea-whisper-rs/sys/whisper.cpp/bindings/go/samples/jfk.wav".to_string();
let audio_data = f32_from_wav_file(&wav_path).unwrap();
let engine = new_engine_2().unwrap();
let result = engine.transcribe(&audio_data);
assert_eq!(
" And so my fellow Americans ask not what your country can do for you ask what you can do for your country.".to_string(),
result.unwrap().to_string(),
);
}
}

12
todo.yaml Executable file
View File

@@ -0,0 +1,12 @@
todo:
- wav to subtitles
- compound words like checkmark vs check mark should destutter
- whisper trims outside silence so head and tail never get hit
- split on silence-ish instead of duration
- rust-whisper warn when transcription time ~ input time
scheduled: []
done:
- todo: need to overlap without ANY punctuation, which i can do by breaking into words
ts: Tue Jan 2 13:23:00 EST 2024
- todo: overlap without stop words
ts: Wed Jan 3 03:22:14 EST 2024

BIN
wav_to_mkv.d/sc.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 KiB

View File

@@ -0,0 +1,100 @@
// This example is not going to build in this folder.
// You need to copy this code into your project and add the dependencies whisper_rs and hound in your cargo.toml
use hound;
use std::fs::File;
use std::io::Write;
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext};
/// Loads a context and model, processes an audio file, and prints the resulting transcript to stdout.
///
/// Expects two command line arguments: the path of the model file and the
/// path of a 16 kHz, mono or stereo wav file. Every segment is printed as
/// `[start - end]: text` and the same lines are written to `transcript.txt`
/// in the current directory.
///
/// # Errors
///
/// Returns an error when an argument is missing or when the stereo to mono
/// conversion fails.
///
/// # Panics
///
/// Panics when the model or audio cannot be loaded, when the audio has an
/// unsupported channel count or sample rate, when the model fails to run, or
/// when the transcript cannot be written.
fn main() -> Result<(), &'static str> {
    let args: Vec<String> = std::env::args().collect();
    // Fail with a usage message instead of an index-out-of-bounds panic.
    let (model_path, wav_path) = match args.as_slice() {
        [_, model_path, wav_path, ..] => (model_path, wav_path),
        _ => return Err("usage: wav_subtitles <model file> <16 kHz wav file>"),
    };

    // Load a context and model.
    let ctx = WhisperContext::new(model_path).expect("failed to load model");
    // Create a state
    let mut state = ctx.create_state().expect("failed to create state");

    // Create a params object for running the model.
    // The number of past samples to consider defaults to 0.
    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
    // Enable translation.
    params.set_translate(true);
    // Set the language to translate to English.
    params.set_language(Some("en"));
    // Disable anything that prints to stdout.
    params.set_print_special(false);
    params.set_print_progress(false);
    params.set_print_realtime(false);
    params.set_print_timestamps(false);

    // Open the audio file.
    let mut reader = hound::WavReader::open(wav_path).expect("failed to open file");
    let hound::WavSpec {
        channels,
        sample_rate,
        ..
    } = reader.spec();

    // Reject unsupported input before decoding the whole file.
    if channels != 1 && channels != 2 {
        panic!(">2 channels unsupported");
    }
    if sample_rate != 16000 {
        panic!("sample rate must be 16KHz");
    }

    // Convert the audio to floating point samples.
    let mut audio = whisper_rs::convert_integer_to_float_audio(
        &reader
            .samples::<i16>()
            .map(|s| s.expect("invalid sample"))
            .collect::<Vec<_>>(),
    );
    // The model requires 16KHz mono f32 samples.
    if channels == 2 {
        audio = whisper_rs::convert_stereo_to_mono_audio(&audio)?;
    }

    // Run the model.
    state.full(params, &audio[..]).expect("failed to run model");

    // Create a file to write the transcript to; buffered so that each segment
    // does not cost a separate write call.
    let mut file =
        std::io::BufWriter::new(File::create("transcript.txt").expect("failed to create file"));

    // Iterate through the segments of the transcript.
    let num_segments = state
        .full_n_segments()
        .expect("failed to get number of segments");
    for i in 0..num_segments {
        // Get the transcribed text and timestamps for the current segment.
        let segment = state
            .full_get_segment_text(i)
            .expect("failed to get segment");
        let start_timestamp = state
            .full_get_segment_t0(i)
            .expect("failed to get start timestamp");
        let end_timestamp = state
            .full_get_segment_t1(i)
            .expect("failed to get end timestamp");

        // Format once; the same line goes to stdout and to the file.
        let line = format!("[{} - {}]: {}\n", start_timestamp, end_timestamp, segment);
        print!("{}", line);
        file.write_all(line.as_bytes())
            .expect("failed to write to file");
    }
    // Flush explicitly: errors during the implicit flush on drop are ignored.
    file.flush().expect("failed to write to file");
    Ok(())
}

View File

@@ -0,0 +1,66 @@
#! /bin/bash
# main WAV [MODEL] [ALREADY_TRANSCRIBED]
#
# Turns WAV into an .mkv next to it: a still image (sc.jpg), the original
# audio and subtitles generated by the wav_subtitles example.
#   WAV                  input audio file
#   MODEL                whisper model (default: ../models/ggml-small.en.bin
#                        relative to this script)
#   ALREADY_TRANSCRIBED  "true" to reuse the existing transcript.txt instead of
#                        transcribing again (default: false)
# Paths that belong to the repository are resolved against the directory of
# this script, so the script can be started from any working directory.
main() {
    set -euo pipefail

    local script_dir input_wav model already_transcribed base sanitized_wav transcript
    script_dir="$(dirname "$(realpath "${BASH_SOURCE[0]}")")"
    input_wav="$(realpath "$1")"
    model="$(realpath "${2:-$script_dir/../models/ggml-small.en.bin}")"
    already_transcribed="${3:-false}"
    base="${input_wav%.*}"
    sanitized_wav="$base.mono-16khz.wav"
    transcript="$script_dir/../gitea-whisper-rs/transcript.txt"

    # The model needs 16 kHz mono input.
    ffmpeg -y -i "$input_wav" -ac 1 -ar 16k "$sanitized_wav"

    # Compare as a string rather than executing the argument as a command.
    if [ "$already_transcribed" != true ]; then
        # Subshell: the example writes transcript.txt into its working
        # directory, and the caller's directory must stay untouched.
        (
            cd "$script_dir/../gitea-whisper-rs/"
            cargo run --example wav_subtitles -- "$model" "$sanitized_wav"
        )
    fi

    out_to_srt "$transcript" > "$base.srt"

    ffmpeg -y \
        -loop 1 -i "$script_dir/sc.jpg" \
        -i "$input_wav" \
        -i "$base.srt" \
        -c:v libx264 \
        -tune stillimage \
        -pix_fmt yuv420p -shortest \
        "$base.mkv"
    ls "$base.mkv"
}
# out_to_srt TRANSCRIPT
#
# Converts a transcript whose lines look like "[START - END]: TEXT" (START and
# END in centiseconds) into SubRip (.srt) entries on stdout. Empty lines are
# skipped.
out_to_srt() {
    # cs_to_ts CENTISECONDS -> HH:MM:SS,mmm
    cs_to_ts() {
        awk -v cs="$1" 'BEGIN {
            cs = int(cs)
            printf "%02d:%02d:%02d,%03d",
                int(cs / 360000),
                int(cs / 6000) % 60,
                int(cs / 100) % 60,
                (cs % 100) * 10
        }'
    }

    local i=0 line stamps start_cs end_cs
    # "|| [ -n ... ]" keeps a final line that has no trailing newline.
    while IFS= read -r line || [ -n "$line" ]; do
        [ -n "$line" ] || continue
        i=$((i + 1))

        # "[START - END]: TEXT" -> "START - END"
        stamps="${line%%]: *}"
        stamps="${stamps#\[}"
        start_cs="${stamps%% *}"
        end_cs="${stamps##* }"

        echo "$i"
        echo "$(cs_to_ts "$start_cs") --> $(cs_to_ts "$end_cs")"
        # printf: the text is arbitrary and must not be read as echo options.
        printf '%s\n' "${line#*: }"
        echo
    done < "$1"
}
# Run main only when this file is executed directly, not when it is sourced.
if [ "$BASH_SOURCE" == "$0" ]
then
    main "$@"
fi

Submodule whisper-burn deleted from 0ee20fbc80