36 Commits

Author SHA1 Message Date
Bel LaPointe
9db417005c move all into subdirs 2023-12-19 22:01:27 -05:00
Bel LaPointe
799283d49c drop original root-level 2023-12-19 22:00:09 -05:00
Bel LaPointe
1caa9f564e cmd imports rust-whisper-lib 2023-12-19 21:59:34 -05:00
Bel LaPointe
641d7d077a stub /rust-whisper as a bin 2023-12-19 21:54:02 -05:00
Bel LaPointe
f864d4dfc7 lib-ify as /rust-whisper-lib 2023-12-19 21:53:02 -05:00
Bel LaPointe
2883830dbe get ready to split into package 2023-12-19 21:46:02 -05:00
Bel LaPointe
11b5091872 found the type needed to pass closures with local variables 2023-12-19 21:39:53 -05:00
Bel LaPointe
03370f362e from borrow since a grant is K 2023-12-19 21:20:52 -05:00
Bel LaPointe
ec6a71d38c purge non callback handling 2023-12-19 21:19:37 -05:00
Bel LaPointe
1b96b132e1 dumb callbacks work 2023-12-19 21:18:04 -05:00
Bel LaPointe
839487b99e drop redundant on_success time trimming 2023-12-19 21:11:53 -05:00
Bel LaPointe
a2fee32fbc refactor to whisper_service enqueues, whisper_impl transforms, whisper_engine provides raw 2023-12-19 21:08:59 -05:00
Bel LaPointe
091958e08d moved to a callback BUT costed me a global so lets iterate to someTrait 2023-12-19 20:38:01 -05:00
Bel LaPointe
5f47b2c88b wait i just needed an option? f off 2023-12-19 20:20:24 -05:00
Bel LaPointe
367838ac23 test to show include_bytes! macro supports large symlinks 2023-12-19 16:36:17 -05:00
Bel LaPointe
d05287fa3d update --stream-* defaults 2023-12-19 10:30:10 -05:00
Bel LaPointe
01be2637ca swap order 2023-12-19 10:26:22 -05:00
Bel LaPointe
226bedb80e add --debug to write a file that can be played with cat /tmp/page.rawf32audio | sox -r 16000 -b 32 -t f32 -e floating-point - -d 2023-12-19 10:25:48 -05:00
Bel LaPointe
6b54e500cd i think my recording has gaps 2023-12-19 09:54:21 -05:00
Bel LaPointe
8603f20a24 break into words but keep more stream head/tail tiebreaking 2023-12-19 09:51:11 -05:00
Bel LaPointe
eee0bf5e65 wip... 2023-12-19 09:30:15 -05:00
Bel LaPointe
15a3f8430a WIP trim the head and tail from text output because low confidence 2023-12-19 09:09:38 -05:00
Bel LaPointe
116f3f58c9 no buffer 2023-11-30 12:37:19 -07:00
Bel LaPointe
532ae22908 back to mvp 2023-11-30 12:28:35 -07:00
Bel LaPointe
deffc420ca at least it complies 2023-11-30 12:00:16 -07:00
Bel LaPointe
2391d07994 transcribing results as callbacks 2023-11-30 09:58:28 -07:00
Bel LaPointe
eea4b75bc8 confirmed threaded listen vs transcribe stream is naisu 2023-11-30 09:45:09 -07:00
Bel LaPointe
8982276a90 not infinite buffer 2023-11-30 09:41:12 -07:00
Bel LaPointe
479cfb055f threaded something i guess 2023-11-30 09:39:43 -07:00
Bel LaPointe
0667b5b5c6 large distill too 2023-11-30 09:12:38 -07:00
Bel LaPointe
9e97f8669d fuuuuuuuu lost my models folder oh well 2023-11-30 09:06:26 -07:00
Bel LaPointe
ff0f34f80b move rust to root 2023-11-30 09:02:11 -07:00
Bel LaPointe
bf3dd75074 gitignore 2023-11-30 09:02:02 -07:00
Bel LaPointe
827436d96c drop snowboy 2023-11-30 09:01:50 -07:00
Bel LaPointe
3b4295d026 unnest submodule 2023-11-30 09:01:44 -07:00
Bel LaPointe
2936fec1e4 dont need to choose 1 channel since downsampling should randomly choose from all 2023-11-29 05:33:27 -07:00
43 changed files with 2029 additions and 582 deletions

3
.gitignore vendored
View File

@@ -2,7 +2,10 @@
/whisper-cpp-2023/rust.d/target /whisper-cpp-2023/rust.d/target
/rust-whisper.d/target /rust-whisper.d/target
/rust-whisper.d/models /rust-whisper.d/models
/target/
/models/
snowboy-2022/snowboy snowboy-2022/snowboy
**/*.git.d **/*.git.d
**/*.wav **/*.wav
snowboy-2022/Dockerfile snowboy-2022/Dockerfile
**/target

2
.gitmodules vendored
View File

@@ -1,3 +1,3 @@
[submodule "rust-whisper.d/gitea-whisper-rs"] [submodule "rust-whisper.d/gitea-whisper-rs"]
path = rust-whisper.d/gitea-whisper-rs path = gitea-whisper-rs
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git

View File

@@ -1,29 +0,0 @@
# stt
## listen on linux
https://wiki.archlinux.org/title/PulseAudio/Examples
```
10. ALSA monitor source
To be able to record from a monitor source (a.k.a. "What-U-Hear", "Stereo Mix"), use pactl list to find out the name of the source in PulseAudio (e.g. alsa_output.pci-0000_00_1b.0.analog-stereo.monitor). Then add lines like the following to /etc/asound.conf or ~/.asoundrc:
pcm.pulse_monitor {
type pulse
device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
}
ctl.pulse_monitor {
type pulse
device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
}
Now you can select pulse_monitor as a recording source.
Alternatively, you can use pavucontrol to do this: make sure you have set up the display to "All input devices", then select "Monitor of [your sound card]" as the recording source.
```
```bash
$ pactl list | grep -A 50 RUNNING | grep -E 'RUNNING|Name:|Monitor Source:' | grep Monitor.Source | head -n 1 | awk '{print $NF}'
```

13
models/download_models.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/bin/bash
src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
cd "$(dirname "$(realpath "$BASH_SOURCE")")"
# Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en" "large-v2"; do
test -f ./ggml-$model.bin || wget --quiet --show-progress -O ./ggml-$model.bin "$src-$model.bin"
done
test -f ./ggml-distil-medium.en.bin || wget https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/ggml-medium-32-2.en.bin?download=true -O ./ggml-distil-medium.en.bin
test -f ./ggml-distil-large-v2.bin || wget https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/ggml-large-32-2.en.bin?download=true -O ./ggml-distil-large-v2.bin

7
models/testme/Cargo.lock generated Normal file
View File

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "testme"
version = "0.1.0"

8
models/testme/Cargo.toml Normal file
View File

@@ -0,0 +1,8 @@
[package]
name = "testme"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View File

@@ -0,0 +1,4 @@
fn main() {
let bytes = include_bytes!("./test.txt");
println!("{}", String::from_utf8_lossy(bytes));
}

1
models/testme/src/test.txt Symbolic link
View File

@@ -0,0 +1 @@
../../ggml-tiny.en.bin

View File

@@ -65,9 +65,9 @@ dependencies = [
[[package]] [[package]]
name = "anstream" name = "anstream"
version = "0.6.4" version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6"
dependencies = [ dependencies = [
"anstyle", "anstyle",
"anstyle-parse", "anstyle-parse",
@@ -85,27 +85,27 @@ checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87"
[[package]] [[package]]
name = "anstyle-parse" name = "anstyle-parse"
version = "0.2.2" version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140" checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
dependencies = [ dependencies = [
"utf8parse", "utf8parse",
] ]
[[package]] [[package]]
name = "anstyle-query" name = "anstyle-query"
version = "1.0.0" version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [ dependencies = [
"windows-sys", "windows-sys",
] ]
[[package]] [[package]]
name = "anstyle-wincon" name = "anstyle-wincon"
version = "3.0.1" version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
dependencies = [ dependencies = [
"anstyle", "anstyle",
"windows-sys", "windows-sys",
@@ -151,7 +151,7 @@ dependencies = [
"regex", "regex",
"rustc-hash", "rustc-hash",
"shlex", "shlex",
"syn 2.0.39", "syn 2.0.41",
"which", "which",
] ]
@@ -172,7 +172,7 @@ dependencies = [
"regex", "regex",
"rustc-hash", "rustc-hash",
"shlex", "shlex",
"syn 2.0.39", "syn 2.0.41",
] ]
[[package]] [[package]]
@@ -263,9 +263,9 @@ dependencies = [
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.4.10" version = "4.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fffed7514f420abec6d183b1d3acfd9099c79c3a10a06ade4f8203f1411272" checksum = "bfaff671f6b22ca62406885ece523383b9b64022e341e53e009a62ebc47a45f2"
dependencies = [ dependencies = [
"clap_builder", "clap_builder",
"clap_derive", "clap_derive",
@@ -273,9 +273,9 @@ dependencies = [
[[package]] [[package]]
name = "clap_builder" name = "clap_builder"
version = "4.4.9" version = "4.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "63361bae7eef3771745f02d8d892bec2fee5f6e34af316ba556e7f97a7069ff1" checksum = "a216b506622bb1d316cd51328dce24e07bdff4a6128a47c7e7fad11878d5adbb"
dependencies = [ dependencies = [
"anstream", "anstream",
"anstyle", "anstyle",
@@ -292,7 +292,7 @@ dependencies = [
"heck", "heck",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.39", "syn 2.0.41",
] ]
[[package]] [[package]]
@@ -328,9 +328,9 @@ dependencies = [
[[package]] [[package]]
name = "core-foundation-sys" name = "core-foundation-sys"
version = "0.8.4" version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]] [[package]]
name = "coreaudio-rs" name = "coreaudio-rs"
@@ -397,9 +397,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]] [[package]]
name = "errno" name = "errno"
version = "0.3.7" version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f258a7194e7f7c2a7837a8913aeab7fd8c383457034fa20ce4dd3dcb813e8eb8" checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
dependencies = [ dependencies = [
"libc", "libc",
"windows-sys", "windows-sys",
@@ -437,9 +437,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]] [[package]]
name = "home" name = "home"
version = "0.5.5" version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
dependencies = [ dependencies = [
"windows-sys", "windows-sys",
] ]
@@ -543,9 +543,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.150" version = "0.2.151"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
[[package]] [[package]]
name = "libloading" name = "libloading"
@@ -559,9 +559,9 @@ dependencies = [
[[package]] [[package]]
name = "linux-raw-sys" name = "linux-raw-sys"
version = "0.4.11" version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456"
[[package]] [[package]]
name = "lock_api" name = "lock_api"
@@ -581,9 +581,9 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]] [[package]]
name = "mach2" name = "mach2"
version = "0.4.1" version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d0d1830bcd151a6fc4aea1369af235b36c1528fe976b8ff678683c9995eade8" checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709"
dependencies = [ dependencies = [
"libc", "libc",
] ]
@@ -734,9 +734,9 @@ dependencies = [
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.18.0" version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
@@ -786,7 +786,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"syn 2.0.39", "syn 2.0.41",
] ]
[[package]] [[package]]
@@ -868,7 +868,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1" checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
[[package]] [[package]]
name = "rust-whisper" name = "rust-whisper-lib"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"byteorder", "byteorder",
@@ -895,9 +895,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]] [[package]]
name = "rustix" name = "rustix"
version = "0.38.25" version = "0.38.28"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
dependencies = [ dependencies = [
"bitflags 2.4.1", "bitflags 2.4.1",
"errno", "errno",
@@ -971,9 +971,9 @@ dependencies = [
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.39" version = "2.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -982,29 +982,29 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.50" version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "1.0.50" version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.39", "syn 2.0.41",
] ]
[[package]] [[package]]
name = "tokio" name = "tokio"
version = "1.34.0" version = "1.35.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104"
dependencies = [ dependencies = [
"backtrace", "backtrace",
"pin-project-lite", "pin-project-lite",
@@ -1070,7 +1070,7 @@ dependencies = [
"once_cell", "once_cell",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.39", "syn 2.0.41",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
@@ -1104,7 +1104,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.39", "syn 2.0.41",
"wasm-bindgen-backend", "wasm-bindgen-backend",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
@@ -1214,11 +1214,11 @@ dependencies = [
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.48.0" version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [ dependencies = [
"windows-targets 0.48.5", "windows-targets 0.52.0",
] ]
[[package]] [[package]]
@@ -1251,6 +1251,21 @@ dependencies = [
"windows_x86_64_msvc 0.48.5", "windows_x86_64_msvc 0.48.5",
] ]
[[package]]
name = "windows-targets"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
dependencies = [
"windows_aarch64_gnullvm 0.52.0",
"windows_aarch64_msvc 0.52.0",
"windows_i686_gnu 0.52.0",
"windows_i686_msvc 0.52.0",
"windows_x86_64_gnu 0.52.0",
"windows_x86_64_gnullvm 0.52.0",
"windows_x86_64_msvc 0.52.0",
]
[[package]] [[package]]
name = "windows_aarch64_gnullvm" name = "windows_aarch64_gnullvm"
version = "0.42.2" version = "0.42.2"
@@ -1263,6 +1278,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
[[package]] [[package]]
name = "windows_aarch64_msvc" name = "windows_aarch64_msvc"
version = "0.42.2" version = "0.42.2"
@@ -1275,6 +1296,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
[[package]] [[package]]
name = "windows_i686_gnu" name = "windows_i686_gnu"
version = "0.42.2" version = "0.42.2"
@@ -1287,6 +1314,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
[[package]] [[package]]
name = "windows_i686_msvc" name = "windows_i686_msvc"
version = "0.42.2" version = "0.42.2"
@@ -1299,6 +1332,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
[[package]] [[package]]
name = "windows_x86_64_gnu" name = "windows_x86_64_gnu"
version = "0.42.2" version = "0.42.2"
@@ -1311,6 +1350,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
[[package]] [[package]]
name = "windows_x86_64_gnullvm" name = "windows_x86_64_gnullvm"
version = "0.42.2" version = "0.42.2"
@@ -1323,6 +1368,12 @@ version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
[[package]] [[package]]
name = "windows_x86_64_msvc" name = "windows_x86_64_msvc"
version = "0.42.2" version = "0.42.2"
@@ -1336,10 +1387,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]] [[package]]
name = "winnow" name = "windows_x86_64_msvc"
version = "0.5.19" version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "829846f3e3db426d4cee4510841b71a8e58aa2a76b1132579487ae430ccd9c7b" checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
[[package]]
name = "winnow"
version = "0.5.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b5c3db89721d50d0e2a673f5043fc4722f76dcc352d7b1ab8b8288bed4ed2c5"
dependencies = [ dependencies = [
"memchr", "memchr",
] ]

View File

@@ -1,12 +1,12 @@
[package] [package]
name = "rust-whisper" name = "rust-whisper-lib"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
whisper-rs = { path = "./gitea-whisper-rs", version = "0.8.0" } whisper-rs = { path = "../gitea-whisper-rs", version = "0.8.0" }
wav = "1" wav = "1"
tokio = "1.27" tokio = "1.27"
cpal = "0.15.2" cpal = "0.15.2"

363
rust-whisper-lib/src/lib.rs Normal file
View File

@@ -0,0 +1,363 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
use signal_hook::{iterator::Signals, consts::signal::SIGINT};
use std::time::{Duration, Instant};
use clap::Parser;
use std::thread;
use std::fs::File;
use std::io::Write;
#[derive(Parser, Debug)]
pub struct Flags {
#[arg(long, default_value = "./models/ggml-tiny.en.bin")]
pub model: String,
#[arg(long, default_value = "8")]
pub threads: i32,
#[arg(long, default_value = "5")]
pub stream_step: u64,
#[arg(long, default_value = "0.6")]
pub stream_retain: f32,
#[arg(long, default_value = "0.3")]
pub stream_head: f32,
#[arg(long, default_value = "0.3")]
pub stream_tail: f32,
pub wav: Option<String>,
#[arg(long, default_value = "false")]
pub debug: bool,
}
pub fn main<F>(flags: Flags, handler_fn: F) where F: FnMut(Result<Whispered, String>) + Send + 'static {
let w = new_whisper_service(
flags.model,
flags.threads,
flags.stream_head,
flags.stream_tail,
handler_fn,
).unwrap();
let stream_retain = (flags.stream_retain * 16_000.0) as usize;
let stream_step = Duration::new(flags.stream_step, 0);
match flags.wav {
Some(wav) => {
let (header, data) = wav::read(
&mut std::fs::File::open(wav).expect("failed to open $WAV"),
).expect("failed to decode $WAV");
assert!(header.channel_count == 1);
assert!(header.sampling_rate == 16_000);
let data16 = data.as_sixteen().expect("wav is not 32bit floats");
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
w.transcribe(&audio_data);
},
None => {
match &flags.debug {
true => { File::create("/tmp/page.rawf32audio").unwrap(); },
false => {},
};
let mut buffer = vec![];
let mut last = Instant::now();
new_listener().listen(move |data: Vec<f32>| {
data.iter().for_each(|x| buffer.push(*x));
if Instant::now() - last > stream_step {
w.transcribe_async(&buffer).unwrap();
match &flags.debug {
true => {
let mut f = File::options().append(true).open("/tmp/page.rawf32audio").unwrap();
let mut wav_data = vec![];
for i in buffer.iter() {
for j in i.to_le_bytes() {
wav_data.push(j);
}
}
f.write_all(wav_data.as_slice()).unwrap();
},
false => {},
};
for i in 0..stream_retain {
buffer[i] = buffer[buffer.len() - stream_retain + i];
}
buffer.truncate(stream_retain);
last = Instant::now();
}
});
},
};
}
struct WhisperService {
jobs: std::sync::mpsc::SyncSender<AWhisper>,
}
fn new_whisper_service<F>(model_path: String, threads: i32, stream_head: f32, stream_tail: f32, handler_fn: F) -> Result<WhisperService, String> where F: FnMut(Result<Whispered, String>) + Send + 'static {
match new_whisper_engine(model_path, threads) {
Ok(engine) => {
let mut whisper = new_whisper_impl(engine, stream_head, stream_tail, handler_fn);
let (send, recv) = std::sync::mpsc::sync_channel(100);
thread::spawn(move || { whisper.transcribe_asyncs(recv); });
Ok(WhisperService{jobs: send})
},
Err(msg) => Err(format!("failed to initialize engine: {}", msg)),
}
}
impl WhisperService {
fn transcribe(&self, data: &Vec<f32>) {
let (send, recv) = std::sync::mpsc::sync_channel(0);
self._transcribe_async(data, Some(send)).unwrap();
recv.recv().unwrap();
}
fn transcribe_async(&self, data: &Vec<f32>) -> Result<(), String> {
self._transcribe_async(data, None)
}
fn _transcribe_async(&self, data: &Vec<f32>, ack: Option<std::sync::mpsc::SyncSender<bool>>) -> Result<(), String> {
match self.jobs.try_send(AWhisper{
data: data.clone().to_vec(),
ack: ack,
}) {
Ok(_) => Ok(()),
Err(msg) => Err(format!("failed to enqueue transcription: {}", msg)),
}
}
}
struct WhisperImpl {
engine: WhisperEngine,
stream_head: f32,
stream_tail: f32,
handler_fn: Option<Box<dyn FnMut(Result<Whispered, String>) + Send + 'static>>
}
fn new_whisper_impl<F>(engine: WhisperEngine, stream_head: f32, stream_tail: f32, handler_fn: F) -> WhisperImpl where F: FnMut(Result<Whispered, String>) + Send + 'static {
WhisperImpl {
engine: engine,
stream_head: stream_head,
stream_tail: stream_tail,
handler_fn: Some(Box::new(handler_fn)),
}
}
impl WhisperImpl {
fn transcribe_asyncs(&mut self, recv: std::sync::mpsc::Receiver<AWhisper>) {
loop {
match recv.recv() {
Ok(job) => {
let result = self.transcribe(&job).is_ok();
match job.ack {
Some(ack) => {
ack.send(result).unwrap();
},
None => (),
};
}
Err(_) => return,
};
}
}
fn transcribe(&mut self, a_whisper: &AWhisper) -> Result<(), ()> {
match self.engine.transcribe(&a_whisper.data) {
Ok(result) => {
self.on_success(&result);
Ok(())
},
Err(msg) => {
self.on_error(msg.to_string());
Err(())
},
}
}
fn on_success(&mut self, whispered: &Whispered) {
let result = whispered
.after(&(self.stream_head * 100.0))
.before(&(self.stream_tail * 100.0));
(self.handler_fn.as_mut().unwrap())(Ok(result));
}
fn on_error(&mut self, msg: String) {
(self.handler_fn.as_mut().unwrap())(Err(format!("failed to transcribe: {}", &msg)));
}
}
struct WhisperEngine {
ctx: WhisperContext,
threads: i32,
}
fn new_whisper_engine(model_path: String, threads: i32) -> Result<WhisperEngine, String> {
match WhisperContext::new(&model_path) {
Ok(ctx) => Ok(WhisperEngine{ctx: ctx, threads: threads}),
Err(msg) => Err(format!("failed to load {}: {}", model_path, msg)),
}
}
impl WhisperEngine {
fn transcribe(&self, data: &Vec<f32>) -> Result<Whispered, WhisperError> {
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
params.set_no_context(true);
params.set_n_threads(self.threads);
params.set_translate(false);
params.set_detect_language(false);
params.set_language(Some("en"));
params.set_print_special(false);
params.set_print_progress(false);
params.set_print_realtime(false);
params.set_print_timestamps(false);
let mut state = self.ctx.create_state()?;
state.full(params, &data[..])?;
let mut result = new_whispered();
let num_segments = state.full_n_segments()?;
for i in 0..num_segments {
let data = state.full_get_segment_text(i)?;
let start = state.full_get_segment_t0(i)?;
let stop = state.full_get_segment_t1(i)?;
result.push(data, start, stop);
}
Ok(result)
}
}
struct AWhisper {
data: Vec<f32>,
ack: Option<std::sync::mpsc::SyncSender<bool>>,
}
#[derive(Clone, Debug)]
pub struct Whispered {
pub data: Vec<AWhispered>,
}
#[derive(Clone, Debug)]
pub struct AWhispered {
pub data: String,
pub offset: i64,
pub length: i64,
}
fn new_whispered() -> Whispered {
Whispered{data: vec![]}
}
fn new_a_whispered(data: String, start: i64, stop: i64) -> AWhispered {
AWhispered{
data: data,
offset: start.clone(),
length: stop - start,
}
}
impl Whispered {
pub fn to_string(&self) -> String {
let mut result = "".to_string();
for i in 0..self.data.len() {
result = format!("{} {}", result, &self.data[i].data);
}
result
}
fn after(&self, t: &f32) -> Whispered {
let mut result = new_whispered();
self.data
.iter()
.filter(|x| x.offset as f32 >= *t)
.for_each(|x| result.data.push(x.clone()));
result
}
fn before(&self, t: &f32) -> Whispered {
let mut result = new_whispered();
let end = match self.data.iter().map(|x| x.offset + x.length).max() {
Some(x) => x,
None => 1,
};
let t = (end as f32) - *t;
self.data
.iter()
.filter(|x| ((x.offset) as f32) <= t)
.for_each(|x| result.data.push(x.clone()));
result
}
fn push(&mut self, data: String, start: i64, stop: i64) {
let words: Vec<_> = data.split_whitespace().collect();
let per_word = (stop - start) / (words.len() as i64);
for i in 0..words.len() {
let start = (i as i64) * per_word;
let stop = start.clone() + per_word;
self.data.push(new_a_whispered(words[i].to_string(), start, stop));
}
}
}
struct Listener {
}
fn new_listener() -> Listener {
Listener{}
}
impl Listener {
fn listen(self, mut cb: impl FnMut(Vec<f32>)) {
let (send, recv) = std::sync::mpsc::sync_channel(100);
thread::spawn(move || { self._listen(send); });
loop {
match recv.recv() {
Ok(msg) => cb(msg),
Err(_) => return,
};
}
}
fn _listen(self, send: std::sync::mpsc::SyncSender<Vec<f32>>) {
let host = cpal::default_host();
let device = host.default_input_device().unwrap();
let cfg = device.supported_input_configs()
.unwrap()
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
.nth(0)
.unwrap()
.with_max_sample_rate();
let downsample_ratio = cfg.channels() as f32 * (cfg.sample_rate().0 as f32 / 16_000.0);
let stream = device.build_input_stream(
&cfg.clone().into(),
move |data: &[f32], _: &cpal::InputCallbackInfo| {
let mut downsampled_data = vec![];
for i in 0..(data.len() as f32 / downsample_ratio) as usize {
let mut upsampled = i as f32 * downsample_ratio;
if upsampled > (data.len()-1) as f32 {
upsampled = (data.len()-1) as f32
}
downsampled_data.push(data[upsampled as usize]);
}
match send.try_send(downsampled_data) {
Ok(_) => (),
Err(msg) => eprintln!("failed to ingest audio: {}", msg),
};
},
move |err| {
eprintln!("input error: {}", err)
},
None,
).unwrap();
stream.play().unwrap();
eprintln!("listening on {}", device.name().unwrap());
let mut signals = Signals::new(&[SIGINT]).unwrap();
for sig in signals.forever() {
eprintln!("sig {}", sig);
break;
}
stream.pause().unwrap();
}
}

View File

@@ -0,0 +1 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="List of all items in this crate"><title>List of all items in this crate</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Regular-018c141bf0843ffd.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Medium-8f9a781e4970d388.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Regular-562dcc5011b6de7d.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Bold-a2c9cd1067f8b328.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Semibold-d899c5a5c4aeb14a.ttf.woff2"><link rel="stylesheet" href="../static.files/normalize-76eba96aa4d2e634.css"><link rel="stylesheet" href="../static.files/rustdoc-fa3bb1812debf86c.css"><meta name="rustdoc-vars" data-root-path="../" data-static-root-path="../static.files/" data-current-crate="rust_whisper_lib" data-themes="" data-resource-suffix="" data-rustdoc-version="1.74.0 (79e9716c9 2023-11-13)" data-channel="1.74.0" data-search-js="search-8be46b629f5f14a8.js" data-settings-js="settings-74424d7eec62a23e.js" ><script src="../static.files/storage-fec3eaa3851e447d.js"></script><script defer src="../static.files/main-c5bd66d33317d69f.js"></script><noscript><link rel="stylesheet" href="../static.files/noscript-5d8b3c7633ad77ba.css"></noscript><link rel="alternate icon" type="image/png" href="../static.files/favicon-16x16-8b506e7a72182f1c.png"><link rel="alternate icon" type="image/png" href="../static.files/favicon-32x32-422f7d1d52889060.png"><link rel="icon" type="image/svg+xml" href="../static.files/favicon-2c020d218678b618.svg"></head><body class="rustdoc mod"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="mobile-topbar"><button class="sidebar-menu-toggle">&#9776;</button><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a></nav><nav class="sidebar"><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a><h2 class="location"><a href="#">Crate rust_whisper_lib</a></h2><div class="sidebar-elems"><section><ul class="block"><li><a href="#structs">Structs</a></li><li><a href="#functions">Functions</a></li></ul></section></div></nav><main><div class="width-limiter"><nav class="sub"><form class="search-form"><span></span><input class="search-input" name="search" aria-label="Run search in the documentation" autocomplete="off" spellcheck="false" placeholder="Click or press S to search, ? for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../static.files/wheel-7b819b6101059cd0.svg"></a></div></form></nav><section id="main-content" class="content"><h1>List of all items</h1><h3 id="structs">Structs</h3><ul class="all-items"><li><a href="struct.AWhispered.html">AWhispered</a></li><li><a href="struct.Flags.html">Flags</a></li><li><a href="struct.Whispered.html">Whispered</a></li></ul><h3 id="functions">Functions</h3><ul class="all-items"><li><a href="fn.main.html">main</a></li></ul></section></div></main></body></html>

View File

@@ -0,0 +1,2 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="API documentation for the Rust `main` fn in crate `rust_whisper_lib`."><title>main in rust_whisper_lib - Rust</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Regular-018c141bf0843ffd.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Medium-8f9a781e4970d388.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Regular-562dcc5011b6de7d.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Bold-a2c9cd1067f8b328.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Semibold-d899c5a5c4aeb14a.ttf.woff2"><link rel="stylesheet" href="../static.files/normalize-76eba96aa4d2e634.css"><link rel="stylesheet" href="../static.files/rustdoc-fa3bb1812debf86c.css"><meta name="rustdoc-vars" data-root-path="../" data-static-root-path="../static.files/" data-current-crate="rust_whisper_lib" data-themes="" data-resource-suffix="" data-rustdoc-version="1.74.0 (79e9716c9 2023-11-13)" data-channel="1.74.0" data-search-js="search-8be46b629f5f14a8.js" data-settings-js="settings-74424d7eec62a23e.js" ><script src="../static.files/storage-fec3eaa3851e447d.js"></script><script defer src="sidebar-items.js"></script><script defer src="../static.files/main-c5bd66d33317d69f.js"></script><noscript><link rel="stylesheet" href="../static.files/noscript-5d8b3c7633ad77ba.css"></noscript><link rel="alternate icon" type="image/png" href="../static.files/favicon-16x16-8b506e7a72182f1c.png"><link rel="alternate icon" type="image/png" href="../static.files/favicon-32x32-422f7d1d52889060.png"><link rel="icon" type="image/svg+xml" href="../static.files/favicon-2c020d218678b618.svg"></head><body class="rustdoc fn"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="mobile-topbar"><button class="sidebar-menu-toggle">&#9776;</button><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a></nav><nav class="sidebar"><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a><div class="sidebar-elems"><h2><a href="index.html">In rust_whisper_lib</a></h2></div></nav><main><div class="width-limiter"><nav class="sub"><form class="search-form"><span></span><input class="search-input" name="search" aria-label="Run search in the documentation" autocomplete="off" spellcheck="false" placeholder="Click or press S to search, ? for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../static.files/wheel-7b819b6101059cd0.svg"></a></div></form></nav><section id="main-content" class="content"><div class="main-heading"><h1>Function <a href="index.html">rust_whisper_lib</a>::<wbr><a class="fn" href="#">main</a><button id="copy-path" title="Copy item path to clipboard"><img src="../static.files/clipboard-7571035ce49a181d.svg" width="19" height="18" alt="Copy item path"></button></h1><span class="out-of-band"><a class="src" href="../src/rust_whisper_lib/lib.rs.html#33-90">source</a> · <button id="toggle-all-docs" title="collapse all docs">[<span>&#x2212;</span>]</button></span></div><pre class="rust item-decl"><code>pub fn main&lt;F&gt;(flags: <a class="struct" href="struct.Flags.html" title="struct rust_whisper_lib::Flags">Flags</a>, handler_fn: F)<span class="where fmt-newline">where
F: <a class="trait" href="https://doc.rust-lang.org/1.74.0/core/ops/function/trait.FnMut.html" title="trait core::ops::function::FnMut">FnMut</a>(<a class="enum" href="https://doc.rust-lang.org/1.74.0/core/result/enum.Result.html" title="enum core::result::Result">Result</a>&lt;<a class="struct" href="struct.Whispered.html" title="struct rust_whisper_lib::Whispered">Whispered</a>, <a class="struct" href="https://doc.rust-lang.org/1.74.0/alloc/string/struct.String.html" title="struct alloc::string::String">String</a>&gt;) + <a class="trait" href="https://doc.rust-lang.org/1.74.0/core/marker/trait.Send.html" title="trait core::marker::Send">Send</a> + 'static,</span></code></pre></section></div></main></body></html>

View File

@@ -0,0 +1 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="API documentation for the Rust `rust_whisper_lib` crate."><title>rust_whisper_lib - Rust</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Regular-018c141bf0843ffd.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Medium-8f9a781e4970d388.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Regular-562dcc5011b6de7d.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Bold-a2c9cd1067f8b328.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Semibold-d899c5a5c4aeb14a.ttf.woff2"><link rel="stylesheet" href="../static.files/normalize-76eba96aa4d2e634.css"><link rel="stylesheet" href="../static.files/rustdoc-fa3bb1812debf86c.css"><meta name="rustdoc-vars" data-root-path="../" data-static-root-path="../static.files/" data-current-crate="rust_whisper_lib" data-themes="" data-resource-suffix="" data-rustdoc-version="1.74.0 (79e9716c9 2023-11-13)" data-channel="1.74.0" data-search-js="search-8be46b629f5f14a8.js" data-settings-js="settings-74424d7eec62a23e.js" ><script src="../static.files/storage-fec3eaa3851e447d.js"></script><script defer src="../crates.js"></script><script defer src="../static.files/main-c5bd66d33317d69f.js"></script><noscript><link rel="stylesheet" href="../static.files/noscript-5d8b3c7633ad77ba.css"></noscript><link rel="alternate icon" type="image/png" href="../static.files/favicon-16x16-8b506e7a72182f1c.png"><link rel="alternate icon" type="image/png" href="../static.files/favicon-32x32-422f7d1d52889060.png"><link rel="icon" type="image/svg+xml" href="../static.files/favicon-2c020d218678b618.svg"></head><body class="rustdoc mod crate"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="mobile-topbar"><button class="sidebar-menu-toggle">&#9776;</button><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a></nav><nav class="sidebar"><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a><h2 class="location"><a href="#">Crate rust_whisper_lib</a></h2><div class="sidebar-elems"><ul class="block"><li class="version">Version 0.1.0</li><li><a id="all-types" href="all.html">All Items</a></li></ul><section><ul class="block"><li><a href="#structs">Structs</a></li><li><a href="#functions">Functions</a></li></ul></section></div></nav><main><div class="width-limiter"><nav class="sub"><form class="search-form"><span></span><input class="search-input" name="search" aria-label="Run search in the documentation" autocomplete="off" spellcheck="false" placeholder="Click or press S to search, ? for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../static.files/wheel-7b819b6101059cd0.svg"></a></div></form></nav><section id="main-content" class="content"><div class="main-heading"><h1>Crate <a class="mod" href="#">rust_whisper_lib</a><button id="copy-path" title="Copy item path to clipboard"><img src="../static.files/clipboard-7571035ce49a181d.svg" width="19" height="18" alt="Copy item path"></button></h1><span class="out-of-band"><a class="src" href="../src/rust_whisper_lib/lib.rs.html#1-363">source</a> · <button id="toggle-all-docs" title="collapse all docs">[<span>&#x2212;</span>]</button></span></div><h2 id="structs" class="small-section-header"><a href="#structs">Structs</a></h2><ul class="item-table"><li><div class="item-name"><a class="struct" href="struct.AWhispered.html" title="struct rust_whisper_lib::AWhispered">AWhispered</a></div></li><li><div class="item-name"><a class="struct" href="struct.Flags.html" title="struct rust_whisper_lib::Flags">Flags</a></div></li><li><div class="item-name"><a class="struct" href="struct.Whispered.html" title="struct rust_whisper_lib::Whispered">Whispered</a></div></li></ul><h2 id="functions" class="small-section-header"><a href="#functions">Functions</a></h2><ul class="item-table"><li><div class="item-name"><a class="fn" href="fn.main.html" title="fn rust_whisper_lib::main">main</a></div></li></ul></section></div></main></body></html>

View File

@@ -0,0 +1 @@
window.SIDEBAR_ITEMS = {"fn":["main"],"struct":["AWhispered","Flags","Whispered"]};

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,9 +0,0 @@
#!/bin/bash
src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
cd "$(dirname "$(realpath "$BASH_SOURCE")")"
# Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en"; do
test -f ./ggml-$model.bin || wget --quiet --show-progress -O ./ggml-$model.bin "$src-$model.bin"
done

View File

@@ -1,148 +0,0 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
use signal_hook::{iterator::Signals, consts::signal::SIGINT};
use std::time::{Duration, Instant};
use chrono;
use clap::Parser;
#[derive(Parser, Debug)]
struct Flags {
#[arg(long, default_value = "../models/ggml-tiny.en.bin")]
model: String,
#[arg(long, default_value = "8")]
threads: i32,
#[arg(long, default_value = "0.8")]
stream_churn: f32,
#[arg(long, default_value = "5")]
stream_step: u64,
wav: Option<String>,
}
fn main() {
let flags = Flags::parse();
let w = new_whisper(flags.model, flags.threads).unwrap();
let stream_churn = flags.stream_churn;
let stream_step = Duration::new(flags.stream_step, 0);
match flags.wav {
Some(wav) => {
let (header, data) = wav::read(
&mut std::fs::File::open(wav).expect("failed to open $WAV"),
).expect("failed to decode $WAV");
assert!(header.channel_count == 1);
assert!(header.sampling_rate == 16000);
let data16 = data.as_sixteen().expect("wav is not 32bit floats");
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
let result = w.transcribe(&audio_data).unwrap();
println!("{}", result);
},
None => {
let host = cpal::default_host();
let device = host.default_input_device().unwrap();
let cfg = device.supported_input_configs()
.unwrap()
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
.nth(0)
.unwrap()
.with_max_sample_rate();
let channels = cfg.channels();
let downsample_ratio = cfg.sample_rate().0 as f32 / 16000.0;
let mut buffer = vec![];
let mut last = Instant::now();
let stream = device.build_input_stream(
&cfg.clone().into(),
move |data: &[f32], _: &cpal::InputCallbackInfo| {
let mono_data: Vec<f32> = data.iter().map(|x| *x).step_by(channels.into()).collect();
let mut downsampled_data = vec![];
for i in 0..(mono_data.len() as f32 / downsample_ratio) as usize {
let mut upsampled = i as f32 * downsample_ratio;
if upsampled > (mono_data.len()-1) as f32 {
upsampled = (mono_data.len()-1) as f32
}
downsampled_data.push(mono_data[upsampled as usize]);
}
downsampled_data.iter().for_each(|x| buffer.push(*x));
if Instant::now() - last > stream_step {
let result = w.transcribe(&buffer).unwrap();
eprintln!("{}", chrono::Local::now());
println!("{}", result);
let retain = buffer.len() - (buffer.len() as f32 * stream_churn) as usize;
for i in retain..buffer.len() {
buffer[i - retain] = buffer[i]
}
buffer.truncate(retain);
last = Instant::now();
}
},
move |err| {
eprintln!("input error: {}", err)
},
None,
).unwrap();
stream.play().unwrap();
eprintln!("listening on {}", device.name().unwrap());
let mut signals = Signals::new(&[SIGINT]).unwrap();
for sig in signals.forever() {
eprintln!("sig {}", sig);
break;
}
stream.pause().unwrap();
},
};
}
struct Whisper {
ctx: WhisperContext,
threads: i32,
}
fn new_whisper(model_path: String, threads: i32) -> Result<Whisper, String> {
match WhisperContext::new(&model_path) {
Ok(ctx) => Ok(Whisper{
ctx: ctx,
threads: threads,
}),
Err(msg) => Err(format!("failed to load {}: {}", model_path, msg)),
}
}
impl Whisper {
fn transcribe(&self, data: &Vec<f32>) -> Result<String, String> {
match self._transcribe(&data) {
Ok(result) => Ok(result),
Err(msg) => Err(format!("failed to transcribe: {}", msg)),
}
}
fn _transcribe(&self, data: &Vec<f32>) -> Result<String, WhisperError> {
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
params.set_no_context(true);
params.set_n_threads(self.threads);
params.set_translate(false);
params.set_detect_language(false);
params.set_language(Some("en"));
params.set_print_special(false);
params.set_print_progress(false);
params.set_print_realtime(false);
params.set_print_timestamps(false);
let mut state = self.ctx.create_state()?;
state.full(params, &data[..])?;
let num_segments = state.full_n_segments()?;
let mut result = "".to_string();
for i in 0..num_segments {
let segment = state.full_get_segment_text(i)?;
result = format!("{} {}", result, segment);
}
Ok(result)
}
}

1410
rust-whisper/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

10
rust-whisper/Cargo.toml Normal file
View File

@@ -0,0 +1,10 @@
[package]
name = "rust-whisper"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rust-whisper-lib = { path = "../rust-whisper-lib" }
clap = { version = "4.4.10", features = ["derive"] }

8
rust-whisper/cargo.sh Normal file
View File

@@ -0,0 +1,8 @@
#! /bin/bash
#RUSTFLAGS="-Clink-args=-lstd++" \
export C_INCLUDE_PATH="$C_INCLUDE_PATH:$PWD/.."
export LIBRARY_PATH="$LIBRARY_PATH:$PWD/.."
cargo "$@"

15
rust-whisper/src/main.rs Normal file
View File

@@ -0,0 +1,15 @@
use rust_whisper_lib;
use clap::Parser;
fn main() {
let flags = rust_whisper_lib::Flags::parse();
rust_whisper_lib::main(
flags,
|result: Result<rust_whisper_lib::Whispered, String>| {
match result {
Ok(whispered) => { println!("{}", whispered.to_string()); },
Err(msg) => { eprintln!("error: {}", msg); },
};
},
);
}

View File

@@ -1,99 +0,0 @@
#! /bin/bash
echo https://github.com/seasalt-ai/snowboy
cd "$(dirname "$(realpath "$BASH_SOURCE")")"
set -e
set -o pipefail
if [ ! -d ./snowboy.git.d ]; then
git clone https://github.com/seasalt-ai/snowboy snowboy.git.d
fi
timeout 2 docker version &> /dev/null
if ! docker images | grep snowboy-pmdl.*latest &> /dev/null; then
pushd snowboy.git.d
docker build -t snowboy-pmdl:latest .
popd
fi
export HOTWORD="${HOTWORD:-${TRAIN:-default_hotword}}"
if [ -n "$TRAIN" ] || [ ! -d ./model ] || [ ! -f ./model/$HOTWORD.pmdl ]; then
mkdir -p model
pushd model
rm -f ./record{1,2,3}.wav || true
echo "record 3 instances of '$HOTWORD'" >&2
for i in 1 2 3; do
read -p "[$i/3] ready? you get 3 seconds."
(
timeout 3 rec \
-r 16000 \
-c 1 \
-b 16 \
-e signed-integer \
-t wav \
record$i.wav
) || true
ls record$i.wav
done
popd
docker run \
--rm \
-it \
-v "$(realpath ./model)":/snowboy-master/examples/Python/model \
snowboy-pmdl:latest
mv ./model/hotword.pmdl ./model/$HOTWORD.pmdl
if [ -n "$TRAIN" ]; then
exit 0
fi
fi
if false; then
if ! which swig; then
brew install swig
fi
pip3 install pyaudio
pushd snowboy.git.d/swig/Python3/
make
popd
cd snowboy.git.d/examples/Python3/
echo '
import snowboydecoder
import datetime
detected_callback = lambda *args: print(datetime.datetime.now(), "GOTCHA")
d = snowboydecoder.HotwordDetector("../../../model/'"$HOTWORD"'.pmdl", sensitivity=0.5, audio_gain=1)
d.start(detected_callback)
' > breel.py
echo GO
cleanup() {
echo OK IM DONE NOW
}
trap cleanup EXIT
python3 ./breel.py
else
resources="$(realpath snowboy.git.d/resources/common.res)"
hotword="$(realpath ./model/$HOTWORD.pmdl)"
GOPROXY= go build -o snowboy
if [ -z "$PUSH" ]; then
./snowboy \
-ms "$hotword/$HOTWORD" \
-r "$resources" \
-s 0.5 \
"$@"
else
echo '
FROM registry-app.eng.qops.net:5001/imported/alpine:3.16
WORKDIR /main/
COPY ./snowboy.git.d/resources/common.res ./
COPY ./model/hotword.pmdl ./
COPY ./snowboy ./
ENTRYPOINT ["sh", "-c", "true; echo copying /main/ to /mnt/; cp /main/* /mnt/"]
CMD []
' > Dockerfile
docker build -t registry-app.eng.qops.net:5001/breel/snowboy:latest .
docker push registry-app.eng.qops.net:5001/breel/snowboy:latest
fi
fi

View File

@@ -1,13 +0,0 @@
module snowboy
go 1.19
require (
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc
)
require (
github.com/Kitt-AI/snowboy v1.3.0 // indirect
github.com/stretchr/testify v1.8.1 // indirect
)

View File

@@ -1,22 +0,0 @@
github.com/Kitt-AI/snowboy v1.3.0 h1:PjBVN84M/9tAzDBQXILAKMoJMxt/fT0nhJ1rhKtVRUc=
github.com/Kitt-AI/snowboy v1.3.0/go.mod h1:sDzzMXFQ1wFkXkZaX/ant0xJsizGVq/9hyKb7ZB3cNI=
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af h1:ijY5OHNQs3CdzTN2XT+zByIsR1QVyXTvOUSkQcBm6pw=
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af/go.mod h1:XcT4k8Tn9hrM5SLVvu5hNQbAC6GojXM0MXz1Rt8CL68=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc h1:yYLpN7bJxKYILKnk20oczGQOQd2h3/7z7/cxdD9Se/I=
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,122 +0,0 @@
// This example streams the microphone thru Snowboy to listen for the hotword,
// by using the PortAudio interface.
//
// HOW TO USE:
// go run examples/Go/listen/main.go [path to snowboy resource file] [path to snowboy hotword file]
//
package main
import (
"bytes"
"encoding/binary"
"flag"
"fmt"
"log"
"path"
"strings"
"time"
"github.com/brentnd/go-snowboy"
"github.com/gordonklaus/portaudio"
)
// Sound represents a sound stream implementing the io.Reader interface
// that provides the microphone data.
type Sound struct {
stream *portaudio.Stream
data []int16
}
// Init initializes the Sound's PortAudio stream.
func (s *Sound) Init() {
inputChannels := 1
outputChannels := 0
sampleRate := 16000
s.data = make([]int16, 1024)
// initialize the audio recording interface
err := portaudio.Initialize()
if err != nil {
fmt.Errorf("Error initialize audio interface: %s", err)
return
}
// open the sound input stream for the microphone
stream, err := portaudio.OpenDefaultStream(inputChannels, outputChannels, float64(sampleRate), len(s.data), s.data)
if err != nil {
fmt.Errorf("Error open default audio stream: %s", err)
return
}
err = stream.Start()
if err != nil {
fmt.Errorf("Error on stream start: %s", err)
return
}
s.stream = stream
}
// Close closes down the Sound's PortAudio connection.
func (s *Sound) Close() {
s.stream.Close()
portaudio.Terminate()
}
// Read is the Sound's implementation of the io.Reader interface.
func (s *Sound) Read(p []byte) (int, error) {
s.stream.Read()
buf := &bytes.Buffer{}
for _, v := range s.data {
binary.Write(buf, binary.LittleEndian, v)
}
copy(p, buf.Bytes())
return len(p), nil
}
func main() {
resources := flag.String("r", "", "path to the .res file")
models := flag.String("ms", "", "comma delimited path to the .?mdl file/output")
sensitivity := flag.Float64("s", 0.45, "0..1")
quiet := flag.Bool("q", false, "emit '1' on detect else silent")
flag.Parse()
if *resources == "" || *models == "" {
panic("all flags must be set")
}
// open the mic
mic := &Sound{}
mic.Init()
defer mic.Close()
// open the snowboy detector
d := snowboy.NewDetector(*resources)
defer d.Close()
// set the handlers
for _, modelStrC := range strings.Split(*models, ",") {
modelStr := modelStrC
d.HandleFunc(snowboy.NewHotword(path.Dir(modelStr), float32(*sensitivity)), func(string) {
if !*quiet {
log.Println(path.Base(modelStr))
}
fmt.Println(path.Base(modelStr))
})
}
d.HandleSilenceFunc(1*time.Second, func(string) {
if !*quiet {
log.Println("...")
}
})
// display the detector's expected audio format
sr, nc, bd := d.AudioFormat()
log.Printf("sample rate=%d, num channels=%d, bit depth=%d\n", sr, nc, bd)
// start detecting using the microphone
d.ReadAndDetect(mic)
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,82 +0,0 @@
#! /bin/bash
main() {
cleanup() {
killall -9 $(jobs -p)
killall snowboy
}
trap cleanup EXIT
if [ ! -e /tmp/stt.fifo ]; then
mkfifo /tmp/stt.fifo
fi
echo starting in
for ((i=2; i>0; i--)); do
echo "...$i..."
sleep 1
done
local models=($(
cat pyautogui.yaml \
| gojq -r -c --yaml-input '
to_entries[] | "model/"+.key+".pmdl/"+.key
' \
| tr '\n' ',' \
| sed 's/,$//'
))
echo models=$models
./snowboy -r resources.res -ms $models "$@" > /tmp/stt.fifo &
python3 -c '
import pyautogui
import time
keys = set()
def toggle(key):
global keys
if key in keys:
release(key)
else:
hold(key)
def hold(key):
global keys
for keyin in [todrop for todrop in keys]:
if keyin != key:
release(keyin)
keys = set()
keys.add(key)
print()
print("pressing", key)
print()
pyautogui.keyDown(key)
def release(key):
print()
print("releasing", key)
print()
pyautogui.keyUp(key)
def main():
with open("/tmp/stt.fifo", "r") as q:
for line in q:
handle(line.strip())
import yaml
mapping = yaml.safe_load(open("./pyautogui.yaml", "r"))
print(mapping)
def handle(cmd):
global mapping
hold(mapping.get(cmd))
main()
'
}
if [ "$0" == "$BASH_SOURCE" ]; then
main "$@"
fi

View File

@@ -1,5 +0,0 @@
up: w
down: s
left: a
right: d
jump: w

Binary file not shown.