Compare commits
45 Commits
0c5c1f647c
...
v0.1.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
11b5091872 | ||
|
|
03370f362e | ||
|
|
ec6a71d38c | ||
|
|
1b96b132e1 | ||
|
|
839487b99e | ||
|
|
a2fee32fbc | ||
|
|
091958e08d | ||
|
|
5f47b2c88b | ||
|
|
367838ac23 | ||
|
|
d05287fa3d | ||
|
|
01be2637ca | ||
|
|
226bedb80e | ||
|
|
6b54e500cd | ||
|
|
8603f20a24 | ||
|
|
eee0bf5e65 | ||
|
|
15a3f8430a | ||
|
|
116f3f58c9 | ||
|
|
532ae22908 | ||
|
|
deffc420ca | ||
|
|
2391d07994 | ||
|
|
eea4b75bc8 | ||
|
|
8982276a90 | ||
|
|
479cfb055f | ||
|
|
0667b5b5c6 | ||
|
|
9e97f8669d | ||
|
|
ff0f34f80b | ||
|
|
bf3dd75074 | ||
|
|
827436d96c | ||
|
|
3b4295d026 | ||
|
|
2936fec1e4 | ||
|
|
1dd631872c | ||
|
|
72a1420638 | ||
|
|
1009c4230e | ||
|
|
30e5515da1 | ||
|
|
b4c9ecb98b | ||
|
|
4ef419e6c0 | ||
|
|
54964ec59b | ||
|
|
62e764436a | ||
|
|
d631def834 | ||
|
|
3168968cae | ||
|
|
437d7cac39 | ||
|
|
3093a91d84 | ||
|
|
f58e3a0331 | ||
|
|
6dffa401b7 | ||
|
|
f4d9730b5a |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -2,6 +2,8 @@
|
||||
/whisper-cpp-2023/rust.d/target
|
||||
/rust-whisper.d/target
|
||||
/rust-whisper.d/models
|
||||
/target/
|
||||
/models/
|
||||
snowboy-2022/snowboy
|
||||
**/*.git.d
|
||||
**/*.wav
|
||||
|
||||
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,3 +1,3 @@
|
||||
[submodule "rust-whisper.d/gitea-whisper-rs"]
|
||||
path = rust-whisper.d/gitea-whisper-rs
|
||||
path = gitea-whisper-rs
|
||||
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git
|
||||
|
||||
1345
Cargo.lock
generated
Normal file
1345
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
16
Cargo.toml
Normal file
16
Cargo.toml
Normal file
@@ -0,0 +1,16 @@
|
||||
[package]
|
||||
name = "rust-whisper"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
whisper-rs = { path = "./gitea-whisper-rs", version = "0.8.0" }
|
||||
wav = "1"
|
||||
tokio = "1.27"
|
||||
cpal = "0.15.2"
|
||||
signal-hook = "0.3.17"
|
||||
byteorder = "1.5.0"
|
||||
chrono = "0.4.31"
|
||||
clap = { version = "4.4.10", features = ["derive"] }
|
||||
29
README.md
29
README.md
@@ -1,29 +0,0 @@
|
||||
# stt
|
||||
|
||||
## listen on linux
|
||||
|
||||
https://wiki.archlinux.org/title/PulseAudio/Examples
|
||||
|
||||
```
|
||||
10. ALSA monitor source
|
||||
|
||||
To be able to record from a monitor source (a.k.a. "What-U-Hear", "Stereo Mix"), use pactl list to find out the name of the source in PulseAudio (e.g. alsa_output.pci-0000_00_1b.0.analog-stereo.monitor). Then add lines like the following to /etc/asound.conf or ~/.asoundrc:
|
||||
|
||||
pcm.pulse_monitor {
|
||||
type pulse
|
||||
device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
|
||||
}
|
||||
|
||||
ctl.pulse_monitor {
|
||||
type pulse
|
||||
device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
|
||||
}
|
||||
|
||||
Now you can select pulse_monitor as a recording source.
|
||||
|
||||
Alternatively, you can use pavucontrol to do this: make sure you have set up the display to "All input devices", then select "Monitor of [your sound card]" as the recording source.
|
||||
```
|
||||
|
||||
```bash
|
||||
$ pactl list | grep -A 50 RUNNING | grep -E 'RUNNING|Name:|Monitor Source:' | grep Monitor.Source | head -n 1 | awk '{print $NF}'
|
||||
```
|
||||
13
models/download_models.sh
Executable file
13
models/download_models.sh
Executable file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
# Download the ggml Whisper and distil-whisper model files into this script's
# directory. Files that already exist are left untouched.

src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
# Work relative to the script so the models land beside it; bail out if that
# fails rather than downloading into whatever directory we happen to be in.
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")" || exit 1

# fetch DEST URL [WGET_OPTION...]
# Download URL to DEST unless DEST already exists. The data is written to
# DEST.part first and renamed only on success, so an interrupted or failed
# download is never mistaken for a complete model on the next run.
fetch() {
  local dest="$1" url="$2"
  shift 2
  test -f "$dest" && return 0
  if wget "$@" -O "$dest.part" "$url"; then
    mv "$dest.part" "$dest"
  else
    rm -f "$dest.part"
    return 1
  fi
}

# Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en" "large-v2"; do
  fetch "./ggml-$model.bin" "$src-$model.bin" --quiet --show-progress
done

# distil-whisper models; the URLs are quoted because they contain '?', which
# the shell would otherwise treat as a glob character.
fetch ./ggml-distil-medium.en.bin "https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/ggml-medium-32-2.en.bin?download=true"

fetch ./ggml-distil-large-v2.bin "https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/ggml-large-32-2.en.bin?download=true"
|
||||
7
models/testme/Cargo.lock
generated
Normal file
7
models/testme/Cargo.lock
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "testme"
|
||||
version = "0.1.0"
|
||||
@@ -1,14 +1,8 @@
|
||||
[package]
|
||||
name = "rust-whisper"
|
||||
name = "testme"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
whisper-rs = "0.8.0"
|
||||
wav = "1"
|
||||
tokio = "1.27"
|
||||
|
||||
[patch.crates-io]
|
||||
whisper-rs = { path = "./gitea-whisper-rs" }
|
||||
4
models/testme/src/main.rs
Normal file
4
models/testme/src/main.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
fn main() {
|
||||
let bytes = include_bytes!("./test.txt");
|
||||
println!("{}", String::from_utf8_lossy(bytes));
|
||||
}
|
||||
1
models/testme/src/test.txt
Symbolic link
1
models/testme/src/test.txt
Symbolic link
@@ -0,0 +1 @@
|
||||
../../ggml-tiny.en.bin
|
||||
512
rust-whisper.d/Cargo.lock
generated
512
rust-whisper.d/Cargo.lock
generated
@@ -1,512 +0,0 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "addr2line"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
|
||||
dependencies = [
|
||||
"gimli",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "adler"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
|
||||
dependencies = [
|
||||
"addr2line",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"miniz_oxide",
|
||||
"object",
|
||||
"rustc-demangle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.68.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"lazy_static",
|
||||
"lazycell",
|
||||
"log",
|
||||
"peeking_take_while",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn",
|
||||
"which",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.83"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cexpr"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
||||
dependencies = [
|
||||
"nom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "clang-sys"
|
||||
version = "1.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
|
||||
dependencies = [
|
||||
"glob",
|
||||
"libc",
|
||||
"libloading",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cmake"
|
||||
version = "0.1.50"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f258a7194e7f7c2a7837a8913aeab7fd8c383457034fa20ce4dd3dcb813e8eb8"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fs_extra"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
|
||||
|
||||
[[package]]
|
||||
name = "gimli"
|
||||
version = "0.28.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
|
||||
[[package]]
|
||||
name = "home"
|
||||
version = "0.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "lazycell"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.150"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
|
||||
dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nom"
|
||||
version = "7.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"minimal-lexical",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.32.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
|
||||
[[package]]
|
||||
name = "peeking_take_while"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
|
||||
|
||||
[[package]]
|
||||
name = "prettyplease"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.70"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
|
||||
|
||||
[[package]]
|
||||
name = "riff"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
|
||||
|
||||
[[package]]
|
||||
name = "rust-whisper"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"tokio",
|
||||
"wav",
|
||||
"whisper-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-hash"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.39"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.34.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
[[package]]
|
||||
name = "wav"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a65e199c799848b4f997072aa4d673c034f80f40191f97fe2f0a23f410be1609"
|
||||
dependencies = [
|
||||
"riff",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "which"
|
||||
version = "4.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
|
||||
dependencies = [
|
||||
"either",
|
||||
"home",
|
||||
"once_cell",
|
||||
"rustix",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "whisper-rs"
|
||||
version = "0.8.0"
|
||||
dependencies = [
|
||||
"whisper-rs-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "whisper-rs-sys"
|
||||
version = "0.7.3"
|
||||
dependencies = [
|
||||
"bindgen",
|
||||
"cfg-if",
|
||||
"cmake",
|
||||
"fs_extra",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
|
||||
@@ -1,9 +0,0 @@
|
||||
#!/bin/bash
# Download the English ggml Whisper model files into this script's directory.
# Files that already exist are left untouched.

src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
# Work relative to the script so the models land beside it; bail out if that
# fails rather than downloading into whatever directory we happen to be in.
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")" || exit 1

# Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en"; do
  dest="./ggml-$model.bin"
  test -f "$dest" && continue
  # Download to a temporary name and rename on success, so a failed or
  # interrupted download is not mistaken for a complete model next time.
  if wget --quiet --show-progress -O "$dest.part" "$src-$model.bin"; then
    mv "$dest.part" "$dest"
  else
    rm -f "$dest.part"
  fi
done
|
||||
@@ -1,37 +0,0 @@
|
||||
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy};
|
||||
|
||||
fn main() {
|
||||
let ctx = WhisperContext::new(
|
||||
&std::env::var("MODEL").unwrap_or(String::from("../models/ggml-tiny.en.bin"))
|
||||
).expect("failed to load model");
|
||||
let mut state = ctx.create_state().expect("failed to create state");
|
||||
|
||||
// create a params object
|
||||
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
|
||||
params.set_n_threads(
|
||||
std::env::var("P").unwrap_or(String::from("1")).parse::<i32>().expect("$P must be a number")
|
||||
);
|
||||
params.set_translate(false);
|
||||
params.set_language(Some("en"));
|
||||
params.set_print_special(false);
|
||||
params.set_print_progress(false);
|
||||
params.set_print_realtime(false);
|
||||
params.set_print_timestamps(false);
|
||||
|
||||
let (header, data) = wav::read(&mut std::fs::File::open(
|
||||
&std::env::var("WAV").unwrap_or(String::from("../git.d/samples/jfk.wav"))
|
||||
).expect("failed to open .wav")).expect("failed to decode .wav");
|
||||
assert!(header.channel_count == 1);
|
||||
assert!(header.sampling_rate == 16000);
|
||||
let data16 = data.as_sixteen().expect("wav is not 32bit floats");
|
||||
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
|
||||
|
||||
state.full(params, &audio_data[..]).expect("failed to run model");
|
||||
|
||||
let num_segments = state.full_n_segments().expect("failed to get number of segments");
|
||||
for i in 0..num_segments {
|
||||
let segment = state.full_get_segment_text(i).expect("failed to get segment");
|
||||
print!("{} ", segment);
|
||||
}
|
||||
println!("");
|
||||
}
|
||||
@@ -1,99 +0,0 @@
|
||||
#! /bin/bash
# Train (when needed) a personal snowboy hotword model, then either run the Go
# hotword listener locally or, when $PUSH is set, package it into an image.
#
# Environment:
#   TRAIN    when non-empty, (re)record the samples, train the model and exit.
#   HOTWORD  name of the model file under ./model (defaults to $TRAIN, then
#            "default_hotword").
#   PUSH     when non-empty, build and push a docker image instead of listening.
# Any remaining arguments are forwarded to the ./snowboy listener.

echo https://github.com/seasalt-ai/snowboy
# Everything below uses paths relative to the script directory.
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")" || exit 1

set -e
set -o pipefail

if [ ! -d ./snowboy.git.d ]; then
  git clone https://github.com/seasalt-ai/snowboy snowboy.git.d
fi

# Fail with a message instead of letting `set -e` abort silently when the
# docker daemon cannot be reached.
if ! timeout 2 docker version &> /dev/null; then
  echo "docker is not reachable" >&2
  exit 1
fi
# The pattern is quoted so the shell cannot glob-expand it before grep sees it.
if ! docker images | grep 'snowboy-pmdl.*latest' &> /dev/null; then
  pushd snowboy.git.d
  docker build -t snowboy-pmdl:latest .
  popd
fi

export HOTWORD="${HOTWORD:-${TRAIN:-default_hotword}}"

if [ -n "$TRAIN" ] || [ ! -d ./model ] || [ ! -f "./model/$HOTWORD.pmdl" ]; then
  mkdir -p model
  pushd model
  rm -f ./record{1,2,3}.wav || true
  echo "record 3 instances of '$HOTWORD'" >&2
  for i in 1 2 3; do
    read -r -p "[$i/3] ready? you get 3 seconds."
    # `timeout` ends the recording with a non-zero status, hence `|| true`.
    (
      timeout 3 rec \
        -r 16000 \
        -c 1 \
        -b 16 \
        -e signed-integer \
        -t wav \
        "record$i.wav"
    ) || true
    # Aborts the script (set -e) if the recording was not produced.
    ls "record$i.wav"
  done
  popd

  docker run \
    --rm \
    -it \
    -v "$(realpath ./model)":/snowboy-master/examples/Python/model \
    snowboy-pmdl:latest
  mv ./model/hotword.pmdl "./model/$HOTWORD.pmdl"
  if [ -n "$TRAIN" ]; then
    exit 0
  fi
fi

# The Python listener below is disabled on purpose (`if false`); the Go
# listener in the else branch is the one that runs.
if false; then
  if ! which swig; then
    brew install swig
  fi
  pip3 install pyaudio
  pushd snowboy.git.d/swig/Python3/
  make
  popd

  cd snowboy.git.d/examples/Python3/
  echo '
import snowboydecoder
import datetime
detected_callback = lambda *args: print(datetime.datetime.now(), "GOTCHA")
d = snowboydecoder.HotwordDetector("../../../model/'"$HOTWORD"'.pmdl", sensitivity=0.5, audio_gain=1)
d.start(detected_callback)
' > breel.py
  echo GO
  cleanup() {
    echo OK IM DONE NOW
  }
  trap cleanup EXIT
  python3 ./breel.py
else
  resources="$(realpath snowboy.git.d/resources/common.res)"
  hotword="$(realpath "./model/$HOTWORD.pmdl")"
  GOPROXY= go build -o snowboy
  if [ -z "$PUSH" ]; then
    # -ms takes "<model path>/<label>": the listener splits the last path
    # component off as the label it prints on detection.
    ./snowboy \
      -ms "$hotword/$HOTWORD" \
      -r "$resources" \
      -s 0.5 \
      "$@"
  else
    # NOTE(review): the image copies ./model/hotword.pmdl, but training renames
    # that file to ./model/$HOTWORD.pmdl above - confirm which file should ship.
    echo '
FROM registry-app.eng.qops.net:5001/imported/alpine:3.16
WORKDIR /main/
COPY ./snowboy.git.d/resources/common.res ./
COPY ./model/hotword.pmdl ./
COPY ./snowboy ./
ENTRYPOINT ["sh", "-c", "true; echo copying /main/ to /mnt/; cp /main/* /mnt/"]
CMD []
' > Dockerfile
    docker build -t registry-app.eng.qops.net:5001/breel/snowboy:latest .
    docker push registry-app.eng.qops.net:5001/breel/snowboy:latest
  fi
fi
|
||||
@@ -1,13 +0,0 @@
|
||||
module snowboy
|
||||
|
||||
go 1.19
|
||||
|
||||
require (
|
||||
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af
|
||||
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/Kitt-AI/snowboy v1.3.0 // indirect
|
||||
github.com/stretchr/testify v1.8.1 // indirect
|
||||
)
|
||||
@@ -1,22 +0,0 @@
|
||||
github.com/Kitt-AI/snowboy v1.3.0 h1:PjBVN84M/9tAzDBQXILAKMoJMxt/fT0nhJ1rhKtVRUc=
|
||||
github.com/Kitt-AI/snowboy v1.3.0/go.mod h1:sDzzMXFQ1wFkXkZaX/ant0xJsizGVq/9hyKb7ZB3cNI=
|
||||
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af h1:ijY5OHNQs3CdzTN2XT+zByIsR1QVyXTvOUSkQcBm6pw=
|
||||
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af/go.mod h1:XcT4k8Tn9hrM5SLVvu5hNQbAC6GojXM0MXz1Rt8CL68=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc h1:yYLpN7bJxKYILKnk20oczGQOQd2h3/7z7/cxdD9Se/I=
|
||||
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
|
||||
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
|
||||
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
@@ -1,122 +0,0 @@
|
||||
// This example streams the microphone thru Snowboy to listen for the hotword,
|
||||
// by using the PortAudio interface.
|
||||
//
|
||||
// HOW TO USE:
|
||||
// go run examples/Go/listen/main.go [path to snowboy resource file] [path to snowboy hotword file]
|
||||
//
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/brentnd/go-snowboy"
|
||||
"github.com/gordonklaus/portaudio"
|
||||
)
|
||||
|
||||
// Sound represents a sound stream implementing the io.Reader interface
|
||||
// that provides the microphone data.
|
||||
type Sound struct {
|
||||
stream *portaudio.Stream
|
||||
data []int16
|
||||
}
|
||||
|
||||
// Init initializes the Sound's PortAudio stream.
|
||||
func (s *Sound) Init() {
|
||||
inputChannels := 1
|
||||
outputChannels := 0
|
||||
sampleRate := 16000
|
||||
s.data = make([]int16, 1024)
|
||||
|
||||
// initialize the audio recording interface
|
||||
err := portaudio.Initialize()
|
||||
if err != nil {
|
||||
fmt.Errorf("Error initialize audio interface: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
// open the sound input stream for the microphone
|
||||
stream, err := portaudio.OpenDefaultStream(inputChannels, outputChannels, float64(sampleRate), len(s.data), s.data)
|
||||
if err != nil {
|
||||
fmt.Errorf("Error open default audio stream: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
err = stream.Start()
|
||||
if err != nil {
|
||||
fmt.Errorf("Error on stream start: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
s.stream = stream
|
||||
}
|
||||
|
||||
// Close closes down the Sound's PortAudio connection.
|
||||
func (s *Sound) Close() {
|
||||
s.stream.Close()
|
||||
portaudio.Terminate()
|
||||
}
|
||||
|
||||
// Read is the Sound's implementation of the io.Reader interface.
|
||||
func (s *Sound) Read(p []byte) (int, error) {
|
||||
s.stream.Read()
|
||||
|
||||
buf := &bytes.Buffer{}
|
||||
for _, v := range s.data {
|
||||
binary.Write(buf, binary.LittleEndian, v)
|
||||
}
|
||||
|
||||
copy(p, buf.Bytes())
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
resources := flag.String("r", "", "path to the .res file")
|
||||
models := flag.String("ms", "", "comma delimited path to the .?mdl file/output")
|
||||
sensitivity := flag.Float64("s", 0.45, "0..1")
|
||||
quiet := flag.Bool("q", false, "emit '1' on detect else silent")
|
||||
flag.Parse()
|
||||
|
||||
if *resources == "" || *models == "" {
|
||||
panic("all flags must be set")
|
||||
}
|
||||
|
||||
// open the mic
|
||||
mic := &Sound{}
|
||||
mic.Init()
|
||||
defer mic.Close()
|
||||
|
||||
// open the snowboy detector
|
||||
d := snowboy.NewDetector(*resources)
|
||||
defer d.Close()
|
||||
|
||||
// set the handlers
|
||||
for _, modelStrC := range strings.Split(*models, ",") {
|
||||
modelStr := modelStrC
|
||||
d.HandleFunc(snowboy.NewHotword(path.Dir(modelStr), float32(*sensitivity)), func(string) {
|
||||
if !*quiet {
|
||||
log.Println(path.Base(modelStr))
|
||||
}
|
||||
fmt.Println(path.Base(modelStr))
|
||||
})
|
||||
}
|
||||
|
||||
d.HandleSilenceFunc(1*time.Second, func(string) {
|
||||
if !*quiet {
|
||||
log.Println("...")
|
||||
}
|
||||
})
|
||||
|
||||
// display the detector's expected audio format
|
||||
sr, nc, bd := d.AudioFormat()
|
||||
log.Printf("sample rate=%d, num channels=%d, bit depth=%d\n", sr, nc, bd)
|
||||
|
||||
// start detecting using the microphone
|
||||
d.ReadAndDetect(mic)
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,82 +0,0 @@
|
||||
#! /bin/bash
|
||||
|
||||
# Runs the snowboy hotword detector in the background, pipes the detected
# hotword names through /tmp/stt.fifo and lets an inline Python program hold
# down the key that pyautogui.yaml maps to each hotword.
# Any arguments are forwarded to ./snowboy.
main() {
  # Stop the background detector when the script exits.
  cleanup() {
    local pids
    pids="$(jobs -p)"
    # jobs -p prints PIDs, and killall matches process names, so use kill here.
    [ -n "$pids" ] && kill -9 $pids
    killall snowboy
  }
  trap cleanup EXIT

  if [ ! -e /tmp/stt.fifo ]; then
    mkfifo /tmp/stt.fifo
  fi

  echo starting in
  for ((i=2; i>0; i--)); do
    echo "...$i..."
    sleep 1
  done

  # Build the comma separated -ms argument: one "model/<key>.pmdl/<key>" entry
  # per top-level key of pyautogui.yaml.
  local models
  models="$(
    gojq -r -c --yaml-input '
      to_entries[] | "model/"+.key+".pmdl/"+.key
    ' < pyautogui.yaml \
      | tr '\n' ',' \
      | sed 's/,$//'
  )"
  echo "models=$models"

  ./snowboy -r resources.res -ms "$models" "$@" > /tmp/stt.fifo &

  # NOTE: the Python program is single-quoted, so it must not contain apostrophes.
  python3 -c '
import pyautogui
import time
import yaml

# Keys currently held down.
keys = set()

def toggle(key):
    if key in keys:
        release(key)
    else:
        hold(key)

def hold(key):
    """Release every other held key, then press and hold key."""
    global keys
    for held in list(keys):
        if held != key:
            release(held)
    keys = set()
    keys.add(key)
    print()
    print("pressing", key)
    print()
    pyautogui.keyDown(key)

def release(key):
    print()
    print("releasing", key)
    print()
    pyautogui.keyUp(key)
    # Forget the key so toggle() sees it as released.
    keys.discard(key)

with open("./pyautogui.yaml", "r") as f:
    mapping = yaml.safe_load(f)
print(mapping)

def handle(cmd):
    key = mapping.get(cmd)
    if key is None:
        # Blank or unmapped line: skip it instead of handing None to pyautogui.
        return
    hold(key)

def main():
    with open("/tmp/stt.fifo", "r") as q:
        for line in q:
            handle(line.strip())

main()
'
}

# Run main only when executed directly, not when sourced.
if [ "$0" == "${BASH_SOURCE[0]}" ]; then
  main "$@"
fi
|
||||
@@ -1,5 +0,0 @@
|
||||
up: w
|
||||
down: s
|
||||
left: a
|
||||
right: d
|
||||
jump: w
|
||||
Binary file not shown.
374
src/main.rs
Normal file
374
src/main.rs
Normal file
@@ -0,0 +1,374 @@
|
||||
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
|
||||
use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
|
||||
use signal_hook::{iterator::Signals, consts::signal::SIGINT};
|
||||
use std::time::{Duration, Instant};
|
||||
use chrono;
|
||||
use clap::Parser;
|
||||
use std::thread;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
struct Flags {
|
||||
#[arg(long, default_value = "./models/ggml-tiny.en.bin")]
|
||||
model: String,
|
||||
|
||||
#[arg(long, default_value = "8")]
|
||||
threads: i32,
|
||||
|
||||
#[arg(long, default_value = "5")]
|
||||
stream_step: u64,
|
||||
#[arg(long, default_value = "0.6")]
|
||||
stream_retain: f32,
|
||||
#[arg(long, default_value = "0.3")]
|
||||
stream_head: f32,
|
||||
#[arg(long, default_value = "0.3")]
|
||||
stream_tail: f32,
|
||||
|
||||
wav: Option<String>,
|
||||
|
||||
#[arg(long, default_value = "false")]
|
||||
debug: bool,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let flags = Flags::parse();
|
||||
|
||||
let w = new_whisper_service(
|
||||
flags.model,
|
||||
flags.threads,
|
||||
flags.stream_head,
|
||||
flags.stream_tail,
|
||||
|result: Result<Whispered, String>| {
|
||||
match result {
|
||||
Ok(whispered) => {
|
||||
eprintln!("{}: {:?}", chrono::Local::now(), whispered);
|
||||
println!("{}", whispered.to_string());
|
||||
},
|
||||
Err(msg) => { eprintln!("Error whispering: {}", msg); },
|
||||
};
|
||||
},
|
||||
).unwrap();
|
||||
let stream_retain = (flags.stream_retain * 16_000.0) as usize;
|
||||
let stream_step = Duration::new(flags.stream_step, 0);
|
||||
match flags.wav {
|
||||
Some(wav) => {
|
||||
let (header, data) = wav::read(
|
||||
&mut std::fs::File::open(wav).expect("failed to open $WAV"),
|
||||
).expect("failed to decode $WAV");
|
||||
assert!(header.channel_count == 1);
|
||||
assert!(header.sampling_rate == 16_000);
|
||||
let data16 = data.as_sixteen().expect("wav is not 32bit floats");
|
||||
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
|
||||
|
||||
w.transcribe(&audio_data);
|
||||
},
|
||||
None => {
|
||||
match &flags.debug {
|
||||
true => { File::create("/tmp/page.rawf32audio").unwrap(); },
|
||||
false => {},
|
||||
};
|
||||
let mut buffer = vec![];
|
||||
let mut last = Instant::now();
|
||||
new_listener().listen(move |data: Vec<f32>| {
|
||||
data.iter().for_each(|x| buffer.push(*x));
|
||||
if Instant::now() - last > stream_step {
|
||||
w.transcribe_async(&buffer).unwrap();
|
||||
|
||||
match &flags.debug {
|
||||
true => {
|
||||
let mut f = File::options().append(true).open("/tmp/page.rawf32audio").unwrap();
|
||||
let mut wav_data = vec![];
|
||||
for i in buffer.iter() {
|
||||
for j in i.to_le_bytes() {
|
||||
wav_data.push(j);
|
||||
}
|
||||
}
|
||||
f.write_all(wav_data.as_slice()).unwrap();
|
||||
},
|
||||
false => {},
|
||||
};
|
||||
|
||||
for i in 0..stream_retain {
|
||||
buffer[i] = buffer[buffer.len() - stream_retain + i];
|
||||
}
|
||||
buffer.truncate(stream_retain);
|
||||
last = Instant::now();
|
||||
}
|
||||
});
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
struct WhisperService {
|
||||
jobs: std::sync::mpsc::SyncSender<AWhisper>,
|
||||
}
|
||||
|
||||
fn new_whisper_service<F>(model_path: String, threads: i32, stream_head: f32, stream_tail: f32, handler_fn: F) -> Result<WhisperService, String> where F: FnMut(Result<Whispered, String>) + Send + 'static {
|
||||
match new_whisper_engine(model_path, threads) {
|
||||
Ok(engine) => {
|
||||
let mut whisper = new_whisper_impl(engine, stream_head, stream_tail, handler_fn);
|
||||
let (send, recv) = std::sync::mpsc::sync_channel(100);
|
||||
thread::spawn(move || { whisper.transcribe_asyncs(recv); });
|
||||
Ok(WhisperService{jobs: send})
|
||||
},
|
||||
Err(msg) => Err(format!("failed to initialize engine: {}", msg)),
|
||||
}
|
||||
}
|
||||
|
||||
impl WhisperService {
|
||||
fn transcribe(&self, data: &Vec<f32>) {
|
||||
let (send, recv) = std::sync::mpsc::sync_channel(0);
|
||||
self._transcribe_async(data, Some(send)).unwrap();
|
||||
recv.recv().unwrap();
|
||||
}
|
||||
|
||||
fn transcribe_async(&self, data: &Vec<f32>) -> Result<(), String> {
|
||||
self._transcribe_async(data, None)
|
||||
}
|
||||
|
||||
fn _transcribe_async(&self, data: &Vec<f32>, ack: Option<std::sync::mpsc::SyncSender<bool>>) -> Result<(), String> {
|
||||
match self.jobs.try_send(AWhisper{
|
||||
data: data.clone().to_vec(),
|
||||
ack: ack,
|
||||
}) {
|
||||
Ok(_) => Ok(()),
|
||||
Err(msg) => Err(format!("failed to enqueue transcription: {}", msg)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct WhisperImpl {
|
||||
engine: WhisperEngine,
|
||||
stream_head: f32,
|
||||
stream_tail: f32,
|
||||
handler_fn: Option<Box<dyn FnMut(Result<Whispered, String>) + Send + 'static>>
|
||||
}
|
||||
|
||||
fn new_whisper_impl<F>(engine: WhisperEngine, stream_head: f32, stream_tail: f32, handler_fn: F) -> WhisperImpl where F: FnMut(Result<Whispered, String>) + Send + 'static {
|
||||
WhisperImpl {
|
||||
engine: engine,
|
||||
stream_head: stream_head,
|
||||
stream_tail: stream_tail,
|
||||
handler_fn: Some(Box::new(handler_fn)),
|
||||
}
|
||||
}
|
||||
|
||||
impl WhisperImpl {
|
||||
fn transcribe_asyncs(&mut self, recv: std::sync::mpsc::Receiver<AWhisper>) {
|
||||
loop {
|
||||
match recv.recv() {
|
||||
Ok(job) => {
|
||||
let result = self.transcribe(&job).is_ok();
|
||||
match job.ack {
|
||||
Some(ack) => {
|
||||
ack.send(result).unwrap();
|
||||
},
|
||||
None => (),
|
||||
};
|
||||
}
|
||||
Err(_) => return,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn transcribe(&mut self, a_whisper: &AWhisper) -> Result<(), ()> {
|
||||
match self.engine.transcribe(&a_whisper.data) {
|
||||
Ok(result) => {
|
||||
self.on_success(&result);
|
||||
Ok(())
|
||||
},
|
||||
Err(msg) => {
|
||||
self.on_error(msg.to_string());
|
||||
Err(())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn on_success(&mut self, whispered: &Whispered) {
|
||||
let result = whispered
|
||||
.after(&(self.stream_head * 100.0))
|
||||
.before(&(self.stream_tail * 100.0));
|
||||
(self.handler_fn.as_mut().unwrap())(Ok(result));
|
||||
}
|
||||
|
||||
fn on_error(&mut self, msg: String) {
|
||||
(self.handler_fn.as_mut().unwrap())(Err(format!("failed to transcribe: {}", &msg)));
|
||||
}
|
||||
}
|
||||
|
||||
struct WhisperEngine {
|
||||
ctx: WhisperContext,
|
||||
threads: i32,
|
||||
}
|
||||
|
||||
fn new_whisper_engine(model_path: String, threads: i32) -> Result<WhisperEngine, String> {
|
||||
match WhisperContext::new(&model_path) {
|
||||
Ok(ctx) => Ok(WhisperEngine{ctx: ctx, threads: threads}),
|
||||
Err(msg) => Err(format!("failed to load {}: {}", model_path, msg)),
|
||||
}
|
||||
}
|
||||
|
||||
impl WhisperEngine {
|
||||
fn transcribe(&self, data: &Vec<f32>) -> Result<Whispered, WhisperError> {
|
||||
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
|
||||
params.set_no_context(true);
|
||||
params.set_n_threads(self.threads);
|
||||
params.set_translate(false);
|
||||
params.set_detect_language(false);
|
||||
params.set_language(Some("en"));
|
||||
params.set_print_special(false);
|
||||
params.set_print_progress(false);
|
||||
params.set_print_realtime(false);
|
||||
params.set_print_timestamps(false);
|
||||
|
||||
let mut state = self.ctx.create_state()?;
|
||||
state.full(params, &data[..])?;
|
||||
|
||||
let mut result = new_whispered();
|
||||
let num_segments = state.full_n_segments()?;
|
||||
for i in 0..num_segments {
|
||||
let data = state.full_get_segment_text(i)?;
|
||||
let start = state.full_get_segment_t0(i)?;
|
||||
let stop = state.full_get_segment_t1(i)?;
|
||||
result.push(data, start, stop);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
/// One queued transcription job.
struct AWhisper {
    /// Audio samples to transcribe.
    data: Vec<f32>,
    /// Optional channel on which the worker reports whether the job succeeded.
    ack: Option<std::sync::mpsc::SyncSender<bool>>,
}
|
||||
|
||||
/// A transcription result: the recognised words in order, each with a time span.
#[derive(Clone, Debug)]
struct Whispered {
    data: Vec<AWhispered>,
}

/// A single word with its estimated position inside the transcribed audio.
#[derive(Clone, Debug)]
struct AWhispered {
    /// The word itself.
    data: String,
    /// Start time, in the same units as the segment timestamps given to `push`.
    offset: i64,
    /// Duration (stop minus start), in the same units as `offset`.
    length: i64,
}

/// Creates an empty result.
fn new_whispered() -> Whispered {
    Whispered { data: vec![] }
}

/// Builds a word spanning `start..stop`.
fn new_a_whispered(data: String, start: i64, stop: i64) -> AWhispered {
    AWhispered {
        data,
        offset: start,
        length: stop - start,
    }
}

impl Whispered {
    /// Concatenates all words, each preceded by a single space
    /// (so a non-empty result starts with a space).
    fn to_string(&self) -> String {
        let mut result = String::new();
        for word in &self.data {
            result.push(' ');
            result.push_str(&word.data);
        }
        result
    }

    /// Returns the words whose start offset is at or after `t`.
    fn after(&self, t: &f32) -> Whispered {
        let kept = self
            .data
            .iter()
            .filter(|word| word.offset as f32 >= *t)
            .cloned()
            .collect();
        Whispered { data: kept }
    }

    /// Returns the words that start no later than `t` before the end of the
    /// last word. An empty result is treated as ending at 1.
    fn before(&self, t: &f32) -> Whispered {
        let end = self
            .data
            .iter()
            .map(|word| word.offset + word.length)
            .max()
            .unwrap_or(1);
        let cutoff = (end as f32) - *t;
        let kept = self
            .data
            .iter()
            .filter(|word| (word.offset as f32) <= cutoff)
            .cloned()
            .collect();
        Whispered { data: kept }
    }

    /// Splits a segment's text into words and appends them, spreading the
    /// segment's `start..stop` span evenly across the words.
    ///
    /// Segments without any word are ignored.
    fn push(&mut self, data: String, start: i64, stop: i64) {
        let words: Vec<&str> = data.split_whitespace().collect();
        if words.is_empty() {
            // Nothing to add, and dividing by the word count would panic.
            return;
        }
        let per_word = (stop - start) / (words.len() as i64);
        for (i, word) in words.iter().enumerate() {
            // Offsets are relative to the whole audio, so anchor them at the
            // segment start instead of restarting from zero for each segment.
            let word_start = start + (i as i64) * per_word;
            let word_stop = word_start + per_word;
            self.data
                .push(new_a_whispered(word.to_string(), word_start, word_stop));
        }
    }
}
|
||||
|
||||
struct Listener {
|
||||
}
|
||||
|
||||
fn new_listener() -> Listener {
|
||||
Listener{}
|
||||
}
|
||||
|
||||
impl Listener {
|
||||
fn listen(self, mut cb: impl FnMut(Vec<f32>)) {
|
||||
let (send, recv) = std::sync::mpsc::sync_channel(100);
|
||||
thread::spawn(move || { self._listen(send); });
|
||||
loop {
|
||||
match recv.recv() {
|
||||
Ok(msg) => cb(msg),
|
||||
Err(_) => return,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn _listen(self, send: std::sync::mpsc::SyncSender<Vec<f32>>) {
|
||||
let host = cpal::default_host();
|
||||
let device = host.default_input_device().unwrap();
|
||||
let cfg = device.supported_input_configs()
|
||||
.unwrap()
|
||||
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
|
||||
.nth(0)
|
||||
.unwrap()
|
||||
.with_max_sample_rate();
|
||||
|
||||
let downsample_ratio = cfg.channels() as f32 * (cfg.sample_rate().0 as f32 / 16_000.0);
|
||||
let stream = device.build_input_stream(
|
||||
&cfg.clone().into(),
|
||||
move |data: &[f32], _: &cpal::InputCallbackInfo| {
|
||||
let mut downsampled_data = vec![];
|
||||
for i in 0..(data.len() as f32 / downsample_ratio) as usize {
|
||||
let mut upsampled = i as f32 * downsample_ratio;
|
||||
if upsampled > (data.len()-1) as f32 {
|
||||
upsampled = (data.len()-1) as f32
|
||||
}
|
||||
downsampled_data.push(data[upsampled as usize]);
|
||||
}
|
||||
match send.try_send(downsampled_data) {
|
||||
Ok(_) => (),
|
||||
Err(msg) => eprintln!("failed to ingest audio: {}", msg),
|
||||
};
|
||||
},
|
||||
move |err| {
|
||||
eprintln!("input error: {}", err)
|
||||
},
|
||||
None,
|
||||
).unwrap();
|
||||
stream.play().unwrap();
|
||||
|
||||
eprintln!("listening on {}", device.name().unwrap());
|
||||
let mut signals = Signals::new(&[SIGINT]).unwrap();
|
||||
for sig in signals.forever() {
|
||||
eprintln!("sig {}", sig);
|
||||
break;
|
||||
}
|
||||
stream.pause().unwrap();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user