139 Commits

Author SHA1 Message Date
Bel LaPointe
d94cbd6927 baked-lib wav_channel to base 2024-01-02 16:30:07 -07:00
Bel LaPointe
7a5db3b2ac no callback on empty str 2024-01-02 16:29:58 -07:00
Bel LaPointe
d0dc9571d7 rust-whisper-baked listen also destutters 2024-01-02 16:12:07 -07:00
Bel LaPointe
6082f7e446 rust-whisper-baked de-stutters wav_channel output 2024-01-02 16:10:11 -07:00
Bel LaPointe
dd6f980266 rust-whisper-baked with --wav to wav_channel 2024-01-02 14:18:37 -07:00
Bel LaPointe
601fe517d7 rust-whisper-baked-lib::wav_channel() 2024-01-02 14:17:57 -07:00
Bel LaPointe
cd339de334 rust-whisper-lib::wav_channel() 2024-01-02 14:15:53 -07:00
bel
393100973c baked streams even wav files 2024-01-01 20:56:53 -07:00
bel
97c025f04d rust-whisper-baked works with WAV 2024-01-01 19:06:14 -07:00
bel
871efd9b8c drop unused 2024-01-01 18:41:49 -07:00
Bel LaPointe
86bb1769f3 del unused 2023-12-21 22:30:43 -05:00
Bel LaPointe
c23352b7e5 found an example that just works 2023-12-21 22:26:49 -05:00
Bel LaPointe
1cceb773f2 reduce whisper-rs space 2023-12-21 22:15:00 -05:00
Bel LaPointe
29ff174f5d Revert "found burn depends on zstd that doesnt wasm"
This reverts commit 1c9d646a50.
2023-12-21 22:13:25 -05:00
Bel LaPointe
1c9d646a50 found burn depends on zstd that doesnt wasm 2023-12-21 22:13:23 -05:00
Bel LaPointe
d63d05505c drop excess packages 2023-12-21 22:02:25 -05:00
Bel LaPointe
5fb1308b30 rust-whisper-lib imports whisper-burn@432ef86ec1cb9d57406b9fab4e2a13155b10d74b 2023-12-21 22:00:51 -05:00
Bel LaPointe
19b0a4b385 baked channel uses tiny 2023-12-21 15:09:37 -05:00
Bel LaPointe
f326f077ce no more rwhisper 2023-12-21 15:07:05 -05:00
Bel LaPointe
3b79024bdd confirmed rwhisper aint gonna cut it seemingly because candle doesnt cut it 2023-12-21 14:40:17 -05:00
Bel LaPointe
de099d9917 but it is soooo sloooow 2023-12-21 14:34:22 -05:00
Bel LaPointe
0d8315550a IT RUNS WITH RWHISPER WOO 2023-12-21 14:20:19 -05:00
Bel LaPointe
a420ef5381 upd8 2023-12-20 23:56:16 -05:00
Bel LaPointe
18bc7f3604 update transcript.sh for rust-whisper-baked 2023-12-20 23:51:13 -05:00
Bel LaPointe
48c0565b1f oop 2023-12-20 23:45:23 -05:00
Bel LaPointe
af1171b228 and change model 2023-12-20 23:43:38 -05:00
Bel LaPointe
e74a5ac0ca fix so each buffer page isnt stream_step from last BUT stream_step of content 2023-12-20 23:43:06 -05:00
Bel LaPointe
6bb29c106f use at least 15k hz for input device config 2023-12-20 23:14:13 -05:00
Bel LaPointe
43e495ac6c still broken looping in lib-rs but marginally better so 2023-12-20 22:58:25 -05:00
Bel LaPointe
1cdefd1eca document learnings 2023-12-20 22:50:12 -05:00
Bel LaPointe
c69f6c5f7f virtual audio device for mac 2 ez 2023-12-20 22:39:06 -05:00
Bel LaPointe
6f1df41d83 fix compilation 2023-12-20 22:14:52 -05:00
Bel LaPointe
5c34112c0c rust-whisper-baked can list input devices 2023-12-20 22:12:35 -05:00
Bel LaPointe
e27a8f8f56 listen-lib accepts device name 2023-12-20 21:54:15 -05:00
Bel LaPointe
f97f746ab9 minor refactor 2023-12-20 14:55:33 -05:00
Bel LaPointe
589c6e65b8 whisper no wasm 2023-12-20 12:58:16 -05:00
Bel LaPointe
3b6ae8801c rust-whisper-baked streams again woo 2023-12-20 09:48:38 -05:00
Bel LaPointe
1e56d44b2f baked-lib to ::wav, ::stream 2023-12-20 09:38:29 -05:00
Bel LaPointe
4f44697ef0 just stream and wav 2023-12-20 09:35:41 -05:00
Bel LaPointe
6e32967067 drop cpal, signal from rust-whisper-lib and accept channel 2023-12-20 09:31:36 -05:00
Bel LaPointe
32d8080251 wippy 2023-12-20 09:28:47 -05:00
Bel LaPointe
a617d2a924 maybe i CAN use whisper... 2023-12-20 09:11:34 -05:00
Bel LaPointe
fd77e19b60 Rename away from whisper 2023-12-20 09:09:48 -05:00
Bel LaPointe
26f3ccad37 make a unittest 2023-12-20 09:01:06 -05:00
Bel LaPointe
27562e6599 wasm dead in the water because no libc 2023-12-20 07:30:27 -05:00
Bel LaPointe
6502402fca wasm-friendly args 2023-12-20 06:47:25 -05:00
Bel LaPointe
da60e92a8b grrrr f wasm 2023-12-19 22:41:40 -05:00
Bel LaPointe
c0bc9a9c76 smaller for dev 2023-12-19 22:33:32 -05:00
Bel LaPointe
218816a9dc swapped rust-whisper-baked to rust-whisper-baked-lib 2023-12-19 22:30:54 -05:00
Bel LaPointe
7a9cdd7e4f wrap rust-whisper-lib as rust-whisper-baked-lib to cook in a model 2023-12-19 22:29:10 -05:00
Bel LaPointe
d1ef3e4085 try the model-baked-in-cli-version 2023-12-19 22:24:55 -05:00
Bel LaPointe
9363975178 un-export 2023-12-19 22:19:18 -05:00
Bel LaPointe
73ed522d49 update docs 2023-12-19 22:18:15 -05:00
Bel LaPointe
0a0827a7b2 fix clap args so cli works 2023-12-19 22:17:37 -05:00
Bel LaPointe
95f638d3d0 accept model_path or model_buffer in flags 2023-12-19 22:14:22 -05:00
Bel LaPointe
82b24cabeb move all into subdirs 2023-12-19 22:01:27 -05:00
Bel LaPointe
2d4451f97a drop original root-level 2023-12-19 22:00:09 -05:00
Bel LaPointe
9055cd4aee cmd imports rust-whisper-lib 2023-12-19 21:59:34 -05:00
Bel LaPointe
cd68435eae stub /rust-whisper as a bin 2023-12-19 21:54:02 -05:00
Bel LaPointe
febe905616 lib-ify as /rust-whisper-lib 2023-12-19 21:53:02 -05:00
Bel LaPointe
2883830dbe get ready to split into package 2023-12-19 21:46:02 -05:00
Bel LaPointe
11b5091872 found the type needed to pass closures with local variables 2023-12-19 21:39:53 -05:00
Bel LaPointe
03370f362e from borrow since a grant is K 2023-12-19 21:20:52 -05:00
Bel LaPointe
ec6a71d38c purge non callback handling 2023-12-19 21:19:37 -05:00
Bel LaPointe
1b96b132e1 dumb callbacks work 2023-12-19 21:18:04 -05:00
Bel LaPointe
839487b99e drop redundant on_success time trimming 2023-12-19 21:11:53 -05:00
Bel LaPointe
a2fee32fbc refactor to whisper_service enqueues, whisper_impl transforms, whisper_engine provides raw 2023-12-19 21:08:59 -05:00
Bel LaPointe
091958e08d moved to a callback BUT costed me a global so lets iterate to someTrait 2023-12-19 20:38:01 -05:00
Bel LaPointe
5f47b2c88b wait i just needed an option? f off 2023-12-19 20:20:24 -05:00
Bel LaPointe
367838ac23 test to show include_bytes! macro supports large symlinks 2023-12-19 16:36:17 -05:00
Bel LaPointe
d05287fa3d update --stream-* defaults 2023-12-19 10:30:10 -05:00
Bel LaPointe
01be2637ca swap order 2023-12-19 10:26:22 -05:00
Bel LaPointe
226bedb80e add --debug to write a file that can be played with cat /tmp/page.rawf32audio | sox -r 16000 -b 32 -t f32 -e floating-point - -d 2023-12-19 10:25:48 -05:00
Bel LaPointe
6b54e500cd i think my recording has gaps 2023-12-19 09:54:21 -05:00
Bel LaPointe
8603f20a24 break into words but keep more stream head/tail tiebreaking 2023-12-19 09:51:11 -05:00
Bel LaPointe
eee0bf5e65 wip... 2023-12-19 09:30:15 -05:00
Bel LaPointe
15a3f8430a WIP trim the head and tail from text output because low confidence 2023-12-19 09:09:38 -05:00
Bel LaPointe
116f3f58c9 no buffer 2023-11-30 12:37:19 -07:00
Bel LaPointe
532ae22908 back to mvp 2023-11-30 12:28:35 -07:00
Bel LaPointe
deffc420ca at least it complies 2023-11-30 12:00:16 -07:00
Bel LaPointe
2391d07994 transcribing results as callbacks 2023-11-30 09:58:28 -07:00
Bel LaPointe
eea4b75bc8 confirmed threaded listen vs transcribe stream is naisu 2023-11-30 09:45:09 -07:00
Bel LaPointe
8982276a90 not infinite buffer 2023-11-30 09:41:12 -07:00
Bel LaPointe
479cfb055f threaded something i guess 2023-11-30 09:39:43 -07:00
Bel LaPointe
0667b5b5c6 large distill too 2023-11-30 09:12:38 -07:00
Bel LaPointe
9e97f8669d fuuuuuuuu lost my models folder oh well 2023-11-30 09:06:26 -07:00
Bel LaPointe
ff0f34f80b move rust to root 2023-11-30 09:02:11 -07:00
Bel LaPointe
bf3dd75074 gitignore 2023-11-30 09:02:02 -07:00
Bel LaPointe
827436d96c drop snowboy 2023-11-30 09:01:50 -07:00
Bel LaPointe
3b4295d026 unnest submodule 2023-11-30 09:01:44 -07:00
Bel LaPointe
2936fec1e4 dont need to choose 1 channel since downsampling should randomly choose from all 2023-11-29 05:33:27 -07:00
Bel LaPointe
1dd631872c from env to flags 2023-11-28 22:31:16 -07:00
Bel LaPointe
72a1420638 wheee 2023-11-28 22:24:10 -07:00
Bel LaPointe
1009c4230e env variable ify 2023-11-28 22:13:05 -07:00
Bel LaPointe
30e5515da1 GOTTEM 2023-11-28 22:03:08 -07:00
Bel LaPointe
b4c9ecb98b successfully confirmed audio is k with sox -r 16000 -t f32 /tmp/transcribed.pcm --default-device 2023-11-28 21:30:24 -07:00
Bel LaPointe
4ef419e6c0 successfully confirmed audio is k with sox -r 44100 -t f32 /tmp/transcribed.pcm --default-device 2023-11-28 21:22:26 -07:00
Bel LaPointe
54964ec59b grrrrr output 2023-11-28 21:03:00 -07:00
Bel LaPointe
62e764436a no warnings but still nothing sane coming out... 2023-11-28 20:36:26 -07:00
Bel LaPointe
d631def834 CLOSER like easily 80 20 range right 2023-11-28 20:32:09 -07:00
Bel LaPointe
3168968cae ok stream les go 2023-11-28 19:23:21 -07:00
Bel LaPointe
437d7cac39 successful refactor 2023-11-28 19:18:05 -07:00
Bel LaPointe
3093a91d84 wip 2023-11-28 19:10:07 -07:00
Bel LaPointe
f58e3a0331 better default err msgs 2023-11-26 17:37:26 -07:00
Bel LaPointe
6dffa401b7 cleaner 2023-11-26 17:21:40 -07:00
Bel LaPointe
f4d9730b5a hm i lost it but i get it back 2023-11-26 17:13:29 -07:00
Bel LaPointe
0c5c1f647c submodule for gitea-whisper-rs 2023-11-26 17:04:16 -07:00
Bel LaPointe
77ad40b61a closer 2023-11-26 17:00:42 -07:00
Bel LaPointe
09894c4fd0 confirmed just needs whisper-rs-sys upgrade for whisper.cpp up 2023-11-26 16:39:42 -07:00
Bel LaPointe
3e2e1e2ff8 wip 2023-11-26 16:23:42 -07:00
Bel LaPointe
50058037eb Revert "try rollback to whisper-rs5 to avoid gpu in whispercpp but no luck"
This reverts commit a483aaf25c.
2023-11-08 11:35:31 -07:00
Bel LaPointe
a483aaf25c try rollback to whisper-rs5 to avoid gpu in whispercpp but no luck 2023-11-08 11:35:29 -07:00
Bel LaPointe
be7d85f85e confirmed whisper.cpp works with distill iff no gpu 2023-11-08 11:29:50 -07:00
Bel LaPointe
60d38c4d5c update distil.sh 2023-11-08 10:58:58 -07:00
Bel LaPointe
e3a7628acf try distil-whisper 2023-11-08 10:22:30 -07:00
Bel LaPointe
91c7791860 up whisper-rs to 0.8.0 2023-11-08 09:25:00 -07:00
bel
247edd2ced more trans 2023-07-15 19:05:00 -06:00
bel
edd94aef72 catch 2023-07-05 22:36:07 -06:00
bel
b4d3e5a27c HOTWORDS yaml @ can have comma delimited and KEYS 2023-04-19 18:24:07 -06:00
bel
a1436e3bd2 revise 2023-04-12 19:37:43 -06:00
bel
410769b8c6 tr 2023-04-12 19:26:03 -06:00
bel
5869016de6 tr 2023-04-12 19:16:07 -06:00
bel
0955f6c0c0 oof 2023-04-12 19:15:32 -06:00
bel
242f4407df script 2023-04-12 18:50:49 -06:00
bel
814a8ae2f3 typo 2023-04-08 22:23:20 -06:00
bel
7c369e72d4 delimiters 2023-04-08 22:22:22 -06:00
bel
0aff4f556b one more 2023-04-08 20:05:03 -06:00
bel
88bf54d022 url replaces hotword,context too 2023-04-02 10:48:41 -06:00
bel
63fe8e7e9e whoops 2023-04-01 11:13:57 -06:00
bel
e26e700a21 reader does finally put and some exception shuffle 2023-04-01 11:12:44 -06:00
bel
fa23e396f1 gr 2023-04-01 10:46:16 -06:00
bel
11789a5c98 with boo 2023-04-01 10:45:00 -06:00
bel
97006d95c8 stack pls 2023-04-01 10:41:22 -06:00
bel
bb578e98f6 retry slower on bad mic things but ultimately segfault still 2023-04-01 10:35:04 -06:00
bel
465193b60d DEBUG stderr 2023-03-31 22:36:30 -06:00
bel
0e6c12a94f oops 2023-03-31 21:35:23 -06:00
bel
599f9079d3 req.txt 2023-03-31 20:52:38 -06:00
bel
0805bb6fd8 prune to only one whisper 2023-03-31 16:42:36 -06:00
bel
b109b0144d move to /rust-whisper.d with a download_models.sh 2023-03-31 16:41:46 -06:00
74 changed files with 6645 additions and 893 deletions

9
.gitignore vendored
View File

@@ -1,6 +1,15 @@
**/*.sw*
/whisper-cpp-2023/rust.d/target
/rust-whisper.d/target
/rust-whisper.d/models
/target/
/models/
snowboy-2022/snowboy
**/*.git.d
**/*.wav
snowboy-2022/Dockerfile
**/target
/candle-wasm-examples-whisper/mel_filters*
/candle-wasm-examples-whisper/whisper-tiny*
/candle-wasm-examples-whisper/quantized*
/candle-wasm-examples-whisper/audios*

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "rust-whisper.d/gitea-whisper-rs"]
path = gitea-whisper-rs
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git

View File

@@ -1,29 +0,0 @@
# stt
## listen on linux
https://wiki.archlinux.org/title/PulseAudio/Examples
```
10. ALSA monitor source
To be able to record from a monitor source (a.k.a. "What-U-Hear", "Stereo Mix"), use pactl list to find out the name of the source in PulseAudio (e.g. alsa_output.pci-0000_00_1b.0.analog-stereo.monitor). Then add lines like the following to /etc/asound.conf or ~/.asoundrc:
pcm.pulse_monitor {
type pulse
device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
}
ctl.pulse_monitor {
type pulse
device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
}
Now you can select pulse_monitor as a recording source.
Alternatively, you can use pavucontrol to do this: make sure you have set up the display to "All input devices", then select "Monitor of [your sound card]" as the recording source.
```
```bash
$ pactl list | grep -A 50 RUNNING | grep -E 'RUNNING|Name:|Monitor Source:' | grep Monitor.Source | head -n 1 | awk '{print $NF}'
```

View File

@@ -0,0 +1,52 @@
[package]
name = "candle-wasm-example-whisper"
version.workspace = true
edition.workspace = true
description.workspace = true
repository.workspace = true
keywords.workspace = true
categories.workspace = true
license.workspace = true
[dependencies]
candle = { path = "../../candle-core", version = "0.3.2", package = "candle-core" }
candle-nn = { path = "../../candle-nn", version = "0.3.2" }
candle-transformers = { path = "../../candle-transformers", version = "0.3.2" }
num-traits = { workspace = true }
tokenizers = { workspace = true, features = ["unstable_wasm"] }
# App crates.
anyhow = { workspace = true }
log = { workspace = true }
rand = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
wav = { workspace = true }
safetensors = { workspace = true }
# Wasm specific crates.
getrandom = { version = "0.2", features = ["js"] }
gloo = "0.8"
js-sys = "0.3.64"
wasm-bindgen = "0.2.87"
wasm-bindgen-futures = "0.4.37"
wasm-logger = "0.2"
yew-agent = "0.2.0"
yew = { version = "0.20.0", features = ["csr"] }
[dependencies.web-sys]
version = "0.3.64"
features = [
'Blob',
'Document',
'Element',
'HtmlElement',
'Node',
'Window',
'Request',
'RequestCache',
'RequestInit',
'RequestMode',
'Response',
'Performance',
]

View File

@@ -0,0 +1,68 @@
## Running Whisper Examples
Here, we provide two examples of how to run Whisper using a Candle-compiled WASM binary and runtimes.
### Pure Rust UI
To build and test the UI made in Rust you will need [Trunk](https://trunkrs.dev/#install)
From the `candle-wasm-examples/whisper` directory run:
Download assets:
```bash
# mel filters
wget -c https://huggingface.co/spaces/lmz/candle-whisper/resolve/main/mel_filters.safetensors
# Model and tokenizer tiny.en
wget -c https://huggingface.co/openai/whisper-tiny.en/resolve/main/model.safetensors -P whisper-tiny.en
wget -c https://huggingface.co/openai/whisper-tiny.en/raw/main/tokenizer.json -P whisper-tiny.en
wget -c https://huggingface.co/openai/whisper-tiny.en/raw/main/config.json -P whisper-tiny.en
# model and tokenizer tiny multilanguage
wget -c https://huggingface.co/openai/whisper-tiny/resolve/main/model.safetensors -P whisper-tiny
wget -c https://huggingface.co/openai/whisper-tiny/raw/main/tokenizer.json -P whisper-tiny
wget -c https://huggingface.co/openai/whisper-tiny/raw/main/config.json -P whisper-tiny
#quantized
wget -c https://huggingface.co/lmz/candle-whisper/resolve/main/model-tiny-en-q80.gguf -P quantized
wget -c https://huggingface.co/lmz/candle-whisper/raw/main/tokenizer-tiny-en.json -P quantized
wget -c https://huggingface.co/lmz/candle-whisper/raw/main/config-tiny-en.json -P quantized
# Audio samples
wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_gb0.wav -P audios
wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_a13.wav -P audios
wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_gb1.wav -P audios
wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_hp0.wav -P audios
wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_jfk.wav -P audios
wget -c https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/samples_mm0.wav -P audios
```
Run hot reload server:
```bash
trunk serve --release --public-url / --port 8080
```
### Vanilla JS and WebWorkers
To build and test the UI made in Vanilla JS and WebWorkers, first we need to build the WASM library:
```bash
sh build-lib.sh
```
This will bundle the library under `./build` and we can import it inside our WebWorker like a normal JS module:
```js
import init, { Decoder } from "./build/m.js";
```
The full example can be found under `./lib-example.html`. All needed assets are fetched from the web, so no need to download anything.
Finally, you can preview the example by running a local HTTP server. For example:
```bash
python -m http.server
```
Then open `http://localhost:8000/lib-example.html` in your browser.

View File

@@ -0,0 +1,2 @@
cargo build --target wasm32-unknown-unknown --release
wasm-bindgen ../../target/wasm32-unknown-unknown/release/m.wasm --out-dir build --target web

View File

@@ -0,0 +1,40 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>Welcome to Candle!</title>
<link data-trunk rel="copy-file" href="mel_filters.safetensors" />
<!-- samples -->
<link data-trunk rel="copy-dir" href="audios" />
<!-- tiny.en -->
<link data-trunk rel="copy-dir" href="whisper-tiny.en" />
<!-- tiny -->
<link data-trunk rel="copy-dir" href="whisper-tiny" />
<!-- quantized -->
<link data-trunk rel="copy-dir" href="quantized" />
<link
data-trunk
rel="rust"
href="Cargo.toml"
data-bin="app"
data-type="main" />
<link
data-trunk
rel="rust"
href="Cargo.toml"
data-bin="worker"
data-type="worker" />
<link
rel="stylesheet"
href="https://fonts.googleapis.com/css?family=Roboto:300,300italic,700,700italic" />
<link
rel="stylesheet"
href="https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.css" />
<link
rel="stylesheet"
href="https://cdnjs.cloudflare.com/ajax/libs/milligram/1.4.1/milligram.css" />
</head>
<body></body>
</html>

View File

@@ -0,0 +1,351 @@
<html>
<head>
<meta content="text/html;charset=utf-8" http-equiv="Content-Type" />
<title>Candle Whisper Rust/WASM</title>
</head>
<body></body>
</html>
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<style>
@import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
html,
body {
font-family: "Source Sans 3", sans-serif;
}
</style>
<script src="https://cdn.tailwindcss.com"></script>
<script type="module">
// base url for audio examples
const AUDIO_BASE_URL =
"https://huggingface.co/datasets/Narsil/candle-examples/resolve/main/";
// models base url
const MODELS = {
tiny_multilingual: {
base_url: "https://huggingface.co/openai/whisper-tiny/resolve/main/",
model: "model.safetensors",
tokenizer: "tokenizer.json",
config: "config.json",
size: "151 MB",
},
tiny_en: {
base_url:
"https://huggingface.co/openai/whisper-tiny.en/resolve/main/",
model: "model.safetensors",
tokenizer: "tokenizer.json",
config: "config.json",
size: "151 MB",
},
tiny_quantized_multilingual_q80: {
base_url: "https://huggingface.co/lmz/candle-whisper/resolve/main/",
model: "model-tiny-q80.gguf",
tokenizer: "tokenizer-tiny.json",
config: "config-tiny.json",
size: "41.5 MB",
},
tiny_en_quantized_q80: {
base_url: "https://huggingface.co/lmz/candle-whisper/resolve/main/",
model: "model-tiny-q80.gguf",
tokenizer: "tokenizer-tiny-en.json",
config: "config-tiny-en.json",
size: "41.8 MB",
},
distil_medium_en: {
base_url:
"https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/",
model: "model.safetensors",
tokenizer: "tokenizer.json",
config: "config.json",
size: "789 MB",
},
};
const modelEl = document.querySelector("#model");
Object.keys(MODELS).forEach((modelID) => {
const model = MODELS[modelID];
const option = document.createElement("option");
option.value = modelID;
option.textContent = `${modelID} (${model.size})`;
modelEl.appendChild(option);
});
const whisperWorker = new Worker("./whisperWorker.js", {
type: "module",
});
async function classifyAudio(
weightsURL, // URL to the weights file
modelID, // model ID
tokenizerURL, // URL to the tokenizer file
configURL, // model config URL
mel_filtersURL, // URL to the mel filters file
audioURL, // URL to the audio file
updateStatus // function to update the status
) {
return new Promise((resolve, reject) => {
whisperWorker.postMessage({
weightsURL,
modelID,
tokenizerURL,
configURL,
mel_filtersURL,
audioURL,
});
function messageHandler(event) {
console.log(event.data);
if ("status" in event.data) {
updateStatus(event.data);
}
if ("error" in event.data) {
whisperWorker.removeEventListener("message", messageHandler);
reject(new Error(event.data.error));
}
if (event.data.status === "complete") {
whisperWorker.removeEventListener("message", messageHandler);
resolve(event.data);
}
}
whisperWorker.addEventListener("message", messageHandler);
});
}
// keep track of the audio URL
let audioURL = null;
function setAudio(src) {
const audio = document.querySelector("#audio");
audio.src = src;
audio.controls = true;
audio.hidden = false;
document.querySelector("#detect").disabled = false;
audioURL = src;
}
// add event listener to audio buttons
document.querySelectorAll("#audios-select > button").forEach((target) => {
target.addEventListener("click", (e) => {
const value = target.dataset.value;
const href = AUDIO_BASE_URL + value;
setAudio(href);
});
});
//add event listener to file input
document.querySelector("#file-upload").addEventListener("change", (e) => {
const target = e.target;
if (target.files.length > 0) {
const href = URL.createObjectURL(target.files[0]);
setAudio(href);
}
});
// add event listener to drop-area
const dropArea = document.querySelector("#drop-area");
dropArea.addEventListener("dragenter", (e) => {
e.preventDefault();
dropArea.classList.add("border-blue-700");
});
dropArea.addEventListener("dragleave", (e) => {
e.preventDefault();
dropArea.classList.remove("border-blue-700");
});
dropArea.addEventListener("dragover", (e) => {
e.preventDefault();
dropArea.classList.add("border-blue-700");
});
dropArea.addEventListener("drop", (e) => {
e.preventDefault();
dropArea.classList.remove("border-blue-700");
const url = e.dataTransfer.getData("text/uri-list");
const files = e.dataTransfer.files;
if (files.length > 0) {
const href = URL.createObjectURL(files[0]);
setAudio(href);
} else if (url) {
setAudio(url);
}
});
// add event listener to detect button
document.querySelector("#detect").addEventListener("click", async () => {
if (audioURL === null) {
return;
}
const modelID = modelEl.value;
const model = MODELS[modelID];
const modelURL = model.base_url + model.model;
const tokenizerURL = model.base_url + model.tokenizer;
const configURL = model.base_url + model.config;
classifyAudio(
modelURL,
modelID,
tokenizerURL,
configURL,
"mel_filters.safetensors",
audioURL,
updateStatus
)
.then((result) => {
console.log("RESULT", result);
const { output } = result;
const text = output.map((segment) => segment.dr.text).join(" ");
console.log(text);
document.querySelector("#output-status").hidden = true;
document.querySelector("#output-generation").hidden = false;
document.querySelector("#output-generation").textContent = text;
})
.catch((error) => {
console.error(error);
});
});
function updateStatus(data) {
const { status, message } = data;
const button = document.querySelector("#detect");
if (status === "decoding" || status === "loading") {
button.disabled = true;
button.textContent = message;
} else if (status === "complete") {
button.disabled = false;
button.textContent = "Transcribe Audio";
}
}
</script>
</head>
<body class="container max-w-4xl mx-auto p-4">
<main class="grid grid-cols-1 gap-8 relative">
<span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
<div>
<h1 class="text-5xl font-bold">Candle Whisper</h1>
<h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
<p class="max-w-lg">
Transcribe audio in the browser using rust/wasm with an audio file.
This demo uses the
<a
href="https://huggingface.co/openai/"
target="_blank"
class="underline hover:text-blue-500 hover:no-underline">
OpenAI Whisper models
</a>
and WASM runtime built with
<a
href="https://github.com/huggingface/candle/"
target="_blank"
class="underline hover:text-blue-500 hover:no-underline"
>Candle
</a>
</p>
</div>
<div>
<label for="model" class="font-medium">Models Options: </label>
<select
id="model"
class="border-2 border-gray-500 rounded-md font-light">
</select>
</div>
<!-- drag and drop area -->
<div class="relative">
<div
id="drop-area"
class="flex flex-col items-center justify-center border-2 border-gray-300 border-dashed rounded-xl relative h-48 w-full overflow-hidden">
<div
class="flex flex-col items-center justify-center space-y-1 text-center">
<svg
width="25"
height="25"
viewBox="0 0 25 25"
fill="none"
xmlns="http://www.w3.org/2000/svg">
<path
d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z"
fill="#000" />
</svg>
<div class="flex text-sm text-gray-600">
<label
for="file-upload"
class="relative cursor-pointer bg-white rounded-md font-medium text-blue-950 hover:text-blue-700">
<span>Drag and drop your audio here</span>
<span class="block text-xs">or</span>
<span class="block text-xs">Click to upload</span>
</label>
</div>
<input
id="file-upload"
name="file-upload"
type="file"
accept="audio/*"
class="sr-only" />
</div>
<audio
id="audio"
hidden
controls
class="w-full p-2 select-none"></audio>
</div>
</div>
<div>
<div class="flex flex-wrap gap-3 items-center" id="audios-select">
<h3 class="font-medium">Examples:</h3>
<button
data-value="samples_jfk.wav"
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
<span>jfk.wav</span>
<span class="text-xs block"> (352 kB)</span>
</button>
<button
data-value="samples_a13.wav"
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
<span>a13.wav</span>
<span class="text-xs block"> (960 kB)</span>
</button>
<button
data-value="samples_mm0.wav"
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
<span>mm0.wav</span>
<span class="text-xs block new"> (957 kB)</span>
</button>
<button
data-value="samples_gb0.wav"
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
<span>gb0.wav </span>
<span class="text-xs block">(4.08 MB)</span>
</button>
<button
data-value="samples_gb1.wav"
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
<span>gb1.wav </span>
<span class="text-xs block">(6.36 MB)</span>
</button>
<button
data-value="samples_hp0.wav"
class="text-gray-500 border border-gray-500 rounded-md p-2 underline hover:no-underline">
<span>hp0.wav </span>
<span class="text-xs block">(8.75 MB)</span>
</button>
</div>
</div>
<div>
<button
id="detect"
disabled
class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 px-4 rounded disabled:bg-gray-300 disabled:cursor-not-allowed">
Transcribe Audio
</button>
</div>
<div>
<h3 class="font-medium">Transcription:</h3>
<div
class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2">
<p hidden id="output-generation" class="grid-rows-2"></p>
<span id="output-status" class="m-auto font-light"
>No transcription results yet</span
>
</div>
</div>
</main>
</body>
</html>

View File

@@ -0,0 +1,6 @@
import init, { run_app } from './pkg/candle_wasm_example_whisper.js';
async function main() {
await init('/pkg/candle_wasm_example_whisper_bg.wasm');
run_app();
}
main()

View File

@@ -0,0 +1,283 @@
use crate::console_log;
use crate::worker::{ModelData, Segment, Worker, WorkerInput, WorkerOutput};
use js_sys::Date;
use wasm_bindgen::prelude::*;
use wasm_bindgen_futures::JsFuture;
use yew::{html, Component, Context, Html};
use yew_agent::{Bridge, Bridged};
const SAMPLE_NAMES: [&str; 6] = [
"audios/samples_jfk.wav",
"audios/samples_a13.wav",
"audios/samples_gb0.wav",
"audios/samples_gb1.wav",
"audios/samples_hp0.wav",
"audios/samples_mm0.wav",
];
async fn fetch_url(url: &str) -> Result<Vec<u8>, JsValue> {
use web_sys::{Request, RequestCache, RequestInit, RequestMode, Response};
let window = web_sys::window().ok_or("window")?;
let mut opts = RequestInit::new();
let opts = opts
.method("GET")
.mode(RequestMode::Cors)
.cache(RequestCache::NoCache);
let request = Request::new_with_str_and_init(url, opts)?;
let resp_value = JsFuture::from(window.fetch_with_request(&request)).await?;
// `resp_value` is a `Response` object.
assert!(resp_value.is_instance_of::<Response>());
let resp: Response = resp_value.dyn_into()?;
let data = JsFuture::from(resp.blob()?).await?;
let blob = web_sys::Blob::from(data);
let array_buffer = JsFuture::from(blob.array_buffer()).await?;
let data = js_sys::Uint8Array::new(&array_buffer).to_vec();
Ok(data)
}
pub enum Msg {
Run(usize),
UpdateStatus(String),
SetDecoder(ModelData),
WorkerInMsg(WorkerInput),
WorkerOutMsg(Result<WorkerOutput, String>),
}
pub struct CurrentDecode {
start_time: Option<f64>,
}
pub struct App {
status: String,
loaded: bool,
segments: Vec<Segment>,
current_decode: Option<CurrentDecode>,
worker: Box<dyn Bridge<Worker>>,
}
async fn model_data_load() -> Result<ModelData, JsValue> {
let quantized = false;
let is_multilingual = false;
let (tokenizer, mel_filters, weights, config) = if quantized {
console_log!("loading quantized weights");
let tokenizer = fetch_url("quantized/tokenizer-tiny-en.json").await?;
let mel_filters = fetch_url("mel_filters.safetensors").await?;
let weights = fetch_url("quantized/model-tiny-en-q80.gguf").await?;
let config = fetch_url("quantized/config-tiny-en.json").await?;
(tokenizer, mel_filters, weights, config)
} else {
console_log!("loading float weights");
if is_multilingual {
let mel_filters = fetch_url("mel_filters.safetensors").await?;
let tokenizer = fetch_url("whisper-tiny/tokenizer.json").await?;
let weights = fetch_url("whisper-tiny/model.safetensors").await?;
let config = fetch_url("whisper-tiny/config.json").await?;
(tokenizer, mel_filters, weights, config)
} else {
let mel_filters = fetch_url("mel_filters.safetensors").await?;
let tokenizer = fetch_url("whisper-tiny.en/tokenizer.json").await?;
let weights = fetch_url("whisper-tiny.en/model.safetensors").await?;
let config = fetch_url("whisper-tiny.en/config.json").await?;
(tokenizer, mel_filters, weights, config)
}
};
let timestamps = true;
let _task = Some("transcribe".to_string());
console_log!("{}", weights.len());
Ok(ModelData {
tokenizer,
mel_filters,
weights,
config,
quantized,
timestamps,
task: None,
is_multilingual,
language: None,
})
}
fn performance_now() -> Option<f64> {
let window = web_sys::window()?;
let performance = window.performance()?;
Some(performance.now() / 1000.)
}
impl Component for App {
type Message = Msg;
type Properties = ();
fn create(ctx: &Context<Self>) -> Self {
let status = "loading weights".to_string();
let cb = {
let link = ctx.link().clone();
move |e| link.send_message(Self::Message::WorkerOutMsg(e))
};
let worker = Worker::bridge(std::rc::Rc::new(cb));
Self {
status,
segments: vec![],
current_decode: None,
worker,
loaded: false,
}
}
fn rendered(&mut self, ctx: &Context<Self>, first_render: bool) {
if first_render {
ctx.link().send_future(async {
match model_data_load().await {
Err(err) => {
let status = format!("{err:?}");
Msg::UpdateStatus(status)
}
Ok(model_data) => Msg::SetDecoder(model_data),
}
});
}
}
fn update(&mut self, ctx: &Context<Self>, msg: Self::Message) -> bool {
match msg {
Msg::SetDecoder(md) => {
self.status = "weights loaded successfully!".to_string();
self.loaded = true;
console_log!("loaded weights");
self.worker.send(WorkerInput::ModelData(md));
true
}
Msg::Run(sample_index) => {
let sample = SAMPLE_NAMES[sample_index];
if self.current_decode.is_some() {
self.status = "already decoding some sample at the moment".to_string()
} else {
let start_time = performance_now();
self.current_decode = Some(CurrentDecode { start_time });
self.status = format!("decoding {sample}");
self.segments.clear();
ctx.link().send_future(async move {
match fetch_url(sample).await {
Err(err) => {
let output = Err(format!("decoding error: {err:?}"));
// Mimic a worker output to so as to release current_decode
Msg::WorkerOutMsg(output)
}
Ok(wav_bytes) => {
Msg::WorkerInMsg(WorkerInput::DecodeTask { wav_bytes })
}
}
})
}
//
true
}
Msg::WorkerOutMsg(output) => {
let dt = self.current_decode.as_ref().and_then(|current_decode| {
current_decode.start_time.and_then(|start_time| {
performance_now().map(|stop_time| stop_time - start_time)
})
});
self.current_decode = None;
match output {
Ok(WorkerOutput::WeightsLoaded) => self.status = "weights loaded!".to_string(),
Ok(WorkerOutput::Decoded(segments)) => {
self.status = match dt {
None => "decoding succeeded!".to_string(),
Some(dt) => format!("decoding succeeded in {:.2}s", dt),
};
self.segments = segments;
}
Err(err) => {
self.status = format!("decoding error {err:?}");
}
}
true
}
Msg::WorkerInMsg(inp) => {
self.worker.send(inp);
true
}
Msg::UpdateStatus(status) => {
self.status = status;
true
}
}
}
fn view(&self, ctx: &Context<Self>) -> Html {
html! {
<div>
<table>
<thead>
<tr>
<th>{"Sample"}</th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
{
SAMPLE_NAMES.iter().enumerate().map(|(i, name)| { html! {
<tr>
<th>{name}</th>
<th><audio controls=true src={format!("./{name}")}></audio></th>
{ if self.loaded {
html!(<th><button class="button" onclick={ctx.link().callback(move |_| Msg::Run(i))}> { "run" }</button></th>)
}else{html!()}
}
</tr>
}
}).collect::<Html>()
}
</tbody>
</table>
<h2>
{&self.status}
</h2>
{
if !self.loaded{
html! { <progress id="progress-bar" aria-label="loading weights…"></progress> }
} else if self.current_decode.is_some() {
html! { <progress id="progress-bar" aria-label="decoding…"></progress> }
} else { html!{
<blockquote>
<p>
{
self.segments.iter().map(|segment| { html! {
<>
<i>
{
format!("{:.2}s-{:.2}s: (avg-logprob: {:.4}, no-speech-prob: {:.4})",
segment.start,
segment.start + segment.duration,
segment.dr.avg_logprob,
segment.dr.no_speech_prob,
)
}
</i>
<br/ >
{&segment.dr.text}
<br/ >
</>
} }).collect::<Html>()
}
</p>
</blockquote>
}
}
}
// Display the current date and time the page was rendered
<p class="footer">
{ "Rendered: " }
{ String::from(Date::new_0().to_string()) }
</p>
</div>
}
}
}

View File

@@ -0,0 +1,215 @@
// Audio processing code, adapted from whisper.cpp
// https://github.com/ggerganov/whisper.cpp
use super::worker;
pub trait Float: num_traits::Float + num_traits::FloatConst + num_traits::NumAssign {}
impl Float for f32 {}
impl Float for f64 {}
// https://github.com/ggerganov/whisper.cpp/blob/4774d2feb01a772a15de81ffc34b34a1f294f020/whisper.cpp#L2357
fn fft<T: Float>(inp: &[T]) -> Vec<T> {
let n = inp.len();
let zero = T::zero();
if n == 1 {
return vec![inp[0], zero];
}
if n % 2 == 1 {
return dft(inp);
}
let mut out = vec![zero; n * 2];
let mut even = Vec::with_capacity(n / 2);
let mut odd = Vec::with_capacity(n / 2);
for (i, &inp) in inp.iter().enumerate() {
if i % 2 == 0 {
even.push(inp)
} else {
odd.push(inp);
}
}
let even_fft = fft(&even);
let odd_fft = fft(&odd);
let two_pi = T::PI() + T::PI();
let n_t = T::from(n).unwrap();
for k in 0..n / 2 {
let k_t = T::from(k).unwrap();
let theta = two_pi * k_t / n_t;
let re = theta.cos();
let im = -theta.sin();
let re_odd = odd_fft[2 * k];
let im_odd = odd_fft[2 * k + 1];
out[2 * k] = even_fft[2 * k] + re * re_odd - im * im_odd;
out[2 * k + 1] = even_fft[2 * k + 1] + re * im_odd + im * re_odd;
out[2 * (k + n / 2)] = even_fft[2 * k] - re * re_odd + im * im_odd;
out[2 * (k + n / 2) + 1] = even_fft[2 * k + 1] - re * im_odd - im * re_odd;
}
out
}
// https://github.com/ggerganov/whisper.cpp/blob/4774d2feb01a772a15de81ffc34b34a1f294f020/whisper.cpp#L2337
fn dft<T: Float>(inp: &[T]) -> Vec<T> {
let zero = T::zero();
let n = inp.len();
let two_pi = T::PI() + T::PI();
let mut out = Vec::with_capacity(2 * n);
let n_t = T::from(n).unwrap();
for k in 0..n {
let k_t = T::from(k).unwrap();
let mut re = zero;
let mut im = zero;
for (j, &inp) in inp.iter().enumerate() {
let j_t = T::from(j).unwrap();
let angle = two_pi * k_t * j_t / n_t;
re += inp * angle.cos();
im -= inp * angle.sin();
}
out.push(re);
out.push(im);
}
out
}
#[allow(clippy::too_many_arguments)]
// https://github.com/ggerganov/whisper.cpp/blob/4774d2feb01a772a15de81ffc34b34a1f294f020/whisper.cpp#L2414
fn log_mel_spectrogram_w<T: Float>(
ith: usize,
hann: &[T],
samples: &[T],
filters: &[T],
fft_size: usize,
fft_step: usize,
speed_up: bool,
n_len: usize,
n_mel: usize,
n_threads: usize,
) -> Vec<T> {
let n_fft = if speed_up {
1 + fft_size / 4
} else {
1 + fft_size / 2
};
let zero = T::zero();
let half = T::from(0.5).unwrap();
let mut fft_in = vec![zero; fft_size];
let mut mel = vec![zero; n_len * n_mel];
for i in (ith..n_len).step_by(n_threads) {
let offset = i * fft_step;
// apply Hanning window
for j in 0..fft_size {
fft_in[j] = if offset + j < samples.len() {
hann[j] * samples[offset + j]
} else {
zero
}
}
// FFT -> mag^2
let mut fft_out: Vec<T> = fft(&fft_in);
for j in 0..fft_size {
fft_out[j] = fft_out[2 * j] * fft_out[2 * j] + fft_out[2 * j + 1] * fft_out[2 * j + 1];
}
for j in 1..fft_size / 2 {
let v = fft_out[fft_size - j];
fft_out[j] += v;
}
if speed_up {
// scale down in the frequency domain results in a speed up in the time domain
for j in 0..n_fft {
fft_out[j] = half * (fft_out[2 * j] + fft_out[2 * j + 1]);
}
}
// mel spectrogram
for j in 0..n_mel {
let mut sum = zero;
for k in 0..n_fft {
sum += fft_out[k] * filters[j * n_fft + k];
}
mel[j * n_len + i] = T::max(sum, T::from(1e-10).unwrap()).log10();
}
}
mel
}
fn log_mel_spectrogram_<T: Float + std::fmt::Display>(
samples: &[T],
filters: &[T],
fft_size: usize,
fft_step: usize,
n_mel: usize,
speed_up: bool,
) -> Vec<T> {
let zero = T::zero();
let two_pi = T::PI() + T::PI();
let half = T::from(0.5).unwrap();
let one = T::from(1.0).unwrap();
let four = T::from(4.0).unwrap();
let fft_size_t = T::from(fft_size).unwrap();
let hann: Vec<T> = (0..fft_size)
.map(|i| half * (one - ((two_pi * T::from(i).unwrap()) / fft_size_t).cos()))
.collect();
let n_len = samples.len() / fft_step;
// pad audio with at least one extra chunk of zeros
let pad = 100 * worker::m::CHUNK_LENGTH / 2;
let n_len = if n_len % pad != 0 {
(n_len / pad + 1) * pad
} else {
n_len
};
let n_len = n_len + pad;
let samples = {
let mut samples_padded = samples.to_vec();
let to_add = n_len * fft_step - samples.len();
samples_padded.extend(std::iter::repeat(zero).take(to_add));
samples_padded
};
// Use a single thread for now.
let mut mel = log_mel_spectrogram_w(
0, &hann, &samples, filters, fft_size, fft_step, speed_up, n_len, n_mel, 1,
);
let mmax = mel
.iter()
.max_by(|&u, &v| u.partial_cmp(v).unwrap_or(std::cmp::Ordering::Greater))
.copied()
.unwrap_or(zero)
- T::from(8).unwrap();
for m in mel.iter_mut() {
let v = T::max(*m, mmax);
*m = v / four + one
}
mel
}
pub fn pcm_to_mel<T: Float + std::fmt::Display>(
cfg: &worker::m::Config,
samples: &[T],
filters: &[T],
) -> anyhow::Result<Vec<T>> {
let mel = log_mel_spectrogram_(
samples,
filters,
worker::m::N_FFT,
worker::m::HOP_LENGTH,
cfg.num_mel_bins,
false,
);
Ok(mel)
}

View File

@@ -0,0 +1,4 @@
fn main() {
wasm_logger::init(wasm_logger::Config::new(log::Level::Trace));
yew::Renderer::<candle_wasm_example_whisper::App>::new().render();
}

View File

@@ -0,0 +1,53 @@
use candle_wasm_example_whisper::worker::{Decoder as D, ModelData};
use wasm_bindgen::prelude::*;
#[wasm_bindgen]
pub struct Decoder {
decoder: D,
}
#[wasm_bindgen]
impl Decoder {
#[wasm_bindgen(constructor)]
#[allow(clippy::too_many_arguments)]
pub fn new(
weights: Vec<u8>,
tokenizer: Vec<u8>,
mel_filters: Vec<u8>,
config: Vec<u8>,
quantized: bool,
is_multilingual: bool,
timestamps: bool,
task: Option<String>,
language: Option<String>,
) -> Result<Decoder, JsError> {
let decoder = D::load(ModelData {
tokenizer,
mel_filters,
config,
quantized,
weights,
is_multilingual,
timestamps,
task,
language,
});
match decoder {
Ok(decoder) => Ok(Self { decoder }),
Err(e) => Err(JsError::new(&e.to_string())),
}
}
#[wasm_bindgen]
pub fn decode(&mut self, wav_input: Vec<u8>) -> Result<String, JsError> {
let segments = self
.decoder
.convert_and_run(&wav_input)
.map_err(|e| JsError::new(&e.to_string()))?;
let json = serde_json::to_string(&segments)?;
Ok(json)
}
}
fn main() {}

View File

@@ -0,0 +1,4 @@
use yew_agent::PublicWorker;
fn main() {
candle_wasm_example_whisper::Worker::register();
}

View File

@@ -0,0 +1,101 @@
pub const LANGUAGES: [(&str, &str); 99] = [
("en", "english"),
("zh", "chinese"),
("de", "german"),
("es", "spanish"),
("ru", "russian"),
("ko", "korean"),
("fr", "french"),
("ja", "japanese"),
("pt", "portuguese"),
("tr", "turkish"),
("pl", "polish"),
("ca", "catalan"),
("nl", "dutch"),
("ar", "arabic"),
("sv", "swedish"),
("it", "italian"),
("id", "indonesian"),
("hi", "hindi"),
("fi", "finnish"),
("vi", "vietnamese"),
("he", "hebrew"),
("uk", "ukrainian"),
("el", "greek"),
("ms", "malay"),
("cs", "czech"),
("ro", "romanian"),
("da", "danish"),
("hu", "hungarian"),
("ta", "tamil"),
("no", "norwegian"),
("th", "thai"),
("ur", "urdu"),
("hr", "croatian"),
("bg", "bulgarian"),
("lt", "lithuanian"),
("la", "latin"),
("mi", "maori"),
("ml", "malayalam"),
("cy", "welsh"),
("sk", "slovak"),
("te", "telugu"),
("fa", "persian"),
("lv", "latvian"),
("bn", "bengali"),
("sr", "serbian"),
("az", "azerbaijani"),
("sl", "slovenian"),
("kn", "kannada"),
("et", "estonian"),
("mk", "macedonian"),
("br", "breton"),
("eu", "basque"),
("is", "icelandic"),
("hy", "armenian"),
("ne", "nepali"),
("mn", "mongolian"),
("bs", "bosnian"),
("kk", "kazakh"),
("sq", "albanian"),
("sw", "swahili"),
("gl", "galician"),
("mr", "marathi"),
("pa", "punjabi"),
("si", "sinhala"),
("km", "khmer"),
("sn", "shona"),
("yo", "yoruba"),
("so", "somali"),
("af", "afrikaans"),
("oc", "occitan"),
("ka", "georgian"),
("be", "belarusian"),
("tg", "tajik"),
("sd", "sindhi"),
("gu", "gujarati"),
("am", "amharic"),
("yi", "yiddish"),
("lo", "lao"),
("uz", "uzbek"),
("fo", "faroese"),
("ht", "haitian creole"),
("ps", "pashto"),
("tk", "turkmen"),
("nn", "nynorsk"),
("mt", "maltese"),
("sa", "sanskrit"),
("lb", "luxembourgish"),
("my", "myanmar"),
("bo", "tibetan"),
("tl", "tagalog"),
("mg", "malagasy"),
("as", "assamese"),
("tt", "tatar"),
("haw", "hawaiian"),
("ln", "lingala"),
("ha", "hausa"),
("ba", "bashkir"),
("jw", "javanese"),
("su", "sundanese"),
];

View File

@@ -0,0 +1,29 @@
pub const WITH_TIMER: bool = true;
struct Timer {
label: &'static str,
}
// impl Timer {
// fn new(label: &'static str) -> Self {
// if WITH_TIMER {
// web_sys::console::time_with_label(label);
// }
// Self { label }
// }
// }
impl Drop for Timer {
fn drop(&mut self) {
if WITH_TIMER {
web_sys::console::time_end_with_label(self.label)
}
}
}
mod app;
mod audio;
pub mod languages;
pub mod worker;
pub use app::App;
pub use worker::Worker;

View File

@@ -0,0 +1,492 @@
use crate::languages::LANGUAGES;
use anyhow::Error as E;
use candle::{safetensors::Load, DType, Device, IndexOp, Tensor, D};
use candle_nn::{ops::softmax, VarBuilder};
pub use candle_transformers::models::whisper::{self as m, Config};
use rand::{distributions::Distribution, rngs::StdRng, SeedableRng};
use serde::{Deserialize, Serialize};
use tokenizers::Tokenizer;
use wasm_bindgen::prelude::*;
use yew_agent::{HandlerId, Public, WorkerLink};
#[wasm_bindgen]
extern "C" {
// Use `js_namespace` here to bind `console.log(..)` instead of just
// `log(..)`
#[wasm_bindgen(js_namespace = console)]
pub fn log(s: &str);
}
#[macro_export]
macro_rules! console_log {
// Note that this is using the `log` function imported above during
// `bare_bones`
($($t:tt)*) => ($crate::worker::log(&format_args!($($t)*).to_string()))
}
pub const DTYPE: DType = DType::F32;
pub enum Model {
Normal(m::model::Whisper),
Quantized(m::quantized_model::Whisper),
}
// Maybe we should use some traits rather than doing the dispatch for all these.
impl Model {
pub fn config(&self) -> &Config {
match self {
Self::Normal(m) => &m.config,
Self::Quantized(m) => &m.config,
}
}
pub fn encoder_forward(&mut self, x: &Tensor, flush: bool) -> candle::Result<Tensor> {
match self {
Self::Normal(m) => m.encoder.forward(x, flush),
Self::Quantized(m) => m.encoder.forward(x, flush),
}
}
pub fn decoder_forward(
&mut self,
x: &Tensor,
xa: &Tensor,
flush: bool,
) -> candle::Result<Tensor> {
match self {
Self::Normal(m) => m.decoder.forward(x, xa, flush),
Self::Quantized(m) => m.decoder.forward(x, xa, flush),
}
}
pub fn decoder_final_linear(&self, x: &Tensor) -> candle::Result<Tensor> {
match self {
Self::Normal(m) => m.decoder.final_linear(x),
Self::Quantized(m) => m.decoder.final_linear(x),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DecodingResult {
pub tokens: Vec<u32>,
pub text: String,
pub avg_logprob: f64,
pub no_speech_prob: f64,
temperature: f64,
compression_ratio: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Segment {
pub start: f64,
pub duration: f64,
pub dr: DecodingResult,
}
pub struct Decoder {
model: Model,
rng: rand::rngs::StdRng,
task: Option<Task>,
language: Option<String>,
is_multilingual: bool,
mel_filters: Vec<f32>,
timestamps: bool,
tokenizer: Tokenizer,
suppress_tokens: Tensor,
sot_token: u32,
transcribe_token: u32,
translate_token: u32,
eot_token: u32,
no_speech_token: u32,
no_timestamps_token: u32,
}
impl Decoder {
#[allow(clippy::too_many_arguments)]
fn new(
model: Model,
tokenizer: Tokenizer,
mel_filters: Vec<f32>,
device: &Device,
task: Option<Task>,
language: Option<String>,
is_multilingual: bool,
timestamps: bool,
) -> anyhow::Result<Self> {
let suppress_tokens: Vec<f32> = (0..model.config().vocab_size as u32)
.map(|i| {
if model.config().suppress_tokens.contains(&i) {
f32::NEG_INFINITY
} else {
0f32
}
})
.collect();
let no_timestamps_token = token_id(&tokenizer, m::NO_TIMESTAMPS_TOKEN)?;
let suppress_tokens = Tensor::new(suppress_tokens.as_slice(), device)?;
let sot_token = token_id(&tokenizer, m::SOT_TOKEN)?;
let transcribe_token = token_id(&tokenizer, m::TRANSCRIBE_TOKEN)?;
let translate_token = token_id(&tokenizer, m::TRANSLATE_TOKEN)?;
let eot_token = token_id(&tokenizer, m::EOT_TOKEN)?;
let no_speech_token = m::NO_SPEECH_TOKENS
.iter()
.find_map(|token| token_id(&tokenizer, token).ok());
let no_speech_token = match no_speech_token {
None => anyhow::bail!("unable to find any non-speech token"),
Some(n) => n,
};
let seed = 299792458;
Ok(Self {
model,
rng: StdRng::seed_from_u64(seed),
tokenizer,
mel_filters,
task,
timestamps,
language,
is_multilingual,
suppress_tokens,
sot_token,
transcribe_token,
translate_token,
eot_token,
no_speech_token,
no_timestamps_token,
})
}
fn decode(&mut self, mel: &Tensor, t: f64) -> anyhow::Result<DecodingResult> {
let model = &mut self.model;
let language_token = match (self.is_multilingual, &self.language) {
(true, None) => Some(detect_language(model, &self.tokenizer, mel)?),
(false, None) => None,
(true, Some(language)) => {
match token_id(&self.tokenizer, &format!("<|{:?}|>", self.language)) {
Ok(token_id) => Some(token_id),
Err(_) => anyhow::bail!("language {language} is not supported"),
}
}
(false, Some(_)) => {
anyhow::bail!("a language cannot be set for non-multilingual models")
}
};
let audio_features = model.encoder_forward(mel, true)?;
println!("audio features: {:?}", audio_features.dims());
let sample_len = model.config().max_target_positions / 2;
let mut sum_logprob = 0f64;
let mut no_speech_prob = f64::NAN;
let mut tokens = vec![self.sot_token];
if let Some(language_token) = language_token {
tokens.push(language_token);
}
match self.task {
None | Some(Task::Transcribe) => tokens.push(self.transcribe_token),
Some(Task::Translate) => tokens.push(self.translate_token),
}
if !self.timestamps {
tokens.push(self.no_timestamps_token);
}
for i in 0..sample_len {
let tokens_t = Tensor::new(tokens.as_slice(), mel.device())?;
// The model expects a batch dim but this inference loop does not handle
// it so we add it at this point.
let tokens_t = tokens_t.unsqueeze(0)?;
let ys = model.decoder_forward(&tokens_t, &audio_features, i == 0)?;
// Extract the no speech probability on the first iteration by looking at the first
// token logits and the probability for the according token.
if i == 0 {
let logits = model.decoder_final_linear(&ys.i(..1)?)?.i(0)?.i(0)?;
no_speech_prob = softmax(&logits, 0)?
.i(self.no_speech_token as usize)?
.to_scalar::<f32>()? as f64;
}
let (_, seq_len, _) = ys.dims3()?;
let logits = model
.decoder_final_linear(&ys.i((..1, seq_len - 1..))?)?
.i(0)?
.i(0)?;
// TODO: Besides suppress tokens, we should apply the heuristics from
// ApplyTimestampRules, i.e.:
// - Timestamps come in pairs, except before EOT.
// - Timestamps should be non-decreasing.
// - If the sum of the probabilities of timestamps is higher than any other tokens,
// only consider timestamps when sampling.
// https://github.com/openai/whisper/blob/e8622f9afc4eba139bf796c210f5c01081000472/whisper/decoding.py#L439
let logits = logits.broadcast_add(&self.suppress_tokens)?;
let next_token = if t > 0f64 {
let prs = softmax(&(&logits / t)?, 0)?;
let logits_v: Vec<f32> = prs.to_vec1()?;
let distr = rand::distributions::WeightedIndex::new(&logits_v)?;
distr.sample(&mut self.rng) as u32
} else {
let logits_v: Vec<f32> = logits.to_vec1()?;
logits_v
.iter()
.enumerate()
.max_by(|(_, u), (_, v)| u.total_cmp(v))
.map(|(i, _)| i as u32)
.unwrap()
};
tokens.push(next_token);
let prob = softmax(&logits, candle::D::Minus1)?
.i(next_token as usize)?
.to_scalar::<f32>()? as f64;
if next_token == self.eot_token || tokens.len() > model.config().max_target_positions {
break;
}
sum_logprob += prob.ln();
}
let text = self.tokenizer.decode(&tokens, true).map_err(E::msg)?;
let avg_logprob = sum_logprob / tokens.len() as f64;
Ok(DecodingResult {
tokens,
text,
avg_logprob,
no_speech_prob,
temperature: t,
compression_ratio: f64::NAN,
})
}
fn decode_with_fallback(&mut self, segment: &Tensor) -> anyhow::Result<DecodingResult> {
for (i, &t) in m::TEMPERATURES.iter().enumerate() {
let dr: Result<DecodingResult, _> = self.decode(segment, t);
if i == m::TEMPERATURES.len() - 1 {
return dr;
}
// On errors, we try again with a different temperature.
match dr {
Ok(dr) => {
let needs_fallback = dr.compression_ratio > m::COMPRESSION_RATIO_THRESHOLD
|| dr.avg_logprob < m::LOGPROB_THRESHOLD;
if !needs_fallback || dr.no_speech_prob > m::NO_SPEECH_THRESHOLD {
return Ok(dr);
}
}
Err(err) => {
console_log!("Error running at {t}: {err}")
}
}
}
unreachable!()
}
fn run(&mut self, mel: &Tensor) -> anyhow::Result<Vec<Segment>> {
let (_, _, content_frames) = mel.dims3()?;
let mut seek = 0;
let mut segments = vec![];
while seek < content_frames {
let time_offset = (seek * m::HOP_LENGTH) as f64 / m::SAMPLE_RATE as f64;
let segment_size = usize::min(content_frames - seek, m::N_FRAMES);
let mel_segment = mel.narrow(2, seek, segment_size)?;
let segment_duration = (segment_size * m::HOP_LENGTH) as f64 / m::SAMPLE_RATE as f64;
let dr = self.decode_with_fallback(&mel_segment)?;
seek += segment_size;
if dr.no_speech_prob > m::NO_SPEECH_THRESHOLD && dr.avg_logprob < m::LOGPROB_THRESHOLD {
console_log!("no speech detected, skipping {seek} {dr:?}");
continue;
}
let segment = Segment {
start: time_offset,
duration: segment_duration,
dr,
};
console_log!("{seek}: {segment:?}");
segments.push(segment)
}
Ok(segments)
}
pub fn load(md: ModelData) -> anyhow::Result<Self> {
let device = Device::Cpu;
let tokenizer = Tokenizer::from_bytes(&md.tokenizer).map_err(E::msg)?;
let mel_filters = safetensors::tensor::SafeTensors::deserialize(&md.mel_filters)?;
let mel_filters = mel_filters.tensor("mel_80")?.load(&device)?;
console_log!("loaded mel filters {:?}", mel_filters.shape());
let mel_filters = mel_filters.flatten_all()?.to_vec1::<f32>()?;
let config: Config = serde_json::from_slice(&md.config)?;
let model = if md.quantized {
let vb = candle_transformers::quantized_var_builder::VarBuilder::from_gguf_buffer(
&md.weights,
)?;
Model::Quantized(m::quantized_model::Whisper::load(&vb, config)?)
} else {
let vb = VarBuilder::from_buffered_safetensors(md.weights, m::DTYPE, &device)?;
Model::Normal(m::model::Whisper::load(&vb, config)?)
};
console_log!("done loading model");
let task = match md.task.as_deref() {
Some("translate") => Some(Task::Translate),
_ => Some(Task::Transcribe),
};
let decoder = Self::new(
model,
tokenizer,
mel_filters,
&device,
task,
md.language,
md.is_multilingual,
md.timestamps,
)?;
Ok(decoder)
}
pub fn convert_and_run(&mut self, wav_input: &[u8]) -> anyhow::Result<Vec<Segment>> {
let device = Device::Cpu;
let mut wav_input = std::io::Cursor::new(wav_input);
let (header, data) = wav::read(&mut wav_input)?;
console_log!("loaded wav data: {header:?}");
if header.sampling_rate != m::SAMPLE_RATE as u32 {
anyhow::bail!("wav file must have a {} sampling rate", m::SAMPLE_RATE);
}
let data = data.as_sixteen().expect("expected 16 bit wav file");
let pcm_data: Vec<_> = data[..data.len() / header.channel_count as usize]
.iter()
.map(|v| *v as f32 / 32768.)
.collect();
console_log!("pcm data loaded {}", pcm_data.len());
let mel = crate::audio::pcm_to_mel(self.model.config(), &pcm_data, &self.mel_filters)?;
let mel_len = mel.len();
let n_mels = self.model.config().num_mel_bins;
let mel = Tensor::from_vec(mel, (1, n_mels, mel_len / n_mels), &device)?;
console_log!("loaded mel: {:?}", mel.dims());
let segments = self.run(&mel)?;
Ok(segments)
}
}
/// Returns the token id for the selected language.
pub fn detect_language(model: &mut Model, tokenizer: &Tokenizer, mel: &Tensor) -> Result<u32, E> {
console_log!("detecting language");
let (_bsize, _, seq_len) = mel.dims3()?;
let mel = mel.narrow(
2,
0,
usize::min(seq_len, model.config().max_source_positions),
)?;
let device = mel.device();
let language_token_ids = LANGUAGES
.iter()
.map(|(t, _)| token_id(tokenizer, &format!("<|{t}|>")))
.map(|e| e.map_err(E::msg))
.collect::<Result<Vec<_>, E>>()?;
let sot_token = token_id(tokenizer, m::SOT_TOKEN)?;
let audio_features = model.encoder_forward(&mel, true)?;
let tokens = Tensor::new(&[[sot_token]], device)?;
let language_token_ids = Tensor::new(language_token_ids.as_slice(), device)?;
let ys = model.decoder_forward(&tokens, &audio_features, true)?;
let logits = model.decoder_final_linear(&ys.i(..1)?)?.i(0)?.i(0)?;
let logits = logits.index_select(&language_token_ids, 0)?;
let probs = candle_nn::ops::softmax(&logits, D::Minus1)?;
let probs = probs.to_vec1::<f32>()?;
let mut probs = LANGUAGES.iter().zip(probs.iter()).collect::<Vec<_>>();
probs.sort_by(|(_, p1), (_, p2)| p2.total_cmp(p1));
for ((_, language), p) in probs.iter().take(5) {
println!("{language}: {p}")
}
let token = &format!("<|{}|>", probs[0].0 .0);
let language = token_id(tokenizer, token)?;
console_log!("detected language: {language} {token}");
Ok(language)
}
pub fn token_id(tokenizer: &Tokenizer, token: &str) -> candle::Result<u32> {
match tokenizer.token_to_id(token) {
None => candle::bail!("no token-id for {token}"),
Some(id) => Ok(id),
}
}
#[derive(Serialize, Deserialize, Clone, Copy, Debug)]
pub enum Task {
Transcribe,
Translate,
}
// Communication to the worker happens through bincode, the model weights and configs are fetched
// on the main thread and transferred via the following structure.
#[derive(Serialize, Deserialize)]
pub struct ModelData {
pub weights: Vec<u8>,
pub tokenizer: Vec<u8>,
pub mel_filters: Vec<u8>,
pub config: Vec<u8>,
pub quantized: bool,
pub timestamps: bool,
pub is_multilingual: bool,
pub language: Option<String>,
pub task: Option<String>,
}
pub struct Worker {
link: WorkerLink<Self>,
decoder: Option<Decoder>,
}
#[derive(Serialize, Deserialize)]
pub enum WorkerInput {
ModelData(ModelData),
DecodeTask { wav_bytes: Vec<u8> },
}
#[derive(Serialize, Deserialize)]
pub enum WorkerOutput {
Decoded(Vec<Segment>),
WeightsLoaded,
}
impl yew_agent::Worker for Worker {
type Input = WorkerInput;
type Message = ();
type Output = Result<WorkerOutput, String>;
type Reach = Public<Self>;
fn create(link: WorkerLink<Self>) -> Self {
Self {
link,
decoder: None,
}
}
fn update(&mut self, _msg: Self::Message) {
// no messaging
}
fn handle_input(&mut self, msg: Self::Input, id: HandlerId) {
let output = match msg {
WorkerInput::ModelData(md) => match Decoder::load(md) {
Ok(decoder) => {
self.decoder = Some(decoder);
Ok(WorkerOutput::WeightsLoaded)
}
Err(err) => Err(format!("model creation error {err:?}")),
},
WorkerInput::DecodeTask { wav_bytes } => match &mut self.decoder {
None => Err("model has not been set".to_string()),
Some(decoder) => decoder
.convert_and_run(&wav_bytes)
.map(WorkerOutput::Decoded)
.map_err(|e| e.to_string()),
},
};
self.link.respond(id, output);
}
fn name_of_resource() -> &'static str {
"worker.js"
}
fn resource_path_is_relative() -> bool {
true
}
}

View File

@@ -0,0 +1,116 @@
//load the candle Whisper decoder wasm module
import init, { Decoder } from "./build/m.js";
async function fetchArrayBuffer(url) {
const cacheName = "whisper-candle-cache";
const cache = await caches.open(cacheName);
const cachedResponse = await cache.match(url);
if (cachedResponse) {
const data = await cachedResponse.arrayBuffer();
return new Uint8Array(data);
}
const res = await fetch(url, { cache: "force-cache" });
cache.put(url, res.clone());
return new Uint8Array(await res.arrayBuffer());
}
class Whisper {
static instance = {};
// Retrieve the Whisper model. When called for the first time,
// this will load the model and save it for future use.
static async getInstance(params) {
const {
weightsURL,
modelID,
tokenizerURL,
mel_filtersURL,
configURL,
quantized,
is_multilingual,
timestamps,
task,
language,
} = params;
// load individual modelID only once
if (!this.instance[modelID]) {
await init();
self.postMessage({ status: "loading", message: "Loading Model" });
const [
weightsArrayU8,
tokenizerArrayU8,
mel_filtersArrayU8,
configArrayU8,
] = await Promise.all([
fetchArrayBuffer(weightsURL),
fetchArrayBuffer(tokenizerURL),
fetchArrayBuffer(mel_filtersURL),
fetchArrayBuffer(configURL),
]);
this.instance[modelID] = new Decoder(
weightsArrayU8,
tokenizerArrayU8,
mel_filtersArrayU8,
configArrayU8,
quantized,
is_multilingual,
timestamps,
task,
language
);
} else {
self.postMessage({ status: "loading", message: "Model Already Loaded" });
}
return this.instance[modelID];
}
}
self.addEventListener("message", async (event) => {
const {
weightsURL,
modelID,
tokenizerURL,
configURL,
mel_filtersURL,
audioURL,
} = event.data;
try {
self.postMessage({ status: "decoding", message: "Starting Decoder" });
let quantized = false;
if (modelID.includes("quantized")) {
quantized = true;
}
let is_multilingual = false;
if (modelID.includes("multilingual")) {
is_multilingual = true;
}
let timestamps = true;
const decoder = await Whisper.getInstance({
weightsURL,
modelID,
tokenizerURL,
mel_filtersURL,
configURL,
quantized,
is_multilingual,
timestamps,
task: null,
language: null,
});
self.postMessage({ status: "decoding", message: "Loading Audio" });
const audioArrayU8 = await fetchArrayBuffer(audioURL);
self.postMessage({ status: "decoding", message: "Running Decoder..." });
const segments = decoder.decode(audioArrayU8);
// Send the segment back to the main thread as JSON
self.postMessage({
status: "complete",
message: "complete",
output: JSON.parse(segments),
});
} catch (e) {
self.postMessage({ error: e });
}
});

1
gitea-whisper-rs Submodule

Submodule gitea-whisper-rs added at dd62f2b9f6

View File

@@ -47,37 +47,60 @@ class Reader(threading.Thread):
self.name = os.environ.get("MIC_NAME", "pulse_monitor")
if not self.name:
for index, name in enumerate(sr.Microphone.list_microphone_names()):
print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
log("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
exit()
self.inq = inq
self.outq = outq
def run(self):
log("Reader.run: start")
try:
idx = [
def mic_idx(self):
mics = []
while not mics and not self.should_stop():
log(f'searching for one of {self.name.split(",")} in {sr.Microphone.list_microphone_names()}...')
time.sleep(1)
mics = [
idx for idx,v in enumerate(
sr.Microphone.list_microphone_names(),
) if v in self.name.split(",")
][0]
with sr.Microphone(device_index=idx) as mic:
while not self.should_stop():
try:
self.outq.put(self._run(mic))
except Exception as e:
if not "timed out" in str(e):
log("Reader.run: error:", e)
]
log("mic#", mics[0])
return mics[0]
def run(self):
try:
log("Reader.run: start")
self._run()
except Exception as e:
log("Reader.run panic:", e)
log("microphones:", sr.Microphone.list_microphone_names())
log("Reader:run: exit:", e)
finally:
self.outq.put(None)
log("Reader.run: stop")
log("Reader.run: stop")
def _run(self):
while not self.should_stop():
time.sleep(3)
mic = sr.Microphone(device_index=self.mic_idx())
try:
mic.__enter__()
while not self.should_stop():
try:
self.outq.put(self.read(mic))
except Exception as e:
if not "timed out" in str(e):
raise e
except Exception as e:
import traceback
traceback.print_exception(e)
log("Reader.run: error:", e)
finally:
try:
mic.__exit__(None, None, None)
except Exception as e:
log("Reader.run.catch: error:", e)
def should_stop(self):
return not self.inq.empty()
def _run(self, mic):
def read(self, mic):
mic_timeout = int(os.environ.get("MIC_TIMEOUT", 5))
r = sr.Recognizer()
return r.listen(
@@ -113,13 +136,19 @@ class Parser(threading.Thread):
p = "/tmp/whisper-cpp.wav"
with open("/tmp/whisper-cpp.wav", "wb") as f:
f.write(wav)
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P=2 rust-whisper", capture_output=True, shell=True)
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P={os.environ.get('P', '2')} rust-whisper", capture_output=True, shell=True)
result = proc.stdout.decode().strip()
if os.environ.get("DEBUG", None):
log("stderr:", proc.stderr.decode().strip())
log("raw transcript:", result)
result = result.replace(">>", "")
result = "".join([i.split("]")[-1] for i in result.split("[")[0]])
result = "".join([i.split(")")[-1] for i in result.split("(")[0]])
for pair in [
("[", "]"),
("(", ")"),
("<", ">"),
("*", "*"),
]:
result = "".join([i.split(pair[1])[-1] for i in result.split(pair[0])[0]])
if os.environ.get("DEBUG", None):
log("annotation-free transcript:", result)
return result
@@ -156,6 +185,8 @@ def _load_dot_notation(v, items):
else:
result.append(subresult)
return result
elif k == "KEYS":
v = [k for k in v]
else:
if isinstance(v, list):
v = v[int(k)]
@@ -200,8 +231,15 @@ class Reactor(threading.Thread):
def load_hotwords_in_yaml_file():
with open(p.split("@")[0], "r") as f:
v = yaml.safe_load(f)
v = load_dot_notation(v, p.split("@")[-1])
return ["".join(i.strip().lower().split()) for i in v if i]
if os.environ.get("DEBUG", None):
log(f'opened {p.split("@")[0]} and got {v}')
result = []
for to_find in [i for i in p.split("@")[-1].split(",") if i]:
if os.environ.get("DEBUG", None):
log(f'finding {to_find} in {v}')
v2 = load_dot_notation(v, to_find)
result.extend(["".join(i.strip().lower().split()) for i in v2 if i])
return result
load_hotwords_in_yaml_file()
return load_hotwords_in_yaml_file
else:
@@ -230,13 +268,23 @@ class Reactor(threading.Thread):
log("Reactor.run: stop")
def handle(self, text):
try:
self._handle(text)
except Exception:
pass
def _handle(self, text):
hotwords = self.load_hotwords()
if os.environ.get("DEBUG", None):
log(f"seeking {hotwords} in {text}")
log(f"seeking {hotwords} in {text}. $HOTWORDS={os.environ.get('HOTWORDS', None)}")
if not hotwords:
if not os.environ.get("HOTWORDS", None):
print(text)
if os.environ.get("DEBUG", None):
log(f"HOTWORDS is False; {text}")
print(text, flush=True)
else:
if os.environ.get("DEBUG", None):
log(f"HOTWORDS is True; {text}")
log(text)
return
cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()])
@@ -256,7 +304,7 @@ class Actor(threading.Thread):
self.pid = int(environ["SIGUSR2"])
self.handle = self.handle_signal
elif os.environ.get("URL", ""):
self.url = environ["URL"]
self.url = os.environ["URL"]
self.handle = self.handle_url
self.headers = [i.split("=")[:2] for i in os.environ.get("HEADERS", "").split("//") if i]
self.body = os.environ.get("BODY", '{"hotword":"{{hotword}}","context":"{{context}}"}')
@@ -276,7 +324,7 @@ class Actor(threading.Thread):
def handle_stdout(self, hotword, context):
log(context)
print(hotword)
print(hotword, flush=True)
def handle_signal(self, hotword, context):
self.handle_stderr(hotword, context)
@@ -295,9 +343,12 @@ class Actor(threading.Thread):
body = self.body
body = body.replace("{{hotword}}", hotword)
body = body.replace("{{context}}", context)
url = self.url
url = url.replace("{{hotword}}", hotword)
url = url.replace("{{context}}", context)
if os.environ.get("DEBUG", "") :
log("POST", self.url, headers, body)
requests.post(self.url, headers=headers, data=body)
log("POST", url, headers, body)
requests.post(url, headers=headers, data=body)
except Exception as e:
log("Actor.handle_url:", e)

View File

@@ -2,3 +2,4 @@ git+https://github.com/openai/whisper.git
soundfile
PyAudio
SpeechRecognition
PyYAML

19
hotwords/transcript.sh Normal file
View File

@@ -0,0 +1,19 @@
#! /bin/bash
set -euo pipefail
cd "$(dirname "$(realpath "$BASH_SOURCE")")"
if ! which rust-whisper-baked; then
(
set -euo pipefail
cd ../rust-whisper-baked
cargo install --path .
)
fi >&2
cat <<EOF
rust-whisper-baked --stream-device 'BlackHole 2ch' --stream-step 30 --stream-retain 1 --stream-{head,tail}=0.25 --threads 9 2> /dev/null
| tee -a "$HOME/Sync/drawful/DnD/bdoob/__log.d/$(date +%Y.%m.%d).transcript.txt"
| tee -a "$HOME/Sync/drawful/DnD/nessira.d/_log.d/$(date +%Y.%m.%d).transcript.txt"
EOF

939
listen-lib/Cargo.lock generated Normal file
View File

@@ -0,0 +1,939 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
[[package]]
name = "alsa"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2562ad8dcf0f789f65c6fdaad8a8a9708ed6b488e649da28c01656ad66b8b47"
dependencies = [
"alsa-sys",
"bitflags 1.3.2",
"libc",
"nix",
]
[[package]]
name = "alsa-sys"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db8fee663d06c4e303404ef5f40488a53e062f89ba8bfed81f42325aafad1527"
dependencies = [
"libc",
"pkg-config",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bindgen"
version = "0.69.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ffcebc3849946a7170a05992aac39da343a90676ab392c51a4280981d6379c2"
dependencies = [
"bitflags 2.4.1",
"cexpr",
"clang-sys",
"lazy_static",
"lazycell",
"peeking_take_while",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn 2.0.41",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
[[package]]
name = "bumpalo"
version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
[[package]]
name = "bytes"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"jobserver",
"libc",
]
[[package]]
name = "cesu8"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clang-sys"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "combine"
version = "4.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
dependencies = [
"bytes",
"memchr",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]]
name = "coreaudio-rs"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "321077172d79c662f64f5071a03120748d5bb652f5231570141be24cfcd2bace"
dependencies = [
"bitflags 1.3.2",
"core-foundation-sys",
"coreaudio-sys",
]
[[package]]
name = "coreaudio-sys"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3120ebb80a9de008e638ad833d4127d50ea3d3a960ea23ea69bc66d9358a028"
dependencies = [
"bindgen",
]
[[package]]
name = "cpal"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d959d90e938c5493000514b446987c07aed46c668faaa7d34d6c7a67b1a578c"
dependencies = [
"alsa",
"core-foundation-sys",
"coreaudio-rs",
"dasp_sample",
"jni 0.19.0",
"js-sys",
"libc",
"mach2",
"ndk",
"ndk-context",
"oboe",
"once_cell",
"parking_lot",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
"windows",
]
[[package]]
name = "dasp_sample"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c87e182de0887fd5361989c677c4e8f5000cd9491d6d563161a8f3a5519fc7f"
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "hashbrown"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
[[package]]
name = "indexmap"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "jni"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6df18c2e3db7e453d3c6ac5b3e9d5182664d28788126d39b91f2d1e22b017ec"
dependencies = [
"cesu8",
"combine",
"jni-sys",
"log",
"thiserror",
"walkdir",
]
[[package]]
name = "jni"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "039022cdf4d7b1cf548d31f60ae783138e5fd42013f6271049d7df7afadef96c"
dependencies = [
"cesu8",
"combine",
"jni-sys",
"log",
"thiserror",
"walkdir",
]
[[package]]
name = "jni-sys"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130"
[[package]]
name = "jobserver"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d"
dependencies = [
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.151"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
[[package]]
name = "libloading"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
dependencies = [
"cfg-if",
"winapi",
]
[[package]]
name = "listen-lib"
version = "0.1.0"
dependencies = [
"cpal",
"signal-hook",
]
[[package]]
name = "lock_api"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "mach2"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709"
dependencies = [
"libc",
]
[[package]]
name = "memchr"
version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "ndk"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "451422b7e4718271c8b5b3aadf5adedba43dc76312454b387e98fae0fc951aa0"
dependencies = [
"bitflags 1.3.2",
"jni-sys",
"ndk-sys",
"num_enum",
"raw-window-handle",
"thiserror",
]
[[package]]
name = "ndk-context"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b"
[[package]]
name = "ndk-sys"
version = "0.4.1+23.1.7779620"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3cf2aae958bd232cac5069850591667ad422d263686d75b52a065f9badeee5a3"
dependencies = [
"jni-sys",
]
[[package]]
name = "nix"
version = "0.24.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069"
dependencies = [
"bitflags 1.3.2",
"cfg-if",
"libc",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "num-derive"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
]
[[package]]
name = "num_enum"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9"
dependencies = [
"num_enum_derive",
]
[[package]]
name = "num_enum_derive"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799"
dependencies = [
"proc-macro-crate",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "oboe"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8868cc237ee02e2d9618539a23a8d228b9bb3fc2e7a5b11eed3831de77c395d0"
dependencies = [
"jni 0.20.0",
"ndk",
"ndk-context",
"num-derive",
"num-traits",
"oboe-sys",
]
[[package]]
name = "oboe-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f44155e7fb718d3cfddcf70690b2b51ac4412f347cd9e4fbe511abe9cd7b5f2"
dependencies = [
"cc",
]
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets 0.48.5",
]
[[package]]
name = "peeking_take_while"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "pkg-config"
version = "0.3.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
[[package]]
name = "proc-macro-crate"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919"
dependencies = [
"once_cell",
"toml_edit",
]
[[package]]
name = "proc-macro2"
version = "1.0.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "raw-window-handle"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2ff9a1f06a88b01621b7ae906ef0211290d1c8a168a15542486a8f61c0833b9"
[[package]]
name = "redox_syscall"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "regex"
version = "1.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "shlex"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380"
[[package]]
name = "signal-hook"
version = "0.3.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801"
dependencies = [
"libc",
"signal-hook-registry",
]
[[package]]
name = "signal-hook-registry"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1"
dependencies = [
"libc",
]
[[package]]
name = "smallvec"
version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
]
[[package]]
name = "toml_datetime"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1"
[[package]]
name = "toml_edit"
version = "0.19.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
dependencies = [
"indexmap",
"toml_datetime",
"winnow",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "walkdir"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.41",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac36a15a220124ac510204aec1c3e5db8a22ab06fd6706d881dc6149f8ed9a12"
dependencies = [
"cfg-if",
"js-sys",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f"
[[package]]
name = "web-sys"
version = "0.3.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50c24a44ec86bb68fbecd1b3efed7e85ea5621b39b35ef2766b66cd984f8010f"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdacb41e6a96a052c6cb63a144f24900236121c6f63f4f8219fef5977ecb0c25"
dependencies = [
"windows-targets 0.42.2",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm 0.42.2",
"windows_aarch64_msvc 0.42.2",
"windows_i686_gnu 0.42.2",
"windows_i686_msvc 0.42.2",
"windows_x86_64_gnu 0.42.2",
"windows_x86_64_gnullvm 0.42.2",
"windows_x86_64_msvc 0.42.2",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "winnow"
version = "0.5.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b5c3db89721d50d0e2a673f5043fc4722f76dcc352d7b1ab8b8288bed4ed2c5"
dependencies = [
"memchr",
]

View File

@@ -1,11 +1,10 @@
[package]
name = "rust-whisper"
name = "listen-lib"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
whisper-rs = "0.5"
wav = "1"
tokio = "1.27"
signal-hook = "0.3.17"
cpal = "0.15.2"

135
listen-lib/src/lib.rs Normal file
View File

@@ -0,0 +1,135 @@
use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
use signal_hook::{iterator::Signals, consts::signal::SIGINT};
use std::thread;
pub fn main(callback: impl FnMut(Vec<f32>)) {
new_listener(None).unwrap().listen(callback);
}
pub fn main_with(callback: impl FnMut(Vec<f32>), device_name: String) {
new_listener(Some(device_name)).unwrap().listen(callback);
}
pub fn devices() -> Vec<String> {
match _devices() {
Ok(devices) => devices.
iter().
map(|device| device.name()).
filter(|device_name| device_name.is_ok()).
map(|device_name| device_name.unwrap()).
collect(),
Err(_) => vec![],
}
}
fn _devices() -> Result<Vec<cpal::Device>, String> {
match cpal::default_host().devices() {
Ok(devices) => Ok(devices.filter(|device| {
device.supported_input_configs().unwrap().count() > 0
}).collect()),
Err(msg) => Err(format!("failed to get devices: {}", msg)),
}
}
struct Listener {
device_name: String,
}
fn new_listener(device_name: Option<String>) -> Result<Listener, String> {
let device_name = match device_name {
Some(device_name) => {
let devices: Vec<_> = _devices()?.
iter().
map(|device| device.name()).
filter(|device_name| device_name.is_ok()).
map(|device_name| device_name.unwrap()).
filter(|a_device_name| *a_device_name == device_name).
collect();
match devices.first() {
Some(_) => Ok(device_name),
None => Err(format!("device like {} not found", device_name)),
}
},
None => {
let host = cpal::default_host();
match host.default_input_device() {
Some(device) => {
match device.name() {
Ok(device_name) => Ok(device_name),
Err(msg) => Err(format!("failed to get default input device name: {}", msg)),
}
},
None => Err(format!("failed to get any input device")),
}
},
};
match device_name {
Ok(device_name) => Ok(Listener{device_name: device_name}),
Err(msg) => Err(msg),
}
}
impl Listener {
fn listen(self, mut cb: impl FnMut(Vec<f32>)) {
let (send, recv) = std::sync::mpsc::sync_channel(100);
thread::spawn(move || { self._listen(send); });
let mut signals = Signals::new(&[SIGINT]).unwrap();
loop {
match recv.recv_timeout(std::time::Duration::new(1, 0)) {
Ok(msg) => cb(msg),
Err(_) => {
if signals.pending().count() > 0 {
break;
}
},
};
}
}
fn _listen(self, send: std::sync::mpsc::SyncSender<Vec<f32>>) {
let devices = _devices().unwrap();
let devices = devices.iter().
filter(|device| device.name().unwrap() == self.device_name).
collect::<Vec<_>>();
let device = devices.first().unwrap();
let cfg = device.supported_input_configs()
.unwrap()
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
.filter(|x| x.min_sample_rate() >= cpal::SampleRate(15_500))
.nth(0)
.unwrap()
.with_max_sample_rate();
let downsample_ratio = cfg.channels() as f32 * (cfg.sample_rate().0 as f32 / 16_000.0);
let stream = device.build_input_stream(
&cfg.clone().into(),
move |data: &[f32], _: &cpal::InputCallbackInfo| {
let mut downsampled_data = vec![];
for i in 0..(data.len() as f32 / downsample_ratio) as usize {
let mut upsampled = i as f32 * downsample_ratio;
if upsampled > (data.len()-1) as f32 {
upsampled = (data.len()-1) as f32
}
downsampled_data.push(data[upsampled as usize]);
}
match send.try_send(downsampled_data) {
Ok(_) => (),
Err(msg) => eprintln!("failed to ingest audio: {}", msg),
};
},
move |err| {
eprintln!("input error: {}", err)
},
None,
).unwrap();
stream.play().unwrap();
eprintln!("listening on {}", device.name().unwrap());
let mut signals = Signals::new(&[SIGINT]).unwrap();
for sig in signals.forever() {
eprintln!("sig {}", sig);
break;
}
let _ = stream.pause();
}
}

13
models/download_models.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/bin/bash
src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
cd "$(dirname "$(realpath "$BASH_SOURCE")")"
# Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en" "large-v2"; do
test -f ./ggml-$model.bin || wget --quiet --show-progress -O ./ggml-$model.bin "$src-$model.bin"
done
test -f ./ggml-distil-medium.en.bin || wget https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/ggml-medium-32-2.en.bin?download=true -O ./ggml-distil-medium.en.bin
test -f ./ggml-distil-large-v2.bin || wget https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/ggml-large-32-2.en.bin?download=true -O ./ggml-distil-large-v2.bin

7
models/testme/Cargo.lock generated Normal file
View File

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "testme"
version = "0.1.0"

8
models/testme/Cargo.toml Normal file
View File

@@ -0,0 +1,8 @@
[package]
name = "testme"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View File

@@ -0,0 +1,4 @@
fn main() {
let bytes = include_bytes!("./test.txt");
println!("{}", String::from_utf8_lossy(bytes));
}

1
models/testme/src/test.txt Symbolic link
View File

@@ -0,0 +1 @@
../../ggml-tiny.en.bin

857
rust-whisper-baked-lib-wasm/Cargo.lock generated Normal file
View File

@@ -0,0 +1,857 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "addr2line"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
dependencies = [
"gimli",
]
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aho-corasick"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
[[package]]
name = "android-tzdata"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87"
[[package]]
name = "anstyle-parse"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "backtrace"
version = "0.3.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
]
[[package]]
name = "bindgen"
version = "0.68.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"lazy_static",
"lazycell",
"log",
"peeking_take_while",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
"which",
]
[[package]]
name = "bitflags"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
[[package]]
name = "bumpalo"
version = "3.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"libc",
]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-targets 0.48.5",
]
[[package]]
name = "clang-sys"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "clap"
version = "4.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfaff671f6b22ca62406885ece523383b9b64022e341e53e009a62ebc47a45f2"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a216b506622bb1d316cd51328dce24e07bdff4a6128a47c7e7fad11878d5adbb"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
[[package]]
name = "cmake"
version = "0.1.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
dependencies = [
"cc",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "core-foundation-sys"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "errno"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
dependencies = [
"libc",
"windows-sys",
]
[[package]]
name = "fs_extra"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "gimli"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "home"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
dependencies = [
"windows-sys",
]
[[package]]
name = "iana-time-zone"
version = "0.1.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "js-sys"
version = "0.3.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.151"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
[[package]]
name = "libloading"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
dependencies = [
"cfg-if",
"winapi",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456"
[[package]]
name = "log"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "memchr"
version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
dependencies = [
"adler",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
]
[[package]]
name = "object"
version = "0.32.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "peeking_take_while"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "pin-project-lite"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
[[package]]
name = "prettyplease"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
dependencies = [
"proc-macro2",
"syn",
]
[[package]]
name = "proc-macro2"
version = "1.0.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "riff"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
[[package]]
name = "rust-whisper-baked-lib"
version = "0.1.0"
dependencies = [
"rust-whisper-lib",
]
[[package]]
name = "rust-whisper-baked-lib-wasm"
version = "0.1.0"
dependencies = [
"rust-whisper-baked-lib",
"rust-whisper-lib",
"wasm-bindgen",
]
[[package]]
name = "rust-whisper-lib"
version = "0.1.0"
dependencies = [
"byteorder",
"chrono",
"clap",
"tokio",
"wav",
"whisper-rs",
]
[[package]]
name = "rustc-demangle"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustix"
version = "0.38.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
]
[[package]]
name = "shlex"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380"
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "2.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.35.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104"
dependencies = [
"backtrace",
"pin-project-lite",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "wasm-bindgen"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f"
[[package]]
name = "wav"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a65e199c799848b4f997072aa4d673c034f80f40191f97fe2f0a23f410be1609"
dependencies = [
"riff",
]
[[package]]
name = "which"
version = "4.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
dependencies = [
"either",
"home",
"once_cell",
"rustix",
]
[[package]]
name = "whisper-rs"
version = "0.8.0"
dependencies = [
"whisper-rs-sys",
]
[[package]]
name = "whisper-rs-sys"
version = "0.7.3"
dependencies = [
"bindgen",
"cfg-if",
"cmake",
"fs_extra",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-core"
version = "0.51.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.0",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm 0.48.5",
"windows_aarch64_msvc 0.48.5",
"windows_i686_gnu 0.48.5",
"windows_i686_msvc 0.48.5",
"windows_x86_64_gnu 0.48.5",
"windows_x86_64_gnullvm 0.48.5",
"windows_x86_64_msvc 0.48.5",
]
[[package]]
name = "windows-targets"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
dependencies = [
"windows_aarch64_gnullvm 0.52.0",
"windows_aarch64_msvc 0.52.0",
"windows_i686_gnu 0.52.0",
"windows_i686_msvc 0.52.0",
"windows_x86_64_gnu 0.52.0",
"windows_x86_64_gnullvm 0.52.0",
"windows_x86_64_msvc 0.52.0",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"

View File

@@ -0,0 +1,12 @@
[package]
name = "rust-whisper-baked-lib-wasm"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
[dependencies]
rust-whisper-lib = { path = "../rust-whisper-lib" }
rust-whisper-baked-lib = { path = "../rust-whisper-baked-lib" }
wasm-bindgen = "0.2.89"

View File

@@ -0,0 +1,5 @@
#! /bin/bash
echo https://developer.mozilla.org/en-US/docs/WebAssembly/Rust_to_Wasm
cargo install wasm-pack
wasm-pack build --target web

View File

@@ -0,0 +1,43 @@
use wasm_bindgen::prelude::*;
use rust_whisper_lib;
use rust_whisper_baked_lib;
#[wasm_bindgen]
extern {
pub fn whisper_data() -> Vec<f32>;
pub fn whisper_on_success(s: String);
pub fn whisper_on_error(s: String);
}
#[wasm_bindgen]
pub fn new_transcriber(
stream_step: Option<u64>,
stream_retain: Option<f32>,
stream_head: Option<f32>,
stream_tail: Option<f32>,
) {
let flags = rust_whisper_lib::Flags {
model_path: None,
model_buffer: None,
threads: 4,
stream_step: stream_step.unwrap_or(5),
stream_retain: stream_retain.unwrap_or(1.0),
stream_head: stream_head.unwrap_or(0.5),
stream_tail: stream_tail.unwrap_or(0.5),
wav: None,
debug: false,
stream_device: None,
};
let (send, recv) = std::sync::mpsc::sync_channel(100);
rust_whisper_baked_lib::channel(flags, |result| {
match result {
Ok(msg) => whisper_on_success(msg.to_string()),
Err(msg) => whisper_on_error(msg),
};
}, recv);
loop {
let data = whisper_data();
send.send(data).unwrap();
}
}

555
rust-whisper-baked-lib/Cargo.lock generated Normal file
View File

@@ -0,0 +1,555 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
[[package]]
name = "anstream"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87"
[[package]]
name = "anstyle-parse"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "bindgen"
version = "0.68.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"lazy_static",
"lazycell",
"log",
"peeking_take_while",
"prettyplease",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
"which",
]
[[package]]
name = "bitflags"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07"
[[package]]
name = "cc"
version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"libc",
]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clang-sys"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "clap"
version = "4.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfaff671f6b22ca62406885ece523383b9b64022e341e53e009a62ebc47a45f2"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a216b506622bb1d316cd51328dce24e07bdff4a6128a47c7e7fad11878d5adbb"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
[[package]]
name = "cmake"
version = "0.1.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
dependencies = [
"cc",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "errno"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
dependencies = [
"libc",
"windows-sys",
]
[[package]]
name = "fs_extra"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "home"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
dependencies = [
"windows-sys",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.151"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
[[package]]
name = "libloading"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
dependencies = [
"cfg-if",
"winapi",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456"
[[package]]
name = "log"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "memchr"
version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "peeking_take_while"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "prettyplease"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
dependencies = [
"proc-macro2",
"syn",
]
[[package]]
name = "proc-macro2"
version = "1.0.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "riff"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
[[package]]
name = "rust-whisper-baked-lib"
version = "0.1.0"
dependencies = [
"rust-whisper-lib",
]
[[package]]
name = "rust-whisper-lib"
version = "0.1.0"
dependencies = [
"clap",
"wav",
"whisper-rs",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustix"
version = "0.38.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
]
[[package]]
name = "shlex"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380"
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "2.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "wav"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a65e199c799848b4f997072aa4d673c034f80f40191f97fe2f0a23f410be1609"
dependencies = [
"riff",
]
[[package]]
name = "which"
version = "4.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
dependencies = [
"either",
"home",
"once_cell",
"rustix",
]
[[package]]
name = "whisper-rs"
version = "0.8.0"
dependencies = [
"whisper-rs-sys",
]
[[package]]
name = "whisper-rs-sys"
version = "0.7.3"
dependencies = [
"bindgen",
"cfg-if",
"cmake",
"fs_extra",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"

View File

@@ -0,0 +1,9 @@
[package]
name = "rust-whisper-baked-lib"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rust-whisper-lib = { path = "../rust-whisper-lib" }

View File

@@ -0,0 +1,33 @@
use rust_whisper_lib;
pub fn channel<F>(
mut flags: rust_whisper_lib::Flags,
handler_fn: F,
stream: std::sync::mpsc::Receiver<Vec<f32>>,
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
flags.model_path = None;
flags.model_buffer = Some(include_bytes!("../../models/ggml-tiny.en.bin").to_vec());
rust_whisper_lib::channel(flags.clone(), handler_fn, stream);
}
pub fn wav<F>(
mut flags: rust_whisper_lib::Flags,
handler_fn: F
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
flags.model_path = None;
flags.model_buffer = Some(include_bytes!("../../models/ggml-distil-medium.en.bin").to_vec());
rust_whisper_lib::wav(flags.clone(), handler_fn, flags.wav.unwrap());
}
pub fn wav_channel<F>(
mut flags: rust_whisper_lib::Flags,
handler_fn: F
) where F: FnMut(Result<rust_whisper_lib::Transcribed, String>) + Send + 'static {
flags.model_path = None;
flags.model_buffer = Some(include_bytes!("../../models/ggml-base.en.bin").to_vec());
rust_whisper_lib::wav_channel(flags, handler_fn);
}
pub fn f32_from_wav_file(path: &String) -> Result<Vec<f32>, String> {
rust_whisper_lib::f32_from_wav_file(path)
}

1279
rust-whisper-baked/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
[package]
name = "rust-whisper-baked"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rust-whisper-lib = { path = "../rust-whisper-lib" }
rust-whisper-baked-lib = { path = "../rust-whisper-baked-lib" }
listen-lib = { path = "../listen-lib" }
clap = { version = "4.4.10", features = ["derive"] }

View File

@@ -0,0 +1,15 @@
#! /bin/bash
#RUSTFLAGS="-Clink-args=-lstd++" \
echo 0. brew install blackhole-2ch / https://github.com/ExistentialAudio/BlackHole
echo 1. Audio Midi Setup / https://existential.audio/howto/StreamFromGarageBandToZoom.php
echo 2. Multi-Output Device / primary External Headphones, Drift Correction for BlackHole 2ch
echo 3. Aggregate Device / clock MacBook Pro Microphone, Drift Correction BlackHole 2ch
echo 4. Discord outputs to Multi-Output Device
echo 5. apps read Aggregate Device
export C_INCLUDE_PATH="$C_INCLUDE_PATH:$PWD/.."
export LIBRARY_PATH="$LIBRARY_PATH:$PWD/.."
cargo "$@"

View File

@@ -0,0 +1,147 @@
use rust_whisper_lib;
use rust_whisper_baked_lib;
use clap::Parser;
use listen_lib;
use std::thread;
fn main() {
let flags = rust_whisper_lib::Flags::parse();
match flags.wav.clone() {
Some(_) => wav_channel(flags),
None => channel(flags),
};
}
fn wav_channel(flags: rust_whisper_lib::Flags) {
let mut w = new_destutterer();
rust_whisper_baked_lib::wav_channel(
flags.clone(),
move |result: Result<rust_whisper_lib::Transcribed, String>| {
match result {
Ok(transcribed) => {
let s = w.step(transcribed.to_string());
println!("{}", s);
},
Err(msg) => { eprintln!("error: {}", msg); },
};
},
);
}
fn wav(flags: rust_whisper_lib::Flags, _path: String) {
let mut w = new_destutterer();
rust_whisper_baked_lib::wav(flags,
move |result: Result<rust_whisper_lib::Transcribed, String>| {
match result {
Ok(transcribed) => {
let s = w.step(transcribed.to_string());
println!("{}", s);
},
Err(msg) => { eprintln!("error: {}", msg); },
};
},
);
}
fn channel(flags: rust_whisper_lib::Flags) {
let (send, recv) = std::sync::mpsc::sync_channel(100);
eprintln!("rust whisper baked lib channel...");
thread::spawn(move || {
rust_whisper_baked_lib::channel(
flags.clone(),
|result: Result<rust_whisper_lib::Transcribed, String>| {
match result {
Ok(transcribed) => { println!("{}", transcribed.to_string()); },
Err(msg) => { eprintln!("error: {}", msg); },
};
},
recv,
);
});
eprintln!("listen lib main...");
let flags = rust_whisper_lib::Flags::parse();
match flags.stream_device {
Some(device_name) => {
if device_name == "" {
for device in listen_lib::devices() {
eprintln!("{}", device);
}
} else {
listen_lib::main_with(|data| {
send.send(data).unwrap();
}, device_name);
}
},
None => {
listen_lib::main(|data| {
send.send(data).unwrap();
});
}
}
eprintln!("/listen lib main...");
}
struct Destutterer {
prev: Option<String>,
}
fn new_destutterer() -> Destutterer {
Destutterer{prev: None}
}
impl Destutterer {
fn step(&mut self, next: String) -> String {
let next = next.trim().to_string();
if next.len() == 0 {
return next;
}
match &self.prev {
None => {
self.prev = Some(next.clone());
next
},
Some(prev) => {
let without_trailing_punctuation = {
let mut next = next.clone();
while next.ends_with("?") || next.ends_with(".") {
next = next[..next.len()-1].to_string();
}
next
};
let trailing_punctuation = next[without_trailing_punctuation.len() ..].to_string();
let next = without_trailing_punctuation;
let next = {
let mut n = prev.len().clamp(0, next.len());
while n > 0 {
if prev[prev.len() - n..] == next[..n] {
break;
}
n -= 1;
}
next[n..].to_string()
};
if next.len() == 0 {
return "".to_string();
}
self.prev = Some(next.clone());
next + &trailing_punctuation
},
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_destutterer() {
let mut w = new_destutterer();
assert_eq!("abcde".to_string(), w.step("abcde".to_string()));
assert_eq!("fg".to_string(), w.step("cdefg".to_string()));
assert_eq!("hij".to_string(), w.step("fghij".to_string()));
assert_eq!("fghij".to_string(), w.step("fghij".to_string()));
}
}

View File

@@ -3,16 +3,67 @@
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
name = "aho-corasick"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
[[package]]
name = "anstream"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87"
[[package]]
name = "anstyle-parse"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "bindgen"
version = "0.64.0"
version = "0.68.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4"
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
dependencies = [
"bitflags",
"cexpr",
@@ -21,6 +72,7 @@ dependencies = [
"lazycell",
"log",
"peeking_take_while",
"prettyplease",
"proc-macro2",
"quote",
"regex",
@@ -32,9 +84,15 @@ dependencies = [
[[package]]
name = "bitflags"
version = "1.3.2"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cexpr"
@@ -63,10 +121,71 @@ dependencies = [
]
[[package]]
name = "either"
version = "1.8.1"
name = "clap"
version = "4.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
checksum = "bfaff671f6b22ca62406885ece523383b9b64022e341e53e009a62ebc47a45f2"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a216b506622bb1d316cd51328dce24e07bdff4a6128a47c7e7fad11878d5adbb"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
[[package]]
name = "cmake"
version = "0.1.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
dependencies = [
"cc",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "fs_extra"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
[[package]]
name = "glob"
@@ -74,6 +193,12 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "lazy_static"
version = "1.4.0"
@@ -88,9 +213,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.140"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "libloading"
@@ -104,12 +229,9 @@ dependencies = [
[[package]]
name = "log"
version = "0.4.17"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
[[package]]
name = "memchr"
@@ -135,9 +257,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.17.1"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "peeking_take_while"
@@ -146,43 +268,61 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "pin-project-lite"
version = "0.2.9"
name = "prettyplease"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
dependencies = [
"proc-macro2",
"syn",
]
[[package]]
name = "proc-macro2"
version = "1.0.54"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.26"
version = "1.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"
checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.7.3"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d"
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
[[package]]
name = "riff"
@@ -191,10 +331,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
[[package]]
name = "rust-whisper"
name = "rust-whisper-lib"
version = "0.1.0"
dependencies = [
"tokio",
"clap",
"wav",
"whisper-rs",
]
@@ -207,15 +347,21 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "shlex"
version = "1.1.0"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380"
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "1.0.109"
version = "2.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2"
dependencies = [
"proc-macro2",
"quote",
@@ -223,21 +369,16 @@ dependencies = [
]
[[package]]
name = "tokio"
version = "1.27.0"
name = "unicode-ident"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001"
dependencies = [
"autocfg",
"pin-project-lite",
"windows-sys",
]
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
[[package]]
name = "unicode-ident"
version = "1.0.8"
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "wav"
@@ -261,20 +402,19 @@ dependencies = [
[[package]]
name = "whisper-rs"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7e1b9b003aa3285a0e4469219566266aa1d51ced1be38587251a4f713a1677"
version = "0.8.0"
dependencies = [
"whisper-rs-sys",
]
[[package]]
name = "whisper-rs-sys"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97a389dc665c7354ba6b1982850d4ba05b862907e535708ebdec92cbd9c599e8"
version = "0.7.3"
dependencies = [
"bindgen",
"cfg-if",
"cmake",
"fs_extra",
]
[[package]]
@@ -301,18 +441,18 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.45.0"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
@@ -325,42 +465,42 @@ dependencies = [
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"

View File

@@ -0,0 +1,11 @@
[package]
name = "rust-whisper-lib"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
whisper-rs = { path = "../gitea-whisper-rs", version = "0.8.0" }
clap = { version = "4.4.10", features = ["derive"] }
wav = "1"

415
rust-whisper-lib/src/lib.rs Normal file
View File

@@ -0,0 +1,415 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
use clap::Parser;
use std::thread;
use std::fs::File;
use std::io::Write;
#[derive(Parser, Debug, Clone)]
pub struct Flags {
#[arg(long, default_value = "../models/ggml-tiny.en.bin")]
pub model_path: Option<String>,
#[arg(long, default_value = None)]
pub model_buffer: Option<Vec<u8>>,
#[arg(long, default_value = "8")]
pub threads: i32,
#[arg(long, default_value = "5")]
pub stream_step: u64,
#[arg(long, default_value = "0.6")]
pub stream_retain: f32,
#[arg(long, default_value = "0.3")]
pub stream_head: f32,
#[arg(long, default_value = "0.3")]
pub stream_tail: f32,
#[arg(long, default_value = "false")]
pub debug: bool,
#[arg(long, default_value = None)]
pub wav: Option<String>,
#[arg(long, default_value = None)]
pub stream_device: Option<String>,
}
pub fn wav<F>(flags: Flags, handler_fn: F, wav_path: String) where F: FnMut(Result<Transcribed, String>) + Send + 'static {
let w = new_service(
flags.model_path,
flags.model_buffer,
flags.threads,
flags.stream_head,
flags.stream_tail,
handler_fn,
).unwrap();
w.transcribe(&f32_from_wav_file(&wav_path).unwrap())
}
pub fn f32_from_wav_file(path: &String) -> Result<Vec<f32>, String> {
let f = std::fs::File::open(path);
if let Some(err) = f.as_ref().err() {
return Err(format!("failed to open wav file: {}", err));
}
let wav_read = wav::read(&mut f.unwrap());
if let Some(err) = wav_read.as_ref().err() {
return Err(format!("failed to parse wav file: {}", err));
}
let (header, data) = wav_read.unwrap();
if header.channel_count != 1 {
return Err("!= 1 channel".to_string());
}
if header.sampling_rate != 16_000 {
return Err("!= 16_000 hz".to_string());
}
match data.as_sixteen() {
Some(data16) => {
let mut floats = Vec::with_capacity(data16.len());
for sample in data16 {
floats.push(*sample as f32 / 32768.0);
}
Ok(floats)
},
None => Err(format!("couldnt translate wav to 16s")),
}
}
pub fn wav_channel<F>(flags: Flags, handler_fn: F) where F: FnMut(Result<Transcribed, String>) + Send + 'static {
let path = flags.wav.as_ref().unwrap();
let mut audio = f32_from_wav_file(&path).unwrap();
let mut iter = vec![];
let n = audio.len() / match audio.len() % 100 {
0 => 100,
_ => 99,
};
for _ in 0..100 {
iter.push(audio.drain(0..n.clamp(0, audio.len())).collect());
}
let (fin_send, fin_recv) = std::sync::mpsc::sync_channel::<Option<i32>>(1);
channel_and_close(flags.clone(), handler_fn, iter, move || { fin_send.send(None).unwrap(); });
match fin_recv.recv() {
Ok(_) => {},
Err(x) => panic!("failed to receive: {}", x),
};
}
pub fn channel<F, I>(flags: Flags, handler_fn: F, stream: I) where F: FnMut(Result<Transcribed, String>) + Send + 'static, I: IntoIterator<Item = Vec<f32>> {
channel_and_close(flags, handler_fn, stream, || {});
}
fn channel_and_close<F, I, G>(flags: Flags, handler_fn: F, stream: I, mut close_fn: G) where F: FnMut(Result<Transcribed, String>) + Send + 'static, I: IntoIterator<Item = Vec<f32>>, G: FnMut() + Send + 'static {
let w = new_service(
flags.model_path,
flags.model_buffer,
flags.threads,
flags.stream_head,
flags.stream_tail,
handler_fn,
).unwrap();
let stream_retain = (flags.stream_retain * 16_000.0) as usize;
match &flags.debug {
true => { File::create("/tmp/page.rawf32audio").unwrap(); },
false => {},
};
let mut buffer = vec![];
for data in stream {
data.iter().for_each(|x| buffer.push(*x));
if buffer.len() >= (flags.stream_step * 16_000) as usize {
w.transcribe_async(&buffer).unwrap();
match &flags.debug {
true => {
let mut f = File::options().append(true).open("/tmp/page.rawf32audio").unwrap();
let mut wav_data = vec![];
for i in buffer.iter() {
for j in i.to_le_bytes() {
wav_data.push(j);
}
}
f.write_all(wav_data.as_slice()).unwrap();
},
false => {},
};
for i in 0..stream_retain {
buffer[i] = buffer[buffer.len() - stream_retain + i];
}
buffer.truncate(stream_retain);
}
}
if buffer.len() > 0 {
w.transcribe(&buffer);
}
close_fn();
}
struct Service {
jobs: std::sync::mpsc::SyncSender<ATranscribe>,
}
fn new_service<F>(model_path: Option<String>, model_buffer: Option<Vec<u8>>, threads: i32, stream_head: f32, stream_tail: f32, handler_fn: F) -> Result<Service, String> where F: FnMut(Result<Transcribed, String>) + Send + 'static {
match new_engine(model_path, model_buffer, threads) {
Ok(engine) => {
let mut whisper = new_whisper_impl(engine, stream_head, stream_tail, handler_fn);
let (send, recv) = std::sync::mpsc::sync_channel(100);
thread::spawn(move || { whisper.transcribe_asyncs(recv); });
Ok(Service{jobs: send})
},
Err(msg) => Err(format!("failed to initialize engine: {}", msg)),
}
}
impl Service {
fn transcribe(&self, data: &Vec<f32>) {
let (send, recv) = std::sync::mpsc::sync_channel(0);
self._transcribe_async(data, Some(send)).unwrap();
recv.recv().unwrap();
}
fn transcribe_async(&self, data: &Vec<f32>) -> Result<(), String> {
self._transcribe_async(data, None)
}
fn _transcribe_async(&self, data: &Vec<f32>, ack: Option<std::sync::mpsc::SyncSender<bool>>) -> Result<(), String> {
match self.jobs.try_send(ATranscribe{
data: data.clone().to_vec(),
ack: ack,
}) {
Ok(_) => Ok(()),
Err(msg) => Err(format!("failed to enqueue transcription: {}", msg)),
}
}
}
struct Impl {
engine: Engine,
stream_head: f32,
stream_tail: f32,
handler_fn: Option<Box<dyn FnMut(Result<Transcribed, String>) + Send + 'static>>
}
fn new_whisper_impl<F>(engine: Engine, stream_head: f32, stream_tail: f32, handler_fn: F) -> Impl where F: FnMut(Result<Transcribed, String>) + Send + 'static {
Impl {
engine: engine,
stream_head: stream_head,
stream_tail: stream_tail,
handler_fn: Some(Box::new(handler_fn)),
}
}
impl Impl {
fn transcribe_asyncs(&mut self, recv: std::sync::mpsc::Receiver<ATranscribe>) {
loop {
match recv.recv() {
Ok(job) => {
let result = self.transcribe(&job).is_ok();
match job.ack {
Some(ack) => {
ack.send(result).unwrap();
},
None => (),
};
}
Err(_) => return,
};
}
}
fn transcribe(&mut self, a_whisper: &ATranscribe) -> Result<(), ()> {
match self.engine.transcribe(&a_whisper.data) {
Ok(result) => {
self.on_success(&result);
Ok(())
},
Err(msg) => {
self.on_error(msg.to_string());
Err(())
},
}
}
fn on_success(&mut self, whispered: &Transcribed) {
let result = whispered
.after(&(self.stream_head * 100.0))
.before(&(self.stream_tail * 100.0));
if result.to_string().trim().len() > 0 {
(self.handler_fn.as_mut().unwrap())(Ok(result));
}
}
fn on_error(&mut self, msg: String) {
(self.handler_fn.as_mut().unwrap())(Err(format!("failed to transcribe: {}", &msg)));
}
}
struct Engine {
ctx: WhisperContext,
threads: i32,
}
fn new_engine(model_path: Option<String>, model_buffer: Option<Vec<u8>>, threads: i32) -> Result<Engine, String> {
let whisper_context_result = match model_path {
Some(model_path) => WhisperContext::new(&model_path),
None => WhisperContext::new_from_buffer(&model_buffer.unwrap()),
};
match whisper_context_result {
Ok(ctx) => Ok(Engine{ctx: ctx, threads: threads}),
Err(msg) => Err(format!("failed to load model: {}", msg)),
}
}
impl Engine {
fn transcribe(&self, data: &Vec<f32>) -> Result<Transcribed, String> {
match self._transcribe(data) {
Ok(transcribed) => Ok(transcribed),
Err(msg) => Err(format!("{}", msg)),
}
}
fn _transcribe(&self, data: &Vec<f32>) -> Result<Transcribed, WhisperError> {
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
params.set_no_context(true);
params.set_n_threads(self.threads);
params.set_translate(false);
params.set_detect_language(false);
params.set_language(Some("en"));
params.set_print_special(false);
params.set_print_progress(false);
params.set_print_realtime(false);
params.set_print_timestamps(false);
let mut state = self.ctx.create_state()?;
state.full(params, &data[..])?;
let mut result = new_whispered();
let num_segments = state.full_n_segments()?;
for i in 0..num_segments {
let data = state.full_get_segment_text(i)?;
let start = state.full_get_segment_t0(i)?;
let stop = state.full_get_segment_t1(i)?;
result.push(data, start, stop);
}
Ok(result)
}
}
struct ATranscribe {
data: Vec<f32>,
ack: Option<std::sync::mpsc::SyncSender<bool>>,
}
#[derive(Clone, Debug)]
pub struct Transcribed {
data: Vec<ATranscribed>,
}
#[derive(Clone, Debug)]
struct ATranscribed {
pub data: String,
pub offset: i64,
pub length: i64,
}
fn new_whispered() -> Transcribed {
Transcribed{data: vec![]}
}
fn new_a_whispered(data: String, start: i64, stop: i64) -> ATranscribed {
ATranscribed{
data: data,
offset: start.clone(),
length: stop - start,
}
}
impl Transcribed {
pub fn to_string(&self) -> String {
let mut result = "".to_string();
for i in 0..self.data.len() {
result = format!("{} {}", result, &self.data[i].data);
}
result
}
fn after(&self, t: &f32) -> Transcribed {
let mut result = new_whispered();
self.data
.iter()
.filter(|x| x.offset as f32 >= *t)
.for_each(|x| result.data.push(x.clone()));
result
}
fn before(&self, t: &f32) -> Transcribed {
let mut result = new_whispered();
let end = match self.data.iter().map(|x| x.offset + x.length).max() {
Some(x) => x,
None => 1,
};
let t = (end as f32) - *t;
self.data
.iter()
.filter(|x| ((x.offset) as f32) <= t)
.for_each(|x| result.data.push(x.clone()));
result
}
fn push(&mut self, data: String, start: i64, stop: i64) {
let words: Vec<_> = data.split_whitespace().collect();
let per_word = (stop - start) / (words.len() as i64);
for i in 0..words.len() {
let start = (i as i64) * per_word;
let stop = start.clone() + per_word;
self.data.push(new_a_whispered(words[i].to_string(), start, stop));
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_transcribe_tiny_jfk_wav_whisper_rs_wav_channel() {
wav_channel(
Flags {
model_path: None,
model_buffer: Some(include_bytes!("../../models/ggml-tiny.en.bin").to_vec()),
threads: 8,
stream_step: 30,
stream_retain: 0.0,
stream_head: 0.0,
stream_tail: 0.0,
wav: Some("../gitea-whisper-rs/sys/whisper.cpp/bindings/go/samples/jfk.wav".to_string()),
debug: false,
stream_device: None,
},
move | result | {
assert!(result.is_ok());
assert_eq!(result.unwrap().to_string(), " And so my fellow Americans ask not what your country can do for you ask what you can do for your country.");
},
);
}
#[test]
fn test_transcribe_tiny_jfk_wav_whisper_rs() {
wav(
Flags {
model_path: None,
model_buffer: Some(include_bytes!("../../models/ggml-tiny.en.bin").to_vec()),
threads: 8,
stream_step: 0,
stream_retain: 0.0,
stream_head: 0.0,
stream_tail: 0.0,
wav: Some("../gitea-whisper-rs/sys/whisper.cpp/bindings/go/samples/jfk.wav".to_string()),
debug: false,
stream_device: None,
},
| result | {
assert!(result.is_ok());
assert_eq!(result.unwrap().to_string(), " And so my fellow Americans ask not what your country can do for you ask what you can do for your country.");
},
"../gitea-whisper-rs/sys/whisper.cpp/bindings/go/samples/jfk.wav".to_string(),
);
}
}

View File

@@ -0,0 +1 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="List of all items in this crate"><title>List of all items in this crate</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Regular-018c141bf0843ffd.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Medium-8f9a781e4970d388.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Regular-562dcc5011b6de7d.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Bold-a2c9cd1067f8b328.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Semibold-d899c5a5c4aeb14a.ttf.woff2"><link rel="stylesheet" href="../static.files/normalize-76eba96aa4d2e634.css"><link rel="stylesheet" href="../static.files/rustdoc-fa3bb1812debf86c.css"><meta name="rustdoc-vars" data-root-path="../" data-static-root-path="../static.files/" data-current-crate="rust_whisper_lib" data-themes="" data-resource-suffix="" data-rustdoc-version="1.74.0 (79e9716c9 2023-11-13)" data-channel="1.74.0" data-search-js="search-8be46b629f5f14a8.js" data-settings-js="settings-74424d7eec62a23e.js" ><script src="../static.files/storage-fec3eaa3851e447d.js"></script><script defer src="../static.files/main-c5bd66d33317d69f.js"></script><noscript><link rel="stylesheet" href="../static.files/noscript-5d8b3c7633ad77ba.css"></noscript><link rel="alternate icon" type="image/png" href="../static.files/favicon-16x16-8b506e7a72182f1c.png"><link rel="alternate icon" type="image/png" href="../static.files/favicon-32x32-422f7d1d52889060.png"><link rel="icon" type="image/svg+xml" href="../static.files/favicon-2c020d218678b618.svg"></head><body class="rustdoc mod"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="mobile-topbar"><button class="sidebar-menu-toggle">&#9776;</button><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a></nav><nav class="sidebar"><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a><h2 class="location"><a href="#">Crate rust_whisper_lib</a></h2><div class="sidebar-elems"><section><ul class="block"><li><a href="#structs">Structs</a></li><li><a href="#functions">Functions</a></li></ul></section></div></nav><main><div class="width-limiter"><nav class="sub"><form class="search-form"><span></span><input class="search-input" name="search" aria-label="Run search in the documentation" autocomplete="off" spellcheck="false" placeholder="Click or press S to search, ? for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../static.files/wheel-7b819b6101059cd0.svg"></a></div></form></nav><section id="main-content" class="content"><h1>List of all items</h1><h3 id="structs">Structs</h3><ul class="all-items"><li><a href="struct.Flags.html">Flags</a></li><li><a href="struct.Transcribed.html">Transcribed</a></li></ul><h3 id="functions">Functions</h3><ul class="all-items"><li><a href="fn.channel.html">channel</a></li><li><a href="fn.wav.html">wav</a></li></ul></section></div></main></body></html>

View File

@@ -0,0 +1 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="API documentation for the Rust `rust_whisper_lib` crate."><title>rust_whisper_lib - Rust</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Regular-018c141bf0843ffd.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Medium-8f9a781e4970d388.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Regular-562dcc5011b6de7d.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Bold-a2c9cd1067f8b328.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Semibold-d899c5a5c4aeb14a.ttf.woff2"><link rel="stylesheet" href="../static.files/normalize-76eba96aa4d2e634.css"><link rel="stylesheet" href="../static.files/rustdoc-fa3bb1812debf86c.css"><meta name="rustdoc-vars" data-root-path="../" data-static-root-path="../static.files/" data-current-crate="rust_whisper_lib" data-themes="" data-resource-suffix="" data-rustdoc-version="1.74.0 (79e9716c9 2023-11-13)" data-channel="1.74.0" data-search-js="search-8be46b629f5f14a8.js" data-settings-js="settings-74424d7eec62a23e.js" ><script src="../static.files/storage-fec3eaa3851e447d.js"></script><script defer src="../crates.js"></script><script defer src="../static.files/main-c5bd66d33317d69f.js"></script><noscript><link rel="stylesheet" href="../static.files/noscript-5d8b3c7633ad77ba.css"></noscript><link rel="alternate icon" type="image/png" href="../static.files/favicon-16x16-8b506e7a72182f1c.png"><link rel="alternate icon" type="image/png" href="../static.files/favicon-32x32-422f7d1d52889060.png"><link rel="icon" type="image/svg+xml" href="../static.files/favicon-2c020d218678b618.svg"></head><body class="rustdoc mod crate"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="mobile-topbar"><button class="sidebar-menu-toggle">&#9776;</button><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a></nav><nav class="sidebar"><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a><h2 class="location"><a href="#">Crate rust_whisper_lib</a></h2><div class="sidebar-elems"><ul class="block"><li class="version">Version 0.1.0</li><li><a id="all-types" href="all.html">All Items</a></li></ul><section><ul class="block"><li><a href="#structs">Structs</a></li><li><a href="#functions">Functions</a></li></ul></section></div></nav><main><div class="width-limiter"><nav class="sub"><form class="search-form"><span></span><input class="search-input" name="search" aria-label="Run search in the documentation" autocomplete="off" spellcheck="false" placeholder="Click or press S to search, ? for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../static.files/wheel-7b819b6101059cd0.svg"></a></div></form></nav><section id="main-content" class="content"><div class="main-heading"><h1>Crate <a class="mod" href="#">rust_whisper_lib</a><button id="copy-path" title="Copy item path to clipboard"><img src="../static.files/clipboard-7571035ce49a181d.svg" width="19" height="18" alt="Copy item path"></button></h1><span class="out-of-band"><a class="src" href="../src/rust_whisper_lib/lib.rs.html#1-352">source</a> · <button id="toggle-all-docs" title="collapse all docs">[<span>&#x2212;</span>]</button></span></div><h2 id="structs" class="small-section-header"><a href="#structs">Structs</a></h2><ul class="item-table"><li><div class="item-name"><a class="struct" href="struct.Flags.html" title="struct rust_whisper_lib::Flags">Flags</a></div></li><li><div class="item-name"><a class="struct" href="struct.Transcribed.html" title="struct rust_whisper_lib::Transcribed">Transcribed</a></div></li></ul><h2 id="functions" class="small-section-header"><a href="#functions">Functions</a></h2><ul class="item-table"><li><div class="item-name"><a class="fn" href="fn.channel.html" title="fn rust_whisper_lib::channel">channel</a></div></li><li><div class="item-name"><a class="fn" href="fn.wav.html" title="fn rust_whisper_lib::wav">wav</a></div></li></ul></section></div></main></body></html>

View File

@@ -0,0 +1 @@
window.SIDEBAR_ITEMS = {"fn":["channel","wav"],"struct":["Flags","Transcribed"]};

View File

@@ -1,99 +0,0 @@
#! /bin/bash
echo https://github.com/seasalt-ai/snowboy
cd "$(dirname "$(realpath "$BASH_SOURCE")")"
set -e
set -o pipefail
if [ ! -d ./snowboy.git.d ]; then
git clone https://github.com/seasalt-ai/snowboy snowboy.git.d
fi
timeout 2 docker version &> /dev/null
if ! docker images | grep snowboy-pmdl.*latest &> /dev/null; then
pushd snowboy.git.d
docker build -t snowboy-pmdl:latest .
popd
fi
export HOTWORD="${HOTWORD:-${TRAIN:-default_hotword}}"
if [ -n "$TRAIN" ] || [ ! -d ./model ] || [ ! -f ./model/$HOTWORD.pmdl ]; then
mkdir -p model
pushd model
rm -f ./record{1,2,3}.wav || true
echo "record 3 instances of '$HOTWORD'" >&2
for i in 1 2 3; do
read -p "[$i/3] ready? you get 3 seconds."
(
timeout 3 rec \
-r 16000 \
-c 1 \
-b 16 \
-e signed-integer \
-t wav \
record$i.wav
) || true
ls record$i.wav
done
popd
docker run \
--rm \
-it \
-v "$(realpath ./model)":/snowboy-master/examples/Python/model \
snowboy-pmdl:latest
mv ./model/hotword.pmdl ./model/$HOTWORD.pmdl
if [ -n "$TRAIN" ]; then
exit 0
fi
fi
if false; then
if ! which swig; then
brew install swig
fi
pip3 install pyaudio
pushd snowboy.git.d/swig/Python3/
make
popd
cd snowboy.git.d/examples/Python3/
echo '
import snowboydecoder
import datetime
detected_callback = lambda *args: print(datetime.datetime.now(), "GOTCHA")
d = snowboydecoder.HotwordDetector("../../../model/'"$HOTWORD"'.pmdl", sensitivity=0.5, audio_gain=1)
d.start(detected_callback)
' > breel.py
echo GO
cleanup() {
echo OK IM DONE NOW
}
trap cleanup EXIT
python3 ./breel.py
else
resources="$(realpath snowboy.git.d/resources/common.res)"
hotword="$(realpath ./model/$HOTWORD.pmdl)"
GOPROXY= go build -o snowboy
if [ -z "$PUSH" ]; then
./snowboy \
-ms "$hotword/$HOTWORD" \
-r "$resources" \
-s 0.5 \
"$@"
else
echo '
FROM registry-app.eng.qops.net:5001/imported/alpine:3.16
WORKDIR /main/
COPY ./snowboy.git.d/resources/common.res ./
COPY ./model/hotword.pmdl ./
COPY ./snowboy ./
ENTRYPOINT ["sh", "-c", "true; echo copying /main/ to /mnt/; cp /main/* /mnt/"]
CMD []
' > Dockerfile
docker build -t registry-app.eng.qops.net:5001/breel/snowboy:latest .
docker push registry-app.eng.qops.net:5001/breel/snowboy:latest
fi
fi

View File

@@ -1,13 +0,0 @@
module snowboy
go 1.19
require (
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc
)
require (
github.com/Kitt-AI/snowboy v1.3.0 // indirect
github.com/stretchr/testify v1.8.1 // indirect
)

View File

@@ -1,22 +0,0 @@
github.com/Kitt-AI/snowboy v1.3.0 h1:PjBVN84M/9tAzDBQXILAKMoJMxt/fT0nhJ1rhKtVRUc=
github.com/Kitt-AI/snowboy v1.3.0/go.mod h1:sDzzMXFQ1wFkXkZaX/ant0xJsizGVq/9hyKb7ZB3cNI=
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af h1:ijY5OHNQs3CdzTN2XT+zByIsR1QVyXTvOUSkQcBm6pw=
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af/go.mod h1:XcT4k8Tn9hrM5SLVvu5hNQbAC6GojXM0MXz1Rt8CL68=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc h1:yYLpN7bJxKYILKnk20oczGQOQd2h3/7z7/cxdD9Se/I=
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,122 +0,0 @@
// This example streams the microphone thru Snowboy to listen for the hotword,
// by using the PortAudio interface.
//
// HOW TO USE:
// go run examples/Go/listen/main.go [path to snowboy resource file] [path to snowboy hotword file]
//
package main
import (
"bytes"
"encoding/binary"
"flag"
"fmt"
"log"
"path"
"strings"
"time"
"github.com/brentnd/go-snowboy"
"github.com/gordonklaus/portaudio"
)
// Sound represents a sound stream implementing the io.Reader interface
// that provides the microphone data.
type Sound struct {
stream *portaudio.Stream
data []int16
}
// Init initializes the Sound's PortAudio stream.
func (s *Sound) Init() {
inputChannels := 1
outputChannels := 0
sampleRate := 16000
s.data = make([]int16, 1024)
// initialize the audio recording interface
err := portaudio.Initialize()
if err != nil {
fmt.Errorf("Error initialize audio interface: %s", err)
return
}
// open the sound input stream for the microphone
stream, err := portaudio.OpenDefaultStream(inputChannels, outputChannels, float64(sampleRate), len(s.data), s.data)
if err != nil {
fmt.Errorf("Error open default audio stream: %s", err)
return
}
err = stream.Start()
if err != nil {
fmt.Errorf("Error on stream start: %s", err)
return
}
s.stream = stream
}
// Close closes down the Sound's PortAudio connection.
func (s *Sound) Close() {
s.stream.Close()
portaudio.Terminate()
}
// Read is the Sound's implementation of the io.Reader interface.
func (s *Sound) Read(p []byte) (int, error) {
s.stream.Read()
buf := &bytes.Buffer{}
for _, v := range s.data {
binary.Write(buf, binary.LittleEndian, v)
}
copy(p, buf.Bytes())
return len(p), nil
}
func main() {
resources := flag.String("r", "", "path to the .res file")
models := flag.String("ms", "", "comma delimited path to the .?mdl file/output")
sensitivity := flag.Float64("s", 0.45, "0..1")
quiet := flag.Bool("q", false, "emit '1' on detect else silent")
flag.Parse()
if *resources == "" || *models == "" {
panic("all flags must be set")
}
// open the mic
mic := &Sound{}
mic.Init()
defer mic.Close()
// open the snowboy detector
d := snowboy.NewDetector(*resources)
defer d.Close()
// set the handlers
for _, modelStrC := range strings.Split(*models, ",") {
modelStr := modelStrC
d.HandleFunc(snowboy.NewHotword(path.Dir(modelStr), float32(*sensitivity)), func(string) {
if !*quiet {
log.Println(path.Base(modelStr))
}
fmt.Println(path.Base(modelStr))
})
}
d.HandleSilenceFunc(1*time.Second, func(string) {
if !*quiet {
log.Println("...")
}
})
// display the detector's expected audio format
sr, nc, bd := d.AudioFormat()
log.Printf("sample rate=%d, num channels=%d, bit depth=%d\n", sr, nc, bd)
// start detecting using the microphone
d.ReadAndDetect(mic)
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,82 +0,0 @@
#! /bin/bash
main() {
cleanup() {
killall -9 $(jobs -p)
killall snowboy
}
trap cleanup EXIT
if [ ! -e /tmp/stt.fifo ]; then
mkfifo /tmp/stt.fifo
fi
echo starting in
for ((i=2; i>0; i--)); do
echo "...$i..."
sleep 1
done
local models=($(
cat pyautogui.yaml \
| gojq -r -c --yaml-input '
to_entries[] | "model/"+.key+".pmdl/"+.key
' \
| tr '\n' ',' \
| sed 's/,$//'
))
echo models=$models
./snowboy -r resources.res -ms $models "$@" > /tmp/stt.fifo &
python3 -c '
import pyautogui
import time
keys = set()
def toggle(key):
global keys
if key in keys:
release(key)
else:
hold(key)
def hold(key):
global keys
for keyin in [todrop for todrop in keys]:
if keyin != key:
release(keyin)
keys = set()
keys.add(key)
print()
print("pressing", key)
print()
pyautogui.keyDown(key)
def release(key):
print()
print("releasing", key)
print()
pyautogui.keyUp(key)
def main():
with open("/tmp/stt.fifo", "r") as q:
for line in q:
handle(line.strip())
import yaml
mapping = yaml.safe_load(open("./pyautogui.yaml", "r"))
print(mapping)
def handle(cmd):
global mapping
hold(mapping.get(cmd))
main()
'
}
if [ "$0" == "$BASH_SOURCE" ]; then
main "$@"
fi

View File

@@ -1,5 +0,0 @@
up: w
down: s
left: a
right: d
jump: w

Binary file not shown.

View File

@@ -1,304 +0,0 @@
import speech_recognition as sr
import time
import threading
import queue
import signal
import sys
import os
import requests
import yaml
def log(*args):
print(">", *args, file=sys.stderr)
class Piper(threading.Thread):
def __init__(self, inq, outq):
threading.Thread.__init__(self)
self.inq = inq
self.outq = outq
def run(self):
while True:
got = self.inq.get()
if got is None:
break
self._run(got)
self.outq.put(None)
class Manager(threading.Thread):
def __init__(self, outq):
threading.Thread.__init__(self)
self.outq = outq
inq = queue.Queue()
def catcher(sig, frame):
inq.put(None)
self.inq = inq
signal.signal(signal.SIGINT, catcher)
def run(self):
log("Manager.run: start")
self.inq.get()
self.outq.put(None)
log("Manager.run: stop")
class Reader(threading.Thread):
def __init__(self, inq, outq):
threading.Thread.__init__(self)
self.name = os.environ.get("MIC_NAME", "pulse_monitor")
if not self.name:
for index, name in enumerate(sr.Microphone.list_microphone_names()):
print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
exit()
self.inq = inq
self.outq = outq
def run(self):
log("Reader.run: start")
try:
idx = [
idx for idx,v in enumerate(
sr.Microphone.list_microphone_names(),
) if v in self.name.split(",")
][0]
with sr.Microphone(device_index=idx) as mic:
while not self.should_stop():
try:
self.outq.put(self._run(mic))
except Exception as e:
if not "timed out" in str(e):
log("Reader.run: error:", e)
except Exception as e:
log("Reader.run panic:", e)
log("microphones:", sr.Microphone.list_microphone_names())
finally:
self.outq.put(None)
log("Reader.run: stop")
def should_stop(self):
return not self.inq.empty()
def _run(self, mic):
mic_timeout = int(os.environ.get("MIC_TIMEOUT", 5))
r = sr.Recognizer()
return r.listen(
mic,
timeout=mic_timeout,
phrase_time_limit=mic_timeout,
)
class Parser(threading.Thread):
def __init__(self, inq, outq):
threading.Thread.__init__(self)
self.inq = inq
self.outq = outq
def run(self):
log("Parser.run: start")
while True:
try:
clip = self.inq.get()
backlog = self.inq.qsize()
if backlog:
log("Parser.run backlog", backlog)
if clip is None:
break
self.outq.put(self._run(clip).strip())
except Exception as e:
log("Parser.run: error:", e)
self.outq.put(None)
log("Parser.run: stop")
def _run(self, clip):
r = sr.Recognizer()
return r.recognize_whisper(clip, language="english", model=os.environ.get("MODEL", "small.en")) # tiny.en=32x, base.en=16x, small.en=6x, medium.en=x2
def load_dot_notation(v, s):
items = s.replace("[]", ".[]").split(".")
return _load_dot_notation(v, items)
def _load_dot_notation(v, items):
for i in range(len(items)):
k = items[i]
if not k:
continue
if k == "[]":
if isinstance(v, list):
result = []
for j in v:
subresult = _load_dot_notation(j, items[i+1:])
if isinstance(subresult, list):
result.extend(subresult)
else:
result.append(subresult)
return result
else:
result = []
for j in v.values():
subresult = _load_dot_notation(j, items[i+1:])
if isinstance(subresult, list):
result.extend(subresult)
else:
result.append(subresult)
return result
else:
if isinstance(v, list):
v = v[int(k)]
else:
v = v[k]
return v
def test_load_dot_notation():
for i in [
"a" == load_dot_notation("a", "."),
["a"] == load_dot_notation(["a"], "."),
"b" == load_dot_notation({"a":"b"}, ".a"),
"c" == load_dot_notation({"a":{"b":"c"}}, ".a.b"),
"c" == load_dot_notation({"a":{"b":["c"]}}, ".a.b.0"),
["c","d"] == load_dot_notation({"a":{"b":"c"}, "a2":{"b":"d"}}, ".[].b"),
["c","d"] == load_dot_notation({"a":{"b":["c"], "b2":["d"]}}, ".a.[].0"),
["c","d"] == load_dot_notation({"a":{"b":["c"], "b2":["d"]}}, ".a[].0"),
["c","d"] == load_dot_notation(["c", "d"], "."),
["c","d"] == load_dot_notation(["c", "d"], "[]"),
]:
if not i:
raise Exception(i)
test_load_dot_notation()
class Reactor(threading.Thread):
def __init__(self, inq, outq):
threading.Thread.__init__(self)
self.inq = inq
self.outq = outq
self.load_hotwords = Reactor.new_load_hotwords()
log(f"hotwords: {self.load_hotwords()}")
def new_load_hotwords():
p = os.environ.get("HOTWORDS", None)
if not p:
def load_nothing():
return []
return load_nothing
try:
if "@" in p:
def load_hotwords_in_yaml_file():
with open(p.split("@")[0], "r") as f:
v = yaml.safe_load(f)
v = load_dot_notation(v, p.split("@")[-1])
return ["".join(i.strip().lower().split()) for i in v if i]
load_hotwords_in_yaml_file()
return load_hotwords_in_yaml_file
else:
def load_hotwords_in_file():
with open(p, "r") as f:
return ["".join(i.strip().lower().split()) for i in f.readlines()]
load_hotwords_in_file()
return load_hotwords_in_file
except Exception as e:
log(f"$HOTWORDS {p} is not a file: {e}")
hotwords = ["".join(i.lower().strip().split()) for i in p.split("\/\/")]
log(f'$HOTWORDS: {hotwords}')
def load_hotwords_as_literal():
return hotwords
return load_hotwords_as_literal
def run(self):
log("Reactor.run: start")
while True:
text = self.inq.get()
if text is None:
break
self.handle(text)
self.outq.put(None)
log("Reactor.run: stop")
def handle(self, text):
hotwords = self.load_hotwords()
if os.environ.get("DEBUG", None):
log(f"seeking {hotwords} in {text}")
if not hotwords:
if not os.environ.get("HOTWORDS", None):
print(text)
else:
log(text)
return
cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()])
for i in hotwords:
if i in cleantext:
#log(f"Reactor.handle: found hotword '{i}' in '{text}' as '{cleantext}'")
self.outq.put((i, text))
class Actor(threading.Thread):
def __init__(self, inq):
threading.Thread.__init__(self)
self.inq = inq
self.handle = self.handle_stderr
if os.environ.get("STDOUT", "") == "true":
self.handle = self.handle_stdout
elif os.environ.get("SIGUSR2", ""):
self.pid = int(environ["SIGUSR2"])
self.handle = self.handle_signal
elif os.environ.get("URL", ""):
self.url = environ["URL"]
self.handle = self.handle_url
self.headers = [i.split("=")[:2] for i in os.environ.get("HEADERS", "").split("//") if i]
self.body = os.environ.get("BODY", '{"hotword":"{{hotword}}","context":"{{context}}"}')
log(self.headers)
def run(self):
log("Actor.run: start")
while True:
got = self.inq.get()
if got is None:
break
self.handle(got[0], got[1])
log("Actor.run: stop")
def handle_stderr(self, hotword, context):
log(f"'{hotword}' in '{context}'")
def handle_stdout(self, hotword, context):
log(context)
print(hotword)
def handle_signal(self, hotword, context):
self.handle_stderr(hotword, context)
os.kill(self.pid, signal.SIGUSR2)
def handle_url(self, hotword, context):
self.handle_stderr(hotword, context)
try:
headers = {}
for i in self.headers:
key = i[0]
value = i[1]
value = value.replace("{{hotword}}", hotword)
value = value.replace("{{context}}", context)
headers[key] = value
body = self.body
body = body.replace("{{hotword}}", hotword)
body = body.replace("{{context}}", context)
if os.environ.get("DEBUG", "") :
log("POST", self.url, headers, body)
requests.post(self.url, headers=headers, data=body)
except Exception as e:
log("Actor.handle_url:", e)
def main():
managerToParserQ = queue.Queue(maxsize=1)
readerToParserQ = queue.Queue(maxsize=10)
parserToReactorQ = queue.Queue(maxsize=10)
reactorToActorQ = queue.Queue(maxsize=10)
threads = [
Manager(managerToParserQ),
Reader(managerToParserQ, readerToParserQ),
Parser(readerToParserQ, parserToReactorQ),
Reactor(parserToReactorQ, reactorToActorQ),
Actor(reactorToActorQ),
]
[t.start() for t in threads]
[t.join() for t in threads]
if __name__ == "__main__":
main()

View File

@@ -1,7 +0,0 @@
#! /bin/bash
sudo apt install portaudio19-dev python3-pyaudio
python3 -m pip install git+https://github.com/openai/whisper.git soundfile PyAudio SpeechRecognition
#sudo apt-get install python3 python3-all-dev python3-pip build-essential swig git libpulse-dev libasound2-dev
#python3 -m pip install pocketsphinx

View File

@@ -1,20 +0,0 @@
FROM debian:buster as builder
RUN apt -y update && apt -y install build-essential wget ffmpeg
WORKDIR /tmp/whisper-cpp.git.d
RUN wget https://github.com/ggerganov/whisper.cpp/archive/refs/tags/v1.2.1.tar.gz \
&& tar -xf ./*.tar.gz \
&& mv ./whisper*/ ./git.d
WORKDIR /tmp/whisper-cpp.git.d/git.d
RUN make && make samples
FROM debian:buster
RUN apt -y update && apt -y install curl
COPY --from=builder /tmp/whisper-cpp.git.d/git.d/ /whisper-cpp.git.d/
WORKDIR /whisper.d
RUN bash /whisper-cpp.git.d/models/download-ggml-model.sh tiny.en
ENTRYPOINT []
CMD /whisper-cpp.git.d/main -m /whisper-cpp.git.d/models/ggml-tiny.en.bin -f /whisper-cpp.git.d/samples/gb1.wav -t 4

View File

@@ -1 +0,0 @@
git.d/main

View File

@@ -1,14 +0,0 @@
# git.d/samples/mm0.wav (30s)
| model | threads | rust | c |
| ----- | ------- | ------------- | ----------- |
| tiny | 1 | 4.4s@122% | 4.9s@125% |
| tiny | 2 | 2.7s@210% | 3.3s@190% |
| tiny | 4 | 2.0s@- | 2.9s@400% |
| tiny | 8 | 2.0s@- | 3.1s@700% |
| small | 1 | 23.9s@175% | 28.5s@205% |
| small | 2 | 14.9s@347% | 19.2s@330% |
| small | 4 | 12.3s@515% | 22.1s@530% |
| base | 1 | 8.7s@150% | 10.2s@155% |
| base | 2 | 5.1s@240% | 7.1s@270% |
| base | 4 | 3.8s@370% | 6.0s@430% |

View File

@@ -1,7 +0,0 @@
export P=${1:-1}
export MODEL=${2:-models/ggml-tiny.en.bin}
export WAV=${3:-git.d/samples/jfk.wav}
echo === RUST
time rust-whisper 2>&1 | grep -v ^whisper_ | grep ..
echo === C
time ./c-whisper -m $MODEL -f $WAV -t $P 2>&1 | grep -v ^whisper_ | grep -v ^system_info | grep -v ^main: | grep ..

Submodule whisper-cpp-2023/git.d deleted from 0a2d1210bc

View File

@@ -1 +0,0 @@
git.d/libwhisper.a

View File

@@ -1 +0,0 @@
git.d/models

View File

@@ -1,24 +0,0 @@
#! /bin/bash
if [ ! -d ./git.d/.git ]; then
git clone https://github.com/ggerganov/whisper.cpp.git git.d
fi
cd ./git.d
if [ ! -f ./samples/gb1.wav ]; then
make samples
fi
if [ ! -f ./main ]; then
make
fi
if [ ! -f ./stream ]; then
make stream
fi
if [ ! -f ./models/ggml-${MODEL:-tiny.en}.bin ]; then
bash ./models/download-ggml-model.sh ${MODEL:-tiny.en}
fi
if [ -n "$STREAM" ]; then
./stream -m ./models/ggml-${MODEL:-tiny.en}.bin -t 8 --step 500 --length ${MIC_TIMEOUT:-2}000 $(test -n "$MIC_ID" && echo -c "$MIC_ID")
else
time ./main -m ./models/ggml-${MODEL:-tiny.en}.bin -f ./samples/gb1.wav -t 4
fi

View File

@@ -1,37 +0,0 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy};
fn main() {
let mut ctx = WhisperContext::new(
&std::env::var("MODEL").unwrap_or(String::from("../models/ggml-tiny.en.bin"))
).expect("failed to load model");
// create a params object
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
params.set_n_threads(
std::env::var("P").unwrap_or(String::from("1")).parse::<i32>().expect("$P must be a number")
);
params.set_translate(false);
params.set_language(Some("en"));
params.set_print_special(false);
params.set_print_progress(false);
params.set_print_realtime(false);
params.set_print_timestamps(false);
let (header, data) = wav::read(&mut std::fs::File::open(
&std::env::var("WAV").unwrap_or(String::from("../git.d/samples/jfk.wav"))
).expect("failed to open .wav")).expect("failed to decode .wav");
assert!(header.channel_count == 1);
assert!(header.sampling_rate == 16000);
let data16 = data.as_sixteen().expect("wav is not 32bit floats");
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
ctx.full(params, &audio_data[..])
.expect("failed to run model");
let num_segments = ctx.full_n_segments();
for i in 0..num_segments {
let segment = ctx.full_get_segment_text(i).expect("failed to get segment");
print!("{} ", segment);
}
println!("");
}

View File

@@ -1 +0,0 @@
git.d/whisper.h