84 Commits

Author SHA1 Message Date
Bel LaPointe
9db417005c move all into subdirs 2023-12-19 22:01:27 -05:00
Bel LaPointe
799283d49c drop original root-level 2023-12-19 22:00:09 -05:00
Bel LaPointe
1caa9f564e cmd imports rust-whisper-lib 2023-12-19 21:59:34 -05:00
Bel LaPointe
641d7d077a stub /rust-whisper as a bin 2023-12-19 21:54:02 -05:00
Bel LaPointe
f864d4dfc7 lib-ify as /rust-whisper-lib 2023-12-19 21:53:02 -05:00
Bel LaPointe
2883830dbe get ready to split into package 2023-12-19 21:46:02 -05:00
Bel LaPointe
11b5091872 found the type needed to pass closures with local variables 2023-12-19 21:39:53 -05:00
Bel LaPointe
03370f362e from borrow since a grant is K 2023-12-19 21:20:52 -05:00
Bel LaPointe
ec6a71d38c purge non callback handling 2023-12-19 21:19:37 -05:00
Bel LaPointe
1b96b132e1 dumb callbacks work 2023-12-19 21:18:04 -05:00
Bel LaPointe
839487b99e drop redundant on_success time trimming 2023-12-19 21:11:53 -05:00
Bel LaPointe
a2fee32fbc refactor to whisper_service enqueues, whisper_impl transforms, whisper_engine provides raw 2023-12-19 21:08:59 -05:00
Bel LaPointe
091958e08d moved to a callback BUT costed me a global so lets iterate to someTrait 2023-12-19 20:38:01 -05:00
Bel LaPointe
5f47b2c88b wait i just needed an option? f off 2023-12-19 20:20:24 -05:00
Bel LaPointe
367838ac23 test to show include_bytes! macro supports large symlinks 2023-12-19 16:36:17 -05:00
Bel LaPointe
d05287fa3d update --stream-* defaults 2023-12-19 10:30:10 -05:00
Bel LaPointe
01be2637ca swap order 2023-12-19 10:26:22 -05:00
Bel LaPointe
226bedb80e add --debug to write a file that can be played with cat /tmp/page.rawf32audio | sox -r 16000 -b 32 -t f32 -e floating-point - -d 2023-12-19 10:25:48 -05:00
Bel LaPointe
6b54e500cd i think my recording has gaps 2023-12-19 09:54:21 -05:00
Bel LaPointe
8603f20a24 break into words but keep more stream head/tail tiebreaking 2023-12-19 09:51:11 -05:00
Bel LaPointe
eee0bf5e65 wip... 2023-12-19 09:30:15 -05:00
Bel LaPointe
15a3f8430a WIP trim the head and tail from text output because low confidence 2023-12-19 09:09:38 -05:00
Bel LaPointe
116f3f58c9 no buffer 2023-11-30 12:37:19 -07:00
Bel LaPointe
532ae22908 back to mvp 2023-11-30 12:28:35 -07:00
Bel LaPointe
deffc420ca at least it complies 2023-11-30 12:00:16 -07:00
Bel LaPointe
2391d07994 transcribing results as callbacks 2023-11-30 09:58:28 -07:00
Bel LaPointe
eea4b75bc8 confirmed threaded listen vs transcribe stream is naisu 2023-11-30 09:45:09 -07:00
Bel LaPointe
8982276a90 not infinite buffer 2023-11-30 09:41:12 -07:00
Bel LaPointe
479cfb055f threaded something i guess 2023-11-30 09:39:43 -07:00
Bel LaPointe
0667b5b5c6 large distill too 2023-11-30 09:12:38 -07:00
Bel LaPointe
9e97f8669d fuuuuuuuu lost my models folder oh well 2023-11-30 09:06:26 -07:00
Bel LaPointe
ff0f34f80b move rust to root 2023-11-30 09:02:11 -07:00
Bel LaPointe
bf3dd75074 gitignore 2023-11-30 09:02:02 -07:00
Bel LaPointe
827436d96c drop snowboy 2023-11-30 09:01:50 -07:00
Bel LaPointe
3b4295d026 unnest submodule 2023-11-30 09:01:44 -07:00
Bel LaPointe
2936fec1e4 dont need to choose 1 channel since downsampling should randomly choose from all 2023-11-29 05:33:27 -07:00
Bel LaPointe
1dd631872c from env to flags 2023-11-28 22:31:16 -07:00
Bel LaPointe
72a1420638 wheee 2023-11-28 22:24:10 -07:00
Bel LaPointe
1009c4230e env variable ify 2023-11-28 22:13:05 -07:00
Bel LaPointe
30e5515da1 GOTTEM 2023-11-28 22:03:08 -07:00
Bel LaPointe
b4c9ecb98b successfully confirmed audio is k with sox -r 16000 -t f32 /tmp/transcribed.pcm --default-device 2023-11-28 21:30:24 -07:00
Bel LaPointe
4ef419e6c0 successfully confirmed audio is k with sox -r 44100 -t f32 /tmp/transcribed.pcm --default-device 2023-11-28 21:22:26 -07:00
Bel LaPointe
54964ec59b grrrrr output 2023-11-28 21:03:00 -07:00
Bel LaPointe
62e764436a no warnings but still nothing sane coming out... 2023-11-28 20:36:26 -07:00
Bel LaPointe
d631def834 CLOSER like easily 80 20 range right 2023-11-28 20:32:09 -07:00
Bel LaPointe
3168968cae ok stream les go 2023-11-28 19:23:21 -07:00
Bel LaPointe
437d7cac39 successful refactor 2023-11-28 19:18:05 -07:00
Bel LaPointe
3093a91d84 wip 2023-11-28 19:10:07 -07:00
Bel LaPointe
f58e3a0331 better default err msgs 2023-11-26 17:37:26 -07:00
Bel LaPointe
6dffa401b7 cleaner 2023-11-26 17:21:40 -07:00
Bel LaPointe
f4d9730b5a hm i lost it but i get it back 2023-11-26 17:13:29 -07:00
Bel LaPointe
0c5c1f647c submodule for gitea-whisper-rs 2023-11-26 17:04:16 -07:00
Bel LaPointe
77ad40b61a closer 2023-11-26 17:00:42 -07:00
Bel LaPointe
09894c4fd0 confirmed just needs whisper-rs-sys upgrade for whisper.cpp up 2023-11-26 16:39:42 -07:00
Bel LaPointe
3e2e1e2ff8 wip 2023-11-26 16:23:42 -07:00
Bel LaPointe
50058037eb Revert "try rollback to whisper-rs5 to avoid gpu in whispercpp but no luck"
This reverts commit a483aaf25c.
2023-11-08 11:35:31 -07:00
Bel LaPointe
a483aaf25c try rollback to whisper-rs5 to avoid gpu in whispercpp but no luck 2023-11-08 11:35:29 -07:00
Bel LaPointe
be7d85f85e confirmed whisper.cpp works with distill iff no gpu 2023-11-08 11:29:50 -07:00
Bel LaPointe
60d38c4d5c update distil.sh 2023-11-08 10:58:58 -07:00
Bel LaPointe
e3a7628acf try distil-whisper 2023-11-08 10:22:30 -07:00
Bel LaPointe
91c7791860 up whisper-rs to 0.8.0 2023-11-08 09:25:00 -07:00
bel
247edd2ced more trans 2023-07-15 19:05:00 -06:00
bel
edd94aef72 catch 2023-07-05 22:36:07 -06:00
bel
b4d3e5a27c HOTWORDS yaml @ can have comma delimited and KEYS 2023-04-19 18:24:07 -06:00
bel
a1436e3bd2 revise 2023-04-12 19:37:43 -06:00
bel
410769b8c6 tr 2023-04-12 19:26:03 -06:00
bel
5869016de6 tr 2023-04-12 19:16:07 -06:00
bel
0955f6c0c0 oof 2023-04-12 19:15:32 -06:00
bel
242f4407df script 2023-04-12 18:50:49 -06:00
bel
814a8ae2f3 typo 2023-04-08 22:23:20 -06:00
bel
7c369e72d4 delimiters 2023-04-08 22:22:22 -06:00
bel
0aff4f556b one more 2023-04-08 20:05:03 -06:00
bel
88bf54d022 url replaces hotword,context too 2023-04-02 10:48:41 -06:00
bel
63fe8e7e9e whoops 2023-04-01 11:13:57 -06:00
bel
e26e700a21 reader does finally put and some exception shuffle 2023-04-01 11:12:44 -06:00
bel
fa23e396f1 gr 2023-04-01 10:46:16 -06:00
bel
11789a5c98 with boo 2023-04-01 10:45:00 -06:00
bel
97006d95c8 stack pls 2023-04-01 10:41:22 -06:00
bel
bb578e98f6 retry slower on bad mic things but ultimately segfault still 2023-04-01 10:35:04 -06:00
bel
465193b60d DEBUG stderr 2023-03-31 22:36:30 -06:00
bel
0e6c12a94f oops 2023-03-31 21:35:23 -06:00
bel
599f9079d3 req.txt 2023-03-31 20:52:38 -06:00
bel
0805bb6fd8 prune to only one whisper 2023-03-31 16:42:36 -06:00
bel
b109b0144d move to /rust-whisper.d with a download_models.sh 2023-03-31 16:41:46 -06:00
54 changed files with 3418 additions and 1188 deletions

5
.gitignore vendored
View File

@@ -1,6 +1,11 @@
**/*.sw*
/whisper-cpp-2023/rust.d/target
/rust-whisper.d/target
/rust-whisper.d/models
/target/
/models/
snowboy-2022/snowboy
**/*.git.d
**/*.wav
snowboy-2022/Dockerfile
**/target

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "rust-whisper.d/gitea-whisper-rs"]
path = gitea-whisper-rs
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git

View File

@@ -1,29 +0,0 @@
# stt
## listen on linux
https://wiki.archlinux.org/title/PulseAudio/Examples
```
10. ALSA monitor source
To be able to record from a monitor source (a.k.a. "What-U-Hear", "Stereo Mix"), use pactl list to find out the name of the source in PulseAudio (e.g. alsa_output.pci-0000_00_1b.0.analog-stereo.monitor). Then add lines like the following to /etc/asound.conf or ~/.asoundrc:
pcm.pulse_monitor {
type pulse
device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
}
ctl.pulse_monitor {
type pulse
device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
}
Now you can select pulse_monitor as a recording source.
Alternatively, you can use pavucontrol to do this: make sure you have set up the display to "All input devices", then select "Monitor of [your sound card]" as the recording source.
```
```bash
$ pactl list | grep -A 50 RUNNING | grep -E 'RUNNING|Name:|Monitor Source:' | grep Monitor.Source | head -n 1 | awk '{print $NF}'
```

1
gitea-whisper-rs Submodule

Submodule gitea-whisper-rs added at dd62f2b9f6

View File

@@ -47,37 +47,60 @@ class Reader(threading.Thread):
self.name = os.environ.get("MIC_NAME", "pulse_monitor")
if not self.name:
for index, name in enumerate(sr.Microphone.list_microphone_names()):
print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
log("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
exit()
self.inq = inq
self.outq = outq
def run(self):
log("Reader.run: start")
try:
idx = [
def mic_idx(self):
mics = []
while not mics and not self.should_stop():
log(f'searching for one of {self.name.split(",")} in {sr.Microphone.list_microphone_names()}...')
time.sleep(1)
mics = [
idx for idx,v in enumerate(
sr.Microphone.list_microphone_names(),
) if v in self.name.split(",")
][0]
with sr.Microphone(device_index=idx) as mic:
while not self.should_stop():
try:
self.outq.put(self._run(mic))
except Exception as e:
if not "timed out" in str(e):
log("Reader.run: error:", e)
]
log("mic#", mics[0])
return mics[0]
def run(self):
try:
log("Reader.run: start")
self._run()
except Exception as e:
log("Reader.run panic:", e)
log("microphones:", sr.Microphone.list_microphone_names())
log("Reader:run: exit:", e)
finally:
self.outq.put(None)
log("Reader.run: stop")
log("Reader.run: stop")
def _run(self):
while not self.should_stop():
time.sleep(3)
mic = sr.Microphone(device_index=self.mic_idx())
try:
mic.__enter__()
while not self.should_stop():
try:
self.outq.put(self.read(mic))
except Exception as e:
if not "timed out" in str(e):
raise e
except Exception as e:
import traceback
traceback.print_exception(e)
log("Reader.run: error:", e)
finally:
try:
mic.__exit__(None, None, None)
except Exception as e:
log("Reader.run.catch: error:", e)
def should_stop(self):
return not self.inq.empty()
def _run(self, mic):
def read(self, mic):
mic_timeout = int(os.environ.get("MIC_TIMEOUT", 5))
r = sr.Recognizer()
return r.listen(
@@ -113,13 +136,19 @@ class Parser(threading.Thread):
p = "/tmp/whisper-cpp.wav"
with open("/tmp/whisper-cpp.wav", "wb") as f:
f.write(wav)
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P=2 rust-whisper", capture_output=True, shell=True)
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P={os.environ.get('P', '2')} rust-whisper", capture_output=True, shell=True)
result = proc.stdout.decode().strip()
if os.environ.get("DEBUG", None):
log("stderr:", proc.stderr.decode().strip())
log("raw transcript:", result)
result = result.replace(">>", "")
result = "".join([i.split("]")[-1] for i in result.split("[")[0]])
result = "".join([i.split(")")[-1] for i in result.split("(")[0]])
for pair in [
("[", "]"),
("(", ")"),
("<", ">"),
("*", "*"),
]:
result = "".join([i.split(pair[1])[-1] for i in result.split(pair[0])[0]])
if os.environ.get("DEBUG", None):
log("annotation-free transcript:", result)
return result
@@ -156,6 +185,8 @@ def _load_dot_notation(v, items):
else:
result.append(subresult)
return result
elif k == "KEYS":
v = [k for k in v]
else:
if isinstance(v, list):
v = v[int(k)]
@@ -200,8 +231,15 @@ class Reactor(threading.Thread):
def load_hotwords_in_yaml_file():
with open(p.split("@")[0], "r") as f:
v = yaml.safe_load(f)
v = load_dot_notation(v, p.split("@")[-1])
return ["".join(i.strip().lower().split()) for i in v if i]
if os.environ.get("DEBUG", None):
log(f'opened {p.split("@")[0]} and got {v}')
result = []
for to_find in [i for i in p.split("@")[-1].split(",") if i]:
if os.environ.get("DEBUG", None):
log(f'finding {to_find} in {v}')
v2 = load_dot_notation(v, to_find)
result.extend(["".join(i.strip().lower().split()) for i in v2 if i])
return result
load_hotwords_in_yaml_file()
return load_hotwords_in_yaml_file
else:
@@ -230,13 +268,23 @@ class Reactor(threading.Thread):
log("Reactor.run: stop")
def handle(self, text):
try:
self._handle(text)
except Exception:
pass
def _handle(self, text):
hotwords = self.load_hotwords()
if os.environ.get("DEBUG", None):
log(f"seeking {hotwords} in {text}")
log(f"seeking {hotwords} in {text}. $HOTWORDS={os.environ.get('HOTWORDS', None)}")
if not hotwords:
if not os.environ.get("HOTWORDS", None):
print(text)
if os.environ.get("DEBUG", None):
log(f"HOTWORDS is False; {text}")
print(text, flush=True)
else:
if os.environ.get("DEBUG", None):
log(f"HOTWORDS is True; {text}")
log(text)
return
cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()])
@@ -256,7 +304,7 @@ class Actor(threading.Thread):
self.pid = int(environ["SIGUSR2"])
self.handle = self.handle_signal
elif os.environ.get("URL", ""):
self.url = environ["URL"]
self.url = os.environ["URL"]
self.handle = self.handle_url
self.headers = [i.split("=")[:2] for i in os.environ.get("HEADERS", "").split("//") if i]
self.body = os.environ.get("BODY", '{"hotword":"{{hotword}}","context":"{{context}}"}')
@@ -276,7 +324,7 @@ class Actor(threading.Thread):
def handle_stdout(self, hotword, context):
log(context)
print(hotword)
print(hotword, flush=True)
def handle_signal(self, hotword, context):
self.handle_stderr(hotword, context)
@@ -295,9 +343,12 @@ class Actor(threading.Thread):
body = self.body
body = body.replace("{{hotword}}", hotword)
body = body.replace("{{context}}", context)
url = self.url
url = url.replace("{{hotword}}", hotword)
url = url.replace("{{context}}", context)
if os.environ.get("DEBUG", "") :
log("POST", self.url, headers, body)
requests.post(self.url, headers=headers, data=body)
log("POST", url, headers, body)
requests.post(url, headers=headers, data=body)
except Exception as e:
log("Actor.handle_url:", e)

View File

@@ -2,3 +2,4 @@ git+https://github.com/openai/whisper.git
soundfile
PyAudio
SpeechRecognition
PyYAML

2
hotwords/transcript.sh Normal file
View File

@@ -0,0 +1,2 @@
# Prints (does not execute) the command lines used to record a transcript with
# hotwords.py; copy the one you want and run it yourself.
# $HOME and $(date ...) sit inside double quotes, so they are expanded when this
# script runs and the printed command already contains today's log file path.
echo "pkill -9 -f hotwords.py; MIC_TIMEOUT=30 MODEL=small.en P=4 DEBUG=true HOTWORDS= python3 ./hotwords.py | tee -a $HOME/Sync/drawful/DnD/bdoob/__log.d/$(date +%Y.%m.%d).transcript.txt"
echo "pkill -9 -f hotwords.py; MIC_TIMEOUT=30 MODEL=small.en P=4 DEBUG=true HOTWORDS= python3 ./hotwords.py | tee -a $HOME/Sync/drawful/DnD/nessira.d/_log.d/$(date +%Y.%m.%d).transcript.txt"

13
models/download_models.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/bin/bash
# Downloads the ggml Whisper model files into the directory that contains this
# script. A model whose file already exists is skipped.
src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
# Work next to the script. Abort if that fails, otherwise the models would be
# written into whatever directory the caller happened to be in.
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")" || exit 1
# Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en" "large-v2"; do
    test -f "./ggml-$model.bin" || wget --quiet --show-progress -O "./ggml-$model.bin" "$src-$model.bin"
done
# Distil-Whisper models. The URLs are quoted so the `?` is never treated as a
# shell glob character.
test -f ./ggml-distil-medium.en.bin || wget "https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/ggml-medium-32-2.en.bin?download=true" -O ./ggml-distil-medium.en.bin
test -f ./ggml-distil-large-v2.bin || wget "https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/ggml-large-32-2.en.bin?download=true" -O ./ggml-distil-large-v2.bin

7
models/testme/Cargo.lock generated Normal file
View File

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "testme"
version = "0.1.0"

View File

@@ -1,11 +1,8 @@
[package]
name = "rust-whisper"
name = "testme"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
whisper-rs = "0.5"
wav = "1"
tokio = "1.27"

View File

@@ -0,0 +1,4 @@
// Embeds `./test.txt` into the binary at compile time and prints it.
// In this repository `test.txt` is a symlink to a ggml model file, so building
// this crate checks that `include_bytes!` follows symlinks to large files.
fn main() {
let bytes = include_bytes!("./test.txt");
// The embedded bytes are not guaranteed to be UTF-8; invalid sequences are
// replaced rather than causing a panic.
println!("{}", String::from_utf8_lossy(bytes));
}

1
models/testme/src/test.txt Symbolic link
View File

@@ -0,0 +1 @@
../../ggml-tiny.en.bin

1402
rust-whisper-lib/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,16 @@
[package]
name = "rust-whisper-lib"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
whisper-rs = { path = "../gitea-whisper-rs", version = "0.8.0" }
wav = "1"
tokio = "1.27"
cpal = "0.15.2"
signal-hook = "0.3.17"
byteorder = "1.5.0"
chrono = "0.4.31"
clap = { version = "4.4.10", features = ["derive"] }

363
rust-whisper-lib/src/lib.rs Normal file
View File

@@ -0,0 +1,363 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
use signal_hook::{iterator::Signals, consts::signal::SIGINT};
use std::time::{Duration, Instant};
use clap::Parser;
use std::thread;
use std::fs::File;
use std::io::Write;
// Command-line options for the transcriber, parsed with clap's derive `Parser`.
// NOTE: these are deliberately plain `//` comments. clap turns `///` doc comments
// on the struct and its fields into --help text, which would change program output.
#[derive(Parser, Debug)]
pub struct Flags {
// Path of the model file handed to `WhisperContext::new`.
#[arg(long, default_value = "./models/ggml-tiny.en.bin")]
pub model: String,
// Thread count passed to `FullParams::set_n_threads`.
#[arg(long, default_value = "8")]
pub threads: i32,
// Whole seconds of microphone audio to accumulate between transcriptions
// (used as `Duration::new(stream_step, 0)` in `main`).
#[arg(long, default_value = "5")]
pub stream_step: u64,
// Multiplied by 16_000 in `main` to get the number of trailing samples kept
// in the buffer after each transcription, i.e. seconds of overlap assuming
// 16 kHz audio.
#[arg(long, default_value = "0.6")]
pub stream_retain: f32,
// Multiplied by 100 and compared against word offsets to drop words at the
// very start of each transcript. Presumably seconds, with whisper timestamps
// in 10 ms units -- TODO confirm.
#[arg(long, default_value = "0.3")]
pub stream_head: f32,
// Same as `stream_head`, but measured back from the end of the transcript.
#[arg(long, default_value = "0.3")]
pub stream_tail: f32,
// When set, `main` transcribes this WAV file instead of listening to the
// default input device. It has no `#[arg]` attribute, so clap presumably
// treats it as a positional argument -- confirm against the clap docs.
pub wav: Option<String>,
// When true, `main` also appends the raw f32 samples it transcribes to
// /tmp/page.rawf32audio.
#[arg(long, default_value = "false")]
pub debug: bool,
}
/// Runs the transcriber described by `flags`, delivering every result to `handler_fn`.
///
/// * With `flags.wav` set, the file is read, checked to be mono 16 kHz 16-bit
///   integer PCM, transcribed once, and the function returns.
/// * Otherwise audio from the listener is accumulated and, each time more than
///   `flags.stream_step` seconds have elapsed, the buffer is queued for
///   transcription and trimmed to its last `flags.stream_retain` seconds.
///   The function returns once the listener stops delivering audio.
///
/// `handler_fn` is invoked on the whisper worker thread, not the calling thread.
///
/// # Panics
///
/// Panics if the whisper service cannot be created, if the WAV file cannot be
/// opened/decoded or has the wrong format, if a streaming job cannot be
/// enqueued, or if the debug dump file cannot be created or written.
pub fn main<F>(flags: Flags, handler_fn: F) where F: FnMut(Result<Whispered, String>) + Send + 'static {
    let w = new_whisper_service(
        flags.model,
        flags.threads,
        flags.stream_head,
        flags.stream_tail,
        handler_fn,
    ).unwrap();
    // Number of trailing samples carried over between windows (16 kHz audio).
    let stream_retain = (flags.stream_retain * 16_000.0) as usize;
    let stream_step = Duration::new(flags.stream_step, 0);
    let debug = flags.debug;
    match flags.wav {
        Some(wav) => {
            let (header, data) = wav::read(
                &mut std::fs::File::open(wav).expect("failed to open $WAV"),
            ).expect("failed to decode $WAV");
            assert!(header.channel_count == 1);
            assert!(header.sampling_rate == 16_000);
            // `as_sixteen` only succeeds for 16-bit integer samples.
            let data16 = data.as_sixteen().expect("wav is not 16bit ints");
            let audio_data = whisper_rs::convert_integer_to_float_audio(&data16);
            w.transcribe(&audio_data);
        },
        None => {
            // Truncate the dump file once up front and keep the handle open, so
            // each window is appended without reopening the file.
            let mut debug_file = if debug {
                Some(File::create("/tmp/page.rawf32audio").unwrap())
            } else {
                None
            };
            let mut buffer: Vec<f32> = vec![];
            let mut last = Instant::now();
            new_listener().listen(move |data: Vec<f32>| {
                buffer.extend(data);
                if last.elapsed() > stream_step {
                    // Panics if the job queue is full or the worker thread is gone.
                    w.transcribe_async(&buffer).unwrap();
                    if let Some(f) = debug_file.as_mut() {
                        // Raw little-endian f32 samples, playable with sox.
                        let raw: Vec<u8> = buffer
                            .iter()
                            .flat_map(|sample| sample.to_le_bytes())
                            .collect();
                        f.write_all(&raw).unwrap();
                    }
                    // Keep only the last `stream_retain` samples as overlap for
                    // the next window. `saturating_sub` keeps the whole buffer
                    // when it is shorter than that instead of underflowing.
                    let excess = buffer.len().saturating_sub(stream_retain);
                    buffer.drain(..excess);
                    last = Instant::now();
                }
            });
        },
    };
}
/// Handle used to submit audio to the background transcription thread.
struct WhisperService {
/// Sending half of the bounded job queue consumed by `WhisperImpl::transcribe_asyncs`.
jobs: std::sync::mpsc::SyncSender<AWhisper>,
}
/// Loads the model, spawns the worker thread that drains the job queue, and
/// returns the handle used to enqueue audio.
///
/// The queue holds at most 100 pending jobs. `handler_fn` receives every
/// transcription result (or error) on the worker thread.
///
/// # Errors
///
/// Returns `"failed to initialize engine: …"` when the engine cannot be created.
fn new_whisper_service<F>(model_path: String, threads: i32, stream_head: f32, stream_tail: f32, handler_fn: F) -> Result<WhisperService, String> where F: FnMut(Result<Whispered, String>) + Send + 'static {
    let engine = new_whisper_engine(model_path, threads)
        .map_err(|msg| format!("failed to initialize engine: {}", msg))?;
    let mut worker = new_whisper_impl(engine, stream_head, stream_tail, handler_fn);
    let (jobs, queue) = std::sync::mpsc::sync_channel(100);
    thread::spawn(move || {
        worker.transcribe_asyncs(queue);
    });
    Ok(WhisperService { jobs })
}
impl WhisperService {
    /// Enqueues `data` and blocks until the worker has finished transcribing it.
    ///
    /// # Panics
    ///
    /// Panics if the job cannot be enqueued or the worker goes away before
    /// acknowledging it.
    fn transcribe(&self, data: &[f32]) {
        // Zero-capacity channel: the worker's acknowledgement is a rendezvous
        // with the `recv` below.
        let (send, recv) = std::sync::mpsc::sync_channel(0);
        self._transcribe_async(data, Some(send)).unwrap();
        recv.recv().unwrap();
    }

    /// Enqueues `data` for transcription without waiting for the result.
    ///
    /// # Errors
    ///
    /// Returns an error when the queue is full or the worker has exited.
    fn transcribe_async(&self, data: &[f32]) -> Result<(), String> {
        self._transcribe_async(data, None)
    }

    /// Copies `data` into a job and tries to enqueue it without blocking.
    /// If `ack` is given, the worker reports success or failure on it once
    /// the job has been processed.
    fn _transcribe_async(&self, data: &[f32], ack: Option<std::sync::mpsc::SyncSender<bool>>) -> Result<(), String> {
        self.jobs
            // One copy of the samples is needed because the job outlives the
            // borrow; the original cloned the vector twice.
            .try_send(AWhisper { data: data.to_vec(), ack })
            .map_err(|msg| format!("failed to enqueue transcription: {}", msg))
    }
}
/// Worker-side state: owns the engine, trims each transcript and forwards it to the handler.
struct WhisperImpl {
/// Engine that produces the raw transcript for a buffer of samples.
engine: WhisperEngine,
/// Multiplied by 100 and passed to `Whispered::after` in `on_success`.
stream_head: f32,
/// Multiplied by 100 and passed to `Whispered::before` in `on_success`.
stream_tail: f32,
/// Callback receiving every result; `new_whisper_impl` always stores `Some`.
handler_fn: Option<Box<dyn FnMut(Result<Whispered, String>) + Send + 'static>>
}
/// Bundles the engine, the head/tail trim settings and the boxed result
/// handler into a `WhisperImpl`.
fn new_whisper_impl<F>(engine: WhisperEngine, stream_head: f32, stream_tail: f32, handler_fn: F) -> WhisperImpl where F: FnMut(Result<Whispered, String>) + Send + 'static {
    let boxed_handler: Box<dyn FnMut(Result<Whispered, String>) + Send + 'static> = Box::new(handler_fn);
    WhisperImpl {
        engine,
        stream_head,
        stream_tail,
        handler_fn: Some(boxed_handler),
    }
}
impl WhisperImpl {
    /// Worker loop: processes jobs until every sender of `recv` has been dropped.
    /// Jobs that carry an `ack` channel are answered with whether the
    /// transcription succeeded.
    fn transcribe_asyncs(&mut self, recv: std::sync::mpsc::Receiver<AWhisper>) {
        while let Ok(job) = recv.recv() {
            let succeeded = self.transcribe(&job).is_ok();
            if let Some(ack) = job.ack {
                ack.send(succeeded).unwrap();
            }
        }
    }

    /// Transcribes one job and reports the outcome to the handler.
    /// The returned `Result` only signals success or failure.
    fn transcribe(&mut self, a_whisper: &AWhisper) -> Result<(), ()> {
        let outcome = self.engine.transcribe(&a_whisper.data);
        match outcome {
            Err(msg) => {
                self.on_error(msg.to_string());
                Err(())
            },
            Ok(transcript) => {
                self.on_success(&transcript);
                Ok(())
            },
        }
    }

    /// Drops the words inside the head and tail margins, then hands the rest
    /// to the handler as `Ok`.
    fn on_success(&mut self, whispered: &Whispered) {
        let head = self.stream_head * 100.0;
        let tail = self.stream_tail * 100.0;
        let trimmed = whispered.after(&head).before(&tail);
        (self.handler_fn.as_mut().unwrap())(Ok(trimmed));
    }

    /// Hands a formatted transcription failure to the handler as `Err`.
    fn on_error(&mut self, msg: String) {
        let report = format!("failed to transcribe: {}", &msg);
        (self.handler_fn.as_mut().unwrap())(Err(report));
    }
}
/// Thin wrapper around a loaded whisper model.
struct WhisperEngine {
/// Context created from the model file by `WhisperContext::new`.
ctx: WhisperContext,
/// Thread count applied to every transcription via `set_n_threads`.
threads: i32,
}
/// Loads the model at `model_path` into a `WhisperEngine` that will use
/// `threads` threads per transcription.
///
/// # Errors
///
/// Returns `"failed to load <path>: <reason>"` when the context cannot be created.
fn new_whisper_engine(model_path: String, threads: i32) -> Result<WhisperEngine, String> {
    WhisperContext::new(&model_path)
        .map(|ctx| WhisperEngine { ctx, threads })
        .map_err(|msg| format!("failed to load {}: {}", model_path, msg))
}
impl WhisperEngine {
    /// Runs a full greedy English transcription over `data` and returns the
    /// segments split into words.
    ///
    /// A fresh whisper state is created for every call, and all of the
    /// library's own printing is switched off.
    ///
    /// # Errors
    ///
    /// Propagates any `WhisperError` from creating the state, running the
    /// model, or reading the segments back.
    fn transcribe(&self, data: &Vec<f32>) -> Result<Whispered, WhisperError> {
        let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
        params.set_no_context(true);
        params.set_n_threads(self.threads);
        // Language handling: fixed English, no translation or detection.
        params.set_translate(false);
        params.set_detect_language(false);
        params.set_language(Some("en"));
        // Silence the library's own console output.
        params.set_print_special(false);
        params.set_print_progress(false);
        params.set_print_realtime(false);
        params.set_print_timestamps(false);

        let mut state = self.ctx.create_state()?;
        state.full(params, data.as_slice())?;

        let mut transcript = new_whispered();
        let segment_count = state.full_n_segments()?;
        for segment in 0..segment_count {
            let text = state.full_get_segment_text(segment)?;
            let t0 = state.full_get_segment_t0(segment)?;
            let t1 = state.full_get_segment_t1(segment)?;
            transcript.push(text, t0, t1);
        }
        Ok(transcript)
    }
}
/// One transcription job sent from `WhisperService` to the worker thread.
struct AWhisper {
/// Audio samples to transcribe (an owned copy made when the job is enqueued).
data: Vec<f32>,
/// When present, the worker sends the success flag here after processing the job.
ack: Option<std::sync::mpsc::SyncSender<bool>>,
}
/// A transcript as an ordered list of words with timing information.
#[derive(Clone, Debug)]
pub struct Whispered {
    pub data: Vec<AWhispered>,
}

/// A single transcribed word.
///
/// `offset` and `length` use the same units as the segment timestamps given
/// to `Whispered::push` (presumably whisper's 10 ms ticks -- confirm against
/// the whisper-rs documentation).
#[derive(Clone, Debug)]
pub struct AWhispered {
    pub data: String,
    pub offset: i64,
    pub length: i64,
}

/// Creates an empty transcript.
fn new_whispered() -> Whispered {
    Whispered { data: vec![] }
}

/// Builds a word spanning `start..stop`.
fn new_a_whispered(data: String, start: i64, stop: i64) -> AWhispered {
    AWhispered {
        data,
        offset: start,
        length: stop - start,
    }
}

impl Whispered {
    /// Joins all words, each preceded by a single space (so a non-empty
    /// transcript starts with a space, and an empty one yields `""`).
    pub fn to_string(&self) -> String {
        let mut result = String::new();
        for word in &self.data {
            result.push(' ');
            result.push_str(&word.data);
        }
        result
    }

    /// Returns the words whose offset is at or after `t`.
    fn after(&self, t: &f32) -> Whispered {
        Whispered {
            data: self
                .data
                .iter()
                .filter(|x| x.offset as f32 >= *t)
                .cloned()
                .collect(),
        }
    }

    /// Returns the words that start no later than `t` before the end of the
    /// transcript, where the end is the largest `offset + length`.
    fn before(&self, t: &f32) -> Whispered {
        // An empty transcript falls back to 1, matching the historical behaviour.
        let end = self
            .data
            .iter()
            .map(|x| x.offset + x.length)
            .max()
            .unwrap_or(1);
        let cutoff = (end as f32) - *t;
        Whispered {
            data: self
                .data
                .iter()
                .filter(|x| (x.offset as f32) <= cutoff)
                .cloned()
                .collect(),
        }
    }

    /// Splits a segment's text into words and appends them, spreading the
    /// segment's `start..stop` span evenly across the words.
    ///
    /// A segment without any words (empty or whitespace-only text) adds
    /// nothing; previously it caused a division by zero.
    fn push(&mut self, data: String, start: i64, stop: i64) {
        let words: Vec<&str> = data.split_whitespace().collect();
        if words.is_empty() {
            return;
        }
        let per_word = (stop - start) / (words.len() as i64);
        for (i, word) in words.iter().enumerate() {
            // Offsets are anchored at the segment start so that words from
            // later segments keep their real position in the audio.
            let word_start = start + (i as i64) * per_word;
            self.data
                .push(new_a_whispered(word.to_string(), word_start, word_start + per_word));
        }
    }
}
/// Captures audio from the host's default input device; holds no state of its own.
struct Listener {
}
/// Creates a `Listener`.
fn new_listener() -> Listener {
Listener{}
}
impl Listener {
    /// Starts capturing on a background thread and calls `cb` on the calling
    /// thread with every chunk of downsampled audio, returning once the
    /// capture thread has dropped its sender.
    fn listen(self, mut cb: impl FnMut(Vec<f32>)) {
        let (send, recv) = std::sync::mpsc::sync_channel(100);
        thread::spawn(move || {
            self._listen(send);
        });
        while let Ok(chunk) = recv.recv() {
            cb(chunk);
        }
    }

    /// Opens the default input device with its first f32 configuration at the
    /// maximum sample rate, forwards downsampled chunks over `send`, and keeps
    /// the stream running until SIGINT arrives.
    ///
    /// Chunks that do not fit in the channel are dropped with a message on stderr.
    fn _listen(self, send: std::sync::mpsc::SyncSender<Vec<f32>>) {
        let host = cpal::default_host();
        let device = host.default_input_device().unwrap();
        let cfg = device
            .supported_input_configs()
            .unwrap()
            .find(|candidate| candidate.sample_format() == cpal::SampleFormat::F32)
            .unwrap()
            .with_max_sample_rate();

        // Input samples consumed per output sample: folds the channel count
        // and the reduction to 16 kHz into a single stride.
        let downsample_ratio = cfg.channels() as f32 * (cfg.sample_rate().0 as f32 / 16_000.0);

        let on_audio = move |data: &[f32], _: &cpal::InputCallbackInfo| {
            let wanted = (data.len() as f32 / downsample_ratio) as usize;
            let downsampled_data: Vec<f32> = (0..wanted)
                .map(|i| {
                    let last = (data.len() - 1) as f32;
                    let position = i as f32 * downsample_ratio;
                    // Clamp to the final sample so rounding never reads past the end.
                    let position = if position > last { last } else { position };
                    data[position as usize]
                })
                .collect();
            if let Err(msg) = send.try_send(downsampled_data) {
                eprintln!("failed to ingest audio: {}", msg);
            }
        };
        let on_error = move |err| {
            eprintln!("input error: {}", err)
        };

        let stream = device
            .build_input_stream(&cfg.clone().into(), on_audio, on_error, None)
            .unwrap();
        stream.play().unwrap();
        eprintln!("listening on {}", device.name().unwrap());

        // Block here until the first SIGINT, then stop capturing.
        let mut signals = Signals::new(&[SIGINT]).unwrap();
        if let Some(sig) = signals.forever().next() {
            eprintln!("sig {}", sig);
        }
        stream.pause().unwrap();
    }
}

View File

@@ -0,0 +1 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="List of all items in this crate"><title>List of all items in this crate</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Regular-018c141bf0843ffd.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Medium-8f9a781e4970d388.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Regular-562dcc5011b6de7d.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Bold-a2c9cd1067f8b328.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Semibold-d899c5a5c4aeb14a.ttf.woff2"><link rel="stylesheet" href="../static.files/normalize-76eba96aa4d2e634.css"><link rel="stylesheet" href="../static.files/rustdoc-fa3bb1812debf86c.css"><meta name="rustdoc-vars" data-root-path="../" data-static-root-path="../static.files/" data-current-crate="rust_whisper_lib" data-themes="" data-resource-suffix="" data-rustdoc-version="1.74.0 (79e9716c9 2023-11-13)" data-channel="1.74.0" data-search-js="search-8be46b629f5f14a8.js" data-settings-js="settings-74424d7eec62a23e.js" ><script src="../static.files/storage-fec3eaa3851e447d.js"></script><script defer src="../static.files/main-c5bd66d33317d69f.js"></script><noscript><link rel="stylesheet" href="../static.files/noscript-5d8b3c7633ad77ba.css"></noscript><link rel="alternate icon" type="image/png" href="../static.files/favicon-16x16-8b506e7a72182f1c.png"><link rel="alternate icon" type="image/png" href="../static.files/favicon-32x32-422f7d1d52889060.png"><link rel="icon" 
type="image/svg+xml" href="../static.files/favicon-2c020d218678b618.svg"></head><body class="rustdoc mod"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="mobile-topbar"><button class="sidebar-menu-toggle">&#9776;</button><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a></nav><nav class="sidebar"><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a><h2 class="location"><a href="#">Crate rust_whisper_lib</a></h2><div class="sidebar-elems"><section><ul class="block"><li><a href="#structs">Structs</a></li><li><a href="#functions">Functions</a></li></ul></section></div></nav><main><div class="width-limiter"><nav class="sub"><form class="search-form"><span></span><input class="search-input" name="search" aria-label="Run search in the documentation" autocomplete="off" spellcheck="false" placeholder="Click or press S to search, ? for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../static.files/wheel-7b819b6101059cd0.svg"></a></div></form></nav><section id="main-content" class="content"><h1>List of all items</h1><h3 id="structs">Structs</h3><ul class="all-items"><li><a href="struct.AWhispered.html">AWhispered</a></li><li><a href="struct.Flags.html">Flags</a></li><li><a href="struct.Whispered.html">Whispered</a></li></ul><h3 id="functions">Functions</h3><ul class="all-items"><li><a href="fn.main.html">main</a></li></ul></section></div></main></body></html>

View File

@@ -0,0 +1,2 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="API documentation for the Rust `main` fn in crate `rust_whisper_lib`."><title>main in rust_whisper_lib - Rust</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Regular-018c141bf0843ffd.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Medium-8f9a781e4970d388.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Regular-562dcc5011b6de7d.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Bold-a2c9cd1067f8b328.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Semibold-d899c5a5c4aeb14a.ttf.woff2"><link rel="stylesheet" href="../static.files/normalize-76eba96aa4d2e634.css"><link rel="stylesheet" href="../static.files/rustdoc-fa3bb1812debf86c.css"><meta name="rustdoc-vars" data-root-path="../" data-static-root-path="../static.files/" data-current-crate="rust_whisper_lib" data-themes="" data-resource-suffix="" data-rustdoc-version="1.74.0 (79e9716c9 2023-11-13)" data-channel="1.74.0" data-search-js="search-8be46b629f5f14a8.js" data-settings-js="settings-74424d7eec62a23e.js" ><script src="../static.files/storage-fec3eaa3851e447d.js"></script><script defer src="sidebar-items.js"></script><script defer src="../static.files/main-c5bd66d33317d69f.js"></script><noscript><link rel="stylesheet" href="../static.files/noscript-5d8b3c7633ad77ba.css"></noscript><link rel="alternate icon" type="image/png" href="../static.files/favicon-16x16-8b506e7a72182f1c.png"><link rel="alternate icon" 
type="image/png" href="../static.files/favicon-32x32-422f7d1d52889060.png"><link rel="icon" type="image/svg+xml" href="../static.files/favicon-2c020d218678b618.svg"></head><body class="rustdoc fn"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="mobile-topbar"><button class="sidebar-menu-toggle">&#9776;</button><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a></nav><nav class="sidebar"><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a><div class="sidebar-elems"><h2><a href="index.html">In rust_whisper_lib</a></h2></div></nav><main><div class="width-limiter"><nav class="sub"><form class="search-form"><span></span><input class="search-input" name="search" aria-label="Run search in the documentation" autocomplete="off" spellcheck="false" placeholder="Click or press S to search, ? 
for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../static.files/wheel-7b819b6101059cd0.svg"></a></div></form></nav><section id="main-content" class="content"><div class="main-heading"><h1>Function <a href="index.html">rust_whisper_lib</a>::<wbr><a class="fn" href="#">main</a><button id="copy-path" title="Copy item path to clipboard"><img src="../static.files/clipboard-7571035ce49a181d.svg" width="19" height="18" alt="Copy item path"></button></h1><span class="out-of-band"><a class="src" href="../src/rust_whisper_lib/lib.rs.html#33-90">source</a> · <button id="toggle-all-docs" title="collapse all docs">[<span>&#x2212;</span>]</button></span></div><pre class="rust item-decl"><code>pub fn main&lt;F&gt;(flags: <a class="struct" href="struct.Flags.html" title="struct rust_whisper_lib::Flags">Flags</a>, handler_fn: F)<span class="where fmt-newline">where
F: <a class="trait" href="https://doc.rust-lang.org/1.74.0/core/ops/function/trait.FnMut.html" title="trait core::ops::function::FnMut">FnMut</a>(<a class="enum" href="https://doc.rust-lang.org/1.74.0/core/result/enum.Result.html" title="enum core::result::Result">Result</a>&lt;<a class="struct" href="struct.Whispered.html" title="struct rust_whisper_lib::Whispered">Whispered</a>, <a class="struct" href="https://doc.rust-lang.org/1.74.0/alloc/string/struct.String.html" title="struct alloc::string::String">String</a>&gt;) + <a class="trait" href="https://doc.rust-lang.org/1.74.0/core/marker/trait.Send.html" title="trait core::marker::Send">Send</a> + 'static,</span></code></pre></section></div></main></body></html>

View File

@@ -0,0 +1 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><meta name="generator" content="rustdoc"><meta name="description" content="API documentation for the Rust `rust_whisper_lib` crate."><title>rust_whisper_lib - Rust</title><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Regular-018c141bf0843ffd.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/FiraSans-Medium-8f9a781e4970d388.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Regular-562dcc5011b6de7d.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceSerif4-Bold-a2c9cd1067f8b328.ttf.woff2"><link rel="preload" as="font" type="font/woff2" crossorigin href="../static.files/SourceCodePro-Semibold-d899c5a5c4aeb14a.ttf.woff2"><link rel="stylesheet" href="../static.files/normalize-76eba96aa4d2e634.css"><link rel="stylesheet" href="../static.files/rustdoc-fa3bb1812debf86c.css"><meta name="rustdoc-vars" data-root-path="../" data-static-root-path="../static.files/" data-current-crate="rust_whisper_lib" data-themes="" data-resource-suffix="" data-rustdoc-version="1.74.0 (79e9716c9 2023-11-13)" data-channel="1.74.0" data-search-js="search-8be46b629f5f14a8.js" data-settings-js="settings-74424d7eec62a23e.js" ><script src="../static.files/storage-fec3eaa3851e447d.js"></script><script defer src="../crates.js"></script><script defer src="../static.files/main-c5bd66d33317d69f.js"></script><noscript><link rel="stylesheet" href="../static.files/noscript-5d8b3c7633ad77ba.css"></noscript><link rel="alternate icon" type="image/png" href="../static.files/favicon-16x16-8b506e7a72182f1c.png"><link rel="alternate icon" type="image/png" 
href="../static.files/favicon-32x32-422f7d1d52889060.png"><link rel="icon" type="image/svg+xml" href="../static.files/favicon-2c020d218678b618.svg"></head><body class="rustdoc mod crate"><!--[if lte IE 11]><div class="warning">This old browser is unsupported and will most likely display funky things.</div><![endif]--><nav class="mobile-topbar"><button class="sidebar-menu-toggle">&#9776;</button><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a></nav><nav class="sidebar"><a class="logo-container" href="../rust_whisper_lib/index.html"><img class="rust-logo" src="../static.files/rust-logo-151179464ae7ed46.svg" alt="logo"></a><h2 class="location"><a href="#">Crate rust_whisper_lib</a></h2><div class="sidebar-elems"><ul class="block"><li class="version">Version 0.1.0</li><li><a id="all-types" href="all.html">All Items</a></li></ul><section><ul class="block"><li><a href="#structs">Structs</a></li><li><a href="#functions">Functions</a></li></ul></section></div></nav><main><div class="width-limiter"><nav class="sub"><form class="search-form"><span></span><input class="search-input" name="search" aria-label="Run search in the documentation" autocomplete="off" spellcheck="false" placeholder="Click or press S to search, ? 
for more options…" type="search"><div id="help-button" title="help" tabindex="-1"><a href="../help.html">?</a></div><div id="settings-menu" tabindex="-1"><a href="../settings.html" title="settings"><img width="22" height="22" alt="Change settings" src="../static.files/wheel-7b819b6101059cd0.svg"></a></div></form></nav><section id="main-content" class="content"><div class="main-heading"><h1>Crate <a class="mod" href="#">rust_whisper_lib</a><button id="copy-path" title="Copy item path to clipboard"><img src="../static.files/clipboard-7571035ce49a181d.svg" width="19" height="18" alt="Copy item path"></button></h1><span class="out-of-band"><a class="src" href="../src/rust_whisper_lib/lib.rs.html#1-363">source</a> · <button id="toggle-all-docs" title="collapse all docs">[<span>&#x2212;</span>]</button></span></div><h2 id="structs" class="small-section-header"><a href="#structs">Structs</a></h2><ul class="item-table"><li><div class="item-name"><a class="struct" href="struct.AWhispered.html" title="struct rust_whisper_lib::AWhispered">AWhispered</a></div></li><li><div class="item-name"><a class="struct" href="struct.Flags.html" title="struct rust_whisper_lib::Flags">Flags</a></div></li><li><div class="item-name"><a class="struct" href="struct.Whispered.html" title="struct rust_whisper_lib::Whispered">Whispered</a></div></li></ul><h2 id="functions" class="small-section-header"><a href="#functions">Functions</a></h2><ul class="item-table"><li><div class="item-name"><a class="fn" href="fn.main.html" title="fn rust_whisper_lib::main">main</a></div></li></ul></section></div></main></body></html>

View File

@@ -0,0 +1 @@
window.SIDEBAR_ITEMS = {"fn":["main"],"struct":["AWhispered","Flags","Whispered"]};

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1410
rust-whisper/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

10
rust-whisper/Cargo.toml Normal file
View File

@@ -0,0 +1,10 @@
[package]
name = "rust-whisper"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rust-whisper-lib = { path = "../rust-whisper-lib" }
clap = { version = "4.4.10", features = ["derive"] }

8
rust-whisper/cargo.sh Normal file
View File

@@ -0,0 +1,8 @@
#! /bin/bash
# Wrapper that runs cargo with the parent directory added to the C header and
# library search paths. All arguments are passed through to cargo unchanged.
#RUSTFLAGS="-Clink-args=-lstd++" \

# Only prepend the previous value when it is non-empty: an empty element in
# these colon-separated lists is interpreted as the current directory.
export C_INCLUDE_PATH="${C_INCLUDE_PATH:+$C_INCLUDE_PATH:}$PWD/.."
export LIBRARY_PATH="${LIBRARY_PATH:+$LIBRARY_PATH:}$PWD/.."
cargo "$@"

15
rust-whisper/src/main.rs Normal file
View File

@@ -0,0 +1,15 @@
use rust_whisper_lib;
use clap::Parser;
/// Parses the command-line flags and hands them to `rust_whisper_lib::main`
/// together with a callback that prints successful results to stdout and
/// error messages to stderr.
fn main() {
    let report = |outcome: Result<rust_whisper_lib::Whispered, String>| match outcome {
        Ok(whispered) => println!("{}", whispered.to_string()),
        Err(msg) => eprintln!("error: {}", msg),
    };

    let flags = rust_whisper_lib::Flags::parse();
    rust_whisper_lib::main(flags, report);
}

View File

@@ -1,99 +0,0 @@
#! /bin/bash

# Trains (when needed) a personal snowboy hotword model, then either runs the
# locally built detector against it or, when $PUSH is set, builds and pushes
# an image containing the detector binary, the resources and the model.
#
# Environment:
#   HOTWORD  name of the model (defaults to $TRAIN, then "default_hotword")
#   TRAIN    when set, always re-record and train, then exit
#   PUSH     when set, build and push the image instead of running the detector

echo https://github.com/seasalt-ai/snowboy
# Everything below uses paths relative to the script's own directory.
cd "$(dirname "$(realpath "$BASH_SOURCE")")" || exit 1

set -e
set -o pipefail

if [ ! -d ./snowboy.git.d ]; then
    git clone https://github.com/seasalt-ai/snowboy snowboy.git.d
fi

# With `set -e` active, the script stops here when docker does not answer
# successfully within two seconds.
timeout 2 docker version &> /dev/null

# The pattern is quoted so the shell cannot glob-expand it before grep sees it.
if ! docker images | grep 'snowboy-pmdl.*latest' &> /dev/null; then
    pushd snowboy.git.d
    docker build -t snowboy-pmdl:latest .
    popd
fi

export HOTWORD="${HOTWORD:-${TRAIN:-default_hotword}}"

# Record and train when asked to, or when no model exists for this hotword yet.
if [ -n "$TRAIN" ] || [ ! -d ./model ] || [ ! -f "./model/$HOTWORD.pmdl" ]; then
    mkdir -p model
    pushd model
    rm -f ./record{1,2,3}.wav || true
    echo "record 3 instances of '$HOTWORD'" >&2
    for i in 1 2 3; do
        read -p "[$i/3] ready? you get 3 seconds."
        (
            # timeout ends the recording after 3 seconds; its non-zero exit
            # status is expected, hence the `|| true`.
            timeout 3 rec \
                -r 16000 \
                -c 1 \
                -b 16 \
                -e signed-integer \
                -t wav \
                record$i.wav
        ) || true
        ls record$i.wav
    done
    popd
    docker run \
        --rm \
        -it \
        -v "$(realpath ./model)":/snowboy-master/examples/Python/model \
        snowboy-pmdl:latest
    mv ./model/hotword.pmdl "./model/$HOTWORD.pmdl"
    if [ -n "$TRAIN" ]; then
        exit 0
    fi
fi

# The Python detector path is disabled; the Go binary below is what runs.
if false; then
    if ! which swig; then
        brew install swig
    fi
    pip3 install pyaudio
    pushd snowboy.git.d/swig/Python3/
    make
    popd
    cd snowboy.git.d/examples/Python3/
    echo '
import snowboydecoder
import datetime
detected_callback = lambda *args: print(datetime.datetime.now(), "GOTCHA")
d = snowboydecoder.HotwordDetector("../../../model/'"$HOTWORD"'.pmdl", sensitivity=0.5, audio_gain=1)
d.start(detected_callback)
' > breel.py
    echo GO
    cleanup() {
        echo OK IM DONE NOW
    }
    trap cleanup EXIT
    python3 ./breel.py
else
    resources="$(realpath snowboy.git.d/resources/common.res)"
    hotword="$(realpath "./model/$HOTWORD.pmdl")"
    GOPROXY= go build -o snowboy
    if [ -z "$PUSH" ]; then
        # -ms takes "<model file>/<label>": the label is appended after the
        # model path as an extra path component.
        ./snowboy \
            -ms "$hotword/$HOTWORD" \
            -r "$resources" \
            -s 0.5 \
            "$@"
    else
        # NOTE(review): the training step above renames model/hotword.pmdl to
        # model/$HOTWORD.pmdl, so the COPY of ./model/hotword.pmdl below
        # presumably needs a model literally named "hotword" - confirm.
        echo '
FROM registry-app.eng.qops.net:5001/imported/alpine:3.16
WORKDIR /main/
COPY ./snowboy.git.d/resources/common.res ./
COPY ./model/hotword.pmdl ./
COPY ./snowboy ./
ENTRYPOINT ["sh", "-c", "true; echo copying /main/ to /mnt/; cp /main/* /mnt/"]
CMD []
' > Dockerfile
        docker build -t registry-app.eng.qops.net:5001/breel/snowboy:latest .
        docker push registry-app.eng.qops.net:5001/breel/snowboy:latest
    fi
fi

View File

@@ -1,13 +0,0 @@
module snowboy
go 1.19
require (
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc
)
require (
github.com/Kitt-AI/snowboy v1.3.0 // indirect
github.com/stretchr/testify v1.8.1 // indirect
)

View File

@@ -1,22 +0,0 @@
github.com/Kitt-AI/snowboy v1.3.0 h1:PjBVN84M/9tAzDBQXILAKMoJMxt/fT0nhJ1rhKtVRUc=
github.com/Kitt-AI/snowboy v1.3.0/go.mod h1:sDzzMXFQ1wFkXkZaX/ant0xJsizGVq/9hyKb7ZB3cNI=
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af h1:ijY5OHNQs3CdzTN2XT+zByIsR1QVyXTvOUSkQcBm6pw=
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af/go.mod h1:XcT4k8Tn9hrM5SLVvu5hNQbAC6GojXM0MXz1Rt8CL68=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc h1:yYLpN7bJxKYILKnk20oczGQOQd2h3/7z7/cxdD9Se/I=
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,122 +0,0 @@
// This example streams the microphone thru Snowboy to listen for the hotword,
// by using the PortAudio interface.
//
// HOW TO USE:
// go run examples/Go/listen/main.go [path to snowboy resource file] [path to snowboy hotword file]
//
package main
import (
"bytes"
"encoding/binary"
"flag"
"fmt"
"log"
"path"
"strings"
"time"
"github.com/brentnd/go-snowboy"
"github.com/gordonklaus/portaudio"
)
// Sound represents a sound stream implementing the io.Reader interface
// that provides the microphone data.
type Sound struct {
// stream is the PortAudio input stream; Init assigns it once the stream has started.
stream *portaudio.Stream
// data is the sample buffer passed to PortAudio when the stream is opened;
// Read encodes its contents for the caller.
data []int16
}
// Init initializes the Sound's PortAudio stream.
//
// It allocates the 1024-sample buffer, initializes PortAudio, then opens and
// starts the default input stream (1 channel, 16000 Hz). Init has no return
// value, so a failure is logged and s.stream is left unset.
func (s *Sound) Init() {
	inputChannels := 1
	outputChannels := 0
	sampleRate := 16000
	s.data = make([]int16, 1024)

	// initialize the audio recording interface
	err := portaudio.Initialize()
	if err != nil {
		// Log the failure: merely constructing an error value and dropping
		// it would leave the caller with no sign that anything went wrong.
		log.Printf("Error initialize audio interface: %s", err)
		return
	}

	// open the sound input stream for the microphone
	stream, err := portaudio.OpenDefaultStream(inputChannels, outputChannels, float64(sampleRate), len(s.data), s.data)
	if err != nil {
		log.Printf("Error open default audio stream: %s", err)
		return
	}

	err = stream.Start()
	if err != nil {
		log.Printf("Error on stream start: %s", err)
		return
	}

	s.stream = stream
}
// Close closes down the Sound's PortAudio connection.
//
// The stream is only closed when Init managed to open and start it, so
// calling Close after a failed Init does not touch a nil stream.
func (s *Sound) Close() {
	if s.stream != nil {
		s.stream.Close()
	}
	portaudio.Terminate()
}
// Read is the Sound's implementation of the io.Reader interface.
//
// It pulls the next buffer of samples from the stream, encodes each sample as
// a little-endian 16-bit value and copies as many of those bytes as fit into
// p. The returned count is the number of bytes actually copied. An error is
// returned only when the stream was never opened.
func (s *Sound) Read(p []byte) (int, error) {
	if s.stream == nil {
		return 0, fmt.Errorf("sound stream is not initialized")
	}

	// NOTE(review): the stream's read error is still ignored here; returning
	// it would make the caller stop on every transient read error - confirm
	// which errors should be fatal before propagating it.
	s.stream.Read()

	buf := &bytes.Buffer{}
	for _, v := range s.data {
		binary.Write(buf, binary.LittleEndian, v)
	}

	// copy reports how many bytes really landed in p, which can be fewer
	// than len(p).
	return copy(p, buf.Bytes()), nil
}
// main parses the flags, opens the microphone and the snowboy detector,
// registers one handler per model plus a silence handler, and then feeds the
// microphone into the detector.
func main() {
	var (
		resourcePath = flag.String("r", "", "path to the .res file")
		modelList    = flag.String("ms", "", "comma delimited path to the .?mdl file/output")
		threshold    = flag.Float64("s", 0.45, "0..1")
		silent       = flag.Bool("q", false, "emit '1' on detect else silent")
	)
	flag.Parse()
	if *resourcePath == "" || *modelList == "" {
		panic("all flags must be set")
	}

	// open the mic
	microphone := &Sound{}
	microphone.Init()
	defer microphone.Close()

	// open the snowboy detector
	detector := snowboy.NewDetector(*resourcePath)
	defer detector.Close()

	// set the handlers
	for _, entry := range strings.Split(*modelList, ",") {
		// Each entry is "<model file>/<label>": the directory part is given
		// to snowboy as the model, the base part is what gets printed.
		// Both are declared per iteration so every handler keeps its own.
		modelFile, label := path.Dir(entry), path.Base(entry)
		detector.HandleFunc(snowboy.NewHotword(modelFile, float32(*threshold)), func(string) {
			if !*silent {
				log.Println(label)
			}
			fmt.Println(label)
		})
	}

	detector.HandleSilenceFunc(1*time.Second, func(string) {
		if !*silent {
			log.Println("...")
		}
	})

	// display the detector's expected audio format
	rate, channels, depth := detector.AudioFormat()
	log.Printf("sample rate=%d, num channels=%d, bit depth=%d\n", rate, channels, depth)

	// start detecting using the microphone
	detector.ReadAndDetect(microphone)
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,82 +0,0 @@
#! /bin/bash
# Starts the snowboy detector in the background, writing to /tmp/stt.fifo,
# and runs an inline Python program that reads that fifo. The model list is
# built from the keys of pyautogui.yaml. Extra arguments are passed through
# to ./snowboy.
main() {
    cleanup() {
        # `jobs -p` prints process IDs, which `kill` accepts; `killall`
        # matches process names and would not find them.
        local pids
        pids="$(jobs -p)"
        if [ -n "$pids" ]; then
            kill -9 $pids
        fi
        killall snowboy
    }
    trap cleanup EXIT

    if [ ! -e /tmp/stt.fifo ]; then
        mkfifo /tmp/stt.fifo
    fi

    echo starting in
    for ((i=2; i>0; i--)); do
        echo "...$i..."
        sleep 1
    done

    # One "model/<key>.pmdl/<key>" entry per top-level key of pyautogui.yaml,
    # joined with commas and with the trailing comma removed.
    local models=($(
        cat pyautogui.yaml \
            | gojq -r -c --yaml-input '
to_entries[] | "model/"+.key+".pmdl/"+.key
' \
            | tr '\n' ',' \
            | sed 's/,$//'
    ))
    echo models=$models

    ./snowboy -r resources.res -ms $models "$@" > /tmp/stt.fifo &

    python3 -c '
import pyautogui
import time
keys = set()
def toggle(key):
global keys
if key in keys:
release(key)
else:
hold(key)
def hold(key):
global keys
for keyin in [todrop for todrop in keys]:
if keyin != key:
release(keyin)
keys = set()
keys.add(key)
print()
print("pressing", key)
print()
pyautogui.keyDown(key)
def release(key):
print()
print("releasing", key)
print()
pyautogui.keyUp(key)
def main():
with open("/tmp/stt.fifo", "r") as q:
for line in q:
handle(line.strip())
import yaml
mapping = yaml.safe_load(open("./pyautogui.yaml", "r"))
print(mapping)
def handle(cmd):
global mapping
hold(mapping.get(cmd))
main()
'
}

# Run main only when the script is executed, not when it is sourced.
if [ "$0" == "$BASH_SOURCE" ]; then
    main "$@"
fi

View File

@@ -1,5 +0,0 @@
up: w
down: s
left: a
right: d
jump: w

Binary file not shown.

View File

@@ -1,304 +0,0 @@
import speech_recognition as sr
import time
import threading
import queue
import signal
import sys
import os
import requests
import yaml
def log(*args):
    """Write the given values to stderr on one line, prefixed with ">"."""
    fields = (">",) + args
    print(*fields, file=sys.stderr)
class Piper(threading.Thread):
    """Thread that passes every item taken from ``inq`` to ``self._run``.

    ``_run`` is not defined on this class. A ``None`` item ends the loop,
    after which ``None`` is put on ``outq``.
    """

    def __init__(self, inq, outq):
        super().__init__()
        self.inq = inq
        self.outq = outq

    def run(self):
        item = self.inq.get()
        while item is not None:
            self._run(item)
            item = self.inq.get()
        # Pass the stop marker on to whoever reads outq.
        self.outq.put(None)
class Manager(threading.Thread):
    """Thread that waits for SIGINT and then puts ``None`` on ``outq``."""

    def __init__(self, outq):
        super().__init__()
        self.outq = outq
        wakeups = queue.Queue()
        self.inq = wakeups
        # The signal handler only enqueues a marker; run() reacts to it.
        signal.signal(signal.SIGINT, lambda sig, frame: wakeups.put(None))

    def run(self):
        log("Manager.run: start")
        # Blocks until the SIGINT handler has put something on the queue.
        self.inq.get()
        self.outq.put(None)
        log("Manager.run: stop")
class Reader(threading.Thread):
    """Thread that records clips from a microphone and puts them on ``outq``.

    ``MIC_NAME`` (default ``pulse_monitor``) is a comma-separated list of
    accepted device names; the first listed device whose name is in it is used.
    When ``MIC_NAME`` is set but empty, the available microphones are printed
    and the program exits. Recording stops once anything arrives on ``inq``.
    """

    def __init__(self, inq, outq):
        super().__init__()
        self.name = os.environ.get("MIC_NAME", "pulse_monitor")
        if not self.name:
            available = sr.Microphone.list_microphone_names()
            for index, name in enumerate(available):
                print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
            exit()
        self.inq = inq
        self.outq = outq

    def run(self):
        log("Reader.run: start")
        try:
            accepted = self.name.split(",")
            matches = [
                position
                for position, device in enumerate(sr.Microphone.list_microphone_names())
                if device in accepted
            ]
            # Indexing an empty list raises, which the outer handler reports.
            idx = matches[0]
            with sr.Microphone(device_index=idx) as mic:
                while not self.should_stop():
                    try:
                        clip = self._run(mic)
                        self.outq.put(clip)
                    except Exception as e:
                        # Errors mentioning "timed out" are not logged.
                        if "timed out" not in str(e):
                            log("Reader.run: error:", e)
        except Exception as e:
            log("Reader.run panic:", e)
            log("microphones:", sr.Microphone.list_microphone_names())
        finally:
            self.outq.put(None)
            log("Reader.run: stop")

    def should_stop(self):
        """Return True once anything has been put on ``inq``."""
        return not self.inq.empty()

    def _run(self, mic):
        """Listen on ``mic`` once; ``MIC_TIMEOUT`` (default 5) is used for both limits."""
        limit = int(os.environ.get("MIC_TIMEOUT", 5))
        return sr.Recognizer().listen(
            mic,
            timeout=limit,
            phrase_time_limit=limit,
        )
class Parser(threading.Thread):
    """Thread that transcribes clips from ``inq`` and puts the text on ``outq``.

    A ``None`` clip ends the loop, after which ``None`` is put on ``outq``.
    """

    def __init__(self, inq, outq):
        super().__init__()
        self.inq = inq
        self.outq = outq

    def run(self):
        log("Parser.run: start")
        while True:
            try:
                clip = self.inq.get()
                pending = self.inq.qsize()
                if pending:
                    # Clips are arriving faster than they are transcribed.
                    log("Parser.run backlog", pending)
                if clip is None:
                    break
                text = self._run(clip)
                self.outq.put(text.strip())
            except Exception as e:
                log("Parser.run: error:", e)
        self.outq.put(None)
        log("Parser.run: stop")

    def _run(self, clip):
        """Transcribe ``clip`` with whisper; ``MODEL`` (default ``small.en``) picks the model."""
        # tiny.en=32x, base.en=16x, small.en=6x, medium.en=x2
        model = os.environ.get("MODEL", "small.en")
        return sr.Recognizer().recognize_whisper(clip, language="english", model=model)
def load_dot_notation(v, s):
    """Look up the dot-notation path ``s`` (e.g. ``.a.b.0`` or ``.a[].b``) in ``v``."""
    # "a[]" is shorthand for "a.[]", so normalise it before splitting on dots.
    return _load_dot_notation(v, s.replace("[]", ".[]").split("."))
def _load_dot_notation(v, items):
for i in range(len(items)):
k = items[i]
if not k:
continue
if k == "[]":
if isinstance(v, list):
result = []
for j in v:
subresult = _load_dot_notation(j, items[i+1:])
if isinstance(subresult, list):
result.extend(subresult)
else:
result.append(subresult)
return result
else:
result = []
for j in v.values():
subresult = _load_dot_notation(j, items[i+1:])
if isinstance(subresult, list):
result.extend(subresult)
else:
result.append(subresult)
return result
else:
if isinstance(v, list):
v = v[int(k)]
else:
v = v[k]
return v
def test_load_dot_notation():
    """Self-check for ``load_dot_notation``; raises ``Exception`` on the first mismatch."""
    cases = [
        ("a", "a", "."),
        (["a"], ["a"], "."),
        ("b", {"a": "b"}, ".a"),
        ("c", {"a": {"b": "c"}}, ".a.b"),
        ("c", {"a": {"b": ["c"]}}, ".a.b.0"),
        (["c", "d"], {"a": {"b": "c"}, "a2": {"b": "d"}}, ".[].b"),
        (["c", "d"], {"a": {"b": ["c"], "b2": ["d"]}}, ".a.[].0"),
        (["c", "d"], {"a": {"b": ["c"], "b2": ["d"]}}, ".a[].0"),
        (["c", "d"], ["c", "d"], "."),
        (["c", "d"], ["c", "d"], "[]"),
    ]
    # Every lookup runs before any result is checked, so an error raised by a
    # lookup surfaces even when an earlier comparison was already false.
    outcomes = [expected == load_dot_notation(value, path) for expected, value, path in cases]
    for ok in outcomes:
        if not ok:
            raise Exception(ok)
test_load_dot_notation()
class Reactor(threading.Thread):
    """Thread that scans transcribed text from ``inq`` for hotwords.

    Each hotword found in a text is put on ``outq`` as ``(hotword, text)``.
    A ``None`` text ends the loop, after which ``None`` is put on ``outq``.
    The hotwords come from the ``HOTWORDS`` environment variable and are
    re-loaded for every text.
    """

    def __init__(self, inq, outq):
        threading.Thread.__init__(self)
        self.inq = inq
        self.outq = outq
        self.load_hotwords = Reactor.new_load_hotwords()
        log(f"hotwords: {self.load_hotwords()}")

    @staticmethod
    def new_load_hotwords():
        """Return a zero-argument function that yields the current hotword list.

        ``HOTWORDS`` is interpreted as follows:
          * unset or empty: no hotwords;
          * ``<file>@<dot path>``: a YAML file plus a dot-notation path in it;
          * a readable file: one hotword per line;
          * anything else: a literal list (see the separator note below).
        Hotwords are lower-cased and have all whitespace removed.
        """
        p = os.environ.get("HOTWORDS", None)
        if not p:
            def load_nothing():
                return []
            return load_nothing

        try:
            if "@" in p:
                def load_hotwords_in_yaml_file():
                    with open(p.split("@")[0], "r") as f:
                        v = yaml.safe_load(f)
                    v = load_dot_notation(v, p.split("@")[-1])
                    return ["".join(i.strip().lower().split()) for i in v if i]
                # Called once up front so an unreadable file falls through to
                # the literal interpretation below.
                load_hotwords_in_yaml_file()
                return load_hotwords_in_yaml_file
            else:
                def load_hotwords_in_file():
                    with open(p, "r") as f:
                        return ["".join(i.strip().lower().split()) for i in f.readlines()]
                load_hotwords_in_file()
                return load_hotwords_in_file
        except Exception as e:
            log(f"$HOTWORDS {p} is not a file: {e}")

        # Raw string: same value as before, without the invalid "\/" escape.
        # NOTE(review): the separator is the four characters \/\/ (backslash,
        # slash, twice) - confirm that a plain // was not intended.
        hotwords = ["".join(i.lower().strip().split()) for i in p.split(r"\/\/")]
        log(f'$HOTWORDS: {hotwords}')

        def load_hotwords_as_literal():
            return hotwords
        return load_hotwords_as_literal

    def run(self):
        log("Reactor.run: start")
        while True:
            text = self.inq.get()
            if text is None:
                break
            self.handle(text)
        self.outq.put(None)
        log("Reactor.run: stop")

    def handle(self, text):
        """Put ``(hotword, text)`` on ``outq`` for every hotword contained in ``text``.

        Without hotwords the text itself is emitted: on stdout when
        ``HOTWORDS`` is unset, otherwise via ``log``.
        """
        hotwords = self.load_hotwords()
        if os.environ.get("DEBUG", None):
            log(f"seeking {hotwords} in {text}")
        if not hotwords:
            if not os.environ.get("HOTWORDS", None):
                print(text)
            else:
                log(text)
            return
        # Compare on lower-cased letters only, with whitespace removed, the
        # same normalisation the hotwords received.
        cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()])
        for i in hotwords:
            if i in cleantext:
                #log(f"Reactor.handle: found hotword '{i}' in '{text}' as '{cleantext}'")
                self.outq.put((i, text))
class Actor(threading.Thread):
    """Thread that reacts to ``(hotword, context)`` pairs taken from ``inq``.

    The reaction is chosen once, from the environment, in this order:
    ``STDOUT=true`` prints the hotword on stdout; ``SIGUSR2=<pid>`` sends
    SIGUSR2 to that process; ``URL=<url>`` POSTs to that URL; otherwise the
    match is only logged. A ``None`` item ends the loop.
    """

    def __init__(self, inq):
        threading.Thread.__init__(self)
        self.inq = inq
        self.handle = self.handle_stderr
        if os.environ.get("STDOUT", "") == "true":
            self.handle = self.handle_stdout
        elif os.environ.get("SIGUSR2", ""):
            # Only `os` is imported in this file, so the variable has to be
            # read through os.environ.
            self.pid = int(os.environ["SIGUSR2"])
            self.handle = self.handle_signal
        elif os.environ.get("URL", ""):
            self.url = os.environ["URL"]
            self.handle = self.handle_url
            # HEADERS is a "//"-separated list of key=value pairs. Splitting
            # on the first "=" only keeps values that contain "=" intact.
            self.headers = [i.split("=", 1) for i in os.environ.get("HEADERS", "").split("//") if i]
            self.body = os.environ.get("BODY", '{"hotword":"{{hotword}}","context":"{{context}}"}')
            log(self.headers)

    def run(self):
        log("Actor.run: start")
        while True:
            got = self.inq.get()
            if got is None:
                break
            self.handle(got[0], got[1])
        log("Actor.run: stop")

    def handle_stderr(self, hotword, context):
        """Log the match."""
        log(f"'{hotword}' in '{context}'")

    def handle_stdout(self, hotword, context):
        """Log the context and print the hotword on stdout."""
        log(context)
        print(hotword)

    def handle_signal(self, hotword, context):
        """Log the match and send SIGUSR2 to the configured process."""
        self.handle_stderr(hotword, context)
        os.kill(self.pid, signal.SIGUSR2)

    def handle_url(self, hotword, context):
        """Log the match and POST it to the configured URL.

        ``{{hotword}}`` and ``{{context}}`` are substituted in every header
        value and in the body. Any failure is logged, not raised.
        """
        self.handle_stderr(hotword, context)
        try:
            headers = {}
            for i in self.headers:
                key = i[0]
                value = i[1]
                value = value.replace("{{hotword}}", hotword)
                value = value.replace("{{context}}", context)
                headers[key] = value
            body = self.body
            body = body.replace("{{hotword}}", hotword)
            body = body.replace("{{context}}", context)
            if os.environ.get("DEBUG", ""):
                log("POST", self.url, headers, body)
            requests.post(self.url, headers=headers, data=body)
        except Exception as e:
            log("Actor.handle_url:", e)
def main():
    """Connect the pipeline threads with bounded queues and run them to completion."""
    stop_q = queue.Queue(maxsize=1)
    clip_q = queue.Queue(maxsize=10)
    text_q = queue.Queue(maxsize=10)
    match_q = queue.Queue(maxsize=10)

    # Manager -> Reader -> Parser -> Reactor -> Actor
    pipeline = [
        Manager(stop_q),
        Reader(stop_q, clip_q),
        Parser(clip_q, text_q),
        Reactor(text_q, match_q),
        Actor(match_q),
    ]
    for worker in pipeline:
        worker.start()
    for worker in pipeline:
        worker.join()
if __name__ == "__main__":
main()

View File

@@ -1,7 +0,0 @@
#! /bin/bash
# Installs the dependencies: the PortAudio development package and PyAudio
# through apt, then whisper (from its git repository), soundfile, PyAudio and
# SpeechRecognition through pip.
sudo apt install portaudio19-dev python3-pyaudio
python3 -m pip install git+https://github.com/openai/whisper.git soundfile PyAudio SpeechRecognition
# Disabled: packages for the pocketsphinx alternative.
#sudo apt-get install python3 python3-all-dev python3-pip build-essential swig git libpulse-dev libasound2-dev
#python3 -m pip install pocketsphinx

View File

@@ -1,20 +0,0 @@
# Stage 1: unpack the whisper.cpp v1.2.1 release tarball and build it.
FROM debian:buster as builder
RUN apt -y update && apt -y install build-essential wget ffmpeg
WORKDIR /tmp/whisper-cpp.git.d
# The unpacked directory is renamed to git.d so later paths do not depend on
# the version in its name.
RUN wget https://github.com/ggerganov/whisper.cpp/archive/refs/tags/v1.2.1.tar.gz \
&& tar -xf ./*.tar.gz \
&& mv ./whisper*/ ./git.d
WORKDIR /tmp/whisper-cpp.git.d/git.d
# NOTE(review): `make samples` presumably fetches the sample audio used by CMD below - confirm.
RUN make && make samples
# Stage 2: runtime image holding the whole built tree.
FROM debian:buster
RUN apt -y update && apt -y install curl
COPY --from=builder /tmp/whisper-cpp.git.d/git.d/ /whisper-cpp.git.d/
WORKDIR /whisper.d
# NOTE(review): CMD expects the model under /whisper-cpp.git.d/models/; confirm the
# download script writes there and not into the current WORKDIR.
RUN bash /whisper-cpp.git.d/models/download-ggml-model.sh tiny.en
ENTRYPOINT []
# Default command: run the built main binary on the gb1.wav sample with the tiny.en model.
CMD /whisper-cpp.git.d/main -m /whisper-cpp.git.d/models/ggml-tiny.en.bin -f /whisper-cpp.git.d/samples/gb1.wav -t 4

View File

@@ -1 +0,0 @@
git.d/main

View File

@@ -1,14 +0,0 @@
# git.d/samples/mm0.wav (30s)
| model | threads | rust | c |
| ----- | ------- | ------------- | ----------- |
| tiny | 1 | 4.4s@122% | 4.9s@125% |
| tiny | 2 | 2.7s@210% | 3.3s@190% |
| tiny | 4 | 2.0s@- | 2.9s@400% |
| tiny | 8 | 2.0s@- | 3.1s@700% |
| small | 1 | 23.9s@175% | 28.5s@205% |
| small | 2 | 14.9s@347% | 19.2s@330% |
| small | 4 | 12.3s@515% | 22.1s@530% |
| base | 1 | 8.7s@150% | 10.2s@155% |
| base | 2 | 5.1s@240% | 7.1s@270% |
| base | 4 | 3.8s@370% | 6.0s@430% |

View File

@@ -1,7 +0,0 @@
# Times rust-whisper against ./c-whisper and prints the non-empty output of
# each, with their diagnostic lines filtered out.
#
# Positional arguments (all optional):
#   $1  value passed to c-whisper as -t  (default 1)
#   $2  model file                       (default models/ggml-tiny.en.bin)
#   $3  wav file                         (default git.d/samples/jfk.wav)
# The values are exported as P, MODEL and WAV.
# NOTE(review): rust-whisper gets no arguments here, so it presumably reads
# these variables from the environment - confirm.
export P="${1:-1}"
export MODEL="${2:-models/ggml-tiny.en.bin}"
export WAV="${3:-git.d/samples/jfk.wav}"
echo === RUST
time rust-whisper 2>&1 | grep -v ^whisper_ | grep ..
echo === C
# Quoted so paths containing spaces reach c-whisper as single arguments.
time ./c-whisper -m "$MODEL" -f "$WAV" -t "$P" 2>&1 | grep -v ^whisper_ | grep -v ^system_info | grep -v ^main: | grep ..

Submodule whisper-cpp-2023/git.d deleted from 0a2d1210bc

View File

@@ -1 +0,0 @@
git.d/libwhisper.a

View File

@@ -1 +0,0 @@
git.d/models

View File

@@ -1,24 +0,0 @@
#! /bin/bash
# Clones and builds whisper.cpp on first use, downloads the requested model,
# then either streams from a capture device ($STREAM set) or times a
# transcription of the gb1.wav sample.
#
# Environment:
#   MODEL        model name (default tiny.en)
#   STREAM       when set, run ./stream instead of ./main
#   MIC_TIMEOUT  seconds, used as --length in milliseconds (default 2)
#   MIC_ID       optional device id passed to ./stream as -c

model="${MODEL:-tiny.en}"

if [ ! -d ./git.d/.git ]; then
    git clone https://github.com/ggerganov/whisper.cpp.git git.d
fi

# Stop here if the checkout is missing, so the make calls below never run in
# the wrong directory.
cd ./git.d || exit 1

if [ ! -f ./samples/gb1.wav ]; then
    make samples
fi

if [ ! -f ./main ]; then
    make
fi

if [ ! -f ./stream ]; then
    make stream
fi

if [ ! -f "./models/ggml-$model.bin" ]; then
    bash ./models/download-ggml-model.sh "$model"
fi

if [ -n "$STREAM" ]; then
    # Collected in an array so the device id is passed as one argument and
    # nothing at all is passed when MIC_ID is unset.
    mic_args=()
    if [ -n "$MIC_ID" ]; then
        mic_args=(-c "$MIC_ID")
    fi
    ./stream -m "./models/ggml-$model.bin" -t 8 --step 500 --length "${MIC_TIMEOUT:-2}000" "${mic_args[@]}"
else
    time ./main -m "./models/ggml-$model.bin" -f ./samples/gb1.wav -t 4
fi

View File

@@ -1,366 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bindgen"
version = "0.64.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"lazy_static",
"lazycell",
"log",
"peeking_take_while",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
"which",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clang-sys"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "either"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.140"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
[[package]]
name = "libloading"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
dependencies = [
"cfg-if",
"winapi",
]
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]]
name = "peeking_take_while"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "pin-project-lite"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
[[package]]
name = "proc-macro2"
version = "1.0.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d"
dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "riff"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
[[package]]
name = "rust-whisper"
version = "0.1.0"
dependencies = [
"tokio",
"wav",
"whisper-rs",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "shlex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001"
dependencies = [
"autocfg",
"pin-project-lite",
"windows-sys",
]
[[package]]
name = "unicode-ident"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
[[package]]
name = "wav"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a65e199c799848b4f997072aa4d673c034f80f40191f97fe2f0a23f410be1609"
dependencies = [
"riff",
]
[[package]]
name = "which"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269"
dependencies = [
"either",
"libc",
"once_cell",
]
[[package]]
name = "whisper-rs"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7e1b9b003aa3285a0e4469219566266aa1d51ced1be38587251a4f713a1677"
dependencies = [
"whisper-rs-sys",
]
[[package]]
name = "whisper-rs-sys"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97a389dc665c7354ba6b1982850d4ba05b862907e535708ebdec92cbd9c599e8"
dependencies = [
"bindgen",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"

View File

@@ -1,37 +0,0 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy};
/// Transcribes one WAV file with a whisper model and prints the recognized
/// text to stdout, segments separated by spaces and followed by a newline.
///
/// Configuration is read from the environment:
/// - `MODEL`: path to the ggml model file (default `../models/ggml-tiny.en.bin`)
/// - `P`: thread count handed to `set_n_threads` (default `1`)
/// - `WAV`: path to the input audio (default `../git.d/samples/jfk.wav`)
///
/// # Panics
///
/// Panics if the model cannot be loaded, `P` is not an `i32`, the WAV file
/// cannot be opened or decoded, the audio is not mono / 16 kHz / 16-bit
/// integer samples, or the model run or a segment lookup fails.
fn main() {
    // The closure form only allocates the default when the variable is missing.
    let model_path = std::env::var("MODEL")
        .unwrap_or_else(|_| String::from("../models/ggml-tiny.en.bin"));
    let mut ctx = WhisperContext::new(&model_path).expect("failed to load model");

    let n_threads = std::env::var("P")
        .unwrap_or_else(|_| String::from("1"))
        .parse::<i32>()
        .expect("$P must be a number");

    // create a params object
    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
    params.set_n_threads(n_threads);
    params.set_translate(false);
    params.set_language(Some("en"));
    // Silence the library's own printing; this program prints the segments itself.
    params.set_print_special(false);
    params.set_print_progress(false);
    params.set_print_realtime(false);
    params.set_print_timestamps(false);

    let wav_path =
        std::env::var("WAV").unwrap_or_else(|_| String::from("../git.d/samples/jfk.wav"));
    let mut wav_file = std::fs::File::open(&wav_path).expect("failed to open .wav");
    let (header, data) = wav::read(&mut wav_file).expect("failed to decode .wav");

    assert_eq!(header.channel_count, 1, "wav must be mono");
    assert_eq!(header.sampling_rate, 16000, "wav must be sampled at 16kHz");

    // `as_sixteen` only succeeds for 16-bit integer samples, which are then
    // converted to the float samples passed to `full`.
    let data16 = data.as_sixteen().expect("wav is not 16bit integers");
    let audio_data = whisper_rs::convert_integer_to_float_audio(data16);

    ctx.full(params, &audio_data).expect("failed to run model");

    for i in 0..ctx.full_n_segments() {
        let segment = ctx.full_get_segment_text(i).expect("failed to get segment");
        print!("{} ", segment);
    }
    println!();
}

View File

@@ -1 +0,0 @@
git.d/whisper.h