67 Commits

Author SHA1 Message Date
Bel LaPointe
11b5091872 found the type needed to pass closures with local variables 2023-12-19 21:39:53 -05:00
Bel LaPointe
03370f362e from borrow since a grant is K 2023-12-19 21:20:52 -05:00
Bel LaPointe
ec6a71d38c purge non callback handling 2023-12-19 21:19:37 -05:00
Bel LaPointe
1b96b132e1 dumb callbacks work 2023-12-19 21:18:04 -05:00
Bel LaPointe
839487b99e drop redundant on_success time trimming 2023-12-19 21:11:53 -05:00
Bel LaPointe
a2fee32fbc refactor to whisper_service enqueues, whisper_impl transforms, whisper_engine provides raw 2023-12-19 21:08:59 -05:00
Bel LaPointe
091958e08d moved to a callback BUT costed me a global so lets iterate to someTrait 2023-12-19 20:38:01 -05:00
Bel LaPointe
5f47b2c88b wait i just needed an option? f off 2023-12-19 20:20:24 -05:00
Bel LaPointe
367838ac23 test to show include_bytes! macro supports large symlinks 2023-12-19 16:36:17 -05:00
Bel LaPointe
d05287fa3d update --stream-* defaults 2023-12-19 10:30:10 -05:00
Bel LaPointe
01be2637ca swap order 2023-12-19 10:26:22 -05:00
Bel LaPointe
226bedb80e add --debug to write a file that can be played with cat /tmp/page.rawf32audio | sox -r 16000 -b 32 -t f32 -e floating-point - -d 2023-12-19 10:25:48 -05:00
Bel LaPointe
6b54e500cd i think my recording has gaps 2023-12-19 09:54:21 -05:00
Bel LaPointe
8603f20a24 break into words but keep more stream head/tail tiebreaking 2023-12-19 09:51:11 -05:00
Bel LaPointe
eee0bf5e65 wip... 2023-12-19 09:30:15 -05:00
Bel LaPointe
15a3f8430a WIP trim the head and tail from text output because low confidence 2023-12-19 09:09:38 -05:00
Bel LaPointe
116f3f58c9 no buffer 2023-11-30 12:37:19 -07:00
Bel LaPointe
532ae22908 back to mvp 2023-11-30 12:28:35 -07:00
Bel LaPointe
deffc420ca at least it complies 2023-11-30 12:00:16 -07:00
Bel LaPointe
2391d07994 transcribing results as callbacks 2023-11-30 09:58:28 -07:00
Bel LaPointe
eea4b75bc8 confirmed threaded listen vs transcribe stream is naisu 2023-11-30 09:45:09 -07:00
Bel LaPointe
8982276a90 not infinite buffer 2023-11-30 09:41:12 -07:00
Bel LaPointe
479cfb055f threaded something i guess 2023-11-30 09:39:43 -07:00
Bel LaPointe
0667b5b5c6 large distill too 2023-11-30 09:12:38 -07:00
Bel LaPointe
9e97f8669d fuuuuuuuu lost my models folder oh well 2023-11-30 09:06:26 -07:00
Bel LaPointe
ff0f34f80b move rust to root 2023-11-30 09:02:11 -07:00
Bel LaPointe
bf3dd75074 gitignore 2023-11-30 09:02:02 -07:00
Bel LaPointe
827436d96c drop snowboy 2023-11-30 09:01:50 -07:00
Bel LaPointe
3b4295d026 unnest submodule 2023-11-30 09:01:44 -07:00
Bel LaPointe
2936fec1e4 dont need to choose 1 channel since downsampling should randomly choose from all 2023-11-29 05:33:27 -07:00
Bel LaPointe
1dd631872c from env to flags 2023-11-28 22:31:16 -07:00
Bel LaPointe
72a1420638 wheee 2023-11-28 22:24:10 -07:00
Bel LaPointe
1009c4230e env variable ify 2023-11-28 22:13:05 -07:00
Bel LaPointe
30e5515da1 GOTTEM 2023-11-28 22:03:08 -07:00
Bel LaPointe
b4c9ecb98b successfully confirmed audio is k with sox -r 16000 -t f32 /tmp/transcribed.pcm --default-device 2023-11-28 21:30:24 -07:00
Bel LaPointe
4ef419e6c0 successfully confirmed audio is k with sox -r 44100 -t f32 /tmp/transcribed.pcm --default-device 2023-11-28 21:22:26 -07:00
Bel LaPointe
54964ec59b grrrrr output 2023-11-28 21:03:00 -07:00
Bel LaPointe
62e764436a no warnings but still nothing sane coming out... 2023-11-28 20:36:26 -07:00
Bel LaPointe
d631def834 CLOSER like easily 80 20 range right 2023-11-28 20:32:09 -07:00
Bel LaPointe
3168968cae ok stream les go 2023-11-28 19:23:21 -07:00
Bel LaPointe
437d7cac39 successful refactor 2023-11-28 19:18:05 -07:00
Bel LaPointe
3093a91d84 wip 2023-11-28 19:10:07 -07:00
Bel LaPointe
f58e3a0331 better default err msgs 2023-11-26 17:37:26 -07:00
Bel LaPointe
6dffa401b7 cleaner 2023-11-26 17:21:40 -07:00
Bel LaPointe
f4d9730b5a hm i lost it but i get it back 2023-11-26 17:13:29 -07:00
Bel LaPointe
0c5c1f647c submodule for gitea-whisper-rs 2023-11-26 17:04:16 -07:00
Bel LaPointe
77ad40b61a closer 2023-11-26 17:00:42 -07:00
Bel LaPointe
09894c4fd0 confirmed just needs whisper-rs-sys upgrade for whisper.cpp up 2023-11-26 16:39:42 -07:00
Bel LaPointe
3e2e1e2ff8 wip 2023-11-26 16:23:42 -07:00
Bel LaPointe
50058037eb Revert "try rollback to whisper-rs5 to avoid gpu in whispercpp but no luck"
This reverts commit a483aaf25c.
2023-11-08 11:35:31 -07:00
Bel LaPointe
a483aaf25c try rollback to whisper-rs5 to avoid gpu in whispercpp but no luck 2023-11-08 11:35:29 -07:00
Bel LaPointe
be7d85f85e confirmed whisper.cpp works with distill iff no gpu 2023-11-08 11:29:50 -07:00
Bel LaPointe
60d38c4d5c update distil.sh 2023-11-08 10:58:58 -07:00
Bel LaPointe
e3a7628acf try distil-whisper 2023-11-08 10:22:30 -07:00
Bel LaPointe
91c7791860 up whisper-rs to 0.8.0 2023-11-08 09:25:00 -07:00
bel
247edd2ced more trans 2023-07-15 19:05:00 -06:00
bel
edd94aef72 catch 2023-07-05 22:36:07 -06:00
bel
b4d3e5a27c HOTWORDS yaml @ can have comma delimited and KEYS 2023-04-19 18:24:07 -06:00
bel
a1436e3bd2 revise 2023-04-12 19:37:43 -06:00
bel
410769b8c6 tr 2023-04-12 19:26:03 -06:00
bel
5869016de6 tr 2023-04-12 19:16:07 -06:00
bel
0955f6c0c0 oof 2023-04-12 19:15:32 -06:00
bel
242f4407df script 2023-04-12 18:50:49 -06:00
bel
814a8ae2f3 typo 2023-04-08 22:23:20 -06:00
bel
7c369e72d4 delimiters 2023-04-08 22:22:22 -06:00
bel
0aff4f556b one more 2023-04-08 20:05:03 -06:00
bel
88bf54d022 url replaces hotword,context too 2023-04-02 10:48:41 -06:00
33 changed files with 1807 additions and 800 deletions

2
.gitignore vendored
View File

@@ -2,6 +2,8 @@
/whisper-cpp-2023/rust.d/target
/rust-whisper.d/target
/rust-whisper.d/models
/target/
/models/
snowboy-2022/snowboy
**/*.git.d
**/*.wav

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "rust-whisper.d/gitea-whisper-rs"]
path = gitea-whisper-rs
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git

1345
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

16
Cargo.toml Normal file
View File

@@ -0,0 +1,16 @@
[package]
name = "rust-whisper"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
whisper-rs = { path = "./gitea-whisper-rs", version = "0.8.0" }
wav = "1"
tokio = "1.27"
cpal = "0.15.2"
signal-hook = "0.3.17"
byteorder = "1.5.0"
chrono = "0.4.31"
clap = { version = "4.4.10", features = ["derive"] }

View File

@@ -1,29 +0,0 @@
# stt
## listen on linux
https://wiki.archlinux.org/title/PulseAudio/Examples
```
10. ALSA monitor source
To be able to record from a monitor source (a.k.a. "What-U-Hear", "Stereo Mix"), use pactl list to find out the name of the source in PulseAudio (e.g. alsa_output.pci-0000_00_1b.0.analog-stereo.monitor). Then add lines like the following to /etc/asound.conf or ~/.asoundrc:
pcm.pulse_monitor {
type pulse
device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
}
ctl.pulse_monitor {
type pulse
device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
}
Now you can select pulse_monitor as a recording source.
Alternatively, you can use pavucontrol to do this: make sure you have set up the display to "All input devices", then select "Monitor of [your sound card]" as the recording source.
```
```bash
$ pactl list | grep -A 50 RUNNING | grep -E 'RUNNING|Name:|Monitor Source:' | grep Monitor.Source | head -n 1 | awk '{print $NF}'
```

1
gitea-whisper-rs Submodule

Submodule gitea-whisper-rs added at dd62f2b9f6

View File

@@ -47,7 +47,7 @@ class Reader(threading.Thread):
self.name = os.environ.get("MIC_NAME", "pulse_monitor")
if not self.name:
for index, name in enumerate(sr.Microphone.list_microphone_names()):
print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
log("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
exit()
self.inq = inq
self.outq = outq
@@ -136,14 +136,19 @@ class Parser(threading.Thread):
p = "/tmp/whisper-cpp.wav"
with open("/tmp/whisper-cpp.wav", "wb") as f:
f.write(wav)
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P=2 rust-whisper", capture_output=True, shell=True)
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P={os.environ.get('P', '2')} rust-whisper", capture_output=True, shell=True)
result = proc.stdout.decode().strip()
if os.environ.get("DEBUG", None):
log("stderr:", proc.stderr.decode().strip())
log("raw transcript:", result)
result = result.replace(">>", "")
result = "".join([i.split("]")[-1] for i in result.split("[")[0]])
result = "".join([i.split(")")[-1] for i in result.split("(")[0]])
for pair in [
("[", "]"),
("(", ")"),
("<", ">"),
("*", "*"),
]:
result = "".join([i.split(pair[1])[-1] for i in result.split(pair[0])[0]])
if os.environ.get("DEBUG", None):
log("annotation-free transcript:", result)
return result
@@ -180,6 +185,8 @@ def _load_dot_notation(v, items):
else:
result.append(subresult)
return result
elif k == "KEYS":
v = [k for k in v]
else:
if isinstance(v, list):
v = v[int(k)]
@@ -224,8 +231,15 @@ class Reactor(threading.Thread):
def load_hotwords_in_yaml_file():
with open(p.split("@")[0], "r") as f:
v = yaml.safe_load(f)
v = load_dot_notation(v, p.split("@")[-1])
return ["".join(i.strip().lower().split()) for i in v if i]
if os.environ.get("DEBUG", None):
log(f'opened {p.split("@")[0]} and got {v}')
result = []
for to_find in [i for i in p.split("@")[-1].split(",") if i]:
if os.environ.get("DEBUG", None):
log(f'finding {to_find} in {v}')
v2 = load_dot_notation(v, to_find)
result.extend(["".join(i.strip().lower().split()) for i in v2 if i])
return result
load_hotwords_in_yaml_file()
return load_hotwords_in_yaml_file
else:
@@ -254,13 +268,23 @@ class Reactor(threading.Thread):
log("Reactor.run: stop")
def handle(self, text):
try:
self._handle(text)
except Exception:
pass
def _handle(self, text):
hotwords = self.load_hotwords()
if os.environ.get("DEBUG", None):
log(f"seeking {hotwords} in {text}")
log(f"seeking {hotwords} in {text}. $HOTWORDS={os.environ.get('HOTWORDS', None)}")
if not hotwords:
if not os.environ.get("HOTWORDS", None):
print(text)
if os.environ.get("DEBUG", None):
log(f"HOTWORDS is False; {text}")
print(text, flush=True)
else:
if os.environ.get("DEBUG", None):
log(f"HOTWORDS is True; {text}")
log(text)
return
cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()])
@@ -300,7 +324,7 @@ class Actor(threading.Thread):
def handle_stdout(self, hotword, context):
log(context)
print(hotword)
print(hotword, flush=True)
def handle_signal(self, hotword, context):
self.handle_stderr(hotword, context)
@@ -319,9 +343,12 @@ class Actor(threading.Thread):
body = self.body
body = body.replace("{{hotword}}", hotword)
body = body.replace("{{context}}", context)
url = self.url
url = url.replace("{{hotword}}", hotword)
url = url.replace("{{context}}", context)
if os.environ.get("DEBUG", "") :
log("POST", self.url, headers, body)
requests.post(self.url, headers=headers, data=body)
log("POST", url, headers, body)
requests.post(url, headers=headers, data=body)
except Exception as e:
log("Actor.handle_url:", e)

13
models/download_models.sh Executable file
View File

@@ -0,0 +1,13 @@
#!/bin/bash
# Download the ggml Whisper models, plus the distil-whisper variants, into the
# directory that contains this script. Models already present are skipped.

src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"

# Work next to the script; stop rather than download into some other directory.
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")" || exit 1

# fetch URL DEST [WGET_OPTION...]
# Download URL to DEST unless DEST already exists.
# wget -O creates/truncates its output file before any data arrives, so the
# download goes to DEST.part and is renamed only on success; otherwise an
# interrupted run would leave a truncated DEST that later runs treat as done.
fetch() {
    local url="$1" dest="$2"
    shift 2
    test -f "$dest" && return 0
    if wget "$@" -O "$dest.part" "$url"; then
        mv "$dest.part" "$dest"
    else
        rm -f "$dest.part"
        return 1
    fi
}

# Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en" "large-v2"; do
    fetch "$src-$model.bin" "./ggml-$model.bin" --quiet --show-progress
done

# Distil-Whisper models (URLs quoted: they contain '?', a shell glob character)
fetch "https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/ggml-medium-32-2.en.bin?download=true" ./ggml-distil-medium.en.bin
fetch "https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/ggml-large-32-2.en.bin?download=true" ./ggml-distil-large-v2.bin

7
models/testme/Cargo.lock generated Normal file
View File

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "testme"
version = "0.1.0"

View File

@@ -1,11 +1,8 @@
[package]
name = "rust-whisper"
name = "testme"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
whisper-rs = "0.5"
wav = "1"
tokio = "1.27"

View File

@@ -0,0 +1,4 @@
/// Embeds `./test.txt` into the binary at compile time and prints its bytes,
/// decoded as lossy UTF-8, followed by a newline.
fn main() {
    let embedded: &[u8] = include_bytes!("./test.txt");
    let text = String::from_utf8_lossy(embedded);
    println!("{text}");
}

1
models/testme/src/test.txt Symbolic link
View File

@@ -0,0 +1 @@
../../ggml-tiny.en.bin

View File

@@ -1,366 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bindgen"
version = "0.64.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"lazy_static",
"lazycell",
"log",
"peeking_take_while",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
"which",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clang-sys"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "either"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.140"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
[[package]]
name = "libloading"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
dependencies = [
"cfg-if",
"winapi",
]
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]]
name = "peeking_take_while"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "pin-project-lite"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
[[package]]
name = "proc-macro2"
version = "1.0.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d"
dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "riff"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
[[package]]
name = "rust-whisper"
version = "0.1.0"
dependencies = [
"tokio",
"wav",
"whisper-rs",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "shlex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001"
dependencies = [
"autocfg",
"pin-project-lite",
"windows-sys",
]
[[package]]
name = "unicode-ident"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
[[package]]
name = "wav"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a65e199c799848b4f997072aa4d673c034f80f40191f97fe2f0a23f410be1609"
dependencies = [
"riff",
]
[[package]]
name = "which"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269"
dependencies = [
"either",
"libc",
"once_cell",
]
[[package]]
name = "whisper-rs"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7e1b9b003aa3285a0e4469219566266aa1d51ced1be38587251a4f713a1677"
dependencies = [
"whisper-rs-sys",
]
[[package]]
name = "whisper-rs-sys"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97a389dc665c7354ba6b1982850d4ba05b862907e535708ebdec92cbd9c599e8"
dependencies = [
"bindgen",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"

View File

@@ -1,10 +0,0 @@
#!/bin/bash
# Download the ggml Whisper models into a target directory.
# Usage: script [DIR]   (DIR defaults to "$PWD/models")
# Models already present in DIR are skipped.

src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
d="${1:-"$PWD"/models}"

# Without the target directory every download below would fail, so stop here.
mkdir -p "$d" || exit 1

# Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en"; do
    dest="$d/ggml-$model.bin"
    test -f "$dest" && continue
    # wget -O creates/truncates its output file before any data arrives, so
    # download to a .part file and rename only on success; otherwise a failed
    # run leaves a truncated model that the `test -f` above would accept.
    if wget --quiet --show-progress -O "$dest.part" "$src-$model.bin"; then
        mv "$dest.part" "$dest"
    else
        rm -f "$dest.part"
    fi
done

View File

@@ -1,37 +0,0 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy};
/// Transcribes a WAV file with Whisper and prints the segments on one line.
///
/// Configuration is read from environment variables:
/// - `MODEL`: path to the ggml model (default `../models/ggml-tiny.en.bin`)
/// - `P`: number of threads, parsed as `i32` (default `1`)
/// - `WAV`: path to the input file (default `../git.d/samples/jfk.wav`)
///
/// # Panics
///
/// Panics if the model cannot be loaded, `P` is not a number, the WAV file
/// cannot be opened or decoded, the audio is not mono / 16 kHz / 16-bit
/// integer samples, or the model run or segment lookup fails.
fn main() {
    let model_path =
        std::env::var("MODEL").unwrap_or_else(|_| String::from("../models/ggml-tiny.en.bin"));
    let mut ctx = WhisperContext::new(&model_path).expect("failed to load model");

    // create a params object
    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
    let threads = std::env::var("P")
        .unwrap_or_else(|_| String::from("1"))
        .parse::<i32>()
        .expect("$P must be a number");
    params.set_n_threads(threads);
    params.set_translate(false);
    params.set_language(Some("en"));
    // Keep the library quiet so stdout carries only the transcript.
    params.set_print_special(false);
    params.set_print_progress(false);
    params.set_print_realtime(false);
    params.set_print_timestamps(false);

    let wav_path =
        std::env::var("WAV").unwrap_or_else(|_| String::from("../git.d/samples/jfk.wav"));
    let mut wav_file = std::fs::File::open(&wav_path).expect("failed to open .wav");
    let (header, data) = wav::read(&mut wav_file).expect("failed to decode .wav");
    assert_eq!(header.channel_count, 1, "wav must be mono");
    assert_eq!(header.sampling_rate, 16000, "wav must be sampled at 16000 Hz");

    // `as_sixteen` yields the samples only when they are 16-bit integers.
    let data16 = data.as_sixteen().expect("wav is not 16bit integers");
    let audio_data = &whisper_rs::convert_integer_to_float_audio(data16);

    ctx.full(params, &audio_data[..])
        .expect("failed to run model");

    let num_segments = ctx.full_n_segments();
    for i in 0..num_segments {
        let segment = ctx.full_get_segment_text(i).expect("failed to get segment");
        print!("{} ", segment);
    }
    println!();
}

View File

@@ -1,99 +0,0 @@
#! /bin/bash
# Train (when needed) a personal snowboy hotword model with the dockerized
# trainer, then build the Go listener and either run it locally or publish it
# in a container image (when PUSH is set).
# Inputs: HOTWORD / TRAIN / PUSH environment variables; extra arguments are
# forwarded to the listener.
echo https://github.com/seasalt-ai/snowboy
# Every relative path below is resolved from the directory holding this script.
cd "$(dirname "$(realpath "$BASH_SOURCE")")"
# Abort on the first failing command, including a failure inside a pipeline.
set -e
set -o pipefail
# Fetch the snowboy sources once.
if [ ! -d ./snowboy.git.d ]; then
git clone https://github.com/seasalt-ai/snowboy snowboy.git.d
fi
# Under `set -e` this ends the script when docker fails or does not answer within 2 seconds.
timeout 2 docker version &> /dev/null
# Build the trainer image unless `docker images` already lists a matching snowboy-pmdl ... latest line.
if ! docker images | grep snowboy-pmdl.*latest &> /dev/null; then
pushd snowboy.git.d
docker build -t snowboy-pmdl:latest .
popd
fi
# HOTWORD falls back to TRAIN, and then to "default_hotword".
export HOTWORD="${HOTWORD:-${TRAIN:-default_hotword}}"
# Record and train when TRAIN is set or when no model exists yet for HOTWORD.
if [ -n "$TRAIN" ] || [ ! -d ./model ] || [ ! -f ./model/$HOTWORD.pmdl ]; then
mkdir -p model
pushd model
rm -f ./record{1,2,3}.wav || true
echo "record 3 instances of '$HOTWORD'" >&2
for i in 1 2 3; do
read -p "[$i/3] ready? you get 3 seconds."
# Record a 16 kHz, mono, 16-bit signed-integer WAV, cut off after 3 seconds;
# the non-zero exit status from the cut-off is tolerated.
(
timeout 3 rec \
-r 16000 \
-c 1 \
-b 16 \
-e signed-integer \
-t wav \
record$i.wav
) || true
# Under `set -e` this aborts the script if the recording was not written.
ls record$i.wav
done
popd
# Run the trainer with ./model mounted into the container; the mv below expects
# it to leave hotword.pmdl in that directory.
docker run \
--rm \
-it \
-v "$(realpath ./model)":/snowboy-master/examples/Python/model \
snowboy-pmdl:latest
mv ./model/hotword.pmdl ./model/$HOTWORD.pmdl
# TRAIN means "train only": stop before building or running the listener.
if [ -n "$TRAIN" ]; then
exit 0
fi
fi
# The Python listener branch is switched off by the constant `false`; only the
# Go branch after `else` ever runs.
if false; then
if ! which swig; then
brew install swig
fi
pip3 install pyaudio
pushd snowboy.git.d/swig/Python3/
make
popd
cd snowboy.git.d/examples/Python3/
echo '
import snowboydecoder
import datetime
detected_callback = lambda *args: print(datetime.datetime.now(), "GOTCHA")
d = snowboydecoder.HotwordDetector("../../../model/'"$HOTWORD"'.pmdl", sensitivity=0.5, audio_gain=1)
d.start(detected_callback)
' > breel.py
echo GO
cleanup() {
echo OK IM DONE NOW
}
trap cleanup EXIT
python3 ./breel.py
else
resources="$(realpath snowboy.git.d/resources/common.res)"
hotword="$(realpath ./model/$HOTWORD.pmdl)"
# GOPROXY is set to the empty string for this one command only.
GOPROXY= go build -o snowboy
# Without PUSH: run the listener locally. The -ms value is the model path
# followed by "/" and the hotword name.
if [ -z "$PUSH" ]; then
./snowboy \
-ms "$hotword/$HOTWORD" \
-r "$resources" \
-s 0.5 \
"$@"
else
# With PUSH: write a Dockerfile for an image whose entrypoint copies /main/*
# to /mnt/, then build and push that image.
# NOTE(review): the Dockerfile copies ./model/hotword.pmdl, but the training
# step above renames that file to $HOTWORD.pmdl — confirm it exists at build time.
echo '
FROM registry-app.eng.qops.net:5001/imported/alpine:3.16
WORKDIR /main/
COPY ./snowboy.git.d/resources/common.res ./
COPY ./model/hotword.pmdl ./
COPY ./snowboy ./
ENTRYPOINT ["sh", "-c", "true; echo copying /main/ to /mnt/; cp /main/* /mnt/"]
CMD []
' > Dockerfile
docker build -t registry-app.eng.qops.net:5001/breel/snowboy:latest .
docker push registry-app.eng.qops.net:5001/breel/snowboy:latest
fi
fi

View File

@@ -1,13 +0,0 @@
module snowboy
go 1.19
require (
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc
)
require (
github.com/Kitt-AI/snowboy v1.3.0 // indirect
github.com/stretchr/testify v1.8.1 // indirect
)

View File

@@ -1,22 +0,0 @@
github.com/Kitt-AI/snowboy v1.3.0 h1:PjBVN84M/9tAzDBQXILAKMoJMxt/fT0nhJ1rhKtVRUc=
github.com/Kitt-AI/snowboy v1.3.0/go.mod h1:sDzzMXFQ1wFkXkZaX/ant0xJsizGVq/9hyKb7ZB3cNI=
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af h1:ijY5OHNQs3CdzTN2XT+zByIsR1QVyXTvOUSkQcBm6pw=
github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af/go.mod h1:XcT4k8Tn9hrM5SLVvu5hNQbAC6GojXM0MXz1Rt8CL68=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc h1:yYLpN7bJxKYILKnk20oczGQOQd2h3/7z7/cxdD9Se/I=
github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,122 +0,0 @@
// This example streams the microphone thru Snowboy to listen for the hotword,
// by using the PortAudio interface.
//
// HOW TO USE:
// go run examples/Go/listen/main.go [path to snowboy resource file] [path to snowboy hotword file]
//
package main
import (
"bytes"
"encoding/binary"
"flag"
"fmt"
"log"
"path"
"strings"
"time"
"github.com/brentnd/go-snowboy"
"github.com/gordonklaus/portaudio"
)
// Sound represents a sound stream implementing the io.Reader interface
// that provides the microphone data.
type Sound struct {
// stream is the PortAudio stream; Init assigns it only after the stream
// has been opened and started successfully.
stream *portaudio.Stream
// data is the sample buffer handed to PortAudio when the stream is opened;
// Init allocates it with 1024 int16 samples and Read serializes it.
data []int16
}
// Init initializes the Sound's PortAudio stream.
//
// It allocates the 1024-sample capture buffer, initializes PortAudio, then
// opens and starts the default stream with 1 input channel, no output
// channels and a 16 kHz sample rate. On any failure the error is logged and
// Init returns early without assigning s.stream.
func (s *Sound) Init() {
	const (
		inputChannels  = 1
		outputChannels = 0
		sampleRate     = 16000
	)

	s.data = make([]int16, 1024)

	// initialize the audio recording interface
	if err := portaudio.Initialize(); err != nil {
		// fmt.Errorf only builds an error value; log so the failure is visible.
		log.Printf("Error initialize audio interface: %s", err)
		return
	}

	// open the sound input stream for the microphone
	stream, err := portaudio.OpenDefaultStream(inputChannels, outputChannels, float64(sampleRate), len(s.data), s.data)
	if err != nil {
		log.Printf("Error open default audio stream: %s", err)
		return
	}

	if err := stream.Start(); err != nil {
		log.Printf("Error on stream start: %s", err)
		return
	}

	s.stream = stream
}
// Close closes down the Sound's PortAudio connection.
//
// The stream is closed only if Init managed to set it, so Close is safe to
// defer even when Init returned early. PortAudio is terminated either way.
func (s *Sound) Close() {
	if s.stream != nil {
		s.stream.Close()
	}
	portaudio.Terminate()
}
// Read is the Sound's implementation of the io.Reader interface.
//
// It reads one buffer of samples from the stream, serializes the samples as
// little-endian int16 values and copies as many bytes as fit into p. The
// returned count is the number of bytes actually copied. A stream read error
// is logged and the current buffer contents are still delivered; an
// uninitialized stream is reported as an error.
func (s *Sound) Read(p []byte) (int, error) {
	if s.stream == nil {
		return 0, fmt.Errorf("sound stream is not initialized")
	}
	if err := s.stream.Read(); err != nil {
		// Keep delivering audio as before, but no longer hide the failure.
		log.Printf("Error reading audio stream: %s", err)
	}

	buf := &bytes.Buffer{}
	buf.Grow(2 * len(s.data)) // two bytes per int16 sample
	if err := binary.Write(buf, binary.LittleEndian, s.data); err != nil {
		return 0, err
	}

	// Report what was really copied: p may be shorter or longer than the
	// serialized buffer, and io.Reader callers rely on the count.
	return copy(p, buf.Bytes()), nil
}
// main parses the command-line flags, opens the microphone, registers one
// handler per "<model path>/<name>" entry given with -ms, and then feeds the
// microphone stream to the snowboy detector.
func main() {
	var (
		resPath  = flag.String("r", "", "path to the .res file")
		modelArg = flag.String("ms", "", "comma delimited path to the .?mdl file/output")
		sens     = flag.Float64("s", 0.45, "0..1")
		silent   = flag.Bool("q", false, "emit '1' on detect else silent")
	)
	flag.Parse()

	if len(*resPath) == 0 || len(*modelArg) == 0 {
		panic("all flags must be set")
	}

	// open the mic
	mic := &Sound{}
	mic.Init()
	defer mic.Close()

	// open the snowboy detector
	d := snowboy.NewDetector(*resPath)
	defer d.Close()

	// set the handlers: the directory part of each entry is the model file,
	// the last path element is the name printed when that model fires
	for _, entry := range strings.Split(*modelArg, ",") {
		modelFile, label := path.Dir(entry), path.Base(entry)
		hotword := snowboy.NewHotword(modelFile, float32(*sens))
		d.HandleFunc(hotword, func(string) {
			if !*silent {
				log.Println(label)
			}
			fmt.Println(label)
		})
	}

	onSilence := func(string) {
		if !*silent {
			log.Println("...")
		}
	}
	d.HandleSilenceFunc(1*time.Second, onSilence)

	// display the detector's expected audio format
	rate, channels, depth := d.AudioFormat()
	log.Printf("sample rate=%d, num channels=%d, bit depth=%d\n", rate, channels, depth)

	// start detecting using the microphone
	d.ReadAndDetect(mic)
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -1,82 +0,0 @@
#! /bin/bash
# main starts the snowboy hotword detector in the background, writing its
# stdout into the /tmp/stt.fifo named pipe, and runs an inline Python consumer
# that turns every line read from the pipe into a pyautogui key press.
# Extra arguments ("$@") are forwarded to ./snowboy.
main() {
  # cleanup runs on exit and tears down whatever this script started.
  cleanup() {
    local pids
    pids="$(jobs -p)"
    if [ -n "$pids" ]; then
      # `jobs -p` prints process IDs, which `kill` accepts; `killall` expects
      # process names and would never match them.
      # shellcheck disable=SC2086  # word splitting is wanted: one PID per word
      kill -9 $pids
    fi
    killall snowboy
  }
  trap cleanup EXIT

  if [ ! -e /tmp/stt.fifo ]; then
    mkfifo /tmp/stt.fifo
  fi

  echo starting in
  for ((i=2; i>0; i--)); do
    echo "...$i..."
    sleep 1
  done

  # Build the comma separated "-ms" value: one "model/<key>.pmdl/<key>" entry
  # per top-level key of pyautogui.yaml.
  local models
  models="$(
    cat pyautogui.yaml \
      | gojq -r -c --yaml-input '
        to_entries[] | "model/"+.key+".pmdl/"+.key
      ' \
      | tr '\n' ',' \
      | sed 's/,$//'
  )"
  echo "models=$models"

  ./snowboy -r resources.res -ms "$models" "$@" > /tmp/stt.fifo &

  # The Python program must start at column 0 inside the quoted string.
  python3 -c '
import pyautogui
import time
keys = set()
def toggle(key):
    global keys
    if key in keys:
        release(key)
    else:
        hold(key)
def hold(key):
    global keys
    for keyin in [todrop for todrop in keys]:
        if keyin != key:
            release(keyin)
    keys = set()
    keys.add(key)
    print()
    print("pressing", key)
    print()
    pyautogui.keyDown(key)
def release(key):
    print()
    print("releasing", key)
    print()
    pyautogui.keyUp(key)
def main():
    with open("/tmp/stt.fifo", "r") as q:
        for line in q:
            handle(line.strip())
import yaml
mapping = yaml.safe_load(open("./pyautogui.yaml", "r"))
print(mapping)
def handle(cmd):
    global mapping
    hold(mapping.get(cmd))
main()
'
}
# Run main only when this file is executed directly, not when it is sourced.
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
  main "$@"
fi

View File

@@ -1,5 +0,0 @@
# Command name -> keyboard key.
# NOTE(review): presumably this is the pyautogui.yaml consumed by the launcher
# script (each key naming a hotword model, each value the key to press) - confirm.
# "up" and "jump" both map to "w".
up: w
down: s
left: a
right: d
jump: w

Binary file not shown.

374
src/main.rs Normal file
View File

@@ -0,0 +1,374 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
use signal_hook::{iterator::Signals, consts::signal::SIGINT};
use std::time::{Duration, Instant};
use chrono;
use clap::Parser;
use std::thread;
use std::fs::File;
use std::io::Write;
// Command-line options, parsed with clap's derive API.
// Plain `//` comments are used here on purpose: clap turns `///` doc comments
// on these fields into --help text, which would change program output.
#[derive(Parser, Debug)]
struct Flags {
// Path of the model file loaded through WhisperContext::new.
#[arg(long, default_value = "./models/ggml-tiny.en.bin")]
model: String,
// Thread count forwarded to FullParams::set_n_threads.
#[arg(long, default_value = "8")]
threads: i32,
// Whole seconds that must elapse between two live transcriptions.
#[arg(long, default_value = "5")]
stream_step: u64,
// Amount of audio kept after each live transcription; multiplied by 16_000
// to get a sample count.
#[arg(long, default_value = "0.6")]
stream_retain: f32,
// Words whose offset is below stream_head * 100 are dropped from each result.
#[arg(long, default_value = "0.3")]
stream_head: f32,
// Words whose offset is above (result end - stream_tail * 100) are dropped.
#[arg(long, default_value = "0.3")]
stream_tail: f32,
// Has no #[arg(long)], so clap treats it as an optional positional argument.
// When present, that file is transcribed instead of listening to the microphone.
wav: Option<String>,
// When set, captured audio is also appended to /tmp/page.rawf32audio.
#[arg(long, default_value = "false")]
debug: bool,
}
/// Entry point.
///
/// Builds the whisper service with a handler that logs each result to stderr
/// and prints its text to stdout, then either
///
/// * transcribes the WAV file given on the command line (which must be mono,
///   16 kHz, 16-bit), blocking until the worker has handled it, or
/// * listens to the microphone and, every `stream_step` seconds, queues the
///   accumulated audio for transcription while keeping the last
///   `stream_retain * 16_000` samples as the start of the next chunk.
///
/// # Panics
///
/// Panics when the model cannot be loaded, when the WAV file cannot be opened,
/// decoded or has an unexpected format, and when the `--debug` dump file cannot
/// be created or written.
fn main() {
    let flags = Flags::parse();
    let w = new_whisper_service(
        flags.model,
        flags.threads,
        flags.stream_head,
        flags.stream_tail,
        |result: Result<Whispered, String>| match result {
            Ok(whispered) => {
                eprintln!("{}: {:?}", chrono::Local::now(), whispered);
                println!("{}", whispered.to_string());
            }
            Err(msg) => {
                eprintln!("Error whispering: {}", msg);
            }
        },
    )
    .unwrap();

    let stream_retain = (flags.stream_retain * 16_000.0) as usize;
    let stream_step = Duration::new(flags.stream_step, 0);
    let debug = flags.debug;

    match flags.wav {
        Some(wav) => {
            let (header, data) = wav::read(
                &mut std::fs::File::open(wav).expect("failed to open $WAV"),
            )
            .expect("failed to decode $WAV");
            assert!(header.channel_count == 1);
            assert!(header.sampling_rate == 16_000);
            // as_sixteen yields the 16-bit integer samples, so say so on failure.
            let data16 = data.as_sixteen().expect("wav is not 16bit integers");
            let audio_data = whisper_rs::convert_integer_to_float_audio(&data16);
            w.transcribe(&audio_data);
        }
        None => {
            if debug {
                // Start every run with an empty dump file.
                File::create("/tmp/page.rawf32audio").unwrap();
            }
            let mut buffer: Vec<f32> = vec![];
            let mut last = Instant::now();
            new_listener().listen(move |data: Vec<f32>| {
                buffer.extend_from_slice(&data);
                if last.elapsed() <= stream_step {
                    return;
                }

                // A full (or closed) job queue drops this chunk instead of
                // panicking inside the capture loop.
                if let Err(msg) = w.transcribe_async(&buffer) {
                    eprintln!("{}", msg);
                }

                if debug {
                    let mut f = File::options()
                        .append(true)
                        .open("/tmp/page.rawf32audio")
                        .unwrap();
                    let raw: Vec<u8> = buffer
                        .iter()
                        .flat_map(|sample| sample.to_le_bytes())
                        .collect();
                    f.write_all(&raw).unwrap();
                }

                // Keep only the newest `stream_retain` samples; clamping makes
                // this safe when the buffer is shorter than that.
                let keep = stream_retain.min(buffer.len());
                let stale = buffer.len() - keep;
                buffer.drain(..stale);
                last = Instant::now();
            });
        }
    }
}
/// Handle used to submit audio to the background transcription worker.
struct WhisperService {
/// Sending half of the job queue; new_whisper_service creates it with a
/// capacity of 100 and hands the receiving half to the worker thread.
jobs: std::sync::mpsc::SyncSender<AWhisper>,
}
/// Loads the model, starts the transcription worker thread and returns the
/// handle used to queue audio for it.
///
/// `handler_fn` is moved into the worker and receives every transcription
/// result or error. The job queue holds at most 100 pending chunks.
///
/// # Errors
///
/// Returns `"failed to initialize engine: …"` when the engine cannot be built.
fn new_whisper_service<F>(
    model_path: String,
    threads: i32,
    stream_head: f32,
    stream_tail: f32,
    handler_fn: F,
) -> Result<WhisperService, String>
where
    F: FnMut(Result<Whispered, String>) + Send + 'static,
{
    let engine = new_whisper_engine(model_path, threads)
        .map_err(|msg| format!("failed to initialize engine: {}", msg))?;
    let mut worker = new_whisper_impl(engine, stream_head, stream_tail, handler_fn);
    let (jobs, queue) = std::sync::mpsc::sync_channel(100);
    thread::spawn(move || {
        worker.transcribe_asyncs(queue);
    });
    Ok(WhisperService { jobs })
}
impl WhisperService {
    /// Queues `data` and blocks until the worker has acknowledged the job.
    ///
    /// # Panics
    ///
    /// Panics when the job cannot be queued or the worker goes away before
    /// acknowledging it.
    fn transcribe(&self, data: &[f32]) {
        // Capacity 0 makes the acknowledgement a rendezvous with the worker.
        let (send, recv) = std::sync::mpsc::sync_channel(0);
        self._transcribe_async(data, Some(send)).unwrap();
        recv.recv().unwrap();
    }

    /// Queues `data` without waiting for the result.
    ///
    /// # Errors
    ///
    /// Returns `"failed to enqueue transcription: …"` when the queue is full
    /// or the worker has gone away.
    fn transcribe_async(&self, data: &[f32]) -> Result<(), String> {
        self._transcribe_async(data, None)
    }

    /// Copies `data` into a job and tries to queue it without blocking.
    ///
    /// When `ack` is set it is carried along with the job so the worker can
    /// report completion on it.
    fn _transcribe_async(
        &self,
        data: &[f32],
        ack: Option<std::sync::mpsc::SyncSender<bool>>,
    ) -> Result<(), String> {
        // One copy is enough: the job needs its own buffer, nothing more.
        let job = AWhisper {
            data: data.to_vec(),
            ack,
        };
        self.jobs
            .try_send(job)
            .map_err(|msg| format!("failed to enqueue transcription: {}", msg))
    }
}
/// Worker-side state: runs the engine, trims each result and reports it to
/// the handler.
struct WhisperImpl {
/// Engine that produces the raw transcription.
engine: WhisperEngine,
/// Multiplied by 100 and passed to Whispered::after on every result.
stream_head: f32,
/// Multiplied by 100 and passed to Whispered::before on every result.
stream_tail: f32,
/// Callback receiving each result or error; new_whisper_impl always stores
/// Some, and it is unwrapped on every use.
handler_fn: Option<Box<dyn FnMut(Result<Whispered, String>) + Send + 'static>>
}
/// Bundles the engine, the head/tail trim settings and the boxed result
/// handler into a `WhisperImpl`.
fn new_whisper_impl<F>(
    engine: WhisperEngine,
    stream_head: f32,
    stream_tail: f32,
    handler_fn: F,
) -> WhisperImpl
where
    F: FnMut(Result<Whispered, String>) + Send + 'static,
{
    let boxed: Box<dyn FnMut(Result<Whispered, String>) + Send + 'static> =
        Box::new(handler_fn);
    WhisperImpl {
        engine,
        stream_head,
        stream_tail,
        handler_fn: Some(boxed),
    }
}
impl WhisperImpl {
    /// Worker loop: handles queued jobs until every sender has been dropped.
    ///
    /// For a job carrying an acknowledgement channel, the success flag of the
    /// transcription is sent on it once the job has been handled.
    fn transcribe_asyncs(&mut self, recv: std::sync::mpsc::Receiver<AWhisper>) {
        while let Ok(job) = recv.recv() {
            let succeeded = self.transcribe(&job).is_ok();
            if let Some(ack) = job.ack {
                ack.send(succeeded).unwrap();
            }
        }
    }

    /// Runs the engine on one job and forwards the outcome to the handler.
    ///
    /// The returned `Result` only signals success or failure; the payload goes
    /// to the handler.
    fn transcribe(&mut self, a_whisper: &AWhisper) -> Result<(), ()> {
        let outcome = self.engine.transcribe(&a_whisper.data);
        match outcome {
            Err(msg) => {
                self.on_error(msg.to_string());
                Err(())
            }
            Ok(result) => {
                self.on_success(&result);
                Ok(())
            }
        }
    }

    /// Trims the configured head and tail off `whispered` and hands the rest
    /// to the handler as `Ok`.
    fn on_success(&mut self, whispered: &Whispered) {
        let head = self.stream_head * 100.0;
        let tail = self.stream_tail * 100.0;
        let trimmed = whispered.after(&head).before(&tail);
        (self.handler_fn.as_mut().unwrap())(Ok(trimmed));
    }

    /// Hands `msg`, prefixed with the failure context, to the handler as `Err`.
    fn on_error(&mut self, msg: String) {
        let report = format!("failed to transcribe: {}", &msg);
        (self.handler_fn.as_mut().unwrap())(Err(report));
    }
}
/// Thin wrapper around a loaded whisper model.
struct WhisperEngine {
/// Context created from the model file; a fresh state is created from it for
/// every transcription.
ctx: WhisperContext,
/// Thread count applied to every transcription via set_n_threads.
threads: i32,
}
/// Loads the model at `model_path` and wraps it, together with the thread
/// count, in a `WhisperEngine`.
///
/// # Errors
///
/// Returns `"failed to load <path>: <cause>"` when the context cannot be created.
fn new_whisper_engine(model_path: String, threads: i32) -> Result<WhisperEngine, String> {
    WhisperContext::new(&model_path)
        .map(|ctx| WhisperEngine { ctx, threads })
        .map_err(|msg| format!("failed to load {}: {}", model_path, msg))
}
impl WhisperEngine {
    /// Transcribes `data` with greedy sampling, English forced, no carried-over
    /// context and all of whisper's own printing switched off.
    ///
    /// Every segment's text and its t0/t1 timestamps are pushed into the
    /// returned `Whispered`.
    ///
    /// # Errors
    ///
    /// Propagates any `WhisperError` raised while creating the state, running
    /// the model or reading a segment back.
    fn transcribe(&self, data: &Vec<f32>) -> Result<Whispered, WhisperError> {
        let strategy = SamplingStrategy::Greedy { best_of: 0 };
        let mut options = FullParams::new(strategy);
        // Decoding behaviour.
        options.set_n_threads(self.threads);
        options.set_no_context(true);
        options.set_translate(false);
        options.set_detect_language(false);
        options.set_language(Some("en"));
        // Keep the library from printing anything itself.
        options.set_print_special(false);
        options.set_print_progress(false);
        options.set_print_realtime(false);
        options.set_print_timestamps(false);

        let mut session = self.ctx.create_state()?;
        session.full(options, data.as_slice())?;

        let mut transcript = new_whispered();
        let segment_count = session.full_n_segments()?;
        for segment in 0..segment_count {
            let text = session.full_get_segment_text(segment)?;
            let t0 = session.full_get_segment_t0(segment)?;
            let t1 = session.full_get_segment_t1(segment)?;
            transcript.push(text, t0, t1);
        }
        Ok(transcript)
    }
}
/// One queued transcription job.
struct AWhisper {
/// Audio samples to transcribe; the job owns its own copy.
data: Vec<f32>,
/// Optional channel on which the worker reports whether the job succeeded.
ack: Option<std::sync::mpsc::SyncSender<bool>>,
}
/// A transcription result: the recognised words in the order they were pushed.
#[derive(Clone, Debug)]
struct Whispered {
/// One entry per whitespace-separated word.
data: Vec<AWhispered>,
}
/// A single recognised word with its estimated position in the audio.
/// NOTE(review): offset and length use the unit of the engine's segment
/// timestamps - presumably 10 ms ticks; confirm against whisper-rs.
#[derive(Clone, Debug)]
struct AWhispered {
/// The word itself.
data: String,
/// Start of the word (the `start` given to new_a_whispered).
offset: i64,
/// Duration of the word (`stop - start` in new_a_whispered).
length: i64,
}
/// Creates a `Whispered` that holds no words yet.
fn new_whispered() -> Whispered {
    let data = Vec::new();
    Whispered { data }
}
/// Builds a word entry from its text and its start/stop positions; the stored
/// length is `stop - start`.
fn new_a_whispered(data: String, start: i64, stop: i64) -> AWhispered {
    let length = stop - start;
    AWhispered {
        data,
        offset: start,
        length,
    }
}
impl Whispered {
    /// Joins all words into one string, each word preceded by a single space
    /// (so a non-empty result starts with a space).
    fn to_string(&self) -> String {
        let mut result = String::new();
        for word in &self.data {
            result.push(' ');
            result.push_str(&word.data);
        }
        result
    }

    /// Returns a copy holding only the words whose offset is at or after `t`.
    fn after(&self, t: &f32) -> Whispered {
        let mut result = new_whispered();
        result
            .data
            .extend(self.data.iter().filter(|x| x.offset as f32 >= *t).cloned());
        result
    }

    /// Returns a copy holding only the words that start no later than `t`
    /// before the end of the last word.
    ///
    /// The end is the largest `offset + length` over all words (1 when there
    /// are none).
    fn before(&self, t: &f32) -> Whispered {
        let end = self
            .data
            .iter()
            .map(|x| x.offset + x.length)
            .max()
            .unwrap_or(1);
        let cutoff = (end as f32) - *t;
        let mut result = new_whispered();
        result
            .data
            .extend(self.data.iter().filter(|x| (x.offset as f32) <= cutoff).cloned());
        result
    }

    /// Splits a segment's text into words and appends them, spreading the
    /// segment's duration evenly across the words.
    ///
    /// Word `i` starts at `start + i * per_word`, so words of later segments
    /// keep their position relative to the whole recording. A segment without
    /// any word is ignored instead of dividing by zero.
    fn push(&mut self, data: String, start: i64, stop: i64) {
        let words: Vec<&str> = data.split_whitespace().collect();
        if words.is_empty() {
            return;
        }
        let per_word = (stop - start) / (words.len() as i64);
        for (i, word) in words.iter().enumerate() {
            let word_start = start + (i as i64) * per_word;
            let word_stop = word_start + per_word;
            self.data
                .push(new_a_whispered(word.to_string(), word_start, word_stop));
        }
    }
}
/// Microphone capture front-end; it carries no state of its own.
struct Listener {
}
/// Creates a `Listener`.
fn new_listener() -> Listener {
    let listener = Listener {};
    listener
}
impl Listener {
fn listen(self, mut cb: impl FnMut(Vec<f32>)) {
let (send, recv) = std::sync::mpsc::sync_channel(100);
thread::spawn(move || { self._listen(send); });
loop {
match recv.recv() {
Ok(msg) => cb(msg),
Err(_) => return,
};
}
}
fn _listen(self, send: std::sync::mpsc::SyncSender<Vec<f32>>) {
let host = cpal::default_host();
let device = host.default_input_device().unwrap();
let cfg = device.supported_input_configs()
.unwrap()
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
.nth(0)
.unwrap()
.with_max_sample_rate();
let downsample_ratio = cfg.channels() as f32 * (cfg.sample_rate().0 as f32 / 16_000.0);
let stream = device.build_input_stream(
&cfg.clone().into(),
move |data: &[f32], _: &cpal::InputCallbackInfo| {
let mut downsampled_data = vec![];
for i in 0..(data.len() as f32 / downsample_ratio) as usize {
let mut upsampled = i as f32 * downsample_ratio;
if upsampled > (data.len()-1) as f32 {
upsampled = (data.len()-1) as f32
}
downsampled_data.push(data[upsampled as usize]);
}
match send.try_send(downsampled_data) {
Ok(_) => (),
Err(msg) => eprintln!("failed to ingest audio: {}", msg),
};
},
move |err| {
eprintln!("input error: {}", err)
},
None,
).unwrap();
stream.play().unwrap();
eprintln!("listening on {}", device.name().unwrap());
let mut signals = Signals::new(&[SIGINT]).unwrap();
for sig in signals.forever() {
eprintln!("sig {}", sig);
break;
}
stream.pause().unwrap();
}
}

2
transcript.sh Normal file
View File

@@ -0,0 +1,2 @@
# Prints (does not run) two ready-to-paste command lines. Each one kills any
# running hotwords.py, restarts it with the shown environment and appends its
# output to a per-day transcript file via tee.
# $HOME and $(date ...) sit inside double quotes, so they are expanded when
# this script prints the lines, not when the printed command is later run.
echo "pkill -9 -f hotwords.py; MIC_TIMEOUT=30 MODEL=small.en P=4 DEBUG=true HOTWORDS= python3 ./hotwords.py | tee -a $HOME/Sync/drawful/DnD/bdoob/__log.d/$(date +%Y.%m.%d).transcript.txt"
echo "pkill -9 -f hotwords.py; MIC_TIMEOUT=30 MODEL=small.en P=4 DEBUG=true HOTWORDS= python3 ./hotwords.py | tee -a $HOME/Sync/drawful/DnD/nessira.d/_log.d/$(date +%Y.%m.%d).transcript.txt"