48 Commits

Author SHA1 Message Date
Bel LaPointe
1dd631872c from env to flags 2023-11-28 22:31:16 -07:00
Bel LaPointe
72a1420638 wheee 2023-11-28 22:24:10 -07:00
Bel LaPointe
1009c4230e env variable ify 2023-11-28 22:13:05 -07:00
Bel LaPointe
30e5515da1 GOTTEM 2023-11-28 22:03:08 -07:00
Bel LaPointe
b4c9ecb98b successfully confirmed audio is k with sox -r 16000 -t f32 /tmp/transcribed.pcm --default-device 2023-11-28 21:30:24 -07:00
Bel LaPointe
4ef419e6c0 successfully confirmed audio is k with sox -r 44100 -t f32 /tmp/transcribed.pcm --default-device 2023-11-28 21:22:26 -07:00
Bel LaPointe
54964ec59b grrrrr output 2023-11-28 21:03:00 -07:00
Bel LaPointe
62e764436a no warnings but still nothing sane coming out... 2023-11-28 20:36:26 -07:00
Bel LaPointe
d631def834 CLOSER like easily 80 20 range right 2023-11-28 20:32:09 -07:00
Bel LaPointe
3168968cae ok stream les go 2023-11-28 19:23:21 -07:00
Bel LaPointe
437d7cac39 successful refactor 2023-11-28 19:18:05 -07:00
Bel LaPointe
3093a91d84 wip 2023-11-28 19:10:07 -07:00
Bel LaPointe
f58e3a0331 better default err msgs 2023-11-26 17:37:26 -07:00
Bel LaPointe
6dffa401b7 cleaner 2023-11-26 17:21:40 -07:00
Bel LaPointe
f4d9730b5a hm i lost it but i get it back 2023-11-26 17:13:29 -07:00
Bel LaPointe
0c5c1f647c submodule for gitea-whisper-rs 2023-11-26 17:04:16 -07:00
Bel LaPointe
77ad40b61a closer 2023-11-26 17:00:42 -07:00
Bel LaPointe
09894c4fd0 confirmed just needs whisper-rs-sys upgrade for whisper.cpp up 2023-11-26 16:39:42 -07:00
Bel LaPointe
3e2e1e2ff8 wip 2023-11-26 16:23:42 -07:00
Bel LaPointe
50058037eb Revert "try rollback to whisper-rs5 to avoid gpu in whispercpp but no luck"
This reverts commit a483aaf25c.
2023-11-08 11:35:31 -07:00
Bel LaPointe
a483aaf25c try rollback to whisper-rs5 to avoid gpu in whispercpp but no luck 2023-11-08 11:35:29 -07:00
Bel LaPointe
be7d85f85e confirmed whisper.cpp works with distill iff no gpu 2023-11-08 11:29:50 -07:00
Bel LaPointe
60d38c4d5c update distil.sh 2023-11-08 10:58:58 -07:00
Bel LaPointe
e3a7628acf try distil-whisper 2023-11-08 10:22:30 -07:00
Bel LaPointe
91c7791860 up whisper-rs to 0.8.0 2023-11-08 09:25:00 -07:00
bel
247edd2ced more trans 2023-07-15 19:05:00 -06:00
bel
edd94aef72 catch 2023-07-05 22:36:07 -06:00
bel
b4d3e5a27c HOTWORDS yaml @ can have comma delimited and KEYS 2023-04-19 18:24:07 -06:00
bel
a1436e3bd2 revise 2023-04-12 19:37:43 -06:00
bel
410769b8c6 tr 2023-04-12 19:26:03 -06:00
bel
5869016de6 tr 2023-04-12 19:16:07 -06:00
bel
0955f6c0c0 oof 2023-04-12 19:15:32 -06:00
bel
242f4407df script 2023-04-12 18:50:49 -06:00
bel
814a8ae2f3 typo 2023-04-08 22:23:20 -06:00
bel
7c369e72d4 delimiters 2023-04-08 22:22:22 -06:00
bel
0aff4f556b one more 2023-04-08 20:05:03 -06:00
bel
88bf54d022 url replaces hotword,context too 2023-04-02 10:48:41 -06:00
bel
63fe8e7e9e whoops 2023-04-01 11:13:57 -06:00
bel
e26e700a21 reader does finally put and some exception shuffle 2023-04-01 11:12:44 -06:00
bel
fa23e396f1 gr 2023-04-01 10:46:16 -06:00
bel
11789a5c98 with boo 2023-04-01 10:45:00 -06:00
bel
97006d95c8 stack pls 2023-04-01 10:41:22 -06:00
bel
bb578e98f6 retry slower on bad mic things but ultimately segfault still 2023-04-01 10:35:04 -06:00
bel
465193b60d DEBUG stderr 2023-03-31 22:36:30 -06:00
bel
0e6c12a94f oops 2023-03-31 21:35:23 -06:00
bel
599f9079d3 req.txt 2023-03-31 20:52:38 -06:00
bel
0805bb6fd8 prune to only one whisper 2023-03-31 16:42:36 -06:00
bel
b109b0144d move to /rust-whisper.d with a download_models.sh 2023-03-31 16:41:46 -06:00
24 changed files with 1596 additions and 813 deletions

2
.gitignore vendored
View File

@@ -1,5 +1,7 @@
**/*.sw* **/*.sw*
/whisper-cpp-2023/rust.d/target /whisper-cpp-2023/rust.d/target
/rust-whisper.d/target
/rust-whisper.d/models
snowboy-2022/snowboy snowboy-2022/snowboy
**/*.git.d **/*.git.d
**/*.wav **/*.wav

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "rust-whisper.d/gitea-whisper-rs"]
path = rust-whisper.d/gitea-whisper-rs
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git

1345
rust-whisper.d/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -6,6 +6,11 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
whisper-rs = "0.5" whisper-rs = { path = "./gitea-whisper-rs", version = "0.8.0" }
wav = "1" wav = "1"
tokio = "1.27" tokio = "1.27"
cpal = "0.15.2"
signal-hook = "0.3.17"
byteorder = "1.5.0"
chrono = "0.4.31"
clap = { version = "4.4.10", features = ["derive"] }

View File

@@ -47,37 +47,60 @@ class Reader(threading.Thread):
self.name = os.environ.get("MIC_NAME", "pulse_monitor") self.name = os.environ.get("MIC_NAME", "pulse_monitor")
if not self.name: if not self.name:
for index, name in enumerate(sr.Microphone.list_microphone_names()): for index, name in enumerate(sr.Microphone.list_microphone_names()):
print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name)) log("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
exit() exit()
self.inq = inq self.inq = inq
self.outq = outq self.outq = outq
def run(self): def mic_idx(self):
log("Reader.run: start") mics = []
try: while not mics and not self.should_stop():
idx = [ log(f'searching for one of {self.name.split(",")} in {sr.Microphone.list_microphone_names()}...')
time.sleep(1)
mics = [
idx for idx,v in enumerate( idx for idx,v in enumerate(
sr.Microphone.list_microphone_names(), sr.Microphone.list_microphone_names(),
) if v in self.name.split(",") ) if v in self.name.split(",")
][0] ]
with sr.Microphone(device_index=idx) as mic: log("mic#", mics[0])
while not self.should_stop(): return mics[0]
try:
self.outq.put(self._run(mic)) def run(self):
except Exception as e: try:
if not "timed out" in str(e): log("Reader.run: start")
log("Reader.run: error:", e) self._run()
except Exception as e: except Exception as e:
log("Reader.run panic:", e) log("Reader:run: exit:", e)
log("microphones:", sr.Microphone.list_microphone_names())
finally: finally:
self.outq.put(None) self.outq.put(None)
log("Reader.run: stop") log("Reader.run: stop")
def _run(self):
while not self.should_stop():
time.sleep(3)
mic = sr.Microphone(device_index=self.mic_idx())
try:
mic.__enter__()
while not self.should_stop():
try:
self.outq.put(self.read(mic))
except Exception as e:
if not "timed out" in str(e):
raise e
except Exception as e:
import traceback
traceback.print_exception(e)
log("Reader.run: error:", e)
finally:
try:
mic.__exit__(None, None, None)
except Exception as e:
log("Reader.run.catch: error:", e)
def should_stop(self): def should_stop(self):
return not self.inq.empty() return not self.inq.empty()
def _run(self, mic): def read(self, mic):
mic_timeout = int(os.environ.get("MIC_TIMEOUT", 5)) mic_timeout = int(os.environ.get("MIC_TIMEOUT", 5))
r = sr.Recognizer() r = sr.Recognizer()
return r.listen( return r.listen(
@@ -113,13 +136,19 @@ class Parser(threading.Thread):
p = "/tmp/whisper-cpp.wav" p = "/tmp/whisper-cpp.wav"
with open("/tmp/whisper-cpp.wav", "wb") as f: with open("/tmp/whisper-cpp.wav", "wb") as f:
f.write(wav) f.write(wav)
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P=2 rust-whisper", capture_output=True, shell=True) proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P={os.environ.get('P', '2')} rust-whisper", capture_output=True, shell=True)
result = proc.stdout.decode().strip() result = proc.stdout.decode().strip()
if os.environ.get("DEBUG", None): if os.environ.get("DEBUG", None):
log("stderr:", proc.stderr.decode().strip())
log("raw transcript:", result) log("raw transcript:", result)
result = result.replace(">>", "") result = result.replace(">>", "")
result = "".join([i.split("]")[-1] for i in result.split("[")[0]]) for pair in [
result = "".join([i.split(")")[-1] for i in result.split("(")[0]]) ("[", "]"),
("(", ")"),
("<", ">"),
("*", "*"),
]:
result = "".join([i.split(pair[1])[-1] for i in result.split(pair[0])[0]])
if os.environ.get("DEBUG", None): if os.environ.get("DEBUG", None):
log("annotation-free transcript:", result) log("annotation-free transcript:", result)
return result return result
@@ -156,6 +185,8 @@ def _load_dot_notation(v, items):
else: else:
result.append(subresult) result.append(subresult)
return result return result
elif k == "KEYS":
v = [k for k in v]
else: else:
if isinstance(v, list): if isinstance(v, list):
v = v[int(k)] v = v[int(k)]
@@ -200,8 +231,15 @@ class Reactor(threading.Thread):
def load_hotwords_in_yaml_file(): def load_hotwords_in_yaml_file():
with open(p.split("@")[0], "r") as f: with open(p.split("@")[0], "r") as f:
v = yaml.safe_load(f) v = yaml.safe_load(f)
v = load_dot_notation(v, p.split("@")[-1]) if os.environ.get("DEBUG", None):
return ["".join(i.strip().lower().split()) for i in v if i] log(f'opened {p.split("@")[0]} and got {v}')
result = []
for to_find in [i for i in p.split("@")[-1].split(",") if i]:
if os.environ.get("DEBUG", None):
log(f'finding {to_find} in {v}')
v2 = load_dot_notation(v, to_find)
result.extend(["".join(i.strip().lower().split()) for i in v2 if i])
return result
load_hotwords_in_yaml_file() load_hotwords_in_yaml_file()
return load_hotwords_in_yaml_file return load_hotwords_in_yaml_file
else: else:
@@ -230,13 +268,23 @@ class Reactor(threading.Thread):
log("Reactor.run: stop") log("Reactor.run: stop")
def handle(self, text): def handle(self, text):
try:
self._handle(text)
except Exception:
pass
def _handle(self, text):
hotwords = self.load_hotwords() hotwords = self.load_hotwords()
if os.environ.get("DEBUG", None): if os.environ.get("DEBUG", None):
log(f"seeking {hotwords} in {text}") log(f"seeking {hotwords} in {text}. $HOTWORDS={os.environ.get('HOTWORDS', None)}")
if not hotwords: if not hotwords:
if not os.environ.get("HOTWORDS", None): if not os.environ.get("HOTWORDS", None):
print(text) if os.environ.get("DEBUG", None):
log(f"HOTWORDS is False; {text}")
print(text, flush=True)
else: else:
if os.environ.get("DEBUG", None):
log(f"HOTWORDS is True; {text}")
log(text) log(text)
return return
cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()]) cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()])
@@ -256,7 +304,7 @@ class Actor(threading.Thread):
self.pid = int(environ["SIGUSR2"]) self.pid = int(environ["SIGUSR2"])
self.handle = self.handle_signal self.handle = self.handle_signal
elif os.environ.get("URL", ""): elif os.environ.get("URL", ""):
self.url = environ["URL"] self.url = os.environ["URL"]
self.handle = self.handle_url self.handle = self.handle_url
self.headers = [i.split("=")[:2] for i in os.environ.get("HEADERS", "").split("//") if i] self.headers = [i.split("=")[:2] for i in os.environ.get("HEADERS", "").split("//") if i]
self.body = os.environ.get("BODY", '{"hotword":"{{hotword}}","context":"{{context}}"}') self.body = os.environ.get("BODY", '{"hotword":"{{hotword}}","context":"{{context}}"}')
@@ -276,7 +324,7 @@ class Actor(threading.Thread):
def handle_stdout(self, hotword, context): def handle_stdout(self, hotword, context):
log(context) log(context)
print(hotword) print(hotword, flush=True)
def handle_signal(self, hotword, context): def handle_signal(self, hotword, context):
self.handle_stderr(hotword, context) self.handle_stderr(hotword, context)
@@ -295,9 +343,12 @@ class Actor(threading.Thread):
body = self.body body = self.body
body = body.replace("{{hotword}}", hotword) body = body.replace("{{hotword}}", hotword)
body = body.replace("{{context}}", context) body = body.replace("{{context}}", context)
url = self.url
url = url.replace("{{hotword}}", hotword)
url = url.replace("{{context}}", context)
if os.environ.get("DEBUG", "") : if os.environ.get("DEBUG", "") :
log("POST", self.url, headers, body) log("POST", url, headers, body)
requests.post(self.url, headers=headers, data=body) requests.post(url, headers=headers, data=body)
except Exception as e: except Exception as e:
log("Actor.handle_url:", e) log("Actor.handle_url:", e)

View File

@@ -0,0 +1,9 @@
#!/bin/bash
# Downloads the ggml whisper models into this script's directory, skipping
# any model file that is already present.

src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"

# Work from the script's own directory; abort if it cannot be entered so that
# models are never written somewhere unexpected.
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")" || exit 1

# Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en"; do
    dest="./ggml-$model.bin"
    test -f "$dest" && continue
    # Download under a temporary name first: an interrupted wget would
    # otherwise leave a truncated file that the existence check above would
    # accept on every later run.
    if wget --quiet --show-progress -O "$dest.part" "$src-$model.bin"; then
        mv "$dest.part" "$dest"
    else
        rm -f "$dest.part"
    fi
done

View File

@@ -2,3 +2,4 @@ git+https://github.com/openai/whisper.git
soundfile soundfile
PyAudio PyAudio
SpeechRecognition SpeechRecognition
PyYAML

148
rust-whisper.d/src/main.rs Normal file
View File

@@ -0,0 +1,148 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
use signal_hook::{iterator::Signals, consts::signal::SIGINT};
use std::time::{Duration, Instant};
use chrono;
use clap::Parser;
// Command-line flags, parsed with clap's derive API (`Flags::parse()` in `main`).
// Plain `//` comments are used on purpose: NOTE(review) `///` doc comments on
// clap-derive fields are picked up as help text, which would change `--help`.
#[derive(Parser, Debug)]
struct Flags {
// Path of the whisper model file handed to `new_whisper`.
#[arg(long, default_value = "../models/ggml-tiny.en.bin")]
model: String,
// Thread count handed to `new_whisper` and later to `set_n_threads`.
#[arg(long, default_value = "8")]
threads: i32,
// Streaming mode: share of the sample buffer given up after each
// transcription (`main` keeps `len - len * stream_churn` samples).
#[arg(long, default_value = "0.8")]
stream_churn: f32,
// Streaming mode: whole seconds between transcriptions
// (`Duration::new(flags.stream_step, 0)` in `main`).
#[arg(long, default_value = "5")]
stream_step: u64,
// Optional WAV file; when present `main` transcribes it instead of streaming.
// Has no `#[arg]` attribute — presumably a positional argument; confirm.
wav: Option<String>,
}
/// Entry point.
///
/// With a WAV file argument, transcribes that file once and prints the text.
/// Without one, captures the default input device, resamples it to 16 kHz
/// mono and prints a transcription every `stream_step` seconds until SIGINT.
///
/// # Panics
///
/// Panics when the model cannot be loaded, the WAV file cannot be read or is
/// not 16 kHz mono 16-bit, no f32-capable input device is available, or a
/// transcription fails.
fn main() {
    let flags = Flags::parse();
    let w = new_whisper(flags.model, flags.threads).unwrap();
    let stream_churn = flags.stream_churn;
    let stream_step = Duration::new(flags.stream_step, 0);

    match flags.wav {
        Some(wav) => {
            let (header, data) = wav::read(
                &mut std::fs::File::open(wav).expect("failed to open $WAV"),
            ).expect("failed to decode $WAV");
            assert!(header.channel_count == 1);
            assert!(header.sampling_rate == 16000);
            // `as_sixteen` only succeeds for 16-bit integer samples; the
            // message used to claim 32-bit floats were expected.
            let data16 = data.as_sixteen().expect("wav is not 16bit integers");
            let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
            let result = w.transcribe(&audio_data).unwrap();
            println!("{}", result);
        },
        None => {
            let host = cpal::default_host();
            let device = host.default_input_device().unwrap();
            let cfg = device.supported_input_configs()
                .unwrap()
                .find(|x| x.sample_format() == cpal::SampleFormat::F32)
                .unwrap()
                .with_max_sample_rate();

            let channels = usize::from(cfg.channels());
            let downsample_ratio = cfg.sample_rate().0 as f32 / 16000.0;
            let mut buffer: Vec<f32> = Vec::new();
            let mut last = Instant::now();
            let stream = device.build_input_stream(
                &cfg.clone().into(),
                move |data: &[f32], _: &cpal::InputCallbackInfo| {
                    buffer.extend(downsample_first_channel(data, channels, downsample_ratio));

                    if last.elapsed() > stream_step {
                        let result = w.transcribe(&buffer).unwrap();
                        eprintln!("{}", chrono::Local::now());
                        println!("{}", result);

                        // Keep only the newest samples as overlap for the
                        // next pass.
                        discard_oldest(&mut buffer, stream_churn);
                        last = Instant::now();
                    }
                },
                move |err| {
                    eprintln!("input error: {}", err)
                },
                None,
            ).unwrap();
            stream.play().unwrap();

            eprintln!("listening on {}", device.name().unwrap());
            // Block until the first SIGINT arrives, then shut the stream down.
            let mut signals = Signals::new(&[SIGINT]).unwrap();
            if let Some(sig) = signals.forever().next() {
                eprintln!("sig {}", sig);
            }
            stream.pause().unwrap();
        },
    };
}

/// Keeps the first channel of interleaved `data` and resamples it by
/// nearest-neighbour lookup; `downsample_ratio` is the device sample rate
/// divided by the target rate.
fn downsample_first_channel(data: &[f32], channels: usize, downsample_ratio: f32) -> Vec<f32> {
    let mono: Vec<f32> = data.iter().copied().step_by(channels).collect();
    let out_len = (mono.len() as f32 / downsample_ratio) as usize;
    (0..out_len)
        .map(|i| {
            // Float rounding can point one past the end; clamp to the last sample.
            let src = ((i as f32 * downsample_ratio) as usize).min(mono.len() - 1);
            mono[src]
        })
        .collect()
}

/// Removes the oldest `churn` share of `buffer`, leaving the newest samples.
///
/// The previous in-place copy shifted by the retained count instead of the
/// discarded count, so it kept samples `[retain, 2 * retain)` rather than the
/// tail of the buffer.
fn discard_oldest(buffer: &mut Vec<f32>, churn: f32) {
    // Clamp so a churn above 1.0 empties the buffer instead of underflowing.
    let discard = ((buffer.len() as f32 * churn) as usize).min(buffer.len());
    buffer.drain(..discard);
}
/// A loaded whisper model together with the thread count used to run it.
struct Whisper {
/// Model context created by `WhisperContext::new` in `new_whisper`.
ctx: WhisperContext,
/// Value passed to `FullParams::set_n_threads` for every transcription.
threads: i32,
}
/// Loads the model at `model_path` and pairs it with `threads`.
///
/// # Errors
///
/// Returns `"failed to load <path>: <cause>"` when the context cannot be created.
fn new_whisper(model_path: String, threads: i32) -> Result<Whisper, String> {
    WhisperContext::new(&model_path)
        .map(|ctx| Whisper { ctx, threads })
        .map_err(|msg| format!("failed to load {}: {}", model_path, msg))
}
impl Whisper {
    /// Transcribes `data` and returns the text of all segments, each preceded
    /// by a single space.
    ///
    /// NOTE(review): callers in this file pass 16 kHz mono f32 samples —
    /// confirm that is what the model requires.
    ///
    /// # Errors
    ///
    /// Returns `"failed to transcribe: <cause>"` when whisper reports an error.
    fn transcribe(&self, data: &[f32]) -> Result<String, String> {
        self._transcribe(data)
            .map_err(|msg| format!("failed to transcribe: {}", msg))
    }

    /// Runs one full greedy decoding pass over `data` with a fresh state.
    fn _transcribe(&self, data: &[f32]) -> Result<String, WhisperError> {
        let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
        params.set_no_context(true);
        params.set_n_threads(self.threads);
        params.set_translate(false);
        params.set_detect_language(false);
        params.set_language(Some("en"));
        params.set_print_special(false);
        params.set_print_progress(false);
        params.set_print_realtime(false);
        params.set_print_timestamps(false);

        let mut state = self.ctx.create_state()?;
        state.full(params, data)?;

        let num_segments = state.full_n_segments()?;
        // Append in place; rebuilding the string with `format!` per segment
        // re-copied everything collected so far.
        let mut result = String::new();
        for i in 0..num_segments {
            result.push(' ');
            result.push_str(&state.full_get_segment_text(i)?);
        }
        Ok(result)
    }
}

View File

@@ -0,0 +1,2 @@
# Prints (does not run) the two command lines used to restart hotwords.py in
# plain-transcript mode (HOTWORDS empty) and append its output to a dated
# transcript file. $HOME and $(date ...) are expanded when this script runs,
# because the echoed text is double-quoted.
echo "pkill -9 -f hotwords.py; MIC_TIMEOUT=30 MODEL=small.en P=4 DEBUG=true HOTWORDS= python3 ./hotwords.py | tee -a $HOME/Sync/drawful/DnD/bdoob/__log.d/$(date +%Y.%m.%d).transcript.txt"
echo "pkill -9 -f hotwords.py; MIC_TIMEOUT=30 MODEL=small.en P=4 DEBUG=true HOTWORDS= python3 ./hotwords.py | tee -a $HOME/Sync/drawful/DnD/nessira.d/_log.d/$(date +%Y.%m.%d).transcript.txt"

View File

@@ -1,304 +0,0 @@
import speech_recognition as sr
import time
import threading
import queue
import signal
import sys
import os
import requests
import yaml
def log(*args):
    """Print ``args`` to stderr on one line, prefixed with ``>``."""
    fields = [">"] + [str(arg) for arg in args]
    print(" ".join(fields), file=sys.stderr)
class Piper(threading.Thread):
    """Base pipeline stage: feeds every item from ``inq`` to ``_run``.

    Subclasses supply ``_run(item)``. A ``None`` item ends the loop and is
    passed on to ``outq`` so the next stage stops as well.
    """

    def __init__(self, inq, outq):
        super().__init__()
        self.inq = inq
        self.outq = outq

    def run(self):
        item = self.inq.get()
        while item is not None:
            self._run(item)
            item = self.inq.get()
        self.outq.put(None)
class Manager(threading.Thread):
    """Turns the first SIGINT into a ``None`` stop marker on ``outq``."""

    def __init__(self, outq):
        super().__init__()
        self.outq = outq
        stop_requests = queue.Queue()

        def on_sigint(signum, frame):
            # Only enqueue here; the thread in run() does the real work.
            stop_requests.put(None)

        self.inq = stop_requests
        signal.signal(signal.SIGINT, on_sigint)

    def run(self):
        log("Manager.run: start")
        # Blocks until the signal handler has queued something.
        self.inq.get()
        self.outq.put(None)
        log("Manager.run: stop")
class Reader(threading.Thread):
    """Records clips from a named microphone and puts them on ``outq``.

    ``$MIC_NAME`` is a comma-separated list of acceptable device names; when
    it is set but empty, the available microphones are listed and the process
    exits. Anything arriving on ``inq`` is treated as a request to stop.
    """

    def __init__(self, inq, outq):
        super().__init__()
        self.name = os.environ.get("MIC_NAME", "pulse_monitor")
        if not self.name:
            available = sr.Microphone.list_microphone_names()
            for index, name in enumerate(available):
                print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
            exit()
        self.inq = inq
        self.outq = outq

    def run(self):
        log("Reader.run: start")
        try:
            wanted = self.name.split(",")
            candidates = [
                position
                for position, device in enumerate(sr.Microphone.list_microphone_names())
                if device in wanted
            ]
            # Raises IndexError when nothing matched; reported as a panic below.
            chosen = candidates[0]
            with sr.Microphone(device_index=chosen) as mic:
                while not self.should_stop():
                    try:
                        clip = self._run(mic)
                        self.outq.put(clip)
                    except Exception as e:
                        # Listen timeouts are expected when nobody is talking.
                        if "timed out" not in str(e):
                            log("Reader.run: error:", e)
        except Exception as e:
            log("Reader.run panic:", e)
            log("microphones:", sr.Microphone.list_microphone_names())
        finally:
            self.outq.put(None)
        log("Reader.run: stop")

    def should_stop(self):
        stop_requested = not self.inq.empty()
        return stop_requested

    def _run(self, mic):
        """Listen once; ``$MIC_TIMEOUT`` bounds both the wait and the phrase length."""
        mic_timeout = int(os.environ.get("MIC_TIMEOUT", 5))
        recognizer = sr.Recognizer()
        return recognizer.listen(
            mic,
            timeout=mic_timeout,
            phrase_time_limit=mic_timeout,
        )
class Parser(threading.Thread):
    """Transcribes each clip from ``inq`` with whisper and forwards the text.

    A ``None`` clip ends the loop and is passed on to ``outq``.
    """

    def __init__(self, inq, outq):
        super().__init__()
        self.inq = inq
        self.outq = outq

    def run(self):
        log("Parser.run: start")
        running = True
        while running:
            try:
                clip = self.inq.get()
                pending = self.inq.qsize()
                if pending:
                    log("Parser.run backlog", pending)
                if clip is None:
                    running = False
                else:
                    text = self._run(clip)
                    self.outq.put(text.strip())
            except Exception as e:
                log("Parser.run: error:", e)
        self.outq.put(None)
        log("Parser.run: stop")

    def _run(self, clip):
        model = os.environ.get("MODEL", "small.en")  # tiny.en=32x, base.en=16x, small.en=6x, medium.en=x2
        recognizer = sr.Recognizer()
        return recognizer.recognize_whisper(clip, language="english", model=model)
def load_dot_notation(v, s):
    """Look up the dotted path ``s`` (for example ``.a.b.0`` or ``.a[].b``) in ``v``."""
    # "a[]" and "a.[]" are equivalent: give every "[]" its own path segment.
    return _load_dot_notation(v, s.replace("[]", ".[]").split("."))
def _load_dot_notation(v, items):
for i in range(len(items)):
k = items[i]
if not k:
continue
if k == "[]":
if isinstance(v, list):
result = []
for j in v:
subresult = _load_dot_notation(j, items[i+1:])
if isinstance(subresult, list):
result.extend(subresult)
else:
result.append(subresult)
return result
else:
result = []
for j in v.values():
subresult = _load_dot_notation(j, items[i+1:])
if isinstance(subresult, list):
result.extend(subresult)
else:
result.append(subresult)
return result
else:
if isinstance(v, list):
v = v[int(k)]
else:
v = v[k]
return v
def test_load_dot_notation():
    """Import-time self-check of ``load_dot_notation``; raises on the first mismatch."""
    cases = [
        ("a", "a", "."),
        (["a"], ["a"], "."),
        ("b", {"a":"b"}, ".a"),
        ("c", {"a":{"b":"c"}}, ".a.b"),
        ("c", {"a":{"b":["c"]}}, ".a.b.0"),
        (["c","d"], {"a":{"b":"c"}, "a2":{"b":"d"}}, ".[].b"),
        (["c","d"], {"a":{"b":["c"], "b2":["d"]}}, ".a.[].0"),
        (["c","d"], {"a":{"b":["c"], "b2":["d"]}}, ".a[].0"),
        (["c","d"], ["c", "d"], "."),
        (["c","d"], ["c", "d"], "[]"),
    ]
    # Evaluate every case first, then report the first failure.
    outcomes = [expected == load_dot_notation(value, path) for expected, value, path in cases]
    for outcome in outcomes:
        if not outcome:
            raise Exception(outcome)
test_load_dot_notation()
class Reactor(threading.Thread):
    """Looks for hotwords in each transcript from ``inq``.

    Every match is put on ``outq`` as a ``(hotword, text)`` tuple. When no
    hotwords are configured the transcript is printed instead. A ``None``
    transcript ends the loop and is passed on to ``outq``.
    """

    def __init__(self, inq, outq):
        threading.Thread.__init__(self)
        self.inq = inq
        self.outq = outq
        self.load_hotwords = Reactor.new_load_hotwords()
        log(f"hotwords: {self.load_hotwords()}")

    @staticmethod
    def new_load_hotwords():
        """Build a zero-argument loader for the hotword list from ``$HOTWORDS``.

        ``$HOTWORDS`` may be unset (no hotwords), ``file@dot.path`` (a YAML
        file plus a path inside it), a plain file with one hotword per line,
        or, when it cannot be opened as a file, a literal list. File-based
        loaders re-read the file on every call. Hotwords are lowercased with
        all whitespace removed.
        """
        p = os.environ.get("HOTWORDS", None)
        if not p:
            def load_nothing():
                return []
            return load_nothing

        try:
            if "@" in p:
                def load_hotwords_in_yaml_file():
                    with open(p.split("@")[0], "r") as f:
                        v = yaml.safe_load(f)
                    v = load_dot_notation(v, p.split("@")[-1])
                    return ["".join(i.strip().lower().split()) for i in v if i]
                # Call once now so an unreadable file falls through to the
                # literal interpretation below.
                load_hotwords_in_yaml_file()
                return load_hotwords_in_yaml_file
            else:
                def load_hotwords_in_file():
                    with open(p, "r") as f:
                        words = ["".join(i.strip().lower().split()) for i in f.readlines()]
                    # A blank line would become "" and match every transcript.
                    return [word for word in words if word]
                load_hotwords_in_file()
                return load_hotwords_in_file
        except Exception as e:
            log(f"$HOTWORDS {p} is not a file: {e}")
            # Raw string: same four characters as before, without the invalid
            # "\/" escape sequence.
            # NOTE(review): Actor splits $HEADERS on "//" — confirm whether
            # the backslashes are intended here.
            hotwords = ["".join(i.lower().strip().split()) for i in p.split(r"\/\/")]
            log(f'$HOTWORDS: {hotwords}')

            def load_hotwords_as_literal():
                return hotwords
            return load_hotwords_as_literal

    def run(self):
        log("Reactor.run: start")
        while True:
            text = self.inq.get()
            if text is None:
                break
            self.handle(text)
        self.outq.put(None)
        log("Reactor.run: stop")

    def handle(self, text):
        """Emit ``(hotword, text)`` for each hotword found in ``text``."""
        hotwords = self.load_hotwords()
        if os.environ.get("DEBUG", None):
            log(f"seeking {hotwords} in {text}")
        if not hotwords:
            # Transcript-only mode goes to stdout; a configured but empty
            # hotword source only logs.
            if not os.environ.get("HOTWORDS", None):
                print(text)
            else:
                log(text)
            return
        # Compare on letters only so punctuation and spacing cannot hide a match.
        cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()])
        for i in hotwords:
            if i in cleantext:
                #log(f"Reactor.handle: found hotword '{i}' in '{text}' as '{cleantext}'")
                self.outq.put((i, text))
class Actor(threading.Thread):
    """Reacts to each ``(hotword, context)`` tuple from ``inq``.

    The reaction is chosen once from the environment, first match wins:
    ``STDOUT=true`` prints the hotword, ``SIGUSR2=<pid>`` signals that
    process, ``URL=<url>`` POSTs to it, otherwise the match is only logged.
    A ``None`` item ends the loop.
    """

    def __init__(self, inq):
        threading.Thread.__init__(self)
        self.inq = inq
        self.handle = self.handle_stderr
        if os.environ.get("STDOUT", "") == "true":
            self.handle = self.handle_stdout
        elif os.environ.get("SIGUSR2", ""):
            # Was `environ[...]`, a NameError: only `os` is imported.
            self.pid = int(os.environ["SIGUSR2"])
            self.handle = self.handle_signal
        elif os.environ.get("URL", ""):
            self.url = os.environ["URL"]
            self.handle = self.handle_url
            # $HEADERS looks like "Key=Value//Other=Value".
            self.headers = [i.split("=")[:2] for i in os.environ.get("HEADERS", "").split("//") if i]
            self.body = os.environ.get("BODY", '{"hotword":"{{hotword}}","context":"{{context}}"}')
            log(self.headers)

    def run(self):
        log("Actor.run: start")
        while True:
            got = self.inq.get()
            if got is None:
                break
            self.handle(got[0], got[1])
        log("Actor.run: stop")

    def handle_stderr(self, hotword, context):
        log(f"'{hotword}' in '{context}'")

    def handle_stdout(self, hotword, context):
        log(context)
        print(hotword)

    def handle_signal(self, hotword, context):
        self.handle_stderr(hotword, context)
        os.kill(self.pid, signal.SIGUSR2)

    def handle_url(self, hotword, context):
        """POST to ``self.url`` with ``{{hotword}}``/``{{context}}`` filled in.

        Failures are logged rather than raised so one bad request does not
        stop the thread.
        """
        self.handle_stderr(hotword, context)
        try:
            headers = {}
            for i in self.headers:
                key = i[0]
                value = i[1]
                value = value.replace("{{hotword}}", hotword)
                value = value.replace("{{context}}", context)
                headers[key] = value
            body = self.body
            body = body.replace("{{hotword}}", hotword)
            body = body.replace("{{context}}", context)
            if os.environ.get("DEBUG", "") :
                log("POST", self.url, headers, body)
            requests.post(self.url, headers=headers, data=body)
        except Exception as e:
            log("Actor.handle_url:", e)
def main():
    """Wire the pipeline Manager -> Reader -> Parser -> Reactor -> Actor and run it."""
    # Bounded queues between neighbouring stages.
    manager_to_reader = queue.Queue(maxsize=1)
    reader_to_parser = queue.Queue(maxsize=10)
    parser_to_reactor = queue.Queue(maxsize=10)
    reactor_to_actor = queue.Queue(maxsize=10)

    stages = [
        Manager(manager_to_reader),
        Reader(manager_to_reader, reader_to_parser),
        Parser(reader_to_parser, parser_to_reactor),
        Reactor(parser_to_reactor, reactor_to_actor),
        Actor(reactor_to_actor),
    ]
    for stage in stages:
        stage.start()
    for stage in stages:
        stage.join()

if __name__ == "__main__":
    main()

View File

@@ -1,7 +0,0 @@
#! /bin/bash
# Installs the PortAudio development package and the Python packages
# (whisper, soundfile, PyAudio, SpeechRecognition). Needs sudo for apt.
sudo apt install portaudio19-dev python3-pyaudio
python3 -m pip install git+https://github.com/openai/whisper.git soundfile PyAudio SpeechRecognition
# Commented-out alternative setup that installs pocketsphinx.
#sudo apt-get install python3 python3-all-dev python3-pip build-essential swig git libpulse-dev libasound2-dev
#python3 -m pip install pocketsphinx

View File

@@ -1,20 +0,0 @@
# Stage 1: download the whisper.cpp v1.2.1 source tarball and build it plus its samples.
FROM debian:buster as builder
RUN apt -y update && apt -y install build-essential wget ffmpeg
WORKDIR /tmp/whisper-cpp.git.d
# Unpack the tarball and rename the extracted directory to ./git.d.
RUN wget https://github.com/ggerganov/whisper.cpp/archive/refs/tags/v1.2.1.tar.gz \
&& tar -xf ./*.tar.gz \
&& mv ./whisper*/ ./git.d
WORKDIR /tmp/whisper-cpp.git.d/git.d
RUN make && make samples
# Stage 2: runtime image with the whole build tree copied from the builder stage.
FROM debian:buster
RUN apt -y update && apt -y install curl
COPY --from=builder /tmp/whisper-cpp.git.d/git.d/ /whisper-cpp.git.d/
WORKDIR /whisper.d
# Download the tiny.en model at image build time.
RUN bash /whisper-cpp.git.d/models/download-ggml-model.sh tiny.en
ENTRYPOINT []
# Default command: transcribe the gb1.wav sample with tiny.en on 4 threads.
CMD /whisper-cpp.git.d/main -m /whisper-cpp.git.d/models/ggml-tiny.en.bin -f /whisper-cpp.git.d/samples/gb1.wav -t 4

View File

@@ -1 +0,0 @@
git.d/main

View File

@@ -1,14 +0,0 @@
# git.d/samples/mm0.wav (30s)
| model | threads | rust | c |
| ----- | ------- | ------------- | ----------- |
| tiny | 1 | 4.4s@122% | 4.9s@125% |
| tiny | 2 | 2.7s@210% | 3.3s@190% |
| tiny | 4 | 2.0s@- | 2.9s@400% |
| tiny | 8 | 2.0s@- | 3.1s@700% |
| small | 1 | 23.9s@175% | 28.5s@205% |
| small | 2 | 14.9s@347% | 19.2s@330% |
| small | 4 | 12.3s@515% | 22.1s@530% |
| base | 1 | 8.7s@150% | 10.2s@155% |
| base | 2 | 5.1s@240% | 7.1s@270% |
| base | 4 | 3.8s@370% | 6.0s@430% |

View File

@@ -1,7 +0,0 @@
# Times the Rust and C whisper builds on the same input.
# Usage: <script> [threads] [model path] [wav path]
# rust-whisper reads P, MODEL and WAV from the environment, hence the exports.
export P=${1:-1}
export MODEL=${2:-models/ggml-tiny.en.bin}
export WAV=${3:-git.d/samples/jfk.wav}
echo === RUST
# Drop whisper's own log lines and blank output so only the transcript remains.
time rust-whisper 2>&1 | grep -v ^whisper_ | grep ..
echo === C
# Quoted so that model or wav paths containing spaces stay single arguments.
time ./c-whisper -m "$MODEL" -f "$WAV" -t "$P" 2>&1 | grep -v ^whisper_ | grep -v ^system_info | grep -v ^main: | grep ..

Submodule whisper-cpp-2023/git.d deleted from 0a2d1210bc

View File

@@ -1 +0,0 @@
git.d/libwhisper.a

View File

@@ -1 +0,0 @@
git.d/models

View File

@@ -1,24 +0,0 @@
#! /bin/bash
# Clones whisper.cpp into ./git.d on first use, builds whatever is missing
# (samples, main, stream, the selected model), then either streams from the
# microphone ($STREAM set) or times a transcription of the gb1.wav sample.
# Environment: MODEL (default tiny.en), STREAM, MIC_TIMEOUT (seconds,
# default 2), MIC_ID (capture device id passed to ./stream as -c).

if [ ! -d ./git.d/.git ]; then
    git clone https://github.com/ggerganov/whisper.cpp.git git.d
fi
# Stop here if the checkout is missing so `make` never runs in the wrong directory.
cd ./git.d || exit 1
if [ ! -f ./samples/gb1.wav ]; then
    make samples
fi
if [ ! -f ./main ]; then
    make
fi
if [ ! -f ./stream ]; then
    make stream
fi

model="${MODEL:-tiny.en}"
if [ ! -f "./models/ggml-$model.bin" ]; then
    bash ./models/download-ggml-model.sh "$model"
fi

if [ -n "$STREAM" ]; then
    # ${MIC_ID:+...} adds `-c <id>` only when MIC_ID is set and non-empty.
    ./stream -m "./models/ggml-$model.bin" -t 8 --step 500 --length "${MIC_TIMEOUT:-2}000" ${MIC_ID:+-c "$MIC_ID"}
else
    time ./main -m "./models/ggml-$model.bin" -f ./samples/gb1.wav -t 4
fi

View File

@@ -1,366 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bindgen"
version = "0.64.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"lazy_static",
"lazycell",
"log",
"peeking_take_while",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
"which",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clang-sys"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "either"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "libc"
version = "0.2.140"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c"
[[package]]
name = "libloading"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f"
dependencies = [
"cfg-if",
"winapi",
]
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]]
name = "peeking_take_while"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]]
name = "pin-project-lite"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
[[package]]
name = "proc-macro2"
version = "1.0.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d"
dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "riff"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1"
[[package]]
name = "rust-whisper"
version = "0.1.0"
dependencies = [
"tokio",
"wav",
"whisper-rs",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "shlex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.27.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001"
dependencies = [
"autocfg",
"pin-project-lite",
"windows-sys",
]
[[package]]
name = "unicode-ident"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
[[package]]
name = "wav"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a65e199c799848b4f997072aa4d673c034f80f40191f97fe2f0a23f410be1609"
dependencies = [
"riff",
]
[[package]]
name = "which"
version = "4.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269"
dependencies = [
"either",
"libc",
"once_cell",
]
[[package]]
name = "whisper-rs"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7e1b9b003aa3285a0e4469219566266aa1d51ced1be38587251a4f713a1677"
dependencies = [
"whisper-rs-sys",
]
[[package]]
name = "whisper-rs-sys"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97a389dc665c7354ba6b1982850d4ba05b862907e535708ebdec92cbd9c599e8"
dependencies = [
"bindgen",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.45.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
[[package]]
name = "windows_i686_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
[[package]]
name = "windows_i686_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
[[package]]
name = "windows_x86_64_gnu"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
[[package]]
name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"

View File

@@ -1,37 +0,0 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy};
/// Transcribes a `.wav` file with whisper and prints the recognised text.
///
/// Settings are read from the environment:
/// - `MODEL`: path to the ggml model (default `../models/ggml-tiny.en.bin`)
/// - `P`: thread count handed to whisper (default `1`)
/// - `WAV`: path to the input audio (default `../git.d/samples/jfk.wav`)
///
/// The segments are printed on a single line, each followed by a space, and
/// terminated by a newline.
///
/// # Panics
///
/// Panics if the model cannot be loaded, `P` is not an integer, the `.wav`
/// cannot be opened or decoded, the audio is not mono / 16 kHz / 16-bit
/// integer samples, or whisper fails to run or to return a segment.
fn main() {
    let model_path = std::env::var("MODEL")
        .unwrap_or_else(|_| String::from("../models/ggml-tiny.en.bin"));
    let mut ctx = WhisperContext::new(&model_path).expect("failed to load model");

    // create a params object
    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
    let n_threads = std::env::var("P")
        .unwrap_or_else(|_| String::from("1"))
        .parse::<i32>()
        .expect("$P must be a number");
    params.set_n_threads(n_threads);
    params.set_translate(false);
    params.set_language(Some("en"));
    // Silence whisper's own output; only the final transcript is printed below.
    params.set_print_special(false);
    params.set_print_progress(false);
    params.set_print_realtime(false);
    params.set_print_timestamps(false);

    let wav_path = std::env::var("WAV")
        .unwrap_or_else(|_| String::from("../git.d/samples/jfk.wav"));
    let mut wav_file = std::fs::File::open(&wav_path).expect("failed to open .wav");
    let (header, data) = wav::read(&mut wav_file).expect("failed to decode .wav");

    // Only mono 16 kHz input is accepted; anything else aborts here.
    assert_eq!(header.channel_count, 1, "wav must be mono");
    assert_eq!(header.sampling_rate, 16000, "wav must be sampled at 16kHz");

    // The samples must be 16-bit integers; they are converted to f32 before
    // being handed to the model.
    let data16 = data.as_sixteen().expect("wav is not 16bit ints");
    let audio_data = whisper_rs::convert_integer_to_float_audio(data16);

    ctx.full(params, &audio_data[..])
        .expect("failed to run model");

    let num_segments = ctx.full_n_segments();
    for i in 0..num_segments {
        let segment = ctx.full_get_segment_text(i).expect("failed to get segment");
        print!("{} ", segment);
    }
    println!();
}

View File

@@ -1 +0,0 @@
git.d/whisper.h