11 Commits

Author SHA1 Message Date
bel
63fe8e7e9e whoops 2023-04-01 11:13:57 -06:00
bel
e26e700a21 reader does finally put and some exception shuffle 2023-04-01 11:12:44 -06:00
bel
fa23e396f1 gr 2023-04-01 10:46:16 -06:00
bel
11789a5c98 with boo 2023-04-01 10:45:00 -06:00
bel
97006d95c8 stack pls 2023-04-01 10:41:22 -06:00
bel
bb578e98f6 retry slower on bad mic things but ultimately segfault still 2023-04-01 10:35:04 -06:00
bel
465193b60d DEBUG stderr 2023-03-31 22:36:30 -06:00
bel
0e6c12a94f oops 2023-03-31 21:35:23 -06:00
bel
599f9079d3 req.txt 2023-03-31 20:52:38 -06:00
bel
0805bb6fd8 prune to only one whisper 2023-03-31 16:42:36 -06:00
bel
b109b0144d move to /rust-whisper.d with a download_models.sh 2023-03-31 16:41:46 -06:00
19 changed files with 54 additions and 398 deletions

2
.gitignore vendored
View File

@@ -1,5 +1,7 @@
**/*.sw*
/whisper-cpp-2023/rust.d/target
/rust-whisper.d/target
/rust-whisper.d/models
snowboy-2022/snowboy
**/*.git.d
**/*.wav

View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Download the ggml Whisper model files into a directory.
# Usage: download_models.sh [target-dir]   (default: $PWD/models)
# Models that already exist in the target directory are skipped.
src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
d="${1:-"$PWD"/models}"
mkdir -p "$d"
rc=0
# Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en"; do
    out="$d/ggml-$model.bin"
    test -f "$out" && continue
    # wget -O creates the output file even when the transfer fails, so fetch
    # into a temporary name and rename only on success; otherwise a partial
    # file would satisfy the `test -f` check on the next run.
    if wget --quiet --show-progress -O "$out.part" "$src-$model.bin"; then
        mv "$out.part" "$out"
    else
        rm -f "$out.part"
        echo "failed to download $src-$model.bin" >&2
        rc=1
    fi
done
exit "$rc"

View File

@@ -52,32 +52,55 @@ class Reader(threading.Thread):
self.inq = inq
self.outq = outq
def run(self):
log("Reader.run: start")
try:
idx = [
def mic_idx(self):
mics = []
while not mics and not self.should_stop():
log(f'searching for one of {self.name.split(",")} in {sr.Microphone.list_microphone_names()}...')
time.sleep(1)
mics = [
idx for idx,v in enumerate(
sr.Microphone.list_microphone_names(),
) if v in self.name.split(",")
][0]
with sr.Microphone(device_index=idx) as mic:
while not self.should_stop():
try:
self.outq.put(self._run(mic))
except Exception as e:
if not "timed out" in str(e):
log("Reader.run: error:", e)
]
log("mic#", mics[0])
return mics[0]
def run(self):
try:
log("Reader.run: start")
self._run()
except Exception as e:
log("Reader.run panic:", e)
log("microphones:", sr.Microphone.list_microphone_names())
log("Reader:run: exit:", e)
finally:
self.outq.put(None)
log("Reader.run: stop")
log("Reader.run: stop")
def _run(self):
while not self.should_stop():
time.sleep(3)
mic = sr.Microphone(device_index=self.mic_idx())
try:
mic.__enter__()
while not self.should_stop():
try:
self.outq.put(self.read(mic))
except Exception as e:
if not "timed out" in str(e):
raise e
except Exception as e:
import traceback
traceback.print_exception(e)
log("Reader.run: error:", e)
finally:
try:
mic.__exit__(None, None, None)
except Exception as e:
log("Reader.run.catch: error:", e)
def should_stop(self):
return not self.inq.empty()
def _run(self, mic):
def read(self, mic):
mic_timeout = int(os.environ.get("MIC_TIMEOUT", 5))
r = sr.Recognizer()
return r.listen(
@@ -116,6 +139,7 @@ class Parser(threading.Thread):
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P=2 rust-whisper", capture_output=True, shell=True)
result = proc.stdout.decode().strip()
if os.environ.get("DEBUG", None):
log("stderr:", proc.stderr.decode().strip())
log("raw transcript:", result)
result = result.replace(">>", "")
result = "".join([i.split("]")[-1] for i in result.split("[")[0]])
@@ -256,7 +280,7 @@ class Actor(threading.Thread):
self.pid = int(environ["SIGUSR2"])
self.handle = self.handle_signal
elif os.environ.get("URL", ""):
self.url = environ["URL"]
self.url = os.environ["URL"]
self.handle = self.handle_url
self.headers = [i.split("=")[:2] for i in os.environ.get("HEADERS", "").split("//") if i]
self.body = os.environ.get("BODY", '{"hotword":"{{hotword}}","context":"{{context}}"}')

View File

@@ -2,3 +2,4 @@ git+https://github.com/openai/whisper.git
soundfile
PyAudio
SpeechRecognition
PyYAML

View File

@@ -1,304 +0,0 @@
import speech_recognition as sr
import time
import threading
import queue
import signal
import sys
import os
import requests
import yaml
def log(*args):
    """Write the arguments to stderr on one line, prefixed with ">"."""
    fields = (">",) + args
    print(*fields, file=sys.stderr)
class Piper(threading.Thread):
    """Thread that feeds every item from ``inq`` to ``self._run``.

    ``_run`` is not defined on this class; whoever uses it has to supply it.
    A ``None`` item ends the loop, after which ``None`` is put on ``outq``.
    """

    def __init__(self, inq, outq):
        super().__init__()
        self.inq = inq
        self.outq = outq

    def run(self):
        """Consume ``inq`` until a ``None`` arrives, then signal ``outq``."""
        item = self.inq.get()
        while item is not None:
            self._run(item)
            item = self.inq.get()
        self.outq.put(None)
class Manager(threading.Thread):
    """Thread that turns a SIGINT into a ``None`` on ``outq``.

    The constructor installs a SIGINT handler that puts ``None`` on an
    internal queue; ``run`` blocks on that queue and then forwards a single
    ``None`` to ``outq``.
    """

    def __init__(self, outq):
        super().__init__()
        self.outq = outq
        interrupts = queue.Queue()

        def on_sigint(signum, frame):
            # The handler closes over the local queue, not over self.inq.
            interrupts.put(None)

        self.inq = interrupts
        signal.signal(signal.SIGINT, on_sigint)

    def run(self):
        """Wait for one item on the internal queue, then put ``None`` on ``outq``."""
        log("Manager.run: start")
        self.inq.get()
        self.outq.put(None)
        log("Manager.run: stop")
class Reader(threading.Thread):
    """Thread that records clips from a named microphone onto ``outq``.

    The microphone is chosen by ``$MIC_NAME`` (default ``pulse_monitor``), a
    comma-separated list of acceptable device names. When ``$MIC_NAME`` is
    set to an empty string the available microphones are printed and the
    process exits. Any item appearing on ``inq`` stops the read loop.
    """

    def __init__(self, inq, outq):
        super().__init__()
        self.name = os.environ.get("MIC_NAME", "pulse_monitor")
        if not self.name:
            available = sr.Microphone.list_microphone_names()
            for index, name in enumerate(available):
                print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
            exit()
        self.inq = inq
        self.outq = outq

    def run(self):
        """Open the first matching microphone and stream clips until stopped.

        ``None`` is always put on ``outq`` on the way out.
        """
        log("Reader.run: start")
        try:
            matches = [
                index
                for index, label in enumerate(sr.Microphone.list_microphone_names())
                if label in self.name.split(",")
            ]
            # Raises IndexError when no device matched; reported below as a panic.
            first = matches[0]
            with sr.Microphone(device_index=first) as mic:
                while not self.should_stop():
                    try:
                        clip = self._run(mic)
                        self.outq.put(clip)
                    except Exception as e:
                        # Errors whose text contains "timed out" are ignored.
                        if "timed out" in str(e):
                            continue
                        log("Reader.run: error:", e)
        except Exception as e:
            log("Reader.run panic:", e)
            log("microphones:", sr.Microphone.list_microphone_names())
        finally:
            self.outq.put(None)
        log("Reader.run: stop")

    def should_stop(self):
        """Return True once anything has been put on ``inq``."""
        nothing_pending = self.inq.empty()
        return not nothing_pending

    def _run(self, mic):
        """Listen on ``mic`` once, using ``$MIC_TIMEOUT`` (default 5) for both limits."""
        limit = int(os.environ.get("MIC_TIMEOUT", 5))
        recognizer = sr.Recognizer()
        return recognizer.listen(mic, timeout=limit, phrase_time_limit=limit)
class Parser(threading.Thread):
    """Thread that transcribes clips from ``inq`` and puts the text on ``outq``."""

    def __init__(self, inq, outq):
        super().__init__()
        self.inq = inq
        self.outq = outq

    def run(self):
        """Transcribe clips until a ``None`` clip arrives, then put ``None`` on ``outq``.

        Exceptions raised while handling a clip are logged and the loop
        continues with the next clip.
        """
        log("Parser.run: start")
        while True:
            try:
                clip = self.inq.get()
                waiting = self.inq.qsize()
                if waiting:
                    log("Parser.run backlog", waiting)
                if clip is None:
                    break
                transcript = self._run(clip)
                self.outq.put(transcript.strip())
            except Exception as e:
                log("Parser.run: error:", e)
        self.outq.put(None)
        log("Parser.run: stop")

    def _run(self, clip):
        """Run Whisper recognition on ``clip`` with the model named by ``$MODEL``."""
        recognizer = sr.Recognizer()
        # tiny.en=32x, base.en=16x, small.en=6x, medium.en=x2
        model_name = os.environ.get("MODEL", "small.en")
        return recognizer.recognize_whisper(clip, language="english", model=model_name)
def load_dot_notation(v, s):
    """Look up the dot-separated path ``s`` inside the nested value ``v``.

    Path segments index into dicts by key and into lists by integer
    position. A ``[]`` segment fans out over every element of a list (or
    every value of a dict) and flattens the per-element results into one
    list. Empty segments are ignored, so ``"."`` returns ``v`` itself.
    """
    segments = s.replace("[]", ".[]").split(".")
    return _load_dot_notation(v, segments)


def _load_dot_notation(v, items):
    """Resolve the already-split path ``items`` against ``v``."""
    for position, segment in enumerate(items):
        if not segment:
            continue
        if segment != "[]":
            # Plain segment: step one level down.
            v = v[int(segment)] if isinstance(v, list) else v[segment]
            continue
        # Wildcard: apply the remaining path to every child and flatten.
        children = v if isinstance(v, list) else v.values()
        remainder = items[position + 1:]
        collected = []
        for child in children:
            found = _load_dot_notation(child, remainder)
            if isinstance(found, list):
                collected.extend(found)
            else:
                collected.append(found)
        return collected
    return v
def test_load_dot_notation():
    """Self-test for load_dot_notation; raises Exception on the first failing case.

    Each case is (expected, value, path). The failure message names the
    inputs and the actual result so the broken case can be identified.
    """
    cases = [
        ("a", "a", "."),
        (["a"], ["a"], "."),
        ("b", {"a": "b"}, ".a"),
        ("c", {"a": {"b": "c"}}, ".a.b"),
        ("c", {"a": {"b": ["c"]}}, ".a.b.0"),
        (["c", "d"], {"a": {"b": "c"}, "a2": {"b": "d"}}, ".[].b"),
        (["c", "d"], {"a": {"b": ["c"], "b2": ["d"]}}, ".a.[].0"),
        (["c", "d"], {"a": {"b": ["c"], "b2": ["d"]}}, ".a[].0"),
        (["c", "d"], ["c", "d"], "."),
        (["c", "d"], ["c", "d"], "[]"),
    ]
    for want, value, path in cases:
        got = load_dot_notation(value, path)
        if got != want:
            raise Exception(
                f"load_dot_notation({value!r}, {path!r}) returned {got!r}, expected {want!r}"
            )


# Runs at import time, so a broken load_dot_notation fails fast.
test_load_dot_notation()
class Reactor(threading.Thread):
    """Thread that scans transcribed text from ``inq`` for hotwords.

    Every hotword found in a text is put on ``outq`` as a
    ``(hotword, text)`` tuple. A ``None`` on ``inq`` stops the thread and is
    forwarded to ``outq``.
    """

    def __init__(self, inq, outq):
        threading.Thread.__init__(self)
        self.inq = inq
        self.outq = outq
        self.load_hotwords = Reactor.new_load_hotwords()
        log(f"hotwords: {self.load_hotwords()}")

    @staticmethod
    def new_load_hotwords():
        """Build a zero-argument loader returning the current hotword list.

        ``$HOTWORDS`` is interpreted as, in order:

        * unset/empty: the loader returns ``[]``;
        * ``path@dotpath``: a YAML file, with the hotwords selected by
          ``load_dot_notation`` (re-read on every call);
        * ``path``: a text file with one hotword per line (re-read on every
          call);
        * otherwise: a literal list separated by ``//`` (the historical
          ``\\/\\/`` spelling is still accepted).

        Hotwords are lower-cased with all whitespace removed. Entries that
        end up empty are dropped, because an empty hotword is a substring of
        every text and would fire on every utterance.
        """
        p = os.environ.get("HOTWORDS", None)
        if not p:
            def load_nothing():
                return []
            return load_nothing

        def normalize(words):
            squashed = ("".join(w.strip().lower().split()) for w in words if w)
            return [w for w in squashed if w]

        try:
            if "@" in p:
                def load_hotwords_in_yaml_file():
                    with open(p.split("@")[0], "r") as f:
                        v = yaml.safe_load(f)
                    v = load_dot_notation(v, p.split("@")[-1])
                    return normalize(v)
                # Call once now so an unreadable file falls through to the
                # literal interpretation below.
                load_hotwords_in_yaml_file()
                return load_hotwords_in_yaml_file
            else:
                def load_hotwords_in_file():
                    with open(p, "r") as f:
                        return normalize(f.readlines())
                load_hotwords_in_file()
                return load_hotwords_in_file
        except Exception as e:
            log(f"$HOTWORDS {p} is not a file: {e}")

        # Accept both "//" and the older backslash-escaped "\/\/" separator.
        hotwords = normalize(p.replace("\\/\\/", "//").split("//"))
        log(f'$HOTWORDS: {hotwords}')

        def load_hotwords_as_literal():
            return hotwords
        return load_hotwords_as_literal

    def run(self):
        """Handle texts from ``inq`` until ``None`` arrives, then signal ``outq``."""
        log("Reactor.run: start")
        while True:
            text = self.inq.get()
            if text is None:
                break
            self.handle(text)
        self.outq.put(None)
        log("Reactor.run: stop")

    def handle(self, text):
        """Emit ``(hotword, text)`` on ``outq`` for every hotword found in ``text``.

        With no hotwords loaded the text is printed to stdout when
        ``$HOTWORDS`` is unset, or logged to stderr when it is set.
        """
        hotwords = self.load_hotwords()
        if os.environ.get("DEBUG", None):
            log(f"seeking {hotwords} in {text}")
        if not hotwords:
            if not os.environ.get("HOTWORDS", None):
                print(text)
            else:
                log(text)
            return
        # Compare on lower-cased letters only, ignoring spaces and punctuation.
        cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()])
        for i in hotwords:
            if i in cleantext:
                self.outq.put((i, text))
class Actor(threading.Thread):
    """Thread that reacts to ``(hotword, context)`` tuples from ``inq``.

    The reaction is selected once, from the environment:

    * ``$STDOUT == "true"``: log the context, print the hotword to stdout;
    * ``$SIGUSR2`` set: log, then send SIGUSR2 to the PID it names;
    * ``$URL`` set: log, then POST ``$BODY`` to that URL with ``$HEADERS``
      (``key=value`` pairs separated by ``//``); ``{{hotword}}`` and
      ``{{context}}`` are substituted in header values and in the body;
    * otherwise: log to stderr only.
    """

    def __init__(self, inq):
        threading.Thread.__init__(self)
        self.inq = inq
        self.handle = self.handle_stderr
        if os.environ.get("STDOUT", "") == "true":
            self.handle = self.handle_stdout
        elif os.environ.get("SIGUSR2", ""):
            # Was `environ[...]`, an undefined name that raised NameError.
            self.pid = int(os.environ["SIGUSR2"])
            self.handle = self.handle_signal
        elif os.environ.get("URL", ""):
            self.url = os.environ["URL"]
            self.handle = self.handle_url
            # Split on the first "=" only, so values that themselves contain
            # "=" (for example base64 padding) are kept whole.
            self.headers = [i.split("=", 1) for i in os.environ.get("HEADERS", "").split("//") if i]
            self.body = os.environ.get("BODY", '{"hotword":"{{hotword}}","context":"{{context}}"}')
            log(self.headers)

    def run(self):
        """Dispatch items from ``inq`` to the selected handler until ``None`` arrives."""
        log("Actor.run: start")
        while True:
            got = self.inq.get()
            if got is None:
                break
            self.handle(got[0], got[1])
        log("Actor.run: stop")

    def handle_stderr(self, hotword, context):
        """Log the hotword and the text it was found in."""
        log(f"'{hotword}' in '{context}'")

    def handle_stdout(self, hotword, context):
        """Log the context and print the bare hotword to stdout."""
        log(context)
        print(hotword)

    def handle_signal(self, hotword, context):
        """Log the match, then send SIGUSR2 to the configured PID."""
        self.handle_stderr(hotword, context)
        os.kill(self.pid, signal.SIGUSR2)

    def handle_url(self, hotword, context):
        """Log the match, then POST the templated body to the configured URL.

        Any exception (including a malformed header entry) is logged and
        swallowed so the thread keeps running.
        """
        self.handle_stderr(hotword, context)
        try:
            headers = {}
            for i in self.headers:
                key = i[0]
                value = i[1]
                value = value.replace("{{hotword}}", hotword)
                value = value.replace("{{context}}", context)
                headers[key] = value
            # NOTE(review): substitution is plain text replacement; a context
            # containing quotes is not escaped for the default JSON body.
            body = self.body
            body = body.replace("{{hotword}}", hotword)
            body = body.replace("{{context}}", context)
            if os.environ.get("DEBUG", ""):
                log("POST", self.url, headers, body)
            # NOTE(review): no timeout is passed, so an unresponsive server
            # can block this thread.
            requests.post(self.url, headers=headers, data=body)
        except Exception as e:
            log("Actor.handle_url:", e)
def main():
    """Wire the pipeline threads together with queues and run them to completion.

    The Manager's output queue doubles as the Reader's stop queue; the other
    queues chain Reader -> Parser -> Reactor -> Actor.
    """
    stop_q = queue.Queue(maxsize=1)
    clips_q = queue.Queue(maxsize=10)
    texts_q = queue.Queue(maxsize=10)
    hits_q = queue.Queue(maxsize=10)
    workers = [
        Manager(stop_q),
        Reader(stop_q, clips_q),
        Parser(clips_q, texts_q),
        Reactor(texts_q, hits_q),
        Actor(hits_q),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
if __name__ == "__main__":
main()

View File

@@ -1,7 +0,0 @@
#! /bin/bash
# System packages for audio capture.
sudo apt install portaudio19-dev python3-pyaudio
# Python packages: Whisper from git plus the audio/recognition libraries.
python3 -m pip install \
    git+https://github.com/openai/whisper.git \
    soundfile \
    PyAudio \
    SpeechRecognition
#sudo apt-get install python3 python3-all-dev python3-pip build-essential swig git libpulse-dev libasound2-dev
#python3 -m pip install pocketsphinx

View File

@@ -1,20 +0,0 @@
# Stage 1: fetch the whisper.cpp v1.2.1 source tarball and build it.
FROM debian:buster as builder
RUN apt -y update && apt -y install build-essential wget ffmpeg
WORKDIR /tmp/whisper-cpp.git.d
# Download and unpack the release, renaming the extracted directory to git.d.
RUN wget https://github.com/ggerganov/whisper.cpp/archive/refs/tags/v1.2.1.tar.gz \
&& tar -xf ./*.tar.gz \
&& mv ./whisper*/ ./git.d
WORKDIR /tmp/whisper-cpp.git.d/git.d
# Build the default target, then the `samples` target.
RUN make && make samples
# Stage 2: runtime image containing the whole build tree from stage 1.
FROM debian:buster
RUN apt -y update && apt -y install curl
COPY --from=builder /tmp/whisper-cpp.git.d/git.d/ /whisper-cpp.git.d/
WORKDIR /whisper.d
# Fetch the tiny.en model with the script shipped in the source tree.
# NOTE(review): CMD below expects the model under /whisper-cpp.git.d/models;
# presumably the script writes next to itself rather than into WORKDIR - confirm.
RUN bash /whisper-cpp.git.d/models/download-ggml-model.sh tiny.en
ENTRYPOINT []
# Default command: transcribe the gb1.wav sample with the tiny.en model, 4 threads.
CMD /whisper-cpp.git.d/main -m /whisper-cpp.git.d/models/ggml-tiny.en.bin -f /whisper-cpp.git.d/samples/gb1.wav -t 4

View File

@@ -1 +0,0 @@
git.d/main

View File

@@ -1,14 +0,0 @@
# git.d/samples/mm0.wav (30s)
| model | threads | rust | c |
| ----- | ------- | ------------- | ----------- |
| tiny | 1 | 4.4s@122% | 4.9s@125% |
| tiny | 2 | 2.7s@210% | 3.3s@190% |
| tiny | 4 | 2.0s@- | 2.9s@400% |
| tiny | 8 | 2.0s@- | 3.1s@700% |
| small | 1 | 23.9s@175% | 28.5s@205% |
| small | 2 | 14.9s@347% | 19.2s@330% |
| small | 4 | 12.3s@515% | 22.1s@530% |
| base | 1 | 8.7s@150% | 10.2s@155% |
| base | 2 | 5.1s@240% | 7.1s@270% |
| base | 4 | 3.8s@370% | 6.0s@430% |

View File

@@ -1,7 +0,0 @@
# Time the Rust and C whisper builds on the same input.
# Usage: <script> [threads] [model-path] [wav-path]
# P, MODEL and WAV are exported so rust-whisper can read them from its
# environment; the C binary receives them as flags.
export P="${1:-1}"
export MODEL="${2:-models/ggml-tiny.en.bin}"
export WAV="${3:-git.d/samples/jfk.wav}"
echo === RUST
# Drop whisper_* diagnostic lines and lines shorter than two characters.
time rust-whisper 2>&1 | grep -v ^whisper_ | grep ..
echo === C
# Quoted so model/wav paths containing spaces reach the binary intact.
time ./c-whisper -m "$MODEL" -f "$WAV" -t "$P" 2>&1 | grep -v ^whisper_ | grep -v ^system_info | grep -v ^main: | grep ..

Submodule whisper-cpp-2023/git.d deleted from 0a2d1210bc

View File

@@ -1 +0,0 @@
git.d/libwhisper.a

View File

@@ -1 +0,0 @@
git.d/models

View File

@@ -1,24 +0,0 @@
#! /bin/bash
# Clone and build whisper.cpp on demand, fetch the model, then either stream
# from a microphone ($STREAM set) or transcribe the gb1.wav sample.
# Environment: MODEL (default tiny.en), STREAM, MIC_TIMEOUT (seconds,
# default 2), MIC_ID (optional capture device id for ./stream).
model="${MODEL:-tiny.en}"
if [ ! -d ./git.d/.git ]; then
    git clone https://github.com/ggerganov/whisper.cpp.git git.d
fi
# Stop here if the checkout is missing; otherwise the make calls below would
# run in whatever directory the script was started from.
cd ./git.d || exit 1
if [ ! -f ./samples/gb1.wav ]; then
    make samples
fi
if [ ! -f ./main ]; then
    make
fi
if [ ! -f ./stream ]; then
    make stream
fi
if [ ! -f "./models/ggml-$model.bin" ]; then
    bash ./models/download-ggml-model.sh "$model"
fi
if [ -n "$STREAM" ]; then
    # ${MIC_ID:+...} adds `-c <id>` only when MIC_ID is set and non-empty.
    ./stream -m "./models/ggml-$model.bin" -t 8 --step 500 --length "${MIC_TIMEOUT:-2}000" ${MIC_ID:+-c "$MIC_ID"}
else
    time ./main -m "./models/ggml-$model.bin" -f ./samples/gb1.wav -t 4
fi

View File

@@ -1 +0,0 @@
git.d/whisper.h