diff --git a/whisper-2023/install.sh b/whisper-2023/install.sh
new file mode 100644
index 0000000..5381c5e
--- /dev/null
+++ b/whisper-2023/install.sh
@@ -0,0 +1,7 @@
+#! /bin/bash
+
+sudo apt install portaudio19-dev python3-pyaudio
+python3 -m pip install git+https://github.com/openai/whisper.git soundfile PyAudio SpeechRecognition
+
+sudo apt-get install python3 python3-all-dev python3-pip build-essential swig git libpulse-dev libasound2-dev
+python3 -m pip install pocketsphinx
diff --git a/whisper-2023/microphone_recognition.py b/whisper-2023/microphone_recognition.py
new file mode 100644
index 0000000..58e747f
--- /dev/null
+++ b/whisper-2023/microphone_recognition.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+"""Continuously transcribe microphone audio with Whisper.
+
+Adapted from
+https://github.com/Uberi/speech_recognition/blob/master/examples/microphone_recognition.py
+
+NOTE: this example requires PyAudio because it uses the Microphone class.
+
+Environment variables:
+    MIC_NAME     Comma-separated microphone names; the lowest-indexed device
+                 whose name equals one of them is used.  When unset, the
+                 available microphones are listed and the script exits.
+    MIC_TIMEOUT  Seconds to wait for speech to start, and also the maximum
+                 phrase length in seconds (default 5).
+"""
+
+import queue
+import sys
+import threading
+import time
+from os import environ
+
+import speech_recognition as sr
+
+
+class Recognizer(threading.Thread):
+    """Worker thread that transcribes queued audio clips with Whisper."""
+
+    def __init__(self, q):
+        # Daemon thread: run() never returns, so a non-daemon worker would
+        # keep the process alive after the main thread has stopped.
+        threading.Thread.__init__(self, daemon=True)
+        self.q = q
+        # One shared instance, so any state the library keeps on it between
+        # calls is reused instead of being rebuilt for every clip.
+        self.recognizer = sr.Recognizer()
+
+    def run(self):
+        """Transcribe clips from the queue forever, one at a time."""
+        while True:
+            self.one(self.q.get())
+
+    def one(self, audio):
+        """Print the transcript of ``audio`` to stdout.
+
+        Timing information and failures are reported on stderr so that
+        stdout carries transcripts only.
+        """
+        # To try the Sphinx engine instead (pocketsphinx, see install.sh),
+        # call self.recognizer.recognize_sphinx(audio) here.
+        start = time.time()
+        try:
+            text = self.recognizer.recognize_whisper(audio, language="english")
+        except sr.UnknownValueError:
+            print("Whisper could not understand audio", file=sys.stderr)
+        except sr.RequestError as e:
+            print("Could not request results from Whisper; {0}".format(e), file=sys.stderr)
+        else:
+            print(text)
+            print("/Whisper", int(time.time() - start), file=sys.stderr)
+
+
+class Listener():
+    """Records one phrase at a time from the microphone named by MIC_NAME."""
+
+    def __init__(self):
+        if not environ.get("MIC_NAME"):
+            # No microphone selected: show what is available and stop.
+            for index, name in enumerate(sr.Microphone.list_microphone_names()):
+                print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
+            sys.exit()
+        self.name = environ["MIC_NAME"]
+
+    def run(self):
+        """Record and return a single phrase.
+
+        Raises:
+            IndexError: no available microphone has one of the wanted names.
+            sr.WaitTimeoutError: no speech started within MIC_TIMEOUT seconds.
+        """
+        mic_timeout = int(environ.get("MIC_TIMEOUT", 5))
+        wanted = self.name.split(",")
+        available = sr.Microphone.list_microphone_names()
+        idx = next((i for i, v in enumerate(available) if v in wanted), None)
+        if idx is None:
+            raise IndexError("no microphone named {0!r}; available: {1!r}".format(self.name, available))
+        # obtain audio from the microphone
+        r = sr.Recognizer()
+        with sr.Microphone(device_index=idx) as source:
+            return r.listen(source, timeout=mic_timeout, phrase_time_limit=mic_timeout)
+
+
+def main():
+    """Record phrases forever and hand each one to the recognizer thread."""
+    # maxsize=1: put() blocks while a clip is already waiting, so recording
+    # cannot run further ahead of recognition than one queued clip.
+    q = queue.Queue(maxsize=1)
+    listener = Listener()
+    Recognizer(q).start()
+
+    while True:
+        try:
+            got = listener.run()
+        except sr.WaitTimeoutError:
+            # Nobody spoke within the timeout; keep listening.
+            continue
+        q.put(got)
+
+
+if __name__ == "__main__":
+    main()