37 Commits

Author SHA1 Message Date
Bel LaPointe
1dd631872c from env to flags 2023-11-28 22:31:16 -07:00
Bel LaPointe
72a1420638 wheee 2023-11-28 22:24:10 -07:00
Bel LaPointe
1009c4230e env variable ify 2023-11-28 22:13:05 -07:00
Bel LaPointe
30e5515da1 GOTTEM 2023-11-28 22:03:08 -07:00
Bel LaPointe
b4c9ecb98b successfully confirmed audio is k with sox -r 16000 -t f32 /tmp/transcribed.pcm --default-device 2023-11-28 21:30:24 -07:00
Bel LaPointe
4ef419e6c0 successfully confirmed audio is k with sox -r 44100 -t f32 /tmp/transcribed.pcm --default-device 2023-11-28 21:22:26 -07:00
Bel LaPointe
54964ec59b grrrrr output 2023-11-28 21:03:00 -07:00
Bel LaPointe
62e764436a no warnings but still nothing sane coming out... 2023-11-28 20:36:26 -07:00
Bel LaPointe
d631def834 CLOSER like easily 80 20 range right 2023-11-28 20:32:09 -07:00
Bel LaPointe
3168968cae ok stream les go 2023-11-28 19:23:21 -07:00
Bel LaPointe
437d7cac39 successful refactor 2023-11-28 19:18:05 -07:00
Bel LaPointe
3093a91d84 wip 2023-11-28 19:10:07 -07:00
Bel LaPointe
f58e3a0331 better default err msgs 2023-11-26 17:37:26 -07:00
Bel LaPointe
6dffa401b7 cleaner 2023-11-26 17:21:40 -07:00
Bel LaPointe
f4d9730b5a hm i lost it but i get it back 2023-11-26 17:13:29 -07:00
Bel LaPointe
0c5c1f647c submodule for gitea-whisper-rs 2023-11-26 17:04:16 -07:00
Bel LaPointe
77ad40b61a closer 2023-11-26 17:00:42 -07:00
Bel LaPointe
09894c4fd0 confirmed just needs whisper-rs-sys upgrade for whisper.cpp up 2023-11-26 16:39:42 -07:00
Bel LaPointe
3e2e1e2ff8 wip 2023-11-26 16:23:42 -07:00
Bel LaPointe
50058037eb Revert "try rollback to whisper-rs5 to avoid gpu in whispercpp but no luck"
This reverts commit a483aaf25c.
2023-11-08 11:35:31 -07:00
Bel LaPointe
a483aaf25c try rollback to whisper-rs5 to avoid gpu in whispercpp but no luck 2023-11-08 11:35:29 -07:00
Bel LaPointe
be7d85f85e confirmed whisper.cpp works with distill iff no gpu 2023-11-08 11:29:50 -07:00
Bel LaPointe
60d38c4d5c update distil.sh 2023-11-08 10:58:58 -07:00
Bel LaPointe
e3a7628acf try distil-whisper 2023-11-08 10:22:30 -07:00
Bel LaPointe
91c7791860 up whisper-rs to 0.8.0 2023-11-08 09:25:00 -07:00
bel
247edd2ced more trans 2023-07-15 19:05:00 -06:00
bel
edd94aef72 catch 2023-07-05 22:36:07 -06:00
bel
b4d3e5a27c HOTWORDS yaml @ can have comma delimited and KEYS 2023-04-19 18:24:07 -06:00
bel
a1436e3bd2 revise 2023-04-12 19:37:43 -06:00
bel
410769b8c6 tr 2023-04-12 19:26:03 -06:00
bel
5869016de6 tr 2023-04-12 19:16:07 -06:00
bel
0955f6c0c0 oof 2023-04-12 19:15:32 -06:00
bel
242f4407df script 2023-04-12 18:50:49 -06:00
bel
814a8ae2f3 typo 2023-04-08 22:23:20 -06:00
bel
7c369e72d4 delimiters 2023-04-08 22:22:22 -06:00
bel
0aff4f556b one more 2023-04-08 20:05:03 -06:00
bel
88bf54d022 url replaces hotword,context too 2023-04-02 10:48:41 -06:00
8 changed files with 1234 additions and 107 deletions

3
.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "rust-whisper.d/gitea-whisper-rs"]
path = rust-whisper.d/gitea-whisper-rs
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git

1103
rust-whisper.d/Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -6,6 +6,11 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
whisper-rs = "0.5" whisper-rs = { path = "./gitea-whisper-rs", version = "0.8.0" }
wav = "1" wav = "1"
tokio = "1.27" tokio = "1.27"
cpal = "0.15.2"
signal-hook = "0.3.17"
byteorder = "1.5.0"
chrono = "0.4.31"
clap = { version = "4.4.10", features = ["derive"] }

View File

@@ -47,7 +47,7 @@ class Reader(threading.Thread):
self.name = os.environ.get("MIC_NAME", "pulse_monitor") self.name = os.environ.get("MIC_NAME", "pulse_monitor")
if not self.name: if not self.name:
for index, name in enumerate(sr.Microphone.list_microphone_names()): for index, name in enumerate(sr.Microphone.list_microphone_names()):
print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name)) log("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
exit() exit()
self.inq = inq self.inq = inq
self.outq = outq self.outq = outq
@@ -136,14 +136,19 @@ class Parser(threading.Thread):
p = "/tmp/whisper-cpp.wav" p = "/tmp/whisper-cpp.wav"
with open("/tmp/whisper-cpp.wav", "wb") as f: with open("/tmp/whisper-cpp.wav", "wb") as f:
f.write(wav) f.write(wav)
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P=2 rust-whisper", capture_output=True, shell=True) proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P={os.environ.get('P', '2')} rust-whisper", capture_output=True, shell=True)
result = proc.stdout.decode().strip() result = proc.stdout.decode().strip()
if os.environ.get("DEBUG", None): if os.environ.get("DEBUG", None):
log("stderr:", proc.stderr.decode().strip()) log("stderr:", proc.stderr.decode().strip())
log("raw transcript:", result) log("raw transcript:", result)
result = result.replace(">>", "") result = result.replace(">>", "")
result = "".join([i.split("]")[-1] for i in result.split("[")[0]]) for pair in [
result = "".join([i.split(")")[-1] for i in result.split("(")[0]]) ("[", "]"),
("(", ")"),
("<", ">"),
("*", "*"),
]:
result = "".join([i.split(pair[1])[-1] for i in result.split(pair[0])[0]])
if os.environ.get("DEBUG", None): if os.environ.get("DEBUG", None):
log("annotation-free transcript:", result) log("annotation-free transcript:", result)
return result return result
@@ -180,6 +185,8 @@ def _load_dot_notation(v, items):
else: else:
result.append(subresult) result.append(subresult)
return result return result
elif k == "KEYS":
v = [k for k in v]
else: else:
if isinstance(v, list): if isinstance(v, list):
v = v[int(k)] v = v[int(k)]
@@ -224,8 +231,15 @@ class Reactor(threading.Thread):
def load_hotwords_in_yaml_file(): def load_hotwords_in_yaml_file():
with open(p.split("@")[0], "r") as f: with open(p.split("@")[0], "r") as f:
v = yaml.safe_load(f) v = yaml.safe_load(f)
v = load_dot_notation(v, p.split("@")[-1]) if os.environ.get("DEBUG", None):
return ["".join(i.strip().lower().split()) for i in v if i] log(f'opened {p.split("@")[0]} and got {v}')
result = []
for to_find in [i for i in p.split("@")[-1].split(",") if i]:
if os.environ.get("DEBUG", None):
log(f'finding {to_find} in {v}')
v2 = load_dot_notation(v, to_find)
result.extend(["".join(i.strip().lower().split()) for i in v2 if i])
return result
load_hotwords_in_yaml_file() load_hotwords_in_yaml_file()
return load_hotwords_in_yaml_file return load_hotwords_in_yaml_file
else: else:
@@ -254,13 +268,23 @@ class Reactor(threading.Thread):
log("Reactor.run: stop") log("Reactor.run: stop")
def handle(self, text): def handle(self, text):
try:
self._handle(text)
except Exception:
pass
def _handle(self, text):
hotwords = self.load_hotwords() hotwords = self.load_hotwords()
if os.environ.get("DEBUG", None): if os.environ.get("DEBUG", None):
log(f"seeking {hotwords} in {text}") log(f"seeking {hotwords} in {text}. $HOTWORDS={os.environ.get('HOTWORDS', None)}")
if not hotwords: if not hotwords:
if not os.environ.get("HOTWORDS", None): if not os.environ.get("HOTWORDS", None):
print(text) if os.environ.get("DEBUG", None):
log(f"HOTWORDS is False; {text}")
print(text, flush=True)
else: else:
if os.environ.get("DEBUG", None):
log(f"HOTWORDS is True; {text}")
log(text) log(text)
return return
cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()]) cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()])
@@ -300,7 +324,7 @@ class Actor(threading.Thread):
def handle_stdout(self, hotword, context): def handle_stdout(self, hotword, context):
log(context) log(context)
print(hotword) print(hotword, flush=True)
def handle_signal(self, hotword, context): def handle_signal(self, hotword, context):
self.handle_stderr(hotword, context) self.handle_stderr(hotword, context)
@@ -319,9 +343,12 @@ class Actor(threading.Thread):
body = self.body body = self.body
body = body.replace("{{hotword}}", hotword) body = body.replace("{{hotword}}", hotword)
body = body.replace("{{context}}", context) body = body.replace("{{context}}", context)
url = self.url
url = url.replace("{{hotword}}", hotword)
url = url.replace("{{context}}", context)
if os.environ.get("DEBUG", "") : if os.environ.get("DEBUG", "") :
log("POST", self.url, headers, body) log("POST", url, headers, body)
requests.post(self.url, headers=headers, data=body) requests.post(url, headers=headers, data=body)
except Exception as e: except Exception as e:
log("Actor.handle_url:", e) log("Actor.handle_url:", e)

View File

@@ -1,10 +1,9 @@
#!/bin/bash #!/bin/bash
src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml" src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
d="${1:-"$PWD"/models}" cd "$(dirname "$(realpath "$BASH_SOURCE")")"
mkdir -p "$d"
# Whisper models # Whisper models
for model in "tiny.en" "base.en" "small.en" "medium.en"; do for model in "tiny.en" "base.en" "small.en" "medium.en"; do
test -f "$d"/ggml-$model.bin || wget --quiet --show-progress -O "$d"/ggml-$model.bin "$src-$model.bin" test -f ./ggml-$model.bin || wget --quiet --show-progress -O ./ggml-$model.bin "$src-$model.bin"
done done

View File

@@ -1,37 +1,148 @@
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy}; use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
use signal_hook::{iterator::Signals, consts::signal::SIGINT};
use std::time::{Duration, Instant};
use chrono;
use clap::Parser;
// Command-line flags for the transcriber, parsed by clap's derive API.
// NOTE: plain `//` comments are used deliberately; `///` doc comments on a
// clap `Parser` struct are picked up as help text and would change `--help`.
#[derive(Parser, Debug)]
struct Flags {
// Path of the model file handed to `new_whisper`.
#[arg(long, default_value = "../models/ggml-tiny.en.bin")]
model: String,
// Thread count handed to `new_whisper`, which applies it to every transcription.
#[arg(long, default_value = "8")]
threads: i32,
// Fraction of the buffered samples dropped after each streaming transcription;
// the remaining tail of the buffer is kept and prepended to the next pass.
#[arg(long, default_value = "0.8")]
stream_churn: f32,
// Whole seconds that must elapse between streaming transcriptions
// (converted with `Duration::new(stream_step, 0)`).
#[arg(long, default_value = "5")]
stream_step: u64,
// Optional WAV file to transcribe once; when absent, audio is streamed from
// the default input device instead.
// NOTE(review): there is no `#[arg(long)]` here, so this is presumably a
// positional argument rather than a `--wav` flag — confirm.
wav: Option<String>,
}
fn main() { fn main() {
let mut ctx = WhisperContext::new( let flags = Flags::parse();
&std::env::var("MODEL").unwrap_or(String::from("../models/ggml-tiny.en.bin"))
).expect("failed to load model");
// create a params object let w = new_whisper(flags.model, flags.threads).unwrap();
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 }); let stream_churn = flags.stream_churn;
params.set_n_threads( let stream_step = Duration::new(flags.stream_step, 0);
std::env::var("P").unwrap_or(String::from("1")).parse::<i32>().expect("$P must be a number") match flags.wav {
); Some(wav) => {
params.set_translate(false); let (header, data) = wav::read(
params.set_language(Some("en")); &mut std::fs::File::open(wav).expect("failed to open $WAV"),
params.set_print_special(false); ).expect("failed to decode $WAV");
params.set_print_progress(false); assert!(header.channel_count == 1);
params.set_print_realtime(false); assert!(header.sampling_rate == 16000);
params.set_print_timestamps(false); let data16 = data.as_sixteen().expect("wav is not 32bit floats");
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
let (header, data) = wav::read(&mut std::fs::File::open( let result = w.transcribe(&audio_data).unwrap();
&std::env::var("WAV").unwrap_or(String::from("../git.d/samples/jfk.wav")) println!("{}", result);
).expect("failed to open .wav")).expect("failed to decode .wav"); },
assert!(header.channel_count == 1); None => {
assert!(header.sampling_rate == 16000); let host = cpal::default_host();
let data16 = data.as_sixteen().expect("wav is not 32bit floats"); let device = host.default_input_device().unwrap();
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16); let cfg = device.supported_input_configs()
.unwrap()
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
.nth(0)
.unwrap()
.with_max_sample_rate();
ctx.full(params, &audio_data[..]) let channels = cfg.channels();
.expect("failed to run model"); let downsample_ratio = cfg.sample_rate().0 as f32 / 16000.0;
let mut buffer = vec![];
let mut last = Instant::now();
let stream = device.build_input_stream(
&cfg.clone().into(),
move |data: &[f32], _: &cpal::InputCallbackInfo| {
let mono_data: Vec<f32> = data.iter().map(|x| *x).step_by(channels.into()).collect();
let mut downsampled_data = vec![];
for i in 0..(mono_data.len() as f32 / downsample_ratio) as usize {
let mut upsampled = i as f32 * downsample_ratio;
if upsampled > (mono_data.len()-1) as f32 {
upsampled = (mono_data.len()-1) as f32
}
downsampled_data.push(mono_data[upsampled as usize]);
}
downsampled_data.iter().for_each(|x| buffer.push(*x));
if Instant::now() - last > stream_step {
let result = w.transcribe(&buffer).unwrap();
eprintln!("{}", chrono::Local::now());
println!("{}", result);
let num_segments = ctx.full_n_segments(); let retain = buffer.len() - (buffer.len() as f32 * stream_churn) as usize;
for i in 0..num_segments { for i in retain..buffer.len() {
let segment = ctx.full_get_segment_text(i).expect("failed to get segment"); buffer[i - retain] = buffer[i]
print!("{} ", segment); }
} buffer.truncate(retain);
println!(""); last = Instant::now();
}
},
move |err| {
eprintln!("input error: {}", err)
},
None,
).unwrap();
stream.play().unwrap();
eprintln!("listening on {}", device.name().unwrap());
let mut signals = Signals::new(&[SIGINT]).unwrap();
for sig in signals.forever() {
eprintln!("sig {}", sig);
break;
}
stream.pause().unwrap();
},
};
}
/// A loaded whisper model plus the thread count used for each transcription.
struct Whisper {
/// Model context created by `WhisperContext::new` in `new_whisper`.
ctx: WhisperContext,
/// Value passed to `FullParams::set_n_threads` on every transcription.
threads: i32,
}
/// Loads the model at `model_path` and wraps it in a `Whisper` that will use
/// `threads` threads for each transcription.
///
/// # Errors
/// Returns `"failed to load <model_path>: <cause>"` when the model context
/// cannot be created.
fn new_whisper(model_path: String, threads: i32) -> Result<Whisper, String> {
    WhisperContext::new(&model_path)
        .map(|ctx| Whisper { ctx, threads })
        .map_err(|cause| format!("failed to load {}: {}", model_path, cause))
}
impl Whisper {
/// Transcribes `data` and returns the recognized text.
///
/// This is a thin wrapper over `_transcribe` that converts its error into a
/// human-readable string.
///
/// # Errors
/// Any failure from `_transcribe` is reported as
/// `"failed to transcribe: <cause>"`.
fn transcribe(&self, data: &Vec<f32>) -> Result<String, String> {
    self._transcribe(data)
        .map_err(|cause| format!("failed to transcribe: {}", cause))
}
/// Runs one whisper inference pass over `data` and concatenates the text of
/// every resulting segment.
///
/// A fresh state is created from the shared context on each call and
/// `no_context` is enabled. The language is fixed to English with translation,
/// language detection and all of whisper's own printing turned off.
///
/// # Returns
/// The segment texts joined together, each one preceded by a single space
/// (so a non-empty result starts with a space). An input that produces no
/// segments yields an empty string.
///
/// # Errors
/// Propagates the `WhisperError` from state creation, the inference run, or
/// reading back a segment.
fn _transcribe(&self, data: &Vec<f32>) -> Result<String, WhisperError> {
    let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
    params.set_no_context(true);
    params.set_n_threads(self.threads);
    params.set_translate(false);
    params.set_detect_language(false);
    params.set_language(Some("en"));
    params.set_print_special(false);
    params.set_print_progress(false);
    params.set_print_realtime(false);
    params.set_print_timestamps(false);

    let mut state = self.ctx.create_state()?;
    state.full(params, &data[..])?;

    let num_segments = state.full_n_segments()?;
    let mut result = String::new();
    for i in 0..num_segments {
        let segment = state.full_get_segment_text(i)?;
        // Append in place: rebuilding the whole string with `format!` on every
        // segment copies everything accumulated so far (quadratic in length).
        result.push(' ');
        result.push_str(&segment);
    }
    Ok(result)
}
} }

View File

@@ -0,0 +1,2 @@
# Prints (does not run) two ready-to-paste command lines. Each one kills any
# running hotwords.py, restarts it with the small.en model and DEBUG enabled,
# and appends its stdout to a per-day transcript file via `tee -a`.
# The strings are double-quoted, so $HOME and $(date ...) are expanded when
# this script runs: the printed commands contain the resolved path and date.
# NOTE(review): the two log directories are spelled differently
# (`__log.d` vs `_log.d`) — confirm that is intentional.
echo "pkill -9 -f hotwords.py; MIC_TIMEOUT=30 MODEL=small.en P=4 DEBUG=true HOTWORDS= python3 ./hotwords.py | tee -a $HOME/Sync/drawful/DnD/bdoob/__log.d/$(date +%Y.%m.%d).transcript.txt"
echo "pkill -9 -f hotwords.py; MIC_TIMEOUT=30 MODEL=small.en P=4 DEBUG=true HOTWORDS= python3 ./hotwords.py | tee -a $HOME/Sync/drawful/DnD/nessira.d/_log.d/$(date +%Y.%m.%d).transcript.txt"