Compare commits
37 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1dd631872c | ||
|
|
72a1420638 | ||
|
|
1009c4230e | ||
|
|
30e5515da1 | ||
|
|
b4c9ecb98b | ||
|
|
4ef419e6c0 | ||
|
|
54964ec59b | ||
|
|
62e764436a | ||
|
|
d631def834 | ||
|
|
3168968cae | ||
|
|
437d7cac39 | ||
|
|
3093a91d84 | ||
|
|
f58e3a0331 | ||
|
|
6dffa401b7 | ||
|
|
f4d9730b5a | ||
|
|
0c5c1f647c | ||
|
|
77ad40b61a | ||
|
|
09894c4fd0 | ||
|
|
3e2e1e2ff8 | ||
|
|
50058037eb | ||
|
|
a483aaf25c | ||
|
|
be7d85f85e | ||
|
|
60d38c4d5c | ||
|
|
e3a7628acf | ||
|
|
91c7791860 | ||
|
|
247edd2ced | ||
|
|
edd94aef72 | ||
|
|
b4d3e5a27c | ||
|
|
a1436e3bd2 | ||
|
|
410769b8c6 | ||
|
|
5869016de6 | ||
|
|
0955f6c0c0 | ||
|
|
242f4407df | ||
|
|
814a8ae2f3 | ||
|
|
7c369e72d4 | ||
|
|
0aff4f556b | ||
|
|
88bf54d022 |
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "rust-whisper.d/gitea-whisper-rs"]
|
||||
path = rust-whisper.d/gitea-whisper-rs
|
||||
url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git
|
||||
1103
rust-whisper.d/Cargo.lock
generated
1103
rust-whisper.d/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -6,6 +6,11 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
whisper-rs = "0.5"
|
||||
whisper-rs = { path = "./gitea-whisper-rs", version = "0.8.0" }
|
||||
wav = "1"
|
||||
tokio = "1.27"
|
||||
cpal = "0.15.2"
|
||||
signal-hook = "0.3.17"
|
||||
byteorder = "1.5.0"
|
||||
chrono = "0.4.31"
|
||||
clap = { version = "4.4.10", features = ["derive"] }
|
||||
|
||||
1
rust-whisper.d/gitea-whisper-rs
Submodule
1
rust-whisper.d/gitea-whisper-rs
Submodule
Submodule rust-whisper.d/gitea-whisper-rs added at dd62f2b9f6
@@ -47,7 +47,7 @@ class Reader(threading.Thread):
|
||||
self.name = os.environ.get("MIC_NAME", "pulse_monitor")
|
||||
if not self.name:
|
||||
for index, name in enumerate(sr.Microphone.list_microphone_names()):
|
||||
print("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
|
||||
log("[{0}] Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
|
||||
exit()
|
||||
self.inq = inq
|
||||
self.outq = outq
|
||||
@@ -136,14 +136,19 @@ class Parser(threading.Thread):
|
||||
p = "/tmp/whisper-cpp.wav"
|
||||
with open("/tmp/whisper-cpp.wav", "wb") as f:
|
||||
f.write(wav)
|
||||
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P=2 rust-whisper", capture_output=True, shell=True)
|
||||
proc = subprocess.run(f"MODEL=./models/ggml-{os.environ.get('MODEL','tiny.en')}.bin WAV={p} P={os.environ.get('P', '2')} rust-whisper", capture_output=True, shell=True)
|
||||
result = proc.stdout.decode().strip()
|
||||
if os.environ.get("DEBUG", None):
|
||||
log("stderr:", proc.stderr.decode().strip())
|
||||
log("raw transcript:", result)
|
||||
result = result.replace(">>", "")
|
||||
result = "".join([i.split("]")[-1] for i in result.split("[")[0]])
|
||||
result = "".join([i.split(")")[-1] for i in result.split("(")[0]])
|
||||
for pair in [
|
||||
("[", "]"),
|
||||
("(", ")"),
|
||||
("<", ">"),
|
||||
("*", "*"),
|
||||
]:
|
||||
result = "".join([i.split(pair[1])[-1] for i in result.split(pair[0])[0]])
|
||||
if os.environ.get("DEBUG", None):
|
||||
log("annotation-free transcript:", result)
|
||||
return result
|
||||
@@ -180,6 +185,8 @@ def _load_dot_notation(v, items):
|
||||
else:
|
||||
result.append(subresult)
|
||||
return result
|
||||
elif k == "KEYS":
|
||||
v = [k for k in v]
|
||||
else:
|
||||
if isinstance(v, list):
|
||||
v = v[int(k)]
|
||||
@@ -224,8 +231,15 @@ class Reactor(threading.Thread):
|
||||
def load_hotwords_in_yaml_file():
|
||||
with open(p.split("@")[0], "r") as f:
|
||||
v = yaml.safe_load(f)
|
||||
v = load_dot_notation(v, p.split("@")[-1])
|
||||
return ["".join(i.strip().lower().split()) for i in v if i]
|
||||
if os.environ.get("DEBUG", None):
|
||||
log(f'opened {p.split("@")[0]} and got {v}')
|
||||
result = []
|
||||
for to_find in [i for i in p.split("@")[-1].split(",") if i]:
|
||||
if os.environ.get("DEBUG", None):
|
||||
log(f'finding {to_find} in {v}')
|
||||
v2 = load_dot_notation(v, to_find)
|
||||
result.extend(["".join(i.strip().lower().split()) for i in v2 if i])
|
||||
return result
|
||||
load_hotwords_in_yaml_file()
|
||||
return load_hotwords_in_yaml_file
|
||||
else:
|
||||
@@ -254,13 +268,23 @@ class Reactor(threading.Thread):
|
||||
log("Reactor.run: stop")
|
||||
|
||||
def handle(self, text):
|
||||
try:
|
||||
self._handle(text)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _handle(self, text):
|
||||
hotwords = self.load_hotwords()
|
||||
if os.environ.get("DEBUG", None):
|
||||
log(f"seeking {hotwords} in {text}")
|
||||
log(f"seeking {hotwords} in {text}. $HOTWORDS={os.environ.get('HOTWORDS', None)}")
|
||||
if not hotwords:
|
||||
if not os.environ.get("HOTWORDS", None):
|
||||
print(text)
|
||||
if os.environ.get("DEBUG", None):
|
||||
log(f"HOTWORDS is False; {text}")
|
||||
print(text, flush=True)
|
||||
else:
|
||||
if os.environ.get("DEBUG", None):
|
||||
log(f"HOTWORDS is True; {text}")
|
||||
log(text)
|
||||
return
|
||||
cleantext = "".join([i for i in "".join(text.lower().split()) if i.isalpha()])
|
||||
@@ -300,7 +324,7 @@ class Actor(threading.Thread):
|
||||
|
||||
def handle_stdout(self, hotword, context):
|
||||
log(context)
|
||||
print(hotword)
|
||||
print(hotword, flush=True)
|
||||
|
||||
def handle_signal(self, hotword, context):
|
||||
self.handle_stderr(hotword, context)
|
||||
@@ -319,9 +343,12 @@ class Actor(threading.Thread):
|
||||
body = self.body
|
||||
body = body.replace("{{hotword}}", hotword)
|
||||
body = body.replace("{{context}}", context)
|
||||
url = self.url
|
||||
url = url.replace("{{hotword}}", hotword)
|
||||
url = url.replace("{{context}}", context)
|
||||
if os.environ.get("DEBUG", "") :
|
||||
log("POST", self.url, headers, body)
|
||||
requests.post(self.url, headers=headers, data=body)
|
||||
log("POST", url, headers, body)
|
||||
requests.post(url, headers=headers, data=body)
|
||||
except Exception as e:
|
||||
log("Actor.handle_url:", e)
|
||||
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
|
||||
d="${1:-"$PWD"/models}"
|
||||
mkdir -p "$d"
|
||||
cd "$(dirname "$(realpath "$BASH_SOURCE")")"
|
||||
|
||||
# Whisper models
|
||||
for model in "tiny.en" "base.en" "small.en" "medium.en"; do
|
||||
test -f "$d"/ggml-$model.bin || wget --quiet --show-progress -O "$d"/ggml-$model.bin "$src-$model.bin"
|
||||
test -f ./ggml-$model.bin || wget --quiet --show-progress -O ./ggml-$model.bin "$src-$model.bin"
|
||||
done
|
||||
@@ -1,37 +1,148 @@
|
||||
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy};
|
||||
use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
|
||||
use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
|
||||
use signal_hook::{iterator::Signals, consts::signal::SIGINT};
|
||||
use std::time::{Duration, Instant};
|
||||
use chrono;
|
||||
use clap::Parser;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
struct Flags {
|
||||
#[arg(long, default_value = "../models/ggml-tiny.en.bin")]
|
||||
model: String,
|
||||
|
||||
#[arg(long, default_value = "8")]
|
||||
threads: i32,
|
||||
|
||||
#[arg(long, default_value = "0.8")]
|
||||
stream_churn: f32,
|
||||
#[arg(long, default_value = "5")]
|
||||
stream_step: u64,
|
||||
|
||||
wav: Option<String>,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let mut ctx = WhisperContext::new(
|
||||
&std::env::var("MODEL").unwrap_or(String::from("../models/ggml-tiny.en.bin"))
|
||||
).expect("failed to load model");
|
||||
let flags = Flags::parse();
|
||||
|
||||
// create a params object
|
||||
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
|
||||
params.set_n_threads(
|
||||
std::env::var("P").unwrap_or(String::from("1")).parse::<i32>().expect("$P must be a number")
|
||||
);
|
||||
params.set_translate(false);
|
||||
params.set_language(Some("en"));
|
||||
params.set_print_special(false);
|
||||
params.set_print_progress(false);
|
||||
params.set_print_realtime(false);
|
||||
params.set_print_timestamps(false);
|
||||
let w = new_whisper(flags.model, flags.threads).unwrap();
|
||||
let stream_churn = flags.stream_churn;
|
||||
let stream_step = Duration::new(flags.stream_step, 0);
|
||||
match flags.wav {
|
||||
Some(wav) => {
|
||||
let (header, data) = wav::read(
|
||||
&mut std::fs::File::open(wav).expect("failed to open $WAV"),
|
||||
).expect("failed to decode $WAV");
|
||||
assert!(header.channel_count == 1);
|
||||
assert!(header.sampling_rate == 16000);
|
||||
let data16 = data.as_sixteen().expect("wav is not 32bit floats");
|
||||
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
|
||||
|
||||
let (header, data) = wav::read(&mut std::fs::File::open(
|
||||
&std::env::var("WAV").unwrap_or(String::from("../git.d/samples/jfk.wav"))
|
||||
).expect("failed to open .wav")).expect("failed to decode .wav");
|
||||
assert!(header.channel_count == 1);
|
||||
assert!(header.sampling_rate == 16000);
|
||||
let data16 = data.as_sixteen().expect("wav is not 32bit floats");
|
||||
let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
|
||||
let result = w.transcribe(&audio_data).unwrap();
|
||||
println!("{}", result);
|
||||
},
|
||||
None => {
|
||||
let host = cpal::default_host();
|
||||
let device = host.default_input_device().unwrap();
|
||||
let cfg = device.supported_input_configs()
|
||||
.unwrap()
|
||||
.filter(|x| x.sample_format() == cpal::SampleFormat::F32)
|
||||
.nth(0)
|
||||
.unwrap()
|
||||
.with_max_sample_rate();
|
||||
|
||||
ctx.full(params, &audio_data[..])
|
||||
.expect("failed to run model");
|
||||
let channels = cfg.channels();
|
||||
let downsample_ratio = cfg.sample_rate().0 as f32 / 16000.0;
|
||||
let mut buffer = vec![];
|
||||
let mut last = Instant::now();
|
||||
let stream = device.build_input_stream(
|
||||
&cfg.clone().into(),
|
||||
move |data: &[f32], _: &cpal::InputCallbackInfo| {
|
||||
let mono_data: Vec<f32> = data.iter().map(|x| *x).step_by(channels.into()).collect();
|
||||
let mut downsampled_data = vec![];
|
||||
for i in 0..(mono_data.len() as f32 / downsample_ratio) as usize {
|
||||
let mut upsampled = i as f32 * downsample_ratio;
|
||||
if upsampled > (mono_data.len()-1) as f32 {
|
||||
upsampled = (mono_data.len()-1) as f32
|
||||
}
|
||||
downsampled_data.push(mono_data[upsampled as usize]);
|
||||
}
|
||||
downsampled_data.iter().for_each(|x| buffer.push(*x));
|
||||
if Instant::now() - last > stream_step {
|
||||
let result = w.transcribe(&buffer).unwrap();
|
||||
eprintln!("{}", chrono::Local::now());
|
||||
println!("{}", result);
|
||||
|
||||
let num_segments = ctx.full_n_segments();
|
||||
for i in 0..num_segments {
|
||||
let segment = ctx.full_get_segment_text(i).expect("failed to get segment");
|
||||
print!("{} ", segment);
|
||||
}
|
||||
println!("");
|
||||
let retain = buffer.len() - (buffer.len() as f32 * stream_churn) as usize;
|
||||
for i in retain..buffer.len() {
|
||||
buffer[i - retain] = buffer[i]
|
||||
}
|
||||
buffer.truncate(retain);
|
||||
last = Instant::now();
|
||||
}
|
||||
},
|
||||
move |err| {
|
||||
eprintln!("input error: {}", err)
|
||||
},
|
||||
None,
|
||||
).unwrap();
|
||||
stream.play().unwrap();
|
||||
|
||||
eprintln!("listening on {}", device.name().unwrap());
|
||||
let mut signals = Signals::new(&[SIGINT]).unwrap();
|
||||
for sig in signals.forever() {
|
||||
eprintln!("sig {}", sig);
|
||||
break;
|
||||
}
|
||||
stream.pause().unwrap();
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
struct Whisper {
|
||||
ctx: WhisperContext,
|
||||
threads: i32,
|
||||
}
|
||||
|
||||
fn new_whisper(model_path: String, threads: i32) -> Result<Whisper, String> {
|
||||
match WhisperContext::new(&model_path) {
|
||||
Ok(ctx) => Ok(Whisper{
|
||||
ctx: ctx,
|
||||
threads: threads,
|
||||
}),
|
||||
Err(msg) => Err(format!("failed to load {}: {}", model_path, msg)),
|
||||
}
|
||||
}
|
||||
|
||||
impl Whisper {
|
||||
fn transcribe(&self, data: &Vec<f32>) -> Result<String, String> {
|
||||
match self._transcribe(&data) {
|
||||
Ok(result) => Ok(result),
|
||||
Err(msg) => Err(format!("failed to transcribe: {}", msg)),
|
||||
}
|
||||
}
|
||||
|
||||
fn _transcribe(&self, data: &Vec<f32>) -> Result<String, WhisperError> {
|
||||
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
|
||||
params.set_no_context(true);
|
||||
params.set_n_threads(self.threads);
|
||||
params.set_translate(false);
|
||||
params.set_detect_language(false);
|
||||
params.set_language(Some("en"));
|
||||
params.set_print_special(false);
|
||||
params.set_print_progress(false);
|
||||
params.set_print_realtime(false);
|
||||
params.set_print_timestamps(false);
|
||||
|
||||
let mut state = self.ctx.create_state()?;
|
||||
state.full(params, &data[..])?;
|
||||
|
||||
let num_segments = state.full_n_segments()?;
|
||||
let mut result = "".to_string();
|
||||
for i in 0..num_segments {
|
||||
let segment = state.full_get_segment_text(i)?;
|
||||
result = format!("{} {}", result, segment);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
2
rust-whisper.d/transcript.sh
Normal file
2
rust-whisper.d/transcript.sh
Normal file
@@ -0,0 +1,2 @@
|
||||
echo "pkill -9 -f hotwords.py; MIC_TIMEOUT=30 MODEL=small.en P=4 DEBUG=true HOTWORDS= python3 ./hotwords.py | tee -a $HOME/Sync/drawful/DnD/bdoob/__log.d/$(date +%Y.%m.%d).transcript.txt"
|
||||
echo "pkill -9 -f hotwords.py; MIC_TIMEOUT=30 MODEL=small.en P=4 DEBUG=true HOTWORDS= python3 ./hotwords.py | tee -a $HOME/Sync/drawful/DnD/nessira.d/_log.d/$(date +%Y.%m.%d).transcript.txt"
|
||||
Reference in New Issue
Block a user