16 Commits

Author SHA1 Message Date
Bel LaPointe
11b5091872 found the type needed to pass closures with local variables 2023-12-19 21:39:53 -05:00
Bel LaPointe
03370f362e from borrow since a grant is K 2023-12-19 21:20:52 -05:00
Bel LaPointe
ec6a71d38c purge non callback handling 2023-12-19 21:19:37 -05:00
Bel LaPointe
1b96b132e1 dumb callbacks work 2023-12-19 21:18:04 -05:00
Bel LaPointe
839487b99e drop redundant on_success time trimming 2023-12-19 21:11:53 -05:00
Bel LaPointe
a2fee32fbc refactor to whisper_service enqueues, whisper_impl transforms, whisper_engine provides raw 2023-12-19 21:08:59 -05:00
Bel LaPointe
091958e08d moved to a callback BUT costed me a global so lets iterate to someTrait 2023-12-19 20:38:01 -05:00
Bel LaPointe
5f47b2c88b wait i just needed an option? f off 2023-12-19 20:20:24 -05:00
Bel LaPointe
367838ac23 test to show include_bytes! macro supports large symlinks 2023-12-19 16:36:17 -05:00
Bel LaPointe
d05287fa3d update --stream-* defaults 2023-12-19 10:30:10 -05:00
Bel LaPointe
01be2637ca swap order 2023-12-19 10:26:22 -05:00
Bel LaPointe
226bedb80e add --debug to write a file that can be played with cat /tmp/page.rawf32audio | sox -r 16000 -b 32 -t f32 -e floating-point - -d 2023-12-19 10:25:48 -05:00
Bel LaPointe
6b54e500cd i think my recording has gaps 2023-12-19 09:54:21 -05:00
Bel LaPointe
8603f20a24 break into words but keep more stream head/tail tiebreaking 2023-12-19 09:51:11 -05:00
Bel LaPointe
eee0bf5e65 wip... 2023-12-19 09:30:15 -05:00
Bel LaPointe
15a3f8430a WIP trim the head and tail from text output because low confidence 2023-12-19 09:09:38 -05:00
5 changed files with 200 additions and 52 deletions

7
models/testme/Cargo.lock generated Normal file
View File

@@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "testme"
version = "0.1.0"

8
models/testme/Cargo.toml Normal file
View File

@@ -0,0 +1,8 @@
[package]
name = "testme"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View File

@@ -0,0 +1,4 @@
/// Prints the contents of `./test.txt`, which is embedded into the binary at
/// compile time, decoding it as UTF-8 with invalid sequences replaced.
fn main() {
    let embedded: &[u8] = include_bytes!("./test.txt");
    let text = String::from_utf8_lossy(embedded);
    println!("{}", text);
}

1
models/testme/src/test.txt Symbolic link
View File

@@ -0,0 +1 @@
../../ggml-tiny.en.bin

View File

@@ -5,6 +5,8 @@ use std::time::{Duration, Instant};
use chrono; use chrono;
use clap::Parser; use clap::Parser;
use std::thread; use std::thread;
use std::fs::File;
use std::io::Write;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
struct Flags { struct Flags {
@@ -14,18 +16,39 @@ struct Flags {
#[arg(long, default_value = "8")] #[arg(long, default_value = "8")]
threads: i32, threads: i32,
#[arg(long, default_value = "1.0")]
stream_retain: f32,
#[arg(long, default_value = "5")] #[arg(long, default_value = "5")]
stream_step: u64, stream_step: u64,
#[arg(long, default_value = "0.6")]
stream_retain: f32,
#[arg(long, default_value = "0.3")]
stream_head: f32,
#[arg(long, default_value = "0.3")]
stream_tail: f32,
wav: Option<String>, wav: Option<String>,
#[arg(long, default_value = "false")]
debug: bool,
} }
fn main() { fn main() {
let flags = Flags::parse(); let flags = Flags::parse();
let w = new_whisper(flags.model, flags.threads, Handler{}).unwrap(); let w = new_whisper_service(
flags.model,
flags.threads,
flags.stream_head,
flags.stream_tail,
|result: Result<Whispered, String>| {
match result {
Ok(whispered) => {
eprintln!("{}: {:?}", chrono::Local::now(), whispered);
println!("{}", whispered.to_string());
},
Err(msg) => { eprintln!("Error whispering: {}", msg); },
};
},
).unwrap();
let stream_retain = (flags.stream_retain * 16_000.0) as usize; let stream_retain = (flags.stream_retain * 16_000.0) as usize;
let stream_step = Duration::new(flags.stream_step, 0); let stream_step = Duration::new(flags.stream_step, 0);
match flags.wav { match flags.wav {
@@ -41,18 +64,33 @@ fn main() {
w.transcribe(&audio_data); w.transcribe(&audio_data);
}, },
None => { None => {
match &flags.debug {
true => { File::create("/tmp/page.rawf32audio").unwrap(); },
false => {},
};
let mut buffer = vec![]; let mut buffer = vec![];
let mut last = Instant::now(); let mut last = Instant::now();
new_listener().listen(move |data: Vec<f32>| { new_listener().listen(move |data: Vec<f32>| {
data.iter().for_each(|x| buffer.push(*x)); data.iter().for_each(|x| buffer.push(*x));
if Instant::now() - last > stream_step { if Instant::now() - last > stream_step {
match w.transcribe_async(&buffer) { w.transcribe_async(&buffer).unwrap();
Ok(_) => (),
Err(msg) => eprintln!("{}", msg), match &flags.debug {
true => {
let mut f = File::options().append(true).open("/tmp/page.rawf32audio").unwrap();
let mut wav_data = vec![];
for i in buffer.iter() {
for j in i.to_le_bytes() {
wav_data.push(j);
}
}
f.write_all(wav_data.as_slice()).unwrap();
},
false => {},
}; };
for i in stream_retain..buffer.len() { for i in 0..stream_retain {
buffer[i - stream_retain] = buffer[i] buffer[i] = buffer[buffer.len() - stream_retain + i];
} }
buffer.truncate(stream_retain); buffer.truncate(stream_retain);
last = Instant::now(); last = Instant::now();
@@ -62,28 +100,23 @@ fn main() {
}; };
} }
struct Whisper { struct WhisperService {
jobs: std::sync::mpsc::SyncSender<AWhisper>, jobs: std::sync::mpsc::SyncSender<AWhisper>,
} }
struct WhisperEngine { fn new_whisper_service<F>(model_path: String, threads: i32, stream_head: f32, stream_tail: f32, handler_fn: F) -> Result<WhisperService, String> where F: FnMut(Result<Whispered, String>) + Send + 'static {
ctx: WhisperContext, match new_whisper_engine(model_path, threads) {
threads: i32,
handler: Handler,
}
fn new_whisper(model_path: String, threads: i32, handler: Handler) -> Result<Whisper, String> {
match new_whisper_engine(model_path, threads, handler) {
Ok(engine) => { Ok(engine) => {
let mut whisper = new_whisper_impl(engine, stream_head, stream_tail, handler_fn);
let (send, recv) = std::sync::mpsc::sync_channel(100); let (send, recv) = std::sync::mpsc::sync_channel(100);
thread::spawn(move || { engine.transcribe_asyncs(recv); }); thread::spawn(move || { whisper.transcribe_asyncs(recv); });
Ok(Whisper{jobs: send}) Ok(WhisperService{jobs: send})
}, },
Err(msg) => Err(format!("failed to initialize engine: {}", msg)), Err(msg) => Err(format!("failed to initialize engine: {}", msg)),
} }
} }
impl Whisper { impl WhisperService {
fn transcribe(&self, data: &Vec<f32>) { fn transcribe(&self, data: &Vec<f32>) {
let (send, recv) = std::sync::mpsc::sync_channel(0); let (send, recv) = std::sync::mpsc::sync_channel(0);
self._transcribe_async(data, Some(send)).unwrap(); self._transcribe_async(data, Some(send)).unwrap();
@@ -105,41 +138,79 @@ impl Whisper {
} }
} }
fn new_whisper_engine(model_path: String, threads: i32, handler: Handler) -> Result<WhisperEngine, String> { struct WhisperImpl {
match WhisperContext::new(&model_path) { engine: WhisperEngine,
Ok(ctx) => Ok(WhisperEngine{ctx: ctx, threads: threads, handler: handler}), stream_head: f32,
Err(msg) => Err(format!("failed to load {}: {}", model_path, msg)), stream_tail: f32,
handler_fn: Option<Box<dyn FnMut(Result<Whispered, String>) + Send + 'static>>
}
fn new_whisper_impl<F>(engine: WhisperEngine, stream_head: f32, stream_tail: f32, handler_fn: F) -> WhisperImpl where F: FnMut(Result<Whispered, String>) + Send + 'static {
WhisperImpl {
engine: engine,
stream_head: stream_head,
stream_tail: stream_tail,
handler_fn: Some(Box::new(handler_fn)),
} }
} }
impl WhisperEngine { impl WhisperImpl {
fn transcribe_asyncs(&self, recv: std::sync::mpsc::Receiver<AWhisper>) { fn transcribe_asyncs(&mut self, recv: std::sync::mpsc::Receiver<AWhisper>) {
loop { loop {
match recv.recv() { match recv.recv() {
Ok(job) => { Ok(job) => {
match self.transcribe(&job.data) { let result = self.transcribe(&job).is_ok();
Ok(result) => { match job.ack {
self.handler.on_success(result); Some(ack) => {
match job.ack { ack.send(result).unwrap();
Some(ack) => { let _ = ack.send(true); },
None => (),
};
},
Err(msg) => {
self.handler.on_error(format!("failed to transcribe: {}", msg));
match job.ack {
Some(ack) => { let _ = ack.send(false); },
None => (),
};
}, },
None => (),
}; };
}, }
Err(_) => return, Err(_) => return,
}; };
} }
} }
fn transcribe(&self, data: &Vec<f32>) -> Result<String, WhisperError> { fn transcribe(&mut self, a_whisper: &AWhisper) -> Result<(), ()> {
match self.engine.transcribe(&a_whisper.data) {
Ok(result) => {
self.on_success(&result);
Ok(())
},
Err(msg) => {
self.on_error(msg.to_string());
Err(())
},
}
}
/// Trims the head and tail of a finished transcription and hands the
/// remainder to the registered handler as `Ok`.
///
/// `stream_head` and `stream_tail` are scaled by 100 before being used as
/// the `after` / `before` cutoffs.
fn on_success(&mut self, whispered: &Whispered) {
    let head_cutoff = self.stream_head * 100.0;
    let tail_cutoff = self.stream_tail * 100.0;
    let trimmed = whispered.after(&head_cutoff).before(&tail_cutoff);
    // Panics if no handler is stored, exactly like a bare `unwrap`.
    (self.handler_fn.as_mut().unwrap())(Ok(trimmed));
}
/// Reports a failed transcription to the registered handler as `Err`,
/// prefixing the message with "failed to transcribe: ".
fn on_error(&mut self, msg: String) {
    let report = format!("failed to transcribe: {}", msg);
    // Panics if no handler is stored, exactly like a bare `unwrap`.
    (self.handler_fn.as_mut().unwrap())(Err(report));
}
}
/// Lowest layer of the transcription stack: owns the loaded model context
/// and the thread count applied to each transcription run.
struct WhisperEngine {
/// Model context produced by `WhisperContext::new` from the model path.
ctx: WhisperContext,
/// Thread count forwarded to `set_n_threads` when transcribing.
threads: i32,
}
/// Loads the model found at `model_path` and wraps it, together with the
/// requested thread count, in a `WhisperEngine`.
///
/// Returns `Err` with a message naming the path and the underlying cause
/// when the model context cannot be created.
fn new_whisper_engine(model_path: String, threads: i32) -> Result<WhisperEngine, String> {
    WhisperContext::new(&model_path)
        .map(|ctx| WhisperEngine { ctx, threads })
        .map_err(|msg| format!("failed to load {}: {}", model_path, msg))
}
impl WhisperEngine {
fn transcribe(&self, data: &Vec<f32>) -> Result<Whispered, WhisperError> {
let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 }); let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
params.set_no_context(true); params.set_no_context(true);
params.set_n_threads(self.threads); params.set_n_threads(self.threads);
@@ -154,11 +225,13 @@ impl WhisperEngine {
let mut state = self.ctx.create_state()?; let mut state = self.ctx.create_state()?;
state.full(params, &data[..])?; state.full(params, &data[..])?;
let mut result = new_whispered();
let num_segments = state.full_n_segments()?; let num_segments = state.full_n_segments()?;
let mut result = "".to_string();
for i in 0..num_segments { for i in 0..num_segments {
let segment = state.full_get_segment_text(i)?; let data = state.full_get_segment_text(i)?;
result = format!("{} {}", result, segment); let start = state.full_get_segment_t0(i)?;
let stop = state.full_get_segment_t1(i)?;
result.push(data, start, stop);
} }
Ok(result) Ok(result)
@@ -170,15 +243,70 @@ struct AWhisper {
ack: Option<std::sync::mpsc::SyncSender<bool>>, ack: Option<std::sync::mpsc::SyncSender<bool>>,
} }
struct Handler {} #[derive(Clone, Debug)]
/// A transcription result held as an ordered list of timed word entries.
struct Whispered {
/// The entries in the order they were pushed.
data: Vec<AWhispered>,
}
impl Handler { #[derive(Clone, Debug)]
fn on_success(&self, result: String) { struct AWhispered {
eprintln!("{}", chrono::Local::now()); data: String,
println!("{}", result); offset: i64,
length: i64,
}
/// Creates a `Whispered` that holds no entries yet.
fn new_whispered() -> Whispered {
    Whispered { data: Vec::new() }
}
fn new_a_whispered(data: String, start: i64, stop: i64) -> AWhispered {
AWhispered{
data: data,
offset: start.clone(),
length: stop - start,
} }
fn on_error(&self, msg: String) { }
eprintln!("error: {}", msg);
impl Whispered {
fn to_string(&self) -> String {
let mut result = "".to_string();
for i in 0..self.data.len() {
result = format!("{} {}", result, &self.data[i].data);
}
result
}
fn after(&self, t: &f32) -> Whispered {
let mut result = new_whispered();
self.data
.iter()
.filter(|x| x.offset as f32 >= *t)
.for_each(|x| result.data.push(x.clone()));
result
}
fn before(&self, t: &f32) -> Whispered {
let mut result = new_whispered();
let end = match self.data.iter().map(|x| x.offset + x.length).max() {
Some(x) => x,
None => 1,
};
let t = (end as f32) - *t;
self.data
.iter()
.filter(|x| ((x.offset) as f32) <= t)
.for_each(|x| result.data.push(x.clone()));
result
}
fn push(&mut self, data: String, start: i64, stop: i64) {
let words: Vec<_> = data.split_whitespace().collect();
let per_word = (stop - start) / (words.len() as i64);
for i in 0..words.len() {
let start = (i as i64) * per_word;
let stop = start.clone() + per_word;
self.data.push(new_a_whispered(words[i].to_string(), start, stop));
}
} }
} }