From 2699d73c9c150b959d5a6505fe1238143156924e Mon Sep 17 00:00:00 2001 From: Bel LaPointe Date: Fri, 31 Mar 2023 09:46:34 -0600 Subject: [PATCH] oooo closer --- .gitignore | 1 + whisper-cpp-2023/rust.d/Cargo.lock | 260 ++++++++++++++++++++++++++++ whisper-cpp-2023/rust.d/Cargo.toml | 9 + whisper-cpp-2023/rust.d/cargo.sh | 8 + whisper-cpp-2023/rust.d/src/main.rs | 37 ++++ 5 files changed, 315 insertions(+) create mode 100644 whisper-cpp-2023/rust.d/Cargo.lock create mode 100644 whisper-cpp-2023/rust.d/Cargo.toml create mode 100644 whisper-cpp-2023/rust.d/cargo.sh create mode 100644 whisper-cpp-2023/rust.d/src/main.rs diff --git a/.gitignore b/.gitignore index 3526bde..31726f3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ **/*.sw* +/whisper-cpp-2023/rust.d/target snowboy-2022/snowboy **/*.git.d **/*.wav diff --git a/whisper-cpp-2023/rust.d/Cargo.lock b/whisper-cpp-2023/rust.d/Cargo.lock new file mode 100644 index 0000000..822a991 --- /dev/null +++ b/whisper-cpp-2023/rust.d/Cargo.lock @@ -0,0 +1,260 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "bindgen" +version = "0.64.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clang-sys" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "either" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "once_cell" +version = "1.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "proc-macro2" +version = "1.0.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e472a104799c74b514a57226160104aa483546de37e839ec50e3c2e41dd87534" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "shlex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" + +[[package]] +name = "which" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +dependencies = [ + "either", + "libc", + "once_cell", +] + +[[package]] +name = "whisper-cpp-er" +version = "0.1.0" +dependencies = [ + "whisper-rs", +] + 
+[[package]] +name = "whisper-rs" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7e1b9b003aa3285a0e4469219566266aa1d51ced1be38587251a4f713a1677" +dependencies = [ + "whisper-rs-sys", +] + +[[package]] +name = "whisper-rs-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97a389dc665c7354ba6b1982850d4ba05b862907e535708ebdec92cbd9c599e8" +dependencies = [ + "bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/whisper-cpp-2023/rust.d/Cargo.toml b/whisper-cpp-2023/rust.d/Cargo.toml new file mode 100644 index 0000000..339e6df --- /dev/null +++ b/whisper-cpp-2023/rust.d/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "whisper-cpp-er" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +whisper-rs = "0.5" diff --git a/whisper-cpp-2023/rust.d/cargo.sh b/whisper-cpp-2023/rust.d/cargo.sh new file mode 100644 index 0000000..3bfa077 --- /dev/null +++ b/whisper-cpp-2023/rust.d/cargo.sh @@ -0,0 +1,8 @@ +#! /bin/bash + +#RUSTFLAGS="-Clink-args=-lstd++" \ + +export C_INCLUDE_PATH="$C_INCLUDE_PATH:$PWD/.." +export LIBRARY_PATH="$LIBRARY_PATH:$PWD/.." 
+
+cargo "$@"
diff --git a/whisper-cpp-2023/rust.d/src/main.rs b/whisper-cpp-2023/rust.d/src/main.rs
new file mode 100644
index 0000000..1a69431
--- /dev/null
+++ b/whisper-cpp-2023/rust.d/src/main.rs
@@ -0,0 +1,45 @@
+use whisper_rs::{WhisperContext, FullParams, SamplingStrategy};
+use std::io::Read;
+
+/// Runs the model at `../models/ggml-tiny.en.bin` over `../git.d/samples/jfk.wav`
+/// and prints one `[start - end]: text` line per segment.
+///
+/// Panics if the model cannot be loaded, the audio file cannot be opened, the
+/// model fails to run, or a segment's text cannot be fetched.
+fn main() {
+    let mut ctx = WhisperContext::new("../models/ggml-tiny.en.bin").expect("failed to load model");
+
+    // create a params object
+    let params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
+
+    // The model is fed floating point samples, 32 bit (the example this is based on
+    // used a fake 16KHz, mono buffer). Here every 4 bytes of the input file are
+    // decoded as one little-endian f32 sample.
+    // NOTE(review): decoding starts at byte 0, so any container header in the .wav
+    // is decoded as samples too, and integer PCM data would be misread. Confirm
+    // the file really holds raw f32 samples, or parse the WAV format properly.
+    let mut audio_data = Vec::new();
+    let mut input = std::io::BufReader::new(std::fs::File::open("../git.d/samples/jfk.wav").expect("cannot open jfk.wav"));
+    loop {
+        let mut buff = [0u8; std::mem::size_of::<f32>()];
+        // Any read error (including end of file) stops the loop; a trailing
+        // partial sample is discarded.
+        if input.read_exact(&mut buff).is_err() {
+            break;
+        }
+        audio_data.push(f32::from_le_bytes(buff));
+    }
+
+    // now we can run the model
+    ctx.full(params, &audio_data[..])
+        .expect("failed to run model");
+
+    // fetch the results
+    let num_segments = ctx.full_n_segments();
+    for i in 0..num_segments {
+        let segment = ctx.full_get_segment_text(i).expect("failed to get segment");
+        let start_timestamp = ctx.full_get_segment_t0(i);
+        let end_timestamp = ctx.full_get_segment_t1(i);
+        println!("[{} - {}]: {}", start_timestamp, end_timestamp, segment);
+    }
+}