found the type needed to pass closures with local variables

from borrow since a grant is K
purge non callback handling
2023-12-19 21:39:53 -05:00 · 2023-12-19 21:20:52 -05:00 · 2023-12-19 21:19:37 -05:00 · 2023-12-19 21:18:04 -05:00 · 2023-12-19 21:11:53 -05:00 · 2023-12-19 21:08:59 -05:00
32 changed files with 410 additions and 530 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@
 /whisper-cpp-2023/rust.d/target
 /rust-whisper.d/target
 /rust-whisper.d/models
 /target/
 /models/
 snowboy-2022/snowboy
 **/*.git.d
 **/*.wav
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "rust-whisper.d/gitea-whisper-rs"]
-	path = rust-whisper.d/gitea-whisper-rs
+	path = gitea-whisper-rs
 	url = https://gitea.inhome.blapointe.com/bel/whisper-rs.git
--- a/rust-whisper.d/Cargo.lock
+++ b/rust-whisper.d/Cargo.lock
--- a/rust-whisper.d/Cargo.toml
+++ b/rust-whisper.d/Cargo.toml
--- a/README.md
+++ b/README.md
@@ -1,29 +0,0 @@
 # stt
 ## listen on linux
 https://wiki.archlinux.org/title/PulseAudio/Examples
 ```
 10. ALSA monitor source
 To be able to record from a monitor source (a.k.a. "What-U-Hear", "Stereo Mix"), use pactl list to find out the name of the source in PulseAudio (e.g. alsa_output.pci-0000_00_1b.0.analog-stereo.monitor). Then add lines like the following to /etc/asound.conf or ~/.asoundrc:
 pcm.pulse_monitor {
  type pulse
  device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
 }
 ctl.pulse_monitor {
  type pulse
  device alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
 }
 Now you can select pulse_monitor as a recording source.
 Alternatively, you can use pavucontrol to do this: make sure you have set up the display to "All input devices", then select "Monitor of [your sound card]" as the recording source.
 ```
 ```bash
 $ pactl list | grep -A 50 RUNNING | grep -E 'RUNNING|Name:|Monitor Source:' | grep Monitor.Source | head -n 1 | awk '{print $NF}'
 ```
--- a/rust-whisper.d/cargo.sh
+++ b/rust-whisper.d/cargo.sh
--- a/rust-whisper.d/gitea-whisper-rs
+++ b/rust-whisper.d/gitea-whisper-rs
--- a/rust-whisper.d/hotwords.py
+++ b/rust-whisper.d/hotwords.py
--- a/models/download_models.sh
+++ b/models/download_models.sh
@@ -0,0 +1,13 @@
 #!/bin/bash
 # Download the ggml whisper model files next to this script.
 # Files that already exist are skipped, so the script can be re-run safely.
 src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
 # Stop if the script directory cannot be entered; otherwise the models would be
 # written into whatever directory the caller happens to be in.
 cd "$(dirname "$(realpath "$BASH_SOURCE")")" || exit 1
 # fetch URL DEST [WGET_ARGS...]
 # Downloads URL to DEST unless DEST already exists. The data is written to
 # DEST.part first and renamed only on success, so an interrupted or failed
 # download never leaves a truncated file that a later run would mistake for a
 # complete model.
 fetch() {
    local url="$1" dest="$2"
    shift 2
    test -f "$dest" && return 0
    if wget "$@" -O "$dest.part" "$url"; then
       mv "$dest.part" "$dest"
    else
       rm -f "$dest.part"
       return 1
    fi
 }
 # Whisper models
 for model in "tiny.en" "base.en" "small.en" "medium.en" "large-v2"; do
    fetch "$src-$model.bin" "./ggml-$model.bin" --quiet --show-progress
 done
 # URLs are quoted because they contain `?`, which the shell treats as a glob character.
 fetch "https://huggingface.co/distil-whisper/distil-medium.en/resolve/main/ggml-medium-32-2.en.bin?download=true" ./ggml-distil-medium.en.bin
 fetch "https://huggingface.co/distil-whisper/distil-large-v2/resolve/main/ggml-large-32-2.en.bin?download=true" ./ggml-distil-large-v2.bin
--- a/models/testme/Cargo.lock
+++ b/models/testme/Cargo.lock
@@ -0,0 +1,7 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
 version = 3
 [[package]]
 name = "testme"
 version = "0.1.0"
--- a/models/testme/Cargo.toml
+++ b/models/testme/Cargo.toml
@@ -0,0 +1,8 @@
 [package]
 name = "testme"
 version = "0.1.0"
 edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [dependencies]
--- a/models/testme/src/main.rs
+++ b/models/testme/src/main.rs
@@ -0,0 +1,4 @@
 /// Prints the contents of `test.txt`, which is embedded into the binary at compile time.
 fn main() {
   // The path is resolved relative to this source file by `include_bytes!`.
   let embedded: &[u8] = include_bytes!("./test.txt");
   let text = String::from_utf8_lossy(embedded);
   println!("{}", text);
 }
--- a/models/testme/src/test.txt
+++ b/models/testme/src/test.txt
@@ -0,0 +1 @@
 ../../ggml-tiny.en.bin
--- a/rust-whisper.d/requirements.txt
+++ b/rust-whisper.d/requirements.txt
--- a/rust-whisper.d/models/download_models.sh
+++ b/rust-whisper.d/models/download_models.sh
@@ -1,9 +0,0 @@
 #!/bin/bash
 # Download the ggml whisper model files next to this script; existing files are skipped.
 src="https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml"
 # Stop if the script directory cannot be entered; otherwise the models would be
 # written into whatever directory the caller happens to be in.
 cd "$(dirname "$(realpath "$BASH_SOURCE")")" || exit 1
 # Whisper models
 for model in "tiny.en" "base.en" "small.en" "medium.en"; do
    test -f "./ggml-$model.bin" || wget --quiet --show-progress -O "./ggml-$model.bin" "$src-$model.bin"
 done
--- a/rust-whisper.d/src/main.rs
+++ b/rust-whisper.d/src/main.rs
@@ -1,148 +0,0 @@
 use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
 use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
 use signal_hook::{iterator::Signals, consts::signal::SIGINT};
 use std::time::{Duration, Instant};
 use chrono;
 use clap::Parser;
 // Command-line options, parsed with clap's derive API.
 // Plain `//` comments are used on purpose: `///` doc comments on a clap struct
 // are turned into help text and would change the program's output.
 #[derive(Parser, Debug)]
 struct Flags {
   // Path of the model file passed to `WhisperContext::new`.
   #[arg(long, default_value = "../models/ggml-tiny.en.bin")]
   model: String,
   // Thread count forwarded to `FullParams::set_n_threads`.
   #[arg(long, default_value = "8")]
   threads: i32,
   // Fraction of the buffered audio that main discards after each transcription.
   #[arg(long, default_value = "0.8")]
   stream_churn: f32,
   // Whole seconds of audio to accumulate between transcriptions.
   #[arg(long, default_value = "5")]
   stream_step: u64,
   // Optional WAV file to transcribe instead of listening to the input device.
   // NOTE(review): no `#[arg(long)]`, so presumably a positional argument — confirm against clap.
   wav: Option<String>,
 }
 /// Program entry point.
 ///
 /// With a WAV argument the file is transcribed once and the text printed.
 /// Otherwise the default input device is captured, reduced to one channel at
 /// roughly 16 kHz, and the buffered audio is transcribed every `stream_step` seconds.
 fn main() {
   let flags = Flags::parse();
   let w = new_whisper(flags.model, flags.threads).unwrap();
   let stream_churn = flags.stream_churn;
   let stream_step = Duration::new(flags.stream_step, 0);
   match flags.wav {
      Some(wav) => {
         let (header, data) = wav::read(
            &mut std::fs::File::open(wav).expect("failed to open $WAV"),
         ).expect("failed to decode $WAV");
         assert!(header.channel_count == 1);
         assert!(header.sampling_rate == 16000);
         // `as_sixteen` is what can fail here, so the message names 16-bit samples.
         let data16 = data.as_sixteen().expect("wav is not 16bit integers");
         let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
         let result = w.transcribe(&audio_data).unwrap();
         println!("{}", result);
      },
      None => {
         let host = cpal::default_host();
         let device = host.default_input_device().unwrap();
         let cfg = device.supported_input_configs()
            .unwrap()
            .filter(|x| x.sample_format() == cpal::SampleFormat::F32)
            .nth(0)
            .unwrap()
            .with_max_sample_rate();
         let channels = cfg.channels();
         let downsample_ratio = cfg.sample_rate().0 as f32 / 16000.0;
         let mut buffer: Vec<f32> = Vec::new();
         let mut last = Instant::now();
         let stream = device.build_input_stream(
            &cfg.clone().into(),
            move |data: &[f32], _: &cpal::InputCallbackInfo| {
               // Keep the first channel of every interleaved frame.
               let mono_data: Vec<f32> = data.iter().copied().step_by(channels.into()).collect();
               // Nearest-lower-sample decimation down to the 16 kHz the model expects.
               for i in 0..(mono_data.len() as f32 / downsample_ratio) as usize {
                  let mut upsampled = i as f32 * downsample_ratio;
                  if upsampled > (mono_data.len()-1) as f32 {
                     upsampled = (mono_data.len()-1) as f32
                  }
                  buffer.push(mono_data[upsampled as usize]);
               }
               if last.elapsed() > stream_step {
                  let result = w.transcribe(&buffer).unwrap();
                  eprintln!("{}", chrono::Local::now());
                  println!("{}", result);
                  // Discard the oldest `stream_churn` share of the buffer and keep the
                  // newest samples as overlap for the next pass. The previous copy loop
                  // kept samples [retain, 2*retain) rather than the tail, and the
                  // subtraction could underflow for a churn above 1.
                  let churned = (buffer.len() as f32 * stream_churn) as usize;
                  let retain = buffer.len().saturating_sub(churned);
                  let discard = buffer.len() - retain;
                  buffer.drain(..discard);
                  last = Instant::now();
               }
            },
            move |err| {
               eprintln!("input error: {}", err)
            },
            None,
         ).unwrap();
         stream.play().unwrap();
         eprintln!("listening on {}", device.name().unwrap());
         // Block until the first SIGINT arrives, then stop capturing.
         let mut signals = Signals::new(&[SIGINT]).unwrap();
         for sig in signals.forever() {
            eprintln!("sig {}", sig);
            break;
         }
         stream.pause().unwrap();
      },
   };
 }
 /// A loaded whisper model together with the thread count used for each transcription.
 struct Whisper {
   /// Model context created by `WhisperContext::new`.
   ctx: WhisperContext,
   /// Passed to `FullParams::set_n_threads` on every transcription.
   threads: i32,
 }
 /// Loads the model at `model_path` and wraps it in a [`Whisper`].
 ///
 /// Returns `Err` with a message naming the path when the model cannot be loaded.
 fn new_whisper(model_path: String, threads: i32) -> Result<Whisper, String> {
   WhisperContext::new(&model_path)
      .map(|ctx| Whisper { ctx, threads })
      .map_err(|msg| format!("failed to load {}: {}", model_path, msg))
 }
 impl Whisper {
   /// Transcribes `data` and returns the recognised text, turning any engine
   /// error into a human-readable message.
   fn transcribe(&self, data: &Vec<f32>) -> Result<String, String> {
      self._transcribe(data)
         .map_err(|msg| format!("failed to transcribe: {}", msg))
   }
   /// Runs one full decoding pass over `data` and joins the segment texts,
   /// each preceded by a single space.
   fn _transcribe(&self, data: &Vec<f32>) -> Result<String, WhisperError> {
      let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
      params.set_no_context(true);
      params.set_n_threads(self.threads);
      params.set_translate(false);
      params.set_detect_language(false);
      params.set_language(Some("en"));
      params.set_print_special(false);
      params.set_print_progress(false);
      params.set_print_realtime(false);
      params.set_print_timestamps(false);
      let mut state = self.ctx.create_state()?;
      state.full(params, &data[..])?;
      let segment_count = state.full_n_segments()?;
      let mut text = String::new();
      for idx in 0..segment_count {
         let piece = state.full_get_segment_text(idx)?;
         text.push(' ');
         text.push_str(&piece);
      }
      Ok(text)
   }
 }
--- a/snowboy-2022/build.sh
+++ b/snowboy-2022/build.sh
@@ -1,99 +0,0 @@
 #! /bin/bash
 # Build, train and run the snowboy hotword detector.
 #   TRAIN=<word>   record three samples and train ./model/<word>.pmdl, then exit
 #   HOTWORD=<word> hotword model to run (defaults to $TRAIN, then default_hotword)
 #   PUSH=<any>     build and push a docker image instead of running locally
 echo https://github.com/seasalt-ai/snowboy
 cd "$(dirname "$(realpath "$BASH_SOURCE")")"
 set -e
 set -o pipefail
 if [ ! -d ./snowboy.git.d ]; then
   git clone https://github.com/seasalt-ai/snowboy snowboy.git.d
 fi
 # With `set -e` a dead docker daemon ends the script here; say why instead of exiting silently.
 timeout 2 docker version &> /dev/null || { echo "docker is not reachable" >&2; exit 1; }
 # The pattern is quoted so the shell cannot glob-expand it before grep sees it.
 if ! docker images | grep 'snowboy-pmdl.*latest' &> /dev/null; then
   pushd snowboy.git.d
   docker build -t snowboy-pmdl:latest .
   popd
 fi
 export HOTWORD="${HOTWORD:-${TRAIN:-default_hotword}}"
 if [ -n "$TRAIN" ] || [ ! -d ./model ] || [ ! -f "./model/$HOTWORD.pmdl" ]; then
   mkdir -p model
   pushd model
   rm -f ./record{1,2,3}.wav || true
   echo "record 3 instances of '$HOTWORD'" >&2
   for i in 1 2 3; do
      read -p "[$i/3] ready? you get 3 seconds."
      (
         timeout 3 rec \
            -r 16000 \
            -c 1 \
            -b 16 \
            -e signed-integer \
            -t wav \
            record$i.wav
      ) || true
      ls record$i.wav
   done
   popd
   docker run \
      --rm \
      -it \
      -v "$(realpath ./model)":/snowboy-master/examples/Python/model \
      snowboy-pmdl:latest
   mv ./model/hotword.pmdl "./model/$HOTWORD.pmdl"
   if [ -n "$TRAIN" ]; then
      exit 0
   fi
 fi
 # The Python runner is kept for reference but disabled; the Go binary below is what runs.
 if false; then
   if ! which swig; then
      brew install swig
   fi
   pip3 install pyaudio
   pushd snowboy.git.d/swig/Python3/
   make
   popd
   cd snowboy.git.d/examples/Python3/
   echo '
   import snowboydecoder
   import datetime
   detected_callback = lambda *args: print(datetime.datetime.now(), "GOTCHA")
   d = snowboydecoder.HotwordDetector("../../../model/'"$HOTWORD"'.pmdl", sensitivity=0.5, audio_gain=1)
   d.start(detected_callback)
   ' > breel.py
   echo GO
   cleanup() {
      echo OK IM DONE NOW
   }
   trap cleanup EXIT
   python3 ./breel.py
 else
   resources="$(realpath snowboy.git.d/resources/common.res)"
   hotword="$(realpath "./model/$HOTWORD.pmdl")"
   GOPROXY= go build -o snowboy
   if [ -z "$PUSH" ]; then
      # -ms takes "<model file>/<label>"; main.go splits it with path.Dir and path.Base.
      ./snowboy \
         -ms "$hotword/$HOTWORD" \
         -r "$resources" \
         -s 0.5 \
         "$@"
   else
      echo '
 FROM registry-app.eng.qops.net:5001/imported/alpine:3.16
 WORKDIR /main/
 COPY ./snowboy.git.d/resources/common.res ./
 COPY ./model/hotword.pmdl ./
 COPY ./snowboy ./
 ENTRYPOINT ["sh", "-c", "true; echo copying /main/ to /mnt/; cp /main/* /mnt/"]
 CMD []
      ' > Dockerfile
      docker build -t registry-app.eng.qops.net:5001/breel/snowboy:latest .
      docker push registry-app.eng.qops.net:5001/breel/snowboy:latest
   fi
 fi
--- a/snowboy-2022/go.mod
+++ b/snowboy-2022/go.mod
@@ -1,13 +0,0 @@
 module snowboy
 go 1.19
 require (
 	github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af
 	github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc
 )
 require (
 	github.com/Kitt-AI/snowboy v1.3.0 // indirect
 	github.com/stretchr/testify v1.8.1 // indirect
 )
--- a/snowboy-2022/go.sum
+++ b/snowboy-2022/go.sum
@@ -1,22 +0,0 @@
 github.com/Kitt-AI/snowboy v1.3.0 h1:PjBVN84M/9tAzDBQXILAKMoJMxt/fT0nhJ1rhKtVRUc=
 github.com/Kitt-AI/snowboy v1.3.0/go.mod h1:sDzzMXFQ1wFkXkZaX/ant0xJsizGVq/9hyKb7ZB3cNI=
 github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af h1:ijY5OHNQs3CdzTN2XT+zByIsR1QVyXTvOUSkQcBm6pw=
 github.com/brentnd/go-snowboy v0.0.0-20190301212623-e19133c572af/go.mod h1:XcT4k8Tn9hrM5SLVvu5hNQbAC6GojXM0MXz1Rt8CL68=
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc h1:yYLpN7bJxKYILKnk20oczGQOQd2h3/7z7/cxdD9Se/I=
 github.com/gordonklaus/portaudio v0.0.0-20221027163845-7c3b689db3cc/go.mod h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
 github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
 github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/snowboy-2022/main.go
+++ b/snowboy-2022/main.go
@@ -1,122 +0,0 @@
 // This example streams the microphone thru Snowboy to listen for the hotword,
 // by using the PortAudio interface.
 //
 // HOW TO USE:
 // 	go run examples/Go/listen/main.go [path to snowboy resource file] [path to snowboy hotword file]
 //
 package main
 import (
 	"bytes"
 	"encoding/binary"
 	"flag"
 	"fmt"
 	"log"
 	"path"
 	"strings"
 	"time"
 	"github.com/brentnd/go-snowboy"
 	"github.com/gordonklaus/portaudio"
 )
 // Sound represents a sound stream implementing the io.Reader interface
 // that provides the microphone data.
 type Sound struct {
 	// stream is the PortAudio input stream; it is set by Init once the stream has started.
 	stream *portaudio.Stream
 	// data is the sample buffer handed to PortAudio when the stream is opened in Init.
 	data   []int16
 }
 // Init initializes the Sound's PortAudio stream.
 //
 // It opens the default input device with one channel at 16 kHz and starts the
 // stream. Failures are logged and leave s.stream unset. Earlier the errors were
 // built with fmt.Errorf and thrown away, so they were never reported.
 func (s *Sound) Init() {
 	inputChannels := 1
 	outputChannels := 0
 	sampleRate := 16000
 	s.data = make([]int16, 1024)
 	// initialize the audio recording interface
 	err := portaudio.Initialize()
 	if err != nil {
 		log.Printf("Error initialize audio interface: %s", err)
 		return
 	}
 	// open the sound input stream for the microphone
 	stream, err := portaudio.OpenDefaultStream(inputChannels, outputChannels, float64(sampleRate), len(s.data), s.data)
 	if err != nil {
 		log.Printf("Error open default audio stream: %s", err)
 		return
 	}
 	err = stream.Start()
 	if err != nil {
 		log.Printf("Error on stream start: %s", err)
 		return
 	}
 	s.stream = stream
 }
 // Close closes down the Sound's PortAudio connection.
 //
 // The stream is only closed when Init managed to open one, so calling Close
 // after a failed Init no longer dereferences a nil stream.
 func (s *Sound) Close() {
 	if s.stream != nil {
 		s.stream.Close()
 	}
 	portaudio.Terminate()
 }
 // Read is the Sound's implementation of the io.Reader interface.
 //
 // It reads one buffer of samples from the stream, encodes them as
 // little-endian 16-bit values and copies as many bytes as fit into p.
 // The return value is the number of bytes actually copied.
 func (s *Sound) Read(p []byte) (int, error) {
 	if s.stream == nil {
 		return 0, fmt.Errorf("sound stream is not initialized")
 	}
 	// An input overflow only means some samples were dropped; keep listening.
 	if err := s.stream.Read(); err != nil && err != portaudio.InputOverflowed {
 		return 0, err
 	}
 	buf := &bytes.Buffer{}
 	if err := binary.Write(buf, binary.LittleEndian, s.data); err != nil {
 		return 0, err
 	}
 	return copy(p, buf.Bytes()), nil
 }
 // main wires the microphone into a snowboy detector and prints the label of
 // every hotword that is detected.
 func main() {
 	resources := flag.String("r", "", "path to the .res file")
 	models := flag.String("ms", "", "comma delimited path to the .?mdl file/output")
 	sensitivity := flag.Float64("s", 0.45, "0..1")
 	quiet := flag.Bool("q", false, "emit '1' on detect else silent")
 	flag.Parse()
 	if *resources == "" || *models == "" {
 		panic("all flags must be set")
 	}
 	// open the mic
 	mic := &Sound{}
 	mic.Init()
 	defer mic.Close()
 	// open the snowboy detector
 	detector := snowboy.NewDetector(*resources)
 	defer detector.Close()
 	// set the handlers: every entry is "<model file>/<label>"
 	for _, entry := range strings.Split(*models, ",") {
 		modelFile, label := path.Dir(entry), path.Base(entry)
 		hotword := snowboy.NewHotword(modelFile, float32(*sensitivity))
 		detector.HandleFunc(hotword, func(string) {
 			if !*quiet {
 				log.Println(label)
 			}
 			fmt.Println(label)
 		})
 	}
 	detector.HandleSilenceFunc(1*time.Second, func(string) {
 		if *quiet {
 			return
 		}
 		log.Println("...")
 	})
 	// display the detector's expected audio format
 	sampleRate, numChannels, bitDepth := detector.AudioFormat()
 	log.Printf("sample rate=%d, num channels=%d, bit depth=%d\n", sampleRate, numChannels, bitDepth)
 	// start detecting using the microphone
 	detector.ReadAndDetect(mic)
 }
--- a/snowboy-2022/model.pmdl
+++ b/snowboy-2022/model.pmdl
--- a/snowboy-2022/model/default_hotword.pmdl
+++ b/snowboy-2022/model/default_hotword.pmdl
--- a/snowboy-2022/model/down.pmdl
+++ b/snowboy-2022/model/down.pmdl
--- a/snowboy-2022/model/jump.pmdl
+++ b/snowboy-2022/model/jump.pmdl
--- a/snowboy-2022/model/left.pmdl
+++ b/snowboy-2022/model/left.pmdl
--- a/snowboy-2022/model/right.pmdl
+++ b/snowboy-2022/model/right.pmdl
--- a/snowboy-2022/model/up.pmdl
+++ b/snowboy-2022/model/up.pmdl
--- a/snowboy-2022/pyautogui.sh
+++ b/snowboy-2022/pyautogui.sh
@@ -1,82 +0,0 @@
 #! /bin/bash
 # main starts the snowboy detector in the background, writing detected labels
 # to /tmp/stt.fifo, and runs a Python loop that presses the key mapped to each
 # label in pyautogui.yaml. Extra arguments are passed through to ./snowboy.
 main() {
   cleanup() {
      # `jobs -p` prints PIDs, which `kill` accepts; `killall` expects process names.
      local pids
      pids="$(jobs -p)"
      if [ -n "$pids" ]; then
         kill -9 $pids
      fi
      killall snowboy
   }
   trap cleanup EXIT
   if [ ! -e /tmp/stt.fifo ]; then
      mkfifo /tmp/stt.fifo
   fi
   echo starting in
   for ((i=2; i>0; i--)); do
      echo "...$i..."
      sleep 1
   done
   # Build the comma separated "model/<key>.pmdl/<key>" list from the yaml keys.
   local models=($(
      cat pyautogui.yaml \
         | gojq -r -c --yaml-input '
            to_entries[] | "model/"+.key+".pmdl/"+.key
         ' \
      | tr '\n' ',' \
      | sed 's/,$//'
   ))
   echo models=$models
   ./snowboy -r resources.res -ms $models "$@" > /tmp/stt.fifo &
   python3 -c '
 import pyautogui
 import time
 keys = set()
 def toggle(key):
   global keys
   if key in keys:
      release(key)
   else:
      hold(key)
 def hold(key):
   global keys
   for keyin in [todrop for todrop in keys]:
      if keyin != key:
         release(keyin)
   keys = set()
   keys.add(key)
   print()
   print("pressing", key)
   print()
   pyautogui.keyDown(key)
 def release(key):
   print()
   print("releasing", key)
   print()
   pyautogui.keyUp(key)
 def main():
   with open("/tmp/stt.fifo", "r") as q:
      for line in q:
         handle(line.strip())
 import yaml
 mapping = yaml.safe_load(open("./pyautogui.yaml", "r"))
 print(mapping)
 def handle(cmd):
   global mapping
   hold(mapping.get(cmd))
 main()
   '
 }
 if [ "$0" == "$BASH_SOURCE" ]; then
   main "$@"
 fi
--- a/snowboy-2022/pyautogui.yaml
+++ b/snowboy-2022/pyautogui.yaml
@@ -1,5 +0,0 @@
 up: w
 down: s
 left: a
 right: d
 jump: w
--- a/snowboy-2022/resources.res
+++ b/snowboy-2022/resources.res
--- a/src/main.rs
+++ b/src/main.rs
@@ -0,0 +1,374 @@
 use whisper_rs::{WhisperContext, FullParams, SamplingStrategy, WhisperError};
 use cpal::traits::{HostTrait, DeviceTrait, StreamTrait};
 use signal_hook::{iterator::Signals, consts::signal::SIGINT};
 use std::time::{Duration, Instant};
 use chrono;
 use clap::Parser;
 use std::thread;
 use std::fs::File;
 use std::io::Write;
 // Command-line options, parsed with clap's derive API.
 // Plain `//` comments are used on purpose: `///` doc comments on a clap struct
 // are turned into help text and would change the program's output.
 #[derive(Parser, Debug)]
 struct Flags {
   // Path of the model file passed to `WhisperContext::new`.
   #[arg(long, default_value = "./models/ggml-tiny.en.bin")]
   model: String,
   // Thread count forwarded to `FullParams::set_n_threads`.
   #[arg(long, default_value = "8")]
   threads: i32,
   // Whole seconds of audio to accumulate between transcriptions.
   #[arg(long, default_value = "5")]
   stream_step: u64,
   // Seconds of trailing audio carried over into the next window
   // (main multiplies it by 16_000 to get a sample count).
   #[arg(long, default_value = "0.6")]
   stream_retain: f32,
   // Scaled by 100 in `WhisperImpl::on_success` and used as the cutoff passed to `Whispered::after`.
   #[arg(long, default_value = "0.3")]
   stream_head: f32,
   // Scaled by 100 in `WhisperImpl::on_success` and used as the cutoff passed to `Whispered::before`.
   #[arg(long, default_value = "0.3")]
   stream_tail: f32,
   // Optional WAV file to transcribe instead of listening to the input device.
   // NOTE(review): no `#[arg(long)]`, so presumably a positional argument — confirm against clap.
   wav: Option<String>,
   // When set, main appends the raw little-endian f32 samples of every window to /tmp/page.rawf32audio.
   #[arg(long, default_value = "false")]
   debug: bool,
 }
 /// Program entry point.
 ///
 /// Starts the transcription service, then either transcribes the WAV file given
 /// on the command line or listens to the input device and enqueues the buffered
 /// audio for transcription every `stream_step` seconds. Results are printed by
 /// the handler closure: text on stdout, a timestamped debug dump and errors on stderr.
 fn main() {
   let flags = Flags::parse();
   let debug = flags.debug;
   let w = new_whisper_service(
      flags.model,
      flags.threads,
      flags.stream_head,
      flags.stream_tail,
      |result: Result<Whispered, String>| {
         match result {
            Ok(whispered) => {
               eprintln!("{}: {:?}", chrono::Local::now(), whispered);
               println!("{}", whispered.to_string());
            },
            Err(msg) => { eprintln!("Error whispering: {}", msg); },
         };
      },
   ).unwrap();
   // Number of trailing samples (at 16 kHz) kept as overlap between windows.
   let stream_retain = (flags.stream_retain * 16_000.0) as usize;
   let stream_step = Duration::new(flags.stream_step, 0);
   match flags.wav {
      Some(wav) => {
         let (header, data) = wav::read(
            &mut std::fs::File::open(wav).expect("failed to open $WAV"),
         ).expect("failed to decode $WAV");
         assert!(header.channel_count == 1);
         assert!(header.sampling_rate == 16_000);
         // `as_sixteen` is what can fail here, so the message names 16-bit samples.
         let data16 = data.as_sixteen().expect("wav is not 16bit integers");
         let audio_data = &whisper_rs::convert_integer_to_float_audio(&data16);
         w.transcribe(&audio_data);
      },
      None => {
         if debug {
            // Start every run with an empty dump file.
            File::create("/tmp/page.rawf32audio").unwrap();
         }
         let mut buffer: Vec<f32> = Vec::new();
         let mut last = Instant::now();
         new_listener().listen(move |data: Vec<f32>| {
            buffer.extend(data);
            if last.elapsed() > stream_step {
               w.transcribe_async(&buffer).unwrap();
               if debug {
                  let mut f = File::options().append(true).open("/tmp/page.rawf32audio").unwrap();
                  let raw: Vec<u8> = buffer.iter().flat_map(|sample| sample.to_le_bytes()).collect();
                  f.write_all(&raw).unwrap();
               }
               // Keep only the newest `stream_retain` samples. `saturating_sub` covers
               // the case where fewer samples than that are buffered, which used to
               // underflow the index arithmetic and panic.
               let discard = buffer.len().saturating_sub(stream_retain);
               buffer.drain(..discard);
               last = Instant::now();
            }
         });
      },
   };
 }
 /// Handle used to submit audio to the background transcription thread.
 struct WhisperService {
   /// Sending half of the bounded job queue created in `new_whisper_service`.
   jobs: std::sync::mpsc::SyncSender<AWhisper>,
 }
 /// Loads the model, starts a worker thread that transcribes queued jobs, and
 /// returns the handle used to enqueue them.
 ///
 /// `handler_fn` is called on the worker thread with every transcription result.
 /// Returns `Err` when the engine cannot be initialized.
 fn new_whisper_service<F>(model_path: String, threads: i32, stream_head: f32, stream_tail: f32, handler_fn: F) -> Result<WhisperService, String> where F: FnMut(Result<Whispered, String>) + Send + 'static {
   let engine = new_whisper_engine(model_path, threads)
      .map_err(|msg| format!("failed to initialize engine: {}", msg))?;
   let mut worker = new_whisper_impl(engine, stream_head, stream_tail, handler_fn);
   // At most 100 jobs may be waiting for the worker at any time.
   let (jobs, queue) = std::sync::mpsc::sync_channel(100);
   thread::spawn(move || worker.transcribe_asyncs(queue));
   Ok(WhisperService { jobs })
 }
 impl WhisperService {
   /// Enqueues `data` and blocks until the worker has processed it.
   ///
   /// # Panics
   /// Panics when the job cannot be enqueued or the worker drops the
   /// acknowledgement channel without answering.
   fn transcribe(&self, data: &Vec<f32>) {
      // Capacity 0: the worker's acknowledgement is handed over directly to this waiting thread.
      let (send, recv) = std::sync::mpsc::sync_channel(0);
      self._transcribe_async(data, Some(send)).unwrap();
      recv.recv().unwrap();
   }
   /// Enqueues `data` without waiting for the result.
   ///
   /// Returns `Err` when the job cannot be enqueued, for example because the queue is full.
   fn transcribe_async(&self, data: &Vec<f32>) -> Result<(), String> {
      self._transcribe_async(data, None)
   }
   /// Copies `data` into a job and tries to enqueue it without blocking.
   /// When `ack` is given, the worker reports on it whether transcription succeeded.
   fn _transcribe_async(&self, data: &Vec<f32>, ack: Option<std::sync::mpsc::SyncSender<bool>>) -> Result<(), String> {
      // A single copy is enough; `data.clone().to_vec()` duplicated the audio twice.
      let job = AWhisper { data: data.to_vec(), ack };
      self.jobs
         .try_send(job)
         .map_err(|msg| format!("failed to enqueue transcription: {}", msg))
   }
 }
 /// Worker-side state: runs the engine, trims the result and hands it to the handler.
 struct WhisperImpl {
   /// Engine that produces the raw transcription.
   engine: WhisperEngine,
   /// Multiplied by 100 in `on_success` and passed to `Whispered::after`.
   stream_head: f32,
   /// Multiplied by 100 in `on_success` and passed to `Whispered::before`.
   stream_tail: f32,
   /// Callback receiving every result; `new_whisper_impl` always stores `Some`.
   handler_fn: Option<Box<dyn FnMut(Result<Whispered, String>) + Send + 'static>>
 }
 /// Bundles the engine, the head/tail trimming amounts and the boxed result handler.
 fn new_whisper_impl<F>(engine: WhisperEngine, stream_head: f32, stream_tail: f32, handler_fn: F) -> WhisperImpl where F: FnMut(Result<Whispered, String>) + Send + 'static {
   let boxed: Box<dyn FnMut(Result<Whispered, String>) + Send + 'static> = Box::new(handler_fn);
   WhisperImpl {
      engine,
      stream_head,
      stream_tail,
      handler_fn: Some(boxed),
   }
 }
 impl WhisperImpl {
   /// Worker loop: transcribes jobs until every sender is gone.
   /// A job carrying an `ack` channel is told whether its transcription succeeded.
   fn transcribe_asyncs(&mut self, recv: std::sync::mpsc::Receiver<AWhisper>) {
      while let Ok(job) = recv.recv() {
         let succeeded = self.transcribe(&job).is_ok();
         if let Some(ack) = job.ack {
            ack.send(succeeded).unwrap();
         }
      }
   }
   /// Transcribes one job and reports the outcome through the handler.
   fn transcribe(&mut self, a_whisper: &AWhisper) -> Result<(), ()> {
      let outcome = self.engine.transcribe(&a_whisper.data);
      match outcome {
         Err(msg) => {
            self.on_error(msg.to_string());
            Err(())
         },
         Ok(result) => {
            self.on_success(&result);
            Ok(())
         },
      }
   }
   /// Trims the head and tail of `whispered` and passes the rest to the handler.
   fn on_success(&mut self, whispered: &Whispered) {
      let head_cutoff = self.stream_head * 100.0;
      let tail_cutoff = self.stream_tail * 100.0;
      let trimmed = whispered.after(&head_cutoff).before(&tail_cutoff);
      let handler = self.handler_fn.as_mut().unwrap();
      handler(Ok(trimmed));
   }
   /// Passes a formatted error message to the handler.
   fn on_error(&mut self, msg: String) {
      let handler = self.handler_fn.as_mut().unwrap();
      handler(Err(format!("failed to transcribe: {}", &msg)));
   }
 }
 /// A loaded whisper model together with the thread count used for each transcription.
 struct WhisperEngine {
   /// Model context created by `WhisperContext::new`.
   ctx: WhisperContext,
   /// Passed to `FullParams::set_n_threads` on every transcription.
   threads: i32,
 }
 /// Loads the model at `model_path` and wraps it in a [`WhisperEngine`].
 ///
 /// Returns `Err` with a message naming the path when the model cannot be loaded.
 fn new_whisper_engine(model_path: String, threads: i32) -> Result<WhisperEngine, String> {
   WhisperContext::new(&model_path)
      .map(|ctx| WhisperEngine { ctx, threads })
      .map_err(|msg| format!("failed to load {}: {}", model_path, msg))
 }
 impl WhisperEngine {
   /// Runs one full decoding pass over `data` and collects every segment's
   /// text and timestamps into a `Whispered`.
   ///
   /// Any `WhisperError` from creating the state, running the model or reading
   /// a segment is returned to the caller unchanged.
   fn transcribe(&self, data: &Vec<f32>) -> Result<Whispered, WhisperError> {
      let mut params = FullParams::new(SamplingStrategy::Greedy { best_of: 0 });
      params.set_no_context(true);
      params.set_n_threads(self.threads);
      params.set_translate(false);
      params.set_detect_language(false);
      params.set_language(Some("en"));
      // Turn off the engine's own console output.
      params.set_print_special(false);
      params.set_print_progress(false);
      params.set_print_realtime(false);
      params.set_print_timestamps(false);
      // A fresh state is created for every call.
      let mut state = self.ctx.create_state()?;
      state.full(params, &data[..])?;
      let mut result = new_whispered();
      let num_segments = state.full_n_segments()?;
      for i in 0..num_segments {
         let data = state.full_get_segment_text(i)?;
         // NOTE(review): t0/t1 are presumably in 10 ms units, since callers scale seconds by 100 — confirm against whisper-rs.
         let start = state.full_get_segment_t0(i)?;
         let stop = state.full_get_segment_t1(i)?;
         result.push(data, start, stop);
      }
      Ok(result)
   }
 }
 /// One queued transcription job.
 struct AWhisper {
   /// Audio samples to transcribe (an owned copy made when the job is enqueued).
   data: Vec<f32>,
   /// When present, the worker sends `true` or `false` here once the job has been processed.
   ack: Option<std::sync::mpsc::SyncSender<bool>>,
 }
 /// A transcription broken into words, each with an estimated position in time.
 #[derive(Clone, Debug)]
 struct Whispered {
   data: Vec<AWhispered>,
 }
 /// One word of a transcription.
 #[derive(Clone, Debug)]
 struct AWhispered {
   /// The word itself.
   data: String,
   /// Estimated start of the word, in the same unit as the segment timestamps.
   offset: i64,
   /// Estimated duration of the word, in the same unit as `offset`.
   length: i64,
 }
 /// Creates an empty transcription.
 fn new_whispered() -> Whispered {
   Whispered { data: Vec::new() }
 }
 /// Creates a word spanning `start..stop`.
 fn new_a_whispered(data: String, start: i64, stop: i64) -> AWhispered {
   AWhispered {
      data,
      offset: start,
      length: stop - start,
   }
 }
 impl Whispered {
   /// Joins all words, each preceded by a single space (so a non-empty result
   /// starts with a space, exactly as before).
   fn to_string(&self) -> String {
      let mut result = String::new();
      for word in &self.data {
         result.push(' ');
         result.push_str(&word.data);
      }
      result
   }
   /// Returns the words whose start is at or after `t`.
   fn after(&self, t: &f32) -> Whispered {
      Whispered {
         data: self.data
            .iter()
            .filter(|x| x.offset as f32 >= *t)
            .cloned()
            .collect(),
      }
   }
   /// Returns the words that start no later than `t` before the end of the last word.
   fn before(&self, t: &f32) -> Whispered {
      // With no words the end defaults to 1; the result is empty either way.
      let end = self.data.iter().map(|x| x.offset + x.length).max().unwrap_or(1);
      let cutoff = (end as f32) - *t;
      Whispered {
         data: self.data
            .iter()
            .filter(|x| (x.offset as f32) <= cutoff)
            .cloned()
            .collect(),
      }
   }
   /// Splits a segment's text into words and spreads them evenly over `start..stop`.
   ///
   /// Segments without any word are ignored; they used to cause a division by zero.
   fn push(&mut self, data: String, start: i64, stop: i64) {
      let words: Vec<&str> = data.split_whitespace().collect();
      if words.is_empty() {
         return;
      }
      let per_word = (stop - start) / (words.len() as i64);
      for (i, word) in words.iter().enumerate() {
         // Offsets are relative to the whole recording, so the segment start must be
         // added; without it every segment's words were placed as if it began at 0.
         let word_start = start + (i as i64) * per_word;
         self.data.push(new_a_whispered(word.to_string(), word_start, word_start + per_word));
      }
   }
 }
 /// Captures audio from the default input device; it carries no state of its own.
 struct Listener {
 }
 /// Creates a `Listener`.
 fn new_listener() -> Listener {
   Listener{}
 }
 impl Listener {
   /// Captures audio on a background thread and calls `cb` on the calling
   /// thread for every chunk received. Returns once the capture side has
   /// dropped its sender.
   fn listen(self, mut cb: impl FnMut(Vec<f32>)) {
      let (send, recv) = std::sync::mpsc::sync_channel(100);
      thread::spawn(move || self._listen(send));
      // Iterating the receiver ends when the sending half has been dropped.
      for chunk in recv {
         cb(chunk);
      }
   }
   /// Opens the default input device with its first f32 configuration, sends
   /// decimated chunks over `send`, and stops capturing on the first SIGINT.
   fn _listen(self, send: std::sync::mpsc::SyncSender<Vec<f32>>) {
      let host = cpal::default_host();
      let device = host.default_input_device().unwrap();
      let cfg = device.supported_input_configs()
         .unwrap()
         .find(|x| x.sample_format() == cpal::SampleFormat::F32)
         .unwrap()
         .with_max_sample_rate();
      // One output sample is taken per `downsample_ratio` interleaved input samples.
      let channel_count = cfg.channels() as f32;
      let rate_ratio = cfg.sample_rate().0 as f32 / 16_000.0;
      let downsample_ratio = channel_count * rate_ratio;
      let stream = device.build_input_stream(
         &cfg.clone().into(),
         move |data: &[f32], _: &cpal::InputCallbackInfo| {
            let wanted = (data.len() as f32 / downsample_ratio) as usize;
            let downsampled_data: Vec<f32> = (0..wanted)
               .map(|i| {
                  // Clamp to the last valid index before truncating to an integer.
                  let limit = (data.len()-1) as f32;
                  let raw = i as f32 * downsample_ratio;
                  let position = if raw > limit { limit } else { raw };
                  data[position as usize]
               })
               .collect();
            // Never block the audio callback: a full queue drops the chunk with a log line.
            if let Err(msg) = send.try_send(downsampled_data) {
               eprintln!("failed to ingest audio: {}", msg);
            }
         },
         move |err| {
            eprintln!("input error: {}", err)
         },
         None,
      ).unwrap();
      stream.play().unwrap();
      eprintln!("listening on {}", device.name().unwrap());
      // Wait for the first SIGINT, then pause the stream.
      let mut signals = Signals::new(&[SIGINT]).unwrap();
      if let Some(sig) = signals.forever().next() {
         eprintln!("sig {}", sig);
      }
      stream.pause().unwrap();
   }
 }
--- a/rust-whisper.d/transcript.sh
+++ b/rust-whisper.d/transcript.sh
Author	SHA1	Message	Date
Bel LaPointe	11b5091872	found the type needed to pass closures with local variables	2023-12-19 21:39:53 -05:00
Bel LaPointe	03370f362e	from borrow since a grant is K	2023-12-19 21:20:52 -05:00
Bel LaPointe	ec6a71d38c	purge non callback handling	2023-12-19 21:19:37 -05:00
Bel LaPointe	1b96b132e1	dumb callbacks work	2023-12-19 21:18:04 -05:00
Bel LaPointe	839487b99e	drop redundant on_success time trimming	2023-12-19 21:11:53 -05:00
Bel LaPointe	a2fee32fbc	refactor to whisper_service enqueues, whisper_impl transforms, whisper_engine provides raw	2023-12-19 21:08:59 -05:00
Bel LaPointe	091958e08d	moved to a callback BUT costed me a global so lets iterate to someTrait	2023-12-19 20:38:01 -05:00
Bel LaPointe	5f47b2c88b	wait i just needed an option? f off	2023-12-19 20:20:24 -05:00
Bel LaPointe	367838ac23	test to show include_bytes! macro supports large symlinks	2023-12-19 16:36:17 -05:00
Bel LaPointe	d05287fa3d	update --stream-* defaults	2023-12-19 10:30:10 -05:00
Bel LaPointe	01be2637ca	swap order	2023-12-19 10:26:22 -05:00
Bel LaPointe	226bedb80e	add --debug to write a file that can be played with cat /tmp/page.rawf32audio \| sox -r 16000 -b 32 -t f32 -e floating-point - -d	2023-12-19 10:25:48 -05:00
Bel LaPointe	6b54e500cd	i think my recording has gaps	2023-12-19 09:54:21 -05:00
Bel LaPointe	8603f20a24	break into words but keep more stream head/tail tiebreaking	2023-12-19 09:51:11 -05:00
Bel LaPointe	eee0bf5e65	wip...	2023-12-19 09:30:15 -05:00
Bel LaPointe	15a3f8430a	WIP trim the head and tail from text output because low confidence	2023-12-19 09:09:38 -05:00
Bel LaPointe	116f3f58c9	no buffer	2023-11-30 12:37:19 -07:00
Bel LaPointe	532ae22908	back to mvp	2023-11-30 12:28:35 -07:00
Bel LaPointe	deffc420ca	at least it complies	2023-11-30 12:00:16 -07:00
Bel LaPointe	2391d07994	transcribing results as callbacks	2023-11-30 09:58:28 -07:00
Bel LaPointe	eea4b75bc8	confirmed threaded listen vs transcribe stream is naisu	2023-11-30 09:45:09 -07:00
Bel LaPointe	8982276a90	not infinite buffer	2023-11-30 09:41:12 -07:00
Bel LaPointe	479cfb055f	threaded something i guess	2023-11-30 09:39:43 -07:00
Bel LaPointe	0667b5b5c6	large distill too	2023-11-30 09:12:38 -07:00
Bel LaPointe	9e97f8669d	fuuuuuuuu lost my models folder oh well	2023-11-30 09:06:26 -07:00
Bel LaPointe	ff0f34f80b	move rust to root	2023-11-30 09:02:11 -07:00
Bel LaPointe	bf3dd75074	gitignore	2023-11-30 09:02:02 -07:00
Bel LaPointe	827436d96c	drop snowboy	2023-11-30 09:01:50 -07:00
Bel LaPointe	3b4295d026	unnest submodule	2023-11-30 09:01:44 -07:00
Bel LaPointe	2936fec1e4	dont need to choose 1 channel since downsampling should randomly choose from all	2023-11-29 05:33:27 -07:00