From 7e93939f3a9b48b5380a1a8e018f427d0f8779bf Mon Sep 17 00:00:00 2001
From: bel
Date: Thu, 30 Mar 2023 22:16:53 -0600
Subject: [PATCH] o cgo is super inefficient fuck ok we rust

---
 whisper-cpp-2023/main.go | 73 +++++++++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 27 deletions(-)

diff --git a/whisper-cpp-2023/main.go b/whisper-cpp-2023/main.go
index 97d803f..4a34308 100644
--- a/whisper-cpp-2023/main.go
+++ b/whisper-cpp-2023/main.go
@@ -1,35 +1,25 @@
 package main
 
 import (
+	"bytes"
 	"fmt"
 	"io"
+	"log"
+	"net/http"
 	"os"
+	"strings"
+	"time"
 
 	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
 	"github.com/go-audio/wav"
 )
 
 func main() {
-	p := os.Args[1]
-	f, err := os.Open(p)
-	if err != nil {
-		panic(err)
+	modelName := "small.en"
+	if v := os.Getenv("MODEL"); v != "" {
+		modelName = v
 	}
-	defer f.Close()
-
-	var data []float32
-	dec := wav.NewDecoder(f)
-	if buf, err := dec.FullPCMBuffer(); err != nil {
-		panic(err)
-	} else if dec.SampleRate != whisper.SampleRate {
-		panic(dec.SampleRate)
-	} else if dec.NumChans != 1 {
-		panic(dec.NumChans)
-	} else {
-		data = buf.AsFloat32Buffer().Data
-	}
-
-	model, err := whisper.New("./models/ggml-small.en.bin")
+	model, err := whisper.New("./models/ggml-" + modelName + ".bin")
 	if err != nil {
 		panic(err)
 	}
@@ -39,22 +29,51 @@ func main() {
 	}
 	context.SetThreads(4)
 
-	context.ResetTimings()
-	if err := context.Process(data, func(segment whisper.Segment) {
-		//log.Printf("%+v", segment)
-	}); err != nil {
+	if err := http.ListenAndServe(":8080", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		b, _ := io.ReadAll(r.Body)
+		if result, err := transcribe(context, bytes.NewReader(b)); err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+		} else {
+			w.Write([]byte(result))
+		}
+	})); err != nil {
 		panic(err)
 	}
-	context.PrintTimings()
+}
 
+func transcribe(context whisper.Context, r io.ReadSeeker) (string, error) {
+	start := time.Now()
+	defer func() {
+		if os.Getenv("DEBUG") == "true" {
+			log.Printf("%0.1f to transcribe", time.Since(start).Seconds())
+		}
+	}()
+
+	var data []float32
+	dec := wav.NewDecoder(r)
+	if buf, err := dec.FullPCMBuffer(); err != nil {
+		return "", err
+	} else if dec.SampleRate != whisper.SampleRate {
+		return "", fmt.Errorf("sample rate %v != %v", dec.SampleRate, whisper.SampleRate)
+	} else if dec.NumChans != 1 {
+		return "", fmt.Errorf("chans %v != %v", dec.NumChans, 1)
+	} else {
+		data = buf.AsFloat32Buffer().Data
+	}
+
+	if err := context.Process(data, nil); err != nil {
+		return "", err
+	}
+
+	result := []string{}
 	for {
 		segment, err := context.NextSegment()
 		if err == io.EOF {
 			break
 		} else if err != nil {
-			panic(err)
+			return "", err
 		}
-		fmt.Printf("%s ", segment.Text)
+		result = append(result, segment.Text)
 	}
-	fmt.Printf("\n")
+	return strings.Join(result, " "), nil
 }