rust-whisper/whisper-cpp-2023/main.go

80 lines
1.7 KiB
Go

package main
import (
"bytes"
"fmt"
"io"
"log"
"net/http"
"os"
"strings"
"time"
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
"github.com/go-audio/wav"
)
// main loads a ggml whisper model, builds one transcription context from it,
// and serves transcriptions over HTTP on :8080.
//
// The model file is ./models/ggml-<name>.bin, where <name> defaults to
// "small.en" and can be overridden with the MODEL environment variable. Every
// request, whatever its path or method, has its body passed to transcribe as
// WAV data; the transcript is written back as the response body. Startup
// failures and a failing listener panic.
func main() {
	modelName := "small.en"
	if v := os.Getenv("MODEL"); v != "" {
		modelName = v
	}
	model, err := whisper.New("./models/ggml-" + modelName + ".bin")
	if err != nil {
		panic(err)
	}
	context, err := model.NewContext()
	if err != nil {
		panic(err)
	}
	context.SetThreads(4)

	// net/http runs each handler invocation on its own goroutine, but there is
	// only one whisper context and transcribe drives it through a stateful
	// Process / NextSegment sequence. A one-slot channel serializes access so
	// that overlapping requests cannot interleave on the shared context.
	gate := make(chan struct{}, 1)

	handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Buffer the upload first: transcribe needs an io.ReadSeeker, and
		// reading before taking the gate keeps slow clients from blocking
		// other requests' transcriptions.
		b, err := io.ReadAll(r.Body)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}

		result, err := func() (string, error) {
			gate <- struct{}{}
			// Released via defer so a panic inside transcribe (recovered by
			// net/http) cannot leave the gate held forever.
			defer func() { <-gate }()
			return transcribe(context, bytes.NewReader(b))
		}()
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}

		if _, err := io.WriteString(w, result); err != nil {
			// The client most likely went away; nothing to send back.
			log.Printf("writing response: %v", err)
		}
	})

	if err := http.ListenAndServe(":8080", handler); err != nil {
		panic(err)
	}
}
// transcribe decodes a complete WAV stream from r, feeds the samples to the
// given whisper context, and returns the text of every produced segment
// joined with single spaces.
//
// The audio must be single-channel and sampled at whisper.SampleRate; any
// other format is rejected with an error, as is any decoding or processing
// failure. When the DEBUG environment variable equals "true", the elapsed
// time in seconds is logged when the function returns, whether it succeeded
// or not.
func transcribe(context whisper.Context, r io.ReadSeeker) (string, error) {
	began := time.Now()
	defer func() {
		if os.Getenv("DEBUG") != "true" {
			return
		}
		log.Printf("%0.1f to transcribe", time.Since(began).Seconds())
	}()

	// Decode everything up front, then validate the format the decoder saw.
	decoder := wav.NewDecoder(r)
	pcm, err := decoder.FullPCMBuffer()
	switch {
	case err != nil:
		return "", err
	case decoder.SampleRate != whisper.SampleRate:
		return "", fmt.Errorf("sample rate %v != %v", decoder.SampleRate, whisper.SampleRate)
	case decoder.NumChans != 1:
		return "", fmt.Errorf("chans %v != %v", decoder.NumChans, 1)
	}
	samples := pcm.AsFloat32Buffer().Data

	if err := context.Process(samples, nil); err != nil {
		return "", err
	}

	// Drain segments until the context reports io.EOF.
	var texts []string
	for {
		seg, err := context.NextSegment()
		if err == io.EOF {
			return strings.Join(texts, " "), nil
		}
		if err != nil {
			return "", err
		}
		texts = append(texts, seg.Text)
	}
}