package main import ( "bytes" "fmt" "io" "log" "net/http" "os" "strings" "time" "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper" "github.com/go-audio/wav" ) func main() { modelName := "small.en" if v := os.Getenv("MODEL"); v != "" { modelName = v } model, err := whisper.New("./models/ggml-" + modelName + ".bin") if err != nil { panic(err) } context, err := model.NewContext() if err != nil { panic(err) } context.SetThreads(4) if err := http.ListenAndServe(":8080", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { b, _ := io.ReadAll(r.Body) if result, err := transcribe(context, bytes.NewReader(b)); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) } else { w.Write([]byte(result)) } })); err != nil { panic(err) } } func transcribe(context whisper.Context, r io.ReadSeeker) (string, error) { start := time.Now() defer func() { if os.Getenv("DEBUG") == "true" { log.Printf("%0.1f to transcribe", time.Since(start).Seconds()) } }() var data []float32 dec := wav.NewDecoder(r) if buf, err := dec.FullPCMBuffer(); err != nil { return "", err } else if dec.SampleRate != whisper.SampleRate { return "", fmt.Errorf("sample rate %v != %v", dec.SampleRate, whisper.SampleRate) } else if dec.NumChans != 1 { return "", fmt.Errorf("chans %v != %v", dec.NumChans, 1) } else { data = buf.AsFloat32Buffer().Data } if err := context.Process(data, nil); err != nil { return "", err } result := []string{} for { segment, err := context.NextSegment() if err == io.EOF { break } else if err != nil { return "", err } result = append(result, segment.Text) } return strings.Join(result, " "), nil }