80 lines
1.7 KiB
Go
80 lines
1.7 KiB
Go
package main
|
|
|
|
import (
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
	"github.com/go-audio/wav"
)
|
|
|
|
func main() {
|
|
modelName := "small.en"
|
|
if v := os.Getenv("MODEL"); v != "" {
|
|
modelName = v
|
|
}
|
|
model, err := whisper.New("./models/ggml-" + modelName + ".bin")
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
context, err := model.NewContext()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
context.SetThreads(4)
|
|
|
|
if err := http.ListenAndServe(":8080", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
b, _ := io.ReadAll(r.Body)
|
|
if result, err := transcribe(context, bytes.NewReader(b)); err != nil {
|
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
|
} else {
|
|
w.Write([]byte(result))
|
|
}
|
|
})); err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
func transcribe(context whisper.Context, r io.ReadSeeker) (string, error) {
|
|
start := time.Now()
|
|
defer func() {
|
|
if os.Getenv("DEBUG") == "true" {
|
|
log.Printf("%0.1f to transcribe", time.Since(start).Seconds())
|
|
}
|
|
}()
|
|
|
|
var data []float32
|
|
dec := wav.NewDecoder(r)
|
|
if buf, err := dec.FullPCMBuffer(); err != nil {
|
|
return "", err
|
|
} else if dec.SampleRate != whisper.SampleRate {
|
|
return "", fmt.Errorf("sample rate %v != %v", dec.SampleRate, whisper.SampleRate)
|
|
} else if dec.NumChans != 1 {
|
|
return "", fmt.Errorf("chans %v != %v", dec.NumChans, 1)
|
|
} else {
|
|
data = buf.AsFloat32Buffer().Data
|
|
}
|
|
|
|
if err := context.Process(data, nil); err != nil {
|
|
return "", err
|
|
}
|
|
|
|
result := []string{}
|
|
for {
|
|
segment, err := context.NextSegment()
|
|
if err == io.EOF {
|
|
break
|
|
} else if err != nil {
|
|
return "", err
|
|
}
|
|
result = append(result, segment.Text)
|
|
}
|
|
return strings.Join(result, " "), nil
|
|
}
|