o cgo is super inefficient fuck ok we rust
parent
12f824a9a7
commit
7e93939f3a
|
|
@ -1,35 +1,25 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
|
||||
"github.com/go-audio/wav"
|
||||
)
|
||||
|
||||
func main() {
|
||||
p := os.Args[1]
|
||||
f, err := os.Open(p)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
modelName := "small.en"
|
||||
if v := os.Getenv("MODEL"); v != "" {
|
||||
modelName = v
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var data []float32
|
||||
dec := wav.NewDecoder(f)
|
||||
if buf, err := dec.FullPCMBuffer(); err != nil {
|
||||
panic(err)
|
||||
} else if dec.SampleRate != whisper.SampleRate {
|
||||
panic(dec.SampleRate)
|
||||
} else if dec.NumChans != 1 {
|
||||
panic(dec.NumChans)
|
||||
} else {
|
||||
data = buf.AsFloat32Buffer().Data
|
||||
}
|
||||
|
||||
model, err := whisper.New("./models/ggml-small.en.bin")
|
||||
model, err := whisper.New("./models/ggml-" + modelName + ".bin")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
|
@ -39,22 +29,51 @@ func main() {
|
|||
}
|
||||
context.SetThreads(4)
|
||||
|
||||
context.ResetTimings()
|
||||
if err := context.Process(data, func(segment whisper.Segment) {
|
||||
//log.Printf("%+v", segment)
|
||||
}); err != nil {
|
||||
if err := http.ListenAndServe(":8080", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
b, _ := io.ReadAll(r.Body)
|
||||
if result, err := transcribe(context, bytes.NewReader(b)); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
} else {
|
||||
w.Write([]byte(result))
|
||||
}
|
||||
})); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
context.PrintTimings()
|
||||
}
|
||||
|
||||
func transcribe(context whisper.Context, r io.ReadSeeker) (string, error) {
|
||||
start := time.Now()
|
||||
defer func() {
|
||||
if os.Getenv("DEBUG") == "true" {
|
||||
log.Printf("%0.1f to transcribe", time.Since(start).Seconds())
|
||||
}
|
||||
}()
|
||||
|
||||
var data []float32
|
||||
dec := wav.NewDecoder(r)
|
||||
if buf, err := dec.FullPCMBuffer(); err != nil {
|
||||
return "", err
|
||||
} else if dec.SampleRate != whisper.SampleRate {
|
||||
return "", fmt.Errorf("sample rate %v != %v", dec.SampleRate, whisper.SampleRate)
|
||||
} else if dec.NumChans != 1 {
|
||||
return "", fmt.Errorf("chans %v != %v", dec.NumChans, 1)
|
||||
} else {
|
||||
data = buf.AsFloat32Buffer().Data
|
||||
}
|
||||
|
||||
if err := context.Process(data, nil); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
result := []string{}
|
||||
for {
|
||||
segment, err := context.NextSegment()
|
||||
if err == io.EOF {
|
||||
break
|
||||
} else if err != nil {
|
||||
panic(err)
|
||||
return "", err
|
||||
}
|
||||
fmt.Printf("%s ", segment.Text)
|
||||
result = append(result, segment.Text)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
return strings.Join(result, " "), nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue