go test -tags=ai -v -run=AI works with ollama which is cool and fast with llama3

2024-04-18 14:08:28 -06:00
parent 8557ddc522
commit 14de286415
3 changed files with 28 additions and 208 deletions
--- a/ai.go
+++ b/ai.go
@@ -1,13 +1,8 @@
 package main
 import (
 	"bytes"
 	"context"
 	"os"
 	"strings"
 	nn "github.com/nikolaydubina/llama2.go/exp/nnfast"
 	"github.com/nikolaydubina/llama2.go/llama2"
 	"github.com/tmc/langchaingo/llms"
 	"github.com/tmc/langchaingo/llms/ollama"
 )
@@ -46,124 +41,3 @@ func (ai AIOllama) Do(ctx context.Context, prompt string) (string, error) {
 	}
 	return llms.GenerateFromSinglePrompt(ctx, llm, prompt)
 }
 type AILocal struct {
 	checkpointPath string
 	tokenizerPath  string
 	temperature    float64
 	steps          int
 	topp           float64
 }
 func NewAILocal(
 	checkpointPath string,
 	tokenizerPath string,
 	temperature float64,
 	steps int,
 	topp float64,
 ) AILocal {
 	return AILocal{
 		checkpointPath: checkpointPath,
 		tokenizerPath:  tokenizerPath,
 		temperature:    temperature,
 		steps:          steps,
 		topp:           topp,
 	}
 }
 // https://github.com/nikolaydubina/llama2.go/blob/master/main.go
 func (ai AILocal) Do(ctx context.Context, prompt string) (string, error) {
 	checkpointFile, err := os.OpenFile(ai.checkpointPath, os.O_RDONLY, 0)
 	if err != nil {
 		return "", err
 	}
 	defer checkpointFile.Close()
 	config, err := llama2.NewConfigFromCheckpoint(checkpointFile)
 	if err != nil {
 		return "", err
 	}
 	isSharedWeights := config.VocabSize > 0
 	if config.VocabSize < 0 {
 		config.VocabSize = -config.VocabSize
 	}
 	tokenizerFile, err := os.OpenFile(ai.tokenizerPath, os.O_RDONLY, 0)
 	if err != nil {
 		return "", err
 	}
 	defer tokenizerFile.Close()
 	vocab := llama2.NewVocabFromFile(config.VocabSize, tokenizerFile)
 	w := llama2.NewTransformerWeightsFromCheckpoint(config, checkpointFile, isSharedWeights)
 	// right now we cannot run for more than config.SeqLen steps
 	steps := ai.steps
 	if steps <= 0 || steps > config.SeqLen {
 		steps = config.SeqLen
 	}
 	runState := llama2.NewRunState(config)
 	promptTokens := vocab.Encode(strings.ReplaceAll(prompt, "\n", "<0x0A>"))
 	out := bytes.NewBuffer(nil)
 	// the current position we are in
 	var token int = 1 // 1 = BOS token in llama-2 sentencepiece
 	var pos = 0
 	for pos < steps {
 		// forward the transformer to get logits for the next token
 		llama2.Transformer(token, pos, config, runState, w)
 		var next int
 		if pos < len(promptTokens) {
 			next = promptTokens[pos]
 		} else {
 			// sample the next token
 			if ai.temperature == 0 {
 				// greedy argmax sampling
 				next = nn.ArgMax(runState.Logits)
 			} else {
 				// apply the temperature to the logits
 				for q := 0; q < config.VocabSize; q++ {
 					runState.Logits[q] /= float32(ai.temperature)
 				}
 				// apply softmax to the logits to the probabilities for next token
 				nn.SoftMax(runState.Logits)
 				// we now want to sample from this distribution to get the next token
 				if ai.topp <= 0 || ai.topp >= 1 {
 					// simply sample from the predicted probability distribution
 					next = nn.Sample(runState.Logits)
 				} else {
 					// top-p (nucleus) sampling, clamping the least likely tokens to zero
 					next = nn.SampleTopP(runState.Logits, float32(ai.topp))
 				}
 			}
 		}
 		pos++
 		// data-dependent terminating condition: the BOS (1) token delimits sequences
 		if next == 1 {
 			break
 		}
 		// following BOS (1) token, sentencepiece decoder strips any leading whitespace
 		var tokenStr string
 		if token == 1 && vocab.Words[next][0] == ' ' {
 			tokenStr = vocab.Words[next][1:]
 		} else {
 			tokenStr = vocab.Words[next]
 		}
 		out.Write([]byte(tokenStr))
 		// advance forward
 		token = next
 	}
 	out.Write([]byte("\n"))
 	return strings.ReplaceAll(string(out.Bytes()), "<0x0A>", "\n"), nil
 }
--- a/ai_test.go
+++ b/ai_test.go
@@ -4,11 +4,6 @@ package main
 import (
 	"context"
 	"fmt"
 	"io"
 	"net/http"
 	"os"
 	"path"
 	"testing"
 	"time"
 )
@@ -22,53 +17,7 @@ func TestAINoop(t *testing.T) {
 func TestAIOllama(t *testing.T) {
 	t.Parallel()
-	ai := NewAIOllama("http://localhost:11434", "gemma:2b")
+	ai := NewAIOllama("http://localhost:11434", "llama3")
 	testAI(t, ai)
 }
 func TestAILocal(t *testing.T) {
 	t.Parallel()
 	d := os.TempDir()
 	checkpoints := "checkpoints"
 	tokenizer := "tokenizer"
 	for u, p := range map[string]*string{
 		"https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.bin": &checkpoints,
 		"https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin":           &tokenizer,
 	} {
 		func() {
 			*p = path.Base(u)
 			if _, err := os.Stat(path.Join(d, *p)); os.IsNotExist(err) {
 				t.Logf("downloading %s from %s", u, *p)
 				resp, err := http.Get(u)
 				if err != nil {
 					t.Fatal(err)
 				}
 				defer resp.Body.Close()
 				f, err := os.Create(path.Join(d, *p))
 				if err != nil {
 					t.Fatal(err)
 				}
 				defer f.Close()
 				if _, err := io.Copy(f, resp.Body); err != nil {
 					f.Close()
 					os.Remove(path.Join(d, *p))
 					t.Fatal(err)
 				}
 			}
 		}()
 	}
 	ai := NewAILocal(
 		path.Join(d, checkpoints),
 		path.Join(d, tokenizer),
 		0.9,
 		256,
 		0.9,
 	)
 	testAI(t, ai)
 }
@@ -78,7 +27,7 @@ func testAI(t *testing.T, ai AI) {
 	defer can()
 	t.Run("mvp", func(t *testing.T) {
-		if result, err := ai.Do(ctx, "hello world"); err != nil {
+		if result, err := ai.Do(ctx, "Tell me a fun fact."); err != nil {
 			t.Fatal(err)
 		} else if len(result) < 3 {
 			t.Error(result)
@@ -87,6 +36,7 @@ func testAI(t *testing.T, ai AI) {
 		}
 	})
 	/*
 			t.Run("simulation", func(t *testing.T) {
 				d := NewRAM()
 				FillWithTestdata(ctx, d, renderAssetPattern, renderDatacenterPattern, renderEventNamePattern)
@@ -114,5 +64,5 @@ func testAI(t *testing.T, ai AI) {
 				}
 				t.Logf("\n\t%s\n->\n\t%s", input, result)
 			})
-
+	*/
 }
--- a/config.go
+++ b/config.go
@@ -25,8 +25,6 @@ type Config struct {
 	FillWithTestdata           bool
 	OllamaURL                  string
 	OllamaModel                string
 	LocalCheckpoint            string
 	LocalTokenizer             string
 	AssetPattern               string
 	DatacenterPattern          string
 	EventNamePattern           string
@@ -132,8 +130,6 @@ func newConfigFromEnv(ctx context.Context, getEnv func(string) string) (Config,
 	if result.OllamaURL != "" {
 		result.ai = NewAIOllama(result.OllamaURL, result.OllamaModel)
 	} else if result.LocalCheckpoint != "" && result.LocalTokenizer != "" {
 		result.ai = NewAILocal(result.LocalCheckpoint, result.LocalTokenizer, 0.9, 128, 0.9)
 	} else {
 		result.ai = NewAINoop()
 	}