From ab2365a5b5bb2925b4cadda44347cd18930f8171 Mon Sep 17 00:00:00 2001 From: hupe1980 Date: Thu, 28 Mar 2024 22:34:02 +0100 Subject: [PATCH] Add new embeddings --- go.mod | 4 ++-- go.sum | 8 ++++---- tiktoken.go | 2 ++ tiktoken_test.go | 3 +++ 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/go.mod b/go.mod index b677882..dac6235 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,8 @@ module github.com/hupe1980/go-tiktoken go 1.20 require ( - github.com/dlclark/regexp2 v1.10.0 - github.com/stretchr/testify v1.8.4 + github.com/dlclark/regexp2 v1.11.0 + github.com/stretchr/testify v1.9.0 ) require ( diff --git a/go.sum b/go.sum index 18250ef..4d2eff4 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,11 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= -github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= +github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= +github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/tiktoken.go b/tiktoken.go index 020e36b..8ddb9f1 100644 --- a/tiktoken.go +++ b/tiktoken.go @@ -53,6 +53,8 @@ var ModelToEncoding = map[string]string{ "code-davinci-edit-001": P50kEdit, // embeddings "text-embedding-ada-002": CL100kBase, + "text-embedding-3-small": CL100kBase, + "text-embedding-3-large": CL100kBase, // old embeddings "text-similarity-davinci-001": R50kBase, "text-similarity-curie-001": R50kBase, diff --git a/tiktoken_test.go b/tiktoken_test.go index cb5cfb8..5be5c0d 100644 --- a/tiktoken_test.go +++ b/tiktoken_test.go @@ -55,10 +55,13 @@ func TestNewEncodingForModel(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { encoding, err := NewEncodingForModel(tc.model) + name := "" + if encoding != nil { name = encoding.Name() } + assert.Equal(t, tc.expectedResult, name, "Unexpected encoding result") assert.Equal(t, tc.expectedError, err, "Unexpected error") })