Merge branch 'master' into gw-list-model-filter-upgrade

mudler · Aug 6, 2024 · 579ee54 · 579ee54
2 parents f3e9759 + 9cfc9ac
commit 579ee54
Show file tree

Hide file tree

Showing 64 changed files with 250 additions and 68 deletions.
diff --git a/.github/bump_deps.sh b/.github/bump_deps.sh
@@ -6,4 +6,17 @@ VAR=$3
 
 LAST_COMMIT=$(curl -s -H "Accept: application/vnd.github.VERSION.sha" "https://api.github.com/repos/$REPO/commits/$BRANCH")
 
+# Read $VAR from Makefile (only first match)
+set +e
+CURRENT_COMMIT="$(grep -m1 "^$VAR?=" Makefile | cut -d'=' -f2)"
+set -e
+
 sed -i Makefile -e "s/$VAR?=.*/$VAR?=$LAST_COMMIT/"
+
+if [ -z "$CURRENT_COMMIT" ]; then
+    echo "Could not find $VAR in Makefile."
+    exit 0
+fi
+
+echo "Changes: https://github.com/$REPO/compare/${CURRENT_COMMIT}..${LAST_COMMIT}" >> "${VAR}_message.txt"
+echo "${LAST_COMMIT}" >> "${VAR}_commit.txt"
diff --git a/.github/workflows/bump_deps.yaml b/.github/workflows/bump_deps.yaml
@@ -40,17 +40,30 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Bump dependencies 🔧
+        id: bump
         run: |
           bash .github/bump_deps.sh ${{ matrix.repository }} ${{ matrix.branch }} ${{ matrix.variable }}
+          {
+            echo 'message<<EOF'
+            cat "${{ matrix.variable }}_message.txt"
+            echo EOF
+          } >> "$GITHUB_OUTPUT"
+          {
+            echo 'commit<<EOF'
+            cat "${{ matrix.variable }}_commit.txt"
+            echo EOF
+          } >> "$GITHUB_OUTPUT"
+          rm -rfv ${{ matrix.variable }}_message.txt
+          rm -rfv ${{ matrix.variable }}_commit.txt
       - name: Create Pull Request
         uses: peter-evans/create-pull-request@v6
         with:
           token: ${{ secrets.UPDATE_BOT_TOKEN }}
           push-to-fork: ci-forks/LocalAI
           commit-message: ':arrow_up: Update ${{ matrix.repository }}'
-          title: 'chore: :arrow_up: Update ${{ matrix.repository }}'
+          title: 'chore: :arrow_up: Update ${{ matrix.repository }} to `${{ steps.bump.outputs.commit }}`'
           branch: "update/${{ matrix.variable }}"
-          body: Bump of ${{ matrix.repository }} version
+          body: ${{ steps.bump.outputs.message }}
           signoff: true
 
 

diff --git a/Makefile b/Makefile
@@ -8,7 +8,7 @@ DETECT_LIBS?=true
 # llama.cpp versions
 GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
 GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
-CPPLLAMA_VERSION?=b7a08fd5e0e7c898c68d1743066ea495202d9608
+CPPLLAMA_VERSION?=0a4ce786814b123096d18aadca89cd352b9e590b
 
 # gpt4all version
 GPT4ALL_REPO?=https://github.com/nomic-ai/gpt4all
@@ -20,7 +20,7 @@ RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
 
 # whisper.cpp version
 WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
-WHISPER_CPP_VERSION?=6739eb83c3ca5cf40d24c6fe8442a761a1eb6248
+WHISPER_CPP_VERSION?=fe36c909715e6751277ddb020e7892c7670b61d4
 
 # bert.cpp version
 BERT_REPO?=https://github.com/go-skynet/go-bert.cpp

diff --git a/backend/cpp/llama/grpc-server.cpp b/backend/cpp/llama/grpc-server.cpp
@@ -458,7 +458,9 @@ struct llama_server_context
             }
         }
 
-        std::tie(model, ctx) = llama_init_from_gpt_params(params);
+        llama_init_result llama_init = llama_init_from_gpt_params(params);
+        model = llama_init.model;
+        ctx = llama_init.context;
         if (model == nullptr)
         {
             LOG_ERROR("unable to load model", {{"model", params.model}});

diff --git a/backend/python/autogptq/requirements-cublas11.txt b/backend/python/autogptq/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/autogptq/requirements-cublas12.txt b/backend/python/autogptq/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/autogptq/requirements.txt b/backend/python/autogptq/requirements.txt
@@ -1,7 +1,6 @@
 accelerate
 auto-gptq==0.7.1
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
-torch
 certifi
 transformers
diff --git a/backend/python/bark/requirements-cublas11.txt b/backend/python/bark/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
+torchaudio
diff --git a/backend/python/bark/requirements-cublas12.txt b/backend/python/bark/requirements-cublas12.txt
@@ -0,0 +1,2 @@
+torch
+torchaudio
diff --git a/backend/python/bark/requirements.txt b/backend/python/bark/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 bark==0.1.5
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 transformers
diff --git a/backend/python/common/libbackend.sh b/backend/python/common/libbackend.sh
@@ -122,6 +122,13 @@ function installRequirements() {
         requirementFiles+=("${MY_DIR}/requirements-${BUILD_PROFILE}.txt")
     fi
 
+    # if BUILD_TYPE is empty, we are a CPU build, so we should try to install the CPU requirements
+    if [ "x${BUILD_TYPE}" == "x" ]; then
+        requirementFiles+=("${MY_DIR}/requirements-cpu.txt")
+    fi
+
+    requirementFiles+=("${MY_DIR}/requirements-after.txt")
+
     for reqFile in ${requirementFiles[@]}; do
         if [ -f ${reqFile} ]; then
             echo "starting requirements install for ${reqFile}"

diff --git a/backend/python/common/template/requirements.txt b/backend/python/common/template/requirements.txt
@@ -1,2 +1,2 @@
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
diff --git a/backend/python/coqui/requirements-cublas11.txt b/backend/python/coqui/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
+torchaudio
diff --git a/backend/python/coqui/requirements-cublas12.txt b/backend/python/coqui/requirements-cublas12.txt
@@ -0,0 +1,2 @@
+torch
+torchaudio
diff --git a/backend/python/coqui/requirements.txt b/backend/python/coqui/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 TTS==0.22.0
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 transformers
diff --git a/backend/python/diffusers/requirements-cublas11.txt b/backend/python/diffusers/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/diffusers/requirements-cublas12.txt b/backend/python/diffusers/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/diffusers/requirements.txt b/backend/python/diffusers/requirements.txt
@@ -3,11 +3,10 @@ accelerate
 compel
 peft
 diffusers
-grpcio==1.65.1
+grpcio==1.65.4
 opencv-python
 pillow
 protobuf
 sentencepiece
-torch
 transformers
 certifi
diff --git a/backend/python/exllama/requirements-cublas11.txt b/backend/python/exllama/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/exllama/requirements-cublas12.txt b/backend/python/exllama/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/exllama/requirements.txt b/backend/python/exllama/requirements.txt
@@ -1,6 +1,5 @@
 grpcio==1.65.0
 protobuf
-torch
 transformers
 certifi
 setuptools
diff --git a/backend/python/exllama2/requirements-cublas11.txt b/backend/python/exllama2/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/exllama2/requirements-cublas12.txt b/backend/python/exllama2/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/exllama2/requirements.txt b/backend/python/exllama2/requirements.txt
@@ -1,7 +1,6 @@
 accelerate
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
-torch
 wheel
 setuptools
diff --git a/backend/python/mamba/requirements-after.txt b/backend/python/mamba/requirements-after.txt
@@ -0,0 +1,2 @@
+causal-conv1d==1.4.0
+mamba-ssm==2.2.2
diff --git a/backend/python/mamba/requirements-cpu.txt b/backend/python/mamba/requirements-cpu.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/mamba/requirements-cublas11.txt b/backend/python/mamba/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/mamba/requirements-cublas12.txt b/backend/python/mamba/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/mamba/requirements-install.txt b/backend/python/mamba/requirements-install.txt
@@ -3,5 +3,4 @@
 # https://github.com/Dao-AILab/causal-conv1d/issues/24
 packaging
 setuptools
-wheel
-torch==2.3.1
+wheel
diff --git a/backend/python/mamba/requirements.txt b/backend/python/mamba/requirements.txt
@@ -1,5 +1,3 @@
-causal-conv1d==1.4.0
-mamba-ssm==2.2.2
 grpcio==1.65.1
 protobuf
 certifi

diff --git a/backend/python/openvoice/requirements-cublas11.txt b/backend/python/openvoice/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/openvoice/requirements-cublas12.txt b/backend/python/openvoice/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/openvoice/requirements-intel.txt b/backend/python/openvoice/requirements-intel.txt
@@ -2,7 +2,7 @@
 intel-extension-for-pytorch
 torch
 optimum[openvino]
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 librosa==0.9.1
 faster-whisper==1.0.3

diff --git a/backend/python/openvoice/requirements.txt b/backend/python/openvoice/requirements.txt
@@ -1,4 +1,4 @@
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 librosa
 faster-whisper

diff --git a/backend/python/parler-tts/requirements-cublas11.txt b/backend/python/parler-tts/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
+torchaudio
diff --git a/backend/python/parler-tts/requirements-cublas12.txt b/backend/python/parler-tts/requirements-cublas12.txt
@@ -0,0 +1,2 @@
+torch
+torchaudio
diff --git a/backend/python/parler-tts/requirements.txt b/backend/python/parler-tts/requirements.txt
@@ -1,7 +1,6 @@
 accelerate
 grpcio==1.65.1
 protobuf
-torch
 git+https://github.com/huggingface/parler-tts.git@10016fb0300c0dc31a0fb70e26f3affee7b62f16
 certifi
 transformers
diff --git a/backend/python/petals/requirements-cublas11.txt b/backend/python/petals/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/petals/requirements-cublas12.txt b/backend/python/petals/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/rerankers/requirements-cublas11.txt b/backend/python/rerankers/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/rerankers/requirements-cublas12.txt b/backend/python/rerankers/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/rerankers/requirements.txt b/backend/python/rerankers/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 rerankers[transformers]
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 transformers
diff --git a/backend/python/sentencetransformers/requirements-cublas11.txt b/backend/python/sentencetransformers/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/sentencetransformers/requirements-cublas12.txt b/backend/python/sentencetransformers/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/transformers-musicgen/requirements-cublas11.txt b/backend/python/transformers-musicgen/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/transformers-musicgen/requirements-cublas12.txt b/backend/python/transformers-musicgen/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/transformers-musicgen/requirements.txt b/backend/python/transformers-musicgen/requirements.txt
@@ -1,7 +1,6 @@
 accelerate
 transformers
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
-torch
 scipy==1.14.0
 certifi
diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt
@@ -0,0 +1 @@
+torch
diff --git a/backend/python/transformers/requirements.txt b/backend/python/transformers/requirements.txt
@@ -1,8 +1,7 @@
 accelerate
 transformers
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
-torch
 certifi
 intel-extension-for-transformers
 bitsandbytes

diff --git a/backend/python/vall-e-x/requirements-cublas11.txt b/backend/python/vall-e-x/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
+torchaudio
diff --git a/backend/python/vall-e-x/requirements-cublas12.txt b/backend/python/vall-e-x/requirements-cublas12.txt
@@ -0,0 +1,2 @@
+torch
+torchaudio
diff --git a/backend/python/vall-e-x/requirements.txt b/backend/python/vall-e-x/requirements.txt
@@ -1,4 +1,4 @@
 accelerate
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
diff --git a/backend/python/vllm/requirements-cublas11.txt b/backend/python/vllm/requirements-cublas11.txt
@@ -0,0 +1,3 @@
+--extra-index-url https://download.pytorch.org/whl/cu118
+torch
+flash-attn
diff --git a/backend/python/vllm/requirements-cublas.txt b/backend/python/vllm/requirements-cublas12.txt
@@ -1 +1,2 @@
+torch
 flash-attn
diff --git a/backend/python/vllm/requirements.txt b/backend/python/vllm/requirements.txt
@@ -1,6 +1,6 @@
 accelerate
 vllm
-grpcio==1.65.1
+grpcio==1.65.4
 protobuf
 certifi
 transformers

diff --git a/core/config/guesser.go b/core/config/guesser.go
@@ -26,15 +26,17 @@ const (
 type settingsConfig struct {
 	StopWords      []string
 	TemplateConfig TemplateConfig
+	RepeatPenalty float64
 }
 
 // default settings to adopt with a given model family
 var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
 	Gemma: {
+		RepeatPenalty: 1.0,
 		StopWords: []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
 		TemplateConfig: TemplateConfig{
-			Chat:        "{{.Input }}\n<|start_of_turn|>model\n",
-			ChatMessage: "<|start_of_turn|>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<|end_of_turn|>",
+			Chat:        "{{.Input }}\n<start_of_turn>model\n",
+			ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
 			Completion:  "{{.Input}}",
 		},
 	},
@@ -192,6 +194,9 @@ func guessDefaultsFromFile(cfg *BackendConfig, modelPath string) {
 		if len(cfg.StopWords) == 0 {
 			cfg.StopWords = settings.StopWords
 		}
+		if cfg.RepeatPenalty == 0.0 {
+			cfg.RepeatPenalty = settings.RepeatPenalty
+		}
 	} else {
 		log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
 	}
@@ -219,7 +224,7 @@ func identifyFamily(f *gguf.GGUFFile) familyType {
 	commandR := arch == "command-r" && eosTokenID == 255001
 	qwen2 := arch == "qwen2"
 	phi3 := arch == "phi-3"
-	gemma := strings.HasPrefix(f.Model().Name, "gemma")
+	gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
 	deepseek2 := arch == "deepseek2"
 
 	switch {

diff --git a/docs/themes/hugo-theme-relearn b/docs/themes/hugo-theme-relearn
diff --git a/examples/chainlit/requirements.txt b/examples/chainlit/requirements.txt
@@ -1,4 +1,4 @@
-llama_index==0.10.56
+llama_index==0.10.59
 requests==2.32.3
 weaviate_client==4.6.7
 transformers
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		--extra-index-url https://download.pytorch.org/whl/cu118
		torch