chore(model gallery): add DeepSeek R1 14b, 32b and 70b (#4679)

Signed-off-by: Gianluca Boiano <morf3089@gmail.com>
mudler · Jan 24, 2025 · 66e9ef3 · 66e9ef3
1 parent 8282414
commit 66e9ef3
Showing 1 changed file with 80 additions and 33 deletions.
diff --git a/gallery/index.yaml b/gallery/index.yaml
@@ -2696,39 +2696,6 @@
     - filename: Qwentile2.5-32B-Instruct-Q4_K_M.gguf
       sha256: e476d6e3c15c78fc3f986d7ae8fa35c16116843827f2e6243c05767cef2f3615
       uri: huggingface://bartowski/Qwentile2.5-32B-Instruct-GGUF/Qwentile2.5-32B-Instruct-Q4_K_M.gguf
-- !!merge <<: *qwen25
-  name: "deepseek-r1-distill-qwen-1.5b"
-  icon: "https://avatars.githubusercontent.com/u/148330874"
-  urls:
-    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5b
-    - https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
-  description: |
-    DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
-    Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
-    By incorporating cold-start data prior to the RL phase,DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
-  overrides:
-    parameters:
-      model: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf
-  files:
-    - filename: deepseek-r1-distill-qwen-1.5b-Q4_K_M.gguf
-      sha256: c2c43b6018cf7700ce0ddee8807deb1a9a26758ef878232f3a142d16df81f0fe
-      uri: huggingface://unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
-- !!merge <<: *qwen25
-  name: "deepseek-r1-distill-qwen-7b"
-  urls:
-    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
-    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF
-  description: |
-    DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
-    Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
-    By incorporating cold-start data prior to the RL phase,DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
-  overrides:
-    parameters:
-      model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
-  files:
-    - filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
-      sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b
-      uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
 - &archfunct
   license: apache-2.0
   tags:
@@ -5334,6 +5301,86 @@
     - filename: archangel_sft_pythia2-8b.Q4_K_M.gguf
       sha256: a47782c55ef2b39b19644213720a599d9849511a73c9ebb0c1de749383c0a0f8
       uri: huggingface://RichardErkhov/ContextualAI_-_archangel_sft_pythia2-8b-gguf/archangel_sft_pythia2-8b.Q4_K_M.gguf
+- &deepseek-r1  ## Start DeepSeek-R1
+  url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
+  name: "deepseek-r1-distill-qwen-1.5b"
+  icon: "https://avatars.githubusercontent.com/u/148330874"
+  urls:
+    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF
+  description: |
+    DeepSeek-R1 is our advanced first-generation reasoning model designed to enhance performance in reasoning tasks.
+    Building on the foundation laid by its predecessor, DeepSeek-R1-Zero, which was trained using large-scale reinforcement learning (RL) without supervised fine-tuning, DeepSeek-R1 addresses the challenges faced by R1-Zero, such as endless repetition, poor readability, and language mixing.
+    By incorporating cold-start data prior to the RL phase, DeepSeek-R1 significantly improves reasoning capabilities and achieves performance levels comparable to OpenAI-o1 across a variety of domains, including mathematics, coding, and complex reasoning tasks.
+  overrides:
+    parameters:
+      model: DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
+  files:
+    - filename: DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
+      sha256: 1741e5b2d062b07acf048bf0d2c514dadf2a48f94e2b4aa0cfe069af3838ee2f
+      uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/DeepSeek-R1-Distill-Qwen-1.5B-Q4_K_M.gguf
+- !!merge <<: *deepseek-r1
+  name: "deepseek-r1-distill-qwen-7b"
+  urls:
+    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
+    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF
+  overrides:
+    parameters:
+      model: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
+  files:
+    - filename: DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
+      sha256: 731ece8d06dc7eda6f6572997feb9ee1258db0784827e642909d9b565641937b
+      uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF/DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf
+- !!merge <<: *deepseek-r1
+  name: "deepseek-r1-distill-qwen-14b"
+  urls:
+    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
+    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF
+  overrides:
+    parameters:
+      model: DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
+  files:
+    - filename: DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
+      sha256: 0b319bd0572f2730bfe11cc751defe82045fad5085b4e60591ac2cd2d9633181
+      uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-14B-GGUF/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf
+- !!merge <<: *deepseek-r1
+  name: "deepseek-r1-distill-qwen-32b"
+  urls:
+    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
+    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF
+  overrides:
+    parameters:
+      model: DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
+  files:
+    - filename: DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
+      sha256: bed9b0f551f5b95bf9da5888a48f0f87c37ad6b72519c4cbd775f54ac0b9fc62
+      uri: huggingface://bartowski/DeepSeek-R1-Distill-Qwen-32B-GGUF/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf
+- !!merge <<: *deepseek-r1
+  name: "deepseek-r1-distill-llama-8b"
+  icon: "https://avatars.githubusercontent.com/u/148330874"
+  urls:
+    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B
+    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF
+  overrides:
+    parameters:
+      model: DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
+  files:
+    - filename: DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
+      sha256: 87bcba20b4846d8dadf753d3ff48f9285d131fc95e3e0e7e934d4f20bc896f5d
+      uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf
+- !!merge <<: *deepseek-r1
+  name: "deepseek-r1-distill-llama-70b"
+  icon: "https://avatars.githubusercontent.com/u/148330874"
+  urls:
+    - https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B
+    - https://huggingface.co/bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF
+  overrides:
+    parameters:
+      model: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
+  files:
+    - filename: DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
+      sha256: 181a82a1d6d2fa24fe4db83a68eee030384986bdbdd4773ba76424e3a6eb9fd8
+      uri: huggingface://bartowski/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf
 - &qwen2  ## Start QWEN2
   url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
   name: "qwen2-7b-instruct"