diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json index b0f20bae6a..507ab4ebc8 100644 --- a/extensions/inference-cortex-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.23", + "version": "1.0.24", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json index 391c939905..ccc8f693f5 100644 --- a/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json +++ b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json @@ -21,7 +21,8 @@ "ctx_len": 4096, "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n", "llama_model_path": "ggml-model-q5_k.gguf", - "mmproj": "mmproj-model-f16.gguf" + "mmproj": "mmproj-model-f16.gguf", + "ngl": 33 }, "parameters": { "max_tokens": 4096 diff --git a/extensions/inference-cortex-extension/resources/models/llava-13b/model.json b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json index 791c98749d..fe058e259a 100644 --- a/extensions/inference-cortex-extension/resources/models/llava-13b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json @@ -21,7 +21,8 @@ "ctx_len": 4096, "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n", "llama_model_path": "llava-v1.6-vicuna-13b.Q4_K_M.gguf", - "mmproj": "mmproj-model-f16.gguf" + "mmproj": "mmproj-model-f16.gguf", + "ngl": 33 }, "parameters": { "max_tokens": 4096, diff --git a/extensions/inference-cortex-extension/resources/models/llava-7b/model.json b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json index b22899c96e..8e5cdf09f1 100644 --- a/extensions/inference-cortex-extension/resources/models/llava-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json @@ -21,7 +21,8 @@ "ctx_len": 4096, "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n", "llama_model_path": "llava-v1.6-mistral-7b.Q4_K_M.gguf", - "mmproj": "mmproj-model-f16.gguf" + "mmproj": "mmproj-model-f16.gguf", + "ngl": 33 }, "parameters": { "max_tokens": 4096,