Only show GPU when we're actually using it.
manyoso committed Sep 14, 2023
1 parent 1fa67a5 commit 3076e0b
Showing 6 changed files with 29 additions and 3 deletions.
10 changes: 10 additions & 0 deletions gpt4all-backend/llamamodel.cpp
@@ -337,6 +337,16 @@ bool LLamaModel::hasGPUDevice()
#endif
}

bool LLamaModel::usingGPUDevice()
{
#if defined(GGML_USE_KOMPUTE)
return ggml_vk_using_vulkan();
#elif defined(GGML_USE_METAL)
return true;
#endif
return false;
}

#if defined(_WIN32)
#define DLL_EXPORT __declspec(dllexport)
#else
1 change: 1 addition & 0 deletions gpt4all-backend/llamamodel_impl.h
@@ -30,6 +30,7 @@ class LLamaModel : public LLModel {
bool initializeGPUDevice(const GPUDevice &device) override;
bool initializeGPUDevice(int device) override;
bool hasGPUDevice() override;
bool usingGPUDevice() override;

private:
LLamaPrivate *d_ptr;
1 change: 1 addition & 0 deletions gpt4all-backend/llmodel.h
@@ -100,6 +100,7 @@ class LLModel {
virtual bool initializeGPUDevice(const GPUDevice &/*device*/) { return false; }
virtual bool initializeGPUDevice(int /*device*/) { return false; }
virtual bool hasGPUDevice() { return false; }
virtual bool usingGPUDevice() { return false; }

protected:
// These are pure virtual because subclasses need to implement as the default implementation of
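For context, a minimal caller-side sketch (hypothetical, not part of this commit) of how the two hooks differ: hasGPUDevice() reports that a GPU device was initialized for the model, while usingGPUDevice() reports whether the backend is actually running on it, so a caller deciding which device label to show should check the latter.

// Hypothetical usage sketch, not from this commit. Assumes llmodel.h above
// and that `requested` is the device name the user originally asked for.
#include <string>
#include "llmodel.h"

static std::string effectiveDevice(LLModel *model, const std::string &requested)
{
    // The GPU may have been initialized yet bypassed at load time,
    // so report CPU whenever the backend says the GPU is not in use.
    if (requested != "CPU" && !model->usingGPUDevice())
        return "CPU";
    return requested;
}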
14 changes: 11 additions & 3 deletions gpt4all-backend/replit.cpp
@@ -163,7 +163,7 @@ struct mpt_hparams {
int32_t n_embd = 0; //max_seq_len
int32_t n_head = 0; // n_heads
int32_t n_layer = 0; //n_layers
int32_t ftype = 0;
};

struct replit_layer {
@@ -220,7 +220,7 @@ static bool kv_cache_init(
params.mem_size = cache.buf.size;
params.mem_buffer = cache.buf.addr;
params.no_alloc = false;

cache.ctx = ggml_init(params);
if (!cache.ctx) {
fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__);
@@ -503,7 +503,7 @@ bool replit_model_load(const std::string & fname, std::istream &fin, replit_mode
}

GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "data", data_ptr, data_size, max_size));
GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "kv", ggml_get_mem_buffer(model.kv_self.ctx),
GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "kv", ggml_get_mem_buffer(model.kv_self.ctx),
ggml_get_mem_size(model.kv_self.ctx), 0));
GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "eval", model.eval_buf.addr, model.eval_buf.size, 0));
GGML_CHECK_BUF(ggml_metal_add_buffer(model.ctx_metal, "scr0", model.scr0_buf.addr, model.scr0_buf.size, 0));
@@ -975,6 +975,14 @@ const std::vector<LLModel::Token> &Replit::endTokens() const
return fres;
}

bool Replit::usingGPUDevice()
{
#if defined(GGML_USE_METAL)
return true;
#endif
return false;
}

#if defined(_WIN32)
#define DLL_EXPORT __declspec(dllexport)
#else
1 change: 1 addition & 0 deletions gpt4all-backend/replit_impl.h
@@ -27,6 +27,7 @@ class Replit : public LLModel {
size_t restoreState(const uint8_t *src) override;
void setThreadCount(int32_t n_threads) override;
int32_t threadCount() const override;
bool usingGPUDevice() override;

private:
ReplitPrivate *d_ptr;
5 changes: 5 additions & 0 deletions gpt4all-chat/chatllm.cpp
@@ -302,6 +302,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
m_llModelInfo = LLModelInfo();
emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename()));
} else {
// We might have had to fall back to CPU after load if the model cannot be accelerated,
// for instance if the quantization method is not supported on Vulkan yet
if (actualDevice != "CPU" && !m_llModelInfo.model->usingGPUDevice())
emit reportDevice("CPU");

switch (m_llModelInfo.model->implementation().modelType()[0]) {
case 'L': m_llModelType = LLModelType::LLAMA_; break;
case 'G': m_llModelType = LLModelType::GPTJ_; break;
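For other backends, the override pattern mirrors the Replit change above (hypothetical sketch; "MyBackend" is illustrative only): Metal offload is a compile-time decision, whereas the Kompute/Vulkan path in llamamodel.cpp asks at runtime via ggml_vk_using_vulkan(), and CPU-only backends can simply keep the base-class default of false.

// Hypothetical Metal-only backend sketch -- not part of this commit.
bool MyBackend::usingGPUDevice()
{
#if defined(GGML_USE_METAL)
    return true;   // compile-time decision: Metal builds always offload
#endif
    return false;  // otherwise report CPU, matching the LLModel default
}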
