From d29c8842d55575bcd49cea1899de57c14c7b8800 Mon Sep 17 00:00:00 2001
From: Aaron Miller
Date: Tue, 10 Oct 2023 11:10:25 -0700
Subject: [PATCH] python bindings should be quiet by default

* disable llama.cpp logging unless GPT4ALL_VERBOSE_LLAMACPP envvar is nonempty
* make verbose flag for retrieve_model default false (but also be overridable
  via gpt4all constructor)

should be able to run a basic test:

```python
import gpt4all
model = gpt4all.GPT4All('/Users/aaron/Downloads/rift-coder-v0-7b-q4_0.gguf')
print(model.generate('def fib(n):'))
```

and see no non-model output when successful
---
 gpt4all-backend/llamamodel.cpp             | 18 +++++++++++++++++-
 gpt4all-bindings/python/gpt4all/gpt4all.py |  5 +++--
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 7fd9fce129b5..4f7ea78a8dd2 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -36,6 +36,17 @@ namespace {
 const char *modelType_ = "LLaMA";
 }
 
+static void null_log_callback(enum ggml_log_level, const char*, void*) {
+    return;
+}
+
+static bool llama_verbose() {
+    const char* var = getenv("GPT4ALL_VERBOSE_LLAMACPP");
+    if (var == nullptr) return false;
+    if (var[0] == 0) return false;
+    return true;
+}
+
 struct gpt_params {
     int32_t seed = -1; // RNG seed
     int32_t n_keep = 0; // number of tokens to keep from initial prompt
@@ -144,7 +155,9 @@ bool LLamaModel::loadModel(const std::string &modelPath)
     d_ptr->params.use_mlock = params.use_mlock;
 #endif
 #ifdef GGML_USE_METAL
-    std::cerr << "llama.cpp: using Metal" << std::endl;
+    if (llama_verbose()) {
+        std::cerr << "llama.cpp: using Metal" << std::endl;
+    }
     // metal always runs the whole model if n_gpu_layers is not 0, at least
     // currently
     d_ptr->params.n_gpu_layers = 1;
@@ -390,6 +403,9 @@ DLL_EXPORT bool magic_match(const char * fname) {
 }
 
 DLL_EXPORT LLModel *construct() {
+    if (!llama_verbose()) {
+        llama_log_set(null_log_callback, nullptr);
+    }
     return new LLamaModel;
 }
 }
diff --git a/gpt4all-bindings/python/gpt4all/gpt4all.py b/gpt4all-bindings/python/gpt4all/gpt4all.py
index c6d5c9baa13f..8728648e9c84 100644
--- a/gpt4all-bindings/python/gpt4all/gpt4all.py
+++ b/gpt4all-bindings/python/gpt4all/gpt4all.py
@@ -65,6 +65,7 @@ def __init__(
         allow_download: bool = True,
         n_threads: Optional[int] = None,
         device: Optional[str] = "cpu",
+        verbose: bool = False,
     ):
         """
         Constructor
@@ -89,7 +90,7 @@ def __init__(
         self.model_type = model_type
         self.model = pyllmodel.LLModel()
         # Retrieve model and download if allowed
-        self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download)
+        self.config: ConfigType = self.retrieve_model(model_name, model_path=model_path, allow_download=allow_download, verbose=verbose)
         if device is not None:
             if device != "cpu":
                 self.model.init_gpu(model_path=self.config["path"], device=device)
@@ -117,7 +118,7 @@ def retrieve_model(
         model_name: str,
         model_path: Optional[str] = None,
         allow_download: bool = True,
-        verbose: bool = True,
+        verbose: bool = False,
     ) -> ConfigType:
         """
         Find model file, and if it doesn't exist, download the model.
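
Note for reviewers: since both switches added here are opt-in, a minimal sketch of how to turn the output back on, assuming a locally present GGUF model (`/path/to/model.gguf` is a hypothetical placeholder):

```python
import os

# GPT4ALL_VERBOSE_LLAMACPP only needs to be non-empty; the backend reads it
# with getenv() when the llama.cpp plugin constructs the model, so it must be
# set before the model is created.
os.environ["GPT4ALL_VERBOSE_LLAMACPP"] = "1"

import gpt4all

# verbose=True is forwarded to retrieve_model(), restoring the Python-side
# messages that now default to off.
model = gpt4all.GPT4All('/path/to/model.gguf', verbose=True)
print(model.generate('def fib(n):'))
```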