vulkan support for typescript bindings, gguf support #1390

Merged Nov 1, 2023

Commits (138)
8585ff4
adding some native methods to cpp wrapper
jacoobes Sep 2, 2023
dc4b704
gpu seems to work
jacoobes Sep 2, 2023
94c217c
typings and add availibleGpus method
jacoobes Sep 2, 2023
0487394
fix spelling
jacoobes Sep 2, 2023
ab72ef2
fix syntax
jacoobes Sep 2, 2023
57cc5c3
more
jacoobes Sep 2, 2023
efde701
normalize methods to conform to py
jacoobes Sep 4, 2023
463d9cb
remove extra dynamic linker deps when building with vulkan
apage43 Sep 5, 2023
73ff1c4
bump python version (library linking fix)
apage43 Sep 11, 2023
71e2000
Don't link against libvulkan.
manyoso Sep 12, 2023
e5b0d2d
vulkan python bindings on windows fixes
apage43 Sep 12, 2023
be83035
Bring the vulkan backend to the GUI.
manyoso Sep 13, 2023
246ba22
When device is Auto (the default) then we will only consider discrete…
manyoso Sep 13, 2023
74b4800
Show the device we're currently using.
manyoso Sep 13, 2023
ea41e60
Fix up the name and formatting.
manyoso Sep 13, 2023
299dabe
init at most one vulkan device, submodule update
apage43 Sep 13, 2023
2a913ca
Update the submodule.
manyoso Sep 13, 2023
ce9f64e
Add version 2.4.15 and bump the version number.
manyoso Sep 13, 2023
0a19cef
Fix a bug where we're not properly falling back to CPU.
manyoso Sep 13, 2023
11e459e
Sync to a newer version of llama.cpp with bugfix for vulkan.
manyoso Sep 14, 2023
3c5b5f0
Report the actual device we're using.
manyoso Sep 14, 2023
6eb6f23
Only show GPU when we're actually using it.
manyoso Sep 14, 2023
780da62
Bump to new llama with new bugfix.
manyoso Sep 14, 2023
b63c162
Release notes for v2.4.16 and bump the version.
manyoso Sep 14, 2023
635b40d
Fallback to CPU more robustly.
manyoso Sep 14, 2023
81bdcc7
Release notes for v2.4.17 and bump the version.
manyoso Sep 14, 2023
4570660
Bump the Python version to python-v1.0.12 to restrict the quants that…
manyoso Sep 15, 2023
3c9acad
Link against ggml in bin so we can get the available devices without …
manyoso Sep 15, 2023
d713c4c
Send actual and requested device info for those who have opt-in.
manyoso Sep 16, 2023
ce51f82
Actually bump the version.
manyoso Sep 16, 2023
7f46228
Release notes for v2.4.18 and bump the version.
manyoso Sep 16, 2023
6ab97c4
Fix for crashes on systems where vulkan is not installed properly.
manyoso Sep 16, 2023
1e5d52f
Release notes for v2.4.19 and bump the version.
manyoso Sep 16, 2023
af28bd0
Merge branch 'main' into feat(ts)/gpu
jacoobes Sep 16, 2023
3dde1d9
fix typings and vulkan build works on win
jacoobes Sep 16, 2023
281c271
Merge branch 'main' into feat(ts)/gpu
jacoobes Sep 28, 2023
e5ad622
Add flatpak manifest
qnixsynapse Sep 15, 2023
c0740f3
Remove unnecessary stuffs from manifest
qnixsynapse Sep 15, 2023
0cd1aaa
Update to 2.4.19
qnixsynapse Sep 17, 2023
ae76c49
appdata: update software description
qnixsynapse Oct 4, 2023
cd77172
Latest rebase on llama.cpp with gguf support.
manyoso Sep 21, 2023
b31054d
macos build fixes
apage43 Sep 26, 2023
19e0789
llamamodel: metal supports all quantization types now
cebtenzzre Sep 25, 2023
bfbae71
gpt4all.py: GGUF
cebtenzzre Sep 26, 2023
274c296
pyllmodel: print specific error message
cebtenzzre Sep 26, 2023
e0b6eb6
backend: port BERT to GGUF
cebtenzzre Sep 25, 2023
be328d3
backend: port MPT to GGUF
cebtenzzre Sep 28, 2023
021166f
backend: port Replit to GGUF
cebtenzzre Sep 28, 2023
0a61fa7
backend: use gguf branch of llama.cpp-mainline
cebtenzzre Sep 28, 2023
a8b714b
backend: use llamamodel.cpp for StarCoder
cebtenzzre Sep 28, 2023
b50db3d
conversion scripts: cleanup
cebtenzzre Sep 28, 2023
d804031
convert scripts: load model as late as possible
cebtenzzre Sep 29, 2023
c6777ab
convert_mpt_hf_to_gguf.py: better tokenizer decoding
cebtenzzre Sep 29, 2023
ecf5945
backend: use llamamodel.cpp for Falcon
cebtenzzre Sep 29, 2023
8d508b0
convert scripts: make them directly executable
cebtenzzre Sep 29, 2023
b972ed0
fix references to removed model types
cebtenzzre Sep 29, 2023
83e350d
modellist: fix the system prompt
cebtenzzre Sep 25, 2023
8f711bb
backend: port GPT-J to GGUF
cebtenzzre Sep 28, 2023
cb4abc8
gpt-j: update inference to match latest llama.cpp insights
cebtenzzre Sep 29, 2023
ac42296
chatllm: grammar fix
cebtenzzre Sep 29, 2023
78e8ec7
convert scripts: use bytes_to_unicode from transformers
cebtenzzre Sep 29, 2023
0baf34c
convert scripts: make gptj script executable
cebtenzzre Sep 29, 2023
63c3a01
convert scripts: add feed-forward length for better compatiblilty
cebtenzzre Sep 30, 2023
882b140
gptj: remove unused variables
cebtenzzre Oct 2, 2023
3b25bbb
Refactor for subgroups on mat * vec kernel.
manyoso Sep 26, 2023
1728f63
Add q6_k kernels for vulkan.
manyoso Oct 2, 2023
1771aca
python binding: print debug message to stderr
cebtenzzre Oct 3, 2023
2c8d21c
Fix regenerate button to be deterministic and bump the llama version …
manyoso Oct 3, 2023
fb511ff
Bump to the latest fixes for vulkan in llama.
manyoso Oct 4, 2023
107bb5a
llamamodel: fix static vector in LLamaModel::endTokens
cebtenzzre Oct 4, 2023
4a58ff9
Switch to new models2.json for new gguf release and bump our version to
manyoso Oct 5, 2023
46bcb00
Bump to latest llama/gguf branch.
manyoso Oct 5, 2023
09a7a67
chat: report reason for fallback to CPU
cebtenzzre Sep 29, 2023
ba0f9ce
chat: make sure to clear fallback reason on success
cebtenzzre Oct 2, 2023
1e7b888
more accurate fallback descriptions
cebtenzzre Oct 4, 2023
6653f76
differentiate between init failure and unsupported models
cebtenzzre Oct 4, 2023
9ae6a14
backend: do not use Vulkan with non-LLaMA models
cebtenzzre Oct 4, 2023
7c35f2f
Add q8_0 kernels to kompute shaders and bump to latest llama/gguf.
manyoso Oct 5, 2023
6d8aa80
backend: fix build with Visual Studio generator
cebtenzzre Oct 5, 2023
eeb8a03
remove old llama.cpp submodules
cebtenzzre Oct 5, 2023
c9d581d
Reorder and refresh our models2.json.
manyoso Oct 5, 2023
b942c88
rebase on newer llama.cpp
cebtenzzre Oct 5, 2023
43ddd10
python/embed4all: use gguf model, allow passing kwargs/overriding model
apage43 Oct 5, 2023
bf845b3
Add starcoder, rift and sbert to our models2.json.
manyoso Oct 5, 2023
a7b2935
Push a new version number for llmodel backend now that it is based on…
manyoso Oct 5, 2023
7da7a08
fix stray comma in models2.json
apage43 Oct 5, 2023
5a39ed4
Speculative fix for build on mac.
manyoso Oct 5, 2023
f2be23a
chat: clearer CPU fallback messages
cebtenzzre Oct 6, 2023
6479d6f
Fix crasher with an empty string for prompt template.
manyoso Oct 6, 2023
0841dba
Update the language here to avoid misunderstanding.
manyoso Oct 6, 2023
310c44f
added EM German Mistral Model
Oct 9, 2023
961ba07
make codespell happy
apage43 Oct 10, 2023
12b4d79
issue template: remove "Related Components" section
cebtenzzre Oct 10, 2023
0f2e52d
cmake: install the GPT-J plugin (#1487)
cebtenzzre Oct 10, 2023
0753a84
Do not delete saved chats if we fail to serialize properly.
manyoso Oct 7, 2023
14074ee
Restore state from text if necessary.
manyoso Oct 10, 2023
3a035fd
Another codespell attempted fix.
manyoso Oct 11, 2023
91d6d6a
llmodel: do not call magic_match unless build variant is correct (#1488)
cebtenzzre Oct 11, 2023
3ae6569
chatllm: do not write uninitialized data to stream (#1486)
cebtenzzre Oct 11, 2023
e3bf811
mat*mat for q4_0, q8_0
apage43 Oct 11, 2023
d0bb7e1
do not process prompts on gpu yet
apage43 Oct 11, 2023
4d74fbe
python: support Path in GPT4All.__init__ (#1462)
cebtenzzre Oct 11, 2023
adbed54
llmodel: print an error if the CPU does not support AVX (#1499)
cebtenzzre Oct 11, 2023
6ca4d93
python bindings should be quiet by default
apage43 Oct 10, 2023
228802b
python: always check status code of HTTP responses (#1502)
cebtenzzre Oct 11, 2023
9e9842b
Always save chats to disk, but save them as text by default. This als…
manyoso Oct 11, 2023
4db5975
Update README.md
agi-dude Sep 21, 2023
10dd89f
fix embed4all filename
apage43 Oct 11, 2023
d5a5afe
Improves Java API signatures maintaining back compatibility
lordofthejars Aug 25, 2023
460f503
python: replace deprecated pkg_resources with importlib (#1505)
cebtenzzre Oct 12, 2023
59b0962
Updated chat wishlist (#1351)
niansa Oct 12, 2023
f2159c1
q6k, q4_1 mat*mat
apage43 Oct 11, 2023
b485f89
update mini-orca 3b to gguf2, license
apage43 Oct 12, 2023
238e8f4
convert scripts: fix AutoConfig typo (#1512)
cebtenzzre Oct 13, 2023
6e46f10
Merge branch 'main' into feat(ts)/gpu
jacoobes Oct 16, 2023
53b4a46
publish config https://docs.npmjs.com/cli/v9/configuring-npm/package-…
jacoobes Oct 19, 2023
044ba62
merge
jacoobes Oct 19, 2023
834403a
Merge branch 'main' into feat(ts)/gpu
jacoobes Oct 19, 2023
3b48de9
fix appendBin
jacoobes Oct 20, 2023
0b0cf65
fix gpu not initializing first
jacoobes Oct 20, 2023
90c6c27
sync up
jacoobes Oct 20, 2023
ce6fb7c
progress, still wip on destructor
jacoobes Oct 20, 2023
e129be4
some detection work
jacoobes Oct 21, 2023
177172f
merge
jacoobes Oct 25, 2023
1721cdb
untested dispose method
jacoobes Oct 25, 2023
2fda274
add js side of dispose
jacoobes Oct 25, 2023
c94fb9f
Merge branch 'main' into feat(ts)/gpu
jacoobes Oct 29, 2023
b878bea
Merge branch 'main' into feat(ts)/gpu
jacoobes Oct 31, 2023
150095b
Update gpt4all-bindings/typescript/index.cc
jacoobes Oct 31, 2023
b2679cb
Update gpt4all-bindings/typescript/index.cc
jacoobes Oct 31, 2023
3b8a17f
Update gpt4all-bindings/typescript/index.cc
jacoobes Oct 31, 2023
354455c
Update gpt4all-bindings/typescript/src/gpt4all.d.ts
jacoobes Oct 31, 2023
a6a29b4
Update gpt4all-bindings/typescript/src/gpt4all.js
jacoobes Oct 31, 2023
88d8e53
Update gpt4all-bindings/typescript/src/util.js
jacoobes Nov 1, 2023
68e83d2
fix tests
jacoobes Nov 1, 2023
a206269
Merge branch 'main' into feat(ts)/gpu
jacoobes Nov 1, 2023
74e242b
fix circleci for nodejs
jacoobes Nov 1, 2023
661b522
bump version
jacoobes Nov 1, 2023
2 changes: 1 addition & 1 deletion .circleci/continue_config.yml
@@ -994,7 +994,7 @@ jobs:
command: |
cd gpt4all-bindings/typescript
npm set //registry.npmjs.org/:_authToken=$NPM_TOKEN
npm publish --access public --tag alpha
npm publish

workflows:
version: 2
1 change: 1 addition & 0 deletions gpt4all-bindings/typescript/.yarnrc.yml
@@ -0,0 +1 @@
nodeLinker: node-modules
3 changes: 0 additions & 3 deletions gpt4all-bindings/typescript/README.md
@@ -75,15 +75,12 @@ cd gpt4all-bindings/typescript
```sh
yarn
```

* llama.cpp git submodule for gpt4all can be possibly absent. If this is the case, make sure to run in llama.cpp parent directory

```sh
git submodule update --init --depth 1 --recursive
```

**AS OF NEW BACKEND** to build the backend,

```sh
yarn build:backend
```
136 changes: 121 additions & 15 deletions gpt4all-bindings/typescript/index.cc
@@ -1,6 +1,5 @@
#include "index.h"

Napi::FunctionReference NodeModelWrapper::constructor;

Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
Napi::Function self = DefineClass(env, "LLModel", {
@@ -13,14 +12,64 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
InstanceMethod("embed", &NodeModelWrapper::GenerateEmbedding),
InstanceMethod("threadCount", &NodeModelWrapper::ThreadCount),
InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
InstanceMethod("initGpuByString", &NodeModelWrapper::InitGpuByString),
InstanceMethod("hasGpuDevice", &NodeModelWrapper::HasGpuDevice),
InstanceMethod("listGpu", &NodeModelWrapper::GetGpuDevices),
InstanceMethod("memoryNeeded", &NodeModelWrapper::GetRequiredMemory),
InstanceMethod("dispose", &NodeModelWrapper::Dispose)
});
// Keep a static reference to the constructor
//
constructor = Napi::Persistent(self);
constructor.SuppressDestruct();
Napi::FunctionReference* constructor = new Napi::FunctionReference();
*constructor = Napi::Persistent(self);
env.SetInstanceData(constructor);
return self;
}
Napi::Value NodeModelWrapper::GetRequiredMemory(const Napi::CallbackInfo& info)
{
auto env = info.Env();
return Napi::Number::New(env, static_cast<uint32_t>( llmodel_required_mem(GetInference(), full_model_path.c_str()) ));

}
Napi::Value NodeModelWrapper::GetGpuDevices(const Napi::CallbackInfo& info)
{
auto env = info.Env();
int num_devices = 0;
auto mem_size = llmodel_required_mem(GetInference(), full_model_path.c_str());
llmodel_gpu_device* all_devices = llmodel_available_gpu_devices(GetInference(), mem_size, &num_devices);
if(all_devices == nullptr) {
Napi::Error::New(
env,
"Unable to retrieve list of all GPU devices"
).ThrowAsJavaScriptException();
return env.Undefined();
}
auto js_array = Napi::Array::New(env, num_devices);
for(int i = 0; i < num_devices; ++i) {
auto gpu_device = all_devices[i];
/*
*
* struct llmodel_gpu_device {
int index = 0;
int type = 0; // same as VkPhysicalDeviceType
size_t heapSize = 0;
const char * name;
const char * vendor;
};
*
*/
Napi::Object js_gpu_device = Napi::Object::New(env);
js_gpu_device["index"] = uint32_t(gpu_device.index);
js_gpu_device["type"] = uint32_t(gpu_device.type);
js_gpu_device["heapSize"] = static_cast<uint32_t>( gpu_device.heapSize );
js_gpu_device["name"]= gpu_device.name;
js_gpu_device["vendor"] = gpu_device.vendor;

js_array[i] = js_gpu_device;
}
return js_array;
}

Napi::Value NodeModelWrapper::getType(const Napi::CallbackInfo& info)
{
if(type.empty()) {
@@ -29,15 +78,41 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
return Napi::String::New(info.Env(), type);
}

Napi::Value NodeModelWrapper::InitGpuByString(const Napi::CallbackInfo& info)
{
auto env = info.Env();
uint32_t memory_required = info[0].As<Napi::Number>();

std::string gpu_device_identifier = info[1].As<Napi::String>();

size_t converted_value;
if(memory_required <= std::numeric_limits<size_t>::max()) {
converted_value = static_cast<size_t>(memory_required);
} else {
Napi::Error::New(
env,
"invalid number for memory size. Exceeded bounds for memory."
).ThrowAsJavaScriptException();
return env.Undefined();
}

auto result = llmodel_gpu_init_gpu_device_by_string(GetInference(), converted_value, gpu_device_identifier.c_str());
return Napi::Boolean::New(env, result);
}
Napi::Value NodeModelWrapper::HasGpuDevice(const Napi::CallbackInfo& info)
{
return Napi::Boolean::New(info.Env(), llmodel_has_gpu_device(GetInference()));
}

NodeModelWrapper::NodeModelWrapper(const Napi::CallbackInfo& info) : Napi::ObjectWrap<NodeModelWrapper>(info)
{
auto env = info.Env();
fs::path model_path;

std::string full_weight_path;
//todo
std::string library_path = ".";
std::string model_name;
std::string full_weight_path,
library_path = ".",
model_name,
device;
if(info[0].IsString()) {
model_path = info[0].As<Napi::String>().Utf8Value();
full_weight_path = model_path.string();
@@ -56,13 +131,14 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
} else {
library_path = ".";
}
device = config_object.Get("device").As<Napi::String>();
}
llmodel_set_implementation_search_path(library_path.c_str());
llmodel_error e = {
.message="looks good to me",
.code=0,
};
inference_ = std::make_shared<llmodel_model>(llmodel_model_create2(full_weight_path.c_str(), "auto", &e));
inference_ = llmodel_model_create2(full_weight_path.c_str(), "auto", &e);
if(e.code != 0) {
Napi::Error::New(env, e.message).ThrowAsJavaScriptException();
return;
@@ -74,18 +150,45 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
Napi::Error::New(env, "Had an issue creating llmodel object, inference is null").ThrowAsJavaScriptException();
return;
}
if(device != "cpu") {
size_t mem = llmodel_required_mem(GetInference(), full_weight_path.c_str());
if(mem == 0) {
std::cout << "WARNING: no memory needed. does this model support gpu?\n";
}
std::cout << "Initiating GPU\n";
std::cout << "Memory required estimation: " << mem << "\n";

auto success = llmodel_gpu_init_gpu_device_by_string(GetInference(), mem, device.c_str());
if(success) {
std::cout << "GPU init successfully\n";
} else {
std::cout << "WARNING: Failed to init GPU\n";
}
}

auto success = llmodel_loadModel(GetInference(), full_weight_path.c_str());
if(!success) {
Napi::Error::New(env, "Failed to load model at given path").ThrowAsJavaScriptException();
return;
}

name = model_name.empty() ? model_path.filename().string() : model_name;
full_model_path = full_weight_path;
};
//NodeModelWrapper::~NodeModelWrapper() {
//GetInference().reset();
//}

// NodeModelWrapper::~NodeModelWrapper() {
// if(GetInference() != nullptr) {
// std::cout << "Debug: deleting model\n";
// llmodel_model_destroy(inference_);
// std::cout << (inference_ == nullptr);
// }
// }
// void NodeModelWrapper::Finalize(Napi::Env env) {
// if(inference_ != nullptr) {
// std::cout << "Debug: deleting model\n";
//
// }
// }
Napi::Value NodeModelWrapper::IsModelLoaded(const Napi::CallbackInfo& info) {
return Napi::Boolean::New(info.Env(), llmodel_isModelLoaded(GetInference()));
}
@@ -193,8 +296,9 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
std::string copiedQuestion = question;
PromptWorkContext pc = {
copiedQuestion,
std::ref(inference_),
inference_,
copiedPrompt,
""
};
auto threadSafeContext = new TsfnContext(env, pc);
threadSafeContext->tsfn = Napi::ThreadSafeFunction::New(
@@ -210,7 +314,9 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
threadSafeContext->nativeThread = std::thread(threadEntry, threadSafeContext);
return threadSafeContext->deferred_.Promise();
}

void NodeModelWrapper::Dispose(const Napi::CallbackInfo& info) {
llmodel_model_destroy(inference_);
}
void NodeModelWrapper::SetThreadCount(const Napi::CallbackInfo& info) {
if(info[0].IsNumber()) {
llmodel_setThreadCount(GetInference(), info[0].As<Napi::Number>().Int64Value());
@@ -233,7 +339,7 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
}

llmodel_model NodeModelWrapper::GetInference() {
return *inference_;
return inference_;
}

//Exports Bindings
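The index.cc changes above wire the new GPU entry points (`listGpu`, `memoryNeeded`, `initGpuByString`, `hasGpuDevice`, `dispose`) into the Node wrapper. A rough usage sketch from the JavaScript side follows — the option and method names are taken from this diff and from spec/chat.mjs, while the model filename is only an example:

```js
import { loadModel, createCompletion } from '../src/gpt4all.js'

// Passing device: 'gpu' makes the native constructor estimate the required
// memory and call llmodel_gpu_init_gpu_device_by_string; on failure it prints
// a warning and falls back to loading the model on the CPU.
const model = await loadModel('mistral-7b-openorca.Q4_0.gguf', { verbose: true, device: 'gpu' })
const ll = model.llm

console.log('Required memory (bytes):', ll.memoryNeeded())
console.log('GPU devices:', ll.listGpu()) // [{ index, type, heapSize, name, vendor }, ...]
console.log('Using GPU:', ll.hasGpuDevice())

const completion = await createCompletion(model, [
    { role: 'system', content: 'You are an advanced mathematician.' },
    { role: 'user', content: 'What is 1 + 1?' },
])
console.log(completion.choices[0].message)

// dispose() calls llmodel_model_destroy on the native handle; the wrapper
// must not be used after this point.
model.dispose()
```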
15 changes: 12 additions & 3 deletions gpt4all-bindings/typescript/index.h
@@ -6,24 +6,33 @@
#include <atomic>
#include <memory>
#include <filesystem>
#include <set>
namespace fs = std::filesystem;


class NodeModelWrapper: public Napi::ObjectWrap<NodeModelWrapper> {
public:
NodeModelWrapper(const Napi::CallbackInfo &);
//~NodeModelWrapper();
//virtual ~NodeModelWrapper();
Napi::Value getType(const Napi::CallbackInfo& info);
Napi::Value IsModelLoaded(const Napi::CallbackInfo& info);
Napi::Value StateSize(const Napi::CallbackInfo& info);
//void Finalize(Napi::Env env) override;
/**
* Prompting the model. This entails spawning a new thread and adding the response tokens
* into a thread local string variable.
*/
Napi::Value Prompt(const Napi::CallbackInfo& info);
void SetThreadCount(const Napi::CallbackInfo& info);
void Dispose(const Napi::CallbackInfo& info);
Napi::Value getName(const Napi::CallbackInfo& info);
Napi::Value ThreadCount(const Napi::CallbackInfo& info);
Napi::Value GenerateEmbedding(const Napi::CallbackInfo& info);
Napi::Value HasGpuDevice(const Napi::CallbackInfo& info);
Napi::Value ListGpus(const Napi::CallbackInfo& info);
Napi::Value InitGpuByString(const Napi::CallbackInfo& info);
Napi::Value GetRequiredMemory(const Napi::CallbackInfo& info);
Napi::Value GetGpuDevices(const Napi::CallbackInfo& info);
/*
* The path that is used to search for the dynamic libraries
*/
@@ -37,10 +46,10 @@ class NodeModelWrapper: public Napi::ObjectWrap<NodeModelWrapper> {
/**
* The underlying inference that interfaces with the C interface
*/
std::shared_ptr<llmodel_model> inference_;
llmodel_model inference_;

std::string type;
// corresponds to LLModel::name() in typescript
std::string name;
static Napi::FunctionReference constructor;
std::string full_model_path;
};
5 changes: 5 additions & 0 deletions gpt4all-bindings/typescript/package.json
@@ -47,5 +47,10 @@
},
"jest": {
"verbose": true
},
"publishConfig": {
"registry": "https://registry.npmjs.org/",
"access": "public",
"tag": "latest"
}
}
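The `publishConfig` block appears to take over what the CircleCI step used to pass on the command line: with registry, access, and dist-tag declared in package.json, the publish job above can run a plain `npm publish` (note the tag also moves from `alpha` to `latest`).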
2 changes: 1 addition & 1 deletion gpt4all-bindings/typescript/prompt.cc
@@ -30,7 +30,7 @@ void threadEntry(TsfnContext* context) {
context->tsfn.BlockingCall(&context->pc,
[](Napi::Env env, Napi::Function jsCallback, PromptWorkContext* pc) {
llmodel_prompt(
*pc->inference_,
pc->inference_,
pc->question.c_str(),
&prompt_callback,
&response_callback,
2 changes: 1 addition & 1 deletion gpt4all-bindings/typescript/prompt.h
@@ -10,7 +10,7 @@
#include <memory>
struct PromptWorkContext {
std::string question;
std::shared_ptr<llmodel_model>& inference_;
llmodel_model inference_;
llmodel_prompt_context prompt_params;
std::string res;

16 changes: 10 additions & 6 deletions gpt4all-bindings/typescript/spec/chat.mjs
@@ -1,8 +1,8 @@
import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel } from '../src/gpt4all.js'

const model = await loadModel(
'orca-mini-3b-gguf2-q4_0.gguf',
{ verbose: true }
'mistral-7b-openorca.Q4_0.gguf',
{ verbose: true, device: 'gpu' }
);
const ll = model.llm;

@@ -26,7 +26,9 @@ console.log("name " + ll.name());
console.log("type: " + ll.type());
console.log("Default directory for models", DEFAULT_DIRECTORY);
console.log("Default directory for libraries", DEFAULT_LIBRARIES_DIRECTORY);

console.log("Has GPU", ll.hasGpuDevice());
console.log("gpu devices", ll.listGpu())
console.log("Required Mem in bytes", ll.memoryNeeded())
const completion1 = await createCompletion(model, [
{ role : 'system', content: 'You are an advanced mathematician.' },
{ role : 'user', content: 'What is 1 + 1?' },
@@ -40,23 +42,25 @@ const completion2 = await createCompletion(model, [

console.log(completion2.choices[0].message)

//CALLING DISPOSE WILL INVALID THE NATIVE MODEL. USE THIS TO CLEANUP
model.dispose()
// At the moment, from testing this code, concurrent model prompting is not possible.
// Behavior: The last prompt gets answered, but the rest are cancelled
// my experience with threading is not the best, so if anyone who is good is willing to give this a shot,
// maybe this is possible
// INFO: threading with llama.cpp is not the best maybe not even possible, so this will be left here as reference

//const responses = await Promise.all([
// createCompletion(ll, [
// createCompletion(model, [
// { role : 'system', content: 'You are an advanced mathematician.' },
// { role : 'user', content: 'What is 1 + 1?' },
// ], { verbose: true }),
// createCompletion(ll, [
// createCompletion(model, [
// { role : 'system', content: 'You are an advanced mathematician.' },
// { role : 'user', content: 'What is 1 + 1?' },
// ], { verbose: true }),
//
//createCompletion(ll, [
//createCompletion(model, [
// { role : 'system', content: 'You are an advanced mathematician.' },
// { role : 'user', content: 'What is 1 + 1?' },
//], { verbose: true })
8 changes: 3 additions & 5 deletions gpt4all-bindings/typescript/spec/embed.mjs
@@ -1,8 +1,6 @@
import { loadModel, createEmbedding } from '../src/gpt4all.js'
import { loadModel, createEmbedding } from '../src/gpt4all.js'

const embedder = await loadModel("ggml-all-MiniLM-L6-v2-f16.bin", { verbose: true })
const embedder = await loadModel("ggml-all-MiniLM-L6-v2-f16.bin", { verbose: true, type: 'embedding'})

console.log(
createEmbedding(embedder, "Accept your current situation")
)
console.log(createEmbedding(embedder, "Accept your current situation"))
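The added `type: 'embedding'` option is presumably what tells `loadModel` to return an embedding-capable wrapper for use with `createEmbedding`, rather than a chat model.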
