diff --git a/gpt4all-bindings/python/docs/gpt4all_nodejs.md b/gpt4all-bindings/python/docs/gpt4all_nodejs.md index f3a589f1fcba3..ccbc5fd31fa7a 100644 --- a/gpt4all-bindings/python/docs/gpt4all_nodejs.md +++ b/gpt4all-bindings/python/docs/gpt4all_nodejs.md @@ -145,152 +145,852 @@ This package is in active development, and breaking changes may happen until the * Should include prebuilds to avoid painful node-gyp errors * \[ ] createChatSession ( the python equivalent to create\_chat\_session ) -### Table of Contents* [ModelType][1] -* [ModelFile][2] - * [gptj][3] - * [llama][4] - * [mpt][5] - * [replit][6] -* [type][7] -* [dispose][8] -* [dispose][9] -* [LLModel][10] - * [constructor][11] - * [Parameters][12] - * [type][13] - * [name][14] - * [stateSize][15] - * [threadCount][16] - * [setThreadCount][17] - * [Parameters][18] - * [raw\_prompt][19] - * [Parameters][20] - * [embed][21] - * [Parameters][22] - * [isModelLoaded][23] - * [setLibraryPath][24] - * [Parameters][25] - * [getLibraryPath][26] - * [initGpuByString][27] - * [Parameters][28] - * [hasGpuDevice][29] - * [listGpu][30] - * [dispose][31] -* [GpuDevice][32] - * [type][33] -* [loadModel][34] - * [Parameters][35] -* [createCompletion][36] + + +### Table of Contents + +* [ModelFile][1] + * [gptj][2] + * [llama][3] + * [mpt][4] + * [replit][5] +* [type][6] +* [InferenceModel][7] + * [dispose][8] +* [EmbeddingModel][9] + * [dispose][10] +* [LLModel][11] + * [constructor][12] + * [Parameters][13] + * [type][14] + * [name][15] + * [stateSize][16] + * [threadCount][17] + * [setThreadCount][18] + * [Parameters][19] + * [raw\_prompt][20] + * [Parameters][21] + * [embed][22] + * [Parameters][23] + * [isModelLoaded][24] + * [setLibraryPath][25] + * [Parameters][26] + * [getLibraryPath][27] + * [initGpuByString][28] + * [Parameters][29] + * [hasGpuDevice][30] + * [listGpu][31] + * [dispose][32] +* [GpuDevice][33] + * [type][34] +* [LoadModelOptions][35] +* [loadModel][36] * [Parameters][37] -* [createEmbedding][38] +* [createCompletion][38] * [Parameters][39] -* [CompletionOptions][40] - * [verbose][41] - * [systemPromptTemplate][42] - * [promptTemplate][43] - * [promptHeader][44] - * [promptFooter][45] -* [PromptMessage][46] - * [role][47] - * [content][48] -* [prompt\_tokens][49] -* [completion\_tokens][50] -* [total\_tokens][51] -* [CompletionReturn][52] - * [model][53] - * [usage][54] - * [choices][55] -* [CompletionChoice][56] - * [message][57] -* [LLModelPromptContext][58] - * [logitsSize][59] - * [tokensSize][60] - * [nPast][61] - * [nCtx][62] - * [nPredict][63] - * [topK][64] - * [topP][65] - * [temp][66] - * [nBatch][67] - * [repeatPenalty][68] - * [repeatLastN][69] - * [contextErase][70] -* [createTokenStream][71] - * [Parameters][72] -* [DEFAULT\_DIRECTORY][73] -* [DEFAULT\_LIBRARIES\_DIRECTORY][74] -* [DEFAULT\_MODEL\_CONFIG][75] -* [DEFAULT\_PROMPT\_CONTEXT][76] -* [DEFAULT\_MODEL\_LIST\_URL][77] -* [downloadModel][78] - * [Parameters][79] - * [Examples][80] -* [DownloadModelOptions][81] - * [modelPath][82] - * [verbose][83] - * [url][84] - * [md5sum][85] -* [DownloadController][86] - * [cancel][87] - * [promise][88]## ModelTypeType of the modelType: (`"gptj"` | `"llama"` | `"mpt"` | `"replit"`)## ModelFileFull list of models available**Meta*** **deprecated**: These model names are outdated and this type will not be maintained, please use a string literal instead### gptjList of GPT-J ModelsType: (`"ggml-gpt4all-j-v1.3-groovy.bin"` | `"ggml-gpt4all-j-v1.2-jazzy.bin"` | `"ggml-gpt4all-j-v1.1-breezy.bin"` | 
`"ggml-gpt4all-j.bin"`)### llamaList Llama ModelsType: (`"ggml-gpt4all-l13b-snoozy.bin"` | `"ggml-vicuna-7b-1.1-q4_2.bin"` | `"ggml-vicuna-13b-1.1-q4_2.bin"` | `"ggml-wizardLM-7B.q4_2.bin"` | `"ggml-stable-vicuna-13B.q4_2.bin"` | `"ggml-nous-gpt4-vicuna-13b.bin"` | `"ggml-v3-13b-hermes-q5_1.bin"`)### mptList of MPT ModelsType: (`"ggml-mpt-7b-base.bin"` | `"ggml-mpt-7b-chat.bin"` | `"ggml-mpt-7b-instruct.bin"`)### replitList of Replit ModelsType: `"ggml-replit-code-v1-3b.bin"`## typeModel architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user.Type: [ModelType][1]## disposedelete and cleanup the native modelReturns **void** ## disposedelete and cleanup the native modelReturns **void** ## LLModelLLModel class representing a language model. -This is a base class that provides common functionality for different types of language models.### constructorInitialize a new LLModel.#### Parameters* `path` **[string][89]** Absolute path to the model file.* Throws **[Error][90]** If the model file does not exist.### typeeither 'gpt', mpt', or 'llama' or undefinedReturns **([ModelType][1] | [undefined][91])** ### nameThe name of the model.Returns **[string][89]** ### stateSizeGet the size of the internal state of the model. -NOTE: This state data is specific to the type of model you have created.Returns **[number][92]** the size in bytes of the internal state of the model### threadCountGet the number of threads used for model inference. -The default is the number of physical cores your computer has.Returns **[number][92]** The number of threads used for model inference.### setThreadCountSet the number of threads used for model inference.#### Parameters* `newNumber` **[number][92]** The new number of threads.Returns **void** ### raw\_promptPrompt the model with a given input and optional parameters. 
+* [createEmbedding][40]
+  * [Parameters][41]
+* [CompletionOptions][42]
+  * [verbose][43]
+  * [systemPromptTemplate][44]
+  * [promptTemplate][45]
+  * [promptHeader][46]
+  * [promptFooter][47]
+* [PromptMessage][48]
+  * [role][49]
+  * [content][50]
+* [prompt\_tokens][51]
+* [completion\_tokens][52]
+* [total\_tokens][53]
+* [CompletionReturn][54]
+  * [model][55]
+  * [usage][56]
+  * [choices][57]
+* [CompletionChoice][58]
+  * [message][59]
+* [LLModelPromptContext][60]
+  * [logitsSize][61]
+  * [tokensSize][62]
+  * [nPast][63]
+  * [nCtx][64]
+  * [nPredict][65]
+  * [topK][66]
+  * [topP][67]
+  * [temp][68]
+  * [nBatch][69]
+  * [repeatPenalty][70]
+  * [repeatLastN][71]
+  * [contextErase][72]
+* [createTokenStream][73]
+  * [Parameters][74]
+* [DEFAULT\_DIRECTORY][75]
+* [DEFAULT\_LIBRARIES\_DIRECTORY][76]
+* [DEFAULT\_MODEL\_CONFIG][77]
+* [DEFAULT\_PROMPT\_CONTEXT][78]
+* [DEFAULT\_MODEL\_LIST\_URL][79]
+* [downloadModel][80]
+  * [Parameters][81]
+  * [Examples][82]
+* [DownloadModelOptions][83]
+  * [modelPath][84]
+  * [verbose][85]
+  * [url][86]
+  * [md5sum][87]
+* [DownloadController][88]
+  * [cancel][89]
+  * [promise][90]
+
+## ModelFile
+
+Full list of models available
+
+### gptj
+
+List of GPT-J Models
+
+Type: (`"ggml-gpt4all-j-v1.3-groovy.bin"` | `"ggml-gpt4all-j-v1.2-jazzy.bin"` | `"ggml-gpt4all-j-v1.1-breezy.bin"` | `"ggml-gpt4all-j.bin"`)
+
+### llama
+
+List of Llama Models
+
+Type: (`"ggml-gpt4all-l13b-snoozy.bin"` | `"ggml-vicuna-7b-1.1-q4_2.bin"` | `"ggml-vicuna-13b-1.1-q4_2.bin"` | `"ggml-wizardLM-7B.q4_2.bin"` | `"ggml-stable-vicuna-13B.q4_2.bin"` | `"ggml-nous-gpt4-vicuna-13b.bin"` | `"ggml-v3-13b-hermes-q5_1.bin"`)
+
+### mpt
+
+List of MPT Models
+
+Type: (`"ggml-mpt-7b-base.bin"` | `"ggml-mpt-7b-chat.bin"` | `"ggml-mpt-7b-instruct.bin"`)
+
+### replit
+
+List of Replit Models
+
+Type: `"ggml-replit-code-v1-3b.bin"`
+
+## type
+
+Model architecture. This argument currently does not have any functionality and is just used as a descriptive identifier for the user.
+
+Type: ModelType
+
+## InferenceModel
+
+InferenceModel represents an LLM which can make chat predictions, similar to GPT transformers.
+
+### dispose
+
+delete and cleanup the native model
+
+Returns **void**
+
+## EmbeddingModel
+
+EmbeddingModel represents an LLM which can create embeddings, which are float arrays.
+
+### dispose
+
+delete and cleanup the native model
+
+Returns **void**
+
+## LLModel
+
+LLModel class representing a language model.
+This is a base class that provides common functionality for different types of language models.
+
+### constructor
+
+Initialize a new LLModel.
+
+#### Parameters
+
+* `path` **[string][91]** Absolute path to the model file.
+
+
+
+* Throws **[Error][92]** If the model file does not exist.
+
+### type
+
+Either 'gptj', 'llama', 'mpt', 'replit', or undefined.
+
+Returns **(ModelType | [undefined][93])**
+
+### name
+
+The name of the model.
+
+Returns **[string][91]**
+
+### stateSize
+
+Get the size of the internal state of the model.
+NOTE: This state data is specific to the type of model you have created.
+
+Returns **[number][94]** The size in bytes of the internal state of the model.
+
+### threadCount
+
+Get the number of threads used for model inference.
+The default is the number of physical cores your computer has.
+
+Returns **[number][94]** The number of threads used for model inference.
+
+### setThreadCount
+
+Set the number of threads used for model inference.
+
+#### Parameters
+
+* `newNumber` **[number][94]** The new number of threads.
+ +Returns **void** + +### raw\_prompt + +Prompt the model with a given input and optional parameters. This is the raw output from model. -Use the prompt function exported for a value#### Parameters* `q` **[string][89]** The prompt input. -* `params` **Partial<[LLModelPromptContext][58]>** Optional parameters for the prompt context. -* `callback` **function (res: [string][89]): void** Returns **void** The result of the model prompt.### embedEmbed text with the model. Keep in mind that +Use the prompt function exported for a value + +#### Parameters + +* `q` **[string][91]** The prompt input. +* `params` **Partial<[LLModelPromptContext][60]>** Optional parameters for the prompt context. +* `callback` **function (res: [string][91]): void** + +Returns **void** The result of the model prompt. + +### embed + +Embed text with the model. Keep in mind that not all models can embed text, (only bert can embed as of 07/16/2023 (mm/dd/yyyy)) -Use the prompt function exported for a value#### Parameters* `text` **[string][89]** +Use the prompt function exported for a value + +#### Parameters + +* `text` **[string][91]** * `q` The prompt input. -* `params` Optional parameters for the prompt context.Returns **[Float32Array][93]** The result of the model prompt.### isModelLoadedWhether the model is loaded or not.Returns **[boolean][94]** ### setLibraryPathWhere to search for the pluggable backend libraries#### Parameters* `s` **[string][89]** Returns **void** ### getLibraryPathWhere to get the pluggable backend librariesReturns **[string][89]** ### initGpuByStringInitiate a GPU by a string identifier.#### Parameters* `memory_required` **[number][92]** Should be in the range size\_t or will throw -* `device_name` **[string][89]** 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name. - read LoadModelOptions.device for more informationReturns **[boolean][94]** ### hasGpuDeviceFrom C documentationReturns **[boolean][94]** True if a GPU device is successfully initialized, false otherwise.### listGpuGPUs that are usable for this LLModel* Throws **any** if hasGpuDevice returns false (i think)Returns **[Array][95]<[GpuDevice][32]>** ### disposedelete and cleanup the native modelReturns **void** ## GpuDevicean object that contains gpu data on this machine.### typesame as VkPhysicalDeviceTypeType: [number][92]## loadModelLoads a machine learning model with the specified name. The defacto way to create a model. -By default this will download a model from the official GPT4ALL website, if a model is not present at given path.### Parameters* `modelName` **[string][89]** The name of the model to load. -* `options` **(LoadModelOptions | [undefined][91])?** (Optional) Additional options for loading the model.Returns **[Promise][96]<(InferenceModel | EmbeddingModel)>** A promise that resolves to an instance of the loaded LLModel.## createCompletionThe nodejs equivalent to python binding's chat\_completion### Parameters* `model` **InferenceModel** The language model object. -* `messages` **[Array][95]<[PromptMessage][46]>** The array of messages for the conversation. -* `options` **[CompletionOptions][40]** The options for creating the completion.Returns **[CompletionReturn][52]** The completion result.## createEmbeddingThe nodejs moral equivalent to python binding's Embed4All().embed() -meow### Parameters* `model` **EmbeddingModel** The language model object. 
-* `text` **[string][89]** text to embedReturns **[Float32Array][93]** The completion result.## CompletionOptions**Extends Partial\**The options for creating the completion.### verboseIndicates if verbose logging is enabled.Type: [boolean][94]### systemPromptTemplateTemplate for the system message. Will be put before the conversation with %1 being replaced by all system messages. -Note that if this is not defined, system messages will not be included in the prompt.Type: [string][89]### promptTemplateTemplate for user messages, with %1 being replaced by the message.Type: [boolean][94]### promptHeaderThe initial instruction for the model, on top of the promptType: [string][89]### promptFooterThe last instruction for the model, appended to the end of the prompt.Type: [string][89]## PromptMessageA message in the conversation, identical to OpenAI's chat message.### roleThe role of the message.Type: (`"system"` | `"assistant"` | `"user"`)### contentThe message content.Type: [string][89]## prompt\_tokensThe number of tokens used in the prompt.Type: [number][92]## completion\_tokensThe number of tokens used in the completion.Type: [number][92]## total\_tokensThe total number of tokens used.Type: [number][92]## CompletionReturnThe result of the completion, similar to OpenAI's format.### modelThe model used for the completion.Type: [string][89]### usageToken usage report.Type: {prompt\_tokens: [number][92], completion\_tokens: [number][92], total\_tokens: [number][92]}### choicesThe generated completions.Type: [Array][95]<[CompletionChoice][56]>## CompletionChoiceA completion choice, similar to OpenAI's format.### messageResponse messageType: [PromptMessage][46]## LLModelPromptContextModel inference arguments for generating completions.### logitsSizeThe size of the raw logits vector.Type: [number][92]### tokensSizeThe size of the raw tokens vector.Type: [number][92]### nPastThe number of tokens in the past conversation.Type: [number][92]### nCtxThe number of tokens possible in the context window.Type: [number][92]### nPredictThe number of tokens to predict.Type: [number][92]### topKThe top-k logits to sample from. +* `params` Optional parameters for the prompt context. + +Returns **[Float32Array][95]** The result of the model prompt. + +### isModelLoaded + +Whether the model is loaded or not. + +Returns **[boolean][96]** + +### setLibraryPath + +Where to search for the pluggable backend libraries + +#### Parameters + +* `s` **[string][91]** + +Returns **void** + +### getLibraryPath + +Where to get the pluggable backend libraries + +Returns **[string][91]** + +### initGpuByString + +Initiate a GPU by a string identifier. + +#### Parameters + +* `memory_required` **[number][94]** Should be in the range size\_t or will throw +* `device_name` **[string][91]** 'amd' | 'nvidia' | 'intel' | 'gpu' | gpu name. + read LoadModelOptions.device for more information + +Returns **[boolean][96]** + +### hasGpuDevice + +From C documentation + +Returns **[boolean][96]** True if a GPU device is successfully initialized, false otherwise. + +### listGpu + +GPUs that are usable for this LLModel + +* Throws **any** if hasGpuDevice returns false (i think) + +Returns **[Array][97]<[GpuDevice][33]>** + +### dispose + +delete and cleanup the native model + +Returns **void** + +## GpuDevice + +an object that contains gpu data on this machine. + +### type + +same as VkPhysicalDeviceType + +Type: [number][94] + +## LoadModelOptions + +Options that configure a model's behavior. 
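+
+Below is a minimal sketch (not part of the generated reference) of passing these options to `loadModel`, which is documented in the next section. Only the `modelPath`, `librariesPath`, and `device` fields are referenced elsewhere in this document; the concrete paths, model name, and device value are illustrative assumptions.
+
+```javascript
+import { loadModel } from 'gpt4all'
+
+// Hypothetical paths and device name -- adjust to your environment.
+const model = await loadModel('ggml-gpt4all-j-v1.3-groovy.bin', {
+    modelPath: '/path/to/models',        // where the model file lives, or will be downloaded to
+    librariesPath: '/path/to/libraries', // where the pluggable backend libraries are searched for
+    device: 'gpu',                       // see initGpuByString and LoadModelOptions.device
+})
+```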
+
+## loadModel
+
+Loads a machine learning model with the specified name. This is the de facto way to create a model.
+By default this will download the model from the official GPT4All website if it is not present at the given path.
+
+### Parameters
+
+* `modelName` **[string][91]** The name of the model to load.
+* `options` **([LoadModelOptions][35] | [undefined][93])?** (Optional) Additional options for loading the model.
+
+Returns **[Promise][98]<([InferenceModel][7] | [EmbeddingModel][9])>** A promise that resolves to an instance of the loaded model.
+
+## createCompletion
+
+The Node.js equivalent of the Python binding's chat\_completion
+
+### Parameters
+
+* `model` **[InferenceModel][7]** The language model object.
+* `messages` **[Array][97]<[PromptMessage][48]>** The array of messages for the conversation.
+* `options` **[CompletionOptions][42]** The options for creating the completion.
+
+Returns **[CompletionReturn][54]** The completion result.
+
+## createEmbedding
+
+The Node.js equivalent of the Python binding's Embed4All().embed()
+
+### Parameters
+
+* `model` **[EmbeddingModel][9]** The language model object.
+* `text` **[string][91]** The text to embed.
+
+Returns **[Float32Array][95]** The embedding result.
+
+## CompletionOptions
+
+**Extends Partial<[LLModelPromptContext][60]>**
+
+The options for creating the completion.
+
+### verbose
+
+Indicates if verbose logging is enabled.
+
+Type: [boolean][96]
+
+### systemPromptTemplate
+
+Template for the system message. Will be put before the conversation with %1 being replaced by all system messages.
+Note that if this is not defined, system messages will not be included in the prompt.
+
+Type: [string][91]
+
+### promptTemplate
+
+Template for user messages, with %1 being replaced by the message.
+
+Type: [boolean][96]
+
+### promptHeader
+
+The initial instruction for the model, placed on top of the prompt.
+
+Type: [string][91]
+
+### promptFooter
+
+The last instruction for the model, appended to the end of the prompt.
+
+Type: [string][91]
+
+## PromptMessage
+
+A message in the conversation, identical to OpenAI's chat message.
+
+### role
+
+The role of the message.
+
+Type: (`"system"` | `"assistant"` | `"user"`)
+
+### content
+
+The message content.
+
+Type: [string][91]
+
+## prompt\_tokens
+
+The number of tokens used in the prompt.
+
+Type: [number][94]
+
+## completion\_tokens
+
+The number of tokens used in the completion.
+
+Type: [number][94]
+
+## total\_tokens
+
+The total number of tokens used.
+
+Type: [number][94]
+
+## CompletionReturn
+
+The result of the completion, similar to OpenAI's format.
+
+### model
+
+The model used for the completion.
+
+Type: [string][91]
+
+### usage
+
+Token usage report.
+
+Type: {prompt\_tokens: [number][94], completion\_tokens: [number][94], total\_tokens: [number][94]}
+
+### choices
+
+The generated completions.
+
+Type: [Array][97]<[CompletionChoice][58]>
+
+## CompletionChoice
+
+A completion choice, similar to OpenAI's format.
+
+### message
+
+Response message.
+
+Type: [PromptMessage][48]
+
+## LLModelPromptContext
+
+Model inference arguments for generating completions.
+
+### logitsSize
+
+The size of the raw logits vector.
+
+Type: [number][94]
+
+### tokensSize
+
+The size of the raw tokens vector.
+
+Type: [number][94]
+
+### nPast
+
+The number of tokens in the past conversation.
+
+Type: [number][94]
+
+### nCtx
+
+The number of tokens possible in the context window.
+
+Type: [number][94]
+
+### nPredict
+
+The number of tokens to predict.
+ +Type: [number][94] + +### topK + +The top-k logits to sample from. Top-K sampling selects the next token only from the top K most likely tokens predicted by the model. It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit the diversity of the output. A higher value for top-K (eg., 100) will consider more tokens and lead to more diverse text, while a lower value (eg., 10) will focus on the most probable tokens and generate -more conservative text. 30 - 60 is a good range for most tasks.Type: [number][92]### topPThe nucleus sampling probability threshold. +more conservative text. 30 - 60 is a good range for most tasks. + +Type: [number][94] + +### topP + +The nucleus sampling probability threshold. Top-P limits the selection of the next token to a subset of tokens with a cumulative probability above a threshold P. This method, also known as nucleus sampling, finds a balance between diversity and quality by considering both token probabilities and the number of tokens available for sampling. When using a higher value for top-P (eg., 0.95), the generated text becomes more diverse. On the other hand, a lower value (eg., 0.1) produces more focused and conservative text. The default value is 0.4, which is aimed to be the middle ground between focus and diversity, but -for more creative tasks a higher top-p value will be beneficial, about 0.5-0.9 is a good range for that.Type: [number][92]### tempThe temperature to adjust the model's output distribution. +for more creative tasks a higher top-p value will be beneficial, about 0.5-0.9 is a good range for that. + +Type: [number][94] + +### temp + +The temperature to adjust the model's output distribution. Temperature is like a knob that adjusts how creative or focused the output becomes. Higher temperatures (eg., 1.2) increase randomness, resulting in more imaginative and diverse text. Lower temperatures (eg., 0.5) make the output more focused, predictable, and conservative. When the temperature is set to 0, the output becomes completely deterministic, always selecting the most probable next token and producing identical results -each time. A safe range would be around 0.6 - 0.85, but you are free to search what value fits best for you.Type: [number][92]### nBatchThe number of predictions to generate in parallel. +each time. A safe range would be around 0.6 - 0.85, but you are free to search what value fits best for you. + +Type: [number][94] + +### nBatch + +The number of predictions to generate in parallel. By splitting the prompt every N tokens, prompt-batch-size reduces RAM usage during processing. However, this can increase the processing time as a trade-off. If the N value is set too low (e.g., 10), long prompts with 500+ tokens will be most affected, requiring numerous processing runs to complete the prompt processing. -To ensure optimal performance, setting the prompt-batch-size to 2048 allows processing of all tokens in a single run.Type: [number][92]### repeatPenaltyThe penalty factor for repeated tokens. +To ensure optimal performance, setting the prompt-batch-size to 2048 allows processing of all tokens in a single run. + +Type: [number][94] + +### repeatPenalty + +The penalty factor for repeated tokens. Repeat-penalty can help penalize tokens based on how frequently they occur in the text, including the input prompt. A token that has already appeared five times is penalized more heavily than a token that has appeared only one time. 
-A value of 1 means that there is no penalty and values larger than 1 discourage repeated tokens.Type: [number][92]### repeatLastNThe number of last tokens to penalize. +A value of 1 means that there is no penalty and values larger than 1 discourage repeated tokens. + +Type: [number][94] + +### repeatLastN + +The number of last tokens to penalize. The repeat-penalty-tokens N option controls the number of tokens in the history to consider for penalizing repetition. A larger value will look further back in the generated text to prevent repetitions, while a smaller value will only -consider recent tokens.Type: [number][92]### contextEraseThe percentage of context to erase if the context window is exceeded.Type: [number][92]## createTokenStreamTODO: Help wanted to implement this### Parameters* `llmodel` **[LLModel][10]** -* `messages` **[Array][95]<[PromptMessage][46]>** -* `options` **[CompletionOptions][40]** Returns **function (ll: [LLModel][10]): AsyncGenerator<[string][89]>** ## DEFAULT\_DIRECTORYFrom python api: -models will be stored in (homedir)/.cache/gpt4all/\`Type: [string][89]## DEFAULT\_LIBRARIES\_DIRECTORYFrom python api: +consider recent tokens. + +Type: [number][94] + +### contextErase + +The percentage of context to erase if the context window is exceeded. + +Type: [number][94] + +## createTokenStream + +TODO: Help wanted to implement this + +### Parameters + +* `llmodel` **[LLModel][11]** +* `messages` **[Array][97]<[PromptMessage][48]>** +* `options` **[CompletionOptions][42]** + +Returns **function (ll: [LLModel][11]): AsyncGenerator<[string][91]>** + +## DEFAULT\_DIRECTORY + +From python api: +models will be stored in (homedir)/.cache/gpt4all/\` + +Type: [string][91] + +## DEFAULT\_LIBRARIES\_DIRECTORY + +From python api: The default path for dynamic libraries to be stored. You may separate paths by a semicolon to search in multiple areas. -This searches DEFAULT\_DIRECTORY/libraries, cwd/libraries, and finally cwd.Type: [string][89]## DEFAULT\_MODEL\_CONFIGDefault model configuration.Type: ModelConfig## DEFAULT\_PROMPT\_CONTEXTDefault prompt context.Type: [LLModelPromptContext][58]## DEFAULT\_MODEL\_LIST\_URLDefault model list url.Type: [string][89]## downloadModelInitiates the download of a model file. -By default this downloads without waiting. use the controller returned to alter this behavior.### Parameters* `modelName` **[string][89]** The model to be downloaded. -* `options` **DownloadOptions** to pass into the downloader. Default is { location: (cwd), verbose: false }.### Examples```javascript +This searches DEFAULT\_DIRECTORY/libraries, cwd/libraries, and finally cwd. + +Type: [string][91] + +## DEFAULT\_MODEL\_CONFIG + +Default model configuration. + +Type: ModelConfig + +## DEFAULT\_PROMPT\_CONTEXT + +Default prompt context. + +Type: [LLModelPromptContext][60] + +## DEFAULT\_MODEL\_LIST\_URL + +Default model list url. + +Type: [string][91] + +## downloadModel + +Initiates the download of a model file. +By default this downloads without waiting. use the controller returned to alter this behavior. + +### Parameters + +* `modelName` **[string][91]** The model to be downloaded. +* `options` **DownloadOptions** to pass into the downloader. Default is { location: (cwd), verbose: false }. + +### Examples + +```javascript const download = downloadModel('ggml-gpt4all-j-v1.3-groovy.bin') download.promise.then(() => console.log('Downloaded!')) -```* Throws **[Error][90]** If the model already exists in the specified location. 
-* Throws **[Error][90]** If the model cannot be found at the specified url.Returns **[DownloadController][86]** object that allows controlling the download process.## DownloadModelOptionsOptions for the model download process.### modelPathlocation to download the model. -Default is process.cwd(), or the current working directoryType: [string][89]### verboseDebug mode -- check how long it took to download in secondsType: [boolean][94]### urlRemote download url. Defaults to `https://gpt4all.io/models/gguf/`Type: [string][89]### md5sumMD5 sum of the model file. If this is provided, the downloaded file will be checked against this sum. -If the sums do not match, an error will be thrown and the file will be deleted.Type: [string][89]## DownloadControllerModel download controller.### cancelCancel the request to download if this is called.Type: function (): void### promiseA promise resolving to the downloaded models config once the download is doneType: [Promise][96]\[1]: #modeltype[2]: #modelfile[3]: #gptj[4]: #llama[5]: #mpt[6]: #replit[7]: #type[8]: #dispose[9]: #dispose-1[10]: #llmodel[11]: #constructor[12]: #parameters[13]: #type-1[14]: #name[15]: #statesize[16]: #threadcount[17]: #setthreadcount[18]: #parameters-1[19]: #raw_prompt[20]: #parameters-2[21]: #embed[22]: #parameters-3[23]: #ismodelloaded[24]: #setlibrarypath[25]: #parameters-4[26]: #getlibrarypath[27]: #initgpubystring[28]: #parameters-5[29]: #hasgpudevice[30]: #listgpu[31]: #dispose-2[32]: #gpudevice[33]: #type-2[34]: #loadmodel[35]: #parameters-6[36]: #createcompletion[37]: #parameters-7[38]: #createembedding[39]: #parameters-8[40]: #completionoptions[41]: #verbose[42]: #systemprompttemplate[43]: #prompttemplate[44]: #promptheader[45]: #promptfooter[46]: #promptmessage[47]: #role[48]: #content[49]: #prompt_tokens[50]: #completion_tokens[51]: #total_tokens[52]: #completionreturn[53]: #model[54]: #usage[55]: #choices[56]: #completionchoice[57]: #message[58]: #llmodelpromptcontext[59]: #logitssize[60]: #tokenssize[61]: #npast[62]: #nctx[63]: #npredict[64]: #topk[65]: #topp[66]: #temp[67]: #nbatch[68]: #repeatpenalty[69]: #repeatlastn[70]: #contexterase[71]: #createtokenstream[72]: #parameters-9[73]: #default_directory[74]: #default_libraries_directory[75]: #default_model_config[76]: #default_prompt_context[77]: #default_model_list_url[78]: #downloadmodel[79]: #parameters-10[80]: #examples[81]: #downloadmodeloptions[82]: #modelpath[83]: #verbose-1[84]: #url[85]: #md5sum[86]: #downloadcontroller[87]: #cancel[88]: #promise[89]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String[90]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error[91]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined[92]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number[93]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array[94]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean[95]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array[96]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise +``` + +* Throws **[Error][92]** If the model already exists in the specified location. +* Throws **[Error][92]** If the model cannot be found at the specified url. + +Returns **[DownloadController][88]** object that allows controlling the download process. 
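+
+Below is a short sketch of driving the returned [DownloadController][88]; its `cancel` and `promise` members are documented in the DownloadController section below, while the model name, the `verbose` option, and the timeout policy are illustrative assumptions.
+
+```javascript
+import { downloadModel } from 'gpt4all'
+
+// Start the download without waiting; keep the controller to manage it.
+const controller = downloadModel('ggml-gpt4all-j-v1.3-groovy.bin', { verbose: true })
+
+// Hypothetical policy: give up if the download has not finished after 10 minutes.
+const timer = setTimeout(() => controller.cancel(), 10 * 60 * 1000)
+
+controller.promise
+    .then((config) => console.log('Downloaded; model config:', config))
+    .catch((err) => console.error('Download failed or was cancelled:', err))
+    .finally(() => clearTimeout(timer))
+```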
+ +## DownloadModelOptions + +Options for the model download process. + +### modelPath + +location to download the model. +Default is process.cwd(), or the current working directory + +Type: [string][91] + +### verbose + +Debug mode -- check how long it took to download in seconds + +Type: [boolean][96] + +### url + +Remote download url. Defaults to `https://gpt4all.io/models/gguf/` + +Type: [string][91] + +### md5sum + +MD5 sum of the model file. If this is provided, the downloaded file will be checked against this sum. +If the sums do not match, an error will be thrown and the file will be deleted. + +Type: [string][91] + +## DownloadController + +Model download controller. + +### cancel + +Cancel the request to download if this is called. + +Type: function (): void + +### promise + +A promise resolving to the downloaded models config once the download is done + +Type: [Promise][98]\ + +[1]: #modelfile + +[2]: #gptj + +[3]: #llama + +[4]: #mpt + +[5]: #replit + +[6]: #type + +[7]: #inferencemodel + +[8]: #dispose + +[9]: #embeddingmodel + +[10]: #dispose-1 + +[11]: #llmodel + +[12]: #constructor + +[13]: #parameters + +[14]: #type-1 + +[15]: #name + +[16]: #statesize + +[17]: #threadcount + +[18]: #setthreadcount + +[19]: #parameters-1 + +[20]: #raw_prompt + +[21]: #parameters-2 + +[22]: #embed + +[23]: #parameters-3 + +[24]: #ismodelloaded + +[25]: #setlibrarypath + +[26]: #parameters-4 + +[27]: #getlibrarypath + +[28]: #initgpubystring + +[29]: #parameters-5 + +[30]: #hasgpudevice + +[31]: #listgpu + +[32]: #dispose-2 + +[33]: #gpudevice + +[34]: #type-2 + +[35]: #loadmodeloptions + +[36]: #loadmodel + +[37]: #parameters-6 + +[38]: #createcompletion + +[39]: #parameters-7 + +[40]: #createembedding + +[41]: #parameters-8 + +[42]: #completionoptions + +[43]: #verbose + +[44]: #systemprompttemplate + +[45]: #prompttemplate + +[46]: #promptheader + +[47]: #promptfooter + +[48]: #promptmessage + +[49]: #role + +[50]: #content + +[51]: #prompt_tokens + +[52]: #completion_tokens + +[53]: #total_tokens + +[54]: #completionreturn + +[55]: #model + +[56]: #usage + +[57]: #choices + +[58]: #completionchoice + +[59]: #message + +[60]: #llmodelpromptcontext + +[61]: #logitssize + +[62]: #tokenssize + +[63]: #npast + +[64]: #nctx + +[65]: #npredict + +[66]: #topk + +[67]: #topp + +[68]: #temp + +[69]: #nbatch + +[70]: #repeatpenalty + +[71]: #repeatlastn + +[72]: #contexterase + +[73]: #createtokenstream + +[74]: #parameters-9 + +[75]: #default_directory + +[76]: #default_libraries_directory + +[77]: #default_model_config + +[78]: #default_prompt_context + +[79]: #default_model_list_url + +[80]: #downloadmodel + +[81]: #parameters-10 + +[82]: #examples + +[83]: #downloadmodeloptions + +[84]: #modelpath + +[85]: #verbose-1 + +[86]: #url + +[87]: #md5sum + +[88]: #downloadcontroller + +[89]: #cancel + +[90]: #promise + +[91]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String + +[92]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error + +[93]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined + +[94]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number + +[95]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array + +[96]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean + +[97]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array + +[98]: 
https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise diff --git a/gpt4all-bindings/typescript/package.json b/gpt4all-bindings/typescript/package.json index fbd140cbbf92c..d998275e84752 100644 --- a/gpt4all-bindings/typescript/package.json +++ b/gpt4all-bindings/typescript/package.json @@ -9,7 +9,7 @@ "test": "jest", "build:backend": "node scripts/build.js", "build": "node-gyp-build", - "docs:build": "documentation build ./src/gpt4all.d.ts --parse-extension js d.ts --format md -o ../python/docs/gpt4all_nodejs.md && node scripts/docs.js " + "docs:build": "documentation build ./src/gpt4all.d.ts --parse-extension d.ts --format md -o ../python/docs/gpt4all_nodejs.md && node scripts/docs.js " }, "files": [ "src/**/*", diff --git a/gpt4all-bindings/typescript/src/gpt4all.d.ts b/gpt4all-bindings/typescript/src/gpt4all.d.ts index 1420b88cba564..f3e557a9ddd46 100644 --- a/gpt4all-bindings/typescript/src/gpt4all.d.ts +++ b/gpt4all-bindings/typescript/src/gpt4all.d.ts @@ -1,13 +1,12 @@ /// declare module "gpt4all"; -/** Type of the model */ type ModelType = "gptj" | "llama" | "mpt" | "replit"; // NOTE: "deprecated" tag in below comment breaks the doc generator https://github.com/documentationjs/documentation/issues/1596 /** * Full list of models available - * @deprecated These model names are outdated and this type will not be maintained, please use a string literal instead + * DEPRECATED!! These model names are outdated and this type will not be maintained, please use a string literal instead */ interface ModelFile { /** List of GPT-J Models */ @@ -34,7 +33,6 @@ interface ModelFile { replit: "ggml-replit-code-v1-3b.bin"; } -//mirrors py options interface LLModelOptions { /** * Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user. @@ -51,7 +49,11 @@ interface ModelConfig { path: string; url?: string; } - +/** + * + * InferenceModel represents an LLM which can make chat predictions, similar to GPT transformers. + * + */ declare class InferenceModel { constructor(llm: LLModel, config: ModelConfig); llm: LLModel; @@ -68,6 +70,9 @@ declare class InferenceModel { dispose(): void } +/** + * EmbeddingModel represents an LLM which can create embeddings, which are float arrays + */ declare class EmbeddingModel { constructor(llm: LLModel, config: ModelConfig); llm: LLModel; @@ -182,8 +187,8 @@ declare class LLModel { dispose(): void } /** - * an object that contains gpu data on this machine. - */ + * an object that contains gpu data on this machine. + */ interface GpuDevice { index: number; /** @@ -195,6 +200,9 @@ interface GpuDevice { vendor: string; } +/** + * Options that configure a model's behavior. + */ interface LoadModelOptions { modelPath?: string; librariesPath?: string;