Add the browser package

- Added LICENSE and README.md files for the browser package - Created a new package.json file with dependencies and scripts - Implemented interfaces and main.ts file for the WllamaProvider class - Implemented the WllamaProvider class in wllama.ts, including methods for loading models and making inferences
synw · Sep 1, 2024 · 84ec70d · 84ec70d
1 parent 8c03a71
commit 84ec70d
Show file tree

Hide file tree

Showing 7 changed files with 398 additions and 0 deletions.
diff --git a/packages/browser/LICENSE b/packages/browser/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 synw
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/packages/browser/README.md b/packages/browser/README.md
@@ -0,0 +1,56 @@
+# LocalLm Browser
+
+Run models in the browser using [Wllama](https://github.com/ngxson/wllama)
+
+## Install
+
+```bash
+npm i @locallm/browser
+```
+
+## Usage
+
+Vue.js example:
+
+```vue
+<template>
+  <div>
+    {{ output }}
+  </div>
+</template>
+
+<script setup lang="ts">
+import { onMounted, ref } from 'vue';
+import { PromptTemplate } from 'modprompt';
+import { LmBrowserProviderParams, OnLoadProgress, WllamaProvider } from '@locallm/browser';
+
+const output = ref("");
+
+const params: LmBrowserProviderParams = {
+  name: "wllama",
+  onToken: (t) => { output.value = t },
+};
+const lm = WllamaProvider.init(params);
+const model = {
+  name: "Qwen 0.5b",
+  url: "https://huggingface.co/Qwen/Qwen2-0.5B-Instruct-GGUF/resolve/main/qwen2-0_5b-instruct-q5_k_m.gguf",
+  ctx: 32768,
+}
+
+const onModelLoading: OnLoadProgress = (st) => {
+  console.log(st.percent, "%")
+}
+
+async function init() {
+  await lm.loadBrowsermodel(model.name, model.url, model.ctx, onModelLoading);
+  const p = new PromptTemplate("chatml")
+    .replaceSystem("You are an AI assistant. Important: always use json to respond")
+    .prompt("List the planets of the solar system.")
+  const res = await lm.infer(
+    p,
+    { temperature: 0, min_p: 0.05 }
+  );
+  console.log(res.stats)
+}
+
+onMounted(() => init())
+</script>
+```
diff --git a/packages/browser/package.json b/packages/browser/package.json
@@ -0,0 +1,41 @@
+{
+  "name": "@locallm/browser",
+  "version": "0.0.1",
+  "description": "Run language models in the browser",
+  "repository": "https://github.com/synw/locallm",
+  "scripts": {
+    "build": "rm -rf dist/* && tsc",
+    "docs": "typedoc --entryPointStrategy expand"
+  },
+  "devDependencies": {
+    "@locallm/types": "^0.1.2",
+    "@types/node": "^22.5.2",
+    "ts-node": "^10.9.2",
+    "tslib": "^2.7.0",
+    "typedoc": "^0.26.6",
+    "typedoc-plugin-markdown": "^4.2.6",
+    "typedoc-plugin-rename-defaults": "^0.7.1",
+    "typescript": "^5.5.4"
+  },
+  "type": "module",
+  "files": [
+    "dist"
+  ],
+  "module": "./dist/main.js",
+  "types": "./dist/main.d.ts",
+  "exports": {
+    ".": {
+      "import": "./dist/main.js"
+    }
+  },
+  "publishConfig": {
+    "access": "public",
+    "registry": "https://registry.npmjs.org/"
+  },
+  "license": "MIT",
+  "dependencies": {
+    "@locallm/api": "^0.1.1",
+    "@wllama/wllama": "^1.16.0",
+    "restmix": "^0.5.0"
+  }
+}
diff --git a/packages/browser/src/interfaces.ts b/packages/browser/src/interfaces.ts
@@ -0,0 +1,26 @@
+/**
+ * Options accepted by `WllamaProvider.init`.
+ */
+interface LmBrowserProviderParams {
+    /** Name stored on the provider instance. */
+    name: string;
+    /** Called for every generated token; the provider passes it wllama's `currentText` value. */
+    onToken?: (t: string) => void;
+    /** Called once, when the first token of an inference is received. */
+    onStartEmit?: (data?: any) => void;
+    /** Error callback; stored on the provider instance. */
+    onError?: (err: string) => void;
+}
+
+/**
+ * Raw download progress, as reported to the model loading progress callback.
+ */
+interface OnLoadProgressBasic {
+    total: number;
+    loaded: number;
+}
+
+/**
+ * Download progress enriched with a rounded percentage computed from
+ * `loaded` and `total`.
+ */
+interface OnLoadProgressFull extends OnLoadProgressBasic {
+    percent: number;
+}
+
+/** Progress callback receiving the enriched progress data. */
+type OnLoadProgress = (data: OnLoadProgressFull) => void;
+/** Progress callback receiving only the raw progress data. */
+type BasicOnLoadProgress = (data: OnLoadProgressBasic) => void;
+
+// Everything declared in this file is a type: use a type-only export so the
+// module also compiles under `isolatedModules` and single-file transpilers.
+export type {
+    OnLoadProgress,
+    OnLoadProgressBasic,
+    OnLoadProgressFull,
+    BasicOnLoadProgress,
+    LmBrowserProviderParams,
+}
diff --git a/packages/browser/src/main.ts b/packages/browser/src/main.ts
@@ -0,0 +1,13 @@
+// Public entry point of the package: re-exports the provider class and the
+// types that callers need to use it.
+import type {
+    OnLoadProgress,
+    OnLoadProgressFull,
+    LmBrowserProviderParams,
+} from "./interfaces";
+import { WllamaProvider } from "./wllama";
+
+export { WllamaProvider }
+// These three names are types only: re-export them with `export type` so the
+// module stays valid under `isolatedModules` and single-file transpilers.
+export type {
+    OnLoadProgress,
+    OnLoadProgressFull,
+    LmBrowserProviderParams,
+}
diff --git a/packages/browser/src/wllama.ts b/packages/browser/src/wllama.ts
@@ -0,0 +1,190 @@
+import { useApi } from 'restmix';
+import { InferenceParams, InferenceResult, IngestionStats, LmProvider, LmProviderParams, ModelConf } from "@locallm/types";
+import { parseJson as parseJsonUtil, useStats } from '@locallm/api';
+import { ChatCompletionOptions, SamplingConfig, Wllama } from '@wllama/wllama';
+import { BasicOnLoadProgress, LmBrowserProviderParams, OnLoadProgress } from './interfaces';
+
+// Asset name -> location map handed to the Wllama constructor below.
+// Every entry points to a file under ./esm/.
+// NOTE(review): presumably these relative paths are resolved against the URL
+// of the page serving the app, so the host application has to expose the
+// wllama assets there — confirm against the wllama documentation.
+const CONFIG_PATHS = {
+    'single-thread/wllama.js': './esm/single-thread/wllama.js',
+    'single-thread/wllama.wasm': './esm/single-thread/wllama.wasm',
+    'multi-thread/wllama.js': './esm/multi-thread/wllama.js',
+    'multi-thread/wllama.wasm': './esm/multi-thread/wllama.wasm',
+    'multi-thread/wllama.worker.mjs': './esm/multi-thread/wllama.worker.mjs',
+};
+// Single module-level Wllama instance, created at import time; every
+// WllamaProvider method in this file goes through it.
+const wllama = new Wllama(CONFIG_PATHS);
+
+/**
+ * Language model provider that runs models in the browser through the
+ * module-level Wllama instance.
+ */
+class WllamaProvider implements LmProvider {
+    name: string;
+    api = useApi();
+    onToken?: (t: string) => void;
+    onStartEmit?: (data: IngestionStats) => void;
+    // NOTE(review): never assigned from the constructor params in this class;
+    // it only fires if the caller sets it on the instance — confirm intended.
+    onEndEmit?: (result: InferenceResult) => void;
+    onError?: (err: string) => void;
+    // state
+    model: ModelConf = { name: "", ctx: 2048 };
+    models = new Array<ModelConf>();
+    //abortController = new AbortController();
+    apiKey: string;
+    serverUrl: string;
+    // state
+    abortInference = false;
+
+    /**
+     * Creates a provider from generic provider parameters.
+     *
+     * @param params - The provider name, callbacks, api key and server url to store.
+     */
+    constructor(params: LmProviderParams) {
+        this.name = params.name;
+        this.onToken = params.onToken;
+        this.onStartEmit = params.onStartEmit;
+        this.onError = params.onError;
+        this.apiKey = params.apiKey ?? "";
+        this.serverUrl = params.serverUrl;
+    }
+
+    /**
+     * Creates a provider from browser-specific parameters: the server url and
+     * api key default to empty strings.
+     *
+     * @param params - The provider name and optional callbacks.
+     * @returns A new provider instance.
+     */
+    static init(params: LmBrowserProviderParams): WllamaProvider {
+        return new WllamaProvider({
+            serverUrl: "",
+            apiKey: "",
+            ...params,
+        })
+    }
+
+    /**
+     * Not implemented for this provider: only logs a warning.
+     *
+     * @async
+     * @returns {Promise<void>}
+     */
+    async modelsInfo(): Promise<void> {
+        console.warn("Not implemented for this provider")
+    }
+
+    /**
+     * Logs the metadata of the loaded model, if any, to the console.
+     *
+     * @async
+     * @returns An empty object.
+     */
+    async info(): Promise<Record<string, any>> {
+        // NOTE(review): the metadata is only logged, never returned — confirm intended.
+        if (wllama.isModelLoaded()) {
+            console.log(wllama.getModelMetadata())
+        }
+        return {}
+    }
+
+    /**
+     * Not implemented for this provider: use `loadBrowsermodel` instead.
+     *
+     * @async
+     * @throws {Error} Always.
+     */
+    async loadModel(name: string, ctx?: number, threads?: number, gpu_layers?: number): Promise<void> {
+        // The message names the method that actually exists on this class.
+        throw new Error("Not implemented for this provider: use loadBrowsermodel");
+    }
+
+    /**
+     * Loads a model from one or several urls and records its name and context
+     * size on the provider once loading has completed.
+     *
+     * @async
+     * @param name - The model name to store in `this.model`.
+     * @param urls - The url, or list of urls, passed to `wllama.loadModelFromUrl`.
+     * @param ctx - The context size, passed as `n_ctx` and stored in `this.model`.
+     * @param onLoadProgress - Called on each progress event with the raw data plus a rounded `percent`.
+     */
+    async loadBrowsermodel(name: string, urls: string | string[], ctx: number, onLoadProgress: OnLoadProgress) {
+        const progressCallback: BasicOnLoadProgress = (p) => {
+            // Guard against a zero total, which would yield NaN or Infinity.
+            const percent = p.total > 0 ? Math.round((p.loaded / p.total) * 100) : 0;
+            onLoadProgress({ ...p, percent });
+        };
+        await wllama.loadModelFromUrl(urls, {
+            progressCallback: progressCallback,
+            n_ctx: ctx,
+        });
+        this.model.name = name;
+        this.model.ctx = ctx;
+    }
+
+    /**
+     * Makes an inference based on the provided prompt and parameters.
+     *
+     * @async
+     * @param prompt - The input text to base the inference on.
+     * @param params - Parameters for customizing the inference behavior. When
+     *   `template` is set, its first `{prompt}` placeholder is replaced by `prompt`.
+     *   The object is not modified.
+     * @param parseJson - When true, the output text is parsed and returned in `data`.
+     * @param parseJsonFunc - Optional custom parser forwarded to the json parsing utility.
+     * @returns The result of the inference.
+     * @throws {Error} If no model is loaded.
+     */
+    async infer(
+        prompt: string,
+        params: InferenceParams,
+        parseJson = false,
+        parseJsonFunc?: (data: string) => Record<string, any>
+    ): Promise<InferenceResult> {
+        if (!wllama.isModelLoaded()) {
+            throw new Error("No model loaded")
+        }
+        this.abortInference = false;
+        // A replacer function inserts the prompt verbatim: with a plain string
+        // replacement, sequences like "$&" or "$1" in the prompt would be
+        // interpreted as replacement patterns.
+        const _prompt = params?.template
+            ? params.template.replace("{prompt}", () => prompt)
+            : prompt;
+        const options: ChatCompletionOptions = {};
+        let samplingOptions: SamplingConfig = {};
+        if ("max_tokens" in params) {
+            options.nPredict = params.max_tokens;
+        }
+        if ("stop" in params) {
+            // NOTE(review): each stop string is tokenized and every resulting
+            // token id becomes a stop token on its own — confirm this is the
+            // intended semantics for multi-token stop strings.
+            const st: number[] = [];
+            for (const t of (params?.stop ?? [])) {
+                st.push(...(await wllama.tokenize(t)));
+            }
+            options.stopTokens = st;
+        }
+        if ("temperature" in params) {
+            samplingOptions.temp = params.temperature;
+        }
+        if ("top_k" in params) {
+            samplingOptions.top_k = params.top_k;
+        }
+        if ("top_p" in params) {
+            samplingOptions.top_p = params.top_p;
+        }
+        if ("min_p" in params) {
+            samplingOptions.min_p = params.min_p;
+        }
+        if ("tfs" in params) {
+            samplingOptions.tfs_z = params.tfs;
+        }
+        if ("repeat_penalty" in params) {
+            samplingOptions.penalty_repeat = params.repeat_penalty;
+        }
+        if ("grammar" in params) {
+            samplingOptions.grammar = params.grammar;
+        }
+        if ("extra" in params) {
+            samplingOptions = { ...samplingOptions, ...params.extra }
+        }
+        // Hand the sampling configuration to the completion call: without this
+        // assignment all the sampling parameters collected above are ignored.
+        options.sampling = samplingOptions;
+        const stats = useStats();
+        // NOTE(review): the counter starts at 1 and is incremented after each
+        // token, so the value given to inferenceEnds is the token count plus
+        // one — confirm this is what the stats helper expects.
+        let i = 1;
+        options.onNewToken = (token, piece, currentText, { abortSignal }) => {
+            if (i === 1) {
+                // First token received: the prompt ingestion is over.
+                const ins = stats.inferenceStarts();
+                if (this.onStartEmit) {
+                    this.onStartEmit(ins)
+                }
+            }
+            if (this.onToken) {
+                this.onToken(currentText);
+            }
+            if (this.abortInference) {
+                abortSignal()
+            }
+            ++i
+        };
+        stats.start();
+        const txt = await wllama.createCompletion(_prompt, options);
+        const finalStats = stats.inferenceEnds(i);
+        let data: Record<string, any> = {};
+        if (parseJson) {
+            data = parseJsonUtil(txt, parseJsonFunc);
+        }
+        const res: InferenceResult = {
+            text: txt,
+            data: data,
+            stats: finalStats,
+            serverStats: {},
+        };
+        if (this.onEndEmit) {
+            this.onEndEmit(res)
+        }
+        return res
+    }
+
+    /**
+     * Aborts a currently running inference task: raises a flag that is checked
+     * when the next token is received.
+     *
+     * @async
+     * @returns {Promise<void>}
+     */
+    async abort(): Promise<void> {
+        this.abortInference = true;
+    }
+}
+
+export { WllamaProvider }