diff --git a/README.md b/README.md
index 4eb0c160..2286326a 100644
--- a/README.md
+++ b/README.md
@@ -65,8 +65,50 @@ async function main() {
main();
```
-Finally, you can find a complete
-You can also find a complete chat app in [examples/simple-chat](examples/simple-chat/).
+### Using Web Worker
+
+WebLLM comes with API support for WebWorker so you can hook
+the generation process into a separate worker thread. This way,
+the compute in the web worker won't disrupt the UI.
+
+We first create a worker script that creates a ChatModule and
+hooks it up to a handler that processes requests.
+
+```typescript
+// worker.ts
+import { ChatWorkerHandler, ChatModule } from "@mlc-ai/web-llm";
+
+// Hook up a chat module to a worker handler
+const chat = new ChatModule();
+const handler = new ChatWorkerHandler(chat);
+self.onmessage = (msg: MessageEvent) => {
+ handler.onmessage(msg);
+};
+```
+
+Then in the main logic, we create a `ChatWorkerClient` that
+implements the same `ChatInterface`. The rest of the logic remains the same.
+
+```typescript
+// main.ts
+import * as webllm from "@mlc-ai/web-llm";
+
+async function main() {
+ // Use a chat worker client instead of ChatModule here
+ const chat = new webllm.ChatWorkerClient(new Worker(
+ new URL('./worker.ts', import.meta.url),
+ {type: 'module'}
+ ));
+ // everything else remains the same
+}
+```
+
+
+### Build a ChatApp
+
+You can find a complete chat app example
+in [examples/simple-chat](examples/simple-chat/).
+
## Customized Model Weights
diff --git a/examples/README.md b/examples/README.md
index 57998bcd..8b83ddd1 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -6,5 +6,5 @@ Please send a pull request if you find things that belongs to here.
## Tutorial Examples
- [get-started](get-started): minimum get started example.
+- [web-worker](web-worker): get started with web worker backed chat.
- [simple-chat](simple-chat): a mininum and complete chat app.
-
diff --git a/examples/get-started/README.md b/examples/get-started/README.md
index 9896513e..b872605f 100644
--- a/examples/get-started/README.md
+++ b/examples/get-started/README.md
@@ -7,7 +7,7 @@ To try it out, you can do the following steps
- `@mlc-ai/web-llm` points to a valid npm version e.g.
```js
"dependencies": {
- "@mlc-ai/web-llm": "^0.1.3"
+ "@mlc-ai/web-llm": "^0.2.0"
}
```
Try this option if you would like to use WebLLM without building it yourself.
diff --git a/examples/simple-chat/README.md b/examples/simple-chat/README.md
index 461c6488..18f38a72 100644
--- a/examples/simple-chat/README.md
+++ b/examples/simple-chat/README.md
@@ -7,7 +7,7 @@ chat app based on WebLLM. To try it out, you can do the following steps
- Option 1: `@mlc-ai/web-llm` points to a valid npm version e.g.
```js
"dependencies": {
- "@mlc-ai/web-llm": "^0.1.3"
+ "@mlc-ai/web-llm": "^0.2.0"
}
```
Try this option if you would like to use WebLLM.
diff --git a/examples/simple-chat/src/gh-config.js b/examples/simple-chat/src/gh-config.js
index 3205ef2d..0a971502 100644
--- a/examples/simple-chat/src/gh-config.js
+++ b/examples/simple-chat/src/gh-config.js
@@ -18,5 +18,6 @@ export default {
"vicuna-v1-7b-q4f32_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/vicuna-v1-7b-q4f32_0-webgpu.wasm",
"RedPajama-INCITE-Chat-3B-v1-q4f32_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm",
"RedPajama-INCITE-Chat-3B-v1-q4f16_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f16_0-webgpu.wasm"
- }
+ },
+ "use_web_worker": true
}
diff --git a/examples/simple-chat/src/llm_chat.html b/examples/simple-chat/src/llm_chat.html
index f14e04f3..065a76be 100644
--- a/examples/simple-chat/src/llm_chat.html
+++ b/examples/simple-chat/src/llm_chat.html
@@ -1,7 +1,6 @@
-
diff --git a/examples/simple-chat/src/mlc-local-config.js b/examples/simple-chat/src/mlc-local-config.js
index e0d9ce4a..8fbe9b83 100644
--- a/examples/simple-chat/src/mlc-local-config.js
+++ b/examples/simple-chat/src/mlc-local-config.js
@@ -22,5 +22,6 @@ export default {
"vicuna-v1-7b-q4f32_0": "http://localhost:8000/vicuna-v1-7b-q4f32_0/vicuna-v1-7b-q4f32_0-webgpu.wasm",
"RedPajama-INCITE-Chat-3B-v1-q4f32_0": "http://localhost:8000/RedPajama-INCITE-Chat-3B-v1-q4f32_0/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm",
"RedPajama-INCITE-Chat-3B-v1-q4f16_0": "http://localhost:8000/RedPajama-INCITE-Chat-3B-v1-q4f16_0/RedPajama-INCITE-Chat-3B-v1-q4f16_0-webgpu.wasm"
- }
+ },
+ "use_web_worker": true
}
diff --git a/examples/simple-chat/src/simple_chat.ts b/examples/simple-chat/src/simple_chat.ts
index c98c35ab..618c6595 100644
--- a/examples/simple-chat/src/simple_chat.ts
+++ b/examples/simple-chat/src/simple_chat.ts
@@ -1,5 +1,5 @@
import appConfig from "./app-config";
-import { ChatModule, ModelRecord } from "@mlc-ai/web-llm";
+import { ChatInterface, ChatModule, ChatWorkerClient, ModelRecord } from "@mlc-ai/web-llm";
function getElementAndCheck(id: string): HTMLElement {
const element = document.getElementById(id);
@@ -18,7 +18,7 @@ class ChatUI {
private uiChat: HTMLElement;
private uiChatInput: HTMLInputElement;
private uiChatInfoLabel: HTMLLabelElement;
- private chat: ChatModule;
+ private chat: ChatInterface;
private config: AppConfig = appConfig;
private selectedModel: string;
private chatLoaded = false;
@@ -27,8 +27,9 @@ class ChatUI {
// all requests send to chat are sequentialized
private chatRequestChain: Promise = Promise.resolve();
- constructor() {
- this.chat = new ChatModule();
+ constructor(chat: ChatInterface) {
+ // the chat instance (ChatModule or worker-backed client) is provided by the caller
+ this.chat = chat;
// get the elements
this.uiChat = getElementAndCheck("chatui-chat");
this.uiChatInput = getElementAndCheck("chatui-input") as HTMLInputElement;
@@ -156,9 +157,10 @@ class ChatUI {
private resetChatHistory() {
const clearTags = ["left", "right", "init", "error"];
for (const tag of clearTags) {
- const matches = this.uiChat.getElementsByClassName(`msg ${tag}-msg`);
+ // copy into an array so the iteration is not affected by DOM mutation
+ const matches = [...this.uiChat.getElementsByClassName(`msg ${tag}-msg`)];
for (const item of matches) {
- item.remove();
+ this.uiChat.removeChild(item);
}
}
if (this.uiChatInfoLabel !== undefined) {
@@ -211,11 +213,6 @@ class ChatUI {
this.appendMessage("left", "");
const callbackUpdateResponse = (step, msg) => {
- if (msg.endsWith("##")) {
- msg = msg.substring(0, msg.length - 2);
- } else if (msg.endsWith("#")) {
- msg = msg.substring(0, msg.length - 1);
- }
this.updateLastMessage("left", msg);
};
@@ -233,4 +230,15 @@ class ChatUI {
}
}
-new ChatUI();
+const useWebWorker = appConfig.use_web_worker;
+let chat: ChatInterface;
+
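+// run chat generation in a web worker when the app config enables it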
+if (useWebWorker) {
+ chat = new ChatWorkerClient(new Worker(
+ new URL('./worker.ts', import.meta.url),
+ {type: 'module'}
+ ));
+} else {
+ chat = new ChatModule();
+}
+new ChatUI(chat);
diff --git a/examples/simple-chat/src/worker.ts b/examples/simple-chat/src/worker.ts
new file mode 100644
index 00000000..5495c13d
--- /dev/null
+++ b/examples/simple-chat/src/worker.ts
@@ -0,0 +1,8 @@
+// Serve the chat workload through a web worker
+import { ChatWorkerHandler, ChatModule } from "@mlc-ai/web-llm";
+
+const chat = new ChatModule();
+const handler = new ChatWorkerHandler(chat);
+self.onmessage = (msg: MessageEvent) => {
+ handler.onmessage(msg);
+};
diff --git a/examples/web-worker/README.md b/examples/web-worker/README.md
new file mode 100644
index 00000000..c6e00d62
--- /dev/null
+++ b/examples/web-worker/README.md
@@ -0,0 +1,25 @@
+# WebLLM Get Started with WebWorker
+
+This folder provides a minimal demo that shows the WebLLM API using a
+[WebWorker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers).
+The main benefit of a web worker is that all ML workloads run on a separate thread,
+so they are much less likely to block the UI.
+
+To try it out, you can do the following steps
+
+- Modify [package.json](package.json) to make sure either
+ - `@mlc-ai/web-llm` points to a valid npm version e.g.
+ ```js
+ "dependencies": {
+ "@mlc-ai/web-llm": "^0.2.0"
+ }
+ ```
+ Try this option if you would like to use WebLLM without building it yourself.
+ - Or keep the dependencies as `"file:../.."`, and follow the build-from-source
+ instructions in the project to build WebLLM locally. This option is more useful
+ for developers who would like to hack the WebLLM core package.
+- Run the following command
+ ```bash
+ npm install
+ npm start
+ ```
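+
+For reference, the worker-backed setup in this example follows the same two-file
+pattern as the "Using Web Worker" section of the top-level README. Below is a minimal
+sketch of the main-thread side (the `worker.ts` path and the `reload`/`generate`
+calls are illustrative; see the sources under [src](src/) for the actual code):
+
+```typescript
+import { ChatWorkerClient } from "@mlc-ai/web-llm";
+
+async function main() {
+  // All generation requests are forwarded to the worker thread,
+  // so the heavy compute does not block the UI thread.
+  const chat = new ChatWorkerClient(new Worker(
+    new URL('./worker.ts', import.meta.url),
+    {type: 'module'}
+  ));
+  // From here on, the client is used like a regular ChatModule.
+  await chat.reload("vicuna-v1-7b-q4f32_0");
+  const reply = await chat.generate("What is the capital of Canada?");
+  console.log(reply);
+}
+
+main();
+```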
diff --git a/examples/web-worker/package.json b/examples/web-worker/package.json
new file mode 100644
index 00000000..424756dd
--- /dev/null
+++ b/examples/web-worker/package.json
@@ -0,0 +1,17 @@
+{
+ "name": "get-started-web-worker",
+ "version": "0.1.0",
+ "private": true,
+ "scripts": {
+ "start": "parcel src/get_started.html --port 8888",
+ "build": "parcel build src/get_started.html --dist-dir lib"
+ },
+ "devDependencies": {
+ "parcel": "^2.8.3",
+ "typescript": "^4.9.5",
+ "tslib": "^2.3.1"
+ },
+ "dependencies": {
+ "@mlc-ai/web-llm": "file:../.."
+ }
+}
diff --git a/examples/web-worker/src/get_started.html b/examples/web-worker/src/get_started.html
new file mode 100644
index 00000000..a376ef62
--- /dev/null
+++ b/examples/web-worker/src/get_started.html
@@ -0,0 +1,22 @@
+
+
+
+
+