Skip to content

Commit 4acfb1e

Browse files
authored
Update local deployment scripts and related fix (#102)
This PR separates local deployment config from global deployment config, fixes related deployment scripts, and updates the publicly available wasm/weights links.
1 parent 383bc52 commit 4acfb1e

8 files changed

+43
-62
lines changed

scripts/build_site.sh

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ if [ -z ${MLC_LLM_HOME_SET} ]; then
1212
fi
1313

1414
rm -rf site/dist
15-
mkdir -p site/dist site/_inlcudes
15+
mkdir -p site/dist site/_includes
1616

1717
echo "Copy local configurations.."
18-
cp $1 site/global_config.json
18+
cp $1 site/llm-chat-config.json
1919
echo "Copy files..."
2020
cp web/llm_chat.html site/_includes
2121
cp web/llm_chat.js site/dist/
@@ -25,15 +25,4 @@ cp dist/tvmjs_runtime.wasi.js site/dist
2525
cp dist/tvmjs.bundle.js site/dist
2626
cp -r dist/tokenizers-cpp site/dist
2727

28-
if [ -d "$MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/params" ]; then
29-
mkdir -p site/dist/vicuna-v1-7b-q4f32_0
30-
cp -rf $MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/tokenizer.model site/dist/vicuna-v1-7b-q4f32_0/
31-
cp -rf $MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/vicuna-v1-7b-q4f32_0-webgpu.wasm site/dist/vicuna-v1-7b-q4f32_0/
32-
fi
33-
if [ -d "$MLC_LLM_HOME/dist/wizardlm-7b/params" ]; then
34-
mkdir -p site/dist/wizardlm-7b
35-
cp -rf $MLC_LLM_HOME/dist/wizardlm-7b/tokenizer.model site/dist/wizardlm-7b/
36-
cp -rf $MLC_LLM_HOME/dist/wizardlm-7b/wizardlm-7b-webgpu.wasm site/dist/wizardlm-7b/
37-
fi
38-
3928
cd site && jekyll b && cd ..

scripts/local_deploy_site.sh

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,27 @@ if [ -z ${MLC_LLM_HOME_SET} ]; then
77
export MLC_LLM_HOME="${MLC_LLM_HOME:-mlc-llm}"
88
fi
99

10-
scripts/build_site.sh web/global_config.json
10+
scripts/build_site.sh web/local-config.json
1111

1212
echo "symlink parameter location to site.."
1313

1414
if [ -d "$MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/params" ]; then
15-
rm -rf site/_site/dist/vicuna-v1-7b-q4f32_0-params
15+
rm -rf site/_site/dist/vicuna-v1-7b-q4f32_0
16+
mkdir -p site/_site/dist/vicuna-v1-7b-q4f32_0
1617
ln -s $MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/params site/_site/dist/vicuna-v1-7b-q4f32_0/params
17-
ls site/_site/dist/vicuna-v1-7b-q4f32_0
18+
cp -rf $MLC_LLM_HOME/dist/vicuna-v1-7b-q4f32_0/vicuna-v1-7b-q4f32_0-webgpu.wasm site/_site/dist/vicuna-v1-7b-q4f32_0/
19+
fi
20+
if [ -d "$MLC_LLM_HOME/dist/RedPajama-INCITE-Chat-3B-v1-q4f32_0/params" ]; then
21+
rm -rf site/_site/dist/RedPajama-INCITE-Chat-3B-v1-q4f32_0
22+
mkdir -p site/_site/dist/RedPajama-INCITE-Chat-3B-v1-q4f32_0
23+
ln -s $MLC_LLM_HOME/dist/RedPajama-INCITE-Chat-3B-v1-q4f32_0/params site/_site/dist/RedPajama-INCITE-Chat-3B-v1-q4f32_0/params
24+
cp -rf $MLC_LLM_HOME/dist/RedPajama-INCITE-Chat-3B-v1-q4f32_0/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm site/_site/dist/RedPajama-INCITE-Chat-3B-v1-q4f32_0/
1825
fi
1926
if [ -d "$MLC_LLM_HOME/dist/wizardlm-7b/params" ]; then
20-
rm -rf site/_site/dist/wizardlm-7b-params
21-
ln -s $MLC_LLM_HOME/dist/wizardlm-7b/params site/_site/dist/wizardlm-7b-params
27+
rm -rf site/_site/dist/wizardlm-7b
28+
mkdir -p site/_site/dist/wizardlm-7b
29+
ln -s $MLC_LLM_HOME/dist/wizardlm-7b/params site/_site/dist/wizardlm-7b/params
30+
cp -rf $MLC_LLM_HOME/dist/wizardlm-7b/wizardlm-7b-webgpu.wasm site/_site/dist/wizardlm-7b/
2231
fi
2332

24-
25-
2633
cd site && jekyll serve --skip-initial-build --host localhost --baseurl /web-llm --port 8888

site/.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
dist
22
llm-chat-config.json
3-
global_config.json
43
_includes/stable_diffusion.html
54
_site

web/gh-page-config.json

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
11
{
2-
"kvConfig": {
3-
"numLayers": 64,
4-
"shape": [32, 32, 128],
5-
"dtype": "float32"
2+
"url_dict": {
3+
"vicuna-v1-7b-q4f32_0": "https://huggingface.co/mlc-ai/mlc-chat-vicuna-v1-7b-q4f32_0/resolve/main/mlc-chat-config.json",
4+
"RedPajama-INCITE-Chat-3B-v1-q4f32_0": "https://huggingface.co/mlc-ai/mlc-chat-RedPajama-INCITE-Chat-3B-v1-q4f32_0/resolve/main/mlc-chat-config.json"
65
},
7-
"wasmUrl": "dist/vicuna-7b-v1/vicuna-7b-v1_webgpu.wasm",
8-
"cacheUrl": "https://huggingface.co/mlc-ai/web-lm/resolve/main/vicuna-7b-v1/",
9-
"tokenizer": "dist/vicuna-7b-v1/tokenizer.model",
10-
"maxGenLength": 1024,
11-
"meanGenLength": 256,
12-
"maxWindowLength": 2048
13-
}
6+
"model_lib_map": {
7+
"vicuna-v1-7b-q4f32_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/wasm/vicuna-v1-7b-q4f32_0-webgpu.wasm",
8+
"RedPajama-INCITE-Chat-3B-v1-q4f32_0": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/wasm/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm"
9+
}
10+
}

web/global_config.json

Lines changed: 0 additions & 10 deletions
This file was deleted.

web/llm_chat.js

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ class Conversation {
8888
const role = item[0];
8989
const message = item[1];
9090
if (message !== undefined && message != "") {
91-
ret.push(message + this.seps[i % this.seps.length]+"\n");
91+
ret.push(message + this.seps[i % this.seps.length] + "\n");
9292
} else {
9393
ret.push(role + ":");
9494
}
@@ -118,7 +118,7 @@ class Conversation {
118118
}
119119

120120
function getConversation(conv_template) {
121-
if (conv_template == "vicuna-v1.1") {
121+
if (conv_template == "vicuna_v1.1") {
122122
return new Conversation({
123123
system: "A chat between a curious user and an artificial intelligence assistant. " +
124124
"The assistant gives helpful, detailed, and polite answers to the user's questions.",
@@ -145,12 +145,12 @@ function getConversation(conv_template) {
145145
roles: ["<human>", "<bot>"],
146146
messages: [],
147147
offset: 0,
148-
seps: ["",""],
148+
seps: ["", ""],
149149
separator_style: "RedPajamaChat",
150150
add_bos: false,
151151
})
152152
} else {
153-
throw Error("Unknown conv template "+ conv_template);
153+
throw Error("Unknown conv template " + conv_template);
154154
}
155155
};
156156

@@ -580,8 +580,8 @@ class LLMChatInstance {
580580
this.uiChat = document.getElementById("chatui-chat");
581581
this.uiChatInput = document.getElementById("chatui-input");
582582
this.uiChatInfoLabel = document.getElementById("chatui-info-label");
583-
var global_config = await (await fetch("global_config.json")).json();
584-
583+
var global_config = await (await fetch("llm-chat-config.json")).json();
584+
585585
var model_config_url = undefined;
586586
if (global_config.url_dict[this.model] === undefined) {
587587
model_config_url = this.model;
@@ -591,7 +591,6 @@ class LLMChatInstance {
591591
this.config = await (
592592
await fetch(model_config_url)
593593
).json();
594-
this.logger(this.config)
595594
this.config.wasmUrl = global_config.model_lib_map[this.config.model_lib]
596595
var last_slash = model_config_url.lastIndexOf("/");
597596
var base_url = model_config_url.substring(0, last_slash + 1);
@@ -603,6 +602,9 @@ class LLMChatInstance {
603602
}
604603

605604
async findTokenizerPath(base_url) {
605+
if (!base_url.startsWith("http")) {
606+
base_url = new URL(base_url, document.URL).href;
607+
}
606608
const tokenizer_model_path = new URL("tokenizer.model", base_url);
607609
var tokenizer_model = await fetch(tokenizer_model_path);
608610
if (tokenizer_model.ok) {
@@ -780,7 +782,7 @@ function handle_model_change() {
780782
function onChange() {
781783
localLLMChatIntance.reboot();
782784
localLLMChatIntance.model = e.value;
783-
localLLMChatIntance.logger("model changed to " +e.value)
785+
localLLMChatIntance.logger("model changed to " + e.value)
784786
}
785787
e.onchange = onChange;
786788
}

web/local-config.json

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,10 @@
11
{
2-
"kvConfig": {
3-
"numLayers": 64,
4-
"shape": [32, 32, 128],
5-
"dtype": "float32"
2+
"url_dict": {
3+
"vicuna-v1-7b-q4f32_0": "dist/vicuna-v1-7b-q4f32_0/params/mlc-chat-config.json",
4+
"RedPajama-INCITE-Chat-3B-v1-q4f32_0": "dist/RedPajama-INCITE-Chat-3B-v1-q4f32_0/params/mlc-chat-config.json"
65
},
7-
"wasmUrl": "dist/vicuna-7b-v1/vicuna-7b-v1_webgpu.wasm",
8-
"cacheUrl": "vicuna-7b-v1-params/",
9-
"tokenizer": "dist/vicuna-7b-v1/tokenizer.model",
10-
"maxGenLength": 1024,
11-
"meanGenLength": 256,
12-
"maxWindowLength": 2048
13-
}
6+
"model_lib_map": {
7+
"vicuna-v1-7b-q4f32_0": "dist/vicuna-v1-7b-q4f32_0/vicuna-v1-7b-q4f32_0-webgpu.wasm",
8+
"RedPajama-INCITE-Chat-3B-v1-q4f32_0": "dist/RedPajama-INCITE-Chat-3B-v1-q4f32_0/RedPajama-INCITE-Chat-3B-v1-q4f32_0-webgpu.wasm"
9+
}
10+
}

0 commit comments

Comments (0)