Commit

Update
- manually setting `home`: handled by passing in the argument instead of using the ENV variable (for flexibility)
- added an `installed()` API
- better exception handling
- `request()` now takes a `url` attribute (previously the URL was set in the constructor, which was not really correct, so it moved to the `request()` method)
- added 2 new attributes: `repeat_last_n`, `repeat_penalty`
- renamed `skipEnd` to `skip_end` to be consistent with the rest of the request spec
- web UI update: less brutalist; exposes all customizable knobs in the header and displays all currently installed models using the new `installed()` API
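A minimal sketch of what a request object looks like after this commit. The attribute names and defaults are taken from the commit message and the web UI's `config` object; this is an illustration, not the full request spec.

```javascript
// Sketch of the updated request shape (no dalai instance or network involved).
const req = {
  url: "ws://localhost:3000", // now passed per-request instead of to the constructor
  model: "7B",
  prompt: "The sky is",
  repeat_last_n: 64,          // newly added attribute
  repeat_penalty: 1.3,        // newly added attribute
  skip_end: false             // renamed from skipEnd
}
```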
cocktailpeanut committed Mar 13, 2023
1 parent 1185477 commit 0de5346
Showing 6 changed files with 1,772 additions and 178 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,3 +1,5 @@
.env
.DS_Store
venv
node_modules
llama.cpp
168 changes: 140 additions & 28 deletions bin/web/views/index.ejs
@@ -1,18 +1,36 @@
<html>
<head>
<title>Dalai LLaMA</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<style>
body { margin: 0; padding: 10px; color: rgba(0,0,0,0.8); font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; }
form { border: 2px solid rgba(0,0,0,0.2); display: flex; padding: 5px; box-sizing: border-box; margin: 0; }
#input { white-space: pre-wrap; padding: 5px; outline: none; border: none; flex-grow: 1; font-size: 14px; box-sizing: border-box; }
body { margin: 0; padding: 0px; color: rgba(0,0,0,0.8); font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; }
.input-field {
/*
border: 2px solid rgba(0,0,0,0.2);
*/
background: rgba(0,0,0,0.04);
display: flex;
padding: 10px;
box-sizing: border-box;
}
#input {
white-space: pre-wrap;
padding: 5px;
outline: none;
border: none;
flex-grow: 1;
font-size: 14px;
box-sizing: border-box;
}
#input:focus { outline: none; }
#form > button { padding: 10px; background: gold; border: none; border-radius: 3px; outline: none; color: black; box-sizing: border-box; }
button { padding: 10px; background: rgba(0,0,0,0.8); color: white; border: none; border-radius: 3px; outline: none; box-sizing: border-box; }
#messages { list-style-type: none; margin: 0; box-sizing: border-box; font-size: 14px; padding: 0; }
#messages > li { padding: 10px; font-size: 14px; box-sizing: border-box; }
#messages > li:nth-child(odd) { background: #efefef; }
li { white-space: pre-wrap; }
.loading {
margin-bottom: 10px;
padding: 10px;
box-sizing: border-box;
color: rgba(0,0,0,0.7);
@@ -22,33 +40,112 @@
.hidden {
display: none !important;
}
.input-container {
margin: 10px;
}
.info {
font-size: 12px;
padding: 5px;
}
.form-header {
display: flex;
align-items: center;
flex-wrap: wrap;
margin: 0;
background: rgba(0,0,0,0.04);
padding: 10px 20px;
}
.stretch {
flex-grow: 1;
}
input[type=text] , select {
margin-right: 5px;
border: none;
background: rgba(0,0,0,0.08);
padding: 5px 10px;
box-sizing: border-box;
}
.logo {
font-weight: bold;
text-decoration: none;
color: black;
display: block;
font-size: 30px;
letter-spacing: -1px;
font-weight: bold;
font-family: georgia;
}
.config-container {
display: flex;
flex-wrap: wrap;
}
.kv {
display: block;
font-size: 14px;
}
.kv label {
display: block;
padding: 5px 0px;
font-weight: bold;
color: rgba(0,0,0,0.7);
font-size: 12px;
}
</style>
</head>
<body>
<form id="form" action="">
<select id="model">
<!-- options: 7B, 13B, 30B, 65B -->
<option value="7B">7B</option>
<option value="13B">13B</option>
<option value="30B">30B</option>
<option value="65B">65B</option>
</select>
<div contenteditable id='input'></div>
<button>autocomplete</button>
<div class='form-header'></div>
<div class='input-container'>
<div class='input-field'>
<div contenteditable id='input'></div>
<button>Autocomplete</button>
</div>
<div class='info'>TIP: shift+enter for multiple lines</div>
</div>
</form>
<div class='info'>TIP: shift+enter for multiple lines</div>
<div class='loading hidden'></div>
<ul id="messages"></ul>
<script src="/socket.io.min.js"></script>
<script>
const config = {
seed: -1,
threads: 4,
n_predict: 256,
model: "7B",
top_k: 40,
top_p: 0.9,
temp: 0.8,
repeat_last_n: 64,
repeat_penalty: 1.3,
models: []
}
const socket = io();
const form = document.getElementById('form');
const input = document.querySelector('#input');
const model = document.querySelector('#model');
const renderHeader = (config) => {
const fields = ["n_predict", "repeat_last_n", "repeat_penalty", "top_k", "top_p", "temp", "seed"].map((key) => {
return `<div class='kv'>
<label>${key}</label>
<input name="${key}" type='text' placeholder="${key}" value="${config[key] || ''}">
</div>`
}).join("")
const models = config.models.map((model) => {
return `<option value="7B" ${config.model === model ? "selected" : ""}>${model}</option>`
}).join("")
return `<a class='logo' href="/">Dalai</a><div class='stretch'></div>
<div class='config-container'>
${fields}
<div class='kv'>
<label>model</label>
<select id="model" name="model">${models}</select>
</div>
</div>`
}
const loading = (on) => {
if (on) {
document.querySelector(".loading").textContent = on
@@ -58,21 +155,24 @@ const loading = (on) => {
document.querySelector(".loading").classList.add("hidden")
}
}
document.querySelector("form").addEventListener("input", (e) => {
if (e.target.tagName === "SELECT") {
config[e.target.name] = config.models[e.target.selectedIndex]
} else {
config[e.target.name] = e.target.value
}
})
form.addEventListener('submit', (e) => {
e.preventDefault();
e.stopPropagation()
if (input.textContent) {
socket.emit('request', {
prompt: input.textContent,
n_predict: 256,
model: model.value,
})
loading(input.textContent)
config.prompt = input.textContent
socket.emit('request', config)
loading(config.prompt)
input.textContent = "";
}
});
input.addEventListener("keydown", (e) => {
console.log("e", e)
if (e.keyCode === 13) {
e.preventDefault();
if (e.shiftKey) {
@@ -98,16 +198,28 @@ const say = (msg, id) => {
item.textContent = msg;
messages.prepend(item);
}
socket.emit('request', {
method: "installed"
})
socket.on('result', async ({ request, response }) => {
loading(false)
const id = await sha256(request.prompt)
console.log({ id, prompt: request.prompt })
let existing = document.querySelector(`[data-id='${id}']`)
if (existing) {
existing.textContent = existing.textContent + response
if (request.method === "installed") {
if (response == "\n\n<end>") {
document.querySelector(".form-header").innerHTML = renderHeader(config)
} else {
config.models.push(response)
}
} else {
say(response, id)
if (response == "\n\n<end>") {
} else {
const id = await sha256(request.prompt)
let existing = document.querySelector(`[data-id='${id}']`)
if (existing) {
existing.textContent = existing.textContent + response
} else {
say(response, id)
}
}
}
// window.scrollTo(0, document.body.scrollHeight);
});
77 changes: 67 additions & 10 deletions docs/README.md
@@ -60,8 +60,8 @@ npx dalai llama 7B 13B 30B 65B
The install command:

1. Creates a folder named `dalai` under your home directory (`~`)
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/dalai`
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/dalai/models`
2. Installs and builds the [llama.cpp](https://github.com/ggerganov/llama.cpp) project under `~/llama.cpp`
3. Downloads all the requested models from the [llama-dl CDN](https://github.com/shawwn/llama-dl) to `~/llama.cpp/models`
4. Runs some tasks to convert the LLaMA models so they can be used

---
@@ -88,25 +88,32 @@ npm install dalai
### Syntax

```javascript
const dalai = new Dalai(url)
const dalai = new Dalai(home)
```

- `url`: (optional)
- if unspecified, it uses the node.js API to directly run dalai
- if specified (for example `ws://localhost:3000`) it looks for a socket.io endpoint at the URL and connects to it.
- `home`: (optional) manually specify the [llama.cpp](https://github.com/ggerganov/llama.cpp) folder

By default, Dalai automatically stores the entire `llama.cpp` repository under `~/llama.cpp`.

However, you may already have a `llama.cpp` repository somewhere else on your machine and want to use that folder instead. In this case you can pass in the `home` attribute.

### Examples

Initializing a client that connects to a local model (no network):
#### Basic

Creates a workspace at `~/llama.cpp`

```javascript
const dalai = new Dalai()
```

Initializing a client that connects to a remote dalai server (a dalai server must be running at the URL):
#### Custom path

Manually set the `llama.cpp` path:


```javascript
const dalai = new Dalai("ws://localhost:3000")
const dalai = new Dalai("/Documents/llama.cpp")
```

---
@@ -122,13 +129,19 @@ dalai.request(req, callback)
- `req`: a request object, made up of the following attributes:
- `prompt`: **(required)** The prompt string
- `model`: **(required)** The model name to query ("7B", "13B", etc.)
- `url`: only needed if connecting to a remote dalai server
- if unspecified, it uses the node.js API to directly run dalai locally
- if specified (for example `ws://localhost:3000`) it looks for a socket.io endpoint at the URL and connects to it.
- `threads`: The number of threads to use (The default is 8 if unspecified)
- `n_predict`: The number of tokens to return (The default is 128 if unspecified)
- `seed`: The seed. The default is -1 (none)
- `top_k`
- `top_p`
- `repeat_last_n`
- `repeat_penalty`
- `temp`: temperature
- `batch_size`: batch size
- `skip_end`: by default, every session ends with `\n\n<end>`, which can be used as a marker to know when the full response has returned. However sometimes you may not want this suffix. Set `skip_end: true` and the response will no longer end with `\n\n<end>`
- `callback`: the streaming callback function that gets called every time the client gets any token response back from the model
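A minimal sketch of how a caller might consume the streaming callback and detect the `\n\n<end>` marker described above. It uses a simulated token stream in place of a real `dalai.request()` call, so no model or network is involved:

```javascript
// With skip_end unset, the final token of every session is "\n\n<end>".
const END = "\n\n<end>";
let full = "";
let done = false;

// The same shape of callback you would pass to dalai.request(req, callback).
const callback = (token) => {
  if (token === END) { done = true; return; } // end marker: session complete
  full += token;                              // otherwise, accumulate the token
};

// Simulated stream standing in for dalai.request({ prompt, model: "7B" }, callback)
["The", " sky", " is", " blue.", END].forEach(callback);
```

If `skip_end: true` is set, the `END` branch never fires, so completion must be detected some other way (e.g. the socket closing).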

### Examples
@@ -167,7 +180,8 @@ Then once the server is running, simply make requests to it by passing the `ws:/

```javascript
const Dalai = require("dalai")
new Dalai("ws://localhost:3000").request({
new Dalai().request({
url: "ws://localhost:3000",
model: "7B",
prompt: "The following is a conversation between a boy and a girl:",
}, (token) => {
@@ -220,3 +234,46 @@ http.listen(3000, () => {
console.log("server started")
})
```

## 5. install()

### Syntax

```javascript
await dalai.install(model1, model2, ...)
```

- `models`: the model names to install ("7B", "13B", "30B", "65B", etc.)

### Examples

Install the "7B" and "13B" models:


```javascript
const Dalai = require("dalai");
const dalai = new Dalai()
await dalai.install("7B", "13B")
```

---

## 6. installed()

Returns the array of installed models.

### Syntax

```javascript
const models = await dalai.installed()
```

### Examples


```javascript
const Dalai = require("dalai");
const dalai = new Dalai()
const models = await dalai.installed()
console.log(models) // prints ["7B", "13B"]
```