feat: added tinyllama-1.1b-chat model
limcheekin committed Dec 17, 2023
1 parent 617b178 commit 3a87f7c
Showing 5 changed files with 15 additions and 14 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/dev.yml
@@ -3,7 +3,7 @@ name: Deploy Dev
 on:
   push:
     branches:
-      - phi-2
+      - tinyllama-1.1b-chat
 jobs:
   deploy-dev:
     runs-on: ubuntu-latest
@@ -27,4 +27,4 @@ jobs:
         run: cd ${{ github.ref_name }};modal deploy fastapi_app.py

       - name: Test FastAPI app
-        run: "pwd;cd ${{ github.ref_name }};curl -X POST -H 'Content-Type: application/json' -d @prompt.json ${{ secrets.PHI_2_APP_URL }}v1/completions"
+        run: "pwd;cd ${{ github.ref_name }};curl -X POST -H 'Content-Type: application/json' -d @prompt.json ${{ secrets.TINYLLAMA_CHAT_APP_URL }}v1/completions"
7 changes: 0 additions & 7 deletions phi-2/prompt.json

This file was deleted.

2 changes: 1 addition & 1 deletion phi-2/Dockerfile → tinyllama-1.1b-chat/Dockerfile
@@ -15,4 +15,4 @@ RUN pip install -U pip setuptools wheel && \

 # Download model
 RUN mkdir model && \
-    curl -L https://huggingface.co/radames/phi-2-quantized/resolve/main/model-v2-q80.gguf -o model/gguf-model.bin
+    curl -L https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6/resolve/main/ggml-model-q4_0.gguf -o model/gguf-model.bin
9 changes: 5 additions & 4 deletions phi-2/fastapi_app.py → tinyllama-1.1b-chat/fastapi_app.py
@@ -1,7 +1,7 @@
 # Modal Lab web app for llama.cpp.
 from modal import Image, Stub, asgi_app

-stub = Stub("phi-2")
+stub = Stub("tinyllama-1-1b-chat")

 image = Image.from_dockerfile(
     "Dockerfile", force_build=True
@@ -11,17 +11,18 @@
 )


-@stub.function(image=image, cpu=4, memory=5632, timeout=600)
+@stub.function(image=image, cpu=2, memory=2048, timeout=600)
 @asgi_app()
 def fastapi_app():
     from llama_cpp.server.app import create_app, Settings
     import os
     print("os.cpu_count()", os.cpu_count())
     app = create_app(
         Settings(
-            n_threads=4,
+            n_threads=2,
             model="/model/gguf-model.bin",
-            embedding=False
+            embedding=False,
+            chat_format="zephyr"
         )
     )
     return app
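The `chat_format="zephyr"` setting tells llama-cpp-python's server how to render chat messages into the `<|system|>` / `<|user|>` / `<|assistant|>` prompt layout that TinyLlama-1.1B-Chat was fine-tuned on (the same layout used verbatim in `prompt.json` below). A minimal sketch of that template; `format_zephyr` is a hypothetical helper for illustration, not the library's implementation:

```python
# Sketch of the Zephyr chat layout selected by chat_format="zephyr".
# format_zephyr is a hypothetical illustration, not llama-cpp-python code.
def format_zephyr(messages):
    # Each message is rendered as "<|role|>\ncontent</s>\n"; the final
    # assistant tag is left open so the model generates the reply after it.
    prompt = ""
    for m in messages:
        prompt += f"<|{m['role']}|>\n{m['content']}</s>\n"
    return prompt + "<|assistant|>\n"


prompt = format_zephyr([
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
])
print(prompt)
```

With this in place, clients can call the server's chat endpoint with plain role/content messages instead of hand-building the tagged prompt as `prompt.json` does for the raw completions endpoint.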
7 changes: 7 additions & 0 deletions tinyllama-1.1b-chat/prompt.json
@@ -0,0 +1,7 @@
+{
+  "prompt": [
+    "<|system|>\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.</s>\n<|user|>\nAnswer the question based on the context below.\nContext: The main benefit of operators is to automate operations. Kubernetes operators are capable to automate the expensive and error likely human operations. Features like autopilot and self-healing are typical scenarios. Another benefit of operators is the reusability of software. Software providers can expose operators in various catalogs to reach new markets and to promote their software. Operators leverage the Kubernetes community, since they are a natural and Kubernetes-native way to extend Kubernetes.\nQuestion: What are the main benefits of Kubernetes Operators?</s>\n<|assistant|>\n"
+  ],
+  "max_tokens": 128,
+  "stop": []
+}
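The workflow's test step posts this file to the deployed app's OpenAI-compatible `/v1/completions` endpoint via curl. A Python equivalent of that request, for local experimenting; `APP_URL` is a placeholder for the `TINYLLAMA_CHAT_APP_URL` repository secret, and the prompt string is truncated here for brevity:

```python
# Python equivalent of the CI curl test against /v1/completions.
# APP_URL stands in for the real deployment URL, which lives in a repo secret.
import json
import urllib.request

APP_URL = "https://example.modal.run/"  # placeholder, not the real endpoint

# Same shape as prompt.json; the Zephyr-tagged prompt is shortened here.
payload = {
    "prompt": [
        "<|system|>\nYou are an AI assistant.</s>\n<|user|>\nSay hello.</s>\n<|assistant|>\n"
    ],
    "max_tokens": 128,
    "stop": [],
}

req = urllib.request.Request(
    APP_URL + "v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
# urllib.request.urlopen(req)  # uncomment to send; requires a live deployment
print(req.full_url)
```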
