From 3a87f7cb19716fecb78f9aa7ea16be367b7ae353 Mon Sep 17 00:00:00 2001 From: Lim Chee Kin Date: Sun, 17 Dec 2023 10:46:56 +0800 Subject: [PATCH] feat: added tinyllama-1.1b-chat model --- .github/workflows/dev.yml | 4 ++-- phi-2/prompt.json | 7 ------- {phi-2 => tinyllama-1.1b-chat}/Dockerfile | 2 +- {phi-2 => tinyllama-1.1b-chat}/fastapi_app.py | 9 +++++---- tinyllama-1.1b-chat/prompt.json | 7 +++++++ 5 files changed, 15 insertions(+), 14 deletions(-) delete mode 100644 phi-2/prompt.json rename {phi-2 => tinyllama-1.1b-chat}/Dockerfile (78%) rename {phi-2 => tinyllama-1.1b-chat}/fastapi_app.py (80%) create mode 100644 tinyllama-1.1b-chat/prompt.json diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 9696d8a..1be8705 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -3,7 +3,7 @@ name: Deploy Dev on: push: branches: - - phi-2 + - tinyllama-1.1b-chat jobs: deploy-dev: runs-on: ubuntu-latest @@ -27,4 +27,4 @@ jobs: run: cd ${{ github.ref_name }};modal deploy fastapi_app.py - name: Test FastAPI app - run: "pwd;cd ${{ github.ref_name }};curl -X POST -H 'Content-Type: application/json' -d @prompt.json ${{ secrets.PHI_2_APP_URL }}v1/completions" + run: "pwd;cd ${{ github.ref_name }};curl -X POST -H 'Content-Type: application/json' -d @prompt.json ${{ secrets.TINYLLAMA_CHAT_APP_URL }}v1/completions" diff --git a/phi-2/prompt.json b/phi-2/prompt.json deleted file mode 100644 index 91c0002..0000000 --- a/phi-2/prompt.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "prompt": [ - "Instruct: You are an AI assistant that follows instruction extremely well. Help as much as you can. Answer the question based on the context below.\nContext: The main benefit of operators is to automate operations. Kubernetes operators are capable to automate the expensive and error likely human operations. Features like autopilot and self-healing are typical scenarios. Another benefit of operators is the reusability of software. Software providers can expose operators in various catalogs to reach new markets and to promote their software. Operators leverage the Kubernetes community, since they are a natural and Kubernetes-native way to extend Kubernetes.\nQuestion: What are the main benefits of Kubernetes Operators?\nOutput:" - ], - "max_tokens": 128, - "stop": [] -} diff --git a/phi-2/Dockerfile b/tinyllama-1.1b-chat/Dockerfile similarity index 78% rename from phi-2/Dockerfile rename to tinyllama-1.1b-chat/Dockerfile index bbcd82a..577481f 100644 --- a/phi-2/Dockerfile +++ b/tinyllama-1.1b-chat/Dockerfile @@ -15,4 +15,4 @@ RUN pip install -U pip setuptools wheel && \ # Download model RUN mkdir model && \ - curl -L https://huggingface.co/radames/phi-2-quantized/resolve/main/model-v2-q80.gguf -o model/gguf-model.bin + curl -L https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6/resolve/main/ggml-model-q4_0.gguf -o model/gguf-model.bin diff --git a/phi-2/fastapi_app.py b/tinyllama-1.1b-chat/fastapi_app.py similarity index 80% rename from phi-2/fastapi_app.py rename to tinyllama-1.1b-chat/fastapi_app.py index 298a25d..5546e95 100644 --- a/phi-2/fastapi_app.py +++ b/tinyllama-1.1b-chat/fastapi_app.py @@ -1,7 +1,7 @@ # Modal Lab web app for llama.cpp. from modal import Image, Stub, asgi_app -stub = Stub("phi-2") +stub = Stub("tinyllama-1-1b-chat") image = Image.from_dockerfile( "Dockerfile", force_build=True @@ -11,7 +11,7 @@ ) -@stub.function(image=image, cpu=4, memory=5632, timeout=600) +@stub.function(image=image, cpu=2, memory=2048, timeout=600) @asgi_app() def fastapi_app(): from llama_cpp.server.app import create_app, Settings @@ -19,9 +19,10 @@ def fastapi_app(): print("os.cpu_count()", os.cpu_count()) app = create_app( Settings( - n_threads=4, + n_threads=2, model="/model/gguf-model.bin", - embedding=False + embedding=False, + chat_format="zephyr" ) ) return app diff --git a/tinyllama-1.1b-chat/prompt.json b/tinyllama-1.1b-chat/prompt.json new file mode 100644 index 0000000..b3bc526 --- /dev/null +++ b/tinyllama-1.1b-chat/prompt.json @@ -0,0 +1,7 @@ +{ + "prompt": [ + "<|system|>\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n<|user|>\nAnswer the question based on the context below.\nContext: The main benefit of operators is to automate operations. Kubernetes operators are capable to automate the expensive and error likely human operations. Features like autopilot and self-healing are typical scenarios. Another benefit of operators is the reusability of software. Software providers can expose operators in various catalogs to reach new markets and to promote their software. Operators leverage the Kubernetes community, since they are a natural and Kubernetes-native way to extend Kubernetes.\nQuestion: What are the main benefits of Kubernetes Operators?\n<|assistant|>\n" + ], + "max_tokens": 128, + "stop": [] +}