From 5152221334efb21d3f8d73e0e6617f56c2115ed8 Mon Sep 17 00:00:00 2001 From: Lim Chee Kin Date: Mon, 6 Nov 2023 11:59:02 +0800 Subject: [PATCH] feat: added zephyr-7b model --- .github/workflows/dev.yml | 8 ++++---- {orca-mini-v3-7b => zephyr-7b}/Dockerfile | 4 ++-- {orca-mini-v3-7b => zephyr-7b}/fastapi_app.py | 4 ++-- {orca-mini-v3-7b => zephyr-7b}/prompt.json | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) rename {orca-mini-v3-7b => zephyr-7b}/Dockerfile (73%) rename {orca-mini-v3-7b => zephyr-7b}/fastapi_app.py (89%) rename {orca-mini-v3-7b => zephyr-7b}/prompt.json (70%) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 0a52440..3052a86 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -3,7 +3,7 @@ name: Deploy Dev on: push: branches: - - orca-mini-v3-7b + - zephyr-7b jobs: deploy-dev: runs-on: ubuntu-latest @@ -13,12 +13,12 @@ jobs: steps: - name: Checkout own repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v4 with: - python-version: "3.10" + python-version: "3.11" - name: Install Python packages run: pip install modal-client @@ -27,4 +27,4 @@ jobs: run: cd ${{ github.ref_name }};modal deploy fastapi_app.py - name: Test FastAPI app - run: "pwd;cd ${{ github.ref_name }};curl -X POST -H 'Content-Type: application/json' -d @prompt.json ${{ secrets.ORCA_MINI_V3_7B_APP_URL }}v1/completions" + run: "pwd;cd ${{ github.ref_name }};curl -X POST -H 'Content-Type: application/json' -d @prompt.json ${{ secrets.ZEPHYR_7B_APP_URL }}v1/completions" diff --git a/orca-mini-v3-7b/Dockerfile b/zephyr-7b/Dockerfile similarity index 73% rename from orca-mini-v3-7b/Dockerfile rename to zephyr-7b/Dockerfile index 7503f52..9fe7c4c 100644 --- a/orca-mini-v3-7b/Dockerfile +++ b/zephyr-7b/Dockerfile @@ -1,5 +1,5 @@ # Grab a fresh copy of the Python image -FROM python:3.10-slim +FROM python:3.11-slim # Install build and runtime dependencies RUN apt-get update && \ @@ 
-15,4 +15,4 @@ RUN pip install -U pip setuptools wheel && \ # Download model RUN mkdir model && \ - curl -L https://huggingface.co/TheBloke/orca_mini_v3_7B-GGUF/resolve/main/orca_mini_v3_7b.Q4_K_M.gguf -o model/gguf-model.bin + curl -L https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf -o model/gguf-model.bin diff --git a/orca-mini-v3-7b/fastapi_app.py b/zephyr-7b/fastapi_app.py similarity index 89% rename from orca-mini-v3-7b/fastapi_app.py rename to zephyr-7b/fastapi_app.py index 9ba64a4..7ba78f1 100644 --- a/orca-mini-v3-7b/fastapi_app.py +++ b/zephyr-7b/fastapi_app.py @@ -1,7 +1,7 @@ # Modal Lab web app for llama.cpp. from modal import Image, Stub, asgi_app -stub = Stub("orca-mini-v3-7b") +stub = Stub("zephyr-7b") image = Image.from_dockerfile( "Dockerfile", force_build=True @@ -11,7 +11,7 @@ ) -@stub.function(image=image, cpu=14, memory=7168, keep_warm=1, timeout=600) +@stub.function(image=image, cpu=14, memory=8704, keep_warm=1, timeout=600) @asgi_app() def fastapi_app(): from llama_cpp.server.app import create_app, Settings diff --git a/orca-mini-v3-7b/prompt.json b/zephyr-7b/prompt.json similarity index 70% rename from orca-mini-v3-7b/prompt.json rename to zephyr-7b/prompt.json index b47db04..1b38b8c 100644 --- a/orca-mini-v3-7b/prompt.json +++ b/zephyr-7b/prompt.json @@ -1,7 +1,7 @@ { "prompt": [ - "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n### User:\nAnswer the question based on the context below.\n\nContext: The main benefit of operators is to automate operations. Kubernetes operators are capable to automate the expensive and error likely human operations. Features like autopilot and self-healing are typical scenarios. Another benefit of operators is the reusability of software. Software providers can expose operators in various catalogs to reach new markets and to promote their software. 
Operators leverage the Kubernetes community, since they are a natural and Kubernetes-native way to extend Kubernetes.\n\nQuestion: What are the main benefits of Kubernetes Operators?\n\n### Assistant:\n" + "<|system|>\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n<|user|>\nAnswer the question based on the context below.\n\nContext: The main benefit of operators is to automate operations. Kubernetes operators are capable to automate the expensive and error likely human operations. Features like autopilot and self-healing are typical scenarios. Another benefit of operators is the reusability of software. Software providers can expose operators in various catalogs to reach new markets and to promote their software. Operators leverage the Kubernetes community, since they are a natural and Kubernetes-native way to extend Kubernetes.\n\nQuestion: What are the main benefits of Kubernetes Operators?\n<|assistant|>\n" ], "max_tokens": 128, - "stop": ["\n\n", "###"] + "stop": ["<|system|>", "<|user|>", "<|assistant|>", "</s>"] }