diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 3052a86..53c169d 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -3,7 +3,7 @@ name: Deploy Dev on: push: branches: - - zephyr-7b + - llava-1.5-13b jobs: deploy-dev: runs-on: ubuntu-latest @@ -27,4 +27,4 @@ jobs: run: cd ${{ github.ref_name }};modal deploy fastapi_app.py - name: Test FastAPI app - run: "pwd;cd ${{ github.ref_name }};curl -X POST -H 'Content-Type: application/json' -d @prompt.json ${{ secrets.ZEPHYR_7B_APP_URL }}v1/completions" + run: "pwd;cd ${{ github.ref_name }};curl -X POST -H 'Content-Type: application/json' -d @prompt.json ${{ secrets.LLAVA_1_5_13B_APP_URL }}v1/completions" diff --git a/zephyr-7b/Dockerfile b/llava-1.5-13b/Dockerfile similarity index 78% rename from zephyr-7b/Dockerfile rename to llava-1.5-13b/Dockerfile index 9fe7c4c..52cb248 100644 --- a/zephyr-7b/Dockerfile +++ b/llava-1.5-13b/Dockerfile @@ -15,4 +15,4 @@ RUN pip install -U pip setuptools wheel && \ # Download model RUN mkdir model && \ - curl -L https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf -o model/gguf-model.bin + curl -L https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/ggml-model-q5_k.gguf -o model/gguf-model.bin diff --git a/zephyr-7b/fastapi_app.py b/llava-1.5-13b/fastapi_app.py similarity index 88% rename from zephyr-7b/fastapi_app.py rename to llava-1.5-13b/fastapi_app.py index 7ba78f1..dbf6189 100644 --- a/zephyr-7b/fastapi_app.py +++ b/llava-1.5-13b/fastapi_app.py @@ -1,7 +1,7 @@ # Modal Lab web app for llama.cpp. from modal import Image, Stub, asgi_app -stub = Stub("zephyr-7b") +stub = Stub("llava-1.5-13b") image = Image.from_dockerfile( "Dockerfile", force_build=True @@ -11,7 +11,7 @@ ) -@stub.function(image=image, cpu=14, memory=8704, keep_warm=1, timeout=600) +@stub.function(image=image, cpu=14, memory=12288, keep_warm=1, timeout=600) @asgi_app() def fastapi_app(): from llama_cpp.server.app import create_app, Settings diff --git a/llava-1.5-13b/prompt.json b/llava-1.5-13b/prompt.json new file mode 100644 index 0000000..b73cc32 --- /dev/null +++ b/llava-1.5-13b/prompt.json @@ -0,0 +1,7 @@ +{ + "prompt": [ + "\n\n### Instructions:\nDescribe the AutoGen framework with bullet points.\n\n### Response:\n" + ], + "max_tokens": 512, + "stop": ["###"] +} diff --git a/zephyr-7b/prompt.json b/zephyr-7b/prompt.json deleted file mode 100644 index 1b38b8c..0000000 --- a/zephyr-7b/prompt.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "prompt": [ - "<|system|>\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n<|user|>\nAnswer the question based on the context below.\n\nContext: The main benefit of operators is to automate operations. Kubernetes operators are capable to automate the expensive and error likely human operations. Features like autopilot and self-healing are typical scenarios. Another benefit of operators is the reusability of software. Software providers can expose operators in various catalogs to reach new markets and to promote their software. Operators leverage the Kubernetes community, since they are a natural and Kubernetes-native way to extend Kubernetes.\n\nQuestion: What are the main benefits of Kubernetes Operators?\n<|assistant|>\n" - ], - "max_tokens": 128, - "stop": ["<|system|>", "<|user|>", "<|assistant|>", ""] -}