feat: added llava-1.5-13b model
limcheekin committed Nov 12, 2023
1 parent 5152221 commit 2330f77
Showing 5 changed files with 12 additions and 12 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/dev.yml
@@ -3,7 +3,7 @@ name: Deploy Dev
 on:
   push:
     branches:
-      - zephyr-7b
+      - llava-1.5-13b
 jobs:
   deploy-dev:
     runs-on: ubuntu-latest
@@ -27,4 +27,4 @@ jobs:
         run: cd ${{ github.ref_name }};modal deploy fastapi_app.py
 
       - name: Test FastAPI app
-        run: "pwd;cd ${{ github.ref_name }};curl -X POST -H 'Content-Type: application/json' -d @prompt.json ${{ secrets.ZEPHYR_7B_APP_URL }}v1/completions"
+        run: "pwd;cd ${{ github.ref_name }};curl -X POST -H 'Content-Type: application/json' -d @prompt.json ${{ secrets.LLAVA_1_5_13B_APP_URL }}v1/completions"
2 changes: 1 addition & 1 deletion zephyr-7b/Dockerfile → llava-1.5-13b/Dockerfile
@@ -15,4 +15,4 @@ RUN pip install -U pip setuptools wheel && \
 
 # Download model
 RUN mkdir model && \
-    curl -L https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf -o model/gguf-model.bin
+    curl -L https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/ggml-model-q5_k.gguf -o model/gguf-model.bin
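The commit swaps the Zephyr GGUF for the llava-1.5-13b quantization from mys/ggml_llava-v1.5-13b. As an optional local sanity check (not part of the commit), the downloaded file can be loaded with llama-cpp-python; the relative path below assumes the script runs from the image's build directory:

```python
# Hypothetical sanity check: load the GGUF the Dockerfile downloads and run a
# short completion. The path and parameters are assumptions, not from the commit.
from llama_cpp import Llama

llm = Llama(model_path="model/gguf-model.bin", n_ctx=512)
out = llm("### Instructions:\nSay hello.\n\n### Response:\n", max_tokens=16, stop=["###"])
print(out["choices"][0]["text"])
```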
4 changes: 2 additions & 2 deletions zephyr-7b/fastapi_app.py → llava-1.5-13b/fastapi_app.py
@@ -1,7 +1,7 @@
 # Modal Lab web app for llama.cpp.
 from modal import Image, Stub, asgi_app
 
-stub = Stub("zephyr-7b")
+stub = Stub("llava-1.5-13b")
 
 image = Image.from_dockerfile(
     "Dockerfile", force_build=True
@@ -11,7 +11,7 @@
 )
 
 
-@stub.function(image=image, cpu=14, memory=8704, keep_warm=1, timeout=600)
+@stub.function(image=image, cpu=14, memory=12288, keep_warm=1, timeout=600)
 @asgi_app()
 def fastapi_app():
     from llama_cpp.server.app import create_app, Settings
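The diff truncates the rest of fastapi_app. Below is a minimal sketch of how the body presumably continues, based on llama-cpp-python's bundled FastAPI server; the model path and context size are assumptions, not taken from the commit:

```python
# Sketch only: wire llama-cpp-python's server around the GGUF baked into the image.
from llama_cpp.server.app import create_app, Settings

settings = Settings(
    model="/model/gguf-model.bin",  # assumed location of the file fetched in the Dockerfile
    n_ctx=2048,                     # assumed context window
)
app = create_app(settings=settings)  # in the real file this is returned to Modal's @asgi_app wrapper
```

The memory bump from 8704 MB to 12288 MB presumably accommodates the larger 13B llava weights relative to the 7B Zephyr model.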
7 changes: 7 additions & 0 deletions llava-1.5-13b/prompt.json
@@ -0,0 +1,7 @@
+{
+  "prompt": [
+    "\n\n### Instructions:\nDescribe the AutoGen framework <img https://raw.githubusercontent.com/microsoft/autogen/main/website/static/img/autogen_agentchat.png> with bullet points.\n\n### Response:\n"
+  ],
+  "max_tokens": 512,
+  "stop": ["###"]
+}
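The new prompt.json embeds an <img ...> tag pointing at the AutoGen architecture diagram, which appears intended to exercise llava's image input. Here is a hedged sketch of sending this payload to the deployed app, mirroring the workflow's curl step; APP_URL stands in for the value stored in the LLAVA_1_5_13B_APP_URL secret:

```python
# Hedged usage sketch: POST prompt.json to the deployed app's v1/completions route.
import json
import urllib.request

APP_URL = "https://<your-modal-app>.modal.run/"  # placeholder, not the real deployment URL

with open("prompt.json", "rb") as f:
    payload = f.read()

req = urllib.request.Request(
    APP_URL + "v1/completions",
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["choices"][0]["text"])
```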
7 changes: 0 additions & 7 deletions zephyr-7b/prompt.json

This file was deleted.
