testing workflow

pytorch · May 21, 2024 · 4bf3a26
1 parent 1b34b7d
commit 4bf3a26
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 17 deletions.
diff --git a/.github/workflows/benchmark_nightly_lmi.yml b/.github/workflows/benchmark_nightly_lmi.yml
@@ -53,6 +53,9 @@ jobs:
           pip install -r benchmarks/requirements-ab.txt
       - name: Benchmark lmi nightly
         run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_lmi.yaml --skip false
+      - name: Clean up weights
+        run: |
+          rm -rf /home/ubuntu/Llama-2-7b-chat-hf
       # - name: Save benchmark artifacts
       #   uses: actions/upload-artifact@v2
       #   with:

diff --git a/benchmarks/models_config/llama-2-7b.yaml b/benchmarks/models_config/llama-2-7b.yaml
@@ -4,34 +4,34 @@ llama-2-7b:
     benchmark_engine: "ab"
     url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int8.mar
     workers:
-      - 4
-    batch_delay: 100
-    batch_size:
       - 1
-    input: "./examples/large_models/gpt_fast/request.json"
-    requests: 1000
-    concurrency: 4
-    backend_profiling: False
-    exec_env: "local"
-    processors:
-      - "gpus": "all"
-    stream: "false"
-  base:
-    benchmark_engine: "ab"
-    url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar
-    workers:
-      - 4
     batch_delay: 100
     batch_size:
       - 1
     input: "./examples/large_models/gpt_fast/request.json"
     requests: 1000
-    concurrency: 4
+    concurrency: 1
     backend_profiling: False
     exec_env: "local"
     processors:
       - "gpus": "all"
     stream: "false"
+  # base:
+  #   benchmark_engine: "ab"
+  #   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar
+  #   workers:
+  #     - 4
+  #   batch_delay: 100
+  #   batch_size:
+  #     - 1
+  #   input: "./examples/large_models/gpt_fast/request.json"
+  #   requests: 1000
+  #   concurrency: 4
+  #   backend_profiling: False
+  #   exec_env: "local"
+  #   processors:
+  #     - "gpus": "all"
+  #   stream: "false"
   # int8-tp:
   #   benchmark_engine: "ab"
   #   url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/llama-2-7b-int8-tp.mar