diff --git a/.github/workflows/benchmark_nightly_lmi.yml b/.github/workflows/benchmark_nightly_lmi.yml index 178975b4c3..5c015fe51f 100644 --- a/.github/workflows/benchmark_nightly_lmi.yml +++ b/.github/workflows/benchmark_nightly_lmi.yml @@ -53,6 +53,9 @@ jobs: pip install -r benchmarks/requirements-ab.txt - name: Benchmark lmi nightly run: python benchmarks/auto_benchmark.py --input benchmarks/benchmark_config_lmi.yaml --skip false + - name: Clean up weights + run: | + rm -rf /home/ubuntu/Llama-2-7b-chat-hf # - name: Save benchmark artifacts # uses: actions/upload-artifact@v2 # with: diff --git a/benchmarks/models_config/llama-2-7b.yaml b/benchmarks/models_config/llama-2-7b.yaml index df9197080f..e6955d6632 100644 --- a/benchmarks/models_config/llama-2-7b.yaml +++ b/benchmarks/models_config/llama-2-7b.yaml @@ -4,34 +4,34 @@ llama-2-7b: benchmark_engine: "ab" url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int8.mar workers: - - 4 - batch_delay: 100 - batch_size: - 1 - input: "./examples/large_models/gpt_fast/request.json" - requests: 1000 - concurrency: 4 - backend_profiling: False - exec_env: "local" - processors: - - "gpus": "all" - stream: "false" - base: - benchmark_engine: "ab" - url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar - workers: - - 4 batch_delay: 100 batch_size: - 1 input: "./examples/large_models/gpt_fast/request.json" requests: 1000 - concurrency: 4 + concurrency: 1 backend_profiling: False exec_env: "local" processors: - "gpus": "all" stream: "false" + # base: + # benchmark_engine: "ab" + # url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar + # workers: + # - 4 + # batch_delay: 100 + # batch_size: + # - 1 + # input: "./examples/large_models/gpt_fast/request.json" + # requests: 1000 + # concurrency: 4 + # backend_profiling: False + # exec_env: "local" + # processors: + # - "gpus": "all" + # stream: "false" # int8-tp: # benchmark_engine: "ab" # url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/llama-2-7b-int8-tp.mar