Skip to content

Commit

Permalink
int4 and int8
Browse files — browse the repository at this point in the history
  • Loading branch information
Ubuntu committed May 21, 2024
1 parent 65fcb53 commit f9a629d
Showing 1 changed file with 34 additions and 18 deletions.
52 changes: 34 additions & 18 deletions benchmarks/models_config/llama-2-7b.yaml
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
---
llama-2-7b:
# int8:
# benchmark_engine: "ab"
# url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int8.mar
# workers:
# - 1
# batch_delay: 100
# batch_size:
# - 1
# input: "./examples/large_models/gpt_fast/request.json"
# requests: 1000
# concurrency: 1
# backend_profiling: False
# exec_env: "local"
# processors:
# - "gpus": "all"
# stream: "false"
base:
int8:
benchmark_engine: "ab"
url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int8.mar
workers:
- 1
batch_delay: 100
batch_size:
- 1
input: "./examples/large_models/gpt_fast/request.json"
requests: 1000
concurrency: 1
backend_profiling: False
exec_env: "local"
processors:
- "gpus": "all"
stream: "false"
int4:
benchmark_engine: "ab"
url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar
url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-int4.mar
workers:
- 1
batch_delay: 100
Expand All
@@ -32,6 +32,22 @@ llama-2-7b:
processors:
- "gpus": "all"
stream: "false"
# base:
# benchmark_engine: "ab"
# url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/mar+files/llama-2-7b-base.mar
# workers:
# - 1
# batch_delay: 100
# batch_size:
# - 1
# input: "./examples/large_models/gpt_fast/request.json"
# requests: 1000
# concurrency: 1
# backend_profiling: False
# exec_env: "local"
# processors:
# - "gpus": "all"
# stream: "false"
# int8-tp:
# benchmark_engine: "ab"
# url: https://torchserve.s3.amazonaws.com/mar_files/llama-2/llama-2-7b-int8-tp.mar
Expand Down

0 comments on commit f9a629d

Please sign in to comment.