Skip to content

Commit

Permalink
add configurations for NDv5 and AMD MI300
Browse files Browse the repository at this point in the history
  • Loading branch information
yukirora authored Sep 25, 2024
1 parent 0ac21a5 commit 711b9ef
Show file tree
Hide file tree
Showing 2 changed files with 537 additions and 0 deletions.
232 changes: 232 additions & 0 deletions superbench/config/amd_mi300.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
# SuperBench Config
version: v0.11
superbench:
enable: null
var:
default_local_mode: &default_local_mode
enable: true
modes:
- name: local
proc_num: 8
prefix: HIP_VISIBLE_DEVICES={proc_rank}
parallel: yes
default_pytorch_mode: &default_pytorch_mode
enable: true
modes:
- name: torch.distributed
proc_num: 8
node_num: 1
frameworks:
- pytorch
common_model_config: &common_model_config
model_ddp_parameter: &model_ddp_param
duration: 0
num_warmup: 128
num_steps: 512
sample_count: 8192
batch_size: 128
precision: [float32, float16]
model_action: [train]
pin_memory: yes
num_workers: 0
benchmarks:
kernel-launch:
<<: *default_local_mode
gemm-flops:
<<: *default_local_mode
parameters:
m: 7680
n: 8192
k: 8192
hipblaslt-gemm:
enable: true
modes:
- name: local
proc_num: 8
prefix: HIP_VISIBLE_DEVICES={proc_rank}
parallel: yes
parameters:
in_types: ["fp32", "fp16", "bf16", 'fp8']
tolerant_fail: yes
num_warmup: 100
num_steps: 1000
shapes:
- 4096,4096,4096
- 8192,8192,8192
- 16384,16384,16384
rccl-bw:
enable: true
modes:
- name: mpi
proc_num: 8
node_num: 1
mca:
pml: ob1
btl: ^openib
btl_tcp_if_exclude: lo,docker0
coll_hcoll_enable: 0
parameters:
maxbytes: 16G
ngpus: 1
operation: allreduce
cpu-memory-bw-latency:
enable: false
modes:
- name: local
proc_num: 1
parallel: no
parameters:
tests:
- bandwidth_matrix
- latency_matrix
- max_bandwidth
mem-bw:
enable: true
modes:
- name: local
proc_num: 8
prefix: HIP_VISIBLE_DEVICES={proc_rank} numactl -N $(({proc_rank}/4))
parallel: no
ib-loopback:
enable: true
modes:
- name: local
proc_num: 16
prefix: PROC_RANK={proc_rank} IB_DEVICES=0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7 numactl -N $(({proc_rank}/8)) -m $(({proc_rank}/8))
parallel: no
parameters:
msg_size: 8388608
disk-benchmark:
enable: false
modes:
- name: local
proc_num: 1
parallel: no
parameters:
block_devices: []
gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type: [htod, dtoh, dtod, one_to_all, all_to_one, all_to_all]
copy_type: [sm, dma]
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type: [htod, dtoh, dtod, one_to_all, all_to_one, all_to_all]
copy_type: [sm, dma]
ib-traffic:
enable: false
modes:
- name: mpi
proc_num: 1
mca:
btl: tcp,self
pml: ob1
btl_tcp_if_include: ens17f0
gpcnet-network-test:
enable: false
modes:
- name: mpi
proc_num: 1
mca:
pml: ucx
btl: ^uct
btl_tcp_if_include: ens17f0
tcp-connectivity:
enable: false
modes:
- name: local
parallel: no
parameters:
port: 22
dist-inference:
modes:
- name: mpi
proc_num: 8
node_num: 1
mca:
pml: ob1
btl: ^openib
btl_tcp_if_exclude: lo,docker0
coll_hcoll_enable: 0
frameworks:
- pytorch
parameters:
num_layers: 50
num_warmup: 20
num_steps: 100
use_cuda_graph: true
precision: float16
hidden_size: 128
input_size: 128
batch_size: 1024
model-benchmarks:gpt:
enable: true
<<: *default_pytorch_mode
models:
- gpt2-small
- gpt2-large
parameters:
<<: *model_ddp_param
precision: [float32, float16, fp8_hybrid]
batch_size: 32
seq_len: 224
model-benchmarks:bert:
enable: true
<<: *default_pytorch_mode
models:
- bert-base
- bert-large
parameters:
<<: *model_ddp_param
precision: [float32, float16, fp8_hybrid]
seq_len: 224
model-benchmarks:lstm:
enable: true
<<: *default_pytorch_mode
models:
- lstm
parameters:
<<: *model_ddp_param
batch_size: 1024
input_size: 224
hidden_size: 1000
seq_len: 32
model-benchmarks:resnet:
enable: true
<<: *default_pytorch_mode
models:
- resnet50
- resnet101
- resnet152
parameters:
<<: *model_ddp_param
batch_size: 384
model-benchmarks:densenet:
enable: true
<<: *default_pytorch_mode
models:
- densenet169
- densenet201
parameters:
<<: *model_ddp_param
model-benchmarks:vgg:
enable: true
<<: *default_pytorch_mode
models:
- vgg11
- vgg13
- vgg16
- vgg19
parameters:
<<: *model_ddp_param
Loading

0 comments on commit 711b9ef

Please sign in to comment.