Skip to content

Commit

Permalink
Benchmarks: Micro benchmark - Add graph mode in NCCL/RCCL benchmarks …
Browse files Browse the repository at this point in the history
…for latency metrics (#583)

**Description**
Revise the NCCL/RCCL benchmarks to add graph mode for latency metrics.
  • Loading branch information
yzygitzh authored Dec 5, 2023
1 parent 9ae8c67 commit 254ea7f
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ def add_parser_arguments(self):
default=5,
help='Number of warmup iterations. Default: 5.',
)
self._parser.add_argument(
'--graph_iters',
type=int,
default=0,
help='Number of graph launch iterations. Set to 0 to disable graph mode. Default: 0.',
)

def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
Expand Down Expand Up @@ -117,9 +123,9 @@ def _preprocess(self):
return False

command = os.path.join(self._args.bin_dir, self._bin_name)
command += ' -b {} -e {} -f {} -g {} -c {} -n {} -w {}'.format(
command += ' -b {} -e {} -f {} -g {} -c {} -n {} -w {} -G {}'.format(
self._args.minbytes, self._args.maxbytes, str(self._args.stepfactor), str(self._args.ngpus),
str(self._args.check), str(self._args.iters), str(self._args.warmup_iters)
str(self._args.check), str(self._args.iters), str(self._args.warmup_iters), str(self._args.graph_iters)
)
self._commands.append(command)

Expand Down
11 changes: 11 additions & 0 deletions superbench/config/azure_ndmv4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,17 @@ superbench:
NCCL_IB_DISABLE: '0'
parameters:
ngpus: 8
nccl-lat:default:
enable: true
modes:
- name: mpi
proc_num: 8
node_num: 1
parameters:
maxbytes: 16M
warmup_iters: 20
iters: 1000
graph_iters: 1
ib-loopback:
enable: true
modes:
Expand Down
11 changes: 11 additions & 0 deletions superbench/config/azure_ndv4.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,17 @@ superbench:
NCCL_IB_DISABLE: '0'
parameters:
ngpus: 8
nccl-lat:default:
enable: true
modes:
- name: mpi
proc_num: 8
node_num: 1
parameters:
maxbytes: 16M
warmup_iters: 20
iters: 1000
graph_iters: 1
ib-loopback:
enable: true
modes:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def test_nccl_bw_performance(self, allgather, allreduce, reduce, broadcast, redu
assert (benchmark._args.check == 0)
assert (benchmark._args.iters == 20)
assert (benchmark._args.warmup_iters == 5)
assert (benchmark._args.graph_iters == 0)

# Check command list
bin_names = [
Expand All @@ -73,7 +74,7 @@ def test_nccl_bw_performance(self, allgather, allreduce, reduce, broadcast, redu
]

command = bin_names[0] + benchmark._commands[0].split(bin_names[0])[1]
expected_command = '{} -b 8 -e 8G -f 2 -g 8 -c 0 -n 20 -w 5'.format(bin_names[0])
expected_command = '{} -b 8 -e 8G -f 2 -g 8 -c 0 -n 20 -w 5 -G 0'.format(bin_names[0])
assert (command == expected_command)

# Check results and metrics.
Expand Down

0 comments on commit 254ea7f

Please sign in to comment.