Bug: Executor - Fix executor for Benchmark Execution Without Explicit Framework Field (#636)

**Description**
Fix the executor so that benchmark execution no longer fails when a benchmark's configuration omits an explicit `frameworks` field; the executor now falls back to `Framework.NONE.value` for such entries.
RyoYang authored Aug 20, 2024
1 parent 7af75df commit 96cc4d9
Showing 4 changed files with 68 additions and 24 deletions.
41 changes: 20 additions & 21 deletions superbench/executor/executor.py
@@ -228,29 +228,16 @@ def exec(self):
logger.warning('Monitor can not support CPU platform.')

benchmark_real_name = benchmark_name.split(':')[0]
if 'frameworks' in benchmark_config:
for framework in benchmark_config.frameworks or [Framework.NONE.value]:
if benchmark_real_name == 'model-benchmarks' or (
':' not in benchmark_name and benchmark_name.endswith('_models')
):
for model in benchmark_config.models:
full_name = f'{benchmark_name}/{framework}-{model}'
logger.info('Executor is going to execute %s.', full_name)
context = BenchmarkRegistry.create_benchmark_context(
model,
platform=self.__get_platform(),
framework=Framework(framework.lower()),
parameters=self.__get_arguments(
{} if 'parameters' not in benchmark_config else benchmark_config.parameters
)
)
result = self.__exec_benchmark(full_name, context)
benchmark_results.append(result)
else:
full_name = benchmark_name
frameworks = benchmark_config.get('frameworks', [Framework.NONE.value])
for framework in frameworks:
if benchmark_real_name == 'model-benchmarks' or (
':' not in benchmark_name and benchmark_name.endswith('_models')
):
for model in benchmark_config.models:
full_name = f'{benchmark_name}/{framework}-{model}'
logger.info('Executor is going to execute %s.', full_name)
context = BenchmarkRegistry.create_benchmark_context(
benchmark_real_name,
model,
platform=self.__get_platform(),
framework=Framework(framework.lower()),
parameters=self.__get_arguments(
@@ -259,6 +246,18 @@
)
result = self.__exec_benchmark(full_name, context)
benchmark_results.append(result)
else:
full_name = benchmark_name
logger.info('Executor is going to execute %s.', full_name)
context = BenchmarkRegistry.create_benchmark_context(
benchmark_real_name,
platform=self.__get_platform(),
framework=Framework(framework.lower()),
parameters=self.
__get_arguments({} if 'parameters' not in benchmark_config else benchmark_config.parameters)
)
result = self.__exec_benchmark(full_name, context)
benchmark_results.append(result)

if monitor:
monitor.stop()
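The core of the executor fix is visible in this hunk: instead of iterating `benchmark_config.frameworks` directly, the loop now reads `benchmark_config.get('frameworks', [Framework.NONE.value])`, so a benchmark entry that omits the field still gets exactly one pass with the `none` framework. Below is a minimal sketch of that fallback pattern; the `Framework` enum and the plain-dict example entries are simplified stand-ins for SuperBench's real `Framework` enum and OmegaConf-backed benchmark config.

```python
from enum import Enum


class Framework(Enum):
    """Simplified stand-in for superbench.benchmarks.Framework."""
    NONE = 'none'
    PYTORCH = 'pytorch'


def resolve_frameworks(benchmark_config):
    """Return the frameworks to iterate over, defaulting when the field is absent."""
    return benchmark_config.get('frameworks', [Framework.NONE.value])


# Entry with an explicit frameworks field.
print(resolve_frameworks({'frameworks': ['pytorch'], 'models': ['resnet50']}))  # ['pytorch']
# Entry without the field still yields one iteration, using the 'none' framework.
print(resolve_frameworks({'parameters': {'num_warmup': 16}}))                   # ['none']
```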
4 changes: 2 additions & 2 deletions superbench/runner/runner.py
@@ -84,7 +84,7 @@ def __validate_sb_config(self): # noqa: C901
if 'proc_num' not in mode:
self._sb_benchmarks[name].modes[idx].proc_num = 8
elif mode.name == 'mpi':
if 'machinefile' not in mode:
if 'mca' not in mode:
self._sb_benchmarks[name].modes[idx].mca = {
'pml': 'ob1',
'btl': '^openib',
@@ -448,7 +448,7 @@ def _run_proc(self, benchmark_name, mode, vars):
mode.env.update({'SB_MODE_SERIAL_INDEX': mode.serial_index, 'SB_MODE_PARALLEL_INDEX': mode.parallel_index})
logger.info('Runner is going to run %s in %s mode, proc rank %d.', benchmark_name, mode.name, mode.proc_rank)

timeout = self._sb_benchmarks[benchmark_name].get('timeout', 60)
timeout = self._sb_benchmarks[benchmark_name].get('timeout', None)
if isinstance(timeout, int):
timeout = max(timeout, 60)

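Two behavioral changes sit in runner.py: the MPI-mode validation now checks for a missing `mca` field (rather than `machinefile`) before filling in default `mca` settings, and `_run_proc` now treats a missing `timeout` as no timeout at all instead of defaulting to 60 seconds, while still clamping explicit integer values to a 60-second floor. A rough sketch of the new timeout resolution, assuming `None` is passed through downstream to mean no enforced timeout:

```python
def resolve_timeout(benchmark_config: dict):
    """Resolve a benchmark's timeout the way the updated _run_proc lines above do."""
    timeout = benchmark_config.get('timeout', None)
    if isinstance(timeout, int):
        # Explicit integer timeouts are never allowed below 60 seconds.
        timeout = max(timeout, 60)
    return timeout


assert resolve_timeout({'timeout': 120}) == 120   # large values pass through unchanged
assert resolve_timeout({'timeout': 30}) == 60     # small values are raised to the floor
assert resolve_timeout({}) is None                # missing field -> no timeout is applied
```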
4 changes: 3 additions & 1 deletion tests/executor/test_executor.py
@@ -166,5 +166,7 @@ def test_exec_default_benchmarks(self, mock_launch_benchmark):
self.assertTrue(p.is_dir())
self.assertTrue((p / 'results.json').is_file())
with (p / 'results.json').open() as f:
for result in json.load(f):
results = json.load(f)
self.assertTrue(len(results) > 0)
for result in results:
self.assertIn(benchmark_name, result['name'])
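The test tightening here guards against a silent gap: the old version looped over `json.load(f)` and asserted on each item, so an empty `results.json` produced zero assertions and a spurious pass. A small illustration of that vacuous-loop pitfall:

```python
import json

# A for-loop over an empty list runs its body zero times, so per-item
# assertions are vacuously satisfied when results.json contains '[]'.
results = json.loads('[]')
checked = 0
for result in results:
    checked += 1            # never executed for an empty payload
print(checked)              # 0 -- the old test would still have passed here

# The added guard (self.assertTrue(len(results) > 0)) turns this case into a failure.
```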
43 changes: 43 additions & 0 deletions tests/runner/test_runner.py
@@ -41,6 +41,22 @@ def test_set_logger(self):
expected_log_file = Path(self.runner._sb_output_dir) / 'sb-run.log'
self.assertTrue(expected_log_file.is_file())

def test_validate_sb_config(self):
"""Test validate_sb_config."""
self.runner._SuperBenchRunner__validate_sb_config()
self.assertIn('env', self.runner._sb_config.superbench)
for name in self.runner._sb_benchmarks:
self.assertIn('modes', self.runner._sb_config.superbench.benchmarks[name])
for mode in self.runner._sb_config.superbench.benchmarks[name].modes:
self.assertIn('env', mode)
if mode.name == 'local':
self.assertIn('proc_num', mode)
self.assertIn('prefix', mode)
if mode.name == 'torch.distributed':
self.assertIn('proc_num', mode)
if mode.name == 'mpi':
self.assertIn('mca', mode)

def test_get_failure_count(self):
"""Test get_failure_count."""
self.assertEqual(0, self.runner.get_failure_count())
@@ -410,3 +426,30 @@ def test_generate_metric_name(self):
test_case['run_count'], test_case['curr_rank'], test_case['curr_run']
), test_case['expected']
)

def test_run_proc_timeout(self):
"""Test run_proc_ timeout."""
self.runner._sb_benchmarks = {
'benchmark1': {
'timeout': 120
},
'benchmark2': {
'timeout': None
},
'benchmark3': {
'timeout': 30
},
}

test_cases = [
('benchmark1', 120),
('benchmark2', None),
('benchmark3', 60),
]

for benchmark_name, expected_timeout in test_cases:
with self.subTest(benchmark_name=benchmark_name):
timeout = self.runner._sb_benchmarks[benchmark_name].get('timeout', None)
if isinstance(timeout, int):
timeout = max(timeout, 60)
self.assertEqual(timeout, expected_timeout)
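The new `test_validate_sb_config` exercises the default back-filling that `__validate_sb_config` performs per mode, including the `mca` defaults whose guard was corrected in the runner.py hunk above. A rough sketch of that step for an `mpi` mode entry, using a plain dict instead of the runner's OmegaConf config; only the two `mca` entries visible in the diff are reproduced, and the rest of the default dictionary is elided just as it is in the truncated hunk.

```python
def fill_mpi_defaults(mode: dict) -> dict:
    """Back-fill default mca settings for an 'mpi' mode entry (sketch only)."""
    if mode.get('name') == 'mpi' and 'mca' not in mode:
        mode['mca'] = {
            'pml': 'ob1',
            'btl': '^openib',
            # ...remaining defaults elided, as in the truncated hunk above
        }
    return mode


print(fill_mpi_defaults({'name': 'mpi'}))               # defaults injected
print(fill_mpi_defaults({'name': 'mpi', 'mca': {}}))    # an explicit value is left untouched
```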
