diff --git a/evals/benchmark/benchmark.py b/evals/benchmark/benchmark.py
index 45121234..6e5419aa 100644
--- a/evals/benchmark/benchmark.py
+++ b/evals/benchmark/benchmark.py
@@ -46,7 +46,9 @@ def extract_test_case_data(content):
         "test_output_dir": test_suite_config.get("test_output_dir"),
         "run_time": test_suite_config.get("run_time"),
         "collect_service_metric": test_suite_config.get("collect_service_metric"),
-        "all_case_data": {example: content["test_cases"].get(example, {}) for example in test_suite_config.get("examples", [])}
+        "all_case_data": {
+            example: content["test_cases"].get(example, {}) for example in test_suite_config.get("examples", [])
+        },
     }


@@ -70,6 +72,7 @@ def create_run_yaml_content(service_name, base_url, bench_target, concurrency, u
         }
     }

+
 def create_and_save_run_yaml(example, service_type, service_name, base_url, test_suite_config, index):
     """Create and save the run.yaml file for the service being tested."""
     os.makedirs(test_suite_config["test_output_dir"], exist_ok=True)
@@ -78,10 +81,16 @@ def create_and_save_run_yaml(example, service_type, service_name, base_url, test
     for user_queries in test_suite_config["user_queries"]:
         concurrency = max(1, user_queries // test_suite_config["concurrent_level"])

-        bench_target = f"{example}{'bench' if service_type == 'e2e' and test_suite_config['random_prompt'] else 'fixed'}"
-        run_yaml_content = create_run_yaml_content(service_name, base_url, bench_target, concurrency, user_queries, test_suite_config)
+        bench_target = (
+            f"{example}{'bench' if service_type == 'e2e' and test_suite_config['random_prompt'] else 'fixed'}"
+        )
+        run_yaml_content = create_run_yaml_content(
+            service_name, base_url, bench_target, concurrency, user_queries, test_suite_config
+        )

-        run_yaml_path = os.path.join(test_suite_config["test_output_dir"], f"run_{service_name}_{index}_users_{user_queries}.yaml")
+        run_yaml_path = os.path.join(
+            test_suite_config["test_output_dir"], f"run_{service_name}_{index}_users_{user_queries}.yaml"
+        )
         with open(run_yaml_path, "w") as yaml_file:
             yaml.dump(run_yaml_content, yaml_file)

@@ -101,7 +110,9 @@ def run_service_test(example, service_type, service_name, parameters, test_suite
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

     # Create the run.yaml for the service
-    run_yaml_paths = create_and_save_run_yaml(example, service_type, service_name, base_url, test_suite_config, timestamp)
+    run_yaml_paths = create_and_save_run_yaml(
+        example, service_type, service_name, base_url, test_suite_config, timestamp
+    )

     # Run the test using locust_runtests function
     for index, run_yaml_path in enumerate(run_yaml_paths, start=1):
@@ -115,7 +126,9 @@ def process_service(example, service_name, case_data, test_suite_config):
     service = case_data.get(service_name)
     if service and service.get("run_test"):
         print(f"[OPEA BENCHMARK] 🚀 Example: {example} Service: {service.get('service_name')}, Running test...")
-        run_service_test(example, service_name, service.get("service_name"), service.get("parameters", {}), test_suite_config)
+        run_service_test(
+            example, service_name, service.get("service_name"), service.get("parameters", {}), test_suite_config
+        )


 if __name__ == "__main__":
@@ -132,7 +145,6 @@ def process_service(example, service_name, case_data, test_suite_config):
         "test_output_dir": parsed_data["test_output_dir"],
     }

-
     # Mapping of example names to service types
     example_service_map = {
         "chatqna": [