Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new constant loader & Fix poisson loader issue #158

Merged
merged 10 commits into from
Oct 18, 2024
7 changes: 4 additions & 3 deletions evals/benchmark/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,11 @@ test_suite_config:
load_shape: # Tenant concurrency pattern
name: constant # poisson or constant(locust default load shape)
params: # Loadshape-specific parameters
constant: # Poisson load shape specific parameters, activate only if load_shape is poisson
constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
poisson: # Poisson load shape specific parameters, activate only if load_shape is poisson
arrival-rate: 1.0 # Request arrival rate
# arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate
poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
arrival_rate: 1.0 # Request arrival rate
warm_ups: 0 # Number of test requests for warm-ups
run_time: 60m # Total runtime for the test suite
seed: # The seed for all RNGs
Expand Down
2 changes: 1 addition & 1 deletion evals/benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def run_service_test(example, service_type, service, test_suite_config):
deployment_type,
test_suite_config.get("service_ip"),
test_suite_config.get("service_port"),
test_suite_config.get("namespace")
test_suite_config.get("namespace"),
lvliang-intel marked this conversation as resolved.
Show resolved Hide resolved
)

base_url = f"http://{svc_ip}:{port}"
Expand Down
7 changes: 4 additions & 3 deletions evals/benchmark/benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ test_suite_config: # Overall configuration settings for the test suite
load_shape: # Tenant concurrency pattern
name: constant # poisson or constant(locust default load shape)
params: # Loadshape-specific parameters
constant: # Poisson load shape specific parameters, activate only if load_shape is poisson
constant: # Constant load shape specific parameters, activate only if load_shape.name is constant
concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
poisson: # Poisson load shape specific parameters, activate only if load_shape is poisson
arrival-rate: 1.0 # Request arrival rate
# arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate
poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson
arrival_rate: 1.0 # Request arrival rate
namespace: "" # Fill the user-defined namespace. Otherwise, it will be default.

test_cases:
Expand Down
33 changes: 18 additions & 15 deletions evals/benchmark/stresscli/commands/load_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,17 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in
load_shape_conf = run_settings.get("load-shape", global_settings.get("load-shape", locust_defaults["load-shape"]))
try:
load_shape = load_shape_conf["name"]
runspec["load-shape"] = load_shape_conf
except KeyError:
load_shape = DEFAULT_LOADSHAPE

runspec["load-shape"] = load_shape
if load_shape == DEFAULT_LOADSHAPE:
load_shape_params = None
try:
load_shape_params = load_shape_conf["params"][load_shape]
except KeyError:
console_logger.info(f"The specified load shape not found: {load_shape}")

if load_shape == DEFAULT_LOADSHAPE and (load_shape_params is None or "arrival_rate" not in load_shape_params):
# constant load is Locust's default load shape, do nothing.
console_logger.info("Use default load shape.")
else:
Expand All @@ -155,7 +161,10 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in
if os.path.isfile(f_custom_load_shape):
# Add the locust file of the custom load shape into classpath
runspec["locustfile"] += f",{f_custom_load_shape}"
console_logger.info("Use custom load shape: {load_shape}")
if load_shape == DEFAULT_LOADSHAPE:
console_logger.info("Use default load shape based on request arrival rate")
else:
console_logger.info("Use custom load shape: {load_shape}")
else:
console_logger.error(
f"Unsupported load shape: {load_shape}."
Expand All @@ -180,15 +189,9 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in

spawn_rate = 100 if runspec["users"] > 100 else runspec["users"]

load_shape_params = None
try:
load_shape_params = load_shape_conf["params"][load_shape]
except KeyError:
console_logger.info(f"The specified load shape not found: {load_shape}")

# Dynamically allocate Locust processes to fit different loads
processes = 2
if load_shape == "constant":
if load_shape == "constant" and (load_shape_params is None or "arrival_rate" not in load_shape_params):
if runspec["max_requests"] > 0:
processes = 10 if runspec["max_requests"] > 2000 else 5 if runspec["max_requests"] > 1000 else processes
else:
Expand All @@ -197,11 +200,11 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in
concurrent_level = int(load_shape_params["concurrent_level"])
processes = 10 if concurrent_level > 400 else 5 if concurrent_level > 200 else processes
elif load_shape == "poisson":
if load_shape_params and "arrival-rate" in load_shape_params:
processes = max(2, math.ceil(int(load_shape_params["arrival-rate"]) / 5))
if load_shape_params and "arrival_rate" in load_shape_params:
processes = max(2, math.ceil(int(load_shape_params["arrival_rate"]) / 5))
else:
if load_shape_params and "arrival-rate" in load_shape_params:
processes = max(2, math.ceil(int(load_shape_params["arrival-rate"]) / 5))
if load_shape_params and "arrival_rate" in load_shape_params:
processes = max(2, math.ceil(int(load_shape_params["arrival_rate"]) / 5))
elif runspec["max_requests"] > 0:
processes = 10 if runspec["max_requests"] > 2000 else 5 if runspec["max_requests"] > 1000 else processes

Expand All @@ -214,7 +217,7 @@ def run_locust_test(kubeconfig, global_settings, run_settings, output_folder, in
"--run-time",
runspec["runtime"],
"--load-shape",
runspec["load-shape"],
load_shape,
"--dataset",
runspec["dataset"],
"--seed",
Expand Down
19 changes: 14 additions & 5 deletions evals/benchmark/stresscli/locust/aistress.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def __init__(self, *args, **kwargs):
def bench_main(self):
max_request = self.environment.parsed_options.max_request
if max_request >= 0 and AiStressUser.request >= max_request:
# For poisson load shape, a user only sends single request before it stops.
# For custom load shape based on arrival_rate, new users spawned after exceeding max_request is reached will be stopped.
# TODO: user should not care about load shape
if self.environment.parsed_options.load_shape == "poisson":
if "arrival_rate" in self.environment.parsed_options:
self.stop(force=True)

time.sleep(1)
Expand Down Expand Up @@ -186,10 +186,10 @@ def bench_main(self):
self.environment.runner.stats.log_request("POST", url, time.perf_counter() - start_ts, 0)
self.environment.runner.stats.log_error("POST", url, "Locust Request error")

# For poisson load shape, a user only sends single request before it stops.
# For custom load shape based on arrival_rate, a user only sends single request before it sleeps.
# TODO: user should not care about load shape
if self.environment.parsed_options.load_shape == "poisson":
self.stop(force=True)
if "arrival_rate" in self.environment.parsed_options:
time.sleep(365 * 60 * 60)

# def on_stop(self) -> None:

Expand Down Expand Up @@ -220,6 +220,7 @@ def on_locust_init(environment, **_kwargs):
environment.runner.register_message("worker_reqsent", on_reqsent)
if not isinstance(environment.runner, MasterRunner):
environment.runner.register_message("all_reqcnt", on_reqcount)
environment.runner.register_message("test_quit", on_quit)


@events.quitting.add_listener
Expand Down Expand Up @@ -249,12 +250,20 @@ def on_reqcount(msg, **kwargs):
AiStressUser.request = msg.data


def on_quit(environment, msg, **kwargs):
logging.debug("Test quitting, set stop_timeout to 0...")
environment.runner.environment.stop_timeout = 0


def checker(environment):
while environment.runner.state not in [STATE_STOPPING, STATE_STOPPED, STATE_CLEANUP]:
time.sleep(1)
max_request = environment.parsed_options.max_request
if max_request >= 0 and environment.runner.stats.num_requests >= max_request:
logging.info(f"Exceed the max-request number:{environment.runner.stats.num_requests}, Exit...")
# Remove stop_timeout after test quit to avoid Locust user stop exception with custom load shape
environment.runner.send_message("test_quit", None)
environment.runner.environment.stop_timeout = 0
# while environment.runner.user_count > 0:
time.sleep(5)
environment.runner.quit()
Expand Down
50 changes: 50 additions & 0 deletions evals/benchmark/stresscli/locust/constant_load_shape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import logging

import locust
from locust import LoadTestShape, events

logger = logging.getLogger(__name__)


@events.init_command_line_parser.add_listener
def _(parser):
parser.add_argument(
"--arrival_rate",
type=float,
default=1.0,
)


class ConstantRPSLoadShape(LoadTestShape):
use_common_options = True

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.arrival_rate = locust.argument_parser.parse_options().arrival_rate
self.last_tick = 0

def tick(self):
if self.last_tick == 0:
logger.info("Constant load shape arrival rate: {arrival_rate}".format(arrival_rate=self.arrival_rate))

run_time = self.get_run_time()
if run_time < self.runner.environment.parsed_options.run_time:
self.last_tick = run_time

new_users = int(self.arrival_rate)
current_users = self.get_current_user_count()
user_count = current_users + new_users
logger.debug(
"Current users: {current_users}, New users: {new_users}, Target users: {target_users}".format(
current_users=current_users, new_users=new_users, target_users=user_count
)
)
# Avoid illegal spawn_rate value of 0
spawn_rate = max(0.01, new_users)
return (user_count, spawn_rate)

self.runner.environment.stop_timeout = 0
return None
2 changes: 1 addition & 1 deletion evals/benchmark/stresscli/locust/poisson_load_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
@events.init_command_line_parser.add_listener
def _(parser):
parser.add_argument(
"--arrival-rate",
"--arrival_rate",
type=float,
default=1.0,
)
Expand Down
2 changes: 2 additions & 0 deletions evals/benchmark/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ def write_json(data, filename):
logging.error(f"Failed to write {filename}: {e}")
return False


from kubernetes import client, config


def get_service_cluster_ip(service_name, namespace="default"):
# Load the Kubernetes configuration
config.load_kube_config() # or use config.load_incluster_config() if running inside a Kubernetes pod
Expand Down