Skip to content

Commit

Permalink
Unit test for the LaunchInstancesError exception
Browse files: browse the repository at this point in the history
Signed-off-by: Luca Carrogu <[email protected]>
  • Loading branch information
lukeseawalker committed Oct 18, 2023
1 parent 19bb9d7 commit 8899511
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 40 deletions.
128 changes: 89 additions & 39 deletions tests/slurm_plugin/test_fleet_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,13 @@
import pytest
from assertpy import assert_that
from botocore.exceptions import ClientError
from slurm_plugin.fleet_manager import Ec2CreateFleetManager, EC2Instance, Ec2RunInstancesManager, FleetManagerFactory
from slurm_plugin.fleet_manager import (
Ec2CreateFleetManager,
EC2Instance,
Ec2RunInstancesManager,
FleetManagerFactory,
LaunchInstancesError,
)

from tests.common import FLEET_CONFIG, MockedBoto3Request

Expand Down Expand Up @@ -248,45 +254,48 @@ class TestEc2CreateFleetManager:
}

test_fleet_spot_params = {
"LaunchTemplateConfigs": [
{
"LaunchTemplateSpecification": {"LaunchTemplateName": "hit-queue1-fleet-spot", "Version": "$Latest"},
"Overrides": [
{"MaxPrice": "10", "InstanceType": "t2.medium", "SubnetId": "1234567"},
{"MaxPrice": "10", "InstanceType": "t2.large", "SubnetId": "1234567"},
],
}
],
"SpotOptions": {
"AllocationStrategy": "capacity-optimized",
"SingleInstanceType": False,
"SingleAvailabilityZone": True,
"MinTargetCapacity": 1,
},
"TargetCapacitySpecification": {"TotalTargetCapacity": 5, "DefaultTargetCapacityType": "spot"},
"Type": "instant",
}
"LaunchTemplateConfigs": [
{
"LaunchTemplateSpecification": {"LaunchTemplateName": "hit-queue1-fleet-spot", "Version": "$Latest"},
"Overrides": [
{"MaxPrice": "10", "InstanceType": "t2.medium", "SubnetId": "1234567"},
{"MaxPrice": "10", "InstanceType": "t2.large", "SubnetId": "1234567"},
],
}
],
"SpotOptions": {
"AllocationStrategy": "capacity-optimized",
"SingleInstanceType": False,
"SingleAvailabilityZone": True,
"MinTargetCapacity": 1,
},
"TargetCapacitySpecification": {"TotalTargetCapacity": 5, "DefaultTargetCapacityType": "spot"},
"Type": "instant",
}

test_on_demand_params = {
"LaunchTemplateConfigs": [
{
"LaunchTemplateSpecification": {"LaunchTemplateName": "hit-queue2-fleet-ondemand", "Version": "$Latest"},
"Overrides": [
{"InstanceType": "t2.medium", "SubnetId": "1234567"},
{"InstanceType": "t2.large", "SubnetId": "1234567"},
],
}
],
"OnDemandOptions": {
"AllocationStrategy": "lowest-price",
"SingleInstanceType": False,
"SingleAvailabilityZone": True,
"MinTargetCapacity": 1,
"CapacityReservationOptions": {"UsageStrategy": "use-capacity-reservations-first"},
},
"TargetCapacitySpecification": {"TotalTargetCapacity": 5, "DefaultTargetCapacityType": "on-demand"},
"Type": "instant",
}
"LaunchTemplateConfigs": [
{
"LaunchTemplateSpecification": {
"LaunchTemplateName": "hit-queue2-fleet-ondemand",
"Version": "$Latest",
},
"Overrides": [
{"InstanceType": "t2.medium", "SubnetId": "1234567"},
{"InstanceType": "t2.large", "SubnetId": "1234567"},
],
}
],
"OnDemandOptions": {
"AllocationStrategy": "lowest-price",
"SingleInstanceType": False,
"SingleAvailabilityZone": True,
"MinTargetCapacity": 1,
"CapacityReservationOptions": {"UsageStrategy": "use-capacity-reservations-first"},
},
"TargetCapacitySpecification": {"TotalTargetCapacity": 5, "DefaultTargetCapacityType": "on-demand"},
"Type": "instant",
}

@pytest.mark.parametrize(
(
Expand Down Expand Up @@ -594,8 +603,45 @@ def test_evaluate_launch_params(
}
],
),
# create-fleet - throttling
(
test_on_demand_params,
[
MockedBoto3Request(
method="create_fleet",
response={
"Instances": [],
"Errors": [
{"ErrorCode": "RequestLimitExceeded", "ErrorMessage": "Request limit exceeded."}
],
"ResponseMetadata": {"RequestId": "37633199-bcc6-4a88-89e3-89d859d76096"},
},
expected_params=test_on_demand_params,
),
],
[],
),
# create-fleet - multiple errors
(
test_on_demand_params,
[
MockedBoto3Request(
method="create_fleet",
response={
"Instances": [],
"Errors": [
{"ErrorCode": "RequestLimitExceeded", "ErrorMessage": "Request limit exceeded."},
{"ErrorCode": "AnotherError", "ErrorMessage": "Something went wrong"},
],
"ResponseMetadata": {"RequestId": "37633199-bcc6-4a88-89e3-89d859d76096"},
},
expected_params=test_on_demand_params,
),
],
[],
),
],
ids=["fleet_spot", "fleet_exception", "fleet_ondemand"],
ids=["fleet_spot", "fleet_exception", "fleet_ondemand", "fleet_throttling", "fleet_multiple_errors"],
)
def test_launch_instances(
self,
Expand All @@ -617,6 +663,10 @@ def test_launch_instances(
with pytest.raises(Exception) as e:
fleet_manager._launch_instances(launch_params)
assert isinstance(e, ClientError)
elif len(expected_assigned_nodes) == 0 and len(mocked_boto3_request[0].response.get("Errors")) == 1:
with pytest.raises(LaunchInstancesError) as e:
fleet_manager._launch_instances(launch_params)
assert isinstance(e, LaunchInstancesError)
else:
assigned_nodes = fleet_manager._launch_instances(launch_params)
assert_that(assigned_nodes.get("Instances", [])).is_equal_to(expected_assigned_nodes)
Expand Down
72 changes: 71 additions & 1 deletion tests/slurm_plugin/test_instance_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import slurm_plugin
from assertpy import assert_that
from slurm_plugin.common import ScalingStrategy
from slurm_plugin.fleet_manager import EC2Instance
from slurm_plugin.fleet_manager import EC2Instance, LaunchInstancesError
from slurm_plugin.instance_manager import (
HostnameDnsStoreError,
InstanceManager,
Expand Down Expand Up @@ -3252,6 +3252,76 @@ def test_node_assignment_by_scaling_strategy(
{},
{"InsufficientInstanceCapacity": {"queue4-st-c5xlarge-1"}},
),
(
None,
{
"queue1": {"c52xlarge": ["queue1-st-c52xlarge-1"]},
"queue2": {"c5xlarge": ["queue2-st-c5xlarge-1"]},
"queue3": {"p4d24xlarge": ["queue3-st-p4d24xlarge-1"]},
},
15,
{},
[
{
"Instances": [
{
"InstanceId": "i-12345",
"InstanceType": "c5.2xlarge",
"PrivateIpAddress": "ip.1.0.0.1",
"PrivateDnsName": "ip-1-0-0-1",
"LaunchTime": datetime(2020, 1, 1, tzinfo=timezone.utc),
"NetworkInterfaces": [
{
"Attachment": {
"DeviceIndex": 0,
"NetworkCardIndex": 0,
},
"PrivateIpAddress": "ip.1.0.0.1",
},
],
}
],
},
LaunchInstancesError("throttling", "got throttled"),
{
"Instances": [
{
"InstanceId": "i-123456",
"InstanceType": "p4d24xlarge",
"PrivateIpAddress": "ip.1.0.0.2",
"PrivateDnsName": "ip-1-0-0-2",
"LaunchTime": datetime(2020, 1, 1, tzinfo=timezone.utc),
"NetworkInterfaces": [
{
"Attachment": {
"DeviceIndex": 0,
"NetworkCardIndex": 0,
},
"PrivateIpAddress": "ip.1.0.0.2",
},
],
}
],
},
],
{
"queue1": {
"c52xlarge": [
EC2Instance(
"i-12345", "ip.1.0.0.1", "ip-1-0-0-1", datetime(2020, 1, 1, tzinfo=timezone.utc)
)
]
},
"queue3": {
"p4d24xlarge": [
EC2Instance(
"i-123456", "ip.1.0.0.2", "ip-1-0-0-2", datetime(2020, 1, 1, tzinfo=timezone.utc)
)
]
},
},
{"throttling": {"queue2-st-c5xlarge-1"}},
),
],
)
def test_launch_instances(
Expand Down

0 comments on commit 8899511

Please sign in to comment.