Skip to content

Commit

Permalink
Merge branch 'main' into chain-choice-states
Browse files Browse the repository at this point in the history
  • Loading branch information
ca-nguyen authored May 21, 2021
2 parents 851023f + 4f90ba3 commit 2c72de7
Show file tree
Hide file tree
Showing 14 changed files with 560 additions and 56 deletions.
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ Before sending us a pull request, please ensure that:
### Running the Unit Tests

1. Install tox using `pip install tox`
1. Install test dependencies, including coverage, using `pip install .[test]`
1. cd into the aws-step-functions-data-science-sdk-python folder: `cd aws-step-functions-data-science-sdk-python` or `cd /environment/aws-step-functions-data-science-sdk-python`
1. Install test dependencies, including coverage, using `pip install ".[test]"`
1. Run the following tox command and verify that all code checks and unit tests pass: `tox tests/unit`

You can also run a single test with the following command: `tox -e py36 -- -s -vv <path_to_file><file_name>::<test_function_name>`
Expand All @@ -80,7 +80,7 @@ You should only worry about manually running any new integration tests that you

1. Create a new git branch:
```shell
git checkout -b my-fix-branch master
git checkout -b my-fix-branch
```
1. Make your changes, **including unit tests** and, if appropriate, integration tests.
1. Include unit tests when you contribute new features or make bug fixes, as they help to:
Expand Down
84 changes: 76 additions & 8 deletions src/stepfunctions/steps/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,31 @@
# permissions and limitations under the License.
from __future__ import absolute_import

from enum import Enum
from stepfunctions.steps.states import Task
from stepfunctions.steps.fields import Field
from stepfunctions.steps.integration_resources import IntegrationPattern, get_service_integration_arn

# Service names passed to get_service_integration_arn when building the
# resource ARN for each compute step (Lambda, Glue, ECS, Batch).
LAMBDA_SERVICE_NAME = "lambda"
GLUE_SERVICE_NAME = "glue"
ECS_SERVICE_NAME = "ecs"
BATCH_SERVICE_NAME = "batch"


class LambdaApi(Enum):
    """Lambda API names usable in a service integration resource ARN."""

    # API name as it appears in the ARN, e.g. arn:aws:states:::lambda:invoke
    Invoke = "invoke"


class GlueApi(Enum):
    """Glue API names usable in a service integration resource ARN."""

    # API name as it appears in the ARN, e.g. arn:aws:states:::glue:startJobRun
    StartJobRun = "startJobRun"


class EcsApi(Enum):
    """ECS API names usable in a service integration resource ARN."""

    # API name as it appears in the ARN, e.g. arn:aws:states:::ecs:runTask
    RunTask = "runTask"


class BatchApi(Enum):
    """Batch API names usable in a service integration resource ARN."""

    # API name as it appears in the ARN, e.g. arn:aws:states:::batch:submitJob
    SubmitJob = "submitJob"


class LambdaStep(Task):
Expand All @@ -37,10 +60,22 @@ def __init__(self, state_id, wait_for_callback=False, **kwargs):
result_path (str, optional): Path specifying the raw input’s combination with or replacement by the state’s result. (default: '$')
output_path (str, optional): Path applied to the state’s output after the application of `result_path`, producing the effective output which serves as the raw input for the next state. (default: '$')
"""

if wait_for_callback:
kwargs[Field.Resource.value] = 'arn:aws:states:::lambda:invoke.waitForTaskToken'
"""
Example resource arn: arn:aws:states:::lambda:invoke.waitForTaskToken
"""

kwargs[Field.Resource.value] = get_service_integration_arn(LAMBDA_SERVICE_NAME,
LambdaApi.Invoke,
IntegrationPattern.WaitForTaskToken)
else:
kwargs[Field.Resource.value] = 'arn:aws:states:::lambda:invoke'
"""
Example resource arn: arn:aws:states:::lambda:invoke
"""

kwargs[Field.Resource.value] = get_service_integration_arn(LAMBDA_SERVICE_NAME, LambdaApi.Invoke)


super(LambdaStep, self).__init__(state_id, **kwargs)

Expand All @@ -67,9 +102,20 @@ def __init__(self, state_id, wait_for_completion=True, **kwargs):
output_path (str, optional): Path applied to the state’s output after the application of `result_path`, producing the effective output which serves as the raw input for the next state. (default: '$')
"""
if wait_for_completion:
kwargs[Field.Resource.value] = 'arn:aws:states:::glue:startJobRun.sync'
"""
Example resource arn: arn:aws:states:::glue:startJobRun.sync
"""

kwargs[Field.Resource.value] = get_service_integration_arn(GLUE_SERVICE_NAME,
GlueApi.StartJobRun,
IntegrationPattern.WaitForCompletion)
else:
kwargs[Field.Resource.value] = 'arn:aws:states:::glue:startJobRun'
"""
Example resource arn: arn:aws:states:::glue:startJobRun
"""

kwargs[Field.Resource.value] = get_service_integration_arn(GLUE_SERVICE_NAME,
GlueApi.StartJobRun)

super(GlueStartJobRunStep, self).__init__(state_id, **kwargs)

Expand All @@ -96,9 +142,20 @@ def __init__(self, state_id, wait_for_completion=True, **kwargs):
output_path (str, optional): Path applied to the state’s output after the application of `result_path`, producing the effective output which serves as the raw input for the next state. (default: '$')
"""
if wait_for_completion:
kwargs[Field.Resource.value] = 'arn:aws:states:::batch:submitJob.sync'
"""
Example resource arn: arn:aws:states:::batch:submitJob.sync
"""

kwargs[Field.Resource.value] = get_service_integration_arn(BATCH_SERVICE_NAME,
BatchApi.SubmitJob,
IntegrationPattern.WaitForCompletion)
else:
kwargs[Field.Resource.value] = 'arn:aws:states:::batch:submitJob'
"""
Example resource arn: arn:aws:states:::batch:submitJob
"""

kwargs[Field.Resource.value] = get_service_integration_arn(BATCH_SERVICE_NAME,
BatchApi.SubmitJob)

super(BatchSubmitJobStep, self).__init__(state_id, **kwargs)

Expand All @@ -125,8 +182,19 @@ def __init__(self, state_id, wait_for_completion=True, **kwargs):
output_path (str, optional): Path applied to the state’s output after the application of `result_path`, producing the effective output which serves as the raw input for the next state. (default: '$')
"""
if wait_for_completion:
kwargs[Field.Resource.value] = 'arn:aws:states:::ecs:runTask.sync'
"""
Example resource arn: arn:aws:states:::ecs:runTask.sync
"""

kwargs[Field.Resource.value] = get_service_integration_arn(ECS_SERVICE_NAME,
EcsApi.RunTask,
IntegrationPattern.WaitForCompletion)
else:
kwargs[Field.Resource.value] = 'arn:aws:states:::ecs:runTask'
"""
Example resource arn: arn:aws:states:::ecs:runTask
"""

kwargs[Field.Resource.value] = get_service_integration_arn(ECS_SERVICE_NAME,
EcsApi.RunTask)

super(EcsRunTaskStep, self).__init__(state_id, **kwargs)
46 changes: 46 additions & 0 deletions src/stepfunctions/steps/integration_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License").
# You may not use this file except in compliance with the License.
# A copy of the License is located at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# or in the "license" file accompanying this file. This file is distributed
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

from __future__ import absolute_import

from enum import Enum
from stepfunctions.steps.utils import get_aws_partition


class IntegrationPattern(Enum):
    """
    Integration patterns understood by the task integration resource ARN builder.

    Each member's value is the suffix placed after the API name in the
    resource ARN (separated by a dot); an empty value means no suffix.
    """

    # e.g. arn:aws:states:::lambda:invoke.waitForTaskToken
    WaitForTaskToken = "waitForTaskToken"
    # e.g. arn:aws:states:::glue:startJobRun.sync
    WaitForCompletion = "sync"
    # Default pattern: the ARN ends at the API name, e.g. arn:aws:states:::lambda:invoke
    RequestResponse = ""


def get_service_integration_arn(service, api, integration_pattern=IntegrationPattern.RequestResponse):
    """
    ARN builder for task integration

    Args:
        service (str): The service name for the service integration
        api (Enum): The api of the service integration. Its ``value`` is used as the API name in the ARN.
        integration_pattern (IntegrationPattern, optional): The integration pattern for the task. (Default: IntegrationPattern.RequestResponse)

    Returns:
        str: The resource ARN in the form ``arn:<partition>:states:::<service>:<api>``, followed by
            ``.<integration pattern value>`` for any pattern other than RequestResponse.
    """
    arn = f"arn:{get_aws_partition()}:states:::{service}:{api.value}"
    # RequestResponse is the plain form of the ARN; every other pattern appends its value as a suffix.
    if integration_pattern != IntegrationPattern.RequestResponse:
        arn = f"{arn}.{integration_pattern.value}"
    return arn


108 changes: 95 additions & 13 deletions src/stepfunctions/steps/sagemaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,31 @@
# permissions and limitations under the License.
from __future__ import absolute_import

from enum import Enum
from stepfunctions.inputs import ExecutionInput, StepInput
from stepfunctions.steps.states import Task
from stepfunctions.steps.fields import Field
from stepfunctions.steps.utils import tags_dict_to_kv_list
from stepfunctions.steps.integration_resources import IntegrationPattern, get_service_integration_arn

from sagemaker.workflow.airflow import training_config, transform_config, model_config, tuning_config, processing_config
from sagemaker.model import Model, FrameworkModel
from sagemaker.model_monitor import DataCaptureConfig

# Service name passed to get_service_integration_arn when building the
# resource ARN for the SageMaker steps.
SAGEMAKER_SERVICE_NAME = "sagemaker"


class SageMakerApi(Enum):
    """
    SageMaker API names usable in a service integration resource ARN.

    Each value is the API name as it appears in the ARN,
    e.g. arn:aws:states:::sagemaker:createTrainingJob
    """

    CreateTrainingJob = "createTrainingJob"
    CreateTransformJob = "createTransformJob"
    CreateModel = "createModel"
    CreateEndpointConfig = "createEndpointConfig"
    UpdateEndpoint = "updateEndpoint"
    CreateEndpoint = "createEndpoint"
    CreateHyperParameterTuningJob = "createHyperParameterTuningJob"
    CreateProcessingJob = "createProcessingJob"


class TrainingStep(Task):

"""
Expand Down Expand Up @@ -58,9 +74,20 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non
self.job_name = job_name

if wait_for_completion:
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createTrainingJob.sync'
"""
Example resource arn: arn:aws:states:::sagemaker:createTrainingJob.sync
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateTrainingJob,
IntegrationPattern.WaitForCompletion)
else:
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createTrainingJob'
"""
Example resource arn: arn:aws:states:::sagemaker:createTrainingJob
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateTrainingJob)

if isinstance(job_name, str):
parameters = training_config(estimator=estimator, inputs=data, job_name=job_name, mini_batch_size=mini_batch_size)
Expand Down Expand Up @@ -141,9 +168,20 @@ def __init__(self, state_id, transformer, job_name, model_name, data, data_type=
join_source (str): The source of data to be joined to the transform output. It can be set to ‘Input’ meaning the entire input record will be joined to the inference result. You can use OutputFilter to select the useful portion before uploading to S3. (default: None). Valid values: Input, None.
"""
if wait_for_completion:
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createTransformJob.sync'
"""
Example resource arn: arn:aws:states:::sagemaker:createTransformJob.sync
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateTransformJob,
IntegrationPattern.WaitForCompletion)
else:
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createTransformJob'
"""
Example resource arn: arn:aws:states:::sagemaker:createTransformJob
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateTransformJob)

if isinstance(job_name, str):
parameters = transform_config(
Expand Down Expand Up @@ -225,7 +263,13 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No
parameters['Tags'] = tags_dict_to_kv_list(tags)

kwargs[Field.Parameters.value] = parameters
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createModel'

"""
Example resource arn: arn:aws:states:::sagemaker:createModel
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateModel)

super(ModelStep, self).__init__(state_id, **kwargs)

Expand Down Expand Up @@ -266,7 +310,13 @@ def __init__(self, state_id, endpoint_config_name, model_name, initial_instance_
if tags:
parameters['Tags'] = tags_dict_to_kv_list(tags)

kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createEndpointConfig'
"""
Example resource arn: arn:aws:states:::sagemaker:createEndpointConfig
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateEndpointConfig)

kwargs[Field.Parameters.value] = parameters

super(EndpointConfigStep, self).__init__(state_id, **kwargs)
Expand Down Expand Up @@ -298,9 +348,19 @@ def __init__(self, state_id, endpoint_name, endpoint_config_name, tags=None, upd
parameters['Tags'] = tags_dict_to_kv_list(tags)

if update:
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:updateEndpoint'
"""
Example resource arn: arn:aws:states:::sagemaker:updateEndpoint
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.UpdateEndpoint)
else:
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createEndpoint'
"""
Example resource arn: arn:aws:states:::sagemaker:createEndpoint
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateEndpoint)

kwargs[Field.Parameters.value] = parameters

Expand Down Expand Up @@ -338,9 +398,20 @@ def __init__(self, state_id, tuner, job_name, data, wait_for_completion=True, ta
tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
"""
if wait_for_completion:
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createHyperParameterTuningJob.sync'
"""
Example resource arn: arn:aws:states:::sagemaker:createHyperParameterTuningJob.sync
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateHyperParameterTuningJob,
IntegrationPattern.WaitForCompletion)
else:
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createHyperParameterTuningJob'
"""
Example resource arn: arn:aws:states:::sagemaker:createHyperParameterTuningJob
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateHyperParameterTuningJob)

parameters = tuning_config(tuner=tuner, inputs=data, job_name=job_name).copy()

Expand Down Expand Up @@ -387,10 +458,21 @@ def __init__(self, state_id, processor, job_name, inputs=None, outputs=None, exp
tags (list[dict], optional): `List of tags <https://docs.aws.amazon.com/sagemaker/latest/dg/API_Tag.html>`_ to associate with the resource.
"""
if wait_for_completion:
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createProcessingJob.sync'
"""
Example resource arn: arn:aws:states:::sagemaker:createProcessingJob.sync
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateProcessingJob,
IntegrationPattern.WaitForCompletion)
else:
kwargs[Field.Resource.value] = 'arn:aws:states:::sagemaker:createProcessingJob'

"""
Example resource arn: arn:aws:states:::sagemaker:createProcessingJob
"""

kwargs[Field.Resource.value] = get_service_integration_arn(SAGEMAKER_SERVICE_NAME,
SageMakerApi.CreateProcessingJob)

if isinstance(job_name, str):
parameters = processing_config(processor=processor, inputs=inputs, outputs=outputs, container_arguments=container_arguments, container_entrypoint=container_entrypoint, kms_key_id=kms_key_id, job_name=job_name)
else:
Expand Down
Loading

0 comments on commit 2c72de7

Please sign in to comment.