From e9d20c6624621cc87f034b7d724f01bc403206bf Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Tue, 17 Dec 2024 13:31:31 -0800 Subject: [PATCH 1/5] Fix misspelled fn names in aibs_informatics_aws_utils.efs.mount_point A number of private functions (thankfully) had misspelled names (`moint` -> `mount`). This commit corrects them. --- src/aibs_informatics_aws_utils/efs/mount_point.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/aibs_informatics_aws_utils/efs/mount_point.py b/src/aibs_informatics_aws_utils/efs/mount_point.py index 7651d8c..ec78d83 100644 --- a/src/aibs_informatics_aws_utils/efs/mount_point.py +++ b/src/aibs_informatics_aws_utils/efs/mount_point.py @@ -382,17 +382,17 @@ def detect_mount_points() -> List[MountPointConfiguration]: if batch_job_id := get_env_var("AWS_BATCH_JOB_ID"): logger.info(f"Detected Batch job {batch_job_id}") - batch_mp_configs = _detect_moint_points_from_batch_job(batch_job_id) + batch_mp_configs = _detect_mount_points_from_batch_job(batch_job_id) logger.info(f"Detected {len(batch_mp_configs)} EFS mount points from Batch") mount_points.extend(batch_mp_configs) elif lambda_function_name := get_env_var("AWS_LAMBDA_FUNCTION_NAME"): logger.info(f"Detected Lambda function {lambda_function_name}") - lambda_mp_configs = _detect_moint_points_from_lambda(lambda_function_name) + lambda_mp_configs = _detect_mount_points_from_lambda(lambda_function_name) logger.info(f"Detected {len(lambda_mp_configs)} EFS mount points from Lambda") mount_points.extend(lambda_mp_configs) else: logger.info("No Lambda or Batch environment detected. Using environment variables.") - env_mount_points = _detect_moint_points_from_env() + env_mount_points = _detect_mount_points_from_env() logger.info( f"Detected {len(env_mount_points)} EFS mount points from environment variables" ) @@ -441,7 +441,7 @@ def deduplicate_mount_points( # ------------------------------------ -def _detect_moint_points_from_lambda(lambda_function_name: str) -> List[MountPointConfiguration]: +def _detect_mount_points_from_lambda(lambda_function_name: str) -> List[MountPointConfiguration]: mount_points: List[MountPointConfiguration] = [] lambda_ = get_lambda_client() response = lambda_.get_function_configuration(FunctionName=lambda_function_name) @@ -458,7 +458,7 @@ def _detect_moint_points_from_lambda(lambda_function_name: str) -> List[MountPoi return _remove_invalid_mount_points(mount_points) -def _detect_moint_points_from_batch_job(batch_job_id: str) -> List[MountPointConfiguration]: +def _detect_mount_points_from_batch_job(batch_job_id: str) -> List[MountPointConfiguration]: mount_points: List[MountPointConfiguration] = [] batch = AWSService.BATCH.get_client() response = batch.describe_jobs(jobs=[batch_job_id]) @@ -496,7 +496,7 @@ def _detect_moint_points_from_batch_job(batch_job_id: str) -> List[MountPointCon return _remove_invalid_mount_points(mount_points) -def _detect_moint_points_from_env() -> List[MountPointConfiguration]: +def _detect_mount_points_from_env() -> List[MountPointConfiguration]: mount_points: List[MountPointConfiguration] = [] for k, v in os.environ.items(): From c1619a440bfa3a81cd0864e966deff09edf183b1 Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Tue, 17 Dec 2024 13:33:37 -0800 Subject: [PATCH 2/5] Standardize how clients are instantiated The `_detect_mount_points_from_batch_job()` function is obtaining its client in a slightly different way compared to the lambda version (`_detect_mount_points_from_lambda()`). This commit just standardizes the client instantiation method so that the batch version matches the lambda version of the function. --- src/aibs_informatics_aws_utils/efs/mount_point.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aibs_informatics_aws_utils/efs/mount_point.py b/src/aibs_informatics_aws_utils/efs/mount_point.py index ec78d83..7439302 100644 --- a/src/aibs_informatics_aws_utils/efs/mount_point.py +++ b/src/aibs_informatics_aws_utils/efs/mount_point.py @@ -460,7 +460,7 @@ def _detect_mount_points_from_lambda(lambda_function_name: str) -> List[MountPoi def _detect_mount_points_from_batch_job(batch_job_id: str) -> List[MountPointConfiguration]: mount_points: List[MountPointConfiguration] = [] - batch = AWSService.BATCH.get_client() + batch = get_batch_client() response = batch.describe_jobs(jobs=[batch_job_id]) job_container = response.get("jobs", [{}])[0].get("container", {}) batch_mount_points = job_container.get("mountPoints") From c90ff2af84782a3be63a167b349a6dbf764da878 Mon Sep 17 00:00:00 2001 From: Nicholas Mei Date: Tue, 17 Dec 2024 13:36:48 -0800 Subject: [PATCH 3/5] Add a retry to `detect_mount_points()` function During a merscope pipeline analysis run, a dist-data-sync: Batch Data Sync Batch SubmitJob API call failed due to the following proximal causes: ``` File "/var/task/aibs_informatics_aws_utils/efs/mount_point.py", line 385, in detect_mount_points batch_mp_configs = _detect_moint_points_from_batch_job(batch_job_id) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/var/task/aibs_informatics_aws_utils/efs/mount_point.py", line 464, in _detect_moint_points_from_batch_job response = batch.describe_jobs(jobs=[batch_job_id]) ``` The last `batch.describe_jobs()` call eventually resulted in the error: `botocore.exceptions.NoCredentialsError: Unable to locate credentials` This commit tries to fix this by adding a retry to the `detect_mount_points()` function (which calls the `_detect_moint_points_from_batch_job() function) as well as a lambda version. This retry will only do so if a `NoCredentialsError` is encountered under the assumption that such an error is ephemeral. --- src/aibs_informatics_aws_utils/efs/mount_point.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/aibs_informatics_aws_utils/efs/mount_point.py b/src/aibs_informatics_aws_utils/efs/mount_point.py index 7439302..d63f8fd 100644 --- a/src/aibs_informatics_aws_utils/efs/mount_point.py +++ b/src/aibs_informatics_aws_utils/efs/mount_point.py @@ -16,8 +16,10 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Union from aibs_informatics_core.models.aws.efs import AccessPointId, EFSPath, FileSystemId +from aibs_informatics_core.utils.decorators import retry from aibs_informatics_core.utils.hashing import sha256_hexdigest from aibs_informatics_core.utils.os_operations import get_env_var +from botocore.exceptions import NoCredentialsError from aibs_informatics_aws_utils.constants.efs import ( EFS_MOUNT_POINT_ID_VAR, @@ -377,6 +379,7 @@ def __repr__(self) -> str: @cache +@retry(retryable_exceptions=(NoCredentialsError), tries=5, backoff=2.0) def detect_mount_points() -> List[MountPointConfiguration]: mount_points: List[MountPointConfiguration] = [] From be0631d1f983f1450baceb36f4ed6e1ec9f0c0f9 Mon Sep 17 00:00:00 2001 From: Ryan McGinty Date: Wed, 18 Dec 2024 13:23:27 -0800 Subject: [PATCH 4/5] fix moto patch/credential errors in py3.12 --- .../data_sync/test_file_system.py | 1 - test/aibs_informatics_aws_utils/efs/base.py | 19 ++++++++++++++-- .../efs/test_core.py | 1 - .../efs/test_mount_point.py | 22 +++++++++++++------ 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/test/aibs_informatics_aws_utils/data_sync/test_file_system.py b/test/aibs_informatics_aws_utils/data_sync/test_file_system.py index 044e360..a17c183 100644 --- a/test/aibs_informatics_aws_utils/data_sync/test_file_system.py +++ b/test/aibs_informatics_aws_utils/data_sync/test_file_system.py @@ -281,7 +281,6 @@ def assertLocalFileSystem_partition( self.assertSetEqual(expected_node_paths, local_node_paths) -@moto.mock_aws class EFSFileSystemTests(EFSTestsBase): def setUp(self) -> None: super().setUp() diff --git a/test/aibs_informatics_aws_utils/efs/base.py b/test/aibs_informatics_aws_utils/efs/base.py index 9d0e87f..50a2f11 100644 --- a/test/aibs_informatics_aws_utils/efs/base.py +++ b/test/aibs_informatics_aws_utils/efs/base.py @@ -4,21 +4,36 @@ import boto3 import moto +import moto.core +import moto.core.config +import moto.core.decorator class EFSTestsBase(AwsBaseTest): def setUp(self) -> None: super().setUp() - self.mock_efs = moto.mock_aws() + # HACK: We must define moto.mock_aws here instead of as a decorator because moto gives us + # issues when we try to use the moto.mock_aws decorator in the child classes + # (as https://github.com/getmoto/moto/issues/7063 suggests). + # We previously double decorated - decorating the child and parent class - but this + # now fails in python 3.12 (perhaps cleanup of mock collisions leniency). + # + # This is a workaround until we can find a better solution. If configs need to be + # overridden, they can be passed in via the mock_aws_config property on the child class. + self.mock_efs = moto.mock_aws(config=self.mock_aws_config) self.mock_efs.start() self.set_aws_credentials() self._file_store_name_id_map: Dict[str, str] = {} def tearDown(self) -> None: + super().tearDown() self.mock_efs.stop() - return super().tearDown() + + @property + def mock_aws_config(self) -> Optional[moto.core.config.DefaultConfig]: + return None @property def efs_client(self): diff --git a/test/aibs_informatics_aws_utils/efs/test_core.py b/test/aibs_informatics_aws_utils/efs/test_core.py index 7bc24d6..6d82869 100644 --- a/test/aibs_informatics_aws_utils/efs/test_core.py +++ b/test/aibs_informatics_aws_utils/efs/test_core.py @@ -10,7 +10,6 @@ ) -@moto.mock_aws class EFSTests(EFSTestsBase): def test__list_efs_file_systems__filters_based_on_tag(self): file_system_id1 = self.create_file_system("fs1", env="dev") diff --git a/test/aibs_informatics_aws_utils/efs/test_mount_point.py b/test/aibs_informatics_aws_utils/efs/test_mount_point.py index fde7229..0571f00 100644 --- a/test/aibs_informatics_aws_utils/efs/test_mount_point.py +++ b/test/aibs_informatics_aws_utils/efs/test_mount_point.py @@ -1,11 +1,11 @@ import json from pathlib import Path from test.aibs_informatics_aws_utils.efs.base import EFSTestsBase -from typing import Optional, Tuple, Union -from unittest import mock, skip +from typing import TYPE_CHECKING, Optional, Tuple, Union import boto3 import moto +from moto.core.config import DefaultConfig from aibs_informatics_aws_utils.constants.efs import ( EFS_MOUNT_POINT_ID_VAR, @@ -18,13 +18,21 @@ detect_mount_points, ) +if TYPE_CHECKING: + from mypy_boto3_lambda.type_defs import FileSystemConfigTypeDef +else: # pragma: no cover + FileSystemConfigTypeDef = dict + -@moto.mock_aws(config={"batch": {"use_docker": False}}) class MountPointConfigurationTests(EFSTestsBase): def setUp(self) -> None: super().setUp() detect_mount_points.cache_clear() + @property + def mock_aws_config(self) -> Optional[DefaultConfig]: + return {"batch": {"use_docker": False}, "core": {"reset_boto3_session": True}} + def setUpEFS(self, *access_points: Tuple[str, Path], file_system_name: Optional[str] = None): self.create_file_system(file_system_name) for access_point_name, access_point_path in access_points: @@ -136,13 +144,13 @@ def test__detect_mount_points__lambda_config_overrides(self): # Set up lambda lambda_client = boto3.client("lambda") - file_system_configs = [ + file_system_configs: list[FileSystemConfigTypeDef] = [ { - "Arn": (c1.access_point or {}).get("AccessPointArn"), # type: ignore, + "Arn": (c1.access_point or {}).get("AccessPointArn"), # type: ignore "LocalMountPath": c1.mount_point.as_posix(), }, { - "Arn": (c2.access_point or {}).get("AccessPointArn"), # type: ignore, + "Arn": (c2.access_point or {}).get("AccessPointArn"), # type: ignore "LocalMountPath": c2.mount_point.as_posix(), }, ] @@ -331,7 +339,7 @@ def test__detect_mount_points__batch_job_config_overrides(self): describe_job_response = batch_client.describe_jobs(jobs=[job_id]) with self.stub(batch_client) as batch_stubber: - describe_job_response["jobs"][0]["container"][ + describe_job_response["jobs"][0]["container"][ # type: ignore "mountPoints" ] = batch_mount_point_configs batch_stubber.add_response( From b4414dd0b5dee7f6297bbb51943bdee45e224224 Mon Sep 17 00:00:00 2001 From: Ryan McGinty Date: Wed, 18 Dec 2024 13:54:25 -0800 Subject: [PATCH 5/5] Add macOS compatibility handling for data sync test --- .../aibs_informatics_aws_utils/data_sync/test_operations.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/aibs_informatics_aws_utils/data_sync/test_operations.py b/test/aibs_informatics_aws_utils/data_sync/test_operations.py index ce46036..790f57c 100644 --- a/test/aibs_informatics_aws_utils/data_sync/test_operations.py +++ b/test/aibs_informatics_aws_utils/data_sync/test_operations.py @@ -1,3 +1,4 @@ +import sys from pathlib import Path from test.aibs_informatics_aws_utils.base import AwsBaseTest from typing import Optional, Union @@ -6,6 +7,7 @@ from aibs_informatics_core.models.aws.s3 import S3URI from aibs_informatics_core.models.data_sync import RemoteToLocalConfig from aibs_informatics_core.utils.os_operations import find_all_paths +from pytest import mark from aibs_informatics_aws_utils.data_sync.operations import sync_data from aibs_informatics_aws_utils.s3 import get_s3_client, get_s3_resource, is_object, list_s3_paths @@ -360,6 +362,10 @@ def test__sync_data__s3_to_local__file__does_not_exist(self): ) assert not destination_path.exists() + @mark.xfail( + sys.platform == "darwin", + reason="Test does not run on macOS (tmp dir is /private which is not accessible)", + ) def test__sync_data__s3_to_local__file__auto_custom_tmp_dir__succeeds(self): fs = self.setUpLocalFS() self.setUpBucket()