Skip to content

Commit

Permalink
Fix DataSyncRequest model
Browse files Browse the repository at this point in the history
The previous version of this model exposed `config` and `task`
properties that returned the request object itself (`self`) rather than
standalone `DataSyncConfig` and `DataSyncTask` instances.

This commit attempts to fix things by making the `config` and `task`
properties return the correct "types". It also adds in missing
fields in the `DataSyncConfig` model.

This commit also adds a new field to the `DataSyncConfig` model
called `remote_to_local_config`, which allows more fine-grained
control over remote (S3) to local filesystem transfers; specifically,
over the temporary location where data sync operations download individual objects.
  • Loading branch information
njmei committed Nov 7, 2024
1 parent 2e2e847 commit 47ab7ec
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 4 deletions.
37 changes: 35 additions & 2 deletions src/aibs_informatics_core/models/data_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,23 @@ class DataSyncTask(SchemaModel):
)


@dataclass
class RemoteToLocalConfig(SchemaModel):
    """Options for syncing a remote (S3) object to a local filesystem."""

    # Use a custom intermediate tmp dir when syncing an S3 object to a local
    # filesystem instead of using boto3's implementation, which creates a part
    # file (e.g. *.6eF5b5da) in the same parent dir as the desired destination
    # path. Disabled by default.
    use_custom_tmp_dir: bool = custom_field(default=False, mm_field=BooleanField())

    # Optional location of the custom tmp dir. The union field pairs EFSPath
    # with its own marshmallow field and (Path, str) with PathField.
    # NOTE(review): how a None value is interpreted when use_custom_tmp_dir is
    # True is not visible in this block -- confirm against the consumer.
    custom_tmp_dir: Optional[Union[EFSPath, Path]] = custom_field(
        default=None,
        mm_field=UnionField(
            [
                (EFSPath, EFSPath.as_mm_field()),
                ((Path, str), PathField()),
            ]
        ),
    )


@dataclass
class DataSyncConfig(SchemaModel):
max_concurrency: int = custom_field(default=25, mm_field=IntegerField())
Expand All @@ -104,17 +121,33 @@ class DataSyncConfig(SchemaModel):
force: bool = custom_field(default=False, mm_field=BooleanField())
size_only: bool = custom_field(default=False, mm_field=BooleanField())
fail_if_missing: bool = custom_field(default=True, mm_field=BooleanField())
remote_to_local_config: RemoteToLocalConfig = custom_field(
default=RemoteToLocalConfig(),
mm_field=RemoteToLocalConfig.as_mm_field(),
)


@dataclass
class DataSyncRequest(DataSyncConfig, DataSyncTask):  # type: ignore[misc]
    """A data sync request carrying both DataSyncConfig and DataSyncTask fields."""

    @property
    def config(self) -> DataSyncConfig:
        """Return a new DataSyncConfig built from this request's config fields.

        A standalone instance is constructed (rather than returning ``self``)
        so the result compares equal to a plain ``DataSyncConfig`` with the
        same values.
        """
        return DataSyncConfig(
            max_concurrency=self.max_concurrency,
            retain_source_data=self.retain_source_data,
            require_lock=self.require_lock,
            force=self.force,
            size_only=self.size_only,
            fail_if_missing=self.fail_if_missing,
            remote_to_local_config=self.remote_to_local_config,
        )

    @property
    def task(self) -> DataSyncTask:
        """Return a new DataSyncTask built from this request's task fields.

        A standalone instance is constructed (rather than returning ``self``)
        so the result compares equal to a plain ``DataSyncTask`` with the
        same values.
        """
        return DataSyncTask(
            source_path=self.source_path,
            destination_path=self.destination_path,
            source_path_prefix=self.source_path_prefix,
        )


@dataclass
Expand Down
10 changes: 8 additions & 2 deletions test/aibs_informatics_core/models/test_data_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,11 @@ def test__DataSyncRequest__properties():
destination_path=S3_URI,
retain_source_data=True,
)
assert request.config == request
assert request.task == request
assert request.config == DataSyncConfig(retain_source_data=True)
assert request.task == DataSyncTask(
source_path=S3_URI,
destination_path=S3_URI,
)


def test__BatchDataSyncRequest__from_dict():
Expand Down Expand Up @@ -144,6 +147,7 @@ def test__BatchDataSyncRequest__to_dict():
"force": False,
"size_only": False,
"retain_source_data": True,
"use_custom_object_download_tmp": True,
},
],
}
Expand Down Expand Up @@ -189,6 +193,7 @@ def test__PrepareBatchDataSyncResponse__to_dict():
"force": False,
"size_only": False,
"retain_source_data": True,
"use_custom_object_download_tmp": True,
},
],
},
Expand Down Expand Up @@ -240,6 +245,7 @@ def test__PrepareBatchDataSyncResponse__from_dict():
"force": False,
"size_only": False,
"retain_source_data": True,
"use_custom_object_download_tmp": True,
},
],
},
Expand Down

0 comments on commit 47ab7ec

Please sign in to comment.