From 3d97d9ec47c811f349a3c8ebe34aab6779d106b2 Mon Sep 17 00:00:00 2001
From: Adriana Lopez Lopez <71252798+dlpzx@users.noreply.github.com>
Date: Tue, 10 Dec 2024 08:16:58 +0100
Subject: [PATCH] Consistent get_ permissions - S3_Datasets (#1727)

### Feature or Bugfix
- Feature/Bugfix

### Detail
This PR is the first part of a series and only handles S3_Datasets and its child components, Tables and Folders.

#### Current implementation
Most API calls on a particular resource are restricted by GET_RESOURCE permissions. But for resources that are indexed in the Catalog, some metadata is considered public, because it helps data consumers discover and understand the data assets. Users click on these resources from the Catalog view and call one of the following APIs:
- getDataset
- getDatasetStorageLocation
- getDatasetTable
- getDashboard
- getRedshiftDataset
- getRedshiftTable

Of the above, all are initially decorated with resource_permission checks except getDataset and getDatasetTable.

#### Target implementation
- Public information should be available for data consumers to explore, which means we first need to remove the resource_permission checks from the APIs listed above.
- Not all information is public: to get AWS information and other restricted metadata, we still verify GET_X permissions on the resource.
- For restricted metadata, we should provide default placeholder values that do not break the frontend.

A sketch of the resulting pattern follows this description.

In addition, this PR removes some unused fields and simplifies `syncTables` to return an integer with the count of synced tables.

### Relates

### Testing
For each of the following, I tested with a user that has GET permissions and with a user that does not. FE views render and show either the information or an "unauthorized to view info" placeholder.
- [X] Dataset View, Dataset Edit Form and list Datasets
- [X] Dataset Data tab with list of tables and folders
- [X] Table view, Table Edit
- [X] Folder view and Folder Edit

Other checks:
- [X] Complete share request
- [X] With the requester, check table and folder and view the details of the account...
- [X] Worksheets query with owned table
- [X] Worksheets query with shared table
- [X] Crawl dataset - correct S3 path
- [X] Sync tables - correct params

### Security
Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)?
  - Is the input sanitized?
  - What precautions are you taking before deserializing the data you consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires authorization?
  - How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use standard, proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?

By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
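To make the target implementation concrete, here is a minimal, runnable Python sketch of the pattern: public fields stay on the parent GraphQL type, while restricted fields move under a `restricted` object whose resolver delegates to a service method guarded by a GET_X resource-permission check. Everything in the sketch is a simplified stand-in (`PERMISSIONS`, the `Dataset` dataclass, the explicit `username` argument, the local try/except); the real implementation uses `ResourcePolicyService.has_resource_permission`, reads the caller from the request context, and lets the unauthorized error surface through the GraphQL layer as a nulled field plus an error entry.

```python
from dataclasses import dataclass
from functools import wraps

# Toy policy store standing in for data.all's resource policies.
PERMISSIONS = {('alice', 'datasetUri-1'): {'GET_DATASET'}}


def has_resource_permission(permission):
    """Mimics ResourcePolicyService.has_resource_permission: raise if the
    caller lacks `permission` on the target resource."""
    def decorator(func):
        @wraps(func)
        def wrapper(username, uri, **kwargs):
            if permission not in PERMISSIONS.get((username, uri), set()):
                raise PermissionError(f'{permission} required on {uri}')
            return func(username, uri, **kwargs)
        return wrapper
    return decorator


@dataclass
class Dataset:
    datasetUri: str
    label: str          # public metadata: always resolvable from the Catalog
    AwsAccountId: str   # restricted metadata: only readable with GET_DATASET


@has_resource_permission('GET_DATASET')
def get_dataset_restricted_information(username, uri, dataset=None):
    # The service just echoes the dataset; the decorator is the actual gate.
    return dataset


def resolve_restricted(username, source):
    """GraphQL-style resolver for the `restricted` field: unauthorized
    callers see None, so the frontend can render a placeholder instead of
    breaking."""
    try:
        return get_dataset_restricted_information(username, source.datasetUri, dataset=source)
    except PermissionError:
        return None


ds = Dataset('datasetUri-1', 'sales-data', '111122223333')
print(resolve_restricted('alice', ds))  # full Dataset, incl. AwsAccountId
print(resolve_restricted('bob', ds))    # None -> FE prints '**********'
```

On the frontend, a nulled `restricted` field is rendered through the new `emptyPrintUnauthorized` helper (which prints `**********`), and the `syncTables` simplification follows the same spirit: the mutation now returns a plain integer, so the frontend reads `response.data.syncTables` directly instead of a paginated result.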
--- .../s3_datasets/api/dataset/resolvers.py | 6 ++ .../modules/s3_datasets/api/dataset/types.py | 45 +++++----- .../api/storage_location/resolvers.py | 6 ++ .../s3_datasets/api/storage_location/types.py | 48 +++-------- .../s3_datasets/api/table/mutations.py | 2 +- .../s3_datasets/api/table/resolvers.py | 6 ++ .../modules/s3_datasets/api/table/types.py | 17 +++- .../services/dataset_location_service.py | 10 ++- .../s3_datasets/services/dataset_service.py | 9 +- .../services/dataset_table_service.py | 12 +-- .../Folders/components/FolderOverview.js | 6 +- .../Folders/components/FolderS3Properties.js | 8 +- .../services/getDatasetStorageLocation.js | 10 ++- .../components/DatasetConsoleAccess.js | 15 ++-- .../S3_Datasets/components/DatasetFolders.js | 7 +- .../S3_Datasets/components/DatasetOverview.js | 4 +- .../components/DatasetStartCrawlerModal.js | 2 +- .../S3_Datasets/components/DatasetTables.js | 18 ++-- .../S3_Datasets/components/DatasetUpload.js | 2 +- .../services/listDatasetStorageLocations.js | 3 + .../S3_Datasets/services/startGlueCrawler.js | 2 - .../S3_Datasets/services/syncTables.js | 20 +---- .../S3_Datasets/views/DatasetEditForm.js | 2 +- .../Tables/services/getDatasetTable.js | 11 +-- .../src/modules/Tables/views/TableView.js | 6 +- .../modules/Worksheets/views/WorksheetView.js | 12 ++- .../services/graphql/Datasets/getDataset.js | 16 ++-- .../graphql/Datasets/listDatasetTables.js | 9 +- .../Datasets/listS3DatasetsOwnedByEnvGroup.js | 10 ++- .../utils/helpers/emptyPrintUnauthorized.js | 5 ++ frontend/src/utils/helpers/index.js | 1 + tests/modules/s3_datasets/conftest.py | 29 +++---- tests/modules/s3_datasets/test_dataset.py | 82 +++++++++---------- .../s3_datasets/test_dataset_glossary.py | 6 +- .../s3_datasets/test_dataset_location.py | 8 +- .../modules/s3_datasets/test_dataset_table.py | 8 +- tests/modules/s3_datasets_shares/conftest.py | 22 +++-- tests/permissions.py | 17 ++-- .../modules/catalog/conftest.py | 4 +- .../modules/s3_datasets/global_conftest.py | 24 ++++-- .../modules/s3_datasets/queries.py | 72 ++++++---------- .../modules/s3_datasets/test_s3_dataset.py | 4 +- .../modules/s3_datasets/test_s3_tables.py | 2 +- 43 files changed, 317 insertions(+), 291 deletions(-) create mode 100644 frontend/src/utils/helpers/emptyPrintUnauthorized.js diff --git a/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py b/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py index ef8acdbb7..db2fad2df 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py +++ b/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py @@ -112,6 +112,12 @@ def get_dataset_statistics(context: Context, source: S3Dataset, **kwargs): return DatasetService.get_dataset_statistics(source) +def get_dataset_restricted_information(context: Context, source: S3Dataset, **kwargs): + if not source: + return None + return DatasetService.get_dataset_restricted_information(uri=source.datasetUri, dataset=source) + + @is_feature_enabled('modules.s3_datasets.features.aws_actions') def get_dataset_assume_role_url(context: Context, source, datasetUri: str = None): return DatasetService.get_dataset_assume_role_url(uri=datasetUri) diff --git a/backend/dataall/modules/s3_datasets/api/dataset/types.py b/backend/dataall/modules/s3_datasets/api/dataset/types.py index 2b257bb19..cc2e88139 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/types.py +++ b/backend/dataall/modules/s3_datasets/api/dataset/types.py @@ -11,6 +11,7 @@ get_dataset_statistics, get_dataset_glossary_terms, 
resolve_dataset_stack, + get_dataset_restricted_information, ) from dataall.core.environment.api.enums import EnvironmentPermission @@ -23,6 +24,22 @@ ], ) +DatasetRestrictedInformation = gql.ObjectType( + name='DatasetRestrictedInformation', + fields=[ + gql.Field(name='AwsAccountId', type=gql.String), + gql.Field(name='region', type=gql.String), + gql.Field(name='S3BucketName', type=gql.String), + gql.Field(name='GlueDatabaseName', type=gql.String), + gql.Field(name='IAMDatasetAdminRoleArn', type=gql.String), + gql.Field(name='KmsAlias', type=gql.String), + gql.Field(name='importedS3Bucket', type=gql.Boolean), + gql.Field(name='importedGlueDatabase', type=gql.Boolean), + gql.Field(name='importedKmsKey', type=gql.Boolean), + gql.Field(name='importedAdminRole', type=gql.Boolean), + ], +) + Dataset = gql.ObjectType( name='Dataset', fields=[ @@ -35,29 +52,13 @@ gql.Field(name='created', type=gql.String), gql.Field(name='updated', type=gql.String), gql.Field(name='admins', type=gql.ArrayType(gql.String)), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='region', type=gql.String), - gql.Field(name='S3BucketName', type=gql.String), - gql.Field(name='GlueDatabaseName', type=gql.String), - gql.Field(name='GlueCrawlerName', type=gql.String), - gql.Field(name='GlueCrawlerSchedule', type=gql.String), - gql.Field(name='GlueProfilingJobName', type=gql.String), - gql.Field(name='GlueProfilingTriggerSchedule', type=gql.String), - gql.Field(name='IAMDatasetAdminRoleArn', type=gql.String), - gql.Field(name='KmsAlias', type=gql.String), - gql.Field(name='bucketCreated', type=gql.Boolean), - gql.Field(name='glueDatabaseCreated', type=gql.Boolean), - gql.Field(name='iamAdminRoleCreated', type=gql.Boolean), - gql.Field(name='lakeformationLocationCreated', type=gql.Boolean), - gql.Field(name='bucketPolicyCreated', type=gql.Boolean), gql.Field(name='SamlAdminGroupName', type=gql.String), - gql.Field(name='businessOwnerEmail', type=gql.String), - gql.Field(name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String)), - gql.Field(name='importedS3Bucket', type=gql.Boolean), - gql.Field(name='importedGlueDatabase', type=gql.Boolean), - gql.Field(name='importedKmsKey', type=gql.Boolean), - gql.Field(name='importedAdminRole', type=gql.Boolean), gql.Field(name='imported', type=gql.Boolean), + gql.Field( + name='restricted', + type=DatasetRestrictedInformation, + resolver=get_dataset_restricted_information, + ), gql.Field( name='environment', type=gql.Ref('EnvironmentSimplified'), @@ -130,8 +131,6 @@ name='GlueCrawler', fields=[ gql.Field(name='Name', type=gql.ID), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='region', type=gql.String), gql.Field(name='status', type=gql.String), ], ) diff --git a/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py b/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py index 39aa2d8cf..928d0d4f8 100644 --- a/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py +++ b/backend/dataall/modules/s3_datasets/api/storage_location/resolvers.py @@ -50,6 +50,12 @@ def resolve_dataset(context, source: DatasetStorageLocation, **kwargs): return DatasetService.find_dataset(uri=source.datasetUri) +def get_folder_restricted_information(context: Context, source: DatasetStorageLocation, **kwargs): + if not source: + return None + return DatasetLocationService.get_folder_restricted_information(uri=source.locationUri, folder=source) + + def resolve_glossary_terms(context: Context, source: 
DatasetStorageLocation, **kwargs): if not source: return None diff --git a/backend/dataall/modules/s3_datasets/api/storage_location/types.py b/backend/dataall/modules/s3_datasets/api/storage_location/types.py index 40070a287..14db04c06 100644 --- a/backend/dataall/modules/s3_datasets/api/storage_location/types.py +++ b/backend/dataall/modules/s3_datasets/api/storage_location/types.py @@ -1,5 +1,9 @@ from dataall.base.api import gql -from dataall.modules.s3_datasets.api.storage_location.resolvers import resolve_glossary_terms, resolve_dataset +from dataall.modules.s3_datasets.api.storage_location.resolvers import ( + resolve_glossary_terms, + resolve_dataset, + get_folder_restricted_information, +) DatasetStorageLocation = gql.ObjectType( name='DatasetStorageLocation', @@ -11,13 +15,15 @@ gql.Field(name='owner', type=gql.String), gql.Field(name='created', type=gql.String), gql.Field(name='updated', type=gql.String), - gql.Field(name='region', type=gql.String), gql.Field(name='tags', type=gql.ArrayType(gql.String)), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='S3BucketName', type=gql.String), gql.Field(name='S3Prefix', type=gql.String), gql.Field(name='locationCreated', type=gql.Boolean), gql.Field(name='dataset', type=gql.Ref('Dataset'), resolver=resolve_dataset), + gql.Field( + name='restricted', + type=gql.Ref('DatasetRestrictedInformation'), + resolver=get_folder_restricted_information, + ), gql.Field(name='userRoleForStorageLocation', type=gql.Ref('DatasetRole')), gql.Field(name='environmentEndPoint', type=gql.String), gql.Field( @@ -40,37 +46,3 @@ gql.Field(name='hasPrevious', type=gql.Boolean), ], ) - - -DatasetAccessPoint = gql.ObjectType( - name='DatasetAccessPoint', - fields=[ - gql.Field(name='accessPointUri', type=gql.ID), - gql.Field(name='location', type=DatasetStorageLocation), - gql.Field(name='dataset', type=gql.Ref('Dataset')), - gql.Field(name='name', type=gql.String), - gql.Field(name='description', type=gql.String), - gql.Field(name='owner', type=gql.String), - gql.Field(name='created', type=gql.String), - gql.Field(name='updated', type=gql.String), - gql.Field(name='region', type=gql.String), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='S3BucketName', type=gql.String), - gql.Field(name='S3Prefix', type=gql.String), - gql.Field(name='S3AccessPointName', type=gql.String), - ], -) - - -DatasetAccessPointSearchResult = gql.ObjectType( - name='DatasetAccessPointSearchResult', - fields=[ - gql.Field(name='count', type=gql.Integer), - gql.Field(name='page', type=gql.Integer), - gql.Field(name='pageSize', type=gql.Integer), - gql.Field(name='pages', type=gql.Integer), - gql.Field(name='hasNext', type=gql.Integer), - gql.Field(name='hasPrevious', type=gql.Integer), - gql.Field(name='nodes', type=gql.ArrayType(DatasetAccessPoint)), - ], -) diff --git a/backend/dataall/modules/s3_datasets/api/table/mutations.py b/backend/dataall/modules/s3_datasets/api/table/mutations.py index 9c846ebca..386a6fa0a 100644 --- a/backend/dataall/modules/s3_datasets/api/table/mutations.py +++ b/backend/dataall/modules/s3_datasets/api/table/mutations.py @@ -28,7 +28,7 @@ syncTables = gql.MutationField( name='syncTables', args=[gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String))], - type=gql.Ref('DatasetTableSearchResult'), + type=gql.Integer, resolver=sync_tables, ) diff --git a/backend/dataall/modules/s3_datasets/api/table/resolvers.py b/backend/dataall/modules/s3_datasets/api/table/resolvers.py index b88c62405..63d0b0299 100644 --- 
a/backend/dataall/modules/s3_datasets/api/table/resolvers.py +++ b/backend/dataall/modules/s3_datasets/api/table/resolvers.py @@ -72,3 +72,9 @@ def list_table_data_filters(context: Context, source, tableUri: str = None, filt if not filter: filter = {'page': 1, 'pageSize': 5} return DatasetTableDataFilterService.list_table_data_filters(uri=tableUri, data=filter) + + +def get_dataset_table_restricted_information(context: Context, source: DatasetTable, **kwargs): + if not source: + return None + return DatasetTableService.get_table_restricted_information(uri=source.tableUri, table=source) diff --git a/backend/dataall/modules/s3_datasets/api/table/types.py b/backend/dataall/modules/s3_datasets/api/table/types.py index e91291f6f..8464ad77e 100644 --- a/backend/dataall/modules/s3_datasets/api/table/types.py +++ b/backend/dataall/modules/s3_datasets/api/table/types.py @@ -4,6 +4,7 @@ resolve_dataset, get_glue_table_properties, resolve_glossary_terms, + get_dataset_table_restricted_information, ) TablePermission = gql.ObjectType( @@ -21,6 +22,15 @@ gql.Field(name='nodes', type=gql.ArrayType(TablePermission)), ], ) +DatasetTableRestrictedInformation = gql.ObjectType( + name='DatasetTableRestrictedInformation', + fields=[ + gql.Field(name='AwsAccountId', type=gql.String), + gql.Field(name='GlueDatabaseName', type=gql.String), + gql.Field(name='GlueTableName', type=gql.String), + gql.Field(name='S3Prefix', type=gql.String), + ], +) DatasetTable = gql.ObjectType( name='DatasetTable', @@ -35,12 +45,11 @@ gql.Field(name='created', type=gql.String), gql.Field(name='updated', type=gql.String), gql.Field(name='admins', type=gql.ArrayType(gql.String)), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='GlueDatabaseName', type=gql.String), - gql.Field(name='GlueTableName', type=gql.String), gql.Field(name='LastGlueTableStatus', type=gql.String), - gql.Field(name='S3Prefix', type=gql.String), gql.Field(name='GlueTableConfig', type=gql.String), + gql.Field( + name='restricted', type=DatasetTableRestrictedInformation, resolver=get_dataset_table_restricted_information + ), gql.Field( name='GlueTableProperties', type=gql.String, diff --git a/backend/dataall/modules/s3_datasets/services/dataset_location_service.py b/backend/dataall/modules/s3_datasets/services/dataset_location_service.py index ee83d1c5f..13c12d144 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_location_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_location_service.py @@ -3,7 +3,7 @@ from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository -from dataall.base.db.exceptions import ResourceShared, ResourceAlreadyExists +from dataall.base.db.exceptions import ResourceAlreadyExists from dataall.modules.s3_datasets.services.dataset_service import DatasetService from dataall.modules.s3_datasets.aws.s3_location_client import S3LocationClient from dataall.modules.s3_datasets.db.dataset_location_repositories import DatasetLocationRepository @@ -59,7 +59,6 @@ def list_dataset_locations(uri: str, filter: dict = None): return DatasetLocationRepository.list_dataset_locations(session=session, uri=uri, data=filter) @staticmethod - @ResourcePolicyService.has_resource_permission(GET_DATASET_FOLDER) def get_storage_location(uri): with get_context().db_engine.scoped_session() as session: return 
DatasetLocationRepository.get_location_by_uri(session, uri) @@ -135,3 +134,10 @@ def _delete_dataset_folder_read_permission(session, dataset: S3Dataset, location } for group in permission_group: ResourcePolicyService.delete_resource_policy(session=session, group=group, resource_uri=location_uri) + + @staticmethod + @ResourcePolicyService.has_resource_permission(GET_DATASET_FOLDER) + def get_folder_restricted_information(uri: str, folder: DatasetStorageLocation): + context = get_context() + with context.db_engine.scoped_session() as session: + return DatasetRepository.get_dataset_by_uri(session, folder.datasetUri) diff --git a/backend/dataall/modules/s3_datasets/services/dataset_service.py b/backend/dataall/modules/s3_datasets/services/dataset_service.py index 6d3010bf4..3575a4a21 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_service.py @@ -38,8 +38,8 @@ DATASET_ALL, DATASET_READ, IMPORT_DATASET, - GET_DATASET, DATASET_TABLE_ALL, + GET_DATASET, ) from dataall.modules.datasets_base.services.dataset_list_permissions import LIST_ENVIRONMENT_DATASETS from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository @@ -344,6 +344,11 @@ def get_dataset_statistics(dataset: S3Dataset): 'upvotes': count_upvotes or 0, } + @staticmethod + @ResourcePolicyService.has_resource_permission(GET_DATASET) + def get_dataset_restricted_information(uri: str, dataset: S3Dataset): + return dataset + @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) @ResourcePolicyService.has_resource_permission(CREDENTIALS_DATASET) @@ -397,8 +402,6 @@ def start_crawler(uri: str, data: dict = None): return { 'Name': dataset.GlueCrawlerName, - 'AwsAccountId': dataset.AwsAccountId, - 'region': dataset.region, 'status': crawler.get('LastCrawl', {}).get('Status', 'N/A'), } diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py index 386ab252e..21336c58b 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py @@ -1,5 +1,4 @@ import logging - from dataall.base.context import get_context from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService @@ -44,6 +43,11 @@ def get_table(uri: str): with get_context().db_engine.scoped_session() as session: return DatasetTableRepository.get_dataset_table_by_uri(session, uri) + @staticmethod + @ResourcePolicyService.has_resource_permission(GET_DATASET_TABLE) + def get_table_restricted_information(uri: str, table: DatasetTable): + return table + @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) @ResourcePolicyService.has_resource_permission(UPDATE_DATASET_TABLE, parent_resource=_get_dataset_uri) @@ -127,11 +131,7 @@ def sync_tables_for_dataset(cls, uri): DatasetTableIndexer.upsert_all(session=session, dataset_uri=dataset.datasetUri) DatasetTableIndexer.remove_all_deleted(session=session, dataset_uri=dataset.datasetUri) DatasetIndexer.upsert(session=session, dataset_uri=dataset.datasetUri) - return DatasetRepository.paginated_dataset_tables( - session=session, - uri=uri, - data={'page': 1, 'pageSize': 10}, - ) + return DatasetRepository.count_dataset_tables(session, dataset.datasetUri) @staticmethod def sync_existing_tables(session, 
uri, glue_tables=None): diff --git a/frontend/src/modules/Folders/components/FolderOverview.js b/frontend/src/modules/Folders/components/FolderOverview.js index c1b6c469c..6c3ec47bc 100644 --- a/frontend/src/modules/Folders/components/FolderOverview.js +++ b/frontend/src/modules/Folders/components/FolderOverview.js @@ -23,12 +23,14 @@ export const FolderOverview = (props) => { } /> - + {folder.restricted && ( + + )} { S3 URI - {`s3://${folder.dataset.S3BucketName}/${folder.S3Prefix}/`} + {`s3://${folder.restricted.S3BucketName}/${folder.S3Prefix}/`} @@ -27,7 +27,7 @@ export const FolderS3Properties = (props) => { S3 ARN - {`arn:aws:s3:::${folder.dataset.S3BucketName}/${folder.S3Prefix}/`} + {`arn:aws:s3:::${folder.restricted.S3BucketName}/${folder.S3Prefix}/`} @@ -35,7 +35,7 @@ export const FolderS3Properties = (props) => { Region - {folder.dataset.region} + {folder.restricted.region} @@ -43,7 +43,7 @@ export const FolderS3Properties = (props) => { Account - {folder.dataset.AwsAccountId} + {folder.restricted.AwsAccountId} diff --git a/frontend/src/modules/Folders/services/getDatasetStorageLocation.js b/frontend/src/modules/Folders/services/getDatasetStorageLocation.js index 292603f8f..42d91dd9b 100644 --- a/frontend/src/modules/Folders/services/getDatasetStorageLocation.js +++ b/frontend/src/modules/Folders/services/getDatasetStorageLocation.js @@ -7,14 +7,17 @@ export const getDatasetStorageLocation = (locationUri) => ({ query: gql` query getDatasetStorageLocation($locationUri: String!) { getDatasetStorageLocation(locationUri: $locationUri) { + restricted { + AwsAccountId + region + S3BucketName + } dataset { datasetUri name + label userRoleForDataset - region SamlAdminGroupName - S3BucketName - AwsAccountId owner environment { environmentUri @@ -31,7 +34,6 @@ export const getDatasetStorageLocation = (locationUri) => ({ created tags locationUri - AwsAccountId label name S3Prefix diff --git a/frontend/src/modules/S3_Datasets/components/DatasetConsoleAccess.js b/frontend/src/modules/S3_Datasets/components/DatasetConsoleAccess.js index d3545e713..d24dca471 100644 --- a/frontend/src/modules/S3_Datasets/components/DatasetConsoleAccess.js +++ b/frontend/src/modules/S3_Datasets/components/DatasetConsoleAccess.js @@ -19,7 +19,7 @@ export const DatasetConsoleAccess = (props) => { Account - {dataset.AwsAccountId} + {dataset.restricted.AwsAccountId} @@ -28,7 +28,7 @@ export const DatasetConsoleAccess = (props) => { arn:aws:s3::: - {dataset.S3BucketName} + {dataset.restricted.S3BucketName} @@ -36,7 +36,7 @@ export const DatasetConsoleAccess = (props) => { Glue database - {`arn:aws:glue:${dataset.region}:${dataset.AwsAccountId}/database:${dataset.GlueDatabaseName}`} + {`arn:aws:glue:${dataset.restricted.region}:${dataset.restricted.AwsAccountId}/database:${dataset.restricted.GlueDatabaseName}`} @@ -44,16 +44,17 @@ export const DatasetConsoleAccess = (props) => { IAM role - {dataset.IAMDatasetAdminRoleArn} + {dataset.restricted.IAMDatasetAdminRoleArn} - {dataset.KmsAlias === 'SSE-S3' || dataset.KmsAlias === 'Undefined' ? ( + {dataset.restricted.KmsAlias === 'SSE-S3' || + dataset.restricted.KmsAlias === 'Undefined' ? 
( S3 Encryption - {`${dataset.KmsAlias}`} + {`${dataset.restricted.KmsAlias}`} ) : ( @@ -62,7 +63,7 @@ export const DatasetConsoleAccess = (props) => { S3 Encryption SSE-KMS - {`arn:aws:kms:${dataset.region}:${dataset.AwsAccountId}/alias:${dataset.KmsAlias}`} + {`arn:aws:kms:${dataset.restricted.region}:${dataset.restricted.AwsAccountId}/alias:${dataset.restricted.KmsAlias}`} )} diff --git a/frontend/src/modules/S3_Datasets/components/DatasetFolders.js b/frontend/src/modules/S3_Datasets/components/DatasetFolders.js index 7cefeefee..ab5e4a446 100644 --- a/frontend/src/modules/S3_Datasets/components/DatasetFolders.js +++ b/frontend/src/modules/S3_Datasets/components/DatasetFolders.js @@ -37,6 +37,7 @@ import { } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; import { deleteDatasetStorageLocation, useClient } from 'services'; +import { emptyPrintUnauthorized } from 'utils'; import { listDatasetStorageLocations } from '../services'; import { FolderCreateModal } from './FolderCreateModal'; @@ -69,7 +70,7 @@ export const DatasetFolders = (props) => { const response = await client.query( listDatasetStorageLocations(dataset.datasetUri, filter) ); - if (!response.errors) { + if (response.data.getDataset != null) { setItems({ ...response.data.getDataset.locations }); } else { dispatch({ type: SET_ERROR, error: response.errors[0].message }); @@ -224,7 +225,9 @@ export const DatasetFolders = (props) => { - {`s3://${dataset.S3BucketName}/${folder.S3Prefix}`} + {`s3://${emptyPrintUnauthorized( + folder.restricted?.S3BucketName + )}/${folder.S3Prefix}`} {folder.description} diff --git a/frontend/src/modules/S3_Datasets/components/DatasetOverview.js b/frontend/src/modules/S3_Datasets/components/DatasetOverview.js index 0599d33b2..3e61c26dd 100644 --- a/frontend/src/modules/S3_Datasets/components/DatasetOverview.js +++ b/frontend/src/modules/S3_Datasets/components/DatasetOverview.js @@ -33,7 +33,9 @@ export const DatasetOverview = (props) => { objectType="dataset" /> - {isAdmin && } + {isAdmin && dataset.restricted && ( + + )} diff --git a/frontend/src/modules/S3_Datasets/components/DatasetStartCrawlerModal.js b/frontend/src/modules/S3_Datasets/components/DatasetStartCrawlerModal.js index ba5b3ac47..8196760fc 100644 --- a/frontend/src/modules/S3_Datasets/components/DatasetStartCrawlerModal.js +++ b/frontend/src/modules/S3_Datasets/components/DatasetStartCrawlerModal.js @@ -132,7 +132,7 @@ export const DatasetStartCrawlerModal = (props) => { error={Boolean(touched.prefix && errors.prefix)} fullWidth helperText={touched.prefix && errors.prefix} - label={`s3://${dataset.S3BucketName}/`} + label={`s3://${dataset.restricted.S3BucketName}/`} name="prefix" onBlur={handleBlur} onChange={handleChange} diff --git a/frontend/src/modules/S3_Datasets/components/DatasetTables.js b/frontend/src/modules/S3_Datasets/components/DatasetTables.js index 46b3603e3..32f7774cc 100644 --- a/frontend/src/modules/S3_Datasets/components/DatasetTables.js +++ b/frontend/src/modules/S3_Datasets/components/DatasetTables.js @@ -40,7 +40,7 @@ import { listDatasetTables, deleteDatasetTable, useClient } from 'services'; import { syncTables } from '../services'; import { DatasetStartCrawlerModal } from './DatasetStartCrawlerModal'; -import { isFeatureEnabled } from 'utils'; +import { emptyPrintUnauthorized, isFeatureEnabled } from 'utils'; export const DatasetTables = (props) => { const { dataset, isAdmin } = props; @@ -80,7 +80,7 @@ export const DatasetTables = (props) => { filter: { ...filter } }) ); - if 
(!response.errors) { + if (response.data.getDataset != null) { setItems({ ...response.data.getDataset.tables }); } else { dispatch({ type: SET_ERROR, error: response.errors[0].message }); @@ -95,7 +95,7 @@ export const DatasetTables = (props) => { fetchItems().catch((e) => dispatch({ type: SET_ERROR, error: e.message }) ); - enqueueSnackbar(`Retrieved ${response.data.syncTables.count} tables`, { + enqueueSnackbar(`Retrieved ${response.data.syncTables} tables`, { anchorOrigin: { horizontal: 'right', vertical: 'top' @@ -257,11 +257,17 @@ export const DatasetTables = (props) => { to={`/console/s3-datasets/table/${table.tableUri}`} variant="subtitle2" > - {table.GlueTableName} + {table.name} - {table.GlueDatabaseName} - {table.S3Prefix} + + {emptyPrintUnauthorized( + table.restricted?.GlueDatabaseName + )} + + + {emptyPrintUnauthorized(table.restricted?.S3Prefix)} + {isAdmin && ( { ({ description created userRoleForStorageLocation + restricted { + S3BucketName + } } } } diff --git a/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js b/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js index 2c110baa5..feb59d959 100644 --- a/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js +++ b/frontend/src/modules/S3_Datasets/services/startGlueCrawler.js @@ -9,8 +9,6 @@ export const startGlueCrawler = ({ datasetUri, input }) => ({ mutation StartGlueCrawler($datasetUri: String, $input: CrawlerInput) { startGlueCrawler(datasetUri: $datasetUri, input: $input) { Name - AwsAccountId - region status } } diff --git a/frontend/src/modules/S3_Datasets/services/syncTables.js b/frontend/src/modules/S3_Datasets/services/syncTables.js index dd0f991ec..608335d01 100644 --- a/frontend/src/modules/S3_Datasets/services/syncTables.js +++ b/frontend/src/modules/S3_Datasets/services/syncTables.js @@ -6,25 +6,7 @@ export const syncTables = (datasetUri) => ({ }, mutation: gql` mutation SyncTables($datasetUri: String!) 
{ - syncTables(datasetUri: $datasetUri) { - count - nodes { - tableUri - GlueTableName - GlueDatabaseName - description - name - label - created - S3Prefix - dataset { - datasetUri - name - GlueDatabaseName - userRoleForDataset - } - } - } + syncTables(datasetUri: $datasetUri) } ` }); diff --git a/frontend/src/modules/S3_Datasets/views/DatasetEditForm.js b/frontend/src/modules/S3_Datasets/views/DatasetEditForm.js index 6026b1823..73f5543de 100644 --- a/frontend/src/modules/S3_Datasets/views/DatasetEditForm.js +++ b/frontend/src/modules/S3_Datasets/views/DatasetEditForm.js @@ -333,7 +333,7 @@ const DatasetEditForm = (props) => { terms: dataset.terms || [], stewards: dataset.stewards, confidentiality: dataset.confidentiality, - KmsAlias: dataset.KmsAlias, + KmsAlias: dataset.restricted.KmsAlias, autoApprovalEnabled: dataset.autoApprovalEnabled, expirationSetting: dataset.expirySetting, minValidity: dataset.expiryMinDuration, diff --git a/frontend/src/modules/Tables/services/getDatasetTable.js b/frontend/src/modules/Tables/services/getDatasetTable.js index 1491871b0..0384c9d91 100644 --- a/frontend/src/modules/Tables/services/getDatasetTable.js +++ b/frontend/src/modules/Tables/services/getDatasetTable.js @@ -11,7 +11,6 @@ export const getDatasetTable = (tableUri) => ({ datasetUri name userRoleForDataset - region SamlAdminGroupName owner confidentiality @@ -31,13 +30,15 @@ export const getDatasetTable = (tableUri) => ({ created tags tableUri - AwsAccountId - GlueTableName - GlueDatabaseName LastGlueTableStatus label name - S3Prefix + restricted { + S3Prefix + AwsAccountId + GlueTableName + GlueDatabaseName + } terms { count nodes { diff --git a/frontend/src/modules/Tables/views/TableView.js b/frontend/src/modules/Tables/views/TableView.js index 85ce3c0a6..ee3da9d1a 100644 --- a/frontend/src/modules/Tables/views/TableView.js +++ b/frontend/src/modules/Tables/views/TableView.js @@ -59,7 +59,7 @@ function TablePageHeader(props) { - Table {table.GlueTableName} + Table {table.label} - {table.GlueTableName} + {table.label} @@ -222,7 +222,7 @@ const TableView = () => { const fetchItem = useCallback(async () => { setLoading(true); const response = await client.query(getDatasetTable(params.uri)); - if (!response.errors && response.data.getDatasetTable !== null) { + if (response.data.getDatasetTable !== null) { setTable(response.data.getDatasetTable); handleUserRole( response.data.getDatasetTable.dataset.userRoleForDataset, diff --git a/frontend/src/modules/Worksheets/views/WorksheetView.js b/frontend/src/modules/Worksheets/views/WorksheetView.js index 545c3f190..f3c409aef 100644 --- a/frontend/src/modules/Worksheets/views/WorksheetView.js +++ b/frontend/src/modules/Worksheets/views/WorksheetView.js @@ -136,7 +136,7 @@ const WorksheetView = () => { (d) => ({ ...d, value: d.datasetUri, - label: d.GlueDatabaseName + label: d.restricted.GlueDatabaseName }) ); } @@ -198,7 +198,7 @@ const WorksheetView = () => { response.data.getDataset.tables.nodes.map((t) => ({ ...t, value: t.tableUri, - label: t.GlueTableName + label: t.restricted.GlueTableName })) ); } else { @@ -379,7 +379,11 @@ const WorksheetView = () => { dispatch({ type: SET_ERROR, error: e.message }) ); setSqlBody( - `SELECT * FROM "${selectedDatabase.label}"."${event.target.value.GlueTableName}" limit 10;` + `SELECT * FROM "${selectedDatabase.label}"."${ + event.target.value.restricted + ? 
event.target.value.restricted.GlueTableName + : event.target.value.GlueTableName + }" limit 10;` ); } @@ -512,7 +516,7 @@ const WorksheetView = () => { {tableOptions.length > 0 ? ( tableOptions.map((table) => ( - {table.GlueTableName} + {table.label} )) ) : ( diff --git a/frontend/src/services/graphql/Datasets/getDataset.js b/frontend/src/services/graphql/Datasets/getDataset.js index 322b5189d..7dabc1b20 100644 --- a/frontend/src/services/graphql/Datasets/getDataset.js +++ b/frontend/src/services/graphql/Datasets/getDataset.js @@ -12,20 +12,20 @@ export const getDataset = (datasetUri) => ({ description label name - region created imported userRoleForDataset SamlAdminGroupName - AwsAccountId - KmsAlias - S3BucketName - GlueDatabaseName + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } tags - businessOwnerEmail stewards - IAMDatasetAdminRoleArn - businessOwnerDelegationEmails stack { stack status diff --git a/frontend/src/services/graphql/Datasets/listDatasetTables.js b/frontend/src/services/graphql/Datasets/listDatasetTables.js index 343e80461..eee1db27f 100644 --- a/frontend/src/services/graphql/Datasets/listDatasetTables.js +++ b/frontend/src/services/graphql/Datasets/listDatasetTables.js @@ -26,11 +26,14 @@ export const listDatasetTables = ({ datasetUri, filter }) => ({ tableUri name created - GlueTableName - GlueDatabaseName + restricted { + S3Prefix + AwsAccountId + GlueTableName + GlueDatabaseName + } description stage - S3Prefix userRoleForTable } } diff --git a/frontend/src/services/graphql/Datasets/listS3DatasetsOwnedByEnvGroup.js b/frontend/src/services/graphql/Datasets/listS3DatasetsOwnedByEnvGroup.js index 589d4cf59..cbe5e41f9 100644 --- a/frontend/src/services/graphql/Datasets/listS3DatasetsOwnedByEnvGroup.js +++ b/frontend/src/services/graphql/Datasets/listS3DatasetsOwnedByEnvGroup.js @@ -29,14 +29,16 @@ export const listS3DatasetsOwnedByEnvGroup = ({ nodes { datasetUri label - AwsAccountId - region - GlueDatabaseName SamlAdminGroupName name - S3BucketName created owner + restricted { + AwsAccountId + region + S3BucketName + GlueDatabaseName + } stack { status } diff --git a/frontend/src/utils/helpers/emptyPrintUnauthorized.js b/frontend/src/utils/helpers/emptyPrintUnauthorized.js new file mode 100644 index 000000000..9713c753d --- /dev/null +++ b/frontend/src/utils/helpers/emptyPrintUnauthorized.js @@ -0,0 +1,5 @@ +function emptyPrintUnauthorized(param) { + return param ? 
param : '**********'; +} + +export { emptyPrintUnauthorized }; diff --git a/frontend/src/utils/helpers/index.js b/frontend/src/utils/helpers/index.js index 5eec0fd36..b00b552f5 100644 --- a/frontend/src/utils/helpers/index.js +++ b/frontend/src/utils/helpers/index.js @@ -1,5 +1,6 @@ export * from './bytesToSize'; export * from './dayjs'; +export * from './emptyPrintUnauthorized'; export * from './listToTree'; export * from './moduleUtils'; export * from './linkMarkup'; diff --git a/tests/modules/s3_datasets/conftest.py b/tests/modules/s3_datasets/conftest.py index 99a4dcdcf..8295b4be2 100644 --- a/tests/modules/s3_datasets/conftest.py +++ b/tests/modules/s3_datasets/conftest.py @@ -76,19 +76,20 @@ def factory( datasetUri label description - AwsAccountId - S3BucketName - GlueDatabaseName owner - region, - businessOwnerEmail - businessOwnerDelegationEmails SamlAdminGroupName - GlueCrawlerName enableExpiration expirySetting expiryMinDuration expiryMaxDuration + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } tables{ nodes{ tableUri @@ -180,11 +181,11 @@ def factory(dataset: S3Dataset, name, username) -> DatasetTable: label=name, owner=username, datasetUri=dataset.datasetUri, - GlueDatabaseName=dataset.GlueDatabaseName, + GlueDatabaseName=dataset.restricted.GlueDatabaseName, GlueTableName=name, - region=dataset.region, - AWSAccountId=dataset.AwsAccountId, - S3BucketName=dataset.S3BucketName, + region=dataset.restricted.region, + AWSAccountId=dataset.restricted.AwsAccountId, + S3BucketName=dataset.restricted.S3BucketName, S3Prefix=f'{name}', ) session.add(table) @@ -325,9 +326,9 @@ def factory(dataset: S3Dataset, name, username) -> DatasetStorageLocation: label=name, owner=username, datasetUri=dataset.datasetUri, - S3BucketName=dataset.S3BucketName, - region=dataset.region, - AWSAccountId=dataset.AwsAccountId, + S3BucketName=dataset.restricted.S3BucketName, + region=dataset.restricted.region, + AWSAccountId=dataset.restricted.AwsAccountId, S3Prefix=f'{name}', ) session.add(ds_location) diff --git a/tests/modules/s3_datasets/test_dataset.py b/tests/modules/s3_datasets/test_dataset.py index 69b1de0f6..c3801edfa 100644 --- a/tests/modules/s3_datasets/test_dataset.py +++ b/tests/modules/s3_datasets/test_dataset.py @@ -69,13 +69,15 @@ def test_get_dataset(client, dataset1, env_fixture, group): query GetDataset($datasetUri:String!){ getDataset(datasetUri:$datasetUri){ label - AwsAccountId description - region - imported - importedS3Bucket stewards owners + imported + restricted { + AwsAccountId + region + importedS3Bucket + } } } """, @@ -83,11 +85,11 @@ def test_get_dataset(client, dataset1, env_fixture, group): username='alice', groups=[group.name], ) - assert response.data.getDataset.AwsAccountId == env_fixture.AwsAccountId - assert response.data.getDataset.region == env_fixture.region + assert response.data.getDataset.restricted.AwsAccountId == env_fixture.AwsAccountId + assert response.data.getDataset.restricted.region == env_fixture.region assert response.data.getDataset.label == 'dataset1' assert response.data.getDataset.imported is False - assert response.data.getDataset.importedS3Bucket is False + assert response.data.getDataset.restricted.importedS3Bucket is False def test_list_datasets(client, dataset1, group): @@ -194,8 +196,6 @@ def test_start_crawler(org_fixture, env_fixture, dataset1, client, group, module mutation StartGlueCrawler($datasetUri:String, $input:CrawlerInput){ 
startGlueCrawler(datasetUri:$datasetUri,input:$input){ Name - AwsAccountId - region status } } @@ -209,7 +209,7 @@ def test_start_crawler(org_fixture, env_fixture, dataset1, client, group, module 'prefix': 'raw', }, ) - assert response.data.startGlueCrawler.Name == dataset1.GlueCrawlerName + assert response.data.Name == dataset1.restricted.GlueCrawlerName def test_update_dataset_unauthorized(dataset1, client, group): @@ -309,9 +309,11 @@ def test_list_dataset_tables(client, dataset1, group): tableUri name label - GlueDatabaseName - GlueTableName - S3Prefix + restricted{ + GlueDatabaseName + GlueTableName + S3Prefix + } } } } @@ -391,9 +393,11 @@ def test_delete_dataset(client, dataset, env_fixture, org_fixture, db, module_mo query GetDataset($datasetUri:String!){ getDataset(datasetUri:$datasetUri){ label - AwsAccountId + restricted { + AwsAccountId + region + } description - region } } """, @@ -428,17 +432,15 @@ def test_import_dataset(org_fixture, env_fixture, dataset1, client, group): mutation importDataset($input:ImportDatasetInput){ importDataset(input:$input){ label - AwsAccountId - region imported - importedS3Bucket - importedGlueDatabase - importedKmsKey - importedAdminRole - S3BucketName - GlueDatabaseName - IAMDatasetAdminRoleArn - KmsAlias + restricted { + AwsAccountId + region + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + KmsAlias + } } } """, @@ -457,17 +459,13 @@ def test_import_dataset(org_fixture, env_fixture, dataset1, client, group): }, ) assert response.data.importDataset.label == 'datasetImported' - assert response.data.importDataset.AwsAccountId == env_fixture.AwsAccountId - assert response.data.importDataset.region == env_fixture.region + assert response.data.importDataset.restricted.AwsAccountId == env_fixture.AwsAccountId + assert response.data.importDataset.restricted.region == env_fixture.region assert response.data.importDataset.imported is True - assert response.data.importDataset.importedS3Bucket is True - assert response.data.importDataset.importedGlueDatabase is True - assert response.data.importDataset.importedKmsKey is True - assert response.data.importDataset.importedAdminRole is True - assert response.data.importDataset.S3BucketName == 'dhimportedbucket' - assert response.data.importDataset.GlueDatabaseName == 'dhimportedGlueDB' - assert response.data.importDataset.KmsAlias == '1234-YYEY' - assert 'dhimportedRole' in response.data.importDataset.IAMDatasetAdminRoleArn + assert response.data.importDataset.restricted.S3BucketName == 'dhimportedbucket' + assert response.data.importDataset.restricted.GlueDatabaseName == 'dhimportedGlueDB' + assert response.data.importDataset.restricted.KmsAlias == '1234-YYEY' + assert 'dhimportedRole' in response.data.importDataset.restricted.IAMDatasetAdminRoleArn def test_get_dataset_by_prefix(db, env_fixture, org_fixture): @@ -512,13 +510,15 @@ def test_stewardship(client, dataset, env_fixture, org_fixture, db, group2, grou datasetUri label description - AwsAccountId - S3BucketName - GlueDatabaseName + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } owner - region, - businessOwnerEmail - businessOwnerDelegationEmails SamlAdminGroupName stewards diff --git a/tests/modules/s3_datasets/test_dataset_glossary.py b/tests/modules/s3_datasets/test_dataset_glossary.py index 543d20f91..5a25d1b34 100644 --- a/tests/modules/s3_datasets/test_dataset_glossary.py +++ b/tests/modules/s3_datasets/test_dataset_glossary.py @@ -14,12 +14,12 @@ def _columns(db, 
dataset_fixture, table_fixture) -> List[DatasetTableColumn]: datasetUri=dataset_fixture.datasetUri, tableUri=table_fixture.tableUri, label=f'c{i+1}', - AWSAccountId=dataset_fixture.AwsAccountId, - region=dataset_fixture.region, + AWSAccountId=dataset_fixture.restricted.AwsAccountId, + region=dataset_fixture.restricted.region, GlueTableName='table', typeName='String', owner='user', - GlueDatabaseName=dataset_fixture.GlueDatabaseName, + GlueDatabaseName=dataset_fixture.restricted.GlueDatabaseName, ) session.add(c) cols.append(c) diff --git a/tests/modules/s3_datasets/test_dataset_location.py b/tests/modules/s3_datasets/test_dataset_location.py index 3d388e641..d74f863da 100644 --- a/tests/modules/s3_datasets/test_dataset_location.py +++ b/tests/modules/s3_datasets/test_dataset_location.py @@ -51,11 +51,11 @@ def test_manage_dataset_location(client, dataset1, user, group): query GetDataset($datasetUri:String!){ getDataset(datasetUri:$datasetUri){ label - AwsAccountId description - region - imported - importedS3Bucket + restricted { + AwsAccountId + region + } locations{ nodes{ locationUri diff --git a/tests/modules/s3_datasets/test_dataset_table.py b/tests/modules/s3_datasets/test_dataset_table.py index e20af5b86..1736c4958 100644 --- a/tests/modules/s3_datasets/test_dataset_table.py +++ b/tests/modules/s3_datasets/test_dataset_table.py @@ -81,9 +81,11 @@ def test_list_dataset_tables(client, dataset_fixture): tableUri name label - GlueDatabaseName - GlueTableName - S3Prefix + restricted { + GlueDatabaseName + GlueTableName + S3Prefix + } } } } diff --git a/tests/modules/s3_datasets_shares/conftest.py b/tests/modules/s3_datasets_shares/conftest.py index 8fc050487..7a5ac19ba 100644 --- a/tests/modules/s3_datasets_shares/conftest.py +++ b/tests/modules/s3_datasets_shares/conftest.py @@ -83,15 +83,16 @@ def factory( datasetUri label description - AwsAccountId - S3BucketName - GlueDatabaseName owner - region, - businessOwnerEmail - businessOwnerDelegationEmails SamlAdminGroupName - GlueCrawlerName + restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn + } tables{ nodes{ tableUri @@ -253,7 +254,12 @@ def dataset_confidential_fixture(env_fixture, org_fixture, dataset, group) -> S3 @pytest.fixture(scope='module') def table_fixture(db, dataset_fixture, table, group, user): - table1 = table(dataset=dataset_fixture, name='table1', username=user.username) + dataset = dataset_fixture + dataset.GlueDatabaseName = dataset_fixture.restricted.GlueDatabaseName + dataset.region = dataset_fixture.restricted.region + dataset.S3BucketName = dataset_fixture.restricted.S3BucketName + dataset.AwsAccountId = dataset_fixture.restricted.AwsAccountId + table1 = table(dataset=dataset, name='table1', username=user.username) with db.scoped_session() as session: ResourcePolicyService.attach_resource_policy( diff --git a/tests/permissions.py b/tests/permissions.py index 9f3a29785..910681e53 100644 --- a/tests/permissions.py +++ b/tests/permissions.py @@ -246,6 +246,7 @@ def __post_init__(self): field_id('Dataset', 'owners'): TestData( resource_ignore=IgnoreReason.INTRAMODULE, tenant_ignore=IgnoreReason.NOTREQUIRED ), + field_id('Dataset', 'restricted'): TestData(resource_perm=GET_DATASET, tenant_ignore=IgnoreReason.NOTREQUIRED), field_id('Dataset', 'stack'): TestData(resource_perm=GET_DATASET, tenant_ignore=IgnoreReason.NOTREQUIRED), field_id('Dataset', 'statistics'): TestData( resource_ignore=IgnoreReason.INTRAMODULE, tenant_ignore=IgnoreReason.NOTREQUIRED @@ -285,6 
+286,9 @@ def __post_init__(self): field_id('DatasetStorageLocation', 'dataset'): TestData( resource_perm=GET_DATASET, tenant_ignore=IgnoreReason.NOTREQUIRED ), + field_id('DatasetStorageLocation', 'restricted'): TestData( + resource_perm=GET_DATASET_FOLDER, tenant_ignore=IgnoreReason.NOTREQUIRED + ), field_id('DatasetStorageLocation', 'terms'): TestData( resource_ignore=IgnoreReason.PUBLIC, tenant_ignore=IgnoreReason.NOTREQUIRED ), @@ -297,6 +301,9 @@ def __post_init__(self): field_id('DatasetTable', 'dataset'): TestData( resource_ignore=IgnoreReason.INTRAMODULE, tenant_ignore=IgnoreReason.NOTREQUIRED ), + field_id('DatasetTable', 'restricted'): TestData( + resource_perm=GET_DATASET_TABLE, tenant_ignore=IgnoreReason.NOTREQUIRED + ), field_id('DatasetTable', 'terms'): TestData( resource_ignore=IgnoreReason.PUBLIC, tenant_ignore=IgnoreReason.NOTREQUIRED ), @@ -725,8 +732,8 @@ def __post_init__(self): resource_perm=CREDENTIALS_PIPELINE, tenant_perm=MANAGE_PIPELINES ), field_id('Query', 'getDataset'): TestData( - resource_ignore=IgnoreReason.NOTREQUIRED, tenant_ignore=IgnoreReason.NOTREQUIRED - ), # TODO Review + resource_ignore=IgnoreReason.PUBLIC, tenant_ignore=IgnoreReason.NOTREQUIRED + ), field_id('Query', 'getDatasetAssumeRoleUrl'): TestData( tenant_perm=MANAGE_DATASETS, resource_perm=CREDENTIALS_DATASET ), @@ -737,11 +744,11 @@ def __post_init__(self): tenant_perm=MANAGE_DATASETS, resource_perm=CREDENTIALS_DATASET ), field_id('Query', 'getDatasetStorageLocation'): TestData( - resource_perm=GET_DATASET_FOLDER, tenant_ignore=IgnoreReason.NOTREQUIRED + resource_ignore=IgnoreReason.PUBLIC, tenant_ignore=IgnoreReason.NOTREQUIRED ), field_id('Query', 'getDatasetTable'): TestData( - resource_ignore=IgnoreReason.NOTREQUIRED, tenant_ignore=IgnoreReason.NOTREQUIRED - ), # TODO Review + resource_ignore=IgnoreReason.PUBLIC, tenant_ignore=IgnoreReason.NOTREQUIRED + ), field_id('Query', 'getDatasetTableProfilingRun'): TestData( resource_ignore=IgnoreReason.CUSTOM, tenant_ignore=IgnoreReason.CUSTOM ), diff --git a/tests_new/integration_tests/modules/catalog/conftest.py b/tests_new/integration_tests/modules/catalog/conftest.py index fa93399fe..3476afad8 100644 --- a/tests_new/integration_tests/modules/catalog/conftest.py +++ b/tests_new/integration_tests/modules/catalog/conftest.py @@ -85,7 +85,7 @@ def dataset_association1(client1, group1, glossary1, glossary_term1, session_s3_ datasetUri=session_s3_dataset1.datasetUri, input={ 'terms': [glossary_term1.nodeUri], - 'KmsAlias': session_s3_dataset1.KmsAlias, + 'KmsAlias': session_s3_dataset1.restricted.KmsAlias, }, ) response = list_glossary_associations(client1, node_uri=glossary1.nodeUri) @@ -100,7 +100,7 @@ def dataset_association1(client1, group1, glossary1, glossary_term1, session_s3_ datasetUri=session_s3_dataset1.datasetUri, input={ 'terms': [], - 'KmsAlias': session_s3_dataset1.KmsAlias, + 'KmsAlias': session_s3_dataset1.restricted.KmsAlias, }, ) diff --git a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py index e05861e73..ea8341ee5 100644 --- a/tests_new/integration_tests/modules/s3_datasets/global_conftest.py +++ b/tests_new/integration_tests/modules/s3_datasets/global_conftest.py @@ -181,14 +181,22 @@ def create_tables(client, dataset): file_path = os.path.join(os.path.dirname(__file__), 'sample_data/csv_table/csv_sample.csv') s3_client = S3Client(dataset_session, dataset.region) glue_client = GlueClient(dataset_session, dataset.region) - 
s3_client.upload_file_to_prefix(local_file_path=file_path, s3_path=f'{dataset.S3BucketName}/integrationtest1') + s3_client.upload_file_to_prefix( + local_file_path=file_path, s3_path=f'{dataset.restricted.S3BucketName}/integrationtest1' + ) glue_client.create_table( - database_name=dataset.GlueDatabaseName, table_name='integrationtest1', bucket=dataset.S3BucketName + database_name=dataset.restricted.GlueDatabaseName, + table_name='integrationtest1', + bucket=dataset.restricted.S3BucketName, ) - s3_client.upload_file_to_prefix(local_file_path=file_path, s3_path=f'{dataset.S3BucketName}/integrationtest2') + s3_client.upload_file_to_prefix( + local_file_path=file_path, s3_path=f'{dataset.restricted.S3BucketName}/integrationtest2' + ) glue_client.create_table( - database_name=dataset.GlueDatabaseName, table_name='integrationtest2', bucket=dataset.S3BucketName + database_name=dataset.restricted.GlueDatabaseName, + table_name='integrationtest2', + bucket=dataset.restricted.S3BucketName, ) response = sync_tables(client, datasetUri=dataset.datasetUri) return [table for table in response.get('nodes', []) if table.GlueTableName.startswith('integrationtest')] @@ -238,7 +246,9 @@ def session_s3_dataset1(client1, group1, org1, session_env1, session_id, testdat finally: if ds: delete_s3_dataset(client1, session_env1['environmentUri'], ds) - delete_aws_dataset_resources(aws_client=session_env1_aws_client, env=session_env1, bucket=ds.S3BucketName) + delete_aws_dataset_resources( + aws_client=session_env1_aws_client, env=session_env1, bucket=ds.restricted.S3BucketName + ) @pytest.fixture(scope='session') @@ -394,7 +404,9 @@ def temp_s3_dataset1(client1, group1, org1, session_env1, session_id, testdata, if ds: delete_s3_dataset(client1, session_env1['environmentUri'], ds) - delete_aws_dataset_resources(aws_client=session_env1_aws_client, env=session_env1, bucket=ds.S3BucketName) + delete_aws_dataset_resources( + aws_client=session_env1_aws_client, env=session_env1, bucket=ds.restricted.S3BucketName + ) """ diff --git a/tests_new/integration_tests/modules/s3_datasets/queries.py b/tests_new/integration_tests/modules/s3_datasets/queries.py index e80ddbf7d..a92ba6828 100644 --- a/tests_new/integration_tests/modules/s3_datasets/queries.py +++ b/tests_new/integration_tests/modules/s3_datasets/queries.py @@ -10,24 +10,16 @@ created updated admins -AwsAccountId -region -S3BucketName -GlueDatabaseName -GlueCrawlerName -GlueCrawlerSchedule -GlueProfilingJobName -GlueProfilingTriggerSchedule -IAMDatasetAdminRoleArn -KmsAlias SamlAdminGroupName -businessOwnerEmail -businessOwnerDelegationEmails -importedS3Bucket -importedGlueDatabase -importedKmsKey -importedAdminRole imported +restricted { + AwsAccountId + region + KmsAlias + S3BucketName + GlueDatabaseName + IAMDatasetAdminRoleArn +} environment { environmentUri label @@ -265,8 +257,6 @@ def start_glue_crawler(client, datasetUri, input): mutation StartGlueCrawler($datasetUri: String, $input: CrawlerInput) {{ startGlueCrawler(datasetUri: $datasetUri, input: $input) {{ Name - AwsAccountId - region status }} }} @@ -299,12 +289,14 @@ def list_s3_datasets_owned_by_env_group(client, environment_uri, group_uri, term nodes {{ datasetUri label - AwsAccountId - region - GlueDatabaseName + restricted {{ + AwsAccountId + region + S3BucketName + GlueDatabaseName + }} SamlAdminGroupName name - S3BucketName created owner stack {{ @@ -360,8 +352,6 @@ def update_folder(client, locationUri, input): mutation updateDatasetStorageLocation($locationUri: String!, $input: 
ModifyDatasetStorageLocationInput!) {{ updateDatasetStorageLocation(locationUri: $locationUri, input: $input) {{ locationUri - S3Prefix - label }} }} """, @@ -431,25 +421,7 @@ def sync_tables(client, datasetUri): 'variables': {'datasetUri': datasetUri}, 'query': f""" mutation SyncTables($datasetUri: String!) {{ - syncTables(datasetUri: $datasetUri) {{ - count - nodes {{ - tableUri - GlueTableName - GlueDatabaseName - description - name - label - created - S3Prefix - dataset {{ - datasetUri - name - GlueDatabaseName - userRoleForDataset - }} - }} - }} + syncTables(datasetUri: $datasetUri) }} """, } @@ -500,13 +472,15 @@ def get_dataset_table(client, tableUri): created tags tableUri - AwsAccountId - GlueTableName - GlueDatabaseName + restricted {{ + S3Prefix + AwsAccountId + GlueTableName + GlueDatabaseName + }} LastGlueTableStatus label name - S3Prefix }} }} """, @@ -526,7 +500,9 @@ def list_dataset_tables(client, datasetUri): tables {{ count nodes {{ - GlueTableName + restricted {{ + GlueTableName + }} }} }} }} diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py index e1882ac0d..fcdcc2865 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_dataset.py @@ -159,7 +159,7 @@ def test_update_dataset(client1, dataset_fixture_name, request): test_description = f'a test description {datetime.utcnow().isoformat()}' dataset_uri = dataset.datasetUri updated_dataset = update_dataset( - client1, dataset_uri, {'description': test_description, 'KmsAlias': dataset.KmsAlias} + client1, dataset_uri, {'description': test_description, 'KmsAlias': dataset.restricted.KmsAlias} ) assert_that(updated_dataset).contains_entry(datasetUri=dataset_uri, description=test_description) env = get_dataset(client1, dataset_uri) @@ -175,7 +175,7 @@ def test_update_dataset_unauthorized(client1, client2, dataset_fixture_name, req test_description = f'unauthorized {datetime.utcnow().isoformat()}' dataset_uri = dataset.datasetUri assert_that(update_dataset).raises(GqlError).when_called_with( - client2, dataset_uri, {'description': test_description, 'KmsAlias': dataset.KmsAlias} + client2, dataset_uri, {'description': test_description, 'KmsAlias': dataset.restricted.KmsAlias} ).contains('UnauthorizedOperation', dataset_uri) response = get_dataset(client1, dataset_uri) assert_that(response).contains_entry(datasetUri=dataset_uri).does_not_contain_entry(description=test_description) diff --git a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py index 934cac9d4..798303905 100644 --- a/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py +++ b/tests_new/integration_tests/modules/s3_datasets/test_s3_tables.py @@ -117,7 +117,7 @@ def test_delete_table(client1, dataset_fixture_name, request): aws_session_token=creds['sessionToken'], ) GlueClient(dataset_session, dataset.region).create_table( - database_name=dataset.GlueDatabaseName, table_name='todelete', bucket=dataset.S3BucketName + database_name=dataset.restricted.GlueDatabaseName, table_name='todelete', bucket=dataset.restricted.S3BucketName ) response = sync_tables(client1, datasetUri=dataset.datasetUri) table_uri = [table.tableUri for table in response.get('nodes', []) if table.label == 'todelete'][0]