Add LIST_ENVIRONMENT_DATASETS permission for listing shared datasets and cleanup unused code (#1719)

### Feature or Bugfix
- Bugfix

### Detail
Added a permission check on the list datasets API calls from the S3 shares
module, ensuring that only environment members can see the environment's
shared datasets.
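
Below is a minimal, illustrative sketch of the decorator pattern this change relies on: a resource-permission decorator such as `ResourcePolicyService.has_resource_permission(LIST_ENVIRONMENT_DATASETS)` verifies the caller's permissions on the resource `uri` passed to the service method before the method body runs. The simplified decorator, the `caller_has_permission` placeholder, and the example method body are assumptions made for this sketch only; they are not the actual data.all implementation.

```python
# Illustrative sketch only -- not the actual data.all implementation.
import functools

LIST_ENVIRONMENT_DATASETS = 'LIST_ENVIRONMENT_DATASETS'


class UnauthorizedOperation(Exception):
    """Simplified stand-in for the UnauthorizedOperation exception raised by the service layer."""


def caller_has_permission(uri, permission_name):
    # Placeholder policy lookup; the real ResourcePolicyService queries the database
    # for a resource policy matching the caller's groups. Always allows in this sketch.
    return True


def has_resource_permission(permission_name):
    """Simplified stand-in for ResourcePolicyService.has_resource_permission."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(uri, *args, **kwargs):
            # Check the named permission against the resource identified by `uri`
            # before letting the wrapped service method run.
            if not caller_has_permission(uri, permission_name):
                raise UnauthorizedOperation(f'Missing {permission_name} on {uri}')
            return func(uri, *args, **kwargs)

        return wrapper

    return decorator


@has_resource_permission(LIST_ENVIRONMENT_DATASETS)
def list_shared_tables_by_env_dataset(uri, dataset_uri):
    # Only reached when the caller holds LIST_ENVIRONMENT_DATASETS on environment `uri`.
    return []


if __name__ == '__main__':
    print(list_shared_tables_by_env_dataset('env-uri-123', 'dataset-uri-456'))
```

In `list_shared_databases_tables_with_env_group` below, the decorator is additionally combined with an explicit membership check (`group_uri not in context.groups`), so a caller with the environment-level permission still cannot list datasets shared with a team they do not belong to.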

Also removed some unused code.

### Relates

### Security
Please answer the questions below briefly where applicable, or write
`N/A`. Based on
[OWASP 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)?
  - Is the input sanitized?
- What precautions are you taking before deserializing the data you consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires authorization?
  - How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use standard, proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
dlpzx authored Nov 28, 2024
1 parent 553037e commit 90dd1e3
Showing 4 changed files with 17 additions and 29 deletions.
@@ -50,17 +50,6 @@ def get_profiling_run(session, profiling_run_uri=None, glue_job_run_id=None, glu
         )
         return run

-    @staticmethod
-    def list_profiling_runs(session, dataset_uri):
-        # TODO filter is always default
-        filter = {}
-        q = (
-            session.query(DatasetProfilingRun)
-            .filter(DatasetProfilingRun.datasetUri == dataset_uri)
-            .order_by(DatasetProfilingRun.created.desc())
-        )
-        return paginate(q, page=filter.get('page', 1), page_size=filter.get('pageSize', 20)).to_dict()
-
     @staticmethod
     def list_table_profiling_runs(session, table_uri):
         # TODO filter is always default
@@ -67,13 +67,6 @@ def resolve_profiling_run_status(run_uri):
             session.add(task)
         Worker.queue(engine=context.db_engine, task_ids=[task.taskUri])

-    @staticmethod
-    @ResourcePolicyService.has_resource_permission(GET_DATASET)
-    @is_feature_enabled('modules.s3_datasets.features.metrics_data')
-    def list_profiling_runs(uri):
-        with get_context().db_engine.scoped_session() as session:
-            return DatasetProfilingRepository.list_profiling_runs(session, uri)
-
     @classmethod
     @is_feature_enabled('modules.s3_datasets.features.metrics_data')
     def get_dataset_table_profiling_run(cls, uri: str):
6 changes: 2 additions & 4 deletions backend/dataall/modules/s3_datasets_shares/api/resolvers.py
@@ -65,13 +65,11 @@ def get_s3_consumption_data(context: Context, source, shareUri: str):


 def list_shared_databases_tables_with_env_group(context: Context, source, environmentUri: str, groupUri: str):
-    return S3ShareService.list_shared_databases_tables_with_env_group(environmentUri=environmentUri, groupUri=groupUri)
+    return S3ShareService.list_shared_databases_tables_with_env_group(uri=environmentUri, group_uri=groupUri)


 def resolve_shared_db_name(context: Context, source, **kwargs):
-    return S3ShareService.resolve_shared_db_name(
-        source.GlueDatabaseName, source.shareUri, source.targetEnvAwsAccountId, source.targetEnvRegion
-    )
+    return S3ShareService.resolve_shared_db_name(source.GlueDatabaseName, source.shareUri)


 def list_shared_table_columns(context: Context, source, tableUri: str, shareUri: str, filter: dict):
@@ -1,6 +1,6 @@
 import logging

-from dataall.base.db import utils
+from dataall.base.db import utils, exceptions
 from dataall.base.context import get_context
 from dataall.base.aws.sts import SessionHelper
 from dataall.base.aws.iam import IAM
@@ -10,6 +10,7 @@
 from dataall.core.tasks.db.task_models import Task
 from dataall.core.tasks.service_handlers import Worker
 from dataall.modules.datasets_base.db.dataset_repositories import DatasetBaseRepository
+from dataall.modules.datasets_base.services.dataset_list_permissions import LIST_ENVIRONMENT_DATASETS
 from dataall.modules.shares_base.db.share_object_models import ShareObject
 from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository
 from dataall.modules.shares_base.db.share_object_item_repositories import ShareObjectItemRepository
@@ -173,12 +174,13 @@ def reapply_share_items_for_dataset(uri: str):
         return True

     @staticmethod
-    def list_shared_tables_by_env_dataset(dataset_uri: str, env_uri: str):
+    @ResourcePolicyService.has_resource_permission(LIST_ENVIRONMENT_DATASETS)
+    def list_shared_tables_by_env_dataset(uri: str, dataset_uri: str):
         context = get_context()
         with context.db_engine.scoped_session() as session:
             log.info(
                 S3ShareObjectRepository.query_dataset_tables_shared_with_env(
-                    session, env_uri, dataset_uri, context.username, context.groups
+                    session, uri, dataset_uri, context.username, context.groups
                 )
             )
             return [
@@ -188,7 +190,7 @@ def list_shared_tables_by_env_dataset(dataset_uri: str, env_uri: str):
                     + (f'_{res.resourceLinkSuffix}' if res.resourceLinkSuffix else ''),
                 }
                 for res in S3ShareObjectRepository.query_dataset_tables_shared_with_env(
-                    session, env_uri, dataset_uri, context.username, context.groups
+                    session, uri, dataset_uri, context.username, context.groups
                 )
             ]

@@ -259,11 +261,17 @@ def get_s3_consumption_data(uri):
         }

     @staticmethod
-    def list_shared_databases_tables_with_env_group(environmentUri: str, groupUri: str):
+    @ResourcePolicyService.has_resource_permission(LIST_ENVIRONMENT_DATASETS)
+    def list_shared_databases_tables_with_env_group(uri: str, group_uri: str):
         context = get_context()
+        if group_uri not in context.groups:
+            raise exceptions.UnauthorizedOperation(
+                action='LIST_ENVIRONMENT_GROUP_DATASETS',
+                message=f'User: {context.username} is not a member of the owner team',
+            )
         with context.db_engine.scoped_session() as session:
             return S3ShareObjectRepository.query_shared_glue_databases(
-                session=session, groups=context.groups, env_uri=environmentUri, group_uri=groupUri
+                session=session, groups=context.groups, env_uri=uri, group_uri=group_uri
             )

     @staticmethod
@@ -303,7 +311,7 @@ def list_table_data_filters_by_attached(uri: str, data: dict):
         )

     @staticmethod
-    def resolve_shared_db_name(GlueDatabaseName: str, shareUri: str, targetEnvAwsAccountId: str, targetEnvRegion: str):
+    def resolve_shared_db_name(GlueDatabaseName: str, shareUri: str):
         with get_context().db_engine.scoped_session() as session:
             share = ShareObjectRepository.get_share_by_uri(session, shareUri)
             dataset = DatasetBaseRepository.get_dataset_by_uri(session, share.datasetUri)
