From 0b3c95eb920fee3d2a044af8cee80d4f66ba5c4b Mon Sep 17 00:00:00 2001 From: Noah Paige <69586985+noah-paige@users.noreply.github.com> Date: Wed, 24 Jul 2024 02:32:47 -0400 Subject: [PATCH] Gh pages v2 6 0 (#1428) ### Feature or Bugfix - Documentation ### Detail - Merge latest docs for `v2.6` ### Relates - #1425 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use standard, proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--------- Co-authored-by: Tejas Rajopadhye <71188245+TejasRGitHub@users.noreply.github.com> Co-authored-by: Sofia Sazonova Co-authored-by: Sofia Sazonova --- .gitignore | 8 +++ .idea/aws.xml | 6 ++ pages/deploy/deploy_aws.md | 120 ++++++++++++++++----------------- pages/deploy/deploy_locally.md | 2 +- 4 files changed, 73 insertions(+), 63 deletions(-) diff --git a/.gitignore b/.gitignore index f40fbd8ba..80d84042e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,11 @@ _site .jekyll-cache .jekyll-metadata vendor +.DS_Store +*cdk.out* +/node_modules/ +*venv* +*__pycache__* +*__pytest_cache__* +/frontend/ +/backend/ \ No newline at end of file diff --git a/.idea/aws.xml b/.idea/aws.xml index d63e1e851..3091856c5 100644 --- a/.idea/aws.xml +++ b/.idea/aws.xml @@ -15,4 +15,10 @@ + + + + \ No newline at end of file diff --git a/pages/deploy/deploy_aws.md b/pages/deploy/deploy_aws.md index b212df833..a36d4e1ff 100644 --- a/pages/deploy/deploy_aws.md +++ b/pages/deploy/deploy_aws.md @@ -65,7 +65,7 @@ which means that AWS services used by this construct need to be available in the Clone the GitHub repository from: ```bash -git clone https://github.com/data-dot-all/dataall.git --branch v2.5.0 +git clone https://github.com/data-dot-all/dataall.git --branch v2.6.0 cd dataall ``` ## 2. Setup Python virtualenv @@ -191,7 +191,6 @@ of our repository. 
Open it; you should see something like: "prod_sizing": "boolean_SET_INFRA_SIZING_TO_PROD_VALUES_IF_TRUE|DEFAULT=true", "enable_cw_rum": "boolean_SET_CLOUDWATCH_RUM_APP_MONITOR|DEFAULT=false", "enable_cw_canaries": "boolean_SET_CLOUDWATCH_CANARIES_FOR_FRONTEND_TESTING|DEFAULT=false", - "enable_quicksight_monitoring": "boolean_ENABLE_CONNECTION_QUICKSIGHT_RDS|DEFAULT=false", "shared_dashboards_sessions": "string_TYPE_SESSION_SHARED_DASHBOARDS|(reader, anonymous) DEFAULT=anonymous", "enable_pivot_role_auto_create": "boolean_ENABLE_PIVOT_ROLE_AUTO_CREATE_IN_ENVIRONMENT|DEFAULT=false", "enable_update_dataall_stacks_in_cicd_pipeline": "boolean_ENABLE_UPDATE_DATAALL_STACKS_IN_CICD_PIPELINE|DEFAULT=false", @@ -252,7 +251,6 @@ and find 2 examples of cdk.json files. | prod_sizing | Optional | If set to **true**, infrastructure sizing is adapted to prod environments. Check additional resources section for more details. (default: true) | | enable_cw_rum | Optional | If set to **true** CloudWatch RUM monitor is created to monitor the user interface (default: false) | | enable_cw_canaries | Optional | If set to **true**, CloudWatch Synthetics Canaries are created to monitor the GUI workflow of principal features (default: false) | -| enable_quicksight_monitoring | Optional | If set to **true**, RDS security groups and VPC NACL rules are modified to allow connection of the RDS metadata database with Quicksight in the infrastructure account (default: false) | | shared_dashboard_sessions | Optional | Either 'anonymous' or 'reader'. It indicates the type of Quicksight session used for Shared Dashboards (default: 'anonymous') | | enable_pivot_role_auto_create | Optional | If set to **true**, data.all creates the pivot IAM role as part of the environment stack. 
If false, a CloudFormation template is provided in the UI and AWS account admins need to deploy this stack as pre-requisite to link a data.all environment (default: false) | | enable_update_dataall_stacks_in_cicd_pipeline | Optional | If set to **true**, CI/CD pipeline update stacks stage is enabled for the deployment environment. This stage triggers the update of all environment and dataset stacks (default: false) | @@ -454,45 +452,58 @@ the different configuration options. "datapipelines": { "active": true }, - "s3_datasets": { + "omics": { + "active": false + }, + "datasets_base": { "active": true, "features": { - "file_uploads": true, - "file_actions": true, - "aws_actions": true, - "share_notifications": { - "email": { - "active": false, - "parameters": { - "group_notifications": true - } - } - }, - "preview_data": true, - "glue_crawler": true, - "confidentiality_dropdown" : true, - "topics_dropdown" : true, - "auto_approval_for_confidentiality_level" : { - "Unclassified" : true, - "Official" : true, - "Secret" : true + "share_notifications": { + "email": { + "active": false, + "persistent_reminders": false, + "parameters": { + "group_notifications": true + } } + }, + "confidentiality_dropdown" : true, + "topics_dropdown" : true, + "auto_approval_for_confidentiality_level" : { + "Unclassified" : true, + "Official" : true, + "Secret" : true + } } - }, - "dataset_sharing": { + }, + "s3_datasets": { + "active": true, + "features": { + "file_uploads": true, + "file_actions": true, + "aws_actions": true, + "preview_data": true, + "glue_crawler": true + } + }, + "s3_datasets_shares": { "active": true - }, + }, "worksheets": { "active": true }, "dashboards": { "active": true + }, + "maintenance": { + "active": true } }, "core": { "features": { "env_aws_actions": true, - "cdk_pivot_role_multiple_environments_same_account": false + "cdk_pivot_role_multiple_environments_same_account": false, + "enable_quicksight_monitoring": false } } } @@ -510,20 +521,22 @@ The 
following table contains a list of the available modules and their dependenc functionality. If you want to know more about each module, check the [UserGuide](https://github.com/data-dot-all/dataall/blob/main/UserGuide.pdf) available as PDF in the repository. -| **Module** | **depends on** | **Description** | -|-----------------|-----------------------------------------------------|---------------------------------------------------------------------------------------| -| catalog | None | Central catalog of data items. In this module a glossary of terms is defined. | -| feed | None | S3 Bucket and Glue database construct to store data in data.all | -| vote | catalog | S3 Bucket and Glue database construct to store data in data.all | -| s3_datasets | datasets_base, dataset_sharing, catalog, vote, feed | S3 Bucket and Glue database construct to store data in data.all | -| dataset_sharing | datasets_base, notifications | Sub-module that allows sharing of Datasets through Lake Formation and S3 | -| datasets_base | None | Shared code related to Datasets (not exposed on `config.json`). | -| worksheets | datasets | Athena query editor integrated in data.all UI | -| datapipelines | feed | CICD pipelines that deploy [AWS DDK](https://awslabs.github.io/aws-ddk/) applications | -| mlstudio | None | SageMaker Studio users that can open a session directly from data.all UI | -| notebooks | None | SageMaker Notebooks created and accessible from data.all UI | -| dashboards | catalog, vote, feed | Start a Quicksight session or import and share a Quicksight Dashboard. | -| notifications | None | Construct to notify users on dataset sharing updates in data.all | +| **Module** | **depends on** | **Description** | +|-----------------|-----------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------| +| catalog | None | Central catalog of data items. 
In this module a glossary of terms is defined. | +| feed | None | S3 Bucket and Glue database construct to store data in data.all | +| vote | catalog | S3 Bucket and Glue database construct to store data in data.all | +| s3_datasets | datasets_base, s3_datasets_shares, catalog, vote, feed | S3 Bucket and Glue database construct to store data in data.all | +| s3_datasets_shares | datasets_base, notifications | Sub-module that allows sharing of Datasets through Lake Formation and S3 | +| datasets_base | None | Shared code and features common to all Dataset types, configured under `datasets_base` in `config.json` (e.g. share notifications, confidentiality and topics dropdowns). | +| worksheets | datasets | Athena query editor integrated in data.all UI | +| datapipelines | feed | CICD pipelines that deploy [AWS DDK](https://awslabs.github.io/aws-ddk/) applications | +| omics | None | Adds the capability to view and instantiate HealthOmics Ready2Run workflows as runs that can output and save omics data as data.all Datasets. | +| mlstudio | None | SageMaker Studio users that can open a session directly from data.all UI | +| notebooks | None | SageMaker Notebooks created and accessible from data.all UI | +| dashboards | catalog, vote, feed | Start a Quicksight session or import and share a Quicksight Dashboard. 
| +| notifications | None | Construct to notify users on dataset sharing updates in data.all | +| maintenance | None | Admin control to start/stop data.all maintenance mode to restrict user actions in data.all and allow a stable window for deploying new updates | ### Disable module features @@ -535,26 +548,7 @@ In the example config.json, the feature that enables file upload from data.all U "s3_datasets": { "active": true, "features": { - "file_uploads": true, - "file_actions": true, - "aws_actions": true, - "share_notifications": { - "email": { - "active": false, - "parameters": { - "group_notifications": true - } - } - }, - "preview_data": true, - "glue_crawler": true, - "confidentiality_dropdown" : true, - "topics_dropdown" : true, - "auto_approval_for_confidentiality_level" : { - "Unclassified" : true, - "Official" : true, - "Secret" : true - } + "file_uploads": false } }, ``` @@ -603,7 +597,8 @@ a particular feature in the core is to add it to the core section of the `config "core": { "features": { "env_aws_actions": true, - "cdk_pivot_role_multiple_environments_same_account": false + "cdk_pivot_role_multiple_environments_same_account": false, + "enable_quicksight_monitoring": false } } ``` @@ -614,6 +609,7 @@ disable or modify the behavior of any other core feature. |-----------------------|----------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | env_aws_actions | environments | If set to True, users can get AWS Credentials and assume Environment Group IAM roles from data.all's UI | | cdk_pivot_role_multiple_environments_same_account | environments | If set to True, the CDK-created pivot role as part of the environment stack will be region specific (`dataallPivotRole-cdk-`). 
This feature allows users to create multiple data.all environments in the same account but multiple regions. | +| enable_quicksight_monitoring | environments | If set to **true**, RDS security groups and VPC NACL rules are modified to allow connection of the RDS metadata database with Quicksight in the infrastructure account (default: false) | ## 8. Run CDK synth and check cdk.context.json diff --git a/pages/deploy/deploy_locally.md b/pages/deploy/deploy_locally.md index 786e7b359..e13cb4e3c 100644 --- a/pages/deploy/deploy_locally.md +++ b/pages/deploy/deploy_locally.md @@ -25,7 +25,7 @@ data.all is fully dockerized with docker-compose, and can be fully run from your The first step is to clone the repo. ```bash -git clone https://github.com/data-dot-all/dataall.git --branch v2.5.0 +git clone https://github.com/data-dot-all/dataall.git --branch v2.6.0 ``` With docker compose we orchestrate the build of 5 containers: frontend, db, graphql, cdkproxy, opensearch.