diff --git a/.fossa.yml b/.fossa.yml new file mode 100644 index 0000000..89103eb --- /dev/null +++ b/.fossa.yml @@ -0,0 +1,7 @@ +--- +version: 3 +project: + id: gooddata-productivity-tools + +telemetry: + scope: "off" diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml new file mode 100644 index 0000000..21288d0 --- /dev/null +++ b/.github/workflows/python.yaml @@ -0,0 +1,54 @@ +name: Python tests and linting + +on: + push: + branches: ["master"] + pull_request: + branches: ["master"] + +jobs: + validate-python: + runs-on: ubuntu-latest + + steps: + - name: checkout + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox + + - name: Run lint + run: tox -e lint + + - name: Run type + run: tox -e type + + test-python: + runs-on: ubuntu-latest + + steps: + - name: checkout + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox + + - name: Set up fake AWS credentials + run: sh tests/fake_aws_creds.sh + + - name: Run tests + run: tox -e 3.11 diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..5aaa58e --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,26 @@ +--- +name: Release + +on: + push: + branches: + - master + +jobs: + release: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + - name: Bump version and push tag + id: tag_version + uses: mathieudutour/github-tag-action@v6.1 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + - name: Create a GitHub release + uses: ncipollo/release-action@v1 + with: + tag: ${{ steps.tag_version.outputs.new_tag }} + name: Release ${{ steps.tag_version.outputs.new_tag }} + body: ${{ steps.tag_version.outputs.changelog }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b418743 --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ +.idea +.vscode +.venv* +venv/ +.secrets +__pycache__ +*.egg-info +.DS_Store +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..cd3ef20 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,32 @@ +# Contributing +We want to make contributing to this project as easy and transparent as possible, whether it's: + +- Reporting a bug +- Discussing the current state of the code +- Submitting a fix +- Proposing new features + +## We Develop with GitHub +We use GitHub to host code, to track issues and feature requests, as well as accept pull requests. + +## We Use [GitHub Flow](https://guides.github.com/introduction/flow/index.html), So All Code Changes Happen Through Pull Requests +Pull requests are the best way to propose changes to the codebase (we use [GitHub Flow](https://guides.github.com/introduction/flow/index.html)): + +1. Fork the repo and create your branch from `master`. +2. If you've added code that should be tested, add tests. +3. If you've changed behaviour, update the documentation. +4. Ensure the test suite passes. +5. Make sure your code lints. +6. Issue a pull request. 
+
+## Any contributions you make will be under the MIT Software License
+In short, when you submit code changes, your submissions are understood to be under the same [MIT License](http://choosealicense.com/licenses/mit/) that covers the project. Feel free to contact the maintainers if that's a concern.
+
+## Report using GitHub's [issues](https://github.com/briandk/transcriptase-atom/issues)
+We use GitHub issues. If you want to report a bug or propose a new feature, open a new issue.
+
+## Use a Consistent Coding Style
+Before creating a pull request, run `black` and `ruff` on the codebase to ensure a consistent style, and run `mypy` to check for typing errors.
+
+## License
+By contributing, you agree that your contributions will be licensed under the project's MIT License.
diff --git a/LICENCE b/LICENCE
new file mode 100644
index 0000000..332301a
--- /dev/null
+++ b/LICENCE
@@ -0,0 +1,11 @@
+BSD License
+
+Copyright (c) 2023-2024, GoodData Corporation. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted, provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7c65eae
--- /dev/null
+++ b/README.md
@@ -0,0 +1,176 @@
+# gooddata-productivity-tools
+This repository contains tools that help with GoodData Cloud/CN workspace management, user management, and backup/restore of workspaces.
+
+This section of the documentation describes how to set up the environment and the relevant authentication files. The Tools section below links to more specific documentation for each tool. The steps mentioned here are shared between the tools.
+
+## Requirements
+Python 3.10+
+
+Depending on your environment, the commands can start with either
+```sh
+pip
+pip3
+```
+or
+```sh
+python
+python3
+```
+Please use the variant that works for you and refers to Python 3.10+.
+
+The version can be checked by running
+```sh
+python -V
+```
+
+## Install
+In order to install the tooling requirements in the target environment, run the following:
+
+```sh
+pip install -r requirements.txt
+```
+
+## Authentication
+
+Overall, the scripts in this repository follow the credential/authentication provisioning conventions of GoodData and of any storage provider used (e.g. AWS).
+
+The following section describes which credentials need to be set up, where to find them, and what format they should follow. If you need help with editing files in your user home folder (~), you can also refer to the [step-by-step authentication setup guide](docs/SETUPATUHENTICATION.md).
+
+
+### GoodData
+When authenticating against GoodData, you can either export the required credentials as environment variables, or provide a GoodData profiles file.
+
+For example, you can export the environment variables like so:
+
+```sh
+export GDC_AUTH_TOKEN="some_auth_token"
+export GDC_HOSTNAME="https://host.name.cloud.gooddata.com/"
+```
+
+or you can choose to provide a GoodData `profiles.yaml` file of the following format:
+
+```yaml
+default:
+  host: https://host.name.cloud.gooddata.com/
+  token: some_auth_token
+
+customer:
+  host: https://customer.hostname.cloud.gooddata.com/
+  token: other_auth_token
+```
+
+By default, a tool attempts to locate a GoodData profiles file at `~/.gooddata/profiles.yaml`, but you can also choose to provide a custom path like so:
+
+```sh
+python scripts/restore.py -p path/to/profiles.yaml
+```
+
+You can define multiple GoodData profiles in a single profiles file. By default, the `default` profile is used, but you can choose a different one. For example, if you want to tell a tool to use the `customer` profile defined in the example `profiles.yaml` above, you can do so like this:
+
+```sh
+python scripts/restore.py -p path/to/profiles.yaml --profile customer
+```
+
+If both kinds of authentication are provided to a tool, the environment variables take precedence and the profiles config is ignored.
+
+### AWS
+
+When authenticating against AWS, the [conventions established by the boto3 library](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) are followed.
+
+From the tool user's perspective, this means points 3 to 8 of the [Configuring Credentials section](https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html#configuring-credentials) apply.
+
+One example of how you can supply AWS credentials to the tools is by defining one or more AWS profiles in the `~/.aws/credentials` file.
+
+```
+[default]
+aws_access_key_id = some_key_id
+aws_secret_access_key = some_access_key
+
+[services]
+aws_access_key_id = other_key_id
+aws_secret_access_key = other_access_key
+```
+
+If you want to specify which AWS credentials profile should be used, see the tool-specific documentation.
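+
+For illustration, the credential resolution order boils down to the following simplified sketch (modeled on `create_client` in `scripts/backup.py`; the real function returns SDK/API client objects rather than raw values):
+
+```python
+import os
+import yaml
+
+def resolve_gooddata_credentials(profile: str, profile_config: str) -> tuple[str, str]:
+    """Environment variables win; the profiles file is only a fallback."""
+    hostname = os.environ.get("GDC_HOSTNAME")
+    token = os.environ.get("GDC_AUTH_TOKEN")
+    if hostname and token:
+        return hostname, token
+    # Fall back to the profiles.yaml file described above.
+    with open(profile_config, "r") as file:
+        config = yaml.safe_load(file)
+    return config[profile]["host"], config[profile]["token"]
+```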
+
+## Tools
+
+- [Backup workspace](docs/BACKUP.md)
+- [Restore workspace](docs/RESTORE.md)
+- [Workspace permission management](docs/PERMISSION_MGMT.md)
+- [User management](docs/USER_MGMT.md)
+
+
+## Known MacOS issue SSL: CERTIFICATE_VERIFY_FAILED
+
+If you are getting the following message:
+
+`Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1129)'))`
+
+it is most likely a Python issue, which occurs if you have installed Python directly from python.org.
+
+To mitigate this, please install your SSL certificates by opening HD -> Applications -> Python -> Install Certificates.command.
+
+---
+
+## Development
+
+This section is aimed at developers who want to adjust or test the code. If you are a regular user, you can ignore the following parts.
+
+### Setup
+To set up a local development environment, do the following:
+
+1. (optional) Set up a local python virtual environment:
+
+```sh
+python -m venv venv
+source venv/bin/activate
+```
+
+2. Install tool, dev, and test requirements:
+
+```sh
+pip install -r requirements.txt -r requirements-test.txt -r requirements-dev.txt
+```
+
+
+### Style checking, linting, and typing
+The codebase (both scripts and tests) is style-, lint-, and type-checked when the CI/CD pipeline runs.
+
+Linting and style checking are done with the help of `black` and `ruff`.
+
+Type checking is done using `mypy`.
+
+To run any of the mentioned tools locally, just call the tool with a target directory. For example, in order to check the typing in the scripts, call the following from the repository's root directory:
+
+```sh
+mypy scripts
+```
+
+
+### Testing
+The tooling test suite makes use of some third-party tools, such as `pytest`, `tox`, and `moto`.
+
+To run the test suite locally, ensure you have the test and script requirements installed (see the Setup step above), change the working directory to the repository's root, and then call:
+
+```sh
+pytest .
+```
+
+
+### Tox
+To run the test suite, linters, and type checks locally, you can also use `tox`.
+
+To check everything at once, ensure you're in the repository's root directory and simply call:
+
+```sh
+tox
+```
+
+## Contributing
+If you want to contribute to the project, please read the [contributing guide](CONTRIBUTING.md).
diff --git a/configuration_local.yaml b/configuration_local.yaml
new file mode 100644
index 0000000..2b68171
--- /dev/null
+++ b/configuration_local.yaml
@@ -0,0 +1,2 @@
+storage_type: local
+storage:
diff --git a/configuration_s3.yaml b/configuration_s3.yaml
new file mode 100644
index 0000000..a18befd
--- /dev/null
+++ b/configuration_s3.yaml
@@ -0,0 +1,5 @@
+storage_type: s3
+storage:
+  bucket: gdc-ms-dev-eu
+  backup_path: subfolder1/subfolder2/backups
+  profile: services
diff --git a/docs/BACKUP.md b/docs/BACKUP.md
new file mode 100644
index 0000000..a78ae87
--- /dev/null
+++ b/docs/BACKUP.md
@@ -0,0 +1,87 @@
+# GD Export workspace definition
+
+A tool which exports (creates a backup of) one or more workspaces - their logical data model (LDM), analytical model (AM), and user data filters (UDF). Backups are stored either locally or can be uploaded to an S3 bucket.
+
+## Usage
+The tool requires the following arguments on input:
+- `ws_csv` - a path to a csv file defining the IDs of the workspaces to back up
+- `conf` - a path to a configuration file containing information required for accessing the backup target storage
+
+Use the tool like so:
+
+```sh
+python scripts/backup.py ws_csv conf
+```
+
+Where `ws_csv` refers to the input csv and `conf` to the configuration file in YAML format.
+
+For example, if you have a csv file named "example_input.csv" in the folder from which you are executing the python command, and a configuration file named "example_conf.yaml" in a subfolder named "subfolder" relative to that folder, the execution could look like this:
+
+```sh
+python scripts/backup.py example_input.csv subfolder/example_conf.yaml
+```
+
+
+To show the help for using arguments, call:
+```sh
+python scripts/backup.py -h
+```
+
+There are two more optional arguments for setting up GoodData profiles.
+By default, the tool attempts to locate a GoodData profiles file at ~/.gooddata/profiles.yaml, but you can also choose to provide a custom path like so:
+- `-p` - path/to/profiles.yaml
+- `--profile` - name of the GoodData profile to be used
+
+```sh
+python scripts/backup.py input.csv conf.yaml -p path/to/profiles.yaml --profile customer
+```
+
+## Configuration file (conf)
+The configuration file lets you define which type of storage the export tool will save the backups to, and any additional storage-specific information that might be required. Currently, AWS S3 and local storage are supported.
+
+The configuration file has the following format:
+```yaml
+storage_type: some_storage
+storage:
+  arg1: foo
+  arg2: bar
+```
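+
+For reference, the scripts parse this file with a small wrapper class; the following mirrors `BackupRestoreConfig` in `scripts/backup.py` (comments added here):
+
+```python
+import yaml
+
+class BackupRestoreConfig:
+    def __init__(self, conf_path: str):
+        with open(conf_path, "r") as stream:
+            conf = yaml.safe_load(stream)
+        self.storage_type = conf["storage_type"]  # e.g. "s3" or "local"
+        self.storage = conf["storage"]            # storage-specific options
+```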
+
+### AWS S3
+
+You can define the configuration file for S3 storage like so:
+
+```yaml
+storage_type: s3
+storage:
+  bucket: some_bucket
+  backup_path: some/path/to/backups/
+  profile: services
+```
+Here, the meaning of the different `storage` fields is as follows:
+- bucket - S3 bucket containing the backups
+- backup_path - absolute path within the S3 bucket which leads to the root directory where the backups should be saved
+- profile (optional) - AWS profile to be used
+
+### Local Storage
+
+```yaml
+storage_type: local
+storage:
+```
+
+In this case, exports are saved to the ./local_backups/ folder relative to where the script is executed from. The number of backups already present in this folder might affect the performance of the script.
+
+## Input CSV file (ws_csv)
+The input CSV file defines the workspaces to export (back up).
+
+The following format of the csv is expected:
+
+| workspace_id |
+|--------------|
+| ws_id_1      |
+| ws_id_2      |
+| ws_id_3      |
+
+Here, each `workspace_id` is the ID of a workspace to perform the export on.
+If a defined workspace does not exist in the target organization, this is reported as an ERROR log message. If something fails, please read over all ERROR log messages for information on where the issue lies.
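+
+As a sketch of what happens under the hood, the tool reads this file with Python's standard csv module, skipping the header row and taking the first column as the workspace ID (abridged from `get_workspace_export` in `scripts/backup.py`):
+
+```python
+import csv
+
+with open("example_input.csv") as csvfile:
+    workspace_list = csv.reader(csvfile, skipinitialspace=True)
+    next(workspace_list, None)  # skip the "workspace_id" header row
+    for row in workspace_list:
+        ws_id = row[0]  # first column holds the workspace ID
+        ...  # export the workspace and its user data filters
+```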
diff --git a/docs/PERMISSION_MGMT.md b/docs/PERMISSION_MGMT.md
new file mode 100644
index 0000000..6fba02a
--- /dev/null
+++ b/docs/PERMISSION_MGMT.md
@@ -0,0 +1,55 @@
+# GD Workspace Permission Management
+A tool which helps manage user- and userGroup-bound workspace permissions within a GoodData organization.
+
+The goal of the tool is to help manage the state of user-workspace or userGroup-workspace permission pairs in a granular fashion (one input row per permission - e.g. `user_1 - ws_id_1 - "ANALYZE"`).
+
+
+## Usage
+
+The tool requires the following argument on input:
+- `perm_csv` - a path to a csv file defining workspace permissions bound to specific ws_id-user or ws_id-userGroup pairs, and each permission's isActive state
+
+Some other, _optional_, arguments are:
+- `-d | --delimiter` - column delimiter for the csv files. Use this to define how the csv is parsed. Default value is "`,`"
+
+Use the tool like so:
+```sh
+python scripts/permission_mgmt.py perm_csv
+```
+Where `perm_csv` refers to the input csv.
+
+If you would like to define a custom delimiter, use the tool like so:
+```sh
+python scripts/permission_mgmt.py perm_csv -d ","
+```
+
+To show the help for using arguments, call:
+```sh
+python scripts/permission_mgmt.py -h
+```
+
+## Input CSV file (perm_csv)
+The input CSV file defines the workspace permissions which you want to manage.
+
+[Example input csv.](examples/permission_mgmt/input.csv)
+
+The following format of the csv is expected:
+
+| user_id | ug_id | ws_id   | ws_permission | is_active |
+|---------|-------|---------|---------------|-----------|
+| user_1  |       | ws_id_1 | ANALYZE       | True      |
+| user_1  |       | ws_id_1 | VIEW          | False     |
+| user_1  |       | ws_id_2 | MANAGE        | True      |
+| user_2  |       | ws_id_1 | ANALYZE       | True      |
+| user_2  |       | ws_id_2 | MANAGE        | True      |
+|         | ug_1  | ws_id_1 | ANALYZE       | True      |
+|         | ug_1  | ws_id_1 | VIEW          | True      |
+|         | ug_1  | ws_id_1 | MANAGE        | False     |
+|         | ug_2  | ws_id_1 | ANALYZE       | True      |
+|         | ug_2  | ws_id_2 | MANAGE        | True      |
+
+Here, each `user_id` is the ID of the user to manage, and `ug_id` is the ID of the user group to manage. Note that these fields are mutually exclusive and you should provide only one of the two values per row.
+
+The `ws_id` is the workspace ID that the permission is bound to.
+
+Lastly, the `is_active` field contains information about whether the permission should or should not exist in the organization. The `is_active` field is case-insensitive and considers `true` as the only value taken as positive. Any other value in this field is considered negative (e.g.: `blabla` would evaluate to `False`).
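+
+For illustration, the is_active evaluation is a simple case-insensitive string comparison (as done in `WSPermission.from_csv_row` in `scripts/permission_mgmt.py`; the helper name here is illustrative):
+
+```python
+def parse_is_active(value: str) -> bool:
+    # Only the literal string "true" (any casing) counts as positive.
+    return str(value).lower() == "true"
+
+print(parse_is_active("True"))    # True
+print(parse_is_active("blabla"))  # False
+```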
diff --git a/docs/RESTORE.md b/docs/RESTORE.md
new file mode 100644
index 0000000..657680a
--- /dev/null
+++ b/docs/RESTORE.md
@@ -0,0 +1,76 @@
+# GD Workspace backup restore
+A tool which restores the analytical model (AM), logical data model (LDM), and user data filters (UDF) of one or more workspaces from source backup archives, in an incremental manner.
+
+The backups contain declarative definitions of the AM, LDM, and UDFs, which are unarchived, loaded into memory, and finally put into the target GD workspace.
+
+The restores are workspace-agnostic, which means that if you need to, you can import a backup of one workspace into a different workspace.
+
+## Usage
+The tool requires the following arguments on input:
+- `ws_csv` - a path to a csv file defining target workspace IDs to restore to, and the backup source paths
+- `conf` - a path to a configuration file containing information required for accessing the backup source storage
+
+Use the tool like so:
+
+```sh
+python scripts/restore.py ws_csv conf
+```
+
+Where `ws_csv` refers to the input csv and `conf` to the configuration file in YAML format.
+
+For example, if you have a csv file named "example_input.csv" in the folder from which you are executing the python command, and a configuration file named "example_conf.yaml" in a subfolder named "subfolder" relative to that folder, the execution could look like this:
+
+```sh
+python scripts/restore.py example_input.csv subfolder/example_conf.yaml
+```
+
+
+To show the help for using arguments, call:
+```sh
+python scripts/restore.py -h
+```
+
+## Configuration file (conf)
+The configuration file lets you define which type of storage the restore tool will source the backups from, and any additional storage-specific information that might be required. Currently, only AWS S3 is supported.
+
+The configuration file has the following format:
+```yaml
+storage_type: some_storage
+storage:
+  arg1: foo
+  arg2: bar
+```
+
+### AWS S3
+
+You can define the configuration file for S3 storage like so:
+
+```yaml
+storage_type: s3
+storage:
+  bucket: some_bucket
+  backup_path: some/path/to/backups/gd_org_id/
+  profile: services
+```
+Here, the meaning of the different `storage` fields is as follows:
+- bucket - S3 bucket containing the backups
+- backup_path - absolute path within the S3 bucket which leads to the root directory of the backups (the input csv file defines sources relative to this path)
+- profile (optional) - AWS profile to be used
+
+
+## Input CSV file (ws_csv)
+The input CSV file defines the targets and sources for backup restores (imports).
+
+The following format of the csv is expected:
+
+| workspace_id | path             |
+|--------------|------------------|
+| ws_id_1      | path/to/backup_1 |
+| ws_id_2      | path/to/backup_2 |
+| ws_id_3      | path/to/backup_1 |
+
+Here, each `workspace_id` is the ID of the workspace to perform the restore to. The `path` is the path (e.g. in S3) to a directory which contains the target backup archive (`gooddata_layouts.zip`).
+
+The `path` is then prefixed with additional information (e.g. the S3 bucket and the backup_path to the backups' root dir).
+
+If something fails, please read over all ERROR log messages for information on where the issue lies.
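+
+As a purely hypothetical illustration of that prefixing (the exact layout depends on how your backups were created; all values below are example placeholders), the resulting S3 key could be assembled like this:
+
+```python
+bucket = "some_bucket"
+backup_path = "some/path/to/backups/gd_org_id/"  # from the conf file
+path = "path/to/backup_1"                        # from the input csv
+
+full_key = f"{backup_path}{path}/gooddata_layouts.zip"
+print(f"s3://{bucket}/{full_key}")
+# s3://some_bucket/some/path/to/backups/gd_org_id/path/to/backup_1/gooddata_layouts.zip
+```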
diff --git a/docs/SETUPATUHENTICATION.md b/docs/SETUPATUHENTICATION.md
new file mode 100644
index 0000000..f2a869c
--- /dev/null
+++ b/docs/SETUPATUHENTICATION.md
@@ -0,0 +1,116 @@
+# Authentication
+
+This section contains a step-by-step guide on how to set up the authentication files, using either the GUI or the Terminal.
+
+## MacOS
+
+### GUI
+
+In Finder, go to your current user home directory by pressing
+```sh
+CMD+SHIFT+H
+```
+
+and subsequently reveal hidden folders by pressing
+```sh
+CMD+SHIFT+.
+```
+Now you can create the required folders and files manually. To hide the folders afterwards, press the same combination again.
+
+### Terminal
+
+1. Open Terminal
+2. You should be in your current user home directory. You can check it by executing
+
+```sh
+pwd
+```
+
+Expected result: /Users/{your_username}
+
+If that's not the case, run
+
+```sh
+cd ~
+```
+
+Create the directories `.aws` and `.gooddata` by executing the following statements:
+
+```sh
+mkdir .aws
+mkdir .gooddata
+```
+
+First, create the AWS `credentials` file in the `.aws` directory:
+
+```sh
+nano .aws/credentials
+```
+
+Populate the credentials file with the appropriate credentials.
+
+Format:
+```
+[default]
+aws_access_key_id = some_access_key_id
+aws_secret_access_key = some_access_key
+
+[customer1]
+aws_access_key_id = other_access_key_id
+aws_secret_access_key = other_access_key
+```
+Save by pressing ctrl+X, Y, and Enter.
+
+
+Now create the `profiles.yaml` file within the `.gooddata` folder:
+
+```sh
+nano .gooddata/profiles.yaml
+```
+
+Format:
+```yaml
+default:
+  host: https://host.name.cloud.gooddata.com/
+  token: some_auth_token
+
+customer:
+  host: https://customer.hostname.cloud.gooddata.com/
+  token: other_auth_token
+```
+Save by pressing ctrl+X, Y, and Enter.
+
+
+## Windows
+
+### GUI
+
+Navigate to your user folder. That's `C:\Users\USERNAME\` (replace `USERNAME` with your actual username). Inside, create a new folder named `.aws`, and inside the `.aws` folder create a file named `credentials`. The full path should look like this: `C:\Users\USERNAME\.aws\credentials`.
+
+If you cannot see the `.aws` folder after creating it, ensure you can [see hidden files](https://support.microsoft.com/en-us/windows/view-hidden-files-and-folders-in-windows-97fbc472-c603-9d90-91d0-1166d1d9f4b5#WindowsVersion=Windows_11) on your computer.
+
+Inside the `credentials` file, set up the necessary AWS credentials in the following format:
+
+```
+[default]
+aws_access_key_id = some_access_key_id
+aws_secret_access_key = some_access_key
+
+[customer1]
+aws_access_key_id = other_access_key_id
+aws_secret_access_key = other_access_key
+```
+
+Now, at the same path (`C:\Users\USERNAME\`), create a new folder named `.gooddata`. Inside this folder, create a file named `profiles.yaml`. The full path should look like this: `C:\Users\USERNAME\.gooddata\profiles.yaml`.
+
+Inside the `profiles.yaml` file, set up the necessary GoodData credentials in the following format:
+
+```yaml
+default:
+  host: https://host.name.cloud.gooddata.com/
+  token: some_auth_token
+
+customer:
+  host: https://customer.hostname.cloud.gooddata.com/
+  token: other_auth_token
+```
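+
+Once these files are in place, the tools read `profiles.yaml` as plain YAML. For example, the profile lookup works roughly like this (abridged from `create_api_client_from_profile` in `scripts/backup.py`; the real function builds an API client from the host and token):
+
+```python
+import yaml
+
+def read_profile(profile: str, profile_config: str) -> tuple[str, str]:
+    with open(profile_config, "r") as file:
+        config = yaml.safe_load(file)
+    if profile not in config:
+        raise RuntimeError(
+            f'Specified profile name "{profile}" not found in "{profile_config}".'
+        )
+    profile_conf = config[profile]
+    return profile_conf["host"], profile_conf["token"]
+```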
diff --git a/docs/USER_GROUP_MGMT.md b/docs/USER_GROUP_MGMT.md
new file mode 100644
index 0000000..8a19865
--- /dev/null
+++ b/docs/USER_GROUP_MGMT.md
@@ -0,0 +1,52 @@
+# GD User Group Management
+This tool facilitates the management of user groups within a GoodData organization. It supports the creation, updating, and deletion of user groups, including the assignment of parent user groups as defined in the input details.
+
+## Usage
+
+The tool requires the following argument:
+- `user_group_csv` - a path to a CSV file that defines the user groups, their names, parent user groups, and active status.
+
+Optional arguments include:
+- `-d | --delimiter` - column delimiter for the CSV files. This defines how the CSV is parsed. The default value is "`,`".
+- `-u | --ug_delimiter` - delimiter used to separate different parent user groups within the parent user group column. This must differ from the "delimiter" argument. The default value is "`|`".
+- `-q | --quotechar` - quotation character used to escape special characters (such as the delimiter) within the column values. The default value is '`"`'. If you need to escape the quotechar itself, you have to embed it in quotechars and double it (e.g.: `"some""string"` will yield `some"string`).
+
+Use the tool like so:
+```sh
+python scripts/user_group_mgmt.py user_group_csv
+```
+Where `user_group_csv` refers to the input CSV file.
+
+For custom delimiters, use the command:
+```sh
+python scripts/user_group_mgmt.py user_group_csv -d "," -u "|"
+```
+
+To display help for using arguments, run:
+```sh
+python scripts/user_group_mgmt.py -h
+```
+
+## Input CSV File (`user_group_csv`)
+The input CSV file defines the user groups to be managed. User groups not defined in the input file will not be modified.
+
+[Example input CSV.](examples/user_group_mgmt/input.csv)
+
+Expected CSV format:
+
+| user_group_id | user_group_name | parent_user_groups | is_active |
+|---------------|-----------------|--------------------|-----------|
+| ug_1          | Admins          |                    | True      |
+| ug_2          | Developers      | ug_1               | True      |
+| ug_3          | Testers         | ug_1\|ug_2         | True      |
+| ug_4          | TemporaryAccess | ug_2               | False     |
+
+Here, each `user_group_id` is the unique identifier for the user group.
+
+The `user_group_name` field is an optional name for the user group, defaulting to the ID if not provided.
+
+The `parent_user_groups` field specifies the parent user groups, defining hierarchical relationships. Multiple values are separated by the `--ug_delimiter` (by default "`|`").
+
+The `is_active` field contains information about whether the user group should exist or be deleted from the organization. The `is_active` field is case-insensitive, recognizing `true` as the only affirmative value. Any other value is considered negative (e.g., `no` would evaluate to `False`).
+
+This documentation provides a comprehensive guide to using the GD User Group Management tool within your GoodData organization.
\ No newline at end of file
diff --git a/docs/USER_MGMT.md b/docs/USER_MGMT.md
new file mode 100644
index 0000000..ee848f8
--- /dev/null
+++ b/docs/USER_MGMT.md
@@ -0,0 +1,55 @@
+# GD User Management
+A tool which helps manage user entities in a GoodData organization.
+
+Users can be created, updated, and deleted. This includes the creation of any new userGroups provided in the user details.
+
+## Usage
+
+The tool requires the following argument on input:
+- `user_csv` - a path to a csv file defining user entities, their relevant attributes, userGroup memberships, and isActive state
+
+Some other, _optional_, arguments are:
+- `-d | --delimiter` - column delimiter for the csv files. Use this to define how the csv is parsed. Default value is "`,`"
+- `-u | --ug_delimiter` - userGroups column value delimiter. Use this to separate the different userGroups defined in the userGroups column. Default value is "`|`". Note that `--delimiter` and `--ug_delimiter` have to differ.
+- `-q | --quotechar` - quotation character used to escape special characters (such as the delimiter) within the column field value. Default value is '`"`'. If you need to escape the quotechar itself, you have to embed it in quotechars and double it (e.g.: `"some""string"` will yield `some"string`).
+
+Use the tool like so:
+```sh
+python scripts/user_mgmt.py user_csv
+```
+Where `user_csv` refers to the input csv.
+
+If you would like to define custom delimiters, use the tool like so:
+```sh
+python scripts/user_mgmt.py user_csv -d "," -u "|"
+```
+
+To show the help for using arguments, call:
+```sh
+python scripts/user_mgmt.py -h
+```
+
+## Input CSV file (user_csv)
+The input CSV file defines the user entities which you want to manage. Note that GD organization users that are not defined in the input will not be modified in any way.
+
+[Example input csv.](examples/user_mgmt/input.csv)
+
+The following format of the csv is expected:
+
+| user_id              | firstname | lastname | email                   | auth_id   | user_groups | is_active |
+|----------------------|-----------|----------|-------------------------|-----------|-------------|-----------|
+| jozef.mrkva          | jozef     | mrkva    | jozef.mrkva@test.com    | auth_id_1 |             | True      |
+| bartolomej.brokolica |           |          |                         |           |             | False     |
+| peter.pertzlen       | peter     | pertzlen | peter.pertzlen@test.com | auth_id_3 | ug_1\|ug_2  | True      |
+| zoltan.zeler         | zoltan    | zeler    | zoltan.zeler@test.com   | auth_id_4 | ug_1        | True      |
+| kristian.kalerab     | kristian  | kalerab  |                         | auth_id_5 |             | True      |
+| richard.cvikla       |           |          | richard.cvikla@test.com | auth_id_6 | ug_1\|ug_2  | False     |
+| adam.avokado         |           |          |                         | auth_id_7 |             | False     |
+
+Here, each `user_id` is the ID of the user to manage.
+
+The `firstname`, `lastname`, `email`, and `auth_id` fields are optional attributes of the user.
+
+The `user_groups` field specifies the user group memberships of the user (multiple values separated by the `--ug_delimiter`, by default "`|`").
+
+Lastly, the `is_active` field contains information about whether the user should or should not exist in the organization. The `is_active` field is case-insensitive and considers `true` as the only value taken as positive.
Any other value in this field is considered negative (e.g.: `blabla` would evaluate to `False`). diff --git a/docs/examples/permission_mgmt/input.csv b/docs/examples/permission_mgmt/input.csv new file mode 100644 index 0000000..12357f8 --- /dev/null +++ b/docs/examples/permission_mgmt/input.csv @@ -0,0 +1,11 @@ +user_id,ug_id,ws_id,ws_permission,is_active +user_1,,ws_id_1,ANALYZE,True +user_1,,ws_id_1,VIEW,False +user_1,,ws_id_2,MANAGE,True +user_2,,ws_id_1,ANALYZE,True +user_2,,ws_id_2,MANAGE,True +,ug_1,ws_id_1,ANALYZE,True +,ug_1,ws_id_1,VIEW,True +,ug_1,ws_id_1,MANAGE,False +,ug_2,ws_id_1,ANALYZE,True +,ug_2,ws_id_2,MANAGE,True diff --git a/docs/examples/user_group_mgmt/input.csv b/docs/examples/user_group_mgmt/input.csv new file mode 100644 index 0000000..6d91442 --- /dev/null +++ b/docs/examples/user_group_mgmt/input.csv @@ -0,0 +1,5 @@ +user_group_id,user_group_name,parent_user_groups,is_active +ug_1,Admins,,True +ug_2,Developers,ug_1,True +ug_3,Testers,ug_1|ug_2,True +ug_4,TemporaryAccess,ug_2,False diff --git a/docs/examples/user_mgmt/input.csv b/docs/examples/user_mgmt/input.csv new file mode 100644 index 0000000..7c38ea8 --- /dev/null +++ b/docs/examples/user_mgmt/input.csv @@ -0,0 +1,8 @@ +user_id, firstname, lastname, email, auth_id, user_groups, is_active +jozef.mrkva,jozef,mrkva,jozef.mrkva@test.com,auth_id_1,,True +bartolomej.brokolica,,,,,,False +peter.pertzlen,peter,pertzlen,peter.pertzlen@test.com,auth_id_3,ug_1|ug_2,True +zoltan.zeler,zoltan,zeler,zoltan.zeler@test.com,auth_id_4,ug_1,True +kristian.kalerab,kristian,kalerab,,auth_id_5,,True +richard.cvikla,,,richard.cvikla@test.com,auth_id_6,ug_1|ug_2,False +adam.avokado,,,,auth_id_7,,False diff --git a/input.csv b/input.csv new file mode 100644 index 0000000..80f5b6c --- /dev/null +++ b/input.csv @@ -0,0 +1,4 @@ +workspace_id +workspaceidtobeexportedone +workspaceidtobeexportedtwo +workspaceidtobeexportedthree diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7be71c1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[tool.black] +target-version = ['py311'] + +[tool.mypy] +python_version = "3.11" + +[[tool.mypy.overrides]] +module = [ + "boto3.*", + "gooddata_api_client.*", + "gooddata_sdk.*", + "pytest.*", + "yaml.*", + "jmespath.*", + "moto.*", + "gooddata_api_client.*", + "requests.*", +] +ignore_missing_imports = true + +[tool.ruff] +# Aligned with the default line length of Black +line-length = 88 diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..d84c3dc --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,3 @@ +black~=23.3.0 +mypy~=1.3.0 +ruff~=0.0.282 diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..94b458a --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,2 @@ +pytest~=7.3.2 +moto~=4.1.11 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c780ffb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +boto3~=1.26.152 +gooddata_sdk==1.17.0 +requests==2.31.0 diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..332df81 --- /dev/null +++ b/scripts/__init__.py @@ -0,0 +1 @@ +# (C) 2023 GoodData Corporation diff --git a/scripts/backup.py b/scripts/backup.py new file mode 100644 index 0000000..9a791cc --- /dev/null +++ b/scripts/backup.py @@ -0,0 +1,404 @@ +# (C) 2023 GoodData Corporation +import abc +import argparse +import csv +import datetime +import json +import logging +import os +import requests +import shutil +import 
tempfile
+import yaml
+from typing import Any, Optional, TypeAlias, Type
+
+import boto3
+from pathlib import Path
+import gooddata_api_client
+from gooddata_sdk import __version__ as sdk_version
+from gooddata_sdk import GoodDataSdk
+
+
+TIMESTAMP_SDK_FOLDER = (
+    str(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
+    + "-"
+    + sdk_version.replace(".", "_")
+)
+
+API_VERSION = "v1"
+BEARER_TKN_PREFIX = "Bearer"
+PROFILES_FILE = "profiles.yaml"
+PROFILES_DIRECTORY = ".gooddata"
+PROFILES_FILE_PATH = Path.home() / PROFILES_DIRECTORY / PROFILES_FILE
+
+FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+ch = logging.StreamHandler()
+ch.setFormatter(logging.Formatter(fmt=FORMAT))
+logger.addHandler(ch)
+
+LAYOUTS_DIR = "gooddata_layouts"
+LDM_DIR = "ldm"
+
+
+class GoodDataRestApiError(Exception):
+    """Wrapper for errors occurring from interaction with GD REST API."""
+
+
+class BackupRestoreConfig:
+    def __init__(self, conf_path: str):
+        with open(conf_path, "r") as stream:
+            conf = yaml.safe_load(stream)
+        self.storage_type = conf["storage_type"]
+        self.storage = conf["storage"]
+
+
+class BackupStorage(abc.ABC):
+    @abc.abstractmethod
+    def export(self, folder, org_id):
+        """Exports the content of the folder to the storage."""
+        raise NotImplementedError
+
+
+class S3Storage(BackupStorage):
+    def __init__(self, conf: BackupRestoreConfig):
+        self._config = conf.storage
+        self._profile = self._config.get("profile", "default")
+        self._session = self._create_boto_session(self._profile)
+        self._api = self._session.resource("s3")
+        self._bucket = self._api.Bucket(self._config["bucket"])
+        suffix = "/" if not self._config["backup_path"].endswith("/") else ""
+        self._backup_path = self._config["backup_path"] + suffix
+
+    @staticmethod
+    def _create_boto_session(profile: str) -> boto3.Session:
+        try:
+            return boto3.Session(profile_name=profile)
+        except Exception:
+            logger.warning(
+                f'AWS profile "{profile}" not found. Trying other fallback methods...'
+ ) + + return boto3.Session() + + def export(self, folder, org_id) -> None: + """Uploads the content of the folder to S3 as backup.""" + storage_path = self._config["bucket"] + "/" + self._backup_path + logger.info(f"Uploading {org_id} to {storage_path}") + folder = folder + "/" + org_id + for subdir, dirs, files in os.walk(folder): + full_path = os.path.join(subdir) + export_path = ( + self._backup_path + org_id + "/" + full_path[len(folder) + 1 :] + "/" + ) + self._bucket.put_object(Key=export_path) + + for file in files: + full_path = os.path.join(subdir, file) + with open(full_path, "rb") as data: + export_path = ( + self._backup_path + org_id + "/" + full_path[len(folder) + 1 :] + ) + self._bucket.put_object(Key=export_path, Body=data) + + +class LocalStorage(BackupStorage): + def __init__(self, conf: BackupRestoreConfig): + return + + def export(self, folder, org_id, export_folder="local_backups"): + """Copies the content of the folder to local storage as backup.""" + logger.info(f"Saving {org_id} to local storage") + shutil.copytree( + Path(folder), Path(Path.cwd(), export_folder), dirs_exist_ok=True + ) + + +MaybeResponse: TypeAlias = Optional[requests.Response] + + +class GDApi: + """Wrapper for GoodData REST API client.""" + + def __init__(self, host: str, api_token: str, headers=None): + self.endpoint = self._handle_endpoint(host) + self.api_token = api_token + self.headers = headers if headers else {} + self.wait_api_time = 10 + + @staticmethod + def _handle_endpoint(host: str) -> str: + """Ensures that the endpoint URL is correctly formatted.""" + return ( + f"{host}api/{API_VERSION}" + if host[-1] == "/" + else f"{host}/api/{API_VERSION}" + ) + + def get( + self, + path: str, + params, + ok_code: int = 200, + not_found_code: int = 404, + ) -> MaybeResponse: + """Sends a GET request to the GoodData API.""" + kwargs = self._prepare_request(path, params) + logger.debug(f"GET request: {json.dumps(kwargs)}") + response = requests.get(**kwargs) + return self._resolve_return_code( + response, ok_code, kwargs["url"], "RestApi.get", not_found_code + ) + + def _prepare_request(self, path: str, params=None) -> dict[str, Any]: + """Prepares the request to be sent to the GoodData API.""" + kwargs: dict[str, Any] = { + "url": f"{self.endpoint}/{path}", + "headers": self.headers.copy(), + } + if params: + kwargs["params"] = params + if self.api_token: + kwargs["headers"]["Authorization"] = f"{BEARER_TKN_PREFIX} {self.api_token}" + else: + raise RuntimeError( + "Token required for authentication against GD API is missing." + ) + # TODO - Currently no credentials validation + # TODO - do we also support username+pwd auth? Or do we enforce token only? 
+ # else: + # kwargs['auth'] = (self.user, self.password) if self.user is not None else None # noqa + return kwargs + + @staticmethod + def _resolve_return_code( + response, ok_code: int, url, method, not_found_code: Optional[int] = None + ) -> MaybeResponse: + """Resolves the return code of the response.""" + if response.status_code == ok_code: + logger.debug(f"{method} to {url} succeeded") + return response + if not_found_code and response.status_code == not_found_code: + logger.debug(f"{method} to {url} failed - target not found") + return None + raise GoodDataRestApiError( + f"{method} to {url} failed - " + f"response_code={response.status_code} message={response.text}" + ) + + +def create_api_client_from_profile(profile: str, profile_config: Path) -> GDApi: + """Creates a GoodData API client from the specified profile.""" + with open(profile_config, "r") as file: + config = yaml.safe_load(file) + + if profile not in config: + raise RuntimeError( + f'Specified profile name "{profile}" not found in "{profile_config}".' + ) + + profile_conf = config[profile] + hostname, token = profile_conf["host"], profile_conf["token"] + return GDApi(hostname, token) + + +def create_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument( + "ws_csv", help="Path to csv with IDs of GD workspaces to backup.", type=Path + ) + parser.add_argument( + "conf", help="Path to backup storage configuration file.", type=Path + ) + parser.add_argument( + "-p", + "--profile-config", + type=Path, + default=PROFILES_FILE_PATH, + help="Optional path to GoodData profile config. " + f'If no path is provided, "{PROFILES_FILE_PATH}" is used.', + ) + parser.add_argument( + "--profile", + type=str, + default="default", + help='GoodData profile to use. 
If no profile is provided, "default" is used.',
+    )
+
+    return parser
+
+
+def write_to_yaml(path, source):
+    """Writes the source object to a YAML file at the given path."""
+    with open(path, "w") as outfile:
+        yaml.dump(source, outfile)
+
+
+def get_storage(storage_type: str) -> Type[BackupStorage]:
+    """Returns the storage class based on the storage type."""
+    match storage_type:
+        case "s3":
+            logger.info("Storage type set to S3.")
+            return S3Storage
+        case "local":
+            logger.info("Storage type set to local storage.")
+            return LocalStorage
+        case _:
+            raise RuntimeError(f'Unsupported storage type "{storage_type}".')
+
+
+def get_user_data_filters(api: GDApi, ws_id: str) -> dict | None:
+    """Returns the user data filters for the specified workspace."""
+    try:
+        # No leading slash - GDApi._prepare_request joins the path onto the endpoint.
+        user_data_filters = api.get(f"layout/workspaces/{ws_id}/userDataFilters", None)
+        if user_data_filters:
+            return user_data_filters.json()
+    except GoodDataRestApiError as e:
+        logger.error(f"UDF call for {ws_id} returned error: {e}")
+    return None
+
+
+def store_user_data_filters(
+    user_data_filters: dict, export_path: Path, org_id: str, ws_id: str
+):
+    """Stores the user data filters in the specified export path."""
+    os.mkdir(
+        os.path.join(
+            export_path,
+            "gooddata_layouts",
+            org_id,
+            "workspaces",
+            ws_id,
+            "user_data_filters",
+        )
+    )
+
+    for filter in user_data_filters["userDataFilters"]:
+        udf_file_path = os.path.join(
+            export_path,
+            "gooddata_layouts",
+            org_id,
+            "workspaces",
+            ws_id,
+            "user_data_filters",
+            filter["id"] + ".yaml",
+        )
+        write_to_yaml(udf_file_path, filter)
+
+
+def get_workspace_export(
+    sdk: GoodDataSdk,
+    api: GDApi,
+    ws_csv: Path,
+    local_target_path: str,
+    org_id: str,
+) -> None:
+    """
+    Iterate over all workspaces in the input ws_csv and store their
+    declarative_workspace and their respective user data filters.
+    """
+    with open(ws_csv) as csvfile:
+        workspace_list = csv.reader(csvfile, skipinitialspace=True)
+        next(workspace_list, None)
+        exported = False
+        for row in workspace_list:
+            ws_id = row[0]
+            export_path = Path(local_target_path, org_id, ws_id, TIMESTAMP_SDK_FOLDER)
+
+            user_data_filters = get_user_data_filters(api, ws_id)
+            if not user_data_filters:
+                logger.error(
+                    f"Skipping backup of {ws_id} - user data filters returned None."
+                )
+                logger.error(f"Check if {ws_id} exists and the API is functional.")
+                continue
+
+            try:
+                sdk.catalog_workspace.store_declarative_workspace(ws_id, export_path)
+                store_user_data_filters(user_data_filters, export_path, org_id, ws_id)
+                logger.info(f"Stored export for {ws_id}")
+                exported = True
+            except gooddata_api_client.exceptions.NotFoundException:
+                logger.error(f"Workspace {ws_id} does not exist. Skipping.")
+
+    if not exported:
+        raise RuntimeError(
+            "None of the workspaces were exported. "
+            "Check the source file and whether the workspaces exist."
+        )
+
+
+def archive_gooddata_layouts_to_zip(folder: str) -> None:
+    """Archives the gooddata_layouts directory to a zip file."""
+    target_subdir = ""
+    for subdir, dirs, files in os.walk(folder):
+        if LAYOUTS_DIR in dirs:
+            target_subdir = os.path.join(subdir, dirs[0])
+        if LDM_DIR in dirs:
+            inner_layouts_dir = subdir + "/gooddata_layouts"
+            os.mkdir(inner_layouts_dir)
+            for dir in dirs:
+                shutil.move(os.path.join(subdir, dir), os.path.join(inner_layouts_dir))
+            shutil.make_archive(target_subdir, "zip", subdir)
+            shutil.rmtree(target_subdir)
+
+
+def create_client(args: argparse.Namespace) -> tuple[GoodDataSdk, GDApi]:
+    """Creates a GoodData client."""
+    gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN")
+    gdc_hostname = os.environ.get("GDC_HOSTNAME")
+
+    if gdc_hostname and gdc_auth_token:
+        logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.")
+        sdk = GoodDataSdk.create(gdc_hostname, gdc_auth_token)
+        api = GDApi(gdc_hostname, gdc_auth_token)
+        return sdk, api
+
+    profile_config, profile = args.profile_config, args.profile
+    if os.path.exists(profile_config):
+        logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.")
+        sdk = GoodDataSdk.create_from_profile(profile, profile_config)
+        api = create_api_client_from_profile(profile, profile_config)
+        return sdk, api
+
+    raise RuntimeError(
+        "No GoodData credentials provided. Please export required ENVVARS "
+        "(GDC_HOSTNAME, GDC_AUTH_TOKEN) or provide path to profile config."
+    )
+
+
+def validate_args(args):
+    """Validates the arguments provided."""
+    if not os.path.exists(args.ws_csv):
+        raise RuntimeError("Invalid path to csv given.")
+
+    if not os.path.exists(args.conf):
+        raise RuntimeError("Invalid path to backup storage configuration given.")
+
+
+def main(args):
+    """Main function for the backup script."""
+    sdk, api = create_client(args)
+
+    org_id = sdk.catalog_organization.organization_id
+
+    conf = BackupRestoreConfig(args.conf)
+
+    storage = get_storage(conf.storage_type)(conf)
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        get_workspace_export(sdk, api, args.ws_csv, tmpdir, org_id)
+
+        archive_gooddata_layouts_to_zip(Path(tmpdir, org_id))
+
+        storage.export(tmpdir, org_id)
+
+
+if __name__ == "__main__":
+    parser = create_parser()
+    args = parser.parse_args()
+    validate_args(args)
+    main(args)
diff --git a/scripts/permission_mgmt.py b/scripts/permission_mgmt.py
new file mode 100644
index 0000000..41151f0
--- /dev/null
+++ b/scripts/permission_mgmt.py
@@ -0,0 +1,467 @@
+# (C) 2023 GoodData Corporation
+import argparse
+import csv
+import logging
+import os
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Iterator, Optional, TypeAlias
+
+import gooddata_sdk as gd_sdk
+from gooddata_api_client.exceptions import NotFoundException
+
+
+USER_TYPE = "user"
+USER_GROUP_TYPE = "userGroup"
+
+PROFILES_FILE = "profiles.yaml"
+PROFILES_DIRECTORY = ".gooddata"
+PROFILES_FILE_PATH = Path.home() / PROFILES_DIRECTORY / PROFILES_FILE
+LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
+
+logger = logging.getLogger(__name__)
+handler = logging.StreamHandler(sys.stdout)
+handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT))
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+
+def create_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(description="Management of workspace permissions.")
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Turns on the debug log output."
+ ) + parser.add_argument( + "perm_csv", + type=Path, + help=( + "Path to (comma-delimited) csv with user/userGroup " + "to workspace permission pairs." + ), + ) + parser.add_argument( + "-d", + "--delimiter", + type=str, + default=",", + help="Delimiter used to separate different columns in the user_csv.", + ) + parser.add_argument( + "-p", + "--profile-config", + type=Path, + default=PROFILES_FILE_PATH, + help="Optional path to GoodData profile config. " + f'If no path is provided, "{PROFILES_FILE_PATH}" is used.', + ) + parser.add_argument( + "--profile", + type=str, + default="default", + help='GoodData profile to use. If not profile is provided, "default" is used.', + ) + return parser + + +TargetsPermissionDict: TypeAlias = dict[str, dict[str, bool]] + + +@dataclass(frozen=True) +class WSPermission: + permission: str + ws_id: str + id: str + type: str + is_active: bool + + @classmethod + def from_csv_row(cls, row: list[Any]) -> "WSPermission": + """Construct WSPermission data object from csv row input.""" + user_id, user_group_id, ws_id, permission, is_active = row + + id = user_id if user_id else user_group_id + target_type = USER_TYPE if user_id else USER_GROUP_TYPE + + return WSPermission( + permission=permission, + ws_id=ws_id, + id=id, + type=target_type, + is_active=str(is_active).lower() == "true", + ) + + +@dataclass +class WSPermissionDeclaration: + users: TargetsPermissionDict + user_groups: TargetsPermissionDict + + @classmethod + def from_sdk_api( + cls, declaration: gd_sdk.CatalogDeclarativeWorkspacePermissions + ) -> "WSPermissionDeclaration": + """ + Constructs an WSPermissionDeclaration instance + from GoodData SDK CatalogDeclarativeWorkspacePermissions. + """ + users: TargetsPermissionDict = {} + user_groups: TargetsPermissionDict = {} + + for permission in declaration.permissions: + permission_type, id = permission.assignee.type, permission.assignee.id + target_dict = users if permission_type == USER_TYPE else user_groups + + id_permissions = target_dict.get(id) + if not id_permissions: + target_dict[id] = dict() + + target_dict[id][permission.name] = True + + return WSPermissionDeclaration(users, user_groups) + + @staticmethod + def _construct_upstream_permission( + permission: str, assignee: gd_sdk.CatalogAssigneeIdentifier + ) -> gd_sdk.CatalogDeclarativeSingleWorkspacePermission | None: + """Constructs single permission declaration for the SDK API.""" + try: + return gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name=permission, assignee=assignee + ) + except Exception as e: + logger.error( + "Failed to construct SDK declaration " + f'for type={assignee.type} id={assignee.id}. Error: "{e}".' + ) + return None + + def _permissions_for_target( + self, permissions: dict[str, bool], assignee: gd_sdk.CatalogAssigneeIdentifier + ) -> Iterator[gd_sdk.CatalogDeclarativeSingleWorkspacePermission]: + """Constructs permission declarations for a single target.""" + for permission, is_active in permissions.items(): + if not is_active: + continue + declaration = self._construct_upstream_permission(permission, assignee) + if not declaration: + continue + yield declaration + + def to_sdk_api(self) -> gd_sdk.CatalogDeclarativeWorkspacePermissions: + """ + Constructs the GoodData SDK CatalogDeclarativeWorkspacePermissions + object from the WSPermissionDeclaration instance. 
+ """ + permission_declarations: list[ + gd_sdk.CatalogDeclarativeSingleWorkspacePermission + ] = [] + + for user_id, permissions in self.users.items(): + assignee = gd_sdk.CatalogAssigneeIdentifier(id=user_id, type=USER_TYPE) + for declaration in self._permissions_for_target(permissions, assignee): + permission_declarations.append(declaration) + + for ug_id, permissions in self.user_groups.items(): + assignee = gd_sdk.CatalogAssigneeIdentifier(id=ug_id, type=USER_GROUP_TYPE) + for declaration in self._permissions_for_target(permissions, assignee): + permission_declarations.append(declaration) + + return gd_sdk.CatalogDeclarativeWorkspacePermissions( + permissions=permission_declarations + ) + + def add_permission(self, permission: WSPermission): + """ + Adds WSPermission object into respective field within the instance. + Handles duplicate permissions and different combinations of input + and upstream is_active permission states. + """ + target_dict = self.users if permission.type == USER_TYPE else self.user_groups + + if permission.id not in target_dict: + target_dict[permission.id] = {} + + is_active = permission.is_active + target_permissions = target_dict[permission.id] + permission_value = permission.permission + + if permission_value not in target_permissions: + target_permissions[permission_value] = is_active + elif not is_active and target_permissions[permission_value] is True: + logger.warning( + "isActive=False provided after True has been specificed " + f'for the same input. Skipping "{permission}".' + ) + elif is_active and target_permissions[permission_value] is False: + logger.warning( + "isActive=True provided after False has been specified " + f'for the same input. Overwriting "{permission}".' + ) + target_permissions[permission_value] = is_active + + def upsert(self, other: "WSPermissionDeclaration"): + """ + Modifies the owner object by merging with the other. + Keeps the unmodified users/userGroups untouched. + If some user/userGroup is modified, it gets overwritten with permissions + defined in the input. + """ + for user_id, permissions in other.users.items(): + self.users[user_id] = permissions + + for ug_id, permissions in other.user_groups.items(): + self.user_groups[ug_id] = permissions + + +WSPermissionsDeclarations: TypeAlias = dict[str, WSPermissionDeclaration] + + +class InvalidPermissionException(Exception): + pass + + +class WSPermissionManager: + def __init__(self, sdk: gd_sdk.GoodDataSdk): + self._sdk = sdk + + def _get_ws_declaration(self, ws_id: str) -> WSPermissionDeclaration: + users: TargetsPermissionDict = {} + user_groups: TargetsPermissionDict = {} + + upstream_declaration = self._sdk.catalog_permission.get_declarative_permissions( + ws_id + ) + + for permission in upstream_declaration.permissions: + permission_type, id = permission.assignee.type, permission.assignee.id + target_dict = users if permission_type == USER_TYPE else user_groups + + id_permissions = target_dict.get(id) + if not id_permissions: + target_dict[id] = dict() + + target_dict[id][permission.name] = True + + return WSPermissionDeclaration(users, user_groups) + + def _get_upstream_declaration( + self, ws_id: str + ) -> Optional[WSPermissionDeclaration]: + """Retrieves upstream permission declaration for a workspace.""" + try: + declaration = self._sdk.catalog_permission.get_declarative_permissions( + ws_id + ) + return WSPermissionDeclaration.from_sdk_api(declaration) + except NotFoundException as e: + logger.error(f"Workspace with id {ws_id} doesn't exist. 
Error: {e}") + except Exception as e: + logger.error( + "Some error occured while retrieving workspace " + f'permission declaration for workspace "{ws_id}". Error: "{e}"' + ) + return None + + def _get_upstream_declarations( + self, input_ws_ids: list[str] + ) -> WSPermissionsDeclarations: + """Retrieves upstream permission declarations for a list of workspaces.""" + ws_dict: WSPermissionsDeclarations = {} + for ws_id in input_ws_ids: + declaration = self._get_upstream_declaration(ws_id) + if declaration: + ws_dict[ws_id] = declaration + return ws_dict + + @staticmethod + def _construct_declarations( + permissions: list[WSPermission], + ) -> WSPermissionsDeclarations: + """Constructs workspace permission declarations from the input permissions.""" + ws_dict: WSPermissionsDeclarations = {} + for permission in permissions: + ws_id = permission.ws_id + + if ws_id not in ws_dict: + ws_dict[ws_id] = WSPermissionDeclaration({}, {}) + + ws_dict[ws_id].add_permission(permission) + return ws_dict + + def _check_user_exists(self, user_id: str): + """Checks if user with provided ID exists.""" + try: + self._sdk.catalog_user.get_user(user_id) + except NotFoundException: + raise InvalidPermissionException("Provided user ID does not exist.") + + def _check_user_group_exists(self, ug_id: str): + """Checks if user group with provided ID exists.""" + try: + self._sdk.catalog_user.get_user_group(ug_id) + except NotFoundException: + raise InvalidPermissionException("Provided user group ID does not exist.") + + def _validate_permission(self, permission: WSPermission): + """Validates if the permission is correctly defined.""" + if permission.type == USER_TYPE: + self._check_user_exists(permission.id) + else: + self._check_user_group_exists(permission.id) + + def _filter_invalid_permissions( + self, permissions: list[WSPermission] + ) -> list[WSPermission]: + """Filters out invalid permissions from the input list.""" + valid_permissions: list[WSPermission] = [] + for permission in permissions: + try: + self._validate_permission(permission) + except InvalidPermissionException as e: + logger.error( + f'Invalid permission defined. Skipping "{permission}. Error: "{e}".' + ) + continue + valid_permissions.append(permission) + return valid_permissions + + def manage_permissions(self, permissions: list[WSPermission]): + """Manages permissions for a list of workspaces. + Modify upstream workspace declarations for each input workspace and skip non-existent ws_ids + """ + logger.info( + f"Starting permission management run of {len(permissions)} permissions..." + ) + valid_permissions = self._filter_invalid_permissions(permissions) + + input_declarations = self._construct_declarations(valid_permissions) + + input_ws_ids = list(input_declarations.keys()) + upstream_declarations = self._get_upstream_declarations(input_ws_ids) + + for ws_id, declaration in input_declarations.items(): + if ws_id not in upstream_declarations: + continue + + upstream_declarations[ws_id].upsert(declaration) + + ws_permissions = upstream_declarations[ws_id].to_sdk_api() + + logger.info(f'Putting declarative permissions for workspace "{ws_id}".') + try: + self._sdk.catalog_permission.put_declarative_permissions( + ws_id, ws_permissions + ) + except Exception as e: + logger.error( + "Failed to update declarative workspace " + f'permissions for workspace "{ws_id}". 
Error: {e}' + ) + logger.info("Finished permission management run.") + + +def csv_row_is_valid(row: list[Any]) -> bool: + """Validates if the csv row is correctly defined.""" + try: + user_id, user_group_id, ws_id, permission, is_active = row + except Exception as e: + logger.error( + "Unable to parse csv row. " + "Most probably an incorrect amount of values was defined. " + f'Skipping following row: "{row}". Error: "{e}".' + ) + return False + + if user_id and user_group_id: + logger.error( + "UserID and UserGroupID are mutually exclusive per csv row. " + f'Skipping following row: "{row}".' + ) + return False + + if not user_id and not user_group_id: + logger.error( + "Either UserID or UserGroupID have to be defined per csv row. " + f'Skipping following row: "{row}".' + ) + return False + + if not ws_id: + logger.error(f'ws_id field seems to be empty. Skipping following row: "{row}".') + return False + + if not permission: + logger.error( + f'permission field seems to be empty. Skipping following row: "{row}".' + ) + return False + + if not is_active: + logger.error( + f'is_active field seems to be empty. Skipping following row: "{row}".' + ) + return False + + return True + + +def read_permissions_from_csv(csv_path: str) -> list[WSPermission]: + """Reads permissions from the input csv file.""" + permissions: list[WSPermission] = [] + with open(csv_path, "r") as f: + reader = csv.reader(f, skipinitialspace=True) + next(reader) # Skip header + for row in reader: + if not csv_row_is_valid(row): + continue + try: + permission = WSPermission.from_csv_row(row) + except Exception as e: + logger.error(f'Unable to load following row: "{row}". Error: "{e}"') + continue + permissions.append(permission) + return permissions + + +def create_client(args: argparse.Namespace) -> gd_sdk.GoodDataSdk: + """Creates GoodData SDK client based on the input arguments.""" + gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") + gdc_hostname = os.environ.get("GDC_HOSTNAME") + + if gdc_hostname and gdc_auth_token: + logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.") + return gd_sdk.GoodDataSdk.create(gdc_hostname, gdc_auth_token) + + profile_config, profile = args.profile_config, args.profile + if os.path.exists(profile_config): + logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.") + return gd_sdk.GoodDataSdk.create_from_profile(profile, profile_config) + + raise RuntimeError( + "No GoodData credentials provided. Please export required ENVVARS " + "(GDC_HOSTNAME, GDC_AUTH_TOKEN) or provide path to GD profile config." + ) + + +def validate_args(args: argparse.Namespace) -> None: + """Validates the input arguments.""" + if not os.path.exists(args.perm_csv): + raise RuntimeError( + "Invalid path to workspace permission management input csv given." 
+ ) + + +def permission_mgmt(args): + """Main function for the permission management script.""" + validate_args(args) + permissions = read_permissions_from_csv(args.perm_csv) + sdk = create_client(args) + permission_manager = WSPermissionManager(sdk) + permission_manager.manage_permissions(permissions) + + +if __name__ == "__main__": + parser = create_parser() + args = parser.parse_args() + permission_mgmt(args) diff --git a/scripts/restore.py b/scripts/restore.py new file mode 100644 index 0000000..def3941 --- /dev/null +++ b/scripts/restore.py @@ -0,0 +1,507 @@ +# (C) 2023 GoodData Corporation +import abc +import argparse +import csv +import json +import logging +import os +import tempfile +import traceback +import requests +import sys +import yaml +import zipfile +from pathlib import Path +from typing import Any, Optional, TypeAlias, Type + +import boto3 +from gooddata_sdk import ( + GoodDataSdk, + CatalogDeclarativeAnalytics, + CatalogDeclarativeModel, +) + +BEARER_TKN_PREFIX = "Bearer" +LAYOUTS_DIR = "gooddata_layouts" +AM_DIR = "analytics_model" +LDM_DIR = "ldm" +UDF_DIR = "user_data_filters" + +PROFILES_FILE = "profiles.yaml" +PROFILES_DIRECTORY = ".gooddata" +PROFILES_FILE_PATH = Path.home() / PROFILES_DIRECTORY / PROFILES_FILE +LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s" + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler(sys.stdout) +handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) +logger.addHandler(handler) +logger.setLevel(logging.INFO) + +GDWorkspace: TypeAlias = tuple[CatalogDeclarativeModel, CatalogDeclarativeAnalytics] + + +class GoodDataRestApiError(Exception): + """Wrapper for errors occurring from interaction with GD REST API.""" + + +class BackupRestoreError(Exception): + def __init__(self, cause: str = "Unknown"): + self.cause = cause + + +class BackupRestoreConfig: + def __init__(self, conf_path: str): + conf = self._load_conf(conf_path) + self.storage_type = conf["storage_type"] + self.storage = conf["storage"] + + @staticmethod + def _load_conf(path: str) -> dict[str, Any]: + with open(path, "r") as conf: + return yaml.safe_load(conf) + + +class BackupStorage(abc.ABC): + """ + Retrieves archive of backed up hierarchical export of workspace declaration. + + Implement this abstract base class for different kinds of storage providers. + """ + + @abc.abstractmethod + def get_ws_declaration(self, target_path: str, local_target_path: Path) -> None: + raise NotImplementedError + + +class S3StorageConfig: + def __init__(self, storconf: dict[str, Any]): + self.bucket: str = storconf["bucket"] + suffix = "/" if not storconf["backup_path"].endswith("/") else "" + self.backup_path: str = storconf["backup_path"] + suffix + self.profile = storconf.get("profile", "default") + + +class S3Storage(BackupStorage): + """ + Retrieves archive of backed up hierarchical export of workspace declaration from S3. + """ + + def __init__(self, conf: BackupRestoreConfig): + self._config = S3StorageConfig(conf.storage) + self._session = self._create_boto_session(self._config.profile) + self._api = self._session.resource("s3") + self._bucket = self._api.Bucket(self._config.bucket) + self._validate_backup_path() + + @staticmethod + def _create_boto_session(profile: str) -> boto3.Session: + try: + return boto3.Session(profile_name=profile) + except Exception: + logger.warning( + 'AWS profile "[default]" not found. Trying other fallback methods...' 
+ ) + + return boto3.Session() + + def _validate_backup_path(self) -> None: + """Validates if backup path exists in the S3 bucket.""" + objects_filter = self._bucket.objects.filter(Prefix=self._config.backup_path) + + try: + objects = list(objects_filter) + except Exception as e: + raise RuntimeError(f"Error raised while validating s3 config. Error: {e}") + + if len(objects) == 0: + raise RuntimeError("Provided s3 backup_path does not exist. Exiting...") + + def get_ws_declaration(self, s3_target_path: str, local_target_path: Path) -> None: + """Retrieves workspace declaration from S3 bucket.""" + s3_backup_path = self._config.backup_path + target_s3_prefix = f"{s3_backup_path}{s3_target_path}" + + objs_found = list(self._bucket.objects.filter(Prefix=target_s3_prefix)) + + # Remove the included directory (which equals prefix) on hit + objs_found = objs_found[1:] if len(objs_found) > 0 else objs_found + + if not objs_found: + logger.error(f"No target backup found for {target_s3_prefix}.") + raise BackupRestoreError(f"No target found for {target_s3_prefix}") + + if len(objs_found) > 1: + logger.warning( + f"Multiple backups found at {target_s3_prefix}." + " Continuing with the first one, ignoring the rest..." + ) + + s3_obj = objs_found[0] + self._bucket.download_file(s3_obj.key, local_target_path) + + +MaybeResponse: TypeAlias = Optional[requests.Response] + + +class GDApi: + def __init__(self, host: str, api_token: str, headers: dict[str, Any] = {}): + self.endpoint = self._handle_endpoint(host) + self.api_token = api_token + self.headers = headers + self.wait_api_time = 10 + + @staticmethod + def _handle_endpoint(host: str) -> str: + """Ensures that the endpoint URL is properly formatted.""" + return f"{host}api/v1" if host[-1] == "/" else f"{host}/api/v1" + + def put( + self, path: str, request: dict[str, Any], ok_code: int = 200 + ) -> requests.Response: + """Sends a PUT request to the GoodData API.""" + kwargs = self._prepare_request(path) + kwargs["headers"]["Content-Type"] = "application/json" + kwargs["json"] = request + logger.debug(f"PUT request: {json.dumps(request)}") + response = requests.put(**kwargs) + resolved_response = self._resolve_return_code( + response, ok_code, kwargs["url"], "RestApi.put" + ) + assert resolved_response is not None + return resolved_response + + def _prepare_request(self, path: str, params=None) -> dict[str, Any]: + """Prepares the request to be sent to the GoodData API.""" + kwargs: dict[str, Any] = { + "url": f"{self.endpoint}/{path}", + "headers": self.headers.copy(), + } + if params: + kwargs["params"] = params + if self.api_token: + kwargs["headers"]["Authorization"] = f"{BEARER_TKN_PREFIX} {self.api_token}" + else: + raise RuntimeError( + "Token required for authentication against GD API is missing." 
+ ) + + return kwargs + + @staticmethod + def _resolve_return_code( + response, ok_code: int, url, method, not_found_code: Optional[int] = None + ) -> MaybeResponse: + """Resolves the return code of the response.""" + if response.status_code == ok_code: + logger.debug(f"{method} to {url} succeeded") + return response + if not_found_code and response.status_code == not_found_code: + logger.debug(f"{method} to {url} failed - target not found") + return None + raise GoodDataRestApiError( + f"{method} to {url} failed - " + f"response_code={response.status_code} message={response.text}" + ) + + +def create_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument( + "ws_csv", type=Path, help="Path to csv with IDs of GD workspaces to restore." + ) + parser.add_argument( + "conf", type=Path, help="Path to backup storage configuration file." + ) + parser.add_argument( + "-p", + "--profile-config", + type=Path, + default=PROFILES_FILE_PATH, + help="Optional path to GoodData profile config. " + f'If no path is provided, "{PROFILES_FILE_PATH}" is used.', + ) + parser.add_argument( + "--profile", + type=str, + default="default", + help='GoodData profile to use. If not profile is provided, "default" is used.', + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Turns on the debug log output." + ) + return parser + + +def read_targets_from_csv(csv_path: str) -> dict[str, str]: + """Reads the csv file with workspace IDs and paths to backups.""" + # TODO - handling of csv files with and without headers + # TODO - handling csv files with unsupported structure/schema + ws_paths: dict[str, str] = {} + with open(csv_path, "r") as f: + reader = csv.reader(f, skipinitialspace=True) + next(reader) # Skip header + for row in reader: + ws_id, ws_path = row + + if ws_paths.get(ws_id) is not None: + logger.warning( + f'Duplicate backup targets for ws_id "{ws_id}" found. ' + f'Overwriting the target at "{ws_paths[ws_id]}" with "{ws_path}".' + ) + + ws_paths[ws_id] = ws_path + + return ws_paths + + +def validate_targets(sdk: GoodDataSdk, ws_paths: dict[str, str]) -> None: + """Validates the targets provided. + Since for now we don't support restore of deleted backups, + we can let the user know in advance about unknown IDs. + """ + ws_list = sdk.catalog_workspace.list_workspaces() + available_ids = {ws.id for ws in ws_list} + target_ids = set(ws_paths.keys()) + + unknown_ids = target_ids - available_ids + if unknown_ids: + logger.error( + "Unknown IDs specified in the input csv file. " + f"These will be ignored. The unknown IDs are: {unknown_ids}." 
+ ) + + for ws_id in unknown_ids: + ws_paths.pop(ws_id) + + +def get_storage(storage_type: str) -> Type[BackupStorage]: + """Factory method for creating storage providers.""" + match storage_type: + case "s3": + return S3Storage + case _: + raise RuntimeError(f'Unsupported storage type "{storage_type}".') + + +class RestoreWorker: + def __init__( + self, + sdk: GoodDataSdk, + api: GDApi, + storage: BackupStorage, + ws_paths: dict[str, str], + ): + self._sdk = sdk + self._api = api + self._storage = storage + self._ws_paths = ws_paths + self.org_id = sdk.catalog_organization.organization_id + + def _get_ws_declaration(self, ws_path: str, target: Path) -> None: + """Fetches the backup of workspace declaration from storage provider.""" + try: + self._storage.get_ws_declaration(ws_path, target) + except Exception as e: + logger.error("Failed to fetch restore backup for workspace.") + raise BackupRestoreError(type(e).__name__) + + @staticmethod + def _extract_zip_archive(target: Path, tempdir_path: Path) -> None: + """Extracts the backup from zip archive.""" + try: + with zipfile.ZipFile(target, "r") as zip_ref: + zip_ref.extractall(tempdir_path) + except Exception as e: + logger.error("Failed to extract backup from zip archive.") + raise BackupRestoreError(type(e).__name__) + + def _load_workspace_layout(self, src_path: Path) -> GDWorkspace: + """Loads the workspace layout from the backup.""" + try: + sdk_catalog = self._sdk.catalog_workspace_content + + ldm = sdk_catalog.load_ldm_from_disk(src_path) + am = sdk_catalog.load_analytics_model_from_disk(src_path) + + return ldm, am + except Exception as e: + logger.error("Failed to load workspace declaration.") + raise BackupRestoreError(type(e).__name__) + + @staticmethod + def _convert_udf_files_to_api_body(src_path: Path) -> dict: + """Converts UDF files to API body.""" + user_data_filters: dict = {"userDataFilters": []} + user_data_filters_folder = os.path.join(src_path, UDF_DIR) + for filename in os.listdir(user_data_filters_folder): + f = os.path.join(user_data_filters_folder, filename) + with open(f, "r") as file: + user_data_filter = yaml.safe_load(file) + user_data_filters["userDataFilters"].append(user_data_filter) + + return user_data_filters + + def _load_user_data_filters(self, src_path: Path) -> dict: + try: + return self._convert_udf_files_to_api_body(src_path) + except Exception as e: + logger.error("Failed to retrieve contents of user_data_filters folder.") + raise BackupRestoreError(type(e).__name__) + + @staticmethod + def _check_workspace_is_valid(src_path: Path) -> None: + """Checks if the workspace layout is valid.""" + # NOTE - this is a weaker, temporary validation. + # Should be replaced upon SDK version bump. + if not src_path.exists() or not src_path.is_dir(): + logger.error( + "Invalid source path found upon backup fetch. " + f"Got {src_path}. " + "Check if target zip contains gooddata_layouts directory." + ) + raise BackupRestoreError("Invalid source path upon load.") + + children = list(src_path.iterdir()) + am_path = src_path / AM_DIR + ldm_path = src_path / LDM_DIR + udf_path = src_path / UDF_DIR + + if ( + am_path not in children + or ldm_path not in children + or udf_path not in children + ): + logger.error( + "LDM or AM directory missing in the workspace hierarchy. " + "Check if gooddata_layouts contains " + f"{AM_DIR}, {LDM_DIR} and {UDF_DIR} directories." 
+ ) + raise BackupRestoreError("LDM or AM directory missing.") + + def _put_workspace_layout(self, ws_id: str, workspace: GDWorkspace) -> None: + """Puts the workspace layout into GoodData.""" + ldm, am = workspace + try: + sdk_catalog = self._sdk.catalog_workspace_content + + sdk_catalog.put_declarative_ldm(ws_id, ldm) + sdk_catalog.put_declarative_analytics_model(ws_id, am) + + except Exception as e: + logger.error("Failed to put workspace into GoodData.") + raise BackupRestoreError(type(e).__name__) + + def _put_user_data_filters(self, ws_id: str, user_data_filters: dict): + """Puts the user data filters into GoodData workspace.""" + try: + self._api.put( + f"layout/workspaces/{ws_id}/userDataFilters", user_data_filters, 204 + ) + except GoodDataRestApiError as e: + logger.error(f"Failed to put user data filters into {ws_id}") + raise BackupRestoreError(type(e).__name__) + + def _restore_backup(self, ws_id: str, tempdir: str) -> None: + """Restores the backup of a workspace.""" + ws_path = self._ws_paths[ws_id] + tempdir_path = Path(tempdir) + zip_target = tempdir_path / f"{LAYOUTS_DIR}.zip" + src_path = tempdir_path / LAYOUTS_DIR + + try: + self._get_ws_declaration(ws_path, zip_target) + self._extract_zip_archive(zip_target, tempdir_path) + self._check_workspace_is_valid(src_path) + workspace = self._load_workspace_layout(src_path) + user_data_filters = self._load_user_data_filters(src_path) + self._put_workspace_layout(ws_id, workspace) + self._put_user_data_filters(ws_id, user_data_filters) + logger.info(f"Finished backup restore of {ws_id} from {ws_path}.") + except BackupRestoreError as e: + logger.error( + f"Failed to restore backup of {ws_id} from {ws_path}. " + f"Error caused by {e.cause}." + ) + trace = traceback.format_exc() + logger.debug( + f"Attempt to restore backup raised following error: {e.cause}. " + f"Traceback:\n{trace}" + ) + + def incremental_restore(self): + """Restores the backups of workspaces incrementally.""" + for ws_id in self._ws_paths.keys(): + with tempfile.TemporaryDirectory() as tempdir: + self._restore_backup(ws_id, tempdir) + + +def create_api_client_from_profile(profile: str, profile_config: Path) -> GDApi: + """Creates a GoodData API client from a profile.""" + with open(profile_config, "r") as file: + config = yaml.safe_load(file) + + if profile not in config: + raise RuntimeError( + f'Specified profile name "{profile}" not found in "{profile_config}".' + ) + + profile_conf = config[profile] + hostname, token = profile_conf["host"], profile_conf["token"] + return GDApi(hostname, token) + + +def create_client(args: argparse.Namespace) -> tuple[GoodDataSdk, GDApi]: + """Creates GoodData SDK and API clients.""" + gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") + gdc_hostname = os.environ.get("GDC_HOSTNAME") + + if gdc_hostname and gdc_auth_token: + logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.") + sdk = GoodDataSdk.create(gdc_hostname, gdc_auth_token) + api = GDApi(gdc_hostname, gdc_auth_token) + return sdk, api + + profile_config, profile = args.profile_config, args.profile + if os.path.exists(profile_config): + logger.info(f"Using GoodData profile {profile} sourced from {profile_config}.") + sdk = GoodDataSdk.create_from_profile(profile, profile_config) + api = create_api_client_from_profile(profile, profile_config) + return sdk, api + + raise RuntimeError( + "No GoodData credentials provided. Please export required ENVVARS " + "(GDC_HOSTNAME, GDC_AUTH_TOKEN) or provide path to GD profile config." 
+ ) + + +def main(args): + """Main entry point of the script.""" + if args.verbose: + logger.setLevel(logging.DEBUG) + + if not os.path.exists(args.ws_csv): + raise RuntimeError("Invalid path to csv given.") + + if not os.path.exists(args.conf): + raise RuntimeError("Invalid path to backup storage configuration given.") + + sdk, api = create_client(args) + + conf = BackupRestoreConfig(args.conf) + + storage = get_storage(conf.storage_type)(conf) + + ws_paths = read_targets_from_csv(args.ws_csv) + validate_targets(sdk, ws_paths) + + restore_worker = RestoreWorker(sdk, api, storage, ws_paths) + + logger.info("Starting incremental backup restore based on target csv file...") + restore_worker.incremental_restore() + + +if __name__ == "__main__": + parser = create_parser() + args = parser.parse_args() + main(args) diff --git a/scripts/user_group_mgmt.py b/scripts/user_group_mgmt.py new file mode 100644 index 0000000..6668197 --- /dev/null +++ b/scripts/user_group_mgmt.py @@ -0,0 +1,352 @@ +# BSD License +# +# Copyright (c) 2024, GoodData Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted, provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
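+#
+# Example usage (an illustrative sketch: "user_groups.csv" is a hypothetical input
+# file; the flags come from create_parser() below and the columns match
+# tests/data/user_group_mgmt/input.csv):
+#
+#   python scripts/user_group_mgmt.py user_groups.csv -d "," -u "|"
+#
+#   # user_groups.csv
+#   user_group_id,user_group_name,parent_user_groups,is_active
+#   ug_1,Admins,,True
+#   ug_2,Developers,ug_1,True
+#   ug_4,TemporaryAccess,ug_2,False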
+ +import argparse +import csv +import logging +import os +import re +import sys + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +from gooddata_sdk import GoodDataSdk +from gooddata_sdk.catalog.user.entity_model.user import CatalogUserGroup + +UG_REGEX = r"^(?!\.)[.A-Za-z0-9_-]{1,255}$" + +PROFILES_FILE = "profiles.yaml" +PROFILES_DIRECTORY = ".gooddata" +PROFILES_FILE_PATH = Path.home() / PROFILES_DIRECTORY / PROFILES_FILE +LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s" + +logger = logging.getLogger(__name__) +handler = logging.StreamHandler(sys.stdout) +handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT)) +logger.addHandler(handler) +logger.setLevel(logging.INFO) + + +# TODO - simplify after complete switch to SDK +def create_clients(args: argparse.Namespace) -> GoodDataSdk: + """Creates GoodData SDK client.""" + gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") + gdc_hostname = os.environ.get("GDC_HOSTNAME") + + if gdc_hostname and gdc_auth_token: + logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.") + return GoodDataSdk.create(gdc_hostname, gdc_auth_token) + + profile_config, profile = args.profile_config, args.profile + if os.path.exists(profile_config): + logger.info( + f"Using GoodData profile {profile} " f"sourced from {profile_config}." + ) + return GoodDataSdk.create_from_profile(profile, profile_config) + + raise RuntimeError( + "No GoodData credentials provided. Please export required ENVVARS " + "(GDC_HOSTNAME, GDC_AUTH_TOKEN) or provide path to GD profile config." + ) + + +def create_parser() -> argparse.ArgumentParser: + """Creates an argument parser.""" + parser = argparse.ArgumentParser(description="Management of users and userGroups.") + parser.add_argument( + "-v", "--verbose", action="store_true", help="Turns on the debug log output." + ) + parser.add_argument( + "user_group_csv", type=Path, help="Path to csv with user groups definition." + ) + parser.add_argument( + "-d", + "--delimiter", + type=str, + default=",", + help="Delimiter used to separate different columns in the user_group_csv.", + ) + parser.add_argument( + "-u", + "--ug_delimiter", + type=str, + default="|", + help=( + "Delimiter used to separate different parent user groups within " + "the parent user group column in the user_group_csv. " + 'This must differ from the "delimiter" argument.' + ), + ) + parser.add_argument( + "-q", + "--quotechar", + type=str, + default='"', + help=( + "Character used for quoting (escaping) values " + "which contain delimiters or quotechars." + ), + ) + parser.add_argument( + "-p", + "--profile-config", + type=Path, + default=PROFILES_FILE_PATH, + help="Optional path to GoodData profile config. " + f'If no path is provided, "{PROFILES_FILE_PATH}" is used.', + ) + parser.add_argument( + "--profile", + type=str, + default="default", + help='GoodData profile to use. If no profile is provided, "default" is used.', + ) + return parser + + +def validate_args(args: argparse.Namespace) -> None: + """Validates the arguments provided.""" + if not os.path.exists(args.user_group_csv): + raise RuntimeError("Invalid path to user management input csv given.") + + if args.delimiter == args.ug_delimiter: + raise RuntimeError( + "Delimiter and ParentUserGroups Delimiter cannot be the same." + ) + + if args.ug_delimiter == "." or re.match(UG_REGEX, args.ug_delimiter): + raise RuntimeError( + 'ParentUserGroups delimiter cannot be dot (".") ' + f'or match the following regex: "{UG_REGEX}".' 
+        )
+
+    if len(args.quotechar) != 1:
+        raise RuntimeError("The quotechar argument must be exactly one character long.")
+
+
+@dataclass
+class TargetUserGroup:
+    user_group_id: str
+    user_group_name: str
+    parent_user_groups: list[str]
+    is_active: bool = field(compare=False)
+
+    @classmethod
+    def from_csv_row(cls, row: list[Any], parent_user_group_delimiter: str = ","):
+        """Creates a TargetUserGroup from a csv row."""
+        user_group_id, user_group_name, parent_user_groups, is_active = row
+        user_group_name_or_id = user_group_name or user_group_id
+        parent_user_groups = (
+            parent_user_groups.split(parent_user_group_delimiter)
+            if parent_user_groups
+            else []
+        )
+        return TargetUserGroup(
+            user_group_id=user_group_id,
+            user_group_name=user_group_name_or_id,
+            parent_user_groups=parent_user_groups,
+            is_active=str(is_active).lower() == "true",
+        )
+
+
+def read_users_groups_from_csv(args: argparse.Namespace) -> list[TargetUserGroup]:
+    """Reads user groups from the csv file."""
+    # TODO - handling of csv files with and without headers
+    user_groups: list[TargetUserGroup] = []
+    with open(args.user_group_csv, "r") as f:
+        reader = csv.reader(
+            f, delimiter=args.delimiter, quotechar=args.quotechar, skipinitialspace=True
+        )
+        next(reader)  # Skip header
+        for row in reader:
+            if not csv_row_is_valid(row):
+                continue
+            try:
+                user_group = TargetUserGroup.from_csv_row(row, args.ug_delimiter)
+            except Exception as e:
+                logger.error(f'Unable to load following row: "{row}". Error: "{e}"')
+                continue
+            user_groups.append(user_group)
+
+    return user_groups
+
+
+def csv_row_is_valid(row: list[Any]) -> bool:
+    """Validates a csv row."""
+    try:
+        user_group_id, user_group_name, parent_user_group, is_active = row
+    except ValueError as e:
+        logger.error(
+            "Unable to parse csv row. "
+            "Most probably an incorrect amount of values was defined. "
+            f'Skipping following row: "{row}". Error: "{e}".'
+        )
+        return False
+
+    if not user_group_id:
+        logger.error(
+            f'user_group_id field seems to be empty. Skipping following row: "{row}".'
+        )
+        return False
+
+    if not is_active:
+        logger.error(
+            f'is_active field seems to be empty. Skipping following row: "{row}".'
+        )
+        return False
+
+    return True
+
+
+class UserGroupManager:
+    def __init__(
+        self, client_sdk: GoodDataSdk, target_user_groups: list[TargetUserGroup]
+    ):
+        self.sdk = client_sdk
+        self.target_user_groups = target_user_groups
+        self.gd_user_groups = self._get_gd_user_groups()
+
+    def _get_gd_user_groups(self) -> list[CatalogUserGroup]:
+        try:
+            return self.sdk.catalog_user.list_user_groups()
+        except Exception as e:
+            logger.error(f"Failed to list user groups from GoodData: {e}")
+            return []
+
+    @staticmethod
+    def _is_changed(group: TargetUserGroup, existing_group: CatalogUserGroup) -> bool:
+        """Checks if the user group has changed and needs to be updated."""
+        # list.sort() sorts in place and returns None, so comparing its return
+        # values would always evaluate to False. Compare sorted copies instead.
+        parents_changed = sorted(group.parent_user_groups) != sorted(
+            existing_group.get_parents
+        )
+        name_changed = group.user_group_name != existing_group.name
+        return parents_changed or name_changed
+
+    def _create_or_update_user_group(
+        self, group_id, group_name, parent_user_groups, action
+    ) -> None:
+        """Creates or updates a user group in the project."""
+        catalog_user_group = CatalogUserGroup.init(
+            user_group_id=group_id,
+            user_group_name=group_name,
+            user_group_parent_ids=parent_user_groups,
+        )
+        try:
+            self.sdk.catalog_user.create_or_update_user_group(catalog_user_group)
+            logger.info(f"Succeeded to {action} user group {group_id}")
+        except Exception as e:
+            logger.error(f"Failed to {action} user group {group_id}: {e}")
+
+    def _create_missing_user_groups(self, group_ids_to_create) -> None:
+        """Provisions user groups that don't exist."""
+        groups_to_create = [
+            group
+            for group in self.target_user_groups
+            if group.user_group_id in group_ids_to_create
+        ]
+
+        for group in groups_to_create:
+            logger.info(
+                f'User group "{group.user_group_id}" does not exist, creating...'
+            )
+            self._create_or_update_user_group(
+                group.user_group_id,
+                group.user_group_name,
+                group.parent_user_groups,
+                "create",
+            )
+
+    def _update_existing_user_groups(self, group_ids_to_update) -> None:
+        """Updates existing user groups whose name or parent user groups changed."""
+        groups_to_update = [
+            group
+            for group in self.target_user_groups
+            if group.user_group_id in group_ids_to_update
+        ]
+
+        existing_groups = {group.id: group for group in self.gd_user_groups}
+
+        for group in groups_to_update:
+            existing_group = existing_groups[group.user_group_id]
+            if self._is_changed(group, existing_group):
+                logger.info(
+                    f"Updating parent user groups of group {group.user_group_id}..."
+                )
+                self._create_or_update_user_group(
+                    group.user_group_id,
+                    group.user_group_name,
+                    group.parent_user_groups,
+                    "update",
+                )
+
+    def _delete_user_group(self, group_ids_to_delete) -> None:
+        """Deletes user groups from the project."""
+        for user_group_id in group_ids_to_delete:
+            try:
+                logger.info(f'Deleting user group "{user_group_id}"')
+                self.sdk.catalog_user.delete_user_group(user_group_id)
+            except Exception as e:
+                logger.error(f'Failed to delete user group "{user_group_id}": {e}')
+
+    def manage_user_groups(self) -> None:
+        """Manages multiple user groups based on the provided input."""
+
+        logger.info(
+            f"Starting user group management run of {len(self.target_user_groups)} user groups..."
+        )
+
+        gd_group_ids = {group.id for group in self.gd_user_groups}
+
+        active_target_groups = {
+            group.user_group_id
+            for group in self.target_user_groups
+            if group.is_active is True
+        }
+        inactive_target_groups = {
+            group.user_group_id
+            for group in self.target_user_groups
+            if group.is_active is False
+        }
+
+        group_ids_to_create = active_target_groups.difference(gd_group_ids)
+        self._create_missing_user_groups(group_ids_to_create)
+
+        group_ids_to_update = active_target_groups.intersection(gd_group_ids)
+        self._update_existing_user_groups(group_ids_to_update)
+
+        group_ids_to_delete = inactive_target_groups.intersection(gd_group_ids)
+        self._delete_user_group(group_ids_to_delete)
+
+        logger.info("User group management run finished.")
+
+
+def user_group_mgmt(args):
+    """Main function for user group management."""
+    if args.verbose:
+        logger.setLevel(logging.DEBUG)
+
+    try:
+        validate_args(args)
+        client_sdk = create_clients(args)
+        target_user_groups = read_users_groups_from_csv(args)
+        user_group_manager = UserGroupManager(client_sdk, target_user_groups)
+        user_group_manager.manage_user_groups()
+    except RuntimeError as e:
+        logger.error(f"Runtime error has occurred: {e}")
+
+
+if __name__ == "__main__":
+    parser = create_parser()
+    args = parser.parse_args()
+    user_group_mgmt(args)
diff --git a/scripts/user_mgmt.py b/scripts/user_mgmt.py
new file mode 100644
index 0000000..caa6923
--- /dev/null
+++ b/scripts/user_mgmt.py
@@ -0,0 +1,329 @@
+# (C) 2023 GoodData Corporation
+import argparse
+import csv
+import logging
+import os
+import re
+import sys
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Optional
+
+import gooddata_sdk as gd_sdk
+from gooddata_api_client.exceptions import NotFoundException
+
+UG_REGEX = r"^(?!\.)[.A-Za-z0-9_-]{1,255}$"
+
+PROFILES_FILE = "profiles.yaml"
+PROFILES_DIRECTORY = ".gooddata"
+PROFILES_FILE_PATH = Path.home() / PROFILES_DIRECTORY / PROFILES_FILE
+LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
+
+logger = logging.getLogger(__name__)
+handler = logging.StreamHandler(sys.stdout)
+handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT))
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+
+def create_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(description="Management of users and userGroups.")
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Turns on the debug log output."
+    )
+    parser.add_argument(
+        "user_csv", type=Path, help="Path to csv with user definitions."
+    )
+    parser.add_argument(
+        "-d",
+        "--delimiter",
+        type=str,
+        default=",",
+        help="Delimiter used to separate different columns in the user_csv.",
+    )
+    parser.add_argument(
+        "-u",
+        "--ug_delimiter",
+        type=str,
+        default="|",
+        help=(
+            "Delimiter used to separate different user groups within "
+            "the relevant user groups column in the user_csv. "
+            'This must differ from the "delimiter" argument.'
+        ),
+    )
+    parser.add_argument(
+        "-q",
+        "--quotechar",
+        type=str,
+        default='"',
+        help=(
+            "Character used for quoting (escaping) values "
+            "which contain delimiters or quotechars."
+        ),
+    )
+    parser.add_argument(
+        "-p",
+        "--profile-config",
+        type=Path,
+        default=PROFILES_FILE_PATH,
+        help="Optional path to GoodData profile config. "
+        f'If no path is provided, "{PROFILES_FILE_PATH}" is used.',
+    )
+    parser.add_argument(
+        "--profile",
+        type=str,
+        default="default",
+        help='GoodData profile to use. 
If no profile is provided, "default" is used.', + ) + return parser + + +class GoodDataRestApiError(Exception): + """Wrapper for errors occurring from interaction with GD REST API.""" + + +def optional(string: str) -> Optional[str]: + """ + Ensures conversion of empty string to None. + + CSV reader parses empty fields as empty strings. + + Returns string or None. + """ + return string if string else None + + +@dataclass +class GDUserTarget: + user_id: str + firstname: Optional[str] + lastname: Optional[str] + email: Optional[str] + auth_id: Optional[str] + user_groups: list[str] + is_active: bool = field(compare=False) + + @classmethod + def from_csv_row( + cls, row: list[Any], user_group_delim: str = "," + ) -> "GDUserTarget": + """Creates GDUserTarget from csv row.""" + user_id, firstname, lastname, email, auth_id, user_groups, is_active = row + user_groups_list = user_groups.split(user_group_delim) if user_groups else [] + return GDUserTarget( + user_id=user_id, + firstname=optional(firstname), + lastname=optional(lastname), + email=optional(email), + auth_id=optional(auth_id), + user_groups=user_groups_list, + is_active=str(is_active).lower() == "true", + ) + + @classmethod + def from_sdk_obj(cls, obj: gd_sdk.CatalogUser) -> "GDUserTarget": + """Creates GDUserTarget from CatalogUser SDK object.""" + return GDUserTarget( + user_id=obj.id, + firstname=obj.attributes.firstname, + lastname=obj.attributes.lastname, + email=obj.attributes.email, + auth_id=obj.attributes.authentication_id, + user_groups=[ug.id for ug in obj.user_groups], + is_active=True, + ) + + def to_sdk_obj(self) -> gd_sdk.CatalogUser: + """Converts GDUserTarget to CatalogUser SDK object.""" + return gd_sdk.CatalogUser.init( + user_id=self.user_id, + firstname=self.firstname, + lastname=self.lastname, + email=self.email, + authentication_id=self.auth_id, + user_group_ids=self.user_groups, + ) + + +class UserManager: + def __init__(self, sdk: gd_sdk.GoodDataSdk): + self._sdk = sdk + + def _try_get_user(self, user: GDUserTarget) -> Optional[GDUserTarget]: + try: + user_sdk_obj = self._sdk.catalog_user.get_user(user.user_id) + return GDUserTarget.from_sdk_obj(user_sdk_obj) + except NotFoundException: + return None + + def _get_or_create_user_groups(self, groups: list[str]): + """Ensures that all user groups exist in the project.""" + # TODO - Can be optimized - preloading all user groups and checking on the go + for group in groups: + try: + self._sdk.catalog_user.get_user_group(group) + except NotFoundException: + logger.info(f'UserGroup "{group}" doesn\'t exist - creating...') + self._sdk.catalog_user.create_or_update_user_group( + gd_sdk.CatalogUserGroup.init( + user_group_id=group, user_group_name=group + ) + ) + + def _create_or_update_user(self, user: GDUserTarget): + """Creates or updates user in the project.""" + upstream_user = self._try_get_user(user) + if user == upstream_user: + logger.info(f'No action for user "{user.user_id}"') + return + if not upstream_user: + logger.info(f'Creating user "{user.user_id}"...') + else: + logger.info(f'Updating user "{user.user_id}"...') + + self._get_or_create_user_groups(user.user_groups) + self._sdk.catalog_user.create_or_update_user(user.to_sdk_obj()) + + def _delete_user(self, user: GDUserTarget): + """Deletes user from the project.""" + try: + self._sdk.catalog_user.get_user(user.user_id) + except NotFoundException: + logger.info(f'No action for user "{user.user_id}"') + return + logger.info(f'Deleting user "{user.user_id}"') + 
self._sdk.catalog_user.delete_user(user.user_id) + + def manage_user(self, user: GDUserTarget): + """Manages user based on the provided GDUserTarget.""" + if user.is_active: + self._create_or_update_user(user) + else: + self._delete_user(user) + + def manage_users(self, users: list[GDUserTarget]): + """Manages multiple users based on the provided GDUserTargets.""" + logger.info(f"Starting user management run of {len(users)} users...") + for user in users: + try: + self.manage_user(user) + except GoodDataRestApiError as e: + logger.error(f"API request for user failed: {e}") + except Exception as e: + logger.error(f"Something went wrong for {user.user_id}. Error: {e}") + logger.info("User management run finished.") + + +# TODO - simplify after complete switch to SDK +def create_clients(args: argparse.Namespace) -> gd_sdk.GoodDataSdk: + """Creates GoodData SDK client.""" + gdc_auth_token = os.environ.get("GDC_AUTH_TOKEN") + gdc_hostname = os.environ.get("GDC_HOSTNAME") + + if gdc_hostname and gdc_auth_token: + logger.info("Using GDC_HOSTNAME and GDC_AUTH_TOKEN envvars.") + sdk = gd_sdk.GoodDataSdk.create(gdc_hostname, gdc_auth_token) + return sdk + + profile_config, profile = args.profile_config, args.profile + if os.path.exists(profile_config): + logger.info( + f"Using GoodData profile {profile} " f"sourced from {profile_config}." + ) + sdk = gd_sdk.GoodDataSdk.create_from_profile(profile, profile_config) + return sdk + + raise RuntimeError( + "No GoodData credentials provided. Please export required ENVVARS " + "(GDC_HOSTNAME, GDC_AUTH_TOKEN) or provide path to GD profile config." + ) + + +def csv_row_is_valid(row: list[Any]) -> bool: + """Validates csv row.""" + try: + user_id, firstname, lastname, email, auth_id, user_groups, is_active = row + except Exception as e: + logger.error( + "Unable to parse csv row. " + "Most probably an incorrect amount of values was defined. " + f'Skipping following row: "{row}". Error: "{e}".' + ) + return False + + if not user_id: + logger.error( + f'user_id field seems to be empty. Skipping following row: "{row}".' + ) + return False + + if not is_active: + logger.error( + f'is_active field seems to be empty. Skipping following row: "{row}".' + ) + return False + + return True + + +def read_users_from_csv(args: argparse.Namespace) -> list[GDUserTarget]: + """Reads users from csv file.""" + # TODO - handling of csv files with and without headers + users: list[GDUserTarget] = [] + with open(args.user_csv, "r") as f: + reader = csv.reader( + f, delimiter=args.delimiter, quotechar=args.quotechar, skipinitialspace=True + ) + next(reader) # Skip header + for row in reader: + if not csv_row_is_valid(row): + continue + try: + user = GDUserTarget.from_csv_row(row, args.ug_delimiter) + except Exception as e: + logger.error(f'Unable to load following row: "{row}". Error: "{e}"') + continue + users.append(user) + + return users + + +def validate_args(args: argparse.Namespace) -> None: + """Validates the arguments provided.""" + if not os.path.exists(args.user_csv): + raise RuntimeError("Invalid path to user management input csv given.") + + if args.delimiter == args.ug_delimiter: + raise RuntimeError("Delimiter and UserGroups Delimiter cannot be the same.") + + if args.ug_delimiter == "." or re.match(UG_REGEX, args.ug_delimiter): + raise RuntimeError( + 'Usergroup delimiter cannot be dot (".") ' + f'or match the following regex: "{UG_REGEX}".' 
+ ) + + if len(args.quotechar) != 1: + raise RuntimeError("The quotechar argument must be exactly one character long.") + + +def user_mgmt(args): + """Main function for user management.""" + if args.verbose: + logger.setLevel(logging.DEBUG) + + validate_args(args) + + users = read_users_from_csv(args) + + sdk = create_clients(args) + + user_manager = UserManager(sdk) + + user_manager.manage_users(users) + + +if __name__ == "__main__": + parser = create_parser() + args = parser.parse_args() + user_mgmt(args) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..332df81 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# (C) 2023 GoodData Corporation diff --git a/tests/data/backup/test_conf.yaml b/tests/data/backup/test_conf.yaml new file mode 100644 index 0000000..9a4d005 --- /dev/null +++ b/tests/data/backup/test_conf.yaml @@ -0,0 +1,4 @@ +storage_type: s3 +storage: + bucket: some-s3-bucket + backup_path: some/s3/backup/path/org_id/ diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/analytical_dashboard_extensions/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/analytical_dashboard_extensions/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/analytical_dashboards/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/analytical_dashboards/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/dashboard_plugins/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/dashboard_plugins/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/filter_contexts/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/filter_contexts/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/metrics/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/metrics/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/visualization_objects/.gitkeep b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model/visualization_objects/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/datasets/test.yaml 
b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/datasets/test.yaml new file mode 100644 index 0000000..56a6051 --- /dev/null +++ b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/datasets/test.yaml @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/date_instances/testinstance.yaml b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/date_instances/testinstance.yaml new file mode 100644 index 0000000..d8263ee --- /dev/null +++ b/tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/ldm/date_instances/testinstance.yaml @@ -0,0 +1 @@ +2 \ No newline at end of file diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboard_extensions/.gitkeep b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboard_extensions/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboards/id.yaml b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboards/id.yaml new file mode 100644 index 0000000..9b26e9b --- /dev/null +++ b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboards/id.yaml @@ -0,0 +1 @@ ++ \ No newline at end of file diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/dashboard_plugins/.gitkeep b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/dashboard_plugins/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/filter_contexts/id.yaml b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/filter_contexts/id.yaml new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/metrics/.gitkeep b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/metrics/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/visualization_objects/test.yaml b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/visualization_objects/test.yaml new file mode 100644 index 0000000..05a5366 --- /dev/null +++ 
b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/visualization_objects/test.yaml @@ -0,0 +1 @@ +id \ No newline at end of file diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/ldm/datasets/.gitkeep b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/ldm/datasets/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/ldm/date_instances/.gitkeep b/tests/data/backup/test_exports/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/ldm/date_instances/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/analytical_dashboard_extensions/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/analytical_dashboard_extensions/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/analytical_dashboards/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/analytical_dashboards/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/dashboard_plugins/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/dashboard_plugins/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/filter_contexts/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/filter_contexts/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/metrics/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/metrics/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/visualization_objects/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/analytics_model/visualization_objects/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm/datasets/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm/datasets/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git 
a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm/date_instances/.gitkeep b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm/date_instances/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/user_data_filters/.gitignore b/tests/data/backup/test_exports/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/user_data_filters/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/permission_mgmt/input.csv b/tests/data/permission_mgmt/input.csv new file mode 100644 index 0000000..2c0b9b5 --- /dev/null +++ b/tests/data/permission_mgmt/input.csv @@ -0,0 +1,24 @@ +user_id,ug_id,ws_id,ws_permissions,is_active +bad,row, +user_1,,ws_id_1,ANALYZE,True +user_1,,ws_id_1,VIEW,True +user_1,,ws_id_1,MANAGE,False +user_2,,ws_id_1,ANALYZE,True +user_2,,ws_id_1,MANAGE,True +,ug_1,ws_id_1,ANALYZE,True +,ug_1,ws_id_1,VIEW,True +,ug_1,ws_id_1,MANAGE,False +,ug_2,ws_id_1,ANALYZE,True +,ug_2,ws_id_1,MANAGE,True +user,ug,ws_id_1,ANALYZE,True +user_1,,ws_id_3,ANALYZE,True +,,ws_id_1,ANALYZE,True +user_1,,ws_id_1,ANALYZE, +user_1,,ws_id_1,,True +user_1,,,ANALYZE,True +user_1,,ws_id_2,MANAGE,True +user_2,,ws_id_2,ANALYZE,False +user_3,,ws_id_2,MANAGE,True +,ug_1,ws_id_2,MANAGE,True +,ug_2,ws_id_2,ANALYZE,False +,ug_3,ws_id_2,MANAGE,True diff --git a/tests/data/restore/test.csv b/tests/data/restore/test.csv new file mode 100644 index 0000000..1d68747 --- /dev/null +++ b/tests/data/restore/test.csv @@ -0,0 +1,5 @@ +workspace_id,path +thiswsdoesnotexist,thiswsdoesnotexist/blabla +ws_id_1,ws_id_1/bla +ws_id_2,ws_id_2/bla +ws_id_3,ws_id_2/bla diff --git a/tests/data/restore/test_conf.yaml b/tests/data/restore/test_conf.yaml new file mode 100644 index 0000000..9a4d005 --- /dev/null +++ b/tests/data/restore/test_conf.yaml @@ -0,0 +1,4 @@ +storage_type: s3 +storage: + bucket: some-s3-bucket + backup_path: some/s3/backup/path/org_id/ diff --git a/tests/data/restore/test_ldm_load/ldm/datasets/some_dataset_id.yaml b/tests/data/restore/test_ldm_load/ldm/datasets/some_dataset_id.yaml new file mode 100644 index 0000000..4032fa0 --- /dev/null +++ b/tests/data/restore/test_ldm_load/ldm/datasets/some_dataset_id.yaml @@ -0,0 +1,15 @@ +grain: + - id: some_dataset_id.some_id + type: attribute +id: some_dataset_id +references: + - identifier: + id: some_ref_id + type: dataset + multivalue: false + sourceColumns: + - country_id +title: Test Dataset +workspaceDataFilterColumns: + - dataType: STRING + name: wdf__country diff --git a/tests/data/restore/test_user_data_filters/user_data_filters/datafilter2.yaml b/tests/data/restore/test_user_data_filters/user_data_filters/datafilter2.yaml new file mode 100644 index 0000000..be622d6 --- /dev/null +++ b/tests/data/restore/test_user_data_filters/user_data_filters/datafilter2.yaml @@ -0,0 +1,6 @@ +id: datafilter2 +maql: '{label/campaign_channels.category} = "1"' +title: Status filter +user: + id: 5c867a8a-12af-45bf-8d85-c7d16bedebd1 + type: user diff --git a/tests/data/restore/test_user_data_filters/user_data_filters/datafilter4.yaml b/tests/data/restore/test_user_data_filters/user_data_filters/datafilter4.yaml new file mode 100644 index 0000000..ca63315 --- /dev/null +++ b/tests/data/restore/test_user_data_filters/user_data_filters/datafilter4.yaml @@ -0,0 +1,6 @@ +id: 
datafilter4 +maql: '{label/campaign_channels.category} = "1"' +title: Status filter +user: + id: 5c867a8a-12af-45bf-8d85-c7d16bedebd1 + type: user diff --git a/tests/data/user_group_mgmt/input.csv b/tests/data/user_group_mgmt/input.csv new file mode 100644 index 0000000..6d91442 --- /dev/null +++ b/tests/data/user_group_mgmt/input.csv @@ -0,0 +1,5 @@ +user_group_id,user_group_name,parent_user_groups,is_active +ug_1,Admins,,True +ug_2,Developers,ug_1,True +ug_3,Testers,ug_1|ug_2,True +ug_4,TemporaryAccess,ug_2,False diff --git a/tests/data/user_mgmt/input.csv b/tests/data/user_mgmt/input.csv new file mode 100644 index 0000000..b1372aa --- /dev/null +++ b/tests/data/user_mgmt/input.csv @@ -0,0 +1,8 @@ +user_id, firstname, lastname, email, auth_id, user_groups, is_active +jozef.mrkva,jozef,mrkva,jozef.mrkva@test.com,auth_id_1,,True +bartolomej.brokolica,,,,auth_id_2,,False +peter.pertzlen,peter,pertzlen,peter.pertzlen@test.com,auth_id_3,ug_1|ug_2,True +zoltan.zeler,zoltan,zeler,zoltan.zeler@test.com,auth_id_4,ug_1,True +kristian.kalerab,kristian,kalerab,,auth_id_5,,True +richard.cvikla,,,richard.cvikla@test.com,auth_id_6,ug_1|ug_2,False +adam.avokado,,,,auth_id_7,,False diff --git a/tests/fake_aws_creds.sh b/tests/fake_aws_creds.sh new file mode 100644 index 0000000..997d417 --- /dev/null +++ b/tests/fake_aws_creds.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +# This creates fake ~/.aws/credentials file for boto3 mocking +mkdir ~/.aws && touch ~/.aws/credentials && echo "[default]\naws_access_key_id = test\naws_secret_access_key = test" > ~/.aws/credentials diff --git a/tests/test_backup.py b/tests/test_backup.py new file mode 100644 index 0000000..b5372ab --- /dev/null +++ b/tests/test_backup.py @@ -0,0 +1,328 @@ +# (C) 2023 GoodData Corporation +import argparse +import os +import tempfile +from pathlib import Path +from unittest import mock + +import boto3 +import pytest +import shutil + +from moto import mock_s3 +from scripts import backup + +LOGGER_NAME = "scripts.backup" +MOCK_DL_TARGET = Path("overlays.zip") +TEST_CONF_PATH = "tests/data/backup/test_conf.yaml" + +S3_BACKUP_PATH = "some/s3/backup/path/org_id/" +S3_BUCKET = "some-s3-bucket" + + +class MockGdWorkspace: + def __init__(self, id: str) -> None: + self.id = id + + +class MockResponse: + def __init__(self, status_code, json_response=None, text: str = ""): + self.status_code = status_code + self.json_response = json_response if json_response else {} + self.text = text + + def json(self): + return self.json_response + + +def mock_requests_get(**kwargs): + body = {"userDataFilters": []} + return MockResponse(200, body) + + +def mock_requests(): + requests = mock.Mock() + requests.get.side_effect = mock_requests_get + return requests + + +@pytest.fixture(scope="function") +def aws_credentials(): + """ + Mocked AWS Credentials for moto. + Ensures no locally set AWS credential envvars are used. 
+ """ + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "us-east-1" + + +@pytest.fixture(scope="function") +def s3(aws_credentials): + with mock_s3(): + yield boto3.resource("s3") + + +@pytest.fixture(scope="function") +def s3_bucket(s3): + s3.create_bucket(Bucket=S3_BUCKET) + yield s3.Bucket(S3_BUCKET) + + +@pytest.fixture(scope="function") +def create_backups_in_bucket(s3_bucket): + def create_backups(ws_ids: list[str], is_e2e: bool = False, suffix: str = "bla"): + # If used within e2e test, add some suffix to path + # in order to simulate a more realistic scenario + path_suffix = f"/{suffix}" if is_e2e else "" + + for ws_id in ws_ids: + s3_bucket.put_object( + Bucket=S3_BUCKET, Key=f"{S3_BACKUP_PATH}{ws_id}{path_suffix}/" + ) + s3_bucket.put_object( + Bucket=S3_BUCKET, + Key=f"{S3_BACKUP_PATH}{ws_id}{path_suffix}/gooddata_layouts.zip", + ) + + return create_backups + + +def assert_not_called_with(target, *args, **kwargs): + try: + target.assert_called_with(*args, **kwargs) + except AssertionError: + return + formatted_call = target._format_mock_call_signature(args, kwargs) + raise AssertionError(f"Expected {formatted_call} to not have been called.") + + +@mock.patch.dict(os.environ, {"GDC_HOSTNAME": "hostname", "GDC_AUTH_TOKEN": "token"}) +@mock.patch("gooddata_sdk.GoodDataSdk.create_from_profile") +@mock.patch("gooddata_sdk.GoodDataSdk.create") +def test_gd_client_env(client_create_env, client_create_profile): + backup.create_client(argparse.Namespace()) + client_create_env.assert_called_once_with("hostname", "token") + client_create_profile.assert_not_called() + + +@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch("scripts.backup.create_api_client_from_profile") +@mock.patch("gooddata_sdk.GoodDataSdk.create_from_profile") +@mock.patch("gooddata_sdk.GoodDataSdk.create") +@mock.patch("os.path.exists") +def test_gd_client_profile( + path_exists, + client_create_env, + client_create_profile, + create_api_client_from_profile, +): + path_exists.return_value = True + args = argparse.Namespace( + profile_config="gdc_profile_config_path", + profile="gdc_profile", + ) + backup.create_client(args) + client_create_env.assert_not_called() + client_create_profile.assert_called_once_with( + "gdc_profile", "gdc_profile_config_path" + ) + create_api_client_from_profile.assert_called_once_with( + "gdc_profile", "gdc_profile_config_path" + ) + + +@mock.patch.dict(os.environ, {}, clear=True) +def test_gd_client_no_creds_raises_error(): + args = argparse.Namespace( + profile_config="", + profile="", + ) + with pytest.raises(RuntimeError): + backup.create_client(args) + + +# Incorrect ws_csv and conf args throw error +@pytest.mark.parametrize("conf_path", ["", "configuration_nonexist.yaml"]) +@pytest.mark.parametrize("csv_path", ["", "input_nonexist.csv"]) +def test_wrong_wscsv_conf_raise_error(csv_path, conf_path): + args = argparse.Namespace(ws_csv=csv_path, conf=conf_path, verbose=False) + with pytest.raises(RuntimeError): + backup.validate_args(args) + + +def test_get_s3_storage(): + s3_storage_type = backup.get_storage("s3") + assert s3_storage_type == backup.S3Storage + + +def test_get_local_storage(): + local_storage_type = backup.get_storage("local") + assert local_storage_type == backup.LocalStorage + + +def test_get_unknown_storage_raises_error(): + with pytest.raises(RuntimeError): + 
backup.get_storage("unknown_storage") + + +# Test that zipping gooddata_layouts folder works +def test_archive_gooddata_layouts_to_zip(): + with tempfile.TemporaryDirectory() as tmpdir: + shutil.copytree( + Path("tests/data/backup/test_exports/services/"), Path(tmpdir + "/services") + ) + backup.archive_gooddata_layouts_to_zip(Path(tmpdir, "services")) + + zip_exists = os.path.isfile( + Path( + tmpdir, "services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts.zip" + ) + ) + gooddata_layouts_dir_exists = os.path.isdir( + Path(tmpdir, "services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts") + ) + + assert gooddata_layouts_dir_exists is False + assert zip_exists + + zip_exists = os.path.isfile( + Path( + tmpdir, "services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts.zip" + ) + ) + gooddata_layouts_dir_exists = os.path.isdir( + Path(tmpdir, "services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts") + ) + + assert gooddata_layouts_dir_exists is False + assert zip_exists + + zip_exists = os.path.isfile( + Path( + tmpdir, "services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts.zip" + ) + ) + gooddata_layouts_dir_exists = os.path.isdir( + Path(tmpdir, "services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts") + ) + + assert gooddata_layouts_dir_exists is False + assert zip_exists + + +@mock.patch("scripts.backup.requests", new_callable=mock_requests) +def test_get_user_data_filters_normal_response(requests): + api = backup.GDApi("some.host.com", "token") + + response = backup.get_user_data_filters( + api, + "workspace", + ) + assert response == {"userDataFilters": []} + + +def test_store_user_data_filters(): + user_data_filters = { + "userDataFilters": [ + { + "id": "datafilter2", + "maql": '{label/campaign_channels.category} = "1"', + "title": "Status filter", + "user": {"id": "5c867a8a-12af-45bf-8d85-c7d16bedebd1", "type": "user"}, + }, + { + "id": "datafilter4", + "maql": '{label/campaign_channels.category} = "1"', + "title": "Status filter", + "user": {"id": "5c867a8a-12af-45bf-8d85-c7d16bedebd1", "type": "user"}, + }, + ] + } + user_data_filter_folderlocation = Path( + "tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/user_data_filters" + ) + backup.store_user_data_filters( + user_data_filters, + Path( + "tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5" + ), + "services", + "wsid1", + ) + user_data_filter_folder = os.path.isdir(Path(user_data_filter_folderlocation)) + user_data_filter2 = os.path.isfile( + Path(f"{user_data_filter_folderlocation}/datafilter2.yaml") + ) + user_data_filter4 = os.path.isfile( + Path(f"{user_data_filter_folderlocation}/datafilter4.yaml") + ) + assert user_data_filter_folder + assert user_data_filter2 + assert user_data_filter4 + + count = 0 + for path in os.listdir(user_data_filter_folderlocation): + if os.path.isfile(os.path.join(user_data_filter_folderlocation, path)): + count += 1 + + assert count == 2 + + shutil.rmtree( + "tests/data/backup/test_exports/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/user_data_filters" + ) + + +def test_local_storage_export(): + with tempfile.TemporaryDirectory() as tmpdir: + org_store_location = Path(tmpdir + "/services") + shutil.copytree( + Path("tests/data/backup/test_exports/services/"), org_store_location + ) + + local_storage_type = backup.get_storage("local") + local_storage_type.export( + self=local_storage_type, + folder=tmpdir, + org_id="services", + 
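# export() is invoked unbound here with the class itself passed as self; instantiating LocalStorage with a config (as test_file_upload below does for S3Storage) would be the cleaner call. +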
export_folder="tests/data/local_export", + ) + local_export_folder_exist = os.path.isdir( + Path( + "tests/data/local_export/services/wsid1/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid1/analytics_model" + ) + ) + local_export_folder2_exist = os.path.isdir( + Path( + "tests/data/local_export/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/ldm" + ) + ) + + local_export_folder3_exist = os.path.isdir( + Path( + "tests/data/local_export/services/wsid3/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid3/user_data_filters" + ) + ) + + local_export_file_exist = os.path.isfile( + Path( + "tests/data/local_export/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/analytical_dashboards/id.yaml" + ) + ) + assert local_export_folder_exist + assert local_export_folder2_exist + assert local_export_folder3_exist + assert local_export_file_exist + shutil.rmtree("tests/data/local_export") + + +def test_file_upload(s3, s3_bucket): + conf = backup.BackupRestoreConfig(TEST_CONF_PATH) + s3storage = backup.get_storage("s3")(conf) + s3storage.export("tests/data/backup/test_exports", "services") + s3.Object( + S3_BUCKET, + "some/s3/backup/path/org_id/services/wsid2/20230713-132759-1_3_1_dev5/gooddata_layouts/services/workspaces/wsid2/analytics_model/filter_contexts/id.yaml", + ).load() diff --git a/tests/test_permissions.py b/tests/test_permissions.py new file mode 100644 index 0000000..bdd9c69 --- /dev/null +++ b/tests/test_permissions.py @@ -0,0 +1,373 @@ +# (C) 2023 GoodData Corporation +import argparse +from unittest import mock + +import gooddata_sdk as gd_sdk +from gooddata_api_client.exceptions import NotFoundException + +from scripts import permission_mgmt + +TEST_CSV_PATH = "tests/data/permission_mgmt/input.csv" + +USER_1 = gd_sdk.CatalogAssigneeIdentifier(id="user_1", type="user") +USER_2 = gd_sdk.CatalogAssigneeIdentifier(id="user_2", type="user") +USER_3 = gd_sdk.CatalogAssigneeIdentifier(id="user_3", type="user") +UG_1 = gd_sdk.CatalogAssigneeIdentifier(id="ug_1", type="userGroup") +UG_2 = gd_sdk.CatalogAssigneeIdentifier(id="ug_2", type="userGroup") +UG_3 = gd_sdk.CatalogAssigneeIdentifier(id="ug_3", type="userGroup") + +UPSTREAM_PERMISSIONS = [ + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="ANALYZE", assignee=USER_1), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="VIEW", assignee=USER_1), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="MANAGE", assignee=USER_1), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="ANALYZE", assignee=USER_2), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="VIEW", assignee=USER_2), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="ANALYZE", assignee=USER_3), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="ANALYZE", assignee=UG_1), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="VIEW", assignee=UG_1), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="MANAGE", assignee=UG_1), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="ANALYZE", assignee=UG_2), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="VIEW", assignee=UG_2), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="ANALYZE", assignee=UG_3), +] + +WS_PERMISSION_DECLARATION = permission_mgmt.WSPermissionDeclaration( + users={ + "user_1": {"ANALYZE": True, "VIEW": True, "MANAGE": True}, + "user_2": {"ANALYZE": True, "VIEW": True}, + "user_3": {"ANALYZE": True}, + }, + 
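# The dict form of UPSTREAM_PERMISSIONS above: assignee id -> {permission name -> is_active}. +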
user_groups={ + "ug_1": {"ANALYZE": True, "VIEW": True, "MANAGE": True}, + "ug_2": {"ANALYZE": True, "VIEW": True}, + "ug_3": {"ANALYZE": True}, + }, +) + +UPSTREAM_WS_PERMISSION = gd_sdk.CatalogDeclarativeWorkspacePermissions( + permissions=UPSTREAM_PERMISSIONS +) + +UPSTREAM_WS_PERMISSIONS = { + "ws_id_1": UPSTREAM_WS_PERMISSION, + "ws_id_2": UPSTREAM_WS_PERMISSION, +} + +EXPECTED_WS1_PERMISSIONS = gd_sdk.CatalogDeclarativeWorkspacePermissions( + permissions=[ + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="ANALYZE", assignee=USER_1 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="VIEW", assignee=USER_1 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="ANALYZE", assignee=USER_2 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="MANAGE", assignee=USER_2 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="ANALYZE", assignee=USER_3 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="ANALYZE", assignee=UG_1 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission(name="VIEW", assignee=UG_1), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="ANALYZE", assignee=UG_2 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="MANAGE", assignee=UG_2 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="ANALYZE", assignee=UG_3 + ), + ] +) + +EXPECTED_WS2_PERMISSIONS = gd_sdk.CatalogDeclarativeWorkspacePermissions( + permissions=[ + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="MANAGE", assignee=USER_1 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="MANAGE", assignee=USER_3 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="MANAGE", assignee=UG_1 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="MANAGE", assignee=UG_3 + ), + ] +) + + +def test_declaration_from_populated_sdk_api_obj(): + declaration = permission_mgmt.WSPermissionDeclaration.from_sdk_api( + UPSTREAM_WS_PERMISSION + ) + assert declaration == WS_PERMISSION_DECLARATION + + +def test_declaration_from_empty_sdk_api_obj(): + api_obj = gd_sdk.CatalogDeclarativeWorkspacePermissions(permissions=[]) + declaration = permission_mgmt.WSPermissionDeclaration.from_sdk_api(api_obj) + assert len(declaration.users) == 0 + assert len(declaration.user_groups) == 0 + + +def test_declaration_to_populated_sdk_api_obj(): + api_obj = permission_mgmt.WSPermissionDeclaration.to_sdk_api( + WS_PERMISSION_DECLARATION + ) + assert api_obj == UPSTREAM_WS_PERMISSION + + +def test_declaration_with_inactive_to_sdk_api_obj(): + users = { + "user_1": {"ANALYZE": True, "VIEW": False}, + "user_2": {"ANALYZE": True}, + } + ugs = { + "ug_1": {"ANALYZE": True, "VIEW": False}, + "ug_2": {"ANALYZE": True}, + } + declaration = permission_mgmt.WSPermissionDeclaration(users, ugs) + api_obj = declaration.to_sdk_api() + expected = gd_sdk.CatalogDeclarativeWorkspacePermissions( + permissions=[ + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="ANALYZE", assignee=USER_1 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="ANALYZE", assignee=USER_2 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="ANALYZE", assignee=UG_1 + ), + gd_sdk.CatalogDeclarativeSingleWorkspacePermission( + name="ANALYZE", assignee=UG_2 + ), + ] + ) + assert api_obj == expected + + +def test_declaration_with_only_inactive_to_sdk_api_obj(): + users = { + "user_1": {"ANALYZE": False, "VIEW": False}, + "user_2": {"ANALYZE": False}, + } + ugs = { + "ug_1": {"ANALYZE": False, "VIEW": 
False}, + "ug_2": {"ANALYZE": False}, + } + declaration = permission_mgmt.WSPermissionDeclaration(users, ugs) + api_obj = declaration.to_sdk_api() + expected = gd_sdk.CatalogDeclarativeWorkspacePermissions(permissions=[]) + assert api_obj == expected + + +# Declarations are explicitly defined anew here to avoid dict mutations +# in subsequent calls and to avoid dict deepcopy overhead. + + +def test_add_new_active_user_perm(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("MANAGE", "", "user_1", "user", True) + declaration.add_permission(permission) + assert declaration.users == { + "user_1": {"ANALYZE": True, "VIEW": False, "MANAGE": True} + } + assert declaration.user_groups == {"ug_1": {"VIEW": True, "ANALYZE": False}} + + +def test_add_new_inactive_user_perm(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("MANAGE", "", "user_1", "user", False) + declaration.add_permission(permission) + assert declaration.users == { + "user_1": {"ANALYZE": True, "VIEW": False, "MANAGE": False} + } + assert declaration.user_groups == {"ug_1": {"VIEW": True, "ANALYZE": False}} + + +def test_overwrite_inactive_user_perm(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("VIEW", "", "user_1", "user", True) + declaration.add_permission(permission) + assert declaration.users == {"user_1": {"ANALYZE": True, "VIEW": True}} + assert declaration.user_groups == {"ug_1": {"VIEW": True, "ANALYZE": False}} + + +def test_overwrite_active_user_perm(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("ANALYZE", "", "user_1", "user", False) + declaration.add_permission(permission) + assert declaration.users == {"user_1": {"ANALYZE": True, "VIEW": False}} + assert declaration.user_groups == {"ug_1": {"VIEW": True, "ANALYZE": False}} + + +def test_add_new_user_perm(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("VIEW", "", "user_2", "user", True) + declaration.add_permission(permission) + assert declaration.users == { + "user_1": {"ANALYZE": True, "VIEW": False}, + "user_2": {"VIEW": True}, + } + assert declaration.user_groups == {"ug_1": {"VIEW": True, "ANALYZE": False}} + + +def test_modify_one_of_user_perms(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}, "user_2": {"VIEW": True}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("MANAGE", "", "user_1", "user", True) + declaration.add_permission(permission) + assert declaration.users == { + "user_1": {"ANALYZE": True, "VIEW": False, "MANAGE": True}, + "user_2": {"VIEW": True}, + } + assert declaration.user_groups == {"ug_1": {"VIEW": True, "ANALYZE": False}} + + +# Add userGroup permission + + +def test_add_new_active_ug_perm(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, 
"VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("MANAGE", "", "ug_1", "userGroup", True) + declaration.add_permission(permission) + assert declaration.users == {"user_1": {"ANALYZE": True, "VIEW": False}} + assert declaration.user_groups == { + "ug_1": {"VIEW": True, "ANALYZE": False, "MANAGE": True} + } + + +def test_add_new_inactive_ug_perm(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("MANAGE", "", "ug_1", "userGroup", False) + declaration.add_permission(permission) + assert declaration.users == {"user_1": {"ANALYZE": True, "VIEW": False}} + assert declaration.user_groups == { + "ug_1": {"VIEW": True, "ANALYZE": False, "MANAGE": False} + } + + +def test_overwrite_inactive_ug_perm(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("ANALYZE", "", "ug_1", "userGroup", True) + declaration.add_permission(permission) + assert declaration.users == {"user_1": {"ANALYZE": True, "VIEW": False}} + assert declaration.user_groups == {"ug_1": {"VIEW": True, "ANALYZE": True}} + + +def test_overwrite_active_ug_perm(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("VIEW", "", "ug_1", "userGroup", False) + declaration.add_permission(permission) + assert declaration.users == {"user_1": {"ANALYZE": True, "VIEW": False}} + assert declaration.user_groups == {"ug_1": {"VIEW": True, "ANALYZE": False}} + + +def test_add_new_ug_perm(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}}, + ) + permission = permission_mgmt.WSPermission("VIEW", "", "ug_2", "userGroup", True) + declaration.add_permission(permission) + assert declaration.users == {"user_1": {"ANALYZE": True, "VIEW": False}} + assert declaration.user_groups == { + "ug_1": {"VIEW": True, "ANALYZE": False}, + "ug_2": {"VIEW": True}, + } + + +def test_modify_one_of_ug_perms(): + declaration = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True, "VIEW": False}}, + {"ug_1": {"VIEW": True, "ANALYZE": False}, "ug_2": {"VIEW": True}}, + ) + permission = permission_mgmt.WSPermission("MANAGE", "", "ug_1", "userGroup", True) + declaration.add_permission(permission) + assert declaration.users == {"user_1": {"ANALYZE": True, "VIEW": False}} + assert declaration.user_groups == { + "ug_1": {"VIEW": True, "ANALYZE": False, "MANAGE": True}, + "ug_2": {"VIEW": True}, + } + + +def test_upsert(): + owner = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"ANALYZE": True}, "user_2": {"VIEW": True}}, + {"ug_1": {"ANALYZE": True}, "ug_2": {"VIEW": True}}, + ) + other = permission_mgmt.WSPermissionDeclaration( + {"user_1": {"MANAGE": True, "VIEW": False}}, + {"ug_2": {"MANAGE": True, "VIEW": False}}, + ) + owner.upsert(other) + assert owner.users == { + "user_1": {"MANAGE": True, "VIEW": False}, + "user_2": {"VIEW": True}, + } + assert owner.user_groups == { + "ug_1": {"ANALYZE": True}, + "ug_2": {"MANAGE": True, "VIEW": False}, + } + + +def mock_upstream_perms(ws_id: str) -> gd_sdk.CatalogDeclarativeWorkspacePermissions: + if ws_id not in 
UPSTREAM_WS_PERMISSIONS: + raise NotFoundException(404) + return UPSTREAM_WS_PERMISSIONS[ws_id] + + +@mock.patch("scripts.permission_mgmt.create_client") +def test_permission_management_e2e(create_client): + sdk = mock.Mock() + sdk.catalog_permission.get_declarative_permissions.side_effect = mock_upstream_perms + create_client.return_value = sdk + + args = argparse.Namespace(perm_csv=TEST_CSV_PATH, verbose=False) + + permission_mgmt.permission_mgmt(args) + + sdk.catalog_permission.put_declarative_permissions.assert_has_calls( + [ + mock.call("ws_id_1", EXPECTED_WS1_PERMISSIONS), + mock.call("ws_id_2", EXPECTED_WS2_PERMISSIONS), + ] + ) diff --git a/tests/test_restore.py b/tests/test_restore.py new file mode 100644 index 0000000..1ea8e84 --- /dev/null +++ b/tests/test_restore.py @@ -0,0 +1,479 @@ +# (C) 2023 GoodData Corporation +import argparse +import logging +import os +import tempfile +from pathlib import Path +from unittest import mock + +import boto3 +import pytest +from moto import mock_s3 +import gooddata_sdk as gd_sdk + +from scripts import restore + +LOGGER_NAME = "scripts.restore" +MOCK_DL_TARGET = Path("overlays.zip") +TEST_CONF_PATH = "tests/data/restore/test_conf.yaml" +TEST_CSV_PATH = "tests/data/restore/test.csv" +TEST_LDM_PATH = Path("tests/data/restore/test_ldm_load") +TEST_UDF_PATH = Path("tests/data/restore/test_user_data_filters/") + +S3_BACKUP_PATH = "some/s3/backup/path/org_id/" +S3_BUCKET = "some-s3-bucket" + + +class MockGdWorkspace: + def __init__(self, id: str) -> None: + self.id = id + + +@pytest.fixture() +def aws_credentials(): + """ + Mocked AWS Credentials for moto. + Ensures no locally set AWS credential envvars are used. + """ + os.environ["AWS_ACCESS_KEY_ID"] = "testing" + os.environ["AWS_SECRET_ACCESS_KEY"] = "testing" + os.environ["AWS_SECURITY_TOKEN"] = "testing" + os.environ["AWS_SESSION_TOKEN"] = "testing" + os.environ["AWS_DEFAULT_REGION"] = "us-east-1" + + +@pytest.fixture() +def s3(aws_credentials): + with mock_s3(): + yield boto3.resource("s3") + + +@pytest.fixture() +def s3_bucket(s3): + s3.create_bucket(Bucket=S3_BUCKET) + yield s3.Bucket(S3_BUCKET) + + +@pytest.fixture() +def create_backups_in_bucket(s3_bucket): + def create_backups(ws_ids: list[str], is_e2e: bool = False, suffix: str = "bla"): + # If used within e2e test, add some suffix to path + # in order to simulate a more realistic scenario + path_suffix = f"/{suffix}" if is_e2e else "" + + for ws_id in ws_ids: + s3_bucket.put_object( + Bucket=S3_BUCKET, Key=f"{S3_BACKUP_PATH}{ws_id}{path_suffix}/" + ) + s3_bucket.put_object( + Bucket=S3_BUCKET, + Key=f"{S3_BACKUP_PATH}{ws_id}{path_suffix}/gooddata_layouts.zip", + ) + + return create_backups + + +def assert_not_called_with(target, *args, **kwargs): + try: + target.assert_called_with(*args, **kwargs) + except AssertionError: + return + formatted_call = target._format_mock_call_signature(args, kwargs) + raise AssertionError(f"Expected {formatted_call} to not have been called.") + + +@mock.patch.dict(os.environ, {"GDC_HOSTNAME": "hostname", "GDC_AUTH_TOKEN": "token"}) +@mock.patch("gooddata_sdk.GoodDataSdk.create_from_profile") +@mock.patch("gooddata_sdk.GoodDataSdk.create") +def test_gd_client_env(client_create_env, client_create_profile): + restore.create_client(argparse.Namespace()) + client_create_env.assert_called_once_with("hostname", "token") + client_create_profile.assert_not_called() + + +@mock.patch.dict(os.environ, {}, clear=True) +@mock.patch("gooddata_sdk.GoodDataSdk.create_from_profile") 
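+# NB: mock.patch decorators apply bottom-up, so two parameter names below are
+# swapped relative to the mocks they receive: client_create_profile is bound to
+# the create_api_client_from_profile mock and create_api_client_from_profile to
+# the create_from_profile mock. The assertions still pass because both mocks are
+# checked with identical arguments, but mirroring test_backup.py's decorator
+# order would make the names line up.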
+@mock.patch("scripts.restore.create_api_client_from_profile") +@mock.patch("gooddata_sdk.GoodDataSdk.create") +@mock.patch("os.path.exists") +def test_gd_client_profile( + path_exists, + client_create_env, + client_create_profile, + create_api_client_from_profile, +): + path_exists.return_value = True + args = argparse.Namespace( + profile_config="gdc_profile_config_path", + profile="gdc_profile", + ) + restore.create_client(args) + client_create_env.assert_not_called() + client_create_profile.assert_called_once_with( + "gdc_profile", "gdc_profile_config_path" + ) + create_api_client_from_profile.assert_called_once_with( + "gdc_profile", "gdc_profile_config_path" + ) + + +@mock.patch.dict(os.environ, {}, clear=True) +def test_gd_client_no_creds_raises_error(): + args = argparse.Namespace( + profile_config="", + profile="", + ) + with pytest.raises(RuntimeError): + restore.create_client(args) + + +@pytest.mark.parametrize("csv_path", ["", "bad/path"]) +@mock.patch("scripts.restore.create_client") +def test_bad_csv_path_raises_error(_, csv_path): + args = argparse.Namespace(ws_csv=csv_path, verbose=False) + with pytest.raises(RuntimeError): + restore.main(args) + + +@pytest.mark.parametrize("conf_path", ["", "bad/path"]) +@mock.patch("scripts.restore.create_client") +def test_bad_conf_path_raises_error(_, conf_path): + args = argparse.Namespace(conf=conf_path, ws_csv=".", verbose=False) + with pytest.raises(RuntimeError): + restore.main(args) + + +def test_get_s3_storage(): + s3_storage_type = restore.get_storage("s3") + assert s3_storage_type == restore.S3Storage + + +def test_get_unknown_storage_raises_error(): + with pytest.raises(RuntimeError): + restore.get_storage("unknown_storage") + + +def test_s3_storage(create_backups_in_bucket): + create_backups_in_bucket(["ws_id"]) + + conf = restore.BackupRestoreConfig(TEST_CONF_PATH) + storage = restore.S3Storage(conf) + + with tempfile.TemporaryDirectory() as tempdir: + target_path = Path(tempdir, MOCK_DL_TARGET) + storage.get_ws_declaration("ws_id/", target_path) + + +def test_s3_storage_no_target_only_dir(s3_bucket): + s3_bucket.put_object(Bucket=S3_BUCKET, Key=f"{S3_BACKUP_PATH}/ws_id/") + conf = restore.BackupRestoreConfig(TEST_CONF_PATH) + storage = restore.S3Storage(conf) + with pytest.raises(restore.BackupRestoreError): + storage.get_ws_declaration("ws_id/", MOCK_DL_TARGET) + + +def test_s3_storage_no_target(s3_bucket): + s3_bucket.put_object(Bucket=S3_BUCKET, Key=f"{S3_BACKUP_PATH}/bla/") + conf = restore.BackupRestoreConfig(TEST_CONF_PATH) + storage = restore.S3Storage(conf) + with pytest.raises(restore.BackupRestoreError): + storage.get_ws_declaration("bad_target/", MOCK_DL_TARGET) + + +def test_init_ldm_with_ws_data_filter_cols(): + # Regression test - this doesn't work for sdk 1.3 and lesser + sdk = gd_sdk.GoodDataSdk.create("", "") + model = sdk.catalog_workspace_content.load_ldm_from_disk(TEST_LDM_PATH) + assert len(model.ldm.datasets) == 1 + + +def test_validate_targets(caplog): + sdk = mock.Mock() + sdk.catalog_workspace.list_workspaces.return_value = [ + MockGdWorkspace(id=f"ws_id_{i}") for i in range(4) + ] + + ws_paths = {f"ws_id_{i}": "" for i in range(2, 6)} + + restore.validate_targets(sdk, ws_paths) + + assert len(caplog.record_tuples) == 1 + logger, level, msg = caplog.record_tuples[0] + assert logger == LOGGER_NAME + assert level == logging.ERROR + for i in range(4, 6): + assert f"ws_id_{i}" in msg + + +def test_bad_s3_bucket_raises_error(s3): + conf = restore.BackupRestoreConfig(TEST_CONF_PATH) + with 
pytest.raises(RuntimeError): + restore.S3Storage(conf) + + +def test_bad_s3_path_raises_error(s3_bucket): + conf = restore.BackupRestoreConfig(TEST_CONF_PATH) + with pytest.raises(RuntimeError): + restore.S3Storage(conf) + + +@mock.patch("scripts.restore.zipfile.ZipFile") +def test_restore_empty_ws(zipfile): + def create_empty_ws(tempdir): + os.mkdir(tempdir / "gooddata_layouts") + os.mkdir(tempdir / "gooddata_layouts" / "ldm") + os.mkdir(tempdir / "gooddata_layouts" / "analytics_model") + os.mkdir(tempdir / "gooddata_layouts" / "user_data_filters") + + zipfile.return_value.__enter__.return_value.extractall = create_empty_ws + sdk = mock.Mock() + api = mock.Mock() + storage = mock.Mock() + ws_paths = {"ws_id": "some/ws/path"} + + worker = restore.RestoreWorker(sdk, api, storage, ws_paths) + worker.incremental_restore() + + sdk.catalog_workspace_content.put_declarative_ldm.assert_called_once_with( + "ws_id", mock.ANY + ) + sdk.catalog_workspace_content.put_declarative_analytics_model.assert_called_once_with( + "ws_id", mock.ANY + ) + + +@mock.patch("scripts.restore.zipfile.ZipFile") +def test_invalid_ws_on_disk_skipped(zipfile): + def create_invalid_ws(tempdir): + # Missing AM directory + os.mkdir(tempdir / "gooddata_layouts") + os.mkdir(tempdir / "gooddata_layouts" / "ldm") + + zipfile.return_value.__enter__.return_value.extractall = create_invalid_ws + + sdk = mock.Mock() + api = mock.Mock() + storage = mock.Mock() + ws_paths = {"ws_id": "some/ws/path"} + + worker = restore.RestoreWorker(sdk, api, storage, ws_paths) + worker.incremental_restore() + + sdk.catalog_workspace_content.put_declarative_ldm.assert_not_called() + sdk.catalog_workspace_content.put_declarative_analytics_model.assert_not_called() + + +# e2e tests + + +def prepare_catalog_mocks(): + ldm = mock.Mock() + ldm.to_dict.return_value = {"ldm": {"foo": "bar"}} + ws_catalog = mock.MagicMock() + return ldm, ws_catalog + + +@mock.patch("scripts.restore.RestoreWorker._load_user_data_filters") +@mock.patch("scripts.restore.zipfile") +def test_incremental_restore(_, _load_user_data_filters, create_backups_in_bucket): + # Prepare sdk-related mocks + ldm, ws_catalog = prepare_catalog_mocks() + ws_catalog.load_ldm_from_disk.return_value = ldm + sdk = mock.Mock() + api = mock.Mock() + sdk.catalog_workspace_content = ws_catalog + + create_backups_in_bucket(["ws_id_1", "ws_id_2"]) + + conf = restore.BackupRestoreConfig(TEST_CONF_PATH) + storage = restore.S3Storage(conf) + + ws_paths = {"ws_id_1": "ws_id_1", "ws_id_2": "ws_id_2"} + + worker = restore.RestoreWorker(sdk, api, storage, ws_paths) + with mock.patch("scripts.restore.RestoreWorker._check_workspace_is_valid") as _: + worker.incremental_restore() + + ws_catalog.assert_has_calls( + [ + mock.call.load_ldm_from_disk(mock.ANY), + mock.call.load_analytics_model_from_disk(mock.ANY), + ] + ) + ws_catalog.assert_has_calls( + [ + mock.call.put_declarative_ldm("ws_id_1", ldm), + mock.call.put_declarative_analytics_model("ws_id_1", mock.ANY), + ] + ) + ws_catalog.assert_has_calls( + [ + mock.call.put_declarative_ldm("ws_id_2", ldm), + mock.call.put_declarative_analytics_model("ws_id_2", mock.ANY), + ] + ) + + +@mock.patch("scripts.restore.RestoreWorker._load_user_data_filters") +@mock.patch("scripts.restore.zipfile") +def test_incremental_restore_different_ws_source( + _, _load_user_data_filters, create_backups_in_bucket +): + # Prepare sdk-related mocks + ldm, ws_catalog = prepare_catalog_mocks() + ws_catalog.load_ldm_from_disk.return_value = ldm + sdk = mock.Mock() + 
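# A single backup (ws_id_1) is restored into two targets here, exercising the restore-as-clone path. +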
sdk.catalog_workspace_content = ws_catalog + + api = mock.Mock() + + create_backups_in_bucket(["ws_id_1"]) + + conf = restore.BackupRestoreConfig(TEST_CONF_PATH) + storage = restore.S3Storage(conf) + + # 1 -> 1; 2 -> 1 + ws_paths = {"ws_id_1": "ws_id_1", "ws_id_2": "ws_id_1"} + + worker = restore.RestoreWorker(sdk, api, storage, ws_paths) + with mock.patch("scripts.restore.RestoreWorker._check_workspace_is_valid") as _: + worker.incremental_restore() + + ws_catalog.assert_has_calls( + [ + mock.call.load_ldm_from_disk(mock.ANY), + mock.call.load_analytics_model_from_disk(mock.ANY), + ] + ) + ws_catalog.assert_has_calls( + [ + mock.call.put_declarative_ldm("ws_id_1", ldm), + mock.call.put_declarative_analytics_model("ws_id_1", mock.ANY), + ] + ) + ws_catalog.assert_has_calls( + [ + mock.call.put_declarative_ldm("ws_id_2", ldm), + mock.call.put_declarative_analytics_model("ws_id_2", mock.ANY), + ] + ) + + +@mock.patch("scripts.restore.RestoreWorker._load_user_data_filters") +@mock.patch("scripts.restore.zipfile") +def test_incremental_restore_one_succeeds_one_fails( + _, _load_user_data_filters, create_backups_in_bucket +): + # Prepare sdk-related mocks + ldm, ws_catalog = prepare_catalog_mocks() + # One load succeeds, one fails... + ws_catalog.load_ldm_from_disk.side_effect = [ldm, Exception()] + sdk = mock.Mock() + sdk.catalog_workspace_content = ws_catalog + + api = mock.Mock() + + create_backups_in_bucket(["ws_id_1", "ws_id_2"]) + + conf = restore.BackupRestoreConfig(TEST_CONF_PATH) + storage = restore.S3Storage(conf) + + ws_paths = {"ws_id_1": "ws_id_1", "ws_id_2": "ws_id_1"} + + worker = restore.RestoreWorker(sdk, api, storage, ws_paths) + with mock.patch("scripts.restore.RestoreWorker._check_workspace_is_valid") as _: + worker.incremental_restore() + + ws_catalog.assert_has_calls( + [ + mock.call.put_declarative_ldm("ws_id_1", ldm), + mock.call.put_declarative_analytics_model("ws_id_1", mock.ANY), + ] + ) + # Ensure that despite the failure on ws_id_2 restore, we don't put anything + assert_not_called_with(ws_catalog.put_declarative_ldm, "ws_id_2", mock.ANY) + assert_not_called_with( + ws_catalog.put_declarative_analytics_model, "ws_id_2", mock.ANY + ) + + +def test_load_user_data_filters(): + sdk = mock.Mock() + api = mock.Mock() + storage = mock.Mock() + ws_paths = mock.Mock() + + worker = restore.RestoreWorker(sdk, api, storage, ws_paths) + user_data_filters = worker._load_user_data_filters(TEST_UDF_PATH) + user_data_filters_expected = { + "userDataFilters": [ + { + "id": "datafilter2", + "maql": '{label/campaign_channels.category} = "1"', + "title": "Status filter", + "user": {"id": "5c867a8a-12af-45bf-8d85-c7d16bedebd1", "type": "user"}, + }, + { + "id": "datafilter4", + "maql": '{label/campaign_channels.category} = "1"', + "title": "Status filter", + "user": {"id": "5c867a8a-12af-45bf-8d85-c7d16bedebd1", "type": "user"}, + }, + ] + } + assert user_data_filters == user_data_filters_expected + + +@mock.patch("scripts.restore.create_client") +@mock.patch("scripts.restore.RestoreWorker._load_user_data_filters") +@mock.patch("scripts.restore.zipfile") +def test_e2e(_, _load_user_data_filters, create_client, create_backups_in_bucket): + conf_path = TEST_CONF_PATH + csv_path = TEST_CSV_PATH + args = argparse.Namespace(conf=conf_path, ws_csv=csv_path, verbose=False) + + # Prepare sdk-related mocks + ldm, ws_catalog = prepare_catalog_mocks() + # On load_ldm_from_disk: Success, Fail, Success + ws_catalog.load_ldm_from_disk.side_effect = [ldm, Exception(), ldm] + sdk = mock.Mock() + 
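# Per tests/data/restore/test.csv there are four targets: thiswsdoesnotexist is dropped by validate_targets, ws_id_2's LDM load fails (the second side_effect above), so only ws_id_1 and ws_id_3 should be PUT. +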
sdk.catalog_workspace_content = ws_catalog + sdk.catalog_workspace.list_workspaces.return_value = [ + MockGdWorkspace(id=f"ws_id_{i}") for i in range(1, 4) + ] + + api = mock.Mock() + + create_client.return_value = sdk, api + + create_backups_in_bucket(["ws_id_1", "ws_id_2"], is_e2e=True) + + with mock.patch("scripts.restore.RestoreWorker._check_workspace_is_valid") as _: + restore.main(args) + + assert_not_called_with( + ws_catalog.put_declarative_ldm, "thiswsdoesnotexist", mock.ANY + ) + assert_not_called_with( + ws_catalog.put_declarative_analytics_model, "thiswsdoesnotexist", mock.ANY + ) + + ws_catalog.assert_has_calls( + [ + mock.call.put_declarative_ldm("ws_id_1", ldm), + mock.call.put_declarative_analytics_model("ws_id_1", mock.ANY), + ] + ) + + # Ensure that in case of the failure on ws_id_2 restore, we don't PUT anything + assert_not_called_with(ws_catalog.put_declarative_ldm, "ws_id_2", mock.ANY) + assert_not_called_with( + ws_catalog.put_declarative_analytics_model, "ws_id_2", mock.ANY + ) + + ws_catalog.assert_has_calls( + [ + mock.call.put_declarative_ldm("ws_id_3", ldm), + mock.call.put_declarative_analytics_model("ws_id_3", mock.ANY), + ] + ) diff --git a/tests/test_user_group_mgmt.py b/tests/test_user_group_mgmt.py new file mode 100644 index 0000000..cdb7fa4 --- /dev/null +++ b/tests/test_user_group_mgmt.py @@ -0,0 +1,136 @@ +# BSD License +# +# Copyright (c) 2024, GoodData Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, are permitted, provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
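+# The cases below cover CSV-row parsing (TargetUserGroup.from_csv_row) and the
+# end-to-end diff against upstream groups: unchanged groups (ug_1) are left
+# alone, new or changed ones (ug_2, ug_3) are created or updated, and groups
+# marked is_active=False (ug_4) are deleted.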
+ +import argparse +import pytest +from unittest import mock +from dataclasses import dataclass + +from gooddata_sdk.catalog.user.entity_model.user import CatalogUserGroup +from scripts import user_group_mgmt + +TEST_CSV_PATH = "tests/data/user_group_mgmt/input.csv" + + +@dataclass +class MockUserGroup: + id: str + name: str + parent_ids: list[str] + + def to_sdk(self): + return CatalogUserGroup.init( + user_group_id=self.id, + user_group_name=self.name, + user_group_parent_ids=self.parent_ids, + ) + + +@mock.patch("os.path.exists") +def test_conflicting_delimiters_raises_error(path_exists): + path_exists.return_value = True + args = argparse.Namespace( + user_group_csv="", delimiter=",", ug_delimiter=",", quotechar='"' + ) + with pytest.raises(RuntimeError): + user_group_mgmt.validate_args(args) + + +def test_from_csv_row_standard(): + row = ["ug_1", "Admins", "ug_2|ug_3", "True"] + result = user_group_mgmt.TargetUserGroup.from_csv_row(row, "|") + expected = user_group_mgmt.TargetUserGroup( + user_group_id="ug_1", + user_group_name="Admins", + parent_user_groups=["ug_2", "ug_3"], + is_active=True, + ) + assert result == expected, "Standard row should be parsed correctly" + + +def test_from_csv_row_no_parent_groups(): + row = ["ug_2", "Developers", "", "True"] + result = user_group_mgmt.TargetUserGroup.from_csv_row(row, "|") + expected = user_group_mgmt.TargetUserGroup( + user_group_id="ug_2", + user_group_name="Developers", + parent_user_groups=[], + is_active=True, + ) + assert ( + result == expected + ), "Row without parent user groups should be parsed correctly" + + +def test_from_csv_row_fallback_name(): + row = ["ug_3", "", "", "False"] + result = user_group_mgmt.TargetUserGroup.from_csv_row(row, "|") + expected = user_group_mgmt.TargetUserGroup( + user_group_id="ug_3", + user_group_name="ug_3", + parent_user_groups=[], + is_active=False, + ) + assert result == expected, "Row with empty name should fallback to user group ID" + + +def test_from_csv_row_invalid_is_active(): + row = ["ug_4", "Testers", "ug_1", "not_a_boolean"] + result = user_group_mgmt.TargetUserGroup.from_csv_row(row, "|") + expected = user_group_mgmt.TargetUserGroup( + user_group_id="ug_4", + user_group_name="Testers", + parent_user_groups=["ug_1"], + is_active=False, + ) + assert result == expected, "Invalid 'is_active' value should default to False" + + +def prepare_sdk(): + def mock_list_user_groups(): + return [ + MockUserGroup("ug_1", "Admins", []).to_sdk(), + MockUserGroup("ug_4", "TemporaryAccess", ["ug_2"]).to_sdk(), + ] + + sdk = mock.Mock() + sdk.catalog_user.list_user_groups = mock_list_user_groups + return sdk + + +@mock.patch("scripts.user_group_mgmt.create_clients") +def test_user_group_mgmt_e2e(create_client): + sdk = prepare_sdk() + create_client.return_value = sdk + + args = argparse.Namespace( + user_group_csv=TEST_CSV_PATH, + delimiter=",", + ug_delimiter="|", + quotechar='"', + verbose=False, + ) + + user_group_mgmt.user_group_mgmt(args) + + expected_create_or_update_calls = [ + mock.call(CatalogUserGroup.init("ug_2", "Developers", ["ug_1"])), + mock.call(CatalogUserGroup.init("ug_3", "Testers", ["ug_1", "ug_2"])), + ] + sdk.catalog_user.create_or_update_user_group.assert_has_calls( + expected_create_or_update_calls, any_order=True + ) + + expected_delete_calls = [mock.call("ug_4")] + sdk.catalog_user.delete_user_group.assert_has_calls( + expected_delete_calls, any_order=True + ) diff --git a/tests/test_user_mgmt.py b/tests/test_user_mgmt.py new file mode 100644 index 0000000..fbe811e --- 
/dev/null +++ b/tests/test_user_mgmt.py @@ -0,0 +1,203 @@ +# (C) 2023 GoodData Corporation +import argparse +from dataclasses import dataclass +from unittest import mock +from typing import Any, Optional + +import pytest +import gooddata_sdk as gd_sdk +from gooddata_api_client.exceptions import NotFoundException + +from scripts import user_mgmt + + +TEST_CSV_PATH = "tests/data/user_mgmt/input.csv" + + +@dataclass +class MockUser: + id: str + firstname: Optional[str] + lastname: Optional[str] + email: Optional[str] + authenticationId: Optional[str] + user_groups: list[str] + + def to_sdk(self): + return gd_sdk.CatalogUser.init( + user_id=self.id, + firstname=self.firstname, + lastname=self.lastname, + email=self.email, + authentication_id=self.authenticationId, + user_group_ids=self.user_groups, + ) + + def to_json(self): + attrs = {} + if self.authenticationId: + attrs["authenticationId"] = self.authenticationId + if self.firstname: + attrs["firstname"] = self.firstname + if self.lastname: + attrs["lastname"] = self.lastname + if self.email: + attrs["email"] = self.email + + data = { + "id": self.id, + "type": "user", + "attributes": attrs, + } + + if not self.user_groups: + return data + + relsdata = [{"id": group, "type": "userGroup"} for group in self.user_groups] + if relsdata: + data["relationships"] = {"userGroups": {"data": relsdata}} + return data + + +@mock.patch("os.path.exists") +def test_conflicting_delimiters_raises_error(path_exists): + path_exists.return_value = True + args = argparse.Namespace( + conf="", user_csv="", delimiter=",", ug_delimiter=",", quotechar='"' + ) + with pytest.raises(RuntimeError): + user_mgmt.validate_args(args) + + +def test_user_obj_from_sdk(): + user_input = MockUser("some.user", "some", "user", "some@email.com", "auth", ["ug"]) + expected = user_mgmt.GDUserTarget( + "some.user", "some", "user", "some@email.com", "auth", ["ug"], True + ) + user = user_mgmt.GDUserTarget.from_sdk_obj(user_input.to_sdk()) + assert expected == user + + +def test_user_obj_from_sdk_no_ugs(): + user_input = MockUser("some.user", "some", "user", "some@email.com", "auth", []) + expected = user_mgmt.GDUserTarget( + "some.user", "some", "user", "some@email.com", "auth", [], True + ) + user = user_mgmt.GDUserTarget.from_sdk_obj(user_input.to_sdk()) + assert expected == user + + +def test_user_obj_to_sdk(): + user_input = MockUser("some.user", "some", "user", "some@email.com", "auth", ["ug"]) + user = user_mgmt.GDUserTarget( + "some.user", "some", "user", "some@email.com", "auth", ["ug"], True + ) + expected = user_input.to_sdk() + assert expected == user.to_sdk_obj() + + +def test_user_obj_to_sdk_no_ugs(): + user_input = MockUser("some.user", "some", "user", "some@email.com", "auth", []) + user = user_mgmt.GDUserTarget( + "some.user", "some", "user", "some@email.com", "auth", [], True + ) + expected = user_input.to_sdk() + assert expected == user.to_sdk_obj() + + +class MockResponse: + def __init__(self, status_code, json_response: Optional[dict[str, Any]] = None, text: str = ""): + self.status_code = status_code + self.json_response = json_response if json_response else {} + self.text = text + + def json(self): + return self.json_response + + +UPSTREAM_USERS = { + "jozef.mrkva": MockUser( + "jozef.mrkva", "jozef", "mrkva", "jozef.mrkva@test.com", "auth_id_1", [] + ), + "kristian.kalerab": MockUser( + "kristian.kalerab", + "kristian", + "kalerab", + "kristian.kalerab@test.com", + "auth_id_5", + [], + ), + "richard.cvikla": MockUser( + "richard.cvikla", "richard", "cvikla", None, "auth_id_6", [] + ), +
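# richard.cvikla and adam.avokado are marked is_active=False in the input CSV, so the e2e test expects delete_user for both. +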
"adam.avokado": MockUser("adam.avokado", None, None, None, "auth_id_7", []), +} + +UPSTREAM_UG_ID = "ug_1" +EXPECTED_NEW_UG_OBJ = gd_sdk.CatalogUserGroup.init("ug_2", "ug_2") +EXPECTED_GET_IDS = {"jozef.mrkva", "kristian.kalerab", "peter.pertzlen", "zoltan.zeler"} +EXPECTED_CREATE_OR_UPDATE_IDS = {"peter.pertzlen", "zoltan.zeler", "kristian.kalerab"} + + +def prepare_sdk(): + def mock_get_user(user_id): + if user_id not in UPSTREAM_USERS: + raise NotFoundException + return UPSTREAM_USERS[user_id].to_sdk() + + def mock_get_user_group(ug_id): + if ug_id != UPSTREAM_UG_ID: + raise NotFoundException + return + + sdk = mock.Mock() + sdk.catalog_user.get_user.side_effect = mock_get_user + sdk.catalog_user.get_user_group.side_effect = mock_get_user_group + return sdk + + +""" +jozef - No change; user exists +bartolomej - no change; user doesnt exist +peter - create (2 ugs); 1 ug exists, 1 doesnt +zoltan - create (1 ug); ug exists +kristian - update +richard - delete (diff fields than in upstream) +adam - delete (same fields as in upstream) +""" + + +@mock.patch("scripts.user_mgmt.create_clients") +def test_user_mgmt_e2e(create_client): + sdk = prepare_sdk() + create_client.return_value = sdk + + args = argparse.Namespace( + user_csv=TEST_CSV_PATH, + delimiter=",", + ug_delimiter="|", + quotechar='"', + verbose=False, + ) + + user_mgmt.user_mgmt(args) + + sdk.catalog_user.get_user.assert_has_calls( + [mock.call(id) for id in EXPECTED_GET_IDS], + any_order=True, + ) + + created_or_updated = { + call[0][0].id for call in sdk.catalog_user.create_or_update_user.call_args_list + } + assert created_or_updated == EXPECTED_CREATE_OR_UPDATE_IDS + + sdk.catalog_user.delete_user.assert_has_calls( + [mock.call("richard.cvikla"), mock.call("adam.avokado")] + ) + sdk.catalog_user.get_user_group.assert_has_calls( + [mock.call("ug_1"), mock.call("ug_2")] + ) + sdk.catalog_user.create_or_update_user_group.assert_called_once_with( + EXPECTED_NEW_UG_OBJ + ) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..7aa2d78 --- /dev/null +++ b/tox.ini @@ -0,0 +1,29 @@ +[tox] +env_list = py311, type, lint, 3.11 + +[testenv] +allowlist_externals = + sh + pytest +deps = + -rrequirements.txt + -rrequirements-test.txt +commands = + pytest -v tests + +[testenv:type] +description = Run mypy type checks +skip_install = true +deps = + mypy +commands = mypy scripts tests + +[testenv:lint] +description = Lint and format check the source code with black and ruff +skip_install = true +deps = + black + ruff +commands = + black --check --diff scripts tests + ruff check scripts tests