From d0bbf498d88b07c657d6d8d67f05c78c0b318253 Mon Sep 17 00:00:00 2001 From: Bernardo Meireles <67381633+bcmeireles@users.noreply.github.com> Date: Wed, 4 Dec 2024 11:13:32 +0000 Subject: [PATCH] Improve the CI workflow (#252) ### Summary Made some improvements to our CI workflow ### Description - Broke down the steps in the pipeline into different bash scripts for readability and local reproducibility - Set environment variables using the correct approach for action jobs - Handled credentials (even dummy ones) through secrets ### Test Results N/A ### Changelog - Created scripts inside `.github/scripts` - Updated `.github/workflows/ci.yml` ### Related Issue https://github.com/dremio/dbt-dremio/issues/39 This improves https://github.com/dremio/dbt-dremio/pull/248 --- .github/scripts/clean_pycache.sh | 8 + .github/scripts/create_dbt_projects.sh | 42 ++++ .github/scripts/create_dbt_test_users.sh | 32 +++ .github/scripts/create_docker_network.sh | 8 + .github/scripts/create_dremio_s3_source.sh | 53 +++++ .github/scripts/create_env_file.sh | 21 ++ .github/scripts/create_minio_bucket.sh | 36 ++++ .github/scripts/install_minio_client.sh | 10 + .github/scripts/run_tests.sh | 25 +++ .github/scripts/start_dremio.sh | 14 ++ .github/scripts/start_minio.sh | 18 ++ .github/workflows/ci.yml | 239 +++------------------ 12 files changed, 298 insertions(+), 208 deletions(-) create mode 100644 .github/scripts/clean_pycache.sh create mode 100644 .github/scripts/create_dbt_projects.sh create mode 100644 .github/scripts/create_dbt_test_users.sh create mode 100644 .github/scripts/create_docker_network.sh create mode 100644 .github/scripts/create_dremio_s3_source.sh create mode 100644 .github/scripts/create_env_file.sh create mode 100644 .github/scripts/create_minio_bucket.sh create mode 100644 .github/scripts/install_minio_client.sh create mode 100644 .github/scripts/run_tests.sh create mode 100644 .github/scripts/start_dremio.sh create mode 100644 .github/scripts/start_minio.sh diff --git 
a/.github/scripts/clean_pycache.sh b/.github/scripts/clean_pycache.sh new file mode 100644 index 00000000..f315fc50 --- /dev/null +++ b/.github/scripts/clean_pycache.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -e + +echo "Cleaning up __pycache__ directories..." + +find . -type d -name "__pycache__" -exec rm -r {} + + +echo "__pycache__ directories cleaned up." \ No newline at end of file diff --git a/.github/scripts/create_dbt_projects.sh b/.github/scripts/create_dbt_projects.sh new file mode 100644 index 00000000..dd262867 --- /dev/null +++ b/.github/scripts/create_dbt_projects.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -e + +echo "Creating dbt projects..." + +init_dbt_project() { + local project_name=$1 + local profile_selection=$2 + local target_selection=$3 + local host=$4 + local username=$5 + local password=$6 + + # indentation and empty lines are on purpose, simulating required user input + + dbt init "$project_name" <> $GITHUB_ENV + +# Create the S3 source in Dremio +echo "Creating the S3 source in Dremio..." 
+curl -s -X PUT "http://localhost:9047/apiv2/source/dbt_test_source" \ + -H "Content-Type: application/json" \ + -H "Authorization: _dremio$AUTH_TOKEN" \ + --data "{\"name\":\"dbt_test_source\",\"config\":{\"credentialType\":\"ACCESS_KEY\",\"accessKey\":\"$MINIO_ROOT_USER\",\"accessSecret\":\"$MINIO_ROOT_PASSWORD\",\"secure\":false,\"externalBucketList\":[],\"enableAsync\":true,\"enableFileStatusCheck\":true,\"rootPath\":\"/\",\"defaultCtasFormat\":\"ICEBERG\",\"propertyList\":[{\"name\":\"fs.s3a.path.style.access\",\"value\":\"true\"},{\"name\":\"fs.s3a.endpoint\",\"value\":\"minio:9000\"},{\"name\":\"dremio.s3.compat\",\"value\":\"true\"}],\"whitelistedBuckets\":[],\"isCachingEnabled\":false,\"maxCacheSpacePct\":100},\"type\":\"S3\",\"metadataPolicy\":{\"deleteUnavailableDatasets\":true,\"autoPromoteDatasets\":false,\"namesRefreshMillis\":3600000,\"datasetDefinitionRefreshAfterMillis\":3600000,\"datasetDefinitionExpireAfterMillis\":10800000,\"authTTLMillis\":86400000,\"updateMode\":\"PREFETCH_QUERIED\"}}" + +echo "S3 Source created in Dremio." \ No newline at end of file diff --git a/.github/scripts/create_env_file.sh b/.github/scripts/create_env_file.sh new file mode 100644 index 00000000..75e09707 --- /dev/null +++ b/.github/scripts/create_env_file.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -e + +: "${DREMIO_SOFTWARE_USERNAME:?Need to set DREMIO_SOFTWARE_USERNAME}" +: "${DREMIO_SOFTWARE_PASSWORD:?Need to set DREMIO_SOFTWARE_PASSWORD}" + +echo "Creating .env file for tests..." + +mkdir -p tests +cat < tests/.env +DREMIO_SOFTWARE_HOST=localhost +DREMIO_SOFTWARE_USERNAME=${DREMIO_SOFTWARE_USERNAME} +DREMIO_SOFTWARE_PASSWORD=${DREMIO_SOFTWARE_PASSWORD} +DREMIO_DATALAKE=dbt_test_source +DREMIO_DATABASE=dbt_test +DBT_TEST_USER_1=dbt_test_user_1 +DBT_TEST_USER_2=dbt_test_user_2 +DBT_TEST_USER_3=dbt_test_user_3 +EOF + +echo ".env file created successfully." 
\ No newline at end of file diff --git a/.github/scripts/create_minio_bucket.sh b/.github/scripts/create_minio_bucket.sh new file mode 100644 index 00000000..de8ab23d --- /dev/null +++ b/.github/scripts/create_minio_bucket.sh @@ -0,0 +1,36 @@ +#!/bin/bash +set -e + +: "${RETRY_COUNT:?Need to set RETRY_COUNT}" +: "${SLEEP_INTERVAL:?Need to set SLEEP_INTERVAL}" +: "${MINIO_HEALTH_URL:?Need to set MINIO_HEALTH_URL}" +: "${MINIO_ROOT_USER:?Need to set MINIO_ROOT_USER}" +: "${MINIO_ROOT_PASSWORD:?Need to set MINIO_ROOT_PASSWORD}" + +echo "Waiting for MinIO to become ready..." + +for i in $(seq 1 $RETRY_COUNT); do + if curl -s $MINIO_HEALTH_URL; then + echo "MinIO is up." + break + fi + echo "Attempt $i/$RETRY_COUNT: MinIO is not ready yet. Retrying in $SLEEP_INTERVAL seconds..." + sleep $SLEEP_INTERVAL +done + +if ! curl -s $MINIO_HEALTH_URL; then + echo "MinIO did not become ready in time." + exit 1 +fi + +# Set alias to MinIO using localhost +mc alias set myminio http://localhost:9000 "$MINIO_ROOT_USER" "$MINIO_ROOT_PASSWORD" + +echo "Creating bucket dbtdremios3" +mc mb myminio/dbtdremios3 + +echo "Setting bucket policy to public" +mc policy set public myminio/dbtdremios3 + +echo "Listing all buckets to verify" +mc ls myminio \ No newline at end of file diff --git a/.github/scripts/install_minio_client.sh b/.github/scripts/install_minio_client.sh new file mode 100644 index 00000000..179be8cb --- /dev/null +++ b/.github/scripts/install_minio_client.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -e + +echo "Installing MinIO Client (mc)..." + +curl -O https://dl.min.io/client/mc/release/linux-amd64/mc +chmod +x mc +sudo mv mc /usr/local/bin/ + +echo "MinIO Client installed successfully." \ No newline at end of file diff --git a/.github/scripts/run_tests.sh b/.github/scripts/run_tests.sh new file mode 100644 index 00000000..9b2282a7 --- /dev/null +++ b/.github/scripts/run_tests.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +mkdir -p reports + +echo "Starting tests..." 
+ +test_dirs=$(find tests/ -type f \( -name 'test_*.py' -o -name '*_test.py' \) -exec dirname {} \; | sort -u) + +echo "Test directories found:" +echo "$test_dirs" + +# Run tests in each directory and save reports +for dir in $test_dirs; do + echo "Running tests in directory: $dir" + + # Generate a safe report filename + report_file="reports/$(echo "$dir" | tr '/' '_').txt" + + echo "Saving report to: $report_file" + + pytest "$dir" | tee "$report_file" +done + +echo "All tests executed successfully." \ No newline at end of file diff --git a/.github/scripts/start_dremio.sh b/.github/scripts/start_dremio.sh new file mode 100644 index 00000000..972c97c0 --- /dev/null +++ b/.github/scripts/start_dremio.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e + +echo "Starting Dremio service..." + +docker run -d \ + --network ci-network \ + --name dremio \ + -p 31010:31010 \ + -p 9047:9047 \ + -e "DREMIO_JAVA_SERVER_EXTRA_OPTS=-Ddebug.addDefaultUser=true" \ + dremio/dremio-oss + +echo "Dremio service started." \ No newline at end of file diff --git a/.github/scripts/start_minio.sh b/.github/scripts/start_minio.sh new file mode 100644 index 00000000..2679edc1 --- /dev/null +++ b/.github/scripts/start_minio.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -e + +: "${MINIO_ROOT_USER:?Need to set MINIO_ROOT_USER}" +: "${MINIO_ROOT_PASSWORD:?Need to set MINIO_ROOT_PASSWORD}" + +echo "Starting MinIO service..." + +docker run -d \ + --network ci-network \ + --name minio \ + -p 9000:9000 \ + -p 9001:9001 \ + -e "MINIO_ROOT_USER=${MINIO_ROOT_USER}" \ + -e "MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD}" \ + minio/minio server /data --console-address ":9001" + +echo "MinIO service started." 
\ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c0beee2b..05c5a5ff 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,7 +7,7 @@ on: pull_request: branches: - main - + jobs: run-tests: name: Setup and Run Tests @@ -17,250 +17,73 @@ jobs: SLEEP_INTERVAL: 5 # Sleep duration in seconds between retries MINIO_HEALTH_URL: http://localhost:9000/minio/health/live DREMIO_HEALTH_URL: http://localhost:9047 + MINIO_ROOT_USER: ${{ secrets.MINIO_ROOT_USER }} + MINIO_ROOT_PASSWORD: ${{ secrets.MINIO_ROOT_PASSWORD }} + DREMIO_SOFTWARE_USERNAME: ${{ secrets.DREMIO_SOFTWARE_USERNAME }} + DREMIO_SOFTWARE_PASSWORD: ${{ secrets.DREMIO_SOFTWARE_PASSWORD }} + DREMIO_SOFTWARE_HOST: localhost + DREMIO_DATALAKE: dbt_test_source + DREMIO_DATABASE: dbt_test + DBT_TEST_USER_1: dbt_test_user_1 + DBT_TEST_USER_2: dbt_test_user_2 + DBT_TEST_USER_3: dbt_test_user_3 steps: - name: Check out repository - uses: actions/checkout@v2 - - - name: Set up Environment Variables - run: | - echo "DREMIO_SOFTWARE_HOST=localhost" >> $GITHUB_ENV - echo "DREMIO_SOFTWARE_USERNAME=dremio" >> $GITHUB_ENV - echo "DREMIO_SOFTWARE_PASSWORD=dremio123" >> $GITHUB_ENV - echo "DREMIO_DATALAKE=dbt_test_source" >> $GITHUB_ENV - echo "DREMIO_DATABASE=dbt_test" >> $GITHUB_ENV - echo "DBT_TEST_USER_1=dbt_test_user_1" >> $GITHUB_ENV - echo "DBT_TEST_USER_2=dbt_test_user_2" >> $GITHUB_ENV - echo "DBT_TEST_USER_3=dbt_test_user_3" >> $GITHUB_ENV + uses: actions/checkout@v4 - name: Create Docker Network run: | - docker network create ci-network + docker network create ci-network || echo "Network already exists" - name: Start MinIO Service - run: | - docker run -d \ - --network ci-network \ - --name minio \ - -p 9000:9000 \ - -p 9001:9001 \ - -e "MINIO_ROOT_USER=admin" \ - -e "MINIO_ROOT_PASSWORD=password" \ - minio/minio server /data --console-address ":9001" + run: bash .github/scripts/start_minio.sh - name: Start Dremio Service - run: | - docker run -d \ - 
--network ci-network \ - --name dremio \ - -p 31010:31010 \ - -p 9047:9047 \ - -e "DREMIO_JAVA_SERVER_EXTRA_OPTS=-Ddebug.addDefaultUser=true" \ - dremio/dremio-oss + run: bash .github/scripts/start_dremio.sh - name: Install MinIO Client (mc) - run: | - curl -O https://dl.min.io/client/mc/release/linux-amd64/mc - chmod +x mc - sudo mv mc /usr/local/bin/ + run: bash .github/scripts/install_minio_client.sh - name: Create MinIO bucket - run: | - for i in $(seq 1 $RETRY_COUNT); do - if docker exec minio curl -s $MINIO_HEALTH_URL; then - echo "MinIO is up." - break - fi - echo "Waiting for MinIO to become ready..." - sleep 5 - done - if ! docker exec minio curl -s $MINIO_HEALTH_URL; then - echo "MinIO did not become ready in time." - exit 1 - fi - - # Set alias to MinIO - mc alias set myminio http://localhost:9000 admin password - - echo "Creating bucket dbtdremios3" - mc mb myminio/dbtdremios3 - - echo "Setting bucket policy to public" - mc policy set public myminio/dbtdremios3 - - echo "Listing all buckets to verify" - mc ls myminio - - - name: "Create Dremio S3 Source" - run: | - sudo apt-get update - sudo apt-get install -y curl jq - - for i in $(seq 1 $RETRY_COUNT); do - if docker exec dremio curl -s $DREMIO_HEALTH_URL; then - echo "Dremio is up." - break - fi - echo "Waiting for Dremio to become ready..." - sleep 5 - done - if ! docker exec dremio curl -s $DREMIO_HEALTH_URL; then - echo "Dremio did not become ready in time." - exit 1 - fi - - echo "Logging into Dremio to obtain auth token..." - AUTH_TOKEN=$(curl -s -X POST "http://localhost:9047/apiv2/login" \ - -H "Content-Type: application/json" \ - --data "{\"userName\":\"dremio\", \"password\":\"dremio123\"}" | jq -r .token) - - # Check if AUTH_TOKEN is not empty - if [ -z "$AUTH_TOKEN" ]; then - echo "Failed to obtain Dremio auth token" - exit 1 - fi + run: bash .github/scripts/create_minio_bucket.sh - echo "Obtained Dremio auth token: $AUTH_TOKEN" - - echo "Creating the S3 source in Dremio..." 
- curl -s -X PUT "http://localhost:9047/apiv2/source/dbt_test_source" \ - -H "Content-Type: application/json" \ - -H "Authorization: _dremio$AUTH_TOKEN" \ - --data "{\"name\":\"dbt_test_source\",\"config\":{\"credentialType\":\"ACCESS_KEY\",\"accessKey\":\"admin\",\"accessSecret\":\"password\",\"secure\":false,\"externalBucketList\":[],\"enableAsync\":true,\"enableFileStatusCheck\":true,\"rootPath\":\"/\",\"defaultCtasFormat\":\"ICEBERG\",\"propertyList\":[{\"name\":\"fs.s3a.path.style.access\",\"value\":\"true\"},{\"name\":\"fs.s3a.endpoint\",\"value\":\"minio:9000\"},{\"name\":\"dremio.s3.compat\",\"value\":\"true\"}],\"whitelistedBuckets\":[],\"isCachingEnabled\":false,\"maxCacheSpacePct\":100},\"type\":\"S3\",\"metadataPolicy\":{\"deleteUnavailableDatasets\":true,\"autoPromoteDatasets\":false,\"namesRefreshMillis\":3600000,\"datasetDefinitionRefreshAfterMillis\":3600000,\"datasetDefinitionExpireAfterMillis\":10800000,\"authTTLMillis\":86400000,\"updateMode\":\"PREFETCH_QUERIED\"}}" - - echo "S3 Source created in Dremio" + - name: Create Dremio S3 Source + run: bash .github/scripts/create_dremio_s3_source.sh - name: Install Dependencies + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: Install Python Dependencies run: | + pip install --upgrade pip pip install -r dev_requirements.txt pip install . 
- name: Create dbt test users - run: | - curl 'http://localhost:9047/api/v3/user' \ - -H "Authorization: _dremio$AUTH_TOKEN" \ - -H 'Content-Type: application/json' \ - --data-raw '{"firstName":"dbt","lastName":"user1","name":"dbt_test_user_1","email":"dbt_test_user_1@dremio.com","password":"dremio123"}' - - curl 'http://localhost:9047/api/v3/user' \ - -H "Authorization: _dremio$AUTH_TOKEN" \ - -H 'Content-Type: application/json' \ - --data-raw '{"firstName":"dbt","lastName":"user2","name":"dbt_test_user_2","email":"dbt_test_user_2@dremio.com","password":"dremio123"}' - - curl 'http://localhost:9047/api/v3/user' \ - -H "Authorization: _dremio$AUTH_TOKEN" \ - -H 'Content-Type: application/json' \ - --data-raw '{"firstName":"dbt","lastName":"user3","name":"dbt_test_user_3","email":"dbt_test_user_3@dremio.com","password":"dremio123"}' - - echo "users created" + run: bash .github/scripts/create_dbt_test_users.sh - name: Create dbt projects - run: | - dbt init test_cloud_options < tests/.env - DREMIO_SOFTWARE_HOST=localhost - DREMIO_SOFTWARE_USERNAME=dremio - DREMIO_SOFTWARE_PASSWORD=dremio123 - DREMIO_DATALAKE=dbt_test_source - DREMIO_DATABASE=dbt_test - DBT_TEST_USER_1=dbt_test_user_1 - DBT_TEST_USER_2=dbt_test_user_2 - DBT_TEST_USER_3=dbt_test_user_3 - EOF - - - name: Create Reports Directory - run: | - mkdir -p reports + run: bash .github/scripts/create_env_file.sh - name: Run tests - run: | - #!/bin/bash - set -e - - echo "Starting tests" - - test_dirs=$(find tests/ -type f \( -name 'test_*.py' -o -name '*_test.py' \) -exec dirname {} \; | sort -u) - - echo "$test_dirs" - - for dir in $test_dirs; do - echo "Running tests in directory: $dir" - # Generate a safe report filename - report_file="reports/$(echo "$dir" | tr '/' '_').txt" - echo "Saving report to: $report_file" - pytest "$dir" | tee "$report_file" - done - - echo "All tests executed." 
+ run: bash .github/scripts/run_tests.sh - name: Upload tests report as artifact uses: actions/upload-artifact@v3 with: name: all-tests-reports path: reports/ - + upload-individual-test-reports: name: Upload Tests Artifacts runs-on: ubuntu-latest @@ -286,7 +109,7 @@ jobs: steps: - name: Check out repository - uses: actions/checkout@v2 + uses: actions/checkout@v3.5.2 - name: Download All Test Reports uses: actions/download-artifact@v3