From 1abb37145fcc3b756c513a60f04a659491d84761 Mon Sep 17 00:00:00 2001 From: Pankaj Koti Date: Wed, 20 Nov 2024 15:09:13 +0530 Subject: [PATCH] Enable Azure task in the remote store manifest example DAG (#1333) During the work on PR https://github.com/astronomer/astronomer-cosmos/pull/1297, an issue arose where the Azure remote manifest task began failing after installing providers and packages with constraints. To allow other tests, which were running successfully, to proceed, the task was temporarily disabled. Upon reviewing the GitHub Actions logs from previous successful runs, it was [observed](https://github.com/astronomer/astronomer-cosmos/actions/runs/11573670971/job/32216315282#step:6:250) that the Azure provider version installed was 10.5.1. However, after the refactoring introduced in the PR, the failing actions [showed](https://github.com/astronomer/astronomer-cosmos/actions/runs/11911545582/job/33193301710#step:6:474) azure provider==8.4.0 being installed with the constraints file. To investigate, I tested locally to identify a working version. While the task failed with 8.4.0, it succeeded with 8.5.0. Analysis of the failure [logs](https://github.com/astronomer/astronomer-cosmos/actions/runs/11911545582/job/33193301710#step:7:467) and of the Azure provider changelog suggests that https://github.com/apache/airflow/pull/35820/, which was released in 8.5.0, is potentially the fix for the failure with our connection setup in the CI. Therefore, I propose using azure provider>=8.5.0. However, the Airflow [constraints file](https://raw.githubusercontent.com/apache/airflow/constraints-2.8.0/constraints-3.8.txt) for version 2.8 specifies azure provider==8.4.0, which conflicts with this requirement. To address this, I am making changes to the pre-install script in our CI to install azure provider>=8.5.0 without relying on the constraints file, citing the reasons above. 
closes: #1304 --- dev/dags/cosmos_manifest_example.py | 30 ++++++++++++++--------------- pyproject.toml | 6 +++--- scripts/test/pre-install-airflow.sh | 10 +++++++++- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/dev/dags/cosmos_manifest_example.py b/dev/dags/cosmos_manifest_example.py index 543fe641c..8e35208b8 100644 --- a/dev/dags/cosmos_manifest_example.py +++ b/dev/dags/cosmos_manifest_example.py @@ -87,26 +87,24 @@ def cosmos_manifest_example() -> None: # [END gcp_gs_example] # [START azure_abfs_example] - # azure_abfs_example = DbtTaskGroup( - # group_id="azure_abfs_example", - # project_config=ProjectConfig( - # manifest_path="abfs://cosmos-manifest-test/manifest.json", - # manifest_conn_id="azure_abfs_conn", - # # `manifest_conn_id` is optional. If not provided, the default connection ID `wasb_default` is used. - # project_name="jaffle_shop", - # ), - # profile_config=profile_config, - # render_config=render_config, - # execution_config=execution_config, - # operator_args={"install_deps": True}, - # ) + azure_abfs_example = DbtTaskGroup( + group_id="azure_abfs_example", + project_config=ProjectConfig( + manifest_path="abfs://cosmos-manifest-test/manifest.json", + manifest_conn_id="azure_abfs_conn", + # `manifest_conn_id` is optional. If not provided, the default connection ID `wasb_default` is used. 
+ project_name="jaffle_shop", + ), + profile_config=profile_config, + render_config=render_config, + execution_config=execution_config, + operator_args={"install_deps": True}, + ) # [END azure_abfs_example] post_dbt = EmptyOperator(task_id="post_dbt") - (pre_dbt >> local_example >> aws_s3_example >> gcp_gs_example >> post_dbt) - # TODO: re-enable the following - # (pre_dbt >> local_example >> aws_s3_example >> gcp_gs_example >> azure_abfs_example >> post_dbt) + (pre_dbt >> local_example >> aws_s3_example >> gcp_gs_example >> azure_abfs_example >> post_dbt) cosmos_manifest_example() diff --git a/pyproject.toml b/pyproject.toml index 912dd83ea..11290b1a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ amazon = [ "apache-airflow-providers-amazon[s3fs]>=3.0.0", ] google = ["apache-airflow-providers-google>=10.17.0"] -microsoft = ["apache-airflow-providers-microsoft-azure"] +microsoft = ["apache-airflow-providers-microsoft-azure>=8.5.0"] all = [ "astronomer-cosmos[dbt-all]", "astronomer-cosmos[openlineage]", @@ -95,7 +95,7 @@ aws_eks = [ "apache-airflow-providers-amazon>=8.0.0", ] azure-container-instance = [ - "apache-airflow-providers-microsoft-azure>=8.4.0", + "apache-airflow-providers-microsoft-azure>=8.5.0", ] gcp-cloud-run-job = [ "apache-airflow-providers-google>=10.11.0", @@ -187,7 +187,7 @@ dependencies = [ "apache-airflow-providers-amazon[s3fs]>=3.0.0", "apache-airflow-providers-cncf-kubernetes>=5.1.1", "apache-airflow-providers-google>=10.17.0", - "apache-airflow-providers-microsoft-azure", + "apache-airflow-providers-microsoft-azure>=8.5.0", "msgpack", "openlineage-airflow", "pydantic>=1.10.0", diff --git a/scripts/test/pre-install-airflow.sh b/scripts/test/pre-install-airflow.sh index 7017fc628..da8e72fe3 100755 --- a/scripts/test/pre-install-airflow.sh +++ b/scripts/test/pre-install-airflow.sh @@ -44,6 +44,7 @@ elif [ "$AIRFLOW_VERSION" = "2.7" ] ; then else uv pip install "apache-airflow-providers-amazon[s3fs]" --constraint 
/tmp/constraint.txt uv pip install "apache-airflow-providers-cncf-kubernetes" --constraint /tmp/constraint.txt + # The Airflow 2.9 constraints file at # https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.11.txt # specifies apache-airflow-providers-google==10.16.0. However, our CI setup uses a Google connection without a token, @@ -52,7 +53,14 @@ else # we are using apache-airflow-providers-google>=10.17.0 and skipping constraints installation, as the specified # version does not meet our requirements. uv pip install "apache-airflow-providers-google>=10.17.0" - uv pip install apache-airflow-providers-microsoft-azure --constraint /tmp/constraint.txt + + # The Airflow 2.8 constraints file at + # https://raw.githubusercontent.com/apache/airflow/constraints-2.8.0/constraints-3.11.txt + # specifies apache-airflow-providers-microsoft-azure==8.4.0. However, our Azure connection setup in the CI + # previously led to authentication issues with this version. This issue was resolved in + # apache-airflow-providers-microsoft-azure==8.5.0. Hence, we are using apache-airflow-providers-microsoft-azure>=8.5.0 + # and skipping installation with constraints, as the specified version does not meet our requirements. + uv pip install "apache-airflow-providers-microsoft-azure>=8.5.0" fi rm /tmp/constraint.txt