diff --git a/.github/ISSUE_TEMPLATE/01-bug.yml b/.github/ISSUE_TEMPLATE/01-bug.yml
new file mode 100644
index 000000000..658d0b9cb
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/01-bug.yml
@@ -0,0 +1,183 @@
+---
+name: Bug Report
+description: File a bug report.
+title: "[Bug]: "
+labels: ["bug", "triage-needed"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this bug report!
+  - type: dropdown
+    id: cosmos-version
+    attributes:
+      label: Astronomer Cosmos Version
+      description: What version of Astronomer Cosmos are you running? If you do not see your version in the list, please (ideally) test on
+        the latest release or main to see if the issue is fixed before reporting it.
+      options:
+        - "1.4.1"
+        - "main (development)"
+        - "Other Astronomer Cosmos version (please specify below)"
+      multiple: false
+    validations:
+      required: true
+  - type: input
+    attributes:
+      label: If "Other Astronomer Cosmos version" selected, which one?
+      # yamllint disable rule:line-length
+      description: >
+        On what version of Astronomer Cosmos are you currently experiencing the issue? Remember, you are encouraged to
+        test with the latest release or on the main branch to verify your issue still exists.
+  - type: input
+    id: dbt-core-version
+    attributes:
+      label: dbt-core version
+      description: What version of dbt-core are you running?
+      placeholder: ex. 1.8.0
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Versions of dbt adapters
+      description: What dbt adapter versions are you using?
+      placeholder: You can use `pip freeze | grep dbt` (you can keep only the relevant ones)
+  - type: dropdown
+    id: load-mode
+    attributes:
+      label: LoadMode
+      description: Which LoadMode are you using?
+      options:
+        - "AUTOMATIC"
+        - "CUSTOM"
+        - "DBT_LS"
+        - "DBT_LS_FILE"
+        - "DBT_LS_MANIFEST"
+      multiple: false
+    validations:
+      required: true
+  - type: dropdown
+    id: execution-mode
+    attributes:
+      label: ExecutionMode
+      description: Which ExecutionMode are you using?
+      options:
+        - "AWS_EKS"
+        - "AZURE_CONTAINER_INSTANCE"
+        - "DOCKER"
+        - "KUBERNETES"
+        - "LOCAL"
+        - "VIRTUALENV"
+      multiple: false
+    validations:
+      required: true
+  - type: dropdown
+    id: invocation-mode
+    attributes:
+      label: InvocationMode
+      description: Which InvocationMode are you using?
+      options:
+        - "DBT_RUNNER"
+        - "SUBPROCESS"
+      multiple: false
+  - type: input
+    id: airflow-version
+    attributes:
+      label: Airflow version
+      description: What version of Apache Airflow are you running?
+      placeholder: ex. 2.9.0
+    validations:
+      required: true
+  - type: input
+    attributes:
+      label: Operating System
+      description: What Operating System are you using?
+      placeholder: "You can get it via `cat /etc/os-release`, for example"
+    validations:
+      required: true
+  - type: dropdown
+    id: browsers
+    attributes:
+      label: If you think it's a UI issue, what browsers are you seeing the problem on?
+      multiple: true
+      options:
+        - Firefox
+        - Chrome
+        - Safari
+        - Microsoft Edge
+  - type: dropdown
+    attributes:
+      label: Deployment
+      description: >
+        What kind of deployment do you have?
+      multiple: false
+      options:
+        - "Official Apache Airflow Helm Chart"
+        - "Other 3rd-party Helm chart"
+        - "Docker-Compose"
+        - "Other Docker-based deployment"
+        - "Virtualenv installation"
+        - "Astronomer"
+        - "Google Cloud Composer"
+        - "Amazon (AWS) MWAA"
+        - "Microsoft ADF Managed Airflow"
+        - "Other"
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Deployment details
+      description: Additional description of your deployment.
+      placeholder: >
+        Enter any relevant details of your deployment, especially the versions of your tools and
+        software (docker-compose, helm, k8s, etc.) and any customisation and configuration you added.
+  - type: textarea
+    id: what-happened
+    attributes:
+      label: What happened?
+      description: Also tell us, what did you expect to happen?
+      placeholder: Tell us what you see!
+      value: "A bug happened!"
+    validations:
+      required: true
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+      render: shell
+  - type: textarea
+    attributes:
+      label: How to reproduce
+      description: What should we do to reproduce the problem?
+      placeholder: >
+        Please make sure you provide a reproducible step-by-step case of how to reproduce the problem
+        as minimally and precisely as possible. Keep in mind we do not have access to your cluster or DAGs.
+        Remember that non-reproducible issues make it hard for us to help you or resolve the issue!
+    validations:
+      required: true
+  - type: textarea
+    attributes:
+      label: Anything else :)?
+      description: Anything else we need to know?
+      placeholder: >
+        How often does this problem occur? (Once? Every time? Only when certain conditions are met?)
+  - type: checkboxes
+    attributes:
+      label: Are you willing to submit a PR?
+      description: >
+        This is absolutely not required, but we are happy to guide you in the contribution process,
+        especially if you already have a good understanding of how to implement the fix. We love to bring new
+        contributors in.
+      options:
+        - label: Yes, I am willing to submit a PR!
+  - type: input
+    id: contact
+    attributes:
+      label: Contact Details
+      description: (Optional) How can we get in touch with you if we need more info?
+      placeholder: ex. email@example.com
+    validations:
+      required: false
+  - type: markdown
+    attributes:
+      value: "Thanks for completing our form!"
diff --git a/.github/ISSUE_TEMPLATE/02-feature.yml b/.github/ISSUE_TEMPLATE/02-feature.yml
new file mode 100644
index 000000000..e179d357d
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/02-feature.yml
@@ -0,0 +1,33 @@
+---
+name: Feature request
+description: Suggest an idea for this project
+labels: ["enhancement", "needs-triage"]
+body:
+  - type: markdown
+    attributes:
+      # yamllint disable rule:line-length
+      value: "
+        Thank you for finding the time to propose a new feature!
+
+        We really appreciate the community efforts to improve Cosmos."
+      # yamllint enable rule:line-length
+  - type: textarea
+    attributes:
+      label: Description
+      description: A short description of your feature
+  - type: textarea
+    attributes:
+      label: Use case/motivation
+      description: What would you like to happen?
+  - type: textarea
+    attributes:
+      label: Related issues
+      description: Is there currently another issue associated with this?
+  - type: checkboxes
+    attributes:
+      label: Are you willing to submit a PR?
+      options:
+        - label: Yes, I am willing to submit a PR!
+  - type: markdown
+    attributes:
+      value: "Thanks for completing our form!"
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 264baa93b..e38cd7157 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -38,11 +38,25 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
         airflow-version: ["2.4", "2.5", "2.6", "2.7", "2.8", "2.9"]
         exclude:
           - python-version: "3.11"
            airflow-version: "2.4"
+          # Apache Airflow versions prior to 2.9.0 have not been tested with Python 3.12.
+          # Official support for Python 3.12 and the corresponding constraints.txt are available only for Apache Airflow >= 2.9.0.
+          # See: https://github.com/apache/airflow/tree/2.9.0?tab=readme-ov-file#requirements
+          # See: https://github.com/apache/airflow/tree/2.8.4?tab=readme-ov-file#requirements
+          - python-version: "3.12"
+            airflow-version: "2.4"
+          - python-version: "3.12"
+            airflow-version: "2.5"
+          - python-version: "3.12"
+            airflow-version: "2.6"
+          - python-version: "3.12"
+            airflow-version: "2.7"
+          - python-version: "3.12"
+            airflow-version: "2.8"
     steps:
       - uses: actions/checkout@v3
         with:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 118ba915b..ad4bb2c65 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,7 +25,7 @@ repos:
         args: ["--autofix"]
       - id: trailing-whitespace
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.6
+    rev: v2.3.0
     hooks:
       - id: codespell
        name: Run codespell to check for common misspellings in files
@@ -54,7 +54,7 @@ repos:
        - --py37-plus
        - --keep-runtime-typing
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.4
+    rev: v0.4.7
     hooks:
       - id: ruff
        args:
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2ef66782c..2e6331eda 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,21 +1,47 @@
 Changelog
 =========
+1.4.2 (2024-06-06)
+------------------
+
+Bug fixes
+
+* Fix the invocation mode for ``ExecutionMode.VIRTUALENV`` by @marco9663 in #1023
+* Fix Cosmos ``enable_cache`` setting by @tatiana in #1025
+* Make ``GoogleCloudServiceAccountDictProfileMapping`` dataset profile arg optional by @oliverrmaa and @pankajastro in #839 and #1017
+* Athena profile mapping set ``aws_session_token`` in profile only if it exists by @pankajastro in #1022
+
+Others
+
+* Update dbt and Airflow conflicts matrix by @tatiana in #1026
+* Enable Python 3.12 unit tests by @pankajastro in #1018
+* Improve error logging in ``DbtLocalBaseOperator`` by @davidsteinar in #1004
+* Add GitHub issue templates for bug reports and feature requests by @pankajkoti in #1009
+* Add more fields in bug template to reduce turnaround in issue triaging by @pankajkoti in #1027
+* Fix ``dev/Dockerfile`` + Add ``uv pip install`` for faster build time by @dwreeves in #997
+* Drop support for Airflow 2.3 by @pankajkoti in #994
+* Update Astro Runtime image by @RNHTTR in #988 and #989
+* Enable ruff F linting by @pankajastro in #985
+* Move Cosmos Airflow configuration to settings.py by @pankajastro in #975
+* Fix CI Issues by @tatiana in #1005
+* Pre-commit hook updates in #1000, #1019
+
+
 1.4.1 (2024-05-17)
---------------------
+------------------
 
 Bug fixes
 
-* Fix manifest testing behavior in #955 by @chris-okorodudu
-* Handle ValueError when unpacking partial_parse.msgpack in #972 by @tatiana
+* Fix manifest testing behavior by @chris-okorodudu in #955
+* Handle ValueError when unpacking partial_parse.msgpack by @tatiana in #972
 
 Others
 
 * Enable pre-commit run and fix type-check job by @pankajastro in #957
-* Clean databricks credentials in test/CI in #969 by @tatiana
-* Update CODEOWNERS in #969 by @tatiana
-* Update emeritus contributors list in #961 by @tatiana
-* Promote @dwreeves to committer in #960 by @tatiana
+* Clean databricks credentials in test/CI by @tatiana in #969
+* Update CODEOWNERS by @tatiana in #969
+* Update emeritus contributors list by @tatiana in #961
+* Promote @dwreeves to committer by @tatiana in #960
 * Pre-commit hook updates in #956
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index aad2b3071..b3378c60a 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -5,7 +5,7 @@
 We as members, contributors, and leaders pledge to make participation in our
 community a harassment-free experience for everyone, regardless of age, body
 size, visible or invisible disability, ethnicity, sex characteristics, gender
-identity and expression, level of experience, education, socio-economic status,
+identity and expression, level of experience, education, socioeconomic status,
 nationality, personal appearance, race, religion, or sexual identity
 and orientation.
diff --git a/cosmos/__init__.py b/cosmos/__init__.py
index 5d88d35d3..7a73e722e 100644
--- a/cosmos/__init__.py
+++ b/cosmos/__init__.py
@@ -5,7 +5,7 @@
 Contains dags, task groups, and operators.
 """
 
-__version__ = "1.4.1"
+__version__ = "1.4.2"
 
 from cosmos.airflow.dag import DbtDag
diff --git a/cosmos/config.py b/cosmos/config.py
index 64a7acd08..820833e6c 100644
--- a/cosmos/config.py
+++ b/cosmos/config.py
@@ -319,6 +319,18 @@ class ExecutionConfig:
     project_path: Path | None = field(init=False)

     def __post_init__(self, dbt_project_path: str | Path | None) -> None:
-        if self.invocation_mode and self.execution_mode != ExecutionMode.LOCAL:
-            raise CosmosValueError("ExecutionConfig.invocation_mode is only configurable for ExecutionMode.LOCAL.")
+        if self.invocation_mode and self.execution_mode not in (ExecutionMode.LOCAL, ExecutionMode.VIRTUALENV):
+            raise CosmosValueError(
+                "ExecutionConfig.invocation_mode is only configurable for ExecutionMode.LOCAL and ExecutionMode.VIRTUALENV."
+            )
+        if self.execution_mode == ExecutionMode.VIRTUALENV:
+            if self.invocation_mode == InvocationMode.DBT_RUNNER:
+                raise CosmosValueError(
+                    "InvocationMode.DBT_RUNNER has not been implemented for ExecutionMode.VIRTUALENV"
+                )
+            elif self.invocation_mode is None:
+                logger.debug(
+                    "Defaulting to InvocationMode.SUBPROCESS as it is the only supported invocation mode for ExecutionMode.VIRTUALENV"
+                )
+                self.invocation_mode = InvocationMode.SUBPROCESS
         self.project_path = Path(dbt_project_path) if dbt_project_path else None
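A minimal usage sketch (illustrative, not part of the diff) of the new ``ExecutionConfig`` behaviour above, with
imports mirroring those used in ``tests/test_config.py``::

    from cosmos.config import ExecutionConfig
    from cosmos.constants import ExecutionMode, InvocationMode

    # No invocation_mode given: VIRTUALENV now defaults to SUBPROCESS.
    config = ExecutionConfig(execution_mode=ExecutionMode.VIRTUALENV)
    assert config.invocation_mode == InvocationMode.SUBPROCESS

    # The unimplemented combination is rejected eagerly at construction time:
    # ExecutionConfig(execution_mode=ExecutionMode.VIRTUALENV, invocation_mode=InvocationMode.DBT_RUNNER)
    # raises CosmosValueError.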
diff --git a/cosmos/operators/local.py b/cosmos/operators/local.py
index 1104b43fb..c62f708e8 100644
--- a/cosmos/operators/local.py
+++ b/cosmos/operators/local.py
@@ -179,10 +179,8 @@ def handle_exception_subprocess(self, result: FullOutputSubprocessResult) -> Non
         if self.skip_exit_code is not None and result.exit_code == self.skip_exit_code:
             raise AirflowSkipException(f"dbt command returned exit code {self.skip_exit_code}. Skipping.")
         elif result.exit_code != 0:
-            raise AirflowException(
-                f"dbt command failed. The command returned a non-zero exit code {result.exit_code}. Details: ",
-                *result.full_output,
-            )
+            logger.error("\n".join(result.full_output))
+            raise AirflowException(f"dbt command failed. The command returned a non-zero exit code {result.exit_code}.")

     def handle_exception_dbt_runner(self, result: dbtRunnerResult) -> None:
         """dbtRunnerResult has an attribute `success` that is False if the command failed."""
diff --git a/cosmos/profiles/athena/access_key.py b/cosmos/profiles/athena/access_key.py
index 02de2be24..8dc14f839 100644
--- a/cosmos/profiles/athena/access_key.py
+++ b/cosmos/profiles/athena/access_key.py
@@ -66,9 +66,11 @@ def profile(self) -> dict[str, Any | None]:
             **self.profile_args,
             "aws_access_key_id": self.temporary_credentials.access_key,
             "aws_secret_access_key": self.get_env_var_format("aws_secret_access_key"),
-            "aws_session_token": self.get_env_var_format("aws_session_token"),
         }

+        if self.temporary_credentials.token:
+            profile["aws_session_token"] = self.get_env_var_format("aws_session_token")
+
         return self.filter_null(profile)

     @property
diff --git a/cosmos/profiles/bigquery/service_account_keyfile_dict.py b/cosmos/profiles/bigquery/service_account_keyfile_dict.py
index 038b34153..17858d7bb 100644
--- a/cosmos/profiles/bigquery/service_account_keyfile_dict.py
+++ b/cosmos/profiles/bigquery/service_account_keyfile_dict.py
@@ -22,7 +22,6 @@ class GoogleCloudServiceAccountDictProfileMapping(BaseProfileMapping):
     required_fields = [
         "project",
-        "dataset",
         "keyfile_json",
     ]

@@ -45,12 +44,14 @@ def profile(self) -> dict[str, Any | None]:
         Even though the Airflow connection contains hard-coded Service account credentials,
         we generate a temporary file and the DBT profile uses it.
         """
-        return {
+        profile_dict = {
             **self.mapped_params,
             "threads": 1,
             **self.profile_args,
         }

+        return self.filter_null(profile_dict)
+
     @property
     def mock_profile(self) -> dict[str, Any | None]:
         "Generates mock profile. Defaults `threads` to 1."
diff --git a/cosmos/settings.py b/cosmos/settings.py
index 44a08fd48..369913b93 100644
--- a/cosmos/settings.py
+++ b/cosmos/settings.py
@@ -10,7 +10,7 @@
 # In MacOS users may want to set the envvar `TMPDIR` if they do not want the value of the temp directory to change
 DEFAULT_CACHE_DIR = Path(tempfile.gettempdir(), DEFAULT_COSMOS_CACHE_DIR_NAME)
 cache_dir = Path(conf.get("cosmos", "cache_dir", fallback=DEFAULT_CACHE_DIR) or DEFAULT_CACHE_DIR)
-enable_cache = conf.get("cosmos", "enable_cache", fallback=True)
+enable_cache = conf.getboolean("cosmos", "enable_cache", fallback=True)
 propagate_logs = conf.getboolean("cosmos", "propagate_logs", fallback=True)
 dbt_docs_dir = conf.get("cosmos", "dbt_docs_dir", fallback=None)
 dbt_docs_conn_id = conf.get("cosmos", "dbt_docs_conn_id", fallback=None)
diff --git a/dev/Dockerfile b/dev/Dockerfile
index 07c46d3f2..cc6307f36 100644
--- a/dev/Dockerfile
+++ b/dev/Dockerfile
@@ -2,12 +2,22 @@ FROM quay.io/astronomer/astro-runtime:11.3.0-base

 USER root
+
+# dbt-postgres 1.8.0 requires building psycopg2 from source
+RUN set -eux; \
+    apt-get update; \
+    apt-get install -y --no-install-recommends build-essential libpq-dev; \
+    apt-get clean; \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip install -U uv
+
 COPY ./pyproject.toml ${AIRFLOW_HOME}/astronomer_cosmos/
 COPY ./README.rst ${AIRFLOW_HOME}/astronomer_cosmos/
 COPY ./cosmos/ ${AIRFLOW_HOME}/astronomer_cosmos/cosmos/

 # install the package in editable mode
-RUN pip install -e "${AIRFLOW_HOME}/astronomer_cosmos"[dbt-postgres,dbt-databricks]
+RUN uv pip install --system -e "${AIRFLOW_HOME}/astronomer_cosmos"[dbt-postgres,dbt-databricks]

 # make sure astro user owns the package
 RUN chown -R astro:astro ${AIRFLOW_HOME}/astronomer_cosmos
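The ``conf.getboolean`` fix in ``cosmos/settings.py`` above matters because ``conf.get`` returns a string, and any
non-empty string, including ``"False"``, is truthy. A minimal sketch of the failure mode (plain Python, only
approximating Airflow's actual boolean parsing)::

    raw = "False"  # what conf.get("cosmos", "enable_cache") yielded for enable_cache = False
    print(bool(raw))  # True -> the cache stayed enabled despite the setting

    # conf.getboolean() parses the string instead; roughly:
    print(raw.strip().lower() in ("t", "true", "1"))  # False -> cache correctly disabled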
diff --git a/docs/getting_started/execution-modes-local-conflicts.rst b/docs/getting_started/execution-modes-local-conflicts.rst
index 96921b6f7..3f537baba 100644
--- a/docs/getting_started/execution-modes-local-conflicts.rst
+++ b/docs/getting_started/execution-modes-local-conflicts.rst
@@ -10,24 +10,25 @@ If you find errors, we recommend users look into using `alternative execution modes
 In the following table, ``x`` represents combinations that lead to conflicts
 (vanilla ``apache-airflow`` and ``dbt-core`` packages):

-+---------------+-----+-----+-----+-----+-----+-----+-----+-----+
-| Airflow / DBT | 1.0 | 1.1 | 1.2 | 1.3 | 1.4 | 1.5 | 1.6 | 1.7 |
-+===============+=====+=====+=====+=====+=====+=====+=====+=====+
-| 2.2           |     |     |     | x   | x   | x   | x   | x   |
-+---------------+-----+-----+-----+-----+-----+-----+-----+-----+
-| 2.3           | x   | x   |     | x   | x   | x   | x   | x   |
-+---------------+-----+-----+-----+-----+-----+-----+-----+-----+
-| 2.4           | x   | x   | x   |     |     |     |     |     |
-+---------------+-----+-----+-----+-----+-----+-----+-----+-----+
-| 2.5           | x   | x   | x   |     |     |     |     |     |
-+---------------+-----+-----+-----+-----+-----+-----+-----+-----+
-| 2.6           | x   | x   | x   | x   | x   |     |     |     |
-+---------------+-----+-----+-----+-----+-----+-----+-----+-----+
-| 2.7           | x   | x   | x   | x   | x   |     |     |     |
-+---------------+-----+-----+-----+-----+-----+-----+-----+-----+
-| 2.8           | x   | x   | x   | x   | x   |     | x   | x   |
-+---------------+-----+-----+-----+-----+-----+-----+-----+-----+
-
++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+
+| Airflow / DBT | 1.0 | 1.1 | 1.2 | 1.3 | 1.4 | 1.5 | 1.6 | 1.7 | 1.8 |
++===============+=====+=====+=====+=====+=====+=====+=====+=====+=====+
+| 2.2           |     |     |     | x   | x   | x   | x   | x   | x   |
++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+
+| 2.3           | x   | x   |     | x   | x   | x   | x   | x   | x   |
++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+
+| 2.4           | x   | x   | x   |     |     |     |     |     |     |
++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+
+| 2.5           | x   | x   | x   |     |     |     |     |     |     |
++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+
+| 2.6           | x   | x   | x   | x   | x   |     |     |     |     |
++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+
+| 2.7           | x   | x   | x   | x   | x   |     |     |     |     |
++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+
+| 2.8           | x   | x   | x   | x   | x   |     | x   |     |     |
++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+
+| 2.9           | x   | x   | x   | x   | x   |     |     |     |     |
++---------------+-----+-----+-----+-----+-----+-----+-----+-----+-----+

 Examples of errors
 -----------------------------------
@@ -92,9 +93,11 @@ The table was created by running `nox `__ wi
     @nox.session(python=["3.10"])
     @nox.parametrize(
-        "dbt_version", ["1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7"]
+        "dbt_version", ["1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8"]
+    )
+    @nox.parametrize(
+        "airflow_version", ["2.2.4", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8", "2.9"]
     )
-    @nox.parametrize("airflow_version", ["2.2.4", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8"])
     def compatibility(session: nox.Session, airflow_version, dbt_version) -> None:
         """Run both unit and integration tests."""
         session.run(
diff --git a/docs/getting_started/execution-modes.rst b/docs/getting_started/execution-modes.rst
index 1765144d9..40e6103a4 100644
--- a/docs/getting_started/execution-modes.rst
+++ b/docs/getting_started/execution-modes.rst
@@ -87,6 +87,7 @@ Some drawbacks of this approach:

 - It is slower than ``local`` because it creates a new Python virtual environment for each Cosmos dbt task run.
 - If dbt is unavailable in the Airflow scheduler, the default ``LoadMode.DBT_LS`` will not work. In this scenario, users must use a `parsing method `_ that does not rely on dbt, such as ``LoadMode.MANIFEST``.
+- Currently, only ``InvocationMode.SUBPROCESS`` is supported; attempting to use ``InvocationMode.DBT_RUNNER`` will raise an error.

 Example of how to use:
diff --git a/pyproject.toml b/pyproject.toml
index 8044162fc..14b2fc705 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,7 @@ classifiers = [
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
 ]
 dependencies = [
     "aenum",
@@ -59,7 +60,7 @@ dbt-redshift = ["dbt-redshift"]
 dbt-snowflake = ["dbt-snowflake"]
 dbt-spark = ["dbt-spark"]
 dbt-vertica = ["dbt-vertica<=1.5.4"]
-openlineage = ["openlineage-integration-common", "openlineage-airflow"]
+openlineage = ["openlineage-integration-common!=1.15.0", "openlineage-airflow"]
 all = ["astronomer-cosmos[dbt-all]", "astronomer-cosmos[openlineage]"]
 docs = [
     "sphinx",
@@ -118,10 +119,10 @@ packages = ["/cosmos"]
 [tool.hatch.envs.tests]
 dependencies = [
     "astronomer-cosmos[tests]",
-    "apache-airflow-providers-postgres",
     "apache-airflow-providers-cncf-kubernetes>=5.1.1",
     "apache-airflow-providers-docker>=3.5.0",
     "apache-airflow-providers-microsoft-azure",
+    "apache-airflow-providers-postgres",
     "types-PyYAML",
     "types-attrs",
     "types-requests",
@@ -132,7 +133,7 @@ dependencies = [
 pre-install-commands = ["sh scripts/test/pre-install-airflow.sh {matrix:airflow} {matrix:python}"]

 [[tool.hatch.envs.tests.matrix]]
-python = ["3.8", "3.9", "3.10", "3.11"]
+python = ["3.8", "3.9", "3.10", "3.11", "3.12"]
 airflow = ["2.4", "2.5", "2.6", "2.7", "2.8", "2.9"]

 [tool.hatch.envs.tests.overrides]
diff --git a/tests/operators/test_base.py b/tests/operators/test_base.py
index 70e4059e7..3d39d43a7 100644
--- a/tests/operators/test_base.py
+++ b/tests/operators/test_base.py
@@ -1,3 +1,4 @@
+import sys
 from unittest.mock import patch

 import pytest
@@ -14,6 +15,10 @@
 )


+@pytest.mark.skipif(
+    (sys.version_info.major, sys.version_info.minor) == (3, 12),
+    reason="The error message for the abstract class instantiation seems to have changed between Python 3.11 and 3.12",
+)
 def test_dbt_base_operator_is_abstract():
     """Tests that the abstract base operator cannot be instantiated since the base_cmd is not defined."""
     expected_error = (
@@ -23,6 +28,20 @@ def test_dbt_base_operator_is_abstract():
         AbstractDbtBaseOperator()


+@pytest.mark.skipif(
+    (sys.version_info.major, sys.version_info.minor) != (3, 12),
+    reason="The error message for the abstract class instantiation seems to have changed between Python 3.11 and 3.12",
+)
+def test_dbt_base_operator_is_abstract_py12():
+    """Tests that the abstract base operator cannot be instantiated since the base_cmd is not defined."""
+    expected_error = (
+        "Can't instantiate abstract class AbstractDbtBaseOperator without an implementation for abstract methods "
+        "'base_cmd', 'build_and_run_cmd'"
+    )
+    with pytest.raises(TypeError, match=expected_error):
+        AbstractDbtBaseOperator()
+
+
 @pytest.mark.parametrize("cmd_flags", [["--some-flag"], []])
 @patch("cosmos.operators.base.AbstractDbtBaseOperator.build_and_run_cmd")
 def test_dbt_base_operator_execute(mock_build_and_run_cmd, cmd_flags, monkeypatch):
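The pair of version-gated tests above exists because CPython reworded the abstract-class instantiation error in
Python 3.12. A standalone illustration (standard library only, not part of the diff)::

    import abc
    import sys


    class Incomplete(abc.ABC):
        @abc.abstractmethod
        def base_cmd(self): ...


    try:
        Incomplete()
    except TypeError as err:
        # Python <= 3.11: "Can't instantiate abstract class Incomplete with abstract method base_cmd"
        # Python >= 3.12: "Can't instantiate abstract class Incomplete without an implementation
        #                  for abstract method 'base_cmd'"
        print(sys.version_info[:2], err)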
diff --git a/tests/operators/test_local.py b/tests/operators/test_local.py
index 0f35705b6..5513b1c4b 100644
--- a/tests/operators/test_local.py
+++ b/tests/operators/test_local.py
@@ -23,7 +23,7 @@
     parse_number_of_warnings_dbt_runner,
     parse_number_of_warnings_subprocess,
 )
-from cosmos.exceptions import AirflowCompatibilityError
+from cosmos.hooks.subprocess import FullOutputSubprocessResult
 from cosmos.operators.local import (
     DbtBuildLocalOperator,
     DbtDocsAzureStorageLocalOperator,
@@ -443,48 +443,6 @@ def test_run_operator_dataset_inlets_and_outlets(caplog):
     assert test_operator.outlets == []


-@pytest.mark.skipif(
-    version.parse(airflow_version) not in PARTIALLY_SUPPORTED_AIRFLOW_VERSIONS,
-    reason="Airflow 2.9.0 and 2.9.1 have a breaking change in Dataset URIs",
-    # https://github.com/apache/airflow/issues/39486
-)
-@pytest.mark.integration
-def test_run_operator_dataset_emission_fails(caplog):
-
-    with DAG("test-id-1", start_date=datetime(2022, 1, 1)) as dag:
-        seed_operator = DbtSeedLocalOperator(
-            profile_config=real_profile_config,
-            project_dir=DBT_PROJ_DIR,
-            task_id="seed",
-            dbt_cmd_flags=["--select", "raw_customers"],
-            install_deps=True,
-            append_env=True,
-        )
-        run_operator = DbtRunLocalOperator(
-            profile_config=real_profile_config,
-            project_dir=DBT_PROJ_DIR,
-            task_id="run",
-            dbt_cmd_flags=["--models", "stg_customers"],
-            install_deps=True,
-            append_env=True,
-        )
-
-        seed_operator >> run_operator
-
-    with pytest.raises(AirflowCompatibilityError) as exc:
-        run_test_dag(dag)
-
-    err_msg = str(exc.value)
-    assert (
-        "Apache Airflow 2.9.0 & 2.9.1 introduced a breaking change in Dataset URIs, to be fixed in newer versions"
-        in err_msg
-    )
-    assert (
-        "If you want to use Cosmos with one of these Airflow versions, you will have to disable emission of Datasets"
-        in err_msg
-    )
-
-
 @pytest.mark.skipif(
     version.parse(airflow_version) not in PARTIALLY_SUPPORTED_AIRFLOW_VERSIONS,
     reason="Airflow 2.9.0 and 2.9.1 have a breaking change in Dataset URIs",
@@ -957,3 +915,22 @@ def test_dbt_local_operator_on_kill_sigterm(mock_send_sigterm) -> None:
     dbt_base_operator.on_kill()

     mock_send_sigterm.assert_called_once()
+
+
+def test_handle_exception_subprocess(caplog):
+    """
+    Test the handle_exception_subprocess method of the DbtLocalBaseOperator class for non-zero dbt exit code.
+    """
+    operator = ConcreteDbtLocalBaseOperator(
+        profile_config=None,
+        task_id="my-task",
+        project_dir="my/dir",
+    )
+    result = FullOutputSubprocessResult(exit_code=1, output="test", full_output=["n" * n for n in range(1, 1000)])
+
+    caplog.set_level(logging.ERROR)
+    # Test when exit_code is non-zero
+    with pytest.raises(AirflowException) as err_context:
+        operator.handle_exception_subprocess(result)
+    assert len(str(err_context.value)) < 100  # Ensure the error message is not too long
+    assert len(caplog.text) > 1000  # Ensure the log message is not truncated
diff --git a/tests/profiles/athena/test_athena_access_key.py b/tests/profiles/athena/test_athena_access_key.py
index 71ba1eb05..c0a25b7e9 100644
--- a/tests/profiles/athena/test_athena_access_key.py
+++ b/tests/profiles/athena/test_athena_access_key.py
@@ -1,8 +1,10 @@
 "Tests for the Athena profile."
+from __future__ import annotations

 import json
 import sys
 from collections import namedtuple
+from unittest import mock
 from unittest.mock import MagicMock, patch

 import pytest
@@ -39,12 +41,7 @@ def get_credentials(self) -> Credentials:
         yield mock_aws_hook


-@pytest.fixture()
-def mock_athena_conn():  # type: ignore
-    """
-    Sets the connection as an environment variable.
- """ - +def mock_conn_value(token: str | None = None) -> Connection: conn = Connection( conn_id="my_athena_connection", conn_type="aws", @@ -52,7 +49,7 @@ def mock_athena_conn(): # type: ignore password="my_aws_secret_key", extra=json.dumps( { - "aws_session_token": "token123", + "aws_session_token": token, "database": "my_database", "region_name": "us-east-1", "s3_staging_dir": "s3://my_bucket/dbt/", @@ -60,7 +57,25 @@ def mock_athena_conn(): # type: ignore } ), ) + return conn + +@pytest.fixture() +def mock_athena_conn(): # type: ignore + """ + Sets the connection as an environment variable. + """ + conn = mock_conn_value(token="token123") + with patch("airflow.hooks.base.BaseHook.get_connection", return_value=conn): + yield conn + + +@pytest.fixture() +def mock_athena_conn_without_token(): # type: ignore + """ + Sets the connection as an environment variable. + """ + conn = mock_conn_value(token=None) with patch("airflow.hooks.base.BaseHook.get_connection", return_value=conn): yield conn @@ -151,6 +166,28 @@ def test_athena_profile_args( } +@mock.patch("cosmos.profiles.athena.access_key.AthenaAccessKeyProfileMapping._get_temporary_credentials") +def test_athena_profile_args_without_token(mock_temp_cred, mock_athena_conn_without_token: Connection) -> None: + """ + Tests that the profile values get set correctly for Athena. + """ + ReadOnlyCredentials = namedtuple("ReadOnlyCredentials", ["access_key", "secret_key", "token"]) + credentials = ReadOnlyCredentials(access_key="my_aws_access_key", secret_key="my_aws_secret_key", token=None) + mock_temp_cred.return_value = credentials + + profile_mapping = get_automatic_profile_mapping(mock_athena_conn_without_token.conn_id) + + assert profile_mapping.profile == { + "type": "athena", + "aws_access_key_id": "my_aws_access_key", + "aws_secret_access_key": "{{ env_var('COSMOS_CONN_AWS_AWS_SECRET_ACCESS_KEY') }}", + "database": mock_athena_conn_without_token.extra_dejson.get("database"), + "region_name": mock_athena_conn_without_token.extra_dejson.get("region_name"), + "s3_staging_dir": mock_athena_conn_without_token.extra_dejson.get("s3_staging_dir"), + "schema": mock_athena_conn_without_token.extra_dejson.get("schema"), + } + + def test_athena_profile_args_overrides( mock_athena_conn: Connection, ) -> None: diff --git a/tests/profiles/bigquery/test_bq_service_account_keyfile_dict.py b/tests/profiles/bigquery/test_bq_service_account_keyfile_dict.py index 4e56f5ba1..6f0d60b8d 100755 --- a/tests/profiles/bigquery/test_bq_service_account_keyfile_dict.py +++ b/tests/profiles/bigquery/test_bq_service_account_keyfile_dict.py @@ -64,8 +64,8 @@ def test_connection_claiming_succeeds(mock_bigquery_conn_with_dict: Connection): def test_connection_claiming_fails(mock_bigquery_conn_with_dict: Connection): - # Remove the `dataset` key, which is mandatory - mock_bigquery_conn_with_dict.extra = json.dumps({"project": "my_project", "keyfile_dict": sample_keyfile_dict}) + # Remove the `project` key, which is mandatory + mock_bigquery_conn_with_dict.extra = json.dumps({"dataset": "my_dataset", "keyfile_dict": sample_keyfile_dict}) profile_mapping = GoogleCloudServiceAccountDictProfileMapping(mock_bigquery_conn_with_dict, {}) assert not profile_mapping.can_claim_connection() @@ -96,7 +96,6 @@ def test_mock_profile(mock_bigquery_conn_with_dict: Connection): "type": "bigquery", "method": "service-account-json", "project": "mock_value", - "dataset": "mock_value", "threads": 1, "keyfile_json": None, } diff --git a/tests/test_config.py b/tests/test_config.py index 
diff --git a/tests/test_config.py b/tests/test_config.py
index acca546be..d7dc24cbe 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -199,15 +199,34 @@ def test_render_config_env_vars_deprecated():


 @pytest.mark.parametrize(
-    "execution_mode, expectation",
+    "execution_mode, invocation_mode, expectation",
     [
-        (ExecutionMode.LOCAL, does_not_raise()),
-        (ExecutionMode.VIRTUALENV, pytest.raises(CosmosValueError)),
-        (ExecutionMode.KUBERNETES, pytest.raises(CosmosValueError)),
-        (ExecutionMode.DOCKER, pytest.raises(CosmosValueError)),
-        (ExecutionMode.AZURE_CONTAINER_INSTANCE, pytest.raises(CosmosValueError)),
+        (ExecutionMode.LOCAL, InvocationMode.DBT_RUNNER, does_not_raise()),
+        (ExecutionMode.LOCAL, InvocationMode.SUBPROCESS, does_not_raise()),
+        (ExecutionMode.LOCAL, None, does_not_raise()),
+        (ExecutionMode.VIRTUALENV, InvocationMode.DBT_RUNNER, pytest.raises(CosmosValueError)),
+        (ExecutionMode.VIRTUALENV, InvocationMode.SUBPROCESS, does_not_raise()),
+        (ExecutionMode.VIRTUALENV, None, does_not_raise()),
+        (ExecutionMode.KUBERNETES, InvocationMode.DBT_RUNNER, pytest.raises(CosmosValueError)),
+        (ExecutionMode.DOCKER, InvocationMode.DBT_RUNNER, pytest.raises(CosmosValueError)),
+        (ExecutionMode.AZURE_CONTAINER_INSTANCE, InvocationMode.DBT_RUNNER, pytest.raises(CosmosValueError)),
     ],
 )
-def test_execution_config_with_invocation_option(execution_mode, expectation):
+def test_execution_config_with_invocation_option(execution_mode, invocation_mode, expectation):
     with expectation:
-        ExecutionConfig(execution_mode=execution_mode, invocation_mode=InvocationMode.DBT_RUNNER)
+        ExecutionConfig(execution_mode=execution_mode, invocation_mode=invocation_mode)
+
+
+@pytest.mark.parametrize(
+    "execution_mode, expected_invocation_mode",
+    [
+        (ExecutionMode.LOCAL, None),
+        (ExecutionMode.VIRTUALENV, InvocationMode.SUBPROCESS),
+        (ExecutionMode.KUBERNETES, None),
+        (ExecutionMode.DOCKER, None),
+        (ExecutionMode.AZURE_CONTAINER_INSTANCE, None),
+    ],
+)
+def test_execution_config_default_config(execution_mode, expected_invocation_mode):
+    execution_config = ExecutionConfig(execution_mode=execution_mode)
+    assert execution_config.invocation_mode == expected_invocation_mode
diff --git a/tests/test_settings.py b/tests/test_settings.py
new file mode 100644
index 000000000..d9f5e0f6e
--- /dev/null
+++ b/tests/test_settings.py
@@ -0,0 +1,11 @@
+import os
+from importlib import reload
+from unittest.mock import patch
+
+from cosmos import settings
+
+
+@patch.dict(os.environ, {"AIRFLOW__COSMOS__ENABLE_CACHE": "False"}, clear=True)
+def test_enable_cache_env_var():
+    reload(settings)
+    assert settings.enable_cache is False
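A closing usage note (illustrative, not part of the diff): ``cosmos/settings.py`` reads its configuration at import
time, so disabling the cache in a deployment means setting the variable before Cosmos is imported -- which is what the
``reload`` in ``tests/test_settings.py`` above simulates::

    import os

    # Airflow maps AIRFLOW__<SECTION>__<KEY> env vars onto conf; set it before importing Cosmos.
    os.environ["AIRFLOW__COSMOS__ENABLE_CACHE"] = "False"

    from cosmos import settings

    assert settings.enable_cache is False  # parsed by conf.getboolean after the fix above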