From 55f6b4f98a80a1575fb547b4626a467e6b96b2a5 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 2 Dec 2024 16:47:51 -0800 Subject: [PATCH 01/33] run mypy on all files, including unchanged --- .github/workflows/python_lint.yml | 12 +++--------- bin/run-mypy-on-modified-files.sh | 13 ------------- pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 23 deletions(-) delete mode 100755 bin/run-mypy-on-modified-files.sh diff --git a/.github/workflows/python_lint.yml b/.github/workflows/python_lint.yml index f437f3b7f..e3f690b4c 100644 --- a/.github/workflows/python_lint.yml +++ b/.github/workflows/python_lint.yml @@ -72,14 +72,8 @@ jobs: - name: Install dependencies run: poetry install --all-extras - # Job-specifc step(s): + # Job-specific step(s): - # For now, we run mypy only on modified files - - name: Get changed Python files - id: changed-py-files - uses: tj-actions/changed-files@v43 - with: - files: "airbyte_cdk/**/*.py" - - name: Run mypy on changed files + - name: Run mypy if: steps.changed-py-files.outputs.any_changed == 'true' - run: poetry run mypy ${{ steps.changed-py-files.outputs.all_changed_files }} --config-file mypy.ini --install-types --non-interactive + run: poetry run mypy --config-file mypy.ini --install-types --non-interactive diff --git a/bin/run-mypy-on-modified-files.sh b/bin/run-mypy-on-modified-files.sh deleted file mode 100755 index 537e3c843..000000000 --- a/bin/run-mypy-on-modified-files.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env sh - -set -e - -# Ensure script always runs from the project directory. -cd "$(dirname "${0}")/.." || exit 1 - -# TODO change this to include unit_tests as well once it's in a good state -{ - git diff --name-only --diff-filter=d --relative ':(exclude)unit_tests' - git diff --name-only --diff-filter=d --staged --relative ':(exclude)unit_tests' - git diff --name-only --diff-filter=d main... --relative ':(exclude)unit_tests' -} | grep -E '\.py$' | sort | uniq | xargs mypy --config-file mypy.ini --install-types --non-interactive diff --git a/pyproject.toml b/pyproject.toml index 8713fdbf5..c1d3fb37a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -140,7 +140,7 @@ format-fix = {sequence = ["_format-fix-ruff", "_format-fix-prettier"], help = "F # Linting/Typing check tasks _lint-ruff = {cmd = "poetry run ruff check .", help = "Lint with Ruff."} -type-check = {cmd = "bin/run-mypy-on-modified-files.sh", help = "Type check modified files with mypy."} +type-check = {cmd = "poetry run mypy", help = "Type check modified files with mypy."} lint = {sequence = ["_lint-ruff", "type-check"], help = "Lint all code. Includes type checking.", ignore_fail = "return_non_zero"} # Lockfile check task From 4d48d3fc881e357bc6c58737fd2bcd22826325d2 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 2 Dec 2024 16:48:39 -0800 Subject: [PATCH 02/33] fix mypy syntax --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c1d3fb37a..8bdc00b7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -140,7 +140,7 @@ format-fix = {sequence = ["_format-fix-ruff", "_format-fix-prettier"], help = "F # Linting/Typing check tasks _lint-ruff = {cmd = "poetry run ruff check .", help = "Lint with Ruff."} -type-check = {cmd = "poetry run mypy", help = "Type check modified files with mypy."} +type-check = {cmd = "poetry run mypy .", help = "Type check modified files with mypy."} lint = {sequence = ["_lint-ruff", "type-check"], help = "Lint all code. Includes type checking.", ignore_fail = "return_non_zero"} # Lockfile check task From d3ea7248266fd4e4bb13de24ad3c3bcf0a68b483 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 2 Dec 2024 16:51:44 -0800 Subject: [PATCH 03/33] chore: `poetry add --dev types-requests` --- poetry.lock | 21 +++++++++++++++------ pyproject.toml | 1 + 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0fef0a902..d6b0ac52a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4279,11 +4279,6 @@ files = [ {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, - {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, @@ -4889,6 +4884,20 @@ files = [ {file = "types_pytz-2024.2.0.20241003-py3-none-any.whl", hash = "sha256:3e22df1336c0c6ad1d29163c8fda82736909eb977281cb823c57f8bae07118b7"}, ] +[[package]] +name = "types-requests" +version = "2.32.0.20241016" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-requests-2.32.0.20241016.tar.gz", hash = "sha256:0d9cad2f27515d0e3e3da7134a1b6f28fb97129d86b867f24d9c726452634d95"}, + {file = "types_requests-2.32.0.20241016-py3-none-any.whl", hash = "sha256:4195d62d6d3e043a4eaaf08ff8a62184584d2e8684e9d2aa178c7915a7da3747"}, +] + +[package.dependencies] +urllib3 = ">=2" + [[package]] name = "typing-extensions" version = "4.12.2" @@ -5317,4 +5326,4 @@ vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.13" -content-hash = "1ec99125d31558fbc2e78f3aa109da6d35bc476a39af0e0cdf77a6d357f1b214" +content-hash = "d6e0b1a43f483da35e74c41c4f6a4f80c032ee902ce588eeb30505e2ac47255a" diff --git a/pyproject.toml b/pyproject.toml index 8bdc00b7e..ad56ea842 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,6 +99,7 @@ pytest-cov = "*" pytest-httpserver = "*" pytest-mock = "*" requests-mock = "*" +types-requests = "^2.32.0.20241016" [tool.poetry.extras] file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy"] From 3b9b38937b58ed840ee38ed55ffb525cb8acbf4a Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 2 Dec 2024 17:51:14 -0800 Subject: [PATCH 04/33] chore: `poetry add --dev types-python-dateutil` --- poetry.lock | 13 ++++++++++++- pyproject.toml | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index d6b0ac52a..bd939564d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4873,6 +4873,17 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20241003" +description = "Typing stubs for python-dateutil" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"}, + {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, +] + [[package]] name = "types-pytz" version = "2024.2.0.20241003" @@ -5326,4 +5337,4 @@ vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.13" -content-hash = "d6e0b1a43f483da35e74c41c4f6a4f80c032ee902ce588eeb30505e2ac47255a" +content-hash = "3c688a6115c64070dc14266a4699fcf8b9c8ce2c48f3fe55ce128de590ff9f85" diff --git a/pyproject.toml b/pyproject.toml index ad56ea842..9435c4f92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,6 +100,7 @@ pytest-httpserver = "*" pytest-mock = "*" requests-mock = "*" types-requests = "^2.32.0.20241016" +types-python-dateutil = "^2.9.0.20241003" [tool.poetry.extras] file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy"] From 69fc701ff8b3d77539a811e6ce35742bf287f5d5 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 2 Dec 2024 17:53:04 -0800 Subject: [PATCH 05/33] chore: `poetry add --dev types-PyYaml` --- poetry.lock | 13 ++++++++++++- pyproject.toml | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index bd939564d..beecbd107 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4895,6 +4895,17 @@ files = [ {file = "types_pytz-2024.2.0.20241003-py3-none-any.whl", hash = "sha256:3e22df1336c0c6ad1d29163c8fda82736909eb977281cb823c57f8bae07118b7"}, ] +[[package]] +name = "types-pyyaml" +version = "6.0.12.20240917" +description = "Typing stubs for PyYAML" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-PyYAML-6.0.12.20240917.tar.gz", hash = "sha256:d1405a86f9576682234ef83bcb4e6fff7c9305c8b1fbad5e0bcd4f7dbdc9c587"}, + {file = "types_PyYAML-6.0.12.20240917-py3-none-any.whl", hash = "sha256:392b267f1c0fe6022952462bf5d6523f31e37f6cea49b14cee7ad634b6301570"}, +] + [[package]] name = "types-requests" version = "2.32.0.20241016" @@ -5337,4 +5348,4 @@ vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.13" -content-hash = "3c688a6115c64070dc14266a4699fcf8b9c8ce2c48f3fe55ce128de590ff9f85" +content-hash = "c9b702bb12589195befeb5310edbe1bc648beedd010f7e0f8e4eafb783619ac5" diff --git a/pyproject.toml b/pyproject.toml index 9435c4f92..28b02b513 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,6 +101,7 @@ pytest-mock = "*" requests-mock = "*" types-requests = "^2.32.0.20241016" types-python-dateutil = "^2.9.0.20241003" +types-pyyaml = "^6.0.12.20240917" [tool.poetry.extras] file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy"] From f168266e538c88aecdaa0e6d9420f2134b417b38 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 2 Dec 2024 18:20:34 -0800 Subject: [PATCH 06/33] chore: replace deprecated `deprecated` refs with PEP 702 `deprecated` --- .../interpolated_request_options_provider.py | 2 +- .../declarative/retrievers/async_retriever.py | 2 +- .../stream/abstract_file_based_stream.py | 4 +-- .../file_based/stream/concurrent/adapters.py | 4 +-- .../streams/concurrent/abstract_stream.py | 2 +- .../sources/streams/concurrent/adapters.py | 2 +- .../concurrent/availability_strategy.py | 2 +- airbyte_cdk/sources/streams/core.py | 11 ++++---- airbyte_cdk/sources/streams/http/http.py | 26 +++++++++---------- .../test_concurrent_declarative_source.py | 2 +- 10 files changed, 28 insertions(+), 29 deletions(-) diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py index e8e4ee643..c327b83da 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py @@ -5,7 +5,7 @@ from dataclasses import InitVar, dataclass, field from typing import Any, Mapping, MutableMapping, Optional, Union -from deprecated import deprecated +from typing_extensions import deprecated from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import ( diff --git a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py index 5cea85bcb..c60581590 100644 --- a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py @@ -4,7 +4,7 @@ from dataclasses import InitVar, dataclass, field from typing import Any, Callable, Iterable, Mapping, Optional -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.models import FailureType from airbyte_cdk.sources.declarative.async_job.job_orchestrator import ( diff --git a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py index 5423ffa9f..ef258b34d 100644 --- a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py @@ -6,7 +6,7 @@ from functools import cache, cached_property, lru_cache from typing import Any, Dict, Iterable, List, Mapping, Optional, Type -from deprecated import deprecated +from typing_extensions import deprecated from airbyte_cdk import AirbyteMessage from airbyte_cdk.models import SyncMode @@ -179,7 +179,7 @@ def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool: ) @cached_property - @deprecated(version="3.7.0") + @deprecated("Deprecated as of CDK version 3.7.0.") def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy: return self._availability_strategy diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py index ca4671eda..9d7bcf48b 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py @@ -7,7 +7,7 @@ from functools import cache, lru_cache from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.models import ( AirbyteLogMessage, @@ -143,7 +143,7 @@ def supports_incremental(self) -> bool: return self._legacy_stream.supports_incremental @property - @deprecated(version="3.7.0") + @deprecated("Deprecated as of CDK version 3.7.0.") def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy: return self._legacy_stream.availability_strategy diff --git a/airbyte_cdk/sources/streams/concurrent/abstract_stream.py b/airbyte_cdk/sources/streams/concurrent/abstract_stream.py index decf645b7..b41b1c2d8 100644 --- a/airbyte_cdk/sources/streams/concurrent/abstract_stream.py +++ b/airbyte_cdk/sources/streams/concurrent/abstract_stream.py @@ -5,7 +5,7 @@ from abc import ABC, abstractmethod from typing import Any, Iterable, Mapping, Optional -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.models import AirbyteStream from airbyte_cdk.sources.source import ExperimentalClassWarning diff --git a/airbyte_cdk/sources/streams/concurrent/adapters.py b/airbyte_cdk/sources/streams/concurrent/adapters.py index fe00cf677..baf129a66 100644 --- a/airbyte_cdk/sources/streams/concurrent/adapters.py +++ b/airbyte_cdk/sources/streams/concurrent/adapters.py @@ -8,7 +8,7 @@ from functools import lru_cache from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.models import ( AirbyteLogMessage, diff --git a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py index c69a9f3e5..ed24b2980 100644 --- a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py +++ b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py @@ -6,7 +6,7 @@ from abc import ABC, abstractmethod from typing import Optional -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.sources.source import ExperimentalClassWarning diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index 51c3682d6..8320b8269 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -10,7 +10,7 @@ from functools import cached_property, lru_cache from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union -from deprecated import deprecated +from typing_extensions import deprecated import airbyte_cdk.sources.utils.casing as casing from airbyte_cdk.models import ( @@ -92,8 +92,8 @@ def state(self, value: MutableMapping[str, Any]) -> None: @deprecated( - version="0.87.0", - reason="Deprecated in favor of the CheckpointMixin which offers similar functionality", + "Deprecated as of CDK version 0.87.0. ", + "Deprecated in favor of the CheckpointMixin which offers similar functionality.", ) class IncrementalMixin(CheckpointMixin, ABC): """Mixin to make stream incremental. @@ -117,9 +117,8 @@ class StreamClassification: # Moved to class declaration since get_updated_state is called on every record for incremental syncs, and thus the @deprecated decorator as well. @deprecated( - version="0.1.49", - reason="Deprecated method get_updated_state, You should use explicit state property instead, see IncrementalMixin docs.", - action="ignore", + "Deprecated as of CDK version 0.1.49. ", + "Deprecated method get_updated_state, You should use explicit state property instead, see IncrementalMixin docs.", ) class Stream(ABC): """ diff --git a/airbyte_cdk/sources/streams/http/http.py b/airbyte_cdk/sources/streams/http/http.py index f465671be..3be146907 100644 --- a/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte_cdk/sources/streams/http/http.py @@ -9,8 +9,8 @@ from urllib.parse import urljoin import requests -from deprecated import deprecated from requests.auth import AuthBase +from typing_extensions import deprecated from airbyte_cdk.models import AirbyteMessage, FailureType, SyncMode from airbyte_cdk.models import Type as MessageType @@ -121,8 +121,8 @@ def http_method(self) -> str: @property @deprecated( - version="3.0.0", - reason="You should set error_handler explicitly in HttpStream.get_error_handler() instead.", + "Deprecated as of CDK version 3.0.0. ", + "You should set error_handler explicitly in HttpStream.get_error_handler() instead.", ) def raise_on_http_errors(self) -> bool: """ @@ -132,8 +132,8 @@ def raise_on_http_errors(self) -> bool: @property @deprecated( - version="3.0.0", - reason="You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead.", + "Deprecated as of CDK version 3.0.0. " + "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead." ) def max_retries(self) -> Union[int, None]: """ @@ -143,8 +143,8 @@ def max_retries(self) -> Union[int, None]: @property @deprecated( - version="3.0.0", - reason="You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead.", + "Deprecated as of CDK version 3.0.0. " + "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead." ) def max_time(self) -> Union[int, None]: """ @@ -154,8 +154,8 @@ def max_time(self) -> Union[int, None]: @property @deprecated( - version="3.0.0", - reason="You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead.", + "Deprecated as of CDK version 3.0.0. " + "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead." ) def retry_factor(self) -> float: """ @@ -603,8 +603,8 @@ def stream_slices( @deprecated( - version="3.0.0", - reason="You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead.", + "Deprecated as of CDK version 3.0.0." + "You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead." ) class HttpStreamAdapterBackoffStrategy(BackoffStrategy): def __init__(self, stream: HttpStream): @@ -619,8 +619,8 @@ def backoff_time( @deprecated( - version="3.0.0", - reason="You should set error_handler explicitly in HttpStream.get_error_handler() instead.", + "Deprecated as of CDK version 3.0.0. " + "You should set error_handler explicitly in HttpStream.get_error_handler() instead." ) class HttpStreamAdapterHttpStatusErrorHandler(HttpStatusErrorHandler): def __init__(self, stream: HttpStream, **kwargs): # type: ignore # noqa diff --git a/unit_tests/sources/declarative/test_concurrent_declarative_source.py b/unit_tests/sources/declarative/test_concurrent_declarative_source.py index f9f89fae6..2920ff0a9 100644 --- a/unit_tests/sources/declarative/test_concurrent_declarative_source.py +++ b/unit_tests/sources/declarative/test_concurrent_declarative_source.py @@ -10,7 +10,7 @@ import freezegun import isodate import pendulum -from deprecated.classic import deprecated +from typing_extensions import deprecated from airbyte_cdk.models import ( AirbyteMessage, From 9c27d7e6cb72230097ec03cf179f532614bca340 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 2 Dec 2024 18:25:12 -0800 Subject: [PATCH 07/33] chore: add missing cachetools stubs --- poetry.lock | 170 +++++++++++++++++++++++-------------------------- pyproject.toml | 1 + 2 files changed, 79 insertions(+), 92 deletions(-) diff --git a/poetry.lock b/poetry.lock index beecbd107..b7b9736bf 100644 --- a/poetry.lock +++ b/poetry.lock @@ -930,23 +930,6 @@ files = [ marshmallow = ">=3.18.0,<4.0.0" typing-inspect = ">=0.4.0,<1" -[[package]] -name = "deprecated" -version = "1.2.15" -description = "Python @deprecated decorator to deprecate old python classes, functions or methods." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" -files = [ - {file = "Deprecated-1.2.15-py2.py3-none-any.whl", hash = "sha256:353bc4a8ac4bfc96800ddab349d89c25dec1079f65fd53acdcc1e0b975b21320"}, - {file = "deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d"}, -] - -[package.dependencies] -wrapt = ">=1.10,<2" - -[package.extras] -dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "jinja2 (>=3.0.3,<3.1.0)", "setuptools", "sphinx (<2)", "tox"] - [[package]] name = "docutils" version = "0.17.1" @@ -4873,6 +4856,46 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "types-cachetools" +version = "5.5.0.20240820" +description = "Typing stubs for cachetools" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-cachetools-5.5.0.20240820.tar.gz", hash = "sha256:b888ab5c1a48116f7799cd5004b18474cd82b5463acb5ffb2db2fc9c7b053bc0"}, + {file = "types_cachetools-5.5.0.20240820-py3-none-any.whl", hash = "sha256:efb2ed8bf27a4b9d3ed70d33849f536362603a90b8090a328acf0cd42fda82e2"}, +] + +[[package]] +name = "types-cffi" +version = "1.16.0.20240331" +description = "Typing stubs for cffi" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-cffi-1.16.0.20240331.tar.gz", hash = "sha256:b8b20d23a2b89cfed5f8c5bc53b0cb8677c3aac6d970dbc771e28b9c698f5dee"}, + {file = "types_cffi-1.16.0.20240331-py3-none-any.whl", hash = "sha256:a363e5ea54a4eb6a4a105d800685fde596bc318089b025b27dee09849fe41ff0"}, +] + +[package.dependencies] +types-setuptools = "*" + +[[package]] +name = "types-pyopenssl" +version = "24.1.0.20240722" +description = "Typing stubs for pyOpenSSL" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-pyOpenSSL-24.1.0.20240722.tar.gz", hash = "sha256:47913b4678a01d879f503a12044468221ed8576263c1540dcb0484ca21b08c39"}, + {file = "types_pyOpenSSL-24.1.0.20240722-py3-none-any.whl", hash = "sha256:6a7a5d2ec042537934cfb4c9d4deb0e16c4c6250b09358df1f083682fe6fda54"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-cffi = "*" + [[package]] name = "types-python-dateutil" version = "2.9.0.20241003" @@ -4906,6 +4929,21 @@ files = [ {file = "types_PyYAML-6.0.12.20240917-py3-none-any.whl", hash = "sha256:392b267f1c0fe6022952462bf5d6523f31e37f6cea49b14cee7ad634b6301570"}, ] +[[package]] +name = "types-redis" +version = "4.6.0.20241004" +description = "Typing stubs for redis" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-redis-4.6.0.20241004.tar.gz", hash = "sha256:5f17d2b3f9091ab75384153bfa276619ffa1cf6a38da60e10d5e6749cc5b902e"}, + {file = "types_redis-4.6.0.20241004-py3-none-any.whl", hash = "sha256:ef5da68cb827e5f606c8f9c0b49eeee4c2669d6d97122f301d3a55dc6a63f6ed"}, +] + +[package.dependencies] +cryptography = ">=35.0.0" +types-pyOpenSSL = "*" + [[package]] name = "types-requests" version = "2.32.0.20241016" @@ -4920,6 +4958,28 @@ files = [ [package.dependencies] urllib3 = ">=2" +[[package]] +name = "types-setuptools" +version = "75.6.0.20241126" +description = "Typing stubs for setuptools" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types_setuptools-75.6.0.20241126-py3-none-any.whl", hash = "sha256:aaae310a0e27033c1da8457d4d26ac673b0c8a0de7272d6d4708e263f2ea3b9b"}, + {file = "types_setuptools-75.6.0.20241126.tar.gz", hash = "sha256:7bf25ad4be39740e469f9268b6beddda6e088891fa5a27e985c6ce68bf62ace0"}, +] + +[[package]] +name = "types-ujson" +version = "5.10.0.20240515" +description = "Typing stubs for ujson" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-ujson-5.10.0.20240515.tar.gz", hash = "sha256:ceae7127f0dafe4af5dd0ecf98ee13e9d75951ef963b5c5a9b7ea92e0d71f0d7"}, + {file = "types_ujson-5.10.0.20240515-py3-none-any.whl", hash = "sha256:02bafc36b3a93d2511757a64ff88bd505e0a57fba08183a9150fbcfcb2015310"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -5128,80 +5188,6 @@ MarkupSafe = ">=2.1.1" [package.extras] watchdog = ["watchdog (>=2.3)"] -[[package]] -name = "wrapt" -version = "1.17.0" -description = "Module for decorators, wrappers and monkey patching." -optional = false -python-versions = ">=3.8" -files = [ - {file = "wrapt-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a0c23b8319848426f305f9cb0c98a6e32ee68a36264f45948ccf8e7d2b941f8"}, - {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1ca5f060e205f72bec57faae5bd817a1560fcfc4af03f414b08fa29106b7e2d"}, - {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e185ec6060e301a7e5f8461c86fb3640a7beb1a0f0208ffde7a65ec4074931df"}, - {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb90765dd91aed05b53cd7a87bd7f5c188fcd95960914bae0d32c5e7f899719d"}, - {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:879591c2b5ab0a7184258274c42a126b74a2c3d5a329df16d69f9cee07bba6ea"}, - {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fce6fee67c318fdfb7f285c29a82d84782ae2579c0e1b385b7f36c6e8074fffb"}, - {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0698d3a86f68abc894d537887b9bbf84d29bcfbc759e23f4644be27acf6da301"}, - {file = "wrapt-1.17.0-cp310-cp310-win32.whl", hash = "sha256:69d093792dc34a9c4c8a70e4973a3361c7a7578e9cd86961b2bbf38ca71e4e22"}, - {file = "wrapt-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:f28b29dc158ca5d6ac396c8e0a2ef45c4e97bb7e65522bfc04c989e6fe814575"}, - {file = "wrapt-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:74bf625b1b4caaa7bad51d9003f8b07a468a704e0644a700e936c357c17dd45a"}, - {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f2a28eb35cf99d5f5bd12f5dd44a0f41d206db226535b37b0c60e9da162c3ed"}, - {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:81b1289e99cf4bad07c23393ab447e5e96db0ab50974a280f7954b071d41b489"}, - {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2939cd4a2a52ca32bc0b359015718472d7f6de870760342e7ba295be9ebaf9"}, - {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6a9653131bda68a1f029c52157fd81e11f07d485df55410401f745007bd6d339"}, - {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4e4b4385363de9052dac1a67bfb535c376f3d19c238b5f36bddc95efae15e12d"}, - {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bdf62d25234290db1837875d4dceb2151e4ea7f9fff2ed41c0fde23ed542eb5b"}, - {file = "wrapt-1.17.0-cp311-cp311-win32.whl", hash = "sha256:5d8fd17635b262448ab8f99230fe4dac991af1dabdbb92f7a70a6afac8a7e346"}, - {file = "wrapt-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:92a3d214d5e53cb1db8b015f30d544bc9d3f7179a05feb8f16df713cecc2620a"}, - {file = "wrapt-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:89fc28495896097622c3fc238915c79365dd0ede02f9a82ce436b13bd0ab7569"}, - {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:875d240fdbdbe9e11f9831901fb8719da0bd4e6131f83aa9f69b96d18fae7504"}, - {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5ed16d95fd142e9c72b6c10b06514ad30e846a0d0917ab406186541fe68b451"}, - {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18b956061b8db634120b58f668592a772e87e2e78bc1f6a906cfcaa0cc7991c1"}, - {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:daba396199399ccabafbfc509037ac635a6bc18510ad1add8fd16d4739cdd106"}, - {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4d63f4d446e10ad19ed01188d6c1e1bb134cde8c18b0aa2acfd973d41fcc5ada"}, - {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8a5e7cc39a45fc430af1aefc4d77ee6bad72c5bcdb1322cfde852c15192b8bd4"}, - {file = "wrapt-1.17.0-cp312-cp312-win32.whl", hash = "sha256:0a0a1a1ec28b641f2a3a2c35cbe86c00051c04fffcfcc577ffcdd707df3f8635"}, - {file = "wrapt-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:3c34f6896a01b84bab196f7119770fd8466c8ae3dfa73c59c0bb281e7b588ce7"}, - {file = "wrapt-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:714c12485aa52efbc0fc0ade1e9ab3a70343db82627f90f2ecbc898fdf0bb181"}, - {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da427d311782324a376cacb47c1a4adc43f99fd9d996ffc1b3e8529c4074d393"}, - {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba1739fb38441a27a676f4de4123d3e858e494fac05868b7a281c0a383c098f4"}, - {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e711fc1acc7468463bc084d1b68561e40d1eaa135d8c509a65dd534403d83d7b"}, - {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:140ea00c87fafc42739bd74a94a5a9003f8e72c27c47cd4f61d8e05e6dec8721"}, - {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:73a96fd11d2b2e77d623a7f26e004cc31f131a365add1ce1ce9a19e55a1eef90"}, - {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0b48554952f0f387984da81ccfa73b62e52817a4386d070c75e4db7d43a28c4a"}, - {file = "wrapt-1.17.0-cp313-cp313-win32.whl", hash = "sha256:498fec8da10e3e62edd1e7368f4b24aa362ac0ad931e678332d1b209aec93045"}, - {file = "wrapt-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd136bb85f4568fffca995bd3c8d52080b1e5b225dbf1c2b17b66b4c5fa02838"}, - {file = "wrapt-1.17.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:17fcf043d0b4724858f25b8826c36e08f9fb2e475410bece0ec44a22d533da9b"}, - {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4a557d97f12813dc5e18dad9fa765ae44ddd56a672bb5de4825527c847d6379"}, - {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0229b247b0fc7dee0d36176cbb79dbaf2a9eb7ecc50ec3121f40ef443155fb1d"}, - {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8425cfce27b8b20c9b89d77fb50e368d8306a90bf2b6eef2cdf5cd5083adf83f"}, - {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9c900108df470060174108012de06d45f514aa4ec21a191e7ab42988ff42a86c"}, - {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:4e547b447073fc0dbfcbff15154c1be8823d10dab4ad401bdb1575e3fdedff1b"}, - {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:914f66f3b6fc7b915d46c1cc424bc2441841083de01b90f9e81109c9759e43ab"}, - {file = "wrapt-1.17.0-cp313-cp313t-win32.whl", hash = "sha256:a4192b45dff127c7d69b3bdfb4d3e47b64179a0b9900b6351859f3001397dabf"}, - {file = "wrapt-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4f643df3d4419ea3f856c5c3f40fec1d65ea2e89ec812c83f7767c8730f9827a"}, - {file = "wrapt-1.17.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:69c40d4655e078ede067a7095544bcec5a963566e17503e75a3a3e0fe2803b13"}, - {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f495b6754358979379f84534f8dd7a43ff8cff2558dcdea4a148a6e713a758f"}, - {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:baa7ef4e0886a6f482e00d1d5bcd37c201b383f1d314643dfb0367169f94f04c"}, - {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8fc931382e56627ec4acb01e09ce66e5c03c384ca52606111cee50d931a342d"}, - {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8f8909cdb9f1b237786c09a810e24ee5e15ef17019f7cecb207ce205b9b5fcce"}, - {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ad47b095f0bdc5585bced35bd088cbfe4177236c7df9984b3cc46b391cc60627"}, - {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:948a9bd0fb2c5120457b07e59c8d7210cbc8703243225dbd78f4dfc13c8d2d1f"}, - {file = "wrapt-1.17.0-cp38-cp38-win32.whl", hash = "sha256:5ae271862b2142f4bc687bdbfcc942e2473a89999a54231aa1c2c676e28f29ea"}, - {file = "wrapt-1.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:f335579a1b485c834849e9075191c9898e0731af45705c2ebf70e0cd5d58beed"}, - {file = "wrapt-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d751300b94e35b6016d4b1e7d0e7bbc3b5e1751e2405ef908316c2a9024008a1"}, - {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7264cbb4a18dc4acfd73b63e4bcfec9c9802614572025bdd44d0721983fc1d9c"}, - {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33539c6f5b96cf0b1105a0ff4cf5db9332e773bb521cc804a90e58dc49b10578"}, - {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c30970bdee1cad6a8da2044febd824ef6dc4cc0b19e39af3085c763fdec7de33"}, - {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:bc7f729a72b16ee21795a943f85c6244971724819819a41ddbaeb691b2dd85ad"}, - {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6ff02a91c4fc9b6a94e1c9c20f62ea06a7e375f42fe57587f004d1078ac86ca9"}, - {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2dfb7cff84e72e7bf975b06b4989477873dcf160b2fd89959c629535df53d4e0"}, - {file = "wrapt-1.17.0-cp39-cp39-win32.whl", hash = "sha256:2399408ac33ffd5b200480ee858baa58d77dd30e0dd0cab6a8a9547135f30a88"}, - {file = "wrapt-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:4f763a29ee6a20c529496a20a7bcb16a73de27f5da6a843249c7047daf135977"}, - {file = "wrapt-1.17.0-py3-none-any.whl", hash = "sha256:d2c63b93548eda58abf5188e505ffed0229bf675f7c3090f8e36ad55b8cbc371"}, - {file = "wrapt-1.17.0.tar.gz", hash = "sha256:16187aa2317c731170a88ef35e8937ae0f533c402872c1ee5e6d079fcf320801"}, -] - [[package]] name = "xlsxwriter" version = "3.2.0" @@ -5348,4 +5334,4 @@ vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.13" -content-hash = "c9b702bb12589195befeb5310edbe1bc648beedd010f7e0f8e4eafb783619ac5" +content-hash = "e0ef863b4ed5381ffe0ae40e03c327394611d3cbde70fc81e027aaa1d1db3f27" diff --git a/pyproject.toml b/pyproject.toml index 28b02b513..275cf0aae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,7 @@ requests-mock = "*" types-requests = "^2.32.0.20241016" types-python-dateutil = "^2.9.0.20241003" types-pyyaml = "^6.0.12.20240917" +types-cachetools = "^5.5.0.20240820" [tool.poetry.extras] file-based = ["avro", "fastavro", "pyarrow", "unstructured", "pdf2image", "pdfminer.six", "unstructured.pytesseract", "pytesseract", "markdown", "python-calamine", "python-snappy"] From 9b08030b23868f5eb2f86a5ce3cf7d6636911230 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 2 Dec 2024 18:25:43 -0800 Subject: [PATCH 08/33] chore: remove unused `Deprecated` dependency --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 275cf0aae..e04d1e111 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,6 @@ python = "^3.10,<3.13" airbyte-protocol-models-dataclasses = "^0.14" backoff = "*" cachetools = "*" -Deprecated = "~1.2" dpath = "^2.1.6" dunamai = "^1.22.0" genson = "1.3.0" From f54eb7cf0587f0a4921000ba77214e1c84ee2d7e Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 2 Dec 2024 19:06:20 -0800 Subject: [PATCH 09/33] chore: fix orjson imports --- airbyte_cdk/cli/source_declarative_manifest/_run.py | 2 +- airbyte_cdk/config_observation.py | 2 +- airbyte_cdk/connector_builder/main.py | 2 +- airbyte_cdk/destinations/destination.py | 2 +- airbyte_cdk/entrypoint.py | 2 +- airbyte_cdk/logger.py | 2 +- airbyte_cdk/sources/file_based/file_types/csv_parser.py | 2 +- airbyte_cdk/sources/file_based/file_types/excel_parser.py | 2 +- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py | 2 +- airbyte_cdk/test/entrypoint_wrapper.py | 2 +- airbyte_cdk/utils/traced_exception.py | 2 +- unit_tests/connector_builder/test_connector_builder_handler.py | 2 +- unit_tests/connector_builder/test_message_grouper.py | 2 +- unit_tests/destinations/test_destination.py | 2 +- .../incremental/test_per_partition_cursor_integration.py | 2 +- .../declarative/partition_routers/test_parent_state_stream.py | 2 +- unit_tests/sources/test_source.py | 2 +- unit_tests/test/test_entrypoint_wrapper.py | 2 +- unit_tests/test_entrypoint.py | 2 +- unit_tests/utils/test_traced_exception.py | 2 +- 20 files changed, 20 insertions(+), 20 deletions(-) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index ba0517850..26b6ff9a7 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -25,7 +25,7 @@ from pathlib import Path from typing import Any, cast -from orjson import orjson +import orjson from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch from airbyte_cdk.models import ( diff --git a/airbyte_cdk/config_observation.py b/airbyte_cdk/config_observation.py index 9a58c0391..ae85e8277 100644 --- a/airbyte_cdk/config_observation.py +++ b/airbyte_cdk/config_observation.py @@ -10,7 +10,7 @@ from copy import copy from typing import Any, List, MutableMapping -from orjson import orjson +import orjson from airbyte_cdk.models import ( AirbyteControlConnectorConfigMessage, diff --git a/airbyte_cdk/connector_builder/main.py b/airbyte_cdk/connector_builder/main.py index 5d3dfb68e..e122cee8c 100644 --- a/airbyte_cdk/connector_builder/main.py +++ b/airbyte_cdk/connector_builder/main.py @@ -6,7 +6,7 @@ import sys from typing import Any, List, Mapping, Optional, Tuple -from orjson import orjson +import orjson from airbyte_cdk.connector import BaseConnector from airbyte_cdk.connector_builder.connector_builder_handler import ( diff --git a/airbyte_cdk/destinations/destination.py b/airbyte_cdk/destinations/destination.py index f1d799945..547f96684 100644 --- a/airbyte_cdk/destinations/destination.py +++ b/airbyte_cdk/destinations/destination.py @@ -9,7 +9,7 @@ from abc import ABC, abstractmethod from typing import Any, Iterable, List, Mapping -from orjson import orjson +import orjson from airbyte_cdk.connector import Connector from airbyte_cdk.exception_handler import init_uncaught_exception_handler diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index 9de9e60a8..de7ff746f 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -15,8 +15,8 @@ from typing import Any, DefaultDict, Iterable, List, Mapping, Optional from urllib.parse import urlparse +import orjson import requests -from orjson import orjson from requests import PreparedRequest, Response, Session from airbyte_cdk.connector import TConfig diff --git a/airbyte_cdk/logger.py b/airbyte_cdk/logger.py index 8b7f288b3..48b25215d 100644 --- a/airbyte_cdk/logger.py +++ b/airbyte_cdk/logger.py @@ -7,7 +7,7 @@ import logging.config from typing import Any, Callable, Mapping, Optional, Tuple -from orjson import orjson +import orjson from airbyte_cdk.models import ( AirbyteLogMessage, diff --git a/airbyte_cdk/sources/file_based/file_types/csv_parser.py b/airbyte_cdk/sources/file_based/file_types/csv_parser.py index 1b7fcfed5..a367d7ead 100644 --- a/airbyte_cdk/sources/file_based/file_types/csv_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/csv_parser.py @@ -12,7 +12,7 @@ from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple from uuid import uuid4 -from orjson import orjson +import orjson from airbyte_cdk.models import FailureType from airbyte_cdk.sources.file_based.config.csv_format import ( diff --git a/airbyte_cdk/sources/file_based/file_types/excel_parser.py b/airbyte_cdk/sources/file_based/file_types/excel_parser.py index a95df6033..2598e2533 100644 --- a/airbyte_cdk/sources/file_based/file_types/excel_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/excel_parser.py @@ -7,10 +7,10 @@ from pathlib import Path from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union +import orjson import pandas as pd from numpy import datetime64, issubdtype from numpy import dtype as dtype_ -from orjson import orjson from pydantic.v1 import BaseModel from airbyte_cdk.sources.file_based.config.file_based_stream_config import ( diff --git a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py index 67132dd87..722ad329b 100644 --- a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py @@ -6,7 +6,7 @@ import logging from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union -from orjson import orjson +import orjson from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError diff --git a/airbyte_cdk/test/entrypoint_wrapper.py b/airbyte_cdk/test/entrypoint_wrapper.py index 847570cd6..2fe3151af 100644 --- a/airbyte_cdk/test/entrypoint_wrapper.py +++ b/airbyte_cdk/test/entrypoint_wrapper.py @@ -23,7 +23,7 @@ from pathlib import Path from typing import Any, List, Mapping, Optional, Union -from orjson import orjson +import orjson from pydantic import ValidationError as V2ValidationError from serpyco_rs import SchemaValidationError diff --git a/airbyte_cdk/utils/traced_exception.py b/airbyte_cdk/utils/traced_exception.py index 75b6eab16..c50ae16a5 100644 --- a/airbyte_cdk/utils/traced_exception.py +++ b/airbyte_cdk/utils/traced_exception.py @@ -5,7 +5,7 @@ import traceback from typing import Optional -from orjson import orjson +import orjson from airbyte_cdk.models import ( AirbyteConnectionStatus, diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index 234a6e9cd..aac00a889 100644 --- a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -11,9 +11,9 @@ from unittest import mock from unittest.mock import MagicMock, patch +import orjson import pytest import requests -from orjson import orjson from airbyte_cdk import connector_builder from airbyte_cdk.connector_builder.connector_builder_handler import ( diff --git a/unit_tests/connector_builder/test_message_grouper.py b/unit_tests/connector_builder/test_message_grouper.py index bf63f0555..c3fc73308 100644 --- a/unit_tests/connector_builder/test_message_grouper.py +++ b/unit_tests/connector_builder/test_message_grouper.py @@ -6,8 +6,8 @@ from typing import Any, Iterator, List, Mapping from unittest.mock import MagicMock, Mock, patch +import orjson import pytest -from orjson import orjson from airbyte_cdk.connector_builder.message_grouper import MessageGrouper from airbyte_cdk.connector_builder.models import ( diff --git a/unit_tests/destinations/test_destination.py b/unit_tests/destinations/test_destination.py index 4294192e6..71eb85063 100644 --- a/unit_tests/destinations/test_destination.py +++ b/unit_tests/destinations/test_destination.py @@ -9,8 +9,8 @@ from typing import Any, Dict, Iterable, List, Mapping, Union from unittest.mock import ANY +import orjson import pytest -from orjson import orjson from airbyte_cdk.destinations import Destination from airbyte_cdk.destinations import destination as destination_module diff --git a/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py b/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py index c9c0dff70..46b726758 100644 --- a/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py +++ b/unit_tests/sources/declarative/incremental/test_per_partition_cursor_integration.py @@ -5,7 +5,7 @@ import logging from unittest.mock import MagicMock, patch -from orjson import orjson +import orjson from airbyte_cdk.models import ( AirbyteStateBlob, diff --git a/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py b/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py index 495ef7345..b65f1f724 100644 --- a/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py +++ b/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py @@ -4,9 +4,9 @@ from typing import Any, List, Mapping, MutableMapping, Optional, Union from unittest.mock import MagicMock +import orjson import pytest import requests_mock -from orjson import orjson from airbyte_cdk.models import ( AirbyteMessage, diff --git a/unit_tests/sources/test_source.py b/unit_tests/sources/test_source.py index 60c1ecf57..9554d2242 100644 --- a/unit_tests/sources/test_source.py +++ b/unit_tests/sources/test_source.py @@ -8,8 +8,8 @@ from contextlib import nullcontext as does_not_raise from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union +import orjson import pytest -from orjson import orjson from serpyco_rs import SchemaValidationError from airbyte_cdk.models import ( diff --git a/unit_tests/test/test_entrypoint_wrapper.py b/unit_tests/test/test_entrypoint_wrapper.py index fad3db953..a8d02fca9 100644 --- a/unit_tests/test/test_entrypoint_wrapper.py +++ b/unit_tests/test/test_entrypoint_wrapper.py @@ -7,7 +7,7 @@ from unittest import TestCase from unittest.mock import Mock, patch -from orjson import orjson +import orjson from airbyte_cdk.models import ( AirbyteAnalyticsTraceMessage, diff --git a/unit_tests/test_entrypoint.py b/unit_tests/test_entrypoint.py index a5ffb21fb..2e50c11f9 100644 --- a/unit_tests/test_entrypoint.py +++ b/unit_tests/test_entrypoint.py @@ -11,9 +11,9 @@ from unittest.mock import MagicMock, patch import freezegun +import orjson import pytest import requests -from orjson import orjson from airbyte_cdk import AirbyteEntrypoint from airbyte_cdk import entrypoint as entrypoint_module diff --git a/unit_tests/utils/test_traced_exception.py b/unit_tests/utils/test_traced_exception.py index ac3c4318a..0e5b58439 100644 --- a/unit_tests/utils/test_traced_exception.py +++ b/unit_tests/utils/test_traced_exception.py @@ -3,8 +3,8 @@ # +import orjson import pytest -from orjson import orjson from airbyte_cdk.models import ( AirbyteErrorTraceMessage, From 8202ce55326947cd45ea5e10a8f3e6a098a7ca40 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Mon, 2 Dec 2024 21:38:31 -0800 Subject: [PATCH 10/33] chore: misc mypy typing fixes --- .github/dependabot.yml | 8 +-- .github/workflows/publish_sdm_connector.yml | 13 ++-- .github/workflows/pytest_matrix.yml | 8 +-- .../cli/source_declarative_manifest/_run.py | 8 ++- .../destinations/vector_db_based/writer.py | 3 + .../sources/connector_state_manager.py | 2 +- .../auth/selective_authenticator.py | 7 ++- .../declarative_component_schema.yaml | 33 +++++------ .../declarative/decoders/noop_decoder.py | 5 +- ...stract_file_based_availability_strategy.py | 14 +++-- ...efault_file_based_availability_strategy.py | 21 +++++-- .../file_based/file_types/avro_parser.py | 4 +- .../sources/streams/concurrent/cursor.py | 4 +- airbyte_cdk/sources/streams/core.py | 9 ++- .../requests_native_auth/abstract_token.py | 3 +- .../http/requests_native_auth/oauth.py | 59 +++++++++++++------ airbyte_cdk/sources/utils/transform.py | 49 ++++++++++----- airbyte_cdk/utils/airbyte_secrets_utils.py | 2 +- airbyte_cdk/utils/event_timing.py | 20 +++---- airbyte_cdk/utils/message_utils.py | 7 ++- .../utils/spec_schema_transformations.py | 5 +- airbyte_cdk/utils/traced_exception.py | 25 ++++---- unit_tests/destinations/test_destination.py | 24 ++++++-- .../schema/source_test/SourceTest.py | 2 +- 24 files changed, 214 insertions(+), 121 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 9b727cf9c..2d7ddad1a 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -13,10 +13,10 @@ updates: interval: daily labels: - chore - open-pull-requests-limit: 8 # default is 5 + open-pull-requests-limit: 8 # default is 5 - package-ecosystem: github-actions - open-pull-requests-limit: 5 # default is 5 + open-pull-requests-limit: 5 # default is 5 directory: "/" commit-message: prefix: "ci(deps): " @@ -29,5 +29,5 @@ updates: minor-and-patch: applies-to: version-updates update-types: - - patch - - minor + - patch + - minor diff --git a/.github/workflows/publish_sdm_connector.yml b/.github/workflows/publish_sdm_connector.yml index e994314f8..e90274b7d 100644 --- a/.github/workflows/publish_sdm_connector.yml +++ b/.github/workflows/publish_sdm_connector.yml @@ -7,12 +7,11 @@ on: workflow_dispatch: inputs: version: - description: - The version to publish, ie 1.0.0 or 1.0.0-dev1. - If omitted, and if run from a release branch, the version will be - inferred from the git tag. - If omitted, and if run from a non-release branch, then only a SHA-based - Docker tag will be created. + description: The version to publish, ie 1.0.0 or 1.0.0-dev1. + If omitted, and if run from a release branch, the version will be + inferred from the git tag. + If omitted, and if run from a non-release branch, then only a SHA-based + Docker tag will be created. required: false dry_run: description: If true, the workflow will not push to DockerHub. @@ -24,7 +23,6 @@ jobs: build: runs-on: ubuntu-latest steps: - - name: Detect Release Tag Version if: startsWith(github.ref, 'refs/tags/v') run: | @@ -167,7 +165,6 @@ jobs: tags: | airbyte/source-declarative-manifest:${{ env.VERSION }} - - name: Build and push ('latest' tag) # Only run if version is set and IS_PRERELEASE is false if: env.VERSION != '' && env.IS_PRERELEASE == 'false' && github.event.inputs.dry_run == 'false' diff --git a/.github/workflows/pytest_matrix.yml b/.github/workflows/pytest_matrix.yml index 740bc06c4..fce75a27b 100644 --- a/.github/workflows/pytest_matrix.yml +++ b/.github/workflows/pytest_matrix.yml @@ -12,10 +12,10 @@ on: branches: - main paths: - - 'airbyte_cdk/**' - - 'unit_tests/**' - - 'poetry.lock' - - 'pyproject.toml' + - "airbyte_cdk/**" + - "unit_tests/**" + - "poetry.lock" + - "pyproject.toml" pull_request: jobs: diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index 26b6ff9a7..977be7a4a 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -160,10 +160,14 @@ def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource: connector builder. """ try: + config: Mapping[str, Any] | None + catalog: ConfiguredAirbyteCatalog | None + state: list[AirbyteStateMessage] config, catalog, state = _parse_inputs_into_config_catalog_state(args) - if "__injected_declarative_manifest" not in config: + if config is None or "__injected_declarative_manifest" not in config: raise ValueError( - f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}" + "Invalid config: `__injected_declarative_manifest` should be provided at the root " + f"of the config but config only has keys: {list(config.keys() if config else [])}" ) return ConcurrentDeclarativeSource( config=config, diff --git a/airbyte_cdk/destinations/vector_db_based/writer.py b/airbyte_cdk/destinations/vector_db_based/writer.py index 268e49ef7..532c7add9 100644 --- a/airbyte_cdk/destinations/vector_db_based/writer.py +++ b/airbyte_cdk/destinations/vector_db_based/writer.py @@ -85,6 +85,9 @@ def write( record_chunks, record_id_to_delete = self.processor.process(message.record) self.chunks[(message.record.namespace, message.record.stream)].extend(record_chunks) if record_id_to_delete is not None: + if message.record is None: + raise ValueError("Record messages cannot have null `record` property.") + self.ids_to_delete[(message.record.namespace, message.record.stream)].append( record_id_to_delete ) diff --git a/airbyte_cdk/sources/connector_state_manager.py b/airbyte_cdk/sources/connector_state_manager.py index 56b581279..a53e7785d 100644 --- a/airbyte_cdk/sources/connector_state_manager.py +++ b/airbyte_cdk/sources/connector_state_manager.py @@ -131,7 +131,7 @@ def _extract_from_state_message( else: streams = { HashableStreamDescriptor( - name=per_stream_state.stream.stream_descriptor.name, + name=per_stream_state.stream.stream_descriptor.name, # type: ignore[union-attr] # stream has stream_descriptor namespace=per_stream_state.stream.stream_descriptor.namespace, # type: ignore[union-attr] # stream has stream_descriptor ): per_stream_state.stream.stream_state # type: ignore[union-attr] # stream has stream_state for per_stream_state in state diff --git a/airbyte_cdk/sources/declarative/auth/selective_authenticator.py b/airbyte_cdk/sources/declarative/auth/selective_authenticator.py index 9f8a55193..bc276cb99 100644 --- a/airbyte_cdk/sources/declarative/auth/selective_authenticator.py +++ b/airbyte_cdk/sources/declarative/auth/selective_authenticator.py @@ -28,7 +28,12 @@ def __new__( # type: ignore[misc] **kwargs: Any, ) -> DeclarativeAuthenticator: try: - selected_key = str(dpath.get(config, authenticator_selection_path)) + selected_key = str( + dpath.get( + config, # type: ignore [arg-type] # Dpath wants mutable mapping but doesn't need it. + authenticator_selection_path, + ) + ) except KeyError as err: raise ValueError( "The path from `authenticator_selection_path` is not found in the config." diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 83b318bf0..b5f6428c5 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -2044,7 +2044,7 @@ definitions: The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc. The placeholders are replaced during the processing to provide neccessary values. examples: - - access_token_url: https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}} + - access_token_url: https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}} access_token_headers: title: (Optional) DeclarativeOAuth Access Token Headers type: object @@ -2052,9 +2052,10 @@ definitions: description: |- The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step. examples: - - access_token_headers: { - "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}" - } + - access_token_headers: + { + "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}", + } access_token_params: title: (Optional) DeclarativeOAuth Access Token Query Params (Json Encoded) type: object @@ -2063,18 +2064,19 @@ definitions: The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step. When this property is provided, the query params will be encoded as `Json` and included in the outgoing API request. examples: - - access_token_params: { - "{auth_code_key}": "{{auth_code_key}}", - "{client_id_key}": "{{client_id_key}}", - "{client_secret_key}": "{{client_secret_key}}" - } + - access_token_params: + { + "{auth_code_key}": "{{auth_code_key}}", + "{client_id_key}": "{{client_id_key}}", + "{client_secret_key}": "{{client_secret_key}}", + } extract_output: title: DeclarativeOAuth Extract Output type: array items: type: string description: |- - The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config. + The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config. examples: - extract_output: ["access_token", "refresh_token", "other_field"] state: @@ -2086,17 +2088,14 @@ definitions: - max description: |- The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed, - including length and complexity. + including length and complexity. properties: min: type: integer max: type: integer examples: - - state: { - "min": 7, - "max": 128, - } + - state: { "min": 7, "max": 128 } client_id_key: title: (Optional) DeclarativeOAuth Client ID Key Override type: string @@ -2122,14 +2121,14 @@ definitions: title: (Optional) DeclarativeOAuth State Key Override type: string description: |- - The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider. + The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider. examples: - state_key: "my_custom_state_key_key_name" auth_code_key: title: (Optional) DeclarativeOAuth Auth Code Key Override type: string description: |- - The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider. + The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider. examples: - auth_code_key: "my_custom_auth_code_key_name" redirect_uri_key: diff --git a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py index 13281026d..a90bbe019 100644 --- a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py @@ -14,5 +14,8 @@ class NoopDecoder(Decoder): def is_stream_response(self) -> bool: return False - def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]: + def decode( # type: ignore[override] + self, + response: requests.Response, + ) -> Generator[Mapping[str, Any], None, None]: yield from [{}] diff --git a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py index c0234ca17..3cfac5d02 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py @@ -22,8 +22,11 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy): @abstractmethod - def check_availability( - self, stream: Stream, logger: logging.Logger, _: Optional[Source] + def check_availability( # type: ignore[override] + self, + stream: Stream, + logger: logging.Logger, + _: Optional[Source], ) -> Tuple[bool, Optional[str]]: """ Perform a connection check for the stream. @@ -34,7 +37,10 @@ def check_availability( @abstractmethod def check_availability_and_parsability( - self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source] + self, + stream: AbstractFileBasedStream, + logger: logging.Logger, + _: Optional[Source], ) -> Tuple[bool, Optional[str]]: """ Performs a connection check for the stream, as well as additional checks that @@ -46,7 +52,7 @@ def check_availability_and_parsability( class AbstractFileBasedAvailabilityStrategyWrapper(AbstractAvailabilityStrategy): - def __init__(self, stream: "AbstractFileBasedStream"): + def __init__(self, stream: AbstractFileBasedStream) -> None: self.stream = stream def check_availability(self, logger: logging.Logger) -> StreamAvailability: diff --git a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py index cf985d9ee..1b4917889 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py @@ -28,9 +28,12 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy def __init__(self, stream_reader: AbstractFileBasedStreamReader): self.stream_reader = stream_reader - def check_availability( - self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source] - ) -> Tuple[bool, Optional[str]]: # type: ignore[override] + def check_availability( # type: ignore[override] + self, + stream: AbstractFileBasedStream, + logger: logging.Logger, + _: Optional[Source], + ) -> Tuple[bool, Optional[str]]: """ Perform a connection check for the stream (verify that we can list files from the stream). @@ -44,7 +47,10 @@ def check_availability( return True, None def check_availability_and_parsability( - self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source] + self, + stream: AbstractFileBasedStream, + logger: logging.Logger, + _: Optional[Source], ) -> Tuple[bool, Optional[str]]: """ Perform a connection check for the stream. @@ -82,7 +88,7 @@ def check_availability_and_parsability( return True, None - def _check_list_files(self, stream: "AbstractFileBasedStream") -> RemoteFile: + def _check_list_files(self, stream: AbstractFileBasedStream) -> RemoteFile: """ Check that we can list files from the stream. @@ -102,7 +108,10 @@ def _check_list_files(self, stream: "AbstractFileBasedStream") -> RemoteFile: return file def _check_parse_record( - self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger + self, + stream: AbstractFileBasedStream, + file: RemoteFile, + logger: logging.Logger, ) -> None: parser = stream.get_parser() diff --git a/airbyte_cdk/sources/file_based/file_types/avro_parser.py b/airbyte_cdk/sources/file_based/file_types/avro_parser.py index 61e3a2c82..7efea4a1c 100644 --- a/airbyte_cdk/sources/file_based/file_types/avro_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/avro_parser.py @@ -3,7 +3,7 @@ # import logging -from typing import Any, Dict, Iterable, Mapping, Optional, Tuple +from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, cast import fastavro @@ -183,7 +183,7 @@ def parse_records( avro_reader = fastavro.reader(fp) schema = avro_reader.writer_schema schema_field_name_to_type = { - field["name"]: field["type"] for field in schema["fields"] + field["name"]: cast(dict, field["type"]) for field in schema["fields"] } for record in avro_reader: line_no += 1 diff --git a/airbyte_cdk/sources/streams/concurrent/cursor.py b/airbyte_cdk/sources/streams/concurrent/cursor.py index b40709078..7d46c627a 100644 --- a/airbyte_cdk/sources/streams/concurrent/cursor.py +++ b/airbyte_cdk/sources/streams/concurrent/cursor.py @@ -218,7 +218,9 @@ def observe(self, record: Record) -> None: ) cursor_value = self._extract_cursor_value(record) - if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value: + if record.associated_slice is not None and ( + most_recent_cursor_value is None or most_recent_cursor_value < cursor_value + ): self._most_recent_cursor_value_per_partition[record.associated_slice] = cursor_value def _extract_cursor_value(self, record: Record) -> Any: diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index 8320b8269..c087cdfdb 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -115,11 +115,6 @@ class StreamClassification: has_multiple_slices: bool -# Moved to class declaration since get_updated_state is called on every record for incremental syncs, and thus the @deprecated decorator as well. -@deprecated( - "Deprecated as of CDK version 0.1.49. ", - "Deprecated method get_updated_state, You should use explicit state property instead, see IncrementalMixin docs.", -) class Stream(ABC): """ Base abstract class for an Airbyte Stream. Makes no assumption of the Stream's underlying transport protocol. @@ -435,6 +430,10 @@ def state_checkpoint_interval(self) -> Optional[int]: """ return None + @deprecated( + "Deprecated method `get_updated_state` as of CDK version 0.1.49. " + "Please use explicit state property instead, see `IncrementalMixin` docs.", + ) def get_updated_state( self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any] ) -> MutableMapping[str, Any]: diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py index db59600db..ffcc8e851 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py @@ -5,13 +5,14 @@ from abc import abstractmethod from typing import Any, Mapping +import requests from requests.auth import AuthBase class AbstractHeaderAuthenticator(AuthBase): """Abstract class for an header-based authenticators that add a header to outgoing HTTP requests.""" - def __call__(self, request): + def __call__(self, request: requests.PreparedRequest) -> Any: """Attach the HTTP headers required to authenticate on the HTTP request""" request.headers.update(self.get_auth_header()) return request diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index bdc5eddcd..047c8ca87 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -30,12 +30,12 @@ def __init__( client_id: str, client_secret: str, refresh_token: str, - scopes: List[str] = None, - token_expiry_date: pendulum.DateTime = None, - token_expiry_date_format: str = None, + scopes: List[str] | None = None, + token_expiry_date: pendulum.DateTime | None = None, + token_expiry_date_format: str | None = None, access_token_name: str = "access_token", expires_in_name: str = "expires_in", - refresh_request_body: Mapping[str, Any] = None, + refresh_request_body: Mapping[str, Any] | None = None, grant_type: str = "refresh_token", token_expiry_is_time_of_expiration: bool = False, refresh_token_error_status_codes: Tuple[int, ...] = (), @@ -90,7 +90,7 @@ def get_grant_type(self) -> str: def get_token_expiry_date(self) -> pendulum.DateTime: return self._token_expiry_date - def set_token_expiry_date(self, value: Union[str, int]): + def set_token_expiry_date(self, value: Union[str, int]) -> None: self._token_expiry_date = self._parse_token_expiration_date(value) @property @@ -106,7 +106,7 @@ def access_token(self) -> str: return self._access_token @access_token.setter - def access_token(self, value: str): + def access_token(self, value: str) -> None: self._access_token = value @@ -207,27 +207,50 @@ def get_client_secret(self) -> str: @property def access_token(self) -> str: - return dpath.get(self._connector_config, self._access_token_config_path, default="") + return dpath.get( + self._connector_config, + self._access_token_config_path, + default="", + ) @access_token.setter - def access_token(self, new_access_token: str): - dpath.new(self._connector_config, self._access_token_config_path, new_access_token) + def access_token(self, new_access_token: str) -> None: + dpath.new( + self._connector_config, + self._access_token_config_path, + new_access_token, + ) def get_refresh_token(self) -> str: - return dpath.get(self._connector_config, self._refresh_token_config_path, default="") + return dpath.get( + self._connector_config, + self._refresh_token_config_path, + default="", + ) - def set_refresh_token(self, new_refresh_token: str): - dpath.new(self._connector_config, self._refresh_token_config_path, new_refresh_token) + def set_refresh_token(self, new_refresh_token: str) -> None: + dpath.new( + self._connector_config, + self._refresh_token_config_path, + new_refresh_token, + ) def get_token_expiry_date(self) -> pendulum.DateTime: expiry_date = dpath.get( - self._connector_config, self._token_expiry_date_config_path, default="" + self._connector_config, + self._token_expiry_date_config_path, + default="", ) return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) - def set_token_expiry_date(self, new_token_expiry_date): + def set_token_expiry_date( # type: ignore[override] + self, + new_token_expiry_date: pendulum.DateTime, + ) -> None: dpath.new( - self._connector_config, self._token_expiry_date_config_path, str(new_token_expiry_date) + self._connector_config, + self._token_expiry_date_config_path, + str(new_token_expiry_date), ) def token_has_expired(self) -> bool: @@ -253,7 +276,7 @@ def get_access_token(self) -> str: new_access_token, access_token_expires_in, new_refresh_token = ( self.refresh_access_token() ) - new_token_expiry_date = self.get_new_token_expiry_date( + new_token_expiry_date: pendulum.DateTime = self.get_new_token_expiry_date( access_token_expires_in, self._token_expiry_date_format ) self.access_token = new_access_token @@ -270,7 +293,9 @@ def get_access_token(self) -> str: emit_configuration_as_airbyte_control_message(self._connector_config) return self.access_token - def refresh_access_token(self) -> Tuple[str, str, str]: + def refresh_access_token( # type: ignore[override] + self, + ) -> Tuple[str, str, str]: response_json = self._get_refresh_access_token_response() return ( response_json[self.get_access_token_name()], diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index ef52c5fd3..7b42f5470 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -5,9 +5,9 @@ import logging from distutils.util import strtobool from enum import Flag, auto -from typing import Any, Callable, Dict, Mapping, Optional +from typing import Any, Callable, Dict, Generator, Mapping, Optional, cast -from jsonschema import Draft7Validator, ValidationError, validators +from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators json_to_python_simple = { "string": str, @@ -30,7 +30,7 @@ class TransformConfig(Flag): ``` """ - # No action taken, default behaviour. Cannot be combined with any other options. + # No action taken, default behavior. Cannot be combined with any other options. NoTransform = auto() # Applies default type casting with default_convert method which converts # values by applying simple type casting to specified jsonschema type. @@ -67,15 +67,15 @@ def __init__(self, config: TransformConfig): ) def registerCustomTransform( - self, normalization_callback: Callable[[Any, Dict[str, Any]], Any] - ) -> Callable: + self, normalization_callback: Callable[[Any, dict[str, Any]], Any] + ) -> Callable[[Any, dict[str, Any]], Any]: """ Register custom normalization callback. :param normalization_callback function to be used for value normalization. Takes original value and part type schema. Should return normalized value. See docs/connector-development/cdk-python/schemas.md for details. - :return Same callbeck, this is usefull for using registerCustomTransform function as decorator. + :return Same callback, this is useful for using registerCustomTransform function as decorator. """ if TransformConfig.CustomSchemaNormalization not in self._config: raise Exception( @@ -141,7 +141,11 @@ def default_convert(original_item: Any, subschema: Dict[str, Any]) -> Any: return original_item return original_item - def __get_normalizer(self, schema_key: str, original_validator: Callable): + def __get_normalizer( + self, + schema_key: str, + original_validator: Callable, # type: ignore[type-arg] + ) -> Generator[Any, Any, None]: """ Traverse through object fields using native jsonschema validator and apply normalization function. :param schema_key related json schema key that currently being validated/normalized. @@ -149,8 +153,11 @@ def __get_normalizer(self, schema_key: str, original_validator: Callable): """ def normalizator( - validator_instance: Callable, property_value: Any, instance: Any, schema: Dict[str, Any] - ): + validator_instance: Validator, + property_value: Any, + instance: Any, + schema: Dict[str, Any], + ) -> Generator[Any, Any, None]: """ Jsonschema validator callable it uses for validating instance. We override default Draft7Validator to perform value transformation @@ -163,10 +170,13 @@ def normalizator( : """ - def resolve(subschema): + def resolve(subschema: dict[str, Any]) -> dict[str, Any]: if "$ref" in subschema: - _, resolved = validator_instance.resolver.resolve(subschema["$ref"]) - return resolved + _, resolved = cast( + RefResolver, + validator_instance.resolver, + ).resolve(subschema["$ref"]) + return cast(dict[str, Any], resolved) return subschema # Transform object and array values before running json schema type checking for each element. @@ -185,11 +195,20 @@ def resolve(subschema): instance[index] = self.__normalize(item, subschema) # Running native jsonschema traverse algorithm after field normalization is done. - yield from original_validator(validator_instance, property_value, instance, schema) + yield from original_validator( + validator_instance, + property_value, + instance, + schema, + ) return normalizator - def transform(self, record: Dict[str, Any], schema: Mapping[str, Any]): + def transform( + self, + record: Dict[str, Any], + schema: Mapping[str, Any], + ) -> None: """ Normalize and validate according to config. :param record: record instance for normalization/transformation. All modification are done by modifying existent object. @@ -201,7 +220,7 @@ def transform(self, record: Dict[str, Any], schema: Mapping[str, Any]): for e in normalizer.iter_errors(record): """ just calling normalizer.validate() would throw an exception on - first validation occurences and stop processing rest of schema. + first validation occurrences and stop processing rest of schema. """ logger.warning(self.get_error_message(e)) diff --git a/airbyte_cdk/utils/airbyte_secrets_utils.py b/airbyte_cdk/utils/airbyte_secrets_utils.py index 45279e573..bb5a6be59 100644 --- a/airbyte_cdk/utils/airbyte_secrets_utils.py +++ b/airbyte_cdk/utils/airbyte_secrets_utils.py @@ -47,7 +47,7 @@ def get_secrets( result = [] for path in secret_paths: try: - result.append(dpath.get(config, path)) + result.append(dpath.get(config, path)) # type: ignore # dpath expect MutableMapping but doesn't need it except KeyError: # Since we try to get paths to all known secrets in the spec, in the case of oneOfs, some secret fields may not be present # In that case, a KeyError is thrown. This is expected behavior. diff --git a/airbyte_cdk/utils/event_timing.py b/airbyte_cdk/utils/event_timing.py index 447543ec0..3f489c096 100644 --- a/airbyte_cdk/utils/event_timing.py +++ b/airbyte_cdk/utils/event_timing.py @@ -7,7 +7,7 @@ import time from contextlib import contextmanager from dataclasses import dataclass, field -from typing import Optional +from typing import Any, Generator, Literal, Optional logger = logging.getLogger("airbyte") @@ -18,13 +18,13 @@ class EventTimer: Event nesting follows a LIFO pattern, so finish will apply to the last started event. """ - def __init__(self, name): + def __init__(self, name: str) -> None: self.name = name - self.events = {} + self.events: dict[str, Any] = {} self.count = 0 - self.stack = [] + self.stack: list[Any] = [] - def start_event(self, name): + def start_event(self, name: str) -> None: """ Start a new event and push it to the stack. """ @@ -32,7 +32,7 @@ def start_event(self, name): self.count += 1 self.stack.insert(0, self.events[name]) - def finish_event(self): + def finish_event(self) -> None: """ Finish the current event and pop it from the stack. """ @@ -43,7 +43,7 @@ def finish_event(self): else: logger.warning(f"{self.name} finish_event called without start_event") - def report(self, order_by="name"): + def report(self, order_by: Literal["name", "duration"] = "name") -> str: """ :param order_by: 'name' or 'duration' """ @@ -69,15 +69,15 @@ def duration(self) -> float: return (self.end - self.start) / 1e9 return float("+inf") - def __str__(self): + def __str__(self) -> str: return f"{self.name} {datetime.timedelta(seconds=self.duration)}" - def finish(self): + def finish(self) -> None: self.end = time.perf_counter_ns() @contextmanager -def create_timer(name): +def create_timer(name: str) -> Generator[EventTimer, Any, None]: """ Creates a new EventTimer as a context manager to improve code readability. """ diff --git a/airbyte_cdk/utils/message_utils.py b/airbyte_cdk/utils/message_utils.py index 7e740b788..148b19ae2 100644 --- a/airbyte_cdk/utils/message_utils.py +++ b/airbyte_cdk/utils/message_utils.py @@ -8,15 +8,16 @@ def get_stream_descriptor(message: AirbyteMessage) -> HashableStreamDescriptor: match message.type: case Type.RECORD: return HashableStreamDescriptor( - name=message.record.stream, namespace=message.record.namespace - ) # type: ignore[union-attr] # record has `stream` and `namespace` + name=message.record.stream, # type: ignore[union-attr] # record has `stream` + namespace=message.record.namespace, # type: ignore[union-attr] # record has `namespace` + ) case Type.STATE: if not message.state.stream or not message.state.stream.stream_descriptor: # type: ignore[union-attr] # state has `stream` raise ValueError( "State message was not in per-stream state format, which is required for record counts." ) return HashableStreamDescriptor( - name=message.state.stream.stream_descriptor.name, + name=message.state.stream.stream_descriptor.name, # type: ignore[union-attr] # state has `stream` namespace=message.state.stream.stream_descriptor.namespace, # type: ignore[union-attr] # state has `stream` ) case _: diff --git a/airbyte_cdk/utils/spec_schema_transformations.py b/airbyte_cdk/utils/spec_schema_transformations.py index 8d47f83e5..fdc41a541 100644 --- a/airbyte_cdk/utils/spec_schema_transformations.py +++ b/airbyte_cdk/utils/spec_schema_transformations.py @@ -4,11 +4,12 @@ import json import re +from typing import Any from jsonschema import RefResolver -def resolve_refs(schema: dict) -> dict: +def resolve_refs(schema: dict[str, Any]) -> dict[str, Any]: """ For spec schemas generated using Pydantic models, the resulting JSON schema can contain refs between object relationships. @@ -20,6 +21,6 @@ def resolve_refs(schema: dict) -> dict: str_schema = str_schema.replace( ref_block, json.dumps(json_schema_ref_resolver.resolve(ref)[1]) ) - pyschema: dict = json.loads(str_schema) + pyschema: dict[str, Any] = json.loads(str_schema) del pyschema["definitions"] return pyschema diff --git a/airbyte_cdk/utils/traced_exception.py b/airbyte_cdk/utils/traced_exception.py index c50ae16a5..e9b9cfb83 100644 --- a/airbyte_cdk/utils/traced_exception.py +++ b/airbyte_cdk/utils/traced_exception.py @@ -3,7 +3,7 @@ # import time import traceback -from typing import Optional +from typing import Any, Optional import orjson @@ -104,8 +104,8 @@ def from_exception( cls, exc: BaseException, stream_descriptor: Optional[StreamDescriptor] = None, - *args, - **kwargs, + *args: Any, + **kwargs: Any, ) -> "AirbyteTracedException": # type: ignore # ignoring because of args and kwargs """ Helper to create an AirbyteTracedException from an existing exception @@ -130,14 +130,19 @@ def as_sanitized_airbyte_message( stream_descriptors are defined, the one from `as_sanitized_airbyte_message` will be discarded. """ error_message = self.as_airbyte_message(stream_descriptor=stream_descriptor) + if error_message.trace is None or error_message.trace.error is None: + raise ValueError("Trace `error` property should not be None.") + if error_message.trace.error.message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage - error_message.trace.error.message = filter_secrets(error_message.trace.error.message) # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage - if error_message.trace.error.internal_message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage - error_message.trace.error.internal_message = filter_secrets( - error_message.trace.error.internal_message + error_message.trace.error.message = filter_secrets( + error_message.trace.error.message, ) # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + if error_message.trace.error.internal_message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + error_message.trace.error.internal_message = filter_secrets( # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + error_message.trace.error.internal_message # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + ) if error_message.trace.error.stack_trace: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage - error_message.trace.error.stack_trace = filter_secrets( - error_message.trace.error.stack_trace - ) # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + error_message.trace.error.stack_trace = filter_secrets( # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + error_message.trace.error.stack_trace # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + ) return error_message diff --git a/unit_tests/destinations/test_destination.py b/unit_tests/destinations/test_destination.py index 71eb85063..14f52be15 100644 --- a/unit_tests/destinations/test_destination.py +++ b/unit_tests/destinations/test_destination.py @@ -6,6 +6,7 @@ import io import json from os import PathLike +from pathlib import Path from typing import Any, Dict, Iterable, List, Mapping, Union from unittest.mock import ANY @@ -129,7 +130,7 @@ class OrderedIterableMatcher(Iterable): in an ordered fashion """ - def attempt_consume(self, iterator): + def attempt_consume(self, iterator) -> Any | None: try: return next(iterator) except StopIteration: @@ -138,7 +139,7 @@ def attempt_consume(self, iterator): def __iter__(self): return iter(self.iterable) - def __init__(self, iterable: Iterable): + def __init__(self, iterable: Iterable) -> None: self.iterable = iterable def __eq__(self, other): @@ -149,7 +150,11 @@ def __eq__(self, other): class TestRun: - def test_run_initializes_exception_handler(self, mocker, destination: Destination): + def test_run_initializes_exception_handler( + self, + mocker, + destination: Destination, + ) -> None: mocker.patch.object(destination_module, "init_uncaught_exception_handler") mocker.patch.object(destination, "parse_args") mocker.patch.object(destination, "run_cmd") @@ -158,7 +163,11 @@ def test_run_initializes_exception_handler(self, mocker, destination: Destinatio destination_module.logger ) - def test_run_spec(self, mocker, destination: Destination): + def test_run_spec( + self, + mocker, + destination: Destination, + ) -> None: args = {"command": "spec"} parsed_args = argparse.Namespace(**args) @@ -203,7 +212,12 @@ def test_run_check(self, mocker, destination: Destination, tmp_path): # verify output was correct assert returned_check_result == _wrapped(expected_check_result) - def test_run_check_with_invalid_config(self, mocker, destination: Destination, tmp_path): + def test_run_check_with_invalid_config( + self, + mocker, + destination: Destination, + tmp_path: Path, + ) -> None: file_path = tmp_path / "config.json" invalid_config = {"not": "valid"} write_file(file_path, invalid_config) diff --git a/unit_tests/sources/declarative/schema/source_test/SourceTest.py b/unit_tests/sources/declarative/schema/source_test/SourceTest.py index 8d6a26cb7..d8b3f64d5 100644 --- a/unit_tests/sources/declarative/schema/source_test/SourceTest.py +++ b/unit_tests/sources/declarative/schema/source_test/SourceTest.py @@ -4,5 +4,5 @@ class SourceTest: - def __init__(self): + def __init__(self) -> None: pass From 7e50af9c9e999405c9a342e1b758b5a5d468a06f Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 08:26:41 -0800 Subject: [PATCH 11/33] ci: remove stale if condition --- .github/workflows/python_lint.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python_lint.yml b/.github/workflows/python_lint.yml index e3f690b4c..5cbebf066 100644 --- a/.github/workflows/python_lint.yml +++ b/.github/workflows/python_lint.yml @@ -75,5 +75,4 @@ jobs: # Job-specific step(s): - name: Run mypy - if: steps.changed-py-files.outputs.any_changed == 'true' run: poetry run mypy --config-file mypy.ini --install-types --non-interactive From 5c65b4686964f17729989625b4ba1106de4e2d08 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 08:28:01 -0800 Subject: [PATCH 12/33] fix deprecated syntax --- airbyte_cdk/sources/streams/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index c087cdfdb..0da0e0fa6 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -92,8 +92,8 @@ def state(self, value: MutableMapping[str, Any]) -> None: @deprecated( - "Deprecated as of CDK version 0.87.0. ", - "Deprecated in favor of the CheckpointMixin which offers similar functionality.", + "Deprecated as of CDK version 0.87.0. " + "Deprecated in favor of the `CheckpointMixin` which offers similar functionality." ) class IncrementalMixin(CheckpointMixin, ABC): """Mixin to make stream incremental. From 70bda72778a1a1a9828f1214acf6697e9c09e0f9 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 08:36:49 -0800 Subject: [PATCH 13/33] chore: deprecated decorator readability and syntax fixes --- .../sources/declarative/retrievers/async_retriever.py | 5 ++++- .../sources/file_based/stream/concurrent/adapters.py | 5 ++++- .../sources/streams/concurrent/abstract_stream.py | 5 ++++- airbyte_cdk/sources/streams/concurrent/adapters.py | 5 ++++- .../streams/concurrent/availability_strategy.py | 10 ++++++++-- airbyte_cdk/sources/streams/core.py | 2 +- airbyte_cdk/sources/streams/http/http.py | 4 ++-- 7 files changed, 27 insertions(+), 9 deletions(-) diff --git a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py index c60581590..3d9a3ead9 100644 --- a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py @@ -21,7 +21,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) @dataclass class AsyncRetriever(Retriever): config: Config diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py index 9d7bcf48b..fb0efc82c 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py @@ -56,7 +56,10 @@ """ -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBasedStream): @classmethod def create_from_stream( diff --git a/airbyte_cdk/sources/streams/concurrent/abstract_stream.py b/airbyte_cdk/sources/streams/concurrent/abstract_stream.py index b41b1c2d8..26e6f09d4 100644 --- a/airbyte_cdk/sources/streams/concurrent/abstract_stream.py +++ b/airbyte_cdk/sources/streams/concurrent/abstract_stream.py @@ -14,7 +14,10 @@ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) class AbstractStream(ABC): """ AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK. diff --git a/airbyte_cdk/sources/streams/concurrent/adapters.py b/airbyte_cdk/sources/streams/concurrent/adapters.py index baf129a66..93f2efbde 100644 --- a/airbyte_cdk/sources/streams/concurrent/adapters.py +++ b/airbyte_cdk/sources/streams/concurrent/adapters.py @@ -50,7 +50,10 @@ """ -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) class StreamFacade(AbstractStreamFacade[DefaultStream], Stream): """ The StreamFacade is a Stream that wraps an AbstractStream and exposes it as a Stream. diff --git a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py index ed24b2980..118a7d0bb 100644 --- a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py +++ b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py @@ -48,7 +48,10 @@ def message(self) -> Optional[str]: STREAM_AVAILABLE = StreamAvailable() -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) class AbstractAvailabilityStrategy(ABC): """ AbstractAvailabilityStrategy is an experimental interface developed as part of the Concurrent CDK. @@ -68,7 +71,10 @@ def check_availability(self, logger: logging.Logger) -> StreamAvailability: """ -@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) +@deprecated( + "This class is experimental. Use at your own risk.", + category=ExperimentalClassWarning, +) class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy): """ An availability strategy that always indicates a stream is available. diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index 0da0e0fa6..933d6f404 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -432,7 +432,7 @@ def state_checkpoint_interval(self) -> Optional[int]: @deprecated( "Deprecated method `get_updated_state` as of CDK version 0.1.49. " - "Please use explicit state property instead, see `IncrementalMixin` docs.", + "Please use explicit state property instead, see `IncrementalMixin` docs." ) def get_updated_state( self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any] diff --git a/airbyte_cdk/sources/streams/http/http.py b/airbyte_cdk/sources/streams/http/http.py index 3be146907..d28351743 100644 --- a/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte_cdk/sources/streams/http/http.py @@ -121,8 +121,8 @@ def http_method(self) -> str: @property @deprecated( - "Deprecated as of CDK version 3.0.0. ", - "You should set error_handler explicitly in HttpStream.get_error_handler() instead.", + "Deprecated as of CDK version 3.0.0. " + "You should set error_handler explicitly in HttpStream.get_error_handler() instead." ) def raise_on_http_errors(self) -> bool: """ From 22afbf67c4a0c5322c35f40b2399c9d56e39a447 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 08:45:30 -0800 Subject: [PATCH 14/33] allow unquoted class name via future annotations --- .../abstract_file_based_availability_strategy.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py index 3cfac5d02..e5f015511 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py @@ -2,6 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from __future__ import annotations + import logging from abc import abstractmethod from typing import TYPE_CHECKING, Optional, Tuple From df0005438d3cfa971f6710bf0b973c92d983cfdf Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 08:56:00 -0800 Subject: [PATCH 15/33] fix test expecting ValueError --- airbyte_cdk/sources/streams/concurrent/cursor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/airbyte_cdk/sources/streams/concurrent/cursor.py b/airbyte_cdk/sources/streams/concurrent/cursor.py index aca3fd085..4f103b224 100644 --- a/airbyte_cdk/sources/streams/concurrent/cursor.py +++ b/airbyte_cdk/sources/streams/concurrent/cursor.py @@ -232,9 +232,7 @@ def observe(self, record: Record) -> None: try: cursor_value = self._extract_cursor_value(record) - if record.associated_slice is not None and ( - most_recent_cursor_value is None or most_recent_cursor_value < cursor_value - ): + if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value: self._most_recent_cursor_value_per_partition[record.associated_slice] = cursor_value except ValueError: self._log_for_record_without_cursor_value() From d7d1fda7e994a7f5de5ec8e1bfe1db286e7ce9c0 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 08:56:27 -0800 Subject: [PATCH 16/33] add future annotations import --- .../default_file_based_availability_strategy.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py index 1b4917889..09cff8f83 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py @@ -2,6 +2,8 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +from __future__ import annotations + import logging import traceback from typing import TYPE_CHECKING, Optional, Tuple @@ -25,7 +27,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy): - def __init__(self, stream_reader: AbstractFileBasedStreamReader): + def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None: self.stream_reader = stream_reader def check_availability( # type: ignore[override] From 192e90548885a0920d9d753bbc3b04e2eccc3d0d Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 09:05:04 -0800 Subject: [PATCH 17/33] chore: pr review and cleanup --- airbyte_cdk/sources/declarative/decoders/noop_decoder.py | 2 +- .../abstract_file_based_availability_strategy.py | 2 +- .../default_file_based_availability_strategy.py | 2 +- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py | 2 +- pyproject.toml | 1 + 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py index a90bbe019..cf0bc56eb 100644 --- a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py @@ -14,7 +14,7 @@ class NoopDecoder(Decoder): def is_stream_response(self) -> bool: return False - def decode( # type: ignore[override] + def decode( # type: ignore[override] # Signature doesn't match base class self, response: requests.Response, ) -> Generator[Mapping[str, Any], None, None]: diff --git a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py index e5f015511..12e1740b6 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py @@ -24,7 +24,7 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy): @abstractmethod - def check_availability( # type: ignore[override] + def check_availability( # type: ignore[override] # Signature doesn't match base class self, stream: Stream, logger: logging.Logger, diff --git a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py index 09cff8f83..c9d416a72 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py @@ -30,7 +30,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None: self.stream_reader = stream_reader - def check_availability( # type: ignore[override] + def check_availability( # type: ignore[override] # Signature doesn't match base class self, stream: AbstractFileBasedStream, logger: logging.Logger, diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index 047c8ca87..6f278db8a 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -293,7 +293,7 @@ def get_access_token(self) -> str: emit_configuration_as_airbyte_control_message(self._connector_config) return self.access_token - def refresh_access_token( # type: ignore[override] + def refresh_access_token( # type: ignore[override] # Signature doesn't match base class self, ) -> Tuple[str, str, str]: response_json = self._get_refresh_access_token_response() diff --git a/pyproject.toml b/pyproject.toml index e04d1e111..a755b0c9e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,6 +98,7 @@ pytest-cov = "*" pytest-httpserver = "*" pytest-mock = "*" requests-mock = "*" +# Stubs packages for mypy typing types-requests = "^2.32.0.20241016" types-python-dateutil = "^2.9.0.20241003" types-pyyaml = "^6.0.12.20240917" From 93e453495296696f94592b4ce9b42a0d49fb9a08 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 10:08:14 -0800 Subject: [PATCH 18/33] more noqas --- .../cli/source_declarative_manifest/_run.py | 2 +- .../connector_builder/message_grouper.py | 4 ++-- .../destinations/vector_db_based/writer.py | 16 +++++++++---- .../declarative/datetime/min_max_datetime.py | 12 +++++++--- .../declarative/interpolation/jinja.py | 6 ++--- .../sources/embedded/base_integration.py | 5 ++-- .../file_based/file_types/avro_parser.py | 23 +++++++++++-------- .../file_based/file_types/excel_parser.py | 7 ++++-- airbyte_cdk/sources/http_logger.py | 6 ++--- .../sources/streams/concurrent/adapters.py | 2 +- airbyte_cdk/sources/streams/core.py | 3 ++- .../http/requests_native_auth/oauth.py | 2 +- airbyte_cdk/sources/utils/transform.py | 2 +- 13 files changed, 56 insertions(+), 34 deletions(-) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index 977be7a4a..376695fef 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -72,7 +72,7 @@ def __init__( super().__init__( catalog=catalog, config=config, - state=state, + state=state, # type: ignore [arg-type] path_to_yaml="manifest.yaml", ) diff --git a/airbyte_cdk/connector_builder/message_grouper.py b/airbyte_cdk/connector_builder/message_grouper.py index aa3a42931..a0ec54c9f 100644 --- a/airbyte_cdk/connector_builder/message_grouper.py +++ b/airbyte_cdk/connector_builder/message_grouper.py @@ -274,7 +274,7 @@ def _get_message_groups( if message.trace.type == TraceType.ERROR: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has trace.type yield message.trace elif message.type == MessageType.RECORD: - current_page_records.append(message.record.data) # type: ignore[union-attr] # AirbyteMessage with MessageType.RECORD has record.data + current_page_records.append(message.record.data) # type: ignore[arg-type, union-attr] # AirbyteMessage with MessageType.RECORD has record.data records_count += 1 schema_inferrer.accumulate(message.record) datetime_format_inferrer.accumulate(message.record) @@ -355,7 +355,7 @@ def _close_page( StreamReadPages( request=current_page_request, response=current_page_response, - records=deepcopy(current_page_records), + records=deepcopy(current_page_records), # type: ignore [arg-type] ) # type: ignore ) current_page_records.clear() diff --git a/airbyte_cdk/destinations/vector_db_based/writer.py b/airbyte_cdk/destinations/vector_db_based/writer.py index 532c7add9..1dcb4e009 100644 --- a/airbyte_cdk/destinations/vector_db_based/writer.py +++ b/airbyte_cdk/destinations/vector_db_based/writer.py @@ -83,14 +83,22 @@ def write( yield message elif message.type == Type.RECORD: record_chunks, record_id_to_delete = self.processor.process(message.record) - self.chunks[(message.record.namespace, message.record.stream)].extend(record_chunks) + self.chunks[ + ( # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]" + message.record.namespace, # type: ignore [union-attr] # record not None + message.record.stream, # type: ignore [union-attr] # record not None + ) + ].extend(record_chunks) if record_id_to_delete is not None: if message.record is None: raise ValueError("Record messages cannot have null `record` property.") - self.ids_to_delete[(message.record.namespace, message.record.stream)].append( - record_id_to_delete - ) + self.ids_to_delete[ + ( # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]" + message.record.namespace, # type: ignore [union-attr] # record not None + message.record.stream, # type: ignore [union-attr] # record not None + ) + ].append(record_id_to_delete) self.number_of_chunks += len(record_chunks) if self.number_of_chunks >= self.batch_size: self._process_batch() diff --git a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py index 1edf92432..0c16daf97 100644 --- a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +++ b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py @@ -41,12 +41,12 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: self.datetime = InterpolatedString.create(self.datetime, parameters=parameters or {}) self._parser = DatetimeParser() self.min_datetime = ( - InterpolatedString.create(self.min_datetime, parameters=parameters) + InterpolatedString.create(self.min_datetime, parameters=parameters) # type: ignore [assignment] # expression has type "InterpolatedString | None", variable has type "InterpolatedString | str" if self.min_datetime else None ) # type: ignore self.max_datetime = ( - InterpolatedString.create(self.max_datetime, parameters=parameters) + InterpolatedString.create(self.max_datetime, parameters=parameters) # type: ignore [assignment] # expression has type "InterpolatedString | None", variable has type "InterpolatedString | str" if self.max_datetime else None ) # type: ignore @@ -66,7 +66,13 @@ def get_datetime( datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z" time = self._parser.parse( - str(self.datetime.eval(config, **additional_parameters)), datetime_format + str( + self.datetime.eval( # type: ignore[union-attr] # str has no attribute "eval" + config, + **additional_parameters, + ) + ), + datetime_format, ) # type: ignore # datetime is always cast to an interpolated string if self.min_datetime: diff --git a/airbyte_cdk/sources/declarative/interpolation/jinja.py b/airbyte_cdk/sources/declarative/interpolation/jinja.py index ecbe9a349..317ed7dfd 100644 --- a/airbyte_cdk/sources/declarative/interpolation/jinja.py +++ b/airbyte_cdk/sources/declarative/interpolation/jinja.py @@ -120,7 +120,7 @@ def _literal_eval(self, result: Optional[str], valid_types: Optional[Tuple[Type[ def _eval(self, s: Optional[str], context: Mapping[str, Any]) -> Optional[str]: try: undeclared = self._find_undeclared_variables(s) - undeclared_not_in_context = {var for var in undeclared if var not in context} + undeclared_not_in_context = {var for var in undeclared if var not in context} # type: ignore [attr-defined] # `Template` class not iterable if undeclared_not_in_context: raise ValueError( f"Jinja macro has undeclared variables: {undeclared_not_in_context}. Context: {context}" @@ -137,11 +137,11 @@ def _find_undeclared_variables(self, s: Optional[str]) -> Template: Find undeclared variables and cache them """ ast = self._environment.parse(s) # type: ignore # parse is able to handle None - return meta.find_undeclared_variables(ast) + return meta.find_undeclared_variables(ast) # type: ignore [return-value] # Expected `Template` but got `set[str]` @cache def _compile(self, s: Optional[str]) -> Template: """ We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader """ - return self._environment.from_string(s) + return self._environment.from_string(s) # type: ignore [arg-type] # Expected `str | Template` but passed `str | None` diff --git a/airbyte_cdk/sources/embedded/base_integration.py b/airbyte_cdk/sources/embedded/base_integration.py index c2e67408e..77917b0a1 100644 --- a/airbyte_cdk/sources/embedded/base_integration.py +++ b/airbyte_cdk/sources/embedded/base_integration.py @@ -52,8 +52,9 @@ def _load_data( for message in self.source.read(self.config, configured_catalog, state): if message.type == Type.RECORD: output = self._handle_record( - message.record, get_defined_id(stream, message.record.data) - ) # type: ignore[union-attr] # record has `data` + message.record, + get_defined_id(stream, message.record.data), # type: ignore[union-attr, arg-type] + ) if output: yield output elif message.type is Type.STATE and message.state: diff --git a/airbyte_cdk/sources/file_based/file_types/avro_parser.py b/airbyte_cdk/sources/file_based/file_types/avro_parser.py index 7efea4a1c..e1aa2c4cb 100644 --- a/airbyte_cdk/sources/file_based/file_types/avro_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/avro_parser.py @@ -64,18 +64,20 @@ async def infer_schema( raise ValueError(f"Expected ParquetFormat, got {avro_format}") with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp: - avro_reader = fastavro.reader(fp) + avro_reader = fastavro.reader(fp) # type: ignore [arg-type] avro_schema = avro_reader.writer_schema - if not avro_schema["type"] == "record": - unsupported_type = avro_schema["type"] + if not avro_schema["type"] == "record": # type: ignore [index, call-overload] + unsupported_type = avro_schema["type"] # type: ignore [index, call-overload] raise ValueError( f"Only record based avro files are supported. Found {unsupported_type}" ) json_schema = { - field["name"]: AvroParser._convert_avro_type_to_json( - avro_format, field["name"], field["type"] + field["name"]: AvroParser._convert_avro_type_to_json( # type: ignore [index] + avro_format, + field["name"], # type: ignore [index] + field["type"], # type: ignore [index] ) - for field in avro_schema["fields"] + for field in avro_schema["fields"] # type: ignore [index, call-overload] } return json_schema @@ -180,18 +182,19 @@ def parse_records( line_no = 0 try: with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp: - avro_reader = fastavro.reader(fp) + avro_reader = fastavro.reader(fp) # type: ignore [arg-type] schema = avro_reader.writer_schema schema_field_name_to_type = { - field["name"]: cast(dict, field["type"]) for field in schema["fields"] + field["name"]: cast(dict[str, Any], field["type"]) # type: ignore [index] + for field in schema["fields"] # type: ignore [index, call-overload] # If schema is not dict, it is not subscriptable by strings } for record in avro_reader: line_no += 1 yield { record_field: self._to_output_value( avro_format, - schema_field_name_to_type[record_field], - record[record_field], + schema_field_name_to_type[record_field], # type: ignore [index] # Any not subscriptable + record[record_field], # type: ignore [index] # Any not subscriptable ) for record_field, record_value in schema_field_name_to_type.items() } diff --git a/airbyte_cdk/sources/file_based/file_types/excel_parser.py b/airbyte_cdk/sources/file_based/file_types/excel_parser.py index 2598e2533..e9bc82b81 100644 --- a/airbyte_cdk/sources/file_based/file_types/excel_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/excel_parser.py @@ -70,7 +70,10 @@ async def infer_schema( for column, df_type in df.dtypes.items(): # Choose the broadest data type if the column's data type differs in dataframes prev_frame_column_type = fields.get(column) - fields[column] = self.dtype_to_json_type(prev_frame_column_type, df_type) + fields[column] = self.dtype_to_json_type( # type: ignore [index] + prev_frame_column_type, + df_type, + ) schema = { field: ( @@ -187,4 +190,4 @@ def open_and_parse_file(fp: Union[IOBase, str, Path]) -> pd.DataFrame: Returns: pd.DataFrame: Parsed data from the Excel file. """ - return pd.ExcelFile(fp, engine="calamine").parse() + return pd.ExcelFile(fp, engine="calamine").parse() # type: ignore [arg-type] diff --git a/airbyte_cdk/sources/http_logger.py b/airbyte_cdk/sources/http_logger.py index 58d6aed30..33ccc68ac 100644 --- a/airbyte_cdk/sources/http_logger.py +++ b/airbyte_cdk/sources/http_logger.py @@ -14,7 +14,7 @@ def format_http_message( title: str, description: str, stream_name: Optional[str], - is_auxiliary: bool = None, + is_auxiliary: bool | None = None, ) -> LogMessage: request = response.request log_message = { @@ -42,10 +42,10 @@ def format_http_message( "url": {"full": request.url}, } if is_auxiliary is not None: - log_message["http"]["is_auxiliary"] = is_auxiliary + log_message["http"]["is_auxiliary"] = is_auxiliary # type: ignore [index] if stream_name: log_message["airbyte_cdk"] = {"stream": {"name": stream_name}} - return log_message + return log_message # type: ignore [return-value] # got "dict[str, object]", expected "dict[str, JsonType]" def _normalize_body_string(body_str: Optional[Union[str, bytes]]) -> Optional[str]: diff --git a/airbyte_cdk/sources/streams/concurrent/adapters.py b/airbyte_cdk/sources/streams/concurrent/adapters.py index 93f2efbde..f304bfb21 100644 --- a/airbyte_cdk/sources/streams/concurrent/adapters.py +++ b/airbyte_cdk/sources/streams/concurrent/adapters.py @@ -300,7 +300,7 @@ def read(self) -> Iterable[Record]: yield Record( data=data_to_return, stream_name=self.stream_name(), - associated_slice=self._slice, + associated_slice=self._slice, # type: ignore [arg-type] ) else: self._message_repository.emit_message(record_data) diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index 933d6f404..6b51efb7f 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -216,7 +216,8 @@ def read( # type: ignore # ignoring typing for ConnectorStateManager because o # Some connectors have streams that implement get_updated_state(), but do not define a cursor_field. This # should be fixed on the stream implementation, but we should also protect against this in the CDK as well stream_state_tracker = self.get_updated_state( - stream_state_tracker, record_data + stream_state_tracker, + record_data, ) self._observe_state(checkpoint_reader, stream_state_tracker) record_counter += 1 diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index 6f278db8a..37726815f 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -164,7 +164,7 @@ def __init__( self._client_id = ( client_id if client_id is not None - else dpath.get(connector_config, ("credentials", "client_id")) + else dpath.get(connector_config, ("credentials", "client_id")), # type: ignore [arg-type] ) self._client_secret = ( client_secret diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index 7b42f5470..d6885e8c3 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -145,7 +145,7 @@ def __get_normalizer( self, schema_key: str, original_validator: Callable, # type: ignore[type-arg] - ) -> Generator[Any, Any, None]: + ) -> Callable[[Any, Any, Any, dict[str, Any]], Generator[Any, Any, None]]: """ Traverse through object fields using native jsonschema validator and apply normalization function. :param schema_key related json schema key that currently being validated/normalized. From 7f053539a0ffb2a85cc6f6912981eaaecfb5c815 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 12:15:55 -0800 Subject: [PATCH 19/33] fix inadvertent tuple --- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index 37726815f..a9129e653 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -164,7 +164,7 @@ def __init__( self._client_id = ( client_id if client_id is not None - else dpath.get(connector_config, ("credentials", "client_id")), # type: ignore [arg-type] + else dpath.get(connector_config, ("credentials", "client_id")) # type: ignore [arg-type] ) self._client_secret = ( client_secret From 65dae78da9584d87c64136ba9027a7c26a19e4de Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 12:18:48 -0800 Subject: [PATCH 20/33] add mypy target directory --- .github/workflows/python_lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_lint.yml b/.github/workflows/python_lint.yml index 5cbebf066..f1befef15 100644 --- a/.github/workflows/python_lint.yml +++ b/.github/workflows/python_lint.yml @@ -75,4 +75,4 @@ jobs: # Job-specific step(s): - name: Run mypy - run: poetry run mypy --config-file mypy.ini --install-types --non-interactive + run: poetry run mypy --config-file mypy.ini --non-interactive airbyte_cdk From 92e69946e88ad8eb67372cf16208bea98c8932a0 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 12:21:08 -0800 Subject: [PATCH 21/33] ci: remove non-interactive flag --- .github/workflows/python_lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_lint.yml b/.github/workflows/python_lint.yml index f1befef15..6dbc6b1c4 100644 --- a/.github/workflows/python_lint.yml +++ b/.github/workflows/python_lint.yml @@ -75,4 +75,4 @@ jobs: # Job-specific step(s): - name: Run mypy - run: poetry run mypy --config-file mypy.ini --non-interactive airbyte_cdk + run: poetry run mypy --config-file mypy.ini airbyte_cdk From 3eff135bfe6ee6c8628b1e8cdf40d5289cb7e629 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 12:50:43 -0800 Subject: [PATCH 22/33] chore: fix or noqa remaining mypy issues (now 100% pass) --- .../cli/source_declarative_manifest/_run.py | 4 +- .../connector_builder/message_grouper.py | 8 +-- .../destinations/vector_db_based/embedder.py | 4 +- airbyte_cdk/entrypoint.py | 9 ++-- .../declarative/datetime/min_max_datetime.py | 2 +- .../incremental/datetime_based_cursor.py | 8 +-- .../substream_partition_router.py | 16 ++++-- .../error_handlers/http_response_filter.py | 10 ++-- .../paginators/default_paginator.py | 2 +- ...datetime_based_request_options_provider.py | 2 +- .../file_based/file_types/excel_parser.py | 9 ++-- .../cursor/default_file_based_cursor.py | 2 +- airbyte_cdk/sources/streams/core.py | 6 +-- airbyte_cdk/sources/streams/http/http.py | 2 +- .../http/requests_native_auth/oauth.py | 50 ++++++++++--------- airbyte_cdk/test/entrypoint_wrapper.py | 13 +++-- 16 files changed, 86 insertions(+), 61 deletions(-) diff --git a/airbyte_cdk/cli/source_declarative_manifest/_run.py b/airbyte_cdk/cli/source_declarative_manifest/_run.py index 376695fef..232ac302f 100644 --- a/airbyte_cdk/cli/source_declarative_manifest/_run.py +++ b/airbyte_cdk/cli/source_declarative_manifest/_run.py @@ -152,7 +152,9 @@ def handle_remote_manifest_command(args: list[str]) -> None: ) -def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource: +def create_declarative_source( + args: list[str], +) -> ConcurrentDeclarativeSource: # type: ignore [type-arg] """Creates the source with the injected config. This essentially does what other low-code sources do at build time, but at runtime, diff --git a/airbyte_cdk/connector_builder/message_grouper.py b/airbyte_cdk/connector_builder/message_grouper.py index a0ec54c9f..44ccdc5a7 100644 --- a/airbyte_cdk/connector_builder/message_grouper.py +++ b/airbyte_cdk/connector_builder/message_grouper.py @@ -232,7 +232,7 @@ def _get_message_groups( current_slice_descriptor = self._parse_slice_description(message.log.message) # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message current_slice_pages = [] at_least_one_page_in_group = False - elif message.type == MessageType.LOG and message.log.message.startswith( + elif message.type == MessageType.LOG and message.log.message.startswith( # type: ignore[union-attr] # None doesn't have 'message' SliceLogger.SLICE_LOG_PREFIX ): # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message # parsing the first slice @@ -280,7 +280,7 @@ def _get_message_groups( datetime_format_inferrer.accumulate(message.record) elif ( message.type == MessageType.CONTROL - and message.control.type == OrchestratorType.CONNECTOR_CONFIG + and message.control.type == OrchestratorType.CONNECTOR_CONFIG # type: ignore[union-attr] # None doesn't have 'type' ): # type: ignore[union-attr] # AirbyteMessage with MessageType.CONTROL has control.type yield message.control elif message.type == MessageType.STATE: @@ -310,8 +310,8 @@ def _need_to_close_page( and message.type == MessageType.LOG and ( MessageGrouper._is_page_http_request(json_message) - or message.log.message.startswith("slice:") - ) # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message + or message.log.message.startswith("slice:") # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message + ) ) @staticmethod diff --git a/airbyte_cdk/destinations/vector_db_based/embedder.py b/airbyte_cdk/destinations/vector_db_based/embedder.py index a7610aea4..6889c8e16 100644 --- a/airbyte_cdk/destinations/vector_db_based/embedder.py +++ b/airbyte_cdk/destinations/vector_db_based/embedder.py @@ -107,7 +107,7 @@ def embedding_dimensions(self) -> int: class OpenAIEmbedder(BaseOpenAIEmbedder): def __init__(self, config: OpenAIEmbeddingConfigModel, chunk_size: int): super().__init__( - OpenAIEmbeddings( + OpenAIEmbeddings( # type: ignore [call-arg] openai_api_key=config.openai_key, max_retries=15, disallowed_special=() ), chunk_size, @@ -118,7 +118,7 @@ class AzureOpenAIEmbedder(BaseOpenAIEmbedder): def __init__(self, config: AzureOpenAIEmbeddingConfigModel, chunk_size: int): # Azure OpenAI API has — as of 20230927 — a limit of 16 documents per request super().__init__( - OpenAIEmbeddings( + OpenAIEmbeddings( # type: ignore [call-arg] openai_api_key=config.openai_key, chunk_size=16, max_retries=15, diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index de7ff746f..19cb4dfe7 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -248,17 +248,18 @@ def handle_record_counts( case Type.RECORD: stream_message_count[ HashableStreamDescriptor( - name=message.record.stream, namespace=message.record.namespace + name=message.record.stream, # type: ignore[union-attr] # record has `stream` + namespace=message.record.namespace, # type: ignore[union-attr] # record has `namespace` ) - ] += 1.0 # type: ignore[union-attr] # record has `stream` and `namespace` + ] += 1.0 case Type.STATE: stream_descriptor = message_utils.get_stream_descriptor(message) # Set record count from the counter onto the state message message.state.sourceStats = message.state.sourceStats or AirbyteStateStats() # type: ignore[union-attr] # state has `sourceStats` - message.state.sourceStats.recordCount = stream_message_count.get( + message.state.sourceStats.recordCount = stream_message_count.get( # type: ignore[union-attr] # state has `sourceStats` stream_descriptor, 0.0 - ) # type: ignore[union-attr] # state has `sourceStats` + ) # Reset the counter stream_message_count[stream_descriptor] = 0.0 diff --git a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py index 0c16daf97..eb407db44 100644 --- a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +++ b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py @@ -111,7 +111,7 @@ def create( if isinstance(interpolated_string_or_min_max_datetime, InterpolatedString) or isinstance( interpolated_string_or_min_max_datetime, str ): - return MinMaxDatetime( + return MinMaxDatetime( # type: ignore [call-arg] datetime=interpolated_string_or_min_max_datetime, parameters=parameters ) else: diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index 03998c479..4c9df46d0 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -133,7 +133,7 @@ def set_initial_state(self, stream_state: StreamState) -> None: :param stream_state: The state of the stream as returned by get_stream_state """ self._cursor = ( - stream_state.get(self.cursor_field.eval(self.config)) if stream_state else None + stream_state.get(self.cursor_field.eval(self.config)) if stream_state else None # type: ignore [union-attr] ) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__ def observe(self, stream_slice: StreamSlice, record: Record) -> None: @@ -158,7 +158,9 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None: ) if ( self._is_within_daterange_boundaries( - record, stream_slice.get(start_field), stream_slice.get(end_field) + record, + stream_slice.get(start_field), # type: ignore [arg-type] + stream_slice.get(end_field), # type: ignore [arg-type] ) # type: ignore # we know that stream_slices for these cursors will use a string representing an unparsed date and is_highest_observed_cursor_value ): @@ -368,7 +370,7 @@ def _get_request_options( self._partition_field_start.eval(self.config) ) if self.end_time_option and self.end_time_option.inject_into == option_type: - options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( + options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr] self._partition_field_end.eval(self.config) ) # type: ignore # field_name is always casted to an interpolated string return options diff --git a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py index 28925ae6a..adac145be 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py @@ -130,7 +130,7 @@ def _get_request_option( if value: params.update( { - parent_config.request_option.field_name.eval( + parent_config.request_option.field_name.eval( # type: ignore [union-attr] config=self.config ): value } @@ -162,7 +162,7 @@ def stream_slices(self) -> Iterable[StreamSlice]: extra_fields = None if parent_stream_config.extra_fields: extra_fields = [ - [field_path_part.eval(self.config) for field_path_part in field_path] + [field_path_part.eval(self.config) for field_path_part in field_path] # type: ignore [union-attr] for field_path in parent_stream_config.extra_fields ] # type: ignore # extra_fields is always casted to an interpolated string @@ -192,7 +192,10 @@ def stream_slices(self) -> Iterable[StreamSlice]: message=f"Parent stream returned records as invalid type {type(parent_record)}" ) try: - partition_value = dpath.get(parent_record, parent_field) + partition_value = dpath.get( + parent_record, # type: ignore [arg-type] + parent_field, + ) except KeyError: continue @@ -228,7 +231,10 @@ def _extract_extra_fields( if extra_fields: for extra_field_path in extra_fields: try: - extra_field_value = dpath.get(parent_record, extra_field_path) + extra_field_value = dpath.get( + parent_record, # type: ignore [arg-type] + extra_field_path, + ) self.logger.debug( f"Extracted extra_field_path: {extra_field_path} with value: {extra_field_value}" ) @@ -291,7 +297,7 @@ def set_initial_state(self, stream_state: StreamState) -> None: if not parent_state and incremental_dependency: # Attempt to retrieve child state substream_state = list(stream_state.values()) - substream_state = substream_state[0] if substream_state else {} + substream_state = substream_state[0] if substream_state else {} # type: ignore [assignment] # Incorrect type for assignment parent_state = {} # Copy child state to parent streams with incremental dependencies diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py index 82dfb661b..a64b31c27 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py @@ -151,16 +151,18 @@ def _create_error_message(self, response: requests.Response) -> Optional[str]: :param response: The HTTP response which can be used during interpolation :return: The evaluated error message string to be emitted """ - return self.error_message.eval( + return self.error_message.eval( # type: ignore [no-any-return, union-attr] self.config, response=self._safe_response_json(response), headers=response.headers ) # type: ignore # error_message is always cast to an interpolated string def _response_matches_predicate(self, response: requests.Response) -> bool: return ( bool( - self.predicate.condition - and self.predicate.eval( - None, response=self._safe_response_json(response), headers=response.headers + self.predicate.condition # type: ignore [union-attr] + and self.predicate.eval( # type: ignore [union-attr] + None, # type: ignore [arg-type] + response=self._safe_response_json(response), # type: ignore [arg-type] + headers=response.headers, ) ) if self.predicate diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py index c660f0327..e9476447a 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py @@ -194,7 +194,7 @@ def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping and self.pagination_strategy.get_page_size() and self.page_size_option.inject_into == option_type ): - options[self.page_size_option.field_name.eval(config=self.config)] = ( + options[self.page_size_option.field_name.eval(config=self.config)] = ( # type: ignore [union-attr] self.pagination_strategy.get_page_size() ) # type: ignore # field_name is always cast to an interpolated string return options diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py index 5ce7c9a3d..9e9228c95 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py @@ -85,7 +85,7 @@ def _get_request_options( self._partition_field_start.eval(self.config) ) if self.end_time_option and self.end_time_option.inject_into == option_type: - options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( + options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr] self._partition_field_end.eval(self.config) ) # type: ignore # field_name is always casted to an interpolated string return options diff --git a/airbyte_cdk/sources/file_based/file_types/excel_parser.py b/airbyte_cdk/sources/file_based/file_types/excel_parser.py index e9bc82b81..5a0332171 100644 --- a/airbyte_cdk/sources/file_based/file_types/excel_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/excel_parser.py @@ -69,7 +69,7 @@ async def infer_schema( df = self.open_and_parse_file(fp) for column, df_type in df.dtypes.items(): # Choose the broadest data type if the column's data type differs in dataframes - prev_frame_column_type = fields.get(column) + prev_frame_column_type = fields.get(column) # type: ignore [call-overload] fields[column] = self.dtype_to_json_type( # type: ignore [index] prev_frame_column_type, df_type, @@ -139,7 +139,10 @@ def file_read_mode(self) -> FileReadMode: return FileReadMode.READ_BINARY @staticmethod - def dtype_to_json_type(current_type: Optional[str], dtype: dtype_) -> str: + def dtype_to_json_type( + current_type: Optional[str], + dtype: dtype_, # type: ignore [type-arg] + ) -> str: """ Convert Pandas DataFrame types to Airbyte Types. @@ -190,4 +193,4 @@ def open_and_parse_file(fp: Union[IOBase, str, Path]) -> pd.DataFrame: Returns: pd.DataFrame: Parsed data from the Excel file. """ - return pd.ExcelFile(fp, engine="calamine").parse() # type: ignore [arg-type] + return pd.ExcelFile(fp, engine="calamine").parse() # type: ignore [arg-type, call-overload, no-any-return] diff --git a/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py index 814bc1a1e..08ad8c3ae 100644 --- a/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py @@ -21,7 +21,7 @@ class DefaultFileBasedCursor(AbstractFileBasedCursor): CURSOR_FIELD = "_ab_source_file_last_modified" def __init__(self, stream_config: FileBasedStreamConfig, **_: Any): - super().__init__(stream_config) + super().__init__(stream_config) # type: ignore [safe-super] self._file_to_datetime_history: MutableMapping[str, str] = {} self._time_window_if_history_is_full = timedelta( days=stream_config.days_to_sync_if_history_is_full diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index 6b51efb7f..6973d94ff 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -217,7 +217,7 @@ def read( # type: ignore # ignoring typing for ConnectorStateManager because o # should be fixed on the stream implementation, but we should also protect against this in the CDK as well stream_state_tracker = self.get_updated_state( stream_state_tracker, - record_data, + record_data, # type: ignore [arg-type] ) self._observe_state(checkpoint_reader, stream_state_tracker) record_counter += 1 @@ -277,7 +277,7 @@ def read_only_records(self, state: Optional[Mapping[str, Any]] = None) -> Iterab if state else {}, # read() expects MutableMapping instead of Mapping which is used more often state_manager=None, - internal_config=InternalConfig(), + internal_config=InternalConfig(), # type: ignore [call-arg] ) @abstractmethod @@ -653,7 +653,7 @@ def _checkpoint_state( # type: ignore # ignoring typing for ConnectorStateMana # todo: This can be consolidated into one ConnectorStateManager.update_and_create_state_message() method, but I want # to reduce changes right now and this would span concurrent as well state_manager.update_state_for_stream(self.name, self.namespace, stream_state) - return state_manager.create_state_message(self.name, self.namespace) + return state_manager.create_state_message(self.name, self.namespace) # type: ignore [no-any-return] @property def configured_json_schema(self) -> Optional[Dict[str, Any]]: diff --git a/airbyte_cdk/sources/streams/http/http.py b/airbyte_cdk/sources/streams/http/http.py index d28351743..e6cb84277 100644 --- a/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte_cdk/sources/streams/http/http.py @@ -594,7 +594,7 @@ def stream_slices( # Skip non-records (eg AirbyteLogMessage) if isinstance(parent_record, AirbyteMessage): if parent_record.type == MessageType.RECORD: - parent_record = parent_record.record.data + parent_record = parent_record.record.data # type: ignore [assignment, union-attr] # Incorrect type for assignment else: continue elif isinstance(parent_record, Record): diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index a9129e653..8e5c71458 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -52,7 +52,7 @@ def __init__( self._refresh_request_body = refresh_request_body self._grant_type = grant_type - self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1) + self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1) # type: ignore [no-untyped-call] self._token_expiry_date_format = token_expiry_date_format self._token_expiry_is_time_of_expiration = token_expiry_is_time_of_expiration self._access_token = None @@ -75,14 +75,14 @@ def get_refresh_token(self) -> str: def get_access_token_name(self) -> str: return self._access_token_name - def get_scopes(self) -> [str]: - return self._scopes + def get_scopes(self) -> list[str]: + return self._scopes # type: ignore [return-value] def get_expires_in_name(self) -> str: return self._expires_in_name def get_refresh_request_body(self) -> Mapping[str, Any]: - return self._refresh_request_body + return self._refresh_request_body # type: ignore [return-value] def get_grant_type(self) -> str: return self._grant_type @@ -103,11 +103,11 @@ def token_expiry_date_format(self) -> Optional[str]: @property def access_token(self) -> str: - return self._access_token + return self._access_token # type: ignore [return-value] @access_token.setter def access_token(self, value: str) -> None: - self._access_token = value + self._access_token = value # type: ignore [assignment] # Incorrect type for assignment class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator): @@ -124,11 +124,11 @@ def __init__( self, connector_config: Mapping[str, Any], token_refresh_endpoint: str, - scopes: List[str] = None, + scopes: List[str] | None = None, access_token_name: str = "access_token", expires_in_name: str = "expires_in", refresh_token_name: str = "refresh_token", - refresh_request_body: Mapping[str, Any] = None, + refresh_request_body: Mapping[str, Any] | None = None, grant_type: str = "refresh_token", client_id: Optional[str] = None, client_secret: Optional[str] = None, @@ -162,14 +162,17 @@ def __init__( message_repository (MessageRepository): the message repository used to emit logs on HTTP requests and control message on config update """ self._client_id = ( - client_id + client_id # type: ignore [assignment] # Incorrect type for assignment if client_id is not None else dpath.get(connector_config, ("credentials", "client_id")) # type: ignore [arg-type] ) self._client_secret = ( - client_secret + client_secret # type: ignore [assignment] # Incorrect type for assignment if client_secret is not None - else dpath.get(connector_config, ("credentials", "client_secret")) + else dpath.get( + connector_config, # type: ignore [arg-type] + ("credentials", "client_secret"), + ) ) self._access_token_config_path = access_token_config_path self._refresh_token_config_path = refresh_token_config_path @@ -207,8 +210,8 @@ def get_client_secret(self) -> str: @property def access_token(self) -> str: - return dpath.get( - self._connector_config, + return dpath.get( # type: ignore [return-value] + self._connector_config, # type: ignore [arg-type] self._access_token_config_path, default="", ) @@ -216,39 +219,39 @@ def access_token(self) -> str: @access_token.setter def access_token(self, new_access_token: str) -> None: dpath.new( - self._connector_config, + self._connector_config, # type: ignore [arg-type] self._access_token_config_path, new_access_token, ) def get_refresh_token(self) -> str: - return dpath.get( - self._connector_config, + return dpath.get( # type: ignore [return-value] + self._connector_config, # type: ignore [arg-type] self._refresh_token_config_path, default="", ) def set_refresh_token(self, new_refresh_token: str) -> None: dpath.new( - self._connector_config, + self._connector_config, # type: ignore [arg-type] self._refresh_token_config_path, new_refresh_token, ) def get_token_expiry_date(self) -> pendulum.DateTime: expiry_date = dpath.get( - self._connector_config, + self._connector_config, # type: ignore [arg-type] self._token_expiry_date_config_path, default="", ) - return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) + return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) # type: ignore [arg-type, return-value, no-untyped-call] def set_token_expiry_date( # type: ignore[override] self, new_token_expiry_date: pendulum.DateTime, ) -> None: dpath.new( - self._connector_config, + self._connector_config, # type: ignore [arg-type] self._token_expiry_date_config_path, str(new_token_expiry_date), ) @@ -259,7 +262,8 @@ def token_has_expired(self) -> bool: @staticmethod def get_new_token_expiry_date( - access_token_expires_in: str, token_expiry_date_format: str = None + access_token_expires_in: str, + token_expiry_date_format: str | None = None, ) -> pendulum.DateTime: if token_expiry_date_format: return pendulum.from_format(access_token_expires_in, token_expiry_date_format) @@ -287,10 +291,10 @@ def get_access_token(self) -> str: # message directly in the console, this is needed if not isinstance(self._message_repository, NoopMessageRepository): self._message_repository.emit_message( - create_connector_config_control_message(self._connector_config) + create_connector_config_control_message(self._connector_config) # type: ignore [arg-type] ) else: - emit_configuration_as_airbyte_control_message(self._connector_config) + emit_configuration_as_airbyte_control_message(self._connector_config) # type: ignore [arg-type] return self.access_token def refresh_access_token( # type: ignore[override] # Signature doesn't match base class diff --git a/airbyte_cdk/test/entrypoint_wrapper.py b/airbyte_cdk/test/entrypoint_wrapper.py index 2fe3151af..60514bf73 100644 --- a/airbyte_cdk/test/entrypoint_wrapper.py +++ b/airbyte_cdk/test/entrypoint_wrapper.py @@ -129,14 +129,19 @@ def _get_trace_message_by_trace_type(self, trace_type: TraceType) -> List[Airbyt return [ message for message in self._get_message_by_types([Type.TRACE]) - if message.trace.type == trace_type - ] # type: ignore[union-attr] # trace has `type` + if message.trace.type == trace_type # type: ignore[union-attr] # trace has `type` + ] def is_in_logs(self, pattern: str) -> bool: """Check if any log message case-insensitive matches the pattern.""" return any( - re.search(pattern, entry.log.message, flags=re.IGNORECASE) for entry in self.logs - ) # type: ignore[union-attr] # log has `message` + re.search( + pattern, + entry.log.message, # type: ignore[union-attr] # log has `message` + flags=re.IGNORECASE, + ) + for entry in self.logs + ) def is_not_in_logs(self, pattern: str) -> bool: """Check if no log message matches the case-insensitive pattern.""" From d68ac349915893d152ee02418850658764013f1f Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 13:03:05 -0800 Subject: [PATCH 23/33] chore: fix mypy task in poe --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a755b0c9e..8c81a42e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -144,7 +144,7 @@ format-fix = {sequence = ["_format-fix-ruff", "_format-fix-prettier"], help = "F # Linting/Typing check tasks _lint-ruff = {cmd = "poetry run ruff check .", help = "Lint with Ruff."} -type-check = {cmd = "poetry run mypy .", help = "Type check modified files with mypy."} +type-check = {cmd = "poetry run mypy airbyte_cdk", help = "Type check modified files with mypy."} lint = {sequence = ["_lint-ruff", "type-check"], help = "Lint all code. Includes type checking.", ignore_fail = "return_non_zero"} # Lockfile check task From 59d585e5bd96d972abe555f0153fd428af8f9572 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 13:17:36 -0800 Subject: [PATCH 24/33] chore: fix type: `set[str]` instead of `Template` --- airbyte_cdk/sources/declarative/interpolation/jinja.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte_cdk/sources/declarative/interpolation/jinja.py b/airbyte_cdk/sources/declarative/interpolation/jinja.py index 317ed7dfd..71837b0d1 100644 --- a/airbyte_cdk/sources/declarative/interpolation/jinja.py +++ b/airbyte_cdk/sources/declarative/interpolation/jinja.py @@ -120,7 +120,7 @@ def _literal_eval(self, result: Optional[str], valid_types: Optional[Tuple[Type[ def _eval(self, s: Optional[str], context: Mapping[str, Any]) -> Optional[str]: try: undeclared = self._find_undeclared_variables(s) - undeclared_not_in_context = {var for var in undeclared if var not in context} # type: ignore [attr-defined] # `Template` class not iterable + undeclared_not_in_context = {var for var in undeclared if var not in context} if undeclared_not_in_context: raise ValueError( f"Jinja macro has undeclared variables: {undeclared_not_in_context}. Context: {context}" @@ -132,12 +132,12 @@ def _eval(self, s: Optional[str], context: Mapping[str, Any]) -> Optional[str]: return s @cache - def _find_undeclared_variables(self, s: Optional[str]) -> Template: + def _find_undeclared_variables(self, s: Optional[str]) -> set[str]: """ Find undeclared variables and cache them """ ast = self._environment.parse(s) # type: ignore # parse is able to handle None - return meta.find_undeclared_variables(ast) # type: ignore [return-value] # Expected `Template` but got `set[str]` + return meta.find_undeclared_variables(ast) @cache def _compile(self, s: Optional[str]) -> Template: From 40f9bcad0a27ba661ab88971a37b1fd3cd4441fb Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 13:20:00 -0800 Subject: [PATCH 25/33] revert: undo added guard statement --- airbyte_cdk/destinations/vector_db_based/writer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/airbyte_cdk/destinations/vector_db_based/writer.py b/airbyte_cdk/destinations/vector_db_based/writer.py index 1dcb4e009..45c7c7326 100644 --- a/airbyte_cdk/destinations/vector_db_based/writer.py +++ b/airbyte_cdk/destinations/vector_db_based/writer.py @@ -90,9 +90,6 @@ def write( ) ].extend(record_chunks) if record_id_to_delete is not None: - if message.record is None: - raise ValueError("Record messages cannot have null `record` property.") - self.ids_to_delete[ ( # type: ignore [index] # expected "tuple[str, str]", got "tuple[str | Any | None, str | Any]" message.record.namespace, # type: ignore [union-attr] # record not None From c8c0ef486754de30ae1640e3462f7c9b84b0f349 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 13:22:49 -0800 Subject: [PATCH 26/33] revert: undo added guard statement (2) --- airbyte_cdk/utils/traced_exception.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/airbyte_cdk/utils/traced_exception.py b/airbyte_cdk/utils/traced_exception.py index e9b9cfb83..05f841d13 100644 --- a/airbyte_cdk/utils/traced_exception.py +++ b/airbyte_cdk/utils/traced_exception.py @@ -130,9 +130,6 @@ def as_sanitized_airbyte_message( stream_descriptors are defined, the one from `as_sanitized_airbyte_message` will be discarded. """ error_message = self.as_airbyte_message(stream_descriptor=stream_descriptor) - if error_message.trace is None or error_message.trace.error is None: - raise ValueError("Trace `error` property should not be None.") - if error_message.trace.error.message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage error_message.trace.error.message = filter_secrets( error_message.trace.error.message, From c29447c0cfda2e336f010c91b0d5fd126051f12c Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 13:24:01 -0800 Subject: [PATCH 27/33] add missing noqas --- airbyte_cdk/utils/traced_exception.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/utils/traced_exception.py b/airbyte_cdk/utils/traced_exception.py index 05f841d13..54c1e52e0 100644 --- a/airbyte_cdk/utils/traced_exception.py +++ b/airbyte_cdk/utils/traced_exception.py @@ -131,8 +131,8 @@ def as_sanitized_airbyte_message( """ error_message = self.as_airbyte_message(stream_descriptor=stream_descriptor) if error_message.trace.error.message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage - error_message.trace.error.message = filter_secrets( - error_message.trace.error.message, + error_message.trace.error.message = filter_secrets( # type: ignore[union-attr] + error_message.trace.error.message, # type: ignore[union-attr] ) # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage if error_message.trace.error.internal_message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage error_message.trace.error.internal_message = filter_secrets( # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage From 39d66af4530e6a8c26137a15b3285d2265d7e263 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 13:58:06 -0800 Subject: [PATCH 28/33] remove unused type ignores --- airbyte_cdk/connector_builder/message_grouper.py | 8 ++++---- airbyte_cdk/entrypoint.py | 4 ++-- airbyte_cdk/logger.py | 2 +- airbyte_cdk/sources/abstract_source.py | 2 +- airbyte_cdk/sources/config.py | 2 +- airbyte_cdk/sources/connector_state_manager.py | 11 ++++++++--- airbyte_cdk/sources/declarative/auth/oauth.py | 2 +- .../declarative/concurrent_declarative_source.py | 2 +- .../declarative/incremental/datetime_based_cursor.py | 6 +++--- .../sources/declarative/interpolation/jinja.py | 2 +- .../sources/declarative/interpolation/macros.py | 2 +- .../declarative/parsers/model_to_component_factory.py | 11 +++++------ .../partition_routers/substream_partition_router.py | 4 ++-- .../error_handlers/default_error_handler.py | 2 +- .../requesters/error_handlers/http_response_filter.py | 6 +++--- .../datetime_based_request_options_provider.py | 2 +- airbyte_cdk/sources/declarative/spec/spec.py | 2 +- .../sources/file_based/file_types/csv_parser.py | 4 ++-- airbyte_cdk/sources/streams/concurrent/cursor.py | 2 +- .../datetime_stream_state_converter.py | 4 ++-- airbyte_cdk/sources/streams/core.py | 2 +- airbyte_cdk/sources/streams/http/http.py | 10 +++++----- airbyte_cdk/sources/streams/http/http_client.py | 4 ++-- airbyte_cdk/sources/utils/record_helper.py | 2 +- airbyte_cdk/sources/utils/schema_helpers.py | 2 +- airbyte_cdk/test/entrypoint_wrapper.py | 2 +- airbyte_cdk/test/mock_http/response_builder.py | 2 +- airbyte_cdk/utils/traced_exception.py | 4 ++-- 28 files changed, 56 insertions(+), 52 deletions(-) diff --git a/airbyte_cdk/connector_builder/message_grouper.py b/airbyte_cdk/connector_builder/message_grouper.py index 44ccdc5a7..ce43afab8 100644 --- a/airbyte_cdk/connector_builder/message_grouper.py +++ b/airbyte_cdk/connector_builder/message_grouper.py @@ -71,7 +71,7 @@ def _cursor_field_to_nested_and_composite_field( is_nested_key = isinstance(field[0], str) if is_nested_key: - return [field] # type: ignore # the type of field is expected to be List[str] here + return [field] raise ValueError(f"Unknown type for cursor field `{field}") @@ -234,7 +234,7 @@ def _get_message_groups( at_least_one_page_in_group = False elif message.type == MessageType.LOG and message.log.message.startswith( # type: ignore[union-attr] # None doesn't have 'message' SliceLogger.SLICE_LOG_PREFIX - ): # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message + ): # parsing the first slice current_slice_descriptor = self._parse_slice_description(message.log.message) # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message elif message.type == MessageType.LOG: @@ -281,7 +281,7 @@ def _get_message_groups( elif ( message.type == MessageType.CONTROL and message.control.type == OrchestratorType.CONNECTOR_CONFIG # type: ignore[union-attr] # None doesn't have 'type' - ): # type: ignore[union-attr] # AirbyteMessage with MessageType.CONTROL has control.type + ): yield message.control elif message.type == MessageType.STATE: latest_state_message = message.state # type: ignore[assignment] @@ -356,7 +356,7 @@ def _close_page( request=current_page_request, response=current_page_response, records=deepcopy(current_page_records), # type: ignore [arg-type] - ) # type: ignore + ) ) current_page_records.clear() diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index 19cb4dfe7..2864bbb0b 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -22,7 +22,7 @@ from airbyte_cdk.connector import TConfig from airbyte_cdk.exception_handler import init_uncaught_exception_handler from airbyte_cdk.logger import init_logger -from airbyte_cdk.models import ( # type: ignore [attr-defined] +from airbyte_cdk.models import ( AirbyteConnectionStatus, AirbyteMessage, AirbyteMessageSerializer, @@ -281,7 +281,7 @@ def set_up_secret_filter(config: TConfig, connection_specification: Mapping[str, @staticmethod def airbyte_message_to_string(airbyte_message: AirbyteMessage) -> str: - return orjson.dumps(AirbyteMessageSerializer.dump(airbyte_message)).decode() # type: ignore[no-any-return] # orjson.dumps(message).decode() always returns string + return orjson.dumps(AirbyteMessageSerializer.dump(airbyte_message)).decode() @classmethod def extract_state(cls, args: List[str]) -> Optional[Any]: diff --git a/airbyte_cdk/logger.py b/airbyte_cdk/logger.py index 48b25215d..78061b605 100644 --- a/airbyte_cdk/logger.py +++ b/airbyte_cdk/logger.py @@ -78,7 +78,7 @@ def format(self, record: logging.LogRecord) -> str: log_message = AirbyteMessage( type=Type.LOG, log=AirbyteLogMessage(level=airbyte_level, message=message) ) - return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode() # type: ignore[no-any-return] # orjson.dumps(message).decode() always returns string + return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode() @staticmethod def extract_extra_args_from_record(record: logging.LogRecord) -> Mapping[str, Any]: diff --git a/airbyte_cdk/sources/abstract_source.py b/airbyte_cdk/sources/abstract_source.py index 34ba816bc..ab9ee48b8 100644 --- a/airbyte_cdk/sources/abstract_source.py +++ b/airbyte_cdk/sources/abstract_source.py @@ -200,7 +200,7 @@ def read( if len(stream_name_to_exception) > 0: error_message = generate_failed_streams_error_message( {key: [value] for key, value in stream_name_to_exception.items()} - ) # type: ignore # for some reason, mypy can't figure out the types for key and value + ) logger.info(error_message) # We still raise at least one exception when a stream raises an exception because the platform currently relies # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error diff --git a/airbyte_cdk/sources/config.py b/airbyte_cdk/sources/config.py index 8679ebbb7..ea91b17f3 100644 --- a/airbyte_cdk/sources/config.py +++ b/airbyte_cdk/sources/config.py @@ -24,4 +24,4 @@ def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]: rename_key(schema, old_key="anyOf", new_key="oneOf") # UI supports only oneOf expand_refs(schema) schema.pop("description", None) # description added from the docstring - return schema # type: ignore[no-any-return] + return schema diff --git a/airbyte_cdk/sources/connector_state_manager.py b/airbyte_cdk/sources/connector_state_manager.py index a53e7785d..914374a55 100644 --- a/airbyte_cdk/sources/connector_state_manager.py +++ b/airbyte_cdk/sources/connector_state_manager.py @@ -4,7 +4,7 @@ import copy from dataclasses import dataclass -from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union +from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union, cast from airbyte_cdk.models import ( AirbyteMessage, @@ -15,6 +15,7 @@ StreamDescriptor, ) from airbyte_cdk.models import Type as MessageType +from airbyte_cdk.models.airbyte_protocol import AirbyteGlobalState, AirbyteStateBlob @dataclass(frozen=True) @@ -118,8 +119,12 @@ def _extract_from_state_message( is_global = cls._is_global_state(state) if is_global: - global_state = state[0].global_ # type: ignore # We verified state is a list in _is_global_state - shared_state = copy.deepcopy(global_state.shared_state, {}) # type: ignore[union-attr] # global_state has shared_state + # We already validate that this is a global state message, not None: + global_state = cast(AirbyteGlobalState, state[0].global_) + # global_state has shared_state, also not None: + shared_state: AirbyteStateBlob = cast( + AirbyteStateBlob, copy.deepcopy(global_state.shared_state, {}) + ) streams = { HashableStreamDescriptor( name=per_stream_state.stream_descriptor.name, diff --git a/airbyte_cdk/sources/declarative/auth/oauth.py b/airbyte_cdk/sources/declarative/auth/oauth.py index 18bcc9fce..8ec671f3e 100644 --- a/airbyte_cdk/sources/declarative/auth/oauth.py +++ b/airbyte_cdk/sources/declarative/auth/oauth.py @@ -135,7 +135,7 @@ def get_grant_type(self) -> str: return self.grant_type.eval(self.config) # type: ignore # eval returns a string in this context def get_refresh_request_body(self) -> Mapping[str, Any]: - return self._refresh_request_body.eval(self.config) # type: ignore # eval should return a Mapping in this context + return self._refresh_request_body.eval(self.config) def get_token_expiry_date(self) -> pendulum.DateTime: return self._token_expiry_date # type: ignore # _token_expiry_date is a pendulum.DateTime. It is never None despite what mypy thinks diff --git a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py index 40fd23edb..f41a1d287 100644 --- a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py +++ b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py @@ -128,7 +128,7 @@ def __init__( initial_number_of_partitions_to_generate=initial_number_of_partitions_to_generate, logger=self.logger, slice_logger=self._slice_logger, - message_repository=self.message_repository, # type: ignore # message_repository is always instantiated with a value by factory + message_repository=self.message_repository, ) def read( diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index 4c9df46d0..d6d329aec 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -134,7 +134,7 @@ def set_initial_state(self, stream_state: StreamState) -> None: """ self._cursor = ( stream_state.get(self.cursor_field.eval(self.config)) if stream_state else None # type: ignore [union-attr] - ) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__ + ) def observe(self, stream_slice: StreamSlice, record: Record) -> None: """ @@ -161,7 +161,7 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None: record, stream_slice.get(start_field), # type: ignore [arg-type] stream_slice.get(end_field), # type: ignore [arg-type] - ) # type: ignore # we know that stream_slices for these cursors will use a string representing an unparsed date + ) and is_highest_observed_cursor_value ): self._highest_observed_cursor_field_value = record_cursor_value @@ -372,7 +372,7 @@ def _get_request_options( if self.end_time_option and self.end_time_option.inject_into == option_type: options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr] self._partition_field_end.eval(self.config) - ) # type: ignore # field_name is always casted to an interpolated string + ) return options def should_be_synced(self, record: Record) -> bool: diff --git a/airbyte_cdk/sources/declarative/interpolation/jinja.py b/airbyte_cdk/sources/declarative/interpolation/jinja.py index 71837b0d1..ec5e861cd 100644 --- a/airbyte_cdk/sources/declarative/interpolation/jinja.py +++ b/airbyte_cdk/sources/declarative/interpolation/jinja.py @@ -27,7 +27,7 @@ class StreamPartitionAccessEnvironment(SandboxedEnvironment): def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool: if attr in ["_partition"]: return True - return super().is_safe_attribute(obj, attr, value) # type: ignore # for some reason, mypy says 'Returning Any from function declared to return "bool"' + return super().is_safe_attribute(obj, attr, value) class JinjaInterpolation(Interpolation): diff --git a/airbyte_cdk/sources/declarative/interpolation/macros.py b/airbyte_cdk/sources/declarative/interpolation/macros.py index ce448c127..e786f0116 100644 --- a/airbyte_cdk/sources/declarative/interpolation/macros.py +++ b/airbyte_cdk/sources/declarative/interpolation/macros.py @@ -116,7 +116,7 @@ def duration(datestring: str) -> Union[datetime.timedelta, isodate.Duration]: Usage: `"{{ now_utc() - duration('P1D') }}"` """ - return parse_duration(datestring) # type: ignore # mypy thinks this returns Any for some reason + return parse_duration(datestring) def format_datetime( diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 49e047d37..331d4a464 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -397,7 +397,7 @@ def __init__( self._disable_retries = disable_retries self._disable_cache = disable_cache self._disable_resumable_full_refresh = disable_resumable_full_refresh - self._message_repository = message_repository or InMemoryMessageRepository( # type: ignore + self._message_repository = message_repository or InMemoryMessageRepository( self._evaluate_log_level(emit_connector_builder_messages) ) @@ -645,7 +645,7 @@ def create_legacy_to_per_partition_state_migration( declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. config, declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] - ) # type: ignore # The retriever type was already checked + ) def create_session_token_authenticator( self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any @@ -675,7 +675,7 @@ def create_session_token_authenticator( return ModelToComponentFactory.create_bearer_authenticator( BearerAuthenticatorModel(type="BearerAuthenticator", api_token=""), # type: ignore # $parameters has a default value config, - token_provider=token_provider, # type: ignore # $parameters defaults to None + token_provider=token_provider, ) else: return ModelToComponentFactory.create_api_key_authenticator( @@ -822,7 +822,6 @@ def create_concurrent_cursor_from_datetime_based_cursor( input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, is_sequential_state=True, cursor_granularity=cursor_granularity, - # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice ) start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] @@ -896,7 +895,7 @@ def create_concurrent_cursor_from_datetime_based_cursor( stream_name=stream_name, stream_namespace=stream_namespace, stream_state=stream_state, - message_repository=self._message_repository, # type: ignore # message_repository is always instantiated with a value by factory + message_repository=self._message_repository, connector_state_manager=state_manager, connector_state_converter=connector_state_converter, cursor_field=cursor_field, @@ -1709,7 +1708,7 @@ def create_oauth_authenticator( refresh_token=model.refresh_token, scopes=model.scopes, token_expiry_date=model.token_expiry_date, - token_expiry_date_format=model.token_expiry_date_format, # type: ignore + token_expiry_date_format=model.token_expiry_date_format, token_expiry_is_time_of_expiration=bool(model.token_expiry_date_format), token_refresh_endpoint=model.token_refresh_endpoint, config=config, diff --git a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py index adac145be..1c7bb6961 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py @@ -134,7 +134,7 @@ def _get_request_option( config=self.config ): value } - ) # type: ignore # field_name is always casted to an interpolated string + ) return params def stream_slices(self) -> Iterable[StreamSlice]: @@ -164,7 +164,7 @@ def stream_slices(self) -> Iterable[StreamSlice]: extra_fields = [ [field_path_part.eval(self.config) for field_path_part in field_path] # type: ignore [union-attr] for field_path in parent_stream_config.extra_fields - ] # type: ignore # extra_fields is always casted to an interpolated string + ] # read_stateless() assumes the parent is not concurrent. This is currently okay since the concurrent CDK does # not support either substreams or RFR, but something that needs to be considered once we do diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py index 1340e8595..b70ceaaeb 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py @@ -141,7 +141,7 @@ def backoff_time( for backoff_strategy in self.backoff_strategies: backoff = backoff_strategy.backoff_time( response_or_exception=response_or_exception, attempt_count=attempt_count - ) # type: ignore # attempt_count maintained for compatibility with low code CDK + ) if backoff: return backoff return backoff diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py index a64b31c27..a2fc80007 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py @@ -153,7 +153,7 @@ def _create_error_message(self, response: requests.Response) -> Optional[str]: """ return self.error_message.eval( # type: ignore [no-any-return, union-attr] self.config, response=self._safe_response_json(response), headers=response.headers - ) # type: ignore # error_message is always cast to an interpolated string + ) def _response_matches_predicate(self, response: requests.Response) -> bool: return ( @@ -161,13 +161,13 @@ def _response_matches_predicate(self, response: requests.Response) -> bool: self.predicate.condition # type: ignore [union-attr] and self.predicate.eval( # type: ignore [union-attr] None, # type: ignore [arg-type] - response=self._safe_response_json(response), # type: ignore [arg-type] + response=self._safe_response_json(response), headers=response.headers, ) ) if self.predicate else False - ) # type: ignore # predicate is always cast to an interpolated string + ) def _response_contains_error_message(self, response: requests.Response) -> bool: if not self.error_message_contains: diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py index 9e9228c95..05e06db71 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py @@ -87,5 +87,5 @@ def _get_request_options( if self.end_time_option and self.end_time_option.inject_into == option_type: options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore [union-attr] self._partition_field_end.eval(self.config) - ) # type: ignore # field_name is always casted to an interpolated string + ) return options diff --git a/airbyte_cdk/sources/declarative/spec/spec.py b/airbyte_cdk/sources/declarative/spec/spec.py index 05fa079bf..914e99e93 100644 --- a/airbyte_cdk/sources/declarative/spec/spec.py +++ b/airbyte_cdk/sources/declarative/spec/spec.py @@ -9,7 +9,7 @@ AdvancedAuth, ConnectorSpecification, ConnectorSpecificationSerializer, -) # type: ignore [attr-defined] +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow diff --git a/airbyte_cdk/sources/file_based/file_types/csv_parser.py b/airbyte_cdk/sources/file_based/file_types/csv_parser.py index a367d7ead..e3010690e 100644 --- a/airbyte_cdk/sources/file_based/file_types/csv_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/csv_parser.py @@ -117,7 +117,7 @@ def _get_headers(self, fp: IOBase, config_format: CsvFormat, dialect_name: str) """ # Note that this method assumes the dialect has already been registered if we're parsing the headers if isinstance(config_format.header_definition, CsvHeaderUserProvided): - return config_format.header_definition.column_names # type: ignore # should be CsvHeaderUserProvided given the type + return config_format.header_definition.column_names if isinstance(config_format.header_definition, CsvHeaderAutogenerated): self._skip_rows( @@ -229,7 +229,7 @@ def parse_records( if discovered_schema: property_types = { col: prop["type"] for col, prop in discovered_schema["properties"].items() - } # type: ignore # discovered_schema["properties"] is known to be a mapping + } deduped_property_types = CsvParser._pre_propcess_property_types(property_types) else: deduped_property_types = {} diff --git a/airbyte_cdk/sources/streams/concurrent/cursor.py b/airbyte_cdk/sources/streams/concurrent/cursor.py index 4f103b224..cbce82a94 100644 --- a/airbyte_cdk/sources/streams/concurrent/cursor.py +++ b/airbyte_cdk/sources/streams/concurrent/cursor.py @@ -473,7 +473,7 @@ def should_be_synced(self, record: Record) -> bool: :return: True if the record's cursor value falls within the sync boundaries """ try: - record_cursor_value: CursorValueType = self._extract_cursor_value(record) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__ + record_cursor_value: CursorValueType = self._extract_cursor_value(record) except ValueError: self._log_for_record_without_cursor_value() return True diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py index 714789af3..3f53a9234 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py @@ -141,7 +141,7 @@ def parse_timestamp(self, timestamp: int) -> datetime: raise ValueError( f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})" ) - return dt_object # type: ignore # we are manually type checking because pendulum.parse may return different types + return dt_object class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter): @@ -178,7 +178,7 @@ def parse_timestamp(self, timestamp: str) -> datetime: raise ValueError( f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})" ) - return dt_object # type: ignore # we are manually type checking because pendulum.parse may return different types + return dt_object class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter): diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index 6973d94ff..d908be675 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -317,7 +317,7 @@ def as_airbyte_stream(self) -> AirbyteStream: # If we can offer incremental we always should. RFR is always less reliable than incremental which uses a real cursor value if self.supports_incremental: stream.source_defined_cursor = self.source_defined_cursor - stream.supported_sync_modes.append(SyncMode.incremental) # type: ignore + stream.supported_sync_modes.append(SyncMode.incremental) stream.default_cursor_field = self._wrapped_cursor_field() keys = Stream._wrapped_primary_key(self.primary_key) diff --git a/airbyte_cdk/sources/streams/http/http.py b/airbyte_cdk/sources/streams/http/http.py index e6cb84277..40eab27a3 100644 --- a/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte_cdk/sources/streams/http/http.py @@ -639,15 +639,15 @@ def interpret_response( return ErrorResolution( response_action=ResponseAction.RATE_LIMITED, failure_type=FailureType.transient_error, - error_message=f"Response status code: {response_or_exception.status_code}. Retrying...", # type: ignore[union-attr] + error_message=f"Response status code: {response_or_exception.status_code}. Retrying...", ) return ErrorResolution( response_action=ResponseAction.RETRY, failure_type=FailureType.transient_error, - error_message=f"Response status code: {response_or_exception.status_code}. Retrying...", # type: ignore[union-attr] + error_message=f"Response status code: {response_or_exception.status_code}. Retrying...", ) else: - if response_or_exception.ok: # type: ignore # noqa + if response_or_exception.ok: return ErrorResolution( response_action=ResponseAction.SUCCESS, failure_type=None, @@ -657,13 +657,13 @@ def interpret_response( return ErrorResolution( response_action=ResponseAction.FAIL, failure_type=FailureType.transient_error, - error_message=f"Response status code: {response_or_exception.status_code}. Unexpected error. Failed.", # type: ignore[union-attr] + error_message=f"Response status code: {response_or_exception.status_code}. Unexpected error. Failed.", ) else: return ErrorResolution( response_action=ResponseAction.IGNORE, failure_type=FailureType.transient_error, - error_message=f"Response status code: {response_or_exception.status_code}. Ignoring...", # type: ignore[union-attr] + error_message=f"Response status code: {response_or_exception.status_code}. Ignoring...", ) else: self._logger.error(f"Received unexpected response type: {type(response_or_exception)}") diff --git a/airbyte_cdk/sources/streams/http/http_client.py b/airbyte_cdk/sources/streams/http/http_client.py index 91e2a63d9..4f99bbeba 100644 --- a/airbyte_cdk/sources/streams/http/http_client.py +++ b/airbyte_cdk/sources/streams/http/http_client.py @@ -144,7 +144,7 @@ def _request_session(self) -> requests.Session: sqlite_path = "file::memory:?cache=shared" return CachedLimiterSession( sqlite_path, backend="sqlite", api_budget=self._api_budget, match_headers=True - ) # type: ignore # there are no typeshed stubs for requests_cache + ) else: return LimiterSession(api_budget=self._api_budget) @@ -324,7 +324,7 @@ def _send( formatter = log_formatter self._message_repository.log_message( Level.DEBUG, - lambda: formatter(response), # type: ignore # log_formatter is always cast to a callable + lambda: formatter(response), ) self._handle_error_resolution( diff --git a/airbyte_cdk/sources/utils/record_helper.py b/airbyte_cdk/sources/utils/record_helper.py index e45601c22..3d2cbcecf 100644 --- a/airbyte_cdk/sources/utils/record_helper.py +++ b/airbyte_cdk/sources/utils/record_helper.py @@ -35,7 +35,7 @@ def stream_data_to_airbyte_message( # need it to normalize values against json schema. By default no action # taken unless configured. See # docs/connector-development/cdk-python/schemas.md for details. - transformer.transform(data, schema) # type: ignore + transformer.transform(data, schema) if is_file_transfer_message: message = AirbyteFileTransferRecordMessage( stream=stream_name, file=data, emitted_at=now_millis, data={} diff --git a/airbyte_cdk/sources/utils/schema_helpers.py b/airbyte_cdk/sources/utils/schema_helpers.py index b8d2507c6..f15578238 100644 --- a/airbyte_cdk/sources/utils/schema_helpers.py +++ b/airbyte_cdk/sources/utils/schema_helpers.py @@ -194,7 +194,7 @@ class InternalConfig(BaseModel): def dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]: kwargs["by_alias"] = True kwargs["exclude_unset"] = True - return super().dict(*args, **kwargs) # type: ignore[no-any-return] + return super().dict(*args, **kwargs) def is_limit_reached(self, records_counter: int) -> bool: """ diff --git a/airbyte_cdk/test/entrypoint_wrapper.py b/airbyte_cdk/test/entrypoint_wrapper.py index 60514bf73..f8e85bfb0 100644 --- a/airbyte_cdk/test/entrypoint_wrapper.py +++ b/airbyte_cdk/test/entrypoint_wrapper.py @@ -63,7 +63,7 @@ def __init__(self, messages: List[str], uncaught_exception: Optional[BaseExcepti @staticmethod def _parse_message(message: str) -> AirbyteMessage: try: - return AirbyteMessageSerializer.load(orjson.loads(message)) # type: ignore[no-any-return] # Serializer.load() always returns AirbyteMessage + return AirbyteMessageSerializer.load(orjson.loads(message)) except (orjson.JSONDecodeError, SchemaValidationError): # The platform assumes that logs that are not of AirbyteMessage format are log messages return AirbyteMessage( diff --git a/airbyte_cdk/test/mock_http/response_builder.py b/airbyte_cdk/test/mock_http/response_builder.py index b517343e6..7f9583827 100644 --- a/airbyte_cdk/test/mock_http/response_builder.py +++ b/airbyte_cdk/test/mock_http/response_builder.py @@ -183,7 +183,7 @@ def build(self) -> HttpResponse: def _get_unit_test_folder(execution_folder: str) -> FilePath: # FIXME: This function should be removed after the next CDK release to avoid breaking amazon-seller-partner test code. - return get_unit_test_folder(execution_folder) # type: ignore # get_unit_test_folder is known to return a FilePath + return get_unit_test_folder(execution_folder) def find_template(resource: str, execution_folder: str) -> Dict[str, Any]: diff --git a/airbyte_cdk/utils/traced_exception.py b/airbyte_cdk/utils/traced_exception.py index 54c1e52e0..59dbab2a5 100644 --- a/airbyte_cdk/utils/traced_exception.py +++ b/airbyte_cdk/utils/traced_exception.py @@ -106,7 +106,7 @@ def from_exception( stream_descriptor: Optional[StreamDescriptor] = None, *args: Any, **kwargs: Any, - ) -> "AirbyteTracedException": # type: ignore # ignoring because of args and kwargs + ) -> "AirbyteTracedException": """ Helper to create an AirbyteTracedException from an existing exception :param exc: the exception that caused the error @@ -133,7 +133,7 @@ def as_sanitized_airbyte_message( if error_message.trace.error.message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage error_message.trace.error.message = filter_secrets( # type: ignore[union-attr] error_message.trace.error.message, # type: ignore[union-attr] - ) # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage + ) if error_message.trace.error.internal_message: # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage error_message.trace.error.internal_message = filter_secrets( # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage error_message.trace.error.internal_message # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has AirbyteTraceMessage From d3c2d1003755de9d11224064c077934b14ff0226 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 15:29:08 -0800 Subject: [PATCH 29/33] ci: comment-out source-s3 and destination-pinecone --- .github/workflows/connector-tests.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/connector-tests.yml b/.github/workflows/connector-tests.yml index c441fc01e..23d73dcab 100644 --- a/.github/workflows/connector-tests.yml +++ b/.github/workflows/connector-tests.yml @@ -74,10 +74,11 @@ jobs: cdk_extra: n/a - connector: source-chargebee cdk_extra: n/a - - connector: source-s3 - cdk_extra: file-based - - connector: destination-pinecone - cdk_extra: vector-db-based + # These two are behind in CDK updates and can't be used as tests until they are updated: + # - connector: source-s3 + # cdk_extra: file-based + # - connector: destination-pinecone + # cdk_extra: vector-db-based - connector: destination-motherduck cdk_extra: sql # ZenDesk currently failing (as of 2024-12-02) From b77d9576851cafa40776965ef1d6d2fe8a0c7022 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Tue, 3 Dec 2024 15:33:04 -0800 Subject: [PATCH 30/33] ci: fix skip indicator --- .github/workflows/connector-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/connector-tests.yml b/.github/workflows/connector-tests.yml index 23d73dcab..981ff4244 100644 --- a/.github/workflows/connector-tests.yml +++ b/.github/workflows/connector-tests.yml @@ -92,7 +92,7 @@ jobs: # - connector: source-pokeapi # cdk_extra: n/a - name: "Check: '${{matrix.connector}}' (skip=${{needs.cdk_changes.outputs[matrix.cdk_extra] == 'false'}})" + name: "Check: '${{matrix.connector}}' (skip=${{needs.cdk_changes.outputs['src'] == 'false' || needs.cdk_changes.outputs[matrix.cdk_extra] == 'false'}})" steps: - name: Abort if extra not changed (${{matrix.cdk_extra}}) id: no_changes From eb2ea3c3718b781d999102ca170e29a6ddb933f5 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 4 Dec 2024 09:46:35 -0800 Subject: [PATCH 31/33] comment-out deprecated decorator in "hot" codepath, with link to follow-on issue --- airbyte_cdk/sources/streams/core.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index d908be675..d9bf95f48 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -431,10 +431,12 @@ def state_checkpoint_interval(self) -> Optional[int]: """ return None - @deprecated( - "Deprecated method `get_updated_state` as of CDK version 0.1.49. " - "Please use explicit state property instead, see `IncrementalMixin` docs." - ) + # Commented-out to avoid any runtime penalty, since this is used in a hot per-record codepath. + # To be evaluated for re-introduction here: https://github.com/airbytehq/airbyte-python-cdk/issues/116 + # @deprecated( + # "Deprecated method `get_updated_state` as of CDK version 0.1.49. " + # "Please use explicit state property instead, see `IncrementalMixin` docs." + # ) def get_updated_state( self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any] ) -> MutableMapping[str, Any]: From b62f634bd6b7e355766f038913b80808a2f07e8b Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 4 Dec 2024 09:47:21 -0800 Subject: [PATCH 32/33] comment-out flaky chargebee test --- .github/workflows/connector-tests.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/connector-tests.yml b/.github/workflows/connector-tests.yml index 981ff4244..0f7033f1f 100644 --- a/.github/workflows/connector-tests.yml +++ b/.github/workflows/connector-tests.yml @@ -72,8 +72,9 @@ jobs: cdk_extra: n/a - connector: source-shopify cdk_extra: n/a - - connector: source-chargebee - cdk_extra: n/a + # Chargebee is being flaky: + # - connector: source-chargebee + # cdk_extra: n/a # These two are behind in CDK updates and can't be used as tests until they are updated: # - connector: source-s3 # cdk_extra: file-based From bd2e5affd196b2f36dfc0fb8bbbae07771dde188 Mon Sep 17 00:00:00 2001 From: Aaron Steers Date: Wed, 4 Dec 2024 09:57:13 -0800 Subject: [PATCH 33/33] add deprecated notice to docstring --- airbyte_cdk/sources/streams/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index d9bf95f48..a9aa8550a 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -440,7 +440,9 @@ def state_checkpoint_interval(self) -> Optional[int]: def get_updated_state( self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any] ) -> MutableMapping[str, Any]: - """Override to extract state from the latest record. Needed to implement incremental sync. + """DEPRECATED. Please use explicit state property instead, see `IncrementalMixin` docs. + + Override to extract state from the latest record. Needed to implement incremental sync. Inspects the latest record extracted from the data source and the current state object and return an updated state object.