From 8ddb3d98d4868775835332f4e80ea51221b60079 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 11 Nov 2024 17:41:50 +0100 Subject: [PATCH 01/17] Airbyte CDK: add gzipjson decoder --- .../declarative_component_schema.yaml | 43 + .../sources/declarative/decoders/__init__.py | 4 +- .../declarative/decoders/json_decoder.py | 30 +- .../models/declarative_component_schema.py | 1365 +++++++++-------- 4 files changed, 778 insertions(+), 664 deletions(-) diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 3fcbbf34..3d378a28 100644 --- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1750,6 +1750,45 @@ definitions: type: type: string enum: [XmlDecoder] + CustomDecoder: + title: Custom Decoder + description: Use this to implement custom decoder logic. + type: object + additionalProperties: true + required: + - type + - class_name + properties: + type: + type: string + enum: [CustomDecoder] + class_name: + title: Class Name + description: Fully-qualified name of the class that will be implementing the custom decoding. Has to be a sub class of Decoder. The format is `source_<name>.<package>.<class_name>`. + type: string + additionalProperties: true + examples: + - "source_amazon_ads.components.GzipJsonlDecoder" + $parameters: + type: object + additionalProperties: true + GzipJsonDecoder: + title: GzipJson Decoder + description: Use this if the response is Gzip compressed Json. + type: object + additionalProperties: true + required: + - type + properties: + type: + type: string + enum: [GzipJsonDecoder] + encoding: + type: string + default: utf-8 + $parameters: + type: object + additionalProperties: true ListPartitionRouter: title: List Partition Router description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. 
During a sync, each value is iterated over and can be used as input to outbound API requests. @@ -2404,10 +2443,12 @@ definitions: title: Decoder description: Component decoding the response so records can be extracted. anyOf: + - "$ref": "#/definitions/CustomDecoder" - "$ref": "#/definitions/JsonDecoder" - "$ref": "#/definitions/JsonlDecoder" - "$ref": "#/definitions/IterableDecoder" - "$ref": "#/definitions/XmlDecoder" + - "$ref": "#/definitions/GzipJsonDecoder" $parameters: type: object additionalProperties: true @@ -2520,10 +2561,12 @@ definitions: title: Decoder description: Component decoding the response so records can be extracted. anyOf: + - "$ref": "#/definitions/CustomDecoder" - "$ref": "#/definitions/JsonDecoder" - "$ref": "#/definitions/JsonlDecoder" - "$ref": "#/definitions/IterableDecoder" - "$ref": "#/definitions/XmlDecoder" + - "$ref": "#/definitions/GzipJsonDecoder" $parameters: type: object additionalProperties: true diff --git a/airbyte_cdk/sources/declarative/decoders/__init__.py b/airbyte_cdk/sources/declarative/decoders/__init__.py index b67561e9..7452fe99 100644 --- a/airbyte_cdk/sources/declarative/decoders/__init__.py +++ b/airbyte_cdk/sources/declarative/decoders/__init__.py @@ -3,9 +3,9 @@ # from airbyte_cdk.sources.declarative.decoders.decoder import Decoder -from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder, JsonlDecoder, IterableDecoder +from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder, JsonlDecoder, IterableDecoder, GzipJsonDecoder from airbyte_cdk.sources.declarative.decoders.noop_decoder import NoopDecoder from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator import PaginationDecoderDecorator from airbyte_cdk.sources.declarative.decoders.xml_decoder import XmlDecoder -__all__ = ["Decoder", "JsonDecoder", "JsonlDecoder", "IterableDecoder", "NoopDecoder", "PaginationDecoderDecorator", "XmlDecoder"] +__all__ = ["Decoder", "JsonDecoder", "JsonlDecoder", 
"IterableDecoder", "GzipJsonDecoder", "NoopDecoder", "PaginationDecoderDecorator", "XmlDecoder"] diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 986bbd87..b327577c 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -4,6 +4,7 @@ import logging from dataclasses import InitVar, dataclass +from gzip import decompress from typing import Any, Generator, Mapping import requests @@ -30,18 +31,20 @@ def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], No """ try: body_json = response.json() - if not isinstance(body_json, list): - body_json = [body_json] - if len(body_json) == 0: - yield {} - else: - yield from body_json + yield from self.parse_body_json(body_json) except requests.exceptions.JSONDecodeError: - logger.warning( - f"Response cannot be parsed into json: {response.status_code=}, {response.text=}" - ) + logger.warning(f"Response cannot be parsed into json: {response.status_code=}, {response.text=}") yield {} + @staticmethod + def parse_body_json(body_json: Mapping[str, Any] | list) -> Generator[Mapping[str, Any], None, None]: + if not isinstance(body_json, list): + body_json = [body_json] + if len(body_json) == 0: + yield {} + else: + yield from body_json + @dataclass class IterableDecoder(Decoder): @@ -75,3 +78,12 @@ def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], No # https://github.com/airbytehq/airbyte-internal-issues/issues/8436 for record in response.iter_lines(): yield orjson.loads(record) + + +@dataclass +class GzipJsonDecoder(JsonDecoder): + encoding: str = "utf-8" + + def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]: + raw_string = decompress(response.content).decode(encoding=self.encoding) + yield from self.parse_body_json(orjson.loads(raw_string)) diff --git 
a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 43848eae..3f69b8c4 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -11,314 +11,314 @@ class AuthFlowType(Enum): - oauth2_0 = "oauth2.0" - oauth1_0 = "oauth1.0" + oauth2_0 = 'oauth2.0' + oauth1_0 = 'oauth1.0' class BasicHttpAuthenticator(BaseModel): - type: Literal["BasicHttpAuthenticator"] + type: Literal['BasicHttpAuthenticator'] username: str = Field( ..., - description="The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.", + description='The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.', examples=["{{ config['username'] }}", "{{ config['api_key'] }}"], - title="Username", + title='Username', ) password: Optional[str] = Field( - "", - description="The password that will be combined with the username, base64 encoded and used to make requests. Fill it in the user inputs.", - examples=["{{ config['password'] }}", ""], - title="Password", + '', + description='The password that will be combined with the username, base64 encoded and used to make requests. 
Fill it in the user inputs.', + examples=["{{ config['password'] }}", ''], + title='Password', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class BearerAuthenticator(BaseModel): - type: Literal["BearerAuthenticator"] + type: Literal['BearerAuthenticator'] api_token: str = Field( ..., - description="Token to inject as request header for authenticating with the API.", + description='Token to inject as request header for authenticating with the API.', examples=["{{ config['api_key'] }}", "{{ config['token'] }}"], - title="Bearer Token", + title='Bearer Token', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CheckStream(BaseModel): - type: Literal["CheckStream"] + type: Literal['CheckStream'] stream_names: List[str] = Field( ..., - description="Names of the streams to try reading from when running a check operation.", - examples=[["users"], ["users", "contacts"]], - title="Stream Names", + description='Names of the streams to try reading from when running a check operation.', + examples=[['users'], ['users', 'contacts']], + title='Stream Names', ) class ConcurrencyLevel(BaseModel): - type: Optional[Literal["ConcurrencyLevel"]] = None + type: Optional[Literal['ConcurrencyLevel']] = None default_concurrency: Union[int, str] = Field( ..., - description="The amount of concurrency that will applied during a sync. This value can be hardcoded or user-defined in the config if different users have varying volume thresholds in the target API.", + description='The amount of concurrency that will applied during a sync. 
This value can be hardcoded or user-defined in the config if different users have varying volume thresholds in the target API.', examples=[10, "{{ config['num_workers'] or 10 }}"], - title="Default Concurrency", + title='Default Concurrency', ) max_concurrency: Optional[int] = Field( None, - description="The maximum level of concurrency that will be used during a sync. This becomes a required field when the default_concurrency derives from the config, because it serves as a safeguard against a user-defined threshold that is too high.", + description='The maximum level of concurrency that will be used during a sync. This becomes a required field when the default_concurrency derives from the config, because it serves as a safeguard against a user-defined threshold that is too high.', examples=[20, 100], - title="Max Concurrency", + title='Max Concurrency', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class ConstantBackoffStrategy(BaseModel): - type: Literal["ConstantBackoffStrategy"] + type: Literal['ConstantBackoffStrategy'] backoff_time_in_seconds: Union[float, str] = Field( ..., - description="Backoff time in seconds.", + description='Backoff time in seconds.', examples=[30, 30.5, "{{ config['backoff_time'] }}"], - title="Backoff Time", + title='Backoff Time', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CursorPagination(BaseModel): - type: Literal["CursorPagination"] + type: Literal['CursorPagination'] cursor_value: str = Field( ..., - description="Value of the cursor defining the next page to fetch.", + description='Value of the cursor defining the next page to fetch.', examples=[ - "{{ headers.link.next.cursor }}", + '{{ headers.link.next.cursor }}', "{{ last_record['key'] }}", "{{ response['nextPage'] }}", ], - title="Cursor Value", + title='Cursor 
Value', ) page_size: Optional[int] = Field( None, - description="The number of records to include in each pages.", + description='The number of records to include in each pages.', examples=[100], - title="Page Size", + title='Page Size', ) stop_condition: Optional[str] = Field( None, - description="Template string evaluating when to stop paginating.", + description='Template string evaluating when to stop paginating.', examples=[ - "{{ response.data.has_more is false }}", + '{{ response.data.has_more is false }}', "{{ 'next' not in headers['link'] }}", ], - title="Stop Condition", + title='Stop Condition', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomAuthenticator(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomAuthenticator"] + type: Literal['CustomAuthenticator'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.", - examples=["source_railz.components.ShortLivedTokenAuthenticator"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.', + examples=['source_railz.components.ShortLivedTokenAuthenticator'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomBackoffStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomBackoffStrategy"] + type: Literal['CustomBackoffStrategy'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom backoff strategy. 
The format is `source_..`.", - examples=["source_railz.components.MyCustomBackoffStrategy"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom backoff strategy. The format is `source_..`.', + examples=['source_railz.components.MyCustomBackoffStrategy'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomErrorHandler(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomErrorHandler"] + type: Literal['CustomErrorHandler'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.", - examples=["source_railz.components.MyCustomErrorHandler"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.', + examples=['source_railz.components.MyCustomErrorHandler'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomIncrementalSync(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomIncrementalSync"] + type: Literal['CustomIncrementalSync'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_..`.", - examples=["source_railz.components.MyCustomIncrementalSync"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom incremental sync. 
The format is `source_..`.', + examples=['source_railz.components.MyCustomIncrementalSync'], + title='Class Name', ) cursor_field: str = Field( ..., - description="The location of the value on a record that will be used as a bookmark during sync.", + description='The location of the value on a record that will be used as a bookmark during sync.', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomPaginationStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomPaginationStrategy"] + type: Literal['CustomPaginationStrategy'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom pagination strategy. The format is `source_..`.", - examples=["source_railz.components.MyCustomPaginationStrategy"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom pagination strategy. The format is `source_..`.', + examples=['source_railz.components.MyCustomPaginationStrategy'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomRecordExtractor(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomRecordExtractor"] + type: Literal['CustomRecordExtractor'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom record extraction strategy. The format is `source_..`.", - examples=["source_railz.components.MyCustomRecordExtractor"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom record extraction strategy. 
The format is `source_..`.', + examples=['source_railz.components.MyCustomRecordExtractor'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomRecordFilter(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomRecordFilter"] + type: Literal['CustomRecordFilter'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom record filter strategy. The format is `source_..`.", - examples=["source_railz.components.MyCustomCustomRecordFilter"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom record filter strategy. The format is `source_..`.', + examples=['source_railz.components.MyCustomCustomRecordFilter'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomRequester(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomRequester"] + type: Literal['CustomRequester'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom requester strategy. The format is `source_..`.", - examples=["source_railz.components.MyCustomRecordExtractor"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom requester strategy. 
The format is `source_..`.', + examples=['source_railz.components.MyCustomRecordExtractor'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomRetriever(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomRetriever"] + type: Literal['CustomRetriever'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.", - examples=["source_railz.components.MyCustomRetriever"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.', + examples=['source_railz.components.MyCustomRetriever'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomPartitionRouter(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomPartitionRouter"] + type: Literal['CustomPartitionRouter'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom partition router. The format is `source_..`.", - examples=["source_railz.components.MyCustomPartitionRouter"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom partition router. 
The format is `source_..`.', + examples=['source_railz.components.MyCustomPartitionRouter'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomSchemaLoader(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomSchemaLoader"] + type: Literal['CustomSchemaLoader'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_..`.", - examples=["source_railz.components.MyCustomSchemaLoader"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_..`.', + examples=['source_railz.components.MyCustomSchemaLoader'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomStateMigration(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomStateMigration"] + type: Literal['CustomStateMigration'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom state migration. The format is `source_..`.", - examples=["source_railz.components.MyCustomStateMigration"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom state migration. 
The format is `source_..`.', + examples=['source_railz.components.MyCustomStateMigration'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class CustomTransformation(BaseModel): class Config: extra = Extra.allow - type: Literal["CustomTransformation"] + type: Literal['CustomTransformation'] class_name: str = Field( ..., - description="Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.", - examples=["source_railz.components.MyCustomTransformation"], - title="Class Name", + description='Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.', + examples=['source_railz.components.MyCustomTransformation'], + title='Class Name', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class LegacyToPerPartitionStateMigration(BaseModel): class Config: extra = Extra.allow - type: Optional[Literal["LegacyToPerPartitionStateMigration"]] = None + type: Optional[Literal['LegacyToPerPartitionStateMigration']] = None class Algorithm(Enum): - HS256 = "HS256" - HS384 = "HS384" - HS512 = "HS512" - ES256 = "ES256" - ES256K = "ES256K" - ES384 = "ES384" - ES512 = "ES512" - RS256 = "RS256" - RS384 = "RS384" - RS512 = "RS512" - PS256 = "PS256" - PS384 = "PS384" - PS512 = "PS512" - EdDSA = "EdDSA" + HS256 = 'HS256' + HS384 = 'HS384' + HS512 = 'HS512' + ES256 = 'ES256' + ES256K = 'ES256K' + ES384 = 'ES384' + ES512 = 'ES512' + RS256 = 'RS256' + RS384 = 'RS384' + RS512 = 'RS512' + PS256 = 'PS256' + PS384 = 'PS384' + PS512 = 'PS512' + EdDSA = 'EdDSA' class JwtHeaders(BaseModel): @@ -327,21 +327,21 @@ class Config: kid: Optional[str] = Field( None, - description="Private key ID for user account.", + description='Private key ID for user account.', 
examples=["{{ config['kid'] }}"], - title="Key Identifier", + title='Key Identifier', ) typ: Optional[str] = Field( - "JWT", - description="The media type of the complete JWT.", - examples=["JWT"], - title="Type", + 'JWT', + description='The media type of the complete JWT.', + examples=['JWT'], + title='Type', ) cty: Optional[str] = Field( None, - description="Content type of JWT header.", - examples=["JWT"], - title="Content Type", + description='Content type of JWT header.', + examples=['JWT'], + title='Content Type', ) @@ -351,28 +351,28 @@ class Config: iss: Optional[str] = Field( None, - description="The user/principal that issued the JWT. Commonly a value unique to the user.", + description='The user/principal that issued the JWT. Commonly a value unique to the user.', examples=["{{ config['iss'] }}"], - title="Issuer", + title='Issuer', ) sub: Optional[str] = Field( None, - description="The subject of the JWT. Commonly defined by the API.", - title="Subject", + description='The subject of the JWT. Commonly defined by the API.', + title='Subject', ) aud: Optional[str] = Field( None, - description="The recipient that the JWT is intended for. Commonly defined by the API.", - examples=["appstoreconnect-v1"], - title="Audience", + description='The recipient that the JWT is intended for. 
Commonly defined by the API.', + examples=['appstoreconnect-v1'], + title='Audience', ) class JwtAuthenticator(BaseModel): - type: Literal["JwtAuthenticator"] + type: Literal['JwtAuthenticator'] secret_key: str = Field( ..., - description="Secret used to sign the JSON web token.", + description='Secret used to sign the JSON web token.', examples=["{{ config['secret_key'] }}"], ) base64_encode_secret_key: Optional[bool] = Field( @@ -381,515 +381,544 @@ class JwtAuthenticator(BaseModel): ) algorithm: Algorithm = Field( ..., - description="Algorithm used to sign the JSON web token.", - examples=["ES256", "HS256", "RS256", "{{ config['algorithm'] }}"], + description='Algorithm used to sign the JSON web token.', + examples=['ES256', 'HS256', 'RS256', "{{ config['algorithm'] }}"], ) token_duration: Optional[int] = Field( 1200, - description="The amount of time in seconds a JWT token can be valid after being issued.", + description='The amount of time in seconds a JWT token can be valid after being issued.', examples=[1200, 3600], - title="Token Duration", + title='Token Duration', ) header_prefix: Optional[str] = Field( None, - description="The prefix to be used within the Authentication header.", - examples=["Bearer", "Basic"], - title="Header Prefix", + description='The prefix to be used within the Authentication header.', + examples=['Bearer', 'Basic'], + title='Header Prefix', ) jwt_headers: Optional[JwtHeaders] = Field( None, - description="JWT headers used when signing JSON web token.", - title="JWT Headers", + description='JWT headers used when signing JSON web token.', + title='JWT Headers', ) additional_jwt_headers: Optional[Dict[str, Any]] = Field( None, - description="Additional headers to be included with the JWT headers object.", - title="Additional JWT Headers", + description='Additional headers to be included with the JWT headers object.', + title='Additional JWT Headers', ) jwt_payload: Optional[JwtPayload] = Field( None, - description="JWT Payload used 
when signing JSON web token.", - title="JWT Payload", + description='JWT Payload used when signing JSON web token.', + title='JWT Payload', ) additional_jwt_payload: Optional[Dict[str, Any]] = Field( None, - description="Additional properties to be added to the JWT payload.", - title="Additional JWT Payload Properties", + description='Additional properties to be added to the JWT payload.', + title='Additional JWT Payload Properties', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class RefreshTokenUpdater(BaseModel): refresh_token_name: Optional[str] = Field( - "refresh_token", - description="The name of the property which contains the updated refresh token in the response from the token refresh endpoint.", - examples=["refresh_token"], - title="Refresh Token Property Name", + 'refresh_token', + description='The name of the property which contains the updated refresh token in the response from the token refresh endpoint.', + examples=['refresh_token'], + title='Refresh Token Property Name', ) access_token_config_path: Optional[List[str]] = Field( - ["credentials", "access_token"], - description="Config path to the access token. Make sure the field actually exists in the config.", - examples=[["credentials", "access_token"], ["access_token"]], - title="Config Path To Access Token", + ['credentials', 'access_token'], + description='Config path to the access token. Make sure the field actually exists in the config.', + examples=[['credentials', 'access_token'], ['access_token']], + title='Config Path To Access Token', ) refresh_token_config_path: Optional[List[str]] = Field( - ["credentials", "refresh_token"], - description="Config path to the access token. 
Make sure the field actually exists in the config.", - examples=[["credentials", "refresh_token"], ["refresh_token"]], - title="Config Path To Refresh Token", + ['credentials', 'refresh_token'], + description='Config path to the access token. Make sure the field actually exists in the config.', + examples=[['credentials', 'refresh_token'], ['refresh_token']], + title='Config Path To Refresh Token', ) token_expiry_date_config_path: Optional[List[str]] = Field( - ["credentials", "token_expiry_date"], - description="Config path to the expiry date. Make sure actually exists in the config.", - examples=[["credentials", "token_expiry_date"]], - title="Config Path To Expiry Date", + ['credentials', 'token_expiry_date'], + description='Config path to the expiry date. Make sure actually exists in the config.', + examples=[['credentials', 'token_expiry_date']], + title='Config Path To Expiry Date', ) refresh_token_error_status_codes: Optional[List[int]] = Field( [], - description="Status Codes to Identify refresh token error in response (Refresh Token Error Key and Refresh Token Error Values should be also specified). Responses with one of the error status code and containing an error value will be flagged as a config error", + description='Status Codes to Identify refresh token error in response (Refresh Token Error Key and Refresh Token Error Values should be also specified). 
Responses with one of the error status code and containing an error value will be flagged as a config error', examples=[[400, 500]], - title="Refresh Token Error Status Codes", + title='Refresh Token Error Status Codes', ) refresh_token_error_key: Optional[str] = Field( - "", - description="Key to Identify refresh token error in response (Refresh Token Error Status Codes and Refresh Token Error Values should be also specified).", - examples=["error"], - title="Refresh Token Error Key", + '', + description='Key to Identify refresh token error in response (Refresh Token Error Status Codes and Refresh Token Error Values should be also specified).', + examples=['error'], + title='Refresh Token Error Key', ) refresh_token_error_values: Optional[List[str]] = Field( [], description='List of values to check for exception during token refresh process. Used to check if the error found in the response matches the key from the Refresh Token Error Key field (e.g. response={"error": "invalid_grant"}). Only responses with one of the error status code and containing an error value will be flagged as a config error', - examples=[["invalid_grant", "invalid_permissions"]], - title="Refresh Token Error Values", + examples=[['invalid_grant', 'invalid_permissions']], + title='Refresh Token Error Values', ) class OAuthAuthenticator(BaseModel): - type: Literal["OAuthAuthenticator"] + type: Literal['OAuthAuthenticator'] client_id: str = Field( ..., - description="The OAuth client ID. Fill it in the user inputs.", + description='The OAuth client ID. Fill it in the user inputs.', examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"], - title="Client ID", + title='Client ID', ) client_secret: str = Field( ..., - description="The OAuth client secret. Fill it in the user inputs.", + description='The OAuth client secret. 
Fill it in the user inputs.', examples=[ "{{ config['client_secret }}", "{{ config['credentials']['client_secret }}", ], - title="Client Secret", + title='Client Secret', ) refresh_token: Optional[str] = Field( None, - description="Credential artifact used to get a new access token.", + description='Credential artifact used to get a new access token.', examples=[ "{{ config['refresh_token'] }}", "{{ config['credentials]['refresh_token'] }}", ], - title="Refresh Token", + title='Refresh Token', ) token_refresh_endpoint: str = Field( ..., - description="The full URL to call to obtain a new access token.", - examples=["https://connect.squareup.com/oauth2/token"], - title="Token Refresh Endpoint", + description='The full URL to call to obtain a new access token.', + examples=['https://connect.squareup.com/oauth2/token'], + title='Token Refresh Endpoint', ) access_token_name: Optional[str] = Field( - "access_token", - description="The name of the property which contains the access token in the response from the token refresh endpoint.", - examples=["access_token"], - title="Access Token Property Name", + 'access_token', + description='The name of the property which contains the access token in the response from the token refresh endpoint.', + examples=['access_token'], + title='Access Token Property Name', ) expires_in_name: Optional[str] = Field( - "expires_in", - description="The name of the property which contains the expiry date in the response from the token refresh endpoint.", - examples=["expires_in"], - title="Token Expiry Property Name", + 'expires_in', + description='The name of the property which contains the expiry date in the response from the token refresh endpoint.', + examples=['expires_in'], + title='Token Expiry Property Name', ) grant_type: Optional[str] = Field( - "refresh_token", - description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. 
For client_credentials, only client id and secret are required. Other grant types are not officially supported.", - examples=["refresh_token", "client_credentials"], - title="Grant Type", + 'refresh_token', + description='Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.', + examples=['refresh_token', 'client_credentials'], + title='Grant Type', ) refresh_request_body: Optional[Dict[str, Any]] = Field( None, - description="Body of the request sent to get a new access token.", + description='Body of the request sent to get a new access token.', examples=[ { - "applicationId": "{{ config['application_id'] }}", - "applicationSecret": "{{ config['application_secret'] }}", - "token": "{{ config['token'] }}", + 'applicationId': "{{ config['application_id'] }}", + 'applicationSecret': "{{ config['application_secret'] }}", + 'token': "{{ config['token'] }}", } ], - title="Refresh Request Body", + title='Refresh Request Body', ) scopes: Optional[List[str]] = Field( None, - description="List of scopes that should be granted to the access token.", - examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]], - title="Scopes", + description='List of scopes that should be granted to the access token.', + examples=[ + ['crm.list.read', 'crm.objects.contacts.read', 'crm.schema.contacts.read'] + ], + title='Scopes', ) token_expiry_date: Optional[str] = Field( None, - description="The access token expiry date.", - examples=["2023-04-06T07:12:10.421833+00:00", 1680842386], - title="Token Expiry Date", + description='The access token expiry date.', + examples=['2023-04-06T07:12:10.421833+00:00', 1680842386], + title='Token Expiry Date', ) token_expiry_date_format: Optional[str] = Field( None, - description="The format of the time to expiration datetime. 
Provide it if the time is returned as a date-time string instead of seconds.", - examples=["%Y-%m-%d %H:%M:%S.%f+00:00"], - title="Token Expiry Date Format", + description='The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.', + examples=['%Y-%m-%d %H:%M:%S.%f+00:00'], + title='Token Expiry Date Format', ) refresh_token_updater: Optional[RefreshTokenUpdater] = Field( None, - description="When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.", - title="Token Updater", + description='When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.', + title='Token Updater', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class DpathExtractor(BaseModel): - type: Literal["DpathExtractor"] + type: Literal['DpathExtractor'] field_path: List[str] = Field( ..., description='List of potentially nested fields describing the full path of the field to extract. Use "*" to extract all values from an array. 
See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/record-selector).', examples=[ - ["data"], - ["data", "records"], - ["data", "{{ parameters.name }}"], - ["data", "*", "record"], + ['data'], + ['data', 'records'], + ['data', '{{ parameters.name }}'], + ['data', '*', 'record'], ], - title="Field Path", + title='Field Path', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class ExponentialBackoffStrategy(BaseModel): - type: Literal["ExponentialBackoffStrategy"] + type: Literal['ExponentialBackoffStrategy'] factor: Optional[Union[float, str]] = Field( 5, - description="Multiplicative constant applied on each retry.", - examples=[5, 5.5, "10"], - title="Factor", + description='Multiplicative constant applied on each retry.', + examples=[5, 5.5, '10'], + title='Factor', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SessionTokenRequestBearerAuthenticator(BaseModel): - type: Literal["Bearer"] + type: Literal['Bearer'] class HttpMethod(Enum): - GET = "GET" - POST = "POST" + GET = 'GET' + POST = 'POST' class Action(Enum): - SUCCESS = "SUCCESS" - FAIL = "FAIL" - RETRY = "RETRY" - IGNORE = "IGNORE" - RATE_LIMITED = "RATE_LIMITED" + SUCCESS = 'SUCCESS' + FAIL = 'FAIL' + RETRY = 'RETRY' + IGNORE = 'IGNORE' + RATE_LIMITED = 'RATE_LIMITED' class FailureType(Enum): - system_error = "system_error" - config_error = "config_error" - transient_error = "transient_error" + system_error = 'system_error' + config_error = 'config_error' + transient_error = 'transient_error' class HttpResponseFilter(BaseModel): - type: Literal["HttpResponseFilter"] + type: Literal['HttpResponseFilter'] action: Optional[Action] = Field( None, - description="Action to execute if a response matches the filter.", - examples=["SUCCESS", 
"FAIL", "RETRY", "IGNORE", "RATE_LIMITED"], - title="Action", + description='Action to execute if a response matches the filter.', + examples=['SUCCESS', 'FAIL', 'RETRY', 'IGNORE', 'RATE_LIMITED'], + title='Action', ) failure_type: Optional[FailureType] = Field( None, - description="Failure type of traced exception if a response matches the filter.", - examples=["system_error", "config_error", "transient_error"], - title="Failure Type", + description='Failure type of traced exception if a response matches the filter.', + examples=['system_error', 'config_error', 'transient_error'], + title='Failure Type', ) error_message: Optional[str] = Field( None, - description="Error Message to display if the response matches the filter.", - title="Error Message", + description='Error Message to display if the response matches the filter.', + title='Error Message', ) error_message_contains: Optional[str] = Field( None, - description="Match the response if its error message contains the substring.", - example=["This API operation is not enabled for this site"], - title="Error Message Substring", + description='Match the response if its error message contains the substring.', + example=['This API operation is not enabled for this site'], + title='Error Message Substring', ) http_codes: Optional[List[int]] = Field( None, - description="Match the response if its HTTP code is included in this list.", + description='Match the response if its HTTP code is included in this list.', examples=[[420, 429], [500]], - title="HTTP Codes", + title='HTTP Codes', ) predicate: Optional[str] = Field( None, - description="Match the response if the predicate evaluates to true.", + description='Match the response if the predicate evaluates to true.', examples=[ "{{ 'Too much requests' in response }}", "{{ 'error_code' in response and response['error_code'] == 'ComplexityException' }}", ], - title="Predicate", + title='Predicate', ) - parameters: Optional[Dict[str, Any]] = Field(None, 
alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class InlineSchemaLoader(BaseModel): - type: Literal["InlineSchemaLoader"] + type: Literal['InlineSchemaLoader'] schema_: Optional[Dict[str, Any]] = Field( None, - alias="schema", + alias='schema', description='Describes a streams\' schema. Refer to the Data Types documentation for more details on which types are valid.', - title="Schema", + title='Schema', ) class JsonFileSchemaLoader(BaseModel): - type: Literal["JsonFileSchemaLoader"] + type: Literal['JsonFileSchemaLoader'] file_path: Optional[str] = Field( None, description="Path to the JSON file defining the schema. The path is relative to the connector module's root.", - example=["./schemas/users.json"], - title="File Path", + example=['./schemas/users.json'], + title='File Path', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class JsonDecoder(BaseModel): - type: Literal["JsonDecoder"] + type: Literal['JsonDecoder'] class JsonlDecoder(BaseModel): - type: Literal["JsonlDecoder"] + type: Literal['JsonlDecoder'] class KeysToLower(BaseModel): - type: Literal["KeysToLower"] - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + type: Literal['KeysToLower'] + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class IterableDecoder(BaseModel): - type: Literal["IterableDecoder"] + type: Literal['IterableDecoder'] class XmlDecoder(BaseModel): - type: Literal["XmlDecoder"] + type: Literal['XmlDecoder'] + + +class CustomDecoder(BaseModel): + class Config: + extra = Extra.allow + + type: Literal['CustomDecoder'] + class_name: str = Field( + ..., + description='Fully-qualified name of the class that will be implementing the custom decoding. Has to be a sub class of Decoder. 
The format is `source_..`.', + examples=['source_amazon_ads.components.GzipJsonlDecoder'], + title='Class Name', + ) + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + + +class GzipJsonDecoder(BaseModel): + class Config: + extra = Extra.allow + + type: Literal['GzipJsonDecoder'] + encoding: Optional[str] = 'utf-8' + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class MinMaxDatetime(BaseModel): - type: Literal["MinMaxDatetime"] + type: Literal['MinMaxDatetime'] datetime: str = Field( ..., - description="Datetime value.", - examples=["2021-01-01", "2021-01-01T00:00:00Z", "{{ config['start_time'] }}"], - title="Datetime", + description='Datetime value.', + examples=['2021-01-01', '2021-01-01T00:00:00Z', "{{ config['start_time'] }}"], + title='Datetime', ) datetime_format: Optional[str] = Field( - "", + '', description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', - examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"], - title="Datetime Format", + examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s'], + title='Datetime Format', ) max_datetime: Optional[str] = Field( None, - description="Ceiling applied on the datetime value. Must be formatted with the datetime_format field.", - examples=["2021-01-01T00:00:00Z", "2021-01-01"], - title="Max Datetime", + description='Ceiling applied on the datetime value. Must be formatted with the datetime_format field.', + examples=['2021-01-01T00:00:00Z', '2021-01-01'], + title='Max Datetime', ) min_datetime: Optional[str] = Field( None, - description="Floor applied on the datetime value. Must be formatted with the datetime_format field.", - examples=["2010-01-01T00:00:00Z", "2010-01-01"], - title="Min Datetime", + description='Floor applied on the datetime value. Must be formatted with the datetime_format field.', + examples=['2010-01-01T00:00:00Z', '2010-01-01'], + title='Min Datetime', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class NoAuth(BaseModel): - type: Literal["NoAuth"] - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + type: Literal['NoAuth'] + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class NoPagination(BaseModel): - type: Literal["NoPagination"] + type: Literal['NoPagination'] class OAuthConfigSpecification(BaseModel): class Config: extra = Extra.allow - oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field( - None, - description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", - examples=[ - {"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}}, - { - "app_id": { - "type": "string", - "path_in_connector_config": ["info", "app_id"], - } - }, - ], - title="OAuth user input", + oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = ( + Field( + None, + description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", + examples=[ + {'app_id': {'type': 'string', 'path_in_connector_config': ['app_id']}}, + { + 'app_id': { + 'type': 'string', + 'path_in_connector_config': ['info', 'app_id'], + } + }, + ], + title='OAuth user input', + ) ) complete_oauth_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token']\n }\n }", examples=[ { - "refresh_token": { - "type": "string,", - "path_in_connector_config": ["credentials", "refresh_token"], + 'refresh_token': { + 'type': 'string,', + 'path_in_connector_config': ['credentials', 'refresh_token'], } } ], - title="OAuth output specification", + title='OAuth output specification', ) complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field( None, - description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }", - examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}], - title="OAuth input specification", + description='OAuth specific blob. 
This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }', + examples=[ + {'client_id': {'type': 'string'}, 'client_secret': {'type': 'string'}} + ], + title='OAuth input specification', ) complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations that\nalso need to be merged back into the connector configuration at runtime.\nThis is a subset configuration of `complete_oauth_server_input_specification` that filters fields out to retain only the ones that\nare necessary for the connector to function with OAuth. 
(some fields could be used during oauth flows but not needed afterwards, therefore\nthey would be listed in the `complete_oauth_server_input_specification` but not `complete_oauth_server_output_specification`)\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nconnector when using OAuth flow APIs.\nThese fields are to be merged back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_server_output_specification={\n client_id: {\n type: string,\n path_in_connector_config: ['credentials', 'client_id']\n },\n client_secret: {\n type: string,\n path_in_connector_config: ['credentials', 'client_secret']\n }\n }", examples=[ { - "client_id": { - "type": "string,", - "path_in_connector_config": ["credentials", "client_id"], + 'client_id': { + 'type': 'string,', + 'path_in_connector_config': ['credentials', 'client_id'], }, - "client_secret": { - "type": "string,", - "path_in_connector_config": ["credentials", "client_secret"], + 'client_secret': { + 'type': 'string,', + 'path_in_connector_config': ['credentials', 'client_secret'], }, } ], - title="OAuth server output specification", + title='OAuth server output specification', ) class OffsetIncrement(BaseModel): - type: Literal["OffsetIncrement"] + type: Literal['OffsetIncrement'] page_size: Optional[Union[int, str]] = Field( None, - description="The number of records to include in each pages.", + description='The number of records to include in each pages.', examples=[100, "{{ config['page_size'] }}"], - title="Limit", + title='Limit', ) inject_on_first_request: Optional[bool] = Field( False, - description="Using the `offset` with value `0` during the first request", - title="Inject Offset", + description='Using the `offset` with value `0` during the first request', + title='Inject Offset', ) - 
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class PageIncrement(BaseModel): - type: Literal["PageIncrement"] + type: Literal['PageIncrement'] page_size: Optional[Union[int, str]] = Field( None, - description="The number of records to include in each pages.", - examples=[100, "100", "{{ config['page_size'] }}"], - title="Page Size", + description='The number of records to include in each pages.', + examples=[100, '100', "{{ config['page_size'] }}"], + title='Page Size', ) start_from_page: Optional[int] = Field( 0, - description="Index of the first page to request.", + description='Index of the first page to request.', examples=[0, 1], - title="Start From Page", + title='Start From Page', ) inject_on_first_request: Optional[bool] = Field( False, - description="Using the `page number` with value defined by `start_from_page` during the first request", - title="Inject Page Number", + description='Using the `page number` with value defined by `start_from_page` during the first request', + title='Inject Page Number', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class PrimaryKey(BaseModel): __root__: Union[str, List[str], List[List[str]]] = Field( ..., - description="The stream field to be used to distinguish unique records. Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.", - examples=["id", ["code", "type"]], - title="Primary Key", + description='The stream field to be used to distinguish unique records. 
Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.', + examples=['id', ['code', 'type']], + title='Primary Key', ) class RecordFilter(BaseModel): - type: Literal["RecordFilter"] + type: Literal['RecordFilter'] condition: Optional[str] = Field( - "", - description="The predicate to filter a record. Records will be removed if evaluated to False.", + '', + description='The predicate to filter a record. Records will be removed if evaluated to False.', examples=[ "{{ record['created_at'] >= stream_interval['start_time'] }}", "{{ record.status in ['active', 'expired'] }}", ], ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SchemaNormalization(Enum): - None_ = "None" - Default = "Default" + None_ = 'None' + Default = 'Default' class RemoveFields(BaseModel): - type: Literal["RemoveFields"] + type: Literal['RemoveFields'] condition: Optional[str] = Field( - "", - description="The predicate to filter a property by a property value. Property will be removed if it is empty OR expression is evaluated to True.,", + '', + description='The predicate to filter a property by a property value. Property will be removed if it is empty OR expression is evaluated to True.,', examples=[ "{{ property|string == '' }}", - "{{ property is integer }}", - "{{ property|length > 5 }}", + '{{ property is integer }}', + '{{ property|length > 5 }}', "{{ property == 'some_string_to_match' }}", ], ) field_pointers: List[List[str]] = Field( ..., - description="Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.", - examples=[["tags"], [["content", "html"], ["content", "plain_text"]]], - title="Field Paths", + description='Array of paths defining the field to remove. 
Each item is an array whose field describe the path of a field to remove.', + examples=[['tags'], [['content', 'html'], ['content', 'plain_text']]], + title='Field Paths', ) class RequestPath(BaseModel): - type: Literal["RequestPath"] + type: Literal['RequestPath'] class InjectInto(Enum): - request_parameter = "request_parameter" - header = "header" - body_data = "body_data" - body_json = "body_json" + request_parameter = 'request_parameter' + header = 'header' + body_data = 'body_data' + body_json = 'body_json' class RequestOption(BaseModel): - type: Literal["RequestOption"] + type: Literal['RequestOption'] field_name: str = Field( ..., - description="Configures which key should be used in the location that the descriptor is being injected into", - examples=["segment_id"], - title="Request Option", + description='Configures which key should be used in the location that the descriptor is being injected into', + examples=['segment_id'], + title='Request Option', ) inject_into: InjectInto = Field( ..., - description="Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.", - examples=["request_parameter", "header", "body_data", "body_json"], - title="Inject Into", + description='Configures where the descriptor should be set on the HTTP requests. 
Note that request parameters that are already encoded in the URL path will not be duplicated.', + examples=['request_parameter', 'header', 'body_data', 'body_json'], + title='Inject Into', ) @@ -901,54 +930,54 @@ class Config: class LegacySessionTokenAuthenticator(BaseModel): - type: Literal["LegacySessionTokenAuthenticator"] + type: Literal['LegacySessionTokenAuthenticator'] header: str = Field( ..., - description="The name of the session token header that will be injected in the request", - examples=["X-Session"], - title="Session Request Header", + description='The name of the session token header that will be injected in the request', + examples=['X-Session'], + title='Session Request Header', ) login_url: str = Field( ..., - description="Path of the login URL (do not include the base URL)", - examples=["session"], - title="Login Path", + description='Path of the login URL (do not include the base URL)', + examples=['session'], + title='Login Path', ) session_token: Optional[str] = Field( None, - description="Session token to use if using a pre-defined token. Not needed if authenticating with username + password pair", + description='Session token to use if using a pre-defined token. 
Not needed if authenticating with username + password pair', example=["{{ config['session_token'] }}"], - title="Session Token", + title='Session Token', ) session_token_response_key: str = Field( ..., - description="Name of the key of the session token to be extracted from the response", - examples=["id"], - title="Response Token Response Key", + description='Name of the key of the session token to be extracted from the response', + examples=['id'], + title='Response Token Response Key', ) username: Optional[str] = Field( None, - description="Username used to authenticate and obtain a session token", + description='Username used to authenticate and obtain a session token', examples=[" {{ config['username'] }}"], - title="Username", + title='Username', ) password: Optional[str] = Field( - "", - description="Password used to authenticate and obtain a session token", - examples=["{{ config['password'] }}", ""], - title="Password", + '', + description='Password used to authenticate and obtain a session token', + examples=["{{ config['password'] }}", ''], + title='Password', ) validate_session_url: str = Field( ..., - description="Path of the URL to use to validate that the session token is valid (do not include the base URL)", - examples=["user/current"], - title="Validate Session Path", + description='Path of the URL to use to validate that the session token is valid (do not include the base URL)', + examples=['user/current'], + title='Validate Session Path', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class AsyncJobStatusMap(BaseModel): - type: Optional[Literal["AsyncJobStatusMap"]] = None + type: Optional[Literal['AsyncJobStatusMap']] = None running: List[str] completed: List[str] failed: List[str] @@ -956,65 +985,65 @@ class AsyncJobStatusMap(BaseModel): class ValueType(Enum): - string = "string" - number = "number" - integer = "integer" - boolean = "boolean" 
+ string = 'string' + number = 'number' + integer = 'integer' + boolean = 'boolean' class WaitTimeFromHeader(BaseModel): - type: Literal["WaitTimeFromHeader"] + type: Literal['WaitTimeFromHeader'] header: str = Field( ..., - description="The name of the response header defining how long to wait before retrying.", - examples=["Retry-After"], - title="Response Header Name", + description='The name of the response header defining how long to wait before retrying.', + examples=['Retry-After'], + title='Response Header Name', ) regex: Optional[str] = Field( None, - description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", - examples=["([-+]?\\d+)"], - title="Extraction Regex", + description='Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.', + examples=['([-+]?\\d+)'], + title='Extraction Regex', ) max_waiting_time_in_seconds: Optional[float] = Field( None, - description="Given the value extracted from the header is greater than this value, stop the stream.", + description='Given the value extracted from the header is greater than this value, stop the stream.', examples=[3600], - title="Max Waiting Time in Seconds", + title='Max Waiting Time in Seconds', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class WaitUntilTimeFromHeader(BaseModel): - type: Literal["WaitUntilTimeFromHeader"] + type: Literal['WaitUntilTimeFromHeader'] header: str = Field( ..., - description="The name of the response header defining how long to wait before retrying.", - examples=["wait_time"], - title="Response Header", + description='The name of the response header defining how long to wait before retrying.', + examples=['wait_time'], + title='Response Header', ) min_wait: Optional[Union[float, str]] = Field( None, - description="Minimum 
time to wait before retrying.", - examples=[10, "60"], - title="Minimum Wait Time", + description='Minimum time to wait before retrying.', + examples=[10, '60'], + title='Minimum Wait Time', ) regex: Optional[str] = Field( None, - description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", - examples=["([-+]?\\d+)"], - title="Extraction Regex", + description='Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.', + examples=['([-+]?\\d+)'], + title='Extraction Regex', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class AddedFieldDefinition(BaseModel): - type: Literal["AddedFieldDefinition"] + type: Literal['AddedFieldDefinition'] path: List[str] = Field( ..., - description="List of strings defining the path where to add the value on the record.", - examples=[["segment_id"], ["metadata", "segment_id"]], - title="Path", + description='List of strings defining the path where to add the value on the record.', + examples=[['segment_id'], ['metadata', 'segment_id']], + title='Path', ) value: str = Field( ..., @@ -1024,167 +1053,167 @@ class AddedFieldDefinition(BaseModel): "{{ record['MetaData']['LastUpdatedTime'] }}", "{{ stream_partition['segment_id'] }}", ], - title="Value", + title='Value', ) value_type: Optional[ValueType] = Field( None, - description="Type of the value. If not specified, the type will be inferred from the value.", - title="Value Type", + description='Type of the value. 
If not specified, the type will be inferred from the value.', + title='Value Type', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class AddFields(BaseModel): - type: Literal["AddFields"] + type: Literal['AddFields'] fields: List[AddedFieldDefinition] = Field( ..., - description="List of transformations (path and corresponding value) that will be added to the record.", - title="Fields", + description='List of transformations (path and corresponding value) that will be added to the record.', + title='Fields', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class ApiKeyAuthenticator(BaseModel): - type: Literal["ApiKeyAuthenticator"] + type: Literal['ApiKeyAuthenticator'] api_token: Optional[str] = Field( None, - description="The API key to inject in the request. Fill it in the user inputs.", + description='The API key to inject in the request. Fill it in the user inputs.', examples=["{{ config['api_key'] }}", "Token token={{ config['api_key'] }}"], - title="API Key", + title='API Key', ) header: Optional[str] = Field( None, - description="The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.", - examples=["Authorization", "Api-Token", "X-Auth-Token"], - title="Header Name", + description='The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.', + examples=['Authorization', 'Api-Token', 'X-Auth-Token'], + title='Header Name', ) inject_into: Optional[RequestOption] = Field( None, - description="Configure how the API Key will be sent in requests to the source API. 
Either inject_into or header has to be defined.", + description='Configure how the API Key will be sent in requests to the source API. Either inject_into or header has to be defined.', examples=[ - {"inject_into": "header", "field_name": "Authorization"}, - {"inject_into": "request_parameter", "field_name": "authKey"}, + {'inject_into': 'header', 'field_name': 'Authorization'}, + {'inject_into': 'request_parameter', 'field_name': 'authKey'}, ], - title="Inject API Key Into Outgoing HTTP Request", + title='Inject API Key Into Outgoing HTTP Request', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class AuthFlow(BaseModel): auth_flow_type: Optional[AuthFlowType] = Field( - None, description="The type of auth to use", title="Auth flow type" + None, description='The type of auth to use', title='Auth flow type' ) predicate_key: Optional[List[str]] = Field( None, - description="JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.", - examples=[["credentials", "auth_type"]], - title="Predicate key", + description='JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.', + examples=[['credentials', 'auth_type']], + title='Predicate key', ) predicate_value: Optional[str] = Field( None, - description="Value of the predicate_key fields for the advanced auth to be applicable.", - examples=["Oauth"], - title="Predicate value", + description='Value of the predicate_key fields for the advanced auth to be applicable.', + examples=['Oauth'], + title='Predicate value', ) oauth_config_specification: Optional[OAuthConfigSpecification] = None class DatetimeBasedCursor(BaseModel): - type: Literal["DatetimeBasedCursor"] + type: Literal['DatetimeBasedCursor'] cursor_field: str = Field( ..., - description="The location of the value on a record that will be used as a bookmark during 
sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.", - examples=["created_at", "{{ config['record_cursor'] }}"], - title="Cursor Field", + description='The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.', + examples=['created_at', "{{ config['record_cursor'] }}"], + title='Cursor Field', ) datetime_format: str = Field( ..., - description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n", - examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms", "%s_as_float"], - title="Outgoing Datetime Format", + description='The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * 
**%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', + examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s', '%ms', '%s_as_float'], + title='Outgoing Datetime Format', ) start_datetime: Union[str, MinMaxDatetime] = Field( ..., - description="The datetime that determines the earliest record that should be synced.", - examples=["2020-01-1T00:00:00Z", "{{ config['start_time'] }}"], - title="Start Datetime", + description='The datetime that determines the earliest record that should be synced.', + examples=['2020-01-1T00:00:00Z', "{{ config['start_time'] }}"], + title='Start Datetime', ) cursor_datetime_formats: Optional[List[str]] = Field( None, - description="The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the `datetime_format` will be used.", - title="Cursor Datetime Formats", + description='The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the `datetime_format` will be used.', + title='Cursor Datetime Formats', ) cursor_granularity: Optional[str] = Field( None, - description="Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. 
Given this field is provided, `step` needs to be provided as well.", - examples=["PT1S"], - title="Cursor Granularity", + description='Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. Given this field is provided, `step` needs to be provided as well.', + examples=['PT1S'], + title='Cursor Granularity', ) end_datetime: Optional[Union[str, MinMaxDatetime]] = Field( None, - description="The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.", - examples=["2021-01-1T00:00:00Z", "{{ now_utc() }}", "{{ day_delta(-1) }}"], - title="End Datetime", + description='The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.', + examples=['2021-01-1T00:00:00Z', '{{ now_utc() }}', '{{ day_delta(-1) }}'], + title='End Datetime', ) end_time_option: Optional[RequestOption] = Field( None, - description="Optionally configures how the end datetime will be sent in requests to the source API.", - title="Inject End Time Into Outgoing HTTP Request", + description='Optionally configures how the end datetime will be sent in requests to the source API.', + title='Inject End Time Into Outgoing HTTP Request', ) is_data_feed: Optional[bool] = Field( None, - description="A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.", - title="Whether the target API is formatted as a data feed", + description='A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. 
Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.', + title='Whether the target API is formatted as a data feed', ) is_client_side_incremental: Optional[bool] = Field( None, - description="If the target API endpoint does not take cursor values to filter records and returns all records anyway, the connector with this cursor will filter out records locally, and only emit new records from the last sync, hence incremental. This means that all records would be read from the API, but only new records will be emitted to the destination.", - title="Whether the target API does not support filtering and returns all data (the cursor filters records in the client instead of the API side)", + description='If the target API endpoint does not take cursor values to filter records and returns all records anyway, the connector with this cursor will filter out records locally, and only emit new records from the last sync, hence incremental. This means that all records would be read from the API, but only new records will be emitted to the destination.', + title='Whether the target API does not support filtering and returns all data (the cursor filters records in the client instead of the API side)', ) is_compare_strictly: Optional[bool] = Field( False, - description="Set to True if the target API does not accept queries where the start time equal the end time.", - title="Whether to skip requests if the start time equals the end time", + description='Set to True if the target API does not accept queries where the start time equal the end time.', + title='Whether to skip requests if the start time equals the end time', ) global_substream_cursor: Optional[bool] = Field( False, - description="This setting optimizes performance when the parent stream has thousands of partitions by storing the cursor as a single value rather than per partition. 
Notably, the substream state is updated only at the end of the sync, which helps prevent data loss in case of a sync failure. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/incremental-syncs).", - title="Whether to store cursor as one value instead of per partition", + description='This setting optimizes performance when the parent stream has thousands of partitions by storing the cursor as a single value rather than per partition. Notably, the substream state is updated only at the end of the sync, which helps prevent data loss in case of a sync failure. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/incremental-syncs).', + title='Whether to store cursor as one value instead of per partition', ) lookback_window: Optional[str] = Field( None, - description="Time interval before the start_datetime to read data for, e.g. P1M for looking back one month.", - examples=["P1D", "P{{ config['lookback_days'] }}D"], - title="Lookback Window", + description='Time interval before the start_datetime to read data for, e.g. 
P1M for looking back one month.', + examples=['P1D', "P{{ config['lookback_days'] }}D"], + title='Lookback Window', ) partition_field_end: Optional[str] = Field( None, - description="Name of the partition start time field.", - examples=["ending_time"], - title="Partition Field End", + description='Name of the partition start time field.', + examples=['ending_time'], + title='Partition Field End', ) partition_field_start: Optional[str] = Field( None, - description="Name of the partition end time field.", - examples=["starting_time"], - title="Partition Field Start", + description='Name of the partition end time field.', + examples=['starting_time'], + title='Partition Field Start', ) start_time_option: Optional[RequestOption] = Field( None, - description="Optionally configures how the start datetime will be sent in requests to the source API.", - title="Inject Start Time Into Outgoing HTTP Request", + description='Optionally configures how the start datetime will be sent in requests to the source API.', + title='Inject Start Time Into Outgoing HTTP Request', ) step: Optional[str] = Field( None, - description="The size of the time window (ISO8601 duration). Given this field is provided, `cursor_granularity` needs to be provided as well.", - examples=["P1W", "{{ config['step_increment'] }}"], - title="Step", + description='The size of the time window (ISO8601 duration). 
Given this field is provided, `cursor_granularity` needs to be provided as well.', + examples=['P1W', "{{ config['step_increment'] }}"], + title='Step', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class DefaultErrorHandler(BaseModel): - type: Literal["DefaultErrorHandler"] + type: Literal['DefaultErrorHandler'] backoff_strategies: Optional[ List[ Union[ @@ -1197,124 +1226,124 @@ class DefaultErrorHandler(BaseModel): ] ] = Field( None, - description="List of backoff strategies to use to determine how long to wait before retrying a retryable request.", - title="Backoff Strategies", + description='List of backoff strategies to use to determine how long to wait before retrying a retryable request.', + title='Backoff Strategies', ) max_retries: Optional[int] = Field( 5, - description="The maximum number of time to retry a retryable request before giving up and failing.", + description='The maximum number of time to retry a retryable request before giving up and failing.', examples=[5, 0, 10], - title="Max Retry Count", + title='Max Retry Count', ) response_filters: Optional[List[HttpResponseFilter]] = Field( None, description="List of response filters to iterate on when deciding how to handle an error. 
When using an array of multiple filters, the filters will be applied sequentially and the response will be selected if it matches any of the filter's predicate.", - title="Response Filters", + title='Response Filters', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class DefaultPaginator(BaseModel): - type: Literal["DefaultPaginator"] + type: Literal['DefaultPaginator'] pagination_strategy: Union[ CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement ] = Field( ..., - description="Strategy defining how records are paginated.", - title="Pagination Strategy", + description='Strategy defining how records are paginated.', + title='Pagination Strategy', ) page_size_option: Optional[RequestOption] = None page_token_option: Optional[Union[RequestOption, RequestPath]] = None - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SessionTokenRequestApiKeyAuthenticator(BaseModel): - type: Literal["ApiKey"] + type: Literal['ApiKey'] inject_into: RequestOption = Field( ..., - description="Configure how the API Key will be sent in requests to the source API.", + description='Configure how the API Key will be sent in requests to the source API.', examples=[ - {"inject_into": "header", "field_name": "Authorization"}, - {"inject_into": "request_parameter", "field_name": "authKey"}, + {'inject_into': 'header', 'field_name': 'Authorization'}, + {'inject_into': 'request_parameter', 'field_name': 'authKey'}, ], - title="Inject API Key Into Outgoing HTTP Request", + title='Inject API Key Into Outgoing HTTP Request', ) class ListPartitionRouter(BaseModel): - type: Literal["ListPartitionRouter"] + type: Literal['ListPartitionRouter'] cursor_field: str = Field( ..., description='While iterating over list values, the name of field used to reference a list value. 
The partition value can be accessed with string interpolation. e.g. "{{ stream_partition[\'my_key\'] }}" where "my_key" is the value of the cursor_field.', - examples=["section", "{{ config['section_key'] }}"], - title="Current Partition Value Identifier", + examples=['section', "{{ config['section_key'] }}"], + title='Current Partition Value Identifier', ) values: Union[str, List[str]] = Field( ..., - description="The list of attributes being iterated over and used as input for the requests made to the source API.", - examples=[["section_a", "section_b", "section_c"], "{{ config['sections'] }}"], - title="Partition Values", + description='The list of attributes being iterated over and used as input for the requests made to the source API.', + examples=[['section_a', 'section_b', 'section_c'], "{{ config['sections'] }}"], + title='Partition Values', ) request_option: Optional[RequestOption] = Field( None, - description="A request option describing where the list value should be injected into and under what field name if applicable.", - title="Inject Partition Value Into Outgoing HTTP Request", + description='A request option describing where the list value should be injected into and under what field name if applicable.', + title='Inject Partition Value Into Outgoing HTTP Request', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class RecordSelector(BaseModel): - type: Literal["RecordSelector"] + type: Literal['RecordSelector'] extractor: Union[CustomRecordExtractor, DpathExtractor] record_filter: Optional[Union[CustomRecordFilter, RecordFilter]] = Field( None, - description="Responsible for filtering records to be emitted by the Source.", - title="Record Filter", + description='Responsible for filtering records to be emitted by the Source.', + title='Record Filter', ) schema_normalization: Optional[SchemaNormalization] = SchemaNormalization.None_ - parameters: 
Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class Spec(BaseModel): - type: Literal["Spec"] + type: Literal['Spec'] connection_specification: Dict[str, Any] = Field( ..., - description="A connection specification describing how a the connector can be configured.", - title="Connection Specification", + description='A connection specification describing how a the connector can be configured.', + title='Connection Specification', ) documentation_url: Optional[str] = Field( None, description="URL of the connector's documentation page.", - examples=["https://docs.airbyte.com/integrations/sources/dremio"], - title="Documentation URL", + examples=['https://docs.airbyte.com/integrations/sources/dremio'], + title='Documentation URL', ) advanced_auth: Optional[AuthFlow] = Field( None, - description="Advanced specification for configuring the authentication flow.", - title="Advanced Auth", + description='Advanced specification for configuring the authentication flow.', + title='Advanced Auth', ) class CompositeErrorHandler(BaseModel): - type: Literal["CompositeErrorHandler"] + type: Literal['CompositeErrorHandler'] error_handlers: List[Union[CompositeErrorHandler, DefaultErrorHandler]] = Field( ..., - description="List of error handlers to iterate on to determine how to handle a failed response.", - title="Error Handlers", + description='List of error handlers to iterate on to determine how to handle a failed response.', + title='Error Handlers', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class DeclarativeSource(BaseModel): class Config: extra = Extra.forbid - type: Literal["DeclarativeSource"] + type: Literal['DeclarativeSource'] check: CheckStream streams: List[DeclarativeStream] version: str = Field( ..., - description="The version of the Airbyte CDK used to build and test the 
source.", + description='The version of the Airbyte CDK used to build and test the source.', ) schemas: Optional[Schemas] = None definitions: Optional[Dict[str, Any]] = None @@ -1322,11 +1351,11 @@ class Config: concurrency_level: Optional[ConcurrencyLevel] = None metadata: Optional[Dict[str, Any]] = Field( None, - description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.", + description='For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.', ) description: Optional[str] = Field( None, - description="A description of the connector. It will be presented on the Source documentation page.", + description='A description of the connector. It will be presented on the Source documentation page.', ) @@ -1334,12 +1363,12 @@ class SelectiveAuthenticator(BaseModel): class Config: extra = Extra.allow - type: Literal["SelectiveAuthenticator"] + type: Literal['SelectiveAuthenticator'] authenticator_selection_path: List[str] = Field( ..., - description="Path of the field in config with selected authenticator name", - examples=[["auth"], ["auth", "type"]], - title="Authenticator Selection Path", + description='Path of the field in config with selected authenticator name', + examples=[['auth'], ['auth', 'type']], + title='Authenticator Selection Path', ) authenticators: Dict[ str, @@ -1356,128 +1385,132 @@ class Config: ], ] = Field( ..., - description="Authenticators to select from.", + description='Authenticators to select from.', examples=[ { - "authenticators": { - "token": "#/definitions/ApiKeyAuthenticator", - "oauth": "#/definitions/OAuthAuthenticator", - "jwt": "#/definitions/JwtAuthenticator", + 'authenticators': { + 'token': '#/definitions/ApiKeyAuthenticator', + 'oauth': '#/definitions/OAuthAuthenticator', + 'jwt': '#/definitions/JwtAuthenticator', } } ], - title="Authenticators", + title='Authenticators', ) - 
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class DeclarativeStream(BaseModel): class Config: extra = Extra.allow - type: Literal["DeclarativeStream"] + type: Literal['DeclarativeStream'] retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field( ..., - description="Component used to coordinate how records are extracted across stream slices and request pages.", - title="Retriever", + description='Component used to coordinate how records are extracted across stream slices and request pages.', + title='Retriever', ) - incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field( - None, - description="Component used to fetch data incrementally based on a time field in the data.", - title="Incremental Sync", - ) - name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name") - primary_key: Optional[PrimaryKey] = Field( - "", description="The primary key of the stream.", title="Primary Key" - ) - schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader]] = ( + incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = ( Field( None, - description="Component used to retrieve the schema for the current stream.", - title="Schema Loader", + description='Component used to fetch data incrementally based on a time field in the data.', + title='Incremental Sync', ) ) + name: Optional[str] = Field( + '', description='The stream name.', example=['Users'], title='Name' + ) + primary_key: Optional[PrimaryKey] = Field( + '', description='The primary key of the stream.', title='Primary Key' + ) + schema_loader: Optional[ + Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader] + ] = Field( + None, + description='Component used to retrieve the schema for the current stream.', + title='Schema Loader', + ) transformations: Optional[ 
List[Union[AddFields, CustomTransformation, RemoveFields, KeysToLower]] ] = Field( None, - description="A list of transformations to be applied to each output record.", - title="Transformations", + description='A list of transformations to be applied to each output record.', + title='Transformations', ) state_migrations: Optional[ List[Union[LegacyToPerPartitionStateMigration, CustomStateMigration]] ] = Field( [], - description="Array of state migrations to be applied on the input state", - title="State Migrations", + description='Array of state migrations to be applied on the input state', + title='State Migrations', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SessionTokenAuthenticator(BaseModel): - type: Literal["SessionTokenAuthenticator"] + type: Literal['SessionTokenAuthenticator'] login_requester: HttpRequester = Field( ..., - description="Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.", + description='Description of the request to perform to obtain a session token to perform data requests. 
The response body is expected to be a JSON object with a session token property.', examples=[ { - "type": "HttpRequester", - "url_base": "https://my_api.com", - "path": "/login", - "authenticator": { - "type": "BasicHttpAuthenticator", - "username": "{{ config.username }}", - "password": "{{ config.password }}", + 'type': 'HttpRequester', + 'url_base': 'https://my_api.com', + 'path': '/login', + 'authenticator': { + 'type': 'BasicHttpAuthenticator', + 'username': '{{ config.username }}', + 'password': '{{ config.password }}', }, } ], - title="Login Requester", + title='Login Requester', ) session_token_path: List[str] = Field( ..., - description="The path in the response body returned from the login requester to the session token.", - examples=[["access_token"], ["result", "token"]], - title="Session Token Path", + description='The path in the response body returned from the login requester to the session token.', + examples=[['access_token'], ['result', 'token']], + title='Session Token Path', ) expiration_duration: Optional[str] = Field( None, - description="The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. Omitting it will result in the session token being refreshed for every request.", - examples=["PT1H", "P1D"], - title="Expiration Duration", + description='The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. 
Omitting it will result in the session token being refreshed for every request.', + examples=['PT1H', 'P1D'], + title='Expiration Duration', ) request_authentication: Union[ SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator ] = Field( ..., - description="Authentication method to use for requests sent to the API, specifying how to inject the session token.", - title="Data Request Authentication", + description='Authentication method to use for requests sent to the API, specifying how to inject the session token.', + title='Data Request Authentication', ) decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field( - None, description="Component used to decode the response.", title="Decoder" + None, description='Component used to decode the response.', title='Decoder' ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class HttpRequester(BaseModel): - type: Literal["HttpRequester"] + type: Literal['HttpRequester'] url_base: str = Field( ..., - description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", + description='Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.', examples=[ - "https://connect.squareup.com/v2", + 'https://connect.squareup.com/v2', "{{ config['base_url'] or 'https://app.posthog.com'}}/api/", ], - title="API Base URL", + title='API Base URL', ) path: str = Field( ..., - description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", + description='Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. 
API tokens) into this field - Use the Authentication component for this.', examples=[ - "/products", + '/products', "/quotes/{{ stream_partition['id'] }}/quote_line_groups", "/trades/{{ config['symbol_id'] }}/history", ], - title="URL Path", + title='URL Path', ) authenticator: Optional[ Union[ @@ -1494,111 +1527,111 @@ class HttpRequester(BaseModel): ] ] = Field( None, - description="Authentication method to use for requests sent to the API.", - title="Authenticator", + description='Authentication method to use for requests sent to the API.', + title='Authenticator', ) error_handler: Optional[ Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler] ] = Field( None, - description="Error handler component that defines how to handle errors.", - title="Error Handler", + description='Error handler component that defines how to handle errors.', + title='Error Handler', ) http_method: Optional[HttpMethod] = Field( HttpMethod.GET, - description="The HTTP method used to fetch data from the source (can be GET or POST).", - examples=["GET", "POST"], - title="HTTP Method", + description='The HTTP method used to fetch data from the source (can be GET or POST).', + examples=['GET', 'POST'], + title='HTTP Method', ) request_body_data: Optional[Union[str, Dict[str, str]]] = Field( None, - description="Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.", + description='Specifies how to populate the body of the request with a non-JSON payload. 
Plain text will be sent as is, whereas objects will be converted to a urlencoded form.', examples=[ '[{"clause": {"type": "timestamp", "operator": 10, "parameters":\n [{"value": {{ stream_interval[\'start_time\'] | int * 1000 }} }]\n }, "orderBy": 1, "columnName": "Timestamp"}]/\n' ], - title="Request Body Payload (Non-JSON)", + title='Request Body Payload (Non-JSON)', ) request_body_json: Optional[Union[str, Dict[str, Any]]] = Field( None, - description="Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.", + description='Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.', examples=[ - {"sort_order": "ASC", "sort_field": "CREATED_AT"}, - {"key": "{{ config['value'] }}"}, - {"sort": {"field": "updated_at", "order": "ascending"}}, + {'sort_order': 'ASC', 'sort_field': 'CREATED_AT'}, + {'key': "{{ config['value'] }}"}, + {'sort': {'field': 'updated_at', 'order': 'ascending'}}, ], - title="Request Body JSON Payload", + title='Request Body JSON Payload', ) request_headers: Optional[Union[str, Dict[str, str]]] = Field( None, - description="Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.", - examples=[{"Output-Format": "JSON"}, {"Version": "{{ config['version'] }}"}], - title="Request Headers", + description='Return any non-auth headers. 
Authentication headers will overwrite any overlapping headers returned from this method.', + examples=[{'Output-Format': 'JSON'}, {'Version': "{{ config['version'] }}"}], + title='Request Headers', ) request_parameters: Optional[Union[str, Dict[str, str]]] = Field( None, - description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.", + description='Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.', examples=[ - {"unit": "day"}, + {'unit': 'day'}, { - "query": 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' + 'query': 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' }, - {"searchIn": "{{ ','.join(config.get('search_in', [])) }}"}, - {"sort_by[asc]": "updated_at"}, + {'searchIn': "{{ ','.join(config.get('search_in', [])) }}"}, + {'sort_by[asc]': 'updated_at'}, ], - title="Query Parameters", + title='Query Parameters', ) use_cache: Optional[bool] = Field( False, - description="Enables stream requests caching. This field is automatically set by the CDK.", - title="Use Cache", + description='Enables stream requests caching. This field is automatically set by the CDK.', + title='Use Cache', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class ParentStreamConfig(BaseModel): - type: Literal["ParentStreamConfig"] + type: Literal['ParentStreamConfig'] parent_key: str = Field( ..., - description="The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. 
This parent identifier field is typically a characteristic of the child records being extracted from the source API.", - examples=["id", "{{ config['parent_record_id'] }}"], - title="Parent Key", + description='The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. This parent identifier field is typically a characteristic of the child records being extracted from the source API.', + examples=['id', "{{ config['parent_record_id'] }}"], + title='Parent Key', ) stream: DeclarativeStream = Field( - ..., description="Reference to the parent stream.", title="Parent Stream" + ..., description='Reference to the parent stream.', title='Parent Stream' ) partition_field: str = Field( ..., - description="While iterating over parent records during a sync, the parent_key value can be referenced by using this field.", - examples=["parent_id", "{{ config['parent_partition_field'] }}"], - title="Current Parent Key Value Identifier", + description='While iterating over parent records during a sync, the parent_key value can be referenced by using this field.', + examples=['parent_id', "{{ config['parent_partition_field'] }}"], + title='Current Parent Key Value Identifier', ) request_option: Optional[RequestOption] = Field( None, - description="A request option describing where the parent key value should be injected into and under what field name if applicable.", - title="Request Option", + description='A request option describing where the parent key value should be injected into and under what field name if applicable.', + title='Request Option', ) incremental_dependency: Optional[bool] = Field( False, - description="Indicates whether the parent stream should be read incrementally based on updates in the child stream.", - title="Incremental Dependency", + description='Indicates whether the parent stream should be read incrementally based on updates in the child stream.', + title='Incremental Dependency', ) 
extra_fields: Optional[List[List[str]]] = Field( None, - description="Array of field paths to include as additional fields in the stream slice. Each path is an array of strings representing keys to access fields in the respective parent record. Accessible via `stream_slice.extra_fields`. Missing fields are set to `None`.", - title="Extra Fields", + description='Array of field paths to include as additional fields in the stream slice. Each path is an array of strings representing keys to access fields in the respective parent record. Accessible via `stream_slice.extra_fields`. Missing fields are set to `None`.', + title='Extra Fields', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SimpleRetriever(BaseModel): - type: Literal["SimpleRetriever"] + type: Literal['SimpleRetriever'] record_selector: RecordSelector = Field( ..., - description="Component that describes how to extract records from a HTTP response.", + description='Component that describes how to extract records from a HTTP response.', ) requester: Union[CustomRequester, HttpRequester] = Field( ..., - description="Requester component that describes how to prepare HTTP requests to send to the source API.", + description='Requester component that describes how to prepare HTTP requests to send to the source API.', ) paginator: Optional[Union[DefaultPaginator, NoPagination]] = Field( None, @@ -1606,55 +1639,68 @@ class SimpleRetriever(BaseModel): ) ignore_stream_slicer_parameters_on_paginated_requests: Optional[bool] = Field( False, - description="If true, the partition router and incremental request options will be ignored when paginating requests. Request options set directly on the requester will not be ignored.", + description='If true, the partition router and incremental request options will be ignored when paginating requests. 
Request options set directly on the requester will not be ignored.', ) partition_router: Optional[ Union[ CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], + List[ + Union[ + CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter + ] + ], ] ] = Field( [], - description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", - title="Partition Router", + description='PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.', + title='Partition Router', ) - decoder: Optional[Union[JsonDecoder, JsonlDecoder, IterableDecoder, XmlDecoder]] = Field( + decoder: Optional[ + Union[ + CustomDecoder, + JsonDecoder, + JsonlDecoder, + IterableDecoder, + XmlDecoder, + GzipJsonDecoder, + ] + ] = Field( None, - description="Component decoding the response so records can be extracted.", - title="Decoder", + description='Component decoding the response so records can be extracted.', + title='Decoder', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class AsyncRetriever(BaseModel): - type: Literal["AsyncRetriever"] + type: Literal['AsyncRetriever'] record_selector: RecordSelector = Field( ..., - description="Component that describes how to extract records from a HTTP response.", + description='Component that describes how to extract records from a HTTP response.', ) status_mapping: AsyncJobStatusMap = Field( - ..., description="Async Job Status to Airbyte CDK Async Job Status mapping." + ..., description='Async Job Status to Airbyte CDK Async Job Status mapping.' ) status_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field( - ..., description="Responsible for fetching the actual status of the async job." 
+ ..., description='Responsible for fetching the actual status of the async job.' ) urls_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field( ..., - description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.", + description='Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.', ) creation_requester: Union[CustomRequester, HttpRequester] = Field( ..., - description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.", + description='Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.', ) polling_requester: Union[CustomRequester, HttpRequester] = Field( ..., - description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.", + description='Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.', ) download_requester: Union[CustomRequester, HttpRequester] = Field( ..., - description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.", + description='Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.', ) download_paginator: Optional[Union[DefaultPaginator, NoPagination]] = Field( None, @@ -1666,36 +1712,49 @@ class AsyncRetriever(BaseModel): ) delete_requester: Optional[Union[CustomRequester, HttpRequester]] = Field( None, - description="Requester component that describes how to prepare HTTP requests to send to the source API to delete a job once the records are extracted.", + description='Requester component that describes how to prepare HTTP 
requests to send to the source API to delete a job once the records are extracted.', ) partition_router: Optional[ Union[ CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], + List[ + Union[ + CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter + ] + ], ] ] = Field( [], - description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", - title="Partition Router", + description='PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.', + title='Partition Router', ) - decoder: Optional[Union[JsonDecoder, JsonlDecoder, IterableDecoder, XmlDecoder]] = Field( + decoder: Optional[ + Union[ + CustomDecoder, + JsonDecoder, + JsonlDecoder, + IterableDecoder, + XmlDecoder, + GzipJsonDecoder, + ] + ] = Field( None, - description="Component decoding the response so records can be extracted.", - title="Decoder", + description='Component decoding the response so records can be extracted.', + title='Decoder', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') class SubstreamPartitionRouter(BaseModel): - type: Literal["SubstreamPartitionRouter"] + type: Literal['SubstreamPartitionRouter'] parent_stream_configs: List[ParentStreamConfig] = Field( ..., - description="Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.", - title="Parent Stream Configs", + description='Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.', + title='Parent Stream Configs', ) - parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") + parameters: Optional[Dict[str, Any]] = Field(None, 
alias='$parameters') CompositeErrorHandler.update_forward_refs() @@ -1704,4 +1763,4 @@ class SubstreamPartitionRouter(BaseModel): DeclarativeStream.update_forward_refs() SessionTokenAuthenticator.update_forward_refs() SimpleRetriever.update_forward_refs() -AsyncRetriever.update_forward_refs() +AsyncRetriever.update_forward_refs() \ No newline at end of file From 77441b8d019656a08ebe4fe5e44af7cdbc9ee79d Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 11 Nov 2024 17:43:00 +0100 Subject: [PATCH 02/17] Airbyte CDK: add unittest --- .../declarative/decoders/test_json_decoder.py | 78 ++++++++++++++++++- 1 file changed, 75 insertions(+), 3 deletions(-) diff --git a/unit_tests/sources/declarative/decoders/test_json_decoder.py b/unit_tests/sources/declarative/decoders/test_json_decoder.py index 861b6e27..f4122b02 100644 --- a/unit_tests/sources/declarative/decoders/test_json_decoder.py +++ b/unit_tests/sources/declarative/decoders/test_json_decoder.py @@ -1,6 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
# +import gzip import json import os @@ -10,9 +11,8 @@ from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder, JsonlDecoder from airbyte_cdk.sources.declarative.models import DeclarativeStream as DeclarativeStreamModel -from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ( - ModelToComponentFactory, -) +from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ModelToComponentFactory +from airbyte_cdk.sources.declarative.decoders import GzipJsonDecoder @pytest.mark.parametrize( @@ -120,3 +120,75 @@ def get_body(): counter += 1 assert counter == lines_in_response * len(stream_slices) + +@pytest.mark.parametrize( + "encoding", + [ + "utf-8", + "utf", + ], + ids=["utf-8", "utf"], +) +def test_gzipjson_decoder(requests_mock, encoding): + response_to_compress = json.dumps([ + { + "campaignId": 214078428, + "campaignName": "sample-campaign-name-214078428", + "adGroupId": "6490134", + "adId": "665320125", + "targetId": "791320341", + "asin": "G000PSH142", + "advertisedAsin": "G000PSH142", + "keywordBid": "511234974", + "keywordId": "965783021" + }, + { + "campaignId": 44504582, + "campaignName": "sample-campaign-name-44504582", + "adGroupId": "6490134", + "adId": "665320125", + "targetId": "791320341", + "asin": "G000PSH142", + "advertisedAsin": "G000PSH142", + "keywordBid": "511234974", + "keywordId": "965783021" + }, + { + "campaignId": 509144838, + "campaignName": "sample-campaign-name-509144838", + "adGroupId": "6490134", + "adId": "665320125", + "targetId": "791320341", + "asin": "G000PSH142", + "advertisedAsin": "G000PSH142", + "keywordBid": "511234974", + "keywordId": "965783021" + }, + { + "campaignId": 231712082, + "campaignName": "sample-campaign-name-231712082", + "adGroupId": "6490134", + "adId": "665320125", + "targetId": "791320341", + "asin": "G000PSH142", + "advertisedAsin": "G000PSH142", + "keywordBid": "511234974", + "keywordId": "965783021" + }, + { +
"campaignId": 895306040, + "campaignName": "sample-campaign-name-895306040", + "adGroupId": "6490134", + "adId": "665320125", + "targetId": "791320341", + "asin": "G000PSH142", + "advertisedAsin": "G000PSH142", + "keywordBid": "511234974", + "keywordId": "965783021" + } + ]) + body = gzip.compress(response_to_compress.encode(encoding)) + + requests_mock.register_uri("GET", "https://airbyte.io/", content=body) + response = requests.get("https://airbyte.io/") + assert len(list(GzipJsonDecoder(parameters={}, encoding=encoding).decode(response))) == 5 \ No newline at end of file From 7202fb5d7890e8dd8b1227ae8299e286ac83b7ad Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 11 Nov 2024 17:43:53 +0100 Subject: [PATCH 03/17] Airbyte CDK: add parser --- .../parsers/model_to_component_factory.py | 1045 ++++------------- 1 file changed, 254 insertions(+), 791 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 2812ba81..0d8c2435 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -9,21 +9,7 @@ import inspect import re from functools import partial -from typing import ( - Any, - Callable, - Dict, - List, - Mapping, - MutableMapping, - Optional, - Tuple, - Type, - Union, - get_args, - get_origin, - get_type_hints, -) +from typing import Any, Callable, Dict, List, Mapping, MutableMapping, Optional, Tuple, Type, Union, get_args, get_origin, get_type_hints from airbyte_cdk.models import FailureType, Level from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager @@ -32,14 +18,9 @@ from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, 
JwtAuthenticator -from airbyte_cdk.sources.declarative.auth.declarative_authenticator import ( - DeclarativeAuthenticator, - NoAuth, -) +from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator, NoAuth from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm -from airbyte_cdk.sources.declarative.auth.oauth import ( - DeclarativeSingleUseRefreshTokenOauth2Authenticator, -) +from airbyte_cdk.sources.declarative.auth.oauth import DeclarativeSingleUseRefreshTokenOauth2Authenticator from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator from airbyte_cdk.sources.declarative.auth.token import ( ApiKeyAuthenticator, @@ -47,35 +28,23 @@ BearerAuthenticator, LegacySessionTokenAuthenticator, ) -from airbyte_cdk.sources.declarative.auth.token_provider import ( - InterpolatedStringTokenProvider, - SessionTokenProvider, - TokenProvider, -) +from airbyte_cdk.sources.declarative.auth.token_provider import InterpolatedStringTokenProvider, SessionTokenProvider, TokenProvider from airbyte_cdk.sources.declarative.checks import CheckStream from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream from airbyte_cdk.sources.declarative.decoders import ( Decoder, + GzipJsonDecoder, IterableDecoder, JsonDecoder, JsonlDecoder, PaginationDecoderDecorator, XmlDecoder, ) -from airbyte_cdk.sources.declarative.extractors import ( - DpathExtractor, - RecordFilter, - RecordSelector, - ResponseToFileExtractor, -) -from airbyte_cdk.sources.declarative.extractors.record_filter import ( - ClientSideIncrementalRecordFilterDecorator, -) -from airbyte_cdk.sources.declarative.extractors.record_selector import ( - SCHEMA_TRANSFORMER_TYPE_MAPPING, -) +from airbyte_cdk.sources.declarative.extractors import DpathExtractor, RecordFilter, RecordSelector, 
ResponseToFileExtractor +from airbyte_cdk.sources.declarative.extractors.record_filter import ClientSideIncrementalRecordFilterDecorator +from airbyte_cdk.sources.declarative.extractors.record_selector import SCHEMA_TRANSFORMER_TYPE_MAPPING from airbyte_cdk.sources.declarative.incremental import ( ChildPartitionResumableFullRefreshCursor, CursorFactory, @@ -88,216 +57,90 @@ ) from airbyte_cdk.sources.declarative.interpolation import InterpolatedString from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping -from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import ( - LegacyToPerPartitionStateMigration, -) +from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import LegacyToPerPartitionStateMigration from airbyte_cdk.sources.declarative.models import CustomStateMigration -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - AddedFieldDefinition as AddedFieldDefinitionModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - AddFields as AddFieldsModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - ApiKeyAuthenticator as ApiKeyAuthenticatorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - AsyncJobStatusMap as AsyncJobStatusMapModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - AsyncRetriever as AsyncRetrieverModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - BasicHttpAuthenticator as BasicHttpAuthenticatorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - BearerAuthenticator as BearerAuthenticatorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CheckStream as CheckStreamModel, -) -from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CompositeErrorHandler as CompositeErrorHandlerModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - ConcurrencyLevel as ConcurrencyLevelModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - ConstantBackoffStrategy as ConstantBackoffStrategyModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CursorPagination as CursorPaginationModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomAuthenticator as CustomAuthenticatorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomBackoffStrategy as CustomBackoffStrategyModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomErrorHandler as CustomErrorHandlerModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomIncrementalSync as CustomIncrementalSyncModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomPaginationStrategy as CustomPaginationStrategyModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomPartitionRouter as CustomPartitionRouterModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomRecordExtractor as CustomRecordExtractorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomRecordFilter as CustomRecordFilterModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomRequester as CustomRequesterModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomRetriever as CustomRetrieverModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import 
( - CustomSchemaLoader as CustomSchemaLoader, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomTransformation as CustomTransformationModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - DatetimeBasedCursor as DatetimeBasedCursorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - DeclarativeStream as DeclarativeStreamModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - DefaultErrorHandler as DefaultErrorHandlerModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - DefaultPaginator as DefaultPaginatorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - DpathExtractor as DpathExtractorModel, -) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import AddedFieldDefinition as AddedFieldDefinitionModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import AddFields as AddFieldsModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ApiKeyAuthenticator as ApiKeyAuthenticatorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import AsyncJobStatusMap as AsyncJobStatusMapModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import AsyncRetriever as AsyncRetrieverModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import BasicHttpAuthenticator as BasicHttpAuthenticatorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import BearerAuthenticator as BearerAuthenticatorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CheckStream as CheckStreamModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CompositeErrorHandler as CompositeErrorHandlerModel +from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import ConcurrencyLevel as ConcurrencyLevelModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ConstantBackoffStrategy as ConstantBackoffStrategyModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CursorPagination as CursorPaginationModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomAuthenticator as CustomAuthenticatorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomBackoffStrategy as CustomBackoffStrategyModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomDecoder as CustomDecoderModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomErrorHandler as CustomErrorHandlerModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomIncrementalSync as CustomIncrementalSyncModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomPaginationStrategy as CustomPaginationStrategyModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomPartitionRouter as CustomPartitionRouterModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRecordExtractor as CustomRecordExtractorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRecordFilter as CustomRecordFilterModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRequester as CustomRequesterModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRetriever as CustomRetrieverModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomSchemaLoader as CustomSchemaLoader +from airbyte_cdk.sources.declarative.models.declarative_component_schema import 
CustomTransformation as CustomTransformationModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import DatetimeBasedCursor as DatetimeBasedCursorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import DeclarativeStream as DeclarativeStreamModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import DefaultErrorHandler as DefaultErrorHandlerModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import DefaultPaginator as DefaultPaginatorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import DpathExtractor as DpathExtractorModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( ExponentialBackoffStrategy as ExponentialBackoffStrategyModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - HttpRequester as HttpRequesterModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - HttpResponseFilter as HttpResponseFilterModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - InlineSchemaLoader as InlineSchemaLoaderModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - IterableDecoder as IterableDecoderModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - JsonDecoder as JsonDecoderModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - JsonFileSchemaLoader as JsonFileSchemaLoaderModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - JsonlDecoder as JsonlDecoderModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - JwtAuthenticator as JwtAuthenticatorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - JwtHeaders as JwtHeadersModel, -) -from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - JwtPayload as JwtPayloadModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - KeysToLower as KeysToLowerModel, -) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import GzipJsonDecoder as GzipJsonDecoderModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import HttpRequester as HttpRequesterModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import HttpResponseFilter as HttpResponseFilterModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import InlineSchemaLoader as InlineSchemaLoaderModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import IterableDecoder as IterableDecoderModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import JsonDecoder as JsonDecoderModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import JsonFileSchemaLoader as JsonFileSchemaLoaderModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import JsonlDecoder as JsonlDecoderModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import JwtAuthenticator as JwtAuthenticatorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import JwtHeaders as JwtHeadersModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import JwtPayload as JwtPayloadModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import KeysToLower as KeysToLowerModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel, ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel, ) -from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - ListPartitionRouter as ListPartitionRouterModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - MinMaxDatetime as MinMaxDatetimeModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - NoAuth as NoAuthModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - NoPagination as NoPaginationModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - OAuthAuthenticator as OAuthAuthenticatorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - OffsetIncrement as OffsetIncrementModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - PageIncrement as PageIncrementModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - ParentStreamConfig as ParentStreamConfigModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - RecordFilter as RecordFilterModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - RecordSelector as RecordSelectorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - RemoveFields as RemoveFieldsModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - RequestOption as RequestOptionModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - RequestPath as RequestPathModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - SelectiveAuthenticator as SelectiveAuthenticatorModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - SessionTokenAuthenticator as SessionTokenAuthenticatorModel, -) -from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - SimpleRetriever as SimpleRetrieverModel, -) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ListPartitionRouter as ListPartitionRouterModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import MinMaxDatetime as MinMaxDatetimeModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import NoAuth as NoAuthModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import NoPagination as NoPaginationModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import OAuthAuthenticator as OAuthAuthenticatorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import OffsetIncrement as OffsetIncrementModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import PageIncrement as PageIncrementModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ParentStreamConfig as ParentStreamConfigModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import RecordFilter as RecordFilterModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import RecordSelector as RecordSelectorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import RemoveFields as RemoveFieldsModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import RequestOption as RequestOptionModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import RequestPath as RequestPathModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import SelectiveAuthenticator as SelectiveAuthenticatorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import SessionTokenAuthenticator as SessionTokenAuthenticatorModel +from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import SimpleRetriever as SimpleRetrieverModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - SubstreamPartitionRouter as SubstreamPartitionRouterModel, -) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import SubstreamPartitionRouter as SubstreamPartitionRouterModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - WaitTimeFromHeader as WaitTimeFromHeaderModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel, -) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - XmlDecoder as XmlDecoderModel, -) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import WaitTimeFromHeader as WaitTimeFromHeaderModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import XmlDecoder as XmlDecoderModel from airbyte_cdk.sources.declarative.partition_routers import ( CartesianProductStreamSlicer, ListPartitionRouter, SinglePartitionRouter, SubstreamPartitionRouter, ) -from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ( - ParentStreamConfig, -) +from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ParentStreamConfig from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption -from airbyte_cdk.sources.declarative.requesters.error_handlers import ( - CompositeErrorHandler, - DefaultErrorHandler, - HttpResponseFilter, -) +from 
airbyte_cdk.sources.declarative.requesters.error_handlers import CompositeErrorHandler, DefaultErrorHandler, HttpResponseFilter from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import ( ConstantBackoffStrategy, ExponentialBackoffStrategy, @@ -305,11 +148,7 @@ WaitUntilTimeFromHeaderBackoffStrategy, ) from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository -from airbyte_cdk.sources.declarative.requesters.paginators import ( - DefaultPaginator, - NoPagination, - PaginatorTestReadDecorator, -) +from airbyte_cdk.sources.declarative.requesters.paginators import DefaultPaginator, NoPagination, PaginatorTestReadDecorator from airbyte_cdk.sources.declarative.requesters.paginators.strategies import ( CursorPaginationStrategy, CursorStopCondition, @@ -326,36 +165,19 @@ ) from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod -from airbyte_cdk.sources.declarative.retrievers import ( - AsyncRetriever, - SimpleRetriever, - SimpleRetrieverTestReadDecorator, -) -from airbyte_cdk.sources.declarative.schema import ( - DefaultSchemaLoader, - InlineSchemaLoader, - JsonFileSchemaLoader, -) +from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, SimpleRetriever, SimpleRetrieverTestReadDecorator +from airbyte_cdk.sources.declarative.schema import DefaultSchemaLoader, InlineSchemaLoader, JsonFileSchemaLoader from airbyte_cdk.sources.declarative.spec import Spec from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer -from airbyte_cdk.sources.declarative.transformations import ( - AddFields, - RecordTransformation, - RemoveFields, -) +from airbyte_cdk.sources.declarative.transformations import AddFields, RecordTransformation, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition -from 
airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import ( - KeysToLowerTransformation, -) -from airbyte_cdk.sources.message import ( - InMemoryMessageRepository, - LogAppenderMessageRepositoryDecorator, - MessageRepository, -) +from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import KeysToLowerTransformation +from airbyte_cdk.sources.message import InMemoryMessageRepository, LogAppenderMessageRepositoryDecorator, MessageRepository from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import ( CustomFormatConcurrentStreamStateConverter, DateTimeStreamStateConverter, + EpochValueConcurrentStreamStateConverter, ) from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction from airbyte_cdk.sources.types import Config @@ -367,6 +189,7 @@ class ModelToComponentFactory: + EPOCH_DATETIME_FORMAT = "%s" def __init__( @@ -402,6 +225,7 @@ def _init_mappings(self) -> None: CursorPaginationModel: self.create_cursor_pagination, CustomAuthenticatorModel: self.create_custom_component, CustomBackoffStrategyModel: self.create_custom_component, + CustomDecoderModel: self.create_custom_component, CustomErrorHandlerModel: self.create_custom_component, CustomIncrementalSyncModel: self.create_custom_component, CustomRecordExtractorModel: self.create_custom_component, @@ -425,6 +249,7 @@ def _init_mappings(self) -> None: InlineSchemaLoaderModel: self.create_inline_schema_loader, JsonDecoderModel: self.create_json_decoder, JsonlDecoderModel: self.create_jsonl_decoder, + GzipJsonDecoderModel: self.create_gzipjson_decoder, KeysToLowerModel: self.create_keys_to_lower_transformation, IterableDecoderModel: self.create_iterable_decoder, XmlDecoderModel: self.create_xml_decoder, @@ -458,11 +283,7 @@ def _init_mappings(self) -> None: self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for 
cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR} def create_component( - self, - model_type: Type[BaseModel], - component_definition: ComponentDefinition, - config: Config, - **kwargs: Any, + self, model_type: Type[BaseModel], component_definition: ComponentDefinition, config: Config, **kwargs: Any ) -> Any: """ Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and @@ -477,38 +298,26 @@ def create_component( component_type = component_definition.get("type") if component_definition.get("type") != model_type.__name__: - raise ValueError( - f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" - ) + raise ValueError(f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead") declarative_component_model = model_type.parse_obj(component_definition) if not isinstance(declarative_component_model, model_type): - raise ValueError( - f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" - ) + raise ValueError(f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}") - return self._create_component_from_model( - model=declarative_component_model, config=config, **kwargs - ) + return self._create_component_from_model(model=declarative_component_model, config=config, **kwargs) def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any: if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR: - raise ValueError( - f"{model.__class__} with attributes {model} is not a valid component type" - ) + raise ValueError(f"{model.__class__} with attributes {model} is not a valid component type") component_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__) if not component_constructor: raise ValueError(f"Could not find constructor for {model.__class__}") return 
component_constructor(model=model, config=config, **kwargs) @staticmethod - def create_added_field_definition( - model: AddedFieldDefinitionModel, config: Config, **kwargs: Any - ) -> AddedFieldDefinition: - interpolated_value = InterpolatedString.create( - model.value, parameters=model.parameters or {} - ) + def create_added_field_definition(model: AddedFieldDefinitionModel, config: Config, **kwargs: Any) -> AddedFieldDefinition: + interpolated_value = InterpolatedString.create(model.value, parameters=model.parameters or {}) return AddedFieldDefinition( path=model.path, value=interpolated_value, @@ -520,18 +329,14 @@ def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any added_field_definitions = [ self._create_component_from_model( model=added_field_definition_model, - value_type=ModelToComponentFactory._json_schema_type_name_to_type( - added_field_definition_model.value_type - ), + value_type=ModelToComponentFactory._json_schema_type_name_to_type(added_field_definition_model.value_type), config=config, ) for added_field_definition_model in model.fields ] return AddFields(fields=added_field_definitions, parameters=model.parameters or {}) - def create_keys_to_lower_transformation( - self, model: KeysToLowerModel, config: Config, **kwargs: Any - ) -> KeysToLowerTransformation: + def create_keys_to_lower_transformation(self, model: KeysToLowerModel, config: Config, **kwargs: Any) -> KeysToLowerTransformation: return KeysToLowerTransformation() @staticmethod @@ -548,25 +353,16 @@ def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[ @staticmethod def create_api_key_authenticator( - model: ApiKeyAuthenticatorModel, - config: Config, - token_provider: Optional[TokenProvider] = None, - **kwargs: Any, + model: ApiKeyAuthenticatorModel, config: Config, token_provider: Optional[TokenProvider] = None, **kwargs: Any ) -> ApiKeyAuthenticator: if model.inject_into is None and model.header is None: - raise ValueError( - 
"Expected either inject_into or header to be set for ApiKeyAuthenticator" - ) + raise ValueError("Expected either inject_into or header to be set for ApiKeyAuthenticator") if model.inject_into is not None and model.header is not None: - raise ValueError( - "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option" - ) + raise ValueError("inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option") if token_provider is not None and model.api_token != "": - raise ValueError( - "If token_provider is set, api_token is ignored and has to be set to empty string." - ) + raise ValueError("If token_provider is set, api_token is ignored and has to be set to empty string.") request_option = ( RequestOption( @@ -585,11 +381,7 @@ def create_api_key_authenticator( token_provider=( token_provider if token_provider is not None - else InterpolatedStringTokenProvider( - api_token=model.api_token or "", - config=config, - parameters=model.parameters or {}, - ) + else InterpolatedStringTokenProvider(api_token=model.api_token or "", config=config, parameters=model.parameters or {}) ), request_option=request_option, config=config, @@ -608,44 +400,26 @@ def create_legacy_to_per_partition_state_migration( f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}" ) partition_router = retriever.partition_router - if not isinstance( - partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) - ): + if not isinstance(partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)): raise ValueError( f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. 
Got {type(partition_router)}" ) if not hasattr(partition_router, "parent_stream_configs"): - raise ValueError( - "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." - ) + raise ValueError("LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration.") - return LegacyToPerPartitionStateMigration( - declarative_stream.retriever.partition_router, - declarative_stream.incremental_sync, - config, - declarative_stream.parameters, - ) # type: ignore # The retriever type was already checked + return LegacyToPerPartitionStateMigration(declarative_stream.retriever.partition_router, declarative_stream.incremental_sync, config, declarative_stream.parameters) # type: ignore # The retriever type was already checked def create_session_token_authenticator( self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: - decoder = ( - self._create_component_from_model(model=model.decoder, config=config) - if model.decoder - else JsonDecoder(parameters={}) - ) + decoder = self._create_component_from_model(model=model.decoder, config=config) if model.decoder else JsonDecoder(parameters={}) login_requester = self._create_component_from_model( - model=model.login_requester, - config=config, - name=f"{name}_login_requester", - decoder=decoder, + model=model.login_requester, config=config, name=f"{name}_login_requester", decoder=decoder ) token_provider = SessionTokenProvider( login_requester=login_requester, session_token_path=model.session_token_path, - expiration_duration=parse_duration(model.expiration_duration) - if model.expiration_duration - else None, + expiration_duration=parse_duration(model.expiration_duration) if model.expiration_duration else None, parameters=model.parameters or {}, message_repository=self._message_repository, decoder=decoder, @@ -658,46 +432,28 @@ def create_session_token_authenticator( ) else: return 
ModelToComponentFactory.create_api_key_authenticator( - ApiKeyAuthenticatorModel( - type="ApiKeyAuthenticator", - api_token="", - inject_into=model.request_authentication.inject_into, - ), # type: ignore # $parameters and headers default to None + ApiKeyAuthenticatorModel(type="ApiKeyAuthenticator", api_token="", inject_into=model.request_authentication.inject_into), # type: ignore # $parameters and headers default to None config=config, token_provider=token_provider, ) @staticmethod - def create_basic_http_authenticator( - model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any - ) -> BasicHttpAuthenticator: + def create_basic_http_authenticator(model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any) -> BasicHttpAuthenticator: return BasicHttpAuthenticator( - password=model.password or "", - username=model.username, - config=config, - parameters=model.parameters or {}, + password=model.password or "", username=model.username, config=config, parameters=model.parameters or {} ) @staticmethod def create_bearer_authenticator( - model: BearerAuthenticatorModel, - config: Config, - token_provider: Optional[TokenProvider] = None, - **kwargs: Any, + model: BearerAuthenticatorModel, config: Config, token_provider: Optional[TokenProvider] = None, **kwargs: Any ) -> BearerAuthenticator: if token_provider is not None and model.api_token != "": - raise ValueError( - "If token_provider is set, api_token is ignored and has to be set to empty string." 
- ) + raise ValueError("If token_provider is set, api_token is ignored and has to be set to empty string.") return BearerAuthenticator( token_provider=( token_provider if token_provider is not None - else InterpolatedStringTokenProvider( - api_token=model.api_token or "", - config=config, - parameters=model.parameters or {}, - ) + else InterpolatedStringTokenProvider(api_token=model.api_token or "", config=config, parameters=model.parameters or {}) ), config=config, parameters=model.parameters or {}, @@ -707,21 +463,14 @@ def create_bearer_authenticator( def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream: return CheckStream(stream_names=model.stream_names, parameters={}) - def create_composite_error_handler( - self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any - ) -> CompositeErrorHandler: + def create_composite_error_handler(self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any) -> CompositeErrorHandler: error_handlers = [ - self._create_component_from_model(model=error_handler_model, config=config) - for error_handler_model in model.error_handlers + self._create_component_from_model(model=error_handler_model, config=config) for error_handler_model in model.error_handlers ] - return CompositeErrorHandler( - error_handlers=error_handlers, parameters=model.parameters or {} - ) + return CompositeErrorHandler(error_handlers=error_handlers, parameters=model.parameters or {}) @staticmethod - def create_concurrency_level( - model: ConcurrencyLevelModel, config: Config, **kwargs: Any - ) -> ConcurrencyLevel: + def create_concurrency_level(model: ConcurrencyLevelModel, config: Config, **kwargs: Any) -> ConcurrencyLevel: return ConcurrencyLevel( default_concurrency=model.default_concurrency, max_concurrency=model.max_concurrency, @@ -740,32 +489,26 @@ def create_concurrent_cursor_from_datetime_based_cursor( stream_state: MutableMapping[str, Any], **kwargs: Any, ) -> Tuple[ConcurrentCursor, 
DateTimeStreamStateConverter]: + component_type = component_definition.get("type") if component_definition.get("type") != model_type.__name__: - raise ValueError( - f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" - ) + raise ValueError(f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead") datetime_based_cursor_model = model_type.parse_obj(component_definition) if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): - raise ValueError( - f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" - ) + raise ValueError(f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}") interpolated_cursor_field = InterpolatedString.create( - datetime_based_cursor_model.cursor_field, - parameters=datetime_based_cursor_model.parameters or {}, + datetime_based_cursor_model.cursor_field, parameters=datetime_based_cursor_model.parameters or {} ) cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) interpolated_partition_field_start = InterpolatedString.create( - datetime_based_cursor_model.partition_field_start or "start_time", - parameters=datetime_based_cursor_model.parameters or {}, + datetime_based_cursor_model.partition_field_start or "start_time", parameters=datetime_based_cursor_model.parameters or {} ) interpolated_partition_field_end = InterpolatedString.create( - datetime_based_cursor_model.partition_field_end or "end_time", - parameters=datetime_based_cursor_model.parameters or {}, + datetime_based_cursor_model.partition_field_end or "end_time", parameters=datetime_based_cursor_model.parameters or {} ) slice_boundary_fields = ( @@ -776,17 +519,12 @@ def create_concurrent_cursor_from_datetime_based_cursor( datetime_format = datetime_based_cursor_model.datetime_format cursor_granularity = ( - 
parse_duration(datetime_based_cursor_model.cursor_granularity) - if datetime_based_cursor_model.cursor_granularity - else None + parse_duration(datetime_based_cursor_model.cursor_granularity) if datetime_based_cursor_model.cursor_granularity else None ) lookback_window = None interpolated_lookback_window = ( - InterpolatedString.create( - datetime_based_cursor_model.lookback_window, - parameters=datetime_based_cursor_model.parameters or {}, - ) + InterpolatedString.create(datetime_based_cursor_model.lookback_window, parameters=datetime_based_cursor_model.parameters or {}) if datetime_based_cursor_model.lookback_window else None ) @@ -796,40 +534,34 @@ def create_concurrent_cursor_from_datetime_based_cursor( lookback_window = parse_duration(evaluated_lookback_window) connector_state_converter: DateTimeStreamStateConverter - connector_state_converter = CustomFormatConcurrentStreamStateConverter( - datetime_format=datetime_format, - input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, - is_sequential_state=True, - cursor_granularity=cursor_granularity, - # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice - ) + if datetime_format == self.EPOCH_DATETIME_FORMAT: + connector_state_converter = EpochValueConcurrentStreamStateConverter(is_sequential_state=True) + else: + connector_state_converter = CustomFormatConcurrentStreamStateConverter( + datetime_format=datetime_format, + input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, + is_sequential_state=True, + cursor_granularity=cursor_granularity, + # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice + ) start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): - start_date_runtime_value = self.create_min_max_datetime( - model=datetime_based_cursor_model.start_datetime, config=config - ) + start_date_runtime_value = self.create_min_max_datetime(model=datetime_based_cursor_model.start_datetime, config=config) else: start_date_runtime_value = datetime_based_cursor_model.start_datetime end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): - end_date_runtime_value = self.create_min_max_datetime( - model=datetime_based_cursor_model.end_datetime, config=config - ) + end_date_runtime_value = self.create_min_max_datetime(model=datetime_based_cursor_model.end_datetime, config=config) else: end_date_runtime_value = datetime_based_cursor_model.end_datetime interpolated_start_date = MinMaxDatetime.create( - interpolated_string_or_min_max_datetime=start_date_runtime_value, - parameters=datetime_based_cursor_model.parameters, + interpolated_string_or_min_max_datetime=start_date_runtime_value, parameters=datetime_based_cursor_model.parameters ) interpolated_end_date = ( - None - if not end_date_runtime_value - else MinMaxDatetime.create( - end_date_runtime_value, datetime_based_cursor_model.parameters - ) + None if not end_date_runtime_value else MinMaxDatetime.create(end_date_runtime_value, datetime_based_cursor_model.parameters) ) # If datetime format is not specified then start/end datetime should inherit it from the stream slicer @@ -840,14 +572,10 @@ def create_concurrent_cursor_from_datetime_based_cursor( start_date = interpolated_start_date.get_datetime(config=config) end_date_provider = ( - partial(interpolated_end_date.get_datetime, config) - if interpolated_end_date - else connector_state_converter.get_end_provider() + 
partial(interpolated_end_date.get_datetime, config) if interpolated_end_date else connector_state_converter.get_end_provider() ) - if ( - datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity - ) or ( + if (datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity) or ( not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity ): raise ValueError( @@ -856,12 +584,9 @@ def create_concurrent_cursor_from_datetime_based_cursor( ) # When step is not defined, default to a step size from the starting date to the present moment - step_length = datetime.timedelta.max + step_length = datetime.datetime.now(tz=datetime.timezone.utc) - start_date interpolated_step = ( - InterpolatedString.create( - datetime_based_cursor_model.step, - parameters=datetime_based_cursor_model.parameters or {}, - ) + InterpolatedString.create(datetime_based_cursor_model.step, parameters=datetime_based_cursor_model.parameters or {}) if datetime_based_cursor_model.step else None ) @@ -890,9 +615,7 @@ def create_concurrent_cursor_from_datetime_based_cursor( ) @staticmethod - def create_constant_backoff_strategy( - model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any - ) -> ConstantBackoffStrategy: + def create_constant_backoff_strategy(model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any) -> ConstantBackoffStrategy: return ConstantBackoffStrategy( backoff_time_in_seconds=model.backoff_time_in_seconds, config=config, @@ -910,9 +633,7 @@ def create_cursor_pagination( decoder_to_use = decoder else: if not isinstance(decoder, (JsonDecoder, XmlDecoder)): - raise ValueError( - f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." - ) + raise ValueError(f"Provided decoder of {type(decoder)=} is not supported. 
Please set JsonDecoder or XmlDecoder instead.") decoder_to_use = PaginationDecoderDecorator(decoder=decoder) return CursorPaginationStrategy( @@ -948,28 +669,18 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components for model_field, model_value in model_args.items(): # If a custom component field doesn't have a type set, we try to use the type hints to infer the type - if ( - isinstance(model_value, dict) - and "type" not in model_value - and model_field in component_fields - ): - derived_type = self._derive_component_type_from_type_hints( - component_fields.get(model_field) - ) + if isinstance(model_value, dict) and "type" not in model_value and model_field in component_fields: + derived_type = self._derive_component_type_from_type_hints(component_fields.get(model_field)) if derived_type: model_value["type"] = derived_type if self._is_component(model_value): - model_args[model_field] = self._create_nested_component( - model, model_field, model_value, config - ) + model_args[model_field] = self._create_nested_component(model, model_field, model_value, config) elif isinstance(model_value, list): vals = [] for v in model_value: if isinstance(v, dict) and "type" not in v and model_field in component_fields: - derived_type = self._derive_component_type_from_type_hints( - component_fields.get(model_field) - ) + derived_type = self._derive_component_type_from_type_hints(component_fields.get(model_field)) if derived_type: v["type"] = derived_type if self._is_component(v): @@ -978,11 +689,7 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> vals.append(v) model_args[model_field] = vals - kwargs = { - class_field: model_args[class_field] - for class_field in component_fields.keys() - if class_field in model_args - } + kwargs = {class_field: model_args[class_field] for class_field in 
component_fields.keys() if class_field in model_args} return custom_component_class(**kwargs) @staticmethod @@ -1026,9 +733,7 @@ def _extract_missing_parameters(error: TypeError) -> List[str]: else: return [] - def _create_nested_component( - self, model: Any, model_field: str, model_value: Any, config: Config - ) -> Any: + def _create_nested_component(self, model: Any, model_field: str, model_value: Any, config: Config) -> Any: type_name = model_value.get("type", None) if not type_name: # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent @@ -1047,29 +752,16 @@ def _create_nested_component( model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__) constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs model_parameters = model_value.get("$parameters", {}) - matching_parameters = { - kwarg: model_parameters[kwarg] - for kwarg in constructor_kwargs - if kwarg in model_parameters - } - return self._create_component_from_model( - model=parsed_model, config=config, **matching_parameters - ) + matching_parameters = {kwarg: model_parameters[kwarg] for kwarg in constructor_kwargs if kwarg in model_parameters} + return self._create_component_from_model(model=parsed_model, config=config, **matching_parameters) except TypeError as error: missing_parameters = self._extract_missing_parameters(error) if missing_parameters: raise ValueError( f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " - + ", ".join( - ( - f"{type_name}.$parameters.{parameter}" - for parameter in missing_parameters - ) - ) + + ", ".join((f"{type_name}.$parameters.{parameter}" for parameter in missing_parameters)) ) - raise TypeError( - f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" - ) + raise TypeError(f"Error creating component '{type_name}' with parent custom component {model.class_name}: 
{error}") else: raise ValueError( f"Error creating custom component {model.class_name}. Subcomponent creation has not been implemented for '{type_name}'" @@ -1079,26 +771,18 @@ def _create_nested_component( def _is_component(model_value: Any) -> bool: return isinstance(model_value, dict) and model_value.get("type") is not None - def create_datetime_based_cursor( - self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any - ) -> DatetimeBasedCursor: + def create_datetime_based_cursor(self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any) -> DatetimeBasedCursor: start_datetime: Union[str, MinMaxDatetime] = ( - model.start_datetime - if isinstance(model.start_datetime, str) - else self.create_min_max_datetime(model.start_datetime, config) + model.start_datetime if isinstance(model.start_datetime, str) else self.create_min_max_datetime(model.start_datetime, config) ) end_datetime: Union[str, MinMaxDatetime, None] = None if model.is_data_feed and model.end_datetime: raise ValueError("Data feed does not support end_datetime") if model.is_data_feed and model.is_client_side_incremental: - raise ValueError( - "`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them." - ) + raise ValueError("`Client side incremental` cannot be applied with `data feed`. 
Choose only 1 from them.") if model.end_datetime: end_datetime = ( - model.end_datetime - if isinstance(model.end_datetime, str) - else self.create_min_max_datetime(model.end_datetime, config) + model.end_datetime if isinstance(model.end_datetime, str) else self.create_min_max_datetime(model.end_datetime, config) ) end_time_option = ( @@ -1122,9 +806,7 @@ def create_datetime_based_cursor( return DatetimeBasedCursor( cursor_field=model.cursor_field, - cursor_datetime_formats=model.cursor_datetime_formats - if model.cursor_datetime_formats - else [], + cursor_datetime_formats=model.cursor_datetime_formats if model.cursor_datetime_formats else [], cursor_granularity=model.cursor_granularity, datetime_format=model.datetime_format, end_datetime=end_datetime, @@ -1141,9 +823,7 @@ def create_datetime_based_cursor( parameters=model.parameters or {}, ) - def create_declarative_stream( - self, model: DeclarativeStreamModel, config: Config, **kwargs: Any - ) -> DeclarativeStream: + def create_declarative_stream(self, model: DeclarativeStreamModel, config: Config, **kwargs: Any) -> DeclarativeStream: # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. 
The custom create methods in @@ -1152,9 +832,7 @@ def create_declarative_stream( primary_key = model.primary_key.__root__ if model.primary_key else None stop_condition_on_cursor = ( - model.incremental_sync - and hasattr(model.incremental_sync, "is_data_feed") - and model.incremental_sync.is_data_feed + model.incremental_sync and hasattr(model.incremental_sync, "is_data_feed") and model.incremental_sync.is_data_feed ) client_side_incremental_sync = None if ( @@ -1162,25 +840,13 @@ def create_declarative_stream( and hasattr(model.incremental_sync, "is_client_side_incremental") and model.incremental_sync.is_client_side_incremental ): - supported_slicers = ( - DatetimeBasedCursor, - GlobalSubstreamCursor, - PerPartitionWithGlobalCursor, - ) + supported_slicers = (DatetimeBasedCursor, GlobalSubstreamCursor, PerPartitionWithGlobalCursor) if combined_slicers and not isinstance(combined_slicers, supported_slicers): - raise ValueError( - "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead" - ) + raise ValueError("Unsupported Slicer is used. 
PerPartitionWithGlobalCursor should be used here instead") client_side_incremental_sync = { - "date_time_based_cursor": self._create_component_from_model( - model=model.incremental_sync, config=config - ), + "date_time_based_cursor": self._create_component_from_model(model=model.incremental_sync, config=config), "substream_cursor": ( - combined_slicers - if isinstance( - combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor) - ) - else None + combined_slicers if isinstance(combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)) else None ), } @@ -1220,9 +886,7 @@ def create_declarative_stream( transformations = [] if model.transformations: for transformation_model in model.transformations: - transformations.append( - self._create_component_from_model(model=transformation_model, config=config) - ) + transformations.append(self._create_component_from_model(model=transformation_model, config=config)) retriever = self._create_component_from_model( model=model.retriever, config=config, @@ -1245,9 +909,7 @@ def create_declarative_stream( state_transformations = [] if model.schema_loader: - schema_loader = self._create_component_from_model( - model=model.schema_loader, config=config - ) + schema_loader = self._create_component_from_model(model=model.schema_loader, config=config) else: options = model.parameters or {} if "name" not in options: @@ -1265,9 +927,7 @@ def create_declarative_stream( parameters=model.parameters or {}, ) - def _merge_stream_slicers( - self, model: DeclarativeStreamModel, config: Config - ) -> Optional[StreamSlicer]: + def _merge_stream_slicers(self, model: DeclarativeStreamModel, config: Config) -> Optional[StreamSlicer]: stream_slicer = None if ( hasattr(model.retriever, "partition_router") @@ -1278,85 +938,50 @@ def _merge_stream_slicers( if isinstance(stream_slicer_model, list): stream_slicer = CartesianProductStreamSlicer( - [ - self._create_component_from_model(model=slicer, config=config) - for slicer in 
stream_slicer_model - ], - parameters={}, + [self._create_component_from_model(model=slicer, config=config) for slicer in stream_slicer_model], parameters={} ) else: - stream_slicer = self._create_component_from_model( - model=stream_slicer_model, config=config - ) + stream_slicer = self._create_component_from_model(model=stream_slicer_model, config=config) if model.incremental_sync and stream_slicer: incremental_sync_model = model.incremental_sync - if ( - hasattr(incremental_sync_model, "global_substream_cursor") - and incremental_sync_model.global_substream_cursor - ): - cursor_component = self._create_component_from_model( - model=incremental_sync_model, config=config - ) - return GlobalSubstreamCursor( - stream_cursor=cursor_component, partition_router=stream_slicer - ) + if hasattr(incremental_sync_model, "global_substream_cursor") and incremental_sync_model.global_substream_cursor: + cursor_component = self._create_component_from_model(model=incremental_sync_model, config=config) + return GlobalSubstreamCursor(stream_cursor=cursor_component, partition_router=stream_slicer) else: - cursor_component = self._create_component_from_model( - model=incremental_sync_model, config=config - ) + cursor_component = self._create_component_from_model(model=incremental_sync_model, config=config) return PerPartitionWithGlobalCursor( cursor_factory=CursorFactory( - lambda: self._create_component_from_model( - model=incremental_sync_model, config=config - ), + lambda: self._create_component_from_model(model=incremental_sync_model, config=config), ), partition_router=stream_slicer, stream_cursor=cursor_component, ) elif model.incremental_sync: - return ( - self._create_component_from_model(model=model.incremental_sync, config=config) - if model.incremental_sync - else None - ) + return self._create_component_from_model(model=model.incremental_sync, config=config) if model.incremental_sync else None elif stream_slicer: # For the Full-Refresh sub-streams, we use the nested 
`ChildPartitionResumableFullRefreshCursor` return PerPartitionCursor( - cursor_factory=CursorFactory( - create_function=partial(ChildPartitionResumableFullRefreshCursor, {}) - ), + cursor_factory=CursorFactory(create_function=partial(ChildPartitionResumableFullRefreshCursor, {})), partition_router=stream_slicer, ) - elif ( - hasattr(model.retriever, "paginator") - and model.retriever.paginator - and not stream_slicer - ): + elif hasattr(model.retriever, "paginator") and model.retriever.paginator and not stream_slicer: # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor` return ResumableFullRefreshCursor(parameters={}) else: return None - def create_default_error_handler( - self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any - ) -> DefaultErrorHandler: + def create_default_error_handler(self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any) -> DefaultErrorHandler: backoff_strategies = [] if model.backoff_strategies: for backoff_strategy_model in model.backoff_strategies: - backoff_strategies.append( - self._create_component_from_model(model=backoff_strategy_model, config=config) - ) + backoff_strategies.append(self._create_component_from_model(model=backoff_strategy_model, config=config)) response_filters = [] if model.response_filters: for response_filter_model in model.response_filters: - response_filters.append( - self._create_component_from_model(model=response_filter_model, config=config) - ) - response_filters.append( - HttpResponseFilter(config=config, parameters=model.parameters or {}) - ) + response_filters.append(self._create_component_from_model(model=response_filter_model, config=config)) + response_filters.append(HttpResponseFilter(config=config, parameters=model.parameters or {})) return DefaultErrorHandler( backoff_strategies=backoff_strategies, @@ -1377,25 +1002,17 @@ def create_default_paginator( ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: if decoder: if not 
isinstance(decoder, (JsonDecoder, XmlDecoder)): - raise ValueError( - f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." - ) + raise ValueError(f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead.") decoder_to_use = PaginationDecoderDecorator(decoder=decoder) else: decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) page_size_option = ( - self._create_component_from_model(model=model.page_size_option, config=config) - if model.page_size_option - else None + self._create_component_from_model(model=model.page_size_option, config=config) if model.page_size_option else None ) page_token_option = ( - self._create_component_from_model(model=model.page_token_option, config=config) - if model.page_token_option - else None - ) - pagination_strategy = self._create_component_from_model( - model=model.pagination_strategy, config=config, decoder=decoder_to_use + self._create_component_from_model(model=model.page_token_option, config=config) if model.page_token_option else None ) + pagination_strategy = self._create_component_from_model(model=model.pagination_strategy, config=config, decoder=decoder_to_use) if cursor_used_for_stop_condition: pagination_strategy = StopConditionPaginationStrategyDecorator( pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) @@ -1414,55 +1031,29 @@ def create_default_paginator( return paginator def create_dpath_extractor( - self, - model: DpathExtractorModel, - config: Config, - decoder: Optional[Decoder] = None, - **kwargs: Any, + self, model: DpathExtractorModel, config: Config, decoder: Optional[Decoder] = None, **kwargs: Any ) -> DpathExtractor: if decoder: decoder_to_use = decoder else: decoder_to_use = JsonDecoder(parameters={}) model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] - return DpathExtractor( - decoder=decoder_to_use, - 
field_path=model_field_path, - config=config, - parameters=model.parameters or {}, - ) + return DpathExtractor(decoder=decoder_to_use, field_path=model_field_path, config=config, parameters=model.parameters or {}) @staticmethod - def create_exponential_backoff_strategy( - model: ExponentialBackoffStrategyModel, config: Config - ) -> ExponentialBackoffStrategy: - return ExponentialBackoffStrategy( - factor=model.factor or 5, parameters=model.parameters or {}, config=config - ) + def create_exponential_backoff_strategy(model: ExponentialBackoffStrategyModel, config: Config) -> ExponentialBackoffStrategy: + return ExponentialBackoffStrategy(factor=model.factor or 5, parameters=model.parameters or {}, config=config) - def create_http_requester( - self, model: HttpRequesterModel, decoder: Decoder, config: Config, *, name: str - ) -> HttpRequester: + def create_http_requester(self, model: HttpRequesterModel, decoder: Decoder, config: Config, *, name: str) -> HttpRequester: authenticator = ( - self._create_component_from_model( - model=model.authenticator, - config=config, - url_base=model.url_base, - name=name, - decoder=decoder, - ) + self._create_component_from_model(model=model.authenticator, config=config, url_base=model.url_base, name=name, decoder=decoder) if model.authenticator else None ) error_handler = ( self._create_component_from_model(model=model.error_handler, config=config) if model.error_handler - else DefaultErrorHandler( - backoff_strategies=[], - response_filters=[], - config=config, - parameters=model.parameters or {}, - ) + else DefaultErrorHandler(backoff_strategies=[], response_filters=[], config=config, parameters=model.parameters or {}) ) request_options_provider = InterpolatedRequestOptionsProvider( @@ -1497,9 +1088,7 @@ def create_http_requester( ) @staticmethod - def create_http_response_filter( - model: HttpResponseFilterModel, config: Config, **kwargs: Any - ) -> HttpResponseFilter: + def create_http_response_filter(model: 
HttpResponseFilterModel, config: Config, **kwargs: Any) -> HttpResponseFilter: if model.action: action = ResponseAction(model.action.value) else: @@ -1523,9 +1112,7 @@ def create_http_response_filter( ) @staticmethod - def create_inline_schema_loader( - model: InlineSchemaLoaderModel, config: Config, **kwargs: Any - ) -> InlineSchemaLoader: + def create_inline_schema_loader(model: InlineSchemaLoaderModel, config: Config, **kwargs: Any) -> InlineSchemaLoader: return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) @staticmethod @@ -1533,15 +1120,11 @@ def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) return JsonDecoder(parameters={}) @staticmethod - def create_jsonl_decoder( - model: JsonlDecoderModel, config: Config, **kwargs: Any - ) -> JsonlDecoder: + def create_jsonl_decoder(model: JsonlDecoderModel, config: Config, **kwargs: Any) -> JsonlDecoder: return JsonlDecoder(parameters={}) @staticmethod - def create_iterable_decoder( - model: IterableDecoderModel, config: Config, **kwargs: Any - ) -> IterableDecoder: + def create_iterable_decoder(model: IterableDecoderModel, config: Config, **kwargs: Any) -> IterableDecoder: return IterableDecoder(parameters={}) @staticmethod @@ -1549,17 +1132,15 @@ def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> return XmlDecoder(parameters={}) @staticmethod - def create_json_file_schema_loader( - model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any - ) -> JsonFileSchemaLoader: - return JsonFileSchemaLoader( - file_path=model.file_path or "", config=config, parameters=model.parameters or {} - ) + def create_gzipjson_decoder(model: GzipJsonDecoderModel, config: Config, **kwargs: Any) -> GzipJsonDecoder: + return GzipJsonDecoder(parameters={}, encoding=model.encoding) + + @staticmethod + def create_json_file_schema_loader(model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any) -> JsonFileSchemaLoader: + return 
JsonFileSchemaLoader(file_path=model.file_path or "", config=config, parameters=model.parameters or {}) @staticmethod - def create_jwt_authenticator( - model: JwtAuthenticatorModel, config: Config, **kwargs: Any - ) -> JwtAuthenticator: + def create_jwt_authenticator(model: JwtAuthenticatorModel, config: Config, **kwargs: Any) -> JwtAuthenticator: jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) return JwtAuthenticator( @@ -1581,9 +1162,7 @@ def create_jwt_authenticator( ) @staticmethod - def create_list_partition_router( - model: ListPartitionRouterModel, config: Config, **kwargs: Any - ) -> ListPartitionRouter: + def create_list_partition_router(model: ListPartitionRouterModel, config: Config, **kwargs: Any) -> ListPartitionRouter: request_option = ( RequestOption( inject_into=RequestOptionType(model.request_option.inject_into.value), @@ -1602,9 +1181,7 @@ def create_list_partition_router( ) @staticmethod - def create_min_max_datetime( - model: MinMaxDatetimeModel, config: Config, **kwargs: Any - ) -> MinMaxDatetime: + def create_min_max_datetime(model: MinMaxDatetimeModel, config: Config, **kwargs: Any) -> MinMaxDatetime: return MinMaxDatetime( datetime=model.datetime, datetime_format=model.datetime_format or "", @@ -1618,43 +1195,29 @@ def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: return NoAuth(parameters=model.parameters or {}) @staticmethod - def create_no_pagination( - model: NoPaginationModel, config: Config, **kwargs: Any - ) -> NoPagination: + def create_no_pagination(model: NoPaginationModel, config: Config, **kwargs: Any) -> NoPagination: return NoPagination(parameters={}) - def create_oauth_authenticator( - self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any - ) -> DeclarativeOauth2Authenticator: + def create_oauth_authenticator(self, model: OAuthAuthenticatorModel, config: Config, 
**kwargs: Any) -> DeclarativeOauth2Authenticator: if model.refresh_token_updater: # ignore type error because fixing it would have a lot of dependencies, revisit later return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore config, - InterpolatedString.create( - model.token_refresh_endpoint, parameters=model.parameters or {} - ).eval(config), + InterpolatedString.create(model.token_refresh_endpoint, parameters=model.parameters or {}).eval(config), access_token_name=InterpolatedString.create( model.access_token_name or "access_token", parameters=model.parameters or {} ).eval(config), refresh_token_name=model.refresh_token_updater.refresh_token_name, - expires_in_name=InterpolatedString.create( - model.expires_in_name or "expires_in", parameters=model.parameters or {} - ).eval(config), - client_id=InterpolatedString.create( - model.client_id, parameters=model.parameters or {} - ).eval(config), - client_secret=InterpolatedString.create( - model.client_secret, parameters=model.parameters or {} - ).eval(config), + expires_in_name=InterpolatedString.create(model.expires_in_name or "expires_in", parameters=model.parameters or {}).eval( + config + ), + client_id=InterpolatedString.create(model.client_id, parameters=model.parameters or {}).eval(config), + client_secret=InterpolatedString.create(model.client_secret, parameters=model.parameters or {}).eval(config), access_token_config_path=model.refresh_token_updater.access_token_config_path, refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, - grant_type=InterpolatedString.create( - model.grant_type or "refresh_token", parameters=model.parameters or {} - ).eval(config), - refresh_request_body=InterpolatedMapping( - model.refresh_request_body or {}, parameters=model.parameters or {} - ).eval(config), + grant_type=InterpolatedString.create(model.grant_type or "refresh_token", 
parameters=model.parameters or {}).eval(config), + refresh_request_body=InterpolatedMapping(model.refresh_request_body or {}, parameters=model.parameters or {}).eval(config), scopes=model.scopes, token_expiry_date_format=model.token_expiry_date_format, message_repository=self._message_repository, @@ -1682,9 +1245,7 @@ def create_oauth_authenticator( ) @staticmethod - def create_offset_increment( - model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any - ) -> OffsetIncrement: + def create_offset_increment(model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any) -> OffsetIncrement: if isinstance(decoder, PaginationDecoderDecorator): if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)): raise ValueError( @@ -1693,9 +1254,7 @@ def create_offset_increment( decoder_to_use = decoder else: if not isinstance(decoder, (JsonDecoder, XmlDecoder)): - raise ValueError( - f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." - ) + raise ValueError(f"Provided decoder of {type(decoder)=} is not supported. 
Please set JsonDecoder or XmlDecoder instead.") decoder_to_use = PaginationDecoderDecorator(decoder=decoder) return OffsetIncrement( page_size=model.page_size, @@ -1706,9 +1265,7 @@ def create_offset_increment( ) @staticmethod - def create_page_increment( - model: PageIncrementModel, config: Config, **kwargs: Any - ) -> PageIncrement: + def create_page_increment(model: PageIncrementModel, config: Config, **kwargs: Any) -> PageIncrement: return PageIncrement( page_size=model.page_size, config=config, @@ -1717,15 +1274,9 @@ def create_page_increment( parameters=model.parameters or {}, ) - def create_parent_stream_config( - self, model: ParentStreamConfigModel, config: Config, **kwargs: Any - ) -> ParentStreamConfig: + def create_parent_stream_config(self, model: ParentStreamConfigModel, config: Config, **kwargs: Any) -> ParentStreamConfig: declarative_stream = self._create_component_from_model(model.stream, config=config) - request_option = ( - self._create_component_from_model(model.request_option, config=config) - if model.request_option - else None - ) + request_option = self._create_component_from_model(model.request_option, config=config) if model.request_option else None return ParentStreamConfig( parent_key=model.parent_key, request_option=request_option, @@ -1738,21 +1289,15 @@ def create_parent_stream_config( ) @staticmethod - def create_record_filter( - model: RecordFilterModel, config: Config, **kwargs: Any - ) -> RecordFilter: - return RecordFilter( - condition=model.condition or "", config=config, parameters=model.parameters or {} - ) + def create_record_filter(model: RecordFilterModel, config: Config, **kwargs: Any) -> RecordFilter: + return RecordFilter(condition=model.condition or "", config=config, parameters=model.parameters or {}) @staticmethod def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: return RequestPath(parameters={}) @staticmethod - def create_request_option( - model: RequestOptionModel, 
config: Config, **kwargs: Any - ) -> RequestOption: + def create_request_option(model: RequestOptionModel, config: Config, **kwargs: Any) -> RequestOption: inject_into = RequestOptionType(model.inject_into.value) return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={}) @@ -1767,26 +1312,16 @@ def create_record_selector( **kwargs: Any, ) -> RecordSelector: assert model.schema_normalization is not None # for mypy - extractor = self._create_component_from_model( - model=model.extractor, decoder=decoder, config=config - ) - record_filter = ( - self._create_component_from_model(model.record_filter, config=config) - if model.record_filter - else None - ) + extractor = self._create_component_from_model(model=model.extractor, decoder=decoder, config=config) + record_filter = self._create_component_from_model(model.record_filter, config=config) if model.record_filter else None if client_side_incremental_sync: record_filter = ClientSideIncrementalRecordFilterDecorator( config=config, parameters=model.parameters, - condition=model.record_filter.condition - if (model.record_filter and hasattr(model.record_filter, "condition")) - else None, + condition=model.record_filter.condition if (model.record_filter and hasattr(model.record_filter, "condition")) else None, **client_side_incremental_sync, ) - schema_normalization = TypeTransformer( - SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization] - ) + schema_normalization = TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) return RecordSelector( extractor=extractor, @@ -1798,20 +1333,11 @@ def create_record_selector( ) @staticmethod - def create_remove_fields( - model: RemoveFieldsModel, config: Config, **kwargs: Any - ) -> RemoveFields: - return RemoveFields( - field_pointers=model.field_pointers, condition=model.condition or "", parameters={} - ) - - def create_selective_authenticator( - self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any - ) -> 
DeclarativeAuthenticator: - authenticators = { - name: self._create_component_from_model(model=auth, config=config) - for name, auth in model.authenticators.items() - } + def create_remove_fields(model: RemoveFieldsModel, config: Config, **kwargs: Any) -> RemoveFields: + return RemoveFields(field_pointers=model.field_pointers, condition=model.condition or "", parameters={}) + + def create_selective_authenticator(self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any) -> DeclarativeAuthenticator: + authenticators = {name: self._create_component_from_model(model=auth, config=config) for name, auth in model.authenticators.items()} # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error return SelectiveAuthenticator( # type: ignore[abstract] config=config, @@ -1850,14 +1376,8 @@ def create_simple_retriever( client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[RecordTransformation], ) -> SimpleRetriever: - decoder = ( - self._create_component_from_model(model=model.decoder, config=config) - if model.decoder - else JsonDecoder(parameters={}) - ) - requester = self._create_component_from_model( - model=model.requester, decoder=decoder, config=config, name=name - ) + decoder = self._create_component_from_model(model=model.decoder, config=config) if model.decoder else JsonDecoder(parameters={}) + requester = self._create_component_from_model(model=model.requester, decoder=decoder, config=config, name=name) record_selector = self._create_component_from_model( model=model.record_selector, config=config, @@ -1865,19 +1385,12 @@ def create_simple_retriever( transformations=transformations, client_side_incremental_sync=client_side_incremental_sync, ) - url_base = ( - model.requester.url_base - if hasattr(model.requester, "url_base") - else requester.get_url_base() - ) + url_base = model.requester.url_base if hasattr(model.requester, "url_base") else requester.get_url_base() # Define 
cursor only if per partition or common incremental support is needed cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None - if ( - not isinstance(stream_slicer, DatetimeBasedCursor) - or type(stream_slicer) is not DatetimeBasedCursor - ): + if not isinstance(stream_slicer, DatetimeBasedCursor) or type(stream_slicer) is not DatetimeBasedCursor: # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods). # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's @@ -1901,9 +1414,7 @@ def create_simple_retriever( else NoPagination(parameters={}) ) - ignore_stream_slicer_parameters_on_paginated_requests = ( - model.ignore_stream_slicer_parameters_on_paginated_requests or False - ) + ignore_stream_slicer_parameters_on_paginated_requests = model.ignore_stream_slicer_parameters_on_paginated_requests or False if self._limit_slices_fetched or self._emit_connector_builder_messages: return SimpleRetrieverTestReadDecorator( @@ -1970,19 +1481,14 @@ def create_async_retriever( config: Config, *, name: str, - primary_key: Optional[ - Union[str, List[str], List[List[str]]] - ], # this seems to be needed to match create_simple_retriever + primary_key: Optional[Union[str, List[str], List[List[str]]]], # this seems to be needed to match create_simple_retriever stream_slicer: Optional[StreamSlicer], client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[RecordTransformation], **kwargs: Any, ) -> AsyncRetriever: - decoder = ( - self._create_component_from_model(model=model.decoder, config=config) - if model.decoder - else JsonDecoder(parameters={}) - ) + + decoder = self._create_component_from_model(model=model.decoder, config=config) if model.decoder else JsonDecoder(parameters={}) 
record_selector = self._create_component_from_model( model=model.record_selector, config=config, @@ -1992,23 +1498,14 @@ def create_async_retriever( ) stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) creation_requester = self._create_component_from_model( - model=model.creation_requester, - decoder=decoder, - config=config, - name=f"job creation - {name}", + model=model.creation_requester, decoder=decoder, config=config, name=f"job creation - {name}" ) polling_requester = self._create_component_from_model( - model=model.polling_requester, - decoder=decoder, - config=config, - name=f"job polling - {name}", + model=model.polling_requester, decoder=decoder, config=config, name=f"job polling - {name}" ) job_download_components_name = f"job download - {name}" download_requester = self._create_component_from_model( - model=model.download_requester, - decoder=decoder, - config=config, - name=job_download_components_name, + model=model.download_requester, decoder=decoder, config=config, name=job_download_components_name ) download_retriever = SimpleRetriever( requester=download_requester, @@ -2023,9 +1520,7 @@ def create_async_retriever( primary_key=None, name=job_download_components_name, paginator=( - self._create_component_from_model( - model=model.download_paginator, decoder=decoder, config=config, url_base="" - ) + self._create_component_from_model(model=model.download_paginator, decoder=decoder, config=config, url_base="") if model.download_paginator else NoPagination(parameters={}) ), @@ -2033,31 +1528,17 @@ def create_async_retriever( parameters={}, ) abort_requester = ( - self._create_component_from_model( - model=model.abort_requester, - decoder=decoder, - config=config, - name=f"job abort - {name}", - ) + self._create_component_from_model(model=model.abort_requester, decoder=decoder, config=config, name=f"job abort - {name}") if model.abort_requester else None ) delete_requester = ( - self._create_component_from_model( - 
model=model.delete_requester, - decoder=decoder, - config=config, - name=f"job delete - {name}", - ) + self._create_component_from_model(model=model.delete_requester, decoder=decoder, config=config, name=f"job delete - {name}") if model.delete_requester else None ) - status_extractor = self._create_component_from_model( - model=model.status_extractor, decoder=decoder, config=config, name=name - ) - urls_extractor = self._create_component_from_model( - model=model.urls_extractor, decoder=decoder, config=config, name=name - ) + status_extractor = self._create_component_from_model(model=model.status_extractor, decoder=decoder, config=config, name=name) + urls_extractor = self._create_component_from_model(model=model.urls_extractor, decoder=decoder, config=config, name=name) job_repository: AsyncJobRepository = AsyncHttpJobRepository( creation_requester=creation_requester, polling_requester=polling_requester, @@ -2073,9 +1554,7 @@ def create_async_retriever( job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( job_repository, stream_slices, - JobTracker( - 1 - ), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1 + JobTracker(1), # FIXME eventually make the number of concurrent jobs in the API configurable. 
Until then, we limit to 1 self._message_repository, has_bulk_parent=False, # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk ), @@ -2101,22 +1580,14 @@ def create_substream_partition_router( if model.parent_stream_configs: parent_stream_configs.extend( [ - self._create_message_repository_substream_wrapper( - model=parent_stream_config, config=config - ) + self._create_message_repository_substream_wrapper(model=parent_stream_config, config=config) for parent_stream_config in model.parent_stream_configs ] ) - return SubstreamPartitionRouter( - parent_stream_configs=parent_stream_configs, - parameters=model.parameters or {}, - config=config, - ) + return SubstreamPartitionRouter(parent_stream_configs=parent_stream_configs, parameters=model.parameters or {}, config=config) - def _create_message_repository_substream_wrapper( - self, model: ParentStreamConfigModel, config: Config - ) -> Any: + def _create_message_repository_substream_wrapper(self, model: ParentStreamConfigModel, config: Config) -> Any: substream_factory = ModelToComponentFactory( limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, limit_slices_fetched=self._limit_slices_fetched, @@ -2132,17 +1603,13 @@ def _create_message_repository_substream_wrapper( return substream_factory._create_component_from_model(model=model, config=config) @staticmethod - def create_wait_time_from_header( - model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any - ) -> WaitTimeFromHeaderBackoffStrategy: + def create_wait_time_from_header(model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any) -> WaitTimeFromHeaderBackoffStrategy: return WaitTimeFromHeaderBackoffStrategy( header=model.header, parameters=model.parameters or {}, config=config, regex=model.regex, - max_waiting_time_in_seconds=model.max_waiting_time_in_seconds - if model.max_waiting_time_in_seconds is not None - else None, + max_waiting_time_in_seconds=model.max_waiting_time_in_seconds 
if model.max_waiting_time_in_seconds is not None else None, ) @staticmethod @@ -2150,15 +1617,11 @@ def create_wait_until_time_from_header( model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any ) -> WaitUntilTimeFromHeaderBackoffStrategy: return WaitUntilTimeFromHeaderBackoffStrategy( - header=model.header, - parameters=model.parameters or {}, - config=config, - min_wait=model.min_wait, - regex=model.regex, + header=model.header, parameters=model.parameters or {}, config=config, min_wait=model.min_wait, regex=model.regex ) def get_message_repository(self) -> MessageRepository: return self._message_repository def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: - return Level.DEBUG if emit_connector_builder_messages else Level.INFO + return Level.DEBUG if emit_connector_builder_messages else Level.INFO \ No newline at end of file From 8614e16633c1f006ba0fe863068f7694fa30df6b Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 11 Nov 2024 17:48:49 +0100 Subject: [PATCH 04/17] Airbyte CDK: fix import --- unit_tests/sources/declarative/decoders/test_json_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unit_tests/sources/declarative/decoders/test_json_decoder.py b/unit_tests/sources/declarative/decoders/test_json_decoder.py index f4122b02..0af625d7 100644 --- a/unit_tests/sources/declarative/decoders/test_json_decoder.py +++ b/unit_tests/sources/declarative/decoders/test_json_decoder.py @@ -12,7 +12,7 @@ from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder, JsonlDecoder from airbyte_cdk.sources.declarative.models import DeclarativeStream as DeclarativeStreamModel from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ModelToComponentFactory -from sources.declarative.decoders import GzipJsonDecoder +from airbyte_cdk.sources.declarative.decoders import GzipJsonDecoder @pytest.mark.parametrize( From 52d52b2b761ae63b79ff2b5cd9ad4fbed7889031 Mon Sep 17 
00:00:00 2001 From: octavia-squidington-iii Date: Mon, 11 Nov 2024 16:50:59 +0000 Subject: [PATCH 05/17] Auto-fix lint and format issues --- .../declarative/decoders/json_decoder.py | 8 +- .../models/declarative_component_schema.py | 1336 ++++++++--------- .../parsers/model_to_component_factory.py | 1032 ++++++++++--- .../declarative/decoders/test_json_decoder.py | 123 +- 4 files changed, 1524 insertions(+), 975 deletions(-) diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index b327577c..14816c86 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -33,11 +33,15 @@ def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], No body_json = response.json() yield from self.parse_body_json(body_json) except requests.exceptions.JSONDecodeError: - logger.warning(f"Response cannot be parsed into json: {response.status_code=}, {response.text=}") + logger.warning( + f"Response cannot be parsed into json: {response.status_code=}, {response.text=}" + ) yield {} @staticmethod - def parse_body_json(body_json: Mapping[str, Any] | list) -> Generator[Mapping[str, Any], None, None]: + def parse_body_json( + body_json: Mapping[str, Any] | list, + ) -> Generator[Mapping[str, Any], None, None]: if not isinstance(body_json, list): body_json = [body_json] if len(body_json) == 0: diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index 3f69b8c4..6b0f3ca9 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -11,314 +11,314 @@ class AuthFlowType(Enum): - oauth2_0 = 'oauth2.0' - oauth1_0 = 'oauth1.0' + oauth2_0 = "oauth2.0" + oauth1_0 = "oauth1.0" class BasicHttpAuthenticator(BaseModel): - type: 
Literal['BasicHttpAuthenticator'] + type: Literal["BasicHttpAuthenticator"] username: str = Field( ..., - description='The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.', + description="The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.", examples=["{{ config['username'] }}", "{{ config['api_key'] }}"], - title='Username', + title="Username", ) password: Optional[str] = Field( - '', - description='The password that will be combined with the username, base64 encoded and used to make requests. Fill it in the user inputs.', - examples=["{{ config['password'] }}", ''], - title='Password', + "", + description="The password that will be combined with the username, base64 encoded and used to make requests. Fill it in the user inputs.", + examples=["{{ config['password'] }}", ""], + title="Password", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class BearerAuthenticator(BaseModel): - type: Literal['BearerAuthenticator'] + type: Literal["BearerAuthenticator"] api_token: str = Field( ..., - description='Token to inject as request header for authenticating with the API.', + description="Token to inject as request header for authenticating with the API.", examples=["{{ config['api_key'] }}", "{{ config['token'] }}"], - title='Bearer Token', + title="Bearer Token", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CheckStream(BaseModel): - type: Literal['CheckStream'] + type: Literal["CheckStream"] stream_names: List[str] = Field( ..., - description='Names of the streams to try reading from when running a check operation.', - examples=[['users'], ['users', 'contacts']], - title='Stream Names', + description="Names of the 
streams to try reading from when running a check operation.", + examples=[["users"], ["users", "contacts"]], + title="Stream Names", ) class ConcurrencyLevel(BaseModel): - type: Optional[Literal['ConcurrencyLevel']] = None + type: Optional[Literal["ConcurrencyLevel"]] = None default_concurrency: Union[int, str] = Field( ..., - description='The amount of concurrency that will applied during a sync. This value can be hardcoded or user-defined in the config if different users have varying volume thresholds in the target API.', + description="The amount of concurrency that will applied during a sync. This value can be hardcoded or user-defined in the config if different users have varying volume thresholds in the target API.", examples=[10, "{{ config['num_workers'] or 10 }}"], - title='Default Concurrency', + title="Default Concurrency", ) max_concurrency: Optional[int] = Field( None, - description='The maximum level of concurrency that will be used during a sync. This becomes a required field when the default_concurrency derives from the config, because it serves as a safeguard against a user-defined threshold that is too high.', + description="The maximum level of concurrency that will be used during a sync. 
This becomes a required field when the default_concurrency derives from the config, because it serves as a safeguard against a user-defined threshold that is too high.", examples=[20, 100], - title='Max Concurrency', + title="Max Concurrency", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ConstantBackoffStrategy(BaseModel): - type: Literal['ConstantBackoffStrategy'] + type: Literal["ConstantBackoffStrategy"] backoff_time_in_seconds: Union[float, str] = Field( ..., - description='Backoff time in seconds.', + description="Backoff time in seconds.", examples=[30, 30.5, "{{ config['backoff_time'] }}"], - title='Backoff Time', + title="Backoff Time", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CursorPagination(BaseModel): - type: Literal['CursorPagination'] + type: Literal["CursorPagination"] cursor_value: str = Field( ..., - description='Value of the cursor defining the next page to fetch.', + description="Value of the cursor defining the next page to fetch.", examples=[ - '{{ headers.link.next.cursor }}', + "{{ headers.link.next.cursor }}", "{{ last_record['key'] }}", "{{ response['nextPage'] }}", ], - title='Cursor Value', + title="Cursor Value", ) page_size: Optional[int] = Field( None, - description='The number of records to include in each pages.', + description="The number of records to include in each pages.", examples=[100], - title='Page Size', + title="Page Size", ) stop_condition: Optional[str] = Field( None, - description='Template string evaluating when to stop paginating.', + description="Template string evaluating when to stop paginating.", examples=[ - '{{ response.data.has_more is false }}', + "{{ response.data.has_more is false }}", "{{ 'next' not in headers['link'] }}", ], - title='Stop Condition', + title="Stop Condition", ) - 
parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomAuthenticator(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomAuthenticator'] + type: Literal["CustomAuthenticator"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.', - examples=['source_railz.components.ShortLivedTokenAuthenticator'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.", + examples=["source_railz.components.ShortLivedTokenAuthenticator"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomBackoffStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomBackoffStrategy'] + type: Literal["CustomBackoffStrategy"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom backoff strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomBackoffStrategy'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom backoff strategy. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomBackoffStrategy"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomErrorHandler(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomErrorHandler'] + type: Literal["CustomErrorHandler"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.', - examples=['source_railz.components.MyCustomErrorHandler'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.", + examples=["source_railz.components.MyCustomErrorHandler"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomIncrementalSync(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomIncrementalSync'] + type: Literal["CustomIncrementalSync"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_..`.', - examples=['source_railz.components.MyCustomIncrementalSync'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom incremental sync. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomIncrementalSync"], + title="Class Name", ) cursor_field: str = Field( ..., - description='The location of the value on a record that will be used as a bookmark during sync.', + description="The location of the value on a record that will be used as a bookmark during sync.", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomPaginationStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomPaginationStrategy'] + type: Literal["CustomPaginationStrategy"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom pagination strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomPaginationStrategy'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom pagination strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomPaginationStrategy"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRecordExtractor(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRecordExtractor'] + type: Literal["CustomRecordExtractor"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom record extraction strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRecordExtractor'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom record extraction strategy. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomRecordExtractor"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRecordFilter(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRecordFilter'] + type: Literal["CustomRecordFilter"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom record filter strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomCustomRecordFilter'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom record filter strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomCustomRecordFilter"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRequester(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRequester'] + type: Literal["CustomRequester"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom requester strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRecordExtractor'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom requester strategy. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomRecordExtractor"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRetriever(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRetriever'] + type: Literal["CustomRetriever"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRetriever'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomRetriever"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomPartitionRouter(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomPartitionRouter'] + type: Literal["CustomPartitionRouter"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom partition router. The format is `source_..`.', - examples=['source_railz.components.MyCustomPartitionRouter'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom partition router. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomPartitionRouter"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomSchemaLoader(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomSchemaLoader'] + type: Literal["CustomSchemaLoader"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_..`.', - examples=['source_railz.components.MyCustomSchemaLoader'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_..`.", + examples=["source_railz.components.MyCustomSchemaLoader"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomStateMigration(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomStateMigration'] + type: Literal["CustomStateMigration"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom state migration. The format is `source_..`.', - examples=['source_railz.components.MyCustomStateMigration'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom state migration. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomStateMigration"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomTransformation(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomTransformation'] + type: Literal["CustomTransformation"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.', - examples=['source_railz.components.MyCustomTransformation'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.", + examples=["source_railz.components.MyCustomTransformation"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class LegacyToPerPartitionStateMigration(BaseModel): class Config: extra = Extra.allow - type: Optional[Literal['LegacyToPerPartitionStateMigration']] = None + type: Optional[Literal["LegacyToPerPartitionStateMigration"]] = None class Algorithm(Enum): - HS256 = 'HS256' - HS384 = 'HS384' - HS512 = 'HS512' - ES256 = 'ES256' - ES256K = 'ES256K' - ES384 = 'ES384' - ES512 = 'ES512' - RS256 = 'RS256' - RS384 = 'RS384' - RS512 = 'RS512' - PS256 = 'PS256' - PS384 = 'PS384' - PS512 = 'PS512' - EdDSA = 'EdDSA' + HS256 = "HS256" + HS384 = "HS384" + HS512 = "HS512" + ES256 = "ES256" + ES256K = "ES256K" + ES384 = "ES384" + ES512 = "ES512" + RS256 = "RS256" + RS384 = "RS384" + RS512 = "RS512" + PS256 = "PS256" + PS384 = "PS384" + PS512 = "PS512" + EdDSA = "EdDSA" class JwtHeaders(BaseModel): @@ -327,21 +327,21 @@ class Config: kid: Optional[str] = Field( None, - description='Private key ID for user account.', + description="Private key ID for user account.", 
examples=["{{ config['kid'] }}"], - title='Key Identifier', + title="Key Identifier", ) typ: Optional[str] = Field( - 'JWT', - description='The media type of the complete JWT.', - examples=['JWT'], - title='Type', + "JWT", + description="The media type of the complete JWT.", + examples=["JWT"], + title="Type", ) cty: Optional[str] = Field( None, - description='Content type of JWT header.', - examples=['JWT'], - title='Content Type', + description="Content type of JWT header.", + examples=["JWT"], + title="Content Type", ) @@ -351,28 +351,28 @@ class Config: iss: Optional[str] = Field( None, - description='The user/principal that issued the JWT. Commonly a value unique to the user.', + description="The user/principal that issued the JWT. Commonly a value unique to the user.", examples=["{{ config['iss'] }}"], - title='Issuer', + title="Issuer", ) sub: Optional[str] = Field( None, - description='The subject of the JWT. Commonly defined by the API.', - title='Subject', + description="The subject of the JWT. Commonly defined by the API.", + title="Subject", ) aud: Optional[str] = Field( None, - description='The recipient that the JWT is intended for. Commonly defined by the API.', - examples=['appstoreconnect-v1'], - title='Audience', + description="The recipient that the JWT is intended for. 
Commonly defined by the API.", + examples=["appstoreconnect-v1"], + title="Audience", ) class JwtAuthenticator(BaseModel): - type: Literal['JwtAuthenticator'] + type: Literal["JwtAuthenticator"] secret_key: str = Field( ..., - description='Secret used to sign the JSON web token.', + description="Secret used to sign the JSON web token.", examples=["{{ config['secret_key'] }}"], ) base64_encode_secret_key: Optional[bool] = Field( @@ -381,544 +381,538 @@ class JwtAuthenticator(BaseModel): ) algorithm: Algorithm = Field( ..., - description='Algorithm used to sign the JSON web token.', - examples=['ES256', 'HS256', 'RS256', "{{ config['algorithm'] }}"], + description="Algorithm used to sign the JSON web token.", + examples=["ES256", "HS256", "RS256", "{{ config['algorithm'] }}"], ) token_duration: Optional[int] = Field( 1200, - description='The amount of time in seconds a JWT token can be valid after being issued.', + description="The amount of time in seconds a JWT token can be valid after being issued.", examples=[1200, 3600], - title='Token Duration', + title="Token Duration", ) header_prefix: Optional[str] = Field( None, - description='The prefix to be used within the Authentication header.', - examples=['Bearer', 'Basic'], - title='Header Prefix', + description="The prefix to be used within the Authentication header.", + examples=["Bearer", "Basic"], + title="Header Prefix", ) jwt_headers: Optional[JwtHeaders] = Field( None, - description='JWT headers used when signing JSON web token.', - title='JWT Headers', + description="JWT headers used when signing JSON web token.", + title="JWT Headers", ) additional_jwt_headers: Optional[Dict[str, Any]] = Field( None, - description='Additional headers to be included with the JWT headers object.', - title='Additional JWT Headers', + description="Additional headers to be included with the JWT headers object.", + title="Additional JWT Headers", ) jwt_payload: Optional[JwtPayload] = Field( None, - description='JWT Payload used 
when signing JSON web token.', - title='JWT Payload', + description="JWT Payload used when signing JSON web token.", + title="JWT Payload", ) additional_jwt_payload: Optional[Dict[str, Any]] = Field( None, - description='Additional properties to be added to the JWT payload.', - title='Additional JWT Payload Properties', + description="Additional properties to be added to the JWT payload.", + title="Additional JWT Payload Properties", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class RefreshTokenUpdater(BaseModel): refresh_token_name: Optional[str] = Field( - 'refresh_token', - description='The name of the property which contains the updated refresh token in the response from the token refresh endpoint.', - examples=['refresh_token'], - title='Refresh Token Property Name', + "refresh_token", + description="The name of the property which contains the updated refresh token in the response from the token refresh endpoint.", + examples=["refresh_token"], + title="Refresh Token Property Name", ) access_token_config_path: Optional[List[str]] = Field( - ['credentials', 'access_token'], - description='Config path to the access token. Make sure the field actually exists in the config.', - examples=[['credentials', 'access_token'], ['access_token']], - title='Config Path To Access Token', + ["credentials", "access_token"], + description="Config path to the access token. Make sure the field actually exists in the config.", + examples=[["credentials", "access_token"], ["access_token"]], + title="Config Path To Access Token", ) refresh_token_config_path: Optional[List[str]] = Field( - ['credentials', 'refresh_token'], - description='Config path to the access token. 
Make sure the field actually exists in the config.', - examples=[['credentials', 'refresh_token'], ['refresh_token']], - title='Config Path To Refresh Token', + ["credentials", "refresh_token"], + description="Config path to the access token. Make sure the field actually exists in the config.", + examples=[["credentials", "refresh_token"], ["refresh_token"]], + title="Config Path To Refresh Token", ) token_expiry_date_config_path: Optional[List[str]] = Field( - ['credentials', 'token_expiry_date'], - description='Config path to the expiry date. Make sure actually exists in the config.', - examples=[['credentials', 'token_expiry_date']], - title='Config Path To Expiry Date', + ["credentials", "token_expiry_date"], + description="Config path to the expiry date. Make sure actually exists in the config.", + examples=[["credentials", "token_expiry_date"]], + title="Config Path To Expiry Date", ) refresh_token_error_status_codes: Optional[List[int]] = Field( [], - description='Status Codes to Identify refresh token error in response (Refresh Token Error Key and Refresh Token Error Values should be also specified). Responses with one of the error status code and containing an error value will be flagged as a config error', + description="Status Codes to Identify refresh token error in response (Refresh Token Error Key and Refresh Token Error Values should be also specified). 
Responses with one of the error status code and containing an error value will be flagged as a config error", examples=[[400, 500]], - title='Refresh Token Error Status Codes', + title="Refresh Token Error Status Codes", ) refresh_token_error_key: Optional[str] = Field( - '', - description='Key to Identify refresh token error in response (Refresh Token Error Status Codes and Refresh Token Error Values should be also specified).', - examples=['error'], - title='Refresh Token Error Key', + "", + description="Key to Identify refresh token error in response (Refresh Token Error Status Codes and Refresh Token Error Values should be also specified).", + examples=["error"], + title="Refresh Token Error Key", ) refresh_token_error_values: Optional[List[str]] = Field( [], description='List of values to check for exception during token refresh process. Used to check if the error found in the response matches the key from the Refresh Token Error Key field (e.g. response={"error": "invalid_grant"}). Only responses with one of the error status code and containing an error value will be flagged as a config error', - examples=[['invalid_grant', 'invalid_permissions']], - title='Refresh Token Error Values', + examples=[["invalid_grant", "invalid_permissions"]], + title="Refresh Token Error Values", ) class OAuthAuthenticator(BaseModel): - type: Literal['OAuthAuthenticator'] + type: Literal["OAuthAuthenticator"] client_id: str = Field( ..., - description='The OAuth client ID. Fill it in the user inputs.', + description="The OAuth client ID. Fill it in the user inputs.", examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"], - title='Client ID', + title="Client ID", ) client_secret: str = Field( ..., - description='The OAuth client secret. Fill it in the user inputs.', + description="The OAuth client secret. 
Fill it in the user inputs.", examples=[ "{{ config['client_secret }}", "{{ config['credentials']['client_secret }}", ], - title='Client Secret', + title="Client Secret", ) refresh_token: Optional[str] = Field( None, - description='Credential artifact used to get a new access token.', + description="Credential artifact used to get a new access token.", examples=[ "{{ config['refresh_token'] }}", "{{ config['credentials]['refresh_token'] }}", ], - title='Refresh Token', + title="Refresh Token", ) token_refresh_endpoint: str = Field( ..., - description='The full URL to call to obtain a new access token.', - examples=['https://connect.squareup.com/oauth2/token'], - title='Token Refresh Endpoint', + description="The full URL to call to obtain a new access token.", + examples=["https://connect.squareup.com/oauth2/token"], + title="Token Refresh Endpoint", ) access_token_name: Optional[str] = Field( - 'access_token', - description='The name of the property which contains the access token in the response from the token refresh endpoint.', - examples=['access_token'], - title='Access Token Property Name', + "access_token", + description="The name of the property which contains the access token in the response from the token refresh endpoint.", + examples=["access_token"], + title="Access Token Property Name", ) expires_in_name: Optional[str] = Field( - 'expires_in', - description='The name of the property which contains the expiry date in the response from the token refresh endpoint.', - examples=['expires_in'], - title='Token Expiry Property Name', + "expires_in", + description="The name of the property which contains the expiry date in the response from the token refresh endpoint.", + examples=["expires_in"], + title="Token Expiry Property Name", ) grant_type: Optional[str] = Field( - 'refresh_token', - description='Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. 
For client_credentials, only client id and secret are required. Other grant types are not officially supported.', - examples=['refresh_token', 'client_credentials'], - title='Grant Type', + "refresh_token", + description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.", + examples=["refresh_token", "client_credentials"], + title="Grant Type", ) refresh_request_body: Optional[Dict[str, Any]] = Field( None, - description='Body of the request sent to get a new access token.', + description="Body of the request sent to get a new access token.", examples=[ { - 'applicationId': "{{ config['application_id'] }}", - 'applicationSecret': "{{ config['application_secret'] }}", - 'token': "{{ config['token'] }}", + "applicationId": "{{ config['application_id'] }}", + "applicationSecret": "{{ config['application_secret'] }}", + "token": "{{ config['token'] }}", } ], - title='Refresh Request Body', + title="Refresh Request Body", ) scopes: Optional[List[str]] = Field( None, - description='List of scopes that should be granted to the access token.', - examples=[ - ['crm.list.read', 'crm.objects.contacts.read', 'crm.schema.contacts.read'] - ], - title='Scopes', + description="List of scopes that should be granted to the access token.", + examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]], + title="Scopes", ) token_expiry_date: Optional[str] = Field( None, - description='The access token expiry date.', - examples=['2023-04-06T07:12:10.421833+00:00', 1680842386], - title='Token Expiry Date', + description="The access token expiry date.", + examples=["2023-04-06T07:12:10.421833+00:00", 1680842386], + title="Token Expiry Date", ) token_expiry_date_format: Optional[str] = Field( None, - description='The format of the time to expiration datetime. 
Provide it if the time is returned as a date-time string instead of seconds.', - examples=['%Y-%m-%d %H:%M:%S.%f+00:00'], - title='Token Expiry Date Format', + description="The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.", + examples=["%Y-%m-%d %H:%M:%S.%f+00:00"], + title="Token Expiry Date Format", ) refresh_token_updater: Optional[RefreshTokenUpdater] = Field( None, - description='When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.', - title='Token Updater', + description="When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.", + title="Token Updater", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DpathExtractor(BaseModel): - type: Literal['DpathExtractor'] + type: Literal["DpathExtractor"] field_path: List[str] = Field( ..., description='List of potentially nested fields describing the full path of the field to extract. Use "*" to extract all values from an array. 
See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/record-selector).', examples=[ - ['data'], - ['data', 'records'], - ['data', '{{ parameters.name }}'], - ['data', '*', 'record'], + ["data"], + ["data", "records"], + ["data", "{{ parameters.name }}"], + ["data", "*", "record"], ], - title='Field Path', + title="Field Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ExponentialBackoffStrategy(BaseModel): - type: Literal['ExponentialBackoffStrategy'] + type: Literal["ExponentialBackoffStrategy"] factor: Optional[Union[float, str]] = Field( 5, - description='Multiplicative constant applied on each retry.', - examples=[5, 5.5, '10'], - title='Factor', + description="Multiplicative constant applied on each retry.", + examples=[5, 5.5, "10"], + title="Factor", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenRequestBearerAuthenticator(BaseModel): - type: Literal['Bearer'] + type: Literal["Bearer"] class HttpMethod(Enum): - GET = 'GET' - POST = 'POST' + GET = "GET" + POST = "POST" class Action(Enum): - SUCCESS = 'SUCCESS' - FAIL = 'FAIL' - RETRY = 'RETRY' - IGNORE = 'IGNORE' - RATE_LIMITED = 'RATE_LIMITED' + SUCCESS = "SUCCESS" + FAIL = "FAIL" + RETRY = "RETRY" + IGNORE = "IGNORE" + RATE_LIMITED = "RATE_LIMITED" class FailureType(Enum): - system_error = 'system_error' - config_error = 'config_error' - transient_error = 'transient_error' + system_error = "system_error" + config_error = "config_error" + transient_error = "transient_error" class HttpResponseFilter(BaseModel): - type: Literal['HttpResponseFilter'] + type: Literal["HttpResponseFilter"] action: Optional[Action] = Field( None, - description='Action to execute if a response matches the filter.', - examples=['SUCCESS', 
'FAIL', 'RETRY', 'IGNORE', 'RATE_LIMITED'], - title='Action', + description="Action to execute if a response matches the filter.", + examples=["SUCCESS", "FAIL", "RETRY", "IGNORE", "RATE_LIMITED"], + title="Action", ) failure_type: Optional[FailureType] = Field( None, - description='Failure type of traced exception if a response matches the filter.', - examples=['system_error', 'config_error', 'transient_error'], - title='Failure Type', + description="Failure type of traced exception if a response matches the filter.", + examples=["system_error", "config_error", "transient_error"], + title="Failure Type", ) error_message: Optional[str] = Field( None, - description='Error Message to display if the response matches the filter.', - title='Error Message', + description="Error Message to display if the response matches the filter.", + title="Error Message", ) error_message_contains: Optional[str] = Field( None, - description='Match the response if its error message contains the substring.', - example=['This API operation is not enabled for this site'], - title='Error Message Substring', + description="Match the response if its error message contains the substring.", + example=["This API operation is not enabled for this site"], + title="Error Message Substring", ) http_codes: Optional[List[int]] = Field( None, - description='Match the response if its HTTP code is included in this list.', + description="Match the response if its HTTP code is included in this list.", examples=[[420, 429], [500]], - title='HTTP Codes', + title="HTTP Codes", ) predicate: Optional[str] = Field( None, - description='Match the response if the predicate evaluates to true.', + description="Match the response if the predicate evaluates to true.", examples=[ "{{ 'Too much requests' in response }}", "{{ 'error_code' in response and response['error_code'] == 'ComplexityException' }}", ], - title='Predicate', + title="Predicate", ) - parameters: Optional[Dict[str, Any]] = Field(None, 
alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class InlineSchemaLoader(BaseModel): - type: Literal['InlineSchemaLoader'] + type: Literal["InlineSchemaLoader"] schema_: Optional[Dict[str, Any]] = Field( None, - alias='schema', + alias="schema", description='Describes a streams\' schema. Refer to the Data Types documentation for more details on which types are valid.', - title='Schema', + title="Schema", ) class JsonFileSchemaLoader(BaseModel): - type: Literal['JsonFileSchemaLoader'] + type: Literal["JsonFileSchemaLoader"] file_path: Optional[str] = Field( None, description="Path to the JSON file defining the schema. The path is relative to the connector module's root.", - example=['./schemas/users.json'], - title='File Path', + example=["./schemas/users.json"], + title="File Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class JsonDecoder(BaseModel): - type: Literal['JsonDecoder'] + type: Literal["JsonDecoder"] class JsonlDecoder(BaseModel): - type: Literal['JsonlDecoder'] + type: Literal["JsonlDecoder"] class KeysToLower(BaseModel): - type: Literal['KeysToLower'] - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + type: Literal["KeysToLower"] + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class IterableDecoder(BaseModel): - type: Literal['IterableDecoder'] + type: Literal["IterableDecoder"] class XmlDecoder(BaseModel): - type: Literal['XmlDecoder'] + type: Literal["XmlDecoder"] class CustomDecoder(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomDecoder'] + type: Literal["CustomDecoder"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom decoding. Has to be a sub class of Decoder. 
The format is `source_..`.', - examples=['source_amazon_ads.components.GzipJsonlDecoder'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom decoding. Has to be a sub class of Decoder. The format is `source_..`.", + examples=["source_amazon_ads.components.GzipJsonlDecoder"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class GzipJsonDecoder(BaseModel): class Config: extra = Extra.allow - type: Literal['GzipJsonDecoder'] - encoding: Optional[str] = 'utf-8' - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + type: Literal["GzipJsonDecoder"] + encoding: Optional[str] = "utf-8" + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class MinMaxDatetime(BaseModel): - type: Literal['MinMaxDatetime'] + type: Literal["MinMaxDatetime"] datetime: str = Field( ..., - description='Datetime value.', - examples=['2021-01-01', '2021-01-01T00:00:00Z', "{{ config['start_time'] }}"], - title='Datetime', + description="Datetime value.", + examples=["2021-01-01", "2021-01-01T00:00:00Z", "{{ config['start_time'] }}"], + title="Datetime", ) datetime_format: Optional[str] = Field( - '', + "", description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', - examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s'], - title='Datetime Format', + examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"], + title="Datetime Format", ) max_datetime: Optional[str] = Field( None, - description='Ceiling applied on the datetime value. Must be formatted with the datetime_format field.', - examples=['2021-01-01T00:00:00Z', '2021-01-01'], - title='Max Datetime', + description="Ceiling applied on the datetime value. Must be formatted with the datetime_format field.", + examples=["2021-01-01T00:00:00Z", "2021-01-01"], + title="Max Datetime", ) min_datetime: Optional[str] = Field( None, - description='Floor applied on the datetime value. Must be formatted with the datetime_format field.', - examples=['2010-01-01T00:00:00Z', '2010-01-01'], - title='Min Datetime', + description="Floor applied on the datetime value. Must be formatted with the datetime_format field.", + examples=["2010-01-01T00:00:00Z", "2010-01-01"], + title="Min Datetime", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class NoAuth(BaseModel): - type: Literal['NoAuth'] - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + type: Literal["NoAuth"] + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class NoPagination(BaseModel): - type: Literal['NoPagination'] + type: Literal["NoPagination"] class OAuthConfigSpecification(BaseModel): class Config: extra = Extra.allow - oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = ( - Field( - None, - description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", - examples=[ - {'app_id': {'type': 'string', 'path_in_connector_config': ['app_id']}}, - { - 'app_id': { - 'type': 'string', - 'path_in_connector_config': ['info', 'app_id'], - } - }, - ], - title='OAuth user input', - ) + oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field( + None, + description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", + examples=[ + {"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}}, + { + "app_id": { + "type": "string", + "path_in_connector_config": ["info", "app_id"], + } + }, + ], + title="OAuth user input", ) complete_oauth_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token']\n }\n }", examples=[ { - 'refresh_token': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'refresh_token'], + "refresh_token": { + "type": "string,", + "path_in_connector_config": ["credentials", "refresh_token"], } } ], - title='OAuth output specification', + title="OAuth output specification", ) complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field( None, - description='OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }', - examples=[ - {'client_id': {'type': 'string'}, 'client_secret': {'type': 'string'}} - ], - title='OAuth input specification', + description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }", + examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}], + title="OAuth input specification", ) complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations that\nalso need to be merged back into the connector configuration at runtime.\nThis is a subset configuration of `complete_oauth_server_input_specification` that filters fields out to retain only the ones that\nare necessary for the connector to function with OAuth. 
(some fields could be used during oauth flows but not needed afterwards, therefore\nthey would be listed in the `complete_oauth_server_input_specification` but not `complete_oauth_server_output_specification`)\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nconnector when using OAuth flow APIs.\nThese fields are to be merged back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_server_output_specification={\n client_id: {\n type: string,\n path_in_connector_config: ['credentials', 'client_id']\n },\n client_secret: {\n type: string,\n path_in_connector_config: ['credentials', 'client_secret']\n }\n }", examples=[ { - 'client_id': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'client_id'], + "client_id": { + "type": "string,", + "path_in_connector_config": ["credentials", "client_id"], }, - 'client_secret': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'client_secret'], + "client_secret": { + "type": "string,", + "path_in_connector_config": ["credentials", "client_secret"], }, } ], - title='OAuth server output specification', + title="OAuth server output specification", ) class OffsetIncrement(BaseModel): - type: Literal['OffsetIncrement'] + type: Literal["OffsetIncrement"] page_size: Optional[Union[int, str]] = Field( None, - description='The number of records to include in each pages.', + description="The number of records to include in each pages.", examples=[100, "{{ config['page_size'] }}"], - title='Limit', + title="Limit", ) inject_on_first_request: Optional[bool] = Field( False, - description='Using the `offset` with value `0` during the first request', - title='Inject Offset', + description="Using the `offset` with value `0` during the first request", + title="Inject Offset", ) - 
parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class PageIncrement(BaseModel): - type: Literal['PageIncrement'] + type: Literal["PageIncrement"] page_size: Optional[Union[int, str]] = Field( None, - description='The number of records to include in each pages.', - examples=[100, '100', "{{ config['page_size'] }}"], - title='Page Size', + description="The number of records to include in each pages.", + examples=[100, "100", "{{ config['page_size'] }}"], + title="Page Size", ) start_from_page: Optional[int] = Field( 0, - description='Index of the first page to request.', + description="Index of the first page to request.", examples=[0, 1], - title='Start From Page', + title="Start From Page", ) inject_on_first_request: Optional[bool] = Field( False, - description='Using the `page number` with value defined by `start_from_page` during the first request', - title='Inject Page Number', + description="Using the `page number` with value defined by `start_from_page` during the first request", + title="Inject Page Number", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class PrimaryKey(BaseModel): __root__: Union[str, List[str], List[List[str]]] = Field( ..., - description='The stream field to be used to distinguish unique records. Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.', - examples=['id', ['code', 'type']], - title='Primary Key', + description="The stream field to be used to distinguish unique records. 
Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.", + examples=["id", ["code", "type"]], + title="Primary Key", ) class RecordFilter(BaseModel): - type: Literal['RecordFilter'] + type: Literal["RecordFilter"] condition: Optional[str] = Field( - '', - description='The predicate to filter a record. Records will be removed if evaluated to False.', + "", + description="The predicate to filter a record. Records will be removed if evaluated to False.", examples=[ "{{ record['created_at'] >= stream_interval['start_time'] }}", "{{ record.status in ['active', 'expired'] }}", ], ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SchemaNormalization(Enum): - None_ = 'None' - Default = 'Default' + None_ = "None" + Default = "Default" class RemoveFields(BaseModel): - type: Literal['RemoveFields'] + type: Literal["RemoveFields"] condition: Optional[str] = Field( - '', - description='The predicate to filter a property by a property value. Property will be removed if it is empty OR expression is evaluated to True.,', + "", + description="The predicate to filter a property by a property value. Property will be removed if it is empty OR expression is evaluated to True.,", examples=[ "{{ property|string == '' }}", - '{{ property is integer }}', - '{{ property|length > 5 }}', + "{{ property is integer }}", + "{{ property|length > 5 }}", "{{ property == 'some_string_to_match' }}", ], ) field_pointers: List[List[str]] = Field( ..., - description='Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.', - examples=[['tags'], [['content', 'html'], ['content', 'plain_text']]], - title='Field Paths', + description="Array of paths defining the field to remove. 
Each item is an array whose field describe the path of a field to remove.", + examples=[["tags"], [["content", "html"], ["content", "plain_text"]]], + title="Field Paths", ) class RequestPath(BaseModel): - type: Literal['RequestPath'] + type: Literal["RequestPath"] class InjectInto(Enum): - request_parameter = 'request_parameter' - header = 'header' - body_data = 'body_data' - body_json = 'body_json' + request_parameter = "request_parameter" + header = "header" + body_data = "body_data" + body_json = "body_json" class RequestOption(BaseModel): - type: Literal['RequestOption'] + type: Literal["RequestOption"] field_name: str = Field( ..., - description='Configures which key should be used in the location that the descriptor is being injected into', - examples=['segment_id'], - title='Request Option', + description="Configures which key should be used in the location that the descriptor is being injected into", + examples=["segment_id"], + title="Request Option", ) inject_into: InjectInto = Field( ..., - description='Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.', - examples=['request_parameter', 'header', 'body_data', 'body_json'], - title='Inject Into', + description="Configures where the descriptor should be set on the HTTP requests. 
Note that request parameters that are already encoded in the URL path will not be duplicated.", + examples=["request_parameter", "header", "body_data", "body_json"], + title="Inject Into", ) @@ -930,54 +924,54 @@ class Config: class LegacySessionTokenAuthenticator(BaseModel): - type: Literal['LegacySessionTokenAuthenticator'] + type: Literal["LegacySessionTokenAuthenticator"] header: str = Field( ..., - description='The name of the session token header that will be injected in the request', - examples=['X-Session'], - title='Session Request Header', + description="The name of the session token header that will be injected in the request", + examples=["X-Session"], + title="Session Request Header", ) login_url: str = Field( ..., - description='Path of the login URL (do not include the base URL)', - examples=['session'], - title='Login Path', + description="Path of the login URL (do not include the base URL)", + examples=["session"], + title="Login Path", ) session_token: Optional[str] = Field( None, - description='Session token to use if using a pre-defined token. Not needed if authenticating with username + password pair', + description="Session token to use if using a pre-defined token. 
Not needed if authenticating with username + password pair", example=["{{ config['session_token'] }}"], - title='Session Token', + title="Session Token", ) session_token_response_key: str = Field( ..., - description='Name of the key of the session token to be extracted from the response', - examples=['id'], - title='Response Token Response Key', + description="Name of the key of the session token to be extracted from the response", + examples=["id"], + title="Response Token Response Key", ) username: Optional[str] = Field( None, - description='Username used to authenticate and obtain a session token', + description="Username used to authenticate and obtain a session token", examples=[" {{ config['username'] }}"], - title='Username', + title="Username", ) password: Optional[str] = Field( - '', - description='Password used to authenticate and obtain a session token', - examples=["{{ config['password'] }}", ''], - title='Password', + "", + description="Password used to authenticate and obtain a session token", + examples=["{{ config['password'] }}", ""], + title="Password", ) validate_session_url: str = Field( ..., - description='Path of the URL to use to validate that the session token is valid (do not include the base URL)', - examples=['user/current'], - title='Validate Session Path', + description="Path of the URL to use to validate that the session token is valid (do not include the base URL)", + examples=["user/current"], + title="Validate Session Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AsyncJobStatusMap(BaseModel): - type: Optional[Literal['AsyncJobStatusMap']] = None + type: Optional[Literal["AsyncJobStatusMap"]] = None running: List[str] completed: List[str] failed: List[str] @@ -985,65 +979,65 @@ class AsyncJobStatusMap(BaseModel): class ValueType(Enum): - string = 'string' - number = 'number' - integer = 'integer' - boolean = 'boolean' 
+ string = "string" + number = "number" + integer = "integer" + boolean = "boolean" class WaitTimeFromHeader(BaseModel): - type: Literal['WaitTimeFromHeader'] + type: Literal["WaitTimeFromHeader"] header: str = Field( ..., - description='The name of the response header defining how long to wait before retrying.', - examples=['Retry-After'], - title='Response Header Name', + description="The name of the response header defining how long to wait before retrying.", + examples=["Retry-After"], + title="Response Header Name", ) regex: Optional[str] = Field( None, - description='Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.', - examples=['([-+]?\\d+)'], - title='Extraction Regex', + description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", + examples=["([-+]?\\d+)"], + title="Extraction Regex", ) max_waiting_time_in_seconds: Optional[float] = Field( None, - description='Given the value extracted from the header is greater than this value, stop the stream.', + description="Given the value extracted from the header is greater than this value, stop the stream.", examples=[3600], - title='Max Waiting Time in Seconds', + title="Max Waiting Time in Seconds", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class WaitUntilTimeFromHeader(BaseModel): - type: Literal['WaitUntilTimeFromHeader'] + type: Literal["WaitUntilTimeFromHeader"] header: str = Field( ..., - description='The name of the response header defining how long to wait before retrying.', - examples=['wait_time'], - title='Response Header', + description="The name of the response header defining how long to wait before retrying.", + examples=["wait_time"], + title="Response Header", ) min_wait: Optional[Union[float, str]] = Field( None, - description='Minimum 
time to wait before retrying.', - examples=[10, '60'], - title='Minimum Wait Time', + description="Minimum time to wait before retrying.", + examples=[10, "60"], + title="Minimum Wait Time", ) regex: Optional[str] = Field( None, - description='Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.', - examples=['([-+]?\\d+)'], - title='Extraction Regex', + description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", + examples=["([-+]?\\d+)"], + title="Extraction Regex", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AddedFieldDefinition(BaseModel): - type: Literal['AddedFieldDefinition'] + type: Literal["AddedFieldDefinition"] path: List[str] = Field( ..., - description='List of strings defining the path where to add the value on the record.', - examples=[['segment_id'], ['metadata', 'segment_id']], - title='Path', + description="List of strings defining the path where to add the value on the record.", + examples=[["segment_id"], ["metadata", "segment_id"]], + title="Path", ) value: str = Field( ..., @@ -1053,167 +1047,167 @@ class AddedFieldDefinition(BaseModel): "{{ record['MetaData']['LastUpdatedTime'] }}", "{{ stream_partition['segment_id'] }}", ], - title='Value', + title="Value", ) value_type: Optional[ValueType] = Field( None, - description='Type of the value. If not specified, the type will be inferred from the value.', - title='Value Type', + description="Type of the value. 
If not specified, the type will be inferred from the value.", + title="Value Type", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AddFields(BaseModel): - type: Literal['AddFields'] + type: Literal["AddFields"] fields: List[AddedFieldDefinition] = Field( ..., - description='List of transformations (path and corresponding value) that will be added to the record.', - title='Fields', + description="List of transformations (path and corresponding value) that will be added to the record.", + title="Fields", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ApiKeyAuthenticator(BaseModel): - type: Literal['ApiKeyAuthenticator'] + type: Literal["ApiKeyAuthenticator"] api_token: Optional[str] = Field( None, - description='The API key to inject in the request. Fill it in the user inputs.', + description="The API key to inject in the request. Fill it in the user inputs.", examples=["{{ config['api_key'] }}", "Token token={{ config['api_key'] }}"], - title='API Key', + title="API Key", ) header: Optional[str] = Field( None, - description='The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.', - examples=['Authorization', 'Api-Token', 'X-Auth-Token'], - title='Header Name', + description="The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.", + examples=["Authorization", "Api-Token", "X-Auth-Token"], + title="Header Name", ) inject_into: Optional[RequestOption] = Field( None, - description='Configure how the API Key will be sent in requests to the source API. 
Either inject_into or header has to be defined.', + description="Configure how the API Key will be sent in requests to the source API. Either inject_into or header has to be defined.", examples=[ - {'inject_into': 'header', 'field_name': 'Authorization'}, - {'inject_into': 'request_parameter', 'field_name': 'authKey'}, + {"inject_into": "header", "field_name": "Authorization"}, + {"inject_into": "request_parameter", "field_name": "authKey"}, ], - title='Inject API Key Into Outgoing HTTP Request', + title="Inject API Key Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AuthFlow(BaseModel): auth_flow_type: Optional[AuthFlowType] = Field( - None, description='The type of auth to use', title='Auth flow type' + None, description="The type of auth to use", title="Auth flow type" ) predicate_key: Optional[List[str]] = Field( None, - description='JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.', - examples=[['credentials', 'auth_type']], - title='Predicate key', + description="JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.", + examples=[["credentials", "auth_type"]], + title="Predicate key", ) predicate_value: Optional[str] = Field( None, - description='Value of the predicate_key fields for the advanced auth to be applicable.', - examples=['Oauth'], - title='Predicate value', + description="Value of the predicate_key fields for the advanced auth to be applicable.", + examples=["Oauth"], + title="Predicate value", ) oauth_config_specification: Optional[OAuthConfigSpecification] = None class DatetimeBasedCursor(BaseModel): - type: Literal['DatetimeBasedCursor'] + type: Literal["DatetimeBasedCursor"] cursor_field: str = Field( ..., - description='The location of the value on a record that will be used as a bookmark during 
sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.', - examples=['created_at', "{{ config['record_cursor'] }}"], - title='Cursor Field', + description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.", + examples=["created_at", "{{ config['record_cursor'] }}"], + title="Cursor Field", ) datetime_format: str = Field( ..., - description='The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with "%" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', - examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s', '%ms', '%s_as_float'], - title='Outgoing Datetime Format', + description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * 
**%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n", + examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms", "%s_as_float"], + title="Outgoing Datetime Format", ) start_datetime: Union[str, MinMaxDatetime] = Field( ..., - description='The datetime that determines the earliest record that should be synced.', - examples=['2020-01-1T00:00:00Z', "{{ config['start_time'] }}"], - title='Start Datetime', + description="The datetime that determines the earliest record that should be synced.", + examples=["2020-01-1T00:00:00Z", "{{ config['start_time'] }}"], + title="Start Datetime", ) cursor_datetime_formats: Optional[List[str]] = Field( None, - description='The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the `datetime_format` will be used.', - title='Cursor Datetime Formats', + description="The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the `datetime_format` will be used.", + title="Cursor Datetime Formats", ) cursor_granularity: Optional[str] = Field( None, - description='Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. 
Given this field is provided, `step` needs to be provided as well.', - examples=['PT1S'], - title='Cursor Granularity', + description="Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. Given this field is provided, `step` needs to be provided as well.", + examples=["PT1S"], + title="Cursor Granularity", ) end_datetime: Optional[Union[str, MinMaxDatetime]] = Field( None, - description='The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.', - examples=['2021-01-1T00:00:00Z', '{{ now_utc() }}', '{{ day_delta(-1) }}'], - title='End Datetime', + description="The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.", + examples=["2021-01-1T00:00:00Z", "{{ now_utc() }}", "{{ day_delta(-1) }}"], + title="End Datetime", ) end_time_option: Optional[RequestOption] = Field( None, - description='Optionally configures how the end datetime will be sent in requests to the source API.', - title='Inject End Time Into Outgoing HTTP Request', + description="Optionally configures how the end datetime will be sent in requests to the source API.", + title="Inject End Time Into Outgoing HTTP Request", ) is_data_feed: Optional[bool] = Field( None, - description='A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.', - title='Whether the target API is formatted as a data feed', + description="A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. 
Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.", + title="Whether the target API is formatted as a data feed", ) is_client_side_incremental: Optional[bool] = Field( None, - description='If the target API endpoint does not take cursor values to filter records and returns all records anyway, the connector with this cursor will filter out records locally, and only emit new records from the last sync, hence incremental. This means that all records would be read from the API, but only new records will be emitted to the destination.', - title='Whether the target API does not support filtering and returns all data (the cursor filters records in the client instead of the API side)', + description="If the target API endpoint does not take cursor values to filter records and returns all records anyway, the connector with this cursor will filter out records locally, and only emit new records from the last sync, hence incremental. This means that all records would be read from the API, but only new records will be emitted to the destination.", + title="Whether the target API does not support filtering and returns all data (the cursor filters records in the client instead of the API side)", ) is_compare_strictly: Optional[bool] = Field( False, - description='Set to True if the target API does not accept queries where the start time equal the end time.', - title='Whether to skip requests if the start time equals the end time', + description="Set to True if the target API does not accept queries where the start time equal the end time.", + title="Whether to skip requests if the start time equals the end time", ) global_substream_cursor: Optional[bool] = Field( False, - description='This setting optimizes performance when the parent stream has thousands of partitions by storing the cursor as a single value rather than per partition. 
Notably, the substream state is updated only at the end of the sync, which helps prevent data loss in case of a sync failure. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/incremental-syncs).', - title='Whether to store cursor as one value instead of per partition', + description="This setting optimizes performance when the parent stream has thousands of partitions by storing the cursor as a single value rather than per partition. Notably, the substream state is updated only at the end of the sync, which helps prevent data loss in case of a sync failure. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/incremental-syncs).", + title="Whether to store cursor as one value instead of per partition", ) lookback_window: Optional[str] = Field( None, - description='Time interval before the start_datetime to read data for, e.g. P1M for looking back one month.', - examples=['P1D', "P{{ config['lookback_days'] }}D"], - title='Lookback Window', + description="Time interval before the start_datetime to read data for, e.g. 
P1M for looking back one month.", + examples=["P1D", "P{{ config['lookback_days'] }}D"], + title="Lookback Window", ) partition_field_end: Optional[str] = Field( None, - description='Name of the partition start time field.', - examples=['ending_time'], - title='Partition Field End', + description="Name of the partition start time field.", + examples=["ending_time"], + title="Partition Field End", ) partition_field_start: Optional[str] = Field( None, - description='Name of the partition end time field.', - examples=['starting_time'], - title='Partition Field Start', + description="Name of the partition end time field.", + examples=["starting_time"], + title="Partition Field Start", ) start_time_option: Optional[RequestOption] = Field( None, - description='Optionally configures how the start datetime will be sent in requests to the source API.', - title='Inject Start Time Into Outgoing HTTP Request', + description="Optionally configures how the start datetime will be sent in requests to the source API.", + title="Inject Start Time Into Outgoing HTTP Request", ) step: Optional[str] = Field( None, - description='The size of the time window (ISO8601 duration). Given this field is provided, `cursor_granularity` needs to be provided as well.', - examples=['P1W', "{{ config['step_increment'] }}"], - title='Step', + description="The size of the time window (ISO8601 duration). 
Given this field is provided, `cursor_granularity` needs to be provided as well.", + examples=["P1W", "{{ config['step_increment'] }}"], + title="Step", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DefaultErrorHandler(BaseModel): - type: Literal['DefaultErrorHandler'] + type: Literal["DefaultErrorHandler"] backoff_strategies: Optional[ List[ Union[ @@ -1226,124 +1220,124 @@ class DefaultErrorHandler(BaseModel): ] ] = Field( None, - description='List of backoff strategies to use to determine how long to wait before retrying a retryable request.', - title='Backoff Strategies', + description="List of backoff strategies to use to determine how long to wait before retrying a retryable request.", + title="Backoff Strategies", ) max_retries: Optional[int] = Field( 5, - description='The maximum number of time to retry a retryable request before giving up and failing.', + description="The maximum number of time to retry a retryable request before giving up and failing.", examples=[5, 0, 10], - title='Max Retry Count', + title="Max Retry Count", ) response_filters: Optional[List[HttpResponseFilter]] = Field( None, description="List of response filters to iterate on when deciding how to handle an error. 
When using an array of multiple filters, the filters will be applied sequentially and the response will be selected if it matches any of the filter's predicate.", - title='Response Filters', + title="Response Filters", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DefaultPaginator(BaseModel): - type: Literal['DefaultPaginator'] + type: Literal["DefaultPaginator"] pagination_strategy: Union[ CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement ] = Field( ..., - description='Strategy defining how records are paginated.', - title='Pagination Strategy', + description="Strategy defining how records are paginated.", + title="Pagination Strategy", ) page_size_option: Optional[RequestOption] = None page_token_option: Optional[Union[RequestOption, RequestPath]] = None - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenRequestApiKeyAuthenticator(BaseModel): - type: Literal['ApiKey'] + type: Literal["ApiKey"] inject_into: RequestOption = Field( ..., - description='Configure how the API Key will be sent in requests to the source API.', + description="Configure how the API Key will be sent in requests to the source API.", examples=[ - {'inject_into': 'header', 'field_name': 'Authorization'}, - {'inject_into': 'request_parameter', 'field_name': 'authKey'}, + {"inject_into": "header", "field_name": "Authorization"}, + {"inject_into": "request_parameter", "field_name": "authKey"}, ], - title='Inject API Key Into Outgoing HTTP Request', + title="Inject API Key Into Outgoing HTTP Request", ) class ListPartitionRouter(BaseModel): - type: Literal['ListPartitionRouter'] + type: Literal["ListPartitionRouter"] cursor_field: str = Field( ..., description='While iterating over list values, the name of field used to reference a list value. 
The partition value can be accessed with string interpolation. e.g. "{{ stream_partition[\'my_key\'] }}" where "my_key" is the value of the cursor_field.', - examples=['section', "{{ config['section_key'] }}"], - title='Current Partition Value Identifier', + examples=["section", "{{ config['section_key'] }}"], + title="Current Partition Value Identifier", ) values: Union[str, List[str]] = Field( ..., - description='The list of attributes being iterated over and used as input for the requests made to the source API.', - examples=[['section_a', 'section_b', 'section_c'], "{{ config['sections'] }}"], - title='Partition Values', + description="The list of attributes being iterated over and used as input for the requests made to the source API.", + examples=[["section_a", "section_b", "section_c"], "{{ config['sections'] }}"], + title="Partition Values", ) request_option: Optional[RequestOption] = Field( None, - description='A request option describing where the list value should be injected into and under what field name if applicable.', - title='Inject Partition Value Into Outgoing HTTP Request', + description="A request option describing where the list value should be injected into and under what field name if applicable.", + title="Inject Partition Value Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class RecordSelector(BaseModel): - type: Literal['RecordSelector'] + type: Literal["RecordSelector"] extractor: Union[CustomRecordExtractor, DpathExtractor] record_filter: Optional[Union[CustomRecordFilter, RecordFilter]] = Field( None, - description='Responsible for filtering records to be emitted by the Source.', - title='Record Filter', + description="Responsible for filtering records to be emitted by the Source.", + title="Record Filter", ) schema_normalization: Optional[SchemaNormalization] = SchemaNormalization.None_ - parameters: 
Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class Spec(BaseModel): - type: Literal['Spec'] + type: Literal["Spec"] connection_specification: Dict[str, Any] = Field( ..., - description='A connection specification describing how a the connector can be configured.', - title='Connection Specification', + description="A connection specification describing how a the connector can be configured.", + title="Connection Specification", ) documentation_url: Optional[str] = Field( None, description="URL of the connector's documentation page.", - examples=['https://docs.airbyte.com/integrations/sources/dremio'], - title='Documentation URL', + examples=["https://docs.airbyte.com/integrations/sources/dremio"], + title="Documentation URL", ) advanced_auth: Optional[AuthFlow] = Field( None, - description='Advanced specification for configuring the authentication flow.', - title='Advanced Auth', + description="Advanced specification for configuring the authentication flow.", + title="Advanced Auth", ) class CompositeErrorHandler(BaseModel): - type: Literal['CompositeErrorHandler'] + type: Literal["CompositeErrorHandler"] error_handlers: List[Union[CompositeErrorHandler, DefaultErrorHandler]] = Field( ..., - description='List of error handlers to iterate on to determine how to handle a failed response.', - title='Error Handlers', + description="List of error handlers to iterate on to determine how to handle a failed response.", + title="Error Handlers", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DeclarativeSource(BaseModel): class Config: extra = Extra.forbid - type: Literal['DeclarativeSource'] + type: Literal["DeclarativeSource"] check: CheckStream streams: List[DeclarativeStream] version: str = Field( ..., - description='The version of the Airbyte CDK used to build and test the 
source.', + description="The version of the Airbyte CDK used to build and test the source.", ) schemas: Optional[Schemas] = None definitions: Optional[Dict[str, Any]] = None @@ -1351,11 +1345,11 @@ class Config: concurrency_level: Optional[ConcurrencyLevel] = None metadata: Optional[Dict[str, Any]] = Field( None, - description='For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.', + description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.", ) description: Optional[str] = Field( None, - description='A description of the connector. It will be presented on the Source documentation page.', + description="A description of the connector. It will be presented on the Source documentation page.", ) @@ -1363,12 +1357,12 @@ class SelectiveAuthenticator(BaseModel): class Config: extra = Extra.allow - type: Literal['SelectiveAuthenticator'] + type: Literal["SelectiveAuthenticator"] authenticator_selection_path: List[str] = Field( ..., - description='Path of the field in config with selected authenticator name', - examples=[['auth'], ['auth', 'type']], - title='Authenticator Selection Path', + description="Path of the field in config with selected authenticator name", + examples=[["auth"], ["auth", "type"]], + title="Authenticator Selection Path", ) authenticators: Dict[ str, @@ -1385,132 +1379,128 @@ class Config: ], ] = Field( ..., - description='Authenticators to select from.', + description="Authenticators to select from.", examples=[ { - 'authenticators': { - 'token': '#/definitions/ApiKeyAuthenticator', - 'oauth': '#/definitions/OAuthAuthenticator', - 'jwt': '#/definitions/JwtAuthenticator', + "authenticators": { + "token": "#/definitions/ApiKeyAuthenticator", + "oauth": "#/definitions/OAuthAuthenticator", + "jwt": "#/definitions/JwtAuthenticator", } } ], - title='Authenticators', + title="Authenticators", ) - 
parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DeclarativeStream(BaseModel): class Config: extra = Extra.allow - type: Literal['DeclarativeStream'] + type: Literal["DeclarativeStream"] retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field( ..., - description='Component used to coordinate how records are extracted across stream slices and request pages.', - title='Retriever', - ) - incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = ( - Field( - None, - description='Component used to fetch data incrementally based on a time field in the data.', - title='Incremental Sync', - ) + description="Component used to coordinate how records are extracted across stream slices and request pages.", + title="Retriever", ) - name: Optional[str] = Field( - '', description='The stream name.', example=['Users'], title='Name' + incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field( + None, + description="Component used to fetch data incrementally based on a time field in the data.", + title="Incremental Sync", ) + name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name") primary_key: Optional[PrimaryKey] = Field( - '', description='The primary key of the stream.', title='Primary Key' + "", description="The primary key of the stream.", title="Primary Key" ) - schema_loader: Optional[ - Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader] - ] = Field( - None, - description='Component used to retrieve the schema for the current stream.', - title='Schema Loader', + schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader]] = ( + Field( + None, + description="Component used to retrieve the schema for the current stream.", + title="Schema Loader", + ) ) transformations: Optional[ List[Union[AddFields, 
CustomTransformation, RemoveFields, KeysToLower]] ] = Field( None, - description='A list of transformations to be applied to each output record.', - title='Transformations', + description="A list of transformations to be applied to each output record.", + title="Transformations", ) state_migrations: Optional[ List[Union[LegacyToPerPartitionStateMigration, CustomStateMigration]] ] = Field( [], - description='Array of state migrations to be applied on the input state', - title='State Migrations', + description="Array of state migrations to be applied on the input state", + title="State Migrations", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenAuthenticator(BaseModel): - type: Literal['SessionTokenAuthenticator'] + type: Literal["SessionTokenAuthenticator"] login_requester: HttpRequester = Field( ..., - description='Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.', + description="Description of the request to perform to obtain a session token to perform data requests. 
The response body is expected to be a JSON object with a session token property.", examples=[ { - 'type': 'HttpRequester', - 'url_base': 'https://my_api.com', - 'path': '/login', - 'authenticator': { - 'type': 'BasicHttpAuthenticator', - 'username': '{{ config.username }}', - 'password': '{{ config.password }}', + "type": "HttpRequester", + "url_base": "https://my_api.com", + "path": "/login", + "authenticator": { + "type": "BasicHttpAuthenticator", + "username": "{{ config.username }}", + "password": "{{ config.password }}", }, } ], - title='Login Requester', + title="Login Requester", ) session_token_path: List[str] = Field( ..., - description='The path in the response body returned from the login requester to the session token.', - examples=[['access_token'], ['result', 'token']], - title='Session Token Path', + description="The path in the response body returned from the login requester to the session token.", + examples=[["access_token"], ["result", "token"]], + title="Session Token Path", ) expiration_duration: Optional[str] = Field( None, - description='The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. Omitting it will result in the session token being refreshed for every request.', - examples=['PT1H', 'P1D'], - title='Expiration Duration', + description="The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. 
Omitting it will result in the session token being refreshed for every request.", + examples=["PT1H", "P1D"], + title="Expiration Duration", ) request_authentication: Union[ SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator ] = Field( ..., - description='Authentication method to use for requests sent to the API, specifying how to inject the session token.', - title='Data Request Authentication', + description="Authentication method to use for requests sent to the API, specifying how to inject the session token.", + title="Data Request Authentication", ) decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field( - None, description='Component used to decode the response.', title='Decoder' + None, description="Component used to decode the response.", title="Decoder" ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class HttpRequester(BaseModel): - type: Literal['HttpRequester'] + type: Literal["HttpRequester"] url_base: str = Field( ..., - description='Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.', + description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", examples=[ - 'https://connect.squareup.com/v2', + "https://connect.squareup.com/v2", "{{ config['base_url'] or 'https://app.posthog.com'}}/api/", ], - title='API Base URL', + title="API Base URL", ) path: str = Field( ..., - description='Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.', + description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. 
API tokens) into this field - Use the Authentication component for this.", examples=[ - '/products', + "/products", "/quotes/{{ stream_partition['id'] }}/quote_line_groups", "/trades/{{ config['symbol_id'] }}/history", ], - title='URL Path', + title="URL Path", ) authenticator: Optional[ Union[ @@ -1527,111 +1517,111 @@ class HttpRequester(BaseModel): ] ] = Field( None, - description='Authentication method to use for requests sent to the API.', - title='Authenticator', + description="Authentication method to use for requests sent to the API.", + title="Authenticator", ) error_handler: Optional[ Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler] ] = Field( None, - description='Error handler component that defines how to handle errors.', - title='Error Handler', + description="Error handler component that defines how to handle errors.", + title="Error Handler", ) http_method: Optional[HttpMethod] = Field( HttpMethod.GET, - description='The HTTP method used to fetch data from the source (can be GET or POST).', - examples=['GET', 'POST'], - title='HTTP Method', + description="The HTTP method used to fetch data from the source (can be GET or POST).", + examples=["GET", "POST"], + title="HTTP Method", ) request_body_data: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.', + description="Specifies how to populate the body of the request with a non-JSON payload. 
Plain text will be sent as is, whereas objects will be converted to a urlencoded form.", examples=[ '[{"clause": {"type": "timestamp", "operator": 10, "parameters":\n [{"value": {{ stream_interval[\'start_time\'] | int * 1000 }} }]\n }, "orderBy": 1, "columnName": "Timestamp"}]/\n' ], - title='Request Body Payload (Non-JSON)', + title="Request Body Payload (Non-JSON)", ) request_body_json: Optional[Union[str, Dict[str, Any]]] = Field( None, - description='Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.', + description="Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.", examples=[ - {'sort_order': 'ASC', 'sort_field': 'CREATED_AT'}, - {'key': "{{ config['value'] }}"}, - {'sort': {'field': 'updated_at', 'order': 'ascending'}}, + {"sort_order": "ASC", "sort_field": "CREATED_AT"}, + {"key": "{{ config['value'] }}"}, + {"sort": {"field": "updated_at", "order": "ascending"}}, ], - title='Request Body JSON Payload', + title="Request Body JSON Payload", ) request_headers: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.', - examples=[{'Output-Format': 'JSON'}, {'Version': "{{ config['version'] }}"}], - title='Request Headers', + description="Return any non-auth headers. 
Authentication headers will overwrite any overlapping headers returned from this method.", + examples=[{"Output-Format": "JSON"}, {"Version": "{{ config['version'] }}"}], + title="Request Headers", ) request_parameters: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.', + description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.", examples=[ - {'unit': 'day'}, + {"unit": "day"}, { - 'query': 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' + "query": 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' }, - {'searchIn': "{{ ','.join(config.get('search_in', [])) }}"}, - {'sort_by[asc]': 'updated_at'}, + {"searchIn": "{{ ','.join(config.get('search_in', [])) }}"}, + {"sort_by[asc]": "updated_at"}, ], - title='Query Parameters', + title="Query Parameters", ) use_cache: Optional[bool] = Field( False, - description='Enables stream requests caching. This field is automatically set by the CDK.', - title='Use Cache', + description="Enables stream requests caching. This field is automatically set by the CDK.", + title="Use Cache", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ParentStreamConfig(BaseModel): - type: Literal['ParentStreamConfig'] + type: Literal["ParentStreamConfig"] parent_key: str = Field( ..., - description='The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. 
This parent identifier field is typically a characteristic of the child records being extracted from the source API.', - examples=['id', "{{ config['parent_record_id'] }}"], - title='Parent Key', + description="The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. This parent identifier field is typically a characteristic of the child records being extracted from the source API.", + examples=["id", "{{ config['parent_record_id'] }}"], + title="Parent Key", ) stream: DeclarativeStream = Field( - ..., description='Reference to the parent stream.', title='Parent Stream' + ..., description="Reference to the parent stream.", title="Parent Stream" ) partition_field: str = Field( ..., - description='While iterating over parent records during a sync, the parent_key value can be referenced by using this field.', - examples=['parent_id', "{{ config['parent_partition_field'] }}"], - title='Current Parent Key Value Identifier', + description="While iterating over parent records during a sync, the parent_key value can be referenced by using this field.", + examples=["parent_id", "{{ config['parent_partition_field'] }}"], + title="Current Parent Key Value Identifier", ) request_option: Optional[RequestOption] = Field( None, - description='A request option describing where the parent key value should be injected into and under what field name if applicable.', - title='Request Option', + description="A request option describing where the parent key value should be injected into and under what field name if applicable.", + title="Request Option", ) incremental_dependency: Optional[bool] = Field( False, - description='Indicates whether the parent stream should be read incrementally based on updates in the child stream.', - title='Incremental Dependency', + description="Indicates whether the parent stream should be read incrementally based on updates in the child stream.", + title="Incremental Dependency", ) 
extra_fields: Optional[List[List[str]]] = Field( None, - description='Array of field paths to include as additional fields in the stream slice. Each path is an array of strings representing keys to access fields in the respective parent record. Accessible via `stream_slice.extra_fields`. Missing fields are set to `None`.', - title='Extra Fields', + description="Array of field paths to include as additional fields in the stream slice. Each path is an array of strings representing keys to access fields in the respective parent record. Accessible via `stream_slice.extra_fields`. Missing fields are set to `None`.", + title="Extra Fields", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SimpleRetriever(BaseModel): - type: Literal['SimpleRetriever'] + type: Literal["SimpleRetriever"] record_selector: RecordSelector = Field( ..., - description='Component that describes how to extract records from a HTTP response.', + description="Component that describes how to extract records from a HTTP response.", ) requester: Union[CustomRequester, HttpRequester] = Field( ..., - description='Requester component that describes how to prepare HTTP requests to send to the source API.', + description="Requester component that describes how to prepare HTTP requests to send to the source API.", ) paginator: Optional[Union[DefaultPaginator, NoPagination]] = Field( None, @@ -1639,23 +1629,19 @@ class SimpleRetriever(BaseModel): ) ignore_stream_slicer_parameters_on_paginated_requests: Optional[bool] = Field( False, - description='If true, the partition router and incremental request options will be ignored when paginating requests. Request options set directly on the requester will not be ignored.', + description="If true, the partition router and incremental request options will be ignored when paginating requests. 
Request options set directly on the requester will not be ignored.", ) partition_router: Optional[ Union[ CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[ - Union[ - CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter - ] - ], + List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], ] ] = Field( [], - description='PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.', - title='Partition Router', + description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", + title="Partition Router", ) decoder: Optional[ Union[ @@ -1668,39 +1654,39 @@ class SimpleRetriever(BaseModel): ] ] = Field( None, - description='Component decoding the response so records can be extracted.', - title='Decoder', + description="Component decoding the response so records can be extracted.", + title="Decoder", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AsyncRetriever(BaseModel): - type: Literal['AsyncRetriever'] + type: Literal["AsyncRetriever"] record_selector: RecordSelector = Field( ..., - description='Component that describes how to extract records from a HTTP response.', + description="Component that describes how to extract records from a HTTP response.", ) status_mapping: AsyncJobStatusMap = Field( - ..., description='Async Job Status to Airbyte CDK Async Job Status mapping.' + ..., description="Async Job Status to Airbyte CDK Async Job Status mapping." ) status_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field( - ..., description='Responsible for fetching the actual status of the async job.' + ..., description="Responsible for fetching the actual status of the async job." 
) urls_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field( ..., - description='Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.', + description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.", ) creation_requester: Union[CustomRequester, HttpRequester] = Field( ..., - description='Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.', + description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.", ) polling_requester: Union[CustomRequester, HttpRequester] = Field( ..., - description='Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.', + description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.", ) download_requester: Union[CustomRequester, HttpRequester] = Field( ..., - description='Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.', + description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.", ) download_paginator: Optional[Union[DefaultPaginator, NoPagination]] = Field( None, @@ -1712,23 +1698,19 @@ class AsyncRetriever(BaseModel): ) delete_requester: Optional[Union[CustomRequester, HttpRequester]] = Field( None, - description='Requester component that describes how to prepare HTTP requests to send to the source API to delete a job once the records are extracted.', + description="Requester component that describes how to prepare HTTP requests to send to the source API to delete a job once the records are 
extracted.", ) partition_router: Optional[ Union[ CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[ - Union[ - CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter - ] - ], + List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], ] ] = Field( [], - description='PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.', - title='Partition Router', + description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", + title="Partition Router", ) decoder: Optional[ Union[ @@ -1741,20 +1723,20 @@ class AsyncRetriever(BaseModel): ] ] = Field( None, - description='Component decoding the response so records can be extracted.', - title='Decoder', + description="Component decoding the response so records can be extracted.", + title="Decoder", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SubstreamPartitionRouter(BaseModel): - type: Literal['SubstreamPartitionRouter'] + type: Literal["SubstreamPartitionRouter"] parent_stream_configs: List[ParentStreamConfig] = Field( ..., - description='Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.', - title='Parent Stream Configs', + description="Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.", + title="Parent Stream Configs", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") CompositeErrorHandler.update_forward_refs() @@ -1763,4 +1745,4 @@ class SubstreamPartitionRouter(BaseModel): DeclarativeStream.update_forward_refs() SessionTokenAuthenticator.update_forward_refs() 
SimpleRetriever.update_forward_refs() -AsyncRetriever.update_forward_refs() \ No newline at end of file +AsyncRetriever.update_forward_refs() diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 0d8c2435..f89ec3b2 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -9,7 +9,21 @@ import inspect import re from functools import partial -from typing import Any, Callable, Dict, List, Mapping, MutableMapping, Optional, Tuple, Type, Union, get_args, get_origin, get_type_hints +from typing import ( + Any, + Callable, + Dict, + List, + Mapping, + MutableMapping, + Optional, + Tuple, + Type, + Union, + get_args, + get_origin, + get_type_hints, +) from airbyte_cdk.models import FailureType, Level from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager @@ -18,9 +32,14 @@ from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator -from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator, NoAuth +from airbyte_cdk.sources.declarative.auth.declarative_authenticator import ( + DeclarativeAuthenticator, + NoAuth, +) from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm -from airbyte_cdk.sources.declarative.auth.oauth import DeclarativeSingleUseRefreshTokenOauth2Authenticator +from airbyte_cdk.sources.declarative.auth.oauth import ( + DeclarativeSingleUseRefreshTokenOauth2Authenticator, +) from airbyte_cdk.sources.declarative.auth.selective_authenticator import SelectiveAuthenticator from airbyte_cdk.sources.declarative.auth.token import ( ApiKeyAuthenticator, @@ -28,7 +47,11 @@ BearerAuthenticator, 
LegacySessionTokenAuthenticator, ) -from airbyte_cdk.sources.declarative.auth.token_provider import InterpolatedStringTokenProvider, SessionTokenProvider, TokenProvider +from airbyte_cdk.sources.declarative.auth.token_provider import ( + InterpolatedStringTokenProvider, + SessionTokenProvider, + TokenProvider, +) from airbyte_cdk.sources.declarative.checks import CheckStream from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime @@ -42,9 +65,18 @@ PaginationDecoderDecorator, XmlDecoder, ) -from airbyte_cdk.sources.declarative.extractors import DpathExtractor, RecordFilter, RecordSelector, ResponseToFileExtractor -from airbyte_cdk.sources.declarative.extractors.record_filter import ClientSideIncrementalRecordFilterDecorator -from airbyte_cdk.sources.declarative.extractors.record_selector import SCHEMA_TRANSFORMER_TYPE_MAPPING +from airbyte_cdk.sources.declarative.extractors import ( + DpathExtractor, + RecordFilter, + RecordSelector, + ResponseToFileExtractor, +) +from airbyte_cdk.sources.declarative.extractors.record_filter import ( + ClientSideIncrementalRecordFilterDecorator, +) +from airbyte_cdk.sources.declarative.extractors.record_selector import ( + SCHEMA_TRANSFORMER_TYPE_MAPPING, +) from airbyte_cdk.sources.declarative.incremental import ( ChildPartitionResumableFullRefreshCursor, CursorFactory, @@ -57,90 +89,222 @@ ) from airbyte_cdk.sources.declarative.interpolation import InterpolatedString from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping -from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import LegacyToPerPartitionStateMigration +from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_migration import ( + LegacyToPerPartitionStateMigration, +) from airbyte_cdk.sources.declarative.models import CustomStateMigration -from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import AddedFieldDefinition as AddedFieldDefinitionModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import AddFields as AddFieldsModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ApiKeyAuthenticator as ApiKeyAuthenticatorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import AsyncJobStatusMap as AsyncJobStatusMapModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import AsyncRetriever as AsyncRetrieverModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import BasicHttpAuthenticator as BasicHttpAuthenticatorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import BearerAuthenticator as BearerAuthenticatorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CheckStream as CheckStreamModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CompositeErrorHandler as CompositeErrorHandlerModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ConcurrencyLevel as ConcurrencyLevelModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ConstantBackoffStrategy as ConstantBackoffStrategyModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CursorPagination as CursorPaginationModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomAuthenticator as CustomAuthenticatorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomBackoffStrategy as CustomBackoffStrategyModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomDecoder as CustomDecoderModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomErrorHandler as 
CustomErrorHandlerModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomIncrementalSync as CustomIncrementalSyncModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomPaginationStrategy as CustomPaginationStrategyModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomPartitionRouter as CustomPartitionRouterModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRecordExtractor as CustomRecordExtractorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRecordFilter as CustomRecordFilterModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRequester as CustomRequesterModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomRetriever as CustomRetrieverModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomSchemaLoader as CustomSchemaLoader -from airbyte_cdk.sources.declarative.models.declarative_component_schema import CustomTransformation as CustomTransformationModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import DatetimeBasedCursor as DatetimeBasedCursorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import DeclarativeStream as DeclarativeStreamModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import DefaultErrorHandler as DefaultErrorHandlerModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import DefaultPaginator as DefaultPaginatorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import DpathExtractor as DpathExtractorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + AddedFieldDefinition as AddedFieldDefinitionModel, +) +from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + AddFields as AddFieldsModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + ApiKeyAuthenticator as ApiKeyAuthenticatorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + AsyncJobStatusMap as AsyncJobStatusMapModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + AsyncRetriever as AsyncRetrieverModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + BasicHttpAuthenticator as BasicHttpAuthenticatorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + BearerAuthenticator as BearerAuthenticatorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CheckStream as CheckStreamModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CompositeErrorHandler as CompositeErrorHandlerModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + ConcurrencyLevel as ConcurrencyLevelModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + ConstantBackoffStrategy as ConstantBackoffStrategyModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CursorPagination as CursorPaginationModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomAuthenticator as CustomAuthenticatorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomBackoffStrategy as CustomBackoffStrategyModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomDecoder as CustomDecoderModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomErrorHandler as CustomErrorHandlerModel, +) 
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomIncrementalSync as CustomIncrementalSyncModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomPaginationStrategy as CustomPaginationStrategyModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomPartitionRouter as CustomPartitionRouterModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomRecordExtractor as CustomRecordExtractorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomRecordFilter as CustomRecordFilterModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomRequester as CustomRequesterModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomRetriever as CustomRetrieverModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomSchemaLoader as CustomSchemaLoader, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + CustomTransformation as CustomTransformationModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + DatetimeBasedCursor as DatetimeBasedCursorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + DeclarativeStream as DeclarativeStreamModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + DefaultErrorHandler as DefaultErrorHandlerModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + DefaultPaginator as DefaultPaginatorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + DpathExtractor as DpathExtractorModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( 
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import GzipJsonDecoder as GzipJsonDecoderModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import HttpRequester as HttpRequesterModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import HttpResponseFilter as HttpResponseFilterModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import InlineSchemaLoader as InlineSchemaLoaderModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import IterableDecoder as IterableDecoderModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import JsonDecoder as JsonDecoderModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import JsonFileSchemaLoader as JsonFileSchemaLoaderModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import JsonlDecoder as JsonlDecoderModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import JwtAuthenticator as JwtAuthenticatorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import JwtHeaders as JwtHeadersModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import JwtPayload as JwtPayloadModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import KeysToLower as KeysToLowerModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + GzipJsonDecoder as GzipJsonDecoderModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + HttpRequester as HttpRequesterModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + HttpResponseFilter as HttpResponseFilterModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + InlineSchemaLoader as 
InlineSchemaLoaderModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + IterableDecoder as IterableDecoderModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + JsonDecoder as JsonDecoderModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + JsonFileSchemaLoader as JsonFileSchemaLoaderModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + JsonlDecoder as JsonlDecoderModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + JwtAuthenticator as JwtAuthenticatorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + JwtHeaders as JwtHeadersModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + JwtPayload as JwtPayloadModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + KeysToLower as KeysToLowerModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel, ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( LegacyToPerPartitionStateMigration as LegacyToPerPartitionStateMigrationModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ListPartitionRouter as ListPartitionRouterModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import MinMaxDatetime as MinMaxDatetimeModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import NoAuth as NoAuthModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import NoPagination as NoPaginationModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import OAuthAuthenticator as OAuthAuthenticatorModel -from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import OffsetIncrement as OffsetIncrementModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import PageIncrement as PageIncrementModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ParentStreamConfig as ParentStreamConfigModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import RecordFilter as RecordFilterModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import RecordSelector as RecordSelectorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import RemoveFields as RemoveFieldsModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import RequestOption as RequestOptionModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import RequestPath as RequestPathModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import SelectiveAuthenticator as SelectiveAuthenticatorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import SessionTokenAuthenticator as SessionTokenAuthenticatorModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import SimpleRetriever as SimpleRetrieverModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + ListPartitionRouter as ListPartitionRouterModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + MinMaxDatetime as MinMaxDatetimeModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + NoAuth as NoAuthModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + NoPagination as NoPaginationModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + OAuthAuthenticator as OAuthAuthenticatorModel, +) +from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + OffsetIncrement as OffsetIncrementModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + PageIncrement as PageIncrementModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + ParentStreamConfig as ParentStreamConfigModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + RecordFilter as RecordFilterModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + RecordSelector as RecordSelectorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + RemoveFields as RemoveFieldsModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + RequestOption as RequestOptionModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + RequestPath as RequestPathModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + SelectiveAuthenticator as SelectiveAuthenticatorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + SessionTokenAuthenticator as SessionTokenAuthenticatorModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + SimpleRetriever as SimpleRetrieverModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import SubstreamPartitionRouter as SubstreamPartitionRouterModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + SubstreamPartitionRouter as SubstreamPartitionRouterModel, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType -from airbyte_cdk.sources.declarative.models.declarative_component_schema import WaitTimeFromHeader as 
WaitTimeFromHeaderModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel -from airbyte_cdk.sources.declarative.models.declarative_component_schema import XmlDecoder as XmlDecoderModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + WaitTimeFromHeader as WaitTimeFromHeaderModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + WaitUntilTimeFromHeader as WaitUntilTimeFromHeaderModel, +) +from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( + XmlDecoder as XmlDecoderModel, +) from airbyte_cdk.sources.declarative.partition_routers import ( CartesianProductStreamSlicer, ListPartitionRouter, SinglePartitionRouter, SubstreamPartitionRouter, ) -from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ParentStreamConfig +from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ( + ParentStreamConfig, +) from airbyte_cdk.sources.declarative.requesters import HttpRequester, RequestOption -from airbyte_cdk.sources.declarative.requesters.error_handlers import CompositeErrorHandler, DefaultErrorHandler, HttpResponseFilter +from airbyte_cdk.sources.declarative.requesters.error_handlers import ( + CompositeErrorHandler, + DefaultErrorHandler, + HttpResponseFilter, +) from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies import ( ConstantBackoffStrategy, ExponentialBackoffStrategy, @@ -148,7 +312,11 @@ WaitUntilTimeFromHeaderBackoffStrategy, ) from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository -from airbyte_cdk.sources.declarative.requesters.paginators import DefaultPaginator, NoPagination, PaginatorTestReadDecorator +from airbyte_cdk.sources.declarative.requesters.paginators import ( + DefaultPaginator, + NoPagination, + PaginatorTestReadDecorator, +) 
from airbyte_cdk.sources.declarative.requesters.paginators.strategies import ( CursorPaginationStrategy, CursorStopCondition, @@ -165,14 +333,32 @@ ) from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod -from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, SimpleRetriever, SimpleRetrieverTestReadDecorator -from airbyte_cdk.sources.declarative.schema import DefaultSchemaLoader, InlineSchemaLoader, JsonFileSchemaLoader +from airbyte_cdk.sources.declarative.retrievers import ( + AsyncRetriever, + SimpleRetriever, + SimpleRetrieverTestReadDecorator, +) +from airbyte_cdk.sources.declarative.schema import ( + DefaultSchemaLoader, + InlineSchemaLoader, + JsonFileSchemaLoader, +) from airbyte_cdk.sources.declarative.spec import Spec from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer -from airbyte_cdk.sources.declarative.transformations import AddFields, RecordTransformation, RemoveFields +from airbyte_cdk.sources.declarative.transformations import ( + AddFields, + RecordTransformation, + RemoveFields, +) from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition -from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import KeysToLowerTransformation -from airbyte_cdk.sources.message import InMemoryMessageRepository, LogAppenderMessageRepositoryDecorator, MessageRepository +from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import ( + KeysToLowerTransformation, +) +from airbyte_cdk.sources.message import ( + InMemoryMessageRepository, + LogAppenderMessageRepositoryDecorator, + MessageRepository, +) from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import ( CustomFormatConcurrentStreamStateConverter, @@ -189,7 +375,6 
@@ class ModelToComponentFactory: - EPOCH_DATETIME_FORMAT = "%s" def __init__( @@ -283,7 +468,11 @@ def _init_mappings(self) -> None: self.TYPE_NAME_TO_MODEL = {cls.__name__: cls for cls in self.PYDANTIC_MODEL_TO_CONSTRUCTOR} def create_component( - self, model_type: Type[BaseModel], component_definition: ComponentDefinition, config: Config, **kwargs: Any + self, + model_type: Type[BaseModel], + component_definition: ComponentDefinition, + config: Config, + **kwargs: Any, ) -> Any: """ Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and @@ -298,26 +487,38 @@ def create_component( component_type = component_definition.get("type") if component_definition.get("type") != model_type.__name__: - raise ValueError(f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead") + raise ValueError( + f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" + ) declarative_component_model = model_type.parse_obj(component_definition) if not isinstance(declarative_component_model, model_type): - raise ValueError(f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}") + raise ValueError( + f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}" + ) - return self._create_component_from_model(model=declarative_component_model, config=config, **kwargs) + return self._create_component_from_model( + model=declarative_component_model, config=config, **kwargs + ) def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any: if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR: - raise ValueError(f"{model.__class__} with attributes {model} is not a valid component type") + raise ValueError( + f"{model.__class__} with attributes {model} is not a valid component type" + ) component_constructor = 
self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(model.__class__) if not component_constructor: raise ValueError(f"Could not find constructor for {model.__class__}") return component_constructor(model=model, config=config, **kwargs) @staticmethod - def create_added_field_definition(model: AddedFieldDefinitionModel, config: Config, **kwargs: Any) -> AddedFieldDefinition: - interpolated_value = InterpolatedString.create(model.value, parameters=model.parameters or {}) + def create_added_field_definition( + model: AddedFieldDefinitionModel, config: Config, **kwargs: Any + ) -> AddedFieldDefinition: + interpolated_value = InterpolatedString.create( + model.value, parameters=model.parameters or {} + ) return AddedFieldDefinition( path=model.path, value=interpolated_value, @@ -329,14 +530,18 @@ def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any added_field_definitions = [ self._create_component_from_model( model=added_field_definition_model, - value_type=ModelToComponentFactory._json_schema_type_name_to_type(added_field_definition_model.value_type), + value_type=ModelToComponentFactory._json_schema_type_name_to_type( + added_field_definition_model.value_type + ), config=config, ) for added_field_definition_model in model.fields ] return AddFields(fields=added_field_definitions, parameters=model.parameters or {}) - def create_keys_to_lower_transformation(self, model: KeysToLowerModel, config: Config, **kwargs: Any) -> KeysToLowerTransformation: + def create_keys_to_lower_transformation( + self, model: KeysToLowerModel, config: Config, **kwargs: Any + ) -> KeysToLowerTransformation: return KeysToLowerTransformation() @staticmethod @@ -353,16 +558,25 @@ def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[ @staticmethod def create_api_key_authenticator( - model: ApiKeyAuthenticatorModel, config: Config, token_provider: Optional[TokenProvider] = None, **kwargs: Any + model: ApiKeyAuthenticatorModel, + config: Config, + 
token_provider: Optional[TokenProvider] = None, + **kwargs: Any, ) -> ApiKeyAuthenticator: if model.inject_into is None and model.header is None: - raise ValueError("Expected either inject_into or header to be set for ApiKeyAuthenticator") + raise ValueError( + "Expected either inject_into or header to be set for ApiKeyAuthenticator" + ) if model.inject_into is not None and model.header is not None: - raise ValueError("inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option") + raise ValueError( + "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option" + ) if token_provider is not None and model.api_token != "": - raise ValueError("If token_provider is set, api_token is ignored and has to be set to empty string.") + raise ValueError( + "If token_provider is set, api_token is ignored and has to be set to empty string." + ) request_option = ( RequestOption( @@ -381,7 +595,11 @@ def create_api_key_authenticator( token_provider=( token_provider if token_provider is not None - else InterpolatedStringTokenProvider(api_token=model.api_token or "", config=config, parameters=model.parameters or {}) + else InterpolatedStringTokenProvider( + api_token=model.api_token or "", + config=config, + parameters=model.parameters or {}, + ) ), request_option=request_option, config=config, @@ -400,26 +618,44 @@ def create_legacy_to_per_partition_state_migration( f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}" ) partition_router = retriever.partition_router - if not isinstance(partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel)): + if not isinstance( + partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) + ): raise ValueError( f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. 
Got {type(partition_router)}" ) if not hasattr(partition_router, "parent_stream_configs"): - raise ValueError("LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration.") + raise ValueError( + "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." + ) - return LegacyToPerPartitionStateMigration(declarative_stream.retriever.partition_router, declarative_stream.incremental_sync, config, declarative_stream.parameters) # type: ignore # The retriever type was already checked + return LegacyToPerPartitionStateMigration( + declarative_stream.retriever.partition_router, + declarative_stream.incremental_sync, + config, + declarative_stream.parameters, + ) # type: ignore # The retriever type was already checked def create_session_token_authenticator( self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any ) -> Union[ApiKeyAuthenticator, BearerAuthenticator]: - decoder = self._create_component_from_model(model=model.decoder, config=config) if model.decoder else JsonDecoder(parameters={}) + decoder = ( + self._create_component_from_model(model=model.decoder, config=config) + if model.decoder + else JsonDecoder(parameters={}) + ) login_requester = self._create_component_from_model( - model=model.login_requester, config=config, name=f"{name}_login_requester", decoder=decoder + model=model.login_requester, + config=config, + name=f"{name}_login_requester", + decoder=decoder, ) token_provider = SessionTokenProvider( login_requester=login_requester, session_token_path=model.session_token_path, - expiration_duration=parse_duration(model.expiration_duration) if model.expiration_duration else None, + expiration_duration=parse_duration(model.expiration_duration) + if model.expiration_duration + else None, parameters=model.parameters or {}, message_repository=self._message_repository, decoder=decoder, @@ -432,28 +668,46 @@ def create_session_token_authenticator( ) else: return 
ModelToComponentFactory.create_api_key_authenticator( - ApiKeyAuthenticatorModel(type="ApiKeyAuthenticator", api_token="", inject_into=model.request_authentication.inject_into), # type: ignore # $parameters and headers default to None + ApiKeyAuthenticatorModel( + type="ApiKeyAuthenticator", + api_token="", + inject_into=model.request_authentication.inject_into, + ), # type: ignore # $parameters and headers default to None config=config, token_provider=token_provider, ) @staticmethod - def create_basic_http_authenticator(model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any) -> BasicHttpAuthenticator: + def create_basic_http_authenticator( + model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any + ) -> BasicHttpAuthenticator: return BasicHttpAuthenticator( - password=model.password or "", username=model.username, config=config, parameters=model.parameters or {} + password=model.password or "", + username=model.username, + config=config, + parameters=model.parameters or {}, ) @staticmethod def create_bearer_authenticator( - model: BearerAuthenticatorModel, config: Config, token_provider: Optional[TokenProvider] = None, **kwargs: Any + model: BearerAuthenticatorModel, + config: Config, + token_provider: Optional[TokenProvider] = None, + **kwargs: Any, ) -> BearerAuthenticator: if token_provider is not None and model.api_token != "": - raise ValueError("If token_provider is set, api_token is ignored and has to be set to empty string.") + raise ValueError( + "If token_provider is set, api_token is ignored and has to be set to empty string." 
+ ) return BearerAuthenticator( token_provider=( token_provider if token_provider is not None - else InterpolatedStringTokenProvider(api_token=model.api_token or "", config=config, parameters=model.parameters or {}) + else InterpolatedStringTokenProvider( + api_token=model.api_token or "", + config=config, + parameters=model.parameters or {}, + ) ), config=config, parameters=model.parameters or {}, @@ -463,14 +717,21 @@ def create_bearer_authenticator( def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream: return CheckStream(stream_names=model.stream_names, parameters={}) - def create_composite_error_handler(self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any) -> CompositeErrorHandler: + def create_composite_error_handler( + self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any + ) -> CompositeErrorHandler: error_handlers = [ - self._create_component_from_model(model=error_handler_model, config=config) for error_handler_model in model.error_handlers + self._create_component_from_model(model=error_handler_model, config=config) + for error_handler_model in model.error_handlers ] - return CompositeErrorHandler(error_handlers=error_handlers, parameters=model.parameters or {}) + return CompositeErrorHandler( + error_handlers=error_handlers, parameters=model.parameters or {} + ) @staticmethod - def create_concurrency_level(model: ConcurrencyLevelModel, config: Config, **kwargs: Any) -> ConcurrencyLevel: + def create_concurrency_level( + model: ConcurrencyLevelModel, config: Config, **kwargs: Any + ) -> ConcurrencyLevel: return ConcurrencyLevel( default_concurrency=model.default_concurrency, max_concurrency=model.max_concurrency, @@ -489,26 +750,32 @@ def create_concurrent_cursor_from_datetime_based_cursor( stream_state: MutableMapping[str, Any], **kwargs: Any, ) -> Tuple[ConcurrentCursor, DateTimeStreamStateConverter]: - component_type = component_definition.get("type") if 
component_definition.get("type") != model_type.__name__: - raise ValueError(f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead") + raise ValueError( + f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead" + ) datetime_based_cursor_model = model_type.parse_obj(component_definition) if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): - raise ValueError(f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}") + raise ValueError( + f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" + ) interpolated_cursor_field = InterpolatedString.create( - datetime_based_cursor_model.cursor_field, parameters=datetime_based_cursor_model.parameters or {} + datetime_based_cursor_model.cursor_field, + parameters=datetime_based_cursor_model.parameters or {}, ) cursor_field = CursorField(interpolated_cursor_field.eval(config=config)) interpolated_partition_field_start = InterpolatedString.create( - datetime_based_cursor_model.partition_field_start or "start_time", parameters=datetime_based_cursor_model.parameters or {} + datetime_based_cursor_model.partition_field_start or "start_time", + parameters=datetime_based_cursor_model.parameters or {}, ) interpolated_partition_field_end = InterpolatedString.create( - datetime_based_cursor_model.partition_field_end or "end_time", parameters=datetime_based_cursor_model.parameters or {} + datetime_based_cursor_model.partition_field_end or "end_time", + parameters=datetime_based_cursor_model.parameters or {}, ) slice_boundary_fields = ( @@ -519,12 +786,17 @@ def create_concurrent_cursor_from_datetime_based_cursor( datetime_format = datetime_based_cursor_model.datetime_format cursor_granularity = ( - parse_duration(datetime_based_cursor_model.cursor_granularity) if datetime_based_cursor_model.cursor_granularity else None + 
parse_duration(datetime_based_cursor_model.cursor_granularity) + if datetime_based_cursor_model.cursor_granularity + else None ) lookback_window = None interpolated_lookback_window = ( - InterpolatedString.create(datetime_based_cursor_model.lookback_window, parameters=datetime_based_cursor_model.parameters or {}) + InterpolatedString.create( + datetime_based_cursor_model.lookback_window, + parameters=datetime_based_cursor_model.parameters or {}, + ) if datetime_based_cursor_model.lookback_window else None ) @@ -535,7 +807,9 @@ def create_concurrent_cursor_from_datetime_based_cursor( connector_state_converter: DateTimeStreamStateConverter if datetime_format == self.EPOCH_DATETIME_FORMAT: - connector_state_converter = EpochValueConcurrentStreamStateConverter(is_sequential_state=True) + connector_state_converter = EpochValueConcurrentStreamStateConverter( + is_sequential_state=True + ) else: connector_state_converter = CustomFormatConcurrentStreamStateConverter( datetime_format=datetime_format, @@ -547,21 +821,30 @@ def create_concurrent_cursor_from_datetime_based_cursor( start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): - start_date_runtime_value = self.create_min_max_datetime(model=datetime_based_cursor_model.start_datetime, config=config) + start_date_runtime_value = self.create_min_max_datetime( + model=datetime_based_cursor_model.start_datetime, config=config + ) else: start_date_runtime_value = datetime_based_cursor_model.start_datetime end_date_runtime_value: Optional[Union[InterpolatedString, str, MinMaxDatetime]] if isinstance(datetime_based_cursor_model.end_datetime, MinMaxDatetimeModel): - end_date_runtime_value = self.create_min_max_datetime(model=datetime_based_cursor_model.end_datetime, config=config) + end_date_runtime_value = self.create_min_max_datetime( + model=datetime_based_cursor_model.end_datetime, config=config + ) else: end_date_runtime_value 
= datetime_based_cursor_model.end_datetime interpolated_start_date = MinMaxDatetime.create( - interpolated_string_or_min_max_datetime=start_date_runtime_value, parameters=datetime_based_cursor_model.parameters + interpolated_string_or_min_max_datetime=start_date_runtime_value, + parameters=datetime_based_cursor_model.parameters, ) interpolated_end_date = ( - None if not end_date_runtime_value else MinMaxDatetime.create(end_date_runtime_value, datetime_based_cursor_model.parameters) + None + if not end_date_runtime_value + else MinMaxDatetime.create( + end_date_runtime_value, datetime_based_cursor_model.parameters + ) ) # If datetime format is not specified then start/end datetime should inherit it from the stream slicer @@ -572,10 +855,14 @@ def create_concurrent_cursor_from_datetime_based_cursor( start_date = interpolated_start_date.get_datetime(config=config) end_date_provider = ( - partial(interpolated_end_date.get_datetime, config) if interpolated_end_date else connector_state_converter.get_end_provider() + partial(interpolated_end_date.get_datetime, config) + if interpolated_end_date + else connector_state_converter.get_end_provider() ) - if (datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity) or ( + if ( + datetime_based_cursor_model.step and not datetime_based_cursor_model.cursor_granularity + ) or ( not datetime_based_cursor_model.step and datetime_based_cursor_model.cursor_granularity ): raise ValueError( @@ -586,7 +873,10 @@ def create_concurrent_cursor_from_datetime_based_cursor( # When step is not defined, default to a step size from the starting date to the present moment step_length = datetime.datetime.now(tz=datetime.timezone.utc) - start_date interpolated_step = ( - InterpolatedString.create(datetime_based_cursor_model.step, parameters=datetime_based_cursor_model.parameters or {}) + InterpolatedString.create( + datetime_based_cursor_model.step, + parameters=datetime_based_cursor_model.parameters or {}, + ) if 
datetime_based_cursor_model.step else None ) @@ -615,7 +905,9 @@ def create_concurrent_cursor_from_datetime_based_cursor( ) @staticmethod - def create_constant_backoff_strategy(model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any) -> ConstantBackoffStrategy: + def create_constant_backoff_strategy( + model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any + ) -> ConstantBackoffStrategy: return ConstantBackoffStrategy( backoff_time_in_seconds=model.backoff_time_in_seconds, config=config, @@ -633,7 +925,9 @@ def create_cursor_pagination( decoder_to_use = decoder else: if not isinstance(decoder, (JsonDecoder, XmlDecoder)): - raise ValueError(f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead.") + raise ValueError( + f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." + ) decoder_to_use = PaginationDecoderDecorator(decoder=decoder) return CursorPaginationStrategy( @@ -669,18 +963,28 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> # the custom component and this code performs a second parse to convert the sub-fields first into models, then declarative components for model_field, model_value in model_args.items(): # If a custom component field doesn't have a type set, we try to use the type hints to infer the type - if isinstance(model_value, dict) and "type" not in model_value and model_field in component_fields: - derived_type = self._derive_component_type_from_type_hints(component_fields.get(model_field)) + if ( + isinstance(model_value, dict) + and "type" not in model_value + and model_field in component_fields + ): + derived_type = self._derive_component_type_from_type_hints( + component_fields.get(model_field) + ) if derived_type: model_value["type"] = derived_type if self._is_component(model_value): - model_args[model_field] = self._create_nested_component(model, model_field, model_value, config) + 
model_args[model_field] = self._create_nested_component( + model, model_field, model_value, config + ) elif isinstance(model_value, list): vals = [] for v in model_value: if isinstance(v, dict) and "type" not in v and model_field in component_fields: - derived_type = self._derive_component_type_from_type_hints(component_fields.get(model_field)) + derived_type = self._derive_component_type_from_type_hints( + component_fields.get(model_field) + ) if derived_type: v["type"] = derived_type if self._is_component(v): @@ -689,7 +993,11 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> vals.append(v) model_args[model_field] = vals - kwargs = {class_field: model_args[class_field] for class_field in component_fields.keys() if class_field in model_args} + kwargs = { + class_field: model_args[class_field] + for class_field in component_fields.keys() + if class_field in model_args + } return custom_component_class(**kwargs) @staticmethod @@ -733,7 +1041,9 @@ def _extract_missing_parameters(error: TypeError) -> List[str]: else: return [] - def _create_nested_component(self, model: Any, model_field: str, model_value: Any, config: Config) -> Any: + def _create_nested_component( + self, model: Any, model_field: str, model_value: Any, config: Config + ) -> Any: type_name = model_value.get("type", None) if not type_name: # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent @@ -752,16 +1062,29 @@ def _create_nested_component(self, model: Any, model_field: str, model_value: An model_constructor = self.PYDANTIC_MODEL_TO_CONSTRUCTOR.get(parsed_model.__class__) constructor_kwargs = inspect.getfullargspec(model_constructor).kwonlyargs model_parameters = model_value.get("$parameters", {}) - matching_parameters = {kwarg: model_parameters[kwarg] for kwarg in constructor_kwargs if kwarg in model_parameters} - return self._create_component_from_model(model=parsed_model, config=config, 
**matching_parameters) + matching_parameters = { + kwarg: model_parameters[kwarg] + for kwarg in constructor_kwargs + if kwarg in model_parameters + } + return self._create_component_from_model( + model=parsed_model, config=config, **matching_parameters + ) except TypeError as error: missing_parameters = self._extract_missing_parameters(error) if missing_parameters: raise ValueError( f"Error creating component '{type_name}' with parent custom component {model.class_name}: Please provide " - + ", ".join((f"{type_name}.$parameters.{parameter}" for parameter in missing_parameters)) + + ", ".join( + ( + f"{type_name}.$parameters.{parameter}" + for parameter in missing_parameters + ) + ) ) - raise TypeError(f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}") + raise TypeError( + f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" + ) else: raise ValueError( f"Error creating custom component {model.class_name}. 
Subcomponent creation has not been implemented for '{type_name}'" @@ -771,18 +1094,26 @@ def _create_nested_component(self, model: Any, model_field: str, model_value: An def _is_component(model_value: Any) -> bool: return isinstance(model_value, dict) and model_value.get("type") is not None - def create_datetime_based_cursor(self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any) -> DatetimeBasedCursor: + def create_datetime_based_cursor( + self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any + ) -> DatetimeBasedCursor: start_datetime: Union[str, MinMaxDatetime] = ( - model.start_datetime if isinstance(model.start_datetime, str) else self.create_min_max_datetime(model.start_datetime, config) + model.start_datetime + if isinstance(model.start_datetime, str) + else self.create_min_max_datetime(model.start_datetime, config) ) end_datetime: Union[str, MinMaxDatetime, None] = None if model.is_data_feed and model.end_datetime: raise ValueError("Data feed does not support end_datetime") if model.is_data_feed and model.is_client_side_incremental: - raise ValueError("`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them.") + raise ValueError( + "`Client side incremental` cannot be applied with `data feed`. Choose only 1 from them." 
+ ) if model.end_datetime: end_datetime = ( - model.end_datetime if isinstance(model.end_datetime, str) else self.create_min_max_datetime(model.end_datetime, config) + model.end_datetime + if isinstance(model.end_datetime, str) + else self.create_min_max_datetime(model.end_datetime, config) ) end_time_option = ( @@ -806,7 +1137,9 @@ def create_datetime_based_cursor(self, model: DatetimeBasedCursorModel, config: return DatetimeBasedCursor( cursor_field=model.cursor_field, - cursor_datetime_formats=model.cursor_datetime_formats if model.cursor_datetime_formats else [], + cursor_datetime_formats=model.cursor_datetime_formats + if model.cursor_datetime_formats + else [], cursor_granularity=model.cursor_granularity, datetime_format=model.datetime_format, end_datetime=end_datetime, @@ -823,7 +1156,9 @@ def create_datetime_based_cursor(self, model: DatetimeBasedCursorModel, config: parameters=model.parameters or {}, ) - def create_declarative_stream(self, model: DeclarativeStreamModel, config: Config, **kwargs: Any) -> DeclarativeStream: + def create_declarative_stream( + self, model: DeclarativeStreamModel, config: Config, **kwargs: Any + ) -> DeclarativeStream: # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. 
The custom create methods in @@ -832,7 +1167,9 @@ def create_declarative_stream(self, model: DeclarativeStreamModel, config: Confi primary_key = model.primary_key.__root__ if model.primary_key else None stop_condition_on_cursor = ( - model.incremental_sync and hasattr(model.incremental_sync, "is_data_feed") and model.incremental_sync.is_data_feed + model.incremental_sync + and hasattr(model.incremental_sync, "is_data_feed") + and model.incremental_sync.is_data_feed ) client_side_incremental_sync = None if ( @@ -840,13 +1177,25 @@ def create_declarative_stream(self, model: DeclarativeStreamModel, config: Confi and hasattr(model.incremental_sync, "is_client_side_incremental") and model.incremental_sync.is_client_side_incremental ): - supported_slicers = (DatetimeBasedCursor, GlobalSubstreamCursor, PerPartitionWithGlobalCursor) + supported_slicers = ( + DatetimeBasedCursor, + GlobalSubstreamCursor, + PerPartitionWithGlobalCursor, + ) if combined_slicers and not isinstance(combined_slicers, supported_slicers): - raise ValueError("Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead") + raise ValueError( + "Unsupported Slicer is used. 
PerPartitionWithGlobalCursor should be used here instead" + ) client_side_incremental_sync = { - "date_time_based_cursor": self._create_component_from_model(model=model.incremental_sync, config=config), + "date_time_based_cursor": self._create_component_from_model( + model=model.incremental_sync, config=config + ), "substream_cursor": ( - combined_slicers if isinstance(combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)) else None + combined_slicers + if isinstance( + combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor) + ) + else None ), } @@ -886,7 +1235,9 @@ def create_declarative_stream(self, model: DeclarativeStreamModel, config: Confi transformations = [] if model.transformations: for transformation_model in model.transformations: - transformations.append(self._create_component_from_model(model=transformation_model, config=config)) + transformations.append( + self._create_component_from_model(model=transformation_model, config=config) + ) retriever = self._create_component_from_model( model=model.retriever, config=config, @@ -909,7 +1260,9 @@ def create_declarative_stream(self, model: DeclarativeStreamModel, config: Confi state_transformations = [] if model.schema_loader: - schema_loader = self._create_component_from_model(model=model.schema_loader, config=config) + schema_loader = self._create_component_from_model( + model=model.schema_loader, config=config + ) else: options = model.parameters or {} if "name" not in options: @@ -927,7 +1280,9 @@ def create_declarative_stream(self, model: DeclarativeStreamModel, config: Confi parameters=model.parameters or {}, ) - def _merge_stream_slicers(self, model: DeclarativeStreamModel, config: Config) -> Optional[StreamSlicer]: + def _merge_stream_slicers( + self, model: DeclarativeStreamModel, config: Config + ) -> Optional[StreamSlicer]: stream_slicer = None if ( hasattr(model.retriever, "partition_router") @@ -938,50 +1293,85 @@ def _merge_stream_slicers(self, model: 
DeclarativeStreamModel, config: Config) - if isinstance(stream_slicer_model, list): stream_slicer = CartesianProductStreamSlicer( - [self._create_component_from_model(model=slicer, config=config) for slicer in stream_slicer_model], parameters={} + [ + self._create_component_from_model(model=slicer, config=config) + for slicer in stream_slicer_model + ], + parameters={}, ) else: - stream_slicer = self._create_component_from_model(model=stream_slicer_model, config=config) + stream_slicer = self._create_component_from_model( + model=stream_slicer_model, config=config + ) if model.incremental_sync and stream_slicer: incremental_sync_model = model.incremental_sync - if hasattr(incremental_sync_model, "global_substream_cursor") and incremental_sync_model.global_substream_cursor: - cursor_component = self._create_component_from_model(model=incremental_sync_model, config=config) - return GlobalSubstreamCursor(stream_cursor=cursor_component, partition_router=stream_slicer) + if ( + hasattr(incremental_sync_model, "global_substream_cursor") + and incremental_sync_model.global_substream_cursor + ): + cursor_component = self._create_component_from_model( + model=incremental_sync_model, config=config + ) + return GlobalSubstreamCursor( + stream_cursor=cursor_component, partition_router=stream_slicer + ) else: - cursor_component = self._create_component_from_model(model=incremental_sync_model, config=config) + cursor_component = self._create_component_from_model( + model=incremental_sync_model, config=config + ) return PerPartitionWithGlobalCursor( cursor_factory=CursorFactory( - lambda: self._create_component_from_model(model=incremental_sync_model, config=config), + lambda: self._create_component_from_model( + model=incremental_sync_model, config=config + ), ), partition_router=stream_slicer, stream_cursor=cursor_component, ) elif model.incremental_sync: - return self._create_component_from_model(model=model.incremental_sync, config=config) if model.incremental_sync else None 
+ return ( + self._create_component_from_model(model=model.incremental_sync, config=config) + if model.incremental_sync + else None + ) elif stream_slicer: # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor` return PerPartitionCursor( - cursor_factory=CursorFactory(create_function=partial(ChildPartitionResumableFullRefreshCursor, {})), + cursor_factory=CursorFactory( + create_function=partial(ChildPartitionResumableFullRefreshCursor, {}) + ), partition_router=stream_slicer, ) - elif hasattr(model.retriever, "paginator") and model.retriever.paginator and not stream_slicer: + elif ( + hasattr(model.retriever, "paginator") + and model.retriever.paginator + and not stream_slicer + ): # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor` return ResumableFullRefreshCursor(parameters={}) else: return None - def create_default_error_handler(self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any) -> DefaultErrorHandler: + def create_default_error_handler( + self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any + ) -> DefaultErrorHandler: backoff_strategies = [] if model.backoff_strategies: for backoff_strategy_model in model.backoff_strategies: - backoff_strategies.append(self._create_component_from_model(model=backoff_strategy_model, config=config)) + backoff_strategies.append( + self._create_component_from_model(model=backoff_strategy_model, config=config) + ) response_filters = [] if model.response_filters: for response_filter_model in model.response_filters: - response_filters.append(self._create_component_from_model(model=response_filter_model, config=config)) - response_filters.append(HttpResponseFilter(config=config, parameters=model.parameters or {})) + response_filters.append( + self._create_component_from_model(model=response_filter_model, config=config) + ) + response_filters.append( + HttpResponseFilter(config=config, parameters=model.parameters or {}) 
+ ) return DefaultErrorHandler( backoff_strategies=backoff_strategies, @@ -1002,17 +1392,25 @@ def create_default_paginator( ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]: if decoder: if not isinstance(decoder, (JsonDecoder, XmlDecoder)): - raise ValueError(f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead.") + raise ValueError( + f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." + ) decoder_to_use = PaginationDecoderDecorator(decoder=decoder) else: decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={})) page_size_option = ( - self._create_component_from_model(model=model.page_size_option, config=config) if model.page_size_option else None + self._create_component_from_model(model=model.page_size_option, config=config) + if model.page_size_option + else None ) page_token_option = ( - self._create_component_from_model(model=model.page_token_option, config=config) if model.page_token_option else None + self._create_component_from_model(model=model.page_token_option, config=config) + if model.page_token_option + else None + ) + pagination_strategy = self._create_component_from_model( + model=model.pagination_strategy, config=config, decoder=decoder_to_use ) - pagination_strategy = self._create_component_from_model(model=model.pagination_strategy, config=config, decoder=decoder_to_use) if cursor_used_for_stop_condition: pagination_strategy = StopConditionPaginationStrategyDecorator( pagination_strategy, CursorStopCondition(cursor_used_for_stop_condition) @@ -1031,29 +1429,55 @@ def create_default_paginator( return paginator def create_dpath_extractor( - self, model: DpathExtractorModel, config: Config, decoder: Optional[Decoder] = None, **kwargs: Any + self, + model: DpathExtractorModel, + config: Config, + decoder: Optional[Decoder] = None, + **kwargs: Any, ) -> DpathExtractor: if decoder: decoder_to_use = decoder else: 
decoder_to_use = JsonDecoder(parameters={}) model_field_path: List[Union[InterpolatedString, str]] = [x for x in model.field_path] - return DpathExtractor(decoder=decoder_to_use, field_path=model_field_path, config=config, parameters=model.parameters or {}) + return DpathExtractor( + decoder=decoder_to_use, + field_path=model_field_path, + config=config, + parameters=model.parameters or {}, + ) @staticmethod - def create_exponential_backoff_strategy(model: ExponentialBackoffStrategyModel, config: Config) -> ExponentialBackoffStrategy: - return ExponentialBackoffStrategy(factor=model.factor or 5, parameters=model.parameters or {}, config=config) + def create_exponential_backoff_strategy( + model: ExponentialBackoffStrategyModel, config: Config + ) -> ExponentialBackoffStrategy: + return ExponentialBackoffStrategy( + factor=model.factor or 5, parameters=model.parameters or {}, config=config + ) - def create_http_requester(self, model: HttpRequesterModel, decoder: Decoder, config: Config, *, name: str) -> HttpRequester: + def create_http_requester( + self, model: HttpRequesterModel, decoder: Decoder, config: Config, *, name: str + ) -> HttpRequester: authenticator = ( - self._create_component_from_model(model=model.authenticator, config=config, url_base=model.url_base, name=name, decoder=decoder) + self._create_component_from_model( + model=model.authenticator, + config=config, + url_base=model.url_base, + name=name, + decoder=decoder, + ) if model.authenticator else None ) error_handler = ( self._create_component_from_model(model=model.error_handler, config=config) if model.error_handler - else DefaultErrorHandler(backoff_strategies=[], response_filters=[], config=config, parameters=model.parameters or {}) + else DefaultErrorHandler( + backoff_strategies=[], + response_filters=[], + config=config, + parameters=model.parameters or {}, + ) ) request_options_provider = InterpolatedRequestOptionsProvider( @@ -1088,7 +1512,9 @@ def create_http_requester(self, model: 
HttpRequesterModel, decoder: Decoder, con ) @staticmethod - def create_http_response_filter(model: HttpResponseFilterModel, config: Config, **kwargs: Any) -> HttpResponseFilter: + def create_http_response_filter( + model: HttpResponseFilterModel, config: Config, **kwargs: Any + ) -> HttpResponseFilter: if model.action: action = ResponseAction(model.action.value) else: @@ -1112,7 +1538,9 @@ def create_http_response_filter(model: HttpResponseFilterModel, config: Config, ) @staticmethod - def create_inline_schema_loader(model: InlineSchemaLoaderModel, config: Config, **kwargs: Any) -> InlineSchemaLoader: + def create_inline_schema_loader( + model: InlineSchemaLoaderModel, config: Config, **kwargs: Any + ) -> InlineSchemaLoader: return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) @staticmethod @@ -1120,11 +1548,15 @@ def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) return JsonDecoder(parameters={}) @staticmethod - def create_jsonl_decoder(model: JsonlDecoderModel, config: Config, **kwargs: Any) -> JsonlDecoder: + def create_jsonl_decoder( + model: JsonlDecoderModel, config: Config, **kwargs: Any + ) -> JsonlDecoder: return JsonlDecoder(parameters={}) @staticmethod - def create_iterable_decoder(model: IterableDecoderModel, config: Config, **kwargs: Any) -> IterableDecoder: + def create_iterable_decoder( + model: IterableDecoderModel, config: Config, **kwargs: Any + ) -> IterableDecoder: return IterableDecoder(parameters={}) @staticmethod @@ -1132,15 +1564,23 @@ def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> return XmlDecoder(parameters={}) @staticmethod - def create_gzipjson_decoder(model: GzipJsonDecoderModel, config: Config, **kwargs: Any) -> GzipJsonDecoder: + def create_gzipjson_decoder( + model: GzipJsonDecoderModel, config: Config, **kwargs: Any + ) -> GzipJsonDecoder: return GzipJsonDecoder(parameters={}, encoding=model.encoding) @staticmethod - def 
create_json_file_schema_loader(model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any) -> JsonFileSchemaLoader: - return JsonFileSchemaLoader(file_path=model.file_path or "", config=config, parameters=model.parameters or {}) + def create_json_file_schema_loader( + model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any + ) -> JsonFileSchemaLoader: + return JsonFileSchemaLoader( + file_path=model.file_path or "", config=config, parameters=model.parameters or {} + ) @staticmethod - def create_jwt_authenticator(model: JwtAuthenticatorModel, config: Config, **kwargs: Any) -> JwtAuthenticator: + def create_jwt_authenticator( + model: JwtAuthenticatorModel, config: Config, **kwargs: Any + ) -> JwtAuthenticator: jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) return JwtAuthenticator( @@ -1162,7 +1602,9 @@ def create_jwt_authenticator(model: JwtAuthenticatorModel, config: Config, **kwa ) @staticmethod - def create_list_partition_router(model: ListPartitionRouterModel, config: Config, **kwargs: Any) -> ListPartitionRouter: + def create_list_partition_router( + model: ListPartitionRouterModel, config: Config, **kwargs: Any + ) -> ListPartitionRouter: request_option = ( RequestOption( inject_into=RequestOptionType(model.request_option.inject_into.value), @@ -1181,7 +1623,9 @@ def create_list_partition_router(model: ListPartitionRouterModel, config: Config ) @staticmethod - def create_min_max_datetime(model: MinMaxDatetimeModel, config: Config, **kwargs: Any) -> MinMaxDatetime: + def create_min_max_datetime( + model: MinMaxDatetimeModel, config: Config, **kwargs: Any + ) -> MinMaxDatetime: return MinMaxDatetime( datetime=model.datetime, datetime_format=model.datetime_format or "", @@ -1195,29 +1639,43 @@ def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: return NoAuth(parameters=model.parameters or {}) @staticmethod - 
def create_no_pagination(model: NoPaginationModel, config: Config, **kwargs: Any) -> NoPagination: + def create_no_pagination( + model: NoPaginationModel, config: Config, **kwargs: Any + ) -> NoPagination: return NoPagination(parameters={}) - def create_oauth_authenticator(self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any) -> DeclarativeOauth2Authenticator: + def create_oauth_authenticator( + self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any + ) -> DeclarativeOauth2Authenticator: if model.refresh_token_updater: # ignore type error because fixing it would have a lot of dependencies, revisit later return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore config, - InterpolatedString.create(model.token_refresh_endpoint, parameters=model.parameters or {}).eval(config), + InterpolatedString.create( + model.token_refresh_endpoint, parameters=model.parameters or {} + ).eval(config), access_token_name=InterpolatedString.create( model.access_token_name or "access_token", parameters=model.parameters or {} ).eval(config), refresh_token_name=model.refresh_token_updater.refresh_token_name, - expires_in_name=InterpolatedString.create(model.expires_in_name or "expires_in", parameters=model.parameters or {}).eval( - config - ), - client_id=InterpolatedString.create(model.client_id, parameters=model.parameters or {}).eval(config), - client_secret=InterpolatedString.create(model.client_secret, parameters=model.parameters or {}).eval(config), + expires_in_name=InterpolatedString.create( + model.expires_in_name or "expires_in", parameters=model.parameters or {} + ).eval(config), + client_id=InterpolatedString.create( + model.client_id, parameters=model.parameters or {} + ).eval(config), + client_secret=InterpolatedString.create( + model.client_secret, parameters=model.parameters or {} + ).eval(config), access_token_config_path=model.refresh_token_updater.access_token_config_path, 
refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path, token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path, - grant_type=InterpolatedString.create(model.grant_type or "refresh_token", parameters=model.parameters or {}).eval(config), - refresh_request_body=InterpolatedMapping(model.refresh_request_body or {}, parameters=model.parameters or {}).eval(config), + grant_type=InterpolatedString.create( + model.grant_type or "refresh_token", parameters=model.parameters or {} + ).eval(config), + refresh_request_body=InterpolatedMapping( + model.refresh_request_body or {}, parameters=model.parameters or {} + ).eval(config), scopes=model.scopes, token_expiry_date_format=model.token_expiry_date_format, message_repository=self._message_repository, @@ -1245,7 +1703,9 @@ def create_oauth_authenticator(self, model: OAuthAuthenticatorModel, config: Con ) @staticmethod - def create_offset_increment(model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any) -> OffsetIncrement: + def create_offset_increment( + model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any + ) -> OffsetIncrement: if isinstance(decoder, PaginationDecoderDecorator): if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)): raise ValueError( @@ -1254,7 +1714,9 @@ def create_offset_increment(model: OffsetIncrementModel, config: Config, decoder decoder_to_use = decoder else: if not isinstance(decoder, (JsonDecoder, XmlDecoder)): - raise ValueError(f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead.") + raise ValueError( + f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." 
+ ) decoder_to_use = PaginationDecoderDecorator(decoder=decoder) return OffsetIncrement( page_size=model.page_size, @@ -1265,7 +1727,9 @@ def create_offset_increment(model: OffsetIncrementModel, config: Config, decoder ) @staticmethod - def create_page_increment(model: PageIncrementModel, config: Config, **kwargs: Any) -> PageIncrement: + def create_page_increment( + model: PageIncrementModel, config: Config, **kwargs: Any + ) -> PageIncrement: return PageIncrement( page_size=model.page_size, config=config, @@ -1274,9 +1738,15 @@ def create_page_increment(model: PageIncrementModel, config: Config, **kwargs: A parameters=model.parameters or {}, ) - def create_parent_stream_config(self, model: ParentStreamConfigModel, config: Config, **kwargs: Any) -> ParentStreamConfig: + def create_parent_stream_config( + self, model: ParentStreamConfigModel, config: Config, **kwargs: Any + ) -> ParentStreamConfig: declarative_stream = self._create_component_from_model(model.stream, config=config) - request_option = self._create_component_from_model(model.request_option, config=config) if model.request_option else None + request_option = ( + self._create_component_from_model(model.request_option, config=config) + if model.request_option + else None + ) return ParentStreamConfig( parent_key=model.parent_key, request_option=request_option, @@ -1289,15 +1759,21 @@ def create_parent_stream_config(self, model: ParentStreamConfigModel, config: Co ) @staticmethod - def create_record_filter(model: RecordFilterModel, config: Config, **kwargs: Any) -> RecordFilter: - return RecordFilter(condition=model.condition or "", config=config, parameters=model.parameters or {}) + def create_record_filter( + model: RecordFilterModel, config: Config, **kwargs: Any + ) -> RecordFilter: + return RecordFilter( + condition=model.condition or "", config=config, parameters=model.parameters or {} + ) @staticmethod def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: 
return RequestPath(parameters={}) @staticmethod - def create_request_option(model: RequestOptionModel, config: Config, **kwargs: Any) -> RequestOption: + def create_request_option( + model: RequestOptionModel, config: Config, **kwargs: Any + ) -> RequestOption: inject_into = RequestOptionType(model.inject_into.value) return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={}) @@ -1312,16 +1788,26 @@ def create_record_selector( **kwargs: Any, ) -> RecordSelector: assert model.schema_normalization is not None # for mypy - extractor = self._create_component_from_model(model=model.extractor, decoder=decoder, config=config) - record_filter = self._create_component_from_model(model.record_filter, config=config) if model.record_filter else None + extractor = self._create_component_from_model( + model=model.extractor, decoder=decoder, config=config + ) + record_filter = ( + self._create_component_from_model(model.record_filter, config=config) + if model.record_filter + else None + ) if client_side_incremental_sync: record_filter = ClientSideIncrementalRecordFilterDecorator( config=config, parameters=model.parameters, - condition=model.record_filter.condition if (model.record_filter and hasattr(model.record_filter, "condition")) else None, + condition=model.record_filter.condition + if (model.record_filter and hasattr(model.record_filter, "condition")) + else None, **client_side_incremental_sync, ) - schema_normalization = TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization]) + schema_normalization = TypeTransformer( + SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization] + ) return RecordSelector( extractor=extractor, @@ -1333,11 +1819,20 @@ def create_record_selector( ) @staticmethod - def create_remove_fields(model: RemoveFieldsModel, config: Config, **kwargs: Any) -> RemoveFields: - return RemoveFields(field_pointers=model.field_pointers, condition=model.condition or "", parameters={}) - - def 
create_selective_authenticator(self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any) -> DeclarativeAuthenticator: - authenticators = {name: self._create_component_from_model(model=auth, config=config) for name, auth in model.authenticators.items()} + def create_remove_fields( + model: RemoveFieldsModel, config: Config, **kwargs: Any + ) -> RemoveFields: + return RemoveFields( + field_pointers=model.field_pointers, condition=model.condition or "", parameters={} + ) + + def create_selective_authenticator( + self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any + ) -> DeclarativeAuthenticator: + authenticators = { + name: self._create_component_from_model(model=auth, config=config) + for name, auth in model.authenticators.items() + } # SelectiveAuthenticator will return instance of DeclarativeAuthenticator or raise ValueError error return SelectiveAuthenticator( # type: ignore[abstract] config=config, @@ -1376,8 +1871,14 @@ def create_simple_retriever( client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[RecordTransformation], ) -> SimpleRetriever: - decoder = self._create_component_from_model(model=model.decoder, config=config) if model.decoder else JsonDecoder(parameters={}) - requester = self._create_component_from_model(model=model.requester, decoder=decoder, config=config, name=name) + decoder = ( + self._create_component_from_model(model=model.decoder, config=config) + if model.decoder + else JsonDecoder(parameters={}) + ) + requester = self._create_component_from_model( + model=model.requester, decoder=decoder, config=config, name=name + ) record_selector = self._create_component_from_model( model=model.record_selector, config=config, @@ -1385,12 +1886,19 @@ def create_simple_retriever( transformations=transformations, client_side_incremental_sync=client_side_incremental_sync, ) - url_base = model.requester.url_base if hasattr(model.requester, "url_base") else requester.get_url_base() + 
url_base = ( + model.requester.url_base + if hasattr(model.requester, "url_base") + else requester.get_url_base() + ) # Define cursor only if per partition or common incremental support is needed cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None - if not isinstance(stream_slicer, DatetimeBasedCursor) or type(stream_slicer) is not DatetimeBasedCursor: + if ( + not isinstance(stream_slicer, DatetimeBasedCursor) + or type(stream_slicer) is not DatetimeBasedCursor + ): # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods). # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's @@ -1414,7 +1922,9 @@ def create_simple_retriever( else NoPagination(parameters={}) ) - ignore_stream_slicer_parameters_on_paginated_requests = model.ignore_stream_slicer_parameters_on_paginated_requests or False + ignore_stream_slicer_parameters_on_paginated_requests = ( + model.ignore_stream_slicer_parameters_on_paginated_requests or False + ) if self._limit_slices_fetched or self._emit_connector_builder_messages: return SimpleRetrieverTestReadDecorator( @@ -1481,14 +1991,19 @@ def create_async_retriever( config: Config, *, name: str, - primary_key: Optional[Union[str, List[str], List[List[str]]]], # this seems to be needed to match create_simple_retriever + primary_key: Optional[ + Union[str, List[str], List[List[str]]] + ], # this seems to be needed to match create_simple_retriever stream_slicer: Optional[StreamSlicer], client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[RecordTransformation], **kwargs: Any, ) -> AsyncRetriever: - - decoder = self._create_component_from_model(model=model.decoder, config=config) if model.decoder else JsonDecoder(parameters={}) + decoder = ( + 
self._create_component_from_model(model=model.decoder, config=config) + if model.decoder + else JsonDecoder(parameters={}) + ) record_selector = self._create_component_from_model( model=model.record_selector, config=config, @@ -1498,14 +2013,23 @@ def create_async_retriever( ) stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) creation_requester = self._create_component_from_model( - model=model.creation_requester, decoder=decoder, config=config, name=f"job creation - {name}" + model=model.creation_requester, + decoder=decoder, + config=config, + name=f"job creation - {name}", ) polling_requester = self._create_component_from_model( - model=model.polling_requester, decoder=decoder, config=config, name=f"job polling - {name}" + model=model.polling_requester, + decoder=decoder, + config=config, + name=f"job polling - {name}", ) job_download_components_name = f"job download - {name}" download_requester = self._create_component_from_model( - model=model.download_requester, decoder=decoder, config=config, name=job_download_components_name + model=model.download_requester, + decoder=decoder, + config=config, + name=job_download_components_name, ) download_retriever = SimpleRetriever( requester=download_requester, @@ -1520,7 +2044,9 @@ def create_async_retriever( primary_key=None, name=job_download_components_name, paginator=( - self._create_component_from_model(model=model.download_paginator, decoder=decoder, config=config, url_base="") + self._create_component_from_model( + model=model.download_paginator, decoder=decoder, config=config, url_base="" + ) if model.download_paginator else NoPagination(parameters={}) ), @@ -1528,17 +2054,31 @@ def create_async_retriever( parameters={}, ) abort_requester = ( - self._create_component_from_model(model=model.abort_requester, decoder=decoder, config=config, name=f"job abort - {name}") + self._create_component_from_model( + model=model.abort_requester, + decoder=decoder, + config=config, + name=f"job abort - 
{name}", + ) if model.abort_requester else None ) delete_requester = ( - self._create_component_from_model(model=model.delete_requester, decoder=decoder, config=config, name=f"job delete - {name}") + self._create_component_from_model( + model=model.delete_requester, + decoder=decoder, + config=config, + name=f"job delete - {name}", + ) if model.delete_requester else None ) - status_extractor = self._create_component_from_model(model=model.status_extractor, decoder=decoder, config=config, name=name) - urls_extractor = self._create_component_from_model(model=model.urls_extractor, decoder=decoder, config=config, name=name) + status_extractor = self._create_component_from_model( + model=model.status_extractor, decoder=decoder, config=config, name=name + ) + urls_extractor = self._create_component_from_model( + model=model.urls_extractor, decoder=decoder, config=config, name=name + ) job_repository: AsyncJobRepository = AsyncHttpJobRepository( creation_requester=creation_requester, polling_requester=polling_requester, @@ -1554,7 +2094,9 @@ def create_async_retriever( job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( job_repository, stream_slices, - JobTracker(1), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1 + JobTracker( + 1 + ), # FIXME eventually make the number of concurrent jobs in the API configurable. 
Until then, we limit to 1 self._message_repository, has_bulk_parent=False, # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk ), @@ -1580,14 +2122,22 @@ def create_substream_partition_router( if model.parent_stream_configs: parent_stream_configs.extend( [ - self._create_message_repository_substream_wrapper(model=parent_stream_config, config=config) + self._create_message_repository_substream_wrapper( + model=parent_stream_config, config=config + ) for parent_stream_config in model.parent_stream_configs ] ) - return SubstreamPartitionRouter(parent_stream_configs=parent_stream_configs, parameters=model.parameters or {}, config=config) + return SubstreamPartitionRouter( + parent_stream_configs=parent_stream_configs, + parameters=model.parameters or {}, + config=config, + ) - def _create_message_repository_substream_wrapper(self, model: ParentStreamConfigModel, config: Config) -> Any: + def _create_message_repository_substream_wrapper( + self, model: ParentStreamConfigModel, config: Config + ) -> Any: substream_factory = ModelToComponentFactory( limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, limit_slices_fetched=self._limit_slices_fetched, @@ -1603,13 +2153,17 @@ def _create_message_repository_substream_wrapper(self, model: ParentStreamConfig return substream_factory._create_component_from_model(model=model, config=config) @staticmethod - def create_wait_time_from_header(model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any) -> WaitTimeFromHeaderBackoffStrategy: + def create_wait_time_from_header( + model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any + ) -> WaitTimeFromHeaderBackoffStrategy: return WaitTimeFromHeaderBackoffStrategy( header=model.header, parameters=model.parameters or {}, config=config, regex=model.regex, - max_waiting_time_in_seconds=model.max_waiting_time_in_seconds if model.max_waiting_time_in_seconds is not None else None, + 
max_waiting_time_in_seconds=model.max_waiting_time_in_seconds + if model.max_waiting_time_in_seconds is not None + else None, ) @staticmethod @@ -1617,11 +2171,15 @@ def create_wait_until_time_from_header( model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any ) -> WaitUntilTimeFromHeaderBackoffStrategy: return WaitUntilTimeFromHeaderBackoffStrategy( - header=model.header, parameters=model.parameters or {}, config=config, min_wait=model.min_wait, regex=model.regex + header=model.header, + parameters=model.parameters or {}, + config=config, + min_wait=model.min_wait, + regex=model.regex, ) def get_message_repository(self) -> MessageRepository: return self._message_repository def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: - return Level.DEBUG if emit_connector_builder_messages else Level.INFO \ No newline at end of file + return Level.DEBUG if emit_connector_builder_messages else Level.INFO diff --git a/unit_tests/sources/declarative/decoders/test_json_decoder.py b/unit_tests/sources/declarative/decoders/test_json_decoder.py index 0af625d7..8d4d22f7 100644 --- a/unit_tests/sources/declarative/decoders/test_json_decoder.py +++ b/unit_tests/sources/declarative/decoders/test_json_decoder.py @@ -11,7 +11,9 @@ from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder, JsonlDecoder from airbyte_cdk.sources.declarative.models import DeclarativeStream as DeclarativeStreamModel -from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ModelToComponentFactory +from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ( + ModelToComponentFactory, +) from airbyte_cdk.sources.declarative.decoders import GzipJsonDecoder @@ -121,6 +123,7 @@ def get_body(): assert counter == lines_in_response * len(stream_slices) + @pytest.mark.parametrize( "encoding", [ @@ -130,65 +133,67 @@ def get_body(): ids=["utf-8", "utf"], ) def 
test_gzipjson_decoder(requests_mock, encoding): - response_to_compress = json.dumps([ - { - "campaignId": 214078428, - "campaignName": "sample-campaign-name-214078428", - "adGroupId": "6490134", - "adId": "665320125", - "targetId": "791320341", - "asin": "G000PSH142", - "advertisedAsin": "G000PSH142", - "keywordBid": "511234974", - "keywordId": "965783021" - }, - { - "campaignId": 44504582, - "campaignName": "sample-campaign-name-44504582", - "adGroupId": "6490134", - "adId": "665320125", - "targetId": "791320341", - "asin": "G000PSH142", - "advertisedAsin": "G000PSH142", - "keywordBid": "511234974", - "keywordId": "965783021" - }, - { - "campaignId": 509144838, - "campaignName": "sample-campaign-name-509144838", - "adGroupId": "6490134", - "adId": "665320125", - "targetId": "791320341", - "asin": "G000PSH142", - "advertisedAsin": "G000PSH142", - "keywordBid": "511234974", - "keywordId": "965783021" - }, - { - "campaignId": 231712082, - "campaignName": "sample-campaign-name-231712082", - "adGroupId": "6490134", - "adId": "665320125", - "targetId": "791320341", - "asin": "G000PSH142", - "advertisedAsin": "G000PSH142", - "keywordBid": "511234974", - "keywordId": "965783021" - }, - { - "campaignId": 895306040, - "campaignName": "sample-campaign-name-895306040", - "adGroupId": "6490134", - "adId": "665320125", - "targetId": "791320341", - "asin": "G000PSH142", - "advertisedAsin": "G000PSH142", - "keywordBid": "511234974", - "keywordId": "965783021" - } - ]) + response_to_compress = json.dumps( + [ + { + "campaignId": 214078428, + "campaignName": "sample-campaign-name-214078428", + "adGroupId": "6490134", + "adId": "665320125", + "targetId": "791320341", + "asin": "G000PSH142", + "advertisedAsin": "G000PSH142", + "keywordBid": "511234974", + "keywordId": "965783021", + }, + { + "campaignId": 44504582, + "campaignName": "sample-campaign-name-44504582", + "adGroupId": "6490134", + "adId": "665320125", + "targetId": "791320341", + "asin": "G000PSH142", + "advertisedAsin": 
"G000PSH142", + "keywordBid": "511234974", + "keywordId": "965783021", + }, + { + "campaignId": 509144838, + "campaignName": "sample-campaign-name-509144838", + "adGroupId": "6490134", + "adId": "665320125", + "targetId": "791320341", + "asin": "G000PSH142", + "advertisedAsin": "G000PSH142", + "keywordBid": "511234974", + "keywordId": "965783021", + }, + { + "campaignId": 231712082, + "campaignName": "sample-campaign-name-231712082", + "adGroupId": "6490134", + "adId": "665320125", + "targetId": "791320341", + "asin": "G000PSH142", + "advertisedAsin": "G000PSH142", + "keywordBid": "511234974", + "keywordId": "965783021", + }, + { + "campaignId": 895306040, + "campaignName": "sample-campaign-name-895306040", + "adGroupId": "6490134", + "adId": "665320125", + "targetId": "791320341", + "asin": "G000PSH142", + "advertisedAsin": "G000PSH142", + "keywordBid": "511234974", + "keywordId": "965783021", + }, + ] + ) body = gzip.compress(response_to_compress.encode(encoding)) requests_mock.register_uri("GET", "https://airbyte.io/", content=body) response = requests.get("https://airbyte.io/") - assert len(list(GzipJsonDecoder(parameters={}, encoding=encoding).decode(response))) == 5 \ No newline at end of file + assert len(list(GzipJsonDecoder(parameters={}, encoding=encoding).decode(response))) == 5 From 3fe7ac4b9917cbba5eeb691e4b575f861e17c5e2 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 11 Nov 2024 17:54:21 +0100 Subject: [PATCH 06/17] Airbyte CDK: rev non-related changes --- .../parsers/model_to_component_factory.py | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index f89ec3b2..f7f43edc 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -363,7 +363,6 @@ from 
airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import ( CustomFormatConcurrentStreamStateConverter, DateTimeStreamStateConverter, - EpochValueConcurrentStreamStateConverter, ) from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction from airbyte_cdk.sources.types import Config @@ -806,18 +805,13 @@ def create_concurrent_cursor_from_datetime_based_cursor( lookback_window = parse_duration(evaluated_lookback_window) connector_state_converter: DateTimeStreamStateConverter - if datetime_format == self.EPOCH_DATETIME_FORMAT: - connector_state_converter = EpochValueConcurrentStreamStateConverter( - is_sequential_state=True - ) - else: - connector_state_converter = CustomFormatConcurrentStreamStateConverter( - datetime_format=datetime_format, - input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, - is_sequential_state=True, - cursor_granularity=cursor_granularity, - # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice - ) + connector_state_converter = CustomFormatConcurrentStreamStateConverter( + datetime_format=datetime_format, + input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats, + is_sequential_state=True, + cursor_granularity=cursor_granularity, + # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. 
Confirmed functionality is working in practice + ) start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime] if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel): @@ -871,7 +865,7 @@ def create_concurrent_cursor_from_datetime_based_cursor( ) # When step is not defined, default to a step size from the starting date to the present moment - step_length = datetime.datetime.now(tz=datetime.timezone.utc) - start_date + step_length = datetime.timedelta.max interpolated_step = ( InterpolatedString.create( datetime_based_cursor_model.step, From bba1a93f6decd0b7db74a54213aa30305fd92d61 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 11 Nov 2024 18:01:43 +0100 Subject: [PATCH 07/17] Airbyte CDK: fix mypy --- .../sources/declarative/decoders/json_decoder.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 14816c86..3d8486bb 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -5,7 +5,7 @@ import logging from dataclasses import InitVar, dataclass from gzip import decompress -from typing import Any, Generator, Mapping +from typing import Any, Generator, Mapping, MutableMapping import requests from airbyte_cdk.sources.declarative.decoders.decoder import Decoder @@ -25,7 +25,7 @@ class JsonDecoder(Decoder): def is_stream_response(self) -> bool: return False - def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]: + def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]: """ Given the response is an empty string or an emtpy list, the function will return a generator with an empty mapping. 
""" @@ -41,7 +41,7 @@ def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], No @staticmethod def parse_body_json( body_json: Mapping[str, Any] | list, - ) -> Generator[Mapping[str, Any], None, None]: + ) -> Generator[MutableMapping[str, Any], None, None]: if not isinstance(body_json, list): body_json = [body_json] if len(body_json) == 0: @@ -61,7 +61,7 @@ class IterableDecoder(Decoder): def is_stream_response(self) -> bool: return True - def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]: + def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]: for line in response.iter_lines(): yield {"record": line.decode()} @@ -77,7 +77,7 @@ class JsonlDecoder(Decoder): def is_stream_response(self) -> bool: return True - def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]: + def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]: # TODO???: set delimiter? usually it is `\n` but maybe it would be useful to set optional? 
# https://github.com/airbytehq/airbyte-internal-issues/issues/8436 for record in response.iter_lines(): @@ -88,6 +88,6 @@ def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], No class GzipJsonDecoder(JsonDecoder): encoding: str = "utf-8" - def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]: + def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]: raw_string = decompress(response.content).decode(encoding=self.encoding) yield from self.parse_body_json(orjson.loads(raw_string)) From 8446427fbec8207ce1e07a4fa31bc57f0b8071ba Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 13 Nov 2024 12:37:24 +0100 Subject: [PATCH 08/17] Airbyte CDK: fix mypy Signed-off-by: Artem Inzhyyants --- airbyte_cdk/sources/declarative/decoders/json_decoder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 3d8486bb..d4d040ea 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -5,7 +5,7 @@ import logging from dataclasses import InitVar, dataclass from gzip import decompress -from typing import Any, Generator, Mapping, MutableMapping +from typing import Any, Generator, Mapping, MutableMapping, List import requests from airbyte_cdk.sources.declarative.decoders.decoder import Decoder @@ -40,7 +40,7 @@ def decode(self, response: requests.Response) -> Generator[MutableMapping[str, A @staticmethod def parse_body_json( - body_json: Mapping[str, Any] | list, + body_json: MutableMapping[str, Any] | List[MutableMapping[str, Any]], ) -> Generator[MutableMapping[str, Any], None, None]: if not isinstance(body_json, list): body_json = [body_json] From 8e7242f26acfd80c673410a486b613c0f6885651 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 13 Nov 2024 12:51:15 
+0100 Subject: [PATCH 09/17] Airbyte CDK: fix mypy [skip ci] Signed-off-by: Artem Inzhyyants --- airbyte_cdk/sources/declarative/decoders/json_decoder.py | 4 ++-- .../migrations/legacy_to_per_partition_state_migration.py | 6 +++--- .../declarative/parsers/model_to_component_factory.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index d4d040ea..a0690461 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -5,7 +5,7 @@ import logging from dataclasses import InitVar, dataclass from gzip import decompress -from typing import Any, Generator, Mapping, MutableMapping, List +from typing import Any, Generator, Mapping, MutableMapping, List, Optional import requests from airbyte_cdk.sources.declarative.decoders.decoder import Decoder @@ -86,7 +86,7 @@ def decode(self, response: requests.Response) -> Generator[MutableMapping[str, A @dataclass class GzipJsonDecoder(JsonDecoder): - encoding: str = "utf-8" + encoding: Optional[str] = "utf-8" def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]: raw_string = decompress(response.content).decode(encoding=self.encoding) diff --git a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py index 38546168..3b4ce575 100644 --- a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py @@ -4,7 +4,7 @@ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration -from airbyte_cdk.sources.declarative.models import 
DatetimeBasedCursor, SubstreamPartitionRouter +from airbyte_cdk.sources.declarative.models import DatetimeBasedCursor, SubstreamPartitionRouter, CustomPartitionRouter, CustomIncrementalSync from airbyte_cdk.sources.declarative.models.declarative_component_schema import ParentStreamConfig @@ -31,8 +31,8 @@ class LegacyToPerPartitionStateMigration(StateMigration): def __init__( self, - partition_router: SubstreamPartitionRouter, - cursor: DatetimeBasedCursor, + partition_router: CustomPartitionRouter | SubstreamPartitionRouter, + cursor: CustomIncrementalSync | DatetimeBasedCursor, config: Mapping[str, Any], parameters: Mapping[str, Any], ): diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index f7f43edc..60e4dc3e 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -629,10 +629,10 @@ def create_legacy_to_per_partition_state_migration( ) return LegacyToPerPartitionStateMigration( - declarative_stream.retriever.partition_router, - declarative_stream.incremental_sync, + partition_router, + declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. 
config, - declarative_stream.parameters, + declarative_stream.parameters, # type: ignore ) # type: ignore # The retriever type was already checked def create_session_token_authenticator( From 4bb8f12c4d20ec969fa5d81d3cd0678d90718210 Mon Sep 17 00:00:00 2001 From: octavia-squidington-iii Date: Wed, 13 Nov 2024 11:52:18 +0000 Subject: [PATCH 10/17] Auto-fix lint and format issues --- .../sources/declarative/decoders/json_decoder.py | 16 ++++++++++++---- .../legacy_to_per_partition_state_migration.py | 7 ++++++- .../parsers/model_to_component_factory.py | 2 +- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index a0690461..79c9613f 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -25,7 +25,9 @@ class JsonDecoder(Decoder): def is_stream_response(self) -> bool: return False - def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]: + def decode( + self, response: requests.Response + ) -> Generator[MutableMapping[str, Any], None, None]: """ Given the response is an empty string or an emtpy list, the function will return a generator with an empty mapping. 
""" @@ -61,7 +63,9 @@ class IterableDecoder(Decoder): def is_stream_response(self) -> bool: return True - def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]: + def decode( + self, response: requests.Response + ) -> Generator[MutableMapping[str, Any], None, None]: for line in response.iter_lines(): yield {"record": line.decode()} @@ -77,7 +81,9 @@ class JsonlDecoder(Decoder): def is_stream_response(self) -> bool: return True - def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]: + def decode( + self, response: requests.Response + ) -> Generator[MutableMapping[str, Any], None, None]: # TODO???: set delimiter? usually it is `\n` but maybe it would be useful to set optional? # https://github.com/airbytehq/airbyte-internal-issues/issues/8436 for record in response.iter_lines(): @@ -88,6 +94,8 @@ def decode(self, response: requests.Response) -> Generator[MutableMapping[str, A class GzipJsonDecoder(JsonDecoder): encoding: Optional[str] = "utf-8" - def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]: + def decode( + self, response: requests.Response + ) -> Generator[MutableMapping[str, Any], None, None]: raw_string = decompress(response.content).decode(encoding=self.encoding) yield from self.parse_body_json(orjson.loads(raw_string)) diff --git a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py index 3b4ce575..0c30993c 100644 --- a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py @@ -4,7 +4,12 @@ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration -from 
airbyte_cdk.sources.declarative.models import DatetimeBasedCursor, SubstreamPartitionRouter, CustomPartitionRouter, CustomIncrementalSync +from airbyte_cdk.sources.declarative.models import ( + DatetimeBasedCursor, + SubstreamPartitionRouter, + CustomPartitionRouter, + CustomIncrementalSync, +) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ParentStreamConfig diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 60e4dc3e..d2929053 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -632,7 +632,7 @@ def create_legacy_to_per_partition_state_migration( partition_router, declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. config, - declarative_stream.parameters, # type: ignore + declarative_stream.parameters, # type: ignore ) # type: ignore # The retriever type was already checked def create_session_token_authenticator( From 4335e11ae5fbffea688f8c5bef7e1d2f7b539072 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 13 Nov 2024 13:03:06 +0100 Subject: [PATCH 11/17] Airbyte CDK: fix mypy Signed-off-by: Artem Inzhyyants --- airbyte_cdk/sources/declarative/decoders/json_decoder.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 79c9613f..f4f0d7ce 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -3,9 +3,9 @@ # import logging -from dataclasses import InitVar, dataclass +from dataclasses import InitVar, dataclass, field from gzip import decompress -from typing import Any, Generator, Mapping, MutableMapping, List, 
Optional +from typing import Any, Generator, Mapping, MutableMapping, List import requests from airbyte_cdk.sources.declarative.decoders.decoder import Decoder @@ -92,7 +92,7 @@ def decode( @dataclass class GzipJsonDecoder(JsonDecoder): - encoding: Optional[str] = "utf-8" + encoding: str = field(default="utf-8") def decode( self, response: requests.Response From c0ea647908d7acdb35f5cf870367bd7332957e13 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 13 Nov 2024 14:50:39 +0100 Subject: [PATCH 12/17] Airbyte CDK: fix mypy Signed-off-by: Artem Inzhyyants --- .../declarative/decoders/json_decoder.py | 17 +++++++++++++---- .../legacy_to_per_partition_state_migration.py | 8 +++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index f4f0d7ce..5a79506c 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -1,11 +1,11 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # - +import codecs import logging from dataclasses import InitVar, dataclass, field from gzip import decompress -from typing import Any, Generator, Mapping, MutableMapping, List +from typing import Any, Generator, Mapping, MutableMapping, List, Optional import requests from airbyte_cdk.sources.declarative.decoders.decoder import Decoder @@ -92,10 +92,19 @@ def decode( @dataclass class GzipJsonDecoder(JsonDecoder): - encoding: str = field(default="utf-8") + encoding: Optional[str] + + def __post_init__(self, parameters: Mapping[str, Any]) -> None: + if self.encoding: + try: + codecs.lookup(self.encoding) + except LookupError: + raise ValueError( + f"Invalid encoding '{self.encoding}'. 
Please check provided encoding" + ) def decode( self, response: requests.Response ) -> Generator[MutableMapping[str, Any], None, None]: - raw_string = decompress(response.content).decode(encoding=self.encoding) + raw_string = decompress(response.content).decode(encoding=self.encoding or "utf-8") yield from self.parse_body_json(orjson.loads(raw_string)) diff --git a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py index 0c30993c..7dc5f8fd 100644 --- a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py @@ -52,8 +52,10 @@ def __init__( self._cursor.cursor_field, parameters=self._parameters ).eval(self._config) - def _get_partition_field(self, partition_router: SubstreamPartitionRouter) -> str: - parent_stream_config = partition_router.parent_stream_configs[0] + def _get_partition_field( + self, partition_router: CustomPartitionRouter | SubstreamPartitionRouter + ) -> str: + parent_stream_config = partition_router.parent_stream_configs[0] # type: ignore # custom partition will introduce this atribute if needed # Retrieve the partition field with a condition, as properties are returned as a dictionary for custom components. 
partition_field = ( @@ -69,7 +71,7 @@ def should_migrate(self, stream_state: Mapping[str, Any]) -> bool: return False # There is exactly one parent stream - number_of_parent_streams = len(self._partition_router.parent_stream_configs) + number_of_parent_streams = len(self._partition_router.parent_stream_configs) # type: ignore # custom partition will introduce this attribute if needed if number_of_parent_streams != 1: # There should be exactly one parent stream return False From eabac078a7d1a6a50bdbc3a3107346feb5797199 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 13 Nov 2024 14:52:33 +0100 Subject: [PATCH 13/17] Airbyte CDK: fix mypy Signed-off-by: Artem Inzhyyants --- airbyte_cdk/sources/declarative/decoders/json_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 5a79506c..2da06d1e 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -3,7 +3,7 @@ # import codecs import logging -from dataclasses import InitVar, dataclass, field +from dataclasses import InitVar, dataclass from gzip import decompress from typing import Any, Generator, Mapping, MutableMapping, List, Optional From a10c1a1db17039a47a98db56184ea0c042d5b89e Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 13 Nov 2024 14:57:41 +0100 Subject: [PATCH 14/17] Airbyte CDK: fix orjson import Signed-off-by: Artem Inzhyyants --- airbyte_cdk/sources/declarative/decoders/json_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 2da06d1e..d04504af 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -9,7 +9,7 @@ import requests from 
airbyte_cdk.sources.declarative.decoders.decoder import Decoder -from orjson import orjson +import orjson logger = logging.getLogger("airbyte") From 4a260a2db4a0def2750287183a03222dc93ab5f6 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 13 Nov 2024 17:10:17 +0100 Subject: [PATCH 15/17] Airbyte CDK: update mypy Signed-off-by: Artem Inzhyyants --- .../migrations/legacy_to_per_partition_state_migration.py | 6 +++--- .../declarative/parsers/model_to_component_factory.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py index 7dc5f8fd..1df0aed4 100644 --- a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py @@ -36,7 +36,7 @@ class LegacyToPerPartitionStateMigration(StateMigration): def __init__( self, - partition_router: CustomPartitionRouter | SubstreamPartitionRouter, + partition_router: SubstreamPartitionRouter, cursor: CustomIncrementalSync | DatetimeBasedCursor, config: Mapping[str, Any], parameters: Mapping[str, Any], @@ -53,9 +53,9 @@ def __init__( ).eval(self._config) def _get_partition_field( - self, partition_router: CustomPartitionRouter | SubstreamPartitionRouter + self, partition_router: SubstreamPartitionRouter ) -> str: - parent_stream_config = partition_router.parent_stream_configs[0] # type: ignore # custom partition will introduce this atribute if needed + parent_stream_config = partition_router.parent_stream_configs[0] # Retrieve the partition field with a condition, as properties are returned as a dictionary for custom components. 
partition_field = ( diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index d2929053..aed2452f 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -632,7 +632,7 @@ def create_legacy_to_per_partition_state_migration( partition_router, declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. config, - declarative_stream.parameters, # type: ignore + declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] ) # type: ignore # The retriever type was already checked def create_session_token_authenticator( From 95f748e2d08374ce4273dd7fb2c58780786930f9 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 13 Nov 2024 17:19:43 +0100 Subject: [PATCH 16/17] Airbyte CDK: update mypy Signed-off-by: Artem Inzhyyants --- .../migrations/legacy_to_per_partition_state_migration.py | 1 - .../declarative/parsers/model_to_component_factory.py | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py index 1df0aed4..a615bca1 100644 --- a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py @@ -7,7 +7,6 @@ from airbyte_cdk.sources.declarative.models import ( DatetimeBasedCursor, SubstreamPartitionRouter, - CustomPartitionRouter, CustomIncrementalSync, ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ParentStreamConfig diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py 
b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index aed2452f..9674dc0a 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -628,8 +628,13 @@ def create_legacy_to_per_partition_state_migration( "LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration." ) + if not hasattr(declarative_stream, "incremental_sync"): + raise ValueError( + "LegacyToPerPartitionStateMigrations can only be applied with an incremental_sync configuration." + ) + return LegacyToPerPartitionStateMigration( - partition_router, + partition_router, # type: ignore # was already checked above declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. config, declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any] From 466806529f6d3ae9680925058ff2197642e31121 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Wed, 13 Nov 2024 17:23:31 +0100 Subject: [PATCH 17/17] Airbyte CDK: update lint Signed-off-by: Artem Inzhyyants --- .../migrations/legacy_to_per_partition_state_migration.py | 4 +--- .../sources/declarative/parsers/model_to_component_factory.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py index a615bca1..8000b187 100644 --- a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py @@ -51,9 +51,7 @@ def __init__( self._cursor.cursor_field, parameters=self._parameters ).eval(self._config) - def _get_partition_field( - self, partition_router: SubstreamPartitionRouter - ) -> str: + def 
_get_partition_field(self, partition_router: SubstreamPartitionRouter) -> str: parent_stream_config = partition_router.parent_stream_configs[0] # Retrieve the partition field with a condition, as properties are returned as a dictionary for custom components. diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 9674dc0a..d2dd9d9d 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -634,7 +634,7 @@ def create_legacy_to_per_partition_state_migration( ) return LegacyToPerPartitionStateMigration( - partition_router, # type: ignore # was already checked above + partition_router, # type: ignore # was already checked above declarative_stream.incremental_sync, # type: ignore # was already checked. Migration can be applied only to incremental streams. config, declarative_stream.parameters, # type: ignore # different type is expected here Mapping[str, Any], got Dict[str, Any]