From 624bfd09b72a2edd340a2a284890e2e8dadf0655 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Tue, 30 Jan 2024 13:47:46 +0100 Subject: [PATCH 01/12] =?UTF-8?q?=F0=9F=9A=80=20Bumped=20version=20after?= =?UTF-8?q?=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_viadot.py | 2 +- viadot/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_viadot.py b/tests/test_viadot.py index 71c3d6187..29cd4e622 100644 --- a/tests/test_viadot.py +++ b/tests/test_viadot.py @@ -2,4 +2,4 @@ def test_version(): - assert __version__ == "0.4.25" + assert __version__ == "0.4.26" diff --git a/viadot/__init__.py b/viadot/__init__.py index 1cc3baa70..9c8003d45 100644 --- a/viadot/__init__.py +++ b/viadot/__init__.py @@ -1 +1 @@ -__version__ = "0.4.25" +__version__ = "0.4.26" From c28e057a4e010611630bfb05a781538fbd580a03 Mon Sep 17 00:00:00 2001 From: mgwinner Date: Tue, 6 Feb 2024 13:09:39 +0100 Subject: [PATCH 02/12] =?UTF-8?q?=F0=9F=8E=A8=20Delete=20promote=5Fto=20ta?= =?UTF-8?q?sk=20from=20the=20flow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/adls_to_azure_sql.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index c12cc7e1d..abac388b3 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -327,22 +327,6 @@ def gen_flow(self) -> Flow: flow=self, ) - promote_to_conformed_task = AzureDataLakeCopy(timeout=self.timeout) - promote_to_conformed_task.bind( - from_path=self.adls_path, - to_path=self.adls_path_conformed, - sp_credentials_secret=self.adls_sp_credentials_secret, - vault_name=self.vault_name, - flow=self, - ) - promote_to_operations_task = AzureDataLakeCopy(timeout=self.timeout) - promote_to_operations_task.bind( - from_path=self.adls_path_conformed, - to_path=self.adls_path_operations, - 
sp_credentials_secret=self.adls_sp_credentials_secret, - vault_name=self.vault_name, - flow=self, - ) create_table_task = AzureSQLCreateTable(timeout=self.timeout) create_table_task.bind( schema=self.schema, @@ -372,9 +356,5 @@ def gen_flow(self) -> Flow: df_reorder.set_upstream(lake_to_df_task, flow=self) df_to_csv.set_upstream(df_reorder, flow=self) - promote_to_conformed_task.set_upstream(df_to_csv, flow=self) create_table_task.set_upstream(df_to_csv, flow=self) - promote_to_operations_task.set_upstream( - promote_to_conformed_task, flow=self - ) bulk_insert_task.set_upstream(create_table_task, flow=self) From 02b1a8a8697b058e07e2191f11a5aff73c1e0d88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Gwinner?= <34861507+malgorzatagwinner@users.noreply.github.com> Date: Tue, 13 Feb 2024 14:25:51 +0100 Subject: [PATCH 03/12] Delete first promote_test --- tests/integration/flows/test_adls_to_azure_sql.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/integration/flows/test_adls_to_azure_sql.py b/tests/integration/flows/test_adls_to_azure_sql.py index e3ae45623..b83a59e51 100644 --- a/tests/integration/flows/test_adls_to_azure_sql.py +++ b/tests/integration/flows/test_adls_to_azure_sql.py @@ -9,16 +9,6 @@ from viadot.flows.adls_to_azure_sql import check_dtypes_sort, df_to_csv_task -def test_get_promoted_adls_path_csv_file(): - adls_path_file = "raw/supermetrics/adls_ga_load_times_fr_test/2021-07-14T13%3A09%3A02.997357%2B00%3A00.csv" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_file) - promoted_path = flow.get_promoted_path(env="conformed") - assert ( - promoted_path - == "conformed/supermetrics/adls_ga_load_times_fr_test/2021-07-14T13%3A09%3A02.997357%2B00%3A00.csv" - ) - - def test_get_promoted_adls_path_parquet_file(): adls_path_file = "raw/supermetrics/adls_ga_load_times_fr_test/2021-07-14T13%3A09%3A02.997357%2B00%3A00.parquet" flow = ADLSToAzureSQL(name="test", adls_path=adls_path_file) From 
7c76e7ac10b87c78b3f5bb016d09d17b51f7211d Mon Sep 17 00:00:00 2001 From: mgwinner Date: Tue, 13 Feb 2024 14:39:33 +0100 Subject: [PATCH 04/12] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Delete=20get=5Fpromo?= =?UTF-8?q?ted=20tests=20and=20get=5Fpromoted=20path?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flows/test_adls_to_azure_sql.py | 35 ------------------- viadot/flows/adls_to_azure_sql.py | 16 --------- 2 files changed, 51 deletions(-) diff --git a/tests/integration/flows/test_adls_to_azure_sql.py b/tests/integration/flows/test_adls_to_azure_sql.py index b83a59e51..34cef2f9e 100644 --- a/tests/integration/flows/test_adls_to_azure_sql.py +++ b/tests/integration/flows/test_adls_to_azure_sql.py @@ -9,41 +9,6 @@ from viadot.flows.adls_to_azure_sql import check_dtypes_sort, df_to_csv_task -def test_get_promoted_adls_path_parquet_file(): - adls_path_file = "raw/supermetrics/adls_ga_load_times_fr_test/2021-07-14T13%3A09%3A02.997357%2B00%3A00.parquet" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_file) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_file_starts_with_slash(): - adls_path_dir_starts_with_slash = "/raw/supermetrics/adls_ga_load_times_fr_test/" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir_starts_with_slash) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_dir_slash(): - adls_path_dir_slash = "raw/supermetrics/adls_ga_load_times_fr_test/" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir_slash) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_dir(): - adls_path_dir = 
"raw/supermetrics/adls_ga_load_times_fr_test" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - -def test_get_promoted_adls_path_dir_starts_with_slash(): - adls_path_dir_starts_with_slash = "/raw/supermetrics/adls_ga_load_times_fr_test/" - flow = ADLSToAzureSQL(name="test", adls_path=adls_path_dir_starts_with_slash) - promoted_path = flow.get_promoted_path(env="conformed") - assert promoted_path == "conformed/supermetrics/adls_ga_load_times_fr_test.csv" - - def test_df_to_csv_task(): d = {"col1": ["rat", "\tdog"], "col2": ["cat", 4]} df = pd.DataFrame(data=d) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index abac388b3..d41ed129a 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -221,8 +221,6 @@ def __init__( self.overwrite_adls = overwrite_adls self.if_empty = if_empty self.adls_sp_credentials_secret = adls_sp_credentials_secret - self.adls_path_conformed = self.get_promoted_path(env="conformed") - self.adls_path_operations = self.get_promoted_path(env="operations") # AzureSQLCreateTable self.table = table @@ -257,20 +255,6 @@ def _map_if_exists(if_exists: str) -> str: def slugify(name): return name.replace(" ", "_").lower() - def get_promoted_path(self, env: str) -> str: - adls_path_clean = self.adls_path.strip("/") - extension = adls_path_clean.split(".")[-1].strip() - if extension == "parquet": - file_name = adls_path_clean.split("/")[-2] + ".csv" - common_path = "/".join(adls_path_clean.split("/")[1:-2]) - else: - file_name = adls_path_clean.split("/")[-1] - common_path = "/".join(adls_path_clean.split("/")[1:-1]) - - promoted_path = os.path.join(env, common_path, file_name) - - return promoted_path - def gen_flow(self) -> Flow: lake_to_df_task = AzureDataLakeToDF(timeout=self.timeout) df = lake_to_df_task.bind( From 
e1d3f741cd702d2a670ef032399bdd9cc6f791ed Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Wed, 28 Feb 2024 07:53:08 +0100 Subject: [PATCH 05/12] =?UTF-8?q?=F0=9F=90=9B=20Changed=20dbt=20packages?= =?UTF-8?q?=20in=20requirements.txt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 +- requirements.txt | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c49b99fd7..033175d07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### Changed - +- Changed requirements.txt to level up version of dbt-sqlserver in order to fix bug with `MAXRECURSION` error in dbt_run ## [0.4.25] - 2024-01-30 ### Added diff --git a/requirements.txt b/requirements.txt index 72bbb20b2..990e4a89c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,8 +39,7 @@ aiolimiter==1.0.0 protobuf>=3.19.0, <3.20 avro-python3==1.10.2 pygit2>=1.10.1, <1.11.0 -dbt-core==1.3.2 -dbt-sqlserver==1.3.1 +dbt-sqlserver @ git+https://github.com/djagoda881/dbt-sqlserver.git@v1.3.latest_option_clause lumaCLI==0.0.19 Office365-REST-Python-Client==2.4.4 TM1py==1.11.3 From 74f0bf5c1517f5499e09c65aeee9fea3297bfb26 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Fri, 1 Mar 2024 15:33:33 +0100 Subject: [PATCH 06/12] =?UTF-8?q?=F0=9F=8E=A8=20Improve=20CHANGELOG=20info?= =?UTF-8?q?rmation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 033175d07..d4207c636 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Fixed +- Changed requirements.txt to level up version of dbt-sqlserver in order to fix bug with `MAXRECURSION` error in dbt_run ### 
Changed -- Changed requirements.txt to level up version of dbt-sqlserver in order to fix bug with `MAXRECURSION` error in dbt_run + +### Removed +- Removed dbt-core==1.3.2 from requirements.txt ## [0.4.25] - 2024-01-30 ### Added From f5ac9274c7af34070cc933272dcd0b41d453fc61 Mon Sep 17 00:00:00 2001 From: dominikjedlinski Date: Fri, 15 Mar 2024 15:01:01 +0100 Subject: [PATCH 07/12] changed upstream order for df_validation --- viadot/flows/adls_to_azure_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index c12cc7e1d..ed3877fd6 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -369,8 +369,8 @@ def gen_flow(self) -> Flow: if self.validate_df_dict: validate_df.bind(df=df, tests=self.validate_df_dict, flow=self) validate_df.set_upstream(lake_to_df_task, flow=self) + df_reorder.set_upstream(validate_df, flow=self) - df_reorder.set_upstream(lake_to_df_task, flow=self) df_to_csv.set_upstream(df_reorder, flow=self) promote_to_conformed_task.set_upstream(df_to_csv, flow=self) create_table_task.set_upstream(df_to_csv, flow=self) From bec22d9f46678f5f2a26d30bd0d5ae6ebae960b9 Mon Sep 17 00:00:00 2001 From: dominikjedlinski Date: Mon, 18 Mar 2024 13:22:37 +0100 Subject: [PATCH 08/12] updated upstream order --- viadot/flows/adls_to_azure_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index ed3877fd6..f95250ab7 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -368,9 +368,9 @@ def gen_flow(self) -> Flow: # data validation function (optional) if self.validate_df_dict: validate_df.bind(df=df, tests=self.validate_df_dict, flow=self) - validate_df.set_upstream(lake_to_df_task, flow=self) df_reorder.set_upstream(validate_df, flow=self) + df_to_csv.set_upstream(dtypes, flow=self) df_to_csv.set_upstream(df_reorder, 
flow=self) promote_to_conformed_task.set_upstream(df_to_csv, flow=self) create_table_task.set_upstream(df_to_csv, flow=self) From b2db3b4abc35375d86ca315d6575ccf70ad680c4 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Tue, 19 Mar 2024 14:27:53 +0100 Subject: [PATCH 09/12] =?UTF-8?q?=F0=9F=90=9B=20fixed=20parameter=20litera?= =?UTF-8?q?l=20definition.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/azure_sql.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/viadot/tasks/azure_sql.py b/viadot/tasks/azure_sql.py index b6481975a..c4bc00484 100644 --- a/viadot/tasks/azure_sql.py +++ b/viadot/tasks/azure_sql.py @@ -64,7 +64,7 @@ def run( table: str, dtypes: Dict[str, Any], sep: str = None, - if_exists: Literal = ["fail", "replace", "append", "delete"], + if_exists: Literal["fail", "replace", "append", "delete"] = "fail", ): """ Create a table from an Azure Blob object. @@ -75,8 +75,8 @@ def run( schema (str): Destination schema. table (str): Destination table. dtypes (Dict[str, Any]): Data types to force. - sep (str): The separator to use to read the CSV file. - if_exists (Literal, optional): What to do if the table already exists. + sep (str, optional): The separator to use to read the CSV file. Defaults to None. + if_exists (Literal["fail", "replace", "append", "delete"], optional): What to do if the table already exists. Defaults to "fail". 
""" fqn = f"{schema}.{table}" if schema else table From b73d5e74043683b381d2ee9e0ce621074e4a625e Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Tue, 19 Mar 2024 14:30:33 +0100 Subject: [PATCH 10/12] =?UTF-8?q?=F0=9F=93=9D=20updated=20CHANGELOG.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c49b99fd7..e0dae1557 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Fixed +- `tasks/azure_sql` fixed parameter definition issue in `CreateTableFromBlob` task. ### Changed From 5169f329d8e9e668caae6f4ecc0490183158a2c4 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Tue, 26 Mar 2024 14:51:12 +0100 Subject: [PATCH 11/12] =?UTF-8?q?=F0=9F=8E=A8=20improve=20structure=20of?= =?UTF-8?q?=20changelog=20inputs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4207c636..44b4a3521 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Fixed -- Changed requirements.txt to level up version of dbt-sqlserver in order to fix bug with `MAXRECURSION` error in dbt_run +- Changed `requirements.txt` to level up version of `dbt-sqlserver` in order to fix bug with `MAXRECURSION` error in dbt_run ### Changed ### Removed -- Removed dbt-core==1.3.2 from requirements.txt +- Removed `dbt-core==1.3.2` from `requirements.txt` ## [0.4.25] - 2024-01-30 ### Added From 9e4aae834e4e3e46ecd60e52315d09406713426f Mon Sep 17 00:00:00 2001 From: Diego <108733861+Diego-H-S@users.noreply.github.com> Date: Tue, 26 Mar 2024 15:07:09 +0100 Subject: [PATCH 12/12] Update CHANGELOG.md MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rafał Ziemianek <49795849+Rafalz13@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0dae1557..dfe42548f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Fixed -- `tasks/azure_sql` fixed parameter definition issue in `CreateTableFromBlob` task. +- Fixed the `if_exists` parameter definition in the `CreateTableFromBlob` task. ### Changed