From a5dd22b1c511b719c91973a23d2ea6855aaf9607 Mon Sep 17 00:00:00 2001 From: kiurieva Date: Thu, 7 Dec 2023 15:29:04 +0100 Subject: [PATCH 01/16] Fixed json normalize --- viadot/sources/vid_club.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py index 4da4e4f45..b32ee01b1 100644 --- a/viadot/sources/vid_club.py +++ b/viadot/sources/vid_club.py @@ -230,7 +230,7 @@ def get_response( ind = False if "data" in keys_list: - df = json_normalize(response["data"]) + df = pd.json_normalize(response["data"]) df = pd.DataFrame(df) length = df.shape[0] page = 1 @@ -246,7 +246,7 @@ def get_response( url=url, headers=headers, method="GET", verify=False ) response = r.json() - df_page = json_normalize(response["data"]) + df_page = pd.json_normalize(response["data"]) df_page = pd.DataFrame(df_page) if source == "product": df_page = df_page.transpose() From d0f33a9953125f6fe95d94240390b52bcb1bcb44 Mon Sep 17 00:00:00 2001 From: burzekj Date: Tue, 12 Dec 2023 14:08:24 +0100 Subject: [PATCH 02/16] =?UTF-8?q?=F0=9F=90=9B=20fixed=20github=20action=20?= =?UTF-8?q?bug=20related=20to=20black=20formatter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/build.yml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 006f22e39..865087f6a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -64,18 +64,6 @@ jobs: run: | pip install black black --check . - continue-on-error: true - - - name: Commit Black changes to the pull request - if: ${{ always() && steps.blackCheck.outcome == 'failure' }} - run: | - git config --global user.name 'github-actions[bot]' - git config --global user.email 'github-actions[bot]@users.noreply.github.com' - git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY - black . 
- git checkout $GITHUB_HEAD_REF - git commit -am "🎨 Format Python code with Black" - git push - name: Test with pytest if: always() From 2883a94cc153e88fa5bce4b9b5a207e99a3bb24b Mon Sep 17 00:00:00 2001 From: burzekj Date: Tue, 12 Dec 2023 18:03:53 +0100 Subject: [PATCH 03/16] =?UTF-8?q?=E2=9C=A8=20Extanded=20logic=20for=20if?= =?UTF-8?q?=5Fempty=20param=20in=20ADLSToAzureSQL=20flow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + viadot/flows/adls_to_azure_sql.py | 166 ++++++++++++++++-------------- 2 files changed, 87 insertions(+), 80 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a30ad98b8..80c1882e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Added logic for if_empty param: `check_if_df_empty` task to `ADLSToAzureSQL` flow ### Fixed diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index a9e49c6b6..0951efaa6 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -3,7 +3,7 @@ from typing import Any, Dict, List, Literal import pandas as pd -from prefect import Flow, task +from prefect import Flow, task, case from prefect.backend import get_key_value from prefect.engine import signals from prefect.utilities import logging @@ -18,6 +18,7 @@ DownloadGitHubFile, ) from viadot.tasks.azure_data_lake import AzureDataLakeDownload +from viadot.task_utils import check_if_df_empty logger = logging.get_logger(__name__) @@ -136,7 +137,7 @@ def __init__( write_sep: str = "\t", remove_tab: bool = False, overwrite_adls: bool = True, - if_empty: str = "warn", + if_empty: Literal["fail", "warn", "skip"] = "skip", adls_sp_credentials_secret: str = None, dtypes: Dict[str, Any] = None, check_dtypes_order: bool = True, @@ -166,7 +167,7 @@ def __init__( write_sep (str, optional): The delimiter for the output CSV file. Defaults to "\t". remove_tab (bool, optional): Whether to remove tab delimiters from the data. Defaults to False. overwrite_adls (bool, optional): Whether to overwrite the file in ADLS. Defaults to True. - if_empty (str, optional): What to do if the Supermetrics query returns no data. Defaults to "warn". + if_empty (str, optional): What to do if the loaded DataFrame is empty. Defaults to "skip". adls_sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with ACCOUNT_NAME and Service Principal credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. Defaults to None. 
@@ -274,91 +275,96 @@ def gen_flow(self) -> Flow: flow=self, ) - if not self.dtypes: - download_json_file_task = AzureDataLakeDownload(timeout=self.timeout) - download_json_file_task.bind( - from_path=self.json_shema_path, - to_path=self.local_json_path, + df_empty = check_if_df_empty.bind(df, self.if_empty, flow=self) + + with case(df_empty, False): + if not self.dtypes: + download_json_file_task = AzureDataLakeDownload(timeout=self.timeout) + download_json_file_task.bind( + from_path=self.json_shema_path, + to_path=self.local_json_path, + sp_credentials_secret=self.adls_sp_credentials_secret, + flow=self, + ) + dtypes = map_data_types_task.bind(self.local_json_path, flow=self) + map_data_types_task.set_upstream(download_json_file_task, flow=self) + else: + dtypes = check_dtypes_sort.bind( + df, + dtypes=self.dtypes, + apply=self.check_dtypes_order, + flow=self, + ) + + check_column_order_task = CheckColumnOrder(timeout=self.timeout) + df_reorder = check_column_order_task.bind( + table=self.table, + schema=self.schema, + df=df, + if_exists=self.if_exists, + credentials_secret=self.sqldb_credentials_secret, + flow=self, + ) + if self.check_col_order == False: + df_to_csv = df_to_csv_task.bind( + df=df, + path=self.local_file_path, + sep=self.write_sep, + remove_tab=self.remove_tab, + flow=self, + ) + else: + df_to_csv = df_to_csv_task.bind( + df=df_reorder, + path=self.local_file_path, + sep=self.write_sep, + remove_tab=self.remove_tab, + flow=self, + ) + + promote_to_conformed_task = AzureDataLakeCopy(timeout=self.timeout) + promote_to_conformed_task.bind( + from_path=self.adls_path, + to_path=self.adls_path_conformed, sp_credentials_secret=self.adls_sp_credentials_secret, + vault_name=self.vault_name, flow=self, ) - dtypes = map_data_types_task.bind(self.local_json_path, flow=self) - map_data_types_task.set_upstream(download_json_file_task, flow=self) - else: - dtypes = check_dtypes_sort.bind( - df, - dtypes=self.dtypes, - apply=self.check_dtypes_order, + promote_to_operations_task = AzureDataLakeCopy(timeout=self.timeout) + promote_to_operations_task.bind( + from_path=self.adls_path_conformed, + to_path=self.adls_path_operations, + sp_credentials_secret=self.adls_sp_credentials_secret, + vault_name=self.vault_name, flow=self, ) - - check_column_order_task = CheckColumnOrder(timeout=self.timeout) - df_reorder = check_column_order_task.bind( - table=self.table, - schema=self.schema, - df=df, - if_exists=self.if_exists, - credentials_secret=self.sqldb_credentials_secret, - flow=self, - ) - if self.check_col_order == False: - df_to_csv = df_to_csv_task.bind( - df=df, - path=self.local_file_path, - sep=self.write_sep, - remove_tab=self.remove_tab, + create_table_task = AzureSQLCreateTable(timeout=self.timeout) + create_table_task.bind( + schema=self.schema, + table=self.table, + dtypes=dtypes, + if_exists=self._map_if_exists(self.if_exists), + credentials_secret=self.sqldb_credentials_secret, + vault_name=self.vault_name, flow=self, ) - else: - df_to_csv = df_to_csv_task.bind( - df=df_reorder, + bulk_insert_task = BCPTask(timeout=self.timeout) + bulk_insert_task.bind( path=self.local_file_path, - sep=self.write_sep, - remove_tab=self.remove_tab, + schema=self.schema, + table=self.table, + error_log_file_path=self.name.replace(" ", "_") + ".log", + on_error=self.on_bcp_error, + credentials_secret=self.sqldb_credentials_secret, + vault_name=self.vault_name, flow=self, ) - promote_to_conformed_task = AzureDataLakeCopy(timeout=self.timeout) - promote_to_conformed_task.bind( - 
from_path=self.adls_path, - to_path=self.adls_path_conformed, - sp_credentials_secret=self.adls_sp_credentials_secret, - vault_name=self.vault_name, - flow=self, - ) - promote_to_operations_task = AzureDataLakeCopy(timeout=self.timeout) - promote_to_operations_task.bind( - from_path=self.adls_path_conformed, - to_path=self.adls_path_operations, - sp_credentials_secret=self.adls_sp_credentials_secret, - vault_name=self.vault_name, - flow=self, - ) - create_table_task = AzureSQLCreateTable(timeout=self.timeout) - create_table_task.bind( - schema=self.schema, - table=self.table, - dtypes=dtypes, - if_exists=self._map_if_exists(self.if_exists), - credentials_secret=self.sqldb_credentials_secret, - vault_name=self.vault_name, - flow=self, - ) - bulk_insert_task = BCPTask(timeout=self.timeout) - bulk_insert_task.bind( - path=self.local_file_path, - schema=self.schema, - table=self.table, - error_log_file_path=self.name.replace(" ", "_") + ".log", - on_error=self.on_bcp_error, - credentials_secret=self.sqldb_credentials_secret, - vault_name=self.vault_name, - flow=self, - ) - - df_reorder.set_upstream(lake_to_df_task, flow=self) - df_to_csv.set_upstream(df_reorder, flow=self) - promote_to_conformed_task.set_upstream(df_to_csv, flow=self) - create_table_task.set_upstream(df_to_csv, flow=self) - promote_to_operations_task.set_upstream(promote_to_conformed_task, flow=self) - bulk_insert_task.set_upstream(create_table_task, flow=self) + df_reorder.set_upstream(lake_to_df_task, flow=self) + df_to_csv.set_upstream(df_reorder, flow=self) + promote_to_conformed_task.set_upstream(df_to_csv, flow=self) + create_table_task.set_upstream(df_to_csv, flow=self) + promote_to_operations_task.set_upstream( + promote_to_conformed_task, flow=self + ) + bulk_insert_task.set_upstream(create_table_task, flow=self) From 20be35369a6e36c5b0f06a1e97e9faa5daae3eef Mon Sep 17 00:00:00 2001 From: Jakub Burzec <125436423+burzekj@users.noreply.github.com> Date: Wed, 13 Dec 2023 11:20:36 +0100 Subject: [PATCH 04/16] Update viadot/flows/adls_to_azure_sql.py Co-authored-by: Marcin Purtak <44641138+marcinpurtak@users.noreply.github.com> --- viadot/flows/adls_to_azure_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index 0951efaa6..95b1340ca 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -167,7 +167,7 @@ def __init__( write_sep (str, optional): The delimiter for the output CSV file. Defaults to "\t". remove_tab (bool, optional): Whether to remove tab delimiters from the data. Defaults to False. overwrite_adls (bool, optional): Whether to overwrite the file in ADLS. Defaults to True. - if_empty (str, optional): What to do if the loaded DataFrame is empty. Defaults to "skip". + if_empty (str, optional): What to do if the loaded DataFrame is empty. Defaults to "skip" which acts like ignore this setting. adls_sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with ACCOUNT_NAME and Service Principal credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. Defaults to None. 
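
For context on the change introduced in patches 03 and 04: the `with case(df_empty, False):` block relies on Prefect 1.x's `case` context manager, so every task bound inside it is skipped whenever `check_if_df_empty` reports an empty DataFrame. A minimal, self-contained sketch of that pattern, using a simplified stand-in for viadot's `check_if_df_empty` task and placeholder extract/load steps, could look like this:

    from prefect import Flow, case, task
    import pandas as pd


    @task
    def check_if_df_empty(df: pd.DataFrame, if_empty: str = "skip") -> bool:
        # simplified stand-in for viadot.task_utils.check_if_df_empty
        return df.empty


    @task
    def extract() -> pd.DataFrame:
        # placeholder extract step standing in for the ADLS download
        return pd.DataFrame({"test_str": ["a", "b"], "test_int": [1, 2]})


    @task
    def load(df: pd.DataFrame) -> None:
        # placeholder load step standing in for the CSV dump / BCP insert
        print(f"loading {len(df)} rows")


    with Flow("conditional_load") as flow:
        df = extract()
        df_empty = check_if_df_empty(df)
        with case(df_empty, False):
            # tasks bound inside this block only run when df_empty is False
            load(df)

    # flow.run()  # the downstream load is skipped when the DataFrame is empty
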
From 9b7d1f7976597dddd3b08c4b448d94b12f9797ab Mon Sep 17 00:00:00 2001 From: burzekj Date: Fri, 15 Dec 2023 14:30:40 +0100 Subject: [PATCH 05/16] =?UTF-8?q?=E2=9C=A8=20Added=20validate=5Fdf=20func?= =?UTF-8?q?=20into=20ADLSToAzureSQL=20flow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/adls_to_azure_sql.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/viadot/flows/adls_to_azure_sql.py b/viadot/flows/adls_to_azure_sql.py index a9e49c6b6..466c1156c 100644 --- a/viadot/flows/adls_to_azure_sql.py +++ b/viadot/flows/adls_to_azure_sql.py @@ -17,6 +17,7 @@ CheckColumnOrder, DownloadGitHubFile, ) +from viadot.task_utils import validate_df from viadot.tasks.azure_data_lake import AzureDataLakeDownload logger = logging.get_logger(__name__) @@ -150,6 +151,7 @@ def __init__( tags: List[str] = ["promotion"], vault_name: str = None, timeout: int = 3600, + validate_df_dict: Dict[str, Any] = None, *args: List[any], **kwargs: Dict[str, Any], ): @@ -186,6 +188,8 @@ def __init__( vault_name (str, optional): The name of the vault from which to obtain the secrets. Defaults to None. timeout(int, optional): The amount of time (in seconds) to wait while running this task before a timeout occurs. Defaults to 3600. + validate_df_dict (Dict[str,Any], optional): A dictionary with optional list of tests to verify the output dataframe. + If defined, triggers the `validate_df` task from task_utils. Defaults to None. """ adls_path = adls_path.strip("/") @@ -236,6 +240,7 @@ def __init__( self.tags = tags self.vault_name = vault_name self.timeout = timeout + self.validate_df_dict = validate_df_dict super().__init__(*args, name=name, **kwargs) @@ -356,6 +361,11 @@ def gen_flow(self) -> Flow: flow=self, ) + # data validation function (optional) + if self.validate_df_dict: + validate_df.bind(df=df, tests=self.validate_df_dict, flow=self) + validate_df.set_upstream(lake_to_df_task, flow=self) + df_reorder.set_upstream(lake_to_df_task, flow=self) df_to_csv.set_upstream(df_reorder, flow=self) promote_to_conformed_task.set_upstream(df_to_csv, flow=self) From 40382b699b85db1f29ef600825acd847ff37776b Mon Sep 17 00:00:00 2001 From: burzekj Date: Tue, 16 Jan 2024 11:48:55 +0100 Subject: [PATCH 06/16] =?UTF-8?q?=E2=9C=85=20Added=20tests=20for=20validat?= =?UTF-8?q?e=5Fdf=20param?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flows/test_adls_to_azure_sql.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/integration/flows/test_adls_to_azure_sql.py b/tests/integration/flows/test_adls_to_azure_sql.py index e13dc31b2..d52e4023e 100644 --- a/tests/integration/flows/test_adls_to_azure_sql.py +++ b/tests/integration/flows/test_adls_to_azure_sql.py @@ -101,3 +101,32 @@ def test_check_dtypes_sort(): assert False except signals.FAIL: assert True + + +def test_adls_to_azure_sql_mocked(TEST_CSV_FILE_PATH): + with mock.patch.object(ADLSToAzureSQL, "run", return_value=True) as mock_method: + instance = ADLSToAzureSQL( + name="test_adls_to_azure_sql_flow", + adls_path=TEST_CSV_FILE_PATH, + schema="sandbox", + table="test_bcp", + dtypes={"test_str": "VARCHAR(25)", "test_int": "INT"}, + if_exists="replace", + ) + instance.run() + mock_method.assert_called_with() + + +def test_adls_to_azure_sql_mocked_validate_df_param(TEST_CSV_FILE_PATH): + with mock.patch.object(ADLSToAzureSQL, "run", return_value=True) as mock_method: + instance = ADLSToAzureSQL( + name="test_adls_to_azure_sql_flow", + 
adls_path=TEST_CSV_FILE_PATH, + schema="sandbox", + table="test_bcp", + dtypes={"test_str": "VARCHAR(25)", "test_int": "INT"}, + if_exists="replace", + validate_df_dict={"column_list_to_match": ["test_str", "test_int"]}, + ) + instance.run() + mock_method.assert_called_with() From 33e8293cc9932708f0427a52318af150c2badce3 Mon Sep 17 00:00:00 2001 From: burzekj Date: Tue, 16 Jan 2024 14:59:25 +0100 Subject: [PATCH 07/16] =?UTF-8?q?=E2=9C=85=20Added=20test=20for=20wrong=20?= =?UTF-8?q?param=20passed=20to=20ADLSToAzureSQL=20class?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../integration/flows/test_adls_to_azure_sql.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/integration/flows/test_adls_to_azure_sql.py b/tests/integration/flows/test_adls_to_azure_sql.py index d52e4023e..e3ae45623 100644 --- a/tests/integration/flows/test_adls_to_azure_sql.py +++ b/tests/integration/flows/test_adls_to_azure_sql.py @@ -130,3 +130,19 @@ def test_adls_to_azure_sql_mocked_validate_df_param(TEST_CSV_FILE_PATH): ) instance.run() mock_method.assert_called_with() + + +def test_adls_to_azure_sql_mocked_wrong_param(TEST_CSV_FILE_PATH): + with pytest.raises(TypeError) as excinfo: + instance = ADLSToAzureSQL( + name="test_adls_to_azure_sql_flow", + adls_path=TEST_CSV_FILE_PATH, + schema="sandbox", + table="test_bcp", + dtypes={"test_str": "VARCHAR(25)", "test_int": "INT"}, + if_exists="replace", + validate_df_dit={"column_list_to_match": ["test_str", "test_int"]}, + ) + instance.run() + + assert "validate_df_dit" in str(excinfo) From cd60a8559c9d205ce611592de1ec94773eff928e Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Fri, 8 Dec 2023 14:24:09 +0100 Subject: [PATCH 08/16] =?UTF-8?q?=F0=9F=9A=80=20Bumped=20version=20after?= =?UTF-8?q?=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_viadot.py | 2 +- viadot/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_viadot.py b/tests/test_viadot.py index 7493c14f5..71c3d6187 100644 --- a/tests/test_viadot.py +++ b/tests/test_viadot.py @@ -2,4 +2,4 @@ def test_version(): - assert __version__ == "0.4.24" + assert __version__ == "0.4.25" diff --git a/viadot/__init__.py b/viadot/__init__.py index 9e6207df0..1cc3baa70 100644 --- a/viadot/__init__.py +++ b/viadot/__init__.py @@ -1 +1 @@ -__version__ = "0.4.24" +__version__ = "0.4.25" From 752eb99515719aef6f69ecfd92a4afceb58ca993 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Tue, 16 Jan 2024 16:09:37 +0100 Subject: [PATCH 09/16] Added geopy library to requirements --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4d6c3a15f..72bbb20b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -45,4 +45,5 @@ lumaCLI==0.0.19 Office365-REST-Python-Client==2.4.4 TM1py==1.11.3 nltk==3.8.1 -scikit-learn==1.3.2 \ No newline at end of file +scikit-learn==1.3.2 +geopy==2.4.1 \ No newline at end of file From c2f70d27371a9f7c00406abf84fdb3c1e401938b Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Tue, 16 Jan 2024 16:20:14 +0100 Subject: [PATCH 10/16] Updated CHANGELOG --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80c1882e1..2ecc321f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- Added logic 
for if_empty param: `check_if_df_empty` task to `ADLSToAzureSQL` flow +- Added logic for if_empty param: `check_if_df_empty` task to `ADLSToAzureSQL` flow. +- Added `geopy` library to `requirements`. ### Fixed From 2fa2367f637910b1fa7af36709859f0266290f39 Mon Sep 17 00:00:00 2001 From: kiurieva Date: Wed, 17 Jan 2024 09:29:04 +0100 Subject: [PATCH 11/16] Deleted unused libraries import --- viadot/sources/vid_club.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py index b32ee01b1..6cd3c00a5 100644 --- a/viadot/sources/vid_club.py +++ b/viadot/sources/vid_club.py @@ -1,14 +1,10 @@ -import json -import os -import urllib -from pandas.io.json import json_normalize -from datetime import date, datetime, timedelta +from datetime import datetime, timedelta from typing import Any, Dict, List, Literal, Tuple import pandas as pd from prefect.utilities import logging -from ..exceptions import CredentialError, ValidationError +from ..exceptions import ValidationError from ..utils import handle_api_response from .base import Source From d20ebcff9c89a96fb49594e617da261ebf97ce44 Mon Sep 17 00:00:00 2001 From: burzekj Date: Wed, 17 Jan 2024 11:59:20 +0100 Subject: [PATCH 12/16] Change CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80c1882e1..7f81230a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added - Added logic for if_empty param: `check_if_df_empty` task to `ADLSToAzureSQL` flow +- Added new parameter `validate_df_dict` to `ADLSToAzureSQL` class ### Fixed From 682e43d194b8f6e50d21bc7872d3bf6e9bc7bfae Mon Sep 17 00:00:00 2001 From: kiurieva Date: Wed, 17 Jan 2024 15:48:22 +0100 Subject: [PATCH 13/16] Cleaned small flaws in tests --- .../integration/flows/test_aselite_to_adls.py | 3 -- tests/integration/tasks/test_duckdb.py | 0 .../{test_genesys_task.py => test_genesys.py} | 0 tests/integration/tasks/test_github.py | 7 --- .../{test_sqlite_insert.py => test_sqlite.py} | 0 tests/unit/tasks/test_uk_carbon_intensity.py | 22 +-------- tests/unit/test_base.py | 48 ------------------- tests/unit/test_duckdb.py | 2 +- tests/unit/test_supermetrics.py | 4 +- 9 files changed, 4 insertions(+), 82 deletions(-) delete mode 100644 tests/integration/tasks/test_duckdb.py rename tests/integration/tasks/{test_genesys_task.py => test_genesys.py} (100%) rename tests/integration/tasks/{test_sqlite_insert.py => test_sqlite.py} (100%) diff --git a/tests/integration/flows/test_aselite_to_adls.py b/tests/integration/flows/test_aselite_to_adls.py index 48146c293..458bd785f 100644 --- a/tests/integration/flows/test_aselite_to_adls.py +++ b/tests/integration/flows/test_aselite_to_adls.py @@ -15,9 +15,6 @@ TMP_FILE_NAME = "test_flow.csv" MAIN_DF = None -df_task = ASELiteToDF() -file_to_adls_task = AzureDataLakeUpload() - def test_aselite_to_adls(): credentials_secret = PrefectSecret("aselite").run() diff --git a/tests/integration/tasks/test_duckdb.py b/tests/integration/tasks/test_duckdb.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/integration/tasks/test_genesys_task.py b/tests/integration/tasks/test_genesys.py similarity index 100% rename from tests/integration/tasks/test_genesys_task.py rename to tests/integration/tasks/test_genesys.py diff --git a/tests/integration/tasks/test_github.py b/tests/integration/tasks/test_github.py index 
171dbfb65..1c5dfbc0a 100644 --- a/tests/integration/tasks/test_github.py +++ b/tests/integration/tasks/test_github.py @@ -2,13 +2,6 @@ from viadot.tasks.github import DownloadGitHubFile -# def test_github_clone_task(): -# clone_repo_task = CloneRepo() -# repo = "fishtown-analytics/dbt" -# repo_name = repo.split("/")[-1] -# clone_repo_task.run(repo=repo) -# assert os.path.exists(repo_name) - def test_download_github_file(): task = DownloadGitHubFile() diff --git a/tests/integration/tasks/test_sqlite_insert.py b/tests/integration/tasks/test_sqlite.py similarity index 100% rename from tests/integration/tasks/test_sqlite_insert.py rename to tests/integration/tasks/test_sqlite.py diff --git a/tests/unit/tasks/test_uk_carbon_intensity.py b/tests/unit/tasks/test_uk_carbon_intensity.py index fb96b8604..fb611e0bc 100644 --- a/tests/unit/tasks/test_uk_carbon_intensity.py +++ b/tests/unit/tasks/test_uk_carbon_intensity.py @@ -1,8 +1,7 @@ import os -import openpyxl import pytest -from openpyxl import load_workbook + from viadot.tasks.open_apis.uk_carbon_intensity import StatsToCSV, StatsToExcel @@ -22,22 +21,3 @@ def ukci_task_excel(): ukci_task_excel = StatsToExcel() yield ukci_task_excel os.remove(TEST_FILE_PATH_EXCEL) - - -# def test_uk_carbon_intensity_to_csv(ukci_task): -# ukci_task.run(path=TEST_FILE_PATH) -# if_exist = os.path.isfile(TEST_FILE_PATH) -# assert if_exist == True - - -# def test_uk_carbon_intensity_to_excel(ukci_task_excel): -# ukci_task_excel.run(path=TEST_FILE_PATH_EXCEL) -# if_exist = os.path.isfile(TEST_FILE_PATH_EXCEL) -# assert if_exist == True - - -# def test_uk_carbon_intensity_to_excel_contain(ukci_task_excel): -# ukci_task_excel.run(path=TEST_FILE_PATH_EXCEL) -# excel_file = load_workbook(TEST_FILE_PATH_EXCEL) -# value = excel_file["A1"].value -# assert value == "from" diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py index c20336b29..f7069e181 100644 --- a/tests/unit/test_base.py +++ b/tests/unit/test_base.py @@ -81,51 +81,3 @@ def test_handle_if_empty(caplog): src._handle_if_empty(if_empty="fail") with pytest.raises(SKIP): src._handle_if_empty(if_empty="skip") - - -# def test_to_csv_append(): -# """Test whether `to_csv()` with the append option writes data of correct shape""" -# driver = "/usr/lib/x86_64-linux-gnu/odbc/libsqlite3odbc.so" -# db_name = "testfile.sqlite" -# server = "localhost" -# source = SQL( -# credentials=dict(driver=driver, db_name=db_name, server=server, user=None) -# ) - -# # Generate test table. -# df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) -# source.create_table("test", dtypes={"a": "INT", "b": "INT"}, if_exists="replace") -# source.insert_into(TABLE, df) - -# # Write the table to a CSV three times in `append` mode. -# for i in range(3): -# source.to_csv(path=PATH, query="SELECT * FROM test", if_exists="append") - -# # Read the CSV and validate no. of rows and columns. -# out_df = pd.read_csv(PATH, sep="\t") - -# target_length = 3 * df.shape[0] -# target_width = df.shape[0] - -# actual_length = out_df.shape[0] -# actual_width = out_df.shape[1] - -# assert actual_length == target_length and actual_width == target_width - -# # Clean up. 
-# os.remove(PATH) - - -# GitHub changes the string and makes the test fail -# def test_conn_str(): -# s = SQL( -# driver=CREDENTIALS["driver"], -# server=CREDENTIALS["server"], -# db=CREDENTIALS["db_name"], -# user=CREDENTIALS["user"], -# pw=CREDENTIALS["password"], -# ) -# assert ( -# s.conn_str -# == "DRIVER=ODBC Driver 17 for SQL Server;SERVER=s123.database.windows.net;DATABASE=a-b-c;UID={my_user@example.com};PWD={a123;@4}" -# ) diff --git a/tests/unit/test_duckdb.py b/tests/unit/test_duckdb.py index 4cd5ee0e4..eb3800f82 100644 --- a/tests/unit/test_duckdb.py +++ b/tests/unit/test_duckdb.py @@ -85,7 +85,7 @@ def test_create_table_from_multiple_parquet(duckdb): duckdb.run(f"DROP SCHEMA {SCHEMA}") -def test__check_if_table_exists(duckdb, TEST_PARQUET_FILE_PATH): +def test_check_if_table_exists(duckdb, TEST_PARQUET_FILE_PATH): assert not duckdb._check_if_table_exists(table=TABLE, schema=SCHEMA) duckdb.create_table_from_parquet( schema=SCHEMA, table=TABLE, path=TEST_PARQUET_FILE_PATH diff --git a/tests/unit/test_supermetrics.py b/tests/unit/test_supermetrics.py index 36b1320ba..4dace1bd9 100644 --- a/tests/unit/test_supermetrics.py +++ b/tests/unit/test_supermetrics.py @@ -105,7 +105,7 @@ } -def test___get_col_names_google_analytics_pivoted(): +def test_get_col_names_google_analytics_pivoted(): columns = Supermetrics._get_col_names_google_analytics(response=RESPONSE_PIVOTED) assert columns == [ "Date", @@ -117,6 +117,6 @@ def test___get_col_names_google_analytics_pivoted(): ] -def test___get_col_names_google_analytics_pivoted_no_data(): +def test_get_col_names_google_analytics_pivoted_no_data(): with pytest.raises(ValueError): Supermetrics._get_col_names_google_analytics(response=RESPONSE_PIVOTED_NO_DATA) From b3d4b7fb01101ac7f47c36cfd37d67e6c4c43272 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 24 Jan 2024 09:04:41 +0100 Subject: [PATCH 14/16] =?UTF-8?q?=F0=9F=93=9D=20added=20new=20viestype.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/genesys.py | 1 + viadot/tasks/genesys.py | 1 + 2 files changed, 2 insertions(+) diff --git a/viadot/sources/genesys.py b/viadot/sources/genesys.py index d9109b313..a3201a5d7 100644 --- a/viadot/sources/genesys.py +++ b/viadot/sources/genesys.py @@ -323,6 +323,7 @@ def download_all_reporting_exports( "queue_interaction_detail_view", "agent_status_detail_view", "agent_interaction_detail_view", + "agent_timeline_summary_view", ]: file_name = f"{self.view_type.upper()}_{next(self.count)}_{date}" elif single_report[4].lower() in [ diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 04d4bc8b1..1a4b55ce6 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -425,6 +425,7 @@ def run( "agent_status_summary_view", "agent_status_detail_view", "agent_interaction_detail_view", + "agent_timeline_summary_view", ]: genesys.genesys_api_connection( post_data_list=post_data_list, end_point=end_point From edd1d381764c41b5ebfcb74f0a24fbaaab23b5f2 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 24 Jan 2024 09:06:15 +0100 Subject: [PATCH 15/16] =?UTF-8?q?=F0=9F=93=9D=20updated=20CHANGELOG.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f81230a5..1b9137ee8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added logic for 
if_empty param: `check_if_df_empty` task to `ADLSToAzureSQL` flow - Added new parameter `validate_df_dict` to `ADLSToAzureSQL` class +- Added new ViewType `agent_timeline_summary_view` to Genesys. ### Fixed From 25f4b1e295a9caef8babcdb49dff16da488cc5d7 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Tue, 30 Jan 2024 09:54:44 +0100 Subject: [PATCH 16/16] =?UTF-8?q?=F0=9F=93=9D=20Updated=20Changelog=20befo?= =?UTF-8?q?re=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a512c1f3..c49b99fd7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,17 +6,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- Added logic for if_empty param: `check_if_df_empty` task to `ADLSToAzureSQL` flow. -- Added `geopy` library to `requirements`. -- Added logic for if_empty param: `check_if_df_empty` task to `ADLSToAzureSQL` flow -- Added new parameter `validate_df_dict` to `ADLSToAzureSQL` class -- Added new ViewType `agent_timeline_summary_view` to Genesys. - ### Fixed ### Changed + +## [0.4.25] - 2024-01-30 +### Added +- Added logic for if_empty param: `check_if_df_empty` task to `ADLSToAzureSQL` flow. +- Added `geopy` library to `requirements`. +- Added new parameter `validate_df_dict` to `ADLSToAzureSQL` class. +- Added new ViewType `agent_timeline_summary_view` to Genesys. + + ## [0.4.24] - 2023-12-08 ### Fixed - `task_utils/get_nested_value` fixed issue with non dict parameter passed without level(1st workflow)
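
The `validate_df_dict` parameter listed under 0.4.25 is the one added to `ADLSToAzureSQL` in patch 05 and exercised by the tests in patch 06. Based on those tests, a typical call would look roughly like the sketch below (the ADLS path, schema, and table names are placeholders):

    from viadot.flows import ADLSToAzureSQL

    flow = ADLSToAzureSQL(
        name="adls to azure sql with validation",
        adls_path="raw/tests/test.csv",  # placeholder path
        schema="sandbox",
        table="test_bcp",
        dtypes={"test_str": "VARCHAR(25)", "test_int": "INT"},
        if_exists="replace",
        # optional: triggers the validate_df task from task_utils on the loaded DataFrame
        validate_df_dict={"column_list_to_match": ["test_str", "test_int"]},
    )
    flow.run()
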