From 551203c037000e3d46ffb6a392efbad597a87180 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Wed, 15 Nov 2023 14:11:44 +0100 Subject: [PATCH 01/54] =?UTF-8?q?=F0=9F=9A=80=20Bumped=20version=20after?= =?UTF-8?q?=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_viadot.py | 2 +- viadot/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_viadot.py b/tests/test_viadot.py index 675dbfbdc..72df7dfca 100644 --- a/tests/test_viadot.py +++ b/tests/test_viadot.py @@ -2,4 +2,4 @@ def test_version(): - assert __version__ == "0.4.22" + assert __version__ == "0.4.23" diff --git a/viadot/__init__.py b/viadot/__init__.py index ece529aa1..c6dd1e2c0 100644 --- a/viadot/__init__.py +++ b/viadot/__init__.py @@ -1 +1 @@ -__version__ = "0.4.22" +__version__ = "0.4.23" From 53eae885bd9b8ddc34d4cf0ecb6ccddc1b0e358e Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Fri, 17 Nov 2023 13:31:53 +0100 Subject: [PATCH 02/54] =?UTF-8?q?=F0=9F=93=9D=20Added=20docstring=20to=20`?= =?UTF-8?q?slugify()`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/viadot/utils.py b/viadot/utils.py index 5e3de784c..6ece6982e 100644 --- a/viadot/utils.py +++ b/viadot/utils.py @@ -23,6 +23,14 @@ def slugify(name: str) -> str: + """Function to change spaces to underscores and convert all characters to lowercase. + + Args: + name (str): String to convert. + + Returns: + str: Output text after conversion. + """ return name.replace(" ", "_").lower() From b056a05db18a63087aa36ac8e9437b409ec6a088 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Fri, 17 Nov 2023 13:40:02 +0100 Subject: [PATCH 03/54] =?UTF-8?q?=E2=9C=85=20Added=20tests=20for=20`slugif?= =?UTF-8?q?y()`=20and=20`handle=5Fapi=5Fresponse()`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_utils.py | 65 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 75ef30e97..cf1805a0d 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,8 +1,10 @@ +import json import logging import os import pandas as pd import pytest +from viadot.exceptions import APIError from viadot.signals import SKIP from viadot.utils import ( @@ -10,13 +12,15 @@ check_if_empty_file, gen_bulk_insert_query_from_df, check_value, + slugify, + handle_api_response, ) EMPTY_CSV_PATH = "empty.csv" EMPTY_PARQUET_PATH = "empty.parquet" -class ClassForDecorator: +class ClassForMetadataDecorator: source = "Source_name" def __init__(self): @@ -34,6 +38,13 @@ def to_df_decorated_parameter(self): return self.df +def test_slugify(): + """To test slugify() function functionalities work""" + test_string = "Text With Spaces Before Changes" + string_after_changes = slugify(test_string) + assert string_after_changes == "text_with_spaces_before_changes" + + def test_single_quotes_inside(): TEST_VALUE = "a'b" df1 = pd.DataFrame( @@ -139,17 +150,17 @@ def test_check_if_empty_file_no_data(caplog): def test_add_viadot_metadata_columns_base(): - df_base = ClassForDecorator().to_df() - df_decorated = ClassForDecorator().to_df_decorated() + df_base = ClassForMetadataDecorator().to_df() + df_decorated = ClassForMetadataDecorator().to_df_decorated() assert df_base.columns.to_list() == ["a", "b"] assert df_decorated.columns.to_list() == ["a", "b", 
"_viadot_source"] - assert df_decorated["_viadot_source"][0] == "ClassForDecorator" + assert df_decorated["_viadot_source"][0] == "ClassForMetadataDecorator" def test_add_viadot_metadata_columns_with_parameter(): - df_base = ClassForDecorator().to_df() - df_decorated = ClassForDecorator().to_df_decorated_parameter() + df_base = ClassForMetadataDecorator().to_df() + df_decorated = ClassForMetadataDecorator().to_df_decorated_parameter() assert df_base.columns.to_list() == ["a", "b"] assert df_decorated.columns.to_list() == ["a", "b", "_viadot_source"] @@ -202,3 +213,45 @@ def test_check_value_nonexistent_key(): } result = check_value(json_data, ["nonexistent_key"]) assert result is None + + +def test_handle_api_response_wrong_method(): + """Test to check if ValueError is thrown when wrong method is used.""" + + api_url = "https://api.api-ninjas.com/v1/randomuser" + with pytest.raises(ValueError, match="Method not found."): + handle_api_response(url=api_url, method="WRONG_METHOD") + + +def test_handle_api_response_credentials_not_provided(): + """Test to check if APIError is thrown when credentials are not provided.""" + + api_url = "https://api.api-ninjas.com/v1/randomuser" + with pytest.raises( + APIError, match="Perhaps your account credentials need to be refreshed?" + ): + handle_api_response(url=api_url) + + +def test_handle_api_response_wrong_url(): + """Test to check if APIError is thrown when api_url is wrong.""" + + api_url = "https://test.com/" + with pytest.raises(APIError, match="failed due to connection issues."): + handle_api_response(url=api_url) + + +def test_handle_api_response_unknown_error(): + """Test to check if APIError is thrown when there is something other than "url" under api_url.""" + + api_url = "test_string" + with pytest.raises(APIError, match="Unknown error"): + handle_api_response(url=api_url) + + +def test_handle_api_response_return_type(): + """Test to check if the connection is successful.""" + + api_url = "https://jsonplaceholder.typicode.com/posts" + response = handle_api_response(url=api_url) + assert response.status_code == 200 From ef4a6500008900338c3837e78e9930885a06c92f Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Fri, 17 Nov 2023 13:54:13 +0100 Subject: [PATCH 04/54] =?UTF-8?q?=E2=9C=85=20Added=20missing=20test=20for?= =?UTF-8?q?=20`check=5Fvalue()`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_utils.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index cf1805a0d..400541b38 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -167,8 +167,8 @@ def test_add_viadot_metadata_columns_with_parameter(): assert df_decorated["_viadot_source"][0] == "Source_name" -# Sample test checking the correctness of the function when the key is found def test_check_value_found(): + """Sample test checking the correctness of the function when the key is found.""" json_data = { "first_known_lvl": { "second_known_lvl": {"third_known_lvl": {"searched_phrase": "phrase"}} @@ -181,8 +181,8 @@ def test_check_value_found(): assert result == "phrase" -# Sample test checking the correctness of the function when the key is not found def test_check_value_not_found(): + """Sample test checking the correctness of the function when the key is not found.""" json_data = { "first_known_lvl": { "second_known_lvl": { @@ -197,15 +197,16 @@ def test_check_value_not_found(): assert result is None -# Sample test checking 
the correctness of the function with an empty dictionary def test_check_value_empty_dict(): + """Sample test checking the correctness of the function with an empty dictionary.""" json_data = {} result = check_value(json_data, ["searched_phrase"]) assert result is None -# Sample test checking the correctness of the function with a nonexistent key def test_check_value_nonexistent_key(): + """Sample test checking the correctness of the function with a nonexistent key.""" + json_data = { "first_known_lvl": { "second_known_lvl": {"third_known_lvl": {"searched_phrase": "phrase"}} @@ -215,6 +216,14 @@ def test_check_value_nonexistent_key(): assert result is None +def test_check_value_base_is_not_dict(): + result = check_value( + base="this_is_not_dict", + levels=["searched_phrase"], + ) + assert result == "this_is_not_dict" + + def test_handle_api_response_wrong_method(): """Test to check if ValueError is thrown when wrong method is used.""" From 78ea4132a9e28750146e7a9fc5dec7ecb5e3b878 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Wed, 22 Nov 2023 12:30:24 +0100 Subject: [PATCH 05/54] =?UTF-8?q?=E2=9C=85=20Added=20SQL=20related=20missi?= =?UTF-8?q?ng=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_utils.py | 41 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 400541b38..ea555b20a 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -7,11 +7,13 @@ from viadot.exceptions import APIError from viadot.signals import SKIP +from viadot.sources import AzureSQL from viadot.utils import ( add_viadot_metadata_columns, check_if_empty_file, gen_bulk_insert_query_from_df, check_value, + get_sql_server_table_dtypes, slugify, handle_api_response, ) @@ -38,6 +40,12 @@ def to_df_decorated_parameter(self): return self.df +@pytest.fixture(scope="function") +def azure_sql(TEST_CSV_FILE_PATH, TEST_CSV_FILE_BLOB_PATH): + azure_sql = AzureSQL(config_key="AZURE_SQL") + yield azure_sql + + def test_slugify(): """To test slugify() function functionalities work""" test_string = "Text With Spaces Before Changes" @@ -45,7 +53,7 @@ def test_slugify(): assert string_after_changes == "text_with_spaces_before_changes" -def test_single_quotes_inside(): +def test_bulk_insert_query_from_df_single_quotes_inside(): TEST_VALUE = "a'b" df1 = pd.DataFrame( { @@ -67,7 +75,7 @@ def test_single_quotes_inside(): ), test_insert_query -def test_single_quotes_outside(): +def test_bulk_insert_query_from_df_single_quotes_outside(): TEST_VALUE = "'a'" df1 = pd.DataFrame( { @@ -89,7 +97,7 @@ def test_single_quotes_outside(): ), test_insert_query -def test_double_quotes_inside(): +def test_bulk_insert_query_from_df_double_quotes_inside(): TEST_VALUE = 'a "b"' df1 = pd.DataFrame( { @@ -111,6 +119,16 @@ def test_double_quotes_inside(): ), test_insert_query +def test_bulk_insert_query_from_df_not_implemeted(): + TEST_VALUE = 'a "b"' + df1 = pd.DataFrame({"a": [TEST_VALUE]}) + with pytest.raises( + NotImplementedError, + match="this function only handles DataFrames with at least two columns.", + ): + gen_bulk_insert_query_from_df(df1, table_fqn="test_schema.test_table") + + def test_check_if_empty_file_csv(caplog): with open(EMPTY_CSV_PATH, "w"): pass @@ -264,3 +282,20 @@ def test_handle_api_response_return_type(): api_url = "https://jsonplaceholder.typicode.com/posts" response = handle_api_response(url=api_url) assert response.status_code == 200 + + 
+def test_get_sql_server_table_dtypes(azure_sql): + """Checks if dtypes is generated in a good way using `get_sql_server_table_dtypes` function.""" + + SCHEMA = "sandbox" + TABLE = "test_table_dtypes" + dtypes = {"country": "VARCHAR(100)", "sales": "INT"} + + azure_sql.create_table( + schema=SCHEMA, table=TABLE, dtypes=dtypes, if_exists="replace" + ) + + dtypes = get_sql_server_table_dtypes(schema=SCHEMA, table=TABLE, con=azure_sql.con) + assert isinstance(dtypes, dict) + assert list(dtypes.keys()) == ["country", "sales"] + assert list(dtypes.values()) == ["varchar(100)", "int"] From 91ea25c64e39e09be9180066a9f570e8c4d5b58d Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Wed, 22 Nov 2023 15:37:10 +0100 Subject: [PATCH 06/54] =?UTF-8?q?=E2=9C=85=20Added=20missing=20test=20for?= =?UTF-8?q?=20`gen=5Fbulk=5Finsert=5Fquery=5Ffrom=5Fdf`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_utils.py | 27 +++++++++++++++++++++++++-- viadot/utils.py | 8 +++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index ea555b20a..d6e1b9b4c 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,4 +1,3 @@ -import json import logging import os @@ -41,7 +40,13 @@ def to_df_decorated_parameter(self): @pytest.fixture(scope="function") -def azure_sql(TEST_CSV_FILE_PATH, TEST_CSV_FILE_BLOB_PATH): +def example_dataframe(): + data = [(1, "_suffixnan", 1), (2, "Noneprefix", 0), (3, "fooNULLbar", 1, 2.34)] + return pd.DataFrame(data, columns=["id", "name", "is_deleted", "balance"]) + + +@pytest.fixture(scope="function") +def azure_sql(): azure_sql = AzureSQL(config_key="AZURE_SQL") yield azure_sql @@ -129,6 +134,24 @@ def test_bulk_insert_query_from_df_not_implemeted(): gen_bulk_insert_query_from_df(df1, table_fqn="test_schema.test_table") +def test_bulk_insert_query_from_df_full_return(example_dataframe): + result = gen_bulk_insert_query_from_df( + example_dataframe, + table_fqn="users", + chunksize=1000, + status="APPROVED", + address=None, + ) + + expected_result = """INSERT INTO users (id, name, is_deleted, balance, status, address) + +VALUES (1, '_suffixnan', 1, NULL, 'APPROVED', NULL), + (2, 'Noneprefix', 0, NULL, 'APPROVED', NULL), + (3, 'fooNULLbar', 1, 2.34, 'APPROVED', NULL)""" + + assert result == expected_result + + def test_check_if_empty_file_csv(caplog): with open(EMPTY_CSV_PATH, "w"): pass diff --git a/viadot/utils.py b/viadot/utils.py index 6ece6982e..fd2c11cae 100644 --- a/viadot/utils.py +++ b/viadot/utils.py @@ -145,12 +145,12 @@ def get_flow_last_run_date(flow_name: str) -> str: def get_sql_server_table_dtypes( - table, con: pyodbc.Connection, schema: str = None + table: str, con: pyodbc.Connection, schema: str = None ) -> dict: """Get column names and types from a SQL Server database table. Args: - table (_type_): The table for which to fetch dtypes. + table (str): The table for which to fetch dtypes. con (pyodbc.Connection): The connection to the database where the table is located. schema (str, optional): The schema where the table is located. Defaults to None. @@ -265,7 +265,7 @@ def build_merge_query( def gen_bulk_insert_query_from_df( - df: pd.DataFrame, table_fqn: str, chunksize=1000, **kwargs + df: pd.DataFrame, table_fqn: str, chunksize: int = 1000, **kwargs ) -> str: """ Converts a DataFrame to a bulk INSERT query. 
@@ -273,6 +273,7 @@ def gen_bulk_insert_query_from_df( Args: df (pd.DataFrame): The DataFrame which data should be put into the INSERT query. table_fqn (str): The fully qualified name (schema.table) of the table to be inserted into. + chunksize (int, optional): The size of chunk. Defaults to 1000. Returns: str: A bulk insert query that will insert all data from `df` into `table_fqn`. @@ -288,6 +289,7 @@ def gen_bulk_insert_query_from_df( >>> query = gen_bulk_insert_query_from_df(df, "users", status="APPROVED", address=None) >>> print(query) INSERT INTO users (id, name, is_deleted, balance, status, address) + VALUES (1, '_suffixnan', 1, NULL, 'APPROVED', NULL), (2, 'Noneprefix', 0, NULL, 'APPROVED', NULL), (3, 'fooNULLbar', 1, 2.34, 'APPROVED', NULL); From bcdfa981e066619bf2f856f30a11e7426c420407 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Wed, 22 Nov 2023 15:47:03 +0100 Subject: [PATCH 07/54] =?UTF-8?q?=E2=9C=85=20Added=20missing=20test=20for?= =?UTF-8?q?=20`union=5Fdict()`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_utils.py | 10 ++++++++++ viadot/utils.py | 10 +++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index d6e1b9b4c..0b2ed5782 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -15,6 +15,7 @@ get_sql_server_table_dtypes, slugify, handle_api_response, + union_dict, ) EMPTY_CSV_PATH = "empty.csv" @@ -322,3 +323,12 @@ def test_get_sql_server_table_dtypes(azure_sql): assert isinstance(dtypes, dict) assert list(dtypes.keys()) == ["country", "sales"] assert list(dtypes.values()) == ["varchar(100)", "int"] + + +def test_union_dict_return(): + """Check if dictionaries are unioned in the correct way.""" + a = {"a": 1} + b = {"b": 2} + unioned_dict = union_dict(a, b) + assert isinstance(unioned_dict, dict) + assert unioned_dict == {"a": 1, "b": 2} diff --git a/viadot/utils.py b/viadot/utils.py index fd2c11cae..4690c8cbd 100644 --- a/viadot/utils.py +++ b/viadot/utils.py @@ -354,21 +354,21 @@ def _gen_insert_query_from_records(records: List[tuple]) -> str: return _gen_insert_query_from_records(tuples_escaped) -def union_dict(*dicts): +def union_dict(*dicts) -> dict: """ - Function that union list of dictionaries + Function that union list of dictionaries into a singe dictionary. Args: - dicts (List[Dict]): list of dictionaries with credentials. + *dicts: Variable number of dictionaries to be unioned. Returns: - Dict: A single dictionary createb by union method. + dict: A single dictionary containing the combined key-value pairs from all input dictionaries. 
Examples: >>> a = {"a":1} >>> b = {"b":2} - >>> union_credentials_dict(a ,b) + >>> union_dict(a ,b) {'a': 1, 'b': 2} """ From 74beb6e487da679bd649f8adbbd890fc319fbd7d Mon Sep 17 00:00:00 2001 From: burzekj Date: Fri, 24 Nov 2023 12:54:59 +0100 Subject: [PATCH 08/54] =?UTF-8?q?=E2=9C=A8=20Added=20new=20logic=20to=20ma?= =?UTF-8?q?p=20and=20reorder=20output=20df?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/genesys_to_adls.py | 8 +++++++ viadot/tasks/genesys.py | 39 ++++++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/viadot/flows/genesys_to_adls.py b/viadot/flows/genesys_to_adls.py index 830c02c71..1cebe5a65 100644 --- a/viadot/flows/genesys_to_adls.py +++ b/viadot/flows/genesys_to_adls.py @@ -89,6 +89,8 @@ def __init__( report_url: str = None, report_columns: List[str] = None, conversationId_list: List[str] = None, + mapping_dict: Dict[str, Any] = None, + columns_order: List[str] = None, key_list: List[str] = None, local_file_path: str = "", adls_file_path: str = None, @@ -137,6 +139,8 @@ def __init__( report_url (str, optional): The url of report generated in json response. Defaults to None. report_columns (List[str], optional): List of exisiting column in report. Defaults to None. conversationId_list (List[str], optional): List of conversationId passed as attribute of GET method. Defaults to None. + mapping_dict (dict, optional): Mapping dictionary from user in json format. Defaults to None. + columns_order (List, optional): Columns order list to change column order inside pd.DataFrame. Defaults to None. key_list (List[str], optional): List of keys needed to specify the columns in the GET request method. Defaults to None. local_file_path (str, optional): The local path from which to upload the file(s). Defaults to "". adls_file_path (str, optional): The destination path at ADLS. Defaults to None. @@ -164,6 +168,8 @@ def __init__( self.report_url = report_url self.report_columns = report_columns self.conversationId_list = conversationId_list + self.mapping_dict = mapping_dict + self.columns_order = columns_order self.key_list = key_list self.start_date = start_date self.end_date = end_date @@ -199,6 +205,8 @@ def gen_flow(self) -> Flow: end_date=self.end_date, environment=self.environment, conversationId_list=self.conversationId_list, + mapping_dict=self.mapping_dict, + columns_order=self.columns_order, key_list=self.key_list, credentials_genesys=self.credentials_genesys, flow=self, diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 942249ac2..88014a6be 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -33,6 +33,8 @@ def __init__( local_file_path: str = "", sep: str = "\t", conversationId_list: List[str] = None, + mapping_dict: Dict[str, Any] = None, + columns_order: List[str] = None, key_list: List[str] = None, credentials_genesys: Dict[str, Any] = None, validate_df_dict: Dict[str, Any] = None, @@ -56,6 +58,8 @@ def __init__( local_file_path (str, optional): The local path from which to upload the file(s). Defaults to "". sep (str, optional): Separator in csv file. Defaults to "\t". conversationId_list (List[str], optional): List of conversationId passed as attribute of GET method. Defaults to None. + mapping_dict (dict, optional): Mapping dictionary from user in json format. Defaults to None. + columns_order (List, optional): Columns order list to change column order inside pd.DataFrame. Defaults to None. 
key_list (List[str], optional): List of keys needed to specify the columns in the GET request method. Defaults to None. validate_df_dict (Dict[str,Any], optional): A dictionary with optional list of tests to verify the output dataframe. If defined, triggers the `validate_df` task from task_utils. Defaults to None. @@ -76,6 +80,8 @@ def __init__( self.local_file_path = local_file_path self.sep = sep self.conversationId_list = conversationId_list + self.mapping_dict = mapping_dict + self.columns_order = columns_order self.key_list = key_list self.validate_df_dict = validate_df_dict @@ -298,6 +304,8 @@ def merge_conversations_dfs(self, data_to_merge: list) -> DataFrame: "report_columns", "credentials_genesys", "conversationId_list", + "mapping_dict", + "columns_order", "key_list", "validate_df_dict", ) @@ -314,6 +322,8 @@ def run( end_date: str = None, report_columns: List[str] = None, conversationId_list: List[str] = None, + mapping_dict: Dict[str, Any] = None, + columns_order: List[str] = None, key_list: List[str] = None, credentials_genesys: Dict[str, Any] = None, validate_df_dict: Dict[str, Any] = None, @@ -334,6 +344,8 @@ def run( report_url (str, optional): The url of report generated in json response. Defaults to None. report_columns (List[str], optional): List of exisiting column in report. Defaults to None. conversationId_list (List[str], optional): List of conversationId passed as attribute of GET method. Defaults to None. + mapping_dict (dict, optional): Mapping dictionary from user in json format. Defaults to None. + columns_order (List, optional): Columns order list to change column order inside pd.DataFrame. Defaults to None. key_list (List[str], optional): List of keys needed to specify the columns in the GET request method. Defaults to None. validate_df_dict (Dict[str,Any], optional): A dictionary with optional list of tests to verify the output dataframe. If defined, triggers the `validate_df` task from task_utils. Defaults to None. 
@@ -461,8 +473,16 @@ def run( date = start_date.replace("-", "") file_name = f"conversations_detail_{date}".upper() + ".csv" + + # Possible transformation of DataFrame + if mapping_dict: + final_df.rename(columns=mapping_dict, inplace=True) + if columns_order: + final_df = df[columns_order] + if validate_df_dict: validate_df.run(df=final_df, tests=validate_df_dict) + final_df.to_csv( os.path.join(self.local_file_path, file_name), index=False, @@ -494,14 +514,21 @@ def run( data_list.append(temp_dict) df = pd.DataFrame(data_list) - df = df[df.columns[-1:]].join(df[df.columns[:-1]]) + + # Possible transformation of DataFrame + if mapping_dict: + df.rename(columns=mapping_dict, inplace=True) + if columns_order: + df = df[columns_order] + + if validate_df_dict: + validate_df.run(df=df, tests=validate_df_dict) start = start_date.replace("-", "") end = end_date.replace("-", "") file_name = f"WEBMESSAGE_{start}-{end}.csv" - if validate_df_dict: - validate_df.run(df=df, tests=validate_df_dict) + df.to_csv( os.path.join(file_name), index=False, @@ -568,6 +595,12 @@ def run( df = pd.DataFrame(data_list) + # Possible transformation of DataFrame + if mapping_dict: + df.rename(columns=mapping_dict, inplace=True) + if columns_order: + df = df[columns_order] + # data validation function (optional) if validate_df_dict: validate_df.run(df=df, tests=validate_df_dict) From 2885731a960eeca8e627aa580c179b4ca955842e Mon Sep 17 00:00:00 2001 From: burzekj Date: Fri, 24 Nov 2023 13:03:09 +0100 Subject: [PATCH 09/54] Added CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ef880c75..e07bac6e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Added new params for mapping and reordering DataFrame for `Genesys` task and flow. ### Fixed From e6c82e361bbfe169f730bcf23b2eb599f9085552 Mon Sep 17 00:00:00 2001 From: kiurieva Date: Fri, 24 Nov 2023 14:36:47 +0100 Subject: [PATCH 10/54] Updated api url in connector --- viadot/sources/vid_club.py | 84 ++++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py index e7819577a..327d9abf7 100644 --- a/viadot/sources/vid_club.py +++ b/viadot/sources/vid_club.py @@ -1,6 +1,7 @@ import json import os import urllib +from pandas.io.json import json_normalize from datetime import date, datetime, timedelta from typing import Any, Dict, List, Literal, Tuple @@ -46,7 +47,7 @@ def build_query( api_url: str, items_per_page: int, source: Literal["jobs", "product", "company", "survey"] = None, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = None, ) -> str: """ Builds the query from the inputs. 
@@ -128,7 +129,7 @@ def check_connection( from_date: str = "2022-03-22", to_date: str = None, items_per_page: int = 100, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = None, url: str = None, ) -> Tuple[Dict[str, Any], str]: """ @@ -160,20 +161,37 @@ def check_connection( if url is None: url = self.credentials["url"] - first_url = self.build_query( - source=source, - from_date=from_date, - to_date=to_date, - api_url=url, - items_per_page=items_per_page, - region=region, - ) - headers = self.headers - response = handle_api_response( - url=first_url, headers=headers, method="GET", verify=False - ) - response = response.json() - + if source in ["jobs", "product", "company"]: + first_url = self.build_query( + source=source, + from_date=from_date, + to_date=to_date, + api_url=url, + items_per_page=items_per_page, + ) + headers = self.headers + response = handle_api_response( + url=first_url, headers=headers, method="GET", verify=False + ) + response = response.json() + elif source == "survey": + first_url = self.build_query( + source=source, + from_date=from_date, + to_date=to_date, + api_url=url, + items_per_page=items_per_page, + region=region, + ) + headers = self.headers + response = handle_api_response( + url=first_url, headers=headers, method="GET", verify=False + ) + response = response.json() + else: + raise ValidationError( + "Pick one these sources: jobs, product, company, survey" + ) return (response, first_url) def get_response( @@ -182,7 +200,7 @@ def get_response( from_date: str = "2022-03-22", to_date: str = None, items_per_page: int = 100, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = None, ) -> pd.DataFrame: """ Basing on the pagination type retrieved using check_connection function, gets the response from the API queried and transforms it into DataFrame. 
@@ -207,14 +225,26 @@ def get_response( ) if to_date == None: to_date = datetime.today().strftime("%Y-%m-%d") + if source in ["jobs", "product", "company"]: + response, first_url = self.check_connection( + source=source, + from_date=from_date, + to_date=to_date, + items_per_page=items_per_page, + ) - response, first_url = self.check_connection( - source=source, - from_date=from_date, - to_date=to_date, - items_per_page=items_per_page, - region=region, - ) + elif source == "survey": + response, first_url = self.check_connection( + source=source, + from_date=from_date, + to_date=to_date, + items_per_page=items_per_page, + region=region, + ) + else: + raise ValidationError( + "Pick one these sources: jobs, product, company, survey" + ) if isinstance(response, dict): keys_list = list(response.keys()) @@ -229,7 +259,8 @@ def get_response( ind = False if "data" in keys_list: - df = pd.DataFrame(response["data"]) + df = json_normalize(response["data"]) + df = pd.DataFrame(df) length = df.shape[0] page = 1 @@ -244,7 +275,8 @@ def get_response( url=url, headers=headers, method="GET", verify=False ) response = r.json() - df_page = pd.DataFrame(response["data"]) + df_page = json_normalize(response["data"]) + df_page = pd.DataFrame(df_page) if source == "product": df_page = df_page.transpose() length = df_page.shape[0] From a638571f35246c01e2216dd0a4c5774d5cf5522b Mon Sep 17 00:00:00 2001 From: kiurieva Date: Fri, 24 Nov 2023 14:52:45 +0100 Subject: [PATCH 11/54] Updated api url in connector --- viadot/sources/vid_club.py | 84 ++++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py index e7819577a..327d9abf7 100644 --- a/viadot/sources/vid_club.py +++ b/viadot/sources/vid_club.py @@ -1,6 +1,7 @@ import json import os import urllib +from pandas.io.json import json_normalize from datetime import date, datetime, timedelta from typing import Any, Dict, List, Literal, Tuple @@ -46,7 +47,7 @@ def build_query( api_url: str, items_per_page: int, source: Literal["jobs", "product", "company", "survey"] = None, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = None, ) -> str: """ Builds the query from the inputs. 
@@ -128,7 +129,7 @@ def check_connection( from_date: str = "2022-03-22", to_date: str = None, items_per_page: int = 100, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = None, url: str = None, ) -> Tuple[Dict[str, Any], str]: """ @@ -160,20 +161,37 @@ def check_connection( if url is None: url = self.credentials["url"] - first_url = self.build_query( - source=source, - from_date=from_date, - to_date=to_date, - api_url=url, - items_per_page=items_per_page, - region=region, - ) - headers = self.headers - response = handle_api_response( - url=first_url, headers=headers, method="GET", verify=False - ) - response = response.json() - + if source in ["jobs", "product", "company"]: + first_url = self.build_query( + source=source, + from_date=from_date, + to_date=to_date, + api_url=url, + items_per_page=items_per_page, + ) + headers = self.headers + response = handle_api_response( + url=first_url, headers=headers, method="GET", verify=False + ) + response = response.json() + elif source == "survey": + first_url = self.build_query( + source=source, + from_date=from_date, + to_date=to_date, + api_url=url, + items_per_page=items_per_page, + region=region, + ) + headers = self.headers + response = handle_api_response( + url=first_url, headers=headers, method="GET", verify=False + ) + response = response.json() + else: + raise ValidationError( + "Pick one these sources: jobs, product, company, survey" + ) return (response, first_url) def get_response( @@ -182,7 +200,7 @@ def get_response( from_date: str = "2022-03-22", to_date: str = None, items_per_page: int = 100, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = None, ) -> pd.DataFrame: """ Basing on the pagination type retrieved using check_connection function, gets the response from the API queried and transforms it into DataFrame. 
@@ -207,14 +225,26 @@ def get_response( ) if to_date == None: to_date = datetime.today().strftime("%Y-%m-%d") + if source in ["jobs", "product", "company"]: + response, first_url = self.check_connection( + source=source, + from_date=from_date, + to_date=to_date, + items_per_page=items_per_page, + ) - response, first_url = self.check_connection( - source=source, - from_date=from_date, - to_date=to_date, - items_per_page=items_per_page, - region=region, - ) + elif source == "survey": + response, first_url = self.check_connection( + source=source, + from_date=from_date, + to_date=to_date, + items_per_page=items_per_page, + region=region, + ) + else: + raise ValidationError( + "Pick one these sources: jobs, product, company, survey" + ) if isinstance(response, dict): keys_list = list(response.keys()) @@ -229,7 +259,8 @@ def get_response( ind = False if "data" in keys_list: - df = pd.DataFrame(response["data"]) + df = json_normalize(response["data"]) + df = pd.DataFrame(df) length = df.shape[0] page = 1 @@ -244,7 +275,8 @@ def get_response( url=url, headers=headers, method="GET", verify=False ) response = r.json() - df_page = pd.DataFrame(response["data"]) + df_page = json_normalize(response["data"]) + df_page = pd.DataFrame(df_page) if source == "product": df_page = df_page.transpose() length = df_page.shape[0] From 5092684bb5450d6bd918c98e521efcbacdccdb34 Mon Sep 17 00:00:00 2001 From: Kateryna Iurieva Date: Mon, 27 Nov 2023 13:35:35 +0100 Subject: [PATCH 12/54] Update viadot/sources/vid_club.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wójcik <107313911+adrian-wojcik@users.noreply.github.com> --- viadot/sources/vid_club.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py index 327d9abf7..a4dacc4cb 100644 --- a/viadot/sources/vid_club.py +++ b/viadot/sources/vid_club.py @@ -190,7 +190,7 @@ def check_connection( response = response.json() else: raise ValidationError( - "Pick one these sources: jobs, product, company, survey" + "Pick one of these sources: jobs, product, company, survey" ) return (response, first_url) From 45250eadd91120c23e849afe2beffa53f9ead469 Mon Sep 17 00:00:00 2001 From: Kateryna Iurieva Date: Mon, 27 Nov 2023 13:35:40 +0100 Subject: [PATCH 13/54] Update viadot/sources/vid_club.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrian Wójcik <107313911+adrian-wojcik@users.noreply.github.com> --- viadot/sources/vid_club.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py index a4dacc4cb..497064c0b 100644 --- a/viadot/sources/vid_club.py +++ b/viadot/sources/vid_club.py @@ -243,7 +243,7 @@ def get_response( ) else: raise ValidationError( - "Pick one these sources: jobs, product, company, survey" + "Pick one of these sources: jobs, product, company, survey" ) if isinstance(response, dict): From 19b9812bf7f928184877dda04cad91c514fbb393 Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Tue, 28 Nov 2023 13:43:55 +0100 Subject: [PATCH 14/54] function to check if df empty --- viadot/flows/sharepoint_to_adls.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 18e392a55..160e8355a 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -3,7 +3,8 @@ from typing import 
Any, Dict, List import pendulum -from prefect import Flow +from prefect import Flow, task +from typing import Literal from prefect.backend import set_key_value from prefect.utilities import logging @@ -186,6 +187,11 @@ def gen_flow(self) -> Flow: def slugify(name): return name.replace(" ", "_").lower() + @task(slug="check_df") + def check_if_df_empty(df): + if len(df.index) == 0: + logger.info("No data in the response. Df empty") + class SharepointListToADLS(Flow): def __init__( @@ -207,6 +213,7 @@ def __init__( output_file_extension: str = ".parquet", validate_df_dict: dict = None, set_prefect_kv: bool = False, + if_no_data_returned: Literal["continue", "warn", "fail"] = "continue", *args: List[any], **kwargs: Dict[str, Any], ): @@ -321,7 +328,7 @@ def __init__( self.gen_flow() def gen_flow(self) -> Flow: - s = SharepointListToDF( + df = SharepointListToDF( path=self.path, list_title=self.list_title, site_url=self.site_url, @@ -331,12 +338,12 @@ def gen_flow(self) -> Flow: row_count=self.row_count, credentials_secret=self.sp_cert_credentials_secret, ) - df = s.run() if self.validate_df_dict: validation_task = validate_df(df=df, tests=self.validate_df_dict, flow=self) validation_task.set_upstream(df, flow=self) + check_if_df_empty.bind(df, flow=self) df_with_metadata = add_ingestion_metadata_task.bind(df, flow=self) dtypes_dict = df_get_data_types_task.bind(df_with_metadata, flow=self) df_mapped = df_map_mixed_dtypes_for_parquet.bind( From c4f07df790c17232f446d3ad2eaf8298da89ede9 Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Tue, 28 Nov 2023 14:57:06 +0100 Subject: [PATCH 15/54] function to check if df empty enhanced --- viadot/flows/sharepoint_to_adls.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 160e8355a..2a2d6adb6 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -187,10 +187,11 @@ def gen_flow(self) -> Flow: def slugify(name): return name.replace(" ", "_").lower() - @task(slug="check_df") - def check_if_df_empty(df): - if len(df.index) == 0: - logger.info("No data in the response. Df empty") + +@task(slug="check_df") +def check_if_df_empty(df): + if len(df.index) == 0: + logger.info("No data in the response. Df empty.") class SharepointListToADLS(Flow): From df729f56cab4be4d85eebbf05f214bcef23b41e7 Mon Sep 17 00:00:00 2001 From: cgildenia Date: Wed, 29 Nov 2023 10:59:01 +0100 Subject: [PATCH 16/54] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20merged=20path=20and?= =?UTF-8?q?=20adls=5Ffile=5Fname=20into=20file=5Fname?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sharepoint_to_adls.py | 31 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 18e392a55..1f120c418 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -193,9 +193,10 @@ def __init__( name: str, list_title: str, site_url: str, - path: str, + file_name: str, + # path: str, adls_dir_path: str, - adls_file_name: str, + # adls_file_name: str, filters: dict = None, required_fields: List[str] = None, field_property: str = "Title", @@ -219,6 +220,7 @@ def __init__( name (str): Prefect flow name. list_title (str): Title of Sharepoint List. site_url (str): URL to set of Sharepoint Lists. + file_name (str): PENDING path (str): Local file path. Default to None. 
adls_dir_path (str): Azure Data Lake destination folder/catalog path. Defaults to None. adls_file_name (str): Name of file in ADLS. Defaults to None. @@ -272,7 +274,7 @@ def __init__( """ # SharepointListToDF - self.path = path + self.file_name = file_name self.list_title = list_title self.site_url = site_url self.required_fields = required_fields @@ -285,32 +287,29 @@ def __init__( # AzureDataLakeUpload self.adls_dir_path = adls_dir_path - self.adls_file_name = adls_file_name self.overwrite = overwrite_adls self.adls_sp_credentials_secret = adls_sp_credentials_secret self.output_file_extension = output_file_extension self.set_prefect_kv = set_prefect_kv self.now = str(pendulum.now("utc")) - if self.path is not None: + if self.file_name is not None: self.local_file_path = ( - self.path + self.slugify(name) + self.output_file_extension + self.file_name + self.slugify(name) + self.output_file_extension ) - else: - self.local_file_path = self.slugify(name) + self.output_file_extension - self.local_json_path = self.slugify(name) + ".json" - self.adls_dir_path = adls_dir_path - if adls_file_name is not None: - self.adls_file_path = os.path.join(adls_dir_path, adls_file_name) + self.adls_file_path = os.path.join(adls_dir_path, file_name) self.adls_schema_file_dir_file = os.path.join( - adls_dir_path, "schema", Path(adls_file_name).stem + ".json" + adls_dir_path, "schema", Path(file_name).stem + ".json" ) else: + self.local_file_path = self.slugify(name) + self.output_file_extension self.adls_file_path = os.path.join( adls_dir_path, self.now + self.output_file_extension ) self.adls_schema_file_dir_file = os.path.join( adls_dir_path, "schema", self.now + ".json" ) + self.local_json_path = self.slugify(name) + ".json" + self.adls_dir_path = adls_dir_path super().__init__( name=name, @@ -322,7 +321,7 @@ def __init__( def gen_flow(self) -> Flow: s = SharepointListToDF( - path=self.path, + path=self.file_name, list_title=self.list_title, site_url=self.site_url, required_fields=self.required_fields, @@ -345,13 +344,13 @@ def gen_flow(self) -> Flow: df_to_file = df_to_parquet.bind( df=df_mapped, - path=self.path, + path=self.file_name, flow=self, ) file_to_adls_task = AzureDataLakeUpload() file_to_adls_task.bind( - from_path=self.path, + from_path=self.file_name, to_path=self.adls_dir_path, overwrite=self.overwrite, sp_credentials_secret=self.adls_sp_credentials_secret, From 759adf2107be74f33cd85a912123e4ae5da349dd Mon Sep 17 00:00:00 2001 From: cgildenia Date: Wed, 29 Nov 2023 11:01:39 +0100 Subject: [PATCH 17/54] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20updated=20docstring?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sharepoint_to_adls.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 1f120c418..43b51df49 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -194,9 +194,7 @@ def __init__( list_title: str, site_url: str, file_name: str, - # path: str, adls_dir_path: str, - # adls_file_name: str, filters: dict = None, required_fields: List[str] = None, field_property: str = "Title", @@ -220,10 +218,8 @@ def __init__( name (str): Prefect flow name. list_title (str): Title of Sharepoint List. site_url (str): URL to set of Sharepoint Lists. - file_name (str): PENDING - path (str): Local file path. Default to None. + file_name (str): Name of file in ADLS. Defaults to None. 
adls_dir_path (str): Azure Data Lake destination folder/catalog path. Defaults to None. - adls_file_name (str): Name of file in ADLS. Defaults to None. filters (dict, optional): Dictionary with operators which filters the SharepointList output. Defaults to None. allowed dtypes: ('datetime','date','bool','int', 'float', 'complex', 'str') allowed conjunction: ('&','|') From 495f3c560f10d87684f341f83edab0243ecbac18 Mon Sep 17 00:00:00 2001 From: cgildenia Date: Wed, 29 Nov 2023 12:33:14 +0100 Subject: [PATCH 18/54] =?UTF-8?q?=E2=9C=A8=20added=20to=5Fcsv=20in=20list?= =?UTF-8?q?=20class?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sharepoint_to_adls.py | 34 ++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 43b51df49..bcc8c5881 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -65,6 +65,7 @@ def __init__( Defaults to None. overwrite_adls (bool, optional): Whether to overwrite files in the lake. Defaults to False. if_empty (str, optional): What to do if query returns no data. Defaults to "warn". + if_exists (str, optional): What to do if the file already exists. Defaults to "replace". validate_df_dict (dict, optional): A dictionary with optional list of tests to verify the output dataframe. If defined, triggers the `validate_df` task from task_utils. Defaults to None. timeout(int, optional): The amount of time (in seconds) to wait while running this task before @@ -206,6 +207,7 @@ def __init__( output_file_extension: str = ".parquet", validate_df_dict: dict = None, set_prefect_kv: bool = False, + if_exists: str = "replace", *args: List[any], **kwargs: Dict[str, Any], ): @@ -264,6 +266,7 @@ def __init__( output_file_extension (str, optional): Extension of the resulting file to be stored. Defaults to ".parquet". validate_df_dict (dict, optional): Whether to do an extra df validation before ADLS upload or not to do. Defaults to None. set_prefect_kv (bool, optional): Whether to do key-value parameters in KV Store or not. Defaults to False. + if_exists (str, optional): What to do if the file already exists. Defaults to "replace". Returns: .parquet file inside ADLS. 
@@ -280,6 +283,7 @@ def __init__( self.vault_name = vault_name self.row_count = row_count self.validate_df_dict = validate_df_dict + self.if_exists = if_exists # AzureDataLakeUpload self.adls_dir_path = adls_dir_path @@ -290,7 +294,8 @@ def __init__( self.now = str(pendulum.now("utc")) if self.file_name is not None: self.local_file_path = ( - self.file_name + self.slugify(name) + self.output_file_extension + self.file_name.split('.')[0] + self.output_file_extension + # self.file_name + self.slugify(name) + self.output_file_extension ) self.adls_file_path = os.path.join(adls_dir_path, file_name) self.adls_schema_file_dir_file = os.path.join( @@ -338,15 +343,30 @@ def gen_flow(self) -> Flow: df_with_metadata, dtypes_dict, flow=self ) - df_to_file = df_to_parquet.bind( - df=df_mapped, - path=self.file_name, - flow=self, - ) + # df_to_file = df_to_parquet.bind( + # df=df_mapped, + # path=self.file_name, + # flow=self, + # ) + + if self.output_file_extension == ".csv": + df_to_file = df_to_csv.bind( + df=df_with_metadata, + path=self.local_file_path, + if_exists=self.if_exists, + flow=self, + ) + else: + df_to_file = df_to_parquet.bind( + df=df_mapped, + path=self.local_file_path, + if_exists=self.if_exists, + flow=self, + ) file_to_adls_task = AzureDataLakeUpload() file_to_adls_task.bind( - from_path=self.file_name, + from_path=self.local_file_path, to_path=self.adls_dir_path, overwrite=self.overwrite, sp_credentials_secret=self.adls_sp_credentials_secret, From 8a615189178235ea397cfbf8b419a0057e28b981 Mon Sep 17 00:00:00 2001 From: burzekj Date: Wed, 29 Nov 2023 13:04:33 +0100 Subject: [PATCH 19/54] =?UTF-8?q?=E2=9C=85=20added=20more=20tests=20for=20?= =?UTF-8?q?genesys=20Task?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/tasks/test_genesys_task.py | 39 +++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tests/integration/tasks/test_genesys_task.py b/tests/integration/tasks/test_genesys_task.py index eb4978fa6..424a45c52 100644 --- a/tests/integration/tasks/test_genesys_task.py +++ b/tests/integration/tasks/test_genesys_task.py @@ -106,7 +106,10 @@ def genesys_api_connection(post_data_list, end_point, method="POST"): "messages": [], } ], + "pageCount": 2, + "entities": [{"id": "xxx"}], } + else: report = { "conversations": [ @@ -307,7 +310,7 @@ def test_genesys_conversations(mock_genesys, var_dictionary): @mock.patch("viadot.tasks.genesys.Genesys", return_value=MockGenesysTask) @pytest.mark.conv -def test_genesys_webmsg(mock_genesys, var_dictionary): +def test_genesys_webmsg_conversations(mock_genesys, var_dictionary): to_csv = GenesysToCSV() file_name = to_csv.run( view_type=None, @@ -324,3 +327,37 @@ def test_genesys_webmsg(mock_genesys, var_dictionary): mock_genesys.assert_called_once() assert file_name[0] == f"WEBMESSAGE_{start}-{end}.csv" + + +@mock.patch("viadot.tasks.genesys.Genesys", return_value=MockGenesysTask) +@pytest.mark.conv +def test_genesys_users(mock_genesys, var_dictionary): + to_csv = GenesysToCSV() + file_name = to_csv.run( + view_type=None, + end_point="users", + conversationId_list=var_dictionary["v_list"], + post_data_list=[""], + key_list=var_dictionary["key_list"], + start_date=var_dictionary["start_date"], + end_date=var_dictionary["end_date"], + ) + + mock_genesys.assert_called_once() + assert file_name[0] == f"All_Genesys_Users.csv" + + +@mock.patch("viadot.tasks.genesys.Genesys", return_value=MockGenesysTask) +@pytest.mark.conv +def 
test_genesys_queue_performance_detail_view(mock_genesys, var_dictionary): + genesys = GenesysToCSV() + output = genesys.run( + view_type="queue_performance_detail_view", + end_point=None, + conversationId_list=var_dictionary["v_list"], + post_data_list=[""], + key_list=var_dictionary["key_list"], + start_date=var_dictionary["start_date"], + end_date=var_dictionary["end_date"], + ) + assert output is None From 34859ed8516eae4919f946722cc7473fa5fd1102 Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Wed, 29 Nov 2023 13:26:49 +0100 Subject: [PATCH 20/54] Modified logic for check df. Df check and flow Finish in the flow added --- viadot/flows/sharepoint_to_adls.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 2a2d6adb6..5bca3cc8e 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -7,6 +7,7 @@ from typing import Literal from prefect.backend import set_key_value from prefect.utilities import logging +from prefect.engine.state import Finished from viadot.task_utils import ( add_ingestion_metadata_task, @@ -189,9 +190,20 @@ def slugify(name): @task(slug="check_df") -def check_if_df_empty(df): +def check_if_df_empty(df, if_no_data_returned: str = "skip"): + # -> to task.utils + class NoDataReturnedError(Exception): + def __init__(self, message): + self.message = message + if len(df.index) == 0: - logger.info("No data in the response. Df empty.") + if if_no_data_returned == "skip": + logger.info("No data in the source response. Df empty.") + elif if_no_data_returned == "warn": + logger.warning("No data in the source response. Df empty.") + elif if_no_data_returned == "fail": + raise NoDataReturnedError("No data in the source response. Df empty.") + return True class SharepointListToADLS(Flow): @@ -214,7 +226,7 @@ def __init__( output_file_extension: str = ".parquet", validate_df_dict: dict = None, set_prefect_kv: bool = False, - if_no_data_returned: Literal["continue", "warn", "fail"] = "continue", + if_no_data_returned: Literal["skip", "warn", "fail"] = "skip", *args: List[any], **kwargs: Dict[str, Any], ): @@ -290,6 +302,7 @@ def __init__( self.vault_name = vault_name self.row_count = row_count self.validate_df_dict = validate_df_dict + self.if_no_data_returned = if_no_data_returned # AzureDataLakeUpload self.adls_dir_path = adls_dir_path @@ -339,12 +352,18 @@ def gen_flow(self) -> Flow: row_count=self.row_count, credentials_secret=self.sp_cert_credentials_secret, ) + df_empty = check_if_df_empty.bind(df, self.if_no_data_returned) + + if df_empty: + if self.if_no_data_returned == "warn": + raise Finished( + "Flow finished because there is no new data for ingestion." 
+ ) if self.validate_df_dict: validation_task = validate_df(df=df, tests=self.validate_df_dict, flow=self) validation_task.set_upstream(df, flow=self) - check_if_df_empty.bind(df, flow=self) df_with_metadata = add_ingestion_metadata_task.bind(df, flow=self) dtypes_dict = df_get_data_types_task.bind(df_with_metadata, flow=self) df_mapped = df_map_mixed_dtypes_for_parquet.bind( From b10a9518944dc9161a669006a01305526058bbe5 Mon Sep 17 00:00:00 2001 From: cgildenia Date: Wed, 29 Nov 2023 13:44:56 +0100 Subject: [PATCH 21/54] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20removed=20unused=20c?= =?UTF-8?q?ode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sharepoint_to_adls.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index bcc8c5881..a539d6628 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -207,7 +207,6 @@ def __init__( output_file_extension: str = ".parquet", validate_df_dict: dict = None, set_prefect_kv: bool = False, - if_exists: str = "replace", *args: List[any], **kwargs: Dict[str, Any], ): @@ -266,7 +265,6 @@ def __init__( output_file_extension (str, optional): Extension of the resulting file to be stored. Defaults to ".parquet". validate_df_dict (dict, optional): Whether to do an extra df validation before ADLS upload or not to do. Defaults to None. set_prefect_kv (bool, optional): Whether to do key-value parameters in KV Store or not. Defaults to False. - if_exists (str, optional): What to do if the file already exists. Defaults to "replace". Returns: .parquet file inside ADLS. @@ -283,7 +281,6 @@ def __init__( self.vault_name = vault_name self.row_count = row_count self.validate_df_dict = validate_df_dict - self.if_exists = if_exists # AzureDataLakeUpload self.adls_dir_path = adls_dir_path @@ -295,7 +292,6 @@ def __init__( if self.file_name is not None: self.local_file_path = ( self.file_name.split('.')[0] + self.output_file_extension - # self.file_name + self.slugify(name) + self.output_file_extension ) self.adls_file_path = os.path.join(adls_dir_path, file_name) self.adls_schema_file_dir_file = os.path.join( @@ -331,36 +327,27 @@ def gen_flow(self) -> Flow: row_count=self.row_count, credentials_secret=self.sp_cert_credentials_secret, ) - df = s.run() if self.validate_df_dict: - validation_task = validate_df(df=df, tests=self.validate_df_dict, flow=self) - validation_task.set_upstream(df, flow=self) + validation_task = validate_df(df=s, tests=self.validate_df_dict, flow=self) + validation_task.set_upstream(s, flow=self) - df_with_metadata = add_ingestion_metadata_task.bind(df, flow=self) + df_with_metadata = add_ingestion_metadata_task.bind(s, flow=self) dtypes_dict = df_get_data_types_task.bind(df_with_metadata, flow=self) df_mapped = df_map_mixed_dtypes_for_parquet.bind( df_with_metadata, dtypes_dict, flow=self ) - - # df_to_file = df_to_parquet.bind( - # df=df_mapped, - # path=self.file_name, - # flow=self, - # ) if self.output_file_extension == ".csv": df_to_file = df_to_csv.bind( df=df_with_metadata, path=self.local_file_path, - if_exists=self.if_exists, flow=self, ) else: df_to_file = df_to_parquet.bind( df=df_mapped, path=self.local_file_path, - if_exists=self.if_exists, flow=self, ) From 6dc7a6fa54e527a0706c221f16fe19eb8cb9f2a7 Mon Sep 17 00:00:00 2001 From: cgildenia Date: Thu, 30 Nov 2023 10:05:10 +0100 Subject: [PATCH 22/54] 
=?UTF-8?q?=E2=9C=A8=20added=20separator=20argument?= =?UTF-8?q?=20for=20csv=20saving?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sharepoint_to_adls.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index a539d6628..787c78436 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -205,6 +205,7 @@ def __init__( vault_name: str = None, overwrite_adls: bool = True, output_file_extension: str = ".parquet", + sep: str = "\t", validate_df_dict: dict = None, set_prefect_kv: bool = False, *args: List[any], @@ -263,6 +264,7 @@ def __init__( vault_name (str, optional): KeyVaultSecret name. Default to None. overwrite_adls (bool, optional): Whether to overwrite files in the lake. Defaults to True. output_file_extension (str, optional): Extension of the resulting file to be stored. Defaults to ".parquet". + sep (str, optional): The separator to use in the CSV. Defaults to "\t". validate_df_dict (dict, optional): Whether to do an extra df validation before ADLS upload or not to do. Defaults to None. set_prefect_kv (bool, optional): Whether to do key-value parameters in KV Store or not. Defaults to False. @@ -287,11 +289,12 @@ def __init__( self.overwrite = overwrite_adls self.adls_sp_credentials_secret = adls_sp_credentials_secret self.output_file_extension = output_file_extension + self.sep = sep self.set_prefect_kv = set_prefect_kv self.now = str(pendulum.now("utc")) if self.file_name is not None: self.local_file_path = ( - self.file_name.split('.')[0] + self.output_file_extension + self.file_name.split(".")[0] + self.output_file_extension ) self.adls_file_path = os.path.join(adls_dir_path, file_name) self.adls_schema_file_dir_file = os.path.join( @@ -317,7 +320,7 @@ def __init__( self.gen_flow() def gen_flow(self) -> Flow: - s = SharepointListToDF( + df = SharepointListToDF( path=self.file_name, list_title=self.list_title, site_url=self.site_url, @@ -329,19 +332,20 @@ def gen_flow(self) -> Flow: ) if self.validate_df_dict: - validation_task = validate_df(df=s, tests=self.validate_df_dict, flow=self) - validation_task.set_upstream(s, flow=self) + validation_task = validate_df(df=df, tests=self.validate_df_dict, flow=self) + validation_task.set_upstream(df, flow=self) - df_with_metadata = add_ingestion_metadata_task.bind(s, flow=self) + df_with_metadata = add_ingestion_metadata_task.bind(df, flow=self) dtypes_dict = df_get_data_types_task.bind(df_with_metadata, flow=self) df_mapped = df_map_mixed_dtypes_for_parquet.bind( df_with_metadata, dtypes_dict, flow=self ) - + if self.output_file_extension == ".csv": df_to_file = df_to_csv.bind( df=df_with_metadata, path=self.local_file_path, + sep=self.sep, flow=self, ) else: From 7c6b9e36094303bef3aa0cf4ef3a1316d83e0da0 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Thu, 30 Nov 2023 11:07:17 +0100 Subject: [PATCH 23/54] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Replaced=20`check=5F?= =?UTF-8?q?value`=20and=20`get=5Fnested=5Fdict`=20with=20one=20-=20`get=5F?= =?UTF-8?q?nested=5Fvalue`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/utils.py | 60 +++++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/viadot/utils.py b/viadot/utils.py index 4690c8cbd..03ea95aea 100644 --- a/viadot/utils.py +++ b/viadot/utils.py @@ -461,37 +461,43 @@ def wrapper(*args, 
**kwargs) -> pd.DataFrame: return decorator -def get_nested_dict(d): - if isinstance(d, dict): - for lvl in d.values(): - if isinstance(lvl, dict): - return get_nested_dict(lvl) - else: - return d - else: - return None - - -def check_value(base: Union[Dict, Any], levels: List) -> Union[None, Any]: +def get_nested_value( + nested_dict: dict, + levels_to_search: List[str] = None, +) -> Union[None, Any]: """ - Task to extract data from nested json file if there is any under passed parameters. - Otherwise return None. + Retrieve a value from a nested dictionary based on specified levels if the `levels_to_search` are provided. + Retrieve a key:value pair of the first deepest pair if `levels_to_search` is not provided. Args: - base (Dict, Any): variable with base lvl of the json, for example: - json_file["first_known_lvl"]["second_known_lvl"]["third_known_lvl"] - levels (List): List of potential lower levels of nested json for data retrieval. For example: - ["first_lvl_below_base", "second_lvl_below_base", "searched_phrase"] + nested_dict (dict): The nested dictionary to search for the value. + levels_to_search (List[str], optional): List of keys representing the levels to search. Defaults to None. + If provided, the function will attempt to retrieve the value at the specified levels. + If not provided, the function will recursively search for the first non-dictionary value. Returns: - Union[None, Any]: Searched value for the lowest level, in example data under "searched_phrase" key. + Union[None, Any]: The searched value for the specified level or the first key:value pair when + first non-dictionary value found during recursive search. + Returns None if the nested_dict is not a dictionary or if the specified levels are not found. """ - - for lvl in levels: - if isinstance(base, dict): - base = base.get(lvl) - if base is None: - return None + try: + if levels_to_search is not None: + for lvl in levels_to_search: + if isinstance(nested_dict[lvl], dict): + return get_nested_value( + nested_dict=nested_dict[levels_to_search.pop(0)], + levels_to_search=levels_to_search, + ) + else: + return nested_dict[lvl] else: - return base - return base + for lvl in nested_dict.values(): + if isinstance(lvl, dict): + return get_nested_value(nested_dict=lvl) + else: + return nested_dict + except KeyError as e: + return None + except TypeError as e: + logger.error(f"The 'nested_dict' must be a dictionary. 
{e}") + return None From 102fc933d27f79c723c03ed3f67440f23cd4b5b3 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Thu, 30 Nov 2023 11:09:01 +0100 Subject: [PATCH 24/54] =?UTF-8?q?=F0=9F=90=9B=20Added=20tests=20for=20`tes?= =?UTF-8?q?t=5Fnested=5Fvalue`=20function?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_utils.py | 124 +++++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 58 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 0b2ed5782..e0177b3ea 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -11,7 +11,8 @@ add_viadot_metadata_columns, check_if_empty_file, gen_bulk_insert_query_from_df, - check_value, + get_flow_last_run_date, + get_nested_value, get_sql_server_table_dtypes, slugify, handle_api_response, @@ -52,6 +53,27 @@ def azure_sql(): yield azure_sql +@pytest.fixture(scope="function") +def nested_dict(): + nested_dict = { + "first_known_lvl": { + "second_known_lvl": { + "third_known_lvl": { + "searched_lvl": { + "searched_phrase_1": "First value", + "searched_phrase_2": None, + "searched_phrase_3": "Found it!", + } + } + } + }, + "first_known_lvl_2": { + "second_known_lvl_2": {"searched_phrase_2": "Found it_2!"} + }, + } + return nested_dict + + def test_slugify(): """To test slugify() function functionalities work""" test_string = "Text With Spaces Before Changes" @@ -209,63 +231,6 @@ def test_add_viadot_metadata_columns_with_parameter(): assert df_decorated["_viadot_source"][0] == "Source_name" -def test_check_value_found(): - """Sample test checking the correctness of the function when the key is found.""" - json_data = { - "first_known_lvl": { - "second_known_lvl": {"third_known_lvl": {"searched_phrase": "phrase"}} - } - } - result = check_value( - json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], - ["searched_phrase"], - ) - assert result == "phrase" - - -def test_check_value_not_found(): - """Sample test checking the correctness of the function when the key is not found.""" - json_data = { - "first_known_lvl": { - "second_known_lvl": { - "third_known_lvl": {"other_phrase": "This won't be found"} - } - } - } - result = check_value( - json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], - ["searched_phrase"], - ) - assert result is None - - -def test_check_value_empty_dict(): - """Sample test checking the correctness of the function with an empty dictionary.""" - json_data = {} - result = check_value(json_data, ["searched_phrase"]) - assert result is None - - -def test_check_value_nonexistent_key(): - """Sample test checking the correctness of the function with a nonexistent key.""" - - json_data = { - "first_known_lvl": { - "second_known_lvl": {"third_known_lvl": {"searched_phrase": "phrase"}} - } - } - result = check_value(json_data, ["nonexistent_key"]) - assert result is None - - -def test_check_value_base_is_not_dict(): - result = check_value( - base="this_is_not_dict", - levels=["searched_phrase"], - ) - assert result == "this_is_not_dict" - - def test_handle_api_response_wrong_method(): """Test to check if ValueError is thrown when wrong method is used.""" @@ -332,3 +297,46 @@ def test_union_dict_return(): unioned_dict = union_dict(a, b) assert isinstance(unioned_dict, dict) assert unioned_dict == {"a": 1, "b": 2} + + +def test_get_nested_value_found(nested_dict): + """Sample test checking the correctness of the function when the key is found.""" + result = get_nested_value( + 
nested_dict=nested_dict["first_known_lvl"]["second_known_lvl"][ + "third_known_lvl" + ], + levels_to_search=["searched_lvl", "searched_phrase_3"], + ) + assert result == "Found it!" + + +def test_get_nested_value_not_found(nested_dict): + """Sample test checking the correctness of the function when the key is not found.""" + result = get_nested_value( + nested_dict["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], + levels_to_search=["searched_wrong_lvl"], + ) + assert result is None + + +def test_get_nested_value_nested_dict_is_string(caplog): + """Sample test checking the correctness of the function when non-dictionary value is provided as nested_dict.""" + with caplog.at_level(logging.WARNING): + get_nested_value( + nested_dict="this_is_not_dict", + levels_to_search=["searched_phrase"], + ) + assert "The 'nested_dict' must be a dictionary." in caplog.text + + +def test_get_nested_value_without_levels(nested_dict): + """Sample test checking the correctness of the function when only `nested_value` is provided.""" + result_1 = get_nested_value(nested_dict=nested_dict) + result_2 = get_nested_value(nested_dict=nested_dict["first_known_lvl_2"]) + + assert result_1 == { + "searched_phrase_1": "First value", + "searched_phrase_2": None, + "searched_phrase_3": "Found it!", + } + assert result_2 == {"searched_phrase_2": "Found it_2!"} From 895671638c8766421a9d5ecfedc8fa776886e628 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Thu, 30 Nov 2023 11:10:06 +0100 Subject: [PATCH 25/54] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Changed=20funtion=20?= =?UTF-8?q?from=20`get=5Fnested=5Fdict`=20to=20`get=5Fnested=5Fvalue`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/sharepoint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viadot/sources/sharepoint.py b/viadot/sources/sharepoint.py index fbbd1b08b..08e616326 100644 --- a/viadot/sources/sharepoint.py +++ b/viadot/sources/sharepoint.py @@ -10,7 +10,7 @@ from office365.sharepoint.client_context import ClientContext from prefect.utilities import logging -from viadot.utils import get_nested_dict +from viadot.utils import get_nested_value from ..config import local_config from ..exceptions import CredentialError @@ -168,7 +168,7 @@ def _unpack_fields( item_values_dict = list_item.properties if item_values_dict: for field, val in item_values_dict.items(): - nested_dict = get_nested_dict(val) + nested_dict = get_nested_value(val) # Check if the values are nested if nested_dict != None: # Check if field has expandable type From dfacdfab48b098ba306a22dfbaf6e5dec1ca29d8 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Thu, 30 Nov 2023 11:33:24 +0100 Subject: [PATCH 26/54] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Changed=20funtion=20?= =?UTF-8?q?from=20`check=5Fvalue`=20to=20`get=5Fnested=5Fvalue`=20in=20gen?= =?UTF-8?q?esys?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/genesys.py | 50 +++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 942249ac2..0b9d803b8 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -13,7 +13,7 @@ from viadot.exceptions import APIError from viadot.sources import Genesys -from viadot.utils import check_value +from viadot.utils import get_nested_value from viadot.task_utils import * logger = logging.get_logger() @@ -537,31 +537,43 @@ def run( # For loop to extract data from specific 
page for id in range(0, num_ids): record_dict = {} - record_dict["Id"] = check_value(json_file["entities"][id], ["id"]) - record_dict["Name"] = check_value( - json_file["entities"][id], ["name"] + record_dict["Id"] = get_nested_value( + nested_dict=json_file["entities"][id], levels_to_search=["id"] ) - record_dict["DivisionName"] = check_value( - json_file["entities"][id], ["division", "name"] + record_dict["Name"] = get_nested_value( + nested_dict=json_file["entities"][id], levels_to_search=["name"] ) - record_dict["Email"] = check_value( - json_file["entities"][id], ["email"] + record_dict["DivisionName"] = get_nested_value( + nested_dict=json_file["entities"][id], + levels_to_search=["division", "name"], ) - record_dict["State"] = check_value( - json_file["entities"][id], ["state"] + record_dict["Email"] = get_nested_value( + nested_dict=json_file["entities"][id], + levels_to_search=["email"], ) - record_dict["Title"] = check_value( - json_file["entities"][id], ["title"] + record_dict["State"] = get_nested_value( + nested_dict=json_file["entities"][id], + levels_to_search=["state"], ) - record_dict["Username"] = check_value( - json_file["entities"][id], ["username"] + record_dict["Title"] = get_nested_value( + nested_dict=json_file["entities"][id], + levels_to_search=["title"], ) - record_dict["SystemPresence"] = check_value( - json_file["entities"][id], - ["presence", "presenceDefinition", "systemPresence"], + record_dict["Username"] = get_nested_value( + nested_dict=json_file["entities"][id], + levels_to_search=["username"], ) - record_dict["DateLastLogin"] = check_value( - json_file["entities"][id], ["dateLastLogin"] + record_dict["SystemPresence"] = get_nested_value( + nested_dict=json_file["entities"][id], + levels_to_search=[ + "presence", + "presenceDefinition", + "systemPresence", + ], + ) + record_dict["DateLastLogin"] = get_nested_value( + nested_dict=json_file["entities"][id], + levels_to_search=["dateLastLogin"], ) data_list.append(record_dict) From 416ca6e85b155cceb69fbaa1935b248833a1a96d Mon Sep 17 00:00:00 2001 From: burzekj Date: Fri, 1 Dec 2023 10:35:05 +0100 Subject: [PATCH 27/54] Changed dosc string for new class arguments --- viadot/flows/genesys_to_adls.py | 13 +++++++++++++ viadot/tasks/genesys.py | 26 ++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/viadot/flows/genesys_to_adls.py b/viadot/flows/genesys_to_adls.py index 1cebe5a65..29ec1277a 100644 --- a/viadot/flows/genesys_to_adls.py +++ b/viadot/flows/genesys_to_adls.py @@ -140,7 +140,20 @@ def __init__( report_columns (List[str], optional): List of exisiting column in report. Defaults to None. conversationId_list (List[str], optional): List of conversationId passed as attribute of GET method. Defaults to None. mapping_dict (dict, optional): Mapping dictionary from user in json format. Defaults to None. + Example of mapping_dict: + mapping_dict = { + "col1": "column1", + "col_3": "column3", + "colum2": "column2", + } + where keys in dictionary mapping_dict are current DataFrame columns names. columns_order (List, optional): Columns order list to change column order inside pd.DataFrame. Defaults to None. + Example of columns_order: + columns_order = [ + "column1", + "column2", + "column3", + ] key_list (List[str], optional): List of keys needed to specify the columns in the GET request method. Defaults to None. local_file_path (str, optional): The local path from which to upload the file(s). Defaults to "". adls_file_path (str, optional): The destination path at ADLS. Defaults to None. 
diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 88014a6be..d974dd587 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -59,7 +59,20 @@ def __init__( sep (str, optional): Separator in csv file. Defaults to "\t". conversationId_list (List[str], optional): List of conversationId passed as attribute of GET method. Defaults to None. mapping_dict (dict, optional): Mapping dictionary from user in json format. Defaults to None. + Example of mapping_dict: + mapping_dict = { + "col1": "column1", + "col_3": "column3", + "colum2": "column2", + } + where keys in dictionary mapping_dict are current DataFrame columns names. columns_order (List, optional): Columns order list to change column order inside pd.DataFrame. Defaults to None. + Example of columns_order: + columns_order = [ + "column1", + "column2", + "column3", + ] key_list (List[str], optional): List of keys needed to specify the columns in the GET request method. Defaults to None. validate_df_dict (Dict[str,Any], optional): A dictionary with optional list of tests to verify the output dataframe. If defined, triggers the `validate_df` task from task_utils. Defaults to None. @@ -345,7 +358,20 @@ def run( report_columns (List[str], optional): List of exisiting column in report. Defaults to None. conversationId_list (List[str], optional): List of conversationId passed as attribute of GET method. Defaults to None. mapping_dict (dict, optional): Mapping dictionary from user in json format. Defaults to None. + Example of mapping_dict: + mapping_dict = { + "col1": "column1", + "col_3": "column3", + "colum2": "column2", + } + where keys in dictionary mapping_dict are current DataFrame columns names. columns_order (List, optional): Columns order list to change column order inside pd.DataFrame. Defaults to None. + Example of columns_order: + columns_order = [ + "column1", + "column2", + "column3", + ] key_list (List[str], optional): List of keys needed to specify the columns in the GET request method. Defaults to None. validate_df_dict (Dict[str,Any], optional): A dictionary with optional list of tests to verify the output dataframe. If defined, triggers the `validate_df` task from task_utils. Defaults to None. 
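
For reference, the `mapping_dict` and `columns_order` arguments documented in the patch above describe plain pandas-style post-processing: the keys of `mapping_dict` are the current DataFrame column names, its values are the target names, and `columns_order` fixes the final column order. A minimal standalone sketch under those assumptions (the column names are illustrative only, and the actual Genesys task may apply these steps differently):

    import pandas as pd

    # Hypothetical Genesys-style output, used only for illustration.
    df = pd.DataFrame({"col1": [1, 2], "colum2": ["a", "b"], "col_3": [0.1, 0.2]})

    # Keys are the current column names, values are the target names.
    mapping_dict = {"col1": "column1", "col_3": "column3", "colum2": "column2"}

    # Final column order expected downstream.
    columns_order = ["column1", "column2", "column3"]

    df = df.rename(columns=mapping_dict)[columns_order]
    print(list(df.columns))  # ['column1', 'column2', 'column3']
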
From b6cfd413cc9448fdf28f603e98bbeaa919538a15 Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Fri, 1 Dec 2023 11:54:20 +0100 Subject: [PATCH 28/54] update for sharepoint list to df with function for checking df --- viadot/flows/sharepoint_to_adls.py | 119 +++++++++++++++-------------- 1 file changed, 60 insertions(+), 59 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 5bca3cc8e..2c233eaf8 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -3,11 +3,12 @@ from typing import Any, Dict, List import pendulum -from prefect import Flow, task +from prefect import Flow, task, case +from prefect.engine.state import Failed +from prefect.engine.runner import ENDRUN from typing import Literal from prefect.backend import set_key_value from prefect.utilities import logging -from prefect.engine.state import Finished from viadot.task_utils import ( add_ingestion_metadata_task, @@ -190,20 +191,19 @@ def slugify(name): @task(slug="check_df") -def check_if_df_empty(df, if_no_data_returned: str = "skip"): +def check_if_df_empty(df, if_no_data_returned: str = "fail"): # -> to task.utils - class NoDataReturnedError(Exception): + class NoDataReturnedError(BaseException): def __init__(self, message): self.message = message - if len(df.index) == 0: - if if_no_data_returned == "skip": - logger.info("No data in the source response. Df empty.") - elif if_no_data_returned == "warn": + if df.empty: + if if_no_data_returned == "warn": logger.warning("No data in the source response. Df empty.") + return True + # raise ENDRUN(state=Failed("Failed task raised")) elif if_no_data_returned == "fail": - raise NoDataReturnedError("No data in the source response. Df empty.") - return True + raise NoDataReturnedError("No data in the source response. Df empty...") class SharepointListToADLS(Flow): @@ -352,63 +352,64 @@ def gen_flow(self) -> Flow: row_count=self.row_count, credentials_secret=self.sp_cert_credentials_secret, ) - df_empty = check_if_df_empty.bind(df, self.if_no_data_returned) - if df_empty: - if self.if_no_data_returned == "warn": - raise Finished( - "Flow finished because there is no new data for ingestion." 
- ) - - if self.validate_df_dict: - validation_task = validate_df(df=df, tests=self.validate_df_dict, flow=self) - validation_task.set_upstream(df, flow=self) - - df_with_metadata = add_ingestion_metadata_task.bind(df, flow=self) - dtypes_dict = df_get_data_types_task.bind(df_with_metadata, flow=self) - df_mapped = df_map_mixed_dtypes_for_parquet.bind( - df_with_metadata, dtypes_dict, flow=self - ) + if self.if_no_data_returned != "skip": + df_empty = check_if_df_empty.bind(df, self.if_no_data_returned, flow=self) + # If df empty there is no reason to run other tasks + else: + df_empty = False - df_to_file = df_to_parquet.bind( - df=df_mapped, - path=self.path, - flow=self, - ) + with case(df_empty, False): + if self.validate_df_dict: + validation_task = validate_df( + df=df, tests=self.validate_df_dict, flow=self + ) + validation_task.set_upstream(df, flow=self) - file_to_adls_task = AzureDataLakeUpload() - file_to_adls_task.bind( - from_path=self.path, - to_path=self.adls_dir_path, - overwrite=self.overwrite, - sp_credentials_secret=self.adls_sp_credentials_secret, - flow=self, - ) + df_with_metadata = add_ingestion_metadata_task.bind(df, flow=self) + dtypes_dict = df_get_data_types_task.bind(df_with_metadata, flow=self) + df_mapped = df_map_mixed_dtypes_for_parquet.bind( + df_with_metadata, dtypes_dict, flow=self + ) - dtypes_to_json_task.bind( - dtypes_dict=dtypes_dict, local_json_path=self.local_json_path, flow=self - ) + df_to_file = df_to_parquet.bind( + df=df_mapped, + path=self.path, + flow=self, + ) - json_to_adls_task = AzureDataLakeUpload() - json_to_adls_task.bind( - from_path=self.local_json_path, - to_path=self.adls_schema_file_dir_file, - overwrite=self.overwrite, - sp_credentials_secret=self.adls_sp_credentials_secret, - flow=self, - ) + file_to_adls_task = AzureDataLakeUpload() + file_to_adls_task.bind( + from_path=self.path, + to_path=self.adls_dir_path, + overwrite=self.overwrite, + sp_credentials_secret=self.adls_sp_credentials_secret, + flow=self, + ) - if self.validate_df_dict: - df_with_metadata.set_upstream(validation_task, flow=self) + dtypes_to_json_task.bind( + dtypes_dict=dtypes_dict, local_json_path=self.local_json_path, flow=self + ) - df_mapped.set_upstream(df_with_metadata, flow=self) - dtypes_to_json_task.set_upstream(df_mapped, flow=self) - df_to_file.set_upstream(dtypes_to_json_task, flow=self) + json_to_adls_task = AzureDataLakeUpload() + json_to_adls_task.bind( + from_path=self.local_json_path, + to_path=self.adls_schema_file_dir_file, + overwrite=self.overwrite, + sp_credentials_secret=self.adls_sp_credentials_secret, + flow=self, + ) - file_to_adls_task.set_upstream(df_to_file, flow=self) - json_to_adls_task.set_upstream(dtypes_to_json_task, flow=self) - if self.set_prefect_kv == True: - set_key_value(key=self.adls_dir_path, value=self.adls_file_path) + if self.validate_df_dict: + df_with_metadata.set_upstream(validation_task, flow=self) + dtypes_dict.set_upstream(df_with_metadata, flow=self) + df_mapped.set_upstream(df_with_metadata, flow=self) + dtypes_to_json_task.set_upstream(df_mapped, flow=self) + df_to_file.set_upstream(dtypes_to_json_task, flow=self) + file_to_adls_task.set_upstream(df_to_file, flow=self) + json_to_adls_task.set_upstream(dtypes_to_json_task, flow=self) + if self.set_prefect_kv == True: + set_key_value(key=self.adls_dir_path, value=self.adls_file_path) @staticmethod def slugify(name): From 9c260505e197c60a25f7717e481c014617154117 Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Sat, 2 Dec 2023 16:47:23 +0100 Subject: [PATCH 
29/54] changed with case in sharepoint to adls --- viadot/flows/sharepoint_to_adls.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 2c233eaf8..b2d6d22cb 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -204,6 +204,10 @@ def __init__(self, message): # raise ENDRUN(state=Failed("Failed task raised")) elif if_no_data_returned == "fail": raise NoDataReturnedError("No data in the source response. Df empty...") + elif if_no_data_returned == "skip": + return False + else: + return False class SharepointListToADLS(Flow): @@ -353,11 +357,7 @@ def gen_flow(self) -> Flow: credentials_secret=self.sp_cert_credentials_secret, ) - if self.if_no_data_returned != "skip": - df_empty = check_if_df_empty.bind(df, self.if_no_data_returned, flow=self) - # If df empty there is no reason to run other tasks - else: - df_empty = False + df_empty = check_if_df_empty.bind(df, self.if_no_data_returned, flow=self) with case(df_empty, False): if self.validate_df_dict: From 371223b8d4d50405e0397e684ff596f030fdd80f Mon Sep 17 00:00:00 2001 From: cgildenia Date: Mon, 4 Dec 2023 13:23:28 +0100 Subject: [PATCH 30/54] =?UTF-8?q?=E2=9C=85=20added=20tests=20for=20sharepo?= =?UTF-8?q?int=20list?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flows/test_sharepoint_to_adls.py | 72 ++++++++++++++++++- 1 file changed, 70 insertions(+), 2 deletions(-) diff --git a/tests/integration/flows/test_sharepoint_to_adls.py b/tests/integration/flows/test_sharepoint_to_adls.py index b3019bd67..51a1c1956 100644 --- a/tests/integration/flows/test_sharepoint_to_adls.py +++ b/tests/integration/flows/test_sharepoint_to_adls.py @@ -6,15 +6,18 @@ import pytest from prefect.tasks.secrets import PrefectSecret -from viadot.flows import SharepointToADLS +from viadot.flows import SharepointToADLS, SharepointListToADLS from viadot.tasks import AzureDataLakeRemove -ADLS_FILE_NAME = str(pendulum.now("utc")) + ".csv" +ADLS_FILE_NAME = pendulum.now("utc").strftime("%Y-%m-%d_%H:%M:%S_%Z%z") ADLS_DIR_PATH = "raw/tests/" CREDENTIALS_SECRET = PrefectSecret("AZURE_DEFAULT_ADLS_SERVICE_PRINCIPAL_SECRET").run() DATA = {"country": [1, 2], "sales": [3, 4]} +SharepointToADLS + + @mock.patch( "viadot.tasks.SharepointToDF.run", return_value=pd.DataFrame(data=DATA), @@ -73,3 +76,68 @@ def test_sharepoint_to_adls_run_flow_overwrite_false(mocked_class): assert result.is_failed() os.remove("test_sharepoint_to_adls_run_flow_overwrite_false.csv") os.remove("test_sharepoint_to_adls_run_flow_overwrite_false.json") + + +# SharepointListToADLS +@mock.patch( + "viadot.tasks.SharepointListToDF.run", + return_value=pd.DataFrame(data=DATA), +) +@pytest.mark.run +def test_sharepoint_list_to_adls_run_flow_csv(mocked_class): + flow = SharepointListToADLS( + "test_sharepoint_to_adls_run_flow", + output_file_extension=".csv", + adls_sp_credentials_secret=CREDENTIALS_SECRET, + adls_dir_path=ADLS_DIR_PATH, + file_name=ADLS_FILE_NAME, + list_title="", + site_url="", + ) + result = flow.run() + assert result.is_successful() + os.remove(ADLS_FILE_NAME + ".csv") + os.remove("test_sharepoint_to_adls_run_flow.json") + + +@mock.patch( + "viadot.tasks.SharepointListToDF.run", + return_value=pd.DataFrame(data=DATA), +) +@pytest.mark.run +def test_sharepoint_list_to_adls_run_flow_parquet(mocked_class): + flow = SharepointListToADLS( + "test_sharepoint_to_adls_run_flow", + 
output_file_extension=".parquet", + adls_sp_credentials_secret=CREDENTIALS_SECRET, + adls_dir_path=ADLS_DIR_PATH, + file_name=ADLS_FILE_NAME, + list_title="", + site_url="", + ) + result = flow.run() + assert result.is_successful() + os.remove(ADLS_FILE_NAME + ".parquet") + os.remove("test_sharepoint_to_adls_run_flow.json") + + +@mock.patch( + "viadot.tasks.SharepointListToDF.run", + return_value=pd.DataFrame(data=DATA), +) +@pytest.mark.run +def test_sharepoint_list_to_adls_run_flow_overwrite_true(mocked_class): + flow = SharepointListToADLS( + "test_sharepoint_to_adls_run_flow_overwrite_true", + output_file_extension=".csv", + adls_sp_credentials_secret=CREDENTIALS_SECRET, + adls_dir_path=ADLS_DIR_PATH, + file_name=ADLS_FILE_NAME, + overwrite_adls=True, + list_title="", + site_url="", + ) + result = flow.run() + assert result.is_successful() + os.remove(ADLS_FILE_NAME + ".csv") + os.remove("test_sharepoint_to_adls_run_flow_overwrite_true.json") From c0ae042be3b8685109b37b1cd65a29bb75efc77e Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Mon, 4 Dec 2023 13:32:59 +0100 Subject: [PATCH 31/54] =?UTF-8?q?=F0=9F=90=9B=20Added=20warning=20logger?= =?UTF-8?q?=20for=20credential?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/sap_rfc.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 6432ac8e8..f39fee297 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd from prefect.utilities import logging +from prefect.engine.state import Failed try: import pyrfc @@ -257,7 +258,14 @@ def __init__( self._con = None DEFAULT_CREDENTIALS = local_config.get("SAP").get("DEV") - credentials = kwargs.pop("credentials", None) or DEFAULT_CREDENTIALS + + credentials = kwargs.pop("credentials", None) + if credentials is None: + credentials = DEFAULT_CREDENTIALS + logger.warning( + "WARNING!!! Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow." + ) + if credentials is None: raise CredentialError("Missing credentials.") From a999d91516b63647b593351629998bbfdb5e3096 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Mon, 4 Dec 2023 13:40:35 +0100 Subject: [PATCH 32/54] Add changes to changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ef880c75..2ba7a52ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed ### Changed - +- Changed __init__ in SAPRFC class in source in order to raise warning in prefect when credentials will be taken from DEV. 
## [0.4.22] - 2023-11-15 ### Added From d69733b631ae78c48167b2cef0f5ed6a76e253b8 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Mon, 4 Dec 2023 13:58:32 +0100 Subject: [PATCH 33/54] =?UTF-8?q?=F0=9F=8E=A8=20Delete=20"WARNING!!!"=20fr?= =?UTF-8?q?om=20warning=20message?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/sap_rfc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index f39fee297..806e61250 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -263,7 +263,7 @@ def __init__( if credentials is None: credentials = DEFAULT_CREDENTIALS logger.warning( - "WARNING!!! Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow." + "Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow." ) if credentials is None: From 5e4fa56a09049af109d4fd26f41c405d78b410ec Mon Sep 17 00:00:00 2001 From: gwieloch Date: Mon, 4 Dec 2023 15:17:24 +0100 Subject: [PATCH 34/54] added conn.close after each session to sappw --- viadot/sources/sap_bw.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/viadot/sources/sap_bw.py b/viadot/sources/sap_bw.py index 94e3347a9..90b70dfec 100644 --- a/viadot/sources/sap_bw.py +++ b/viadot/sources/sap_bw.py @@ -1,7 +1,6 @@ import textwrap from typing import List - -from pyrfc import Connection +import pyrfc from viadot.exceptions import CredentialError, ValidationError from viadot.sources.base import Source @@ -31,14 +30,15 @@ def __init__(self, credentials: dict, *args, **kwargs): super().__init__(*args, credentials=credentials, **kwargs) - def get_connection(self) -> Connection: + def get_connection(self) -> pyrfc.Connection: """ Function to create the connection with SAP BW. Returns: Connection: Connection to SAP. """ - return Connection( + + return pyrfc.Connection( ashost=self.credentials.get("ashost"), sysnr=self.credentials.get("sysnr"), user=self.credentials.get("user"), @@ -126,5 +126,6 @@ def get_output_data(self, mdx_query: str) -> dict: datasetid = properties["DATASETID"] query_output = conn.call("RSR_MDX_GET_FLAT_DATA", DATASETID=datasetid) + conn.close() # close connection after full session return query_output From b08dbfffc5e129a32a9ab7fd2ab39d6224bcb0ca Mon Sep 17 00:00:00 2001 From: cgildenia Date: Mon, 4 Dec 2023 16:05:00 +0100 Subject: [PATCH 35/54] =?UTF-8?q?=E2=9C=A8=20list=20extension=20is=20now?= =?UTF-8?q?=20a=20literal?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/sharepoint_to_adls.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 787c78436..79b511c53 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -1,6 +1,6 @@ import os from pathlib import Path -from typing import Any, Dict, List +from typing import Any, Dict, List, Literal import pendulum from prefect import Flow @@ -65,7 +65,6 @@ def __init__( Defaults to None. overwrite_adls (bool, optional): Whether to overwrite files in the lake. Defaults to False. if_empty (str, optional): What to do if query returns no data. Defaults to "warn". - if_exists (str, optional): What to do if the file already exists. 
Defaults to "replace". validate_df_dict (dict, optional): A dictionary with optional list of tests to verify the output dataframe. If defined, triggers the `validate_df` task from task_utils. Defaults to None. timeout(int, optional): The amount of time (in seconds) to wait while running this task before @@ -204,7 +203,7 @@ def __init__( sp_cert_credentials_secret: str = None, vault_name: str = None, overwrite_adls: bool = True, - output_file_extension: str = ".parquet", + output_file_extension: Literal[".parquet", ".csv"] = ".parquet", sep: str = "\t", validate_df_dict: dict = None, set_prefect_kv: bool = False, @@ -263,7 +262,7 @@ def __init__( If not passed it will take cred's from your .config/credentials.json Default to None. vault_name (str, optional): KeyVaultSecret name. Default to None. overwrite_adls (bool, optional): Whether to overwrite files in the lake. Defaults to True. - output_file_extension (str, optional): Extension of the resulting file to be stored. Defaults to ".parquet". + output_file_extension (str, optional): Extension of the resulting file to be stored, either ".csv" or ".parquet". Defaults to ".parquet". sep (str, optional): The separator to use in the CSV. Defaults to "\t". validate_df_dict (dict, optional): Whether to do an extra df validation before ADLS upload or not to do. Defaults to None. set_prefect_kv (bool, optional): Whether to do key-value parameters in KV Store or not. Defaults to False. @@ -348,12 +347,14 @@ def gen_flow(self) -> Flow: sep=self.sep, flow=self, ) - else: + elif self.output_file_extension == ".parquet": df_to_file = df_to_parquet.bind( df=df_mapped, path=self.local_file_path, flow=self, ) + else: + raise ValueError("Output file extension can only be '.csv' or '.parquet'") file_to_adls_task = AzureDataLakeUpload() file_to_adls_task.bind( From 8540e19f585f3422167354a3d2568031e2bd91a3 Mon Sep 17 00:00:00 2001 From: cgildenia Date: Mon, 4 Dec 2023 16:25:28 +0100 Subject: [PATCH 36/54] =?UTF-8?q?=E2=9C=85=20added=20wrong=20extension=20t?= =?UTF-8?q?est?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flows/test_sharepoint_to_adls.py | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/tests/integration/flows/test_sharepoint_to_adls.py b/tests/integration/flows/test_sharepoint_to_adls.py index 51a1c1956..5603c49ee 100644 --- a/tests/integration/flows/test_sharepoint_to_adls.py +++ b/tests/integration/flows/test_sharepoint_to_adls.py @@ -9,15 +9,14 @@ from viadot.flows import SharepointToADLS, SharepointListToADLS from viadot.tasks import AzureDataLakeRemove -ADLS_FILE_NAME = pendulum.now("utc").strftime("%Y-%m-%d_%H:%M:%S_%Z%z") +ADLS_FILE_NAME = str(pendulum.now("utc")) + ".csv" +ADLS_FILE_NAME_LIST = pendulum.now("utc").strftime("%Y-%m-%d_%H:%M:%S_%Z%z") ADLS_DIR_PATH = "raw/tests/" CREDENTIALS_SECRET = PrefectSecret("AZURE_DEFAULT_ADLS_SERVICE_PRINCIPAL_SECRET").run() DATA = {"country": [1, 2], "sales": [3, 4]} -SharepointToADLS - - +# SharepointToADLS @mock.patch( "viadot.tasks.SharepointToDF.run", return_value=pd.DataFrame(data=DATA), @@ -72,7 +71,6 @@ def test_sharepoint_to_adls_run_flow_overwrite_false(mocked_class): overwrite_adls=False, ) result = flow.run() - assert result.is_failed() os.remove("test_sharepoint_to_adls_run_flow_overwrite_false.csv") os.remove("test_sharepoint_to_adls_run_flow_overwrite_false.json") @@ -90,13 +88,13 @@ def test_sharepoint_list_to_adls_run_flow_csv(mocked_class): output_file_extension=".csv", 
adls_sp_credentials_secret=CREDENTIALS_SECRET, adls_dir_path=ADLS_DIR_PATH, - file_name=ADLS_FILE_NAME, + file_name=ADLS_FILE_NAME_LIST, list_title="", site_url="", ) result = flow.run() assert result.is_successful() - os.remove(ADLS_FILE_NAME + ".csv") + os.remove(ADLS_FILE_NAME_LIST + ".csv") os.remove("test_sharepoint_to_adls_run_flow.json") @@ -111,16 +109,36 @@ def test_sharepoint_list_to_adls_run_flow_parquet(mocked_class): output_file_extension=".parquet", adls_sp_credentials_secret=CREDENTIALS_SECRET, adls_dir_path=ADLS_DIR_PATH, - file_name=ADLS_FILE_NAME, + file_name=ADLS_FILE_NAME_LIST, list_title="", site_url="", ) result = flow.run() assert result.is_successful() - os.remove(ADLS_FILE_NAME + ".parquet") + os.remove(ADLS_FILE_NAME_LIST + ".parquet") os.remove("test_sharepoint_to_adls_run_flow.json") +@mock.patch( + "viadot.tasks.SharepointListToDF.run", + return_value=pd.DataFrame(data=DATA), +) +@pytest.mark.run +def test_sharepoint_list_to_adls_run_flow_wrong_extension(mocked_class): + with pytest.raises(ValueError) as exc: + flow = SharepointListToADLS( + "test_sharepoint_to_adls_run_flow", + output_file_extension=".s", + adls_sp_credentials_secret=CREDENTIALS_SECRET, + adls_dir_path=ADLS_DIR_PATH, + file_name=ADLS_FILE_NAME_LIST, + list_title="", + site_url="", + ) + result = flow.run() + assert "Output file extension can only be '.csv' or '.parquet'" in str(exc.value) + + @mock.patch( "viadot.tasks.SharepointListToDF.run", return_value=pd.DataFrame(data=DATA), @@ -132,12 +150,12 @@ def test_sharepoint_list_to_adls_run_flow_overwrite_true(mocked_class): output_file_extension=".csv", adls_sp_credentials_secret=CREDENTIALS_SECRET, adls_dir_path=ADLS_DIR_PATH, - file_name=ADLS_FILE_NAME, + file_name=ADLS_FILE_NAME_LIST, overwrite_adls=True, list_title="", site_url="", ) result = flow.run() assert result.is_successful() - os.remove(ADLS_FILE_NAME + ".csv") + os.remove(ADLS_FILE_NAME_LIST + ".csv") os.remove("test_sharepoint_to_adls_run_flow_overwrite_true.json") From 91e9d0bbeb5ace04806801798bdff36aac48f36b Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Tue, 5 Dec 2023 10:01:27 +0100 Subject: [PATCH 37/54] =?UTF-8?q?=F0=9F=8E=A8=20Change=20structure=20of=20?= =?UTF-8?q?'if'=20instruction=20and=20added=20to=20SAPRFCV2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/sap_rfc.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py index 806e61250..16cd5483d 100644 --- a/viadot/sources/sap_rfc.py +++ b/viadot/sources/sap_rfc.py @@ -7,7 +7,6 @@ import numpy as np import pandas as pd from prefect.utilities import logging -from prefect.engine.state import Failed try: import pyrfc @@ -262,13 +261,12 @@ def __init__( credentials = kwargs.pop("credentials", None) if credentials is None: credentials = DEFAULT_CREDENTIALS + if credentials is None: + raise CredentialError("Missing credentials.") logger.warning( "Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow." 
) - if credentials is None: - raise CredentialError("Missing credentials.") - super().__init__(*args, credentials=credentials, **kwargs) self.sep = sep @@ -702,9 +700,15 @@ def __init__( self._con = None DEFAULT_CREDENTIALS = local_config.get("SAP").get("DEV") - credentials = kwargs.pop("credentials", None) or DEFAULT_CREDENTIALS + + credentials = kwargs.pop("credentials", None) if credentials is None: - raise CredentialError("Missing credentials.") + credentials = DEFAULT_CREDENTIALS + if credentials is None: + raise CredentialError("Missing credentials.") + logger.warning( + "Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow." + ) super().__init__(*args, credentials=credentials, **kwargs) From 6334ad427bafd4ffd29d8310694bba3e5bcca4b7 Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Tue, 5 Dec 2023 10:07:12 +0100 Subject: [PATCH 38/54] =?UTF-8?q?=E2=9C=85=20Added=20tests=20for=20new=20f?= =?UTF-8?q?unctionalities=20for=20SAPRFC=20and=20SAPRFCV2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 4 ++-- tests/integration/test_sap_rfc.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ba7a52ea..68c49c1c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,11 +6,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added - +- Added tests for new functionalities in SAPRFC and SAPRFCV2 regarding passing credentials ### Fixed ### Changed -- Changed __init__ in SAPRFC class in source in order to raise warning in prefect when credentials will be taken from DEV. +- Changed __init__ in SAPRFC and SAPRFCV2 class in source in order to raise warning in prefect when credentials will be taken from DEV. ## [0.4.22] - 2023-11-15 ### Added diff --git a/tests/integration/test_sap_rfc.py b/tests/integration/test_sap_rfc.py index 20078d312..fd2298323 100644 --- a/tests/integration/test_sap_rfc.py +++ b/tests/integration/test_sap_rfc.py @@ -187,3 +187,19 @@ def test___build_pandas_filter_query_v2(): sap2._build_pandas_filter_query(sap2.client_side_filters) == "thirdlongcolname == 01234" ), sap2._build_pandas_filter_query(sap2.client_side_filters) + + +def test_default_credentials_warning_SAPRFC(caplog): + _ = SAPRFC() + assert ( + "Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow." + in caplog.text + ) + + +def test_default_credentials_warning_SAPRFCV2(caplog): + _ = SAPRFCV2() + assert ( + "Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow." 
+ in caplog.text + ) From 4f3efc96d72830e2033452d1c9110b0b072a88ff Mon Sep 17 00:00:00 2001 From: kiurieva Date: Tue, 5 Dec 2023 12:02:09 +0100 Subject: [PATCH 39/54] cleaned check_connection and get_response methods --- viadot/flows/vid_club_to_adls.py | 4 +- viadot/sources/vid_club.py | 81 ++++++++++---------------------- viadot/tasks/vid_club.py | 4 +- 3 files changed, 30 insertions(+), 59 deletions(-) diff --git a/viadot/flows/vid_club_to_adls.py b/viadot/flows/vid_club_to_adls.py index 40f53d8ae..de7267479 100644 --- a/viadot/flows/vid_club_to_adls.py +++ b/viadot/flows/vid_club_to_adls.py @@ -31,7 +31,7 @@ def __init__( from_date: str = "2022-03-22", to_date: str = None, items_per_page: int = 100, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = None, days_interval: int = 30, cols_to_drop: List[str] = None, vid_club_credentials: Dict[str, Any] = None, @@ -60,7 +60,7 @@ def __init__( from_date (str, optional): Start date for the query, by default is the oldest date in the data 2022-03-22. to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. items_per_page (int, optional): Number of entries per page. Defaults to 100. - region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] + region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to None (parameter is not used in url). [December 2023 status: value 'all' does not work for company and jobs] days_interval (int, optional): Days specified in date range per API call (test showed that 30-40 is optimal for performance). Defaults to 30. cols_to_drop (List[str], optional): List of columns to drop. Defaults to None. vid_club_credentials (Dict[str, Any], optional): Stores the credentials information. Defaults to None. diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py index 327d9abf7..9aef751ad 100644 --- a/viadot/sources/vid_club.py +++ b/viadot/sources/vid_club.py @@ -58,7 +58,7 @@ def build_query( api_url (str): Generic part of the URL to Vid Club API. items_per_page (int): number of entries per page. source (Literal["jobs", "product", "company", "survey"], optional): The endpoint source to be accessed. Defaults to None. - region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] + region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to None (parameter is not used in url). [December 2023 status: value 'all' does not work for company and jobs] Returns: str: Final query with all filters added. @@ -67,7 +67,8 @@ def build_query( ValidationError: If any source different than the ones in the list are used. 
""" if source in ["jobs", "product", "company"]: - url = f"{api_url}{source}?from={from_date}&to={to_date}®ion={region}&limit={items_per_page}" + region_url_string = f"®ion={region}" if region else "" + url = f"{api_url}{source}?from={from_date}&to={to_date}{region_url_string}&limit={items_per_page}" elif source == "survey": url = f"{api_url}{source}?language=en&type=question" else: @@ -141,7 +142,7 @@ def check_connection( from_date (str, optional): Start date for the query, by default is the oldest date in the data 2022-03-22. to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. items_per_page (int, optional): Number of entries per page. 100 entries by default. - region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] + region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to None (parameter is not used in url). [December 2023 status: value 'all' does not work for company and jobs] url (str, optional): Generic part of the URL to Vid Club API. Defaults to None. Returns: @@ -161,37 +162,19 @@ def check_connection( if url is None: url = self.credentials["url"] - if source in ["jobs", "product", "company"]: - first_url = self.build_query( - source=source, - from_date=from_date, - to_date=to_date, - api_url=url, - items_per_page=items_per_page, - ) - headers = self.headers - response = handle_api_response( - url=first_url, headers=headers, method="GET", verify=False - ) - response = response.json() - elif source == "survey": - first_url = self.build_query( - source=source, - from_date=from_date, - to_date=to_date, - api_url=url, - items_per_page=items_per_page, - region=region, - ) - headers = self.headers - response = handle_api_response( - url=first_url, headers=headers, method="GET", verify=False - ) - response = response.json() - else: - raise ValidationError( - "Pick one these sources: jobs, product, company, survey" - ) + first_url = self.build_query( + source=source, + from_date=from_date, + to_date=to_date, + api_url=url, + items_per_page=items_per_page, + region=region, + ) + headers = self.headers + response = handle_api_response( + url=first_url, headers=headers, method="GET", verify=False + ) + response = response.json() return (response, first_url) def get_response( @@ -210,7 +193,7 @@ def get_response( from_date (str, optional): Start date for the query, by default is the oldest date in the data 2022-03-22. to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. items_per_page (int, optional): Number of entries per page. 100 entries by default. - region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] + region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to None (parameter is not used in url). [December 2023 status: value 'all' does not work for company and jobs] Returns: pd.DataFrame: Table of the data carried in the response. 
@@ -225,26 +208,14 @@ def get_response( ) if to_date == None: to_date = datetime.today().strftime("%Y-%m-%d") - if source in ["jobs", "product", "company"]: - response, first_url = self.check_connection( - source=source, - from_date=from_date, - to_date=to_date, - items_per_page=items_per_page, - ) - elif source == "survey": - response, first_url = self.check_connection( - source=source, - from_date=from_date, - to_date=to_date, - items_per_page=items_per_page, - region=region, - ) - else: - raise ValidationError( - "Pick one these sources: jobs, product, company, survey" - ) + response, first_url = self.check_connection( + source=source, + from_date=from_date, + to_date=to_date, + items_per_page=items_per_page, + region=region, + ) if isinstance(response, dict): keys_list = list(response.keys()) @@ -304,7 +275,7 @@ def total_load( from_date (str, optional): Start date for the query, by default is the oldest date in the data 2022-03-22. to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. items_per_page (int, optional): Number of entries per page. 100 entries by default. - region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] + region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to None (parameter is not used in url). [December 2023 status: value 'all' does not work for company and jobs] days_interval (int, optional): Days specified in date range per api call (test showed that 30-40 is optimal for performance). Defaults to 30. Returns: diff --git a/viadot/tasks/vid_club.py b/viadot/tasks/vid_club.py index 0814a306f..aba7025dc 100644 --- a/viadot/tasks/vid_club.py +++ b/viadot/tasks/vid_club.py @@ -85,7 +85,7 @@ def run( from_date: str = "2022-03-22", to_date: str = None, items_per_page: int = 100, - region: str = "all", + region: str = None, days_interval: int = 30, cols_to_drop: List[str] = None, ) -> pd.DataFrame: @@ -98,7 +98,7 @@ def run( from_date (str, optional): Start date for the query, by default is the oldest date in the data, '2022-03-22'. to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. items_per_page (int, optional): Number of entries per page. 100 entries by default. - region (str, optional): Region filter for the query. Valid inputs: ["bg", "hu", "hr", "pl", "ro", "si", "all"]. Defaults to "all". + region (str, optional): Region filter for the query. Valid inputs: ["bg", "hu", "hr", "pl", "ro", "si", "all"]. Defaults to None. days_interval (int, optional): Days specified in date range per api call (test showed that 30-40 is optimal for performance). Defaults to 30. cols_to_drop (List[str], optional): List of columns to drop. Defaults to None. 
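
The region change in the patch above boils down to making the `&region=` query fragment optional: when `region` is None it is omitted from the request URL entirely, in line with the December 2023 note that the 'all' value no longer works for the company and jobs endpoints. A small standalone sketch of that URL-building logic, using a placeholder base URL rather than the real Vid Club endpoint:

    from typing import Optional


    def build_url(
        api_url: str,
        source: str,
        from_date: str,
        to_date: str,
        items_per_page: int,
        region: Optional[str] = None,
    ) -> str:
        # The region filter is appended only when a region is actually given.
        region_part = f"&region={region}" if region else ""
        return (
            f"{api_url}{source}?from={from_date}&to={to_date}"
            f"{region_part}&limit={items_per_page}"
        )


    # With a region, the filter appears in the query string...
    print(build_url("https://example.com/api/", "product", "2022-03-22", "2023-12-05", 100, "pl"))
    # ...and without one it is dropped entirely.
    print(build_url("https://example.com/api/", "product", "2022-03-22", "2023-12-05", 100))
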
From 635c146116a5501d9bfe34218fe946766bf4cf09 Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Tue, 5 Dec 2023 13:01:36 +0100 Subject: [PATCH 40/54] changed raise to endrun --- viadot/flows/sharepoint_to_adls.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index b2d6d22cb..0255d69e2 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -193,17 +193,13 @@ def slugify(name): @task(slug="check_df") def check_if_df_empty(df, if_no_data_returned: str = "fail"): # -> to task.utils - class NoDataReturnedError(BaseException): - def __init__(self, message): - self.message = message if df.empty: if if_no_data_returned == "warn": logger.warning("No data in the source response. Df empty.") return True - # raise ENDRUN(state=Failed("Failed task raised")) elif if_no_data_returned == "fail": - raise NoDataReturnedError("No data in the source response. Df empty...") + raise ENDRUN(state=Failed("No data in the source response. Df empty...")) elif if_no_data_returned == "skip": return False else: From 48ddcd65336a60d2a8bd8fc6403aded2157398bd Mon Sep 17 00:00:00 2001 From: kiurieva Date: Tue, 5 Dec 2023 15:52:54 +0100 Subject: [PATCH 41/54] unify region parameter --- viadot/sources/vid_club.py | 2 +- viadot/tasks/vid_club.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py index 9aef751ad..fe6e76098 100644 --- a/viadot/sources/vid_club.py +++ b/viadot/sources/vid_club.py @@ -263,7 +263,7 @@ def total_load( from_date: str = "2022-03-22", to_date: str = None, items_per_page: int = 100, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = None, days_interval: int = 30, ) -> pd.DataFrame: """ diff --git a/viadot/tasks/vid_club.py b/viadot/tasks/vid_club.py index aba7025dc..aff0e09ea 100644 --- a/viadot/tasks/vid_club.py +++ b/viadot/tasks/vid_club.py @@ -85,7 +85,7 @@ def run( from_date: str = "2022-03-22", to_date: str = None, items_per_page: int = 100, - region: str = None, + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = None, days_interval: int = 30, cols_to_drop: List[str] = None, ) -> pd.DataFrame: From be1a72e93ae0a3e2c90bba3780af38605f1ca0bc Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Tue, 5 Dec 2023 16:41:48 +0100 Subject: [PATCH 42/54] Moved tasks to task_utils + tests added --- tests/integration/tasks/test_task_utils.py | 42 +++- viadot/flows/sharepoint_to_adls.py | 17 +- viadot/task_utils.py | 233 ++++++++++++++++++++- 3 files changed, 274 insertions(+), 18 deletions(-) diff --git a/tests/integration/tasks/test_task_utils.py b/tests/integration/tasks/test_task_utils.py index f22d55022..d10cceb4d 100644 --- a/tests/integration/tasks/test_task_utils.py +++ b/tests/integration/tasks/test_task_utils.py @@ -1,9 +1,15 @@ +import pytest import pandas as pd from prefect.backend import get_key_value, set_key_value from prefect.engine.state import Failed, Success from prefect.tasks.secrets import PrefectSecret -from viadot.task_utils import custom_mail_state_handler, set_new_kv +from viadot.task_utils import ( + custom_mail_state_handler, + set_new_kv, + search_for_msg_in_logs, + check_if_df_empty, +) def test_custom_state_handler(): @@ -28,3 +34,37 @@ def test_set_new_kv(): result = get_key_value("test_for_setting_kv") assert result == "72" set_key_value(key="test_for_setting_kv", value=None) + + +def 
test_search_for_msg_in_logs(): + logs = [ + {"message": "Error occurred"}, + {"message": "Warning: Invalid input"}, + {"message": "Log message"}, + ] + + # Test when the message is found in the logs + assert search_for_msg_in_logs.run(logs, "Error occurred") == True + + # Test when the message is not found in the logs + assert search_for_msg_in_logs.run(logs, "Info message") == False + + +def test_check_if_df_empty(): + df = pd.DataFrame() + from prefect.engine import signals + + # Test when the DataFrame is empty and if_no_data_returned is "warn" + assert check_if_df_empty.run(df, if_no_data_returned="warn") == True + + # Test when the DataFrame is empty and if_no_data_returned is "fail" + try: + check_if_df_empty.run(df, if_no_data_returned="fail") + except: + print("Task failed") + # Test when the DataFrame is empty and if_no_data_returned is "skip" + assert check_if_df_empty.run(df, if_no_data_returned="skip") == False + + # Test when the DataFrame is not empty + df = pd.DataFrame({"col": [1, 2, 3]}) + assert check_if_df_empty.run(df) == False diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 0255d69e2..6ee31f56e 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -21,6 +21,7 @@ ) from viadot.tasks import AzureDataLakeUpload from viadot.tasks.sharepoint import SharepointListToDF, SharepointToDF +from viadot.task_utils import check_if_df_empty logger = logging.get_logger() @@ -190,22 +191,6 @@ def slugify(name): return name.replace(" ", "_").lower() -@task(slug="check_df") -def check_if_df_empty(df, if_no_data_returned: str = "fail"): - # -> to task.utils - - if df.empty: - if if_no_data_returned == "warn": - logger.warning("No data in the source response. Df empty.") - return True - elif if_no_data_returned == "fail": - raise ENDRUN(state=Failed("No data in the source response. Df empty...")) - elif if_no_data_returned == "skip": - return False - else: - return False - - class SharepointListToADLS(Flow): def __init__( self, diff --git a/viadot/task_utils.py b/viadot/task_utils.py index 6a532f932..c87387efb 100644 --- a/viadot/task_utils.py +++ b/viadot/task_utils.py @@ -3,9 +3,10 @@ import os import re import shutil +import pendulum from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import TYPE_CHECKING, Any, Callable, List, Literal, Union, cast +from typing import TYPE_CHECKING, Any, Callable, List, Literal, Union, cast, Tuple import pandas as pd import prefect @@ -14,6 +15,7 @@ from prefect import Flow, Task, task from prefect.backend import set_key_value from prefect.engine.state import Failed +from prefect.engine.runner import ENDRUN from prefect.storage import Git from prefect.tasks.secrets import PrefectSecret from prefect.utilities import logging @@ -792,3 +794,232 @@ def validate_df(df: pd.DataFrame, tests: dict = None) -> None: raise ValidationError( f"Validation failed for {failed_tests} test/tests: {failed_tests_msg}" ) + + +@task(timeout=3600, slug="check_df") +def check_if_df_empty(df, if_no_data_returned: str = "fail") -> bool: + """ + Check if a DataFrame received as a data source response is empty. + If fail is expected , this task will finish with ENDRUN(Failed()) state. + + Args: + df (pandas.DataFrame): The DataFrame to check. + if_no_data_returned (str, optional): The action to take if no data is returned in the DataFrame. + Options are "fail" (default), "warn", or "skip". 
+ + Returns: + bool: True if the DataFrame is empty and the action is "warn", False otherwise. + + Raises: + ENDRUN: If the DataFrame is empty and the action is "fail". + + Example: + >>> df = pd.DataFrame() + >>> check_if_df_empty(df, if_no_data_returned="warn") + True + """ + if df.empty: + if if_no_data_returned == "warn": + logger.warning("No data in the source response. Df empty.") + return True + elif if_no_data_returned == "fail": + raise ENDRUN(state=Failed("No data in the source response. Df empty...")) + elif if_no_data_returned == "skip": + return False + else: + return False + + +@task(timeout=3600) +def get_flow_run_id(client: prefect.Client, flow_name: str, state: str) -> str: + """Gets the last flow run ID based on the name of the flow and time of its run in descending order of th flows runs + + Args: + client (prefect.Client): The Prefect client used to execute the GraphQL query. + flow_name (str): The name of the flow to search for. + state (str): The state of the flow run to filter by. + + Returns: + str: The ID of the last flow run that matches the given flow name and state. + + Raises: + ValueError: If the given flow name cannot be found in the Prefect Cloud API. + + Example: + >>> client = prefect.Client() + >>> flow_name = "My Flow" + >>> state = "SUCCESS" + >>> get_flow_run_id(client, flow_name, state) + "flow_run_id_12345" + """ + # Construct the GraphQL query + query = f""" + {{ + flow_run( + where: {{ + flow: {{ + name: {{_eq: "{flow_name}"}} + }} + state: {{_eq: "{state}"}} + }} + order_by : {{end_time: desc}} + limit : 1 + ){{ + id + }} + }} + """ + # Execute the GraphQL query + response = client.graphql(query) + result_data = response.get("data").get("flow_run") + if result_data: + flow_run_id = result_data.get("id")[0] + return flow_run_id + else: + raise ValueError("Given flow name cannot be found in the Prefect Cloud API") + + +@task(timeout=3600) +def get_task_logs(client: prefect.Client, flow_run_id: str, task_slug: str) -> List: + """ + Retrieves the logs for a specific task in a flow run using the Prefect client and GraphQL query. + + Args: + client (prefect.Client): The Prefect client used to execute the GraphQL query. + flow_run_id (str): The ID of the flow run. + task_slug (str): The slug of the task to retrieve logs for. + + Returns: + List[Dict[str, Union[str, List[Dict[str, str]]]]]: A list of log entries for the specified task. + Each log entry is a dictionary with 'message' and 'level' keys. + + Raises: + ValueError: If no data is available for the given task slug. 
+ + Example: + >>> client = prefect.Client() + >>> flow_run_id = "flow_run_id_12345" + >>> task_slug = "my_task" + >>> get_task_logs(client, flow_run_id, task_slug) + [{'message': 'Log message 1', 'level': 'INFO'}, {'message': 'Log message 2', 'level': 'DEBUG'}] + """ + # Construct the GraphQL query + query = f""" + {{ + task_run( + where: {{ + flow_run_id: {{_eq: "{flow_run_id}"}}, + task: {{slug: {{_eq: "{task_slug}"}}}} + }} + ) {{ + state + logs {{ + message + level + }} + }} + }} + """ + # Execute the GraphQL query + logger.info("Executing GraphQL query to get task logs") + response = client.graphql(query) + result_data = response.get("data").get("task_run") + # Extract task logs + if result_data: + logs = result_data[0].get("logs") + return logs + else: + raise ValueError("No data available for the given task slug") + + +@task(timeout=3600) +def send_email_notification( + from_address: Union[str, Tuple], + to_address: Union[str, List[str], List[Tuple], Tuple[str]], + content: str, + subject: str, + vault_name: str, + mail_credentials_secret: str, + timezone: str = "Europe/Warsaw", +) -> str: + """ + Sends an email notification using SendGrid API. + + Args: + from_address (Union[str, Tuple]): The email address of the sender. + to_address (Union[str, List[str], List[Tuple], Tuple[str]]): The email address(es) of the recipient(s). + content (str): The content of the email. + subject (str): The subject of the email. + vault_name (str): The name of the Azure Key Vault. + mail_credentials_secret (str): The secret name for the SendGrid API key. + timezone (str, optional): The timezone to use for the current datetime. Defaults to "Europe/Warsaw". + + Returns: + str: The response from the SendGrid API. + + Raises: + Exception: If the API key is not provided. + + Example: + >>> send_email_notification("sender@example.com", "recipient@example.com", "Hello!", "Test Email", "my-vault", "sendgrid-api-key") + 'Email sent successfully' + """ + + # Retrieve the SendGrid API key from the secret + if mail_credentials_secret is None: + mail_credentials_secret = PrefectSecret("SENDGRID_DEFAULT_SECRET").run() + elif mail_credentials_secret is not None: + credentials_str = AzureKeyVaultSecret( + mail_credentials_secret, vault_name=vault_name + ).run() + api_key = json.loads(credentials_str).get("API_KEY") + else: + raise Exception("Please provide API KEY") + + # Get the current datetime in the specified timezone + curr_dt = pendulum.now(tz=timezone) + + # Create the email message + message = Mail( + from_email=from_address, + to_emails=to_address, + subject=subject, + html_content=f"{content}", + ) + + # Send the email using SendGrid API + send_grid = SendGridAPIClient(api_key) + response = send_grid.send(message) + return response + + +@task(timeout=3600) +def search_for_msg_in_logs(logs: list, log_info: str) -> bool: + """ + Searches for a specific message in Prefect flow or task logs. + + Args: + logs (list): The logs to search in. + log_info (str): The message to search for. + + Returns: + bool: True if the message is found, False otherwise. + + Example: + >>> logs = [ + ... {"message": "Error occurred"}, + ... {"message": "Warning: Invalid input"}, + ... {"message": "Log message"} + ... 
] + >>> search_for_msg_in_logs(logs, "Error occurred") + True + """ + found_msg = False + + # Iterate over each log entry + for value in logs: + if value.get("message") == log_info: + found_msg = True + break + + return found_msg From 2f7ed2a1b6cb6fdf124a8322ff0a0d9ebf18cf48 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 6 Dec 2023 09:10:56 +0000 Subject: [PATCH 43/54] =?UTF-8?q?=F0=9F=8E=A8=20Format=20Python=20code=20w?= =?UTF-8?q?ith=20Black?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/flows/test_sharepoint_to_adls.py | 2 +- tests/integration/test_sap_rfc.py | 4 ++-- tests/unit/test_utils.py | 2 +- viadot/sources/sap_bw.py | 1 + viadot/tasks/genesys.py | 2 +- viadot/utils.py | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/integration/flows/test_sharepoint_to_adls.py b/tests/integration/flows/test_sharepoint_to_adls.py index 5603c49ee..f0597c41a 100644 --- a/tests/integration/flows/test_sharepoint_to_adls.py +++ b/tests/integration/flows/test_sharepoint_to_adls.py @@ -6,7 +6,7 @@ import pytest from prefect.tasks.secrets import PrefectSecret -from viadot.flows import SharepointToADLS, SharepointListToADLS +from viadot.flows import SharepointListToADLS, SharepointToADLS from viadot.tasks import AzureDataLakeRemove ADLS_FILE_NAME = str(pendulum.now("utc")) + ".csv" diff --git a/tests/integration/test_sap_rfc.py b/tests/integration/test_sap_rfc.py index fd2298323..0ca9a2a1c 100644 --- a/tests/integration/test_sap_rfc.py +++ b/tests/integration/test_sap_rfc.py @@ -187,8 +187,8 @@ def test___build_pandas_filter_query_v2(): sap2._build_pandas_filter_query(sap2.client_side_filters) == "thirdlongcolname == 01234" ), sap2._build_pandas_filter_query(sap2.client_side_filters) - - + + def test_default_credentials_warning_SAPRFC(caplog): _ = SAPRFC() assert ( diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 75ef30e97..c29fbc014 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -8,8 +8,8 @@ from viadot.utils import ( add_viadot_metadata_columns, check_if_empty_file, - gen_bulk_insert_query_from_df, check_value, + gen_bulk_insert_query_from_df, ) EMPTY_CSV_PATH = "empty.csv" diff --git a/viadot/sources/sap_bw.py b/viadot/sources/sap_bw.py index 90b70dfec..e70f79b36 100644 --- a/viadot/sources/sap_bw.py +++ b/viadot/sources/sap_bw.py @@ -1,5 +1,6 @@ import textwrap from typing import List + import pyrfc from viadot.exceptions import CredentialError, ValidationError diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index d974dd587..feafbaccf 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -13,8 +13,8 @@ from viadot.exceptions import APIError from viadot.sources import Genesys -from viadot.utils import check_value from viadot.task_utils import * +from viadot.utils import check_value logger = logging.get_logger() diff --git a/viadot/utils.py b/viadot/utils.py index 5e3de784c..cd34adb8a 100644 --- a/viadot/utils.py +++ b/viadot/utils.py @@ -2,7 +2,7 @@ import os import re from itertools import chain -from typing import Union, Any, Callable, Dict, List, Literal +from typing import Any, Callable, Dict, List, Literal, Union import pandas as pd import prefect From 7aac8b0c7033dba90127e3ee1be89502670ddbc9 Mon Sep 17 00:00:00 2001 From: kiurieva Date: Wed, 6 Dec 2023 12:39:53 +0100 Subject: [PATCH 44/54] Fixed total_load method, updated tests --- tests/integration/test_vid_club.py | 1 - viadot/sources/vid_club.py | 5 +++++ 2 
files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_vid_club.py b/tests/integration/test_vid_club.py
index 6c2bd4544..50c3015cf 100644
--- a/tests/integration/test_vid_club.py
+++ b/tests/integration/test_vid_club.py
@@ -66,7 +66,6 @@ def test_url_string():
     expected_elements = [
         f"from={from_date}",
         f"to={to_date}",
-        "region=all",
         f"limit={items_per_page}",
         api_url,
     ]
diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py
index fe6e76098..4da4e4f45 100644
--- a/viadot/sources/vid_club.py
+++ b/viadot/sources/vid_club.py
@@ -310,6 +310,11 @@ def total_load(
             items_per_page=items_per_page,
             region=region,
         )
+        list_columns = df.columns[
+            df.applymap(lambda x: isinstance(x, list)).any()
+        ].tolist()
+        for i in list_columns:
+            df[i] = df[i].apply(lambda x: tuple(x) if isinstance(x, list) else x)
         df.drop_duplicates(inplace=True)

         if df.empty:

From 8f447aa93ce4e0956070a03b6897d44633b00522 Mon Sep 17 00:00:00 2001
From: Angelika Tarnawa
Date: Wed, 6 Dec 2023 14:41:50 +0100
Subject: [PATCH 45/54] =?UTF-8?q?=F0=9F=94=8A=20Updated=20logger=20warning?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/integration/test_sap_rfc.py | 4 ++--
 viadot/sources/sap_rfc.py         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/integration/test_sap_rfc.py b/tests/integration/test_sap_rfc.py
index 0ca9a2a1c..28ab044a2 100644
--- a/tests/integration/test_sap_rfc.py
+++ b/tests/integration/test_sap_rfc.py
@@ -192,7 +192,7 @@ def test___build_pandas_filter_query_v2():
 def test_default_credentials_warning_SAPRFC(caplog):
     _ = SAPRFC()
     assert (
-        "Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow."
+        "Your credentials will use DEV environment. If you would like to use different one - please specify it."
         in caplog.text
     )
@@ -200,6 +200,6 @@ def test_default_credentials_warning_SAPRFCV2(caplog):
 def test_default_credentials_warning_SAPRFCV2(caplog):
     _ = SAPRFCV2()
     assert (
-        "Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow."
+        "Your credentials will use DEV environment. If you would like to use different one - please specify it."
        in caplog.text
     )
diff --git a/viadot/sources/sap_rfc.py b/viadot/sources/sap_rfc.py
index 16cd5483d..a9d109148 100644
--- a/viadot/sources/sap_rfc.py
+++ b/viadot/sources/sap_rfc.py
@@ -264,7 +264,7 @@ def __init__(
         if credentials is None:
             raise CredentialError("Missing credentials.")
         logger.warning(
-            "Your credentials will use DEV environment. If you would like to use different one - please specified it in 'sap_credentials' variable inside the flow."
+            "Your credentials will use DEV environment. If you would like to use different one - please specify it."
        )
         super().__init__(*args, credentials=credentials, **kwargs)

From 8f6075bb33a01651ac42f70409e9af7de4189eca Mon Sep 17 00:00:00 2001
From: marcinpurtak
Date: Wed, 6 Dec 2023 14:43:45 +0100
Subject: [PATCH 46/54] Updated tests, removed unnecessary imports, changed file path parameter

---
 .../flows/test_sharepoint_to_adls.py | 19 +++++++++++++++----
 viadot/flows/sharepoint_to_adls.py   |  4 ++--
 viadot/task_utils.py                 |  1 +
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/tests/integration/flows/test_sharepoint_to_adls.py b/tests/integration/flows/test_sharepoint_to_adls.py
index bf7b1e5e5..93a31f5d1 100644
--- a/tests/integration/flows/test_sharepoint_to_adls.py
+++ b/tests/integration/flows/test_sharepoint_to_adls.py
@@ -171,6 +171,11 @@ def test_sharepoint_list_to_adls_run_flow_overwrite_true(mocked_class):
 )
 @pytest.mark.run
 def test_sharepoint_list_to_adls_run_flow_fail_on_no_data_returned(mocked_class):
+    """
+    Test will check if the flow fails when an empty DF is passed
+    with the given parameter if_no_data_returned = "fail".
+    CSV file should not be generated!
+    """
     flow = SharepointListToADLS(
         "test_sharepoint_to_adls_run_flow",
         output_file_extension=".csv",
@@ -183,8 +188,6 @@ def test_sharepoint_list_to_adls_run_flow_fail_on_no_data_returned(mocked_class)
     )
     result = flow.run()
     assert result.is_failed()
-    os.remove(ADLS_FILE_NAME_LIST + ".csv")
-    os.remove("test_sharepoint_to_adls_run_flow.json")


 @mock.patch(
@@ -193,6 +196,11 @@ def test_sharepoint_list_to_adls_run_flow_fail_on_no_data_returned(mocked_class)
 @pytest.mark.run
 def test_sharepoint_list_to_adls_run_flow_success_on_no_data_returned(mocked_class):
+    """
+    Test will check if the flow succeeds when an empty DF is passed
+    with the given parameter if_no_data_returned = "skip".
+    An empty CSV file should be generated!
+    """
     flow = SharepointListToADLS(
         "test_sharepoint_to_adls_run_flow",
         output_file_extension=".csv",
@@ -217,6 +225,11 @@ def test_sharepoint_list_to_adls_run_flow_success_on_no_data_returned(mocked_cla
 def test_sharepoint_list_to_adls_run_flow_success_warn_on_no_data_returned(
     mocked_class,
 ):
+    """
+    Test will check if the flow succeeds with a warning when an empty DF is passed
+    with the given parameter if_no_data_returned = "warn".
+    CSV file should not be generated!
+    """
     # Get prefect client instance
     flow = SharepointListToADLS(
         "test_sharepoint_to_adls_run_flow",
@@ -230,5 +243,3 @@ def test_sharepoint_list_to_adls_run_flow_success_warn_on_no_data_returned(
     )
     result = flow.run()
     assert result.is_successful()
-    os.remove(ADLS_FILE_NAME_LIST + ".csv")
-    os.remove("test_sharepoint_to_adls_run_flow.json")
diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py
index edac65238..e9dfe4b72 100644
--- a/viadot/flows/sharepoint_to_adls.py
+++ b/viadot/flows/sharepoint_to_adls.py
@@ -224,7 +224,7 @@ def __init__(
             name (str): Prefect flow name.
             list_title (str): Title of Sharepoint List.
             site_url (str): URL to set of Sharepoint Lists.
-            file_name (str): Name of file in ADLS. Defaults to None.
+            file_name (str): Name of file(without extension) in ADLS . Defaults to None.
             adls_dir_path (str): Azure Data Lake destination folder/catalog path. Defaults to None.
             filters (dict, optional): Dictionary with operators which filters the SharepointList output. Defaults to None.
allowed dtypes: ('datetime','date','bool','int', 'float', 'complex', 'str') @@ -371,7 +371,7 @@ def gen_flow(self) -> Flow: file_to_adls_task = AzureDataLakeUpload() file_to_adls_task.bind( - from_path=self.path, + from_path=self.local_file_path, to_path=self.adls_dir_path, overwrite=self.overwrite, sp_credentials_secret=self.adls_sp_credentials_secret, diff --git a/viadot/task_utils.py b/viadot/task_utils.py index c87387efb..b7a518033 100644 --- a/viadot/task_utils.py +++ b/viadot/task_utils.py @@ -29,6 +29,7 @@ from viadot.exceptions import CredentialError, ValidationError from viadot.tasks import AzureDataLakeUpload, AzureKeyVaultSecret + logger = logging.get_logger() METADATA_COLUMNS = {"_viadot_downloaded_at_utc": "DATETIME"} From 8d326ea069b2df6ee275479c988e917c88a01555 Mon Sep 17 00:00:00 2001 From: Marcin Purtak <44641138+marcinpurtak@users.noreply.github.com> Date: Wed, 6 Dec 2023 15:08:03 +0100 Subject: [PATCH 47/54] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Added literals for if_no_data_returned and missing dots Co-authored-by: Rafał Ziemianek <49795849+Rafalz13@users.noreply.github.com> --- viadot/task_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/viadot/task_utils.py b/viadot/task_utils.py index b7a518033..6fc00d760 100644 --- a/viadot/task_utils.py +++ b/viadot/task_utils.py @@ -798,14 +798,14 @@ def validate_df(df: pd.DataFrame, tests: dict = None) -> None: @task(timeout=3600, slug="check_df") -def check_if_df_empty(df, if_no_data_returned: str = "fail") -> bool: +def check_if_df_empty(df, if_no_data_returned: Literal["fail", "warn", "skip"] = "fail") -> bool: """ Check if a DataFrame received as a data source response is empty. If fail is expected , this task will finish with ENDRUN(Failed()) state. Args: df (pandas.DataFrame): The DataFrame to check. - if_no_data_returned (str, optional): The action to take if no data is returned in the DataFrame. + if_no_data_returned (Literal["fail", "warn", "skip"], optional): The action to take if no data is returned in the DataFrame. Defaults to "fail". Options are "fail" (default), "warn", or "skip". Returns: @@ -833,7 +833,7 @@ def check_if_df_empty(df, if_no_data_returned: str = "fail") -> bool: @task(timeout=3600) def get_flow_run_id(client: prefect.Client, flow_name: str, state: str) -> str: - """Gets the last flow run ID based on the name of the flow and time of its run in descending order of th flows runs + """Gets the last flow run ID based on the name of the flow and time of its run in descending order of the flow runs. Args: client (prefect.Client): The Prefect client used to execute the GraphQL query. 
@@ -922,7 +922,7 @@ def get_task_logs(client: prefect.Client, flow_run_id: str, task_slug: str) -> L }} """ # Execute the GraphQL query - logger.info("Executing GraphQL query to get task logs") + logger.info("Executing GraphQL query to get task logs.") response = client.graphql(query) result_data = response.get("data").get("task_run") # Extract task logs @@ -930,7 +930,7 @@ def get_task_logs(client: prefect.Client, flow_run_id: str, task_slug: str) -> L logs = result_data[0].get("logs") return logs else: - raise ValueError("No data available for the given task slug") + raise ValueError("No data available for the given task slug.") @task(timeout=3600) From b63d16e756df04ed28f14fe0d839666b794c47d5 Mon Sep 17 00:00:00 2001 From: Marcin Purtak <44641138+marcinpurtak@users.noreply.github.com> Date: Wed, 6 Dec 2023 15:12:09 +0100 Subject: [PATCH 48/54] Dot added MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rafał Ziemianek <49795849+Rafalz13@users.noreply.github.com> --- viadot/flows/sharepoint_to_adls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index e9dfe4b72..0d26d75aa 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -224,7 +224,7 @@ def __init__( name (str): Prefect flow name. list_title (str): Title of Sharepoint List. site_url (str): URL to set of Sharepoint Lists. - file_name (str): Name of file(without extension) in ADLS . Defaults to None. + file_name (str): Name of file (without extension) in ADLS. Defaults to None. adls_dir_path (str): Azure Data Lake destination folder/catalog path. Defaults to None. filters (dict, optional): Dictionary with operators which filters the SharepointList output. Defaults to None. allowed dtypes: ('datetime','date','bool','int', 'float', 'complex', 'str') From 9fd6e6f628c0f9d710e643c1a557540dc2b02ac5 Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Wed, 6 Dec 2023 15:14:04 +0100 Subject: [PATCH 49/54] Formatting fix --- viadot/task_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/viadot/task_utils.py b/viadot/task_utils.py index 6fc00d760..4459c715d 100644 --- a/viadot/task_utils.py +++ b/viadot/task_utils.py @@ -798,7 +798,9 @@ def validate_df(df: pd.DataFrame, tests: dict = None) -> None: @task(timeout=3600, slug="check_df") -def check_if_df_empty(df, if_no_data_returned: Literal["fail", "warn", "skip"] = "fail") -> bool: +def check_if_df_empty( + df, if_no_data_returned: Literal["fail", "warn", "skip"] = "fail" +) -> bool: """ Check if a DataFrame received as a data source response is empty. If fail is expected , this task will finish with ENDRUN(Failed()) state. From bd0d155049ff628f9d530b10917a49370552f6bf Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Thu, 7 Dec 2023 10:52:29 +0100 Subject: [PATCH 50/54] =?UTF-8?q?=F0=9F=90=9B=20Changed=20`cols=5Fto=5Fdro?= =?UTF-8?q?p`=20in=20VidClub?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/tasks/test_vid_club.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/tasks/test_vid_club.py b/tests/integration/tasks/test_vid_club.py index 8fad7fdde..6ba849c13 100644 --- a/tests/integration/tasks/test_vid_club.py +++ b/tests/integration/tasks/test_vid_club.py @@ -56,7 +56,7 @@ def test_drop_columns(var_dictionary): Args: var_dictionary: Dictionary with example arguments for run method. 
""" - cols_to_drop = ["regionID", "submissionDate"] + cols_to_drop = ["__v", "status"] vc_to_df = VidClubToDF(credentials=CREDENTIALS) output_with_dropped = vc_to_df.run( From 41204afbcd496b4f4a781de859b14aed93294871 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Thu, 7 Dec 2023 11:05:40 +0100 Subject: [PATCH 51/54] =?UTF-8?q?=F0=9F=90=9B=20Changed=20test=20for=20Vid?= =?UTF-8?q?Club=20flow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/flows/test_vidclub_to_adls.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/integration/flows/test_vidclub_to_adls.py b/tests/integration/flows/test_vidclub_to_adls.py index 0f6705579..79592aeb9 100644 --- a/tests/integration/flows/test_vidclub_to_adls.py +++ b/tests/integration/flows/test_vidclub_to_adls.py @@ -82,11 +82,9 @@ def test_vidclub_validate_df_task_fail(caplog): overwrite_adls=True, validate_df_dict={ "column_size": {"submissionID": 5}, - "column_unique_values": ["regionID"], + "column_unique_values": ["id"], }, ) - try: - flow.run() - except ValidationError: - pass + result = flow.run() + assert result.is_failed() From 434fc66da8be416b95510abb1c48ee48b4498d17 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Thu, 7 Dec 2023 11:50:27 +0100 Subject: [PATCH 52/54] =?UTF-8?q?=F0=9F=94=A5=20Removed=20unused=20paramet?= =?UTF-8?q?ers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/flows/test_bigquery_to_adls.py | 12 +++--------- tests/integration/flows/test_mysql_to_adls.py | 2 -- tests/integration/flows/test_salesforce_to_adls.py | 2 -- tests/integration/flows/test_vidclub_to_adls.py | 2 -- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/tests/integration/flows/test_bigquery_to_adls.py b/tests/integration/flows/test_bigquery_to_adls.py index b4503c6e9..e6116c9c0 100644 --- a/tests/integration/flows/test_bigquery_to_adls.py +++ b/tests/integration/flows/test_bigquery_to_adls.py @@ -101,13 +101,9 @@ def test_bigquery_to_adls_validate_df_fail(mocked_data): adls_sp_credentials_secret=ADLS_CREDENTIAL_SECRET, validate_df_dict={"column_list_to_match": ["type", "country", "test"]}, ) - try: - result = flow_bigquery.run() - except ValidationError: - pass - os.remove("test_bigquery_to_adls_validate_df_fail.parquet") - os.remove("test_bigquery_to_adls_validate_df_fail.json") + result = flow_bigquery.run() + assert result.is_failed() @mock.patch( @@ -138,7 +134,5 @@ def test_bigquery_to_adls_validate_df_success(mocked_data): os.remove("test_bigquery_to_adls_validate_df_success.parquet") os.remove("test_bigquery_to_adls_validate_df_success.json") - rm = AzureDataLakeRemove( - path=ADLS_DIR_PATH + ADLS_FILE_NAME, vault_name="azuwevelcrkeyv001s" - ) + rm = AzureDataLakeRemove(path=ADLS_DIR_PATH + ADLS_FILE_NAME) rm.run(sp_credentials_secret=ADLS_CREDENTIAL_SECRET) diff --git a/tests/integration/flows/test_mysql_to_adls.py b/tests/integration/flows/test_mysql_to_adls.py index c968d48a3..768b5cf7c 100644 --- a/tests/integration/flows/test_mysql_to_adls.py +++ b/tests/integration/flows/test_mysql_to_adls.py @@ -18,7 +18,6 @@ def test_adls_gen1_to_azure_sql_new_mock(TEST_PARQUET_FILE_PATH): query=query, file_path=TEST_PARQUET_FILE_PATH, to_path=f"raw/examples/{TEST_PARQUET_FILE_PATH}", - sp_credentials_secret="App-Azure-CR-DatalakeGen2-AIA-DEV", overwrite_adls=True, ) flow.run() @@ -32,7 +31,6 @@ def test_validate_df(TEST_PARQUET_FILE_PATH): country_short="DE", query=query, 
file_path=TEST_PARQUET_FILE_PATH, - sp_credentials_secret="App-Azure-CR-DatalakeGen2-AIA", to_path=f"raw/examples/{TEST_PARQUET_FILE_PATH}", validate_df_dict={"column_size": {"sales_org": 3}}, ) diff --git a/tests/integration/flows/test_salesforce_to_adls.py b/tests/integration/flows/test_salesforce_to_adls.py index 8c032f308..b58c51f3a 100644 --- a/tests/integration/flows/test_salesforce_to_adls.py +++ b/tests/integration/flows/test_salesforce_to_adls.py @@ -30,7 +30,6 @@ def test_salesforce_to_adls(): os.remove("test_salesforce_to_adls_run_flow.json") rm = AzureDataLakeRemove( path=ADLS_DIR_PATH + ADLS_FILE_NAME, - vault_name="azuwevelcrkeyv001s", ) rm.run(sp_credentials_secret=credentials_secret) @@ -56,6 +55,5 @@ def test_salesforce_to_adls_validate_success(): os.remove("test_salesforce_to_adls_run_flow.json") rm = AzureDataLakeRemove( path=ADLS_DIR_PATH + ADLS_FILE_NAME, - vault_name="azuwevelcrkeyv001s", ) rm.run(sp_credentials_secret=credentials_secret) diff --git a/tests/integration/flows/test_vidclub_to_adls.py b/tests/integration/flows/test_vidclub_to_adls.py index 79592aeb9..c3a7dcaf4 100644 --- a/tests/integration/flows/test_vidclub_to_adls.py +++ b/tests/integration/flows/test_vidclub_to_adls.py @@ -47,7 +47,6 @@ def test_vidclub_validate_df_task_success(caplog): to_date="2023-10-25", adls_dir_path="raw/tests", adls_file_name="test.parquet", - adls_sp_credentials_secret="App-Azure-CR-DatalakeGen2-AIA", overwrite_adls=True, validate_df_dict={ "column_size": {"submissionID": 5}, @@ -78,7 +77,6 @@ def test_vidclub_validate_df_task_fail(caplog): to_date="2023-10-25", adls_dir_path="raw/tests", adls_file_name="test.parquet", - adls_sp_credentials_secret="App-Azure-CR-DatalakeGen2-AIA", overwrite_adls=True, validate_df_dict={ "column_size": {"submissionID": 5}, From a9b6f66aaf48a24ab7fd3c4e97e93765d0a9be13 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Thu, 7 Dec 2023 12:12:01 +0100 Subject: [PATCH 53/54] =?UTF-8?q?=F0=9F=94=A5=20Removed=20test=20for=20`ge?= =?UTF-8?q?t=5Fsql=5Fserver=5Ftable=5Fdtypes`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_utils.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 21517ec8e..38564ed9e 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -6,14 +6,11 @@ from viadot.exceptions import APIError from viadot.signals import SKIP -from viadot.sources import AzureSQL from viadot.utils import ( add_viadot_metadata_columns, check_if_empty_file, gen_bulk_insert_query_from_df, - get_flow_last_run_date, get_nested_value, - get_sql_server_table_dtypes, slugify, handle_api_response, union_dict, @@ -48,12 +45,6 @@ def example_dataframe(): return pd.DataFrame(data, columns=["id", "name", "is_deleted", "balance"]) -@pytest.fixture(scope="function") -def azure_sql(): - azure_sql = AzureSQL(config_key="AZURE_SQL") - yield azure_sql - - @pytest.fixture(scope="function") def nested_dict(): nested_dict = { @@ -274,23 +265,6 @@ def test_handle_api_response_return_type(): assert response.status_code == 200 -def test_get_sql_server_table_dtypes(azure_sql): - """Checks if dtypes is generated in a good way using `get_sql_server_table_dtypes` function.""" - - SCHEMA = "sandbox" - TABLE = "test_table_dtypes" - dtypes = {"country": "VARCHAR(100)", "sales": "INT"} - - azure_sql.create_table( - schema=SCHEMA, table=TABLE, dtypes=dtypes, if_exists="replace" - ) - - dtypes = 
get_sql_server_table_dtypes(schema=SCHEMA, table=TABLE, con=azure_sql.con)
-    assert isinstance(dtypes, dict)
-    assert list(dtypes.keys()) == ["country", "sales"]
-    assert list(dtypes.values()) == ["varchar(100)", "int"]
-
-
 def test_union_dict_return():
     """Check if dictionaries are unioned in the correct way."""
     a = {"a": 1}

From a1044222efce72843fe32f3d8317f5268468fdb3 Mon Sep 17 00:00:00 2001
From: Rafalz13
Date: Thu, 7 Dec 2023 12:25:58 +0100
Subject: [PATCH 54/54] =?UTF-8?q?=F0=9F=93=9D=20Updated=20Changelog=20befo?=
 =?UTF-8?q?re=20release?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CHANGELOG.md | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe9d467f9..c0fe463f2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,16 +6,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 ### Added
-- Added tests for new functionalities in SAPRFC and SAPRFCV2 regarding passing credentials
+
+### Fixed
+
+### Changed
+
+## [0.4.23] - 2023-12-07
+### Added
+- Added tests for new functionalities in SAPRFC and SAPRFCV2 regarding passing credentials.
 - Added new params for mapping and reordering DataFrame for `Genesys` task and flow.
-- Tasks to search for logs in the flow
-- Tasks to find flow ID
-- Tasks used to control flows in multiflows by searching for a given log from a given task
+- Added `get_task_logs` task to search for logs in the flow.
+- Added `get_flow_run_id` task to find the flow ID.
+- Added `search_for_msg_in_logs` task used to control flows in multiflows by searching for a given log message from a given task.
+- Added closing session to `SAPBW`.
+- Added `CSV` as a new output extension to `SharepointListToADLS` flow.
+
 ### Fixed
+- Fixed creation of the URL in the `VidClub` source class. When `region=None`, the region parameter will not be included in the URL.
 ### Changed
-- if_no_data_returned added for sharepoint list flow which can fail,warn in case of no data returend or skip (continue) execution in the old way
-- Changed __init__ in SAPRFC and SAPRFCV2 class in source in order to raise warning in prefect when credentials will be taken from DEV.
+- `if_no_data_returned` added for the SharePoint list flow, which can fail or warn when no data is returned, or skip (continue) execution in the old way.
+- Changed `__init__` in the `SAPRFC` and `SAPRFCV2` source classes in order to raise a warning in Prefect when credentials are taken from DEV.
 ## [0.4.22] - 2023-11-15
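
A minimal usage sketch of the multiflow-control tasks introduced in this series, assuming an upstream flow named "Upstream Flow" that finished in a "Success" state (both values are illustrative, not taken from the patches); the "check_df" slug comes from the check_if_df_empty task decorator above, and in Prefect 1.x the task functions can be called directly via .run(), as the unit tests above do:

    import prefect

    from viadot.task_utils import get_flow_run_id, get_task_logs, search_for_msg_in_logs

    client = prefect.Client()

    # Find the ID of the last matching run of the upstream flow (illustrative name and state).
    flow_run_id = get_flow_run_id.run(client, flow_name="Upstream Flow", state="Success")

    # Pull the logs of the task registered with slug="check_df" in that run.
    logs = get_task_logs.run(client, flow_run_id=flow_run_id, task_slug="check_df")

    # React to the warning emitted by check_if_df_empty(if_no_data_returned="warn").
    if search_for_msg_in_logs.run(logs, "No data in the source response. Df empty."):
        print("Upstream flow returned no data - skipping the downstream load.")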