From 3fd261002eec974690256526f70c003cd3ede36e Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 24 Oct 2023 12:24:52 +0200 Subject: [PATCH 01/86] pass global variable to function --- viadot/sources/customer_gauge.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/viadot/sources/customer_gauge.py b/viadot/sources/customer_gauge.py index a696bba32..beee84944 100644 --- a/viadot/sources/customer_gauge.py +++ b/viadot/sources/customer_gauge.py @@ -215,19 +215,25 @@ def flatten_json(self, json_response: Dict[str, Any] = None) -> Dict[str, Any]: Returns: Dict[str, Any]: The flattened dictionary. """ - out = {} + result = {} - def flattify(x, key=""): - if type(x) is dict: + if not isinstance(json_response, dict): + raise TypeError("Input must be a dictionary.") + + def flattify(x, key="", out = None): + if out is None: + out = result + + if isinstance(x, dict): for a in x: - flattify(x[a], key + a + "_") + flattify(x[a], key + a + "_", out) else: out[key[:-1]] = x flattify(json_response) - return out - + return result + def to_df(self, json_response: Dict[str, Any] = None) -> pd.DataFrame: """ Flatten dictionary structure and convert it into pandas DataFrame. Cleans column names. From c543c1d0fd42b9158a6bc30f0946dabcd08b9f32 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Wed, 25 Oct 2023 04:41:53 +0200 Subject: [PATCH 02/86] limit source class to producing list of dicts --- viadot/sources/customer_gauge.py | 83 +++----------------------------- 1 file changed, 7 insertions(+), 76 deletions(-) diff --git a/viadot/sources/customer_gauge.py b/viadot/sources/customer_gauge.py index beee84944..4fccf45f5 100644 --- a/viadot/sources/customer_gauge.py +++ b/viadot/sources/customer_gauge.py @@ -165,87 +165,21 @@ def get_cursor(self, json_response: Dict[str, Any] = None) -> int: ) return cur - - def properties_cleaning( - self, json_response: Dict[str, Any] = None - ) -> Dict[str, Any]: - """ - Returns initialy cleaned data. The cleaning of the additional params is depend on the endpoint. - - Args: - json_response (Dict[str, Any], optional): Dictionary with nested structure that contains data and cursor parameter value. Defaults to None. - - Returns: - Dict[str, Any]: Dictionary that contains cleaned data corresponding to one record. - """ - clean_properties = { - d["field"]: d["reference"] for d in json_response["properties"] - } - json_response["properties"] = clean_properties - - if self.endpoint == "responses": - json_response["drivers"] = ( - " ".join(map(str, json_response["drivers"])) - .replace("label", ",") - .replace(r"{',':", " ") - .replace(r"'", "") - .replace("}", "") - .strip() - .replace(" ", ",") - ) - json_response["tags"] = " ".join(map(str, json_response["tags"])).replace( - "[]", "" - ) - json_response["questions"] = " ".join( - map(str, json_response["questions"]) - ).replace("[]", "") - else: - pass - - return json_response - - def flatten_json(self, json_response: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Function that flattens a nested structure of the JSON object into a single-level dictionary. - Uses a nested `flatten()` function to recursively combine nested keys in the JSON object with '_' to create the flattened keys. - - Args: - json_response (Dict[str, Any], optional): JSON object represented as a nested dictionary. Defaults to None. - - Returns: - Dict[str, Any]: The flattened dictionary. 
- """ - result = {} - - if not isinstance(json_response, dict): - raise TypeError("Input must be a dictionary.") - - def flattify(x, key="", out = None): - if out is None: - out = result - - if isinstance(x, dict): - for a in x: - flattify(x[a], key + a + "_", out) - else: - out[key[:-1]] = x - - flattify(json_response) - - return result - def to_df(self, json_response: Dict[str, Any] = None) -> pd.DataFrame: + def to_df(self, + json_response: Dict[str, Any] = None, + ) -> List[Dict[str, Any]]: """ - Flatten dictionary structure and convert it into pandas DataFrame. Cleans column names. + Extract and return the 'data' part of a JSON response as a list of dictionaries. Args: json_response (Dict[str, Any], optional): JSON object represented as a nested dictionary that contains data and cursor parameter value. Defaults to None. Raises: - ValueError: If data value not found. + ValueError: If the 'data' key is not present in the provided JSON response. Returns: - pd.DataFrame: pandas.DataFrame + List[Dict[str, Any]]: A list of dictionaries containing data from the 'data' part of the JSON response. """ try: response_json = json_response["data"] @@ -253,8 +187,5 @@ def to_df(self, json_response: Dict[str, Any] = None) -> pd.DataFrame: raise ValueError( "Provided argument doesn't contain 'data' value. Pass json returned from the endpoint." ) - clean_json = list(map(self.properties_cleaning, response_json)) - df = pd.DataFrame(list(map(self.flatten_json, clean_json))) - df.columns = df.columns.str.lower().str.replace(" ", "_") - return df + return response_json From d1189a4e02e8c9c5d38b61435954ea1b9bf2767b Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Wed, 25 Oct 2023 04:43:32 +0200 Subject: [PATCH 03/86] adjust function name --- viadot/sources/customer_gauge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/sources/customer_gauge.py b/viadot/sources/customer_gauge.py index 4fccf45f5..317a35876 100644 --- a/viadot/sources/customer_gauge.py +++ b/viadot/sources/customer_gauge.py @@ -166,7 +166,7 @@ def get_cursor(self, json_response: Dict[str, Any] = None) -> int: return cur - def to_df(self, + def to_list_of_jsons(self, json_response: Dict[str, Any] = None, ) -> List[Dict[str, Any]]: """ From 1b3f9bd0d81677d789f5f03fe30e8839bed28452 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Wed, 25 Oct 2023 04:45:01 +0200 Subject: [PATCH 04/86] add List dtype --- viadot/sources/customer_gauge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/sources/customer_gauge.py b/viadot/sources/customer_gauge.py index 317a35876..7b581bfe3 100644 --- a/viadot/sources/customer_gauge.py +++ b/viadot/sources/customer_gauge.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Any, Dict, Literal +from typing import Any, Dict, Literal, List import pandas as pd from prefect.utilities import logging From d2c453f52d0040b81aeec2b8fdc92a155a8ea7cc Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Wed, 25 Oct 2023 05:04:32 +0200 Subject: [PATCH 05/86] remove extracting data function --- viadot/sources/customer_gauge.py | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/viadot/sources/customer_gauge.py b/viadot/sources/customer_gauge.py index 7b581bfe3..6ceeccd02 100644 --- a/viadot/sources/customer_gauge.py +++ b/viadot/sources/customer_gauge.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Any, Dict, Literal, List +from typing import Any, Dict, Literal import pandas as pd from prefect.utilities import 
logging @@ -165,27 +165,3 @@ def get_cursor(self, json_response: Dict[str, Any] = None) -> int: ) return cur - - def to_list_of_jsons(self, - json_response: Dict[str, Any] = None, - ) -> List[Dict[str, Any]]: - """ - Extract and return the 'data' part of a JSON response as a list of dictionaries. - - Args: - json_response (Dict[str, Any], optional): JSON object represented as a nested dictionary that contains data and cursor parameter value. Defaults to None. - - Raises: - ValueError: If the 'data' key is not present in the provided JSON response. - - Returns: - List[Dict[str, Any]]: A list of dictionaries containing data from the 'data' part of the JSON response. - """ - try: - response_json = json_response["data"] - except: - raise ValueError( - "Provided argument doesn't contain 'data' value. Pass json returned from the endpoint." - ) - - return response_json From 70d2d4064b54c2fc85ebbcb956ee3f746e2ad419 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Wed, 25 Oct 2023 08:06:48 +0200 Subject: [PATCH 06/86] add cleaning functions to task class --- viadot/tasks/customer_gauge.py | 217 ++++++++++++++++++++++++++++++--- 1 file changed, 200 insertions(+), 17 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 4f1f26bbd..373ca1501 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -1,6 +1,6 @@ import json from datetime import datetime -from typing import Literal +from typing import Any, Dict, Literal, List import pandas as pd from prefect import Task @@ -31,18 +31,26 @@ def __init__( **kwargs, ): """ - Task CustomerGaugeToDF for downloading the selected range of data from Customer Gauge endpoint and return as one pandas DataFrame. + Task CustomerGaugeToDF for downloading the selected range of data from Customer Gauge + endpoint and return as one pandas DataFrame. Args: - endpoint (Literal["responses", "non-responses"], optional): Indicate which endpoint to connect. Defaults to None. - total_load (bool, optional): Indicate whether to download the data to the latest. If 'False', only one API call is executed (up to 1000 records). Defaults to True. + endpoint (Literal["responses", "non-responses"], optional): Indicate which endpoint + to connect. Defaults to None. + total_load (bool, optional): Indicate whether to download the data to the latest. + If 'False', only one API call is executed (up to 1000 records). Defaults to True. endpoint_url (str, optional): Endpoint URL. Defaults to None. cursor (int, optional): Cursor value to navigate to the page. Defaults to None. - pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. Defaults to 1000. - date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], optional): Specifies the date type which filter date range. Defaults to None. - start_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. Defaults to None. - end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. - timeout (int, optional): The time (in seconds) to wait while running this task before a timeout occurs. Defaults to 3600. + pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. + Defaults to 1000. + date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], + optional): Specifies the date type which filter date range. Defaults to None. 
+ start_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. + Defaults to None. + end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. + Defaults to None. + timeout (int, optional): The time (in seconds) to wait while running this task before + a timeout occurs. Defaults to 3600. """ self.endpoint = endpoint self.total_load = total_load @@ -59,6 +67,175 @@ def __init__( *args, **kwargs, ) + def get_data(self, + json_response: Dict[str, Any] = None, + ) -> List[Dict[str, Any]]: + """ + Extract and return the 'data' part of a JSON response as a list of dictionaries. + + Args: + json_response (Dict[str, Any], optional): JSON object represented as a nested + dictionary that contains data and cursor parameter value. Defaults to None. + + Raises: + ValueError: If the 'data' key is not present in the provided JSON response. + + Returns: + List[Dict[str, Any]]: A list of dictionaries containing data from the 'data' + part of the JSON response. + """ + try: + jsons_list = json_response["data"] + except: + raise ValueError( + "Provided argument doesn't contain 'data' value. Pass json returned from the endpoint." + ) + + return jsons_list + + def _field_reference_unpacker( + self, + json_response: Dict[str, Any], + field: str, + ) -> Dict[str, Any]: + """ + Unpack and modify dictionaries within the specified field of a JSON response. + + This function takes a JSON response and a field name. It processes dictionaries + within the specified field, checking if each dictionary contains exactly two items. + If a dictionary meets this criteria, it is transformed into a new dictionary, + where the first key becomes a key, and the second key becomes its associated value + + Args: + json_response (Dict[str, Any], optional): JSON response with data. + field (str): The key (column) of the dictionary to be modified. + + Returns: + Dict[str, Any]: The JSON response with modified nested dictionaries + within the specified field. + """ + + result = {} + for i, dictionary in enumerate(json_response[field]): + if isinstance(dictionary, dict) and len(dictionary.items()) == 2: + list_properties = list(dictionary.values()) + result[list_properties[0]] = list_properties[1] + if result: + # print(f"All elements in '{field}' are unpacked successfully.") + json_response[field] = result + + return json_response + + def _nested_dict_transformer( + self, + json_response: Dict[str, Any], + field: str, + ) -> Dict[str, Any]: + """ + Modify nested dictionaries within the specified field of a JSON response. + + This function takes a JSON response and a field name. It modifies nested + dictionaries within the specified field by adding an index and underscore + to the keys. The modified dictionary is then updated in the JSON response. + + Args: + json_response (Dict[str, Any], optional): JSON response with data. + field (str): The key (column) of the dictionary to be modified. + + Returns: + Dict[str, Any]: The JSON response with modified nested dictionaries + within the specified field. + """ + d={} + for i, dictionary in enumerate(json_response[field], start=1): + for key, value in dictionary.items(): + d[f'{i}_{key}'] = value + + json_response[field] = d + + return json_response + + def column_unpacker( + self, + json_list: List[Dict[str, Any]] = None, + method1_cols: List[str] = None, + method2_cols: List[str] = None, + ) -> List[Dict[str, Any]]: + + """ + Unpack and modify specific columns in a list of dictionaries using two methods, chosen by the user. 
+ If user wants to use field_reference_unpacker, he needs to provide list of fields in `method1_cols` + argument, if user wants to use nested_dict_transformer - uses 'method2_cols' argument. + + Args: + json_list (List[Dict[str, Any]): A list of dictionaries containing the data. + method1_cols (List[str]): Columns to unpack and modify using field_reference_unpacker. + method2_cols (List[str]): Columns to unpack and modify using nested_dict_transformer. + + Raises: + ValueError: _description_ + + Returns: + List[Dict[str, Any]]: The updated list of dictionaries after column unpacking and modification. + """ + + if json_list is None: + raise ValueError("Input 'json_list' is required.") + + def unpack_columns(columns, unpack_function): + for field in columns: + if field in json_list[0]: + print(f"Unpacking column '{field}'...") + try: + json_list_clean = list(map(lambda x: unpack_function(x, field), json_list)) + print(f"All elements in '{field}' are unpacked successfully.") + except: + print(f"No transformation were made in '{field}', because didn't contain list of key-value data.") + else: + print(f"Column '{field}' not found.") + return json_list_clean + + if method1_cols is not None: + json_list = unpack_columns(columns = method1_cols, unpack_function = self._field_reference_unpacker) + + if method2_cols is not None: + json_list = unpack_columns(columns = method2_cols, unpack_function = self._nested_dict_transformer) + + return json_list + + + def flatten_json(self, json_response: Dict[str, Any] = None) -> Dict[str, Any]: + """ + Function that flattens a nested structure of the JSON object into + a single-level dictionary.Uses a nested `flatten()` function to recursively + combine nested keys in the JSON object with '_' to create the flattened keys. + + Args: + json_response (Dict[str, Any], optional): JSON object represented as + a nested dictionary. Defaults to None. + + Returns: + Dict[str, Any]: The flattened dictionary. + """ + result = {} + + if not isinstance(json_response, dict): + raise TypeError("Input must be a dictionary.") + + def flattify(x, key="", out = None): + if out is None: + out = result + + if isinstance(x, dict): + for a in x: + flattify(x[a], key + a + "_", out) + else: + out[key[:-1]] = x + + flattify(json_response) + + return result + def __call__(self): """Download Customer Gauge data to a DF""" @@ -86,6 +263,8 @@ def run( ] = None, start_date: datetime = None, end_date: datetime = None, + method1_cols: List[str] = None, + method2_cols: List[str] = None, credentials_secret: str = "CUSTOMER-GAUGE", vault_name: str = None, ) -> pd.DataFrame: @@ -115,7 +294,7 @@ def run( except (ValueError, TypeError) as e: logger.error(e) - df_list = [] + total_json = [] customer_gauge = CustomerGauge( endpoint=endpoint, url=endpoint_url, credentials=credentials @@ -131,8 +310,10 @@ def run( end_date=end_date, ) cur = customer_gauge.get_cursor(json_data) - df = customer_gauge.to_df(json_data) - df_list.append(df) + + jsn = self.get_data(json_data) + total_json += jsn + if total_load == True: if cursor is None: logger.info( @@ -142,12 +323,14 @@ def run( logger.info( f"Downloading starting from the {cursor} cursor. Process might take a few minutes..." 
) - while df.empty == False: + while jsn: json_data = customer_gauge.get_json_response(cursor=cur) cur = customer_gauge.get_cursor(json_data) - df = customer_gauge.to_df(json_data) - df_list.append(df) + jsn = self.get_data(json_data) + total_json += jsn - df_total = pd.concat(df_list, ignore_index=True) + clean_json = self.column_unpacker(json_list = total_json, method1_cols = method1_cols, method2_cols = method2_cols) + df = pd.DataFrame(list(map(self.flatten_json, clean_json))) + df.columns = df.columns.str.lower().str.replace(" ", "_") - return df_total + return df From d1ecde2447060bc8d3ccd83cd953ac80c4b4a108 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Wed, 25 Oct 2023 08:39:11 +0200 Subject: [PATCH 07/86] cleaning data from empty sqaure brackets --- viadot/tasks/customer_gauge.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 373ca1501..854721fb5 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -236,6 +236,24 @@ def flattify(x, key="", out = None): return result + def square_brackets_remover( + self, + df: pd.DataFrame = None + ) -> pd.DataFrame: + """ + Replace square brackets "[]" with an empty string in a pandas DataFrame. + + Args: + df (pd.DataFrame, optional): Replace square brackets "[]" with an empty string + in a pandas DataFrame. Defaults to None. + + Returns: + pd.DataFrame: The modified DataFrame with square brackets replaced by an empty string. + """ + + df = df.astype(str) + df = df.applymap(lambda x: x.strip("[]")) + return df def __call__(self): """Download Customer Gauge data to a DF""" @@ -331,6 +349,7 @@ def run( clean_json = self.column_unpacker(json_list = total_json, method1_cols = method1_cols, method2_cols = method2_cols) df = pd.DataFrame(list(map(self.flatten_json, clean_json))) + df = self.square_brackets_remover(df) df.columns = df.columns.str.lower().str.replace(" ", "_") return df From a1f145dc1de88309ca6a3a5945888023690615c4 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Thu, 26 Oct 2023 09:35:01 +0200 Subject: [PATCH 08/86] replace print with logger --- viadot/tasks/customer_gauge.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 854721fb5..01c2f47ae 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -121,7 +121,6 @@ def _field_reference_unpacker( list_properties = list(dictionary.values()) result[list_properties[0]] = list_properties[1] if result: - # print(f"All elements in '{field}' are unpacked successfully.") json_response[field] = result return json_response @@ -185,14 +184,14 @@ def column_unpacker( def unpack_columns(columns, unpack_function): for field in columns: if field in json_list[0]: - print(f"Unpacking column '{field}'...") + logger.info(f"Unpacking column '{field}'...") try: json_list_clean = list(map(lambda x: unpack_function(x, field), json_list)) - print(f"All elements in '{field}' are unpacked successfully.") + logger.info(f"All elements in '{field}' are unpacked successfully.") except: - print(f"No transformation were made in '{field}', because didn't contain list of key-value data.") + logger.info(f"No transformation were made in '{field}', because didn't contain list of key-value data.") else: - print(f"Column '{field}' not found.") + logger.info(f"Column '{field}' not found.") return json_list_clean if method1_cols is not None: From 
0b35c83e0f55d49e69ff48d9213c533a63898051 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Thu, 26 Oct 2023 09:52:54 +0200 Subject: [PATCH 09/86] add new args to docstrings --- viadot/tasks/customer_gauge.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 01c2f47ae..bc8bfb521 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -26,6 +26,8 @@ def __init__( ] = None, start_date: datetime = None, end_date: datetime = None, + method1_cols: List[str] = None, + method2_cols: List[str] = None, timeout: int = 3600, *args, **kwargs, @@ -49,6 +51,8 @@ def __init__( Defaults to None. end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. + method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. + method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. timeout (int, optional): The time (in seconds) to wait while running this task before a timeout occurs. Defaults to 3600. """ @@ -60,6 +64,8 @@ def __init__( self.date_field = date_field self.start_date = start_date self.end_date = end_date + self.method1_cols = method1_cols + self.method2_cols = method2_cols super().__init__( name="customer_gauge_to_df", @@ -168,8 +174,8 @@ def column_unpacker( Args: json_list (List[Dict[str, Any]): A list of dictionaries containing the data. - method1_cols (List[str]): Columns to unpack and modify using field_reference_unpacker. - method2_cols (List[str]): Columns to unpack and modify using nested_dict_transformer. + method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. + method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Raises: ValueError: _description_ @@ -267,6 +273,8 @@ def __call__(self): "date_field", "start_date", "end_date", + "method1_cols", + "method2_cols", ) def run( self, @@ -297,6 +305,8 @@ def run( date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], optional): Specifies the date type which filter date range. Defaults to None. start_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. Defaults to None. end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. + method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. + method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with ['client_id', 'client_secret']. Defaults to "CUSTOMER-GAUGE". vault_name (str, optional): The name of the vault from which to obtain the secret. Defaults to None. 
From 28109846833553c3b9e25e929edd02c5d23392a2 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Thu, 26 Oct 2023 09:58:39 +0200 Subject: [PATCH 10/86] add info about method used --- viadot/tasks/customer_gauge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index bc8bfb521..ec75e0445 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -190,7 +190,7 @@ def column_unpacker( def unpack_columns(columns, unpack_function): for field in columns: if field in json_list[0]: - logger.info(f"Unpacking column '{field}'...") + logger.info(f"Unpacking column '{field}' with {unpack_function.__name__} method...") try: json_list_clean = list(map(lambda x: unpack_function(x, field), json_list)) logger.info(f"All elements in '{field}' are unpacked successfully.") From a32c185c23616f857f8098208641fe7364af29b9 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Thu, 26 Oct 2023 10:10:47 +0200 Subject: [PATCH 11/86] add final loggers --- viadot/tasks/customer_gauge.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index ec75e0445..3d16af20d 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -357,8 +357,10 @@ def run( total_json += jsn clean_json = self.column_unpacker(json_list = total_json, method1_cols = method1_cols, method2_cols = method2_cols) + logger.info("Inserting data into the DataFrame...") df = pd.DataFrame(list(map(self.flatten_json, clean_json))) df = self.square_brackets_remover(df) df.columns = df.columns.str.lower().str.replace(" ", "_") + logger.info("DataFrame: Ready. Data: Inserted. Let the magic happen!") return df From e9187e1e0afbb0c9e83350a8ce53cc5c0534e0b6 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Thu, 26 Oct 2023 10:16:55 +0200 Subject: [PATCH 12/86] adjust docstrings --- viadot/tasks/customer_gauge.py | 55 +++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 3d16af20d..bc95bc136 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -38,23 +38,25 @@ def __init__( Args: endpoint (Literal["responses", "non-responses"], optional): Indicate which endpoint - to connect. Defaults to None. + to connect. Defaults to None. total_load (bool, optional): Indicate whether to download the data to the latest. - If 'False', only one API call is executed (up to 1000 records). Defaults to True. + If 'False', only one API call is executed (up to 1000 records). Defaults to True. endpoint_url (str, optional): Endpoint URL. Defaults to None. cursor (int, optional): Cursor value to navigate to the page. Defaults to None. pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. - Defaults to 1000. + Defaults to 1000. date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], - optional): Specifies the date type which filter date range. Defaults to None. + optional): Specifies the date type which filter date range. Defaults to None. start_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. - Defaults to None. + Defaults to None. end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. - Defaults to None. + Defaults to None. method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. + Defaults to None. 
method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. + Defaults to None. timeout (int, optional): The time (in seconds) to wait while running this task before - a timeout occurs. Defaults to 3600. + a timeout occurs. Defaults to 3600. """ self.endpoint = endpoint self.total_load = total_load @@ -174,8 +176,10 @@ def column_unpacker( Args: json_list (List[Dict[str, Any]): A list of dictionaries containing the data. - method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. - method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. + method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. + Defaults to None. + method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. + Defaults to None. Raises: ValueError: _description_ @@ -294,21 +298,32 @@ def run( vault_name: str = None, ) -> pd.DataFrame: """ - Run method. Downloading the selected range of data from Customer Gauge endpoint and return as one pandas DataFrame. + Run method. Downloading the selected range of data from Customer Gauge endpoint and return + as one pandas DataFrame. Args: - endpoint (Literal["responses", "non-responses"]): Indicate which endpoint to connect. Defaults to None. - total_load (bool, optional): Indicate whether to download the data to the latest. If 'False', only one API call is executed (up to 1000 records). Defaults to True. + endpoint (Literal["responses", "non-responses"]): Indicate which endpoint to connect. + Defaults to None. + total_load (bool, optional): Indicate whether to download the data to the latest. If + 'False', only one API call is executed (up to 1000 records). Defaults to True. endpoint_url (str, optional): Endpoint URL. Defaults to None. cursor (int, optional): Cursor value to navigate to the page. Defaults to None. - pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. Defaults to 1000. - date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], optional): Specifies the date type which filter date range. Defaults to None. - start_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. Defaults to None. - end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. - method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. - method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. - credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with ['client_id', 'client_secret']. Defaults to "CUSTOMER-GAUGE". - vault_name (str, optional): The name of the vault from which to obtain the secret. Defaults to None. + pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. + Defaults to 1000. + date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], + optional): Specifies the date type which filter date range. Defaults to None. + start_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. + Defaults to None. + end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. + Defaults to None. + method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. + Defaults to None. + method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. 
+ Defaults to None. + credentials_secret (str, optional): The name of the Azure Key Vault secret containing a + dictionary with ['client_id', 'client_secret']. Defaults to "CUSTOMER-GAUGE". + vault_name (str, optional): The name of the vault from which to obtain the secret. + Defaults to None. Returns: pd.DataFrame: Final pandas DataFrame. From 618666d37cd007eae460a8db1745669a0a062870 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Thu, 26 Oct 2023 10:23:27 +0200 Subject: [PATCH 13/86] update docstrings --- viadot/flows/customer_gauge_to_adls.py | 48 ++++++++++++++++---------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/viadot/flows/customer_gauge_to_adls.py b/viadot/flows/customer_gauge_to_adls.py index 8053aeda3..e1bfd7108 100644 --- a/viadot/flows/customer_gauge_to_adls.py +++ b/viadot/flows/customer_gauge_to_adls.py @@ -37,6 +37,8 @@ def __init__( ] = None, start_date: datetime = None, end_date: datetime = None, + method1_cols: List[str] = None, + method2_cols: List[str] = None, customer_gauge_credentials_secret: str = "CUSTOMER-GAUGE", anonymize: bool = False, columns_to_anonymize: List[str] = None, @@ -57,42 +59,50 @@ def __init__( **kwargs: Dict[str, Any] ): """ - Flow for downloading data from the Customer Gauge's endpoints (Responses and Non-Responses) via API to a CSV or Parquet file. - The data anonimization is optional.Then upload it to Azure Data Lake. + Flow for downloading data from the Customer Gauge's endpoints (Responses and Non-Responses) via API + to a CSV or Parquet file.The data anonimization is optional.Then upload it to Azure Data Lake. Args: name (str): The name of the flow. - endpoint (Literal["responses", "non-responses"], optional): Indicate which endpoint to connect. Defaults to None. + endpoint (Literal["responses", "non-responses"], optional): Indicate which endpoint to connect. + Defaults to None. endpoint_url (str, optional): Full URL for pointing to specific endpoint. Defaults to None. - total_load (bool, optional): Indicate whether to download the data to the latest. If 'False', only one API call is executed (up to 1000 records). - Defaults to True. + total_load (bool, optional): Indicate whether to download the data to the latest. If 'False', + only one API call is executed (up to 1000 records). Defaults to True. cursor (int, optional): Cursor value to navigate to the page. Defaults to None. - pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. Defaults to 1000. - date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], optional): Specifies the date type which filter date range. - Defaults to None. + pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. + Defaults to 1000. + date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], optional): + Specifies the date type which filter date range. Defaults to None. start_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. end_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. Defaults to None. - customer_gauge_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with ['client_id', 'client_secret']. - Defaults to "CUSTOMER-GAUGE". + method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. Defaults to None. + method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. 
Defaults to None. + customer_gauge_credentials_secret (str, optional): The name of the Azure Key Vault secret containing + a dictionary with ['client_id', 'client_secret']. Defaults to "CUSTOMER-GAUGE". vault_name (str, optional): The name of the vault from which to obtain the secret. Defaults to None. anonymize (bool, optional): Indicates if anonymize selected columns. Defaults to False. columns_to_anonymize (List[str], optional): List of columns to anonymize. Defaults to None. - anonymize_method (Literal["mask", "hash"], optional): Method of anonymizing data. "mask" -> replace the data with "value" arg. - "hash" -> replace the data with the hash value of an object (using `hash()` method). Defaults to "mask". + anonymize_method (Literal["mask", "hash"], optional): Method of anonymizing data. "mask" -> replace the + data with "value" arg. "hash" -> replace the data with the hash value of an object (using `hash()` + method). Defaults to "mask". anonymize_value (str, optional): Value to replace the data. Defaults to "***". - date_column (str, optional): Name of the date column used to identify rows that are older than a specified number of days. Defaults to None. - days (int, optional): The number of days beyond which we want to anonymize the data, e.g. older that 2 years can be: 2*365. Defaults to None. + date_column (str, optional): Name of the date column used to identify rows that are older than a specified + number of days. Defaults to None. + days (int, optional): The number of days beyond which we want to anonymize the data, e.g. older than + 2 years can be: 2*365. Defaults to None. output_file_extension (str, optional): Output file extension - to allow selection of .csv for data which is not easy to handle with parquet. Defaults to ".parquet". adls_dir_path (str, optional): Azure Data Lake destination folder/catalog path. Defaults to None. local_file_path (str, optional): Local destination path. Defaults to None. adls_file_name (str, optional): Name of file in ADLS. Defaults to None. - adls_sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with - ACCOUNT_NAME and Service Principal credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. - Defaults to None. + adls_sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary + with ACCOUNT_NAME and Service Principal credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure + Data Lake. Defaults to None. overwrite_adls (bool, optional): Whether to overwrite files in the lake. Defaults to False. if_exists (str, optional): What to do if the file exists. Defaults to "replace". - timeout (int, optional): The time (in seconds) to wait while running this task before a timeout occurs. Defaults to 3600. + timeout (int, optional): The time (in seconds) to wait while running this task before a timeout occurs. + Defaults to 3600. 
""" # CustomerGaugeToDF self.endpoint = endpoint @@ -103,6 +113,8 @@ def __init__( self.date_field = date_field self.start_date = start_date self.end_date = end_date + self.method1_cols = method1_cols + self.method2_cols = method2_cols self.customer_gauge_credentials_secret = customer_gauge_credentials_secret # anonymize_df From 7732545f33e4b63c0e4319c9cb9a53aa068d9e3f Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Thu, 26 Oct 2023 10:27:51 +0200 Subject: [PATCH 14/86] adjust readability --- viadot/tasks/customer_gauge.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index bc95bc136..4cf328332 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -199,16 +199,23 @@ def unpack_columns(columns, unpack_function): json_list_clean = list(map(lambda x: unpack_function(x, field), json_list)) logger.info(f"All elements in '{field}' are unpacked successfully.") except: - logger.info(f"No transformation were made in '{field}', because didn't contain list of key-value data.") + logger.info(f"No transformation were made in '{field}', + because didn't contain list of key-value data.") else: logger.info(f"Column '{field}' not found.") return json_list_clean if method1_cols is not None: - json_list = unpack_columns(columns = method1_cols, unpack_function = self._field_reference_unpacker) + json_list = unpack_columns( + columns = method1_cols, + unpack_function = self._field_reference_unpacker + ) if method2_cols is not None: - json_list = unpack_columns(columns = method2_cols, unpack_function = self._nested_dict_transformer) + json_list = unpack_columns( + columns = method2_cols, + unpack_function = self._nested_dict_transformer + ) return json_list @@ -359,7 +366,8 @@ def run( if total_load == True: if cursor is None: logger.info( - f"Downloading all the data from the {self.endpoint or self.endpoint_url} endpoint. Process might take a few minutes..." + f"Downloading all the data from the {self.endpoint or self.endpoint_url} endpoint. + Process might take a few minutes..." 
) else: logger.info( @@ -371,7 +379,10 @@ def run( jsn = self.get_data(json_data) total_json += jsn - clean_json = self.column_unpacker(json_list = total_json, method1_cols = method1_cols, method2_cols = method2_cols) + clean_json = self.column_unpacker( + json_list = total_json, + method1_cols = method1_cols, + method2_cols = method2_cols) logger.info("Inserting data into the DataFrame...") df = pd.DataFrame(list(map(self.flatten_json, clean_json))) df = self.square_brackets_remover(df) From f23bb2da7ae6001edaa84ca9f60fe7386a139ca6 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Thu, 26 Oct 2023 11:05:32 +0200 Subject: [PATCH 15/86] fix loggers --- viadot/tasks/customer_gauge.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 4cf328332..cd22ca392 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -199,8 +199,8 @@ def unpack_columns(columns, unpack_function): json_list_clean = list(map(lambda x: unpack_function(x, field), json_list)) logger.info(f"All elements in '{field}' are unpacked successfully.") except: - logger.info(f"No transformation were made in '{field}', - because didn't contain list of key-value data.") + logger.info(f"No transformation were made in '{field}'," + "because didn't contain list of key-value data.") else: logger.info(f"Column '{field}' not found.") return json_list_clean @@ -366,8 +366,8 @@ def run( if total_load == True: if cursor is None: logger.info( - f"Downloading all the data from the {self.endpoint or self.endpoint_url} endpoint. - Process might take a few minutes..." + f"Downloading all the data from the {self.endpoint or self.endpoint_url} endpoint." + "Process might take a few minutes..." ) else: logger.info( From d08612432ab2317a8d0ffdf30ba16ff2a9214889 Mon Sep 17 00:00:00 2001 From: m-paz Date: Thu, 26 Oct 2023 16:58:40 +0100 Subject: [PATCH 16/86] =?UTF-8?q?=F0=9F=93=9D=20Bumped=20version=20after?= =?UTF-8?q?=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_viadot.py | 2 +- viadot/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_viadot.py b/tests/test_viadot.py index 1f0874453..675dbfbdc 100644 --- a/tests/test_viadot.py +++ b/tests/test_viadot.py @@ -2,4 +2,4 @@ def test_version(): - assert __version__ == "0.4.21" + assert __version__ == "0.4.22" diff --git a/viadot/__init__.py b/viadot/__init__.py index e427a5547..ece529aa1 100644 --- a/viadot/__init__.py +++ b/viadot/__init__.py @@ -1 +1 @@ -__version__ = "0.4.21" +__version__ = "0.4.22" From 2714f33b0a6e7d5356186d936ae08ab97a7b1c57 Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Fri, 27 Oct 2023 08:21:33 +0200 Subject: [PATCH 17/86] =?UTF-8?q?=F0=9F=90=9B=20Fixed=20typos=20in=20docum?= =?UTF-8?q?entation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 +- viadot/sources/sap_bw.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76eb3280b..507c590cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `SharepointListToDF` task class. - Added `SharepointListToADLS` flow class. - Added tests for `SharepointList`. -- Added `get_nested_dict` to untils.py. +- Added `get_nested_dict` to utils.py. 
### Fixed diff --git a/viadot/sources/sap_bw.py b/viadot/sources/sap_bw.py index 8f4fb0583..94e3347a9 100644 --- a/viadot/sources/sap_bw.py +++ b/viadot/sources/sap_bw.py @@ -101,7 +101,7 @@ def get_output_data(self, mdx_query: str) -> dict: { "COLUMN": 0, "ROW": 0, - "DATA": "VELUX Deutschland GmbH", + "DATA": "DATA", "VALUE_DATA_TYPE": "CHAR", "CELL_STATUS": "" },... From cecebf971230bef747f08efb2bc7fc3e8a566730 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 31 Oct 2023 14:40:15 +0100 Subject: [PATCH 18/86] add drivers cleaner --- viadot/tasks/customer_gauge.py | 40 +++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index cd22ca392..03b30b286 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -121,6 +121,9 @@ def _field_reference_unpacker( Returns: Dict[str, Any]: The JSON response with modified nested dictionaries within the specified field. + + Raises: + ValueError: If a dictionary within the specified field doesn't contain exactly two items. """ result = {} @@ -128,6 +131,8 @@ def _field_reference_unpacker( if isinstance(dictionary, dict) and len(dictionary.items()) == 2: list_properties = list(dictionary.values()) result[list_properties[0]] = list_properties[1] + else: + raise ValueError() if result: json_response[field] = result @@ -154,11 +159,11 @@ def _nested_dict_transformer( within the specified field. """ d={} - for i, dictionary in enumerate(json_response[field], start=1): + for i, dictionary in enumerate(json_response[field], start=1): for key, value in dictionary.items(): d[f'{i}_{key}'] = value - - json_response[field] = d + if d: + json_response[field] = d return json_response @@ -192,11 +197,12 @@ def column_unpacker( raise ValueError("Input 'json_list' is required.") def unpack_columns(columns, unpack_function): + json_list_clean = json_list.copy() for field in columns: - if field in json_list[0]: + if field in json_list_clean[0]: logger.info(f"Unpacking column '{field}' with {unpack_function.__name__} method...") try: - json_list_clean = list(map(lambda x: unpack_function(x, field), json_list)) + json_list_clean = list(map(lambda x: unpack_function(x, field), json_list_clean)) logger.info(f"All elements in '{field}' are unpacked successfully.") except: logger.info(f"No transformation were made in '{field}'," @@ -270,6 +276,28 @@ def square_brackets_remover( df = df.astype(str) df = df.applymap(lambda x: x.strip("[]")) return df + + def _drivers_cleaner( + self, + drivers: str = None + ) -> str: + """ + Clean and format the 'drivers' data. + + Args: + drivers (str, optional): Column name of the data to be cleaned. Defaults to None. + + Returns: + str: A cleaned and formatted string of driver data. + """ + + drivers = drivers.split("}, {") + cleaned_drivers = [] + for driver in drivers: + driver = driver.replace("{", "").replace("}", "") + driver = driver.replace("'", "").replace("label: ", "") + cleaned_drivers.append(driver) + return ', '.join(cleaned_drivers) def __call__(self): """Download Customer Gauge data to a DF""" @@ -386,6 +414,8 @@ def run( logger.info("Inserting data into the DataFrame...") df = pd.DataFrame(list(map(self.flatten_json, clean_json))) df = self.square_brackets_remover(df) + if endpoint == "responses": + df["drivers"] = df["drivers"].apply(self._drivers_cleaner) df.columns = df.columns.str.lower().str.replace(" ", "_") logger.info("DataFrame: Ready. Data: Inserted. 
Let the magic happen!") From 9894f965e555c856d060dcc07efa759af2ca636d Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 31 Oct 2023 15:53:01 +0100 Subject: [PATCH 19/86] add new args --- viadot/flows/customer_gauge_to_adls.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/viadot/flows/customer_gauge_to_adls.py b/viadot/flows/customer_gauge_to_adls.py index e1bfd7108..080dda23d 100644 --- a/viadot/flows/customer_gauge_to_adls.py +++ b/viadot/flows/customer_gauge_to_adls.py @@ -176,6 +176,8 @@ def gen_flow(self) -> Flow: date_field=self.date_field, start_date=self.start_date, end_date=self.end_date, + method1_cols=self.method1_cols, + method2_cols=self.method2_cols, vault_name=self.vault_name, credentials_secret=self.customer_gauge_credentials_secret, flow=self, From 24898c8027b8de2513a63962e550592e6b2469b5 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 31 Oct 2023 18:28:24 +0100 Subject: [PATCH 20/86] temp tests comment --- tests/integration/test_customer_gauge.py | 88 ++++++++++++------------ 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/tests/integration/test_customer_gauge.py b/tests/integration/test_customer_gauge.py index 666a73251..119615100 100644 --- a/tests/integration/test_customer_gauge.py +++ b/tests/integration/test_customer_gauge.py @@ -17,50 +17,50 @@ def test_get_json_content(): assert isinstance(json_response["cursor"], dict) -def test_properties_cleaning(): - json_response = CG.get_json_response() - data = json_response["data"][2].copy() - cleaned_data = CG.properties_cleaning(data.copy()) - assert isinstance(data["properties"], list) - assert isinstance(cleaned_data["properties"], dict) - - -def test_flatten_json(): - nested_json = { - "user": { - "name": "Jane", - "address": { - "street": "456 Elm St", - "city": "San Francisco", - "state": "CA", - "zip": "94109", - "country": {"name": "United States", "code": "US"}, - }, - "phone_numbers": {"type": "home", "number": "555-4321"}, - } - } - - expected_output = { - "user_name": "Jane", - "user_address_street": "456 Elm St", - "user_address_city": "San Francisco", - "user_address_state": "CA", - "user_address_zip": "94109", - "user_address_country_name": "United States", - "user_address_country_code": "US", - "user_phone_numbers_type": "home", - "user_phone_numbers_number": "555-4321", - } - - output = CG.flatten_json(nested_json) - assert output == expected_output - - -def test_pagesize_and_to_df(): - json_response = CG.get_json_response(pagesize=1) - df = CG.to_df(json_response) - assert isinstance(df, pd.DataFrame) - assert len(df) == 1 +# def test_properties_cleaning(): +# json_response = CG.get_json_response() +# data = json_response["data"][2].copy() +# cleaned_data = CG.properties_cleaning(data.copy()) +# assert isinstance(data["properties"], list) +# assert isinstance(cleaned_data["properties"], dict) + + +# def test_flatten_json(): +# nested_json = { +# "user": { +# "name": "Jane", +# "address": { +# "street": "456 Elm St", +# "city": "San Francisco", +# "state": "CA", +# "zip": "94109", +# "country": {"name": "United States", "code": "US"}, +# }, +# "phone_numbers": {"type": "home", "number": "555-4321"}, +# } +# } + +# expected_output = { +# "user_name": "Jane", +# "user_address_street": "456 Elm St", +# "user_address_city": "San Francisco", +# "user_address_state": "CA", +# "user_address_zip": "94109", +# "user_address_country_name": "United States", +# "user_address_country_code": "US", +# "user_phone_numbers_type": "home", +# "user_phone_numbers_number": "555-4321", +# } + +# output 
= CG.flatten_json(nested_json) +# assert output == expected_output + + +# def test_pagesize_and_to_df(): +# json_response = CG.get_json_response(pagesize=1) +# df = CG.to_df(json_response) +# assert isinstance(df, pd.DataFrame) +# assert len(df) == 1 def test_pass_specific_cursor(): From 7b6556f171744d4bdc466a56c3bb2f9cc17c177a Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 31 Oct 2023 18:47:20 +0100 Subject: [PATCH 21/86] changelog update --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 507c590cf..2bcf45aef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added + +### Fixed + +### Changed +- Modified `CustomerGauge` source class with simplified logic to return json structure. +- Expand `CustomerGaugeToDF` task class with separate cleaning functions and handling nested json structure flattening with two new methods `_field_reference_unpacker` and `_nested_dict_transformer`. +- Change `CustomerGaugeToADLS` to containg new arguments. ## [0.4.21] - 2023-10-26 ### Added From ac7f63bd0e380a92902ebdb8b15b307f09443b4d Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Wed, 1 Nov 2023 15:37:44 +0100 Subject: [PATCH 22/86] multichoice fields and polish letters extension for sharepoint list --- viadot/flows/sharepoint_to_adls.py | 9 +++-- viadot/sources/sharepoint.py | 58 +++++++++++++++++++----------- 2 files changed, 44 insertions(+), 23 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index eaf747bab..410538e7b 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -215,8 +215,13 @@ def __init__( site_url (str): URL to set of Sharepoint Lists. Default to None. required_fields (List[str]): Required fields(columns) need to be extracted from Sharepoint List. Default to None. - field_property (List[str]): Property to expand with expand query method. - All propertys can be found under list.item.properties. + field_property (List[str]): Property to expand fields with expand query method. + For example: User fields could be expanded and "Title" + or "ID" could be extracted + -> usefull to get user name instead of ID + All properties can be found under list.item.properties. + WARNING! Field types and properties might change which could + lead to errors - extension of sp connector would be required. Default to ["Title"] filters (dict): Dictionary with operators which filters the SharepointList output. 
allowed dtypes: ('datetime','date','bool','int', 'float', 'complex', 'str') diff --git a/viadot/sources/sharepoint.py b/viadot/sources/sharepoint.py index 096de825b..7f57bd658 100644 --- a/viadot/sources/sharepoint.py +++ b/viadot/sources/sharepoint.py @@ -18,6 +18,7 @@ logger = logging.get_logger() + # Print out how many rows was extracted in specific iteration def log_of_progress(items): logger.info("Items read: {0}".format(len(items))) @@ -112,7 +113,6 @@ def get_connection( self, site_url: str = None, ): - # Connecting into Sharepoint with AuthenticationContext try: auth_context = AuthenticationContext(site_url) @@ -137,24 +137,33 @@ def _unpack_fields( self, list_item, selected_fields: dict = None, - ): - + ) -> dict: # Creating the body of dictionary new_dict = dict() - # For loop scanning the propertys of searching fields item_values_dict = list_item.properties for field, val in item_values_dict.items(): nested_dict = get_nested_dict(val) - # Check if the dictionary is nested - if nested_dict != None: - # It might be that there are different field properties than expected - nested_value = nested_dict.get(selected_fields["FieldProperty"]) - if nested_value != None: - new_dict[field] = nested_value + # Check if field has expandable type + if field in selected_fields["FieldToExpand"]: + # Check if the values are nested + if nested_dict != None: + # It might be that there are different field properties than expected + nested_value = nested_dict.get( + selected_fields["FieldExpandProperty"] + ) + if nested_value != None: + new_dict[field] = nested_value + else: + logger.info("Property of the extandable field not recognized!") + raise ValueError("Check if given field property is valid!") + elif field in selected_fields["MultiChoiceField"]: + # Field type of multi choice could have more than 1 selection. + new_dict[field] = ";".join(nested_dict.values()) else: - logger.info("I'm not the right value") - raise ValueError + raise ValueError( + "Get nested dict for not recognized type of field! Check field types in the source" + ) else: new_dict[field] = val @@ -166,7 +175,6 @@ def get_fields( site_url: str = None, required_fields: List[str] = None, ): - ctx = self.get_connection(site_url=site_url) # Get list of lists object by List Title @@ -182,22 +190,25 @@ def get_fields( else: list_fields_required = [ - list_fields_all.get_by_internal_name_or_title(field).get() + list_fields_all.get_by_internal_name_or_title(field) + .get() + .execute_query() for field in required_fields ] - ctx.execute_batch() return list_fields_required - def select_expandable_user_fields( + def select_fields( self, list_title: str = None, site_url: str = None, required_fields: List[str] = None, field_property: str = "Title", - ): + ) -> dict: """ - Method to expand fields and get more informations. + Method to create a data structure for handling info about + selection of fields with details about possible expansion for more data or details. + Field types to extract more values can be: "User*", "MultiChoice" field_property to expand can be: ID, Title, FieldTypeKind, TypeAsString and many more. -> more properties can be discovered by getting list.item.properties. 
Default to "Title" @@ -220,12 +231,17 @@ def select_expandable_user_fields( for field in list_fields if fnmatch(field.properties["TypeAsString"], f"User*") ] - + multi_choice_fields = [ + field.properties["InternalName"] + for field in list_fields + if fnmatch(field.properties["TypeAsString"], "MultiChoice") + ] # Creating the body of the function output selected_fields = { "FieldInternalNames": fields_to_select, "FieldToExpand": fields_to_expand, - "FieldProperty": field_property, + "FieldExpandProperty": field_property, + "MultiChoiceField": multi_choice_fields, } return selected_fields @@ -508,7 +524,7 @@ def list_item_to_df( download_all = False # extracting requeird_fields SP_List objects - selected_fields = self.select_expandable_user_fields( + selected_fields = self.select_fields( list_title=list_title, site_url=site_url, required_fields=required_fields, From 0af6efba1f256385f144afa727bfbebe62f8e1c0 Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Thu, 2 Nov 2023 10:22:17 +0100 Subject: [PATCH 23/86] =?UTF-8?q?=E2=9C=A8=20Added=20TM1=20connector?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/__init__.py | 1 + viadot/sources/tm1.py | 75 ++++++++++++++++++++++++++++++++++++ viadot/tasks/__init__.py | 1 + viadot/tasks/tm1.py | 78 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 155 insertions(+) create mode 100644 viadot/sources/tm1.py create mode 100644 viadot/tasks/tm1.py diff --git a/viadot/sources/__init__.py b/viadot/sources/__init__.py index c0d96abe2..7f0bf6f51 100644 --- a/viadot/sources/__init__.py +++ b/viadot/sources/__init__.py @@ -30,6 +30,7 @@ from .mindful import Mindful from .sql_server import SQLServer from .sqlite import SQLite +from .tm1 import TM1 # APIS from .uk_carbon_intensity import UKCarbonIntensity diff --git a/viadot/sources/tm1.py b/viadot/sources/tm1.py new file mode 100644 index 000000000..c921e8c87 --- /dev/null +++ b/viadot/sources/tm1.py @@ -0,0 +1,75 @@ +import pandas as pd + +from typing import Any, Dict, Literal +from TM1py.Services import TM1Service +from prefect.utilities import logging + + +from ..config import local_config +from ..exceptions import CredentialError +from .base import Source + +logger = logging.get_logger(__name__) + + +class TM1(Source): + def __init__( + self, + credentials: Dict[str, Any] = None, + config_key: str = "TM1", + cube: str = None, + view: str = None, + limit: int = None, + private: bool = False, + verify: bool = False, + *args, + **kwargs, + ): + DEFAULT_CREDENTIALS = local_config.get(config_key) + credentials = credentials or DEFAULT_CREDENTIALS + + required_credentials = ["address", "port", "username", "password"] + if any([cred_key not in credentials for cred_key in required_credentials]): + not_found = [c for c in required_credentials if c not in credentials] + raise CredentialError(f"Missing credential(s): '{not_found}'.") + + self.config_key = config_key + self.cube = cube + self.view = view + self.limit = limit + self.private = private + self.verify = verify + + super().__init__(*args, credentials=credentials, **kwargs) + + def get_connection(self) -> TM1Service: + return TM1Service( + address=self.credentials["address"], + port=self.credentials["port"], + user=self.credentials["username"], + password=self.credentials["password"], + ssl=self.verify, + ) + + def get_cubes_names(self) -> list: + conn = self.get_connection + return conn.cubes.get_all_names() + + def get_views_names(self) -> list: + conn = self.get_connection + return 
conn.views.get_all_names(self.cube) + + def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFrame: + conn = self.get_connection() + df = conn.cubes.cells.execute_view_dataframe( + cube_name=self.cube, + view_name=self.view, + private=self.private, + top=self.limit, + ) + logger.info( + f"Data was successfully transformed into DataFrame: {len(df.columns)} columns and {len(df)} rows." + ) + if df.empty is True: + self._handle_if_empty(if_empty) + return df diff --git a/viadot/tasks/__init__.py b/viadot/tasks/__init__.py index ecba1d5c5..e70c89540 100644 --- a/viadot/tasks/__init__.py +++ b/viadot/tasks/__init__.py @@ -58,3 +58,4 @@ from .vid_club import VidClubToDF from .git import CloneRepo from .luma import LumaIngest +from .tm1 import TM1ToParquet diff --git a/viadot/tasks/tm1.py b/viadot/tasks/tm1.py new file mode 100644 index 000000000..b7be64b9d --- /dev/null +++ b/viadot/tasks/tm1.py @@ -0,0 +1,78 @@ +from prefect import Task +from typing import Any, Dict +from prefect.utilities.tasks import defaults_from_attrs + +from ..sources import TM1 + + +class TM1ToParquet(Task): + def __init__( + self, + credentials: Dict[str, Any] = None, + config_key: str = "TM1", + cube: str = None, + view: str = None, + limit: int = None, + private: bool = False, + verify: bool = False, + path: str = None, + if_empty: str = "skip", + timeout=3600, + *args, + **kwargs, + ): + self.credentials = credentials + self.config_key = config_key + self.cube = cube + self.view = view + self.limit = limit + self.private = private + self.verify = verify + self.path = path + self.if_empty = if_empty + + super().__init__( + name="tm1_to_parquet", + timeout=timeout, + *args, + **kwargs, + ) + + def __call__(self, *args, **kwargs): + """Load TM1 data to Parquet""" + return super().__call__(*args, **kwargs) + + @defaults_from_attrs( + "credentials", + "config_key", + "cube", + "view", + "limit", + "private", + "verify", + "if_empty", + "path", + ) + def run( + self, + credentials: Dict[str, Any] = None, + config_key: str = None, + cube: str = None, + view: str = None, + limit: int = None, + private: bool = None, + verify: bool = None, + path: str = None, + if_empty: str = None, + ): + tm1 = TM1( + credentials=credentials, + config_key=config_key, + cube=cube, + view=view, + limit=limit, + private=private, + verify=verify, + ) + df = tm1.to_df() + return df.to_parquet(path=path, if_empty=if_empty) From 6894b79b29ecc39b4f7a5f8c04a013563969a580 Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Thu, 2 Nov 2023 11:27:31 +0100 Subject: [PATCH 24/86] =?UTF-8?q?=F0=9F=8E=A8=20Changed=20to=5Fparquet=20t?= =?UTF-8?q?o=20to=5Fdf?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/__init__.py | 2 +- viadot/tasks/tm1.py | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/viadot/tasks/__init__.py b/viadot/tasks/__init__.py index e70c89540..541be70ab 100644 --- a/viadot/tasks/__init__.py +++ b/viadot/tasks/__init__.py @@ -58,4 +58,4 @@ from .vid_club import VidClubToDF from .git import CloneRepo from .luma import LumaIngest -from .tm1 import TM1ToParquet +from .tm1 import TM1ToDF diff --git a/viadot/tasks/tm1.py b/viadot/tasks/tm1.py index b7be64b9d..cde043e0a 100644 --- a/viadot/tasks/tm1.py +++ b/viadot/tasks/tm1.py @@ -5,7 +5,7 @@ from ..sources import TM1 -class TM1ToParquet(Task): +class TM1ToDF(Task): def __init__( self, credentials: Dict[str, Any] = None, @@ -15,7 +15,6 @@ def __init__( limit: int = None, private: 
bool = False, verify: bool = False, - path: str = None, if_empty: str = "skip", timeout=3600, *args, @@ -28,18 +27,17 @@ def __init__( self.limit = limit self.private = private self.verify = verify - self.path = path self.if_empty = if_empty super().__init__( - name="tm1_to_parquet", + name="tm1_to_df", timeout=timeout, *args, **kwargs, ) def __call__(self, *args, **kwargs): - """Load TM1 data to Parquet""" + """Load TM1 data to pandas DataFrame""" return super().__call__(*args, **kwargs) @defaults_from_attrs( @@ -51,7 +49,6 @@ def __call__(self, *args, **kwargs): "private", "verify", "if_empty", - "path", ) def run( self, @@ -62,7 +59,6 @@ def run( limit: int = None, private: bool = None, verify: bool = None, - path: str = None, if_empty: str = None, ): tm1 = TM1( @@ -74,5 +70,4 @@ def run( private=private, verify=verify, ) - df = tm1.to_df() - return df.to_parquet(path=path, if_empty=if_empty) + return tm1.to_df(if_empty=if_empty) From b439d503ec6959baa101de45baad49a842c7f3b1 Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Thu, 2 Nov 2023 14:35:49 +0100 Subject: [PATCH 25/86] =?UTF-8?q?=F0=9F=93=9D=20Added=20documentation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/tm1.py | 51 +++++++++++++++++++++++++++++++++++++++++++ viadot/tasks/tm1.py | 39 ++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/viadot/sources/tm1.py b/viadot/sources/tm1.py index c921e8c87..93522fda1 100644 --- a/viadot/sources/tm1.py +++ b/viadot/sources/tm1.py @@ -13,6 +13,10 @@ class TM1(Source): + """ + Class for downloading data from TM1 Software using TM1py library + """ + def __init__( self, credentials: Dict[str, Any] = None, @@ -25,6 +29,24 @@ def __init__( *args, **kwargs, ): + """ + Creating an instance of TM1 source class. + + Args: + credentials (Dict[str, Any], optional): Credentials stored in a dictionary. Required credentials: username, + password, address, port. Defaults to None. + config_key (str, optional): Credential key to dictionary where credentials are stored. Defaults to "TM1". + cube (str, optional): Cube name from which data will be downloaded. Defaults to None. + view (str, optional): View name from which data will be downloaded. Defaults to None. + limit (str, optional): How many rows should be extracted. If None all the avaiable rows will + be downloaded. Defaults to None. + private (bool, optional): Whether or not data download shoulb be private. Defaults to False. + verify (bool, optional): Whether or not verify SSL certificates while. Defaults to False. + + + Raises: + CredentialError: When credentials are not found. + """ DEFAULT_CREDENTIALS = local_config.get(config_key) credentials = credentials or DEFAULT_CREDENTIALS @@ -43,6 +65,12 @@ def __init__( super().__init__(*args, credentials=credentials, **kwargs) def get_connection(self) -> TM1Service: + """ + Start a connection to TM1 instance. + + Returns: + TM1Service: Service instance if connection is succesfull. + """ return TM1Service( address=self.credentials["address"], port=self.credentials["port"], @@ -52,14 +80,37 @@ def get_connection(self) -> TM1Service: ) def get_cubes_names(self) -> list: + """ + Get list of avaiable cubes in TM1 instance. + + Returns: + list: List containing avaiable cubes names. + + """ conn = self.get_connection return conn.cubes.get_all_names() def get_views_names(self) -> list: + """ + Get list of avaiable views in TM1 instance. + + Returns: + list: List containing avaiable views names. 
+ + """ conn = self.get_connection return conn.views.get_all_names(self.cube) def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFrame: + """ + Function for downloading data from TM1 to pd.DataFrame. + + Args: + if_empty (Literal["warn", "fail", "skip"], optional): What to do if output DataFrame is empty. Defaults to "skip". + + Returns: + pd.DataFrame: DataFrame with data downloaded from TM1 view. + """ conn = self.get_connection() df = conn.cubes.cells.execute_view_dataframe( cube_name=self.cube, diff --git a/viadot/tasks/tm1.py b/viadot/tasks/tm1.py index cde043e0a..1ea659252 100644 --- a/viadot/tasks/tm1.py +++ b/viadot/tasks/tm1.py @@ -1,3 +1,5 @@ +import pandas as pd + from prefect import Task from typing import Any, Dict from prefect.utilities.tasks import defaults_from_attrs @@ -20,6 +22,22 @@ def __init__( *args, **kwargs, ): + """ + Task for downloading data from TM1 view to pandas DataFrame. + + Args: + credentials (Dict[str, Any], optional): Credentials stored in a dictionary. Required credentials: username, + password, address, port. Defaults to None. + config_key (str, optional): Credential key to dictionary where credentials are stored. Defaults to "TM1". + cube (str, optional): Cube name from which data will be downloaded. Defaults to None. + view (str, optional): View name from which data will be downloaded. Defaults to None. + limit (str, optional): How many rows should be extracted. If None all the avaiable rows will + be downloaded. Defaults to None. + private (bool, optional): Whether or not data download shoulb be private. Defaults to False. + verify (bool, optional): Whether or not verify SSL certificates while. Defaults to False. + if_empty (Literal["warn", "fail", "skip"], optional): What to do if output DataFrame is empty. Defaults to "skip". + + """ self.credentials = credentials self.config_key = config_key self.cube = cube @@ -60,7 +78,26 @@ def run( private: bool = None, verify: bool = None, if_empty: str = None, - ): + ) -> pd.DataFrame: + """ + Run method for TM1ToDF class. + + Args: + credentials (Dict[str, Any], optional): Credentials stored in a dictionary. Required credentials: username, + password, address, port. Defaults to None. + config_key (str, optional): Credential key to dictionary where credentials are stored. Defaults to None. + cube (str, optional): Cube name from which data will be downloaded. Defaults to None. + view (str, optional): View name from which data will be downloaded. Defaults to None. + limit (str, optional): How many rows should be extracted. If None all the avaiable rows will + be downloaded. Defaults to None. + private (bool, optional): Whether or not data download shoulb be private. Defaults to None. + verify (bool, optional): Whether or not verify SSL certificates while. Defaults to None. + if_empty (Literal["warn", "fail", "skip"], optional): What to do if output DataFrame is empty. Defaults to None. + + Returns: + pd.DataFrame: DataFrame with data downloaded from TM1 view. 
+ + """ tm1 = TM1( credentials=credentials, config_key=config_key, From 200b05f5848738dc4cb49b2aeda903a43fc8405b Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Thu, 2 Nov 2023 14:36:08 +0100 Subject: [PATCH 26/86] =?UTF-8?q?=E2=9C=85=20Added=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/tasks/test_tm1.py | 15 ++++++++++++ tests/integration/test_tm1.py | 36 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 tests/integration/tasks/test_tm1.py create mode 100644 tests/integration/test_tm1.py diff --git a/tests/integration/tasks/test_tm1.py b/tests/integration/tasks/test_tm1.py new file mode 100644 index 000000000..96dd58dfb --- /dev/null +++ b/tests/integration/tasks/test_tm1.py @@ -0,0 +1,15 @@ +import pandas as pd + +from viadot.tasks import TM1ToDF +from viadot.config import local_config + +CUBE = local_config.get("test_cube") +VIEW = local_config.get("test_view") + + +def test_tm1_to_df(): + tm1 = TM1ToDF(CUBE, VIEW) + df = tm1.run() + + assert isinstance(df, pd.DataFrame) + assert df.empty is False diff --git a/tests/integration/test_tm1.py b/tests/integration/test_tm1.py new file mode 100644 index 000000000..3676a8ef6 --- /dev/null +++ b/tests/integration/test_tm1.py @@ -0,0 +1,36 @@ +import pandas as pd + +from viadot.sources import TM1 +from viadot.config import local_config + +CUBE = local_config.get("test_cube") +VIEW = local_config.get("test_view") + + +def test_get_connection(): + tm1_source = TM1() + connection = tm1_source.get_connection() + + assert connection is not None + + +def test_get_cubes_names(): + tm1_source = TM1() + cubes = tm1_source.get_cubes_names() + + assert len(cubes) > 0 + + +def test_get_cubes_names(): + tm1_source = TM1(cube=CUBE) + views = tm1_source.get_views_names() + + assert len(views) > 0 + + +def test_to_df(): + tm1_source = TM1(cube=CUBE, view=VIEW) + df = tm1_source.to_df() + + assert isinstance(df, pd.DataFrame) + assert df.empty is False From 6ec73fbd96e1cf7be1fc9480b968aee3132c95c3 Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Thu, 2 Nov 2023 14:37:10 +0100 Subject: [PATCH 27/86] =?UTF-8?q?=F0=9F=93=9D=20Updated=20changelog?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 507c590cf..61fd6633e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added +- Added `TM1` source class. +- Added `TM1ToDF` task class. 
## [0.4.21] - 2023-10-26 ### Added From 3401db7c5d27ac73ccc1706fb45b6ca43525d665 Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Thu, 2 Nov 2023 14:41:09 +0100 Subject: [PATCH 28/86] =?UTF-8?q?=E2=9C=A8=20Added=20TM1py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 768887e4a..896b11d1a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,3 +43,4 @@ dbt-core==1.3.2 dbt-sqlserver==1.3.1 lumaCLI==0.0.19 Office365-REST-Python-Client==2.4.4 +TM1py==1.11.3 From 2be81098299df13ec684ceae64c52709f5185395 Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Fri, 3 Nov 2023 08:26:29 +0100 Subject: [PATCH 29/86] =?UTF-8?q?=F0=9F=8E=A8=20Changed=20get=20views=20an?= =?UTF-8?q?d=20get=20cubes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/tm1.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viadot/sources/tm1.py b/viadot/sources/tm1.py index 93522fda1..54ab010c4 100644 --- a/viadot/sources/tm1.py +++ b/viadot/sources/tm1.py @@ -87,7 +87,7 @@ def get_cubes_names(self) -> list: list: List containing avaiable cubes names. """ - conn = self.get_connection + conn = self.get_connection() return conn.cubes.get_all_names() def get_views_names(self) -> list: @@ -98,7 +98,7 @@ def get_views_names(self) -> list: list: List containing avaiable views names. """ - conn = self.get_connection + conn = self.get_connection() return conn.views.get_all_names(self.cube) def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFrame: From d9320cb917d7024bc35dbcd5912678c3e8b1084e Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Tue, 7 Nov 2023 15:32:05 +0100 Subject: [PATCH 30/86] =?UTF-8?q?=E2=9C=A8=20Added=20mdx=20option?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/tm1.py | 29 ++++++++++++++++++++++------- viadot/tasks/tm1.py | 7 +++++++ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/viadot/sources/tm1.py b/viadot/sources/tm1.py index 54ab010c4..c9a1d875c 100644 --- a/viadot/sources/tm1.py +++ b/viadot/sources/tm1.py @@ -6,7 +6,7 @@ from ..config import local_config -from ..exceptions import CredentialError +from ..exceptions import CredentialError,ValidationError from .base import Source logger = logging.get_logger(__name__) @@ -21,6 +21,7 @@ def __init__( self, credentials: Dict[str, Any] = None, config_key: str = "TM1", + mdx_query: str = None, cube: str = None, view: str = None, limit: int = None, @@ -36,6 +37,7 @@ def __init__( credentials (Dict[str, Any], optional): Credentials stored in a dictionary. Required credentials: username, password, address, port. Defaults to None. config_key (str, optional): Credential key to dictionary where credentials are stored. Defaults to "TM1". + mdx_query (str, optional): MDX select query needed to download the data. Defaults to None. cube (str, optional): Cube name from which data will be downloaded. Defaults to None. view (str, optional): View name from which data will be downloaded. Defaults to None. limit (str, optional): How many rows should be extracted. 
If None all the avaiable rows will @@ -56,6 +58,7 @@ def __init__( raise CredentialError(f"Missing credential(s): '{not_found}'.") self.config_key = config_key + self.mdx_query = mdx_query self.cube = cube self.view = view self.limit = limit @@ -110,14 +113,26 @@ def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFr Returns: pd.DataFrame: DataFrame with data downloaded from TM1 view. + + Raises: + ValidationError: When mdx and cube + view are not specified or when combination of both is specified. """ conn = self.get_connection() - df = conn.cubes.cells.execute_view_dataframe( - cube_name=self.cube, - view_name=self.view, - private=self.private, - top=self.limit, - ) + + if self.mdx_query is None and (self.cube is None or self.view is None): + raise ValidationError("MDX query or cube and view are required.") + if self.cube is not None and self.view is not None: + df = conn.cubes.cells.execute_view_dataframe( + cube_name=self.cube, + view_name=self.view, + private=self.private, + top=self.limit, + ) + elif self.mdx_query is not None: + df = conn.cubes.cells.execute_mdx_dataframe(self.mdx_query) + else: + raise ValidationError("Specify only one: MDX query or cube and view.") + logger.info( f"Data was successfully transformed into DataFrame: {len(df.columns)} columns and {len(df)} rows." ) diff --git a/viadot/tasks/tm1.py b/viadot/tasks/tm1.py index 1ea659252..06b96ccd2 100644 --- a/viadot/tasks/tm1.py +++ b/viadot/tasks/tm1.py @@ -12,6 +12,7 @@ def __init__( self, credentials: Dict[str, Any] = None, config_key: str = "TM1", + mdx_query: str = None, cube: str = None, view: str = None, limit: int = None, @@ -29,6 +30,7 @@ def __init__( credentials (Dict[str, Any], optional): Credentials stored in a dictionary. Required credentials: username, password, address, port. Defaults to None. config_key (str, optional): Credential key to dictionary where credentials are stored. Defaults to "TM1". + mdx_query (str, optional): MDX select query needed to download the data. Defaults to None. cube (str, optional): Cube name from which data will be downloaded. Defaults to None. view (str, optional): View name from which data will be downloaded. Defaults to None. limit (str, optional): How many rows should be extracted. If None all the avaiable rows will @@ -40,6 +42,7 @@ def __init__( """ self.credentials = credentials self.config_key = config_key + self.mdx_query = mdx_query self.cube = cube self.view = view self.limit = limit @@ -61,6 +64,7 @@ def __call__(self, *args, **kwargs): @defaults_from_attrs( "credentials", "config_key", + "mdx_query", "cube", "view", "limit", @@ -72,6 +76,7 @@ def run( self, credentials: Dict[str, Any] = None, config_key: str = None, + mdx_query: str = None, cube: str = None, view: str = None, limit: int = None, @@ -86,6 +91,7 @@ def run( credentials (Dict[str, Any], optional): Credentials stored in a dictionary. Required credentials: username, password, address, port. Defaults to None. config_key (str, optional): Credential key to dictionary where credentials are stored. Defaults to None. + mdx_query (str, optional): MDX select query needed to download the data. Defaults to None. cube (str, optional): Cube name from which data will be downloaded. Defaults to None. view (str, optional): View name from which data will be downloaded. Defaults to None. limit (str, optional): How many rows should be extracted. 
If None all the avaiable rows will @@ -101,6 +107,7 @@ def run( tm1 = TM1( credentials=credentials, config_key=config_key, + mdx_query=mdx_query, cube=cube, view=view, limit=limit, From fb3f981977079b29563a0da5b4572736ec606ce6 Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Tue, 7 Nov 2023 15:50:20 +0100 Subject: [PATCH 31/86] =?UTF-8?q?=E2=9C=A8=20Added=20extra=20functions=20t?= =?UTF-8?q?o=20check=20structure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/tm1.py | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/viadot/sources/tm1.py b/viadot/sources/tm1.py index c9a1d875c..25c1d9487 100644 --- a/viadot/sources/tm1.py +++ b/viadot/sources/tm1.py @@ -24,6 +24,8 @@ def __init__( mdx_query: str = None, cube: str = None, view: str = None, + dimension: str = None, + hierarchy: str =None, limit: int = None, private: bool = False, verify: bool = False, @@ -40,6 +42,8 @@ def __init__( mdx_query (str, optional): MDX select query needed to download the data. Defaults to None. cube (str, optional): Cube name from which data will be downloaded. Defaults to None. view (str, optional): View name from which data will be downloaded. Defaults to None. + dimension (str, optional): Diemension name. Defaults to None. + hierarchy (str, optional): Hierarchy name. Defaults to None. limit (str, optional): How many rows should be extracted. If None all the avaiable rows will be downloaded. Defaults to None. private (bool, optional): Whether or not data download shoulb be private. Defaults to False. @@ -61,6 +65,8 @@ def __init__( self.mdx_query = mdx_query self.cube = cube self.view = view + self.dimension = dimension + self.hierarchy = hierarchy self.limit = limit self.private = private self.verify = verify @@ -95,7 +101,7 @@ def get_cubes_names(self) -> list: def get_views_names(self) -> list: """ - Get list of avaiable views in TM1 instance. + Get list of avaiable views in TM1 cube instance. Returns: list: List containing avaiable views names. @@ -103,6 +109,39 @@ def get_views_names(self) -> list: """ conn = self.get_connection() return conn.views.get_all_names(self.cube) + + def get_diemensions_names(self) -> list: + """ + Get list of avaiable dimensions in TM1 instance. + + Returns: + list: List containing avaiable dimensions names. + + """ + conn = self.get_connection() + return conn.dimensions.get_all_names() + + def get_hierarchies_names(self) -> list: + """ + Get list of avaiable hierarchies in TM1 dimension instance. + + Returns: + list: List containing avaiable hierarchies names. + + """ + conn = self.get_connection() + return conn.hierarchies.get_all_names(self.dimension) + + def get_available_elements(self) -> list: + """ + Get list of avaiable elements in TM1 instance based on hierarchy and diemension. + + Returns: + list: List containing avaiable elements names. 
+ + """ + conn = self.get_connection() + return conn.elements.get_element_names(dimension_name= self.dimension, hierarchy_name = self.hierarchy) def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFrame: """ From a9cd4ae49ab6e16314d0018152fb16655f4edf73 Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Tue, 7 Nov 2023 15:57:43 +0100 Subject: [PATCH 32/86] =?UTF-8?q?=F0=9F=8E=A8=20Formatted=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/tm1.py | 1 - viadot/tasks/tm1.py | 1 - 2 files changed, 2 deletions(-) diff --git a/viadot/sources/tm1.py b/viadot/sources/tm1.py index 25c1d9487..0fac5e098 100644 --- a/viadot/sources/tm1.py +++ b/viadot/sources/tm1.py @@ -11,7 +11,6 @@ logger = logging.get_logger(__name__) - class TM1(Source): """ Class for downloading data from TM1 Software using TM1py library diff --git a/viadot/tasks/tm1.py b/viadot/tasks/tm1.py index 06b96ccd2..a4926dc55 100644 --- a/viadot/tasks/tm1.py +++ b/viadot/tasks/tm1.py @@ -6,7 +6,6 @@ from ..sources import TM1 - class TM1ToDF(Task): def __init__( self, From 92e621e034621927ae1c741cd91c089a33be03ea Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Tue, 7 Nov 2023 16:04:25 +0100 Subject: [PATCH 33/86] =?UTF-8?q?=F0=9F=8E=A8=20Formatted=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/tm1.py | 17 ++++++++++------- viadot/tasks/tm1.py | 1 + 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/viadot/sources/tm1.py b/viadot/sources/tm1.py index 0fac5e098..77155b07c 100644 --- a/viadot/sources/tm1.py +++ b/viadot/sources/tm1.py @@ -6,11 +6,12 @@ from ..config import local_config -from ..exceptions import CredentialError,ValidationError +from ..exceptions import CredentialError, ValidationError from .base import Source logger = logging.get_logger(__name__) + class TM1(Source): """ Class for downloading data from TM1 Software using TM1py library @@ -24,7 +25,7 @@ def __init__( cube: str = None, view: str = None, dimension: str = None, - hierarchy: str =None, + hierarchy: str = None, limit: int = None, private: bool = False, verify: bool = False, @@ -108,7 +109,7 @@ def get_views_names(self) -> list: """ conn = self.get_connection() return conn.views.get_all_names(self.cube) - + def get_diemensions_names(self) -> list: """ Get list of avaiable dimensions in TM1 instance. @@ -119,7 +120,7 @@ def get_diemensions_names(self) -> list: """ conn = self.get_connection() return conn.dimensions.get_all_names() - + def get_hierarchies_names(self) -> list: """ Get list of avaiable hierarchies in TM1 dimension instance. @@ -130,7 +131,7 @@ def get_hierarchies_names(self) -> list: """ conn = self.get_connection() return conn.hierarchies.get_all_names(self.dimension) - + def get_available_elements(self) -> list: """ Get list of avaiable elements in TM1 instance based on hierarchy and diemension. @@ -140,7 +141,9 @@ def get_available_elements(self) -> list: """ conn = self.get_connection() - return conn.elements.get_element_names(dimension_name= self.dimension, hierarchy_name = self.hierarchy) + return conn.elements.get_element_names( + dimension_name=self.dimension, hierarchy_name=self.hierarchy + ) def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFrame: """ @@ -151,7 +154,7 @@ def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFr Returns: pd.DataFrame: DataFrame with data downloaded from TM1 view. 
- + Raises: ValidationError: When mdx and cube + view are not specified or when combination of both is specified. """ diff --git a/viadot/tasks/tm1.py b/viadot/tasks/tm1.py index a4926dc55..06b96ccd2 100644 --- a/viadot/tasks/tm1.py +++ b/viadot/tasks/tm1.py @@ -6,6 +6,7 @@ from ..sources import TM1 + class TM1ToDF(Task): def __init__( self, From 96b66cad68f054bbe4eb55943e0e677c1f82b66f Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Mon, 6 Nov 2023 15:15:52 +0100 Subject: [PATCH 34/86] add keyerror to except --- .../flows/test_customer_gauge_to_adls.py | 2 - viadot/sources/vid_club.py | 315 ------------------ viadot/tasks/customer_gauge.py | 4 +- 3 files changed, 2 insertions(+), 319 deletions(-) delete mode 100644 viadot/sources/vid_club.py diff --git a/tests/integration/flows/test_customer_gauge_to_adls.py b/tests/integration/flows/test_customer_gauge_to_adls.py index 0e7afd3e2..34c7336bc 100644 --- a/tests/integration/flows/test_customer_gauge_to_adls.py +++ b/tests/integration/flows/test_customer_gauge_to_adls.py @@ -91,5 +91,3 @@ def test_customer_gauge_to_adls_run_flow_validation_failure(mocked_class): except ValidationError: pass - os.remove("test_customer_gauge_to_adls_run_flow_validation_failure.parquet") - os.remove("test_customer_gauge_to_adls_run_flow_validation_failure.json") diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py deleted file mode 100644 index e7819577a..000000000 --- a/viadot/sources/vid_club.py +++ /dev/null @@ -1,315 +0,0 @@ -import json -import os -import urllib -from datetime import date, datetime, timedelta -from typing import Any, Dict, List, Literal, Tuple - -import pandas as pd -from prefect.utilities import logging - -from ..exceptions import CredentialError, ValidationError -from ..utils import handle_api_response -from .base import Source - -logger = logging.get_logger() - - -class VidClub(Source): - """ - A class implementing the Vid Club API. - - Documentation for this API is located at: https://evps01.envoo.net/vipapi/ - There are 4 endpoints where to get the data. - """ - - def __init__(self, credentials: Dict[str, Any], *args, **kwargs): - """ - Create an instance of VidClub. - - Args: - credentials (Dict[str, Any]): Credentials to Vid Club APIs containing token. - - Raises: - CredentialError: If credentials are not provided as a parameter. - """ - self.headers = { - "Authorization": "Bearer " + credentials["token"], - "Content-Type": "application/json", - } - - super().__init__(*args, credentials=credentials, **kwargs) - - def build_query( - self, - from_date: str, - to_date: str, - api_url: str, - items_per_page: int, - source: Literal["jobs", "product", "company", "survey"] = None, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", - ) -> str: - """ - Builds the query from the inputs. - - Args: - from_date (str): Start date for the query. - to_date (str): End date for the query, if empty, will be executed as datetime.today().strftime("%Y-%m-%d"). - api_url (str): Generic part of the URL to Vid Club API. - items_per_page (int): number of entries per page. - source (Literal["jobs", "product", "company", "survey"], optional): The endpoint source to be accessed. Defaults to None. - region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] - - Returns: - str: Final query with all filters added. - - Raises: - ValidationError: If any source different than the ones in the list are used. 
- """ - if source in ["jobs", "product", "company"]: - url = f"{api_url}{source}?from={from_date}&to={to_date}®ion={region}&limit={items_per_page}" - elif source == "survey": - url = f"{api_url}{source}?language=en&type=question" - else: - raise ValidationError( - "Pick one these sources: jobs, product, company, survey" - ) - return url - - def intervals( - self, from_date: str, to_date: str, days_interval: int - ) -> Tuple[List[str], List[str]]: - """ - Breaks dates range into smaller by provided days interval. - - Args: - from_date (str): Start date for the query in "%Y-%m-%d" format. - to_date (str): End date for the query, if empty, will be executed as datetime.today().strftime("%Y-%m-%d"). - days_interval (int): Days specified in date range per api call (test showed that 30-40 is optimal for performance). - - Returns: - List[str], List[str]: Starts and Ends lists that contains information about date ranges for specific period and time interval. - - Raises: - ValidationError: If the final date of the query is before the start date. - """ - - if to_date == None: - to_date = datetime.today().strftime("%Y-%m-%d") - - end_date = datetime.strptime(to_date, "%Y-%m-%d").date() - start_date = datetime.strptime(from_date, "%Y-%m-%d").date() - - from_date_obj = datetime.strptime(from_date, "%Y-%m-%d") - - to_date_obj = datetime.strptime(to_date, "%Y-%m-%d") - delta = to_date_obj - from_date_obj - - if delta.days < 0: - raise ValidationError("to_date cannot be earlier than from_date.") - - interval = timedelta(days=days_interval) - starts = [] - ends = [] - - period_start = start_date - while period_start < end_date: - period_end = min(period_start + interval, end_date) - starts.append(period_start.strftime("%Y-%m-%d")) - ends.append(period_end.strftime("%Y-%m-%d")) - period_start = period_end - if len(starts) == 0 and len(ends) == 0: - starts.append(from_date) - ends.append(to_date) - return starts, ends - - def check_connection( - self, - source: Literal["jobs", "product", "company", "survey"] = None, - from_date: str = "2022-03-22", - to_date: str = None, - items_per_page: int = 100, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", - url: str = None, - ) -> Tuple[Dict[str, Any], str]: - """ - Initiate first connection to API to retrieve piece of data with information about type of pagination in API URL. - This option is added because type of pagination for endpoints is being changed in the future from page number to 'next' id. - - Args: - source (Literal["jobs", "product", "company", "survey"], optional): The endpoint source to be accessed. Defaults to None. - from_date (str, optional): Start date for the query, by default is the oldest date in the data 2022-03-22. - to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. - items_per_page (int, optional): Number of entries per page. 100 entries by default. - region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] - url (str, optional): Generic part of the URL to Vid Club API. Defaults to None. - - Returns: - Tuple[Dict[str, Any], str]: Dictionary with first response from API with JSON containing data and used URL string. - - Raises: - ValidationError: If from_date is earlier than 2022-03-22. - ValidationError: If to_date is earlier than from_date. 
- """ - - if from_date < "2022-03-22": - raise ValidationError("from_date cannot be earlier than 2022-03-22.") - - if to_date < from_date: - raise ValidationError("to_date cannot be earlier than from_date.") - - if url is None: - url = self.credentials["url"] - - first_url = self.build_query( - source=source, - from_date=from_date, - to_date=to_date, - api_url=url, - items_per_page=items_per_page, - region=region, - ) - headers = self.headers - response = handle_api_response( - url=first_url, headers=headers, method="GET", verify=False - ) - response = response.json() - - return (response, first_url) - - def get_response( - self, - source: Literal["jobs", "product", "company", "survey"] = None, - from_date: str = "2022-03-22", - to_date: str = None, - items_per_page: int = 100, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", - ) -> pd.DataFrame: - """ - Basing on the pagination type retrieved using check_connection function, gets the response from the API queried and transforms it into DataFrame. - - Args: - source (Literal["jobs", "product", "company", "survey"], optional): The endpoint source to be accessed. Defaults to None. - from_date (str, optional): Start date for the query, by default is the oldest date in the data 2022-03-22. - to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. - items_per_page (int, optional): Number of entries per page. 100 entries by default. - region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] - - Returns: - pd.DataFrame: Table of the data carried in the response. - - Raises: - ValidationError: If any source different than the ones in the list are used. - """ - headers = self.headers - if source not in ["jobs", "product", "company", "survey"]: - raise ValidationError( - "The source has to be: jobs, product, company or survey" - ) - if to_date == None: - to_date = datetime.today().strftime("%Y-%m-%d") - - response, first_url = self.check_connection( - source=source, - from_date=from_date, - to_date=to_date, - items_per_page=items_per_page, - region=region, - ) - - if isinstance(response, dict): - keys_list = list(response.keys()) - elif isinstance(response, list): - keys_list = list(response[0].keys()) - else: - keys_list = [] - - if "next" in keys_list: - ind = True - else: - ind = False - - if "data" in keys_list: - df = pd.DataFrame(response["data"]) - length = df.shape[0] - page = 1 - - while length == items_per_page: - if ind == True: - next = response["next"] - url = f"{first_url}&next={next}" - else: - page += 1 - url = f"{first_url}&page={page}" - r = handle_api_response( - url=url, headers=headers, method="GET", verify=False - ) - response = r.json() - df_page = pd.DataFrame(response["data"]) - if source == "product": - df_page = df_page.transpose() - length = df_page.shape[0] - df = pd.concat((df, df_page), axis=0) - else: - df = pd.DataFrame(response) - - return df - - def total_load( - self, - source: Literal["jobs", "product", "company", "survey"] = None, - from_date: str = "2022-03-22", - to_date: str = None, - items_per_page: int = 100, - region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", - days_interval: int = 30, - ) -> pd.DataFrame: - """ - Looping get_response and iterating by date ranges defined in intervals. Stores outputs as DataFrames in a list. 
- At the end, daframes are concatenated in one and dropped duplicates that would appear when quering. - - Args: - source (Literal["jobs", "product", "company", "survey"], optional): The endpoint source to be accessed. Defaults to None. - from_date (str, optional): Start date for the query, by default is the oldest date in the data 2022-03-22. - to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. - items_per_page (int, optional): Number of entries per page. 100 entries by default. - region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] - days_interval (int, optional): Days specified in date range per api call (test showed that 30-40 is optimal for performance). Defaults to 30. - - Returns: - pd.DataFrame: Dataframe of the concatanated data carried in the responses. - """ - - starts, ends = self.intervals( - from_date=from_date, to_date=to_date, days_interval=days_interval - ) - - dfs_list = [] - if len(starts) > 0 and len(ends) > 0: - for start, end in zip(starts, ends): - logger.info(f"ingesting data for dates [{start}]-[{end}]...") - df = self.get_response( - source=source, - from_date=start, - to_date=end, - items_per_page=items_per_page, - region=region, - ) - dfs_list.append(df) - if len(dfs_list) > 1: - df = pd.concat(dfs_list, axis=0, ignore_index=True) - else: - df = pd.DataFrame(dfs_list[0]) - else: - df = self.get_response( - source=source, - from_date=from_date, - to_date=to_date, - items_per_page=items_per_page, - region=region, - ) - df.drop_duplicates(inplace=True) - - if df.empty: - logger.error("No data for this date range") - - return df diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 03b30b286..abaa1fc2e 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -94,8 +94,8 @@ def get_data(self, """ try: jsons_list = json_response["data"] - except: - raise ValueError( + except KeyError: + logger.info( "Provided argument doesn't contain 'data' value. Pass json returned from the endpoint." ) From e9b4fcc65ae62bbe886054849f4c01a9fdab5f2d Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Mon, 6 Nov 2023 15:20:02 +0100 Subject: [PATCH 35/86] add valuerror log --- viadot/tasks/customer_gauge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index abaa1fc2e..aac47337b 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -132,7 +132,7 @@ def _field_reference_unpacker( list_properties = list(dictionary.values()) result[list_properties[0]] = list_properties[1] else: - raise ValueError() + raise ValueError(f"Dictionary within the specified field doesn't contain exactly two items.") if result: json_response[field] = result From 56b01823397693858e9ad1a5b7492dcfed941f2d Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Mon, 6 Nov 2023 15:22:08 +0100 Subject: [PATCH 36/86] rename variable --- viadot/tasks/customer_gauge.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index aac47337b..c2782670d 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -158,12 +158,12 @@ def _nested_dict_transformer( Dict[str, Any]: The JSON response with modified nested dictionaries within the specified field. 
""" - d={} + result={} for i, dictionary in enumerate(json_response[field], start=1): for key, value in dictionary.items(): - d[f'{i}_{key}'] = value - if d: - json_response[field] = d + result[f'{i}_{key}'] = value + if result: + json_response[field] = result return json_response From 082ac69d6fb842647bddd14d6cba3f8e95afaad5 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Mon, 6 Nov 2023 15:27:21 +0100 Subject: [PATCH 37/86] Update docstrings for column_unpacker --- viadot/tasks/customer_gauge.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index c2782670d..15ee94d15 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -175,9 +175,11 @@ def column_unpacker( ) -> List[Dict[str, Any]]: """ - Unpack and modify specific columns in a list of dictionaries using two methods, chosen by the user. - If user wants to use field_reference_unpacker, he needs to provide list of fields in `method1_cols` - argument, if user wants to use nested_dict_transformer - uses 'method2_cols' argument. + Function to unpack and modify specific columns in a list of dictionaries by using one of two methods, + chosen by the user. + If user would like to use field_reference_unpacker, he/she needs to provide list of fields as strings in + `method1_cols` parameter, if user would like to use nested_dict_transformer he/she needs to provide list of + fields as strings in method2_cols parameter. Args: json_list (List[Dict[str, Any]): A list of dictionaries containing the data. From 25c92f898a0cb84a8c2fa22d2cd36d2c45e77295 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Mon, 6 Nov 2023 15:35:07 +0100 Subject: [PATCH 38/86] update errors --- viadot/tasks/customer_gauge.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 15ee94d15..28f26ae9e 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -189,7 +189,7 @@ def column_unpacker( Defaults to None. Raises: - ValueError: _description_ + ValueError: Input 'json_list' is required. Returns: List[Dict[str, Any]]: The updated list of dictionaries after column unpacking and modification. @@ -238,6 +238,9 @@ def flatten_json(self, json_response: Dict[str, Any] = None) -> Dict[str, Any]: json_response (Dict[str, Any], optional): JSON object represented as a nested dictionary. Defaults to None. + Raises: + TypeError: If the 'json_response' not a dictionary. + Returns: Dict[str, Any]: The flattened dictionary. 
""" From 8b96fb983faae41bf23a4d5244565a7879cb2fa1 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 7 Nov 2023 13:52:52 +0100 Subject: [PATCH 39/86] precise exceptions --- viadot/tasks/customer_gauge.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 28f26ae9e..5872e7fa7 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -206,9 +206,11 @@ def unpack_columns(columns, unpack_function): try: json_list_clean = list(map(lambda x: unpack_function(x, field), json_list_clean)) logger.info(f"All elements in '{field}' are unpacked successfully.") - except: + except ValueError as ve: logger.info(f"No transformation were made in '{field}'," "because didn't contain list of key-value data.") + except Exception as e: + logger.info(f"Error while unpacking {field}: {e}") else: logger.info(f"Column '{field}' not found.") return json_list_clean From c8c5bb071a27aae6a0fbbd69c1153d9a358653ee Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 7 Nov 2023 14:25:37 +0100 Subject: [PATCH 40/86] checking duplicated columns --- viadot/tasks/customer_gauge.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 5872e7fa7..a1f9b2145 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -189,7 +189,9 @@ def column_unpacker( Defaults to None. Raises: - ValueError: Input 'json_list' is required. + ValueError: If 'json_list' is not provided. + ValueError: If specified columns do not exist in the JSON data. + ValueError: If columns are mentioned in both 'method1_cols' and 'method2_cols'. Returns: List[Dict[str, Any]]: The updated list of dictionaries after column unpacking and modification. @@ -215,17 +217,24 @@ def unpack_columns(columns, unpack_function): logger.info(f"Column '{field}' not found.") return json_list_clean - if method1_cols is not None: - json_list = unpack_columns( - columns = method1_cols, - unpack_function = self._field_reference_unpacker - ) - - if method2_cols is not None: - json_list = unpack_columns( - columns = method2_cols, - unpack_function = self._nested_dict_transformer + duplicated_cols = set(method1_cols).intersection(set(method2_cols)) + if duplicated_cols: + raise ValueError( + f"{duplicated_cols} were mentioned in both method1_cols and method2_cols." + " It's not possible to apply two methods to the same field." 
) + else: + if method1_cols is not None: + json_list = unpack_columns( + columns = method1_cols, + unpack_function = self._field_reference_unpacker + ) + + if method2_cols is not None: + json_list = unpack_columns( + columns = method2_cols, + unpack_function = self._nested_dict_transformer + ) return json_list From 21983216140985296ccdc0990e665a635a2f553e Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 7 Nov 2023 14:52:49 +0100 Subject: [PATCH 41/86] adjust flattify nested function --- viadot/tasks/customer_gauge.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index a1f9b2145..7e88a59ee 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -242,7 +242,7 @@ def unpack_columns(columns, unpack_function): def flatten_json(self, json_response: Dict[str, Any] = None) -> Dict[str, Any]: """ Function that flattens a nested structure of the JSON object into - a single-level dictionary.Uses a nested `flatten()` function to recursively + a single-level dictionary. It uses a nested `flattify()` function to recursively combine nested keys in the JSON object with '_' to create the flattened keys. Args: @@ -260,15 +260,15 @@ def flatten_json(self, json_response: Dict[str, Any] = None) -> Dict[str, Any]: if not isinstance(json_response, dict): raise TypeError("Input must be a dictionary.") - def flattify(x, key="", out = None): + def flattify(field, key="", out = None): if out is None: out = result - if isinstance(x, dict): - for a in x: - flattify(x[a], key + a + "_", out) + if isinstance(field, dict): + for item in field.keys(): + flattify(field[item], key + item + "_", out) else: - out[key[:-1]] = x + out[key[:-1]] = field flattify(json_response) From c2d9dbaa6b38e65d12a27c79eb993c364389201a Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 7 Nov 2023 15:08:39 +0100 Subject: [PATCH 42/86] improve if 'drivers' condition --- viadot/tasks/customer_gauge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 7e88a59ee..547c0e303 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -430,7 +430,7 @@ def run( logger.info("Inserting data into the DataFrame...") df = pd.DataFrame(list(map(self.flatten_json, clean_json))) df = self.square_brackets_remover(df) - if endpoint == "responses": + if "drivers" in list(df.columns): df["drivers"] = df["drivers"].apply(self._drivers_cleaner) df.columns = df.columns.str.lower().str.replace(" ", "_") logger.info("DataFrame: Ready. Data: Inserted. 
Let the magic happen!") From 01ec7388c1930ecd085c412352c72afb0b7374a6 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 7 Nov 2023 15:37:52 +0100 Subject: [PATCH 43/86] update tests for source --- tests/integration/test_customer_gauge.py | 46 ------------------------ 1 file changed, 46 deletions(-) diff --git a/tests/integration/test_customer_gauge.py b/tests/integration/test_customer_gauge.py index 119615100..596cf029c 100644 --- a/tests/integration/test_customer_gauge.py +++ b/tests/integration/test_customer_gauge.py @@ -17,52 +17,6 @@ def test_get_json_content(): assert isinstance(json_response["cursor"], dict) -# def test_properties_cleaning(): -# json_response = CG.get_json_response() -# data = json_response["data"][2].copy() -# cleaned_data = CG.properties_cleaning(data.copy()) -# assert isinstance(data["properties"], list) -# assert isinstance(cleaned_data["properties"], dict) - - -# def test_flatten_json(): -# nested_json = { -# "user": { -# "name": "Jane", -# "address": { -# "street": "456 Elm St", -# "city": "San Francisco", -# "state": "CA", -# "zip": "94109", -# "country": {"name": "United States", "code": "US"}, -# }, -# "phone_numbers": {"type": "home", "number": "555-4321"}, -# } -# } - -# expected_output = { -# "user_name": "Jane", -# "user_address_street": "456 Elm St", -# "user_address_city": "San Francisco", -# "user_address_state": "CA", -# "user_address_zip": "94109", -# "user_address_country_name": "United States", -# "user_address_country_code": "US", -# "user_phone_numbers_type": "home", -# "user_phone_numbers_number": "555-4321", -# } - -# output = CG.flatten_json(nested_json) -# assert output == expected_output - - -# def test_pagesize_and_to_df(): -# json_response = CG.get_json_response(pagesize=1) -# df = CG.to_df(json_response) -# assert isinstance(df, pd.DataFrame) -# assert len(df) == 1 - - def test_pass_specific_cursor(): # for default pagesize=1000 returned cursor value should be bigger than passed cur = random.randint(1, 9999) From a0a9afc0384bb89643b121845625ad2b42091464 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 7 Nov 2023 16:52:12 +0100 Subject: [PATCH 44/86] update get_data function --- viadot/tasks/customer_gauge.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 547c0e303..cad205479 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -86,18 +86,20 @@ def get_data(self, dictionary that contains data and cursor parameter value. Defaults to None. Raises: - ValueError: If the 'data' key is not present in the provided JSON response. + KeyError: If the 'data' key is not present in the provided JSON response. Returns: List[Dict[str, Any]]: A list of dictionaries containing data from the 'data' part of the JSON response. """ + jsons_list=[] try: jsons_list = json_response["data"] except KeyError: - logger.info( + logger.error( "Provided argument doesn't contain 'data' value. Pass json returned from the endpoint." ) + raise return jsons_list @@ -216,8 +218,8 @@ def unpack_columns(columns, unpack_function): else: logger.info(f"Column '{field}' not found.") return json_list_clean - - duplicated_cols = set(method1_cols).intersection(set(method2_cols)) + if method1_cols and method2_cols: + duplicated_cols = set(method1_cols).intersection(set(method2_cols)) if duplicated_cols: raise ValueError( f"{duplicated_cols} were mentioned in both method1_cols and method2_cols." 
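
A minimal sketch of the unpacking pipeline introduced in the patches above, using the same data shape as the unit tests added further below. It exercises only the pure helper methods (`column_unpacker`, `flatten_json`, `square_brackets_remover`), not the API call itself; the import path and the `endpoint="responses"` constructor argument are assumptions based on how the task is used in the tests, not part of any commit in this series.

import pandas as pd
from viadot.tasks import CustomerGaugeToDF

# Constructor arguments are illustrative only; the task also accepts cursor/pagesize
# parameters, as shown in the integration tests below.
cg = CustomerGaugeToDF(endpoint="responses")

sample = {
    "number_customer": 266,
    "properties": [{"field": "City", "reference": "Eldorado"}],
    "drivers": [{"label": "Packaging"}, {"label": "Value for Money"}],
}

# 'properties' is unpacked with _field_reference_unpacker (method1_cols),
# 'drivers' with _nested_dict_transformer (method2_cols).
unpacked = cg.column_unpacker(
    json_list=[sample],
    method1_cols=["properties"],
    method2_cols=["drivers"],
)

flat = cg.flatten_json(unpacked[0])
# -> {'number_customer': 266, 'properties_City': 'Eldorado',
#     'drivers_1_label': 'Packaging', 'drivers_2_label': 'Value for Money'}

df = cg.square_brackets_remover(pd.DataFrame([flat]))
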
From 70ddfba80962c10868efbb9a33ac27309bf77e48 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 7 Nov 2023 18:13:59 +0100 Subject: [PATCH 45/86] add typeerror handling --- viadot/tasks/customer_gauge.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index cad205479..cec4d4c5f 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -161,11 +161,14 @@ def _nested_dict_transformer( within the specified field. """ result={} - for i, dictionary in enumerate(json_response[field], start=1): - for key, value in dictionary.items(): - result[f'{i}_{key}'] = value - if result: - json_response[field] = result + try: + for i, dictionary in enumerate(json_response[field], start=1): + for key, value in dictionary.items(): + result[f'{i}_{key}'] = value + if result: + json_response[field] = result + except TypeError as te: + logger.error(te) return json_response @@ -198,7 +201,8 @@ def column_unpacker( Returns: List[Dict[str, Any]]: The updated list of dictionaries after column unpacking and modification. """ - + duplicated_cols = [] + if json_list is None: raise ValueError("Input 'json_list' is required.") From 1cda2da307f6aea0ea9513c4db5e3cee10ae1d37 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Tue, 7 Nov 2023 19:45:07 +0100 Subject: [PATCH 46/86] add tests to task --- .../integration/tasks/test_customer_gauge.py | 309 +++++++++++++++++ viadot/sources/vid_club.py | 315 ++++++++++++++++++ 2 files changed, 624 insertions(+) create mode 100644 viadot/sources/vid_club.py diff --git a/tests/integration/tasks/test_customer_gauge.py b/tests/integration/tasks/test_customer_gauge.py index 732205814..6cbd17078 100644 --- a/tests/integration/tasks/test_customer_gauge.py +++ b/tests/integration/tasks/test_customer_gauge.py @@ -8,10 +8,319 @@ CUR = 185000 PAGESIZE = 1000 +DATA_JSON = {'contact': {'first_name': '***', 'last_name': '***'}, + 'number_customer': 266, + 'date_email_sent': '2018-02-05 10:42:28', + 'properties': [{'field': 'Postal Code', 'reference': '999'}, + {'field': 'City', 'reference': 'Eldorado'}, + {'field': 'Currency', 'reference': None}, + {'field': 'Item Quantity', 'reference': '7'}, + {'field': 'PostingDate', 'reference': '2018-01-10 00:00:00'}], + 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], + 'drivers': [{'label': 'Product Quality and Product Performance'}, + {'label': 'Function and Design'}, + {'label': 'Value for Money'}, + {'label': 'Packaging'}]} + +RAW_JSON = {'data': [{'contact': {'first_name': '***', 'last_name': '***'}, + 'number_customer': 266, + 'date_email_sent': '2018-02-05 10:42:28', + 'properties': [{'field': 'Postal Code', 'reference': '999'}, + {'field': 'City', 'reference': 'Eldorado'}, + {'field': 'Currency', 'reference': None}, + {'field': 'Item Quantity', 'reference': '7'}, + {'field': 'PostingDate', 'reference': '2018-01-10 00:00:00'}], + 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], + 'drivers': [{'label': 'Product Quality and Product Performance'}, + {'label': 'Function and Design'}, + {'label': 'Value for Money'}, + {'label': 'Packaging'}]}, + {'contact': {'first_name': '***', 'last_name': '***'}, + 'number_customer': 206, + 'date_email_sent': '2018-02-05 10:41:01', + 'properties': [{'field': 'Postal Code', 'reference': '0000'}, + {'field': 'City', 'reference': 'Neverland'}, + {'field': 'Currency', 'reference': None}, + {'field': 'Item Quantity', 'reference': '1'}, + {'field': 'PostingDate', 
'reference': '2018-01-26 00:00:00'}], + 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], + 'drivers': [{'label': 'The website of the online shop (overall impression)'}, + {'label': 'Waiting period'}]}], + 'cursor': {'next': 37}} + +WRONG_DATA = {'cols':[ + {'field': 'City', 'reference': 'Eldorado'}, + {'field': 'Currency', 'reference': None}, + {'field': 'Item Quantity', 'reference': '7'}, + {'field': 'PostingDate', 'reference': '2018-01-10 00:00:00'}]} @pytest.mark.looping_api_calls def test_customer_gauge_to_df_loop(): + """ + Test the 'run' method with looping API calls. + """ df = CG.run(total_load=True, cursor=CUR, pagesize=PAGESIZE) assert isinstance(df, pd.DataFrame) assert len(df) > PAGESIZE + + +@pytest.mark.get_data +def test_get_data(): + """ + Test the 'get_data' method with valid JSON data. + """ + json_data = CG.get_data(RAW_JSON) + assert isinstance(json_data, list) + + +@pytest.mark.get_data_error +def test_get_data_error_raising(): + """ + Test the 'get_data' method with invalid JSON data that raises a KeyError. + """ + with pytest.raises(KeyError): + CG.get_data(WRONG_DATA) + + +@pytest.mark.field_reference_unpacker_success +def test_field_reference_unpacker(): + """ + Test the '_field_reference_unpacker' method with valid data. It should unpack and modify dictionaries within the specified field and return the expected result. + """ + data = DATA_JSON.copy() + field = 'properties' + expected_result = { + 'contact': {'first_name': '***', 'last_name': '***'}, + 'number_customer': 266, + 'date_email_sent': '2018-02-05 10:42:28', + 'properties': {'Postal Code': '999', + 'City': 'Eldorado', + 'Currency': None, + 'Item Quantity': '7', + 'PostingDate': '2018-01-10 00:00:00'}, + 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], + 'drivers': [{'label': 'Product Quality and Product Performance'}, + {'label': 'Function and Design'}, + {'label': 'Value for Money'}, + {'label': 'Packaging'}] + } + + result = CG._field_reference_unpacker(json_response=data, field=field) + + assert result == expected_result + +@pytest.mark.field_reference_unpacker_value_error +def test_field_reference_unpacker_invalid_data_format(): + """ + Test the '_field_reference_unpacker' method with invalid data format that should raise a ValueError. It should raise a ValueError exception. + """ + data = DATA_JSON.copy() + field='contact' + with pytest.raises(ValueError, match=r"Dictionary within the specified field doesn't contain exactly two items."): + CG._field_reference_unpacker(json_response=data, field=field) + + +@pytest.mark.field_reference_unpacker_key_error +def test_field_reference_unpacker_missing_field(): + """ + Test the '_field_reference_unpacker' method with a missing field that should raise a KeyError. It should raise a KeyError exception. + """ + data = DATA_JSON.copy() + field = "non_existent_field" + with pytest.raises(KeyError): + CG._field_reference_unpacker(json_response=data, field=field) + + +@pytest.mark.nested_dict_transformer_success +def test_nested_dict_transformer(): + """ + Test the '_nested_dict_transformer' method with valid data. It should modify nested dictionaries within the specified field and return the expected result. 
+ """ + data = DATA_JSON.copy() + field = 'drivers' + expected_result = {'contact': {'first_name': '***', 'last_name': '***'}, + 'number_customer': 266, + 'date_email_sent': '2018-02-05 10:42:28', + 'properties': [{'field': 'Postal Code', 'reference': '999'}, + {'field': 'City', 'reference': 'Eldorado'}, + {'field': 'Currency', 'reference': None}, + {'field': 'Item Quantity', 'reference': '7'}, + {'field': 'PostingDate', 'reference': '2018-01-10 00:00:00'}], + 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], + 'drivers': {'1_label': 'Product Quality and Product Performance', + '2_label': 'Function and Design', + '3_label': 'Value for Money', + '4_label': 'Packaging'}} + + result = CG._nested_dict_transformer(json_response=data, field=field) + + assert result == expected_result + + +@pytest.mark.nested_dict_transformer_type_error +def test_nested_dict_transformer_invalid_data_format(): + """ + Test the '_nested_dict_transformer' method with invalid data format. It should return the same data without modification. + """ + data = DATA_JSON.copy() + field='number_customer' + result = CG._nested_dict_transformer(json_response=data, field=field) + + assert result == data + + +@pytest.mark.nested_dict_transformer_key_error +def test_nested_dict_transformer_missing_field(): + """ + Test the '_nested_dict_transformer' method with a missing field that should raise a KeyError. + """ + data = DATA_JSON.copy() + field = "non_existent_field" + with pytest.raises(KeyError): + CG._nested_dict_transformer(json_response=data, field=field) + + +@pytest.mark.column_unpacker_success +def test_column_unpacker_success_method1_and_method2(): + """ + Test the 'column_unpacker' method with valid data and both Method 1 and Method 2 columns specified. It should return the expected result. + """ + data = RAW_JSON['data'].copy() + method1_cols = ['properties'] + method2_cols = ['drivers'] + + expected_result = [ + {'contact': {'first_name': '***', 'last_name': '***'}, + 'number_customer': 266, + 'date_email_sent': '2018-02-05 10:42:28', + 'properties': { + 'Postal Code': '999', + 'City': 'Eldorado', + 'Currency': None, + 'Item Quantity': '7', + 'PostingDate': '2018-01-10 00:00:00' + }, + 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], + 'drivers': {'1_label': 'Product Quality and Product Performance', + '2_label': 'Function and Design', + '3_label': 'Value for Money', + '4_label': 'Packaging'}}, + {'contact': {'first_name': '***', 'last_name': '***'}, + 'number_customer': 206, + 'date_email_sent': '2018-02-05 10:41:01', + 'properties': { + 'Postal Code': '0000', + 'City': 'Neverland', + 'Currency': None, + 'Item Quantity': '1', + 'PostingDate': '2018-01-26 00:00:00' + }, + 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], + 'drivers': {'1_label': 'The website of the online shop (overall impression)', + '2_label': 'Waiting period'}} + ] + + result = CG.column_unpacker(json_list=data, method1_cols=method1_cols, method2_cols=method2_cols) + + assert result == expected_result + + +@pytest.mark.test_column_unpacker_missing_json_argument +def test_column_unpacker_missing_json_list(): + """ + Test the 'column_unpacker' method with missing 'json_list' argument. It should raise a ValueError. 
+ """ + method1_cols = ['properties'] + method2_cols = ['drivers'] + with pytest.raises(ValueError, match="Input 'json_list' is required."): + CG.column_unpacker(json_list=None, method1_cols=method1_cols, method2_cols=method2_cols) + + +@pytest.mark.test_column_unpacker_duplicate_columns +def test_column_unpacker_duplicate_columns(): + """ + Test the 'column_unpacker' method with duplicate columns specified in both Method 1 and Method 2. It should raise a ValueError. + """ + data = RAW_JSON['data'].copy() + method1_cols = ['properties'] + method2_cols = ['properties'] + with pytest.raises(ValueError, match="{'properties'} were mentioned in both method1_cols and method2_cols. It's not possible to apply two methods to the same field."): + CG.column_unpacker(json_list=data, method1_cols=method1_cols, method2_cols=method2_cols) + + +@pytest.mark.test_flatten_json +def test_flatten_json(): + """ + Test the 'flatten_json' method with nested JSON data. It should return a flattened dictionary with expected keys and values. + """ + nested_json = { + "user": { + "name": "Jane", + "address": { + "street": "456 Elm St", + "city": "San Francisco", + "state": "CA", + "zip": "94109", + "country": {"name": "United States", "code": "US"}, + }, + "phone_numbers": {"type": "home", "number": "555-4321"}, + } + } + + expected_output = { + "user_name": "Jane", + "user_address_street": "456 Elm St", + "user_address_city": "San Francisco", + "user_address_state": "CA", + "user_address_zip": "94109", + "user_address_country_name": "United States", + "user_address_country_code": "US", + "user_phone_numbers_type": "home", + "user_phone_numbers_number": "555-4321", + } + + output = CG.flatten_json(nested_json) + assert output == expected_output + + +@pytest.mark.flatten_json_non_dict_input +def test_flatten_json_non_dict_input(): + """ + Test the 'flatten_json' method with non-dictionary input. It should raise a TypeError. + """ + input_json = [1, 2, 3] + with pytest.raises(TypeError): + CG.flatten_json(input_json) + + +@pytest.mark.square_brackets_remover +def test_square_brackets_remover_success(): + """ + Test the 'square_brackets_remover' method with a DataFrame containing square brackets. It should remove square brackets from the DataFrame. + """ + data = { + "Column1": ["Value1", "[Value2]", "Value3", "[Value4]"], + "Column2": ["1", "[2]", "3", "[4]"], + } + sample_df = pd.DataFrame(data) + + expected_data = { + "Column1": ["Value1", "Value2", "Value3", "Value4"], + "Column2": ["1", "2", "3", "4"], + } + expected_df = pd.DataFrame(expected_data) + + result = CG.square_brackets_remover(sample_df) + pd.testing.assert_frame_equal(result, expected_df) + + +@pytest.mark.drivers_cleaner +def test_drivers_cleaner_success(): + """ + Test the '_drivers_cleaner' method with valid 'drivers' data. It should clean and format the 'drivers' data and return the expected result. 
+ """ + data = "{'label': 'Driver1'}, {'label': 'Driver2'}, {'label': 'Driver3'}" + expected_result = "Driver1, Driver2, Driver3" + result = CG._drivers_cleaner(data) + assert result == expected_result \ No newline at end of file diff --git a/viadot/sources/vid_club.py b/viadot/sources/vid_club.py new file mode 100644 index 000000000..e7819577a --- /dev/null +++ b/viadot/sources/vid_club.py @@ -0,0 +1,315 @@ +import json +import os +import urllib +from datetime import date, datetime, timedelta +from typing import Any, Dict, List, Literal, Tuple + +import pandas as pd +from prefect.utilities import logging + +from ..exceptions import CredentialError, ValidationError +from ..utils import handle_api_response +from .base import Source + +logger = logging.get_logger() + + +class VidClub(Source): + """ + A class implementing the Vid Club API. + + Documentation for this API is located at: https://evps01.envoo.net/vipapi/ + There are 4 endpoints where to get the data. + """ + + def __init__(self, credentials: Dict[str, Any], *args, **kwargs): + """ + Create an instance of VidClub. + + Args: + credentials (Dict[str, Any]): Credentials to Vid Club APIs containing token. + + Raises: + CredentialError: If credentials are not provided as a parameter. + """ + self.headers = { + "Authorization": "Bearer " + credentials["token"], + "Content-Type": "application/json", + } + + super().__init__(*args, credentials=credentials, **kwargs) + + def build_query( + self, + from_date: str, + to_date: str, + api_url: str, + items_per_page: int, + source: Literal["jobs", "product", "company", "survey"] = None, + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + ) -> str: + """ + Builds the query from the inputs. + + Args: + from_date (str): Start date for the query. + to_date (str): End date for the query, if empty, will be executed as datetime.today().strftime("%Y-%m-%d"). + api_url (str): Generic part of the URL to Vid Club API. + items_per_page (int): number of entries per page. + source (Literal["jobs", "product", "company", "survey"], optional): The endpoint source to be accessed. Defaults to None. + region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] + + Returns: + str: Final query with all filters added. + + Raises: + ValidationError: If any source different than the ones in the list are used. + """ + if source in ["jobs", "product", "company"]: + url = f"{api_url}{source}?from={from_date}&to={to_date}®ion={region}&limit={items_per_page}" + elif source == "survey": + url = f"{api_url}{source}?language=en&type=question" + else: + raise ValidationError( + "Pick one these sources: jobs, product, company, survey" + ) + return url + + def intervals( + self, from_date: str, to_date: str, days_interval: int + ) -> Tuple[List[str], List[str]]: + """ + Breaks dates range into smaller by provided days interval. + + Args: + from_date (str): Start date for the query in "%Y-%m-%d" format. + to_date (str): End date for the query, if empty, will be executed as datetime.today().strftime("%Y-%m-%d"). + days_interval (int): Days specified in date range per api call (test showed that 30-40 is optimal for performance). + + Returns: + List[str], List[str]: Starts and Ends lists that contains information about date ranges for specific period and time interval. + + Raises: + ValidationError: If the final date of the query is before the start date. 
+ """ + + if to_date == None: + to_date = datetime.today().strftime("%Y-%m-%d") + + end_date = datetime.strptime(to_date, "%Y-%m-%d").date() + start_date = datetime.strptime(from_date, "%Y-%m-%d").date() + + from_date_obj = datetime.strptime(from_date, "%Y-%m-%d") + + to_date_obj = datetime.strptime(to_date, "%Y-%m-%d") + delta = to_date_obj - from_date_obj + + if delta.days < 0: + raise ValidationError("to_date cannot be earlier than from_date.") + + interval = timedelta(days=days_interval) + starts = [] + ends = [] + + period_start = start_date + while period_start < end_date: + period_end = min(period_start + interval, end_date) + starts.append(period_start.strftime("%Y-%m-%d")) + ends.append(period_end.strftime("%Y-%m-%d")) + period_start = period_end + if len(starts) == 0 and len(ends) == 0: + starts.append(from_date) + ends.append(to_date) + return starts, ends + + def check_connection( + self, + source: Literal["jobs", "product", "company", "survey"] = None, + from_date: str = "2022-03-22", + to_date: str = None, + items_per_page: int = 100, + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + url: str = None, + ) -> Tuple[Dict[str, Any], str]: + """ + Initiate first connection to API to retrieve piece of data with information about type of pagination in API URL. + This option is added because type of pagination for endpoints is being changed in the future from page number to 'next' id. + + Args: + source (Literal["jobs", "product", "company", "survey"], optional): The endpoint source to be accessed. Defaults to None. + from_date (str, optional): Start date for the query, by default is the oldest date in the data 2022-03-22. + to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. + items_per_page (int, optional): Number of entries per page. 100 entries by default. + region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] + url (str, optional): Generic part of the URL to Vid Club API. Defaults to None. + + Returns: + Tuple[Dict[str, Any], str]: Dictionary with first response from API with JSON containing data and used URL string. + + Raises: + ValidationError: If from_date is earlier than 2022-03-22. + ValidationError: If to_date is earlier than from_date. + """ + + if from_date < "2022-03-22": + raise ValidationError("from_date cannot be earlier than 2022-03-22.") + + if to_date < from_date: + raise ValidationError("to_date cannot be earlier than from_date.") + + if url is None: + url = self.credentials["url"] + + first_url = self.build_query( + source=source, + from_date=from_date, + to_date=to_date, + api_url=url, + items_per_page=items_per_page, + region=region, + ) + headers = self.headers + response = handle_api_response( + url=first_url, headers=headers, method="GET", verify=False + ) + response = response.json() + + return (response, first_url) + + def get_response( + self, + source: Literal["jobs", "product", "company", "survey"] = None, + from_date: str = "2022-03-22", + to_date: str = None, + items_per_page: int = 100, + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + ) -> pd.DataFrame: + """ + Basing on the pagination type retrieved using check_connection function, gets the response from the API queried and transforms it into DataFrame. 
+ + Args: + source (Literal["jobs", "product", "company", "survey"], optional): The endpoint source to be accessed. Defaults to None. + from_date (str, optional): Start date for the query, by default is the oldest date in the data 2022-03-22. + to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. + items_per_page (int, optional): Number of entries per page. 100 entries by default. + region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] + + Returns: + pd.DataFrame: Table of the data carried in the response. + + Raises: + ValidationError: If any source different than the ones in the list are used. + """ + headers = self.headers + if source not in ["jobs", "product", "company", "survey"]: + raise ValidationError( + "The source has to be: jobs, product, company or survey" + ) + if to_date == None: + to_date = datetime.today().strftime("%Y-%m-%d") + + response, first_url = self.check_connection( + source=source, + from_date=from_date, + to_date=to_date, + items_per_page=items_per_page, + region=region, + ) + + if isinstance(response, dict): + keys_list = list(response.keys()) + elif isinstance(response, list): + keys_list = list(response[0].keys()) + else: + keys_list = [] + + if "next" in keys_list: + ind = True + else: + ind = False + + if "data" in keys_list: + df = pd.DataFrame(response["data"]) + length = df.shape[0] + page = 1 + + while length == items_per_page: + if ind == True: + next = response["next"] + url = f"{first_url}&next={next}" + else: + page += 1 + url = f"{first_url}&page={page}" + r = handle_api_response( + url=url, headers=headers, method="GET", verify=False + ) + response = r.json() + df_page = pd.DataFrame(response["data"]) + if source == "product": + df_page = df_page.transpose() + length = df_page.shape[0] + df = pd.concat((df, df_page), axis=0) + else: + df = pd.DataFrame(response) + + return df + + def total_load( + self, + source: Literal["jobs", "product", "company", "survey"] = None, + from_date: str = "2022-03-22", + to_date: str = None, + items_per_page: int = 100, + region: Literal["bg", "hu", "hr", "pl", "ro", "si", "all"] = "all", + days_interval: int = 30, + ) -> pd.DataFrame: + """ + Looping get_response and iterating by date ranges defined in intervals. Stores outputs as DataFrames in a list. + At the end, daframes are concatenated in one and dropped duplicates that would appear when quering. + + Args: + source (Literal["jobs", "product", "company", "survey"], optional): The endpoint source to be accessed. Defaults to None. + from_date (str, optional): Start date for the query, by default is the oldest date in the data 2022-03-22. + to_date (str, optional): End date for the query. By default None, which will be executed as datetime.today().strftime("%Y-%m-%d") in code. + items_per_page (int, optional): Number of entries per page. 100 entries by default. + region (Literal["bg", "hu", "hr", "pl", "ro", "si", "all"], optional): Region filter for the query. Defaults to "all". [July 2023 status: parameter works only for 'all' on API] + days_interval (int, optional): Days specified in date range per api call (test showed that 30-40 is optimal for performance). Defaults to 30. + + Returns: + pd.DataFrame: Dataframe of the concatanated data carried in the responses. 
+ """ + + starts, ends = self.intervals( + from_date=from_date, to_date=to_date, days_interval=days_interval + ) + + dfs_list = [] + if len(starts) > 0 and len(ends) > 0: + for start, end in zip(starts, ends): + logger.info(f"ingesting data for dates [{start}]-[{end}]...") + df = self.get_response( + source=source, + from_date=start, + to_date=end, + items_per_page=items_per_page, + region=region, + ) + dfs_list.append(df) + if len(dfs_list) > 1: + df = pd.concat(dfs_list, axis=0, ignore_index=True) + else: + df = pd.DataFrame(dfs_list[0]) + else: + df = self.get_response( + source=source, + from_date=from_date, + to_date=to_date, + items_per_page=items_per_page, + region=region, + ) + df.drop_duplicates(inplace=True) + + if df.empty: + logger.error("No data for this date range") + + return df From 3fca2c30ea2a23c5796ef4778e8ec4201563a99b Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 8 Nov 2023 13:18:32 +0100 Subject: [PATCH 47/86] =?UTF-8?q?=F0=9F=8E=A8=20added=20new=20agent=20inte?= =?UTF-8?q?raction=20view=20type=20to=20Genesys.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/genesys.py | 1 + viadot/tasks/genesys.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/viadot/sources/genesys.py b/viadot/sources/genesys.py index 6be907a66..71dfa5209 100644 --- a/viadot/sources/genesys.py +++ b/viadot/sources/genesys.py @@ -322,6 +322,7 @@ def download_all_reporting_exports( "queue_performance_detail_view", "queue_interaction_detail_view", "agent_status_detail_view", + "agent_interaction_detail_view", ]: file_name = f"{self.view_type.upper()}_{next(self.count)}_{date}" elif single_report[4].lower() in [ diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index de47ddebf..7e67dab07 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -10,8 +10,8 @@ from prefect.engine import signals from prefect.utilities import logging from prefect.utilities.tasks import defaults_from_attrs -from viadot.task_utils import * +from viadot.task_utils import validate_df from viadot.exceptions import APIError from viadot.sources import Genesys @@ -385,6 +385,7 @@ def run( "agent_performance_summary_view", "agent_status_summary_view", "agent_status_detail_view", + "agent_interaction_detail_view", ]: genesys.genesys_api_connection( post_data_list=post_data_list, end_point=end_point From 2e596b980f0a19bb83538e9bf657d1615c236440 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 8 Nov 2023 13:20:17 +0100 Subject: [PATCH 48/86] =?UTF-8?q?=F0=9F=93=9D=20updated=20CHANGELOG.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 507c590cf..2bea72907 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added new view type `agent_interaction_view_type` in `Genesys`source. + ## [0.4.21] - 2023-10-26 ### Added - Added `validate_df` task to task_utils. 
From 2470fdfa4e9be043cb8697a306007cc942cfc8f0 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Wed, 8 Nov 2023 14:14:43 +0100 Subject: [PATCH 49/86] =?UTF-8?q?=F0=9F=93=9D=20updated=20import.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/genesys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 7e67dab07..428e699a0 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -11,7 +11,7 @@ from prefect.utilities import logging from prefect.utilities.tasks import defaults_from_attrs -from viadot.task_utils import validate_df +from viadot.task_utils import * from viadot.exceptions import APIError from viadot.sources import Genesys From c9c3c500a4ddea07837d71f394565556d472ce82 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Thu, 9 Nov 2023 12:24:48 +0100 Subject: [PATCH 50/86] fix typos in changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bcf45aef..c1127a079 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Modified `CustomerGauge` source class with simplified logic to return json structure. -- Expand `CustomerGaugeToDF` task class with separate cleaning functions and handling nested json structure flattening with two new methods `_field_reference_unpacker` and `_nested_dict_transformer`. -- Change `CustomerGaugeToADLS` to containg new arguments. +- Expanded `CustomerGaugeToDF` task class with separate cleaning functions and handling nested json structure flattening with two new methods `_field_reference_unpacker` and `_nested_dict_transformer`. +- Changed `CustomerGaugeToADLS` to containing new arguments. ## [0.4.21] - 2023-10-26 ### Added From b46b3e9f3a4b270d52442da73e10ca4c65c6305e Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Thu, 9 Nov 2023 13:04:20 +0100 Subject: [PATCH 51/86] simplify cleaning drivers --- viadot/tasks/customer_gauge.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index cec4d4c5f..b24a09f75 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -313,13 +313,9 @@ def _drivers_cleaner( str: A cleaned and formatted string of driver data. 
""" - drivers = drivers.split("}, {") - cleaned_drivers = [] - for driver in drivers: - driver = driver.replace("{", "").replace("}", "") - driver = driver.replace("'", "").replace("label: ", "") - cleaned_drivers.append(driver) - return ', '.join(cleaned_drivers) + cleaned_drivers = drivers.replace("{", "").replace("}", "").replace("'", "").replace("label: ", "") + + return cleaned_drivers def __call__(self): """Download Customer Gauge data to a DF""" From f10ddfed4fd3b140c6dbdd26f3465936bc87e51a Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Fri, 10 Nov 2023 14:49:30 +0100 Subject: [PATCH 52/86] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20Splitted=20test=20fo?= =?UTF-8?q?r=20Eurostat=20on=20source=20tests=20and=20task=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 3 + tests/integration/tasks/test_eurostat.py | 132 --------------------- tests/integration/test_eurostat.py | 140 +++++++++++++++++++++++ 3 files changed, 143 insertions(+), 132 deletions(-) create mode 100644 tests/integration/test_eurostat.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 76eb3280b..2e3709821 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed +- Splitted test for Eurostat on source tests and task tests + ## [0.4.21] - 2023-10-26 ### Added - Added `validate_df` task to task_utils. diff --git a/tests/integration/tasks/test_eurostat.py b/tests/integration/tasks/test_eurostat.py index 475d60190..7fa01dd58 100644 --- a/tests/integration/tasks/test_eurostat.py +++ b/tests/integration/tasks/test_eurostat.py @@ -6,138 +6,6 @@ from viadot.tasks import eurostat -def test_and_validate_dataset_code_without_params(caplog): - """This function is designed to test the accuracy of the data retrieval feature in a program. - Specifically, it tests to ensure that the program returns a non-empty DataFrame when a correct - dataset code is provided without any parameters. The function is intended to be used in software - development to verify that the program is correctly retrieving data from the appropriate dataset. - """ - task = eurostat.EurostatToDF(dataset_code="ILC_DI04").run() - assert isinstance(task, pd.DataFrame) - assert not task.empty - assert caplog.text == "" - - -def test_wrong_dataset_code_logger(caplog): - """This function is designed to test the accuracy of the error logging feature in a program. - Specifically, it tests to ensure that the program is able to correctly identify and log errors - when provided with only incorrect dataset code. - The function is intended to be used in software development to identify correct type errors - and messages in the program's handling of codes. - """ - task = eurostat.EurostatToDF(dataset_code="ILC_DI04E") - - with pytest.raises(ValueError, match="DataFrame is empty!"): - with caplog.at_level(logging.ERROR): - task.run() - assert ( - f"Failed to fetch data for ILC_DI04E, please check correctness of dataset code!" - in caplog.text - ) - - -def test_wrong_parameters_codes_logger(caplog): - """This function is designed to test the accuracy of the error logging feature in a program. - Specifically, it tests to ensure that the program is able to correctly identify and log errors - when provided with a correct dataset_code and correct parameters are provided, but both parameters codes are incorrect. 
- The function is intended to be used in software development to identify correct type errors - and messages in the program's handling of codes. - """ - task = eurostat.EurostatToDF( - dataset_code="ILC_DI04", - params={"hhtyp": "total1", "indic_il": "non_existing_code"}, - ) - - with pytest.raises(ValueError, match="DataFrame is empty!"): - with caplog.at_level(logging.ERROR): - task.run() - assert ( - f"Parameters codes: 'total1 | non_existing_code' are not available. Please check your spelling!" - in caplog.text - ) - assert ( - f"You can find everything via link: https://ec.europa.eu/eurostat/databrowser/view/ILC_DI04/default/table?lang=en" - in caplog.text - ) - - -def test_parameter_codes_as_list_logger(caplog): - """This function is designed to test the accuracy of the error logging feature in a program. - Specifically, it tests to ensure that the program is able to correctly identify and log errors - when provided with a correct dataset code, correct parameters, but incorrect parameters codes structure - (as a list with strings, instead of single string). - The function is intended to be used in software development to identify correct type errors - and messages in the program's handling of codes. - """ - - task = eurostat.EurostatToDF( - dataset_code="ILC_DI04", - params={"hhtyp": ["totale", "nottotale"], "indic_il": "med_e"}, - ) - with pytest.raises(ValueError, match="Wrong structure of params!"): - with caplog.at_level(logging.ERROR): - task.run() - assert ( - "You can provide only one code per one parameter as 'str' in params!\n" - in caplog.text - ) - assert ( - "CORRECT: params = {'unit': 'EUR'} | INCORRECT: params = {'unit': ['EUR', 'USD', 'PLN']}" - in caplog.text - ) - - -def test_wrong_parameters(caplog): - """This function is designed to test the accuracy of the error logging feature in a program. - Specifically, it tests to ensure that the program is able to correctly identify and log errors - when provided with a correct dataset_code, but incorrect parameters keys. - The function is intended to be used in software development to identify correct type errors - and messages in the program's handling of codes. - """ - - task = eurostat.EurostatToDF( - dataset_code="ILC_DI04", params={"hhhtyp": "total", "indic_ilx": "med_e"} - ) - with pytest.raises(ValueError, match="DataFrame is empty!"): - with caplog.at_level(logging.ERROR): - task.run() - assert ( - f"Parameters: 'hhhtyp | indic_ilx' are not in dataset. Please check your spelling!\n" - in caplog.text - ) - assert ( - f"Possible parameters: freq | hhtyp | indic_il | unit | geo | time" - in caplog.text - ) - - -def test_params_as_list(): - """This function is designed to test the accuracy of the error logging feature in a program. - Specifically, it tests to ensure that the program is able to correctly identify and log error - when provided with a correct dataset_code, but incorrect params structure (as list instead of dict). - The function is intended to be used in software development to identify correct type errors - and messages in the program's handling of codes. - """ - with pytest.raises(TypeError, match="Params should be a dictionary."): - eurostat.EurostatToDF(dataset_code="ILC_DI04", params=["total", "med_e"]).run() - - -def test_correct_params_and_dataset_code(caplog): - """This function is designed to test the accuracy of the data retrieval feature in a program. - Specifically, it tests to ensure that the program returns a non-empty DataFrame when a correct - dataset code is provided with correct params. 
The function is intended to be used in software - development to verify that the program is correctly retrieving data from the appropriate dataset. - """ - - task = eurostat.EurostatToDF( - dataset_code="ILC_DI04", params={"hhtyp": "total", "indic_il": "med_e"} - ).run() - - assert isinstance(task, pd.DataFrame) - assert not task.empty - assert caplog.text == "" - - def task_correct_requested_columns(caplog): """This function is designed to test the accuracy of the data retrieval feature in a program. Specifically, it tests to ensure that the program is able to correctly identify and log error diff --git a/tests/integration/test_eurostat.py b/tests/integration/test_eurostat.py new file mode 100644 index 000000000..6fb64cbea --- /dev/null +++ b/tests/integration/test_eurostat.py @@ -0,0 +1,140 @@ +import logging + +import pandas as pd +import pytest + +from viadot.sources import Eurostat + + +def test_and_validate_dataset_code_without_params(caplog): + """This function is designed to test the accuracy of the data retrieval feature in a program. + Specifically, it tests to ensure that the program returns a non-empty DataFrame when a correct + dataset code is provided without any parameters. The function is intended to be used in software + development to verify that the program is correctly retrieving data from the appropriate dataset. + """ + source = Eurostat(dataset_code="ILC_DI04").get_data_frame_from_response() + assert isinstance(source, pd.DataFrame) + assert not source.empty + assert caplog.text == "" + + +def test_wrong_dataset_code_logger(caplog): + """This function is designed to test the accuracy of the error logging feature in a program. + Specifically, it tests to ensure that the program is able to correctly identify and log errors + when provided with only incorrect dataset code. + The function is intended to be used in software development to identify correct type errors + and messages in the program's handling of codes. + """ + source = Eurostat(dataset_code="ILC_DI04E") + + with pytest.raises(ValueError, match="DataFrame is empty!"): + with caplog.at_level(logging.ERROR): + source.get_data_frame_from_response() + assert ( + f"Failed to fetch data for ILC_DI04E, please check correctness of dataset code!" + in caplog.text + ) + + +def test_wrong_parameters_codes_logger(caplog): + """This function is designed to test the accuracy of the error logging feature in a program. + Specifically, it tests to ensure that the program is able to correctly identify and log errors + when provided with a correct dataset_code and correct parameters are provided, but both parameters codes are incorrect. + The function is intended to be used in software development to identify correct type errors + and messages in the program's handling of codes. + """ + source = Eurostat( + dataset_code="ILC_DI04", + params={"hhtyp": "total1", "indic_il": "non_existing_code"}, + ) + + with pytest.raises(ValueError, match="DataFrame is empty!"): + with caplog.at_level(logging.ERROR): + source.get_data_frame_from_response() + assert ( + f"Parameters codes: 'total1 | non_existing_code' are not available. Please check your spelling!" + in caplog.text + ) + assert ( + f"You can find everything via link: https://ec.europa.eu/eurostat/databrowser/view/ILC_DI04/default/table?lang=en" + in caplog.text + ) + + +def test_parameter_codes_as_list_logger(caplog): + """This function is designed to test the accuracy of the error logging feature in a program. 
+ Specifically, it tests to ensure that the program is able to correctly identify and log errors + when provided with a correct dataset code, correct parameters, but incorrect parameters codes structure + (as a list with strings, instead of single string). + The function is intended to be used in software development to identify correct type errors + and messages in the program's handling of codes. + """ + + source = Eurostat( + dataset_code="ILC_DI04", + params={"hhtyp": ["totale", "nottotale"], "indic_il": "med_e"}, + ) + with pytest.raises(ValueError, match="Wrong structure of params!"): + with caplog.at_level(logging.ERROR): + source.get_data_frame_from_response() + assert ( + "You can provide only one code per one parameter as 'str' in params!\n" + in caplog.text + ) + assert ( + "CORRECT: params = {'unit': 'EUR'} | INCORRECT: params = {'unit': ['EUR', 'USD', 'PLN']}" + in caplog.text + ) + + +def test_wrong_parameters(caplog): + """This function is designed to test the accuracy of the error logging feature in a program. + Specifically, it tests to ensure that the program is able to correctly identify and log errors + when provided with a correct dataset_code, but incorrect parameters keys. + The function is intended to be used in software development to identify correct type errors + and messages in the program's handling of codes. + """ + + source = Eurostat( + dataset_code="ILC_DI04", params={"hhhtyp": "total", "indic_ilx": "med_e"} + ) + with pytest.raises(ValueError, match="DataFrame is empty!"): + with caplog.at_level(logging.ERROR): + source.get_data_frame_from_response() + assert ( + f"Parameters: 'hhhtyp | indic_ilx' are not in dataset. Please check your spelling!\n" + in caplog.text + ) + assert ( + f"Possible parameters: freq | hhtyp | indic_il | unit | geo | time" + in caplog.text + ) + + +def test_params_as_list(): + """This function is designed to test the accuracy of the error logging feature in a program. + Specifically, it tests to ensure that the program is able to correctly identify and log error + when provided with a correct dataset_code, but incorrect params structure (as list instead of dict). + The function is intended to be used in software development to identify correct type errors + and messages in the program's handling of codes. + """ + with pytest.raises(TypeError, match="Params should be a dictionary."): + Eurostat( + dataset_code="ILC_DI04", params=["total", "med_e"] + ).get_data_frame_from_response() + + +def test_correct_params_and_dataset_code(caplog): + """This function is designed to test the accuracy of the data retrieval feature in a program. + Specifically, it tests to ensure that the program returns a non-empty DataFrame when a correct + dataset code is provided with correct params. The function is intended to be used in software + development to verify that the program is correctly retrieving data from the appropriate dataset. 
+ """ + + source = Eurostat( + dataset_code="ILC_DI04", params={"hhtyp": "total", "indic_il": "med_e"} + ).get_data_frame_from_response() + + assert isinstance(source, pd.DataFrame) + assert not source.empty + assert caplog.text == "" From 5fe4fe14cc5d35eb12c47ab8ac8c81f6574655dc Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Fri, 10 Nov 2023 16:31:21 +0100 Subject: [PATCH 53/86] =?UTF-8?q?=E2=9C=85=20Added=20missing=20tests=20for?= =?UTF-8?q?=20Mediatool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/test_mediatool.py | 37 +++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_mediatool.py b/tests/integration/test_mediatool.py index f5a2d81a2..45b9da48b 100644 --- a/tests/integration/test_mediatool.py +++ b/tests/integration/test_mediatool.py @@ -5,7 +5,7 @@ import pytest from prefect.tasks.secrets import PrefectSecret -from viadot.exceptions import APIError +from viadot.exceptions import APIError, CredentialError from viadot.sources import Mediatool from viadot.task_utils import credentials_loader @@ -13,6 +13,11 @@ MTOOL = Mediatool(credentials=CREDENTIALS) +def test_init_empty_credentials(): + with pytest.raises(CredentialError, match=r"Missing credentials."): + Mediatool(credentials={}) + + def test_get_campaigns_df(): camps = MTOOL.get_campaigns(CREDENTIALS["ORG"]) assert isinstance(camps, pd.DataFrame) @@ -29,6 +34,13 @@ def test_get_organizations(): assert isinstance(orgs, pd.DataFrame) +def test_get_organizations_return_list(): + orgs = MTOOL.get_organizations( + user_id=CREDENTIALS["USER_ID"], return_dataframe=False + ) + assert isinstance(orgs, list) + + def test_get_media_entries(): media_entries = MTOOL.get_media_entries( organization_id=CREDENTIALS["ORG"], columns=["_id"] @@ -36,6 +48,13 @@ def test_get_media_entries(): assert isinstance(media_entries, pd.DataFrame) +def test_get_media_entries_wrong_columns(caplog): + MTOOL.get_media_entries( + organization_id=CREDENTIALS["ORG"], columns=["wrong_column", "random_column"] + ) + assert "Columns ['wrong_column', 'random_column'] are incorrect." 
in caplog.text + + def test_get_media_types_correct_id(): media_types = MTOOL.get_media_types(media_type_ids=[CREDENTIALS["MEDIA_TYPE_ID"]]) assert isinstance(media_types, pd.DataFrame) @@ -48,11 +67,25 @@ def test_get_media_types_wrong_id(): _ = MTOOL.get_media_types(["040404"]) -def test_get_vehicles(caplog): +def test_get_media_types_return_list(): + media_types = MTOOL.get_media_types( + media_type_ids=[CREDENTIALS["MEDIA_TYPE_ID"]], return_dataframe=False + ) + assert isinstance(media_types, list) + + +def test_get_vehicles_wrong_ids(caplog): _ = MTOOL.get_vehicles(vehicle_ids=["100000", "200000"]) assert "Vehicle were not found for: ['100000', '200000']" in caplog.text +def test_get_vehicles_return_dict(): + vehicles = MTOOL.get_vehicles( + vehicle_ids=[CREDENTIALS["VEHICLE_ID"]], return_dataframe=False + ) + assert isinstance(vehicles, dict) + + def test_rename_columns_correct(): data = {"id": [1, 2], "amount": [3, 4]} df = pd.DataFrame(data=data) From 5972e27650ce33d1392f71efa3549e47569ab7da Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Fri, 10 Nov 2023 16:34:01 +0100 Subject: [PATCH 54/86] =?UTF-8?q?=F0=9F=90=9B=20Fixed=20return=20types=20a?= =?UTF-8?q?nd=20error=20handling=20for=20Mediatool=20source?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/mediatool.py | 40 +++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/viadot/sources/mediatool.py b/viadot/sources/mediatool.py index 88e358ee7..4b40c8739 100644 --- a/viadot/sources/mediatool.py +++ b/viadot/sources/mediatool.py @@ -1,7 +1,7 @@ import inspect import json from datetime import date, timedelta -from typing import List +from typing import List, Union import pandas as pd from prefect.utilities import logging @@ -35,14 +35,14 @@ def __init__( organization_id (str, optional): Organization ID. Defaults to None. user_id (str, optional): User ID. Defaults to None. """ - if credentials is not None: - try: - self.header = {"Authorization": f"Bearer {credentials.get('TOKEN')}"} - except: - raise CredentialError("Credentials not found.") + if any([rq not in credentials for rq in ["TOKEN", "USER_ID"]]): + raise CredentialError( + "Missing credentials. 'TOKEN' and 'USER_ID' are required." + ) super().__init__(*args, credentials=credentials, **kwargs) + self.header = {"Authorization": f"Bearer {self.credentials.get('TOKEN')}"} self.organization_id = organization_id or self.credentials.get( "ORGANIZATION_ID" ) @@ -80,7 +80,7 @@ def get_media_entries( end_date: str = None, time_delta: int = 360, return_dataframe: bool = True, - ) -> pd.DataFrame: + ) -> Union[pd.DataFrame, dict]: """ Get data for media entries. This is a main function. Media entries contain IDs for most of the fields for other endpoints.Returns DataFrame or Dict. @@ -95,7 +95,7 @@ def get_media_entries( Defaults to True. Returns: - pd.DataFrame: Default return dataframe If 'return_daframe=False' then return list of dicts. + Union[pd.DataFrame, dict]: Default return dataframe If 'return_daframe=False' then return list of dicts. """ today = date.today() @@ -119,9 +119,11 @@ def get_media_entries( columns = df.columns try: df_filtered = df[columns] - except KeyError as e: - logger.info(e) - return df_filtered + return df_filtered + except KeyError: + logger.error( + f"Columns {columns} are incorrect. Whole dictionary for 'mediaEntries' will be returned." + ) return response_dict["mediaEntries"] @@ -137,7 +139,7 @@ def get_campaigns( Defaults to True. 
Returns: - pd.DataFrame: Default return dataframe If 'return_daframe=False' then return list of dicts. + pd.DataFrame: Default return dataframe If 'return_daframe=False' then return dictionary. """ url_campaigns = ( f"https://api.mediatool.com/organizations/{organization_id}/campaigns" @@ -168,7 +170,7 @@ def get_vehicles( self, vehicle_ids: List[str], return_dataframe: bool = True, - ) -> pd.DataFrame: + ) -> Union[pd.DataFrame, dict]: """ Get vehicles data based on the organization IDs. Returns DataFrame or Dict. @@ -178,7 +180,7 @@ def get_vehicles( Defaults to True. Returns: - pd.DataFrame: Default return dataframe. If 'return_daframe=False' then return list of dicts. + Union[pd.DataFrame, dict]: Default return dataframe. If 'return_daframe=False' then return dictionary. """ response_dict = {} dfs = [] @@ -211,11 +213,11 @@ def get_vehicles( return df_updated return None - return response_dict["vehicles"] + return response_dict["vehicle"] def get_organizations( self, user_id: str = None, return_dataframe: bool = True - ) -> pd.DataFrame: + ) -> Union[pd.DataFrame, List[dict]]: """ Get organizations data based on the user ID. Returns DataFrame or Dict. @@ -225,7 +227,7 @@ def get_organizations( Defaults to True. Returns: - pd.DataFrame: Default return dataframe. If 'return_daframe=False' then return list of dicts. + Union[pd.DataFrame, List[dict]]: Default return dataframe. If 'return_daframe=False' then return list of dicts. """ user_id = user_id or self.user_id url_organizations = f"https://api.mediatool.com/users/{user_id}/organizations" @@ -258,7 +260,7 @@ def get_organizations( def get_media_types( self, media_type_ids: List[str], return_dataframe: bool = True - ) -> pd.DataFrame: + ) -> Union[pd.DataFrame, List[dict]]: """ Get media types data based on the media types ID. User have to provide list of media type IDs. Returns DataFrame or Dict. @@ -269,7 +271,7 @@ def get_media_types( Defaults to True. Returns: - pd.DataFrame: Default return dataframe. If 'return_daframe=False' then return list of dicts. + Union[pd.DataFrame, List[dict]]: Default return dataframe. If 'return_daframe=False' then return list of dicts. 
""" list_media_types = [] for id_media_type in media_type_ids: From 047e9cd8a2247ca794e67915411baad75bdcc042 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Fri, 10 Nov 2023 16:35:50 +0100 Subject: [PATCH 55/86] =?UTF-8?q?=E2=9C=85=20Added=20missing=20tests=20for?= =?UTF-8?q?=20Hubspot=20source=20class?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/test_hubspot.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/integration/test_hubspot.py b/tests/integration/test_hubspot.py index 0907e031a..5963df3ee 100644 --- a/tests/integration/test_hubspot.py +++ b/tests/integration/test_hubspot.py @@ -2,6 +2,7 @@ import pandas as pd import pytest +from viadot.exceptions import CredentialError from viadot.sources import Hubspot from viadot.task_utils import credentials_loader @@ -40,6 +41,11 @@ def var_dictionary(): yield variables +def test_credentials_not_provided(): + with pytest.raises(CredentialError, match="Credentials not found."): + Hubspot(credentials={}) + + def test_clean_special_characters(): test_value = "762##28cd7-e$69d-4708-be31-726bb!859befd" clean_chars = HUBSPOT.clean_special_characters(value=test_value) @@ -79,3 +85,8 @@ def test_to_json(var_dictionary): trigger = HUBSPOT.to_json(url=api_url, body=api_body, method="POST") assert isinstance(trigger, dict) + + +def test_get_properties_url(var_dictionary): + url = HUBSPOT.get_properties_url(endpoint=var_dictionary["endpoint"]) + assert isinstance(url, str) From b2d7ad07594e6d91d9d043efd382ab0ae823bf6a Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Fri, 10 Nov 2023 16:36:31 +0100 Subject: [PATCH 56/86] =?UTF-8?q?=E2=9C=85=20Added=20missing=20tests=20for?= =?UTF-8?q?=20Genesys=20source=20class?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/test_genesys.py | 32 +++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_genesys.py b/tests/integration/test_genesys.py index 817e590b5..8508978f1 100644 --- a/tests/integration/test_genesys.py +++ b/tests/integration/test_genesys.py @@ -1,6 +1,7 @@ from unittest import mock import pytest +import logging from viadot.sources import Genesys @@ -138,6 +139,13 @@ def test_default_credential_param(): assert g.credentials != None and type(g.credentials) == dict +@pytest.mark.init +def test_default_credentials_provided(caplog): + with caplog.at_level(logging.INFO): + Genesys(credentials_genesys={"CREDENTIALS_KEY": "value"}) + assert "Credentials provided by user" in caplog.text + + @pytest.mark.init def test_environment_param(): g = Genesys() @@ -169,15 +177,35 @@ def test_generate_api_connection(mock_api_response, var_dictionary): mock_api_response.assert_called() +def test_api_connection_return_type(): + conn_dict = Genesys().genesys_api_connection(post_data_list=["test_value_to_post"]) + assert isinstance(conn_dict, dict) + + +def test_load_reporting_exports_return_type(caplog): + with caplog.at_level(logging.INFO): + load_return = Genesys().load_reporting_exports() + assert isinstance(load_return, dict) + + assert "loaded" in caplog.text + + @mock.patch.object(Genesys, "download_report") @pytest.mark.dependency(depends=["test_generate_api_connection"]) @pytest.mark.download -def test_download_reports(mock_download_files, var_dictionary): +def test_download_reports(mock_download_files, var_dictionary, caplog): g = Genesys() g.ids_mapping = var_dictionary["ids_mapping"] g.report_data = 
var_dictionary["report_data"] g.start_date = var_dictionary["start_date"] - file_name_list = g.download_all_reporting_exports() + with caplog.at_level(logging.INFO): + file_name_list = g.download_all_reporting_exports() + assert "IDS_MAPPING loaded" in caplog.text + + g.ids_mapping = None + with caplog.at_level(logging.WARNING): + file_name_list = g.download_all_reporting_exports() + assert "IDS_MAPPING is not provided" in caplog.text assert type(file_name_list) == list and len(file_name_list) > 0 mock_download_files.assert_called() From 4e5c6c2b95f44015fea87a7914a775f69e3ad0a0 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Fri, 10 Nov 2023 16:37:54 +0100 Subject: [PATCH 57/86] =?UTF-8?q?=E2=9C=85=20Added=20missing=20tests=20for?= =?UTF-8?q?=20CustomerGauge=20source=20class?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/test_customer_gauge.py | 30 +++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_customer_gauge.py b/tests/integration/test_customer_gauge.py index 666a73251..2a24c87d1 100644 --- a/tests/integration/test_customer_gauge.py +++ b/tests/integration/test_customer_gauge.py @@ -9,6 +9,16 @@ CG = CustomerGauge(endpoint=ENDPOINT) +def test_wrong_endpoint(): + with pytest.raises(ValueError, match="Incorrect endpoint name"): + CustomerGauge(endpoint=["wrong_endpoint"]) + + +def test_endpoint_and_url_not_provided(): + with pytest.raises(ValueError, match="Provide endpoint name"): + CustomerGauge() + + def test_get_json_content(): json_response = CG.get_json_response() assert isinstance(json_response, dict) @@ -21,8 +31,10 @@ def test_properties_cleaning(): json_response = CG.get_json_response() data = json_response["data"][2].copy() cleaned_data = CG.properties_cleaning(data.copy()) + assert isinstance(data["properties"], list) assert isinstance(cleaned_data["properties"], dict) + assert r"{',':" or "label" or "}" in json_response["drivers"] def test_flatten_json(): @@ -63,6 +75,13 @@ def test_pagesize_and_to_df(): assert len(df) == 1 +def test_to_df_with_wrong_json_response(): + with pytest.raises( + ValueError, match="Provided argument doesn't contain 'data' value" + ): + CG.to_df(json_response={}) + + def test_pass_specific_cursor(): # for default pagesize=1000 returned cursor value should be bigger than passed cur = random.randint(1, 9999) @@ -71,11 +90,16 @@ def test_pass_specific_cursor(): assert cur_retrieved > cur +def test_cursor_is_not_provided(): + with pytest.raises( + ValueError, match="Provided argument doesn't contain 'cursor' value" + ): + CG.get_cursor(json_response={}) + + def test_uncomplete_date_arguments(): with pytest.raises(ValueError, match="Missing date arguments"): - json_response = CG.get_json_response( - date_field="date_sent", start_date="2012-01-03" - ) + CG.get_json_response(date_field="date_sent", start_date="2012-01-03") def test_endpoint_url_argument(): From d717a09474602e878f8d3e2c932a9b4cf281e966 Mon Sep 17 00:00:00 2001 From: Diego-H-S Date: Mon, 13 Nov 2023 10:04:00 +0100 Subject: [PATCH 58/86] =?UTF-8?q?=F0=9F=93=9D=20updated=20docstrings.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/genesys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viadot/sources/genesys.py b/viadot/sources/genesys.py index 71dfa5209..d9109b313 100644 --- a/viadot/sources/genesys.py +++ b/viadot/sources/genesys.py @@ -344,7 +344,7 @@ def 
download_all_reporting_exports( if store_file_names is True: file_name_list.append(file_name + "." + self.file_extension) - self.logger.info("Al reports were successfully dowonload.") + self.logger.info("All reports were successfully downloaded.") if store_file_names is True: self.logger.info("Successfully genetared file names list.") From bbd0a256d9d6dc27f201aad3843e39f3202f50bc Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Mon, 13 Nov 2023 11:30:34 +0100 Subject: [PATCH 59/86] =?UTF-8?q?=E2=9C=85=20Added=20additional=20test=20f?= =?UTF-8?q?or=2064=20line=20from=20viadot.task?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/tasks/test_eurostat.py | 26 +++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/integration/tasks/test_eurostat.py b/tests/integration/tasks/test_eurostat.py index 7fa01dd58..11297072e 100644 --- a/tests/integration/tasks/test_eurostat.py +++ b/tests/integration/tasks/test_eurostat.py @@ -23,7 +23,7 @@ def task_correct_requested_columns(caplog): assert isinstance(task, pd.DataFrame) assert not task.empty assert caplog.text == "" - assert list(task.columns) == task.needed_columns + assert list(task.columns) == task.requested_columns def test_wrong_needed_columns_names(caplog): @@ -90,3 +90,27 @@ def test_requested_columns_not_in_list(): params={"hhtyp": "total", "indic_il": "med_e"}, requested_columns="updated", ).run() + + +def test_requested_columns_not_provided(caplog): + """Test the behavior when 'requested_columns' are not provided to EurostatToDF. + + This test checks the behavior of the EurostatToDF class when 'requested_columns' are not provided. + It ensures that the resulting DataFrame is of the correct type, not empty, and that no error + messages are logged using the 'caplog' fixture. + + Parameters: + - caplog: pytest fixture for capturing log messages. + + Usage: + - Invoke this test function to check the behavior of EurostatToDF when 'requested_columns' are not provided. + """ + task = eurostat.EurostatToDF( + dataset_code="ILC_DI04", + params={"hhtyp": "total", "indic_il": "med_e"}, + ) + task.run() + + assert isinstance(task, pd.DataFrame) + assert not task.empty + assert caplog.text == "" From 52709cb35c73167bc052b8df9c82cda9ab69946d Mon Sep 17 00:00:00 2001 From: adrian-wojcik Date: Mon, 13 Nov 2023 12:09:39 +0100 Subject: [PATCH 60/86] =?UTF-8?q?=F0=9F=8E=A8=20Changed=20Task=20object=20?= =?UTF-8?q?reference=20and=20fixed=20bug=20in=20new=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/tasks/test_eurostat.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/integration/tasks/test_eurostat.py b/tests/integration/tasks/test_eurostat.py index 11297072e..ed3f20596 100644 --- a/tests/integration/tasks/test_eurostat.py +++ b/tests/integration/tasks/test_eurostat.py @@ -3,7 +3,7 @@ import pandas as pd import pytest -from viadot.tasks import eurostat +from viadot.tasks import EurostatToDF def task_correct_requested_columns(caplog): @@ -13,7 +13,7 @@ def task_correct_requested_columns(caplog): The function is intended to be used in software development to verify that the program is correctly retrieving data from the appropriate dataset. 
""" - task = eurostat.EurostatToDF( + task = EurostatToDF( dataset_code="ILC_DI04", params={"hhtyp": "total", "indic_il": "med_e"}, requested_columns=["updated", "geo", "indicator"], @@ -33,7 +33,7 @@ def test_wrong_needed_columns_names(caplog): The function is intended to be used in software development to identify correct type errors and messages in the program's handling of codes. """ - task = eurostat.EurostatToDF( + task = EurostatToDF( dataset_code="ILC_DI04", params={"hhtyp": "total", "indic_il": "med_e"}, requested_columns=["updated1", "geo1", "indicator1"], @@ -56,7 +56,7 @@ def test_wrong_params_and_wrong_requested_columns_names(caplog): params validation. The function is intended to be used in software development to identify correct type errors and messages in the program's handling of codes. """ - task = eurostat.EurostatToDF( + task = EurostatToDF( dataset_code="ILC_DI04", params={"hhhtyp": "total", "indic_ilx": "med_e"}, requested_columns=["updated1", "geo1", "indicator1"], @@ -85,7 +85,7 @@ def test_requested_columns_not_in_list(): with pytest.raises( TypeError, match="Requested columns should be provided as list of strings." ): - eurostat.EurostatToDF( + EurostatToDF( dataset_code="ILC_DI04", params={"hhtyp": "total", "indic_il": "med_e"}, requested_columns="updated", @@ -105,12 +105,12 @@ def test_requested_columns_not_provided(caplog): Usage: - Invoke this test function to check the behavior of EurostatToDF when 'requested_columns' are not provided. """ - task = eurostat.EurostatToDF( + task = EurostatToDF( dataset_code="ILC_DI04", params={"hhtyp": "total", "indic_il": "med_e"}, ) - task.run() + df = task.run() - assert isinstance(task, pd.DataFrame) - assert not task.empty + assert isinstance(df, pd.DataFrame) + assert not df.empty assert caplog.text == "" From f69a8ef8416b33415d8c6cdb5f2e16c2d52cf6d5 Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Mon, 13 Nov 2023 14:39:47 +0100 Subject: [PATCH 61/86] rename unpacking cols parameters --- .../integration/tasks/test_customer_gauge.py | 20 +++---- viadot/flows/customer_gauge_to_adls.py | 16 +++--- viadot/tasks/customer_gauge.py | 56 +++++++++---------- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/tests/integration/tasks/test_customer_gauge.py b/tests/integration/tasks/test_customer_gauge.py index 6cbd17078..d95ea14cd 100644 --- a/tests/integration/tasks/test_customer_gauge.py +++ b/tests/integration/tasks/test_customer_gauge.py @@ -187,8 +187,8 @@ def test_column_unpacker_success_method1_and_method2(): Test the 'column_unpacker' method with valid data and both Method 1 and Method 2 columns specified. It should return the expected result. """ data = RAW_JSON['data'].copy() - method1_cols = ['properties'] - method2_cols = ['drivers'] + unpack_by_field_reference_cols = ['properties'] + unpack_by_nested_dict_transformer = ['drivers'] expected_result = [ {'contact': {'first_name': '***', 'last_name': '***'}, @@ -221,7 +221,7 @@ def test_column_unpacker_success_method1_and_method2(): '2_label': 'Waiting period'}} ] - result = CG.column_unpacker(json_list=data, method1_cols=method1_cols, method2_cols=method2_cols) + result = CG.column_unpacker(json_list=data, unpack_by_field_reference_cols=unpack_by_field_reference_cols, unpack_by_nested_dict_transformer=unpack_by_nested_dict_transformer) assert result == expected_result @@ -231,10 +231,10 @@ def test_column_unpacker_missing_json_list(): """ Test the 'column_unpacker' method with missing 'json_list' argument. It should raise a ValueError. 
""" - method1_cols = ['properties'] - method2_cols = ['drivers'] + unpack_by_field_reference_cols = ['properties'] + unpack_by_nested_dict_transformer = ['drivers'] with pytest.raises(ValueError, match="Input 'json_list' is required."): - CG.column_unpacker(json_list=None, method1_cols=method1_cols, method2_cols=method2_cols) + CG.column_unpacker(json_list=None, unpack_by_field_reference_cols=unpack_by_field_reference_cols, unpack_by_nested_dict_transformer=unpack_by_nested_dict_transformer) @pytest.mark.test_column_unpacker_duplicate_columns @@ -243,10 +243,10 @@ def test_column_unpacker_duplicate_columns(): Test the 'column_unpacker' method with duplicate columns specified in both Method 1 and Method 2. It should raise a ValueError. """ data = RAW_JSON['data'].copy() - method1_cols = ['properties'] - method2_cols = ['properties'] - with pytest.raises(ValueError, match="{'properties'} were mentioned in both method1_cols and method2_cols. It's not possible to apply two methods to the same field."): - CG.column_unpacker(json_list=data, method1_cols=method1_cols, method2_cols=method2_cols) + unpack_by_field_reference_cols = ['properties'] + unpack_by_nested_dict_transformer = ['properties'] + with pytest.raises(ValueError, match="{'properties'} were mentioned in both unpack_by_field_reference_cols and unpack_by_nested_dict_transformer. It's not possible to apply two methods to the same field."): + CG.column_unpacker(json_list=data, unpack_by_field_reference_cols=unpack_by_field_reference_cols, unpack_by_nested_dict_transformer=unpack_by_nested_dict_transformer) @pytest.mark.test_flatten_json diff --git a/viadot/flows/customer_gauge_to_adls.py b/viadot/flows/customer_gauge_to_adls.py index f314511eb..6af62a340 100644 --- a/viadot/flows/customer_gauge_to_adls.py +++ b/viadot/flows/customer_gauge_to_adls.py @@ -38,8 +38,8 @@ def __init__( ] = None, start_date: datetime = None, end_date: datetime = None, - method1_cols: List[str] = None, - method2_cols: List[str] = None, + unpack_by_field_reference_cols: List[str] = None, + unpack_by_nested_dict_transformer: List[str] = None, customer_gauge_credentials_secret: str = "CUSTOMER-GAUGE", anonymize: bool = False, columns_to_anonymize: List[str] = None, @@ -78,8 +78,8 @@ def __init__( Specifies the date type which filter date range. Defaults to None. start_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. end_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. Defaults to None. - method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. Defaults to None. - method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Defaults to None. + unpack_by_field_reference_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. Defaults to None. + unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Defaults to None. customer_gauge_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with ['client_id', 'client_secret']. Defaults to "CUSTOMER-GAUGE". vault_name (str, optional): The name of the vault from which to obtain the secret. Defaults to None. 
@@ -116,8 +116,8 @@ def __init__( self.date_field = date_field self.start_date = start_date self.end_date = end_date - self.method1_cols = method1_cols - self.method2_cols = method2_cols + self.unpack_by_field_reference_cols = unpack_by_field_reference_cols + self.unpack_by_nested_dict_transformer = unpack_by_nested_dict_transformer self.customer_gauge_credentials_secret = customer_gauge_credentials_secret # validate_df @@ -182,8 +182,8 @@ def gen_flow(self) -> Flow: date_field=self.date_field, start_date=self.start_date, end_date=self.end_date, - method1_cols=self.method1_cols, - method2_cols=self.method2_cols, + unpack_by_field_reference_cols=self.unpack_by_field_reference_cols, + unpack_by_nested_dict_transformer=self.unpack_by_nested_dict_transformer, vault_name=self.vault_name, credentials_secret=self.customer_gauge_credentials_secret, flow=self, diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index b24a09f75..72a1a013f 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -26,8 +26,8 @@ def __init__( ] = None, start_date: datetime = None, end_date: datetime = None, - method1_cols: List[str] = None, - method2_cols: List[str] = None, + unpack_by_field_reference_cols: List[str] = None, + unpack_by_nested_dict_transformer: List[str] = None, timeout: int = 3600, *args, **kwargs, @@ -51,9 +51,9 @@ def __init__( Defaults to None. end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. - method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. + unpack_by_field_reference_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. Defaults to None. - method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. + unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Defaults to None. timeout (int, optional): The time (in seconds) to wait while running this task before a timeout occurs. Defaults to 3600. @@ -66,8 +66,8 @@ def __init__( self.date_field = date_field self.start_date = start_date self.end_date = end_date - self.method1_cols = method1_cols - self.method2_cols = method2_cols + self.unpack_by_field_reference_cols = unpack_by_field_reference_cols + self.unpack_by_nested_dict_transformer = unpack_by_nested_dict_transformer super().__init__( name="customer_gauge_to_df", @@ -175,28 +175,28 @@ def _nested_dict_transformer( def column_unpacker( self, json_list: List[Dict[str, Any]] = None, - method1_cols: List[str] = None, - method2_cols: List[str] = None, + unpack_by_field_reference_cols: List[str] = None, + unpack_by_nested_dict_transformer: List[str] = None, ) -> List[Dict[str, Any]]: """ Function to unpack and modify specific columns in a list of dictionaries by using one of two methods, chosen by the user. If user would like to use field_reference_unpacker, he/she needs to provide list of fields as strings in - `method1_cols` parameter, if user would like to use nested_dict_transformer he/she needs to provide list of - fields as strings in method2_cols parameter. + `unpack_by_field_reference_cols` parameter, if user would like to use nested_dict_transformer he/she needs to provide list of + fields as strings in unpack_by_nested_dict_transformer parameter. Args: json_list (List[Dict[str, Any]): A list of dictionaries containing the data. - method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. 
+ unpack_by_field_reference_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. Defaults to None. - method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. + unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Defaults to None. Raises: ValueError: If 'json_list' is not provided. ValueError: If specified columns do not exist in the JSON data. - ValueError: If columns are mentioned in both 'method1_cols' and 'method2_cols'. + ValueError: If columns are mentioned in both 'unpack_by_field_reference_cols' and 'unpack_by_nested_dict_transformer'. Returns: List[Dict[str, Any]]: The updated list of dictionaries after column unpacking and modification. @@ -222,23 +222,23 @@ def unpack_columns(columns, unpack_function): else: logger.info(f"Column '{field}' not found.") return json_list_clean - if method1_cols and method2_cols: - duplicated_cols = set(method1_cols).intersection(set(method2_cols)) + if unpack_by_field_reference_cols and unpack_by_nested_dict_transformer: + duplicated_cols = set(unpack_by_field_reference_cols).intersection(set(unpack_by_nested_dict_transformer)) if duplicated_cols: raise ValueError( - f"{duplicated_cols} were mentioned in both method1_cols and method2_cols." + f"{duplicated_cols} were mentioned in both unpack_by_field_reference_cols and unpack_by_nested_dict_transformer." " It's not possible to apply two methods to the same field." ) else: - if method1_cols is not None: + if unpack_by_field_reference_cols is not None: json_list = unpack_columns( - columns = method1_cols, + columns = unpack_by_field_reference_cols, unpack_function = self._field_reference_unpacker ) - if method2_cols is not None: + if unpack_by_nested_dict_transformer is not None: json_list = unpack_columns( - columns = method2_cols, + columns = unpack_by_nested_dict_transformer, unpack_function = self._nested_dict_transformer ) @@ -330,8 +330,8 @@ def __call__(self): "date_field", "start_date", "end_date", - "method1_cols", - "method2_cols", + "unpack_by_field_reference_cols", + "unpack_by_nested_dict_transformer", ) def run( self, @@ -345,8 +345,8 @@ def run( ] = None, start_date: datetime = None, end_date: datetime = None, - method1_cols: List[str] = None, - method2_cols: List[str] = None, + unpack_by_field_reference_cols: List[str] = None, + unpack_by_nested_dict_transformer: List[str] = None, credentials_secret: str = "CUSTOMER-GAUGE", vault_name: str = None, ) -> pd.DataFrame: @@ -369,9 +369,9 @@ def run( Defaults to None. end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. - method1_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. + unpack_by_field_reference_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. Defaults to None. - method2_cols (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. + unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Defaults to None. credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with ['client_id', 'client_secret']. Defaults to "CUSTOMER-GAUGE". 
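# Sketch of invoking the task with the renamed keyword arguments, matching the run()
# signature in this patch. The `endpoint` argument is assumed from the CustomerGauge
# source class; the remaining values are illustrative placeholders.
from viadot.tasks import CustomerGaugeToDF  # assumed export name of this task class

df = CustomerGaugeToDF().run(
    endpoint="responses",  # assumed parameter name, mirroring the source class
    unpack_by_field_reference_cols=["properties"],
    unpack_by_nested_dict_transformer=["drivers"],
    credentials_secret="CUSTOMER-GAUGE",
)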
@@ -427,8 +427,8 @@ def run( clean_json = self.column_unpacker( json_list = total_json, - method1_cols = method1_cols, - method2_cols = method2_cols) + unpack_by_field_reference_cols = unpack_by_field_reference_cols, + unpack_by_nested_dict_transformer = unpack_by_nested_dict_transformer) logger.info("Inserting data into the DataFrame...") df = pd.DataFrame(list(map(self.flatten_json, clean_json))) df = self.square_brackets_remover(df) From f967c82f9bdf20827dae4ee9dc985f2836c62b3a Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Mon, 13 Nov 2023 14:45:50 +0100 Subject: [PATCH 62/86] =?UTF-8?q?=E2=9C=85=20Added=20more=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/test_tm1.py | 77 ++++++++++++++++++++++++++++++++--- viadot/sources/tm1.py | 8 ++-- 2 files changed, 76 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_tm1.py b/tests/integration/test_tm1.py index 3676a8ef6..ea21f0984 100644 --- a/tests/integration/test_tm1.py +++ b/tests/integration/test_tm1.py @@ -1,10 +1,13 @@ import pandas as pd - +import pytest from viadot.sources import TM1 from viadot.config import local_config +from viadot.exceptions import CredentialError,ValidationError -CUBE = local_config.get("test_cube") -VIEW = local_config.get("test_view") +CUBE = local_config.get("TM1").get("test_cube") +VIEW = local_config.get("TM1").get("test_view") +DIMENSION = local_config.get("TM1").get("test_dim") +HIERARCHY= local_config.get("TM1").get("test_hierarchy") def test_get_connection(): @@ -13,6 +16,12 @@ def test_get_connection(): assert connection is not None +def test_get_connection_fail(): + test_creds ={"address":"Addres", "port": 123, "username": "user", } + with pytest.raises(CredentialError): + tm1_source = TM1(credentials=test_creds) + + def test_get_cubes_names(): tm1_source = TM1() @@ -20,17 +29,75 @@ def test_get_cubes_names(): assert len(cubes) > 0 +def test_get_dimensions_names(): + tm1_source = TM1() + dim = tm1_source.get_dimensions_names() -def test_get_cubes_names(): + assert len(dim) > 0 + +def test_get_views_names(): tm1_source = TM1(cube=CUBE) views = tm1_source.get_views_names() assert len(views) > 0 +def test_get_hierarchies_names(): + tm1_source = TM1(dimension=DIMENSION) + hierarchies = tm1_source.get_hierarchies_names() + + assert len(hierarchies) >0 + +def test_get_available_elements(): + tm1_source = TM1(dimension=DIMENSION, hierarchy=HIERARCHY) + elements = tm1_source.get_available_elements() + + assert len(elements) >0 -def test_to_df(): +def test_to_df_view(): tm1_source = TM1(cube=CUBE, view=VIEW) df = tm1_source.to_df() assert isinstance(df, pd.DataFrame) assert df.empty is False + +def test_to_df_mdx(): + query =""" + select + { + [version].[version].[Budget] + } + on columns, + { + [company].[company].MEMBERS + } + on rows + + FROM """ + f"{CUBE}" + + tm1_source = TM1(mdx_query=query) + df = tm1_source.to_df(if_empty="pass") + + assert isinstance(df, pd.DataFrame) + +def test_to_df_fail_both(): + query =""" + select + { + [version].[version].[Budget] + } + on columns, + { + [company].[company].MEMBERS + } + on rows + + FROM """ + f"{CUBE}" + + tm1_source = TM1(mdx_query=query, cube=CUBE) + with pytest.raises(ValidationError, match="Specify only one: MDX query or cube and view."): + tm1_source.to_df(if_empty="pass") + +def test_to_df_fail_no(): + tm1_source = TM1() + with pytest.raises(ValidationError, match="MDX query or cube and view are required."): + tm1_source.to_df(if_empty="pass") diff --git 
a/viadot/sources/tm1.py b/viadot/sources/tm1.py index 77155b07c..cd91ba369 100644 --- a/viadot/sources/tm1.py +++ b/viadot/sources/tm1.py @@ -110,7 +110,7 @@ def get_views_names(self) -> list: conn = self.get_connection() return conn.views.get_all_names(self.cube) - def get_diemensions_names(self) -> list: + def get_dimensions_names(self) -> list: """ Get list of avaiable dimensions in TM1 instance. @@ -162,7 +162,9 @@ def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFr if self.mdx_query is None and (self.cube is None or self.view is None): raise ValidationError("MDX query or cube and view are required.") - if self.cube is not None and self.view is not None: + elif self.mdx_query is not None and (self.cube is not None or self.view is not None): + raise ValidationError("Specify only one: MDX query or cube and view.") + elif self.cube is not None and self.view is not None: df = conn.cubes.cells.execute_view_dataframe( cube_name=self.cube, view_name=self.view, @@ -171,8 +173,6 @@ def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFr ) elif self.mdx_query is not None: df = conn.cubes.cells.execute_mdx_dataframe(self.mdx_query) - else: - raise ValidationError("Specify only one: MDX query or cube and view.") logger.info( f"Data was successfully transformed into DataFrame: {len(df.columns)} columns and {len(df)} rows." From f245dbb55fa2f60d6551941a13d7f0472a56da23 Mon Sep 17 00:00:00 2001 From: Angelika Tarnawa Date: Mon, 13 Nov 2023 14:52:11 +0100 Subject: [PATCH 63/86] =?UTF-8?q?=F0=9F=93=9D=20updated=20documentation=20?= =?UTF-8?q?+=20formatting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/test_tm1.py | 50 +++++++++++++++++++++++++---------- viadot/sources/tm1.py | 12 ++++++--- 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/tests/integration/test_tm1.py b/tests/integration/test_tm1.py index ea21f0984..ae2b321b9 100644 --- a/tests/integration/test_tm1.py +++ b/tests/integration/test_tm1.py @@ -2,12 +2,12 @@ import pytest from viadot.sources import TM1 from viadot.config import local_config -from viadot.exceptions import CredentialError,ValidationError +from viadot.exceptions import CredentialError, ValidationError CUBE = local_config.get("TM1").get("test_cube") VIEW = local_config.get("TM1").get("test_view") DIMENSION = local_config.get("TM1").get("test_dim") -HIERARCHY= local_config.get("TM1").get("test_hierarchy") +HIERARCHY = local_config.get("TM1").get("test_hierarchy") def test_get_connection(): @@ -16,42 +16,51 @@ def test_get_connection(): assert connection is not None + def test_get_connection_fail(): - test_creds ={"address":"Addres", "port": 123, "username": "user", } + test_creds = { + "address": "Addres", + "port": 123, + "username": "user", + } with pytest.raises(CredentialError): tm1_source = TM1(credentials=test_creds) - def test_get_cubes_names(): tm1_source = TM1() cubes = tm1_source.get_cubes_names() assert len(cubes) > 0 + def test_get_dimensions_names(): tm1_source = TM1() dim = tm1_source.get_dimensions_names() assert len(dim) > 0 + def test_get_views_names(): tm1_source = TM1(cube=CUBE) views = tm1_source.get_views_names() assert len(views) > 0 + def test_get_hierarchies_names(): tm1_source = TM1(dimension=DIMENSION) hierarchies = tm1_source.get_hierarchies_names() - assert len(hierarchies) >0 + assert len(hierarchies) > 0 + def test_get_available_elements(): tm1_source = TM1(dimension=DIMENSION, hierarchy=HIERARCHY) elements = 
tm1_source.get_available_elements() - assert len(elements) >0 + assert len(elements) > 0 + def test_to_df_view(): tm1_source = TM1(cube=CUBE, view=VIEW) @@ -60,8 +69,10 @@ def test_to_df_view(): assert isinstance(df, pd.DataFrame) assert df.empty is False + def test_to_df_mdx(): - query =""" + query = ( + """ select { [version].[version].[Budget] @@ -72,15 +83,19 @@ def test_to_df_mdx(): } on rows - FROM """ + f"{CUBE}" - + FROM """ + + f"{CUBE}" + ) + tm1_source = TM1(mdx_query=query) df = tm1_source.to_df(if_empty="pass") assert isinstance(df, pd.DataFrame) + def test_to_df_fail_both(): - query =""" + query = ( + """ select { [version].[version].[Budget] @@ -91,13 +106,20 @@ def test_to_df_fail_both(): } on rows - FROM """ + f"{CUBE}" - + FROM """ + + f"{CUBE}" + ) + tm1_source = TM1(mdx_query=query, cube=CUBE) - with pytest.raises(ValidationError, match="Specify only one: MDX query or cube and view."): + with pytest.raises( + ValidationError, match="Specify only one: MDX query or cube and view." + ): tm1_source.to_df(if_empty="pass") + def test_to_df_fail_no(): tm1_source = TM1() - with pytest.raises(ValidationError, match="MDX query or cube and view are required."): + with pytest.raises( + ValidationError, match="MDX query or cube and view are required." + ): tm1_source.to_df(if_empty="pass") diff --git a/viadot/sources/tm1.py b/viadot/sources/tm1.py index cd91ba369..9a182bb97 100644 --- a/viadot/sources/tm1.py +++ b/viadot/sources/tm1.py @@ -33,7 +33,8 @@ def __init__( **kwargs, ): """ - Creating an instance of TM1 source class. + Creating an instance of TM1 source class. To download the data to the dataframe user needs to specify MDX query or + combination of cube and view. Args: credentials (Dict[str, Any], optional): Credentials stored in a dictionary. Required credentials: username, @@ -42,7 +43,7 @@ def __init__( mdx_query (str, optional): MDX select query needed to download the data. Defaults to None. cube (str, optional): Cube name from which data will be downloaded. Defaults to None. view (str, optional): View name from which data will be downloaded. Defaults to None. - dimension (str, optional): Diemension name. Defaults to None. + dimension (str, optional): Dimension name. Defaults to None. hierarchy (str, optional): Hierarchy name. Defaults to None. limit (str, optional): How many rows should be extracted. If None all the avaiable rows will be downloaded. Defaults to None. @@ -147,7 +148,8 @@ def get_available_elements(self) -> list: def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFrame: """ - Function for downloading data from TM1 to pd.DataFrame. + Function for downloading data from TM1 to pd.DataFrame. To download the data to the dataframe user needs to specify MDX query or + combination of cube and view. Args: if_empty (Literal["warn", "fail", "skip"], optional): What to do if output DataFrame is empty. Defaults to "skip". 
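# Small sketch of the validation rules these TM1 tests exercise, assuming the TM1 source
# class from this patch: pass either an MDX query or a cube/view pair to to_df(), never
# both. Cube, view and query strings below are placeholders.
from viadot.sources import TM1
from viadot.exceptions import ValidationError

df_view = TM1(cube="my_cube", view="my_view").to_df()  # cube + view variant

mdx = "SELECT {[version].[version].[Budget]} ON COLUMNS, {[company].[company].MEMBERS} ON ROWS FROM my_cube"
df_mdx = TM1(mdx_query=mdx).to_df()  # MDX variant

try:
    TM1(mdx_query=mdx, cube="my_cube").to_df()  # both at once is rejected
except ValidationError as exc:
    print(exc)  # "Specify only one: MDX query or cube and view."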
@@ -162,7 +164,9 @@ def to_df(self, if_empty: Literal["warn", "fail", "skip"] = "skip") -> pd.DataFr if self.mdx_query is None and (self.cube is None or self.view is None): raise ValidationError("MDX query or cube and view are required.") - elif self.mdx_query is not None and (self.cube is not None or self.view is not None): + elif self.mdx_query is not None and ( + self.cube is not None or self.view is not None + ): raise ValidationError("Specify only one: MDX query or cube and view.") elif self.cube is not None and self.view is not None: df = conn.cubes.cells.execute_view_dataframe( From 4277c1b840496eba5a41a17fa02ff25d525e5a0e Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Mon, 13 Nov 2023 16:26:49 +0100 Subject: [PATCH 64/86] add credentials_secret parameter --- viadot/sources/customer_gauge.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/viadot/sources/customer_gauge.py b/viadot/sources/customer_gauge.py index 6ceeccd02..5fff4387e 100644 --- a/viadot/sources/customer_gauge.py +++ b/viadot/sources/customer_gauge.py @@ -20,6 +20,7 @@ def __init__( endpoint: Literal["responses", "non-responses"] = None, url: str = None, credentials: Dict[str, Any] = None, + credentials_secret: str = "CUSTOMER-GAUGE", ): """ A class to connect and download data using Customer Gauge API. @@ -31,7 +32,8 @@ def __init__( endpoint (Literal["responses", "non-responses"]): Indicate which endpoint to connect. Defaults to None. url (str, optional): Endpoint URL. Defaults to None. credentials (Dict[str, Any], optional): Credentials to connect with API containing client_id, client_secret. Defaults to None. - + credentials_secret (str, optional): The name of the secret stored in local_config containing a + dictionary with ['client_id', 'client_secret']. Defaults to "CUSTOMER-GAUGE". Raises: ValueError: If endpoint is not provided or incorect. CredentialError: If credentials are not provided in local_config or directly as a parameter @@ -50,11 +52,12 @@ def __init__( raise ValueError( "Provide endpoint name. Choose: 'responses' or 'non-responses'. Otherwise, provide URL" ) + self.credentials_secret = credentials_secret if credentials is not None: self.credentials = credentials else: - self.credentials = local_config.get("CustomerGauge") + self.credentials = local_config.get(credentials_secret) if self.credentials is None: raise CredentialError("Credentials not provided.") From 49ecccb15d0ddc543db04a4420d63a349e97fd4d Mon Sep 17 00:00:00 2001 From: "hha.ext" Date: Mon, 13 Nov 2023 16:35:28 +0100 Subject: [PATCH 65/86] add error raising tests --- tests/integration/test_customer_gauge.py | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/integration/test_customer_gauge.py b/tests/integration/test_customer_gauge.py index 596cf029c..a20ea006e 100644 --- a/tests/integration/test_customer_gauge.py +++ b/tests/integration/test_customer_gauge.py @@ -4,6 +4,7 @@ import pytest from viadot.sources import CustomerGauge +from viadot.exceptions import CredentialError ENDPOINT = random.choice(["responses", "non-responses"]) CG = CustomerGauge(endpoint=ENDPOINT) @@ -38,3 +39,27 @@ def test_endpoint_url_argument(): CG = CustomerGauge(url=ENDPOINT_URL) json_response = CG.get_json_response() assert isinstance(json_response, dict) + +@pytest.mark.endpoint_valueerror +def test_wrong_endpoint_valueerror_raising(): + with pytest.raises(ValueError, match=r"Incorrect endpoint name. 
Choose: 'responses' or 'non-responses'"): + wrong_endpoint_name = "wrong-endpoint" + CG = CustomerGauge(endpoint = wrong_endpoint_name) + +@pytest.mark.endpoint_valueerror +def test_no_endpoint_valueerror_raising(): + with pytest.raises(ValueError, match=r"Provide endpoint name. Choose: 'responses' or 'non-responses'. Otherwise, provide URL"): + CG = CustomerGauge() + +@pytest.mark.endpoint_credentialserror +def test_credentialserror_raising(): + wrong_secret="wrong" + with pytest.raises(CredentialError, match=r"Credentials not provided."): + CG = CustomerGauge(endpoint=ENDPOINT, credentials_secret=wrong_secret) + +@pytest.mark.get_cursor_valueerror +def test_get_cursor_valueerror_raising(): + wrong_json = {} + with pytest.raises(ValueError, match=r"Provided argument doesn't contain 'cursor' value. Pass json returned from the endpoint."): + CG = CustomerGauge(endpoint=ENDPOINT) + CG.get_cursor(json_response=wrong_json) \ No newline at end of file From d2fb298a626537ad51037276df3c5f6ff512fd9e Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Mon, 13 Nov 2023 16:58:50 +0100 Subject: [PATCH 66/86] =?UTF-8?q?=F0=9F=93=9D=20Added=20missing=20docstrin?= =?UTF-8?q?gs=20and=20return=20type?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/sources/epicor.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/viadot/sources/epicor.py b/viadot/sources/epicor.py index a3e59c432..ad28019d0 100644 --- a/viadot/sources/epicor.py +++ b/viadot/sources/epicor.py @@ -104,6 +104,7 @@ def parse_orders_xml(xml_data: str) -> pd.DataFrame: Args: xml_data (str, required): Response from Epicor API in form of xml + Returns: pd.DataFrame: DataFrame containing parsed orders data. """ @@ -221,7 +222,11 @@ def __init__( super().__init__(*args, credentials=credentials, **kwargs) def generate_token(self) -> str: - "Function to generate API access token that is valid for 24 hours" + """Function to generate API access token that is valid for 24 hours. + + Returns: + str: Generated token. + """ url = ( "http://" @@ -243,7 +248,11 @@ def generate_token(self) -> str: return token def generate_url(self) -> str: - "Function to generate url to download data" + """Function to generate url to download data + + Returns: + str: Output url string. + """ return ( "http://" @@ -282,8 +291,12 @@ def get_xml_response(self): ) return response - def to_df(self): - "Function for creating pandas DataFrame from Epicor API response" + def to_df(self) -> pd.DataFrame: + """Function for creating pandas DataFrame from Epicor API response + + Returns: + pd.DataFrame: Output DataFrame. 
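# Hedged usage sketch for the Epicor source documented above. The credential keys other
# than "username" and "port" are assumptions, and the filters XML structure is not
# reproduced here because it is only partially shown in this patch.
from viadot.config import local_config
from viadot.sources import Epicor

filters_xml = "..."  # your Epicor orders filter XML goes here (structure omitted)

epicor = Epicor(
    base_url=local_config.get("EPICOR").get("test_url"),
    credentials={"username": "user", "password": "***", "port": 443},  # placeholder values
    filters_xml=filters_xml,
)
epicor.validate_filter()  # raises DataRangeError when the date range filter is invalid
df = epicor.to_df()       # token generation and URL building happen inside the source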
+ """ data = self.get_xml_response() df = parse_orders_xml(data) return df From 738f2a4bcdea18710877df721e8199ca1aeeb289 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Mon, 13 Nov 2023 17:01:08 +0100 Subject: [PATCH 67/86] =?UTF-8?q?=E2=9C=85=20Added=20missing=20tests=20for?= =?UTF-8?q?=20Epicor=20source=20class?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/test_epicor.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_epicor.py b/tests/integration/test_epicor.py index 8e9155059..77c338a88 100644 --- a/tests/integration/test_epicor.py +++ b/tests/integration/test_epicor.py @@ -1,7 +1,8 @@ import pytest +import pandas as pd from viadot.config import local_config -from viadot.exceptions import DataRangeError +from viadot.exceptions import CredentialError, DataRangeError from viadot.sources import Epicor @@ -48,3 +49,25 @@ def test_connection(epicor): def test_validate_filter(epicor_error): with pytest.raises(DataRangeError): epicor_error.validate_filter() + + +def test_credentials_not_provided(): + with pytest.raises(CredentialError): + Epicor( + base_url=local_config.get("EPICOR").get("test_url"), + credentials={"username": "user12", "port": 1111}, + filters_xml=""" + + + 001 + + 2022-05-16 + 3 + + """, + ) + + +def test_to_df_return_type(epicor): + df = epicor.to_df() + assert isinstance(df, pd.DataFrame) From cdb879f43c3f04222507303ba2b289133db763eb Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Tue, 14 Nov 2023 12:21:53 +0100 Subject: [PATCH 68/86] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Added=20`set=5Fprefe?= =?UTF-8?q?ct=5Fkv`=20parameter=20to=20`BigQueryToADLS`=20flow?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/flows/bigquery_to_adls.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/viadot/flows/bigquery_to_adls.py b/viadot/flows/bigquery_to_adls.py index e09981ebe..cd092066f 100644 --- a/viadot/flows/bigquery_to_adls.py +++ b/viadot/flows/bigquery_to_adls.py @@ -43,6 +43,7 @@ def __init__( if_exists: str = "replace", validate_df_dict: dict = None, timeout: int = 3600, + set_prefect_kv: bool = False, *args: List[Any], **kwargs: Dict[str, Any], ): @@ -84,6 +85,7 @@ def __init__( When passed, `validate_df` task validation tests are triggered. Defaults to None. timeout(int, optional): The amount of time (in seconds) to wait while running this task before a timeout occurs. Defaults to 3600. + set_prefect_kv(int, optional): Specifies whether to set a key-value pair in the Prefect KV Store. Defaults to False. 
""" # BigQueryToDF self.query = query @@ -125,6 +127,8 @@ def __init__( adls_dir_path, "schema", self.now + ".json" ) + self.set_prefect_kv = set_prefect_kv + super().__init__(*args, name=name, **kwargs) self.gen_flow() @@ -205,4 +209,5 @@ def gen_flow(self) -> Flow: df_to_be_loaded.set_upstream(dtypes_dict, flow=self) file_to_adls_task.set_upstream(df_to_file, flow=self) json_to_adls_task.set_upstream(dtypes_to_json_task, flow=self) - set_key_value(key=self.adls_dir_path, value=self.adls_file_path) + if self.set_prefect_kv is True: + set_key_value(key=self.adls_dir_path, value=self.adls_file_path) From e514deb4b2736d2a25c3fd351b7d0515d781470e Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Tue, 14 Nov 2023 12:31:42 +0100 Subject: [PATCH 69/86] =?UTF-8?q?=F0=9F=93=9D=20Updated=20Changelog?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb9154c7c..1ebe1047d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added `TM1` source class. - Added `TM1ToDF` task class. +- Added `set_prefect_kv` parameter to `BigQueryToADLS` with `False` as a default. If there is a need to create new pair in KV Store the parameter can be changed to `True`. ### Changed - Splitted test for Eurostat on source tests and task tests From 3991b5f0ef4854b2f730a5ea8ae63ec4e540a301 Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Fri, 10 Nov 2023 16:09:29 +0100 Subject: [PATCH 70/86] Sharepoint list connector extension for multichoice fields with some small fixes and docstring update --- CHANGELOG.md | 19 ++ tests/integration/test_sharepoint.py | 138 +++++++++++--- viadot/flows/sharepoint_to_adls.py | 80 ++++---- viadot/sources/sharepoint.py | 263 ++++++++++++++++++++------- viadot/tasks/sharepoint.py | 100 ++++++++-- 5 files changed, 457 insertions(+), 143 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 507c590cf..208908bb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
## [Unreleased] +- Modified `SharepointList` source class: + -> docstrings update +- Modified `SharepointToADLS` flow class: + -> docstrings update + -> changed key_value_param: bool = False to prevent forced KV store append +- Modified `SharepointListToADLS` flow class: + -> changed key_value_param: bool = False to prevent forced KV store append +- Modified `SharepointList` source class: + -> docstrings update + -> Changed `_unpack_fields` method to handle Sharepoint MultiChoiceField type + small improvements + -> Changed `get_fields` method to handle special characters - different approach to call get() and execute_query() + -> Renamed method from `select_expandable_user_fields` to `select_fields` + update for MultiChoiceField type + -> Changed `check_filters` method errors messages and more checks added + -> Changed `operators_mapping` method errors messages + -> Changed `make_filter_for_df` method errors messages +- Modified `SharepointListToDF` task class: + -> docstrings update + -> Added `_rename_duplicated_fields` method to find and rename duplicated columns + ## [0.4.21] - 2023-10-26 ### Added diff --git a/tests/integration/test_sharepoint.py b/tests/integration/test_sharepoint.py index 502ffded0..38fdfa8a7 100644 --- a/tests/integration/test_sharepoint.py +++ b/tests/integration/test_sharepoint.py @@ -2,7 +2,6 @@ import re import pandas as pd -from copy import deepcopy import pytest from prefect.tasks.secrets import PrefectSecret @@ -10,7 +9,7 @@ from viadot.exceptions import CredentialError from viadot.sources import Sharepoint from viadot.task_utils import df_get_data_types_task -from viadot.tasks.sharepoint import SharepointToDF +from viadot.tasks.sharepoint import SharepointToDF, SharepointListToDF from viadot.sources import SharepointList @@ -168,10 +167,11 @@ def test_get_data_types(file_name): assert "String" in dtypes +### SECTION FOR TESTING SHAREPOINT LIST CONNECTOR ### @pytest.fixture(scope="session") def sharepoint_list(): """ - Fixture for creating a Sharepoint class instance. + Fixture for creating a SharepointList class instance. The class instance can be used within a test functions to interact with Sharepoint. """ spl = SharepointList() @@ -187,15 +187,31 @@ def test_valid_filters(sharepoint_list): assert result is True -def test_invalid_dtype(sharepoint_list): +def test_filters_missing_dtype(sharepoint_list): + filters = { + "filter1": {"operator1": ">", "value1": 10}, + } + with pytest.raises( + ValueError, + match=re.escape("dtype for filter1 is missing!"), + ): + sharepoint_list.check_filters(filters) + + +def test_filters_invalid_dtype(sharepoint_list): filters = { "filter1": {"dtype": "list", "operator1": ">", "value1": 10}, } - with pytest.raises(ValueError, match="dtype not allowed!"): + with pytest.raises( + ValueError, + match=re.escape( + "dtype not allowed! Expected: ['datetime', 'date', 'bool', 'int', 'float', 'complex', 'str'] got: list ." 
+ ), + ): sharepoint_list.check_filters(filters) -def test_missing_operator1(sharepoint_list): +def test_filters_missing_operator1(sharepoint_list): filters = { "filter1": {"dtype": "int", "value1": 10}, } @@ -203,23 +219,28 @@ def test_missing_operator1(sharepoint_list): sharepoint_list.check_filters(filters) -def test_invalid_operator1(sharepoint_list): +def test_filters_invalid_operator1(sharepoint_list): filters = { "filter1": {"dtype": "int", "operator1": "*", "value1": 10}, } - with pytest.raises(ValueError, match="Operator type not allowed!"): + with pytest.raises( + ValueError, + match=re.escape( + "Operator1 type not allowed! Expected: ['<', '>', '<=', '>=', '==', '!='] got: * ." + ), + ): sharepoint_list.check_filters(filters) -def test_missing_value1(sharepoint_list): +def test_filters_missing_value1(sharepoint_list): filters = { "filter1": {"dtype": "int", "operator1": ">", "value1": None}, } - with pytest.raises(ValueError, match="Value for operator1 is missing!"): + with pytest.raises(ValueError, match="Value1 for operator1 is missing!"): sharepoint_list.check_filters(filters) -def test_missing_operators_conjuction(sharepoint_list): +def test_filters_missing_operators_conjuction(sharepoint_list): filters = { "filter1": { "dtype": "int", @@ -229,11 +250,16 @@ def test_missing_operators_conjuction(sharepoint_list): "value2": 20, }, } - with pytest.raises(ValueError, match="Operators for conjuction is missing!"): + with pytest.raises( + ValueError, + match=re.escape( + "Operator for conjuction is missing! Expected: ['&', '|'] got empty." + ), + ): sharepoint_list.check_filters(filters) -def test_invalid_operators_conjuction(sharepoint_list): +def test_filters_invalid_operators_conjuction(sharepoint_list): filters = { "filter1": { "dtype": "int", @@ -244,11 +270,16 @@ def test_invalid_operators_conjuction(sharepoint_list): "operators_conjuction": "!", }, } - with pytest.raises(ValueError, match="Operators for conjuction not allowed!"): + with pytest.raises( + ValueError, + match=re.escape( + "Operator for conjuction not allowed! Expected: ['&', '|'] got ! ." + ), + ): sharepoint_list.check_filters(filters) -def test_invalid_filters_conjuction(sharepoint_list): +def test_filters_conjuction_not_allowed(sharepoint_list): filters = { "filter1": { "dtype": "int", @@ -258,7 +289,32 @@ def test_invalid_filters_conjuction(sharepoint_list): }, } with pytest.raises( - ValueError, match="Filters operators for conjuction not allowed!" + ValueError, + match=re.escape( + "Filters conjuction allowed only when more then one filter provided!" + ), + ): + sharepoint_list.check_filters(filters) + + +def test_filters_invalid_conjuction(sharepoint_list): + filters = { + "filter1": { + "dtype": "int", + "value1": 10, + "operator1": ">", + "filters_conjuction": "!", + }, + "filter2": { + "dtype": "int", + "operator1": "==", + }, + } + with pytest.raises( + ValueError, + match=re.escape( + "Filter operator for conjuction not allowed! Expected: ['&', '|'] got ! ." 
+ ), ): sharepoint_list.check_filters(filters) @@ -266,27 +322,47 @@ def test_invalid_filters_conjuction(sharepoint_list): def test_valid_mapping(sharepoint_list): filters = { "filter1": { + "dtype": "int", + "value1": 10, + "value2": 20, "operator1": ">", "operator2": "<=", "operators_conjuction": "&", "filters_conjuction": "|", }, - "filter2": {"operator1": "==", "operator2": "!=", "operators_conjuction": "|"}, + "filter2": { + "dtype": "int", + "value1": 30, + "value2": 0, + "operator1": "==", + "operator2": "!=", + "operators_conjuction": "|", + }, } expected_result = { "filter1": { + "dtype": "int", + "value1": 10, + "value2": 20, "operator1": "gt", "operator2": "le", "operators_conjuction": "and", "filters_conjuction": "or", }, - "filter2": {"operator1": "eq", "operator2": "ne", "operators_conjuction": "or"}, + "filter2": { + "dtype": "int", + "value1": 30, + "value2": 0, + "operator1": "eq", + "operator2": "ne", + "operators_conjuction": "or", + }, } - result = sharepoint_list.operators_mapping(deepcopy(filters)) + result = sharepoint_list.operators_mapping(filters) assert result == expected_result -def test_invalid_comparison_operator(sharepoint_list): +def test_operators_mapping_invalid_comparison_operator(sharepoint_list): filters = { "filter1": { "operator1": "*", @@ -297,10 +373,10 @@ def test_invalid_comparison_operator(sharepoint_list): } error_message = "This comparison operator: * is not allowed. Please read the function documentation for details!" with pytest.raises(ValueError, match=re.escape(error_message)): - sharepoint_list.operators_mapping(deepcopy(filters)) + sharepoint_list.operators_mapping(filters) -def test_invalid_logical_operator(sharepoint_list): +def test_operators_mapping_invalid_logical_operator(sharepoint_list): filters = { "filter1": { "operator1": ">", @@ -309,9 +385,23 @@ def test_invalid_logical_operator(sharepoint_list): "filters_conjuction": "|", }, } - error_message = "This conjuction(logical) operator: ! is not allowed. Please read the function documentation for details!" + error_message = "This conjuction (logical) operator: ! is not allowed. Please read the function documentation for details!" + with pytest.raises(ValueError, match=re.escape(error_message)): + sharepoint_list.operators_mapping(filters) + + +def test_operators_mapping_invalid_filters_logical_operator(sharepoint_list): + filters = { + "filter1": { + "operator1": ">", + "operator2": "<=", + "operators_conjuction": "&", + "filters_conjuction": "!", + }, + } + error_message = "This filters conjuction (logical) operator: ! is not allowed. Please read the function documentation for details!" with pytest.raises(ValueError, match=re.escape(error_message)): - sharepoint_list.operators_mapping(deepcopy(filters)) + sharepoint_list.operators_mapping(filters) def test_single_filter_datetime_api(sharepoint_list): diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 410538e7b..6191317d0 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -42,6 +42,7 @@ def __init__( if_exists: str = "replace", validate_df_dict: dict = None, timeout: int = 3600, + key_value_param: bool = False, *args: List[any], **kwargs: Dict[str, Any], ): @@ -69,6 +70,7 @@ def __init__( dataframe. If defined, triggers the `validate_df` task from task_utils. Defaults to None. timeout(int, optional): The amount of time (in seconds) to wait while running this task before a timeout occurs. Defaults to 3600. 
+ key_value_param (bool, optional): Wheter to do key-value parameters in KV Store or not. Defaults to False. """ # SharepointToDF self.if_empty = if_empty @@ -86,6 +88,7 @@ def __init__( self.adls_sp_credentials_secret = adls_sp_credentials_secret self.if_exists = if_exists self.output_file_extension = output_file_extension + self.key_value_param = key_value_param self.now = str(pendulum.now("utc")) if self.local_dir_path is not None: self.local_file_path = ( @@ -177,7 +180,8 @@ def gen_flow(self) -> Flow: file_to_adls_task.set_upstream(df_to_file, flow=self) json_to_adls_task.set_upstream(dtypes_to_json_task, flow=self) - set_key_value(key=self.adls_dir_path, value=self.adls_file_path) + if self.key_value_param == True: + set_key_value(key=self.adls_dir_path, value=self.adls_file_path) @staticmethod def slugify(name): @@ -188,42 +192,35 @@ class SharepointListToADLS(Flow): def __init__( self, name: str, - list_title: str = None, - site_url: str = None, + list_title: str, + site_url: str, + path: str, + adls_dir_path: str, + adls_file_name: str, + filters: dict = None, required_fields: List[str] = None, field_property: str = "Title", - filters: dict = None, row_count: int = 5000, + adls_sp_credentials_secret: str = None, sp_cert_credentials_secret: str = None, vault_name: str = None, - path: str = None, - adls_dir_path: str = None, - adls_file_name: str = None, - adls_sp_credentials_secret: str = None, overwrite_adls: bool = True, output_file_extension: str = ".parquet", validate_df_dict: dict = None, + key_value_param: bool = False, *args: List[any], **kwargs: Dict[str, Any], ): - """ - Run Flow SharepointListToADLS. + """_summary_ Args: - name (str): Prefect flow name. - list_title (str): Title of Sharepoint List. Default to None. - site_url (str): URL to set of Sharepoint Lists. Default to None. - required_fields (List[str]): Required fields(columns) need to be extracted from - Sharepoint List. Default to None. - field_property (List[str]): Property to expand fields with expand query method. - For example: User fields could be expanded and "Title" - or "ID" could be extracted - -> usefull to get user name instead of ID - All properties can be found under list.item.properties. - WARNING! Field types and properties might change which could - lead to errors - extension of sp connector would be required. - Default to ["Title"] - filters (dict): Dictionary with operators which filters the SharepointList output. + name (str): Prefect flow name. + list_title (str): Title of Sharepoint List. + site_url (str): URL to set of Sharepoint Lists. + path (str): Local file path. Default to None. + adls_dir_path (str): Azure Data Lake destination folder/catalog path. Defaults to None. + adls_file_name (str): Name of file in ADLS. Defaults to None. + filters (dict, optional): Dictionary with operators which filters the SharepointList output. allowed dtypes: ('datetime','date','bool','int', 'float', 'complex', 'str') allowed conjuction: ('&','|') allowed operators: ('<','>','<=','>=','==','!=') @@ -247,16 +244,27 @@ def __init__( 'operator1':'==', }, } - row_count (int): Number of downloaded rows in single request. Default to 5000. - sp_cert_credentials_secret (str): Credentials to verify Sharepoint connection. Default to None. - vault_name (str): KeyVaultSecret name. Default to None. - path (str): Local file path. Default to None. - adls_dir_path (str): Azure Data Lake destination folder/catalog path. Defaults to None. - adls_file_name (str, optional): Name of file in ADLS. Defaults to None. 
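# Sketch of running the reworked SharepointListToADLS flow with the filter-dictionary
# format documented above. List title, site URL, paths and filter values are placeholders;
# key_value_param stays False so nothing is appended to the Prefect KV Store.
from viadot.flows import SharepointListToADLS  # assumed import path for this flow class

flow = SharepointListToADLS(
    name="sharepoint_list_to_adls",
    list_title="My List",                                  # placeholder
    site_url="https://tenant.sharepoint.com/sites/site/",  # placeholder, trailing "/" included
    path="my_list.parquet",
    adls_dir_path="raw/sharepoint",
    adls_file_name="my_list.parquet",
    required_fields=["Title", "Created", "Factory"],       # placeholder columns
    filters={"Factory": {"dtype": "str", "value1": "NM-PL", "operator1": "=="}},
    row_count=5000,
    key_value_param=False,
)
flow.run()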
- adls_sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with - ACCOUNT_NAME and Service Principal credentials (TENANT_ID, CLIENT_ID, - CLIENT_SECRET) for the Azure Data Lake. Defaults to None. - overwrite_adls (bool, optional): Whether to overwrite files in the lake. Defaults to True. + Defaults to None. + required_fields (List[str], optional): Required fields(columns) need to be extracted from + Sharepoint List. Defaults to None. + field_property (str, optional): Property to expand fields with expand query method. + For example: User fields could be expanded and "Title" + or "ID" could be extracted + -> usefull to get user name instead of ID + All properties can be found under list.item.properties. + WARNING! Field types and properties might change which could + lead to errors - extension of sp connector would be required. + Default to ["Title"]. Defaults to "Title". + row_count (int, optional): Number of downloaded rows in single request.Defaults to 5000. + adls_sp_credentials_secret (str, optional): Credentials to connect to Azure ADLS + If not passed it will take cred's from your .config/credentials.json Defaults to None. + sp_cert_credentials_secret (str, optional): Credentials to verify Sharepoint connection. + If not passed it will take cred's from your .config/credentials.json Default to None. + vault_name (str, optional): KeyVaultSecret name. Default to None. + overwrite_adls (bool, optional): Whether to overwrite files in the lake. Defaults to True. + output_file_extension (str, optional): _description_. Defaults to ".parquet". + validate_df_dict (dict, optional): Wheter to do an extra df validation before ADLS upload or not to do. Defaults to None. + key_value_param (bool, optional): Wheter to do key-value parameters in KV Store or not. Defaults to False. Returns: .parquet file inside ADLS. @@ -280,6 +288,7 @@ def __init__( self.overwrite = overwrite_adls self.adls_sp_credentials_secret = adls_sp_credentials_secret self.output_file_extension = output_file_extension + self.key_value_param = key_value_param self.now = str(pendulum.now("utc")) if self.path is not None: self.local_file_path = ( @@ -370,7 +379,8 @@ def gen_flow(self) -> Flow: file_to_adls_task.set_upstream(df_to_file, flow=self) json_to_adls_task.set_upstream(dtypes_to_json_task, flow=self) - set_key_value(key=self.adls_dir_path, value=self.adls_file_path) + if self.key_value_param == True: + set_key_value(key=self.adls_dir_path, value=self.adls_file_path) @staticmethod def slugify(name): diff --git a/viadot/sources/sharepoint.py b/viadot/sources/sharepoint.py index 7f57bd658..7f1bc523c 100644 --- a/viadot/sources/sharepoint.py +++ b/viadot/sources/sharepoint.py @@ -85,8 +85,10 @@ def download_file( class SharepointList(Source): """ - A Sharepoint_List class to connect and download data from sharpoint lists. - + A Sharepoint_List class to connect and download data from Sharepoint lists. + Warning! + Please be carefull with selection of the column names because once sharepoint list is opened inside a browser it may display columns in different languages. + Because of that the resulting file or output might have different column names then the one which u see in the browser. 
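# Sketch of the lower-level SharepointList source calls touched in this patch, assuming
# certificate credentials are available in local_config under "SHAREPOINT_CERT". Site URL,
# list title and column names are placeholders.
from viadot.sources import SharepointList

spl = SharepointList()
site_url = "https://tenant.sharepoint.com/sites/site/"  # placeholder, with trailing "/"
spl.get_connection(site_url=site_url)

selected_fields = spl.select_fields(
    list_title="My List",                             # placeholder
    site_url=site_url,
    required_fields=["Title", "Created", "Factory"],  # placeholder columns
    field_property="Title",
)
# selected_fields now holds the internal field names, the expandable fields with the
# property to extract, and any MultiChoice fields, as described in select_fields() above.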
Args: credentials (dict): Credentials should include: - "tenant" @@ -102,6 +104,20 @@ def __init__( *args, **kwargs, ): + """_summary_ + + Args: + credentials (Dict[str, Any], optional): Credentials should include: + - "tenant" + - "client_id" + - "scopes" + - "thumbprint" + - "private_key" + + Raises: + CredentialError: If no credentials are pased + and local config doesn't contain them neiter + """ DEFAULT_CREDENTIALS = local_config.get("SHAREPOINT_CERT") credentials = credentials or DEFAULT_CREDENTIALS if credentials is None: @@ -109,11 +125,16 @@ def __init__( super().__init__(*args, credentials=credentials, **kwargs) - def get_connection( - self, - site_url: str = None, - ): - # Connecting into Sharepoint with AuthenticationContext + def get_connection(self, site_url: str): + """Function for connecting into Sharepoint with AuthenticationContext + + Args: + site_url (str): url of the sharepoint list + + Returns: + ctx: authentication context + """ + logger.info("Connecting into Sharepoint with AuthenticationContexts") try: auth_context = AuthenticationContext(site_url) auth_context.with_client_certificate( @@ -132,51 +153,80 @@ def get_connection( return self.ctx - # Function for extracting list items from search fields def _unpack_fields( self, list_item, - selected_fields: dict = None, + selected_fields: dict, ) -> dict: + """Function for extracting and unpacking list items from the search fields + + Args: + list_items (office365 list item): A list with office365 list item objects (rows) + selected_fields (dict): A dict with fields selected for ingestion, generated by SharepointList.select_fields() + + Raises: + ValueError: "Check if given field property is valid!" + ValueError: "Get nested dict for not recognized type of field! Check field types in the source" + ValueError: "Get empty properties for list items" + + Returns: + dict: A dictionary with Column: Value pairs for each row from the list + """ # Creating the body of dictionary new_dict = dict() # For loop scanning the propertys of searching fields item_values_dict = list_item.properties - for field, val in item_values_dict.items(): - nested_dict = get_nested_dict(val) - # Check if field has expandable type - if field in selected_fields["FieldToExpand"]: + if item_values_dict: + for field, val in item_values_dict.items(): + nested_dict = get_nested_dict(val) # Check if the values are nested if nested_dict != None: - # It might be that there are different field properties than expected - nested_value = nested_dict.get( - selected_fields["FieldExpandProperty"] - ) - if nested_value != None: - new_dict[field] = nested_value + # Check if field has expandable type + if field in selected_fields["FieldToExpand"]: + # It might be that there are different field properties than expected + nested_value = nested_dict.get( + selected_fields["FieldExpandProperty"] + ) + if nested_value != None: + new_dict[field] = nested_value + else: + raise ValueError("Check if given field property is valid!") + elif field in selected_fields["MultiChoiceField"]: + # Field type of multi choice could have more than 1 selection. + new_dict[field] = ";".join(nested_dict.values()) else: - logger.info("Property of the extandable field not recognized!") - raise ValueError("Check if given field property is valid!") - elif field in selected_fields["MultiChoiceField"]: - # Field type of multi choice could have more than 1 selection. - new_dict[field] = ";".join(nested_dict.values()) + raise ValueError( + "Get nested dict for not recognized type of field! 
Check field types in the source" + ) else: - raise ValueError( - "Get nested dict for not recognized type of field! Check field types in the source" - ) - else: - new_dict[field] = val - + new_dict[field] = val + else: + raise ValueError( + "Get empty properties for list items. Check if parameter list_item collection containes any data -> item objects." + ) return new_dict def get_fields( self, - list_title: str = None, - site_url: str = None, + list_title: str, + site_url: str, required_fields: List[str] = None, - ): - ctx = self.get_connection(site_url=site_url) + ) -> List: + """ + Function for geting list of fields objects from the sharepoint list. + It can get all fields available if required_fields not passed + or just the one which are in the list required_fields. + + Args: + list_title (str): name of the sharepoint list + site_url (str): url to the sharepoint list with "/" at the end + required_fields (List[str], optional ): List of required fields to ingest. It will get all fields if not passed. + + Returns: + List: list with office365 sharepoint list field objects + """ + ctx = self.get_connection(site_url=site_url) # Get list of lists object by List Title self.list_object = ctx.web.lists.get_by_title(list_title) list_fields_all = self.list_object.fields @@ -200,18 +250,32 @@ def get_fields( def select_fields( self, - list_title: str = None, - site_url: str = None, + list_title: str, + site_url: str, required_fields: List[str] = None, field_property: str = "Title", ) -> dict: """ Method to create a data structure for handling info about - selection of fields with details about possible expansion for more data or details. + selection of fields with details about possible expansion for more data or details. Field types to extract more values can be: "User*", "MultiChoice" field_property to expand can be: ID, Title, FieldTypeKind, TypeAsString and many more. -> more properties can be discovered by getting list.item.properties. - Default to "Title" + + Args: + list_title (str): _description_. Defaults to None. + site_url (str): _description_. Defaults to None. + required_fields (List[str], optional): _description_. Defaults to None. + field_property (str, optional): Property to extract from nested fields + like column with type User*. Defaults to "Title". + + Returns: + dict: selected_fields = { + "FieldInternalNames": List of fields to select with its InternalNames (from api), + "FieldToExpand": fields_to_expand,-> fields which could be expanded to get more data from API + "FieldExpandProperty": field_property, property of the expandable field which will be extracted + "MultiChoiceField": List of fields which can have multiple values in 1 row + } """ list_fields = self.get_fields( @@ -248,30 +312,73 @@ def select_fields( def check_filters( self, - filters: dict = None, + filters: dict, ) -> bool: """ Function to check if filters dict is valid. - example1: if operator2 is present value2 must be in place as well - example2: if dtype is not on allowed list it will throw an error + Please check and apply only allowed filter settings: + allowed_dtypes = ["datetime", "date", "bool", "int", "float", "complex", "str"] + allowed_conjuction = ["&", "|"] + allowed_operators = ["<", ">", "<=", ">=", "==", "!="] + Operator conjuction is only possible if there are 2 values like: value <= 1 | value == 5 + Filter conjuction is only possible if there are more then 1 filters for ex. 
date and creator + + Args: + filters (dict): A dictionary containing filter settings + Example: + filters = { + "Created": { + "dtype": "datetime", + "value1": yesterday_date, + "value2": today_date, + "operator1": ">=", + "operator2": "<=", + "operators_conjuction": "&", + "filters_conjuction": "&", + }, + "Factory": { + "dtype": "str", + "value1": "NM-PL", + "operator1": "==", + }, + } + + Raises: + ValueError: If dtype not in allowed list + ValueError: If comparison operator1 not in allowed list + ValueError: If value for operator1 is missing + ValueError: If comparison operator1 for the first value is missing + ValueError: If comparison operator2 not in allowed list + ValueError: If value for operator2 is missing + ValueError: If comparison operator2 for the first value is missing + ValueError: If operator conjuction is missing while there are 2 values and 2 operators passed + ValueError: If operator conjuction is not in the allowed list + ValueError: If operator conjuction provided why only one filter value is given + ValueError: If filter conjuction provided without 2nd filter + ValueError: If filter conjuction not in the allowed list + + Returns: + bool: True if all checks passed """ allowed_dtypes = ["datetime", "date", "bool", "int", "float", "complex", "str"] allowed_conjuction = ["&", "|"] allowed_operators = ["<", ">", "<=", ">=", "==", "!="] - for parameters in filters.values(): + for filter_name, parameters in filters.items(): + if not parameters.get("dtype"): + raise ValueError(f"dtype for {filter_name} is missing!") if parameters.get("dtype") not in allowed_dtypes: raise ValueError( - f"dtype not allowed! Expected {allowed_dtypes} got: {parameters.get('dtype')}." + f"dtype not allowed! Expected: {allowed_dtypes} got: {parameters.get('dtype')} ." ) if parameters.get("operator1"): if parameters.get("operator1") not in allowed_operators: raise ValueError( - f"Operator type not allowed! Expected {allowed_operators} got: {parameters.get('operator1')}." + f"Operator1 type not allowed! Expected: {allowed_operators} got: {parameters.get('operator1')} ." ) if not parameters.get("value1"): - raise ValueError("Value for operator1 is missing!") + raise ValueError("Value1 for operator1 is missing!") elif not parameters.get("operator1"): raise ValueError("Operator1 is missing!") if ( @@ -279,22 +386,22 @@ def check_filters( and parameters.get("operators_conjuction") is not None ): raise ValueError( - f"Operator conjuction allowed only with more than one filter operator!" + f"Operator conjuction allowed only with more then one filter operator!" ) if parameters.get("operator2"): if parameters.get("operator2") not in allowed_operators: raise ValueError( - f"Operator type not allowed! Expected {allowed_operators} got: {parameters.get('operator2')}." + f"Operator2 type not allowed! Expected: {allowed_operators} got: {parameters.get('operator2')} ." ) if not parameters.get("value2"): - raise ValueError("Value for operator2 is missing!") + raise ValueError("Value2 for operator2 is missing!") if not parameters.get("operators_conjuction"): raise ValueError( - f"Operators for conjuction is missing! Expected {allowed_conjuction} got empty." + f"Operator for conjuction is missing! Expected: {allowed_conjuction} got empty." ) if parameters.get("operators_conjuction") not in allowed_conjuction: raise ValueError( - f"Operators for conjuction not allowed! Expected {allowed_conjuction} got {parameters.get('operators_conjuction')}." + f"Operator for conjuction not allowed! 
Expected: {allowed_conjuction} got {parameters.get('operators_conjuction')} ." ) if parameters.get("filters_conjuction"): if ( @@ -302,27 +409,42 @@ def check_filters( and parameters.get("filters_conjuction") is not None ): raise ValueError( - f"Filters conjuction allowed only with more than one filter column!" + f"Filters conjuction allowed only when more then one filter provided!" ) if parameters.get("filters_conjuction") not in allowed_conjuction: raise ValueError( - f"Filters operators for conjuction not allowed! Expected {allowed_conjuction} got {parameters.get('filters_conjuction')}." + f"Filter operator for conjuction not allowed! Expected: {allowed_conjuction} got {parameters.get('filters_conjuction')} ." ) return True def operators_mapping( self, - filters: dict = None, + filters: dict, ) -> dict: """ Function for mapping comparison and conjuction(logical) operators of filters to the format which is recognized by Microsoft API. + Allowed operators: + < + > + <= + >= + == + != + "&" + "|" Args: - filters (dict): A dictionar which contains operators. + filters (dict): A dictionary which contains operators. + + Raises: + ValueError: If operator1 not allowed + ValueError: If operator2 not allowed + ValueError: If operators conjuction not allowed + ValueError: If filters conjuction not allowed Returns: - New modified dict. + dict: New modified dict with mapped operators. """ filters_dict = deepcopy(filters) @@ -361,7 +483,7 @@ def operators_mapping( ] else: raise ValueError( - f"This conjuction(logical) operator: {logical_op_to_change} is not allowed. Please read the function documentation for details!" + f"This conjuction (logical) operator: {logical_op_to_change} is not allowed. Please read the function documentation for details!" ) if parameters.get("filters_conjuction"): logical_fl_to_change = parameters.get("filters_conjuction") @@ -369,12 +491,12 @@ def operators_mapping( parameters["filters_conjuction"] = logical_op[logical_fl_to_change] else: raise ValueError( - f"This conjuction(logical) operator: {logical_fl_to_change} is not allowed. Please read the function documentation for details!" + f"This filters conjuction (logical) operator: {logical_fl_to_change} is not allowed. Please read the function documentation for details!" ) return filters_dict - def make_filter_for_api(self, filters: dict) -> "str": + def make_filter_for_api(self, filters: dict) -> str: """ Function changing type of operators to match MS API style as 'str' passing to URL call. @@ -382,7 +504,7 @@ def make_filter_for_api(self, filters: dict) -> "str": filters (dict): A dictionar which contains operators. Returns: - Output as string to pass as filter parameter to API. + str: Output as filtering string to pass as filter parameter to API. """ filter_text = "" @@ -422,16 +544,16 @@ def make_filter_for_api(self, filters: dict) -> "str": def make_filter_for_df( self, - filters: dict = None, - ) -> "str": + filters: dict, + ) -> str: """ - Function changing dict operators into pandas DataFrame filters. + Function changing filters into pandas DataFrame filtering string used later for filtering the DF. Args: - filters (dict): A dictionar which contains operators. + filters (dict): A dictionary which contains operators. Returns: - Output as string to pass as filter to DataFrame. + str: Output as string to pass as filter to DataFrame. """ filter_in_df = "df.loc[" @@ -469,6 +591,9 @@ def list_item_to_df( ): """ Method to extract data from Sharepoint List into DataFrame. 
+ If filters are passed, function will try to extract only filtered data to reduce the amount of data to transfer. + If there is no filter or there is an throttling (max rows returned limit reached) + exception ,then 2nd workflow will start and download all data which will be filtered later in the data frame. Args: list_title (str): Title of Sharepoint List. Default to None. @@ -504,6 +629,10 @@ def list_item_to_df( } row_count (int): Number of downloaded rows in single request. Default to 5000. + Raises: + AttributeError: If filter column not included inside required fields list. + ValueError: If there is no filter passed - > will extract all fields and filter later. + Returns: pd.DataFrame """ @@ -515,7 +644,7 @@ def list_item_to_df( for key in filters: if key not in required_fields: raise AttributeError( - f"Filter '{key}' not included inside required fields. It is obligatory to extract data which is filtered!" + f"Filter '{key}' column not included inside required fields. It is obligatory to extract data which is filtered!" ) # changing the body of the filter for MS API call @@ -523,7 +652,7 @@ def list_item_to_df( download_all = False - # extracting requeird_fields SP_List objects + # extracting required_fields SP_List objects selected_fields = self.select_fields( list_title=list_title, site_url=site_url, @@ -534,7 +663,7 @@ def list_item_to_df( try: # Extract data below 5k rows or max limitation of the specific SP List with basic filtering. if filters is None: - raise ValueError("There is no filter. Starting extraxction all data") + raise ValueError("There is no filter. Switching to extract all fields.") else: list_items = ( self.list_object.items.filter(filter_text) diff --git a/viadot/tasks/sharepoint.py b/viadot/tasks/sharepoint.py index 2a1cb0bc4..635f9a5ae 100644 --- a/viadot/tasks/sharepoint.py +++ b/viadot/tasks/sharepoint.py @@ -245,7 +245,7 @@ class SharepointListToDF(Task): field_property (List[str]): Property to expand with expand query method. All propertys can be found under list.item.properties. Default to ["Title"] - filters (dict): Dictionary with operators which filters the SharepointList output. + filters (dict, optional): Dictionary with operators which filters the SharepointList output. allowed dtypes: ('datetime','date','bool','int', 'float', 'complex', 'str') allowed conjuction: ('&','|') allowed operators: ('<','>','<=','>=','==','!=') @@ -277,9 +277,9 @@ class SharepointListToDF(Task): def __init__( self, - path: str = None, - list_title: str = None, - site_url: str = None, + path: str, + list_title: str, + site_url: str, required_fields: List[str] = None, field_property: str = "Title", filters: dict = None, @@ -289,7 +289,6 @@ def __init__( *args, **kwargs, ): - self.path = path self.list_title = list_title self.site_url = site_url @@ -300,6 +299,11 @@ def __init__( self.vault_name = vault_name self.credentials_secret = credentials_secret + super().__init__( + *args, + **kwargs, + ) + if not credentials_secret: # Attempt to read a default for the service principal secret name try: @@ -313,16 +317,65 @@ def __init__( ).run() self.credentials = json.loads(credentials_str) - super().__init__( - *args, - **kwargs, - ) - def __call__(self): """Download Sharepoint_List data to a .parquet file""" super().__call__(self) + def _rename_duplicated_fields(self, df): + """ + Renames duplicated columns in a DataFrame by appending a numerical suffix. 
+ Function to check if there are fields with + the same name but in different style (lower, upper) + It might happen that fields returned by get_fields() will be different + than actual list items fields ( from it's properties) + It is specific to sharepoint lists. + MS allowed users to create fields with simillar names (but with different letters style) + fields with same values. For example Id and ID - > office select function doesn't + recognize upper/lower cases. + + Args: + df (pd.DataFrame): The input DataFrame with potentially duplicated columns. + required_fields (list): List of fields that should not be considered for renaming. + + Returns: + pd.DataFrame: DataFrame with duplicated columns renamed to ensure uniqueness. + + Example: + Given DataFrame df: + ``` + A B C B D + 0 1 2 3 4 5 + ``` + + Required fields = ['A', 'B'] + After calling _rename_duplicated_fields(df, required_fields): + ``` + A B C B2 D + 0 1 2 3 4 5 + ``` + """ + col_to_compare = df.columns.tolist() + i = 1 + for column in df.columns.tolist(): + if not column in self.required_fields: + col_to_compare.remove(column) + if column.lower() in [to_cmp.lower() for to_cmp in col_to_compare]: + i += 1 + logger.info(f"Found duplicated column: {column} !") + logger.info(f"Renaming from {column} to {column}{i}") + df = df.rename(columns={f"{column}": f"{column}{i}"}) + return df + def _convert_camel_case_to_words(self, input_str: str) -> str: + """ + Function for converting internal names joined as camelCase column names to regular words + + Args: + input_str (str): Column name + + Returns: + str: Converted column name + """ self.input_str = input_str @@ -331,11 +384,23 @@ def _convert_camel_case_to_words(self, input_str: str) -> str: return converted - def change_column_name( - self, - df: pd.DataFrame = None, - ): - s = SharepointList() + def change_column_name(self, df: pd.DataFrame, credentials: str = None): + """ + Function for changing coded internal column names (Unicode style) to human readable names. + !Warning! + Names are taken from field properties Title! + Because of that the resulting column name might have different then initial name. + + Args: + df (pd.DataFrame): A data frame with loaded column names from sharepoint list. + credentials (str): Credentials str for sharepoint connection establishing. Defaults to None. 
+ + Returns: + pd.DataFrame: Data frame with changed column names + """ + s = SharepointList( + credentials=self.credentials, + ) list_fields = s.get_fields( list_title=self.list_title, site_url=self.site_url, @@ -364,7 +429,7 @@ def change_column_name( # Rename columns names inside DataFrame df = df.rename(columns=dictionary) - + # Check again for duplicates return df def run( @@ -389,7 +454,8 @@ def run( row_count=self.row_count, ) - df = self.change_column_name(df=df_raw) + df_col_changed = self.change_column_name(df=df_raw) + df = self._rename_duplicated_fields(df=df_col_changed) self.logger.info("Successfully changed structure of the DataFrame") return df From ffe6078ad94d00104504cc539fecfd22b28a8ab4 Mon Sep 17 00:00:00 2001 From: Marcin Purtak <44641138+marcinpurtak@users.noreply.github.com> Date: Tue, 14 Nov 2023 15:38:23 +0100 Subject: [PATCH 71/86] removed obsolete comment sharepoint.py --- viadot/tasks/sharepoint.py | 1 - 1 file changed, 1 deletion(-) diff --git a/viadot/tasks/sharepoint.py b/viadot/tasks/sharepoint.py index 635f9a5ae..d6535356b 100644 --- a/viadot/tasks/sharepoint.py +++ b/viadot/tasks/sharepoint.py @@ -429,7 +429,6 @@ def change_column_name(self, df: pd.DataFrame, credentials: str = None): # Rename columns names inside DataFrame df = df.rename(columns=dictionary) - # Check again for duplicates return df def run( From c3fb0a7038ffec2323bbda15ea2d199f30b3871f Mon Sep 17 00:00:00 2001 From: burzekj Date: Tue, 14 Nov 2023 15:56:45 +0100 Subject: [PATCH 72/86] =?UTF-8?q?=E2=9C=A8=20new=20logic=20to=20extracting?= =?UTF-8?q?=20users=20from=20genesys?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/genesys.py | 83 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 428e699a0..96d5bdd03 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -10,8 +10,8 @@ from prefect.engine import signals from prefect.utilities import logging from prefect.utilities.tasks import defaults_from_attrs - from viadot.task_utils import * + from viadot.exceptions import APIError from viadot.sources import Genesys @@ -385,7 +385,6 @@ def run( "agent_performance_summary_view", "agent_status_summary_view", "agent_status_detail_view", - "agent_interaction_detail_view", ]: genesys.genesys_api_connection( post_data_list=post_data_list, end_point=end_point @@ -510,3 +509,83 @@ def run( logger.info("Downloaded the data from the Genesys into the CSV.") return [file_name] + + elif view_type is None and end_point == "users": + # First call to API to get information about amount of pages to extract + temp_json = genesys.genesys_api_connection( + post_data_list=post_data_list, + end_point=f"{end_point}/?pageSize=500&pageNumber=1&expand=presence,dateLastLogin,groups,employerInfo,lasttokenissued&state=any", + method="GET", + ) + last_page = temp_json["pageCount"] + 1 + + # Function to extract nested data from json file + def check_value(base, lvls): + for lvl in lvls: + if isinstance(base, dict): + base = base.get(lvl) + if base is None: + return None + else: + return base + return base + + data_list = [] + + # For loop to donwload all pages from Genesys GET API + for n in range(1, last_page): + json_file = genesys.genesys_api_connection( + post_data_list=post_data_list, + end_point=f"{end_point}/?pageSize=500&pageNumber={n}&expand=presence,dateLastLogin,groups,employerInfo,lasttokenissued&state=any", + method="GET", + ) + 
logger.info(f"Downloaded: {n} page") + + num_ids = len(json_file["entities"]) + + # For loop to extract data from specific page + for id in range(0, num_ids): + record_dict = {} + record_dict["Id"] = check_value(json_file["entities"][id], ["id"]) + record_dict["Name"] = check_value( + json_file["entities"][id], ["name"] + ) + record_dict["DivisionName"] = check_value( + json_file["entities"][id], ["division", "name"] + ) + record_dict["Email"] = check_value( + json_file["entities"][id], ["email"] + ) + record_dict["State"] = check_value( + json_file["entities"][id], ["state"] + ) + record_dict["Title"] = check_value( + json_file["entities"][id], ["title"] + ) + record_dict["Username"] = check_value( + json_file["entities"][id], ["username"] + ) + record_dict["SystemPresence"] = check_value( + json_file["entities"][id], + ["presence", "presenceDefinition", "systemPresence"], + ) + record_dict["DateLastLogin"] = check_value( + json_file["entities"][id], ["dateLastLogin"] + ) + + data_list.append(record_dict) + + df = pd.DataFrame(data_list) + + # data validation function (optional) + if validate_df_dict: + validate_df.run(df=df, tests=validate_df_dict) + + file_name = "All_Genesys_Users.csv" + df.to_csv( + os.path.join(file_name), + index=False, + sep="\t", + ) + + return [file_name] From 826f729ad82f83e3ac2be6d0887e3cbf9fcd3959 Mon Sep 17 00:00:00 2001 From: burzekj Date: Tue, 14 Nov 2023 16:04:05 +0100 Subject: [PATCH 73/86] =?UTF-8?q?=F0=9F=94=8A=20Added=20CHANGELOG.md?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ebe1047d..f128c8c00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added new view type `agent_interaction_view_type` in `Genesys`source. +- Added new logic for endpoint `users` in `Genesys`task. ## [0.4.21] - 2023-10-26 From 53eb33eb1fb0a518067a24594034c3de4a90ea20 Mon Sep 17 00:00:00 2001 From: burzekj Date: Tue, 14 Nov 2023 16:34:10 +0100 Subject: [PATCH 74/86] =?UTF-8?q?=F0=9F=90=9B=20fixed=20bug=20in=20extract?= =?UTF-8?q?ing=20data=20from=20json?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + viadot/tasks/genesys.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ebe1047d..96982445e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Splitted test for Eurostat on source tests and task tests +- Fixed bug for endpoint `conversations` in GET method in `Genesys` Task. ### Added - Added new view type `agent_interaction_view_type` in `Genesys`source. 
diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 428e699a0..e1819f1d6 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -487,9 +487,10 @@ def run( temp_dict = { key: value for (key, value) in attributes.items() if key in key_list } - temp_dict["conversationId"] = json_file["id"] - temp_dict["startTime"] = json_file["startTime"] - temp_dict["endTime"] = json_file["endTime"] + temp_dict["conversationId"] = json_file.get("id") + temp_dict["startTime"] = json_file.get("startTime") + temp_dict["endTime"] = json_file.get("endTime") + data_list.append(temp_dict) df = pd.DataFrame(data_list) From 5235f32398d1c51eb711d39bad8b22cecd65826b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 14 Nov 2023 15:48:48 +0000 Subject: [PATCH 75/86] =?UTF-8?q?=F0=9F=8E=A8=20Format=20Python=20code=20w?= =?UTF-8?q?ith=20Black?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../flows/test_bigquery_to_adls.py | 9 +- ...test_cloud_for_customers_report_to_adls.py | 2 +- .../flows/test_customer_gauge_to_adls.py | 3 +- .../integration/flows/test_hubspot_to_adls.py | 2 +- .../flows/test_mediatool_to_adls.py | 2 +- tests/integration/flows/test_mysql_to_adls.py | 1 + .../flows/test_salesforce_to_adls.py | 2 +- .../integration/flows/test_sap_bw_to_adls.py | 2 +- .../integration/flows/test_sap_rfc_to_adls.py | 2 +- .../flows/test_supermetrics_to_adls.py | 2 +- .../integration/flows/test_vidclub_to_adls.py | 2 +- .../integration/tasks/test_customer_gauge.py | 297 +++++++++++------- tests/integration/tasks/test_tm1.py | 2 +- tests/integration/test_customer_gauge.py | 27 +- tests/integration/test_epicor.py | 2 +- tests/integration/test_genesys.py | 2 +- tests/integration/test_hubspot.py | 2 +- tests/integration/test_sharepoint.py | 5 +- tests/integration/test_tm1.py | 3 +- tests/unit/test_task_utils.py | 2 +- tests/unit/test_utils.py | 2 +- viadot/flows/__init__.py | 2 +- viadot/flows/customer_gauge_to_adls.py | 24 +- viadot/flows/sharepoint_to_adls.py | 3 +- viadot/flows/supermetrics_to_adls.py | 2 +- viadot/flows/transform_and_catalog.py | 4 +- viadot/sources/bigquery.py | 2 +- viadot/sources/customer_gauge.py | 2 +- viadot/sources/mindful.py | 4 +- viadot/sources/sharepoint.py | 20 +- viadot/sources/tm1.py | 7 +- viadot/task_utils.py | 2 +- viadot/tasks/__init__.py | 8 +- viadot/tasks/customer_gauge.py | 176 ++++++----- viadot/tasks/genesys.py | 2 +- viadot/tasks/luma.py | 2 + viadot/tasks/sap_bw.py | 2 +- viadot/tasks/sharepoint.py | 6 +- viadot/tasks/tm1.py | 4 +- 39 files changed, 367 insertions(+), 278 deletions(-) diff --git a/tests/integration/flows/test_bigquery_to_adls.py b/tests/integration/flows/test_bigquery_to_adls.py index de793344a..b4503c6e9 100644 --- a/tests/integration/flows/test_bigquery_to_adls.py +++ b/tests/integration/flows/test_bigquery_to_adls.py @@ -1,15 +1,14 @@ import os +from unittest import mock +import pandas as pd import pendulum import pytest -from unittest import mock -import pandas as pd - from prefect.tasks.secrets import PrefectSecret -from viadot.flows import BigQueryToADLS -from viadot.tasks import AzureDataLakeRemove from viadot.exceptions import ValidationError +from viadot.flows import BigQueryToADLS +from viadot.tasks import AzureDataLakeRemove ADLS_DIR_PATH = "raw/tests/" ADLS_FILE_NAME = str(pendulum.now("utc")) + ".parquet" diff --git a/tests/integration/flows/test_cloud_for_customers_report_to_adls.py b/tests/integration/flows/test_cloud_for_customers_report_to_adls.py index 
f0661e314..b0c3128c5 100644 --- a/tests/integration/flows/test_cloud_for_customers_report_to_adls.py +++ b/tests/integration/flows/test_cloud_for_customers_report_to_adls.py @@ -1,6 +1,6 @@ from viadot.config import local_config -from viadot.flows import CloudForCustomersReportToADLS from viadot.exceptions import ValidationError +from viadot.flows import CloudForCustomersReportToADLS def test_cloud_for_customers_report_to_adls(): diff --git a/tests/integration/flows/test_customer_gauge_to_adls.py b/tests/integration/flows/test_customer_gauge_to_adls.py index 34c7336bc..6da0bf8b7 100644 --- a/tests/integration/flows/test_customer_gauge_to_adls.py +++ b/tests/integration/flows/test_customer_gauge_to_adls.py @@ -4,8 +4,8 @@ import pandas as pd import pytest -from viadot.flows import CustomerGaugeToADLS from viadot.exceptions import ValidationError +from viadot.flows import CustomerGaugeToADLS DATA = { "user_name": ["Jane", "Bob"], @@ -90,4 +90,3 @@ def test_customer_gauge_to_adls_run_flow_validation_failure(mocked_class): flow.run() except ValidationError: pass - diff --git a/tests/integration/flows/test_hubspot_to_adls.py b/tests/integration/flows/test_hubspot_to_adls.py index d960fc079..e0c06c20f 100644 --- a/tests/integration/flows/test_hubspot_to_adls.py +++ b/tests/integration/flows/test_hubspot_to_adls.py @@ -5,8 +5,8 @@ import pandas as pd import pytest -from viadot.flows import HubspotToADLS from viadot.exceptions import ValidationError +from viadot.flows import HubspotToADLS DATA = { "id": {"0": "820306930"}, diff --git a/tests/integration/flows/test_mediatool_to_adls.py b/tests/integration/flows/test_mediatool_to_adls.py index d7b5b2658..65cfadf8f 100644 --- a/tests/integration/flows/test_mediatool_to_adls.py +++ b/tests/integration/flows/test_mediatool_to_adls.py @@ -4,8 +4,8 @@ import pandas as pd import pytest -from viadot.flows import MediatoolToADLS from viadot.exceptions import ValidationError +from viadot.flows import MediatoolToADLS DATA = {"country": ["DK", "DE"], "sales": [3, 4]} ADLS_FILE_NAME = "test_mediatool.parquet" diff --git a/tests/integration/flows/test_mysql_to_adls.py b/tests/integration/flows/test_mysql_to_adls.py index 942bab99d..c968d48a3 100644 --- a/tests/integration/flows/test_mysql_to_adls.py +++ b/tests/integration/flows/test_mysql_to_adls.py @@ -1,4 +1,5 @@ from unittest import mock + from viadot.flows.mysql_to_adls import MySqlToADLS query = """SELECT * FROM `example-views`.`sales`""" diff --git a/tests/integration/flows/test_salesforce_to_adls.py b/tests/integration/flows/test_salesforce_to_adls.py index ec68a1227..8c032f308 100644 --- a/tests/integration/flows/test_salesforce_to_adls.py +++ b/tests/integration/flows/test_salesforce_to_adls.py @@ -2,9 +2,9 @@ from prefect.tasks.secrets import PrefectSecret +from viadot.exceptions import ValidationError from viadot.flows import SalesforceToADLS from viadot.tasks import AzureDataLakeRemove -from viadot.exceptions import ValidationError ADLS_FILE_NAME = "test_salesforce.parquet" ADLS_DIR_PATH = "raw/tests/" diff --git a/tests/integration/flows/test_sap_bw_to_adls.py b/tests/integration/flows/test_sap_bw_to_adls.py index 2c01049e8..4259e5c16 100644 --- a/tests/integration/flows/test_sap_bw_to_adls.py +++ b/tests/integration/flows/test_sap_bw_to_adls.py @@ -4,8 +4,8 @@ import pandas as pd import pytest -from viadot.flows import SAPBWToADLS from viadot.exceptions import ValidationError +from viadot.flows import SAPBWToADLS DATA = { "[0CALMONTH].[LEVEL01].[DESCRIPTION]": ["January 2023"], diff --git 
a/tests/integration/flows/test_sap_rfc_to_adls.py b/tests/integration/flows/test_sap_rfc_to_adls.py index ed33fa320..5503b4684 100644 --- a/tests/integration/flows/test_sap_rfc_to_adls.py +++ b/tests/integration/flows/test_sap_rfc_to_adls.py @@ -1,8 +1,8 @@ from viadot.config import local_config +from viadot.exceptions import ValidationError from viadot.flows import SAPRFCToADLS from viadot.sources import AzureDataLake from viadot.tasks import AzureDataLakeRemove -from viadot.exceptions import ValidationError try: import pyrfc diff --git a/tests/integration/flows/test_supermetrics_to_adls.py b/tests/integration/flows/test_supermetrics_to_adls.py index 9738ddeb1..15deaa01a 100644 --- a/tests/integration/flows/test_supermetrics_to_adls.py +++ b/tests/integration/flows/test_supermetrics_to_adls.py @@ -4,8 +4,8 @@ import pytest from prefect.storage import Local -from viadot.flows import SupermetricsToADLS from viadot.exceptions import ValidationError +from viadot.flows import SupermetricsToADLS CWD = os.getcwd() adls_dir_path = "raw/tests/supermetrics" diff --git a/tests/integration/flows/test_vidclub_to_adls.py b/tests/integration/flows/test_vidclub_to_adls.py index c18eaad10..0f6705579 100644 --- a/tests/integration/flows/test_vidclub_to_adls.py +++ b/tests/integration/flows/test_vidclub_to_adls.py @@ -4,8 +4,8 @@ import pandas as pd import pytest -from viadot.flows import VidClubToADLS from viadot.exceptions import ValidationError +from viadot.flows import VidClubToADLS DATA = {"col1": ["aaa", "bbb", "ccc"], "col2": [11, 22, 33]} ADLS_FILE_NAME = "test_vid_club.parquet" diff --git a/tests/integration/tasks/test_customer_gauge.py b/tests/integration/tasks/test_customer_gauge.py index d95ea14cd..0c524fd0a 100644 --- a/tests/integration/tasks/test_customer_gauge.py +++ b/tests/integration/tasks/test_customer_gauge.py @@ -8,51 +8,77 @@ CUR = 185000 PAGESIZE = 1000 -DATA_JSON = {'contact': {'first_name': '***', 'last_name': '***'}, - 'number_customer': 266, - 'date_email_sent': '2018-02-05 10:42:28', - 'properties': [{'field': 'Postal Code', 'reference': '999'}, - {'field': 'City', 'reference': 'Eldorado'}, - {'field': 'Currency', 'reference': None}, - {'field': 'Item Quantity', 'reference': '7'}, - {'field': 'PostingDate', 'reference': '2018-01-10 00:00:00'}], - 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], - 'drivers': [{'label': 'Product Quality and Product Performance'}, - {'label': 'Function and Design'}, - {'label': 'Value for Money'}, - {'label': 'Packaging'}]} - -RAW_JSON = {'data': [{'contact': {'first_name': '***', 'last_name': '***'}, - 'number_customer': 266, - 'date_email_sent': '2018-02-05 10:42:28', - 'properties': [{'field': 'Postal Code', 'reference': '999'}, - {'field': 'City', 'reference': 'Eldorado'}, - {'field': 'Currency', 'reference': None}, - {'field': 'Item Quantity', 'reference': '7'}, - {'field': 'PostingDate', 'reference': '2018-01-10 00:00:00'}], - 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], - 'drivers': [{'label': 'Product Quality and Product Performance'}, - {'label': 'Function and Design'}, - {'label': 'Value for Money'}, - {'label': 'Packaging'}]}, - {'contact': {'first_name': '***', 'last_name': '***'}, - 'number_customer': 206, - 'date_email_sent': '2018-02-05 10:41:01', - 'properties': [{'field': 'Postal Code', 'reference': '0000'}, - {'field': 'City', 'reference': 'Neverland'}, - {'field': 'Currency', 'reference': None}, - {'field': 'Item Quantity', 'reference': '1'}, - {'field': 'PostingDate', 'reference': 
'2018-01-26 00:00:00'}], - 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], - 'drivers': [{'label': 'The website of the online shop (overall impression)'}, - {'label': 'Waiting period'}]}], - 'cursor': {'next': 37}} - -WRONG_DATA = {'cols':[ - {'field': 'City', 'reference': 'Eldorado'}, - {'field': 'Currency', 'reference': None}, - {'field': 'Item Quantity', 'reference': '7'}, - {'field': 'PostingDate', 'reference': '2018-01-10 00:00:00'}]} +DATA_JSON = { + "contact": {"first_name": "***", "last_name": "***"}, + "number_customer": 266, + "date_email_sent": "2018-02-05 10:42:28", + "properties": [ + {"field": "Postal Code", "reference": "999"}, + {"field": "City", "reference": "Eldorado"}, + {"field": "Currency", "reference": None}, + {"field": "Item Quantity", "reference": "7"}, + {"field": "PostingDate", "reference": "2018-01-10 00:00:00"}, + ], + "custom_fields": [{"field": "Assignment_ID", "reference": None}], + "drivers": [ + {"label": "Product Quality and Product Performance"}, + {"label": "Function and Design"}, + {"label": "Value for Money"}, + {"label": "Packaging"}, + ], +} + +RAW_JSON = { + "data": [ + { + "contact": {"first_name": "***", "last_name": "***"}, + "number_customer": 266, + "date_email_sent": "2018-02-05 10:42:28", + "properties": [ + {"field": "Postal Code", "reference": "999"}, + {"field": "City", "reference": "Eldorado"}, + {"field": "Currency", "reference": None}, + {"field": "Item Quantity", "reference": "7"}, + {"field": "PostingDate", "reference": "2018-01-10 00:00:00"}, + ], + "custom_fields": [{"field": "Assignment_ID", "reference": None}], + "drivers": [ + {"label": "Product Quality and Product Performance"}, + {"label": "Function and Design"}, + {"label": "Value for Money"}, + {"label": "Packaging"}, + ], + }, + { + "contact": {"first_name": "***", "last_name": "***"}, + "number_customer": 206, + "date_email_sent": "2018-02-05 10:41:01", + "properties": [ + {"field": "Postal Code", "reference": "0000"}, + {"field": "City", "reference": "Neverland"}, + {"field": "Currency", "reference": None}, + {"field": "Item Quantity", "reference": "1"}, + {"field": "PostingDate", "reference": "2018-01-26 00:00:00"}, + ], + "custom_fields": [{"field": "Assignment_ID", "reference": None}], + "drivers": [ + {"label": "The website of the online shop (overall impression)"}, + {"label": "Waiting period"}, + ], + }, + ], + "cursor": {"next": 37}, +} + +WRONG_DATA = { + "cols": [ + {"field": "City", "reference": "Eldorado"}, + {"field": "Currency", "reference": None}, + {"field": "Item Quantity", "reference": "7"}, + {"field": "PostingDate", "reference": "2018-01-10 00:00:00"}, + ] +} + @pytest.mark.looping_api_calls def test_customer_gauge_to_df_loop(): @@ -87,39 +113,47 @@ def test_get_data_error_raising(): def test_field_reference_unpacker(): """ Test the '_field_reference_unpacker' method with valid data. It should unpack and modify dictionaries within the specified field and return the expected result. 
- """ + """ data = DATA_JSON.copy() - field = 'properties' + field = "properties" expected_result = { - 'contact': {'first_name': '***', 'last_name': '***'}, - 'number_customer': 266, - 'date_email_sent': '2018-02-05 10:42:28', - 'properties': {'Postal Code': '999', - 'City': 'Eldorado', - 'Currency': None, - 'Item Quantity': '7', - 'PostingDate': '2018-01-10 00:00:00'}, - 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], - 'drivers': [{'label': 'Product Quality and Product Performance'}, - {'label': 'Function and Design'}, - {'label': 'Value for Money'}, - {'label': 'Packaging'}] + "contact": {"first_name": "***", "last_name": "***"}, + "number_customer": 266, + "date_email_sent": "2018-02-05 10:42:28", + "properties": { + "Postal Code": "999", + "City": "Eldorado", + "Currency": None, + "Item Quantity": "7", + "PostingDate": "2018-01-10 00:00:00", + }, + "custom_fields": [{"field": "Assignment_ID", "reference": None}], + "drivers": [ + {"label": "Product Quality and Product Performance"}, + {"label": "Function and Design"}, + {"label": "Value for Money"}, + {"label": "Packaging"}, + ], } result = CG._field_reference_unpacker(json_response=data, field=field) assert result == expected_result + @pytest.mark.field_reference_unpacker_value_error def test_field_reference_unpacker_invalid_data_format(): """ Test the '_field_reference_unpacker' method with invalid data format that should raise a ValueError. It should raise a ValueError exception. """ data = DATA_JSON.copy() - field='contact' - with pytest.raises(ValueError, match=r"Dictionary within the specified field doesn't contain exactly two items."): + field = "contact" + with pytest.raises( + ValueError, + match=r"Dictionary within the specified field doesn't contain exactly two items.", + ): CG._field_reference_unpacker(json_response=data, field=field) - + @pytest.mark.field_reference_unpacker_key_error def test_field_reference_unpacker_missing_field(): @@ -138,20 +172,26 @@ def test_nested_dict_transformer(): Test the '_nested_dict_transformer' method with valid data. It should modify nested dictionaries within the specified field and return the expected result. 
""" data = DATA_JSON.copy() - field = 'drivers' - expected_result = {'contact': {'first_name': '***', 'last_name': '***'}, - 'number_customer': 266, - 'date_email_sent': '2018-02-05 10:42:28', - 'properties': [{'field': 'Postal Code', 'reference': '999'}, - {'field': 'City', 'reference': 'Eldorado'}, - {'field': 'Currency', 'reference': None}, - {'field': 'Item Quantity', 'reference': '7'}, - {'field': 'PostingDate', 'reference': '2018-01-10 00:00:00'}], - 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], - 'drivers': {'1_label': 'Product Quality and Product Performance', - '2_label': 'Function and Design', - '3_label': 'Value for Money', - '4_label': 'Packaging'}} + field = "drivers" + expected_result = { + "contact": {"first_name": "***", "last_name": "***"}, + "number_customer": 266, + "date_email_sent": "2018-02-05 10:42:28", + "properties": [ + {"field": "Postal Code", "reference": "999"}, + {"field": "City", "reference": "Eldorado"}, + {"field": "Currency", "reference": None}, + {"field": "Item Quantity", "reference": "7"}, + {"field": "PostingDate", "reference": "2018-01-10 00:00:00"}, + ], + "custom_fields": [{"field": "Assignment_ID", "reference": None}], + "drivers": { + "1_label": "Product Quality and Product Performance", + "2_label": "Function and Design", + "3_label": "Value for Money", + "4_label": "Packaging", + }, + } result = CG._nested_dict_transformer(json_response=data, field=field) @@ -164,7 +204,7 @@ def test_nested_dict_transformer_invalid_data_format(): Test the '_nested_dict_transformer' method with invalid data format. It should return the same data without modification. """ data = DATA_JSON.copy() - field='number_customer' + field = "number_customer" result = CG._nested_dict_transformer(json_response=data, field=field) assert result == data @@ -186,42 +226,54 @@ def test_column_unpacker_success_method1_and_method2(): """ Test the 'column_unpacker' method with valid data and both Method 1 and Method 2 columns specified. It should return the expected result. 
""" - data = RAW_JSON['data'].copy() - unpack_by_field_reference_cols = ['properties'] - unpack_by_nested_dict_transformer = ['drivers'] + data = RAW_JSON["data"].copy() + unpack_by_field_reference_cols = ["properties"] + unpack_by_nested_dict_transformer = ["drivers"] expected_result = [ - {'contact': {'first_name': '***', 'last_name': '***'}, - 'number_customer': 266, - 'date_email_sent': '2018-02-05 10:42:28', - 'properties': { - 'Postal Code': '999', - 'City': 'Eldorado', - 'Currency': None, - 'Item Quantity': '7', - 'PostingDate': '2018-01-10 00:00:00' + { + "contact": {"first_name": "***", "last_name": "***"}, + "number_customer": 266, + "date_email_sent": "2018-02-05 10:42:28", + "properties": { + "Postal Code": "999", + "City": "Eldorado", + "Currency": None, + "Item Quantity": "7", + "PostingDate": "2018-01-10 00:00:00", + }, + "custom_fields": [{"field": "Assignment_ID", "reference": None}], + "drivers": { + "1_label": "Product Quality and Product Performance", + "2_label": "Function and Design", + "3_label": "Value for Money", + "4_label": "Packaging", }, - 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], - 'drivers': {'1_label': 'Product Quality and Product Performance', - '2_label': 'Function and Design', - '3_label': 'Value for Money', - '4_label': 'Packaging'}}, - {'contact': {'first_name': '***', 'last_name': '***'}, - 'number_customer': 206, - 'date_email_sent': '2018-02-05 10:41:01', - 'properties': { - 'Postal Code': '0000', - 'City': 'Neverland', - 'Currency': None, - 'Item Quantity': '1', - 'PostingDate': '2018-01-26 00:00:00' + }, + { + "contact": {"first_name": "***", "last_name": "***"}, + "number_customer": 206, + "date_email_sent": "2018-02-05 10:41:01", + "properties": { + "Postal Code": "0000", + "City": "Neverland", + "Currency": None, + "Item Quantity": "1", + "PostingDate": "2018-01-26 00:00:00", }, - 'custom_fields': [{'field': 'Assignment_ID', 'reference': None}], - 'drivers': {'1_label': 'The website of the online shop (overall impression)', - '2_label': 'Waiting period'}} + "custom_fields": [{"field": "Assignment_ID", "reference": None}], + "drivers": { + "1_label": "The website of the online shop (overall impression)", + "2_label": "Waiting period", + }, + }, ] - result = CG.column_unpacker(json_list=data, unpack_by_field_reference_cols=unpack_by_field_reference_cols, unpack_by_nested_dict_transformer=unpack_by_nested_dict_transformer) + result = CG.column_unpacker( + json_list=data, + unpack_by_field_reference_cols=unpack_by_field_reference_cols, + unpack_by_nested_dict_transformer=unpack_by_nested_dict_transformer, + ) assert result == expected_result @@ -231,10 +283,14 @@ def test_column_unpacker_missing_json_list(): """ Test the 'column_unpacker' method with missing 'json_list' argument. It should raise a ValueError. 
""" - unpack_by_field_reference_cols = ['properties'] - unpack_by_nested_dict_transformer = ['drivers'] + unpack_by_field_reference_cols = ["properties"] + unpack_by_nested_dict_transformer = ["drivers"] with pytest.raises(ValueError, match="Input 'json_list' is required."): - CG.column_unpacker(json_list=None, unpack_by_field_reference_cols=unpack_by_field_reference_cols, unpack_by_nested_dict_transformer=unpack_by_nested_dict_transformer) + CG.column_unpacker( + json_list=None, + unpack_by_field_reference_cols=unpack_by_field_reference_cols, + unpack_by_nested_dict_transformer=unpack_by_nested_dict_transformer, + ) @pytest.mark.test_column_unpacker_duplicate_columns @@ -242,11 +298,18 @@ def test_column_unpacker_duplicate_columns(): """ Test the 'column_unpacker' method with duplicate columns specified in both Method 1 and Method 2. It should raise a ValueError. """ - data = RAW_JSON['data'].copy() - unpack_by_field_reference_cols = ['properties'] - unpack_by_nested_dict_transformer = ['properties'] - with pytest.raises(ValueError, match="{'properties'} were mentioned in both unpack_by_field_reference_cols and unpack_by_nested_dict_transformer. It's not possible to apply two methods to the same field."): - CG.column_unpacker(json_list=data, unpack_by_field_reference_cols=unpack_by_field_reference_cols, unpack_by_nested_dict_transformer=unpack_by_nested_dict_transformer) + data = RAW_JSON["data"].copy() + unpack_by_field_reference_cols = ["properties"] + unpack_by_nested_dict_transformer = ["properties"] + with pytest.raises( + ValueError, + match="{'properties'} were mentioned in both unpack_by_field_reference_cols and unpack_by_nested_dict_transformer. It's not possible to apply two methods to the same field.", + ): + CG.column_unpacker( + json_list=data, + unpack_by_field_reference_cols=unpack_by_field_reference_cols, + unpack_by_nested_dict_transformer=unpack_by_nested_dict_transformer, + ) @pytest.mark.test_flatten_json @@ -323,4 +386,4 @@ def test_drivers_cleaner_success(): data = "{'label': 'Driver1'}, {'label': 'Driver2'}, {'label': 'Driver3'}" expected_result = "Driver1, Driver2, Driver3" result = CG._drivers_cleaner(data) - assert result == expected_result \ No newline at end of file + assert result == expected_result diff --git a/tests/integration/tasks/test_tm1.py b/tests/integration/tasks/test_tm1.py index 96dd58dfb..68527b5f7 100644 --- a/tests/integration/tasks/test_tm1.py +++ b/tests/integration/tasks/test_tm1.py @@ -1,7 +1,7 @@ import pandas as pd -from viadot.tasks import TM1ToDF from viadot.config import local_config +from viadot.tasks import TM1ToDF CUBE = local_config.get("test_cube") VIEW = local_config.get("test_view") diff --git a/tests/integration/test_customer_gauge.py b/tests/integration/test_customer_gauge.py index ea22569c4..a29ff3585 100644 --- a/tests/integration/test_customer_gauge.py +++ b/tests/integration/test_customer_gauge.py @@ -3,8 +3,8 @@ import pandas as pd import pytest -from viadot.sources import CustomerGauge from viadot.exceptions import CredentialError +from viadot.sources import CustomerGauge ENDPOINT = random.choice(["responses", "non-responses"]) CG = CustomerGauge(endpoint=ENDPOINT) @@ -55,26 +55,39 @@ def test_endpoint_url_argument(): json_response = CG.get_json_response() assert isinstance(json_response, dict) + @pytest.mark.endpoint_valueerror def test_wrong_endpoint_valueerror_raising(): - with pytest.raises(ValueError, match=r"Incorrect endpoint name. 
Choose: 'responses' or 'non-responses'"): + with pytest.raises( + ValueError, + match=r"Incorrect endpoint name. Choose: 'responses' or 'non-responses'", + ): wrong_endpoint_name = "wrong-endpoint" - CG = CustomerGauge(endpoint = wrong_endpoint_name) + CG = CustomerGauge(endpoint=wrong_endpoint_name) + @pytest.mark.endpoint_valueerror def test_no_endpoint_valueerror_raising(): - with pytest.raises(ValueError, match=r"Provide endpoint name. Choose: 'responses' or 'non-responses'. Otherwise, provide URL"): + with pytest.raises( + ValueError, + match=r"Provide endpoint name. Choose: 'responses' or 'non-responses'. Otherwise, provide URL", + ): CG = CustomerGauge() + @pytest.mark.endpoint_credentialserror def test_credentialserror_raising(): - wrong_secret="wrong" + wrong_secret = "wrong" with pytest.raises(CredentialError, match=r"Credentials not provided."): CG = CustomerGauge(endpoint=ENDPOINT, credentials_secret=wrong_secret) + @pytest.mark.get_cursor_valueerror def test_get_cursor_valueerror_raising(): wrong_json = {} - with pytest.raises(ValueError, match=r"Provided argument doesn't contain 'cursor' value. Pass json returned from the endpoint."): + with pytest.raises( + ValueError, + match=r"Provided argument doesn't contain 'cursor' value. Pass json returned from the endpoint.", + ): CG = CustomerGauge(endpoint=ENDPOINT) - CG.get_cursor(json_response=wrong_json) \ No newline at end of file + CG.get_cursor(json_response=wrong_json) diff --git a/tests/integration/test_epicor.py b/tests/integration/test_epicor.py index 77c338a88..60c1f3410 100644 --- a/tests/integration/test_epicor.py +++ b/tests/integration/test_epicor.py @@ -1,5 +1,5 @@ -import pytest import pandas as pd +import pytest from viadot.config import local_config from viadot.exceptions import CredentialError, DataRangeError diff --git a/tests/integration/test_genesys.py b/tests/integration/test_genesys.py index 8508978f1..f91318b96 100644 --- a/tests/integration/test_genesys.py +++ b/tests/integration/test_genesys.py @@ -1,7 +1,7 @@ +import logging from unittest import mock import pytest -import logging from viadot.sources import Genesys diff --git a/tests/integration/test_hubspot.py b/tests/integration/test_hubspot.py index 5963df3ee..c3f303b4c 100644 --- a/tests/integration/test_hubspot.py +++ b/tests/integration/test_hubspot.py @@ -2,8 +2,8 @@ import pandas as pd import pytest -from viadot.exceptions import CredentialError +from viadot.exceptions import CredentialError from viadot.sources import Hubspot from viadot.task_utils import credentials_loader diff --git a/tests/integration/test_sharepoint.py b/tests/integration/test_sharepoint.py index 502ffded0..82090b6a5 100644 --- a/tests/integration/test_sharepoint.py +++ b/tests/integration/test_sharepoint.py @@ -1,17 +1,16 @@ import os import re +from copy import deepcopy import pandas as pd -from copy import deepcopy import pytest from prefect.tasks.secrets import PrefectSecret from viadot.config import local_config from viadot.exceptions import CredentialError -from viadot.sources import Sharepoint +from viadot.sources import Sharepoint, SharepointList from viadot.task_utils import df_get_data_types_task from viadot.tasks.sharepoint import SharepointToDF -from viadot.sources import SharepointList def get_url() -> str: diff --git a/tests/integration/test_tm1.py b/tests/integration/test_tm1.py index ae2b321b9..c0d887a61 100644 --- a/tests/integration/test_tm1.py +++ b/tests/integration/test_tm1.py @@ -1,8 +1,9 @@ import pandas as pd import pytest -from viadot.sources import 
TM1 + from viadot.config import local_config from viadot.exceptions import CredentialError, ValidationError +from viadot.sources import TM1 CUBE = local_config.get("TM1").get("test_cube") VIEW = local_config.get("TM1").get("test_view") diff --git a/tests/unit/test_task_utils.py b/tests/unit/test_task_utils.py index e77c24fdd..969b699a4 100644 --- a/tests/unit/test_task_utils.py +++ b/tests/unit/test_task_utils.py @@ -19,8 +19,8 @@ df_to_parquet, dtypes_to_json_task, union_dfs_task, - write_to_json, validate_df, + write_to_json, ) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index a94eaff9f..777617244 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -6,9 +6,9 @@ from viadot.signals import SKIP from viadot.utils import ( + add_viadot_metadata_columns, check_if_empty_file, gen_bulk_insert_query_from_df, - add_viadot_metadata_columns, ) EMPTY_CSV_PATH = "empty.csv" diff --git a/viadot/flows/__init__.py b/viadot/flows/__init__.py index e138735d6..2f30c04d8 100644 --- a/viadot/flows/__init__.py +++ b/viadot/flows/__init__.py @@ -11,7 +11,7 @@ from .genesys_to_adls import GenesysToADLS from .outlook_to_adls import OutlookToADLS from .salesforce_to_adls import SalesforceToADLS -from .sharepoint_to_adls import SharepointToADLS, SharepointListToADLS +from .sharepoint_to_adls import SharepointListToADLS, SharepointToADLS from .supermetrics_to_adls import SupermetricsToADLS from .supermetrics_to_azure_sql import SupermetricsToAzureSQL diff --git a/viadot/flows/customer_gauge_to_adls.py b/viadot/flows/customer_gauge_to_adls.py index 6af62a340..82e14d5b4 100644 --- a/viadot/flows/customer_gauge_to_adls.py +++ b/viadot/flows/customer_gauge_to_adls.py @@ -66,40 +66,40 @@ def __init__( Args: name (str): The name of the flow. - endpoint (Literal["responses", "non-responses"], optional): Indicate which endpoint to connect. + endpoint (Literal["responses", "non-responses"], optional): Indicate which endpoint to connect. Defaults to None. endpoint_url (str, optional): Full URL for pointing to specific endpoint. Defaults to None. - total_load (bool, optional): Indicate whether to download the data to the latest. If 'False', + total_load (bool, optional): Indicate whether to download the data to the latest. If 'False', only one API call is executed (up to 1000 records). Defaults to True. cursor (int, optional): Cursor value to navigate to the page. Defaults to None. - pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. + pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. Defaults to 1000. - date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], optional): + date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], optional): Specifies the date type which filter date range. Defaults to None. start_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. end_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. Defaults to None. unpack_by_field_reference_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. Defaults to None. - unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Defaults to None. 
- customer_gauge_credentials_secret (str, optional): The name of the Azure Key Vault secret containing + unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Defaults to None. + customer_gauge_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with ['client_id', 'client_secret']. Defaults to "CUSTOMER-GAUGE". vault_name (str, optional): The name of the vault from which to obtain the secret. Defaults to None. anonymize (bool, optional): Indicates if anonymize selected columns. Defaults to False. columns_to_anonymize (List[str], optional): List of columns to anonymize. Defaults to None. - anonymize_method (Literal["mask", "hash"], optional): Method of anonymizing data. "mask" -> replace the - data with "value" arg. "hash" -> replace the data with the hash value of an object (using `hash()` + anonymize_method (Literal["mask", "hash"], optional): Method of anonymizing data. "mask" -> replace the + data with "value" arg. "hash" -> replace the data with the hash value of an object (using `hash()` method). Defaults to "mask". anonymize_value (str, optional): Value to replace the data. Defaults to "***". - date_column (str, optional): Name of the date column used to identify rows that are older than a specified + date_column (str, optional): Name of the date column used to identify rows that are older than a specified number of days. Defaults to None. - days (int, optional): The number of days beyond which we want to anonymize the data, e.g. older than + days (int, optional): The number of days beyond which we want to anonymize the data, e.g. older than 2 years can be: 2*365. Defaults to None. output_file_extension (str, optional): Output file extension - to allow selection of .csv for data which is not easy to handle with parquet. Defaults to ".parquet". adls_dir_path (str, optional): Azure Data Lake destination folder/catalog path. Defaults to None. local_file_path (str, optional): Local destination path. Defaults to None. adls_file_name (str, optional): Name of file in ADLS. Defaults to None. - adls_sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary - with ACCOUNT_NAME and Service Principal credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure + adls_sp_credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary + with ACCOUNT_NAME and Service Principal credentials (TENANT_ID, CLIENT_ID, CLIENT_SECRET) for the Azure Data Lake. Defaults to None. overwrite_adls (bool, optional): Whether to overwrite files in the lake. Defaults to False. if_exists (str, optional): What to do if the file exists. Defaults to "replace". 
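The two unpack_* parameters documented above correspond to two different transformations of a raw Customer Gauge record. A minimal sketch of what each one does to a single record, simplified from the CustomerGaugeToDF task methods (error handling omitted) and consistent with the unit tests earlier in this series:

    def field_reference_unpack(record, field):
        # [{'field': 'City', 'reference': 'Eldorado'}, ...] -> {'City': 'Eldorado', ...}
        record[field] = {d["field"]: d["reference"] for d in record[field]}
        return record

    def nested_dict_transform(record, field):
        # [{'label': 'A'}, {'label': 'B'}] -> {'1_label': 'A', '2_label': 'B'}
        record[field] = {
            f"{i}_{key}": value
            for i, d in enumerate(record[field], start=1)
            for key, value in d.items()
        }
        return record

    record = {
        "number_customer": 266,
        "properties": [{"field": "City", "reference": "Eldorado"}],
        "drivers": [{"label": "Packaging"}, {"label": "Value for Money"}],
    }

    field_reference_unpack(record, "properties")
    nested_dict_transform(record, "drivers")
    # record is now:
    # {'number_customer': 266,
    #  'properties': {'City': 'Eldorado'},
    #  'drivers': {'1_label': 'Packaging', '2_label': 'Value for Money'}}
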
diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index eaf747bab..c9e131361 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -17,8 +17,7 @@ validate_df, ) from viadot.tasks import AzureDataLakeUpload -from viadot.tasks.sharepoint import SharepointToDF, SharepointListToDF - +from viadot.tasks.sharepoint import SharepointListToDF, SharepointToDF logger = logging.get_logger() diff --git a/viadot/flows/supermetrics_to_adls.py b/viadot/flows/supermetrics_to_adls.py index 80253eb88..cff39fc89 100644 --- a/viadot/flows/supermetrics_to_adls.py +++ b/viadot/flows/supermetrics_to_adls.py @@ -18,8 +18,8 @@ dtypes_to_json_task, union_dfs_task, update_dtypes_dict, - write_to_json, validate_df, + write_to_json, ) from viadot.tasks import ( AzureDataLakeUpload, diff --git a/viadot/flows/transform_and_catalog.py b/viadot/flows/transform_and_catalog.py index 1de5c4430..08ac6b895 100644 --- a/viadot/flows/transform_and_catalog.py +++ b/viadot/flows/transform_and_catalog.py @@ -1,13 +1,13 @@ import os -from pathlib import Path import shutil +from pathlib import Path from typing import Dict, List, Union from prefect import Flow, task from prefect.tasks.shell import ShellTask from prefect.triggers import any_successful -from viadot.tasks import CloneRepo, AzureKeyVaultSecret, LumaIngest +from viadot.tasks import AzureKeyVaultSecret, CloneRepo, LumaIngest @task(trigger=any_successful) diff --git a/viadot/sources/bigquery.py b/viadot/sources/bigquery.py index 1be69e866..32d1dac2c 100644 --- a/viadot/sources/bigquery.py +++ b/viadot/sources/bigquery.py @@ -6,8 +6,8 @@ from ..config import local_config from ..exceptions import CredentialError, DBDataAccessError -from .base import Source from ..utils import add_viadot_metadata_columns +from .base import Source class BigQuery(Source): diff --git a/viadot/sources/customer_gauge.py b/viadot/sources/customer_gauge.py index 5fff4387e..819f92a90 100644 --- a/viadot/sources/customer_gauge.py +++ b/viadot/sources/customer_gauge.py @@ -32,7 +32,7 @@ def __init__( endpoint (Literal["responses", "non-responses"]): Indicate which endpoint to connect. Defaults to None. url (str, optional): Endpoint URL. Defaults to None. credentials (Dict[str, Any], optional): Credentials to connect with API containing client_id, client_secret. Defaults to None. - credentials_secret (str, optional): The name of the secret stored in local_config containing a + credentials_secret (str, optional): The name of the secret stored in local_config containing a dictionary with ['client_id', 'client_secret']. Defaults to "CUSTOMER-GAUGE". Raises: ValueError: If endpoint is not provided or incorect. 
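A brief usage sketch of the CustomerGauge source whose docstring is reformatted above, assuming viadot is installed and a "CUSTOMER-GAUGE" secret with client_id/client_secret is available in local_config; it only mirrors behaviour asserted by the integration tests earlier in this series.

    from viadot.sources import CustomerGauge

    cg = CustomerGauge(endpoint="responses")   # or endpoint="non-responses"
    response = cg.get_json_response()          # dict containing "data" and "cursor"
    next_cursor = cg.get_cursor(json_response=response)

    # An unknown endpoint name is rejected up front:
    # CustomerGauge(endpoint="wrong-endpoint")
    # -> ValueError: Incorrect endpoint name. Choose: 'responses' or 'non-responses'
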
diff --git a/viadot/sources/mindful.py b/viadot/sources/mindful.py index 254eecb9d..2698adb15 100644 --- a/viadot/sources/mindful.py +++ b/viadot/sources/mindful.py @@ -1,12 +1,12 @@ import os -from io import StringIO from datetime import datetime, timedelta +from io import StringIO from typing import Any, Dict, Literal, Tuple import pandas as pd import prefect -from requests.models import Response from requests.auth import HTTPBasicAuth +from requests.models import Response from viadot.exceptions import APIError from viadot.sources.base import Source diff --git a/viadot/sources/sharepoint.py b/viadot/sources/sharepoint.py index 096de825b..6e935eee2 100644 --- a/viadot/sources/sharepoint.py +++ b/viadot/sources/sharepoint.py @@ -1,20 +1,20 @@ -from ..config import local_config -from ..exceptions import CredentialError -from .base import Source -from viadot.utils import get_nested_dict - -from typing import Any, Dict, List -from fnmatch import fnmatch -from datetime import datetime from copy import deepcopy -import pandas as pd +from datetime import datetime +from fnmatch import fnmatch +from typing import Any, Dict, List +import pandas as pd import sharepy from office365.runtime.auth.authentication_context import AuthenticationContext -from office365.sharepoint.client_context import ClientContext from office365.runtime.client_request_exception import ClientRequestException +from office365.sharepoint.client_context import ClientContext from prefect.utilities import logging +from viadot.utils import get_nested_dict + +from ..config import local_config +from ..exceptions import CredentialError +from .base import Source logger = logging.get_logger() diff --git a/viadot/sources/tm1.py b/viadot/sources/tm1.py index 9a182bb97..fcb1dae7a 100644 --- a/viadot/sources/tm1.py +++ b/viadot/sources/tm1.py @@ -1,9 +1,8 @@ -import pandas as pd - from typing import Any, Dict, Literal -from TM1py.Services import TM1Service -from prefect.utilities import logging +import pandas as pd +from prefect.utilities import logging +from TM1py.Services import TM1Service from ..config import local_config from ..exceptions import CredentialError, ValidationError diff --git a/viadot/task_utils.py b/viadot/task_utils.py index 6173e2994..6a532f932 100644 --- a/viadot/task_utils.py +++ b/viadot/task_utils.py @@ -1,8 +1,8 @@ import copy import json import os -import shutil import re +import shutil from datetime import datetime, timedelta, timezone from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, List, Literal, Union, cast diff --git a/viadot/tasks/__init__.py b/viadot/tasks/__init__.py index 541be70ab..7dc3d61cd 100644 --- a/viadot/tasks/__init__.py +++ b/viadot/tasks/__init__.py @@ -31,7 +31,7 @@ from .outlook import OutlookToDF from .prefect_date_range import GetFlowNewDateRange from .salesforce import SalesforceBulkUpsert, SalesforceToDF, SalesforceUpsert -from .sharepoint import SharepointToDF, SharepointListToDF +from .sharepoint import SharepointListToDF, SharepointToDF from .sqlite import SQLiteInsert, SQLiteQuery, SQLiteSQLtoDF from .supermetrics import SupermetricsToCSV, SupermetricsToDF @@ -50,12 +50,12 @@ from .duckdb import DuckDBCreateTableFromParquet, DuckDBQuery, DuckDBToDF from .epicor import EpicorOrdersToDF from .eurostat import EurostatToDF +from .git import CloneRepo from .hubspot import HubspotToDF +from .luma import LumaIngest from .mediatool import MediatoolToDF from .mindful import MindfulToCSV from .sftp import SftpList, SftpToDF from .sql_server import SQLServerCreateTable, 
SQLServerQuery, SQLServerToDF -from .vid_club import VidClubToDF -from .git import CloneRepo -from .luma import LumaIngest from .tm1 import TM1ToDF +from .vid_club import VidClubToDF diff --git a/viadot/tasks/customer_gauge.py b/viadot/tasks/customer_gauge.py index 72a1a013f..ecb5e0de5 100644 --- a/viadot/tasks/customer_gauge.py +++ b/viadot/tasks/customer_gauge.py @@ -1,6 +1,6 @@ import json from datetime import datetime -from typing import Any, Dict, Literal, List +from typing import Any, Dict, List, Literal import pandas as pd from prefect import Task @@ -33,29 +33,29 @@ def __init__( **kwargs, ): """ - Task CustomerGaugeToDF for downloading the selected range of data from Customer Gauge + Task CustomerGaugeToDF for downloading the selected range of data from Customer Gauge endpoint and return as one pandas DataFrame. Args: - endpoint (Literal["responses", "non-responses"], optional): Indicate which endpoint + endpoint (Literal["responses", "non-responses"], optional): Indicate which endpoint to connect. Defaults to None. - total_load (bool, optional): Indicate whether to download the data to the latest. + total_load (bool, optional): Indicate whether to download the data to the latest. If 'False', only one API call is executed (up to 1000 records). Defaults to True. endpoint_url (str, optional): Endpoint URL. Defaults to None. cursor (int, optional): Cursor value to navigate to the page. Defaults to None. - pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. + pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. Defaults to 1000. - date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], + date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], optional): Specifies the date type which filter date range. Defaults to None. - start_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. + start_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. Defaults to None. - end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. + end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. unpack_by_field_reference_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. Defaults to None. unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Defaults to None. - timeout (int, optional): The time (in seconds) to wait while running this task before + timeout (int, optional): The time (in seconds) to wait while running this task before a timeout occurs. Defaults to 3600. """ self.endpoint = endpoint @@ -75,24 +75,26 @@ def __init__( *args, **kwargs, ) - def get_data(self, + + def get_data( + self, json_response: Dict[str, Any] = None, ) -> List[Dict[str, Any]]: """ Extract and return the 'data' part of a JSON response as a list of dictionaries. Args: - json_response (Dict[str, Any], optional): JSON object represented as a nested + json_response (Dict[str, Any], optional): JSON object represented as a nested dictionary that contains data and cursor parameter value. Defaults to None. Raises: KeyError: If the 'data' key is not present in the provided JSON response. Returns: - List[Dict[str, Any]]: A list of dictionaries containing data from the 'data' + List[Dict[str, Any]]: A list of dictionaries containing data from the 'data' part of the JSON response. 
""" - jsons_list=[] + jsons_list = [] try: jsons_list = json_response["data"] except KeyError: @@ -104,7 +106,7 @@ def get_data(self, return jsons_list def _field_reference_unpacker( - self, + self, json_response: Dict[str, Any], field: str, ) -> Dict[str, Any]: @@ -113,7 +115,7 @@ def _field_reference_unpacker( This function takes a JSON response and a field name. It processes dictionaries within the specified field, checking if each dictionary contains exactly two items. - If a dictionary meets this criteria, it is transformed into a new dictionary, + If a dictionary meets this criteria, it is transformed into a new dictionary, where the first key becomes a key, and the second key becomes its associated value Args: @@ -123,7 +125,7 @@ def _field_reference_unpacker( Returns: Dict[str, Any]: The JSON response with modified nested dictionaries within the specified field. - + Raises: ValueError: If a dictionary within the specified field doesn't contain exactly two items. """ @@ -134,14 +136,16 @@ def _field_reference_unpacker( list_properties = list(dictionary.values()) result[list_properties[0]] = list_properties[1] else: - raise ValueError(f"Dictionary within the specified field doesn't contain exactly two items.") + raise ValueError( + f"Dictionary within the specified field doesn't contain exactly two items." + ) if result: json_response[field] = result return json_response def _nested_dict_transformer( - self, + self, json_response: Dict[str, Any], field: str, ) -> Dict[str, Any]: @@ -160,49 +164,49 @@ def _nested_dict_transformer( Dict[str, Any]: The JSON response with modified nested dictionaries within the specified field. """ - result={} + result = {} try: for i, dictionary in enumerate(json_response[field], start=1): for key, value in dictionary.items(): - result[f'{i}_{key}'] = value + result[f"{i}_{key}"] = value if result: json_response[field] = result except TypeError as te: logger.error(te) return json_response - + def column_unpacker( - self, + self, json_list: List[Dict[str, Any]] = None, unpack_by_field_reference_cols: List[str] = None, unpack_by_nested_dict_transformer: List[str] = None, - ) -> List[Dict[str, Any]]: + ) -> List[Dict[str, Any]]: """ - Function to unpack and modify specific columns in a list of dictionaries by using one of two methods, - chosen by the user. - If user would like to use field_reference_unpacker, he/she needs to provide list of fields as strings in - `unpack_by_field_reference_cols` parameter, if user would like to use nested_dict_transformer he/she needs to provide list of - fields as strings in unpack_by_nested_dict_transformer parameter. + Function to unpack and modify specific columns in a list of dictionaries by using one of two methods, + chosen by the user. + If user would like to use field_reference_unpacker, he/she needs to provide list of fields as strings in + `unpack_by_field_reference_cols` parameter, if user would like to use nested_dict_transformer he/she needs to provide list of + fields as strings in unpack_by_nested_dict_transformer parameter. Args: json_list (List[Dict[str, Any]): A list of dictionaries containing the data. - unpack_by_field_reference_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. + unpack_by_field_reference_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. Defaults to None. - unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. 
+ unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Defaults to None. Raises: ValueError: If 'json_list' is not provided. ValueError: If specified columns do not exist in the JSON data. - ValueError: If columns are mentioned in both 'unpack_by_field_reference_cols' and 'unpack_by_nested_dict_transformer'. + ValueError: If columns are mentioned in both 'unpack_by_field_reference_cols' and 'unpack_by_nested_dict_transformer'. Returns: List[Dict[str, Any]]: The updated list of dictionaries after column unpacking and modification. """ duplicated_cols = [] - + if json_list is None: raise ValueError("Input 'json_list' is required.") @@ -210,49 +214,59 @@ def unpack_columns(columns, unpack_function): json_list_clean = json_list.copy() for field in columns: if field in json_list_clean[0]: - logger.info(f"Unpacking column '{field}' with {unpack_function.__name__} method...") + logger.info( + f"Unpacking column '{field}' with {unpack_function.__name__} method..." + ) try: - json_list_clean = list(map(lambda x: unpack_function(x, field), json_list_clean)) - logger.info(f"All elements in '{field}' are unpacked successfully.") + json_list_clean = list( + map(lambda x: unpack_function(x, field), json_list_clean) + ) + logger.info( + f"All elements in '{field}' are unpacked successfully." + ) except ValueError as ve: - logger.info(f"No transformation were made in '{field}'," - "because didn't contain list of key-value data.") + logger.info( + f"No transformation were made in '{field}'," + "because didn't contain list of key-value data." + ) except Exception as e: logger.info(f"Error while unpacking {field}: {e}") else: logger.info(f"Column '{field}' not found.") return json_list_clean + if unpack_by_field_reference_cols and unpack_by_nested_dict_transformer: - duplicated_cols = set(unpack_by_field_reference_cols).intersection(set(unpack_by_nested_dict_transformer)) + duplicated_cols = set(unpack_by_field_reference_cols).intersection( + set(unpack_by_nested_dict_transformer) + ) if duplicated_cols: raise ValueError( - f"{duplicated_cols} were mentioned in both unpack_by_field_reference_cols and unpack_by_nested_dict_transformer." + f"{duplicated_cols} were mentioned in both unpack_by_field_reference_cols and unpack_by_nested_dict_transformer." " It's not possible to apply two methods to the same field." - ) + ) else: if unpack_by_field_reference_cols is not None: json_list = unpack_columns( - columns = unpack_by_field_reference_cols, - unpack_function = self._field_reference_unpacker - ) + columns=unpack_by_field_reference_cols, + unpack_function=self._field_reference_unpacker, + ) if unpack_by_nested_dict_transformer is not None: json_list = unpack_columns( - columns = unpack_by_nested_dict_transformer, - unpack_function = self._nested_dict_transformer - ) - - return json_list + columns=unpack_by_nested_dict_transformer, + unpack_function=self._nested_dict_transformer, + ) + return json_list def flatten_json(self, json_response: Dict[str, Any] = None) -> Dict[str, Any]: """ - Function that flattens a nested structure of the JSON object into - a single-level dictionary. It uses a nested `flattify()` function to recursively + Function that flattens a nested structure of the JSON object into + a single-level dictionary. It uses a nested `flattify()` function to recursively combine nested keys in the JSON object with '_' to create the flattened keys. 
Args: - json_response (Dict[str, Any], optional): JSON object represented as + json_response (Dict[str, Any], optional): JSON object represented as a nested dictionary. Defaults to None. Raises: @@ -266,7 +280,7 @@ def flatten_json(self, json_response: Dict[str, Any] = None) -> Dict[str, Any]: if not isinstance(json_response, dict): raise TypeError("Input must be a dictionary.") - def flattify(field, key="", out = None): + def flattify(field, key="", out=None): if out is None: out = result @@ -279,16 +293,13 @@ def flattify(field, key="", out = None): flattify(json_response) return result - - def square_brackets_remover( - self, - df: pd.DataFrame = None - ) -> pd.DataFrame: + + def square_brackets_remover(self, df: pd.DataFrame = None) -> pd.DataFrame: """ Replace square brackets "[]" with an empty string in a pandas DataFrame. Args: - df (pd.DataFrame, optional): Replace square brackets "[]" with an empty string + df (pd.DataFrame, optional): Replace square brackets "[]" with an empty string in a pandas DataFrame. Defaults to None. Returns: @@ -298,11 +309,8 @@ def square_brackets_remover( df = df.astype(str) df = df.applymap(lambda x: x.strip("[]")) return df - - def _drivers_cleaner( - self, - drivers: str = None - ) -> str: + + def _drivers_cleaner(self, drivers: str = None) -> str: """ Clean and format the 'drivers' data. @@ -313,8 +321,13 @@ def _drivers_cleaner( str: A cleaned and formatted string of driver data. """ - cleaned_drivers = drivers.replace("{", "").replace("}", "").replace("'", "").replace("label: ", "") - + cleaned_drivers = ( + drivers.replace("{", "") + .replace("}", "") + .replace("'", "") + .replace("label: ", "") + ) + return cleaned_drivers def __call__(self): @@ -351,31 +364,31 @@ def run( vault_name: str = None, ) -> pd.DataFrame: """ - Run method. Downloading the selected range of data from Customer Gauge endpoint and return + Run method. Downloading the selected range of data from Customer Gauge endpoint and return as one pandas DataFrame. Args: - endpoint (Literal["responses", "non-responses"]): Indicate which endpoint to connect. + endpoint (Literal["responses", "non-responses"]): Indicate which endpoint to connect. Defaults to None. - total_load (bool, optional): Indicate whether to download the data to the latest. If + total_load (bool, optional): Indicate whether to download the data to the latest. If 'False', only one API call is executed (up to 1000 records). Defaults to True. endpoint_url (str, optional): Endpoint URL. Defaults to None. cursor (int, optional): Cursor value to navigate to the page. Defaults to None. - pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. + pagesize (int, optional): Number of responses (records) returned per page, max value = 1000. Defaults to 1000. - date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], + date_field (Literal["date_creation", "date_order", "date_sent", "date_survey_response"], optional): Specifies the date type which filter date range. Defaults to None. - start_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. + start_date (datetime, optional): Defines the period end date in yyyy-mm-dd format. Defaults to None. - end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. + end_date (datetime, optional): Defines the period start date in yyyy-mm-dd format. Defaults to None. - unpack_by_field_reference_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. 
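# A self-contained sketch of the cleaning steps implemented above: flatten a
# nested record with '_'-joined keys, load it into pandas, strip the square
# brackets left by stringified lists, and clean a 'drivers'-style string.
# The sample record is illustrative only, not a real Customer Gauge response.
from typing import Any, Dict

import pandas as pd


def flatten(record: Any, key: str = "", out: Dict[str, Any] = None) -> Dict[str, Any]:
    # Same idea as `flatten_json`/`flattify` above: nested keys are joined with '_'.
    if out is None:
        out = {}
    if isinstance(record, dict):
        for name in record:
            flatten(record[name], key + name + "_", out)
    else:
        out[key[:-1]] = record
    return out


sample = {
    "contact": {"first_name": "Jane", "country": {"code": "PL"}},
    "tags": ["vip", "newsletter"],
}
flat = flatten(sample)
# {'contact_first_name': 'Jane', 'contact_country_code': 'PL', 'tags': ['vip', 'newsletter']}

df = pd.DataFrame([flat]).astype(str)
df = df.applymap(lambda x: x.strip("[]"))  # same trick as `square_brackets_remover`

drivers = "{'label': 'Price'} {'label': 'Delivery'}"
cleaned = (
    drivers.replace("{", "").replace("}", "").replace("'", "").replace("label: ", "")
)
print(df.iloc[0].to_dict())
print(cleaned)  # Price Delivery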
+ unpack_by_field_reference_cols (List[str]): Columns to unpack and modify using `_field_reference_unpacker`. + Defaults to None. + unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. Defaults to None. - unpack_by_nested_dict_transformer (List[str]): Columns to unpack and modify using `_nested_dict_transformer`. - Defaults to None. - credentials_secret (str, optional): The name of the Azure Key Vault secret containing a + credentials_secret (str, optional): The name of the Azure Key Vault secret containing a dictionary with ['client_id', 'client_secret']. Defaults to "CUSTOMER-GAUGE". - vault_name (str, optional): The name of the vault from which to obtain the secret. + vault_name (str, optional): The name of the vault from which to obtain the secret. Defaults to None. Returns: @@ -412,7 +425,7 @@ def run( if total_load == True: if cursor is None: logger.info( - f"Downloading all the data from the {self.endpoint or self.endpoint_url} endpoint." + f"Downloading all the data from the {self.endpoint or self.endpoint_url} endpoint." "Process might take a few minutes..." ) else: @@ -426,9 +439,10 @@ def run( total_json += jsn clean_json = self.column_unpacker( - json_list = total_json, - unpack_by_field_reference_cols = unpack_by_field_reference_cols, - unpack_by_nested_dict_transformer = unpack_by_nested_dict_transformer) + json_list=total_json, + unpack_by_field_reference_cols=unpack_by_field_reference_cols, + unpack_by_nested_dict_transformer=unpack_by_nested_dict_transformer, + ) logger.info("Inserting data into the DataFrame...") df = pd.DataFrame(list(map(self.flatten_json, clean_json))) df = self.square_brackets_remover(df) diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 428e699a0..bf69db2e0 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -11,9 +11,9 @@ from prefect.utilities import logging from prefect.utilities.tasks import defaults_from_attrs -from viadot.task_utils import * from viadot.exceptions import APIError from viadot.sources import Genesys +from viadot.task_utils import * logger = logging.get_logger() diff --git a/viadot/tasks/luma.py b/viadot/tasks/luma.py index 5b78ebc27..11eb91e45 100644 --- a/viadot/tasks/luma.py +++ b/viadot/tasks/luma.py @@ -1,5 +1,7 @@ import json + from prefect.tasks.shell import ShellTask + from .azure_key_vault import AzureKeyVaultSecret diff --git a/viadot/tasks/sap_bw.py b/viadot/tasks/sap_bw.py index acc92c246..0d8d7b2e3 100644 --- a/viadot/tasks/sap_bw.py +++ b/viadot/tasks/sap_bw.py @@ -1,12 +1,12 @@ import pandas as pd from prefect import Task from prefect.tasks.secrets import PrefectSecret -from viadot.tasks import AzureKeyVaultSecret from prefect.utilities import logging from viadot.exceptions import ValidationError from viadot.sources import SAPBW from viadot.task_utils import * +from viadot.tasks import AzureKeyVaultSecret logger = logging.get_logger() diff --git a/viadot/tasks/sharepoint.py b/viadot/tasks/sharepoint.py index 2a1cb0bc4..c4d670617 100644 --- a/viadot/tasks/sharepoint.py +++ b/viadot/tasks/sharepoint.py @@ -1,10 +1,10 @@ -from typing import List -import pandas as pd import copy import json import os import re +from typing import List +import pandas as pd from prefect import Task from prefect.tasks.secrets import PrefectSecret from prefect.utilities import logging @@ -12,8 +12,8 @@ from ..exceptions import ValidationError from ..sources import Sharepoint, SharepointList -from .azure_key_vault import AzureKeyVaultSecret from 
..utils import add_viadot_metadata_columns +from .azure_key_vault import AzureKeyVaultSecret logger = logging.get_logger() diff --git a/viadot/tasks/tm1.py b/viadot/tasks/tm1.py index 06b96ccd2..56d4401f0 100644 --- a/viadot/tasks/tm1.py +++ b/viadot/tasks/tm1.py @@ -1,7 +1,7 @@ -import pandas as pd +from typing import Any, Dict +import pandas as pd from prefect import Task -from typing import Any, Dict from prefect.utilities.tasks import defaults_from_attrs from ..sources import TM1 From b9bcdd1c2ea7c251fbf46982845a796fd448bc8b Mon Sep 17 00:00:00 2001 From: Jakub Burzec <125436423+burzekj@users.noreply.github.com> Date: Wed, 15 Nov 2023 09:25:01 +0100 Subject: [PATCH 76/86] Update viadot/tasks/genesys.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: RafaÅ‚ Ziemianek <49795849+Rafalz13@users.noreply.github.com> --- viadot/tasks/genesys.py | 1 + 1 file changed, 1 insertion(+) diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 96d5bdd03..187ba1150 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -385,6 +385,7 @@ def run( "agent_performance_summary_view", "agent_status_summary_view", "agent_status_detail_view", + "agent_interaction_detail_view", ]: genesys.genesys_api_connection( post_data_list=post_data_list, end_point=end_point From ce1cb0e5165df628e009476452643ae1082ea57d Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Wed, 15 Nov 2023 09:39:01 +0100 Subject: [PATCH 77/86] Typos, spelling and docstring update --- viadot/flows/sharepoint_to_adls.py | 36 +++--- viadot/sources/sharepoint.py | 174 ++++++++++++++--------------- viadot/tasks/sharepoint.py | 20 ++-- 3 files changed, 112 insertions(+), 118 deletions(-) diff --git a/viadot/flows/sharepoint_to_adls.py b/viadot/flows/sharepoint_to_adls.py index 6191317d0..9baab7d34 100644 --- a/viadot/flows/sharepoint_to_adls.py +++ b/viadot/flows/sharepoint_to_adls.py @@ -42,7 +42,7 @@ def __init__( if_exists: str = "replace", validate_df_dict: dict = None, timeout: int = 3600, - key_value_param: bool = False, + set_prefect_kv: bool = False, *args: List[any], **kwargs: Dict[str, Any], ): @@ -70,7 +70,7 @@ def __init__( dataframe. If defined, triggers the `validate_df` task from task_utils. Defaults to None. timeout(int, optional): The amount of time (in seconds) to wait while running this task before a timeout occurs. Defaults to 3600. - key_value_param (bool, optional): Wheter to do key-value parameters in KV Store or not. Defaults to False. + set_prefect_kv (bool, optional): Whether to do key-value parameters in KV Store or not. Defaults to False. 
""" # SharepointToDF self.if_empty = if_empty @@ -88,7 +88,7 @@ def __init__( self.adls_sp_credentials_secret = adls_sp_credentials_secret self.if_exists = if_exists self.output_file_extension = output_file_extension - self.key_value_param = key_value_param + self.set_prefect_kv = set_prefect_kv self.now = str(pendulum.now("utc")) if self.local_dir_path is not None: self.local_file_path = ( @@ -180,7 +180,7 @@ def gen_flow(self) -> Flow: file_to_adls_task.set_upstream(df_to_file, flow=self) json_to_adls_task.set_upstream(dtypes_to_json_task, flow=self) - if self.key_value_param == True: + if self.set_prefect_kv == True: set_key_value(key=self.adls_dir_path, value=self.adls_file_path) @staticmethod @@ -207,11 +207,14 @@ def __init__( overwrite_adls: bool = True, output_file_extension: str = ".parquet", validate_df_dict: dict = None, - key_value_param: bool = False, + set_prefect_kv: bool = False, *args: List[any], **kwargs: Dict[str, Any], ): - """_summary_ + """ + Flow for ingesting sharepoint list items(rows) with a given (or all) columns. + It allows to filter the output by column values. + Data is ingested from MS Sharepoint list (with given name and url ) and stored in MS Azure ADLS. Args: name (str): Prefect flow name. @@ -220,9 +223,9 @@ def __init__( path (str): Local file path. Default to None. adls_dir_path (str): Azure Data Lake destination folder/catalog path. Defaults to None. adls_file_name (str): Name of file in ADLS. Defaults to None. - filters (dict, optional): Dictionary with operators which filters the SharepointList output. + filters (dict, optional): Dictionary with operators which filters the SharepointList output. Defaults to None. allowed dtypes: ('datetime','date','bool','int', 'float', 'complex', 'str') - allowed conjuction: ('&','|') + allowed conjunction: ('&','|') allowed operators: ('<','>','<=','>=','==','!=') Example how to build the dict: filters = { @@ -233,8 +236,8 @@ def __init__( 'value2':'YYYY-MM-DD', 'operator1':'>=', 'operator2':'<=', - 'operators_conjuction':'&', # conjuction operators allowed only when 2 values passed - 'filters_conjuction':'&', # conjuction filters allowed only when 2 columns passed + 'operators_conjunction':'&', # conjunction operators allowed only when 2 values passed + 'filters_conjunction':'&', # conjunction filters allowed only when 2 columns passed } , 'Column_name_2' : @@ -244,13 +247,12 @@ def __init__( 'operator1':'==', }, } - Defaults to None. required_fields (List[str], optional): Required fields(columns) need to be extracted from Sharepoint List. Defaults to None. field_property (str, optional): Property to expand fields with expand query method. For example: User fields could be expanded and "Title" or "ID" could be extracted - -> usefull to get user name instead of ID + -> useful to get user name instead of ID All properties can be found under list.item.properties. WARNING! Field types and properties might change which could lead to errors - extension of sp connector would be required. @@ -262,9 +264,9 @@ def __init__( If not passed it will take cred's from your .config/credentials.json Default to None. vault_name (str, optional): KeyVaultSecret name. Default to None. overwrite_adls (bool, optional): Whether to overwrite files in the lake. Defaults to True. - output_file_extension (str, optional): _description_. Defaults to ".parquet". - validate_df_dict (dict, optional): Wheter to do an extra df validation before ADLS upload or not to do. Defaults to None. 
- key_value_param (bool, optional): Wheter to do key-value parameters in KV Store or not. Defaults to False. + output_file_extension (str, optional): Extension of the resulting file to be stored. Defaults to ".parquet". + validate_df_dict (dict, optional): Whether to do an extra df validation before ADLS upload or not to do. Defaults to None. + set_prefect_kv (bool, optional): Whether to do key-value parameters in KV Store or not. Defaults to False. Returns: .parquet file inside ADLS. @@ -288,7 +290,7 @@ def __init__( self.overwrite = overwrite_adls self.adls_sp_credentials_secret = adls_sp_credentials_secret self.output_file_extension = output_file_extension - self.key_value_param = key_value_param + self.set_prefect_kv = set_prefect_kv self.now = str(pendulum.now("utc")) if self.path is not None: self.local_file_path = ( @@ -379,7 +381,7 @@ def gen_flow(self) -> Flow: file_to_adls_task.set_upstream(df_to_file, flow=self) json_to_adls_task.set_upstream(dtypes_to_json_task, flow=self) - if self.key_value_param == True: + if self.set_prefect_kv == True: set_key_value(key=self.adls_dir_path, value=self.adls_file_path) @staticmethod diff --git a/viadot/sources/sharepoint.py b/viadot/sources/sharepoint.py index 7f1bc523c..832633806 100644 --- a/viadot/sources/sharepoint.py +++ b/viadot/sources/sharepoint.py @@ -84,27 +84,17 @@ def download_file( class SharepointList(Source): - """ - A Sharepoint_List class to connect and download data from Sharepoint lists. - Warning! - Please be carefull with selection of the column names because once sharepoint list is opened inside a browser it may display columns in different languages. - Because of that the resulting file or output might have different column names then the one which u see in the browser. - Args: - credentials (dict): Credentials should include: - - "tenant" - - "client_id" - - "scopes" - - "thumbprint" - - "private_key" - """ - def __init__( self, credentials: Dict[str, Any] = None, *args, **kwargs, ): - """_summary_ + """ + A Sharepoint_List class to connect and download data from Sharepoint lists. + Warning! + Please be careful with selection of the column names because once sharepoint list is opened inside a browser it may display columns in different languages. + Because of that the resulting file or output might have different column names then the one which u see in the browser. Args: credentials (Dict[str, Any], optional): Credentials should include: @@ -115,8 +105,8 @@ def __init__( - "private_key" Raises: - CredentialError: If no credentials are pased - and local config doesn't contain them neiter + CredentialError: If no credentials are passed + and local config doesn't contain them neither """ DEFAULT_CREDENTIALS = local_config.get("SHAREPOINT_CERT") credentials = credentials or DEFAULT_CREDENTIALS @@ -126,15 +116,15 @@ def __init__( super().__init__(*args, credentials=credentials, **kwargs) def get_connection(self, site_url: str): - """Function for connecting into Sharepoint with AuthenticationContext + """Function for connecting into Sharepoint with AuthenticationContext. Args: - site_url (str): url of the sharepoint list + site_url (str): URL of the sharepoint list. Returns: - ctx: authentication context + ctx: Authentication context. 
""" - logger.info("Connecting into Sharepoint with AuthenticationContexts") + logger.info("Connecting into Sharepoint with AuthenticationContexts.") try: auth_context = AuthenticationContext(site_url) auth_context.with_client_certificate( @@ -158,7 +148,7 @@ def _unpack_fields( list_item, selected_fields: dict, ) -> dict: - """Function for extracting and unpacking list items from the search fields + """Function for extracting and unpacking list items from the search fields. Args: list_items (office365 list item): A list with office365 list item objects (rows) @@ -166,15 +156,15 @@ def _unpack_fields( Raises: ValueError: "Check if given field property is valid!" - ValueError: "Get nested dict for not recognized type of field! Check field types in the source" - ValueError: "Get empty properties for list items" + ValueError: "Get nested dict for not recognized type of field! Check field types in the source." + ValueError: "Get empty properties for list items." Returns: - dict: A dictionary with Column: Value pairs for each row from the list + dict: A dictionary with Column: Value pairs for each row from the list. """ # Creating the body of dictionary new_dict = dict() - # For loop scanning the propertys of searching fields + # For loop scanning the properties of searching fields item_values_dict = list_item.properties if item_values_dict: for field, val in item_values_dict.items(): @@ -196,13 +186,13 @@ def _unpack_fields( new_dict[field] = ";".join(nested_dict.values()) else: raise ValueError( - "Get nested dict for not recognized type of field! Check field types in the source" + "Get nested dict for not recognized type of field! Check field types in the source." ) else: new_dict[field] = val else: raise ValueError( - "Get empty properties for list items. Check if parameter list_item collection containes any data -> item objects." + "Get empty properties for list items. Check if parameter list_item collection contains any data -> item objects." ) return new_dict @@ -213,17 +203,17 @@ def get_fields( required_fields: List[str] = None, ) -> List: """ - Function for geting list of fields objects from the sharepoint list. + Function for getting list of fields objects from the sharepoint list. It can get all fields available if required_fields not passed or just the one which are in the list required_fields. Args: - list_title (str): name of the sharepoint list - site_url (str): url to the sharepoint list with "/" at the end + list_title (str): Name of the sharepoint list. + site_url (str): URL to the sharepoint list with "/" at the end. required_fields (List[str], optional ): List of required fields to ingest. It will get all fields if not passed. Returns: - List: list with office365 sharepoint list field objects + List: List with office365 sharepoint list field objects. """ ctx = self.get_connection(site_url=site_url) @@ -263,9 +253,9 @@ def select_fields( -> more properties can be discovered by getting list.item.properties. Args: - list_title (str): _description_. Defaults to None. - site_url (str): _description_. Defaults to None. - required_fields (List[str], optional): _description_. Defaults to None. + list_title (str): A title of the sharepoint list. Defaults to None. + site_url (str): A sharepoint list URL. Defaults to None. + required_fields (List[str], optional): List of fields(columns) to be ingested. Defaults to None. field_property (str, optional): Property to extract from nested fields like column with type User*. Defaults to "Title". 
@@ -318,10 +308,10 @@ def check_filters( Function to check if filters dict is valid. Please check and apply only allowed filter settings: allowed_dtypes = ["datetime", "date", "bool", "int", "float", "complex", "str"] - allowed_conjuction = ["&", "|"] + allowed_conjunction = ["&", "|"] allowed_operators = ["<", ">", "<=", ">=", "==", "!="] - Operator conjuction is only possible if there are 2 values like: value <= 1 | value == 5 - Filter conjuction is only possible if there are more then 1 filters for ex. date and creator + Operator conjunction is only possible if there are 2 values like: value <= 1 | value == 5 + Filter conjunction is only possible if there are more then 1 filters for ex. date and creator Args: filters (dict): A dictionary containing filter settings @@ -333,8 +323,8 @@ def check_filters( "value2": today_date, "operator1": ">=", "operator2": "<=", - "operators_conjuction": "&", - "filters_conjuction": "&", + "operators_conjunction": "&", + "filters_conjunction": "&", }, "Factory": { "dtype": "str", @@ -344,25 +334,25 @@ def check_filters( } Raises: - ValueError: If dtype not in allowed list - ValueError: If comparison operator1 not in allowed list - ValueError: If value for operator1 is missing - ValueError: If comparison operator1 for the first value is missing - ValueError: If comparison operator2 not in allowed list - ValueError: If value for operator2 is missing - ValueError: If comparison operator2 for the first value is missing - ValueError: If operator conjuction is missing while there are 2 values and 2 operators passed - ValueError: If operator conjuction is not in the allowed list - ValueError: If operator conjuction provided why only one filter value is given - ValueError: If filter conjuction provided without 2nd filter - ValueError: If filter conjuction not in the allowed list + ValueError: If dtype not in allowed list. + ValueError: If comparison operator1 not in allowed list. + ValueError: If value for operator1 is missing. + ValueError: If comparison operator1 for the first value is missing. + ValueError: If comparison operator2 not in allowed list. + ValueError: If value for operator2 is missing. + ValueError: If comparison operator2 for the first value is missing. + ValueError: If operator conjunction is missing while there are 2 values and 2 operators passed. + ValueError: If operator conjunction is not in the allowed list. + ValueError: If operator conjunction provided why only one filter value is given. + ValueError: If filter conjunction provided without 2nd filter. + ValueError: If filter conjunction not in the allowed list. Returns: - bool: True if all checks passed + bool: True if all checks passed. """ allowed_dtypes = ["datetime", "date", "bool", "int", "float", "complex", "str"] - allowed_conjuction = ["&", "|"] + allowed_conjunction = ["&", "|"] allowed_operators = ["<", ">", "<=", ">=", "==", "!="] for filter_name, parameters in filters.items(): @@ -383,10 +373,10 @@ def check_filters( raise ValueError("Operator1 is missing!") if ( not parameters.get("operator2") - and parameters.get("operators_conjuction") is not None + and parameters.get("operators_conjunction") is not None ): raise ValueError( - f"Operator conjuction allowed only with more then one filter operator!" + f"Operator conjunction allowed only with more then one filter operator!" 
) if parameters.get("operator2"): if parameters.get("operator2") not in allowed_operators: @@ -395,25 +385,25 @@ def check_filters( ) if not parameters.get("value2"): raise ValueError("Value2 for operator2 is missing!") - if not parameters.get("operators_conjuction"): + if not parameters.get("operators_conjunction"): raise ValueError( - f"Operator for conjuction is missing! Expected: {allowed_conjuction} got empty." + f"Operator for conjunction is missing! Expected: {allowed_conjunction} got empty." ) - if parameters.get("operators_conjuction") not in allowed_conjuction: + if parameters.get("operators_conjunction") not in allowed_conjunction: raise ValueError( - f"Operator for conjuction not allowed! Expected: {allowed_conjuction} got {parameters.get('operators_conjuction')} ." + f"Operator for conjunction not allowed! Expected: {allowed_conjunction} got {parameters.get('operators_conjunction')} ." ) - if parameters.get("filters_conjuction"): + if parameters.get("filters_conjunction"): if ( len(filters.keys()) == 1 - and parameters.get("filters_conjuction") is not None + and parameters.get("filters_conjunction") is not None ): raise ValueError( - f"Filters conjuction allowed only when more then one filter provided!" + f"Filters conjunction allowed only when more then one filter provided!" ) - if parameters.get("filters_conjuction") not in allowed_conjuction: + if parameters.get("filters_conjunction") not in allowed_conjunction: raise ValueError( - f"Filter operator for conjuction not allowed! Expected: {allowed_conjuction} got {parameters.get('filters_conjuction')} ." + f"Filter operator for conjunction not allowed! Expected: {allowed_conjunction} got {parameters.get('filters_conjunction')} ." ) return True @@ -423,7 +413,7 @@ def operators_mapping( filters: dict, ) -> dict: """ - Function for mapping comparison and conjuction(logical) operators of filters to the format which is recognized by Microsoft API. + Function for mapping comparison and conjunction(logical) operators of filters to the format which is recognized by Microsoft API. Allowed operators: < > @@ -438,10 +428,10 @@ def operators_mapping( filters (dict): A dictionary which contains operators. Raises: - ValueError: If operator1 not allowed - ValueError: If operator2 not allowed - ValueError: If operators conjuction not allowed - ValueError: If filters conjuction not allowed + ValueError: If operator1 not allowed. + ValueError: If operator2 not allowed. + ValueError: If operators conjunction not allowed. + ValueError: If filters conjunction not allowed. Returns: dict: New modified dict with mapped operators. @@ -475,23 +465,23 @@ def operators_mapping( raise ValueError( f"This comparison operator: {operator2_to_change} is not allowed. Please read the function documentation for details!" ) - if parameters.get("operators_conjuction"): - logical_op_to_change = parameters.get("operators_conjuction") + if parameters.get("operators_conjunction"): + logical_op_to_change = parameters.get("operators_conjunction") if logical_op_to_change in logical_op.keys(): - parameters["operators_conjuction"] = logical_op[ + parameters["operators_conjunction"] = logical_op[ logical_op_to_change ] else: raise ValueError( - f"This conjuction (logical) operator: {logical_op_to_change} is not allowed. Please read the function documentation for details!" + f"This conjunction (logical) operator: {logical_op_to_change} is not allowed. Please read the function documentation for details!" 
) - if parameters.get("filters_conjuction"): - logical_fl_to_change = parameters.get("filters_conjuction") + if parameters.get("filters_conjunction"): + logical_fl_to_change = parameters.get("filters_conjunction") if logical_fl_to_change in logical_op.keys(): - parameters["filters_conjuction"] = logical_op[logical_fl_to_change] + parameters["filters_conjunction"] = logical_op[logical_fl_to_change] else: raise ValueError( - f"This filters conjuction (logical) operator: {logical_fl_to_change} is not allowed. Please read the function documentation for details!" + f"This filters conjunction (logical) operator: {logical_fl_to_change} is not allowed. Please read the function documentation for details!" ) return filters_dict @@ -501,7 +491,7 @@ def make_filter_for_api(self, filters: dict) -> str: Function changing type of operators to match MS API style as 'str' passing to URL call. Args: - filters (dict): A dictionar which contains operators. + filters (dict): A dictionary which contains operators. Returns: str: Output as filtering string to pass as filter parameter to API. @@ -525,7 +515,7 @@ def make_filter_for_api(self, filters: dict) -> str: ).isoformat() filter_text = ( filter_text - + f" {parameters.get('operators_conjuction')} {column} {parameters.get('operator2')} datetime'{from_date2}' " + + f" {parameters.get('operators_conjunction')} {column} {parameters.get('operator2')} datetime'{from_date2}' " ) elif parameters.get("dtype") not in ["datetime", "date"]: filter_text = ( @@ -537,8 +527,8 @@ def make_filter_for_api(self, filters: dict) -> str: filter_text + f"{column} {parameters.get('operator2')} '{parameters.get('value2')}'" ) - if parameters.get("filters_conjuction"): - filter_text = filter_text + f"{parameters.get('filters_conjuction')} " + if parameters.get("filters_conjunction"): + filter_text = filter_text + f"{parameters.get('filters_conjunction')} " return filter_text @@ -567,11 +557,13 @@ def make_filter_for_df( if parameters.get("operator2"): filter_in_df = ( filter_in_df - + f") {parameters.get('operators_conjuction')} (df.{column} {parameters.get('operator2', '')} '{parameters.get('value2', '')}'" + + f") {parameters.get('operators_conjunction')} (df.{column} {parameters.get('operator2', '')} '{parameters.get('value2', '')}'" ) - if parameters.get("filters_conjuction"): - filter_in_df = filter_in_df + ")" + parameters.get("filters_conjuction") + if parameters.get("filters_conjunction"): + filter_in_df = ( + filter_in_df + ")" + parameters.get("filters_conjunction") + ) else: filter_in_df = filter_in_df + ")" @@ -601,11 +593,11 @@ def list_item_to_df( required_fields (List[str]): Required fields(columns) need to be extracted from Sharepoint List. Default to None. field_property (List[str]): Property to expand with expand query method. - All propertys can be found under list.item.properties. + All properties can be found under list.item.properties. Default to ["Title"] filters (dict): Dictionary with operators which filters the SharepointList output. 
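# A small sketch of the operator handling shown above. `operators_mapping` first
# rewrites the user-facing symbols into tokens the Microsoft API understands
# (the mapping mirrors the pattern in this class's unit tests; logical '&' and '|'
# map to 'and'/'or' the same way), and for non-date columns the API filter is then
# rendered as e.g. "Status eq 'Closed'". The filter dict below is illustrative only.
comparison_map = {"<": "lt", ">": "gt", "<=": "le", ">=": "ge", "==": "eq", "!=": "ne"}

filters = {
    "Status": {"dtype": "str", "operator1": "==", "value1": "Closed"},
}

# Step 1: map the comparison operator (what `operators_mapping` does).
mapped = {
    column: {**params, "operator1": comparison_map[params["operator1"]]}
    for column, params in filters.items()
}

# Step 2: render the API-style filter string for a non-date column,
# following the same pattern as `make_filter_for_api`.
filter_text = " ".join(
    f"{column} {params['operator1']} '{params['value1']}'"
    for column, params in mapped.items()
)
print(filter_text)  # Status eq 'Closed'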
allowed dtypes: ('datetime','date','bool','int', 'float', 'complex', 'str') - allowed conjuction: ('&','|') + allowed conjunction: ('&','|') allowed operators: ('<','>','<=','>=','==','!=') Example how to build the dict: filters = { @@ -616,8 +608,8 @@ def list_item_to_df( 'value2':'YYYY-MM-DD', 'operator1':'>=', 'operator2':'<=', - 'operators_conjuction':'&', - 'filters_conjuction':'&', + 'operators_conjunction':'&', + 'filters_conjunction':'&', } , 'Column_name_2' : @@ -675,7 +667,7 @@ def list_item_to_df( self.ctx.execute_query() except (ClientRequestException, ValueError) as e: - # Extract all data from specific SP List without basic filtering. Additional logic for filtering applied on DataFreame level. + # Extract all data from specific SP List without basic filtering. Additional logic for filtering applied on DataFrame level. logger.info(f"Exception SPQueryThrottledException occurred: {e}") list_items = ( self.list_object.items.get_all(row_count, log_of_progress) @@ -691,7 +683,7 @@ def list_item_to_df( ) if download_all == True and filters is not None: - # Filter for desired range of created date and for factory Namyslow PL + # Apply filters to the data frame -> accordingly to the filter dict passed as na parameter self.logger.info("Filtering df with all data output") filter_for_df = self.make_filter_for_df(filters) df = eval(filter_for_df) diff --git a/viadot/tasks/sharepoint.py b/viadot/tasks/sharepoint.py index 635f9a5ae..f87134bb7 100644 --- a/viadot/tasks/sharepoint.py +++ b/viadot/tasks/sharepoint.py @@ -243,11 +243,11 @@ class SharepointListToDF(Task): required_fields (List[str]): Required fields(columns) need to be extracted from Sharepoint List. Default to None. field_property (List[str]): Property to expand with expand query method. - All propertys can be found under list.item.properties. + All properties can be found under list.item.properties. Default to ["Title"] - filters (dict, optional): Dictionary with operators which filters the SharepointList output. + filters (dict, optional): Dictionary with operators which filters the SharepointList output. Default to None. allowed dtypes: ('datetime','date','bool','int', 'float', 'complex', 'str') - allowed conjuction: ('&','|') + allowed conjunction: ('&','|') allowed operators: ('<','>','<=','>=','==','!=') Example how to build the dict: filters = { @@ -258,8 +258,8 @@ class SharepointListToDF(Task): 'value2':'YYYY-MM-DD', 'operator1':'>=', 'operator2':'<=', - 'operators_conjuction':'&', - 'filters_conjuction':'&', + 'operators_conjunction':'&', + 'filters_conjunction':'&', } , 'Column_name_2' : @@ -329,7 +329,7 @@ def _rename_duplicated_fields(self, df): It might happen that fields returned by get_fields() will be different than actual list items fields ( from it's properties) It is specific to sharepoint lists. - MS allowed users to create fields with simillar names (but with different letters style) + MS allowed users to create fields with similar names (but with different letters style) fields with same values. For example Id and ID - > office select function doesn't recognize upper/lower cases. @@ -368,13 +368,13 @@ def _rename_duplicated_fields(self, df): def _convert_camel_case_to_words(self, input_str: str) -> str: """ - Function for converting internal names joined as camelCase column names to regular words + Function for converting internal names joined as camelCase column names to regular words. Args: - input_str (str): Column name + input_str (str): Column name. 
Returns: - str: Converted column name + str: Converted column name. """ self.input_str = input_str @@ -396,7 +396,7 @@ def change_column_name(self, df: pd.DataFrame, credentials: str = None): credentials (str): Credentials str for sharepoint connection establishing. Defaults to None. Returns: - pd.DataFrame: Data frame with changed column names + pd.DataFrame: Data frame with changed column names. """ s = SharepointList( credentials=self.credentials, From 7d4941929984cbadbf982523fe5b942f19ecbcef Mon Sep 17 00:00:00 2001 From: mgwinner Date: Wed, 15 Nov 2023 09:58:32 +0100 Subject: [PATCH 78/86] =?UTF-8?q?=E2=9C=A8=20Add=20new=20requirements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + requirements.txt | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c8a63ab1..a71c3496e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `TM1` source class. - Added `TM1ToDF` task class. - Added `set_prefect_kv` parameter to `BigQueryToADLS` with `False` as a default. If there is a need to create new pair in KV Store the parameter can be changed to `True`. +- Added libraries `nltk` and `sklearn` to `requirements`. ### Fixed diff --git a/requirements.txt b/requirements.txt index 896b11d1a..4d6c3a15f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -44,3 +44,5 @@ dbt-sqlserver==1.3.1 lumaCLI==0.0.19 Office365-REST-Python-Client==2.4.4 TM1py==1.11.3 +nltk==3.8.1 +scikit-learn==1.3.2 \ No newline at end of file From e1e49df7f8c5babdd885e2be8e22cd1c416002c8 Mon Sep 17 00:00:00 2001 From: burzekj Date: Wed, 15 Nov 2023 10:17:48 +0100 Subject: [PATCH 79/86] =?UTF-8?q?=E2=9C=85=20=20Added=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/integration/tasks/test_task_utils.py | 51 +++++++++++++++++++++- viadot/task_utils.py | 25 +++++++++++ viadot/tasks/genesys.py | 13 +----- 3 files changed, 76 insertions(+), 13 deletions(-) diff --git a/tests/integration/tasks/test_task_utils.py b/tests/integration/tasks/test_task_utils.py index f22d55022..63887d40a 100644 --- a/tests/integration/tasks/test_task_utils.py +++ b/tests/integration/tasks/test_task_utils.py @@ -3,7 +3,7 @@ from prefect.engine.state import Failed, Success from prefect.tasks.secrets import PrefectSecret -from viadot.task_utils import custom_mail_state_handler, set_new_kv +from viadot.task_utils import custom_mail_state_handler, set_new_kv, check_value def test_custom_state_handler(): @@ -28,3 +28,52 @@ def test_set_new_kv(): result = get_key_value("test_for_setting_kv") assert result == "72" set_key_value(key="test_for_setting_kv", value=None) + + +# Sample test checking the correctness of the function when the key is found +def test_check_value_found(): + json_data = { + "first_known_lvl": { + "second_known_lvl": { + "third_known_lvl": { + "searched_phrase": "phrase" + } + } + } + } + result = check_value(json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], ["searched_phrase"]) + assert result == "phrase" + +# Sample test checking the correctness of the function when the key is not found +def test_check_value_not_found(): + json_data = { + "first_known_lvl": { + "second_known_lvl": { + "third_known_lvl": { + "other_phrase": "This won't be found" + } + } + } + } + result = check_value(json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], 
["searched_phrase"]) + assert result is None + +# Sample test checking the correctness of the function with an empty dictionary +def test_check_value_empty_dict(): + json_data = {} + result = check_value(json_data, ["searched_phrase"]) + assert result is None + +# Sample test checking the correctness of the function with a nonexistent key +def test_check_value_nonexistent_key(): + json_data = { + "first_known_lvl": { + "second_known_lvl": { + "third_known_lvl": { + "searched_phrase": "phrase" + } + } + } + } + result = check_value(json_data, ["nonexistent_key"]) + assert result is None \ No newline at end of file diff --git a/viadot/task_utils.py b/viadot/task_utils.py index 6173e2994..41494a929 100644 --- a/viadot/task_utils.py +++ b/viadot/task_utils.py @@ -792,3 +792,28 @@ def validate_df(df: pd.DataFrame, tests: dict = None) -> None: raise ValidationError( f"Validation failed for {failed_tests} test/tests: {failed_tests_msg}" ) + + +def check_value(base, lvls: List): + """ + Task to extract data from nested json file if there is any under passed parameters. + Otherwise return None. + + Args: + base: variable with base lvl of the json, fo example: + json_file["first_known_lvl"]["second_known_lvl"]["third_known_lvl"] + lvls (List): List of potential lower levels of nested json for data retrieval. For example: + ["first_lvl_below_base", "second_lvl_below_base", "searched_phrase"] + + Return: + Searched value for the lowest level, in example data under "searched_phrase" key. + """ + + for lvl in lvls: + if isinstance(base, dict): + base = base.get(lvl) + if base is None: + return None + else: + return base + return base \ No newline at end of file diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 187ba1150..7986d9b0c 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -520,20 +520,9 @@ def run( ) last_page = temp_json["pageCount"] + 1 - # Function to extract nested data from json file - def check_value(base, lvls): - for lvl in lvls: - if isinstance(base, dict): - base = base.get(lvl) - if base is None: - return None - else: - return base - return base - data_list = [] - # For loop to donwload all pages from Genesys GET API + # For loop to download all pages from Genesys GET API for n in range(1, last_page): json_file = genesys.genesys_api_connection( post_data_list=post_data_list, From 1e37dcd8eb099173812ed2b0462c520a4c0c7e48 Mon Sep 17 00:00:00 2001 From: Marcin Purtak <44641138+marcinpurtak@users.noreply.github.com> Date: Wed, 15 Nov 2023 10:20:13 +0100 Subject: [PATCH 80/86] Update CHANGELOG.md --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fd6fedf8..4201189c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,9 +19,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 -> docstrings update - Modified `SharepointToADLS` flow class: -> docstrings update - -> changed key_value_param: bool = False to prevent forced KV store append + -> changed set_prefect_kv: bool = False to prevent forced KV store append - Modified `SharepointListToADLS` flow class: - -> changed key_value_param: bool = False to prevent forced KV store append + -> changed set_prefect_kv: bool = False to prevent forced KV store append - Modified `SharepointList` source class: -> docstrings update -> Changed `_unpack_fields` method to handle Sharepoint MultiChoiceField type + small improvements @@ -662,4 +662,4 @@ specified in the `SUPERMETRICS_DEFAULT_USER` secret - Moved from 
poetry to pip ### Fixed -- Fix `AzureBlobStorage`'s `to_storage()` method is missing the final upload blob part \ No newline at end of file +- Fix `AzureBlobStorage`'s `to_storage()` method is missing the final upload blob part From f31406705eea2196603b8c13727e5d84af66379f Mon Sep 17 00:00:00 2001 From: burzekj Date: Wed, 15 Nov 2023 10:41:27 +0100 Subject: [PATCH 81/86] =?UTF-8?q?=F0=9F=8E=A8=20output=20dataframe=20corre?= =?UTF-8?q?ctions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- viadot/tasks/genesys.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index cf470c170..4268e830e 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -490,11 +490,24 @@ def run( temp_dict["conversationId"] = json_file.get("id") temp_dict["startTime"] = json_file.get("startTime") temp_dict["endTime"] = json_file.get("endTime") - data_list.append(temp_dict) + desired_order = [ + "startTime", + "endTime", + "LOB", + "CustomerOutcomeResult", + "CustomerOutcomeTrack", + "LastUtterance", + "Final Sub Intent", + "SubIntent", + "Final Main Intent", + "conversationId", + ] + df = pd.DataFrame(data_list) - df = df[df.columns[-1:]].join(df[df.columns[:-1]]) + df = df[desired_order] + df.rename(columns={"LastUtterance": "CustomerTextInput"}, inplace=True) start = start_date.replace("-", "") end = end_date.replace("-", "") From 82cbd3f3409343f7bc18d0e94870cd94518d5b3c Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Wed, 15 Nov 2023 11:12:32 +0100 Subject: [PATCH 82/86] Fixed typo in tests --- tests/integration/test_sharepoint.py | 52 ++++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/tests/integration/test_sharepoint.py b/tests/integration/test_sharepoint.py index dbbbf1e70..c45925ccb 100644 --- a/tests/integration/test_sharepoint.py +++ b/tests/integration/test_sharepoint.py @@ -240,7 +240,7 @@ def test_filters_missing_value1(sharepoint_list): sharepoint_list.check_filters(filters) -def test_filters_missing_operators_conjuction(sharepoint_list): +def test_filters_missing_operators_conjunction(sharepoint_list): filters = { "filter1": { "dtype": "int", @@ -253,13 +253,13 @@ def test_filters_missing_operators_conjuction(sharepoint_list): with pytest.raises( ValueError, match=re.escape( - "Operator for conjuction is missing! Expected: ['&', '|'] got empty." + "Operator for conjunction is missing! Expected: ['&', '|'] got empty." ), ): sharepoint_list.check_filters(filters) -def test_filters_invalid_operators_conjuction(sharepoint_list): +def test_filters_invalid_operators_conjunction(sharepoint_list): filters = { "filter1": { "dtype": "int", @@ -267,43 +267,43 @@ def test_filters_invalid_operators_conjuction(sharepoint_list): "value1": 10, "operator2": "<", "value2": 20, - "operators_conjuction": "!", + "operators_conjunction": "!", }, } with pytest.raises( ValueError, match=re.escape( - "Operator for conjuction not allowed! Expected: ['&', '|'] got ! ." + "Operator for conjunction not allowed! Expected: ['&', '|'] got ! ." 
), ): sharepoint_list.check_filters(filters) -def test_filters_conjuction_not_allowed(sharepoint_list): +def test_filters_conjunction_not_allowed(sharepoint_list): filters = { "filter1": { "dtype": "int", "operator1": ">", "value1": 10, - "filters_conjuction": "!", + "filters_conjunction": "!", }, } with pytest.raises( ValueError, match=re.escape( - "Filters conjuction allowed only when more then one filter provided!" + "Filters conjunction allowed only when more then one filter provided!" ), ): sharepoint_list.check_filters(filters) -def test_filters_invalid_conjuction(sharepoint_list): +def test_filters_invalid_conjunction(sharepoint_list): filters = { "filter1": { "dtype": "int", "value1": 10, "operator1": ">", - "filters_conjuction": "!", + "filters_conjunction": "!", }, "filter2": { "dtype": "int", @@ -313,7 +313,7 @@ def test_filters_invalid_conjuction(sharepoint_list): with pytest.raises( ValueError, match=re.escape( - "Filter operator for conjuction not allowed! Expected: ['&', '|'] got ! ." + "Filter operator for conjunction not allowed! Expected: ['&', '|'] got ! ." ), ): sharepoint_list.check_filters(filters) @@ -327,8 +327,8 @@ def test_valid_mapping(sharepoint_list): "value2": 20, "operator1": ">", "operator2": "<=", - "operators_conjuction": "&", - "filters_conjuction": "|", + "operators_conjunction": "&", + "filters_conjunction": "|", }, "filter2": { "dtype": "int", @@ -336,7 +336,7 @@ def test_valid_mapping(sharepoint_list): "value2": 0, "operator1": "==", "operator2": "!=", - "operators_conjuction": "|", + "operators_conjunction": "|", }, } expected_result = { @@ -346,8 +346,8 @@ def test_valid_mapping(sharepoint_list): "value2": 20, "operator1": "gt", "operator2": "le", - "operators_conjuction": "and", - "filters_conjuction": "or", + "operators_conjunction": "and", + "filters_conjunction": "or", }, "filter2": { "dtype": "int", @@ -355,7 +355,7 @@ def test_valid_mapping(sharepoint_list): "value2": 0, "operator1": "eq", "operator2": "ne", - "operators_conjuction": "or", + "operators_conjunction": "or", }, } result = sharepoint_list.operators_mapping(filters) @@ -367,8 +367,8 @@ def test_operators_mapping_invalid_comparison_operator(sharepoint_list): "filter1": { "operator1": "*", "operator2": "<=", - "operators_conjuction": "&", - "filters_conjuction": "|", + "operators_conjunction": "&", + "filters_conjunction": "|", }, } error_message = "This comparison operator: * is not allowed. Please read the function documentation for details!" @@ -381,11 +381,11 @@ def test_operators_mapping_invalid_logical_operator(sharepoint_list): "filter1": { "operator1": ">", "operator2": "<=", - "operators_conjuction": "!", - "filters_conjuction": "|", + "operators_conjunction": "!", + "filters_conjunction": "|", }, } - error_message = "This conjuction (logical) operator: ! is not allowed. Please read the function documentation for details!" + error_message = "This conjunction (logical) operator: ! is not allowed. Please read the function documentation for details!" with pytest.raises(ValueError, match=re.escape(error_message)): sharepoint_list.operators_mapping(filters) @@ -395,11 +395,11 @@ def test_operators_mapping_invalid_filters_logical_operator(sharepoint_list): "filter1": { "operator1": ">", "operator2": "<=", - "operators_conjuction": "&", - "filters_conjuction": "!", + "operators_conjunction": "&", + "filters_conjunction": "!", }, } - error_message = "This filters conjuction (logical) operator: ! is not allowed. Please read the function documentation for details!" 
+ error_message = "This filters conjunction (logical) operator: ! is not allowed. Please read the function documentation for details!" with pytest.raises(ValueError, match=re.escape(error_message)): sharepoint_list.operators_mapping(filters) @@ -438,7 +438,7 @@ def test_single_df_filter(sharepoint_list): def test_multiple_df_filters(sharepoint_list): filters = { - "column1": {"operator1": ">", "value1": 10, "filters_conjuction": "&"}, + "column1": {"operator1": ">", "value1": 10, "filters_conjunction": "&"}, "column2": {"operator1": "<", "value1": 20}, } result = sharepoint_list.make_filter_for_df(filters) From 35b278311640e925523abe98b63f885737d83718 Mon Sep 17 00:00:00 2001 From: burzekj Date: Wed, 15 Nov 2023 11:25:20 +0100 Subject: [PATCH 83/86] moved fun from task_utils to utils --- tests/integration/tasks/test_task_utils.py | 53 +--------------------- tests/unit/test_utils.py | 49 ++++++++++++++++++++ viadot/task_utils.py | 27 +---------- viadot/tasks/genesys.py | 1 + viadot/utils.py | 27 ++++++++++- 5 files changed, 79 insertions(+), 78 deletions(-) diff --git a/tests/integration/tasks/test_task_utils.py b/tests/integration/tasks/test_task_utils.py index 63887d40a..5d77ab7e9 100644 --- a/tests/integration/tasks/test_task_utils.py +++ b/tests/integration/tasks/test_task_utils.py @@ -3,7 +3,7 @@ from prefect.engine.state import Failed, Success from prefect.tasks.secrets import PrefectSecret -from viadot.task_utils import custom_mail_state_handler, set_new_kv, check_value +from viadot.task_utils import custom_mail_state_handler, set_new_kv def test_custom_state_handler(): @@ -27,53 +27,4 @@ def test_set_new_kv(): set_new_kv.run(kv_name="test_for_setting_kv", df=df, filter_column="col1") result = get_key_value("test_for_setting_kv") assert result == "72" - set_key_value(key="test_for_setting_kv", value=None) - - -# Sample test checking the correctness of the function when the key is found -def test_check_value_found(): - json_data = { - "first_known_lvl": { - "second_known_lvl": { - "third_known_lvl": { - "searched_phrase": "phrase" - } - } - } - } - result = check_value(json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], ["searched_phrase"]) - assert result == "phrase" - -# Sample test checking the correctness of the function when the key is not found -def test_check_value_not_found(): - json_data = { - "first_known_lvl": { - "second_known_lvl": { - "third_known_lvl": { - "other_phrase": "This won't be found" - } - } - } - } - result = check_value(json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], ["searched_phrase"]) - assert result is None - -# Sample test checking the correctness of the function with an empty dictionary -def test_check_value_empty_dict(): - json_data = {} - result = check_value(json_data, ["searched_phrase"]) - assert result is None - -# Sample test checking the correctness of the function with a nonexistent key -def test_check_value_nonexistent_key(): - json_data = { - "first_known_lvl": { - "second_known_lvl": { - "third_known_lvl": { - "searched_phrase": "phrase" - } - } - } - } - result = check_value(json_data, ["nonexistent_key"]) - assert result is None \ No newline at end of file + set_key_value(key="test_for_setting_kv", value=None) \ No newline at end of file diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 777617244..1ca967141 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -9,6 +9,7 @@ add_viadot_metadata_columns, check_if_empty_file, gen_bulk_insert_query_from_df, + 
check_value, ) EMPTY_CSV_PATH = "empty.csv" @@ -153,3 +154,51 @@ def test_add_viadot_metadata_columns_with_parameter(): assert df_base.columns.to_list() == ["a", "b"] assert df_decorated.columns.to_list() == ["a", "b", "_viadot_source"] assert df_decorated["_viadot_source"][0] == "Source_name" + +# Sample test checking the correctness of the function when the key is found +def test_check_value_found(): + json_data = { + "first_known_lvl": { + "second_known_lvl": { + "third_known_lvl": { + "searched_phrase": "phrase" + } + } + } + } + result = check_value(json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], ["searched_phrase"]) + assert result == "phrase" + +# Sample test checking the correctness of the function when the key is not found +def test_check_value_not_found(): + json_data = { + "first_known_lvl": { + "second_known_lvl": { + "third_known_lvl": { + "other_phrase": "This won't be found" + } + } + } + } + result = check_value(json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], ["searched_phrase"]) + assert result is None + +# Sample test checking the correctness of the function with an empty dictionary +def test_check_value_empty_dict(): + json_data = {} + result = check_value(json_data, ["searched_phrase"]) + assert result is None + +# Sample test checking the correctness of the function with a nonexistent key +def test_check_value_nonexistent_key(): + json_data = { + "first_known_lvl": { + "second_known_lvl": { + "third_known_lvl": { + "searched_phrase": "phrase" + } + } + } + } + result = check_value(json_data, ["nonexistent_key"]) + assert result is None diff --git a/viadot/task_utils.py b/viadot/task_utils.py index 924b2e6a6..32be339af 100644 --- a/viadot/task_utils.py +++ b/viadot/task_utils.py @@ -791,29 +791,4 @@ def validate_df(df: pd.DataFrame, tests: dict = None) -> None: failed_tests_msg = ", ".join(failed_tests_list) raise ValidationError( f"Validation failed for {failed_tests} test/tests: {failed_tests_msg}" - ) - - -def check_value(base, lvls: List): - """ - Task to extract data from nested json file if there is any under passed parameters. - Otherwise return None. - - Args: - base: variable with base lvl of the json, fo example: - json_file["first_known_lvl"]["second_known_lvl"]["third_known_lvl"] - lvls (List): List of potential lower levels of nested json for data retrieval. For example: - ["first_lvl_below_base", "second_lvl_below_base", "searched_phrase"] - - Return: - Searched value for the lowest level, in example data under "searched_phrase" key. 
- """ - - for lvl in lvls: - if isinstance(base, dict): - base = base.get(lvl) - if base is None: - return None - else: - return base - return base \ No newline at end of file + ) \ No newline at end of file diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index cbc18292e..628af1177 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -13,6 +13,7 @@ from viadot.exceptions import APIError from viadot.sources import Genesys +from viadot.utils import check_value from viadot.task_utils import * logger = logging.get_logger() diff --git a/viadot/utils.py b/viadot/utils.py index d05cfdd95..e77323aca 100644 --- a/viadot/utils.py +++ b/viadot/utils.py @@ -2,7 +2,7 @@ import os import re from itertools import chain -from typing import Any, Callable, Dict, List, Literal +from typing import Union, Any, Callable, Dict, List, Literal import pandas as pd import prefect @@ -460,3 +460,28 @@ def get_nested_dict(d): return d else: return None + + +def check_value(base: Union[Dict, Any], levels: List) -> Union[None, Any]: + """ + Task to extract data from nested json file if there is any under passed parameters. + Otherwise return None. + + Args: + base (Dict, Any): variable with base lvl of the json, for example: + json_file["first_known_lvl"]["second_known_lvl"]["third_known_lvl"] + levels (List): List of potential lower levels of nested json for data retrieval. For example: + ["first_lvl_below_base", "second_lvl_below_base", "searched_phrase"] + + Returns: + Union[None, Any]: Searched value for the lowest level, in example data under "searched_phrase" key. + """ + + for lvl in levels: + if isinstance(base, dict): + base = base.get(lvl) + if base is None: + return None + else: + return base + return base \ No newline at end of file From ad02ad4ed2ea05d1c681b394650f2ca6df29c628 Mon Sep 17 00:00:00 2001 From: burzekj Date: Wed, 15 Nov 2023 11:48:28 +0100 Subject: [PATCH 84/86] blackformatter changes --- tests/integration/tasks/test_task_utils.py | 2 +- tests/unit/test_utils.py | 30 +++++++++++----------- viadot/task_utils.py | 2 +- viadot/utils.py | 6 ++--- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/integration/tasks/test_task_utils.py b/tests/integration/tasks/test_task_utils.py index 5d77ab7e9..f22d55022 100644 --- a/tests/integration/tasks/test_task_utils.py +++ b/tests/integration/tasks/test_task_utils.py @@ -27,4 +27,4 @@ def test_set_new_kv(): set_new_kv.run(kv_name="test_for_setting_kv", df=df, filter_column="col1") result = get_key_value("test_for_setting_kv") assert result == "72" - set_key_value(key="test_for_setting_kv", value=None) \ No newline at end of file + set_key_value(key="test_for_setting_kv", value=None) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 1ca967141..75ef30e97 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -155,49 +155,49 @@ def test_add_viadot_metadata_columns_with_parameter(): assert df_decorated.columns.to_list() == ["a", "b", "_viadot_source"] assert df_decorated["_viadot_source"][0] == "Source_name" + # Sample test checking the correctness of the function when the key is found def test_check_value_found(): json_data = { "first_known_lvl": { - "second_known_lvl": { - "third_known_lvl": { - "searched_phrase": "phrase" - } - } + "second_known_lvl": {"third_known_lvl": {"searched_phrase": "phrase"}} } } - result = check_value(json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], ["searched_phrase"]) + result = check_value( + 
json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], + ["searched_phrase"], + ) assert result == "phrase" + # Sample test checking the correctness of the function when the key is not found def test_check_value_not_found(): json_data = { "first_known_lvl": { "second_known_lvl": { - "third_known_lvl": { - "other_phrase": "This won't be found" - } + "third_known_lvl": {"other_phrase": "This won't be found"} } } } - result = check_value(json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], ["searched_phrase"]) + result = check_value( + json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"], + ["searched_phrase"], + ) assert result is None + # Sample test checking the correctness of the function with an empty dictionary def test_check_value_empty_dict(): json_data = {} result = check_value(json_data, ["searched_phrase"]) assert result is None + # Sample test checking the correctness of the function with a nonexistent key def test_check_value_nonexistent_key(): json_data = { "first_known_lvl": { - "second_known_lvl": { - "third_known_lvl": { - "searched_phrase": "phrase" - } - } + "second_known_lvl": {"third_known_lvl": {"searched_phrase": "phrase"}} } } result = check_value(json_data, ["nonexistent_key"]) diff --git a/viadot/task_utils.py b/viadot/task_utils.py index 32be339af..6a532f932 100644 --- a/viadot/task_utils.py +++ b/viadot/task_utils.py @@ -791,4 +791,4 @@ def validate_df(df: pd.DataFrame, tests: dict = None) -> None: failed_tests_msg = ", ".join(failed_tests_list) raise ValidationError( f"Validation failed for {failed_tests} test/tests: {failed_tests_msg}" - ) \ No newline at end of file + ) diff --git a/viadot/utils.py b/viadot/utils.py index e77323aca..5e3de784c 100644 --- a/viadot/utils.py +++ b/viadot/utils.py @@ -460,10 +460,10 @@ def get_nested_dict(d): return d else: return None - + def check_value(base: Union[Dict, Any], levels: List) -> Union[None, Any]: - """ + """ Task to extract data from nested json file if there is any under passed parameters. Otherwise return None. 
@@ -484,4 +484,4 @@ def check_value(base: Union[Dict, Any], levels: List) -> Union[None, Any]: return None else: return base - return base \ No newline at end of file + return base From e33cd9d13d78ca866646b7eba1c44f4150c71669 Mon Sep 17 00:00:00 2001 From: marcinpurtak Date: Wed, 15 Nov 2023 12:44:22 +0100 Subject: [PATCH 85/86] Revert of changes for desired columns --- viadot/tasks/genesys.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/viadot/tasks/genesys.py b/viadot/tasks/genesys.py index 2655552bf..942249ac2 100644 --- a/viadot/tasks/genesys.py +++ b/viadot/tasks/genesys.py @@ -493,22 +493,8 @@ def run( temp_dict["endTime"] = json_file.get("endTime") data_list.append(temp_dict) - desired_order = [ - "startTime", - "endTime", - "LOB", - "CustomerOutcomeResult", - "CustomerOutcomeTrack", - "LastUtterance", - "Final Sub Intent", - "SubIntent", - "Final Main Intent", - "conversationId", - ] - df = pd.DataFrame(data_list) - df = df[desired_order] - df.rename(columns={"LastUtterance": "CustomerTextInput"}, inplace=True) + df = df[df.columns[-1:]].join(df[df.columns[:-1]]) start = start_date.replace("-", "") end = end_date.replace("-", "") From 9a8b3c936f7f2fb4e395620f38b6947929aaf864 Mon Sep 17 00:00:00 2001 From: Rafalz13 Date: Wed, 15 Nov 2023 12:53:54 +0100 Subject: [PATCH 86/86] =?UTF-8?q?=F0=9F=93=9D=20Updated=20Changelog=20befo?= =?UTF-8?q?re=20release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ce6cef40..2ef880c75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,37 +5,43 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added + +### Fixed + +### Changed + +## [0.4.22] - 2023-11-15 ### Added -- Added new view type `agent_interaction_view_type` in `Genesys`source. - Added `TM1` source class. - Added `TM1ToDF` task class. - Added `set_prefect_kv` parameter to `BigQueryToADLS` with `False` as a default. If there is a need to create new pair in KV Store the parameter can be changed to `True`. -- Added `_rename_duplicated_fields` method to `SharepointListToDF` task class for finding and rename duplicated columns +- Added `_rename_duplicated_fields` method to `SharepointListToDF` task class for finding and rename duplicated columns. - Added new view type `agent_interaction_view_type` in `Genesys`source. -- Added libraries `nltk` and `sklearn` to `requirements`. - Added new logic for endpoint `users` in `Genesys`task. +- Added libraries `nltk` and `sklearn` to `requirements`. ### Fixed - Fixed bug for endpoint `conversations` in GET method in `Genesys` Task. ### Changed -- Splitted test for Eurostat on source tests and task tests +- Splitted test for `Eurostat` on source tests and task tests. - Modified `SharepointList` source class: - -> docstrings update + -> docstrings update. - Modified `SharepointToADLS` flow class: - -> docstrings update - -> changed set_prefect_kv: bool = False to prevent forced KV store append + -> docstrings update. + -> changed set_prefect_kv: bool = False to prevent forced KV store append. 
- Modified `SharepointListToADLS` flow class: - -> changed set_prefect_kv: bool = False to prevent forced KV store append + -> changed set_prefect_kv: bool = False to prevent forced KV store append. - Modified `SharepointList` source class: - -> docstrings update - -> Changed `_unpack_fields` method to handle Sharepoint MultiChoiceField type + small improvements - -> Changed `get_fields` method to handle special characters - different approach to call get() and execute_query() - -> Renamed method from `select_expandable_user_fields` to `select_fields` + update for MultiChoiceField type - -> Changed `check_filters` method errors messages and more checks added - -> Changed `operators_mapping` method errors messages - -> Changed `make_filter_for_df` method errors messages + -> docstrings update. + -> Changed `_unpack_fields` method to handle Sharepoint MultiChoiceField type + small improvements. + -> Changed `get_fields` method to handle special characters - different approach to call get() and execute_query(). + -> Renamed method from `select_expandable_user_fields` to `select_fields` + update for MultiChoiceField type. + -> Changed `check_filters` method errors messages and more checks added. + -> Changed `operators_mapping` method errors messages. + -> Changed `make_filter_for_df` method errors messages. - Modified `SharepointListToDF` task class: -> docstrings update - Splitted test for Eurostat on source tests and task tests. @@ -43,8 +49,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Expanded `CustomerGaugeToDF` task class with separate cleaning functions and handling nested json structure flattening with two new methods `_field_reference_unpacker` and `_nested_dict_transformer`. - Changed `CustomerGaugeToADLS` to containing new arguments. -### Fixed - ## [0.4.21] - 2023-10-26 ### Added @@ -60,6 +64,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Changed `GenesysToCSV` logic for end_point == "conversations". Added new fields to extraction. + ## [0.4.20] - 2023-10-12 ### Added - Added `Office365-REST-Python-Client` library to `requirements`.
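
A minimal usage sketch of the `check_value` helper that this series relocates from `viadot/task_utils.py` to `viadot/utils.py`, mirroring the unit tests added in `tests/unit/test_utils.py` above. It assumes viadot is installed with these patches applied; the sample payload and variable names are illustrative only.

    from viadot.utils import check_value

    # Illustrative nested payload, shaped like the test fixtures above.
    json_data = {
        "first_known_lvl": {
            "second_known_lvl": {"third_known_lvl": {"searched_phrase": "phrase"}}
        }
    }

    # Walk the levels below the already-known part of the structure.
    value = check_value(
        json_data["first_known_lvl"]["second_known_lvl"]["third_known_lvl"],
        ["searched_phrase"],
    )
    print(value)  # "phrase"

    # A missing level returns None instead of raising a KeyError.
    print(check_value(json_data, ["nonexistent_key"]))  # None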