Merge pull request #828 from dyvenia/dev
Release 0.4.23 PR
Rafalz13 authored Dec 7, 2023
2 parents ee8c34d + a104422 commit b4e3156
Showing 25 changed files with 986 additions and 224 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.md
@@ -11,6 +11,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed

## [0.4.23] - 2023-12-07
### Added
- Added tests for the new credential-passing functionality in `SAPRFC` and `SAPRFCV2`.
- Added new parameters for mapping and reordering the DataFrame in the `Genesys` task and flow.
- Added `get_task_logs` task to search for logs in the flow.
- Added `get_flow_run_id` task to find the flow run ID.
- Added `search_for_msg_in_logs` task, used to control flows in multiflows by searching for a given log message from a given task.
- Added closing of the session to `SAPBW`.
- Added `CSV` as a new output extension to the `SharepointListToADLS` flow.

### Fixed
- Fixed URL creation in the `VidClub` source class: when `region=None`, the region parameter is not included in the URL.

### Changed
- Added `if_no_data_returned` parameter to the SharePoint list flow, which can fail, warn, or skip (continue execution as before) when no data is returned.
- Changed `__init__` in the `SAPRFC` and `SAPRFCV2` source classes to raise a warning in Prefect when credentials are taken from DEV.


## [0.4.22] - 2023-11-15
### Added
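Taken together, the SharePoint entries above add a CSV output option and a configurable reaction to empty extracts. A minimal usage sketch, based only on the tests added later in this diff; the list title, site URL, and file name are placeholders, and `adls_sp_credentials_secret` is omitted on the assumption that a default can be resolved:

```python
from viadot.flows import SharepointListToADLS

flow = SharepointListToADLS(
    "sharepoint_list_to_adls_example",  # placeholder flow name
    list_title="My List",               # placeholder list title
    site_url="https://example.sharepoint.com/sites/my-site",  # placeholder URL
    output_file_extension=".csv",       # CSV output is new in 0.4.23
    adls_dir_path="raw/tests/",
    file_name="my_extract",             # placeholder file name
    if_no_data_returned="skip",         # or "fail" / "warn", per the changelog
)
result = flow.run()  # with "skip", an empty extract still succeeds
```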
12 changes: 3 additions & 9 deletions tests/integration/flows/test_bigquery_to_adls.py
@@ -101,13 +101,9 @@ def test_bigquery_to_adls_validate_df_fail(mocked_data):
        adls_sp_credentials_secret=ADLS_CREDENTIAL_SECRET,
        validate_df_dict={"column_list_to_match": ["type", "country", "test"]},
    )
-    try:
-        result = flow_bigquery.run()
-    except ValidationError:
-        pass

-    os.remove("test_bigquery_to_adls_validate_df_fail.parquet")
-    os.remove("test_bigquery_to_adls_validate_df_fail.json")
+    result = flow_bigquery.run()
+    assert result.is_failed()


@mock.patch(
@@ -138,7 +134,5 @@ def test_bigquery_to_adls_validate_df_success(mocked_data):
os.remove("test_bigquery_to_adls_validate_df_success.parquet")
os.remove("test_bigquery_to_adls_validate_df_success.json")

rm = AzureDataLakeRemove(
path=ADLS_DIR_PATH + ADLS_FILE_NAME, vault_name="azuwevelcrkeyv001s"
)
rm = AzureDataLakeRemove(path=ADLS_DIR_PATH + ADLS_FILE_NAME)
rm.run(sp_credentials_secret=ADLS_CREDENTIAL_SECRET)
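The change above is the pattern this PR applies across the test suite: instead of swallowing the expected `ValidationError`, the tests assert failure on the state returned by `flow.run()`. A small helper sketch, assuming Prefect 1.x semantics where `flow.run()` returns a `State` rather than raising:

```python
from prefect.engine import state  # Prefect 1.x state objects


def run_and_expect_failure(flow):
    """Run a Prefect 1.x flow and assert that it ended in a failed state.

    Replaces the old `try: ... except ValidationError: pass` pattern,
    which could pass silently even if validation never raised at all.
    """
    result = flow.run()  # returns a State object; task errors do not propagate
    assert isinstance(result, state.State)
    assert result.is_failed()
    return result
```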
2 changes: 0 additions & 2 deletions tests/integration/flows/test_mysql_to_adls.py
@@ -18,7 +18,6 @@ def test_adls_gen1_to_azure_sql_new_mock(TEST_PARQUET_FILE_PATH):
        query=query,
        file_path=TEST_PARQUET_FILE_PATH,
        to_path=f"raw/examples/{TEST_PARQUET_FILE_PATH}",
-        sp_credentials_secret="App-Azure-CR-DatalakeGen2-AIA-DEV",
        overwrite_adls=True,
    )
    flow.run()
@@ -32,7 +31,6 @@ def test_validate_df(TEST_PARQUET_FILE_PATH):
country_short="DE",
query=query,
file_path=TEST_PARQUET_FILE_PATH,
sp_credentials_secret="App-Azure-CR-DatalakeGen2-AIA",
to_path=f"raw/examples/{TEST_PARQUET_FILE_PATH}",
validate_df_dict={"column_size": {"sales_org": 3}},
)
2 changes: 0 additions & 2 deletions tests/integration/flows/test_salesforce_to_adls.py
@@ -30,7 +30,6 @@ def test_salesforce_to_adls():
os.remove("test_salesforce_to_adls_run_flow.json")
rm = AzureDataLakeRemove(
path=ADLS_DIR_PATH + ADLS_FILE_NAME,
vault_name="azuwevelcrkeyv001s",
)
rm.run(sp_credentials_secret=credentials_secret)

@@ -56,6 +55,5 @@ def test_salesforce_to_adls_validate_success():
os.remove("test_salesforce_to_adls_run_flow.json")
rm = AzureDataLakeRemove(
path=ADLS_DIR_PATH + ADLS_FILE_NAME,
vault_name="azuwevelcrkeyv001s",
)
rm.run(sp_credentials_secret=credentials_secret)
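With the hard-coded `vault_name` dropped here and in the other test files, credentials flow in only through `sp_credentials_secret`. A sketch of resolving that secret at runtime via `PrefectSecret`, following the pattern the SharePoint tests below already use; the path is a placeholder:

```python
from prefect.tasks.secrets import PrefectSecret

from viadot.tasks import AzureDataLakeRemove

# Resolve the service-principal secret from Prefect instead of pinning
# a key-vault name in every test (secret name as used in the tests below).
credentials_secret = PrefectSecret(
    "AZURE_DEFAULT_ADLS_SERVICE_PRINCIPAL_SECRET"
).run()

rm = AzureDataLakeRemove(path="raw/tests/example.csv")  # placeholder path
rm.run(sp_credentials_secret=credentials_secret)
```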
175 changes: 173 additions & 2 deletions tests/integration/flows/test_sharepoint_to_adls.py
@@ -5,14 +5,19 @@
import pendulum
import pytest
from prefect.tasks.secrets import PrefectSecret
+from viadot.flows import SharepointToADLS, SharepointListToADLS

-from viadot.flows import SharepointToADLS
from viadot.tasks import AzureDataLakeRemove

ADLS_FILE_NAME = str(pendulum.now("utc")) + ".csv"
ADLS_FILE_NAME_LIST = pendulum.now("utc").strftime("%Y-%m-%d_%H:%M:%S_%Z%z")
ADLS_DIR_PATH = "raw/tests/"
CREDENTIALS_SECRET = PrefectSecret("AZURE_DEFAULT_ADLS_SERVICE_PRINCIPAL_SECRET").run()
DATA = {"country": [1, 2], "sales": [3, 4]}
EMPTY_DATA = {}


# SharepointToADLS


@mock.patch(
@@ -69,7 +74,173 @@ def test_sharepoint_to_adls_run_flow_overwrite_false(mocked_class):
        overwrite_adls=False,
    )
    result = flow.run()

    assert result.is_failed()
    os.remove("test_sharepoint_to_adls_run_flow_overwrite_false.csv")
    os.remove("test_sharepoint_to_adls_run_flow_overwrite_false.json")


# SharepointListToADLS


@mock.patch(
    "viadot.tasks.SharepointListToDF.run",
    return_value=pd.DataFrame(data=DATA),
)
@pytest.mark.run
def test_sharepoint_list_to_adls_run_flow_csv(mocked_class):
    flow = SharepointListToADLS(
        "test_sharepoint_to_adls_run_flow",
        output_file_extension=".csv",
        adls_sp_credentials_secret=CREDENTIALS_SECRET,
        adls_dir_path=ADLS_DIR_PATH,
        file_name=ADLS_FILE_NAME_LIST,
        list_title="",
        site_url="",
    )
    result = flow.run()
    assert result.is_successful()
    os.remove(ADLS_FILE_NAME_LIST + ".csv")
    os.remove("test_sharepoint_to_adls_run_flow.json")


@mock.patch(
    "viadot.tasks.SharepointListToDF.run",
    return_value=pd.DataFrame(data=DATA),
)
@pytest.mark.run
def test_sharepoint_list_to_adls_run_flow_parquet(mocked_class):
    flow = SharepointListToADLS(
        "test_sharepoint_to_adls_run_flow",
        output_file_extension=".parquet",
        adls_sp_credentials_secret=CREDENTIALS_SECRET,
        adls_dir_path=ADLS_DIR_PATH,
        file_name=ADLS_FILE_NAME_LIST,
        list_title="",
        site_url="",
    )
    result = flow.run()
    assert result.is_successful()
    os.remove(ADLS_FILE_NAME_LIST + ".parquet")
    os.remove("test_sharepoint_to_adls_run_flow.json")


@mock.patch(
    "viadot.tasks.SharepointListToDF.run",
    return_value=pd.DataFrame(data=DATA),
)
@pytest.mark.run
def test_sharepoint_list_to_adls_run_flow_wrong_extension(mocked_class):
    with pytest.raises(ValueError) as exc:
        flow = SharepointListToADLS(
            "test_sharepoint_to_adls_run_flow",
            output_file_extension=".s",
            adls_sp_credentials_secret=CREDENTIALS_SECRET,
            adls_dir_path=ADLS_DIR_PATH,
            file_name=ADLS_FILE_NAME_LIST,
            list_title="",
            site_url="",
        )
        result = flow.run()
    assert "Output file extension can only be '.csv' or '.parquet'" in str(exc.value)


@mock.patch(
    "viadot.tasks.SharepointListToDF.run",
    return_value=pd.DataFrame(data=DATA),
)
@pytest.mark.run
def test_sharepoint_list_to_adls_run_flow_overwrite_true(mocked_class):
    flow = SharepointListToADLS(
        "test_sharepoint_to_adls_run_flow_overwrite_true",
        output_file_extension=".csv",
        adls_sp_credentials_secret=CREDENTIALS_SECRET,
        adls_dir_path=ADLS_DIR_PATH,
        file_name=ADLS_FILE_NAME_LIST,
        overwrite_adls=True,
        list_title="",
        site_url="",
    )
    result = flow.run()
    assert result.is_successful()
    os.remove(ADLS_FILE_NAME_LIST + ".csv")
    os.remove("test_sharepoint_to_adls_run_flow_overwrite_true.json")


@mock.patch(
    "viadot.tasks.SharepointListToDF.run",
    return_value=pd.DataFrame(data=EMPTY_DATA),
)
@pytest.mark.run
def test_sharepoint_list_to_adls_run_flow_fail_on_no_data_returned(mocked_class):
    """
    Check that the flow fails when an empty DF is passed
    and if_no_data_returned = "fail".
    No CSV file should be generated!
    """
    flow = SharepointListToADLS(
        "test_sharepoint_to_adls_run_flow",
        output_file_extension=".csv",
        adls_sp_credentials_secret=CREDENTIALS_SECRET,
        adls_dir_path=ADLS_DIR_PATH,
        file_name=ADLS_FILE_NAME_LIST,
        list_title="",
        site_url="",
        if_no_data_returned="fail",
    )
    result = flow.run()
    assert result.is_failed()


@mock.patch(
    "viadot.tasks.SharepointListToDF.run",
    return_value=pd.DataFrame(data=EMPTY_DATA),
)
@pytest.mark.run
def test_sharepoint_list_to_adls_run_flow_success_on_no_data_returned(mocked_class):
    """
    Check that the flow succeeds when an empty DF is passed
    and if_no_data_returned = "skip".
    An empty CSV file should be generated!
    """
    flow = SharepointListToADLS(
        "test_sharepoint_to_adls_run_flow",
        output_file_extension=".csv",
        adls_sp_credentials_secret=CREDENTIALS_SECRET,
        adls_dir_path=ADLS_DIR_PATH,
        file_name=ADLS_FILE_NAME_LIST,
        list_title="",
        site_url="",
        if_no_data_returned="skip",
    )
    result = flow.run()
    assert result.is_successful()
    os.remove(ADLS_FILE_NAME_LIST + ".csv")
    os.remove("test_sharepoint_to_adls_run_flow.json")


@mock.patch(
    "viadot.tasks.SharepointListToDF.run",
    return_value=pd.DataFrame(data=EMPTY_DATA),
)
@pytest.mark.run
def test_sharepoint_list_to_adls_run_flow_success_warn_on_no_data_returned(
    mocked_class,
):
    """
    Check that the flow succeeds with a warning when an empty DF is passed
    and if_no_data_returned = "warn".
    No CSV file should be generated!
    """
    flow = SharepointListToADLS(
        "test_sharepoint_to_adls_run_flow",
        output_file_extension=".csv",
        adls_sp_credentials_secret=CREDENTIALS_SECRET,
        adls_dir_path=ADLS_DIR_PATH,
        file_name=ADLS_FILE_NAME_LIST,
        list_title="",
        site_url="",
        if_no_data_returned="warn",
    )
    result = flow.run()
    assert result.is_successful()
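All of the tests above share one mocking pattern: `SharepointListToDF.run` is patched to return a canned DataFrame, so the flow exercises the file and ADLS logic without a live SharePoint site. A condensed sketch using the context-manager form instead of the decorator; the flow arguments are placeholders:

```python
from unittest import mock

import pandas as pd

from viadot.flows import SharepointListToADLS

# Patch the extractor task so no real SharePoint connection is needed.
with mock.patch(
    "viadot.tasks.SharepointListToDF.run",
    return_value=pd.DataFrame(data={"country": [1, 2], "sales": [3, 4]}),
):
    flow = SharepointListToADLS(
        "sharepoint_list_mock_example",  # placeholder flow name
        output_file_extension=".csv",
        adls_dir_path="raw/tests/",
        file_name="mock_extract",        # placeholder file name
        list_title="",
        site_url="",
    )
    result = flow.run()
    assert result.is_successful()
```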
10 changes: 3 additions & 7 deletions tests/integration/flows/test_vidclub_to_adls.py
@@ -47,7 +47,6 @@ def test_vidclub_validate_df_task_success(caplog):
to_date="2023-10-25",
adls_dir_path="raw/tests",
adls_file_name="test.parquet",
adls_sp_credentials_secret="App-Azure-CR-DatalakeGen2-AIA",
overwrite_adls=True,
validate_df_dict={
"column_size": {"submissionID": 5},
@@ -78,15 +77,12 @@ def test_vidclub_validate_df_task_fail(caplog):
to_date="2023-10-25",
adls_dir_path="raw/tests",
adls_file_name="test.parquet",
adls_sp_credentials_secret="App-Azure-CR-DatalakeGen2-AIA",
overwrite_adls=True,
validate_df_dict={
"column_size": {"submissionID": 5},
"column_unique_values": ["regionID"],
"column_unique_values": ["id"],
},
)

try:
flow.run()
except ValidationError:
pass
result = flow.run()
assert result.is_failed()
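For reference, the `validate_df_dict` rules exercised across this PR, collected in one sketch. The rule names come straight from the tests; the comments describe how the tests appear to use them, while the authoritative semantics live in viadot's validation task:

```python
validate_df_dict = {
    # Expected size of values in the given column (per the tests' usage).
    "column_size": {"submissionID": 5},
    # Columns whose values must all be unique.
    "column_unique_values": ["id"],
    # The DataFrame's columns must match this list.
    "column_list_to_match": ["type", "country", "test"],
}
```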
39 changes: 38 additions & 1 deletion tests/integration/tasks/test_genesys_task.py
@@ -106,7 +106,10 @@ def genesys_api_connection(post_data_list, end_point, method="POST"):
"messages": [],
}
],
"pageCount": 2,
"entities": [{"id": "xxx"}],
}

else:
report = {
"conversations": [
@@ -307,7 +310,7 @@ def test_genesys_conversations(mock_genesys, var_dictionary):

@mock.patch("viadot.tasks.genesys.Genesys", return_value=MockGenesysTask)
@pytest.mark.conv
def test_genesys_webmsg(mock_genesys, var_dictionary):
def test_genesys_webmsg_conversations(mock_genesys, var_dictionary):
to_csv = GenesysToCSV()
file_name = to_csv.run(
view_type=None,
@@ -324,3 +327,37 @@ def test_genesys_webmsg(mock_genesys, var_dictionary):

    mock_genesys.assert_called_once()
    assert file_name[0] == f"WEBMESSAGE_{start}-{end}.csv"


@mock.patch("viadot.tasks.genesys.Genesys", return_value=MockGenesysTask)
@pytest.mark.conv
def test_genesys_users(mock_genesys, var_dictionary):
to_csv = GenesysToCSV()
file_name = to_csv.run(
view_type=None,
end_point="users",
conversationId_list=var_dictionary["v_list"],
post_data_list=[""],
key_list=var_dictionary["key_list"],
start_date=var_dictionary["start_date"],
end_date=var_dictionary["end_date"],
)

mock_genesys.assert_called_once()
assert file_name[0] == f"All_Genesys_Users.csv"


@mock.patch("viadot.tasks.genesys.Genesys", return_value=MockGenesysTask)
@pytest.mark.conv
def test_genesys_queue_performance_detail_view(mock_genesys, var_dictionary):
genesys = GenesysToCSV()
output = genesys.run(
view_type="queue_performance_detail_view",
end_point=None,
conversationId_list=var_dictionary["v_list"],
post_data_list=[""],
key_list=var_dictionary["key_list"],
start_date=var_dictionary["start_date"],
end_date=var_dictionary["end_date"],
)
assert output is None
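A hedged sketch of calling the new `users` endpoint outside the mocks, mirroring `test_genesys_users`; the dates and list arguments are placeholders standing in for the test fixtures:

```python
from viadot.tasks import GenesysToCSV

to_csv = GenesysToCSV()
file_names = to_csv.run(
    view_type=None,
    end_point="users",
    conversationId_list=[],   # placeholder for the fixture's conversation IDs
    post_data_list=[""],
    key_list=[],              # placeholder for the fixture's key list
    start_date="2023-12-01",  # placeholder dates
    end_date="2023-12-07",
)
# Per the new test's assertion, the first file is "All_Genesys_Users.csv".
```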