
Merge pull request #687 from dyvenia/dev
Release 0.4.15 PR
Rafalz13 authored May 11, 2023
2 parents ce91048 + 50964cb commit 204feb1
Showing 21 changed files with 1,941 additions and 33 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-2.0.yml
@@ -34,4 +34,4 @@ jobs:
file: docker/Dockerfile
platforms: linux/amd64
push: true
tags: ghcr.io/${{ github.repository }}/viadot:2.0-latest
tags: ghcr.io/${{ github.repository }}/viadot:2.0-latest
22 changes: 21 additions & 1 deletion CHANGELOG.md
@@ -7,6 +7,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]


## [0.4.15] - 2023-05-11
### Added
- Added `BusinessCore` source class (see the usage sketch after this list)
- Added `BusinessCoreToParquet` task class
- Added `verify` parameter to `handle_api_response()`.
- Added `to_parquet()` in `base.py`
- Added new source class `SAPRFCV2` in `sap_rfc.py` with new approximation.
- Added new parameter `rfc_replacement` to `sap_rfc_to_adls.py` to replace
an extra separator character within a string column to avoid conflicts.
- Added `rfc_unique_id` in `SAPRFCV2` to merge chunks on this column.
- Added `close_connection()` to `SAPRFC` and `SAPRFCV2`
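
A minimal usage sketch for the new `BusinessCore` source, mirroring the integration test added in this PR. The endpoint URL, filter values, and credentials below are the test's placeholder values, and only the constructor arguments and `to_df()` shown in that test are assumed here:

```python
from viadot.sources import BusinessCore

# Placeholder endpoint, filters, and credentials taken from the integration test in this PR.
business_core = BusinessCore(
    url="https://api.businesscore.ae/api/GetCustomerData",
    filters_dict={
        "BucketCount": 10,
        "BucketNo": 1,
        "FromDate": None,
        "ToDate": None,
    },
    credentials={"username": "test", "password": "test123"},
)

# Load the API response into a pandas DataFrame.
df = business_core.to_df()
```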

### Fixed
- Removed the `try-except` block in the `sap_rfc.py` source file and added new logic to remove extra
  separators, to avoid a mismatch in column length between iterative connections to SAP tables.
- Fixed the case where SAP tables are updated while the `sap_rfc.py` script is running: when data is
  fetched in chunks, the columns of the next chunk could end up filled with unrelated rows.
- Fixed the `sap_rfc.py` source file so it no longer breaks down when both an extra separator appears
  in a row and new rows are added to the SAP table between iterations (see the illustrative snippet below).
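
For context, the snippet below is a minimal, hypothetical illustration of the separator failure mode described above; the separator character, column names, and sample rows are invented for the example, and this is not the actual `sap_rfc.py` implementation:

```python
# Hypothetical illustration: if a cell value contains the separator character,
# a naive split yields more fields than there are columns.
SEP = "|"
COLUMNS = ["MATNR", "MAKTX", "WERKS"]

rows = [
    "100017|Standard bolt|PL01",       # clean row
    "100018|Bolt | washer set|PL01",   # description contains the separator
]

for raw in rows:
    fields = raw.split(SEP)
    if len(fields) != len(COLUMNS):
        # This is the column-length mismatch the new logic guards against,
        # e.g. by replacing the in-value separator (cf. `rfc_replacement`) before splitting.
        print(f"Mismatch: expected {len(COLUMNS)} fields, got {len(fields)} -> {raw!r}")
    else:
        print(dict(zip(COLUMNS, fields)))
```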


## [0.4.14] - 2023-04-13
### Added
- Added `anonymize_df` task function to `task_utils.py` to anonymize data in the dataframe in selected columns.
@@ -56,7 +77,6 @@ This parameter enables the user to decide whether or not the filter should be validated.
### Changed
- Changed data extraction logic for `Outlook` data.


## [0.4.10] - 2022-11-16
### Added
- Added `credentials_loader` function in utils
29 changes: 29 additions & 0 deletions tests/integration/flows/test_eurostat_to_adls.py
@@ -0,0 +1,29 @@
from unittest import mock
import pytest
import pandas as pd
import os

from viadot.flows import EurostatToADLS

DATA = {"geo": ["PL", "DE", "NL"], "indicator": [35, 55, 77]}
ADLS_FILE_NAME = "test_eurostat.parquet"
ADLS_DIR_PATH = "raw/tests/"


@mock.patch(
    "viadot.tasks.EurostatToDF.run",
    return_value=pd.DataFrame(data=DATA),
)
@pytest.mark.run
def test_eurostat_to_adls_run_flow(mocked_class):
    flow = EurostatToADLS(
        "test_eurostat_to_adls_flow_run",
        dataset_code="ILC_DI04",
        overwrite_adls=True,
        adls_dir_path=ADLS_DIR_PATH,
        adls_file_name=ADLS_FILE_NAME,
    )
    result = flow.run()
    assert result.is_successful()
    os.remove("test_eurostat_to_adls_flow_run.parquet")
    os.remove("test_eurostat_to_adls_flow_run.json")
223 changes: 223 additions & 0 deletions tests/integration/tasks/test_eurostat.py
@@ -0,0 +1,223 @@
import pytest
import pandas as pd
import logging

from viadot.tasks import eurostat


def test_and_validate_dataset_code_without_params(caplog):
    """This function is designed to test the accuracy of the data retrieval feature in a program.
    Specifically, it tests to ensure that the program returns a non-empty DataFrame when a correct
    dataset code is provided without any parameters. The function is intended to be used in software
    development to verify that the program is correctly retrieving data from the appropriate dataset.
    """
    task = eurostat.EurostatToDF(dataset_code="ILC_DI04").run()
    assert isinstance(task, pd.DataFrame)
    assert not task.empty
    assert caplog.text == ""


def test_wrong_dataset_code_logger(caplog):
    """This function is designed to test the accuracy of the error logging feature in a program.
    Specifically, it tests to ensure that the program is able to correctly identify and log errors
    when provided with only an incorrect dataset code.
    The function is intended to be used in software development to verify the correct error types
    and messages in the program's handling of codes.
    """
    task = eurostat.EurostatToDF(dataset_code="ILC_DI04E")

    with pytest.raises(ValueError, match="DataFrame is empty!"):
        with caplog.at_level(logging.ERROR):
            task.run()
    assert (
        "Failed to fetch data for ILC_DI04E, please check correctness of dataset code!"
        in caplog.text
    )


def test_wrong_parameters_codes_logger(caplog):
    """This function is designed to test the accuracy of the error logging feature in a program.
    Specifically, it tests to ensure that the program is able to correctly identify and log errors
    when a correct dataset_code and correct parameter keys are provided, but both parameter codes are incorrect.
    The function is intended to be used in software development to verify the correct error types
    and messages in the program's handling of codes.
    """
    task = eurostat.EurostatToDF(
        dataset_code="ILC_DI04",
        params={"hhtyp": "total1", "indic_il": "non_existing_code"},
    )

    with pytest.raises(ValueError, match="DataFrame is empty!"):
        with caplog.at_level(logging.ERROR):
            task.run()
    assert (
        "Parameters codes: 'total1 | non_existing_code' are not available. Please check your spelling!"
        in caplog.text
    )
    assert (
        "You can find everything via link: https://ec.europa.eu/eurostat/databrowser/view/ILC_DI04/default/table?lang=en"
        in caplog.text
    )


def test_parameter_codes_as_list_logger(caplog):
    """This function is designed to test the accuracy of the error logging feature in a program.
    Specifically, it tests to ensure that the program is able to correctly identify and log errors
    when provided with a correct dataset code and correct parameters, but an incorrect parameter code
    structure (a list of strings instead of a single string).
    The function is intended to be used in software development to verify the correct error types
    and messages in the program's handling of codes.
    """

    task = eurostat.EurostatToDF(
        dataset_code="ILC_DI04",
        params={"hhtyp": ["totale", "nottotale"], "indic_il": "med_e"},
    )
    with pytest.raises(ValueError, match="Wrong structure of params!"):
        with caplog.at_level(logging.ERROR):
            task.run()
    assert (
        "You can provide only one code per one parameter as 'str' in params!\n"
        in caplog.text
    )
    assert (
        "CORRECT: params = {'unit': 'EUR'} | INCORRECT: params = {'unit': ['EUR', 'USD', 'PLN']}"
        in caplog.text
    )


def test_wrong_parameters(caplog):
    """This function is designed to test the accuracy of the error logging feature in a program.
    Specifically, it tests to ensure that the program is able to correctly identify and log errors
    when provided with a correct dataset_code, but incorrect parameter keys.
    The function is intended to be used in software development to verify the correct error types
    and messages in the program's handling of codes.
    """

    task = eurostat.EurostatToDF(
        dataset_code="ILC_DI04", params={"hhhtyp": "total", "indic_ilx": "med_e"}
    )
    with pytest.raises(ValueError, match="DataFrame is empty!"):
        with caplog.at_level(logging.ERROR):
            task.run()
    assert (
        "Parameters: 'hhhtyp | indic_ilx' are not in dataset. Please check your spelling!\n"
        in caplog.text
    )
    assert (
        "Possible parameters: freq | hhtyp | indic_il | unit | geo | time"
        in caplog.text
    )


def test_params_as_list():
    """This function is designed to test the accuracy of the error logging feature in a program.
    Specifically, it tests to ensure that the program is able to correctly identify and log an error
    when provided with a correct dataset_code, but an incorrect params structure (a list instead of a dict).
    The function is intended to be used in software development to verify the correct error types
    and messages in the program's handling of codes.
    """
    with pytest.raises(TypeError, match="Params should be a dictionary."):
        eurostat.EurostatToDF(dataset_code="ILC_DI04", params=["total", "med_e"]).run()


def test_correct_params_and_dataset_code(caplog):
    """This function is designed to test the accuracy of the data retrieval feature in a program.
    Specifically, it tests to ensure that the program returns a non-empty DataFrame when a correct
    dataset code is provided with correct params. The function is intended to be used in software
    development to verify that the program is correctly retrieving data from the appropriate dataset.
    """

    task = eurostat.EurostatToDF(
        dataset_code="ILC_DI04", params={"hhtyp": "total", "indic_il": "med_e"}
    ).run()

    assert isinstance(task, pd.DataFrame)
    assert not task.empty
    assert caplog.text == ""


def test_correct_requested_columns(caplog):
    """This function is designed to test the accuracy of the data retrieval feature in a program.
    Specifically, it tests to ensure that the program returns a non-empty DataFrame limited to the
    requested columns when provided with a correct dataset_code, correct params and correct requested_columns.
    The function is intended to be used in software development to verify that the program is correctly
    retrieving data from the appropriate dataset.
    """
    task = eurostat.EurostatToDF(
        dataset_code="ILC_DI04",
        params={"hhtyp": "total", "indic_il": "med_e"},
        requested_columns=["updated", "geo", "indicator"],
    )
    df = task.run()

    assert isinstance(df, pd.DataFrame)
    assert not df.empty
    assert caplog.text == ""
    assert list(df.columns) == task.needed_columns


def test_wrong_needed_columns_names(caplog):
    """This function is designed to test the accuracy of the error logging feature in a program.
    Specifically, it tests to ensure that the program is able to correctly identify and log an error
    when provided with a correct dataset_code and correct parameters, but incorrect names of requested columns.
    The function is intended to be used in software development to verify the correct error types
    and messages in the program's handling of codes.
    """
    task = eurostat.EurostatToDF(
        dataset_code="ILC_DI04",
        params={"hhtyp": "total", "indic_il": "med_e"},
        requested_columns=["updated1", "geo1", "indicator1"],
    )
    with pytest.raises(ValueError, match="Provided columns are not available!"):
        with caplog.at_level(logging.ERROR):
            task.run()
    assert (
        "Name of the columns: 'updated1 | geo1 | indicator1' are not in DataFrame. Please check spelling!\n"
        in caplog.text
    )
    assert "Available columns: geo | time | indicator | label | updated" in caplog.text


def test_wrong_params_and_wrong_requested_columns_names(caplog):
    """This function is designed to test the accuracy of the error logging feature in a program.
    Specifically, it tests to ensure that the program is able to correctly identify and log an error
    when provided with a correct dataset_code, but incorrect parameters and incorrect names of requested columns.
    The test should log only the errors related to the wrong params, to check that the program stops after
    params validation. The function is intended to be used in software development to verify the correct
    error types and messages in the program's handling of codes.
    """
    task = eurostat.EurostatToDF(
        dataset_code="ILC_DI04",
        params={"hhhtyp": "total", "indic_ilx": "med_e"},
        requested_columns=["updated1", "geo1", "indicator1"],
    )
    with pytest.raises(ValueError, match="DataFrame is empty!"):
        with caplog.at_level(logging.ERROR):
            task.run()
    assert (
        "Parameters: 'hhhtyp | indic_ilx' are not in dataset. Please check your spelling!\n"
        in caplog.text
    )
    assert (
        "Possible parameters: freq | hhtyp | indic_il | unit | geo | time"
        in caplog.text
    )


def test_requested_columns_not_in_list():
    """This function is designed to test the accuracy of the error logging feature in a program.
    Specifically, it tests to ensure that the program is able to correctly identify and log an error
    when provided with a correct dataset_code and correct params, but an incorrect requested_columns
    structure (a single string instead of a list of strings).
    The function is intended to be used in software development to verify the correct error types
    and messages in the program's handling of codes.
    """
    with pytest.raises(
        TypeError, match="Requested columns should be provided as list of strings."
    ):
        eurostat.EurostatToDF(
            dataset_code="ILC_DI04",
            params={"hhtyp": "total", "indic_il": "med_e"},
            requested_columns="updated",
        ).run()
49 changes: 49 additions & 0 deletions tests/integration/test_business_core.py
@@ -0,0 +1,49 @@
import pytest
from unittest.mock import patch, Mock
import pandas as pd
from viadot.sources import BusinessCore


@pytest.fixture(scope="module")
def business_core():
    return BusinessCore(
        url="https://api.businesscore.ae/api/GetCustomerData",
        filters_dict={
            "BucketCount": 10,
            "BucketNo": 1,
            "FromDate": None,
            "ToDate": None,
        },
        credentials={"username": "test", "password": "test123"},
    )


@patch("viadot.sources.business_core.handle_api_response")
def test_generate_token(mock_api_response, business_core):
    mock_api_response.return_value = Mock(text='{"access_token": "12345"}')
    token = business_core.generate_token()
    assert token == "12345"


def test_clean_filters_dict(business_core):
    filters = business_core.clean_filters_dict()
    assert filters == {
        "BucketCount": 10,
        "BucketNo": 1,
        "FromDate": "&",
        "ToDate": "&",
    }


def test_to_df(business_core):
    with patch.object(
        business_core,
        "get_data",
        return_value={"MasterDataList": [{"id": 1, "name": "John Doe"}]},
    ):
        df = business_core.to_df()
        assert isinstance(df, pd.DataFrame)
        assert len(df.columns) == 2
        assert len(df) == 1
        assert df["id"].tolist() == [1]
        assert df["name"].tolist() == ["John Doe"]
