Skip to content

Commit

Permalink
38 value error in case of lists in features dict (#39)
Browse files Browse the repository at this point in the history
* Fix converting features from dict to df

* Modify tests

* Remove content_type and add warning about api versions

Co-authored-by: Agata Rubacka <[email protected]>
  • Loading branch information
czwartaoslpoj and Agata Rubacka authored Jan 19, 2023
1 parent 78300cc commit 175f93d
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 25 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ Configuration

AI Core requires the user to authenticate with a token generated from the user's AI Core username and password.

**WARNING**: Bear in mind that this type of authentication works only with v2 of the AI Core API.

**There are two ways in which the user can generate the required token:**

1. **Generate the token directly with the utility function** (this approach also requires the user to pass the AI Core URL):
Expand Down Expand Up @@ -195,7 +197,7 @@ The `run` function has the following signature:

~~~python
Task|Result = algorithm.run(data, parameters=parameters, evaluate=True, encrypt=False, callback_url=None,
callback_param=None, file_content_type=None, monitor=True)
callback_param=None, monitor=True)
~~~

- `features`: data to be processed by the algorithm, it can be:
Expand Down
24 changes: 13 additions & 11 deletions compredict/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,12 +217,12 @@ def __process_data(self, data, type_of_data, compression=None):
In case of data provided as path to file, make sure that file is of correct type.
In case of parameters provided as dict: create json file from dict.
In case of features provided as list or dict: create DataFrame from dict or list,
and then write DatFrame into parquet file.
In case of features provided as dict: create DataFrame from dict and then write
DataFrame into parquet file.
In case of features provided as DataFrame: write DataFrame into parquet file.
:param data: The data to be sent for computation and prediction.
:type data: dict | list| str | pandas
:type data: dict | str | pandas
:param type_of_data: Data can be of type: 'features' or of type: 'parameters'.
Features will be always converted into parquet file, whereas parameters into json file.
:return: opened file, bool indicating if file should be removed afterwards.
Expand All @@ -233,13 +233,14 @@ def __process_data(self, data, type_of_data, compression=None):
return open(data, "rb+"), False

file = NamedTemporaryFile('wb+', delete=False)
if type_of_data == "parameters":
self.__write_json_file(file, data, compression=compression)

if isinstance(data, dict):
if type_of_data == 'parameters':
self.__write_json_file(file, data, compression=compression)
else:
data = DataFrame(data, index=[0])
elif isinstance(data, list):
data = DataFrame(data)
data = DataFrame(data)

if type_of_data == 'features':
data.to_parquet(file.name, compression=compression)
Expand Down Expand Up @@ -277,7 +278,7 @@ def __remove_file(file, is_to_remove):

def run_algorithm(self,
algorithm_id: str,
features: Union[str, DataFrame, dict, List[dict]],
features: Union[str, DataFrame, dict],
version: Optional[str] = None,
evaluate: bool = True,
callback_url: Optional[Union[str, List[str]]] = None,
Expand All @@ -289,8 +290,8 @@ def run_algorithm(self,
Run the given algorithm id with the passed data. The user has the ability to toggle encryption and evaluation.
:param algorithm_id: String identifier of the algorithm
:param features: Features can be specified as path to features .parquet file, dictionary,
list of dictionaries or pandas.Dataframe.
:param features: Features can be specified as path to features .parquet file, dictionary
or pandas.Dataframe.
:param version: Choose the version of the algorithm you would like to call. Defaults to latest version.
:param evaluate: Boolean to whether evaluate the results of predictions or not.
:param callback_param: The callback additional parameter to be sent with results.
Expand Down Expand Up @@ -338,7 +339,7 @@ def run_algorithm(self,

def train_algorithm(self,
algorithm_id: str,
features: Union[str, DataFrame, dict, List[dict]],
features: Union[str, DataFrame, dict],
version: Optional[str] = None,
export_new_version: Optional[bool] = None,
parameters: Optional[Union[str, dict]] = None,
Expand All @@ -348,7 +349,8 @@ def train_algorithm(self,
Train fit algorithm with the passed data.
:param algorithm_id: String identifier of the algorithm.
:param features: JSON format of the data given with the correct keys as specified in the algorithm's template.
:param features: Features can be specified as path to features .parquet file, dictionary
or pandas.Dataframe.
:param version: Choose the version of the algorithm you would like to call. Default is latest version.
:param export_new_version: The trained model will be exported to a new version if True.
Otherwise, the requested version will be updated. If None, then the model’s default behavior
Expand Down
32 changes: 19 additions & 13 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_last_error(response_400, mocker, connection):

def test_run_algorithm(api_client, mocker, response_200):
algorithm_id = "id"
data = {"data": "some_data"}
data = {"data": [1, 2, 3], "test": [3, 4, 5]}
callback_url = ["1callback", "2callback"]
callback_param = [{1: "first"}, {2: "second"}]

Expand Down Expand Up @@ -97,7 +97,7 @@ def test_run_algorithm_with_value_error(api_client, features_path):

def test_run_algorithm_with_type_error(mocker, api_client):
algorithm_id = "id"
data = {"data": "some_data"}
data = {"data": [1, 2, 3], "test": [3, 4, 5]}
callback_url = ["1callback", "2callback", "3callback"]
callback_param = [{1: "first"}, {2: "second"}]
mocker.patch('builtins.dict', side_effect=AttributeError)
Expand All @@ -110,7 +110,7 @@ def test_run_algorithm_with_type_error(mocker, api_client):
def test_run_algorithm_with_client_error(mocker, api_client, response_400):
api_client.connection.fail_on_error = True
algorithm_id = "algorithm-slug"
data = {"data": "some_data"}
data = {"data": [1, 2, 3], "test": [3, 4, 5]}
mocker.patch('requests.post', return_value=response_400)

with pytest.raises(ClientError):
Expand All @@ -119,7 +119,7 @@ def test_run_algorithm_with_client_error(mocker, api_client, response_400):

def test_run_algorithm_with_server_error(mocker, api_client, response_500):
algorithm_id = "id"
data = {"data": "some_data"}
data = {"data": [1, 2, 3], "test": [3, 4, 5]}
mocker.patch('requests.post', return_value=response_500)
mocker.patch('compredict.connection.Connection.handle_response', side_effect=ServerError)

Expand Down Expand Up @@ -187,11 +187,9 @@ def test_raise_errors_if_file_type_incorrect_with_value_error(file_path, file_ty
@pytest.mark.parametrize(
'data, type_of_data, file, to_delete',
[
({"test": 2200}, 'parameters', BufferedRandom, True),
({"features": "some_features", "features_2": "different_features"}, "features", BufferedRandom, True),
([{"features": "some_features", "features_2": "different_features"},
{"features": "some_features", "features_2": "different_features"}], "features", BufferedRandom, True),
(DataFrame({"features": "some_features", "features_2": "different_features"}, index=[0]), "features",
({"test": 2200, "another_test": [1, 4, 6]}, 'parameters', BufferedRandom, True),
({"features": [1, 2, 4, 6, 8, 10], "features_2": [1, 5, 19, 34, 1, 4]}, "features", BufferedRandom, True),
(DataFrame({"features": [9, 0, 2, 5], "features_2": [0, 2, 3, 6]}), "features",
BufferedRandom, True),
(DataFrame([{"features": "some_features", "features_2": "different_features"},
{"features": "some_features", "features_2": "different_features"}]), "features", BufferedRandom,
Expand All @@ -218,6 +216,14 @@ def test_process_parameters_data_provided_as_path_to_file(api_client):
assert not to_delete


def test_process_features_with_value_error(api_client):
    """Features given as a dict of unequal-length lists must raise ValueError.

    A parquet file schema requires all columns to be of the same length, and
    the same holds when a dictionary is converted into a pandas DataFrame.
    """
    # "features" holds 5 values while "features_2" holds 6.
    features = {"features": [1, 2, 4, 6, 0], "features_2": [1, 5, 19, 34, 1, 4]}
    with pytest.raises(ValueError):
        # Presumably the name-mangled form of the client's private
        # __process_data method -- confirm against the client class name.
        api_client._api__process_data(features, "features")


def test_build_get_arguments(api_client):
type = "input"
version = "1.2.2"
Expand Down Expand Up @@ -318,7 +324,7 @@ def test_cancel_task(api_client, mocker, response_202_cancelled_task):

def test_printing_error(mocker, api_client, response_500):
algorithm_id = "id"
data = {"data": "some_data"}
data = data = {"data": [1, 2, 3], "test": [3, 4, 5]}
mocker.patch('requests.post', return_value=response_500)
mocker.patch('compredict.connection.Connection.handle_response',
side_effect=ServerError("This is error that is going to be printed"))
Expand All @@ -332,7 +338,7 @@ def test_printing_error(mocker, api_client, response_500):

def test_train_algorithm(mocker, api_client, response_200_with_job_id):
algorithm_id = "algorithm-slug"
data = {"data": "some_data"}
data = data = {"data": [1, 2, 3], "test": [3, 4, 5]}
mocker.patch('requests.post', return_value=response_200_with_job_id)
result_task = api_client.train_algorithm(algorithm_id, data)
assert isinstance(result_task, Task)
Expand All @@ -342,7 +348,7 @@ def test_train_algorithm(mocker, api_client, response_200_with_job_id):
def test_train_algorithm_with_client_error(mocker, api_client, response_400):
api_client.connection.fail_on_error = True
algorithm_id = "trainable-algorithm"
data = {"data": "some_data"}
data = {"data": [1, 2, 3], "test": [3, 4, 5]}
mocker.patch('requests.post', return_value=response_400)

with pytest.raises(ClientError):
Expand All @@ -351,7 +357,7 @@ def test_train_algorithm_with_client_error(mocker, api_client, response_400):

def test_train_algorithm_with_server_error(mocker, api_client, response_500):
algorithm_id = "trainable-algorithm"
data = {"data": "some_data"}
data = {"data": [1, 2, 3], "test": [3, 4, 5]}
mocker.patch('requests.post', return_value=response_500)
mocker.patch('compredict.connection.Connection.handle_response', side_effect=ServerError)

Expand Down

0 comments on commit 175f93d

Please sign in to comment.