38 value error in case of lists in features dict (#39)

* Fix converting features from dict to df * Modify tests * Remove content_type and add warning about api versions Co-authored-by: Agata Rubacka <[email protected]>
COMPREDICT-GmbH · Jan 19, 2023 · 175f93d · 175f93d
1 parent 78300cc
commit 175f93d
Show file tree

Hide file tree

Showing 3 changed files with 35 additions and 25 deletions.
diff --git a/README.md b/README.md
@@ -40,6 +40,8 @@ Configuration
 
 AI Core requires from the user, to authenticate with token, generated with user's AI CORE username and password.
 
+**WARNING**: Bear in mind that this type of authentication works only with v2 of the AI Core API.
+
 **There are two ways in which user can generate needed token:**
 
 1. **Generate token directly with utility function** (this approach requires user to pass url to AICore as well):
@@ -195,7 +197,7 @@ The `run` function has the following signature:
 
 ~~~python
 Task|Result = algorithm.run(data, parameters=parameters, evaluate=True, encrypt=False, callback_url=None, 
-                            callback_param=None, file_content_type=None, monitor=True)
+                            callback_param=None, monitor=True)
 ~~~
 
 - `features`: data to be processed by the algorithm, it can be:

diff --git a/compredict/client.py b/compredict/client.py
@@ -217,12 +217,12 @@ def __process_data(self, data, type_of_data, compression=None):
         In case of data provided as path to file, make sure that file is of correct type.
 
         In case of parameters provided as dict: create json file from dict.
-        In case of features provided as list or dict: create DataFrame from dict or list,
-        and then write DatFrame into parquet file.
+        In case of features provided as dict: create DataFrame from dict and then write
+        DataFrame into parquet file.
         In case of features provided as DataFrame: write DataFrame into parquet file.
 
         :param data: The data to be sent for computation and prediction.
-        :type data: dict | list| str | pandas
+        :type data: dict | str | pandas
         :param type_of_data: Data can be of type: 'features' or of type: 'parameters'.
         Features will be always converted into parquet file, whereas parameters into json file.
         :return: opened file, bool indicating if file should be removed afterwards.
@@ -233,13 +233,14 @@ def __process_data(self, data, type_of_data, compression=None):
             return open(data, "rb+"), False
 
         file = NamedTemporaryFile('wb+', delete=False)
+        if type_of_data == "parameters":
+            self.__write_json_file(file, data, compression=compression)
+
         if isinstance(data, dict):
             if type_of_data == 'parameters':
                 self.__write_json_file(file, data, compression=compression)
             else:
-                data = DataFrame(data, index=[0])
-        elif isinstance(data, list):
-            data = DataFrame(data)
+                data = DataFrame(data)
 
         if type_of_data == 'features':
             data.to_parquet(file.name, compression=compression)
@@ -277,7 +278,7 @@ def __remove_file(file, is_to_remove):
 
     def run_algorithm(self,
                       algorithm_id: str,
-                      features: Union[str, DataFrame, dict, List[dict]],
+                      features: Union[str, DataFrame, dict],
                       version: Optional[str] = None,
                       evaluate: bool = True,
                       callback_url: Optional[Union[str, List[str]]] = None,
@@ -289,8 +290,8 @@ def run_algorithm(self,
         Run the given algorithm id with the passed data. The user have the ability to toggle encryption and evaluation.
 
         :param algorithm_id: String identifier of the algorithm
-        :param features: Features can be specified as path to features .parquet file, dictionary,
-        list of dictionaries or pandas.Dataframe.
+        :param features: Features can be specified as path to features .parquet file, dictionary
+        or pandas.DataFrame.
         :param version: Choose the version of the algorithm you would like to call. Defaults to latest version.
         :param evaluate: Boolean to whether evaluate the results of predictions or not.
         :param callback_param: The callback additional parameter to be sent with results.
@@ -338,7 +339,7 @@ def run_algorithm(self,
 
     def train_algorithm(self,
                         algorithm_id: str,
-                        features: Union[str, DataFrame, dict, List[dict]],
+                        features: Union[str, DataFrame, dict],
                         version: Optional[str] = None,
                         export_new_version: Optional[bool] = None,
                         parameters: Optional[Union[str, dict]] = None,
@@ -348,7 +349,8 @@ def train_algorithm(self,
         Train fit algorithm with the passed data.
 
         :param algorithm_id: String identifier of the algorithm.
-        :param features: JSON format of the data given with the correct keys as specified in the algorithm's template.
+        :param features: Features can be specified as path to features .parquet file, dictionary
+        or pandas.DataFrame.
         :param version: Choose the version of the algorithm you would like to call. Default is latest version.
         :param export_new_version: The trained model will be exported to a new version if True.
                Otherwise, the requested version will be updated. If None, then the model’s default behavior

diff --git a/tests/test_client.py b/tests/test_client.py
@@ -46,7 +46,7 @@ def test_last_error(response_400, mocker, connection):
 
 def test_run_algorithm(api_client, mocker, response_200):
     algorithm_id = "id"
-    data = {"data": "some_data"}
+    data = {"data": [1, 2, 3], "test": [3, 4, 5]}
     callback_url = ["1callback", "2callback"]
     callback_param = [{1: "first"}, {2: "second"}]
 
@@ -97,7 +97,7 @@ def test_run_algorithm_with_value_error(api_client, features_path):
 
 def test_run_algorithm_with_type_error(mocker, api_client):
     algorithm_id = "id"
-    data = {"data": "some_data"}
+    data = {"data": [1, 2, 3], "test": [3, 4, 5]}
     callback_url = ["1callback", "2callback", "3callback"]
     callback_param = [{1: "first"}, {2: "second"}]
     mocker.patch('builtins.dict', side_effect=AttributeError)
@@ -110,7 +110,7 @@ def test_run_algorithm_with_type_error(mocker, api_client):
 def test_run_algorithm_with_client_error(mocker, api_client, response_400):
     api_client.connection.fail_on_error = True
     algorithm_id = "algorithm-slug"
-    data = {"data": "some_data"}
+    data = {"data": [1, 2, 3], "test": [3, 4, 5]}
     mocker.patch('requests.post', return_value=response_400)
 
     with pytest.raises(ClientError):
@@ -119,7 +119,7 @@ def test_run_algorithm_with_client_error(mocker, api_client, response_400):
 
 def test_run_algorithm_with_server_error(mocker, api_client, response_500):
     algorithm_id = "id"
-    data = {"data": "some_data"}
+    data = {"data": [1, 2, 3], "test": [3, 4, 5]}
     mocker.patch('requests.post', return_value=response_500)
     mocker.patch('compredict.connection.Connection.handle_response', side_effect=ServerError)
 
@@ -187,11 +187,9 @@ def test_raise_errors_if_file_type_incorrect_with_value_error(file_path, file_ty
 @pytest.mark.parametrize(
     'data, type_of_data, file, to_delete',
     [
-        ({"test": 2200}, 'parameters', BufferedRandom, True),
-        ({"features": "some_features", "features_2": "different_features"}, "features", BufferedRandom, True),
-        ([{"features": "some_features", "features_2": "different_features"},
-          {"features": "some_features", "features_2": "different_features"}], "features", BufferedRandom, True),
-        (DataFrame({"features": "some_features", "features_2": "different_features"}, index=[0]), "features",
+        ({"test": 2200, "another_test": [1, 4, 6]}, 'parameters', BufferedRandom, True),
+        ({"features": [1, 2, 4, 6, 8, 10], "features_2": [1, 5, 19, 34, 1, 4]}, "features", BufferedRandom, True),
+        (DataFrame({"features": [9, 0, 2, 5], "features_2": [0, 2, 3, 6]}), "features",
          BufferedRandom, True),
         (DataFrame([{"features": "some_features", "features_2": "different_features"},
                     {"features": "some_features", "features_2": "different_features"}]), "features", BufferedRandom,
@@ -218,6 +216,14 @@ def test_process_parameters_data_provided_as_path_to_file(api_client):
     assert not to_delete
 
 
+def test_process_features_with_value_error(api_client):
+    """Parquet file schema requires columns to be of the same length, the same
+    holds when a dictionary is converted into a pandas DataFrame."""
+    features = {"features": [1, 2, 4, 6, 0], "features_2": [1, 5, 19, 34, 1, 4]}
+    with pytest.raises(ValueError):
+        api_client._api__process_data(features, "features")
+
+
 def test_build_get_arguments(api_client):
     type = "input"
     version = "1.2.2"
@@ -318,7 +324,7 @@ def test_cancel_task(api_client, mocker, response_202_cancelled_task):
 
 def test_printing_error(mocker, api_client, response_500):
     algorithm_id = "id"
-    data = {"data": "some_data"}
+    data = data = {"data": [1, 2, 3], "test": [3, 4, 5]}
     mocker.patch('requests.post', return_value=response_500)
     mocker.patch('compredict.connection.Connection.handle_response',
                  side_effect=ServerError("This is error that is going to be printed"))
@@ -332,7 +338,7 @@ def test_printing_error(mocker, api_client, response_500):
 
 def test_train_algorithm(mocker, api_client, response_200_with_job_id):
     algorithm_id = "algorithm-slug"
-    data = {"data": "some_data"}
+    data = data = {"data": [1, 2, 3], "test": [3, 4, 5]}
     mocker.patch('requests.post', return_value=response_200_with_job_id)
     result_task = api_client.train_algorithm(algorithm_id, data)
     assert isinstance(result_task, Task)
@@ -342,7 +348,7 @@ def test_train_algorithm(mocker, api_client, response_200_with_job_id):
 def test_train_algorithm_with_client_error(mocker, api_client, response_400):
     api_client.connection.fail_on_error = True
     algorithm_id = "trainable-algorithm"
-    data = {"data": "some_data"}
+    data = {"data": [1, 2, 3], "test": [3, 4, 5]}
     mocker.patch('requests.post', return_value=response_400)
 
     with pytest.raises(ClientError):
@@ -351,7 +357,7 @@ def test_train_algorithm_with_client_error(mocker, api_client, response_400):
 
 def test_train_algorithm_with_server_error(mocker, api_client, response_500):
     algorithm_id = "trainable-algorithm"
-    data = {"data": "some_data"}
+    data = {"data": [1, 2, 3], "test": [3, 4, 5]}
     mocker.patch('requests.post', return_value=response_500)
     mocker.patch('compredict.connection.Connection.handle_response', side_effect=ServerError)