diff --git a/morpheus/_lib/include/morpheus/messages/meta.hpp b/morpheus/_lib/include/morpheus/messages/meta.hpp index 06ca3dd7c8..750236df4a 100644 --- a/morpheus/_lib/include/morpheus/messages/meta.hpp +++ b/morpheus/_lib/include/morpheus/messages/meta.hpp @@ -122,8 +122,6 @@ class MORPHEUS_EXPORT MessageMeta */ virtual std::optional ensure_sliceable_index(); - pybind11::object get_py_object() const; - /** * @brief Creates a deep copy of DataFrame with the specified ranges. * diff --git a/morpheus/_lib/src/messages/meta.cpp b/morpheus/_lib/src/messages/meta.cpp index 910784eae0..b141a0e6f0 100644 --- a/morpheus/_lib/src/messages/meta.cpp +++ b/morpheus/_lib/src/messages/meta.cpp @@ -140,11 +140,6 @@ void MessageMeta::set_data(const std::vector& column_names, const s } } -py::object MessageMeta::get_py_object() const -{ - return this->m_data->get_py_object(); -} - MutableTableInfo MessageMeta::get_mutable_info() const { return this->m_data->get_mutable_info(); diff --git a/morpheus/_lib/src/messages/multi.cpp b/morpheus/_lib/src/messages/multi.cpp index fc2fa11512..6e42e839d7 100644 --- a/morpheus/_lib/src/messages/multi.cpp +++ b/morpheus/_lib/src/messages/multi.cpp @@ -264,33 +264,50 @@ std::vector MultiMessageInterfaceProxy::get_meta_column_names(const pybind11::object MultiMessageInterfaceProxy::get_meta(MultiMessage& self) { - return MultiMessageInterfaceProxy::get_meta(self, std::vector{}); + // Need to release the GIL before calling `get_meta()` + pybind11::gil_scoped_release no_gil; + + // Get the column and convert to cudf + auto info = self.get_meta(); + + // Convert to a python datatable. Automatically gets the GIL + return CudfHelper::table_from_table_info(info); } pybind11::object MultiMessageInterfaceProxy::get_meta(MultiMessage& self, std::string col_name) { - return MultiMessageInterfaceProxy::get_meta(self)[col_name.c_str()]; + TableInfo info; + + { + // Need to release the GIL before calling `get_meta()` + pybind11::gil_scoped_release no_gil; + + // Get the column and convert to cudf + info = self.get_meta(); + } + + auto py_table = CudfHelper::table_from_table_info(info); + + // Now convert it to a series by selecting only the column + return py_table[col_name.c_str()]; } pybind11::object MultiMessageInterfaceProxy::get_meta(MultiMessage& self, std::vector columns) { - pybind11::object df = self.meta->get_py_object(); - - auto row_indexer = pybind11::slice( - pybind11::int_(self.mess_offset), pybind11::int_(self.mess_offset + self.mess_count), pybind11::none()); + // Need to release the GIL before calling `get_meta()` + pybind11::gil_scoped_release no_gil; - if (columns.empty()) - { - return df.attr("iloc")[row_indexer]; - } + // Get the column and convert to cudf + auto info = self.get_meta(columns); - return df.attr("iloc")[row_indexer][py::cast(columns)]; + // Convert to a python datatable. Automatically gets the GIL + return CudfHelper::table_from_table_info(info); } pybind11::object MultiMessageInterfaceProxy::get_meta(MultiMessage& self, pybind11::none none_obj) { // Just offload to the overload without columns. This overload is needed to match the python interface - return MultiMessageInterfaceProxy::get_meta(self, std::vector{}); + return MultiMessageInterfaceProxy::get_meta(self); } pybind11::object MultiMessageInterfaceProxy::get_meta_list(MultiMessage& self, pybind11::object col_name) diff --git a/tests/messages/test_message_meta.py b/tests/messages/test_message_meta.py index 1d98e7a60d..b5e2606976 100644 --- a/tests/messages/test_message_meta.py +++ b/tests/messages/test_message_meta.py @@ -124,7 +124,8 @@ def test_using_ctx_outside_with_block(df: DataFrameType): pytest.raises(AttributeError, operator.setitem, ctx, 'col', 5) -def test_update_dataframe(df: DataFrameType): +@pytest.mark.use_cudf +def test_update_dataframe_via_mutable_dataframe(df: DataFrameType): """ Change the DF in various ways pass to cpp, read back and check if the updates present @@ -174,6 +175,9 @@ def test_update_dataframe(df: DataFrameType): assert col_new_name in cdf.columns assert cdf[col_new_name].isin(col_new_struct).all() + # Ensure that the data matches the original data + DatasetManager.assert_df_equal(cdf[col_new_name], col_new_struct, assert_msg="Should be identical") + # new int column in range 1-row_count col_new_int = list(range(1, row_count + 1)) @@ -189,66 +193,39 @@ def test_update_dataframe(df: DataFrameType): cdf = meta.copy_dataframe() assert cdf[col_new_name].isin(col_new_struct).all() - # delete the new column from the DF - with meta.mutable_dataframe() as df_: - df_.drop(col_new_name, axis=1, inplace=True) - cdf = meta.copy_dataframe() - assert col_new_name not in cdf.columns + # Update the struct column with a new value + new_struct = {"book": "The Great Gatsby", "year": 1925} - # add the new column back - with meta.mutable_dataframe() as df_: - df_.insert(0, col_new_name, col_new_struct) - cdf = meta.copy_dataframe() - assert col_new_name in cdf.columns - assert cdf[col_new_name].isin(col_new_struct).all() + # save the contents of the struct cell + old_struct = cdf[col_new_name].iloc[0] - # duplicate a row - first_row = df.iloc[0] - last_row = df.iloc[-1] - with meta.mutable_dataframe() as df_: - df_ = df_.append(first_row) - cdf = meta.copy_dataframe() - # (fixme) Michael: Why is the following assert failing? we cannot - # append rows to df_ but can append to cdf. - assert cdf.shape[0] == row_count + 1 - - # (fixme) remove the duplicated row if the previous step was successful - - # change the contents of a cell - # (fixme): Michael: I am not able to change the contents of a struct cell. - # I am getting "ValueError: Unsupported dtype", expected behavior? - row_idx = 0 - col_idx = 1 - old_value = df.iloc[row_idx, col_idx] - question_of_life = 42 + # change the contents of a struct cell with meta.mutable_dataframe() as df_: - df_.iloc[row_idx, col_idx] = question_of_life + df_[col_new_name].iloc[0] = new_struct cdf = meta.copy_dataframe() - assert cdf.iloc[row_idx, col_idx] == question_of_life + assert cdf[col_new_name].iloc[0] == new_struct - # restore the contents of the first cell + # Update just the year of the struct column + new_year = 2022 with meta.mutable_dataframe() as df_: - df_.iloc[row_idx, col_idx] = old_value + df_[col_new_name].struct.field("year").iloc[0] = new_year cdf = meta.copy_dataframe() - assert cdf.iloc[row_idx, col_idx] == old_value + assert cdf[col_new_name].iloc[0] == {**new_struct, **{"year": new_year}} - # (fixme): this entire block doesn't work. Michael, expected behavior? - - # replace the contents of the first row with the last row + # restore the contents of the struct cell with meta.mutable_dataframe() as df_: - df_.iloc[0] = last_row + df_[col_new_name].iloc[0] = old_struct cdf = meta.copy_dataframe() - DatasetManager.assert_df_equal(cdf.iloc[0], last_row, assert_msg="Should be identical") + assert cdf[col_new_name].iloc[0] == old_struct - # restore the contents of the first row + # delete the new column from the DF with meta.mutable_dataframe() as df_: - df_.iloc[0] = first_row + df_.drop(col_new_name, axis=1, inplace=True) cdf = meta.copy_dataframe() - DatasetManager.assert_df_equal(cdf.iloc[0], first_row, assert_msg="Should be identical") + assert col_new_name not in cdf.columns -@pytest.mark.use_cpp -def test_update_dataframe_cpp(df: DataFrameType): +def test_update_dataframe(df: DataFrameType): """ Change the DF in various ways via cpp, read back and check if the updates present @@ -318,8 +295,6 @@ def test_update_dataframe_cpp(df: DataFrameType): meta.set_data(col_new_int_name, col_new_int) assert meta.get_data()[col_new_int_name].isin(col_new_int).all() # pylint: disable=unsubscriptable-object - # (fixme) how do you remove columns and update individual cells? - @pytest.mark.use_cpp def test_pandas_df_cpp(dataset_pandas: DatasetManager):