Skip to content

Commit

Permalink
Undoing some changes now that testing is reliably passing
Browse files Browse the repository at this point in the history
  • Loading branch information
mdemoret-nv committed Jun 17, 2024
1 parent 7b92faf commit c6aeba3
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 67 deletions.
2 changes: 0 additions & 2 deletions morpheus/_lib/include/morpheus/messages/meta.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,6 @@ class MORPHEUS_EXPORT MessageMeta
*/
virtual std::optional<std::string> ensure_sliceable_index();

pybind11::object get_py_object() const;

/**
* @brief Creates a deep copy of DataFrame with the specified ranges.
*
Expand Down
5 changes: 0 additions & 5 deletions morpheus/_lib/src/messages/meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,6 @@ void MessageMeta::set_data(const std::vector<std::string>& column_names, const s
}
}

py::object MessageMeta::get_py_object() const
{
return this->m_data->get_py_object();
}

MutableTableInfo MessageMeta::get_mutable_info() const
{
return this->m_data->get_mutable_info();
Expand Down
41 changes: 29 additions & 12 deletions morpheus/_lib/src/messages/multi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,33 +264,50 @@ std::vector<std::string> MultiMessageInterfaceProxy::get_meta_column_names(const

pybind11::object MultiMessageInterfaceProxy::get_meta(MultiMessage& self)
{
return MultiMessageInterfaceProxy::get_meta(self, std::vector<std::string>{});
// Need to release the GIL before calling `get_meta()`
pybind11::gil_scoped_release no_gil;

// Get the column and convert to cudf
auto info = self.get_meta();

// Convert to a python datatable. Automatically gets the GIL
return CudfHelper::table_from_table_info(info);
}

pybind11::object MultiMessageInterfaceProxy::get_meta(MultiMessage& self, std::string col_name)
{
return MultiMessageInterfaceProxy::get_meta(self)[col_name.c_str()];
TableInfo info;

{
// Need to release the GIL before calling `get_meta()`
pybind11::gil_scoped_release no_gil;

// Get the column and convert to cudf
info = self.get_meta();
}

auto py_table = CudfHelper::table_from_table_info(info);

// Now convert it to a series by selecting only the column
return py_table[col_name.c_str()];
}

pybind11::object MultiMessageInterfaceProxy::get_meta(MultiMessage& self, std::vector<std::string> columns)
{
pybind11::object df = self.meta->get_py_object();

auto row_indexer = pybind11::slice(
pybind11::int_(self.mess_offset), pybind11::int_(self.mess_offset + self.mess_count), pybind11::none());
// Need to release the GIL before calling `get_meta()`
pybind11::gil_scoped_release no_gil;

if (columns.empty())
{
return df.attr("iloc")[row_indexer];
}
// Get the column and convert to cudf
auto info = self.get_meta(columns);

return df.attr("iloc")[row_indexer][py::cast(columns)];
// Convert to a python datatable. Automatically gets the GIL
return CudfHelper::table_from_table_info(info);
}

pybind11::object MultiMessageInterfaceProxy::get_meta(MultiMessage& self, pybind11::none none_obj)
{
// Just offload to the overload without columns. This overload is needed to match the python interface
return MultiMessageInterfaceProxy::get_meta(self, std::vector<std::string>{});
return MultiMessageInterfaceProxy::get_meta(self);
}

pybind11::object MultiMessageInterfaceProxy::get_meta_list(MultiMessage& self, pybind11::object col_name)
Expand Down
71 changes: 23 additions & 48 deletions tests/messages/test_message_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ def test_using_ctx_outside_with_block(df: DataFrameType):
pytest.raises(AttributeError, operator.setitem, ctx, 'col', 5)


def test_update_dataframe(df: DataFrameType):
@pytest.mark.use_cudf
def test_update_dataframe_via_mutable_dataframe(df: DataFrameType):
"""
Change the DF in various ways, pass it to cpp, read it back and check that
the updates are present
Expand Down Expand Up @@ -174,6 +175,9 @@ def test_update_dataframe(df: DataFrameType):
assert col_new_name in cdf.columns
assert cdf[col_new_name].isin(col_new_struct).all()

# Ensure that the data matches the original data
DatasetManager.assert_df_equal(cdf[col_new_name], col_new_struct, assert_msg="Should be identical")

# new int column in range 1-row_count
col_new_int = list(range(1, row_count + 1))

Expand All @@ -189,66 +193,39 @@ def test_update_dataframe(df: DataFrameType):
cdf = meta.copy_dataframe()
assert cdf[col_new_name].isin(col_new_struct).all()

# delete the new column from the DF
with meta.mutable_dataframe() as df_:
df_.drop(col_new_name, axis=1, inplace=True)
cdf = meta.copy_dataframe()
assert col_new_name not in cdf.columns
# Update the struct column with a new value
new_struct = {"book": "The Great Gatsby", "year": 1925}

# add the new column back
with meta.mutable_dataframe() as df_:
df_.insert(0, col_new_name, col_new_struct)
cdf = meta.copy_dataframe()
assert col_new_name in cdf.columns
assert cdf[col_new_name].isin(col_new_struct).all()
# save the contents of the struct cell
old_struct = cdf[col_new_name].iloc[0]

# duplicate a row
first_row = df.iloc[0]
last_row = df.iloc[-1]
with meta.mutable_dataframe() as df_:
df_ = df_.append(first_row)
cdf = meta.copy_dataframe()
# (fixme) Michael: Why is the following assert failing? we cannot
# append rows to df_ but can append to cdf.
assert cdf.shape[0] == row_count + 1

# (fixme) remove the duplicated row if the previous step was successful

# change the contents of a cell
# (fixme): Michael: I am not able to change the contents of a struct cell.
# I am getting "ValueError: Unsupported dtype", expected behavior?
row_idx = 0
col_idx = 1
old_value = df.iloc[row_idx, col_idx]
question_of_life = 42
# change the contents of a struct cell
with meta.mutable_dataframe() as df_:
df_.iloc[row_idx, col_idx] = question_of_life
df_[col_new_name].iloc[0] = new_struct
cdf = meta.copy_dataframe()
assert cdf.iloc[row_idx, col_idx] == question_of_life
assert cdf[col_new_name].iloc[0] == new_struct

# restore the contents of the first cell
# Update just the year of the struct column
new_year = 2022
with meta.mutable_dataframe() as df_:
df_.iloc[row_idx, col_idx] = old_value
df_[col_new_name].struct.field("year").iloc[0] = new_year
cdf = meta.copy_dataframe()
assert cdf.iloc[row_idx, col_idx] == old_value
assert cdf[col_new_name].iloc[0] == {**new_struct, **{"year": new_year}}

# (fixme): this entire block doesn't work. Michael, expected behavior?

# replace the contents of the first row with the last row
# restore the contents of the struct cell
with meta.mutable_dataframe() as df_:
df_.iloc[0] = last_row
df_[col_new_name].iloc[0] = old_struct
cdf = meta.copy_dataframe()
DatasetManager.assert_df_equal(cdf.iloc[0], last_row, assert_msg="Should be identical")
assert cdf[col_new_name].iloc[0] == old_struct

# restore the contents of the first row
# delete the new column from the DF
with meta.mutable_dataframe() as df_:
df_.iloc[0] = first_row
df_.drop(col_new_name, axis=1, inplace=True)
cdf = meta.copy_dataframe()
DatasetManager.assert_df_equal(cdf.iloc[0], first_row, assert_msg="Should be identical")
assert col_new_name not in cdf.columns


@pytest.mark.use_cpp
def test_update_dataframe_cpp(df: DataFrameType):
def test_update_dataframe(df: DataFrameType):
"""
Change the DF in various ways via cpp, read it back and check that
the updates are present
Expand Down Expand Up @@ -318,8 +295,6 @@ def test_update_dataframe_cpp(df: DataFrameType):
meta.set_data(col_new_int_name, col_new_int)
assert meta.get_data()[col_new_int_name].isin(col_new_int).all() # pylint: disable=unsubscriptable-object

# (fixme) how do you remove columns and update individual cells?


@pytest.mark.use_cpp
def test_pandas_df_cpp(dataset_pandas: DatasetManager):
Expand Down

0 comments on commit c6aeba3

Please sign in to comment.