From 3114285e0f197be2c9bee99b56bac7cf7c52582e Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Thu, 22 Aug 2024 15:56:15 +0000 Subject: [PATCH] add temporary cudf string copy_range workaround --- python/morpheus/morpheus/_lib/src/messages/multi.cpp | 11 ++++++++++- python/morpheus/morpheus/messages/multi_message.py | 11 ++++++++++- tests/conftest.py | 5 +++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/python/morpheus/morpheus/_lib/src/messages/multi.cpp b/python/morpheus/morpheus/_lib/src/messages/multi.cpp index 6e42e839d7..72fddfa8ec 100644 --- a/python/morpheus/morpheus/_lib/src/messages/multi.cpp +++ b/python/morpheus/morpheus/_lib/src/messages/multi.cpp @@ -397,7 +397,16 @@ void MultiMessageInterfaceProxy::set_meta(MultiMessage& self, pybind11::object c } // Perform the update via slices - df.attr("loc")[pybind11::make_tuple(df.attr("index")[row_indexer], columns)] = value; + auto is_string_dtype = pybind11::module_::import("cudf.api.types").attr("is_string_dtype"); + auto series = pybind11::module_::import("cudf").attr("Series"); + + if (is_string_dtype(series(value)).cast()) { + df[columns] = pybind11::str(); + df[columns].attr("iloc")[row_indexer] = value; + } else { + df.attr("loc")[pybind11::make_tuple(df.attr("index")[row_indexer], columns)] = value; + } + // Reset the index if we changed it if (!saved_index.is_none()) diff --git a/python/morpheus/morpheus/messages/multi_message.py b/python/morpheus/morpheus/messages/multi_message.py index 44e1bb6cba..eb8f1863bf 100644 --- a/python/morpheus/morpheus/messages/multi_message.py +++ b/python/morpheus/morpheus/messages/multi_message.py @@ -291,7 +291,16 @@ def set_meta(self, columns: typing.Union[None, str, typing.List[str]], value): saved_index = df.index df.reset_index(drop=True, inplace=True) - df.loc[df.index[row_indexer], columns] = value + + # TODO: when value is strings, make all values empty strings + from cudf.api.types import is_string_dtype + + if is_string_dtype(cudf.Series(value)): + df[columns] = "" + df[columns].iloc[row_indexer] = value + else: + df.loc[df.index[row_indexer], columns] = value + df.set_index(saved_index, inplace=True) else: # Need to determine the boolean mask to use indexes with df.loc diff --git a/tests/conftest.py b/tests/conftest.py index 2e4be942ef..8853907bf6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -25,9 +25,14 @@ import types import typing import warnings +import sys from pathlib import Path from unittest import mock +import sys +if "/home/coder" in sys.path: + sys.path.remove("/home/coder") + import pytest import requests