Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into david-conda-timeout-2073
Browse files Browse the repository at this point in the history
  • Loading branch information
dagardner-nv authored Dec 16, 2024
2 parents 90f0c45 + 5e1116d commit 1fa4887
Show file tree
Hide file tree
Showing 29 changed files with 141 additions and 63 deletions.
1 change: 1 addition & 0 deletions ci/conda/recipes/morpheus-libs/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ outputs:
- cudf {{ rapids_version }}
- cython 3.0.*
- glog >=0.7.1,<0.8
- indicators=2.3
- libcudf {{ rapids_version }}
- librdkafka >=1.9.2,<1.10.0a0
- mrc {{ minor_version }}
Expand Down
1 change: 1 addition & 0 deletions ci/conda/recipes/morpheus/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ outputs:
- cudf {{ rapids_version }}
- cython 3.0.*
- glog >=0.7.1,<0.8
- indicators=2.3
- libcudf {{ rapids_version }}
- librdkafka >=1.9.2,<1.10.0a0
- mrc {{ minor_version }}
Expand Down
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ dependencies:
- grpcio-status
- gtest=1.14
- gxx=12.1
- httpx>=0.23,<0.28
- huggingface_hub=0.20.2
- include-what-you-use=0.20
- indicators=2.3
Expand Down Expand Up @@ -136,6 +137,7 @@ dependencies:
- faiss-cpu
- google-search-results==2.4
- langchain-nvidia-ai-endpoints==0.0.11
- langchain-openai==0.1.3
- langchain==0.1.16
- milvus==2.3.5
- nemollm==0.3.5
Expand Down
2 changes: 2 additions & 0 deletions conda/environments/examples_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies:
- feedparser=6.0
- grpcio
- grpcio-status
- httpx>=0.23,<0.28
- huggingface_hub=0.20.2
- jsonpatch>=1.33
- kfp
Expand Down Expand Up @@ -73,6 +74,7 @@ dependencies:
- faiss-cpu
- google-search-results==2.4
- langchain-nvidia-ai-endpoints==0.0.11
- langchain-openai==0.1.3
- langchain==0.1.16
- milvus==2.3.5
- nemollm==0.3.5
Expand Down
23 changes: 13 additions & 10 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -430,13 +430,14 @@ dependencies:
common:
- output_types: [requirements]
packages:
- faiss-cpu
- google-search-results==2.4
- langchain==0.1.16
- langchain-nvidia-ai-endpoints==0.0.11
- &faiss-cpu faiss-cpu
- &google-search-results google-search-results==2.4
- &langchain langchain==0.1.16
- &langchain-nvidia-ai-endpoints langchain-nvidia-ai-endpoints==0.0.11
- &langchain-openai langchain-openai==0.1.3
- milvus==2.3.5 # update to match pymilvus when available
- pymilvus==2.3.6
- nemollm==0.3.5
- &nemollm nemollm==0.3.5

example-dfp-prod:
common:
Expand Down Expand Up @@ -487,6 +488,7 @@ dependencies:
- &transformers transformers=4.36.2 # newer versions are incompatible with our pinned version of huggingface_hub
- anyio>=3.7
- arxiv=1.4
- httpx>=0.23,<0.28 # work-around for https://github.com/openai/openai-python/issues/1915
- huggingface_hub=0.20.2 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762
- jsonpatch>=1.33
- newspaper3k=0.2
Expand All @@ -499,11 +501,12 @@ dependencies:
- requests-toolbelt=1.0 # Transitive dep needed by nemollm, specified here to ensure we get a compatible version
- pip
- pip:
- langchain==0.1.16
- langchain-nvidia-ai-endpoints==0.0.11
- faiss-cpu
- google-search-results==2.4
- nemollm==0.3.5
- *faiss-cpu
- *google-search-results
- *langchain
- *langchain-nvidia-ai-endpoints
- *langchain-openai
- *nemollm
- sentence-transformers==2.7 # using pip now instead of conda to avoid install of pytorch cpu

model-training-tuning:
Expand Down
5 changes: 5 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ COPY . ./

RUN --mount=type=cache,id=workspace_cache,target=/workspace/.cache,sharing=locked \
--mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \
--mount=type=cache,id=pip_cache,target=/root/.cache/pip,sharing=locked \
# Install git-lfs before running the build to avoid errors during conda build
/opt/conda/bin/mamba install -y -n base -c conda-forge "git-lfs" &&\
source activate base &&\
Expand Down Expand Up @@ -227,6 +228,7 @@ COPY ${MORPHEUS_ROOT_HOST}/conda/environments/dev_cuda-${CUDA_MAJOR_VER}${CUDA_M

# Update the morpheus environment
RUN --mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \
--mount=type=cache,id=pip_cache,target=/root/.cache/pip,sharing=locked \
# Temp add channel_alias to get around conda 404 errors
conda config --env --set channel_alias ${CONDA_CHANNEL_ALIAS} &&\
/opt/conda/bin/conda env update --solver=libmamba -n morpheus --file conda/environments/dev.yaml &&\
Expand Down Expand Up @@ -263,6 +265,7 @@ COPY . ./

RUN --mount=type=cache,id=workspace_cache,target=/workspace/.cache,sharing=locked \
--mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \
--mount=type=cache,id=pip_cache,target=/root/.cache/pip,sharing=locked \
# Install git-lfs before running the build to avoid errors during conda build
/opt/conda/bin/mamba install -y -n base -c conda-forge "git-lfs" &&\
source activate base &&\
Expand All @@ -285,6 +288,7 @@ COPY . ./
RUN --mount=type=cache,id=workspace_cache,target=/workspace/.cache,sharing=locked \
--mount=type=bind,from=conda_bld_morpheus,source=/opt/conda/conda-bld,target=/opt/conda/conda-bld \
--mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \
--mount=type=cache,id=pip_cache,target=/root/.cache/pip,sharing=locked \
source activate morpheus &&\
CONDA_ALWAYS_YES=true /opt/conda/bin/mamba install -n morpheus \
-c local \
Expand Down Expand Up @@ -314,6 +318,7 @@ COPY "${MORPHEUS_ROOT_HOST}/conda/environments/runtime_cuda-${CUDA_MAJOR_VER}${C
# Mount Morpheus conda package build in `conda_bld_morpheus`
RUN --mount=type=bind,from=conda_bld_morpheus,source=/opt/conda/conda-bld,target=/opt/conda/conda-bld \
--mount=type=cache,id=conda_pkgs,target=/opt/conda/pkgs,sharing=locked \
--mount=type=cache,id=pip_cache,target=/root/.cache/pip,sharing=locked \
# CVE-2018-20225 for the base pip, not the env one
# conda will ignore the request to remove pip
python -m pip uninstall -y pip && \
Expand Down
2 changes: 1 addition & 1 deletion examples/llm/agents/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from langchain.agents import initialize_agent
from langchain.agents import load_tools
from langchain.agents.agent import AgentExecutor
from langchain.llms.openai import OpenAI
from langchain_openai import OpenAI

from morpheus.config import Config
from morpheus.pipeline.linear_pipeline import LinearPipeline
Expand Down
24 changes: 12 additions & 12 deletions examples/log_parsing/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,16 @@ class TritonInferenceLogParsing(TritonInferenceWorker):
"""

def build_output_message(self, msg: ControlMessage) -> ControlMessage:
seq_ids = cp.zeros((msg.tensors().count, 3), dtype=cp.uint32)
seq_ids[:, 0] = cp.arange(0, msg.tensors().count, dtype=cp.uint32)
seq_ids = cp.zeros((msg.tensor_count(), 3), dtype=cp.uint32)
seq_ids[:, 0] = cp.arange(0, msg.tensor_count(), dtype=cp.uint32)
seq_ids[:, 2] = msg.tensors().get_tensor('seq_ids')[:, 2]

memory = TensorMemory(
count=msg.tensors().count,
count=msg.tensor_count(),
tensors={
'confidences': cp.zeros((msg.tensors().count, self._inputs[list(self._inputs.keys())[0]].shape[1])),
'labels': cp.zeros((msg.tensors().count, self._inputs[list(self._inputs.keys())[0]].shape[1])),
'input_ids': cp.zeros((msg.tensors().count, msg.tensors().get_tensor('input_ids').shape[1])),
'confidences': cp.zeros((msg.tensor_count(), self._inputs[list(self._inputs.keys())[0]].shape[1])),
'labels': cp.zeros((msg.tensor_count(), self._inputs[list(self._inputs.keys())[0]].shape[1])),
'input_ids': cp.zeros((msg.tensor_count(), msg.tensors().get_tensor('input_ids').shape[1])),
'seq_ids': seq_ids
})

Expand Down Expand Up @@ -154,19 +154,19 @@ def _convert_one_response(output: ControlMessage, inf: ControlMessage, res: Tens
seq_offset = seq_ids[0, 0].item()
seq_count = seq_ids[-1, 0].item() + 1 - seq_offset

input_ids[batch_offset:inf.tensors().count + batch_offset, :] = inf.tensors().get_tensor('input_ids')
out_seq_ids[batch_offset:inf.tensors().count + batch_offset, :] = seq_ids
input_ids[batch_offset:inf.tensor_count() + batch_offset, :] = inf.tensors().get_tensor('input_ids')
out_seq_ids[batch_offset:inf.tensor_count() + batch_offset, :] = seq_ids

resp_confidences = res.get_tensor('confidences')
resp_labels = res.get_tensor('labels')

# Two scenarios:
if (inf.payload().count == inf.tensors().count):
if (inf.payload().count == inf.tensor_count()):
assert seq_count == res.count
confidences[batch_offset:inf.tensors().count + batch_offset, :] = resp_confidences
labels[batch_offset:inf.tensors().count + batch_offset, :] = resp_labels
confidences[batch_offset:inf.tensor_count() + batch_offset, :] = resp_confidences
labels[batch_offset:inf.tensor_count() + batch_offset, :] = resp_labels
else:
assert inf.tensors().count == res.count
assert inf.tensor_count() == res.count

mess_ids = seq_ids[:, 0].get().tolist()

Expand Down
43 changes: 39 additions & 4 deletions python/morpheus/morpheus/_lib/cudf_helpers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import itertools

import cudf
from cudf.core.column import ColumnBase
from cudf.core.dtypes import StructDtype

from libcpp.string cimport string
Expand All @@ -26,8 +29,6 @@ from pylibcudf.libcudf.table.table_view cimport table_view
from pylibcudf.libcudf.types cimport size_type

from cudf._lib.column cimport Column
from cudf._lib.utils cimport data_from_unique_ptr
from cudf._lib.utils cimport table_view_from_table

##### THE FOLLOWING CODE IS COPIED FROM CUDF AND SHOULD BE REMOVED WHEN UPDATING TO cudf>=24.12 #####
# see https://github.com/rapidsai/cudf/pull/17193 for details
Expand All @@ -39,6 +40,7 @@ cimport pylibcudf.libcudf.copying as cpp_copying
from pylibcudf.libcudf.column.column_view cimport column_view
from libcpp.memory cimport make_unique, unique_ptr
from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf cimport Table as plc_Table
from cudf._lib.scalar cimport DeviceScalar

# imports needed for from_column_view_with_fix
Expand Down Expand Up @@ -289,8 +291,35 @@ cdef public api:
index_names = schema_infos[0:index_col_count] if index_col_count > 0 else None
column_names = schema_infos[index_col_count:]

data, index = data_from_unique_ptr(move(table.tbl), column_names=column_names, index_names=index_names)
plc_table = plc_Table.from_libcudf(move(table.tbl))

if index_names is None:
index = None
data = {
col_name: ColumnBase.from_pylibcudf(col)
for col_name, col in zip(
column_names, plc_table.columns()
)
}
else:
result_columns = [
ColumnBase.from_pylibcudf(col)
for col in plc_table.columns()
]
index = cudf.Index._from_data(
dict(
zip(
index_names,
result_columns[: len(index_names)],
)
)
)
data = dict(
zip(
column_names,
result_columns[len(index_names) :],
)
)
df = cudf.DataFrame._from_data(data, index)

# Update the struct field names after the DataFrame is created
Expand Down Expand Up @@ -356,7 +385,13 @@ cdef public api:

cdef vector[string] temp_col_names = get_column_names(table, True)

cdef table_view input_table_view = table_view_from_table(table, ignore_index=False)
cdef plc_Table plc_table = plc_Table(
[
col.to_pylibcudf(mode="read")
for col in itertools.chain(table.index._columns, table._columns)
]
)
cdef table_view input_table_view = plc_table.view()
cdef vector[string] index_names
cdef vector[string] column_names

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include "morpheus/export.h" // for MORPHEUS_EXPORT
#include "morpheus/messages/meta.hpp" // for MessageMeta
#include "morpheus/types.hpp"
#include "morpheus/utilities/json_types.hpp" // for json_t

#include <pybind11/pytypes.h> // for object, dict, list
Expand Down Expand Up @@ -197,6 +198,13 @@ class MORPHEUS_EXPORT ControlMessage
*/
void tensors(const std::shared_ptr<TensorMemory>& tensor_memory);

/**
* @brief Get the length of tensors in the tensor memory.
*
* @return The length of tensors in the tensor memory.
*/
TensorIndex tensor_count();

/**
* @brief Get the type of task associated with the control message.
* @return An enum value indicating the task type.
Expand Down Expand Up @@ -262,6 +270,7 @@ class MORPHEUS_EXPORT ControlMessage
ControlMessageType m_cm_type{ControlMessageType::NONE};
std::shared_ptr<MessageMeta> m_payload{nullptr};
std::shared_ptr<TensorMemory> m_tensors{nullptr};
TensorIndex m_tensor_count{0};

morpheus::utilities::json_t m_tasks{};
morpheus::utilities::json_t m_config{};
Expand Down
1 change: 1 addition & 0 deletions python/morpheus/morpheus/_lib/messages/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class ControlMessage():
def task_type(self) -> ControlMessageType: ...
@typing.overload
def task_type(self, task_type: ControlMessageType) -> None: ...
def tensor_count(self) -> int: ...
@typing.overload
def tensors(self) -> TensorMemory: ...
@typing.overload
Expand Down
1 change: 1 addition & 0 deletions python/morpheus/morpheus/_lib/messages/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ PYBIND11_MODULE(messages, _module)
py::arg("meta"))
.def("tensors", pybind11::overload_cast<>(&ControlMessage::tensors))
.def("tensors", pybind11::overload_cast<const std::shared_ptr<TensorMemory>&>(&ControlMessage::tensors))
.def("tensor_count", &ControlMessage::tensor_count)
.def("remove_task", &ControlMessage::remove_task, py::arg("task_type"))
.def("set_metadata", &ControlMessage::set_metadata, py::arg("key"), py::arg("value"))
.def("task_type", pybind11::overload_cast<>(&ControlMessage::task_type))
Expand Down
15 changes: 11 additions & 4 deletions python/morpheus/morpheus/_lib/src/messages/control.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,10 @@ ControlMessage::ControlMessage(const morpheus::utilities::json_t& _config) :

ControlMessage::ControlMessage(const ControlMessage& other)
{
m_cm_type = other.m_cm_type;
m_payload = other.m_payload;
m_tensors = other.m_tensors;
m_cm_type = other.m_cm_type;
m_payload = other.m_payload;
m_tensors = other.m_tensors;
m_tensor_count = other.m_tensor_count;

m_config = other.m_config;
m_tasks = other.m_tasks;
Expand Down Expand Up @@ -256,7 +257,13 @@ std::shared_ptr<TensorMemory> ControlMessage::tensors()

void ControlMessage::tensors(const std::shared_ptr<TensorMemory>& tensors)
{
m_tensors = tensors;
m_tensors = tensors;
m_tensor_count = tensors ? tensors->count : 0;
}

TensorIndex ControlMessage::tensor_count()
{
return m_tensor_count;
}

ControlMessageType ControlMessage::task_type()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ static ShapeType get_seq_ids(const std::shared_ptr<ControlMessage>& message)
auto seq_ids = message->tensors()->get_tensor("seq_ids");
const auto item_size = seq_ids.dtype().item_size();

ShapeType host_seq_ids(message->tensors()->count);
ShapeType host_seq_ids(message->tensor_count());
MRC_CHECK_CUDA(cudaMemcpy2D(host_seq_ids.data(),
item_size,
seq_ids.data(),
Expand All @@ -82,7 +82,7 @@ static TensorObject get_tensor(std::shared_ptr<ControlMessage> message, std::str

static void reduce_outputs(std::shared_ptr<ControlMessage> const& message, TensorMap& output_tensors)
{
if (message->payload()->count() == message->tensors()->count)
if (message->payload()->count() == message->tensor_count())
{
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "morpheus/messages/control.hpp" // for ControlMessage
#include "morpheus/messages/memory/tensor_memory.hpp" // for TensorMemory
#include "morpheus/messages/meta.hpp" // for MessageMeta
#include "morpheus/types.hpp"
#include "morpheus/utilities/json_types.hpp" // for PythonByteContainer

#include <gtest/gtest.h> // for Message, TestPartResult, AssertionResult, TestInfo
Expand Down Expand Up @@ -298,7 +299,8 @@ TEST_F(TestControlMessage, SetAndGetTensorMemory)
{
auto msg = ControlMessage();

auto tensorMemory = std::make_shared<TensorMemory>(0);
TensorIndex count = 5;
auto tensorMemory = std::make_shared<TensorMemory>(count);
// Optionally, modify tensorMemory here if it has any mutable state to test

// Set the tensor memory
Expand All @@ -309,6 +311,7 @@ TEST_F(TestControlMessage, SetAndGetTensorMemory)

// Verify that the retrieved tensor memory matches what was set
EXPECT_EQ(tensorMemory, retrievedTensorMemory);
EXPECT_EQ(count, msg.tensor_count());
}

// Test setting TensorMemory to nullptr
Expand Down
Loading

0 comments on commit 1fa4887

Please sign in to comment.