From dde1d13e0bb13d0026c81531d797359d81f24b11 Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Wed, 17 Apr 2024 15:51:20 -0400 Subject: [PATCH 01/38] Use conda env create --yes instead of --force (#1636) conda dropped support for the --force flag to conda env create. This changes that flag name to --yes. See https://github.com/conda/conda/blob/main/CHANGELOG.md#2430-2024-03-12 and https://github.com/rapidsai/miniforge-cuda/pull/63 for more info. ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Eli Fajardo (https://github.com/efajardo-nv) Approvers: - David Gardner (https://github.com/dagardner-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1636 --- ci/check_style.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/check_style.sh b/ci/check_style.sh index 9205625726..beb561bb4f 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -16,7 +16,7 @@ rapids-dependency-file-generator \ --file_key checks \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml -rapids-mamba-retry env create --force -f env.yaml -n checks +rapids-mamba-retry env create --yes -f env.yaml -n checks conda activate checks # Run pre-commit checks From 6b9cb71b0fd2b9ffa47202302cd40b340bb366c4 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Thu, 18 Apr 2024 07:47:22 -0700 Subject: [PATCH 02/38] Misc CI improvements (#1618) * Fetch git tags when performing documentation builds. Allowing for the version number to appear properly in the generated documentation. This should allow for us to publish the documentation build from CI when performing a release. * Allow overriding the GIT_URL, useful when performing CI against a commit/branch/tag that exists in a remote other than origin * Replace list of CUDA architectures with RAPIDS place-holder (we were building for 60 even though we no longer support it) * Construct the `CMAKE_BUILD_ALL_FEATURES` var in a more readable way * Allow overriding the build dir, useful for local builds using `USE_HOST_GIT=1` to avoid conflicting with a potentially existing build directory * Move generated env.yaml to `$WORKSPACE_TMP`, prevents the file from being written to the root of the git repo. * Rather than init submodules in each stage by hand, use the submodules flag in the checkout action ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1618 --- .github/workflows/ci_pipe.yml | 7 +++++ ci/scripts/bootstrap_local_ci.sh | 2 ++ ci/scripts/common.sh | 2 +- ci/scripts/github/build.sh | 24 ++++++----------- ci/scripts/github/checks.sh | 24 ++++++----------- ci/scripts/github/common.sh | 35 ++++++++++++++++++++++--- ci/scripts/github/conda.sh | 2 -- ci/scripts/github/docs.sh | 16 +++++------- ci/scripts/github/test.sh | 20 +++++--------- ci/scripts/run_ci_local.sh | 45 ++++++++++++++++++++------------ 10 files changed, 99 insertions(+), 78 deletions(-) diff --git a/.github/workflows/ci_pipe.yml b/.github/workflows/ci_pipe.yml index 0ffd718b3d..34dfbd38cd 100644 --- a/.github/workflows/ci_pipe.yml +++ b/.github/workflows/ci_pipe.yml @@ -85,6 +85,7 @@ jobs: lfs: false path: 'morpheus' fetch-depth: 0 + submodules: 'recursive' - name: Get AWS credentials using OIDC uses: aws-actions/configure-aws-credentials@v1-node16 @@ -115,6 +116,7 @@ jobs: with: lfs: false path: 'morpheus' + submodules: 'recursive' - name: Get AWS credentials using OIDC uses: aws-actions/configure-aws-credentials@v1-node16 @@ -149,6 +151,7 @@ jobs: with: lfs: false path: 'morpheus' + submodules: 'recursive' - name: Get AWS credentials using OIDC uses: aws-actions/configure-aws-credentials@v1-node16 @@ -180,6 +183,9 @@ jobs: with: lfs: false path: 'morpheus' + # Fetch tags so that documentation builds for releases will report the version number correctly + fetch-tags: true + submodules: 'recursive' - name: Get AWS credentials using OIDC uses: aws-actions/configure-aws-credentials@v1-node16 @@ -213,6 +219,7 @@ jobs: lfs: false path: 'morpheus' fetch-depth: 0 + submodules: 'recursive' - name: Get AWS credentials using OIDC uses: aws-actions/configure-aws-credentials@v1-node16 diff --git a/ci/scripts/bootstrap_local_ci.sh b/ci/scripts/bootstrap_local_ci.sh index 3051b13af1..45c68b3ae4 100755 --- a/ci/scripts/bootstrap_local_ci.sh +++ b/ci/scripts/bootstrap_local_ci.sh @@ -25,6 +25,8 @@ else git checkout ${GIT_BRANCH} git pull git checkout ${GIT_COMMIT} + git fetch --tags + git submodule update --init --recursive fi export MORPHEUS_ROOT=$(pwd) diff --git a/ci/scripts/common.sh b/ci/scripts/common.sh index 3bfa4e0870..75e83a0c7c 100644 --- a/ci/scripts/common.sh +++ b/ci/scripts/common.sh @@ -73,7 +73,7 @@ function get_modified_files() { local GIT_DIFF_BASE=${GIT_DIFF_BASE:-$(get_merge_base)} # If invoked by a git-commit-hook, this will be populated - local result=( $(git diff ${GIT_DIFF_ARGS} $(get_merge_base) | grep -P ${1:-'.*'}) ) + local result=( $(git diff ${GIT_DIFF_ARGS} ${GIT_DIFF_BASE} | grep -P ${1:-'.*'}) ) local files=() diff --git a/ci/scripts/github/build.sh b/ci/scripts/github/build.sh index b75107f637..5941ca03bd 100755 --- a/ci/scripts/github/build.sh +++ b/ci/scripts/github/build.sh @@ -21,41 +21,33 @@ source ${WORKSPACE}/ci/scripts/github/common.sh rapids-dependency-file-generator \ --output conda \ --file_key build \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${WORKSPACE_TMP}/env.yaml" -update_conda_env env.yaml +update_conda_env "${WORKSPACE_TMP}/env.yaml" log_toolchain -git submodule update --init --recursive - CMAKE_FLAGS="${CMAKE_BUILD_ALL_FEATURES}" CMAKE_FLAGS="${CMAKE_FLAGS} -DMORPHEUS_PYTHON_BUILD_WHEEL=ON" CMAKE_FLAGS="${CMAKE_FLAGS} 
-DMORPHEUS_PYTHON_BUILD_STUBS=OFF" CMAKE_FLAGS="${CMAKE_FLAGS} -DCMAKE_BUILD_RPATH_USE_ORIGIN=ON" -if [[ "${LOCAL_CI}" == "" ]]; then - CMAKE_FLAGS="${CMAKE_FLAGS} -DCCACHE_PROGRAM_PATH=$(which sccache)" -fi rapids-logger "Configuring cmake for Morpheus with ${CMAKE_FLAGS}" -cmake -B build -G Ninja ${CMAKE_FLAGS} . +cmake ${CMAKE_FLAGS} . rapids-logger "Building Morpheus" -cmake --build build --parallel ${PARALLEL_LEVEL} +cmake --build ${BUILD_DIR} --parallel ${PARALLEL_LEVEL} -if [[ "${LOCAL_CI}" == "" ]]; then - rapids-logger "sccache usage for morpheus build:" - sccache --show-stats -fi +log_sccache_stats rapids-logger "Archiving results" -tar cfj "${WORKSPACE_TMP}/wheel.tar.bz" build/dist +tar cfj "${WORKSPACE_TMP}/wheel.tar.bz" ${BUILD_DIR}/dist -MORPHEUS_LIBS=($(find ${MORPHEUS_ROOT}/build/morpheus/_lib -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;) \ +MORPHEUS_LIBS=($(find ${MORPHEUS_ROOT}/${BUILD_DIR}/morpheus/_lib -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;) \ $(find ${MORPHEUS_ROOT}/examples -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;)) tar cfj "${WORKSPACE_TMP}/morhpeus_libs.tar.bz" "${MORPHEUS_LIBS[@]}" -CPP_TESTS=($(find ${MORPHEUS_ROOT}/build/morpheus/_lib/tests -name "*.x" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;)) +CPP_TESTS=($(find ${MORPHEUS_ROOT}/${BUILD_DIR}/morpheus/_lib/tests -name "*.x" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;)) tar cfj "${WORKSPACE_TMP}/cpp_tests.tar.bz" "${CPP_TESTS[@]}" rapids-logger "Pushing results to ${DISPLAY_ARTIFACT_URL}" diff --git a/ci/scripts/github/checks.sh b/ci/scripts/github/checks.sh index 487e053c7a..22f06f1557 100755 --- a/ci/scripts/github/checks.sh +++ b/ci/scripts/github/checks.sh @@ -21,42 +21,34 @@ source ${WORKSPACE}/ci/scripts/github/common.sh rapids-dependency-file-generator \ --output conda \ --file_key build \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${WORKSPACE_TMP}/env.yaml" -update_conda_env env.yaml +update_conda_env "${WORKSPACE_TMP}/env.yaml" log_toolchain cd ${MORPHEUS_ROOT} +# Fetching the base branch will try methods that might fail, then fallback to one that does, set +e for this section +set +e fetch_base_branch - -git submodule update --init --recursive +set -e rapids-logger "Configuring cmake for Morpheus" CMAKE_FLAGS="${CMAKE_BUILD_ALL_FEATURES}" CMAKE_FLAGS="${CMAKE_FLAGS} -DMORPHEUS_PYTHON_BUILD_STUBS=OFF" export CMAKE_FLAGS="${CMAKE_FLAGS} -DMORPHEUS_PYTHON_INPLACE_BUILD=ON" -if [[ "${LOCAL_CI}" == "" ]]; then - CMAKE_FLAGS="${CMAKE_FLAGS} -DCCACHE_PROGRAM_PATH=$(which sccache)" -fi -cmake -B build -G Ninja ${CMAKE_FLAGS} . +cmake ${CMAKE_FLAGS} . 
rapids-logger "Building Morpheus" -cmake --build build --parallel ${PARALLEL_LEVEL} +cmake --build ${BUILD_DIR} --parallel ${PARALLEL_LEVEL} -if [[ "${LOCAL_CI}" == "" ]]; then - rapids-logger "sccache usage for source build:" - sccache --show-stats -fi +log_sccache_stats rapids-logger "Installing Morpheus" pip install ./ -# Setting this prevents loading of cudf since we don't have a GPU -export MORPHEUS_IN_SPHINX_BUILD=1 - rapids-logger "Checking copyright headers" python ${MORPHEUS_ROOT}/ci/scripts/copyright.py --verify-apache-v2 --git-diff-commits ${CHANGE_TARGET} ${GIT_COMMIT} diff --git a/ci/scripts/github/common.sh b/ci/scripts/github/common.sh index 3aa6c4c69e..a4269828c2 100644 --- a/ci/scripts/github/common.sh +++ b/ci/scripts/github/common.sh @@ -61,7 +61,26 @@ export SCCACHE_REGION="us-east-2" export SCCACHE_IDLE_TIMEOUT=32768 #export SCCACHE_LOG=debug -export CMAKE_BUILD_ALL_FEATURES="-DCMAKE_MESSAGE_CONTEXT_SHOW=ON -DMORPHEUS_CUDA_ARCHITECTURES=60;70;75;80 -DMORPHEUS_BUILD_BENCHMARKS=ON -DMORPHEUS_BUILD_EXAMPLES=ON -DMORPHEUS_BUILD_TESTS=ON -DMORPHEUS_USE_CONDA=ON -DMORPHEUS_PYTHON_INPLACE_BUILD=OFF -DMORPHEUS_PYTHON_BUILD_STUBS=ON -DMORPHEUS_USE_CCACHE=ON" +# Set the build flags +export BUILD_DIR=${BUILD_DIR:-build} + +_FLAGS=() +_FLAGS+=("-B" "${BUILD_DIR}") +_FLAGS+=("-G" "Ninja") +_FLAGS+=("-DCMAKE_MESSAGE_CONTEXT_SHOW=ON") +_FLAGS+=("-DMORPHEUS_CUDA_ARCHITECTURES=RAPIDS") +_FLAGS+=("-DMORPHEUS_USE_CONDA=ON") +_FLAGS+=("-DMORPHEUS_USE_CCACHE=ON") +_FLAGS+=("-DMORPHEUS_PYTHON_INPLACE_BUILD=OFF") +_FLAGS+=("-DMORPHEUS_PYTHON_BUILD_STUBS=ON") +_FLAGS+=("-DMORPHEUS_BUILD_BENCHMARKS=ON") +_FLAGS+=("-DMORPHEUS_BUILD_EXAMPLES=ON") +_FLAGS+=("-DMORPHEUS_BUILD_TESTS=ON") +if [[ "${LOCAL_CI}" == "" ]]; then + _FLAGS+=("-DCCACHE_PROGRAM_PATH=$(which sccache)") +fi +export CMAKE_BUILD_ALL_FEATURES="${_FLAGS[@]}" +unset _FLAGS export FETCH_STATUS=0 @@ -112,8 +131,11 @@ function fetch_base_branch_gh_api() { function fetch_base_branch_local() { rapids-logger "Retrieving base branch from git" - git remote add upstream ${GIT_UPSTREAM_URL} - git fetch upstream --tags + if [[ "${USE_HOST_GIT}" == "0" ]]; then + git remote add upstream ${GIT_UPSTREAM_URL} + git fetch upstream --tags + fi + source ${MORPHEUS_ROOT}/ci/scripts/common.sh export BASE_BRANCH=$(get_base_branch) export CHANGE_TARGET="upstream/${BASE_BRANCH}" @@ -147,6 +169,13 @@ function log_toolchain() { sccache --version } +function log_sccache_stats() { + if [[ "${LOCAL_CI}" == "" ]]; then + rapids-logger "sccache usage for morpheus build:" + sccache --show-stats + fi +} + function upload_artifact() { FILE_NAME=$1 BASE_NAME=$(basename "${FILE_NAME}") diff --git a/ci/scripts/github/conda.sh b/ci/scripts/github/conda.sh index f92374f222..4114bd9ab9 100755 --- a/ci/scripts/github/conda.sh +++ b/ci/scripts/github/conda.sh @@ -23,8 +23,6 @@ cd ${MORPHEUS_ROOT} fetch_base_branch -git submodule update --init --recursive - # Its important that we are in the base environment for the build rapids-logger "Activating Base Conda Environment" diff --git a/ci/scripts/github/docs.sh b/ci/scripts/github/docs.sh index f928d02a38..f4a33b91b3 100755 --- a/ci/scripts/github/docs.sh +++ b/ci/scripts/github/docs.sh @@ -21,15 +21,15 @@ source ${WORKSPACE}/ci/scripts/github/common.sh rapids-dependency-file-generator \ --output conda \ --file_key docs \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee 
"${WORKSPACE_TMP}/env.yaml" -update_conda_env env.yaml +update_conda_env "${WORKSPACE_TMP}/env.yaml" download_artifact "wheel.tar.bz" tar xf "${WORKSPACE_TMP}/wheel.tar.bz" -pip install ${MORPHEUS_ROOT}/build/dist/*.whl +pip install ${MORPHEUS_ROOT}/${BUILD_DIR}/dist/*.whl rapids-logger "Pulling LFS assets" cd ${MORPHEUS_ROOT} @@ -37,17 +37,15 @@ cd ${MORPHEUS_ROOT} git lfs install ${MORPHEUS_ROOT}/scripts/fetch_data.py fetch docs examples -git submodule update --init --recursive - rapids-logger "Configuring for docs" -cmake -B build -G Ninja ${CMAKE_BUILD_ALL_FEATURES} -DCMAKE_INSTALL_PREFIX=${CONDA_PREFIX} -DMORPHEUS_PYTHON_BUILD_STUBS=OFF -DMORPHEUS_BUILD_DOCS=ON . +cmake ${CMAKE_BUILD_ALL_FEATURES} -DCMAKE_INSTALL_PREFIX=${CONDA_PREFIX} -DMORPHEUS_PYTHON_BUILD_STUBS=OFF -DMORPHEUS_BUILD_DOCS=ON . rapids-logger "Building docs" -cmake --build build --parallel ${PARALLEL_LEVEL} --target install -cmake --build build --parallel ${PARALLEL_LEVEL} --target morpheus_docs +cmake --build ${BUILD_DIR} --parallel ${PARALLEL_LEVEL} --target install +cmake --build ${BUILD_DIR} --parallel ${PARALLEL_LEVEL} --target morpheus_docs rapids-logger "Archiving the docs" -tar cfj "${WORKSPACE_TMP}/docs.tar.bz" build/docs/html +tar cfj "${WORKSPACE_TMP}/docs.tar.bz" ${BUILD_DIR}/docs/html rapids-logger "Pushing results to ${DISPLAY_ARTIFACT_URL}" set_job_summary_preamble diff --git a/ci/scripts/github/test.sh b/ci/scripts/github/test.sh index fe4fe23813..e050895083 100755 --- a/ci/scripts/github/test.sh +++ b/ci/scripts/github/test.sh @@ -22,34 +22,26 @@ source ${WORKSPACE}/ci/scripts/github/common.sh rapids-dependency-file-generator \ --output conda \ --file_key test \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${WORKSPACE_TMP}/env.yaml" -update_conda_env env.yaml +update_conda_env "${WORKSPACE_TMP}/env.yaml" log_toolchain -git submodule update --init --recursive - CMAKE_FLAGS="${CMAKE_BUILD_ALL_FEATURES}" CMAKE_FLAGS="${CMAKE_FLAGS} -DCMAKE_BUILD_RPATH_USE_ORIGIN=ON" CMAKE_FLAGS="${CMAKE_FLAGS} -DMORPHEUS_PYTHON_BUILD_STUBS=ON" CMAKE_FLAGS="${CMAKE_FLAGS} -DMORPHEUS_PYTHON_BUILD_WHEEL=OFF" CMAKE_FLAGS="${CMAKE_FLAGS} -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON" CMAKE_FLAGS="${CMAKE_FLAGS} -DCMAKE_INSTALL_PREFIX=${CONDA_PREFIX}" -if [[ "${LOCAL_CI}" == "" ]]; then - CMAKE_FLAGS="${CMAKE_FLAGS} -DCCACHE_PROGRAM_PATH=$(which sccache)" -fi rapids-logger "Configuring cmake for Morpheus with ${CMAKE_FLAGS}" -cmake -B build -G Ninja ${CMAKE_FLAGS} . +cmake ${CMAKE_FLAGS} . 
rapids-logger "Building Morpheus" -cmake --build build --parallel ${PARALLEL_LEVEL} --target install +cmake --build ${BUILD_DIR} --parallel ${PARALLEL_LEVEL} --target install -if [[ "${LOCAL_CI}" == "" ]]; then - rapids-logger "sccache usage for morpheus build:" - sccache --show-stats -fi +log_sccache_stats rapids-logger "Checking Python stub files" @@ -62,7 +54,7 @@ if [[ $(git status --short --untracked | grep .pyi) != "" ]]; then exit 1 fi -CPP_TESTS=($(find ${MORPHEUS_ROOT}/build -name "*.x")) +CPP_TESTS=($(find ${MORPHEUS_ROOT}/${BUILD_DIR} -name "*.x")) rapids-logger "Pulling LFS assets" diff --git a/ci/scripts/run_ci_local.sh b/ci/scripts/run_ci_local.sh index fb29fdf139..979fd07e23 100755 --- a/ci/scripts/run_ci_local.sh +++ b/ci/scripts/run_ci_local.sh @@ -45,7 +45,10 @@ MORPHEUS_ROOT=${MORPHEUS_ROOT:-$(git rev-parse --show-toplevel)} # match CI, the default) USE_HOST_GIT=${USE_HOST_GIT:-0} -GIT_URL=$(git remote get-url origin) +# Useful when using a host git repo to avoid conflicting with a potentially existing 'build' directory +BUILD_DIR=${BUILD_DIR:-build-ci} + +GIT_URL=${GIT_URL:-$(git remote get-url origin)} GIT_URL=$(git_ssh_to_https ${GIT_URL}) GIT_UPSTREAM_URL=$(git remote get-url upstream) @@ -62,33 +65,41 @@ DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""} BUILD_CONTAINER="nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-${CONTAINER_VER}" TEST_CONTAINER="nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-${CONTAINER_VER}" -ENV_LIST="--env LOCAL_CI_TMP=/ci_tmp" -ENV_LIST="${ENV_LIST} --env GIT_URL=${GIT_URL}" -ENV_LIST="${ENV_LIST} --env GIT_UPSTREAM_URL=${GIT_UPSTREAM_URL}" -ENV_LIST="${ENV_LIST} --env GIT_BRANCH=${GIT_BRANCH}" -ENV_LIST="${ENV_LIST} --env GIT_COMMIT=${GIT_COMMIT}" -ENV_LIST="${ENV_LIST} --env PARALLEL_LEVEL=$(nproc)" -ENV_LIST="${ENV_LIST} --env CUDA_VER=${CUDA_VER}" -ENV_LIST="${ENV_LIST} --env SKIP_CONDA_ENV_UPDATE=${SKIP_CONDA_ENV_UPDATE}" -ENV_LIST="${ENV_LIST} --env USE_HOST_GIT=${USE_HOST_GIT}" +ENV_LIST=() +ENV_LIST+=("--env" "LOCAL_CI_TMP=/ci_tmp") +ENV_LIST+=("--env" "GIT_URL=${GIT_URL}") +ENV_LIST+=("--env" "GIT_UPSTREAM_URL=${GIT_UPSTREAM_URL}") +ENV_LIST+=("--env" "GIT_BRANCH=${GIT_BRANCH}") +ENV_LIST+=("--env" "GIT_COMMIT=${GIT_COMMIT}") +ENV_LIST+=("--env" "PARALLEL_LEVEL=$(nproc)") +ENV_LIST+=("--env" "CUDA_VER=${CUDA_VER}") +ENV_LIST+=("--env" "SKIP_CONDA_ENV_UPDATE=${SKIP_CONDA_ENV_UPDATE}") +ENV_LIST+=("--env" "USE_HOST_GIT=${USE_HOST_GIT}") +ENV_LIST+=("--env" "BUILD_DIR=${BUILD_DIR}") mkdir -p ${LOCAL_CI_TMP} cp ${MORPHEUS_ROOT}/ci/scripts/bootstrap_local_ci.sh ${LOCAL_CI_TMP} for STAGE in "${STAGES[@]}"; do - DOCKER_RUN_ARGS="--rm -ti --net=host -v "${LOCAL_CI_TMP}":/ci_tmp ${ENV_LIST} --env STAGE=${STAGE}" + DOCKER_RUN_ARGS=() + DOCKER_RUN_ARGS+=("--rm") + DOCKER_RUN_ARGS+=("-ti") + DOCKER_RUN_ARGS+=("--net=host") + DOCKER_RUN_ARGS+=("-v" "${LOCAL_CI_TMP}:/ci_tmp") + DOCKER_RUN_ARGS+=("${ENV_LIST[@]}") + DOCKER_RUN_ARGS+=("--env STAGE=${STAGE}") if [[ "${STAGE}" == "test" || "${USE_GPU}" == "1" ]]; then CONTAINER="${TEST_CONTAINER}" - DOCKER_RUN_ARGS="${DOCKER_RUN_ARGS} --runtime=nvidia" - DOCKER_RUN_ARGS="${DOCKER_RUN_ARGS} --gpus all" - DOCKER_RUN_ARGS="${DOCKER_RUN_ARGS} --cap-add=sys_nice" + DOCKER_RUN_ARGS+=("--runtime=nvidia") + DOCKER_RUN_ARGS+=("--gpus all") + DOCKER_RUN_ARGS+=("--cap-add=sys_nice") else CONTAINER="${BUILD_CONTAINER}" - DOCKER_RUN_ARGS="${DOCKER_RUN_ARGS} --runtime=runc" + DOCKER_RUN_ARGS+=("--runtime=runc") fi if [[ "${USE_HOST_GIT}" == "1" ]]; then - DOCKER_RUN_ARGS="${DOCKER_RUN_ARGS} -v 
${MORPHEUS_ROOT}:/Morpheus" + DOCKER_RUN_ARGS+=("-v" "${MORPHEUS_ROOT}:/Morpheus") fi if [[ "${STAGE}" == "bash" ]]; then @@ -99,7 +110,7 @@ for STAGE in "${STAGES[@]}"; do echo "Running ${STAGE} stage in ${CONTAINER}" set -x - docker run ${DOCKER_RUN_ARGS} ${DOCKER_EXTRA_ARGS} ${CONTAINER} ${DOCKER_RUN_CMD} + docker run ${DOCKER_RUN_ARGS[@]} ${DOCKER_EXTRA_ARGS} ${CONTAINER} ${DOCKER_RUN_CMD} set +x STATUS=$? From 82ce14cc9a439c6310cf734e4fa55928f643c433 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Fri, 19 Apr 2024 11:01:19 -0500 Subject: [PATCH 03/38] Fix a typo in the devcontainer base image (#1638) Closes https://github.com/nv-morpheus/Morpheus/issues/1624, where the devcontainer fails to build due to a mis-typed base container. ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Christopher Harris (https://github.com/cwharris) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1638 --- .devcontainer/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9195d475d4..c102b78a8a 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -13,6 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM rapidsai/devcontainers:23.12-cpp-cuda12.1-mambaforge-ubuntu22.04 AS base +FROM rapidsai/devcontainers:23.12-cpp-mambaforge-ubuntu22.04 AS base ENV PATH="${PATH}:/workspaces/morpheus/.devcontainer/bin" From 883b804572b8a65aea947427f53bed97cfbff791 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Fri, 19 Apr 2024 13:43:27 -0700 Subject: [PATCH 04/38] Don't set pe_count for the C++ impl of the TritonInferenceStage (#1640) * Ensure that both `pe_count` & `engines_per_pe` are both set to `1` for the C++ impl of the `TritonInferenceStage` * Remove hard-coded `--num_threads=1` from validation scripts * Disable hammah validation script until #1641 can be resolved * Back-port of #1636 Closes #1639 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
Authors: - David Gardner (https://github.com/dagardner-nv) - Eli Fajardo (https://github.com/efajardo-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1640 --- ci/check_style.sh | 2 +- morpheus/stages/inference/triton_inference_stage.py | 10 ++++++++++ scripts/validation/val-run-all.sh | 10 ++++++++-- scripts/validation/val-run-pipeline.sh | 12 ++++++------ 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/ci/check_style.sh b/ci/check_style.sh index 9205625726..beb561bb4f 100755 --- a/ci/check_style.sh +++ b/ci/check_style.sh @@ -16,7 +16,7 @@ rapids-dependency-file-generator \ --file_key checks \ --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml -rapids-mamba-retry env create --force -f env.yaml -n checks +rapids-mamba-retry env create --yes -f env.yaml -n checks conda activate checks # Run pre-commit checks diff --git a/morpheus/stages/inference/triton_inference_stage.py b/morpheus/stages/inference/triton_inference_stage.py index e5901363f9..e6c5c0fbb7 100644 --- a/morpheus/stages/inference/triton_inference_stage.py +++ b/morpheus/stages/inference/triton_inference_stage.py @@ -781,3 +781,13 @@ def _get_cpp_inference_node(self, builder: mrc.Builder) -> mrc.SegmentObject: self._needs_logits, self._input_mapping, self._output_mapping) + + def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: + node = super()._build_single(builder, input_node) + + # ensure that the C++ impl only uses a single progress engine + if (self._build_cpp_node()): + node.launch_options.pe_count = 1 + node.launch_options.engines_per_pe = 1 + + return node diff --git a/scripts/validation/val-run-all.sh b/scripts/validation/val-run-all.sh index 905ee7f7e5..c85711cdbf 100755 --- a/scripts/validation/val-run-all.sh +++ b/scripts/validation/val-run-all.sh @@ -31,7 +31,10 @@ ensure_triton_running export USE_CPP=0 ${SCRIPT_DIR}/abp/val-abp-all.sh -${SCRIPT_DIR}/hammah/val-hammah-all.sh + +# Disabled per #1641 +# ${SCRIPT_DIR}/hammah/val-hammah-all.sh + ${SCRIPT_DIR}/phishing/val-phishing-all.sh ${SCRIPT_DIR}/sid/val-sid-all.sh @@ -39,6 +42,9 @@ ${SCRIPT_DIR}/sid/val-sid-all.sh export USE_CPP=1 ${SCRIPT_DIR}/abp/val-abp-all.sh -${SCRIPT_DIR}/hammah/val-hammah-all.sh + +# Disabled per #1641 +# ${SCRIPT_DIR}/hammah/val-hammah-all.sh + ${SCRIPT_DIR}/phishing/val-phishing-all.sh ${SCRIPT_DIR}/sid/val-sid-all.sh diff --git a/scripts/validation/val-run-pipeline.sh b/scripts/validation/val-run-pipeline.sh index ee8b00075c..65641a1370 100755 --- a/scripts/validation/val-run-pipeline.sh +++ b/scripts/validation/val-run-pipeline.sh @@ -37,7 +37,7 @@ function run_pipeline_sid_minibert(){ VAL_FILE=$4 VAL_OUTPUT=$5 - morpheus --log_level=DEBUG run --num_threads=1 --pipeline_batch_size=1024 --model_max_batch_size=32 --use_cpp=${USE_CPP} \ + morpheus --log_level=DEBUG run --num_threads=$(nproc) --pipeline_batch_size=1024 --model_max_batch_size=32 --use_cpp=${USE_CPP} \ pipeline-nlp --model_seq_length=256 \ from-file --filename=${INPUT_FILE} \ deserialize \ @@ -58,7 +58,7 @@ function run_pipeline_sid_bert(){ VAL_FILE=$4 VAL_OUTPUT=$5 - morpheus --log_level=DEBUG run --num_threads=1 --pipeline_batch_size=1024 --model_max_batch_size=32 --use_cpp=${USE_CPP} \ + morpheus --log_level=DEBUG run --num_threads=$(nproc) --pipeline_batch_size=1024 --model_max_batch_size=32 --use_cpp=${USE_CPP} \ pipeline-nlp --model_seq_length=256 \ from-file --filename=${INPUT_FILE} \ deserialize \ @@ 
-79,7 +79,7 @@ function run_pipeline_abp_nvsmi(){ VAL_FILE=$4 VAL_OUTPUT=$5 - morpheus --log_level=DEBUG run --num_threads=1 --pipeline_batch_size=1024 --model_max_batch_size=1024 --use_cpp=${USE_CPP} \ + morpheus --log_level=DEBUG run --num_threads=$(nproc) --pipeline_batch_size=1024 --model_max_batch_size=1024 --use_cpp=${USE_CPP} \ pipeline-fil --columns_file=${MORPHEUS_ROOT}/morpheus/data/columns_fil.txt \ from-file --filename=${INPUT_FILE} \ deserialize \ @@ -100,7 +100,7 @@ function run_pipeline_phishing_email(){ VAL_FILE=$4 VAL_OUTPUT=$5 - morpheus --log_level=DEBUG run --num_threads=1 --pipeline_batch_size=1024 --model_max_batch_size=32 --use_cpp=${USE_CPP} \ + morpheus --log_level=DEBUG run --num_threads=$(nproc) --pipeline_batch_size=1024 --model_max_batch_size=32 --use_cpp=${USE_CPP} \ pipeline-nlp --model_seq_length=128 --labels_file=${MORPHEUS_ROOT}/morpheus/data/labels_phishing.txt \ from-file --filename=${INPUT_FILE} \ deserialize \ @@ -121,7 +121,7 @@ function run_pipeline_hammah_user123(){ VAL_FILE=$4 VAL_OUTPUT=$5 - morpheus --log_level=DEBUG run --num_threads=1 --pipeline_batch_size=1024 --model_max_batch_size=1024 --use_cpp=${USE_CPP} \ + morpheus --log_level=DEBUG run --num_threads=$(nproc) --pipeline_batch_size=1024 --model_max_batch_size=1024 --use_cpp=${USE_CPP} \ pipeline-ae --columns_file="${MORPHEUS_ROOT}/morpheus/data/columns_ae_cloudtrail.txt" --userid_filter="user123" --userid_column_name="userIdentitysessionContextsessionIssueruserName" --timestamp_column_name="event_dt" \ from-cloudtrail --input_glob="${MORPHEUS_ROOT}/models/datasets/validation-data/dfp-cloudtrail-*-input.csv" \ train-ae --train_data_glob="${MORPHEUS_ROOT}/models/datasets/training-data/dfp-cloudtrail-*.csv" --source_stage_class=morpheus.stages.input.cloud_trail_source_stage.CloudTrailSourceStage --seed 42 \ @@ -143,7 +143,7 @@ function run_pipeline_hammah_role-g(){ VAL_FILE=$4 VAL_OUTPUT=$5 - morpheus --log_level=DEBUG run --num_threads=1 --pipeline_batch_size=1024 --model_max_batch_size=1024 --use_cpp=${USE_CPP} \ + morpheus --log_level=DEBUG run --num_threads=$(nproc) --pipeline_batch_size=1024 --model_max_batch_size=1024 --use_cpp=${USE_CPP} \ pipeline-ae --columns_file="${MORPHEUS_ROOT}/morpheus/data/columns_ae_cloudtrail.txt" --userid_filter="role-g" --userid_column_name="userIdentitysessionContextsessionIssueruserName" --timestamp_column_name="event_dt" \ from-cloudtrail --input_glob="${MORPHEUS_ROOT}/models/datasets/validation-data/dfp-cloudtrail-*-input.csv" \ train-ae --train_data_glob="${MORPHEUS_ROOT}/models/datasets/training-data/dfp-cloudtrail-*.csv" --source_stage_class=morpheus.stages.input.cloud_trail_source_stage.CloudTrailSourceStage --seed 42 \ From 31d963a357557c9e91b8ad608fca23cbf736bd93 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Mon, 22 Apr 2024 11:55:40 -0700 Subject: [PATCH 05/38] Fix vdb_upload runtime error (#1643) * Add `ControlMessage` to the `accepted_types` for `InferenceStage` when in Python mode * fix import of `CppTensorMemory` * Set default value of `['rss']` for `--source_type` avoids issue where command line flag values are ignored. * Fix bug in overrides of `config` fixture which prevented parameterization on the `use_cpp` fixture. 
* fix typo in config value: `stop_after_rec`, not `stop_after_sec` * Ensure a default int value for `stop_after_rec` to avoid schema validation error * Revert the default value for `--vector_db_resource_name` back to 'RSS', allowing the output of running this example to be used as the input for the RAG pipeline Closes #1642 Closes #1645 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) - Eli Fajardo (https://github.com/efajardo-nv) Approvers: - Yuchen Zhang (https://github.com/yuchenz427) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1643 --- examples/llm/rag/README.md | 9 ++++----- examples/llm/vdb_upload/module/rss_source_pipe.py | 4 ++-- examples/llm/vdb_upload/run.py | 4 ++-- examples/llm/vdb_upload/vdb_config.yaml | 4 ++-- examples/llm/vdb_upload/vdb_utils.py | 4 ++-- morpheus/modules/input/rss_source.py | 2 +- morpheus/modules/schemas/rss_source_schema.py | 2 +- morpheus/stages/inference/inference_stage.py | 5 ++++- morpheus/stages/input/rss_source_stage.py | 2 +- .../stages/preprocess/preprocess_fil_stage.py | 5 +++-- .../stages/preprocess/preprocess_nlp_stage.py | 15 ++++++++------- .../gnn_fraud_detection_pipeline/conftest.py | 2 +- tests/examples/log_parsing/conftest.py | 2 +- tests/examples/ransomware_detection/conftest.py | 2 +- tests/stages/test_preprocess_fil_stage.py | 2 +- tests/stages/test_preprocess_nlp_stage.py | 2 +- tests/test_add_classifications_stage.py | 2 +- tests/test_add_scores_stage.py | 2 +- 18 files changed, 37 insertions(+), 33 deletions(-) diff --git a/examples/llm/rag/README.md b/examples/llm/rag/README.md index 3868fab377..7c1579040e 100644 --- a/examples/llm/rag/README.md +++ b/examples/llm/rag/README.md @@ -214,14 +214,14 @@ pipeline option of `rag`: ```bash export NGC_API_KEY=[YOUR_KEY_HERE] -NGC_API_KEY=${NGC_API_KEY} python examples/llm/main.py rag pipeline +python examples/llm/main.py rag pipeline ``` **Using OpenAI LLM models** ```bash export OPENAI_API_KEY=[YOUR_KEY_HERE] -OPENAI_API_KEY=${OPENAI_API_KEY} python examples/llm/main.py rag pipeline +python examples/llm/main.py rag pipeline --llm_service=OpenAI --model_name=gpt-3.5-turbo ``` ### Run example (Persistent Pipeline): @@ -232,14 +232,14 @@ OPENAI_API_KEY=${OPENAI_API_KEY} python examples/llm/main.py rag pipeline ```bash export NGC_API_KEY=[YOUR_KEY_HERE] -python examples/llm/main.py rag persistent +python examples/llm/main.py rag persistent ``` **Using OpenAI LLM models** ```bash export OPENAI_API_KEY=[YOUR_KEY_HERE] -python examples/llm/main.py rag persistent +python examples/llm/main.py rag persistent ``` ### Options: @@ -273,4 +273,3 @@ The `rag` command has its own set of options and commands: - `persistant` - `pipeline` - diff --git a/examples/llm/vdb_upload/module/rss_source_pipe.py b/examples/llm/vdb_upload/module/rss_source_pipe.py index c424e03dbc..ff61940b8c 100644 --- a/examples/llm/vdb_upload/module/rss_source_pipe.py +++ b/examples/llm/vdb_upload/module/rss_source_pipe.py @@ -48,7 +48,7 @@ class RSSSourcePipeSchema(BaseModel): output_batch_size: int = 2048 request_timeout_sec: float = 2.0 run_indefinitely: bool = True - stop_after_sec: int = 0 
vdb_resource_name: str web_scraper_config: Optional[Dict[Any, Any]] = None @@ -130,7 +130,7 @@ def _rss_source_pipe(builder: mrc.Builder): "cooldown_interval_sec": validated_config.cooldown_interval_sec, "request_timeout_sec": validated_config.request_timeout_sec, "interval_sec": validated_config.interval_sec, - "stop_after_sec": validated_config.stop_after_sec, + "stop_after_rec": validated_config.stop_after_rec, } rss_source_loader = RSSSourceLoaderFactory.get_instance("rss_source", {"rss_source": rss_source_config}) diff --git a/examples/llm/vdb_upload/run.py b/examples/llm/vdb_upload/run.py index 04627f8359..974e5ec213 100644 --- a/examples/llm/vdb_upload/run.py +++ b/examples/llm/vdb_upload/run.py @@ -104,7 +104,7 @@ def run(): @click.option("--source_type", multiple=True, type=click.Choice(['rss', 'filesystem'], case_sensitive=False), - default=[], + default=['rss'], show_default=True, help="The type of source to use. Can specify multiple times for different source types.") @click.option( @@ -128,7 +128,7 @@ def run(): @click.option( "--vector_db_resource_name", type=str, - default="VDBUploadExample", + default="RSS", help="The identifier of the resource on which operations are to be performed in the vector database.", ) @click.option( diff --git a/examples/llm/vdb_upload/vdb_config.yaml b/examples/llm/vdb_upload/vdb_config.yaml index 0c1af37d22..ac93a47615 100644 --- a/examples/llm/vdb_upload/vdb_config.yaml +++ b/examples/llm/vdb_upload/vdb_config.yaml @@ -75,7 +75,7 @@ vdb_pipeline: output_batch_size: 2048 # Number of chunked documents per output batch request_timeout_sec: 2.0 run_indefinitely: true - stop_after_sec: 0 + stop_after_rec: 0 web_scraper_config: chunk_overlap: 51 chunk_size: 512 @@ -300,4 +300,4 @@ vdb_pipeline: dtype: FLOAT_VECTOR description: Embedding vectors representing the data entry dim: 384 # Size of the embeddings to store in the vector database - description: Collection schema for diverse data sources \ No newline at end of file + description: Collection schema for diverse data sources diff --git a/examples/llm/vdb_upload/vdb_utils.py b/examples/llm/vdb_upload/vdb_utils.py index 2b399fcd21..d3aed615d7 100644 --- a/examples/llm/vdb_upload/vdb_utils.py +++ b/examples/llm/vdb_upload/vdb_utils.py @@ -135,7 +135,7 @@ def _build_default_rss_source(enable_cache, "output_batch_size": 2048, "cache_dir": "./.cache/http", "cooldown_interval_sec": interval_secs, - "stop_after_sec": stop_after, + "stop_after_rec": stop_after or 0, "enable_cache": enable_cache, "enable_monitor": enable_monitors, "feed_input": feed_inputs if feed_inputs else build_rss_urls(), @@ -448,7 +448,7 @@ def build_final_config(vdb_conf_path, interval_secs=60, run_indefinitely=True, stop_after=None, - vector_db_resource_name="VDBUploadExample", + vector_db_resource_name="RSS", content_chunking_size=128, rss_request_timeout_sec=30, feed_inputs=build_rss_urls())) diff --git a/morpheus/modules/input/rss_source.py b/morpheus/modules/input/rss_source.py index 6133e3d673..9f5dd6c316 100644 --- a/morpheus/modules/input/rss_source.py +++ b/morpheus/modules/input/rss_source.py @@ -101,7 +101,7 @@ def fetch_feeds() -> MessageMeta: records_emitted += df_size - if (0 < validated_config.stop_after_sec <= records_emitted): + if (0 < validated_config.stop_after_rec <= records_emitted): stop_requested = True logger.info("Stop limit reached... 
preparing to halt the source.") break diff --git a/morpheus/modules/schemas/rss_source_schema.py b/morpheus/modules/schemas/rss_source_schema.py index b0468b1ace..53c0928391 100644 --- a/morpheus/modules/schemas/rss_source_schema.py +++ b/morpheus/modules/schemas/rss_source_schema.py @@ -30,7 +30,7 @@ class RSSSourceSchema(BaseModel): cooldown_interval_sec: int = 600 request_timeout_sec: float = 2.0 interval_sec: int = 600 - stop_after_sec: int = 0 + stop_after_rec: int = 0 class Config: extra = "forbid" diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index e4111926e9..579ddccd53 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -192,7 +192,10 @@ def accepted_types(self) -> typing.Tuple: typing.Tuple Tuple of input types. """ - return (MultiInferenceMessage, ) + if (self._build_cpp_node()): + return (MultiInferenceMessage, ) + + return (MultiInferenceMessage, ControlMessage) def compute_schema(self, schema: StageSchema): schema.output_schema.set_type(MultiResponseMessage) diff --git a/morpheus/stages/input/rss_source_stage.py b/morpheus/stages/input/rss_source_stage.py index 31e408c290..d56a443542 100644 --- a/morpheus/stages/input/rss_source_stage.py +++ b/morpheus/stages/input/rss_source_stage.py @@ -81,7 +81,7 @@ def __init__(self, "rss_source": { "feed_input": feed_input, "interval_sec": interval_secs, - "stop_after_sec": stop_after, + "stop_after_rec": stop_after, "run_indefinitely": run_indefinitely, "batch_size": batch_size, "enable_cache": enable_cache, diff --git a/morpheus/stages/preprocess/preprocess_fil_stage.py b/morpheus/stages/preprocess/preprocess_fil_stage.py index 45b1640d72..cbfc6a581f 100644 --- a/morpheus/stages/preprocess/preprocess_fil_stage.py +++ b/morpheus/stages/preprocess/preprocess_fil_stage.py @@ -23,6 +23,7 @@ import cudf +import morpheus._lib.messages as _messages import morpheus._lib.stages as _stages from morpheus.cli.register_stage import register_stage from morpheus.config import Config @@ -32,7 +33,6 @@ from morpheus.messages import MultiInferenceFILMessage from morpheus.messages import MultiInferenceMessage from morpheus.messages import MultiMessage -from morpheus.messages import TensorMemory as CppTensorMemory from morpheus.stages.preprocess.preprocess_base_stage import PreprocessBaseStage logger = logging.getLogger(__name__) @@ -123,7 +123,8 @@ def process_control_message(x: ControlMessage, fea_len: int, fea_cols: typing.Li seg_ids[:, 0] = cp.arange(0, count, dtype=cp.uint32) seg_ids[:, 2] = fea_len - 1 - x.tensors(CppTensorMemory(count=count, tensors={"input__0": data, "seq_ids": seg_ids})) + # We need the C++ impl of TensorMemory until #1646 is resolved + x.tensors(_messages.TensorMemory(count=count, tensors={"input__0": data, "seq_ids": seg_ids})) return x @staticmethod diff --git a/morpheus/stages/preprocess/preprocess_nlp_stage.py b/morpheus/stages/preprocess/preprocess_nlp_stage.py index feace923dc..de610ab52c 100644 --- a/morpheus/stages/preprocess/preprocess_nlp_stage.py +++ b/morpheus/stages/preprocess/preprocess_nlp_stage.py @@ -24,6 +24,7 @@ import cudf +import morpheus._lib.messages as _messages import morpheus._lib.stages as _stages from morpheus.cli.register_stage import register_stage from morpheus.cli.utils import MorpheusRelativePath @@ -35,7 +36,6 @@ from morpheus.messages import MultiInferenceMessage from morpheus.messages import MultiInferenceNLPMessage from morpheus.messages import MultiMessage -from 
morpheus.messages import TensorMemory as CppTensorMemory from morpheus.stages.preprocess.preprocess_base_stage import PreprocessBaseStage from morpheus.utils.cudf_subword_helper import tokenize_text_series @@ -204,13 +204,14 @@ def process_control_message(message: ControlMessage, del text_series + # We need the C++ impl of TensorMemory until #1646 is resolved message.tensors( - CppTensorMemory(count=tokenized.input_ids.shape[0], - tensors={ - "input_ids": tokenized.input_ids, - "input_mask": tokenized.input_mask, - "seq_ids": tokenized.segment_ids - })) + _messages.TensorMemory(count=tokenized.input_ids.shape[0], + tensors={ + "input_ids": tokenized.input_ids, + "input_mask": tokenized.input_mask, + "seq_ids": tokenized.segment_ids + })) message.set_metadata("inference_memory_params", {"inference_type": "nlp"}) return message diff --git a/tests/examples/gnn_fraud_detection_pipeline/conftest.py b/tests/examples/gnn_fraud_detection_pipeline/conftest.py index a625d51862..30176f71e4 100644 --- a/tests/examples/gnn_fraud_detection_pipeline/conftest.py +++ b/tests/examples/gnn_fraud_detection_pipeline/conftest.py @@ -44,7 +44,7 @@ def cuml_fixture(fail_missing: bool): @pytest.fixture(name="config") -def config_fixture(config): +def config_fixture(config, use_cpp: bool): # pylint: disable=unused-argument """ The GNN fraud detection pipeline utilizes the "other" pipeline mode. """ diff --git a/tests/examples/log_parsing/conftest.py b/tests/examples/log_parsing/conftest.py index d31891873a..f927c3fcc1 100644 --- a/tests/examples/log_parsing/conftest.py +++ b/tests/examples/log_parsing/conftest.py @@ -17,7 +17,7 @@ @pytest.fixture(name="config") -def config_fixture(config): +def config_fixture(config, use_cpp: bool): # pylint: disable=unused-argument """ The log_parsing pipelie requires NLP mode. Set this here so all the tests don't need to set it themselves. """ diff --git a/tests/examples/ransomware_detection/conftest.py b/tests/examples/ransomware_detection/conftest.py index e1c5e2541d..a92786555a 100644 --- a/tests/examples/ransomware_detection/conftest.py +++ b/tests/examples/ransomware_detection/conftest.py @@ -39,7 +39,7 @@ def dask_distributed(fail_missing: bool): @pytest.fixture(name="config") -def config_fixture(config): +def config_fixture(config, use_cpp: bool): # pylint: disable=unused-argument """ The ransomware detection pipeline utilizes the FIL pipeline mode. 
""" diff --git a/tests/stages/test_preprocess_fil_stage.py b/tests/stages/test_preprocess_fil_stage.py index eb6dc8b620..638fcaa994 100644 --- a/tests/stages/test_preprocess_fil_stage.py +++ b/tests/stages/test_preprocess_fil_stage.py @@ -27,7 +27,7 @@ @pytest.fixture(name='config') -def fixture_config(config: Config): +def fixture_config(config: Config, use_cpp: bool): # pylint: disable=unused-argument config.feature_length = 1 config.fil = ConfigFIL() config.fil.feature_columns = ["data"] diff --git a/tests/stages/test_preprocess_nlp_stage.py b/tests/stages/test_preprocess_nlp_stage.py index 9c2b5d4e39..22fc99e04a 100644 --- a/tests/stages/test_preprocess_nlp_stage.py +++ b/tests/stages/test_preprocess_nlp_stage.py @@ -29,7 +29,7 @@ @pytest.fixture(name='config') -def fixture_config(config: Config): +def fixture_config(config: Config, use_cpp: bool): # pylint: disable=unused-argument config.class_labels = [ "address", "bank_acct", diff --git a/tests/test_add_classifications_stage.py b/tests/test_add_classifications_stage.py index 279963ba9a..80091f3dc5 100755 --- a/tests/test_add_classifications_stage.py +++ b/tests/test_add_classifications_stage.py @@ -31,7 +31,7 @@ @pytest.fixture(name="config") -def config_fixture(config: Config): +def config_fixture(config: Config, use_cpp: bool): # pylint: disable=unused-argument config.class_labels = ['frogs', 'lizards', 'toads'] yield config diff --git a/tests/test_add_scores_stage.py b/tests/test_add_scores_stage.py index ad67709959..e454a0e35f 100755 --- a/tests/test_add_scores_stage.py +++ b/tests/test_add_scores_stage.py @@ -31,7 +31,7 @@ @pytest.fixture(name='config') -def fixture_config(config: Config): +def fixture_config(config: Config, use_cpp: bool): # pylint: disable=unused-argument config.class_labels = ['frogs', 'lizards', 'toads'] config.feature_length = 12 yield config From 0a0a20d41c96dee8bf5f2f30165895efbde6641e Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Mon, 22 Apr 2024 16:09:14 -0700 Subject: [PATCH 06/38] Document current known issues in 24.03.02 (#1656) ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1656 --- docs/source/extra_info/known_issues.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/source/extra_info/known_issues.md b/docs/source/extra_info/known_issues.md index 2ade48011e..014fac3471 100644 --- a/docs/source/extra_info/known_issues.md +++ b/docs/source/extra_info/known_issues.md @@ -17,4 +17,9 @@ limitations under the License. 
# Known Issues +- TrainAEStage fails with a Segmentation fault ([#1641](https://github.com/nv-morpheus/Morpheus/pull/1641)) +- vdb_upload example pipeline triggers an internal error in Triton ([#1649](https://github.com/nv-morpheus/Morpheus/pull/1649)) +- vdb_upload example pipeline error on inserting large strings ([#1650](https://github.com/nv-morpheus/Morpheus/pull/1650)) +- vdb_upload example pipeline only works with C++ mode disabled ([#1651](https://github.com/nv-morpheus/Morpheus/pull/1651)) + Refer to [open issues in the Morpheus project](https://github.com/nv-morpheus/Morpheus/issues) From eb0bc254aaa26bbc0f64a4bc66f97ce2642d9c35 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 22 Apr 2024 16:21:46 -0700 Subject: [PATCH 07/38] Updating CHANGELOG --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f2994d2c0..d79781bdc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> +# Morpheus 24.03.02 (22 Apr 2024) + +## 🐛 Bug Fixes + +- Don't set pe_count for the C++ impl of the TritonInferenceStage ([#1640](https://github.com/nv-morpheus/Morpheus/pull/1640)) [@dagardner-nv](https://github.com/dagardner-nv) +- Fix vdb_upload runtime error ([#1643](https://github.com/nv-morpheus/Morpheus/pull/1643)) [@dagardner-nv](https://github.com/dagardner-nv) + +## 📖 Documentation + +- Document current known issues in 24.03.02 ([#1656](https://github.com/nv-morpheus/Morpheus/pull/1656)) [@dagardner-nv](https://github.com/dagardner-nv) + # Morpheus 24.03.01 (10 Apr 2024) ## 🚨 Breaking Changes From cbfea7d6708ad9f45ff41997b19ccd149773395b Mon Sep 17 00:00:00 2001 From: Eli Fajardo Date: Tue, 23 Apr 2024 10:12:39 -0400 Subject: [PATCH 08/38] Fix `cupy_to_tensor` to also infer `uint8` and `int8` dtypes (#1621) - Update `DType::from_numpy` to handle strings that identify `uint8` and `int8` dtypes - Add unit tests for DType - Update to throw invalid argument exceptions on invalid numpy typestrs. Closes #1619 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
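For context, typestrs for one-byte dtypes carry a `|` ("not applicable") byte-order character rather than `<` or `>`, which is what `DType::from_numpy` previously failed to parse. A quick illustration (assumes a CUDA-capable environment with `cupy` installed; not part of this change):

```python
import cupy as cp

# One-byte dtypes advertise '|' as the byte-order character in their
# __cuda_array_interface__ typestr:
for dt in (cp.uint8, cp.int8, cp.bool_):
    arr = cp.zeros(4, dtype=dt)
    print(arr.__cuda_array_interface__["typestr"])  # '|u1', '|i1', '|b1'
```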
Authors: - Eli Fajardo (https://github.com/efajardo-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1621 --- .../_lib/include/morpheus/objects/dtype.hpp | 1 + morpheus/_lib/src/objects/dtype.cpp | 60 ++-- morpheus/_lib/tests/CMakeLists.txt | 6 + morpheus/_lib/tests/objects/test_dtype.cpp | 286 ++++++++++++++++++ 4 files changed, 336 insertions(+), 17 deletions(-) create mode 100644 morpheus/_lib/tests/objects/test_dtype.cpp diff --git a/morpheus/_lib/include/morpheus/objects/dtype.hpp b/morpheus/_lib/include/morpheus/objects/dtype.hpp index aa8c42e92f..63dbd1594a 100644 --- a/morpheus/_lib/include/morpheus/objects/dtype.hpp +++ b/morpheus/_lib/include/morpheus/objects/dtype.hpp @@ -173,6 +173,7 @@ struct DType } private: + char byte_order_char() const; char type_char() const; TypeId m_type_id; diff --git a/morpheus/_lib/src/objects/dtype.cpp b/morpheus/_lib/src/objects/dtype.cpp index 912a945b3a..870cdb8059 100644 --- a/morpheus/_lib/src/objects/dtype.cpp +++ b/morpheus/_lib/src/objects/dtype.cpp @@ -20,7 +20,6 @@ #include "morpheus/utilities/string_util.hpp" // for MORPHEUS_CONCAT_STR #include -#include // for CHECK #include #include // Needed by MORPHEUS_CONCAT_STR @@ -30,7 +29,7 @@ namespace { const std::map> StrToTypeId = { - {'?', {{1, morpheus::TypeId::BOOL8}}}, + {'b', {{1, morpheus::TypeId::BOOL8}}}, {'i', {{1, morpheus::TypeId::INT8}, @@ -100,14 +99,7 @@ std::string DType::name() const std::string DType::type_str() const { - if (m_type_id != TypeId::BOOL8 && m_type_id != TypeId::STRING) - { - return MORPHEUS_CONCAT_STR("<" << this->type_char() << this->item_size()); - } - else - { - return std::string{this->type_char()}; - } + return MORPHEUS_CONCAT_STR(this->byte_order_char() << this->type_char() << this->item_size()); } // Cudf representation @@ -214,19 +206,22 @@ DType DType::from_cudf(cudf::type_id tid) case cudf::type_id::EMPTY: case cudf::type_id::NUM_TYPE_IDS: default: - throw std::runtime_error("Not supported"); + throw std::invalid_argument("Not supported"); } } DType DType::from_numpy(const std::string& numpy_str) { - CHECK(!numpy_str.empty()) << "Cannot create DataType from empty string"; + if (numpy_str.empty()) + { + throw std::invalid_argument("Cannot create DataType from empty string"); + } char type_char = numpy_str[0]; size_t size_start = 1; - // Can start with < or > or none - if (numpy_str[0] == '<' || numpy_str[0] == '>') + // Can start with <, >, | or none + if (numpy_str[0] == '<' || numpy_str[0] == '>' || numpy_str[0] == '|') { type_char = numpy_str[1]; size_start = 2; @@ -241,11 +236,17 @@ DType DType::from_numpy(const std::string& numpy_str) // Now lookup in the map auto found_type = StrToTypeId.find(type_char); - CHECK(found_type != StrToTypeId.end()) << "Type char '" << type_char << "' not supported"; + if (found_type == StrToTypeId.end()) + { + throw std::invalid_argument(MORPHEUS_CONCAT_STR("Type char '" << type_char << "' not supported")); + } auto found_enum = found_type->second.find(dtype_size); - CHECK(found_enum != found_type->second.end()) << "Type str '" << type_char << dtype_size << "' not supported"; + if (found_enum == found_type->second.end()) + { + throw std::invalid_argument(MORPHEUS_CONCAT_STR("Type str '" << type_char << dtype_size << "' not supported")); + } return {found_enum->second}; } @@ -299,6 +300,31 @@ DType DType::from_triton(const std::string& type_str) } else { + throw std::invalid_argument("Not supported"); + } +} + +char DType::byte_order_char() const +{ + 
switch (m_type_id) + { + case TypeId::BOOL8: + case TypeId::INT8: + case TypeId::UINT8: + return '|'; + case TypeId::INT16: + case TypeId::UINT16: + case TypeId::INT32: + case TypeId::UINT32: + case TypeId::INT64: + case TypeId::UINT64: + case TypeId::FLOAT32: + case TypeId::FLOAT64: + return '<'; + case TypeId::EMPTY: + case TypeId::NUM_TYPE_IDS: + case TypeId::STRING: + default: throw std::runtime_error("Not supported"); } } @@ -318,7 +344,7 @@ char DType::type_char() const case TypeId::UINT64: return 'u'; case TypeId::BOOL8: - return '?'; + return 'b'; case TypeId::FLOAT32: case TypeId::FLOAT64: return 'f'; diff --git a/morpheus/_lib/tests/CMakeLists.txt b/morpheus/_lib/tests/CMakeLists.txt index 7e71bd2eb1..a17a297aca 100644 --- a/morpheus/_lib/tests/CMakeLists.txt +++ b/morpheus/_lib/tests/CMakeLists.txt @@ -113,6 +113,12 @@ add_morpheus_test( modules/test_data_loader_module.cpp ) +add_morpheus_test( + NAME objects + FILES + objects/test_dtype.cpp +) + add_morpheus_test( NAME deserializers FILES diff --git a/morpheus/_lib/tests/objects/test_dtype.cpp b/morpheus/_lib/tests/objects/test_dtype.cpp new file mode 100644 index 0000000000..230d68dcd6 --- /dev/null +++ b/morpheus/_lib/tests/objects/test_dtype.cpp @@ -0,0 +1,286 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils/common.hpp" // IWYU pragma: associated + +#include "morpheus/objects/dtype.hpp" // for DType + +#include +#include + +#include + +using namespace morpheus; +using namespace morpheus::test; + +TEST_CLASS(DType); + +TEST_F(TestDType, FromNumpyValidStr) +{ + DType dtype = DType::from_numpy("|i1"); + ASSERT_EQ(dtype.type_id(), TypeId::INT8); + ASSERT_EQ(dtype.item_size(), 1); + ASSERT_EQ(dtype.type_str(), "|i1"); + + dtype = DType::from_numpy(" Date: Tue, 23 Apr 2024 11:08:52 -0700 Subject: [PATCH 09/38] Fix documentation for building examples (#1659) * The Morpheus python package needs to be built and installed prior to building the examples, updated documentation to ensure `-DMORPHEUS_PYTHON_PERFORM_INSTALL=ON` is added ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
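As a usage note, the documented command for building the examples along with Morpheus now reads (taken verbatim from the updated docs below):

```bash
# Build the examples along with Morpheus; MORPHEUS_PYTHON_PERFORM_INSTALL
# ensures the Morpheus python package is built and installed first
CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON" ./scripts/compile.sh
```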
Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1659 --- docs/source/developer_guide/guides.md | 4 ++-- docs/source/developer_guide/guides/3_simple_cpp_stage.md | 4 ++-- docs/source/developer_guide/guides/4_source_cpp_stage.md | 4 ++-- examples/developer_guide/4_rabbitmq_cpp_stage/README.md | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/source/developer_guide/guides.md b/docs/source/developer_guide/guides.md index 9e4fba5ff7..2d141e96db 100644 --- a/docs/source/developer_guide/guides.md +++ b/docs/source/developer_guide/guides.md @@ -33,9 +33,9 @@ in both Python and C++. - [Simple C++ Stage](./guides/3_simple_cpp_stage.md) - [Creating a C++ Source Stage](./guides/4_source_cpp_stage.md) -> **Note**: The code for the above guides can be found in the `examples/developer_guide` directory of the Morpheus repository. To build the C++ examples, pass `-DMORPHEUS_BUILD_EXAMPLES=ON` to CMake when building Morpheus. Users building Morpheus with the provided `scripts/compile.sh` script can do do by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: +> **Note**: The code for the above guides can be found in the `examples/developer_guide` directory of the Morpheus repository. To build the C++ examples, pass `-DMORPHEUS_BUILD_EXAMPLES=ON -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON` to CMake when building Morpheus. Users building Morpheus with the provided `scripts/compile.sh` script can do do by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: > ```bash -> CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON" ./scripts/compile.sh +> CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON" ./scripts/compile.sh > ``` ## Morpheus Modules diff --git a/docs/source/developer_guide/guides/3_simple_cpp_stage.md b/docs/source/developer_guide/guides/3_simple_cpp_stage.md index 2b203ef42b..3b0982d21e 100644 --- a/docs/source/developer_guide/guides/3_simple_cpp_stage.md +++ b/docs/source/developer_guide/guides/3_simple_cpp_stage.md @@ -17,9 +17,9 @@ limitations under the License. # Simple C++ Stage ## Building the Example -The code for this guide can be found in the `examples/developer_guide/3_simple_cpp_stage` directory of the Morpheus repository. There are two ways to build the example. The first is to build the examples along with Morpheus by passing the `-DMORPHEUS_BUILD_EXAMPLES=ON` flag to cmake, for users using the `scripts/compile.sh` at the root of the Morpheus repo can do this by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: +The code for this guide can be found in the `examples/developer_guide/3_simple_cpp_stage` directory of the Morpheus repository. There are two ways to build the example. The first is to build the examples along with Morpheus by passing the `-DMORPHEUS_BUILD_EXAMPLES=ON` and `-DMORPHEUS_PYTHON_PERFORM_INSTALL=ON` flags to cmake, for users using the `scripts/compile.sh` at the root of the Morpheus repo can do this by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: ```bash -CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON" ./scripts/compile.sh +CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON" ./scripts/compile.sh ``` The second method is to build the example as a standalone project. 
From the root of the Morpheus repo execute: diff --git a/docs/source/developer_guide/guides/4_source_cpp_stage.md b/docs/source/developer_guide/guides/4_source_cpp_stage.md index 8bc17f1347..476d0f661b 100644 --- a/docs/source/developer_guide/guides/4_source_cpp_stage.md +++ b/docs/source/developer_guide/guides/4_source_cpp_stage.md @@ -17,9 +17,9 @@ limitations under the License. # Creating a C++ Source Stage ## Building the Example -The code for this guide can be found in the `examples/developer_guide/4_rabbitmq_cpp_stage` directory of the Morpheus repository. There are two ways to build the example. The first is to build the examples along with Morpheus by passing the `-DMORPHEUS_BUILD_EXAMPLES=ON` flag to cmake, for users using the `scripts/compile.sh` at the root of the Morpheus repo can do this by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: +The code for this guide can be found in the `examples/developer_guide/4_rabbitmq_cpp_stage` directory of the Morpheus repository. There are two ways to build the example. The first is to build the examples along with Morpheus by passing the `-DMORPHEUS_BUILD_EXAMPLES=ON` and `-DMORPHEUS_PYTHON_PERFORM_INSTALL=ON` flags to cmake, for users using the `scripts/compile.sh` at the root of the Morpheus repo can do this by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: ```bash -CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON" ./scripts/compile.sh +CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON" ./scripts/compile.sh ``` The second method is to build the example as a standalone project. From the root of the Morpheus repo execute: diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md index 2e3319b65e..ed55204f85 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md @@ -27,9 +27,9 @@ pip install -r examples/developer_guide/4_rabbitmq_cpp_stage/requirements.txt ``` ## Building the Example -There are two ways to build the example. The first is to build the examples along with Morpheus by passing the `-DMORPHEUS_BUILD_EXAMPLES=ON` flag to cmake, for users using the `scripts/compile.sh` at the root of the Morpheus repo can do this by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: +There are two ways to build the example. The first is to build the examples along with Morpheus by passing the `-DMORPHEUS_BUILD_EXAMPLES=ON` and `-DMORPHEUS_PYTHON_PERFORM_INSTALL=ON` flags to cmake, for users using the `scripts/compile.sh` at the root of the Morpheus repo can do this by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: ```bash -CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON" ./scripts/compile.sh +CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON" ./scripts/compile.sh ``` The second is to build the example as a standalone project. 
From the root of the Morpheus repo execute: From ce7ab99b221927d3fbb5f317f87279be62d2109d Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 24 Apr 2024 08:11:00 -0700 Subject: [PATCH 10/38] Updating CHANGELOG --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d79781bdc6..bbd2ef563e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> -# Morpheus 24.03.02 (22 Apr 2024) +# Morpheus 24.03.02 (24 Apr 2024) ## 🐛 Bug Fixes @@ -24,6 +24,7 @@ limitations under the License. ## 📖 Documentation - Document current known issues in 24.03.02 ([#1656](https://github.com/nv-morpheus/Morpheus/pull/1656)) [@dagardner-nv](https://github.com/dagardner-nv) +- Fix documentation for building examples ([#1659](https://github.com/nv-morpheus/Morpheus/pull/1659)) [@dagardner-nv](https://github.com/dagardner-nv) # Morpheus 24.03.01 (10 Apr 2024) From e2942e6b49ac923b2ff7b22341bfb719546c00f3 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Wed, 24 Apr 2024 09:42:09 -0700 Subject: [PATCH 11/38] Fix type-o in documentation (#1662) ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1662 --- examples/developer_guide/4_rabbitmq_cpp_stage/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md index ed55204f85..c0710524e4 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md @@ -29,7 +29,7 @@ pip install -r examples/developer_guide/4_rabbitmq_cpp_stage/requirements.txt ## Building the Example There are two ways to build the example. The first is to build the examples along with Morpheus by passing the `-DMORPHEUS_BUILD_EXAMPLES=ON` and `-DMORPHEUS_PYTHON_PERFORM_INSTALL=ON` flags to cmake, for users using the `scripts/compile.sh` at the root of the Morpheus repo can do this by setting the `CMAKE_CONFIGURE_EXTRA_ARGS` environment variable: ```bash -CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON" ./scripts/compile.sh +CMAKE_CONFIGURE_EXTRA_ARGS="-DMORPHEUS_BUILD_EXAMPLES=ON -DMORPHEUS_PYTHON_PERFORM_INSTALL=ON" ./scripts/compile.sh ``` The second is to build the example as a standalone project. From the root of the Morpheus repo execute: From 88d5211487b9ce389f1a10f98e9bcafb59019a5f Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 24 Apr 2024 09:44:22 -0700 Subject: [PATCH 12/38] Updating CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbd2ef563e..8386e6ac8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ limitations under the License. 
- Document current known issues in 24.03.02 ([#1656](https://github.com/nv-morpheus/Morpheus/pull/1656)) [@dagardner-nv](https://github.com/dagardner-nv) - Fix documentation for building examples ([#1659](https://github.com/nv-morpheus/Morpheus/pull/1659)) [@dagardner-nv](https://github.com/dagardner-nv) +- Fix type-o in documentation ([#1662](https://github.com/nv-morpheus/Morpheus/pull/1662)) [@dagardner-nv](https://github.com/dagardner-nv) # Morpheus 24.03.01 (10 Apr 2024) From ec183006194025ab8ff1408fe780a71a699c29d1 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Wed, 24 Apr 2024 12:38:40 -0700 Subject: [PATCH 13/38] Fix tests to detect issue #1626 (#1629) * PR #659 inadvertently excluded the monitor stage from several of the end-to-end pipeline tests. * Adds an environment variable `MORPHEUS_MONITOR_ALWAYS_ENABLED` which when set, will force the monitor stage to always be enabled. * Adds an auto-use fixture `monitor_stage_always_enabled` which ensures the environment variable is set & present. Requires nv-morpheus/MRC#473 to be merged first ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Christopher Harris (https://github.com/cwharris) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1629 --- tests/benchmarks/test_bench_e2e_pipelines.py | 6 +- tests/benchmarks/test_bench_monitor_stage.py | 9 +-- tests/conftest.py | 9 +++ .../developer_guide/test_python_modules.py | 4 +- .../test_dfp_inference_stage.py | 7 +-- .../test_dfp_postprocessing_stage.py | 7 +-- .../test_dfp_preprocessing_stage.py | 7 +-- tests/test_abp.py | 18 ++++-- tests/test_abp_kafka.py | 12 ++-- tests/test_dfp.py | 16 +++-- tests/test_dfp_kafka.py | 12 ++-- tests/test_monitor_stage.py | 19 +++--- tests/test_phishing.py | 11 ++-- tests/test_phishing_kafka.py | 12 ++-- tests/test_sid.py | 62 ++++++++++++------- tests/test_sid_kafka.py | 12 ++-- 16 files changed, 137 insertions(+), 86 deletions(-) diff --git a/tests/benchmarks/test_bench_e2e_pipelines.py b/tests/benchmarks/test_bench_e2e_pipelines.py index 14283cf154..e99e7bbc07 100644 --- a/tests/benchmarks/test_bench_e2e_pipelines.py +++ b/tests/benchmarks/test_bench_e2e_pipelines.py @@ -67,7 +67,7 @@ def nlp_pipeline(config: Config, input_file, repeat, vocab_hash_file, output_fil server_url=E2E_TEST_CONFIGS["triton_server_url"], force_convert_inputs=True)) pipeline.add_stage(AddClassificationsStage(config, threshold=0.5, prefix="")) - pipeline.add_stage(MonitorStage(config)) + pipeline.add_stage(MonitorStage(config, log_level=logging.INFO)) pipeline.add_stage(SerializeStage(config)) pipeline.add_stage(WriteToFileStage(config, filename=output_file, overwrite=True)) @@ -89,7 +89,7 @@ def fil_pipeline(config: Config, input_file, repeat, output_file, model_name): server_url=E2E_TEST_CONFIGS["triton_server_url"], force_convert_inputs=True)) pipeline.add_stage(AddClassificationsStage(config, threshold=0.5, prefix="")) - pipeline.add_stage(MonitorStage(config)) + pipeline.add_stage(MonitorStage(config, log_level=logging.INFO)) pipeline.add_stage(SerializeStage(config)) 
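# The auto-use fixture described in this commit's message guarantees the
# MonitorStage cannot silently drop out of these end-to-end tests. A minimal
# sketch of such a fixture (the fixture and variable names come from the
# message above; the body is an assumption, not the patch's literal code):
#
#     @pytest.fixture(autouse=True)
#     def monitor_stage_always_enabled(monkeypatch: pytest.MonkeyPatch):
#         monkeypatch.setenv("MORPHEUS_MONITOR_ALWAYS_ENABLED", "1")
#         yield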
pipeline.add_stage(WriteToFileStage(config, filename=output_file, overwrite=True)) @@ -111,7 +111,7 @@ def ae_pipeline(config: Config, input_glob, repeat, train_data_glob, output_file pipeline.add_stage(PreprocessAEStage(config)) pipeline.add_stage(AutoEncoderInferenceStage(config)) pipeline.add_stage(AddScoresStage(config)) - pipeline.add_stage(MonitorStage(config)) + pipeline.add_stage(MonitorStage(config, log_level=logging.INFO)) pipeline.add_stage(SerializeStage(config)) pipeline.add_stage(WriteToFileStage(config, filename=output_file, overwrite=True)) diff --git a/tests/benchmarks/test_bench_monitor_stage.py b/tests/benchmarks/test_bench_monitor_stage.py index 7af2406acc..5ddbdef42d 100644 --- a/tests/benchmarks/test_bench_monitor_stage.py +++ b/tests/benchmarks/test_bench_monitor_stage.py @@ -14,6 +14,7 @@ # limitations under the License. import logging +import typing import pytest from static_message_source import StaticMessageSource @@ -29,7 +30,7 @@ from morpheus.utils.logger import configure_logging -def build_and_run_pipeline(config: Config, df: cudf.DataFrame): +def build_and_run_pipeline(*, config: Config, df: cudf.DataFrame, morpheus_log_level: int): # Pipeline pipeline = LinearPipeline(config) @@ -39,7 +40,7 @@ def build_and_run_pipeline(config: Config, df: cudf.DataFrame): pipeline.add_stage(DeserializeStage(config)) # Stage we want to benchmark - pipeline.add_stage(MonitorStage(config)) + pipeline.add_stage(MonitorStage(config, log_level=morpheus_log_level)) pipeline.build() pipeline.run() @@ -47,7 +48,7 @@ def build_and_run_pipeline(config: Config, df: cudf.DataFrame): @pytest.mark.benchmark @pytest.mark.parametrize("num_messages", [1, 100, 10000, 1000000]) -def test_monitor_stage(benchmark, num_messages): +def test_monitor_stage(benchmark: typing.Callable, num_messages: int, morpheus_log_level: int): # Test Data @@ -70,4 +71,4 @@ def test_monitor_stage(benchmark, num_messages): config.edge_buffer_size = 4 # would prefer to benchmark just pipeline.run, but it asserts when called multiple times - benchmark(build_and_run_pipeline, config, df) + benchmark(build_and_run_pipeline, config=config, df=df, morpheus_log_level=morpheus_log_level) diff --git a/tests/conftest.py b/tests/conftest.py index 0a33fa7891..1f8f0ef425 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -861,6 +861,15 @@ def loglevel_fatal(): _wrap_set_log_level(logging.FATAL) +@pytest.fixture(scope="function") +def morpheus_log_level(): + """ + Returns the log level of the morpheus logger + """ + logger = logging.getLogger("morpheus") + yield logger.getEffectiveLevel() + + # ==== DataFrame Fixtures ==== @pytest.fixture(scope="function") def dataset(df_type: typing.Literal['cudf', 'pandas']): diff --git a/tests/examples/developer_guide/test_python_modules.py b/tests/examples/developer_guide/test_python_modules.py index 1c433d6f78..aad7333ce7 100644 --- a/tests/examples/developer_guide/test_python_modules.py +++ b/tests/examples/developer_guide/test_python_modules.py @@ -38,7 +38,7 @@ os.path.join(EXAMPLES_DIR, "my_compound_module_consumer_stage.py"), os.path.join(EXAMPLES_DIR, "my_test_module_consumer_stage.py") ]) -def test_pipeline(config: Config, import_mod: list[types.ModuleType]): +def test_pipeline(config: Config, import_mod: list[types.ModuleType], morpheus_log_level: int): my_compound_module_consumer_stage = import_mod[-2] my_test_module_consumer_stage = import_mod[-1] @@ -72,7 +72,7 @@ def test_pipeline(config: Config, import_mod: list[types.ModuleType]): 
pipeline.add_stage(my_test_module_consumer_stage.MyPassthroughModuleWrapper(config)) pipeline.add_stage(my_compound_module_consumer_stage.MyCompoundOpModuleWrapper(config)) - pipeline.add_stage(MonitorStage(config)) + pipeline.add_stage(MonitorStage(config, log_level=morpheus_log_level)) comp_stage = pipeline.add_stage(CompareDataFrameStage(config, expected_df)) pipeline.run() diff --git a/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py b/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py index f4dda7c815..46defbbbee 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py @@ -71,19 +71,18 @@ def test_get_model(config: Config, mock_mlflow_client: mock.MagicMock, mock_mode @pytest.mark.usefixtures("reset_loglevel") -@pytest.mark.parametrize('morpheus_log_level', - [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) +@pytest.mark.parametrize('log_level', [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) def test_on_data( config: Config, mock_mlflow_client: mock.MagicMock, # pylint: disable=unused-argument mock_model_manager: mock.MagicMock, dfp_multi_message: "MultiDFPMessage", # noqa: F821 - morpheus_log_level: int, + log_level: int, dataset_pandas: DatasetManager): from dfp.messages.multi_dfp_message import MultiDFPMessage from dfp.stages.dfp_inference_stage import DFPInferenceStage - set_log_level(morpheus_log_level) + set_log_level(log_level) expected_results = list(range(1000, dfp_multi_message.mess_count + 1000)) diff --git a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py index 4b13bacde5..6eed4c0d9e 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py @@ -35,14 +35,13 @@ def test_constructor(config: Config): @pytest.mark.usefixtures("reset_loglevel") @pytest.mark.parametrize('use_on_data', [True, False]) -@pytest.mark.parametrize('morpheus_log_level', - [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) +@pytest.mark.parametrize('log_level', [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) @mock.patch('dfp.stages.dfp_postprocessing_stage.datetime') def test_process_events_on_data(mock_datetime: mock.MagicMock, config: Config, dfp_multi_ae_message: MultiAEMessage, use_on_data: bool, - morpheus_log_level: int): + log_level: int): from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage mock_dt_obj = mock.MagicMock() @@ -54,7 +53,7 @@ def test_process_events_on_data(mock_datetime: mock.MagicMock, df.loc[10, 'v2'] = np.nan df['event_time'] = '' - set_log_level(morpheus_log_level) + set_log_level(log_level) stage = DFPPostprocessingStage(config) # on_data is a thin wrapper around process_events, tests should be the same for non-empty messages diff --git a/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py b/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py index bf82381879..c7859cd90c 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py @@ -36,17 +36,16 @@ def test_constructor(config: Config): @pytest.mark.usefixtures("reset_loglevel") -@pytest.mark.parametrize('morpheus_log_level', - 
[logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) +@pytest.mark.parametrize('log_level', [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) def test_process_features( config: Config, dfp_multi_message: "MultiDFPMessage", # noqa: F821 dataset_pandas: DatasetManager, - morpheus_log_level: int): + log_level: int): from dfp.messages.multi_dfp_message import MultiDFPMessage from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage - set_log_level(morpheus_log_level) + set_log_level(log_level) expected_df = dfp_multi_message.get_meta_dataframe().copy(deep=True) expected_df['v210'] = expected_df['v2'] + 10 diff --git a/tests/test_abp.py b/tests/test_abp.py index 86778bfdb6..a3248deb7e 100755 --- a/tests/test_abp.py +++ b/tests/test_abp.py @@ -52,7 +52,7 @@ @pytest.mark.slow @pytest.mark.use_python @mock.patch('tritonclient.grpc.InferenceServerClient') -def test_abp_no_cpp(mock_triton_client, config: Config, tmp_path): +def test_abp_no_cpp(mock_triton_client: mock.MagicMock, config: Config, tmp_path: str, morpheus_log_level: int): mock_metadata = { "inputs": [{ 'name': 'input__0', 'datatype': 'FP32', "shape": [-1, FEATURE_LENGTH] @@ -98,7 +98,8 @@ def test_abp_no_cpp(mock_triton_client, config: Config, tmp_path): pipe.add_stage(PreprocessFILStage(config)) pipe.add_stage( TritonInferenceStage(config, model_name='abp-nvsmi-xgb', server_url='test:0000', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config)) pipe.add_stage(AddScoresStage(config, prefix="score_")) pipe.add_stage( @@ -115,7 +116,7 @@ def test_abp_no_cpp(mock_triton_client, config: Config, tmp_path): @pytest.mark.slow @pytest.mark.use_cpp @pytest.mark.usefixtures("launch_mock_triton") -def test_abp_cpp(config, tmp_path): +def test_abp_cpp(config: Config, tmp_path: str, morpheus_log_level: int): config.mode = PipelineModes.FIL config.class_labels = ["mining"] config.model_max_batch_size = MODEL_MAX_BATCH_SIZE @@ -141,7 +142,8 @@ def test_abp_cpp(config, tmp_path): pipe.add_stage( TritonInferenceStage(config, model_name='abp-nvsmi-xgb', server_url='localhost:8001', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config)) pipe.add_stage(AddScoresStage(config, prefix="score_")) pipe.add_stage( @@ -158,7 +160,10 @@ def test_abp_cpp(config, tmp_path): @pytest.mark.slow @pytest.mark.use_python @mock.patch('tritonclient.grpc.InferenceServerClient') -def test_abp_multi_segment_no_cpp(mock_triton_client, config: Config, tmp_path): +def test_abp_multi_segment_no_cpp(mock_triton_client: mock.MagicMock, + config: Config, + tmp_path: str, + morpheus_log_level: int): mock_metadata = { "inputs": [{ 'name': 'input__0', 'datatype': 'FP32', "shape": [-1, FEATURE_LENGTH] @@ -213,7 +218,8 @@ def test_abp_multi_segment_no_cpp(mock_triton_client, config: Config, tmp_path): pipe.add_segment_boundary(MultiResponseMessage) # Boundary 3 - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", 
smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config)) pipe.add_segment_boundary(MultiResponseMessage) # Boundary 4 diff --git a/tests/test_abp_kafka.py b/tests/test_abp_kafka.py index 0e1f040612..46306ff29c 100755 --- a/tests/test_abp_kafka.py +++ b/tests/test_abp_kafka.py @@ -61,7 +61,8 @@ def test_abp_no_cpp(mock_triton_client: mock.MagicMock, config: Config, kafka_bootstrap_servers: str, kafka_topics: KafkaTopics, - kafka_consumer: "KafkaConsumer"): + kafka_consumer: "KafkaConsumer", + morpheus_log_level: int): mock_metadata = { "inputs": [{ 'name': 'input__0', 'datatype': 'FP32', "shape": [-1, FEATURE_LENGTH] @@ -115,7 +116,8 @@ def test_abp_no_cpp(mock_triton_client: mock.MagicMock, pipe.add_stage(PreprocessFILStage(config)) pipe.add_stage( TritonInferenceStage(config, model_name='abp-nvsmi-xgb', server_url='test:0000', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config)) pipe.add_stage(SerializeStage(config)) pipe.add_stage( @@ -151,7 +153,8 @@ def test_abp_cpp(config: Config, dataset_pandas: DatasetManager, kafka_bootstrap_servers: str, kafka_topics: KafkaTopics, - kafka_consumer: "KafkaConsumer"): + kafka_consumer: "KafkaConsumer", + morpheus_log_level: int): config.mode = PipelineModes.FIL config.class_labels = ["mining"] config.model_max_batch_size = MODEL_MAX_BATCH_SIZE @@ -183,7 +186,8 @@ def test_abp_cpp(config: Config, pipe.add_stage( TritonInferenceStage(config, model_name='abp-nvsmi-xgb', server_url='localhost:8001', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config)) pipe.add_stage(SerializeStage(config)) pipe.add_stage( diff --git a/tests/test_dfp.py b/tests/test_dfp.py index d32ad3c1e8..2f3bacbdae 100755 --- a/tests/test_dfp.py +++ b/tests/test_dfp.py @@ -23,6 +23,7 @@ from _utils import TEST_DIRS from _utils import calc_error_val +from morpheus.config import Config from morpheus.config import ConfigAutoEncoder from morpheus.config import PipelineModes from morpheus.messages.message_meta import MessageMeta @@ -50,7 +51,7 @@ @pytest.mark.reload_modules([preprocess_ae_stage, train_ae_stage]) @pytest.mark.usefixtures("reload_modules") @mock.patch('morpheus.stages.preprocess.train_ae_stage.AutoEncoder') -def test_dfp_roleg(mock_ae, config, tmp_path): +def test_dfp_roleg(mock_ae: mock.MagicMock, config: Config, tmp_path: str, morpheus_log_level: int): tensor_data = np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_roleg_tensor.csv'), delimiter=',') anomaly_score = np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_roleg_anomaly_score.csv'), delimiter=',') exp_results = pd.read_csv(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_roleg_exp_results.csv')) @@ -107,7 +108,8 @@ def test_dfp_roleg(mock_ae, config, tmp_path): cold_end=False, filter_percent=90.0, zscore_threshold=8.0)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage( 
ValidationStage(config, val_file_name=val_file_name, @@ -135,7 +137,7 @@ def test_dfp_roleg(mock_ae, config, tmp_path): @pytest.mark.reload_modules([preprocess_ae_stage, train_ae_stage]) @pytest.mark.usefixtures("reload_modules") @mock.patch('morpheus.stages.preprocess.train_ae_stage.AutoEncoder') -def test_dfp_user123(mock_ae, config, tmp_path): +def test_dfp_user123(mock_ae: mock.MagicMock, config: Config, tmp_path: str, morpheus_log_level: int): tensor_data = np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_user123_tensor.csv'), delimiter=',') anomaly_score = np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_user123_anomaly_score.csv'), delimiter=',') exp_results = pd.read_csv(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_user123_exp_results.csv')) @@ -190,7 +192,8 @@ def test_dfp_user123(mock_ae, config, tmp_path): cold_end=False, filter_percent=90.0, zscore_threshold=8.0)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage( ValidationStage(config, val_file_name=val_file_name, @@ -217,7 +220,7 @@ def test_dfp_user123(mock_ae, config, tmp_path): @pytest.mark.reload_modules([preprocess_ae_stage, train_ae_stage]) @pytest.mark.usefixtures("reload_modules") @mock.patch('morpheus.stages.preprocess.train_ae_stage.AutoEncoder') -def test_dfp_user123_multi_segment(mock_ae, config, tmp_path): +def test_dfp_user123_multi_segment(mock_ae: mock.MagicMock, config: Config, tmp_path: str, morpheus_log_level: int): tensor_data = np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_user123_tensor.csv'), delimiter=',') anomaly_score = np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_user123_anomaly_score.csv'), delimiter=',') exp_results = pd.read_csv(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_user123_exp_results.csv')) @@ -278,7 +281,8 @@ def test_dfp_user123_multi_segment(mock_ae, config, tmp_path): filter_percent=90.0, zscore_threshold=8.0)) pipe.add_segment_boundary(MultiResponseMessage) # Boundary 6 - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage( ValidationStage(config, val_file_name=val_file_name, diff --git a/tests/test_dfp_kafka.py b/tests/test_dfp_kafka.py index 5b28ae6f7c..8bd4900b96 100755 --- a/tests/test_dfp_kafka.py +++ b/tests/test_dfp_kafka.py @@ -64,7 +64,8 @@ def test_dfp_roleg(mock_ae: mock.MagicMock, config: Config, kafka_bootstrap_servers: str, kafka_topics: KafkaTopics, - kafka_consumer: "KafkaConsumer"): + kafka_consumer: "KafkaConsumer", + morpheus_log_level: int): tensor_data = np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_roleg_tensor.csv'), delimiter=',') anomaly_score = np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_roleg_anomaly_score.csv'), delimiter=',') exp_results = pd.read_csv(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_roleg_exp_results.csv')) @@ -116,7 +117,8 @@ def test_dfp_roleg(mock_ae: mock.MagicMock, cold_end=False, filter_percent=90.0, zscore_threshold=8.0)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(SerializeStage(config, include=[])) 
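# Note on the `log_level=morpheus_log_level` argument threaded through these
# tests: MonitorStage only adds itself to the MRC pipeline when its configured
# log_level is at or above the morpheus logger's effective level (see
# test_log_level in test_monitor_stage.py below). Passing the logger's current
# effective level, captured by the new `morpheus_log_level` fixture, therefore
# keeps the stage in the pipeline under test at any logging verbosity.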
pipe.add_stage( WriteToKafkaStage(config, bootstrap_servers=kafka_bootstrap_servers, output_topic=kafka_topics.output_topic)) @@ -166,7 +168,8 @@ def test_dfp_user123(mock_ae: mock.MagicMock, config: Config, kafka_bootstrap_servers: str, kafka_topics: KafkaTopics, - kafka_consumer: "KafkaConsumer"): + kafka_consumer: "KafkaConsumer", + morpheus_log_level: int): tensor_data = np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_user123_tensor.csv'), delimiter=',') anomaly_score = np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_user123_anomaly_score.csv'), delimiter=',') exp_results = pd.read_csv(os.path.join(TEST_DIRS.tests_data_dir, 'dfp_user123_exp_results.csv')) @@ -217,7 +220,8 @@ def test_dfp_user123(mock_ae: mock.MagicMock, cold_end=False, filter_percent=90.0, zscore_threshold=8.0)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(SerializeStage(config, include=[])) pipe.add_stage( WriteToKafkaStage(config, bootstrap_servers=kafka_bootstrap_servers, output_topic=kafka_topics.output_topic)) diff --git a/tests/test_monitor_stage.py b/tests/test_monitor_stage.py index e023f159b3..68b1b35ca7 100755 --- a/tests/test_monitor_stage.py +++ b/tests/test_monitor_stage.py @@ -151,23 +151,22 @@ def test_progress_sink(mock_morph_tqdm: mock.MagicMock, config: Config): @pytest.mark.usefixtures("reset_loglevel") -@pytest.mark.parametrize('morpheus_log_level', - [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) +@pytest.mark.parametrize('log_level', [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) @mock.patch('morpheus.stages.general.monitor_stage.MonitorController.sink_on_completed', autospec=True) @mock.patch('morpheus.stages.general.monitor_stage.MonitorController.progress_sink', autospec=True) def test_log_level(mock_progress_sink: mock.MagicMock, mock_sink_on_completed: mock.MagicMock, config: Config, - morpheus_log_level: int): + log_level: int): """ Test ensures the monitor stage doesn't add itself to the MRC pipeline if not configured for the current log-level """ input_file = os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.csv") - set_log_level(morpheus_log_level) + set_log_level(log_level) monitor_stage_level = logging.INFO - should_be_included = (morpheus_log_level <= monitor_stage_level) + should_be_included = (log_level <= monitor_stage_level) pipe = LinearPipeline(config) pipe.set_source(FileSourceStage(config, filename=input_file)) @@ -179,16 +178,13 @@ def test_log_level(mock_progress_sink: mock.MagicMock, assert mock_sink_on_completed.call_count == expected_call_count -@pytest.mark.usefixtures("reset_loglevel") @pytest.mark.use_python -def test_thread(config: Config): +def test_thread(config: Config, morpheus_log_level: int): """ - Test ensures the monitor stage doesn't add itself to the MRC pipeline if not configured for the current log-level + Test ensures the monitor stage executes on the same thread as the parent stage """ input_file = os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.csv") - set_log_level(log_level=logging.INFO) - monitor_thread_id = None # Create a dummy count function where we can save the thread id from the monitor stage @@ -202,8 +198,9 @@ def fake_determine_count_fn(x): pipe = LinearPipeline(config) pipe.set_source(FileSourceStage(config, filename=input_file)) dummy_stage = 
pipe.add_stage(RecordThreadIdStage(config)) - pipe.add_stage(MonitorStage(config, determine_count_fn=fake_determine_count_fn)) + pipe.add_stage(MonitorStage(config, determine_count_fn=fake_determine_count_fn, log_level=morpheus_log_level)) pipe.run() # Check that the thread ids are the same + assert monitor_thread_id is not None assert dummy_stage.thread_id == monitor_thread_id diff --git a/tests/test_phishing.py b/tests/test_phishing.py index 4f434e993e..77e752ef3f 100755 --- a/tests/test_phishing.py +++ b/tests/test_phishing.py @@ -23,6 +23,7 @@ from _utils import TEST_DIRS from _utils import calc_error_val from _utils import mk_async_infer +from morpheus.config import Config from morpheus.config import PipelineModes from morpheus.pipeline import LinearPipeline from morpheus.stages.general.monitor_stage import MonitorStage @@ -44,7 +45,7 @@ @pytest.mark.slow @pytest.mark.use_python @mock.patch('tritonclient.grpc.InferenceServerClient') -def test_email_no_cpp(mock_triton_client, config, tmp_path): +def test_email_no_cpp(mock_triton_client: mock.MagicMock, config: Config, tmp_path: str, morpheus_log_level: int): mock_metadata = { "inputs": [{ "name": "input_ids", "datatype": "INT64", "shape": [-1, FEATURE_LENGTH] @@ -96,7 +97,8 @@ def test_email_no_cpp(mock_triton_client, config, tmp_path): pipe.add_stage( TritonInferenceStage(config, model_name='phishing-bert-onnx', server_url='test:0000', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config, labels=["is_phishing"], threshold=0.7)) pipe.add_stage( ValidationStage(config, val_file_name=val_file_name, results_file_name=results_file_name, rel_tol=0.05)) @@ -111,7 +113,7 @@ def test_email_no_cpp(mock_triton_client, config, tmp_path): @pytest.mark.slow @pytest.mark.use_cpp @pytest.mark.usefixtures("launch_mock_triton") -def test_email_cpp(config, tmp_path): +def test_email_cpp(config: Config, tmp_path: str, morpheus_log_level: int): config.mode = PipelineModes.NLP config.class_labels = load_labels_file(os.path.join(TEST_DIRS.data_dir, "labels_phishing.txt")) config.model_max_batch_size = MODEL_MAX_BATCH_SIZE @@ -139,7 +141,8 @@ def test_email_cpp(config, tmp_path): model_name='phishing-bert-onnx', server_url='localhost:8001', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config, labels=["is_phishing"], threshold=0.7)) pipe.add_stage( ValidationStage(config, val_file_name=val_file_name, results_file_name=results_file_name, rel_tol=0.05)) diff --git a/tests/test_phishing_kafka.py b/tests/test_phishing_kafka.py index ba8fa1a14f..1a04061cc9 100755 --- a/tests/test_phishing_kafka.py +++ b/tests/test_phishing_kafka.py @@ -60,7 +60,8 @@ def test_email_no_cpp(mock_triton_client: mock.MagicMock, config: Config, kafka_bootstrap_servers: str, kafka_topics: KafkaTopics, - kafka_consumer: "KafkaConsumer"): + kafka_consumer: "KafkaConsumer", + morpheus_log_level: int): mock_metadata = { "inputs": [{ "name": "input_ids", "datatype": "INT64", "shape": [-1, FEATURE_LENGTH] @@ -120,7 +121,8 @@ def test_email_no_cpp(mock_triton_client: mock.MagicMock, pipe.add_stage( 
TritonInferenceStage(config, model_name='phishing-bert-onnx', server_url='test:0000', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config, labels=["is_phishing"], threshold=0.7)) pipe.add_stage(SerializeStage(config)) pipe.add_stage( @@ -153,7 +155,8 @@ def test_email_cpp(dataset_pandas: DatasetManager, config: Config, kafka_bootstrap_servers: str, kafka_topics: KafkaTopics, - kafka_consumer: "KafkaConsumer"): + kafka_consumer: "KafkaConsumer", + morpheus_log_level: int): config.mode = PipelineModes.NLP config.class_labels = load_labels_file(os.path.join(TEST_DIRS.data_dir, "labels_phishing.txt")) config.model_max_batch_size = MODEL_MAX_BATCH_SIZE @@ -187,7 +190,8 @@ def test_email_cpp(dataset_pandas: DatasetManager, model_name='phishing-bert-onnx', server_url='localhost:8001', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config, labels=["is_phishing"], threshold=0.7)) pipe.add_stage(SerializeStage(config)) pipe.add_stage( diff --git a/tests/test_sid.py b/tests/test_sid.py index 67ca36161c..2221abe930 100755 --- a/tests/test_sid.py +++ b/tests/test_sid.py @@ -25,6 +25,7 @@ from _utils import calc_error_val from _utils import compare_class_to_scores from _utils import mk_async_infer +from morpheus.config import Config from morpheus.config import CppConfig from morpheus.config import PipelineModes from morpheus.pipeline import LinearPipeline @@ -44,7 +45,15 @@ MODEL_MAX_BATCH_SIZE = 32 -def _run_minibert_pipeline(config, tmp_path, model_name, truncated, data_col_name: str = "data"): +def _run_minibert_pipeline( + *, + config: Config, + tmp_path: str, + model_name: str, + truncated: bool, + morpheus_log_level: int, + data_col_name: str = "data", +): """ Runs just the Minibert Pipeline """ @@ -100,7 +109,8 @@ def _run_minibert_pipeline(config, tmp_path, model_name, truncated, data_col_nam column=data_col_name)) pipe.add_stage( TritonInferenceStage(config, model_name=model_name, server_url='localhost:8001', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config, threshold=0.5, prefix="si_")) pipe.add_stage(AddScoresStage(config, prefix="score_")) pipe.add_stage( @@ -113,7 +123,13 @@ def _run_minibert_pipeline(config, tmp_path, model_name, truncated, data_col_nam return calc_error_val(results_file_name) -def _run_minibert(config, tmp_path, model_name, truncated, data_col_name: str = "data"): +def _run_minibert(*, + config: Config, + tmp_path: str, + model_name: str, + truncated: bool, + morpheus_log_level: int, + data_col_name: str = "data"): """ Runs the minibert pipeline and mocks the Triton Python interface """ @@ -145,15 +161,24 @@ def _run_minibert(config, tmp_path, model_name, truncated, data_col_name: str = async_infer = mk_async_infer(inf_results) mock_triton_client.async_infer.side_effect = async_infer - return _run_minibert_pipeline(config, tmp_path, 
model_name, truncated, data_col_name) + return _run_minibert_pipeline(config=config, + tmp_path=tmp_path, + model_name=model_name, + truncated=truncated, + data_col_name=data_col_name, + morpheus_log_level=morpheus_log_level) @pytest.mark.slow @pytest.mark.use_cpp @pytest.mark.usefixtures("launch_mock_triton") -def test_minibert_no_trunc(config, tmp_path): +def test_minibert_no_trunc(config: Config, tmp_path: str, morpheus_log_level: int): - results = _run_minibert(config, tmp_path, "sid-minibert-onnx-no-trunc", False) + results = _run_minibert(config=config, + tmp_path=tmp_path, + model_name="sid-minibert-onnx-no-trunc", + truncated=False, + morpheus_log_level=morpheus_log_level) # Not sure why these are different if (CppConfig.get_should_use_cpp()): @@ -164,22 +189,15 @@ def test_minibert_no_trunc(config, tmp_path): @pytest.mark.slow @pytest.mark.usefixtures("launch_mock_triton") -def test_minibert_truncated(config, tmp_path): - - results = _run_minibert(config, tmp_path, 'sid-minibert-onnx', True) - - # Not sure why these are different - if (CppConfig.get_should_use_cpp()): - assert results.diff_rows == 1204 - else: - assert results.diff_rows == 1333 - - -@pytest.mark.slow -@pytest.mark.usefixtures("launch_mock_triton") -def test_minibert_data_col_name(config, tmp_path): - - results = _run_minibert(config, tmp_path, 'sid-minibert-onnx', True, "definitely_not_data") +@pytest.mark.parametrize("data_col_name", ["data", "definitely_not_data"]) +def test_minibert_truncated(config: Config, tmp_path: str, morpheus_log_level: int, data_col_name: str): + + results = _run_minibert(config=config, + tmp_path=tmp_path, + model_name='sid-minibert-onnx', + truncated=True, + data_col_name=data_col_name, + morpheus_log_level=morpheus_log_level) # Not sure why these are different if (CppConfig.get_should_use_cpp()): diff --git a/tests/test_sid_kafka.py b/tests/test_sid_kafka.py index ecc87de4b3..a50544c9c9 100755 --- a/tests/test_sid_kafka.py +++ b/tests/test_sid_kafka.py @@ -58,7 +58,8 @@ def test_minibert_no_cpp(mock_triton_client: mock.MagicMock, config: Config, kafka_bootstrap_servers: str, kafka_topics: KafkaTopics, - kafka_consumer: "KafkaConsumer"): + kafka_consumer: "KafkaConsumer", + morpheus_log_level: int): mock_metadata = { "inputs": [{ "name": "input_ids", "datatype": "INT32", "shape": [-1, FEATURE_LENGTH] @@ -117,7 +118,8 @@ def test_minibert_no_cpp(mock_triton_client: mock.MagicMock, add_special_tokens=False)) pipe.add_stage( TritonInferenceStage(config, model_name='sid-minibert-onnx', server_url='fake:001', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config, threshold=0.5, prefix="si_")) pipe.add_stage(SerializeStage(config)) pipe.add_stage( @@ -150,7 +152,8 @@ def test_minibert_cpp(dataset_pandas: DatasetManager, config: Config, kafka_bootstrap_servers: str, kafka_topics: KafkaTopics, - kafka_consumer: "KafkaConsumer"): + kafka_consumer: "KafkaConsumer", + morpheus_log_level: int): config.mode = PipelineModes.NLP config.class_labels = [ "address", @@ -187,7 +190,8 @@ def test_minibert_cpp(dataset_pandas: DatasetManager, model_name='sid-minibert-onnx', server_url='localhost:8001', force_convert_inputs=True)) - pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) + pipe.add_stage( + 
MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf", log_level=morpheus_log_level)) pipe.add_stage(AddClassificationsStage(config, threshold=0.5, prefix="si_")) pipe.add_stage(SerializeStage(config)) pipe.add_stage( From 69b8f193495c90438a4f338d643bcb7894b6f63d Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Wed, 24 Apr 2024 12:42:30 -0700 Subject: [PATCH 14/38] Fix mis-spelling of examples (#1664) ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1664 --- examples/llm/agents/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/llm/agents/README.md b/examples/llm/agents/README.md index 00c84baee9..f336fac245 100644 --- a/examples/llm/agents/README.md +++ b/examples/llm/agents/README.md @@ -118,7 +118,7 @@ This example demonstrates the basic implementation of Morpheus pipeline, showcas ```bash -python exmaples/llm/main.py agents simple [OPTIONS] +python examples/llm/main.py agents simple [OPTIONS] ``` ### Options: @@ -170,7 +170,7 @@ kafka-topics.sh --bootstrap-server ${BOOTSTRAP_SERVER} --alter --topic input --p Now Kafka example can be run using the following command with the below listed options: ```bash -python exmaples/llm/main.py agents kafka [OPTIONS] +python examples/llm/main.py agents kafka [OPTIONS] ``` ### Options: From 1e7db1fc4bdfb35f3f133a6ccfd41318ebc581b5 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 24 Apr 2024 12:55:10 -0700 Subject: [PATCH 15/38] Updating CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8386e6ac8e..ef1d9eff16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ limitations under the License. - Document current known issues in 24.03.02 ([#1656](https://github.com/nv-morpheus/Morpheus/pull/1656)) [@dagardner-nv](https://github.com/dagardner-nv) - Fix documentation for building examples ([#1659](https://github.com/nv-morpheus/Morpheus/pull/1659)) [@dagardner-nv](https://github.com/dagardner-nv) - Fix type-o in documentation ([#1662](https://github.com/nv-morpheus/Morpheus/pull/1662)) [@dagardner-nv](https://github.com/dagardner-nv) +- Fix mis-spelling of examples ([#1664](https://github.com/nv-morpheus/Morpheus/pull/1664)) [@dagardner-nv](https://github.com/dagardner-nv) # Morpheus 24.03.01 (10 Apr 2024) From 31b610191f4c520360e2b18635a7486d4d6ef540 Mon Sep 17 00:00:00 2001 From: Christopher Harris Date: Wed, 24 Apr 2024 15:43:38 -0500 Subject: [PATCH 16/38] `ControlMessage` support in `TritonInferenceStage` and `PreallocatorMixin` (#1610) Introduces `ControlMessage` support for `TritonInferenceStage`, and updates some of the abp tests accordingly. Also updates `PreallocatorMixin` for `ControlMessage` support. ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. 
- When the PR is ready for review, the documentation is up to date with these changes. Authors: - Christopher Harris (https://github.com/cwharris) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1610 --- .../stages/inference_client_stage.hpp | 46 +++- .../include/morpheus/stages/preallocate.hpp | 8 + .../src/stages/inference_client_stage.cpp | 231 ++++++++++++++---- morpheus/_lib/src/stages/triton_inference.cpp | 9 +- morpheus/_lib/stages/__init__.pyi | 12 +- morpheus/_lib/stages/module.cpp | 33 ++- .../stages/test_triton_inference_stage.cpp | 3 +- morpheus/pipeline/preallocator_mixin.py | 15 +- morpheus/pipeline/stage_base.py | 2 + morpheus/stages/inference/inference_stage.py | 61 +---- .../inference/triton_inference_stage.py | 24 +- .../stages/postprocess/validation_stage.py | 3 +- .../preprocess/preprocess_base_stage.py | 14 +- tests/test_abp.py | 36 ++- tests/test_inference_stage.py | 62 ----- tests/test_sid.py | 19 +- 16 files changed, 365 insertions(+), 213 deletions(-) diff --git a/morpheus/_lib/include/morpheus/stages/inference_client_stage.hpp b/morpheus/_lib/include/morpheus/stages/inference_client_stage.hpp index 24d142184d..fd115de5af 100644 --- a/morpheus/_lib/include/morpheus/stages/inference_client_stage.hpp +++ b/morpheus/_lib/include/morpheus/stages/inference_client_stage.hpp @@ -18,10 +18,12 @@ #pragma once #include "morpheus/export.h" +#include "morpheus/messages/control.hpp" #include "morpheus/messages/multi_inference.hpp" #include "morpheus/messages/multi_response.hpp" #include "morpheus/types.hpp" +#include #include #include #include @@ -29,6 +31,7 @@ #include #include #include +#include #include #include @@ -93,12 +96,13 @@ class MORPHEUS_EXPORT IInferenceClient * @brief Perform inference with Triton Inference Server. * This class specifies which inference implementation category (Ex: NLP/FIL) is needed for inferencing. */ +template class MORPHEUS_EXPORT InferenceClientStage - : public mrc::pymrc::AsyncioRunnable, std::shared_ptr> + : public mrc::pymrc::AsyncioRunnable, std::shared_ptr> { public: - using sink_type_t = std::shared_ptr; - using source_type_t = std::shared_ptr; + using sink_type_t = std::shared_ptr; + using source_type_t = std::shared_ptr; /** * @brief Construct a new Inference Client Stage object @@ -117,11 +121,11 @@ class MORPHEUS_EXPORT InferenceClientStage std::vector output_mapping); /** - * Process a single MultiInferenceMessage by running the constructor-provided inference client against it's Tensor, - * and yields the result as a MultiResponseMessage + * Process a single InputT by running the constructor-provided inference client against it's Tensor, + * and yields the result as a OutputT */ - mrc::coroutines::AsyncGenerator> on_data( - std::shared_ptr&& data, std::shared_ptr on) override; + mrc::coroutines::AsyncGenerator> on_data( + std::shared_ptr&& data, std::shared_ptr on) override; private: std::string m_model_name; @@ -142,7 +146,7 @@ class MORPHEUS_EXPORT InferenceClientStage struct MORPHEUS_EXPORT InferenceClientStageInterfaceProxy { /** - * @brief Create and initialize a InferenceClientStage, and return the result + * @brief Create and initialize a MultiMessage-based InferenceClientStage, and return the result * * @param builder : Pipeline context object reference * @param name : Name of a stage reference @@ -152,9 +156,31 @@ struct MORPHEUS_EXPORT InferenceClientStageInterfaceProxy * @param needs_logits : Determines if logits are required. 
* @param inout_mapping : Dictionary used to map pipeline input/output names to Triton input/output names. Use this * if the Morpheus names do not match the model. - * @return std::shared_ptr> + * @return std::shared_ptr>> */ - static std::shared_ptr> init( + static std::shared_ptr>> + init_mm(mrc::segment::Builder& builder, + const std::string& name, + std::string model_name, + std::string server_url, + bool needs_logits, + std::map input_mapping, + std::map output_mapping); + + /** + * @brief Create and initialize a ControlMessage-based InferenceClientStage, and return the result + * + * @param builder : Pipeline context object reference + * @param name : Name of a stage reference + * @param model_name : Name of the model specifies which model can handle the inference requests that are sent to + * Triton inference + * @param server_url : Triton server URL. + * @param needs_logits : Determines if logits are required. + * @param inout_mapping : Dictionary used to map pipeline input/output names to Triton input/output names. Use this + * if the Morpheus names do not match the model. + * @return std::shared_ptr>> + */ + static std::shared_ptr>> init_cm( mrc::segment::Builder& builder, const std::string& name, std::string model_name, diff --git a/morpheus/_lib/include/morpheus/stages/preallocate.hpp b/morpheus/_lib/include/morpheus/stages/preallocate.hpp index 30b6b186c6..ab1cabdde0 100644 --- a/morpheus/_lib/include/morpheus/stages/preallocate.hpp +++ b/morpheus/_lib/include/morpheus/stages/preallocate.hpp @@ -17,6 +17,7 @@ #pragma once +#include "morpheus/messages/control.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" #include "morpheus/objects/dtype.hpp" // for TypeId @@ -51,11 +52,18 @@ void preallocate(std::shared_ptr msg, table.insert_missing_columns(columns); } +void preallocate(std::shared_ptr msg, + const std::vector>& columns) +{ + preallocate(msg->payload(), columns); +} + void preallocate(std::shared_ptr msg, const std::vector>& columns) { preallocate(msg->meta, columns); } + } // namespace /****** Component public implementations *******************/ diff --git a/morpheus/_lib/src/stages/inference_client_stage.cpp b/morpheus/_lib/src/stages/inference_client_stage.cpp index 069ccd557e..26428aa159 100644 --- a/morpheus/_lib/src/stages/inference_client_stage.cpp +++ b/morpheus/_lib/src/stages/inference_client_stage.cpp @@ -17,8 +17,13 @@ #include "morpheus/stages/inference_client_stage.hpp" +#include "morpheus/messages/control.hpp" #include "morpheus/messages/memory/response_memory.hpp" #include "morpheus/messages/memory/tensor_memory.hpp" +#include "morpheus/messages/meta.hpp" +#include "morpheus/messages/multi_inference.hpp" +#include "morpheus/messages/multi_response.hpp" +#include "morpheus/objects/data_table.hpp" #include "morpheus/objects/dev_mem_info.hpp" #include "morpheus/objects/dtype.hpp" #include "morpheus/objects/tensor.hpp" @@ -26,22 +31,26 @@ #include "morpheus/stages/triton_inference.hpp" #include "morpheus/utilities/matx_util.hpp" -#include #include #include #include +#include #include #include #include +#include #include #include #include +#include #include namespace { -static morpheus::ShapeType get_seq_ids(const morpheus::InferenceClientStage::sink_type_t& message) +using namespace morpheus; + +static ShapeType get_seq_ids(const std::shared_ptr& message) { // Take a copy of the sequence Ids allowing us to map rows in the response to rows in the dataframe // The output tensors we store in `reponse_memory` will all be of the same 
length as the the @@ -49,7 +58,7 @@ static morpheus::ShapeType get_seq_ids(const morpheus::InferenceClientStage::sin auto seq_ids = message->get_input("seq_ids"); const auto item_size = seq_ids.dtype().item_size(); - morpheus::ShapeType host_seq_ids(message->count); + ShapeType host_seq_ids(message->count); MRC_CHECK_CUDA(cudaMemcpy2D(host_seq_ids.data(), item_size, seq_ids.data(), @@ -61,35 +70,109 @@ static morpheus::ShapeType get_seq_ids(const morpheus::InferenceClientStage::sin return host_seq_ids; } -static void reduce_outputs(const morpheus::InferenceClientStage::sink_type_t& x, morpheus::TensorMap& output_tensors) +static ShapeType get_seq_ids(const std::shared_ptr& message) +{ + // Take a copy of the sequence Ids allowing us to map rows in the response to rows in the dataframe + // The output tensors we store in `reponse_memory` will all be of the same length as the the + // dataframe. seq_ids has three columns, but we are only interested in the first column. + auto seq_ids = message->tensors()->get_tensor("seq_ids"); + const auto item_size = seq_ids.dtype().item_size(); + + ShapeType host_seq_ids(message->tensors()->count); + MRC_CHECK_CUDA(cudaMemcpy2D(host_seq_ids.data(), + item_size, + seq_ids.data(), + seq_ids.stride(0) * item_size, + item_size, + host_seq_ids.size(), + cudaMemcpyDeviceToHost)); + + return host_seq_ids; +} + +static bool has_tensor(std::shared_ptr message, std::string const& tensor_name) +{ + return message->memory->has_tensor(tensor_name); +} + +static bool has_tensor(std::shared_ptr message, std::string const& tensor_name) +{ + return message->tensors()->has_tensor(tensor_name); +} + +static TensorObject get_tensor(std::shared_ptr message, std::string const& tensor_name) +{ + return message->get_input(tensor_name); +} + +static TensorObject get_tensor(std::shared_ptr message, std::string const& tensor_name) { + return message->tensors()->get_tensor(tensor_name); +} + +static void reduce_outputs(std::shared_ptr const& message, TensorMap& output_tensors) +{ + if (message->mess_count == message->count) + { + return; + } + // When our tensor lengths are longer than our dataframe we will need to use the seq_ids array to // lookup how the values should map back into the dataframe. 
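    // Concretely: when one dataframe row fans out into several tensor rows
    // (for example, a long text tokenized into multiple fixed-length
    // sequences), seq_ids[i] records the dataframe row that tensor row i
    // belongs to, and the reduce_max call below collapses the per-tensor-row
    // outputs back down to a single result per dataframe row.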
- auto host_seq_ids = get_seq_ids(x); + auto host_seq_ids = get_seq_ids(message); for (auto& mapping : output_tensors) { auto& output_tensor = mapping.second; - morpheus::ShapeType shape = output_tensor.get_shape(); - morpheus::ShapeType stride = output_tensor.get_stride(); + ShapeType shape = output_tensor.get_shape(); + ShapeType stride = output_tensor.get_stride(); - morpheus::ShapeType reduced_shape{shape}; - reduced_shape[0] = x->mess_count; + ShapeType reduced_shape{shape}; + reduced_shape[0] = message->mess_count; - auto reduced_buffer = morpheus::MatxUtil::reduce_max( - morpheus::DevMemInfo{ - output_tensor.data(), output_tensor.dtype(), output_tensor.get_memory(), shape, stride}, + auto reduced_buffer = MatxUtil::reduce_max( + DevMemInfo{output_tensor.data(), output_tensor.dtype(), output_tensor.get_memory(), shape, stride}, host_seq_ids, 0, reduced_shape); - output_tensor.swap( - morpheus::Tensor::create(std::move(reduced_buffer), output_tensor.dtype(), reduced_shape, stride, 0)); + output_tensor.swap(Tensor::create(std::move(reduced_buffer), output_tensor.dtype(), reduced_shape, stride, 0)); } } -static void apply_logits(morpheus::TensorMap& output_tensors) +static void reduce_outputs(std::shared_ptr const& message, TensorMap& output_tensors) +{ + if (message->payload()->count() == message->tensors()->count) + { + return; + } + + // When our tensor lengths are longer than our dataframe we will need to use the seq_ids array to + // lookup how the values should map back into the dataframe. + auto host_seq_ids = get_seq_ids(message); + + for (auto& mapping : output_tensors) + { + auto& output_tensor = mapping.second; + + ShapeType shape = output_tensor.get_shape(); + ShapeType stride = output_tensor.get_stride(); + + ShapeType reduced_shape{shape}; + reduced_shape[0] = message->payload()->count(); + + auto reduced_buffer = MatxUtil::reduce_max( + DevMemInfo{output_tensor.data(), output_tensor.dtype(), output_tensor.get_memory(), shape, stride}, + host_seq_ids, + 0, + reduced_shape); + + output_tensor.swap(Tensor::create(std::move(reduced_buffer), output_tensor.dtype(), reduced_shape, stride, 0)); + } +} + +static void apply_logits(TensorMap& output_tensors) { for (auto& mapping : output_tensors) { @@ -110,11 +193,12 @@ static void apply_logits(morpheus::TensorMap& output_tensors) namespace morpheus { -InferenceClientStage::InferenceClientStage(std::unique_ptr&& client, - std::string model_name, - bool needs_logits, - std::vector input_mapping, - std::vector output_mapping) : +template +InferenceClientStage::InferenceClientStage(std::unique_ptr&& client, + std::string model_name, + bool needs_logits, + std::vector input_mapping, + std::vector output_mapping) : m_model_name(std::move(model_name)), m_client(std::move(client)), m_needs_logits(needs_logits), @@ -149,8 +233,26 @@ struct ExponentialBackoff } }; -mrc::coroutines::AsyncGenerator> InferenceClientStage::on_data( - std::shared_ptr&& x, std::shared_ptr on) +static std::shared_ptr make_response(std::shared_ptr message, + TensorMap&& output_tensor_map) +{ + // Final output of all mini-batches + auto response_mem = std::make_shared(message->mess_count, std::move(output_tensor_map)); + + return std::make_shared( + message->meta, message->mess_offset, message->mess_count, std::move(response_mem), 0, response_mem->count); +} + +static std::shared_ptr make_response(std::shared_ptr message, + TensorMap&& output_tensor_map) +{ + message->tensors(std::make_shared(message->payload()->count(), std::move(output_tensor_map))); + return 
message; +} + +template +mrc::coroutines::AsyncGenerator> InferenceClientStage::on_data( + std::shared_ptr&& message, std::shared_ptr on) { int32_t retry_count = 0; @@ -192,9 +294,9 @@ mrc::coroutines::AsyncGenerator> Inference for (auto mapping : message_session->get_input_mappings(m_input_mapping)) { - if (x->memory->has_tensor(mapping.tensor_field_name)) + if (has_tensor(message, mapping.tensor_field_name)) { - model_input_tensors[mapping.model_field_name].swap(x->get_input(mapping.tensor_field_name)); + model_input_tensors[mapping.model_field_name].swap(get_tensor(message, mapping.tensor_field_name)); } } @@ -202,10 +304,7 @@ mrc::coroutines::AsyncGenerator> Inference co_await on->yield(); - if (x->mess_count != x->count) - { - reduce_outputs(x, model_output_tensors); - } + reduce_outputs(message, model_output_tensors); // If we need to do logits, do that here if (m_needs_logits) @@ -228,16 +327,28 @@ mrc::coroutines::AsyncGenerator> Inference } } - // Final output of all mini-batches - auto response_mem = std::make_shared(x->mess_count, std::move(output_tensor_map)); - - auto response = std::make_shared( - x->meta, x->mess_offset, x->mess_count, std::move(response_mem), 0, response_mem->count); + auto result = make_response(message, std::move(output_tensor_map)); - co_yield std::move(response); + co_yield result; co_return; + } catch (std::runtime_error ex) + { + auto lock = std::unique_lock(m_session_mutex); + + if (m_session == message_session) + { + m_session.reset(); + } + + if (m_retry_max >= 0 and ++retry_count > m_retry_max) + { + throw; + } + + LOG(WARNING) << "Exception while processing message for InferenceClientStage, attempting retry. ex.what(): " + << ex.what(); } catch (...) { auto lock = std::unique_lock(m_session_mutex); @@ -260,14 +371,45 @@ mrc::coroutines::AsyncGenerator> Inference } // ************ InferenceClientStageInterfaceProxy********* // -std::shared_ptr> InferenceClientStageInterfaceProxy::init( - mrc::segment::Builder& builder, - const std::string& name, - std::string server_url, - std::string model_name, - bool needs_logits, - std::map input_mappings, - std::map output_mappings) +std::shared_ptr>> +InferenceClientStageInterfaceProxy::init_mm(mrc::segment::Builder& builder, + const std::string& name, + std::string server_url, + std::string model_name, + bool needs_logits, + std::map input_mappings, + std::map output_mappings) +{ + std::vector input_mappings_{}; + std::vector output_mappings_{}; + + for (auto& mapping : input_mappings) + { + input_mappings_.emplace_back(TensorModelMapping{mapping.first, mapping.second}); + } + + for (auto& mapping : output_mappings) + { + output_mappings_.emplace_back(TensorModelMapping{mapping.first, mapping.second}); + } + + auto triton_client = std::make_unique(server_url); + auto triton_inference_client = std::make_unique(std::move(triton_client), model_name); + auto stage = builder.construct_object>( + name, std::move(triton_inference_client), model_name, needs_logits, input_mappings_, output_mappings_); + + return stage; +} + +// ************ InferenceClientStageInterfaceProxy********* // +std::shared_ptr>> +InferenceClientStageInterfaceProxy::init_cm(mrc::segment::Builder& builder, + const std::string& name, + std::string server_url, + std::string model_name, + bool needs_logits, + std::map input_mappings, + std::map output_mappings) { std::vector input_mappings_{}; std::vector output_mappings_{}; @@ -284,10 +426,13 @@ std::shared_ptr> InferenceClientStage auto triton_client = std::make_unique(server_url); auto 
triton_inference_client = std::make_unique(std::move(triton_client), model_name); - auto stage = builder.construct_object( + auto stage = builder.construct_object>( name, std::move(triton_inference_client), model_name, needs_logits, input_mappings_, output_mappings_); return stage; } +template class InferenceClientStage; +template class InferenceClientStage; + } // namespace morpheus diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index 6464c3be5d..30f100e7ea 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -476,12 +476,13 @@ mrc::coroutines::Task TritonInferenceClientSession::infer(TensorMap&& const uint8_t* output_ptr = nullptr; size_t output_ptr_size = 0; + CHECK_TRITON(results->RawData(model_output.name, &output_ptr, &output_ptr_size)); - DCHECK_EQ(stop - start, output_shape[0]); - DCHECK_EQ(output_tensor.bytes(), output_ptr_size); - DCHECK_NOTNULL(output_ptr); // NOLINT - DCHECK_NOTNULL(output_tensor.data()); // NOLINT + // DCHECK_EQ(stop - start, output_shape[0]); + // DCHECK_EQ(output_tensor.bytes(), output_ptr_size); + // DCHECK_NOTNULL(output_ptr); // NOLINT + // DCHECK_NOTNULL(output_tensor.data()); // NOLINT MRC_CHECK_CUDA(cudaMemcpy(output_tensor.data(), output_ptr, output_ptr_size, cudaMemcpyHostToDevice)); } diff --git a/morpheus/_lib/stages/__init__.pyi b/morpheus/_lib/stages/__init__.pyi index 515bab0c12..85767bdcef 100644 --- a/morpheus/_lib/stages/__init__.pyi +++ b/morpheus/_lib/stages/__init__.pyi @@ -24,8 +24,10 @@ __all__ = [ "FilterDetectionsStage", "FilterSource", "HttpServerSourceStage", - "InferenceClientStage", + "InferenceClientStageCM", + "InferenceClientStageMM", "KafkaSourceStage", + "PreallocateControlMessageStage", "PreallocateMessageMetaStage", "PreallocateMultiMessageStage", "PreprocessFILControlMessageStage", @@ -68,7 +70,10 @@ class FilterDetectionsStage(mrc.core.segment.SegmentObject): class HttpServerSourceStage(mrc.core.segment.SegmentObject): def __init__(self, builder: mrc.core.segment.Builder, name: str, bind_address: str = '127.0.0.1', port: int = 8080, endpoint: str = '/message', method: str = 'POST', accept_status: int = 201, sleep_time: float = 0.10000000149011612, queue_timeout: int = 5, max_queue_size: int = 1024, num_server_threads: int = 1, max_payload_size: int = 10485760, request_timeout: int = 30, lines: bool = False, stop_after: int = 0) -> None: ... pass -class InferenceClientStage(mrc.core.segment.SegmentObject): +class InferenceClientStageCM(mrc.core.segment.SegmentObject): + def __init__(self, builder: mrc.core.segment.Builder, name: str, server_url: str, model_name: str, needs_logits: bool, input_mapping: typing.Dict[str, str] = {}, output_mapping: typing.Dict[str, str] = {}) -> None: ... + pass +class InferenceClientStageMM(mrc.core.segment.SegmentObject): def __init__(self, builder: mrc.core.segment.Builder, name: str, server_url: str, model_name: str, needs_logits: bool, input_mapping: typing.Dict[str, str] = {}, output_mapping: typing.Dict[str, str] = {}) -> None: ... 
pass class KafkaSourceStage(mrc.core.segment.SegmentObject): @@ -77,6 +82,9 @@ class KafkaSourceStage(mrc.core.segment.SegmentObject): @typing.overload def __init__(self, builder: mrc.core.segment.Builder, name: str, max_batch_size: int, topics: typing.List[str], batch_timeout_ms: int, config: typing.Dict[str, str], disable_commits: bool = False, disable_pre_filtering: bool = False, stop_after: int = 0, async_commits: bool = True, oauth_callback: typing.Optional[function] = None) -> None: ... pass +class PreallocateControlMessageStage(mrc.core.segment.SegmentObject): + def __init__(self, builder: mrc.core.segment.Builder, name: str, needed_columns: typing.List[typing.Tuple[str, morpheus._lib.common.TypeId]]) -> None: ... + pass class PreallocateMessageMetaStage(mrc.core.segment.SegmentObject): def __init__(self, builder: mrc.core.segment.Builder, name: str, needed_columns: typing.List[typing.Tuple[str, morpheus._lib.common.TypeId]]) -> None: ... pass diff --git a/morpheus/_lib/stages/module.cpp b/morpheus/_lib/stages/module.cpp index 7b0d7ea293..6cdba387f0 100644 --- a/morpheus/_lib/stages/module.cpp +++ b/morpheus/_lib/stages/module.cpp @@ -18,6 +18,8 @@ #include "morpheus/messages/control.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" +#include "morpheus/messages/multi_inference.hpp" +#include "morpheus/messages/multi_response.hpp" #include "morpheus/objects/file_types.hpp" #include "morpheus/stages/add_classification.hpp" #include "morpheus/stages/add_scores.hpp" @@ -164,11 +166,25 @@ PYBIND11_MODULE(stages, _module) py::arg("filter_source"), py::arg("field_name") = "probs"); - py::class_, + py::class_< + mrc::segment::Object>, + mrc::segment::ObjectProperties, + std::shared_ptr>>>( + _module, "InferenceClientStageMM", py::multiple_inheritance()) + .def(py::init<>(&InferenceClientStageInterfaceProxy::init_mm), + py::arg("builder"), + py::arg("name"), + py::arg("server_url"), + py::arg("model_name"), + py::arg("needs_logits"), + py::arg("input_mapping") = py::dict(), + py::arg("output_mapping") = py::dict()); + + py::class_>, mrc::segment::ObjectProperties, - std::shared_ptr>>( - _module, "InferenceClientStage", py::multiple_inheritance()) - .def(py::init<>(&InferenceClientStageInterfaceProxy::init), + std::shared_ptr>>>( + _module, "InferenceClientStageCM", py::multiple_inheritance()) + .def(py::init<>(&InferenceClientStageInterfaceProxy::init_cm), py::arg("builder"), py::arg("name"), py::arg("server_url"), @@ -206,6 +222,15 @@ PYBIND11_MODULE(stages, _module) py::arg("async_commits") = true, py::arg("oauth_callback") = py::none()); + py::class_>, + mrc::segment::ObjectProperties, + std::shared_ptr>>>( + _module, "PreallocateControlMessageStage", py::multiple_inheritance()) + .def(py::init<>(&PreallocateStageInterfaceProxy::init), + py::arg("builder"), + py::arg("name"), + py::arg("needed_columns")); + py::class_>, mrc::segment::ObjectProperties, std::shared_ptr>>>( diff --git a/morpheus/_lib/tests/stages/test_triton_inference_stage.cpp b/morpheus/_lib/tests/stages/test_triton_inference_stage.cpp index c7a566b011..df7785d259 100644 --- a/morpheus/_lib/tests/stages/test_triton_inference_stage.cpp +++ b/morpheus/_lib/tests/stages/test_triton_inference_stage.cpp @@ -309,7 +309,8 @@ TEST_F(TestTritonInferenceStage, SingleRow) // create the fake triton client used for testing. 
auto triton_client = std::make_unique(); auto triton_inference_client = std::make_unique(std::move(triton_client), ""); - auto stage = morpheus::InferenceClientStage(std::move(triton_inference_client), "", false, {}, {}); + auto stage = morpheus::InferenceClientStage( + std::move(triton_inference_client), "", false, {}, {}); // manually invoke the stage and iterate through the inference responses auto on = std::make_shared(); diff --git a/morpheus/pipeline/preallocator_mixin.py b/morpheus/pipeline/preallocator_mixin.py index 61e9cd3702..c40ed6be04 100644 --- a/morpheus/pipeline/preallocator_mixin.py +++ b/morpheus/pipeline/preallocator_mixin.py @@ -28,6 +28,7 @@ from morpheus.common import TypeId from morpheus.common import typeid_to_numpy_str from morpheus.config import CppConfig +from morpheus.messages import ControlMessage from morpheus.messages import MessageMeta from morpheus.messages import MultiMessage from morpheus.utils.type_aliases import DataFrameType @@ -85,6 +86,10 @@ def _preallocate_multi(self, msg: MultiMessage) -> MultiMessage: self._preallocate_meta(msg.meta) return msg + def _preallocate_control(self, msg: ControlMessage) -> ControlMessage: + self._preallocate_meta(msg.payload()) + return msg + def _post_build_single(self, builder: mrc.Builder, out_node: mrc.SegmentObject) -> mrc.SegmentObject: out_type = self.output_ports[0].output_type pretty_type = pretty_print_type_name(out_type) @@ -92,17 +97,21 @@ def _post_build_single(self, builder: mrc.Builder, out_node: mrc.SegmentObject) if len(self._needed_columns) > 0: node_name = f"{self.unique_name}-preallocate" - if issubclass(out_type, (MessageMeta, MultiMessage)): + if issubclass(out_type, (ControlMessage, MessageMeta, MultiMessage)): # Intentionally not using `_build_cpp_node` because `LinearBoundaryIngressStage` lacks a C++ impl if CppConfig.get_should_use_cpp(): import morpheus._lib.stages as _stages needed_columns = list(self._needed_columns.items()) - if issubclass(out_type, MessageMeta): + if issubclass(out_type, ControlMessage): + node = _stages.PreallocateControlMessageStage(builder, node_name, needed_columns) + elif issubclass(out_type, MessageMeta): node = _stages.PreallocateMessageMetaStage(builder, node_name, needed_columns) else: node = _stages.PreallocateMultiMessageStage(builder, node_name, needed_columns) else: - if issubclass(out_type, MessageMeta): + if issubclass(out_type, ControlMessage): + node = builder.make_node(node_name, ops.map(self._preallocate_control)) + elif issubclass(out_type, MessageMeta): node = builder.make_node(node_name, ops.map(self._preallocate_meta)) else: node = builder.make_node(node_name, ops.map(self._preallocate_multi)) diff --git a/morpheus/pipeline/stage_base.py b/morpheus/pipeline/stage_base.py index 3aa3b2f450..290ed83992 100644 --- a/morpheus/pipeline/stage_base.py +++ b/morpheus/pipeline/stage_base.py @@ -80,6 +80,8 @@ class StageBase(ABC, collections.abc.Hashable): __ID_COUNTER = AtomicInteger(0) + _schema: _pipeline.StageSchema + def __init__(self, config: Config): # Save the config self._config = config diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index 579ddccd53..8b1fa75d3a 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -16,7 +16,6 @@ import typing from abc import abstractmethod from functools import partial -from functools import reduce import cupy as cp import mrc @@ -192,13 +191,13 @@ def accepted_types(self) -> typing.Tuple: typing.Tuple 
Tuple of input types. """ - if (self._build_cpp_node()): - return (MultiInferenceMessage, ) - return (MultiInferenceMessage, ControlMessage) def compute_schema(self, schema: StageSchema): - schema.output_schema.set_type(MultiResponseMessage) + if schema.input_type == ControlMessage: + schema.output_schema.set_type(ControlMessage) + else: + schema.output_schema.set_type(MultiResponseMessage) def supports_cpp_node(self): # Default to False unless derived classes override this value @@ -285,10 +284,10 @@ def set_output_fut(resp: TensorMemory, inner_batch, batch_future: mrc.Future): if (isinstance(_message, ControlMessage)): _df = cudf.DataFrame(output_message.get_meta()) if (_df is not None and not _df.empty): - embeddings = output_message.get_probs_tensor() - _df["embedding"] = embeddings.tolist() _message_meta = CppMessageMeta(df=_df) _message.payload(_message_meta) + _message.tensors().set_tensor("probs", output_message.get_probs_tensor()) + print(_df) output_message = _message return output_message @@ -369,54 +368,6 @@ def _split_batches(x: MultiInferenceMessage, max_batch_size: int) -> typing.List return out_resp - @staticmethod - def _convert_response( - x: typing.Tuple[typing.List[MultiInferenceMessage], typing.List[TensorMemory]]) -> MultiResponseMessage: - - # Convert a MultiInferenceMessage into a MultiResponseMessage - in_message = x[0] - out_message = x[1] - - assert len(in_message) == len(out_message) - - # Get the total output size - total_mess_count = reduce(lambda y, z: y + z.mess_count, in_message, 0) - - # Create a message data to store the entire list - probs = cp.zeros((total_mess_count, out_message[0].get_tensor('probs').shape[1])) - - saved_offset = in_message[0].mess_offset - saved_count = 0 - - for inf, res in zip(in_message, out_message): - - # Ensure they all share the same meta object. Otherwise this doesn't work - # assert inf.meta is saved_meta - - # Make sure we have a continuous list - assert inf.mess_offset == saved_offset + saved_count - - assert inf.count == res.count - - # Two scenarios: - if (inf.mess_count == inf.count): - # In message and out message have same count. 
Just use probs as is - probs[inf.offset:inf.offset + inf.count, :] = res.get_output('probs') - else: - mess_ids = inf.get_tensor("seq_ids")[:, 0].get().tolist() - - # Out message has more reponses, so we have to do key based blending of probs - for i, idx in enumerate(mess_ids): - probs[idx, :] = cp.maximum(probs[idx, :], res.get_output('probs')[i, :]) - - saved_count += inf.mess_count - - assert saved_count == total_mess_count, "Did not set every element in output" - - memory = TensorMemory(count=total_mess_count, tensors={'probs': probs}) - - return MultiResponseMessage.from_message(in_message[0], mess_count=saved_count, memory=memory) - @staticmethod def _convert_one_response(output: MultiResponseMessage, inf: MultiInferenceMessage, res: TensorMemory): # Make sure we have a continuous list diff --git a/morpheus/stages/inference/triton_inference_stage.py b/morpheus/stages/inference/triton_inference_stage.py index e6c5c0fbb7..0b8a79dddf 100644 --- a/morpheus/stages/inference/triton_inference_stage.py +++ b/morpheus/stages/inference/triton_inference_stage.py @@ -32,6 +32,7 @@ from morpheus.cli.register_stage import register_stage from morpheus.config import Config from morpheus.config import PipelineModes +from morpheus.messages import ControlMessage from morpheus.messages import MultiInferenceMessage from morpheus.messages.memory.tensor_memory import TensorMemory from morpheus.stages.inference.inference_stage import InferenceStage @@ -774,13 +775,22 @@ def _get_inference_worker(self, inf_queue: ProducerConsumerQueue) -> TritonInfer needs_logits=self._needs_logits) def _get_cpp_inference_node(self, builder: mrc.Builder) -> mrc.SegmentObject: - return _stages.InferenceClientStage(builder, - self.unique_name, - self._server_url, - self._model_name, - self._needs_logits, - self._input_mapping, - self._output_mapping) + if self._schema.input_type == ControlMessage: + return _stages.InferenceClientStageCM(builder, + self.unique_name, + self._server_url, + self._model_name, + self._needs_logits, + self._input_mapping, + self._output_mapping) + + return _stages.InferenceClientStageMM(builder, + self.unique_name, + self._server_url, + self._model_name, + self._needs_logits, + self._input_mapping, + self._output_mapping) def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: node = super()._build_single(builder, input_node) diff --git a/morpheus/stages/postprocess/validation_stage.py b/morpheus/stages/postprocess/validation_stage.py index 1d62f18cab..7ae46db06f 100644 --- a/morpheus/stages/postprocess/validation_stage.py +++ b/morpheus/stages/postprocess/validation_stage.py @@ -23,6 +23,7 @@ from morpheus.cli.register_stage import register_stage from morpheus.config import Config +from morpheus.messages import ControlMessage from morpheus.messages import MultiMessage from morpheus.stages.output.compare_dataframe_stage import CompareDataFrameStage @@ -114,7 +115,7 @@ def accepted_types(self) -> typing.Tuple: Accepted input types. 
""" - return (MultiMessage, ) + return (MultiMessage, ControlMessage) def _do_comparison(self): results = self.get_results(clear=False) diff --git a/morpheus/stages/preprocess/preprocess_base_stage.py b/morpheus/stages/preprocess/preprocess_base_stage.py index 3731912026..f115e38053 100644 --- a/morpheus/stages/preprocess/preprocess_base_stage.py +++ b/morpheus/stages/preprocess/preprocess_base_stage.py @@ -61,15 +61,15 @@ def compute_schema(self, schema: StageSchema): if (schema.input_type == ControlMessage): self._use_control_message = True out_type = ControlMessage + self._preprocess_fn = self._get_preprocess_fn() else: self._use_control_message = False - - self._preprocess_fn = self._get_preprocess_fn() - preproc_sig = inspect.signature(self._preprocess_fn) - # If the innerfunction returns a type annotation, update the output type - if (preproc_sig.return_annotation - and typing_utils.issubtype(preproc_sig.return_annotation, MultiInferenceMessage)): - out_type = preproc_sig.return_annotation + self._preprocess_fn = self._get_preprocess_fn() + preproc_sig = inspect.signature(self._preprocess_fn) + # If the innerfunction returns a type annotation, update the output type + if (preproc_sig.return_annotation + and typing_utils.issubtype(preproc_sig.return_annotation, MultiInferenceMessage)): + out_type = preproc_sig.return_annotation schema.output_schema.set_type(out_type) diff --git a/tests/test_abp.py b/tests/test_abp.py index a3248deb7e..334f87afeb 100755 --- a/tests/test_abp.py +++ b/tests/test_abp.py @@ -27,6 +27,7 @@ from morpheus.config import Config from morpheus.config import ConfigFIL from morpheus.config import PipelineModes +from morpheus.messages import ControlMessage from morpheus.messages import MessageMeta from morpheus.messages import MultiInferenceMessage from morpheus.messages import MultiMessage @@ -116,7 +117,8 @@ def test_abp_no_cpp(mock_triton_client: mock.MagicMock, config: Config, tmp_path @pytest.mark.slow @pytest.mark.use_cpp @pytest.mark.usefixtures("launch_mock_triton") -def test_abp_cpp(config: Config, tmp_path: str, morpheus_log_level: int): +@pytest.mark.parametrize("message_type", [MultiMessage, ControlMessage]) +def test_abp_cpp(config: Config, tmp_path: str, message_type: type, morpheus_log_level: int): config.mode = PipelineModes.FIL config.class_labels = ["mining"] config.model_max_batch_size = MODEL_MAX_BATCH_SIZE @@ -134,7 +136,7 @@ def test_abp_cpp(config: Config, tmp_path: str, morpheus_log_level: int): pipe = LinearPipeline(config) pipe.set_source(FileSourceStage(config, filename=val_file_name, iterative=False)) - pipe.add_stage(DeserializeStage(config)) + pipe.add_stage(DeserializeStage(config, message_type=message_type)) pipe.add_stage(PreprocessFILStage(config)) # We are feeding TritonInferenceStage the port to the grpc server because that is what the validation tests do @@ -147,11 +149,16 @@ def test_abp_cpp(config: Config, tmp_path: str, morpheus_log_level: int): pipe.add_stage(AddClassificationsStage(config)) pipe.add_stage(AddScoresStage(config, prefix="score_")) pipe.add_stage( - ValidationStage(config, val_file_name=val_file_name, results_file_name=results_file_name, rel_tol=0.05)) + ValidationStage(config, + val_file_name=val_file_name, + results_file_name=results_file_name, + rel_tol=0.05, + overwrite=True)) pipe.add_stage(SerializeStage(config)) - pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False)) + pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=True)) pipe.run() + 
compare_class_to_scores(out_file, config.class_labels, '', 'score_', threshold=0.5) results = calc_error_val(results_file_name) assert results.diff_rows == 0 @@ -243,7 +250,14 @@ def test_abp_multi_segment_no_cpp(mock_triton_client: mock.MagicMock, @pytest.mark.slow @pytest.mark.use_cpp @pytest.mark.usefixtures("launch_mock_triton") -def test_abp_multi_segment_cpp(config, tmp_path): +@pytest.mark.parametrize("message_type", [MultiMessage, ControlMessage]) +def test_abp_multi_segment_cpp(config, tmp_path, message_type): + + def get_boundary_type(boundary_type): + if message_type == ControlMessage: + return ControlMessage + return boundary_type + config.mode = PipelineModes.FIL config.class_labels = ["mining"] config.model_max_batch_size = MODEL_MAX_BATCH_SIZE @@ -261,13 +275,13 @@ def test_abp_multi_segment_cpp(config, tmp_path): pipe = LinearPipeline(config) pipe.set_source(FileSourceStage(config, filename=val_file_name, iterative=False)) - pipe.add_stage(DeserializeStage(config)) + pipe.add_stage(DeserializeStage(config, message_type=message_type)) - pipe.add_segment_boundary(MultiMessage) # Boundary 1 + pipe.add_segment_boundary(get_boundary_type(MultiMessage)) # Boundary 1 pipe.add_stage(PreprocessFILStage(config)) - pipe.add_segment_boundary(MultiInferenceMessage) # Boundary 2 + pipe.add_segment_boundary(get_boundary_type(MultiInferenceMessage)) # Boundary 2 # We are feeding TritonInferenceStage the port to the grpc server because that is what the validation tests do # but the code under-the-hood replaces this with the port number of the http server @@ -275,17 +289,17 @@ def test_abp_multi_segment_cpp(config, tmp_path): TritonInferenceStage(config, model_name='abp-nvsmi-xgb', server_url='localhost:8001', force_convert_inputs=True)) - pipe.add_segment_boundary(MultiResponseMessage) # Boundary 3 + pipe.add_segment_boundary(get_boundary_type(MultiResponseMessage)) # Boundary 3 pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) pipe.add_stage(AddClassificationsStage(config)) - pipe.add_segment_boundary(MultiResponseMessage) # Boundary 4 + pipe.add_segment_boundary(get_boundary_type(MultiResponseMessage)) # Boundary 4 pipe.add_stage( ValidationStage(config, val_file_name=val_file_name, results_file_name=results_file_name, rel_tol=0.05)) - pipe.add_segment_boundary(MultiResponseMessage) # Boundary 5 + pipe.add_segment_boundary(get_boundary_type(MultiResponseMessage)) # Boundary 5 pipe.add_stage(SerializeStage(config)) diff --git a/tests/test_inference_stage.py b/tests/test_inference_stage.py index ee1989f2f9..e34f5a5bd4 100755 --- a/tests/test_inference_stage.py +++ b/tests/test_inference_stage.py @@ -25,7 +25,6 @@ from _utils.inference_worker import IW from morpheus.messages import ResponseMemory from morpheus.messages.memory.inference_memory import InferenceMemory -from morpheus.messages.memory.tensor_memory import TensorMemory from morpheus.messages.message_meta import MessageMeta from morpheus.messages.multi_inference_message import MultiInferenceMessage from morpheus.messages.multi_response_message import MultiResponseMessage @@ -121,67 +120,6 @@ def test_split_batches(): mock_message.get_slice.assert_has_calls([mock.call(0, 3), mock.call(3, 7), mock.call(7, 10)]) -@pytest.mark.use_python -def test_convert_response(): - # Pylint currently fails to work with classmethod: https://github.com/pylint-dev/pylint/issues/981 - # pylint: disable=no-member - - message_sizes = [3, 2, 1, 7, 4] - total_size = sum(message_sizes) - - full_input = 
_mk_message(mess_count=total_size, count=total_size) - - input_messages = [ - full_input.get_slice(sum(message_sizes[:i]), sum(message_sizes[:i]) + size) for i, - size in enumerate(message_sizes) - ] - - full_output = cp.random.rand(total_size, 3) - output_memory = [] - - for i, count in enumerate(message_sizes): - output_memory.append( - ResponseMemory(count=count, - tensors={"probs": full_output[sum(message_sizes[:i]):sum(message_sizes[:i]) + count, :]})) - - resp = InferenceStageT._convert_response((input_messages, output_memory)) - assert isinstance(resp, MultiResponseMessage) - assert resp.meta == full_input.meta - assert resp.mess_offset == 0 - assert resp.mess_count == total_size - assert isinstance(resp.memory, TensorMemory) - assert resp.offset == 0 - assert resp.count == total_size - assert (resp.memory.get_tensor("probs") == full_output).all() - - -def test_convert_response_errors(): - # Length of input messages doesn't match length of output messages - with pytest.raises(AssertionError): - InferenceStageT._convert_response(([1, 2, 3], [1, 2])) - - # Message offst of the second message doesn't line up offset+count of the first - msg1 = _mk_message() - msg2 = _mk_message(mess_offset=12) - - out_msg1 = ResponseMemory(count=1, tensors={"probs": cp.random.rand(1, 3)}) - out_msg2 = ResponseMemory(count=1, tensors={"probs": cp.random.rand(1, 3)}) - - with pytest.raises(AssertionError): - InferenceStageT._convert_response(([msg1, msg2], [out_msg1, out_msg2])) - - # mess_coutn and count don't match for msg2, and msg2.count != out_msg2.count - msg = _mk_message(mess_count=2, count=2) - msg1 = msg.get_slice(0, 1) - msg2 = msg.get_slice(1, 2) - - out_msg1 = ResponseMemory(count=1, tensors={"probs": cp.random.rand(1, 3)}) - out_msg2 = ResponseMemory(count=2, tensors={"probs": cp.random.rand(2, 3)}) - - with pytest.raises(AssertionError): - InferenceStageT._convert_response(([msg1, msg2], [out_msg1, out_msg2])) - - @pytest.mark.use_python def test_convert_one_response(): # Pylint currently fails to work with classmethod: https://github.com/pylint-dev/pylint/issues/981 diff --git a/tests/test_sid.py b/tests/test_sid.py index 2221abe930..b36903fd82 100755 --- a/tests/test_sid.py +++ b/tests/test_sid.py @@ -28,6 +28,8 @@ from morpheus.config import Config from morpheus.config import CppConfig from morpheus.config import PipelineModes +from morpheus.messages import ControlMessage +from morpheus.messages import MultiMessage from morpheus.pipeline import LinearPipeline from morpheus.stages.general.monitor_stage import MonitorStage from morpheus.stages.inference.triton_inference_stage import TritonInferenceStage @@ -49,6 +51,7 @@ def _run_minibert_pipeline( *, config: Config, tmp_path: str, + message_type: type, model_name: str, truncated: bool, morpheus_log_level: int, @@ -99,7 +102,7 @@ def _run_minibert_pipeline( pipe = LinearPipeline(config) pipe.set_source(FileSourceStage(config, filename=val_file_name, iterative=False)) - pipe.add_stage(DeserializeStage(config)) + pipe.add_stage(DeserializeStage(config, message_type=message_type)) pipe.add_stage( PreprocessNLPStage(config, vocab_hash_file=vocab_file_name, @@ -126,6 +129,7 @@ def _run_minibert_pipeline( def _run_minibert(*, config: Config, tmp_path: str, + message_type: type, model_name: str, truncated: bool, morpheus_log_level: int, @@ -163,6 +167,7 @@ def _run_minibert(*, return _run_minibert_pipeline(config=config, tmp_path=tmp_path, + message_type=message_type, model_name=model_name, truncated=truncated, data_col_name=data_col_name, @@ 
-172,10 +177,12 @@ def _run_minibert(*, @pytest.mark.slow @pytest.mark.use_cpp @pytest.mark.usefixtures("launch_mock_triton") -def test_minibert_no_trunc(config: Config, tmp_path: str, morpheus_log_level: int): +@pytest.mark.parametrize("message_type", [MultiMessage, ControlMessage]) +def test_minibert_no_trunc(config: Config, tmp_path: str, message_type: type, morpheus_log_level: int): results = _run_minibert(config=config, tmp_path=tmp_path, + message_type=message_type, model_name="sid-minibert-onnx-no-trunc", truncated=False, morpheus_log_level=morpheus_log_level) @@ -190,10 +197,16 @@ def test_minibert_no_trunc(config: Config, tmp_path: str, morpheus_log_level: in @pytest.mark.slow @pytest.mark.usefixtures("launch_mock_triton") @pytest.mark.parametrize("data_col_name", ["data", "definitely_not_data"]) -def test_minibert_truncated(config: Config, tmp_path: str, morpheus_log_level: int, data_col_name: str): +@pytest.mark.parametrize("message_type", [MultiMessage, ControlMessage]) +def test_minibert_truncated(config: Config, + tmp_path: str, + message_type: type, + morpheus_log_level: int, + data_col_name: str): results = _run_minibert(config=config, tmp_path=tmp_path, + message_type=message_type, model_name='sid-minibert-onnx', truncated=True, data_col_name=data_col_name, From 57d11a290365b1232a48812cb6ee7bda9ffccb7b Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Wed, 1 May 2024 12:54:45 -0700 Subject: [PATCH 17/38] Truncate strings exceeding max_length when inserting to Milvus (#1665) * Adds new helper methods to `morpheus.io.utils`, `cudf_string_cols_exceed_max_bytes` and `truncate_string_cols_by_bytes` * When `truncate_long_strings=True` `MilvusVectorDBResourceService` will truncate all `VARCHAR` fields according to the schema's `max_length` * Add `truncate_long_strings=True` in config for `vdb_upload` pipeline * Set C++ mode to default for example LLM pipelines * Remove issues 1650 & 1651 from `known_issues.md` Closes #1650 Closes #1651 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1665 --- docs/source/extra_info/known_issues.md | 2 - examples/llm/cli.py | 2 +- examples/llm/vdb_upload/pipeline.py | 19 ++- examples/llm/vdb_upload/vdb_utils.py | 3 +- morpheus/io/utils.py | 96 ++++++++++++ .../service/vdb/milvus_vector_db_service.py | 73 ++++++--- morpheus/stages/inference/inference_stage.py | 8 +- morpheus/utils/type_aliases.py | 1 + tests/conftest.py | 6 + tests/io/test_io_utils.py | 134 +++++++++++++++++ tests/test_milvus_vector_db_service.py | 138 ++++++++++++++++++ .../milvus_string_collection_conf.json | 3 + 12 files changed, 457 insertions(+), 28 deletions(-) create mode 100755 tests/io/test_io_utils.py create mode 100644 tests/tests_data/service/milvus_string_collection_conf.json diff --git a/docs/source/extra_info/known_issues.md b/docs/source/extra_info/known_issues.md index 014fac3471..9eeb53508e 100644 --- a/docs/source/extra_info/known_issues.md +++ b/docs/source/extra_info/known_issues.md @@ -19,7 +19,5 @@ limitations under the License. 
- TrainAEStage fails with a Segmentation fault ([#1641](https://github.com/nv-morpheus/Morpheus/pull/1641)) - vdb_upload example pipeline triggers an internal error in Triton ([#1649](https://github.com/nv-morpheus/Morpheus/pull/1649)) -- vdb_upload example pipeline error on inserting large strings ([#1650](https://github.com/nv-morpheus/Morpheus/pull/1650)) -- vdb_upload example pipeline only works with C++ mode disabled ([#1651](https://github.com/nv-morpheus/Morpheus/pull/1651)) Refer to [open issues in the Morpheus project](https://github.com/nv-morpheus/Morpheus/issues) diff --git a/examples/llm/cli.py b/examples/llm/cli.py index 1ea9198dc1..c8aea20320 100644 --- a/examples/llm/cli.py +++ b/examples/llm/cli.py @@ -32,7 +32,7 @@ callback=parse_log_level, help="Specify the logging level to use.") @click.option('--use_cpp', - default=False, + default=True, type=bool, help=("Whether or not to use C++ node and message types or to prefer python. " "Only use as a last resort if bugs are encountered")) diff --git a/examples/llm/vdb_upload/pipeline.py b/examples/llm/vdb_upload/pipeline.py index 494446d16c..5d5fbee8e4 100644 --- a/examples/llm/vdb_upload/pipeline.py +++ b/examples/llm/vdb_upload/pipeline.py @@ -19,7 +19,9 @@ from vdb_upload.helper import process_vdb_sources from morpheus.config import Config +from morpheus.messages import ControlMessage from morpheus.pipeline.pipeline import Pipeline +from morpheus.pipeline.stage_decorator import stage from morpheus.stages.general.monitor_stage import MonitorStage from morpheus.stages.general.trigger_stage import TriggerStage from morpheus.stages.inference.triton_inference_stage import TritonInferenceStage @@ -78,6 +80,20 @@ def pipeline(pipeline_config: Config, monitor_2 = pipe.add_stage( MonitorStage(pipeline_config, description="Inference rate", unit="events", delayed_start=True)) + @stage + def embedding_tensor_to_df(message: ControlMessage, *, embedding_tensor_name='probs') -> ControlMessage: + """ + Copies the probs tensor to the 'embedding' field of the dataframe. 
+ """ + msg_meta = message.payload() + with msg_meta.mutable_dataframe() as df: + embedding_tensor = message.tensors().get_tensor(embedding_tensor_name) + df['embedding'] = embedding_tensor.tolist() + + return message + + embedding_tensor_to_df_stage = pipe.add_stage(embedding_tensor_to_df(pipeline_config)) + vector_db = pipe.add_stage(WriteToVectorDBStage(pipeline_config, **vdb_config)) monitor_3 = pipe.add_stage( @@ -96,7 +112,8 @@ def pipeline(pipeline_config: Config, pipe.add_edge(nlp_stage, monitor_1) pipe.add_edge(monitor_1, embedding_stage) pipe.add_edge(embedding_stage, monitor_2) - pipe.add_edge(monitor_2, vector_db) + pipe.add_edge(monitor_2, embedding_tensor_to_df_stage) + pipe.add_edge(embedding_tensor_to_df_stage, vector_db) pipe.add_edge(vector_db, monitor_3) start_time = time.time() diff --git a/examples/llm/vdb_upload/vdb_utils.py b/examples/llm/vdb_upload/vdb_utils.py index d3aed615d7..7740acbc7c 100644 --- a/examples/llm/vdb_upload/vdb_utils.py +++ b/examples/llm/vdb_upload/vdb_utils.py @@ -315,14 +315,15 @@ def build_cli_configs(source_type, cli_vdb_conf = { # Vector db upload has some significant transaction overhead, batch size here should be as large as possible 'batch_size': 16384, - 'resource_name': vector_db_resource_name, 'embedding_size': embedding_size, 'recreate': True, + 'resource_name': vector_db_resource_name, 'resource_schemas': { vector_db_resource_name: build_defualt_milvus_config(embedding_size) if (vector_db_service == 'milvus') else None, }, 'service': vector_db_service, + 'truncate_long_strings': True, 'uri': vector_db_uri, } diff --git a/morpheus/io/utils.py b/morpheus/io/utils.py index 7c4cfce260..d8b286a8e8 100644 --- a/morpheus/io/utils.py +++ b/morpheus/io/utils.py @@ -14,7 +14,16 @@ # limitations under the License. """IO utilities.""" +import logging + +import pandas as pd + +import cudf + from morpheus.utils.type_aliases import DataFrameType +from morpheus.utils.type_aliases import SeriesType + +logger = logging.getLogger(__name__) def filter_null_data(x: DataFrameType): @@ -31,3 +40,90 @@ def filter_null_data(x: DataFrameType): return x return x[~x['data'].isna()] + + +def cudf_string_cols_exceed_max_bytes(df: cudf.DataFrame, column_max_bytes: dict[str, int]) -> bool: + """ + Checks a cudf DataFrame for string columns that exceed a maximum number of bytes and thus need to be truncated by + calling `truncate_string_cols_by_bytes`. + + This method utilizes a cudf method `Series.str.byte_count()` method that pandas lacks, which can avoid a costly + call to truncate_string_cols_by_bytes. + + Parameters + ---------- + df : DataFrameType + The dataframe to check. + column_max_bytes: dict[str, int] + A mapping of string column names to the maximum number of bytes for each column. + + Returns + ------- + bool + True if truncation is needed, False otherwise. + """ + if not isinstance(df, cudf.DataFrame): + raise ValueError("Expected cudf DataFrame") + + for (col, max_bytes) in column_max_bytes.items(): + series: cudf.Series = df[col] + + assert series.dtype == 'object' + + if series.str.byte_count().max() > max_bytes: + return True + + return False + + +def truncate_string_cols_by_bytes(df: DataFrameType, + column_max_bytes: dict[str, int], + warn_on_truncate: bool = True) -> bool: + """ + Truncates all string columns in a dataframe to a maximum number of bytes. This operation is performed in-place on + the dataframe. + + Parameters + ---------- + df : DataFrameType + The dataframe to truncate. 
+ column_max_bytes: dict[str, int] + A mapping of string column names to the maximum number of bytes for each column. + warn_on_truncate: bool, default True + Whether to log a warning when truncating a column. + + Returns + ------- + bool + True if truncation was performed, False otherwise. + """ + + performed_truncation = False + is_cudf = isinstance(df, cudf.DataFrame) + + for (col, max_bytes) in column_max_bytes.items(): + series: SeriesType = df[col] + + if is_cudf: + series: pd.Series = series.to_pandas() + + assert series.dtype == 'object', f"Expected string column '{col}'" + + encoded_series = series.str.encode(encoding='utf-8', errors='strict') + if encoded_series.str.len().max() > max_bytes: + performed_truncation = True + if warn_on_truncate: + logger.warning("Truncating column '%s' to %d bytes", col, max_bytes) + + truncated_series = encoded_series.str.slice(0, max_bytes) + + # There is a possibility that slicing by max_len will slice a multi-byte character in half setting + # errors='ignore' will cause the resulting string to be truncated after the last full character + decoded_series = truncated_series.str.decode(encoding='utf-8', errors='ignore') + + if is_cudf: + df[col] = cudf.Series.from_pandas(decoded_series) + else: + df[col] = decoded_series + + return performed_truncation diff --git a/morpheus/service/vdb/milvus_vector_db_service.py b/morpheus/service/vdb/milvus_vector_db_service.py index 37cd82d1ba..09c68f15cd 100644 --- a/morpheus/service/vdb/milvus_vector_db_service.py +++ b/morpheus/service/vdb/milvus_vector_db_service.py @@ -20,18 +20,24 @@ import typing from functools import wraps -import pandas as pd - import cudf +from morpheus.io.utils import cudf_string_cols_exceed_max_bytes +from morpheus.io.utils import truncate_string_cols_by_bytes from morpheus.service.vdb.vector_db_service import VectorDBResourceService from morpheus.service.vdb.vector_db_service import VectorDBService +from morpheus.utils.type_aliases import DataFrameType logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None IMPORT_ERROR_MESSAGE = "MilvusVectorDBResourceService requires the milvus and pymilvus packages to be installed." +# Milvus has a max string length in bytes of 65,535. Multi-byte characters like "ñ" will have a string length of 1, the +# byte length encoded as UTF-8 will be 2 +# https://milvus.io/docs/limitations.md#Length-of-a-string +MAX_STRING_LENGTH_BYTES = 65_535 + try: import pymilvus from pymilvus.orm.mutation import MutationResult @@ -222,9 +228,11 @@ class MilvusVectorDBResourceService(VectorDBResourceService): Name of the resource. client : MilvusClient An instance of the MilvusClient for interaction with the Milvus Vector Database. 
+ truncate_long_strings : bool, optional + When true, truncate strings values that are longer than the max length of the field """ - def __init__(self, name: str, client: "MilvusClient") -> None: + def __init__(self, name: str, client: "MilvusClient", truncate_long_strings: bool = False) -> None: if IMPORT_EXCEPTION is not None: raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION @@ -239,13 +247,24 @@ def __init__(self, name: str, client: "MilvusClient") -> None: self._vector_field = None self._fillna_fields_dict = {} + # Mapping of field name to max length for string fields + self._fields_max_length: dict[str, int] = {} + for field in self._fields: if field.dtype == pymilvus.DataType.FLOAT_VECTOR: self._vector_field = field.name else: + # Intentionally excluding pymilvus.DataType.STRING, in our current version it isn't supported, and in + # some database systems string types don't have a max length. + if field.dtype == pymilvus.DataType.VARCHAR: + max_length = field.params.get('max_length') + if max_length is not None: + self._fields_max_length[field.name] = max_length if not field.auto_id: self._fillna_fields_dict[field.name] = field.dtype + self._truncate_long_strings = truncate_long_strings + self._collection.load() def _set_up_collection(self): @@ -275,13 +294,13 @@ def insert(self, data: list[list] | list[dict], **kwargs: dict[str, typing.Any]) return self._insert_result_to_dict(result=result) - def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwargs: dict[str, typing.Any]) -> dict: + def insert_dataframe(self, df: DataFrameType, **kwargs: dict[str, typing.Any]) -> dict: """ Insert a dataframe entires into the vector database. Parameters ---------- - df : typing.Union[cudf.DataFrame, pd.DataFrame] + df : DataFrameType Dataframe to be inserted into the collection. **kwargs : dict[str, typing.Any] Extra keyword arguments specific to the vector database implementation. @@ -291,10 +310,6 @@ def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwa dict Returns response content as a dictionary. """ - - if isinstance(df, cudf.DataFrame): - df = df.to_pandas() - # Ensure that there are no None values in the DataFrame entries. 
for field_name, dtype in self._fillna_fields_dict.items(): if dtype in (pymilvus.DataType.VARCHAR, pymilvus.DataType.STRING): @@ -311,11 +326,24 @@ def insert_dataframe(self, df: typing.Union[cudf.DataFrame, pd.DataFrame], **kwa else: logger.info("Skipped checking 'None' in the field: %s, with datatype: %s", field_name, dtype) + needs_truncate = self._truncate_long_strings + if needs_truncate and isinstance(df, cudf.DataFrame): + # Cudf specific optimization, we can avoid a costly call to truncate_string_cols_by_bytes if all of the + # string columns are already below the max length + needs_truncate = cudf_string_cols_exceed_max_bytes(df, self._fields_max_length) + # From the schema, this is the list of columns we need, excluding any auto_id columns column_names = [field.name for field in self._fields if not field.auto_id] + collection_df = df[column_names] + if isinstance(collection_df, cudf.DataFrame): + collection_df = collection_df.to_pandas() + + if needs_truncate: + truncate_string_cols_by_bytes(collection_df, self._fields_max_length, warn_on_truncate=True) + # Note: dataframe columns has to be in the order of collection schema fields.s - result = self._collection.insert(data=df[column_names], **kwargs) + result = self._collection.insert(data=collection_df, **kwargs) self._collection.flush() return self._insert_result_to_dict(result=result) @@ -575,6 +603,8 @@ class MilvusVectorDBService(VectorDBService): The port number for connecting to the Milvus server. alias : str, optional Alias for the Milvus connection, by default "default". + truncate_long_strings : bool, optional + When true, truncate strings values that are longer than the max length of the field **kwargs : dict Additional keyword arguments specific to the Milvus connection configuration. """ @@ -589,13 +619,17 @@ def __init__(self, password: str = "", db_name: str = "", token: str = "", + truncate_long_strings: bool = False, **kwargs: dict[str, typing.Any]): + self._truncate_long_strings = truncate_long_strings self._client = MilvusClient(uri=uri, user=user, password=password, db_name=db_name, token=token, **kwargs) def load_resource(self, name: str, **kwargs: dict[str, typing.Any]) -> MilvusVectorDBResourceService: - - return MilvusVectorDBResourceService(name=name, client=self._client, **kwargs) + return MilvusVectorDBResourceService(name=name, + client=self._client, + truncate_long_strings=self._truncate_long_strings, + **kwargs) def has_store_object(self, name: str) -> bool: """ @@ -688,7 +722,7 @@ def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing. 
for part in partition_conf["partitions"]: self._client.create_partition(collection_name=name, partition_name=part["name"], timeout=timeout) - def _build_schema_conf(self, df: typing.Union[cudf.DataFrame, pd.DataFrame]) -> list[dict]: + def _build_schema_conf(self, df: DataFrameType) -> list[dict]: fields = [] # Always add a primary key @@ -708,7 +742,7 @@ def _build_schema_conf(self, df: typing.Union[cudf.DataFrame, pd.DataFrame]) -> } if (field_dict["dtype"] == pymilvus.DataType.VARCHAR): - field_dict["max_length"] = 65_535 + field_dict["max_length"] = MAX_STRING_LENGTH_BYTES if (field_dict["dtype"] == pymilvus.DataType.FLOAT_VECTOR or field_dict["dtype"] == pymilvus.DataType.BINARY_VECTOR): @@ -726,7 +760,7 @@ def _build_schema_conf(self, df: typing.Union[cudf.DataFrame, pd.DataFrame]) -> def create_from_dataframe(self, name: str, - df: typing.Union[cudf.DataFrame, pd.DataFrame], + df: DataFrameType, overwrite: bool = False, **kwargs: dict[str, typing.Any]) -> None: """ @@ -736,7 +770,7 @@ def create_from_dataframe(self, ---------- name : str Name of the collection. - df : Union[cudf.DataFrame, pd.DataFrame] + df : DataFrameType The dataframe to create the collection from. overwrite : bool, optional Whether to overwrite the collection if it already exists. Default is False. @@ -797,10 +831,7 @@ def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str, return resource.insert(data, **kwargs) @with_collection_lock - def insert_dataframe(self, - name: str, - df: typing.Union[cudf.DataFrame, pd.DataFrame], - **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: + def insert_dataframe(self, name: str, df: DataFrameType, **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]: """ Converts dataframe to rows and insert to a collection in the Milvus vector database. @@ -808,7 +839,7 @@ def insert_dataframe(self, ---------- name : str Name of the collection to be inserted. - df : typing.Union[cudf.DataFrame, pd.DataFrame] + df : DataFrameType Dataframe to be inserted in the collection. **kwargs : dict[str, typing.Any] Additional keyword arguments containing collection configuration. 
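With the pieces above in place, `insert_dataframe` only pays for truncation when a string column actually overflows its schema limit. The following is a minimal sketch of how the two new helpers compose; the column name, data, and dataframe here are illustrative only (the 65,535-byte limit mirrors the Milvus `VARCHAR` cap), not taken from this patch:

```python
import cudf

from morpheus.io.utils import cudf_string_cols_exceed_max_bytes
from morpheus.io.utils import truncate_string_cols_by_bytes

# Hypothetical dataframe with one over-long string column.
df = cudf.DataFrame({"page_content": ["a" * 70_000, "short row"]})
limits = {"page_content": 65_535}  # Milvus VARCHAR max_length, in bytes

# Cheap GPU-side check first: Series.str.byte_count() lets us skip the
# costlier encode/slice pass when nothing actually exceeds its limit.
if cudf_string_cols_exceed_max_bytes(df, limits):
    pdf = df.to_pandas()  # insert_dataframe() also converts to pandas before truncating
    truncate_string_cols_by_bytes(pdf, limits, warn_on_truncate=True)
    df = cudf.DataFrame.from_pandas(pdf)
```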
diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index 8b1fa75d3a..ab12afe4d3 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -286,8 +286,12 @@ def set_output_fut(resp: TensorMemory, inner_batch, batch_future: mrc.Future): if (_df is not None and not _df.empty): _message_meta = CppMessageMeta(df=_df) _message.payload(_message_meta) - _message.tensors().set_tensor("probs", output_message.get_probs_tensor()) - print(_df) + + response_tensors = output_message.tensors + cm_tensors = _message.tensors() + for (name, tensor) in response_tensors.items(): + cm_tensors.set_tensor(name, tensor) + output_message = _message return output_message diff --git a/morpheus/utils/type_aliases.py b/morpheus/utils/type_aliases.py index f944c3f9cb..cd394664e6 100644 --- a/morpheus/utils/type_aliases.py +++ b/morpheus/utils/type_aliases.py @@ -20,3 +20,4 @@ import cudf DataFrameType = typing.Union[pd.DataFrame, cudf.DataFrame] +SeriesType = typing.Union[pd.Series, cudf.Series] diff --git a/tests/conftest.py b/tests/conftest.py index 1f8f0ef425..30cc8f869d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1035,6 +1035,12 @@ def simple_collection_config_fixture(): yield load_json_file(filename="service/milvus_simple_collection_conf.json") +@pytest.fixture(scope="session", name="string_collection_config") +def string_collection_config_fixture(): + from _utils import load_json_file + yield load_json_file(filename="service/milvus_string_collection_conf.json") + + @pytest.fixture(name="nemollm", scope='session') def nemollm_fixture(fail_missing: bool): """ diff --git a/tests/io/test_io_utils.py b/tests/io/test_io_utils.py new file mode 100755 index 0000000000..1ad46b75cb --- /dev/null +++ b/tests/io/test_io_utils.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from collections.abc import Callable + +import pytest + +import cudf + +from _utils.dataset_manager import DatasetManager +from morpheus.io import utils as io_utils +from morpheus.utils.type_aliases import DataFrameType + +MULTI_BYTE_STRINGS = ["ñäμɛ", "Moρφέας", "taç"] + + +def _mk_df(df_class: Callable[..., DataFrameType], data: dict[str, list[str]]) -> DataFrameType: + """ + Create a dataframe with a 'data' column containing the given data, and some other columns with different data types + """ + num_rows = len(data[list(data.keys())[0]]) + + float_col = [] + int_col = [] + short_str_col = [] + for i in range(num_rows): + float_col.append(i) + int_col.append(i) + short_str_col.append(f"{i}"[0:3]) + + df_data = data.copy() + df_data.update({"float_col": float_col, "int_col": int_col, "short_str_col": short_str_col}) + + return df_class(df_data) + + +@pytest.mark.parametrize( + "data, max_bytes, expected", + [({ + "data": MULTI_BYTE_STRINGS[:] + }, { + "data": 8 + }, True), ({ + "data": MULTI_BYTE_STRINGS[:], "ignored_col": ["a" * 20, "b" * 20, "c" * 20] + }, { + "data": 12 + }, False), ({ + "data": MULTI_BYTE_STRINGS[:] + }, { + "data": 20 + }, False), ({ + "data": ["." * 20] + }, { + "data": 19 + }, True), ({ + "data": ["." * 20] + }, { + "data": 20 + }, False), ({ + "data": ["." * 20] + }, { + "data": 21 + }, False)]) +def test_cudf_needs_truncate(data: list[str], max_bytes: int, expected: bool): + df = _mk_df(cudf.DataFrame, data) + assert io_utils.cudf_string_cols_exceed_max_bytes(df, max_bytes) is expected + + +@pytest.mark.parametrize("warn_on_truncate", [True, False]) +@pytest.mark.parametrize( + "data, max_bytes, expected_data", + [({ + "multibyte_strings": MULTI_BYTE_STRINGS[:], "ascii_strings": ["a" * 20, "b" * 21, "c" * 19] + }, { + "multibyte_strings": 4, "ascii_strings": 20 + }, { + "multibyte_strings": ["ñä", "Moρ", "taç"], "ascii_strings": ["a" * 20, "b" * 20, "c" * 19] + }), + ({ + "data": MULTI_BYTE_STRINGS[:], "ignored_col": ["a" * 20, "b" * 20, "c" * 20] + }, { + "data": 5 + }, { + "data": ["ñä", "Moρ", "taç"], "ignored_col": ["a" * 20, "b" * 20, "c" * 20] + }), ({ + "data": MULTI_BYTE_STRINGS[:] + }, { + "data": 8 + }, { + "data": ["ñäμɛ", "Moρφέ", "taç"] + }), ({ + "data": MULTI_BYTE_STRINGS[:] + }, { + "data": 9 + }, { + "data": ["ñäμɛ", "Moρφέ", "taç"] + }), ({ + "data": MULTI_BYTE_STRINGS[:] + }, { + "data": 12 + }, { + "data": MULTI_BYTE_STRINGS[:] + })]) +def test_truncate_string_cols_by_bytes(dataset: DatasetManager, + data: dict[str, list[str]], + max_bytes: int, + expected_data: dict[str, list[str]], + warn_on_truncate: bool): + df = _mk_df(dataset.df_class, data) + + expect_truncation = (data != expected_data) + expected_df_class = dataset.df_class + + expected_df = _mk_df(expected_df_class, expected_data) + + performed_truncation = io_utils.truncate_string_cols_by_bytes(df, max_bytes, warn_on_truncate=warn_on_truncate) + + assert performed_truncation is expect_truncation + assert isinstance(df, expected_df_class) + + dataset.assert_df_equal(df, expected_df) diff --git a/tests/test_milvus_vector_db_service.py b/tests/test_milvus_vector_db_service.py index 723e7e7f8e..3d0548176d 100644 --- a/tests/test_milvus_vector_db_service.py +++ b/tests/test_milvus_vector_db_service.py @@ -16,14 +16,18 @@ import json import random +import string import numpy as np import pymilvus import pytest from pymilvus import DataType +from pymilvus import MilvusException import cudf +from _utils.dataset_manager import DatasetManager +from 
morpheus.service.vdb.milvus_vector_db_service import MAX_STRING_LENGTH_BYTES from morpheus.service.vdb.milvus_vector_db_service import FieldSchemaEncoder from morpheus.service.vdb.milvus_vector_db_service import MilvusVectorDBService @@ -71,6 +75,45 @@ def sample_field_fixture(): return pymilvus.FieldSchema(name="test_field", dtype=pymilvus.DataType.INT64) +def _mk_long_string(source_chars: str) -> str: + """ + Yields a string longer than MAX_STRING_LENGTH_BYTES from source chars + """ + source_chars_byte_len = len(source_chars.encode("utf-8")) + source_data = list(source_chars) + + byte_len = 0 + long_str_data = [] + while byte_len <= MAX_STRING_LENGTH_BYTES: + long_str_data.extend(source_data) + byte_len += source_chars_byte_len + + return "".join(long_str_data) + + +@pytest.fixture(scope="module", name="long_ascii_string") +def long_ascii_string_fixture(): + """ + Yields a string longer than MAX_STRING_LENGTH_BYTES containing only ascii (single-byte) characters + """ + return _mk_long_string(string.ascii_letters) + + +@pytest.fixture(scope="module", name="long_multibyte_string") +def long_multibyte_string_fixture(): + """ + Yields a string longer than MAX_STRING_LENGTH_BYTES containing a mix of single and multi-byte characters + """ + return _mk_long_string("Moρφέας") + + +def _truncate_string_by_bytes(s: str, max_bytes: int) -> str: + """ + Truncates a string to the given number of bytes + """ + return s.encode("utf-8")[:max_bytes].decode("utf-8", errors="ignore") + + @pytest.mark.milvus def test_create_and_drop_collection(idx_part_collection_config: dict, milvus_service: MilvusVectorDBService): collection_name = "test_collection" @@ -467,3 +510,98 @@ def test_fse_from_dict(): result = FieldSchemaEncoder.from_dict(data) assert result.name == "test_field" assert result.dtype == pymilvus.DataType.INT64 + + +@pytest.mark.milvus +@pytest.mark.slow +@pytest.mark.parametrize("use_multi_byte_strings", [True, False], ids=["multi_byte", "ascii"]) +@pytest.mark.parametrize("truncate_long_strings", [True, False], ids=["truncate", "no_truncate"]) +@pytest.mark.parametrize("exceed_max_str_len", [True, False], ids=["exceed_max_len", "within_max_len"]) +def test_insert_dataframe(milvus_server_uri: str, + string_collection_config: dict, + dataset: DatasetManager, + use_multi_byte_strings: bool, + truncate_long_strings: bool, + exceed_max_str_len: bool, + long_ascii_string: str, + long_multibyte_string: str): + num_rows = 10 + collection_name = "test_insert_dataframe" + + milvus_service = MilvusVectorDBService(uri=milvus_server_uri, truncate_long_strings=truncate_long_strings) + + # Make sure to drop any existing collection from previous runs. + milvus_service.drop(collection_name) + + # Create a collection. + milvus_service.create(collection_name, **string_collection_config) + + short_str_col_len = -1 + long_str_col_len = -1 + for field_conf in string_collection_config["schema_conf"]["schema_fields"]: + if field_conf["name"] == "short_str_col": + short_str_col_len = field_conf["params"]["max_length"] + + elif field_conf["name"] == "long_str_col": + long_str_col_len = field_conf["params"]["max_length"] + + assert short_str_col_len > 0, "short_str_col length is not set" + assert long_str_col_len == MAX_STRING_LENGTH_BYTES, "long_str_col length is not set to MAX_STRING_LENGTH_BYTES" + + # Construct the dataframe. 
+ ids = [] + embedding_data = [] + long_str_col = [] + short_str_col = [] + + if use_multi_byte_strings: + long_str = long_multibyte_string + else: + long_str = long_ascii_string + + short_str = long_str[:7] + if not exceed_max_str_len: + short_str = _truncate_string_by_bytes(short_str, short_str_col_len) + long_str = _truncate_string_by_bytes(long_str, MAX_STRING_LENGTH_BYTES) + + for i in range(num_rows): + ids.append(i) + embedding_data.append([i / 10.0] * 3) + + long_str_col.append(long_str) + short_str_col.append(short_str) + + df = dataset.df_class({ + "id": ids, "embedding": embedding_data, "long_str_col": long_str_col, "short_str_col": short_str_col + }) + + expected_long_str = [] + for long_str in long_str_col: + if truncate_long_strings: + expected_long_str.append( + long_str.encode("utf-8")[:MAX_STRING_LENGTH_BYTES].decode("utf-8", errors="ignore")) + else: + expected_long_str.append(long_str) + + expected_df = dataset.df_class({ + "id": ids, "embedding": embedding_data, "long_str_col": expected_long_str, "short_str_col": short_str_col + }) + + if (exceed_max_str_len and (not truncate_long_strings)): + with pytest.raises(MilvusException, match="string exceeds max length"): + milvus_service.insert_dataframe(collection_name, df) + + return # Skip the rest of the test if the string column exceeds the maximum length. + + milvus_service.insert_dataframe(collection_name, df) + + # Retrieve inserted data by primary keys. + retrieved_data = milvus_service.retrieve_by_keys(collection_name, ids) + assert len(retrieved_data) == num_rows + + # Clean up the collection. + milvus_service.drop(collection_name) + + result_df = dataset.df_class(retrieved_data) + + dataset.compare_df(result_df, expected_df) diff --git a/tests/tests_data/service/milvus_string_collection_conf.json b/tests/tests_data/service/milvus_string_collection_conf.json new file mode 100644 index 0000000000..a75970a361 --- /dev/null +++ b/tests/tests_data/service/milvus_string_collection_conf.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adbc34ae22c1037c8308b5521a01597a81d0ea117cc691e72566b463c0be6e9a +size 1083 From 9d3de8a5c60d6a9be3f8ff7a71c6a75fce6791f3 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Wed, 1 May 2024 15:09:21 -0700 Subject: [PATCH 18/38] Strip HTML & XML tags from RSS feed input (#1670) * Optionally strip HTML & XML tags embedded in RSS feeds Requires PR #1665 to be merged first Closes #1666 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
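The stripping itself (shown in the `rss_controller.py` hunk below) parses each field with BeautifulSoup and keeps only the text. A minimal sketch of that approach, assuming `beautifulsoup4` is installed (the sample markup is illustrative):

```python
from bs4 import BeautifulSoup

# Feeds often embed HTML even in fields advertised as plain text; parsing the
# field and calling get_text() keeps the readable content and drops the tags.
raw = "<p>CISA released <a href='https://example.com'>an advisory</a> today.</p>"
print(BeautifulSoup(raw, features="html.parser").get_text())
# -> CISA released an advisory today.
```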
Authors: - David Gardner (https://github.com/dagardner-nv) - Michael Demoret (https://github.com/mdemoret-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1670 --- .../all_cuda-121_arch-x86_64.yaml | 1 + .../dev_cuda-121_arch-x86_64.yaml | 1 + .../examples_cuda-121_arch-x86_64.yaml | 1 + .../runtime_cuda-121_arch-x86_64.yaml | 1 + dependencies.yaml | 1 + .../llm/vdb_upload/module/rss_source_pipe.py | 3 + examples/llm/vdb_upload/vdb_config.yaml | 1 + examples/llm/vdb_upload/vdb_utils.py | 1 + morpheus/controllers/rss_controller.py | 54 ++++++++++++++- morpheus/modules/input/rss_source.py | 33 +++++---- morpheus/modules/schemas/rss_source_schema.py | 1 + morpheus/stages/input/rss_source_stage.py | 8 ++- tests/controllers/test_rss_controller.py | 67 ++++++++++++++++--- 13 files changed, 142 insertions(+), 31 deletions(-) diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-121_arch-x86_64.yaml index b440991aa3..3b310995fb 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-121_arch-x86_64.yaml @@ -13,6 +13,7 @@ dependencies: - appdirs - arxiv=1.4 - automake +- beautifulsoup4 - benchmark=1.8.3 - boost-cpp=1.84 - boto3 diff --git a/conda/environments/dev_cuda-121_arch-x86_64.yaml b/conda/environments/dev_cuda-121_arch-x86_64.yaml index 2ee99333a0..23ff2c707e 100644 --- a/conda/environments/dev_cuda-121_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-121_arch-x86_64.yaml @@ -11,6 +11,7 @@ channels: dependencies: - appdirs - automake +- beautifulsoup4 - benchmark=1.8.3 - boost-cpp=1.84 - breathe=4.35.0 diff --git a/conda/environments/examples_cuda-121_arch-x86_64.yaml b/conda/environments/examples_cuda-121_arch-x86_64.yaml index 857b73aa85..11d5e535ce 100644 --- a/conda/environments/examples_cuda-121_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-121_arch-x86_64.yaml @@ -12,6 +12,7 @@ dependencies: - anyio>=3.7 - appdirs - arxiv=1.4 +- beautifulsoup4 - boto3 - click >=8 - cuml=24.02.* diff --git a/conda/environments/runtime_cuda-121_arch-x86_64.yaml b/conda/environments/runtime_cuda-121_arch-x86_64.yaml index 3f9543d426..80f6f995d2 100644 --- a/conda/environments/runtime_cuda-121_arch-x86_64.yaml +++ b/conda/environments/runtime_cuda-121_arch-x86_64.yaml @@ -10,6 +10,7 @@ channels: - pytorch dependencies: - appdirs +- beautifulsoup4 - click >=8 - datacompy=0.10 - dill=0.3.7 diff --git a/dependencies.yaml b/dependencies.yaml index 616c1db3de..7f1f9145ef 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -249,6 +249,7 @@ dependencies: - &dill dill=0.3.7 - &scikit-learn scikit-learn=1.3.2 - appdirs + - beautifulsoup4 - datacompy=0.10 - elasticsearch==8.9.0 - feedparser=6.0.10 diff --git a/examples/llm/vdb_upload/module/rss_source_pipe.py b/examples/llm/vdb_upload/module/rss_source_pipe.py index ff61940b8c..55b309e032 100644 --- a/examples/llm/vdb_upload/module/rss_source_pipe.py +++ b/examples/llm/vdb_upload/module/rss_source_pipe.py @@ -49,6 +49,7 @@ class RSSSourcePipeSchema(BaseModel): request_timeout_sec: float = 2.0 run_indefinitely: bool = True stop_after_rec: int = 0 + strip_markup: bool = True vdb_resource_name: str web_scraper_config: Optional[Dict[Any, Any]] = None @@ -98,6 +99,7 @@ def _rss_source_pipe(builder: mrc.Builder): - **request_timeout_sec**: Timeout in seconds for RSS feed requests. - **run_indefinitely**: Boolean to indicate continuous running. 
     - **stop_after**: Number of records to process before stopping (0 for indefinite).
+    - **strip_markup**: When True, strip HTML & XML markup from feed content.
     - **web_scraper_config**: Configuration for the web scraper module.
         - **chunk_overlap**: Overlap size for chunks in web scraping.
         - **chunk_size**: Size of content chunks for processing.
@@ -131,6 +133,7 @@ def _rss_source_pipe(builder: mrc.Builder):
         "request_timeout_sec": validated_config.request_timeout_sec,
         "interval_sec": validated_config.interval_sec,
         "stop_after_rec": validated_config.stop_after_rec,
+        "strip_markup": validated_config.strip_markup,
     }
 
     rss_source_loader = RSSSourceLoaderFactory.get_instance("rss_source", {"rss_source": rss_source_config})
diff --git a/examples/llm/vdb_upload/vdb_config.yaml b/examples/llm/vdb_upload/vdb_config.yaml
index ac93a47615..5698cc2e83 100644
--- a/examples/llm/vdb_upload/vdb_config.yaml
+++ b/examples/llm/vdb_upload/vdb_config.yaml
@@ -76,6 +76,7 @@ vdb_pipeline:
       request_timeout_sec: 2.0
       run_indefinitely: true
       stop_after_rec: 0
+      strip_markup: true
       web_scraper_config:
         chunk_overlap: 51
         chunk_size: 512
diff --git a/examples/llm/vdb_upload/vdb_utils.py b/examples/llm/vdb_upload/vdb_utils.py
index 7740acbc7c..d9e39b2553 100644
--- a/examples/llm/vdb_upload/vdb_utils.py
+++ b/examples/llm/vdb_upload/vdb_utils.py
@@ -142,6 +142,7 @@ def _build_default_rss_source(enable_cache,
         "interval_sec": interval_secs,
         "request_timeout_sec": rss_request_timeout_sec,
         "run_indefinitely": run_indefinitely,
+        "strip_markup": True,
         "vdb_resource_name": vector_db_resource_name,
         "web_scraper_config": {
             "chunk_size": content_chunking_size,
diff --git a/morpheus/controllers/rss_controller.py b/morpheus/controllers/rss_controller.py
index 5b9c36f369..a1972c406f 100644
--- a/morpheus/controllers/rss_controller.py
+++ b/morpheus/controllers/rss_controller.py
@@ -70,8 +70,17 @@ class RSSController:
         Cooldown interval in seconds if there is a failure in fetching or parsing the feed.
     request_timeout : float, optional, default = 2.0
         Request timeout in secs to fetch the feed.
+    strip_markup : bool, optional, default = False
+        When true, strip HTML & XML markup from the content, summary and title fields.
     """
 
+    # Fields which may contain HTML or XML content
+    MARKUP_FIELDS = (
+        "content",
+        "summary",
+        "title",
+    )
+
     def __init__(self,
                  feed_input: str | list[str],
                  batch_size: int = 128,
@@ -79,7 +88,8 @@ def __init__(self,
                  enable_cache: bool = False,
                  cache_dir: str = "./.cache/http",
                  cooldown_interval: int = 600,
-                 request_timeout: float = 2.0):
+                 request_timeout: float = 2.0,
+                 strip_markup: bool = False):
 
         if IMPORT_EXCEPTION is not None:
             raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION
@@ -92,6 +102,7 @@ def __init__(self,
         self._previous_entries = set()  # Stores the IDs of previous entries to prevent the processing of duplicates.
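The `_strip_markup_from_fields` helper added a few hunks below has to cope with two shapes feedparser produces: `content` is a list of dicts each carrying `value` and `type`, while `summary` and `title` are bare strings whose mime type lives in a parallel `*_detail` dict. A hedged sketch of those shapes (the entry literal is fabricated for illustration; key names follow feedparser's documented layout):

```python
entry = {
    "title": "Advisory",
    "title_detail": {"type": "text/plain", "value": "Advisory"},
    "summary": "<p>looks plain, still contains HTML</p>",
    "summary_detail": {"type": "text/html", "value": "..."},
    "content": [{"type": "text/html", "value": "<div>body</div>"}],
}

# List shape: each item carries its own mime type.
for item in entry["content"]:
    print(item["type"], "->", item["value"])

# Scalar shape: the mime type comes from the companion *_detail dict.
print(entry["summary_detail"].get("type", "text/plain"))
```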
self._cooldown_interval = cooldown_interval self._request_timeout = request_timeout + self._strip_markup = strip_markup # Validate feed_input for f in self._feed_input: @@ -236,6 +247,44 @@ def _try_parse_feed(self, url: str) -> "feedparser.FeedParserDict": return feed + @staticmethod + def _strip_markup_from_field(field: str, mime_type: str) -> str: + if mime_type.endswith("xml"): + parser = "xml" + else: + parser = "html.parser" + + try: + soup = BeautifulSoup(field, features=parser) + return soup.get_text() + except Exception as ex: + logger.error("Failed to strip tags from field: %s: %s", field, ex) + return field + + def _strip_markup_from_fields(self, entry: "feedparser.FeedParserDict"): + """ + Strip HTML & XML tags from the content, summary and title fields. + + Per note in feedparser documentation even if a field is advertized as plain text, it may still contain HTML + https://feedparser.readthedocs.io/en/latest/html-sanitization.html + """ + for field in self.MARKUP_FIELDS: + field_value = entry.get(field) + if field_value is not None: + if isinstance(field_value, list): + for field_item in field_value: + mime_type = field_item.get("type", "text/plain") + field_item["value"] = self._strip_markup_from_field(field_item["value"], mime_type) + field_item["type"] = "text/plain" + else: + detail_field_name = f"{field}_detail" + detail_field: dict = entry.get(detail_field_name, {}) + mime_type = detail_field.get("type", "text/plain") + + entry[field] = self._strip_markup_from_field(field_value, mime_type) + detail_field["type"] = "text/plain" + entry[detail_field_name] = detail_field + def parse_feeds(self): """ Parse the RSS feed using the feedparser library. @@ -291,6 +340,9 @@ def fetch_dataframes(self): entry_id = entry.get('id') current_entries.add(entry_id) if entry_id not in self._previous_entries: + if self._strip_markup: + self._strip_markup_from_fields(entry) + entry_accumulator.append(entry) if self._batch_size > 0 and len(entry_accumulator) >= self._batch_size: diff --git a/morpheus/modules/input/rss_source.py b/morpheus/modules/input/rss_source.py index 9f5dd6c316..1454a67b05 100644 --- a/morpheus/modules/input/rss_source.py +++ b/morpheus/modules/input/rss_source.py @@ -32,30 +32,26 @@ @register_module("rss_source", "morpheus") def _rss_source(builder: mrc.Builder): """ - A module for applying simple DataFrame schema transform policies. - - This module reads the configuration to determine how to set data types for columns, select, or rename them in the - dataframe. + A module for loading RSS feed items into a DataFrame. Parameters ---------- builder : mrc.Builder The Morpheus pipeline builder object. - Notes - ------------- - The configuration should be passed to the module through the `module_config` attribute of the builder. It should - contain a dictionary where each key is a column name, and the value is another dictionary with keys 'dtype' for - data type, 'op_type' for operation type ('select' or 'rename'), and optionally 'from' for the original column - name (if the column is to be renamed). 
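A hedged usage sketch of the controller API extended above (the feed URL is illustrative; `fetch_dataframes` yields `cudf.DataFrame` batches as shown in this diff):

```python
from morpheus.controllers.rss_controller import RSSController

# strip_markup=True runs the new tag stripping over content, summary and
# title before entries are batched into dataframes.
controller = RSSController(feed_input=["https://example.com/feed.xml"], strip_markup=True)

for df in controller.fetch_dataframes():
    print(df["title"].head())
```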
-
     Example Configuration
     ---------------------
     {
-        "summary": {"dtype": "str", "op_type": "select"},
-        "title": {"dtype": "str", "op_type": "select"},
-        "content": {"from": "page_content", "dtype": "str", "op_type": "rename"},
-        "source": {"from": "link", "dtype": "str", "op_type": "rename"}
+        "batch_size": 32,
+        "cache_dir": "./.cache/http",
+        "cooldown_interval_sec": 600,
+        "enable_cache": True,
+        "feed_input": ["https://nvidianews.nvidia.com/releases.xml"],
+        "interval_sec": 600,
+        "request_timeout_sec": 2.0,
+        "run_indefinitely": True,
+        "stop_after_rec": 0,
+        "strip_markup": True,
     }
     """
 
@@ -77,7 +73,8 @@ def _rss_source(builder: mrc.Builder):
                                enable_cache=validated_config.enable_cache,
                                cache_dir=validated_config.cache_dir,
                                cooldown_interval=validated_config.cooldown_interval_sec,
-                               request_timeout=validated_config.request_timeout_sec)
+                               request_timeout=validated_config.request_timeout_sec,
+                               strip_markup=validated_config.strip_markup)
 
     stop_requested = False
@@ -108,9 +105,9 @@ def fetch_feeds() -> MessageMeta:
 
         except Exception as exc:
             if not controller.run_indefinitely:
-                logger.error("Failed either in the process of fetching or processing entries: %d.", exc)
+                logger.error("Failed either in the process of fetching or processing entries: %s.", exc)
                 raise
-            logger.error("Failed either in the process of fetching or processing entries: %d.", exc)
+            logger.error("Failed either in the process of fetching or processing entries: %s.", exc)
 
         if not controller.run_indefinitely:
             stop_requested = True
diff --git a/morpheus/modules/schemas/rss_source_schema.py b/morpheus/modules/schemas/rss_source_schema.py
index 53c0928391..38facfed0e 100644
--- a/morpheus/modules/schemas/rss_source_schema.py
+++ b/morpheus/modules/schemas/rss_source_schema.py
@@ -31,6 +31,7 @@ class RSSSourceSchema(BaseModel):
     request_timeout_sec: float = 2.0
     interval_sec: int = 600
     stop_after_rec: int = 0
+    strip_markup: bool = True
 
     class Config:
         extra = "forbid"
diff --git a/morpheus/stages/input/rss_source_stage.py b/morpheus/stages/input/rss_source_stage.py
index d56a443542..a67d7997cb 100644
--- a/morpheus/stages/input/rss_source_stage.py
+++ b/morpheus/stages/input/rss_source_stage.py
@@ -52,6 +52,8 @@ class RSSSourceStage(PreallocatorMixin, SingleOutputSource):
         Cooldown interval in seconds if there is a failure in fetching or parsing the feed.
     request_timeout : float, optional, default = 2.0
         Request timeout in secs to fetch the feed.
+    strip_markup : bool, optional, default = False
+        When true, strip HTML & XML markup from the content, summary and title fields.
     """
 
     def __init__(self,
@@ -64,7 +66,8 @@ def __init__(self,
                  enable_cache: bool = False,
                  cache_dir: str = "./.cache/http",
                  cooldown_interval: int = 600,
-                 request_timeout: float = 2.0):
+                 request_timeout: float = 2.0,
+                 strip_markup: bool = False):
         super().__init__(c)
         self._stop_requested = False
@@ -87,7 +90,8 @@ def __init__(self,
                 "enable_cache": enable_cache,
                 "cache_dir": cache_dir,
                 "cooldown_interval_sec": cooldown_interval,
-                "request_timeout_sec": request_timeout
+                "request_timeout_sec": request_timeout,
+                "strip_markup": strip_markup
             }
         }
diff --git a/tests/controllers/test_rss_controller.py b/tests/controllers/test_rss_controller.py
index dad981ad07..9cb42ca815 100644
--- a/tests/controllers/test_rss_controller.py
+++ b/tests/controllers/test_rss_controller.py
@@ -17,15 +17,18 @@
 from os import path
 from unittest.mock import Mock
 from unittest.mock import patch
+from xml.etree import ElementTree
 
 import feedparser
 import pytest
+from bs4 import BeautifulSoup
 
 import cudf
 
 from _utils import TEST_DIRS
 from morpheus.controllers.rss_controller import FeedStats
 from morpheus.controllers.rss_controller import RSSController
+from morpheus.utils.type_aliases import SeriesType
 
 test_urls = ["https://fake.nvidia.com/rss/HomePage.xml"]
@@ -66,6 +69,11 @@ def mock_get_response_fixture() -> Mock:
     return mock_response
 
 
+@pytest.fixture(scope="module", name="cisa_rss_feed")
+def cisa_rss_feed_fixture() -> list[str]:
+    return [path.join(TEST_DIRS.tests_data_dir, 'service/cisa_rss_feed.xml')]
+
+
 @pytest.mark.parametrize("feed_input, expected_output", [(url, True) for url in test_urls])
 def test_run_indefinitely_true(feed_input: str, expected_output: bool):
     controller = RSSController(feed_input=feed_input)
@@ -95,9 +103,11 @@ def test_parse_feed_invalid_input(feed_input: list[str]):
         RSSController(feed_input=feed_input)
 
 
+@pytest.mark.parametrize("strip_markup", [False, True])
 @pytest.mark.parametrize("feed_input, expected_count", [(test_file_paths[0], 30)])
-def test_skip_duplicates_feed_inputs(feed_input: str, expected_count: int):
-    controller = RSSController(feed_input=[feed_input, feed_input])  # Pass duplicate feed inputs
+def test_skip_duplicates_feed_inputs(feed_input: str, expected_count: int, strip_markup: bool):
+    controller = RSSController(feed_input=[feed_input, feed_input],
+                               strip_markup=strip_markup)  # Pass duplicate feed inputs
     dataframes_generator = controller.fetch_dataframes()
     dataframe = next(dataframes_generator, None)
     assert isinstance(dataframe, cudf.DataFrame)
@@ -130,9 +140,10 @@ def test_fetch_dataframes_url(feed_input: str | list[str],
     assert len(dataframe) > 0
 
 
+@pytest.mark.parametrize("strip_markup", [False, True])
 @pytest.mark.parametrize("feed_input", [test_file_paths, test_file_paths[0]])
-def test_fetch_dataframes_filepath(feed_input: str | list[str]):
-    controller = RSSController(feed_input=feed_input)
+def test_fetch_dataframes_filepath(feed_input: str | list[str], strip_markup: bool):
+    controller = RSSController(feed_input=feed_input, strip_markup=strip_markup)
     dataframes_generator = controller.fetch_dataframes()
     dataframe = next(dataframes_generator, None)
     assert isinstance(dataframe, cudf.DataFrame)
@@ -140,18 +151,23 @@ def test_fetch_dataframes_filepath(feed_input: str | list[str]):
     assert len(dataframe) > 0
 
 
+@pytest.mark.parametrize("strip_markup", [False, True])
 @pytest.mark.parametrize("feed_input, batch_size", [(test_file_paths, 5)])
-def test_batch_size(feed_input: list[str], batch_size: int):
-    controller = RSSController(feed_input=feed_input, batch_size=batch_size)
+def test_batch_size(feed_input: list[str], batch_size: int, strip_markup: bool):
+    controller = RSSController(feed_input=feed_input, batch_size=batch_size, strip_markup=strip_markup)
 
     for df in controller.fetch_dataframes():
         assert isinstance(df, cudf.DataFrame)
         assert len(df) <= batch_size
 
 
+@pytest.mark.parametrize("strip_markup", [False, True])
 @pytest.mark.parametrize("feed_input, enable_cache", [(test_file_paths[0], False), (test_urls[0], True),
                                                       (test_urls[0], False)])
-def test_try_parse_feed_with_beautiful_soup(feed_input: str, enable_cache: bool, mock_get_response: Mock):
-    controller = RSSController(feed_input=feed_input, enable_cache=enable_cache)
+def test_try_parse_feed_with_beautiful_soup(feed_input: str,
+                                            enable_cache: bool,
+                                            mock_get_response: Mock,
+                                            strip_markup: bool):
+    controller = RSSController(feed_input=feed_input, enable_cache=enable_cache, strip_markup=strip_markup)
 
     # When enable_cache is set to 'True', the feed content is provided as input.
     feed_data = controller._try_parse_feed_with_beautiful_soup(mock_get_response.text)
@@ -226,13 +242,44 @@ def test_parse_feeds(mock_feed: feedparser.FeedParserDict):
         controller.get_feed_stats("http://testfeed.com")
 
 
+@pytest.mark.parametrize("strip_markup", [False, True])
 @pytest.mark.parametrize("feed_input", [test_urls[0]])
-def test_redundant_fetch(feed_input: str, mock_feed: feedparser.FeedParserDict, mock_get_response: Mock):
+def test_redundant_fetch(feed_input: str,
+                         mock_feed: feedparser.FeedParserDict,
+                         mock_get_response: Mock,
+                         strip_markup: bool):
 
-    controller = RSSController(feed_input=feed_input)
+    controller = RSSController(feed_input=feed_input, strip_markup=strip_markup)
     mock_feedparser_parse = patch("morpheus.controllers.rss_controller.feedparser.parse")
     with mock_feedparser_parse, patch("requests.Session.get", return_value=mock_get_response) as mocked_session_get:
         mock_feedparser_parse.return_value = mock_feed
         dataframes_generator = controller.fetch_dataframes()
         next(dataframes_generator, None)
         assert mocked_session_get.call_count == 1
+
+
+@pytest.mark.parametrize("strip_markup", [False, True])
+def test_strip_markup(cisa_rss_feed: list[str], strip_markup: bool):
+    # Construct expected data
+    tree = ElementTree.parse(cisa_rss_feed[0])
+
+    # feedparser will map the description field to the summary field
+    description_tags = tree.findall('./channel/item/description')
+    expected_summary_col = [(tag.text or "").strip() for tag in description_tags]
+
+    if strip_markup:
+        expected_summary_col = [
+            BeautifulSoup(summary, features="html.parser").get_text() for summary in expected_summary_col
+        ]
+
+    controller = RSSController(feed_input=cisa_rss_feed, strip_markup=strip_markup)
+    dataframes = list(controller.fetch_dataframes())
+
+    # The number of dataframes and rows should be the same regardless of whether strip_markup is True or False
+    assert len(dataframes) == 1
+    dataframe = dataframes[0]
+    assert isinstance(dataframe, cudf.DataFrame)
+    assert len(dataframe) == 10
+
+    series: SeriesType = dataframe["summary"]
+    assert (series.to_pandas().values == expected_summary_col).all()

From 808c52ca1c0ec4a74695c68fff06d67c5fad7e83 Mon Sep 17 00:00:00 2001
From: David Gardner <96306125+dagardner-nv@users.noreply.github.com>
Date: Thu, 2 May 2024 14:15:51 -0700
Subject: [PATCH 19/38] Update examples to execute from the root of the repo
 (#1674)

* Update top-level examples and Triton start-up commands to execute from the root of the repo

* Where possible set default values for cli flags, removing the need to set them for the common use-case

* Where possible remove the need for defining `MORPHEUS_ROOT`

* Ensure C++ Triton pipelines use port 8000 to avoid the warning about the grpc port.

* Optionally cast types in the C++ impl of the Triton stage when `force_convert_inputs=true` and the input and model types didn't match (previously types were always cast)

* Remove `--num_threads=1` restriction and configure logging for the `log_parsing` example

* Remove `--num_threads=8` restriction from `nlp_si_detection` since the pipeline has more than 8 stages.

* Don't invoke the C++ impl of preallocate if the type being requested isn't supported on the C++ side (strings)

* Don't use the C++ impl of the Triton stage if `use_shared_memory` is requested as this isn't supported in C++.

* Add missing `gnn-fraud-classification` stage to CLI alternative for `gnn_fraud_detection_pipeline` example

Closes #1671

## By Submitting this PR I confirm:
- I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md).
- When the PR is ready for review, new or existing tests cover these changes.
- When the PR is ready for review, the documentation is up to date with these changes.

Authors:
  - David Gardner (https://github.com/dagardner-nv)

Approvers:
  - Eli Fajardo (https://github.com/efajardo-nv)
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: https://github.com/nv-morpheus/Morpheus/pull/1674
---
 docs/source/examples.md                       |   2 +-
 examples/README.md                            |  22 +-
 examples/abp_pcap_detection/README.md         |  41 ++--
 examples/abp_pcap_detection/run.py            |   7 +-
 .../gnn_fraud_detection_pipeline/README.md    |  15 +-
 examples/gnn_fraud_detection_pipeline/run.py  |   8 +-
 examples/log_parsing/README.md                |  21 +-
 examples/log_parsing/run.py                   |   8 +-
 examples/nlp_si_detection/README.md           |   5 +-
 examples/nlp_si_detection/run.sh              |   2 +-
 examples/ransomware_detection/README.md       |  21 +-
 examples/ransomware_detection/run.py          |   4 +-
 examples/root_cause_analysis/README.md        |   5 +-
 morpheus/_lib/common/__init__.pyi             |   3 +
 morpheus/_lib/common/module.cpp               |   4 +
 .../_lib/include/morpheus/objects/dtype.hpp   | 104 +++++++++-
 .../stages/inference_client_stage.hpp         |   5 +
 .../morpheus/stages/triton_inference.hpp      |   8 +-
 morpheus/_lib/src/objects/dtype.cpp           |  17 ++
 .../src/stages/inference_client_stage.cpp     |  18 +-
 morpheus/_lib/src/stages/triton_inference.cpp |  35 +++-
 morpheus/_lib/stages/__init__.pyi             |   4 +-
 morpheus/_lib/stages/module.cpp               |   2 +
 morpheus/_lib/tests/objects/test_dtype.cpp    |  17 +-
 .../stages/test_triton_inference_stage.cpp    | 191 +++++++++++++++---
 morpheus/common/__init__.py                   |   2 +
 morpheus/pipeline/preallocator_mixin.py       |  10 +-
 .../inference/triton_inference_stage.py       |  11 +-
 28 files changed, 448 insertions(+), 144 deletions(-)

diff --git a/docs/source/examples.md b/docs/source/examples.md
index 5de469f588..bfe4f8e24c 100644
--- a/docs/source/examples.md
+++ b/docs/source/examples.md
@@ -24,7 +24,7 @@ limitations under the License.
 * [Example Ransomware Detection Morpheus Pipeline for AppShield Data](../../examples/ransomware_detection/README.md)
 * [Root Cause Analysis Acceleration & Predictive Maintenance Example](../../examples/root_cause_analysis/README.md)
 * [SID Visualization Example](../../examples/sid_visualization/README.md)
-* [Large Language Models (LLMs)](../../examples/llm/README.md)
+* Large Language Models (LLMs)
   * [Agents](../../examples/llm/agents/README.md)
   * [Completion](../../examples/llm/completion/README.md)
   * [VDB Upload](../../examples/llm/vdb_upload/README.md)
diff --git a/examples/README.md b/examples/README.md
index 1c001ffebe..4bdc94648f 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -15,10 +15,18 @@ See the License for the specific language governing permissions and
 limitations under the License.
 -->
 
-## Morpheus CLI Examples
-
-Examples run with the Morpheus CLI (`morpheus ...`) should be run from the repository root; otherwise, some filepath arguments may need to be changed.
-
-## Morpheus run.py Examples
-
-Examples run with python (`python run.py`) should be run from the example's directory; otherwise, relative Python imports may be broken.
+# Examples
+* [Anomalous Behavior Profiling with Forest Inference Library (FIL) Example](./abp_nvsmi_detection/README.md)
+* [ABP Detection Example Using Morpheus](./abp_pcap_detection/README.md)
+* [Digital Fingerprinting (DFP)](./digital_fingerprinting/README.md)
+* [GNN Fraud Detection Pipeline](./gnn_fraud_detection_pipeline/README.md)
+* [Example cyBERT Morpheus Pipeline for Apache Log Parsing](./log_parsing/README.md)
+* [Sensitive Information Detection with Natural Language Processing (NLP) Example](./nlp_si_detection/README.md)
+* [Example Ransomware Detection Morpheus Pipeline for AppShield Data](./ransomware_detection/README.md)
+* [Root Cause Analysis Acceleration & Predictive Maintenance Example](./root_cause_analysis/README.md)
+* [SID Visualization Example](./sid_visualization/README.md)
+* Large Language Models (LLMs)
+    * [Agents](./llm/agents/README.md)
+    * [Completion](./llm/completion/README.md)
+    * [VDB Upload](./llm/vdb_upload/README.md)
+    * [Retrieval Augmented Generation (RAG)](./llm/rag/README.md)
diff --git a/examples/abp_pcap_detection/README.md b/examples/abp_pcap_detection/README.md
index 371bd28e35..3cfae25aa9 100644
--- a/examples/abp_pcap_detection/README.md
+++ b/examples/abp_pcap_detection/README.md
@@ -27,14 +27,9 @@ docker pull nvcr.io/nvidia/tritonserver:23.06-py3
 ```
 
 ##### Deploy Triton Inference Server
-From the root of the Morpheus repo, navigate to the anomalous behavior profiling example directory:
+From the root of the Morpheus repo, run the following to launch Triton and load the `abp-pcap-xgb` model:
 ```bash
-cd examples/abp_pcap_detection
-```
-
-The following creates the Triton container, mounts the `abp-pcap-xgb` directory to `/models/abp-pcap-xgb` in the Triton container, and starts the Triton server:
-```bash
-docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/abp-pcap-xgb:/models/abp-pcap-xgb --name tritonserver nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models --exit-on-error=false
+docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/examples/abp_pcap_detection/abp-pcap-xgb:/models/abp-pcap-xgb --name tritonserver nvcr.io/nvidia/tritonserver:23.06-py3 tritonserver --model-repository=/models --exit-on-error=false
 ```
 
 ##### Verify Model Deployment
@@ -53,8 +48,7 @@ Use Morpheus to run the Anomalous Behavior Profiling
Detection Pipeline with the From the root of the Morpheus repo, run: ```bash -cd examples/abp_pcap_detection -python run.py --help +python examples/abp_pcap_detection/run.py --help ``` Output: @@ -62,44 +56,41 @@ Output: Usage: run.py [OPTIONS] Options: - --num_threads INTEGER RANGE Number of internal pipeline threads to use + --num_threads INTEGER RANGE Number of internal pipeline threads to use. [x>=1] --pipeline_batch_size INTEGER RANGE Internal batch size for the pipeline. Can be much larger than the model batch size. Also - used for Kafka consumers [x>=1] + used for Kafka consumers. [x>=1] --model_max_batch_size INTEGER RANGE - Max batch size to use for the model [x>=1] - --input_file PATH Input filepath [required] + Max batch size to use for the model. [x>=1] + --input_file PATH Input filepath. [required] --output_file TEXT The path to the file where the inference output will be saved. --model_fea_length INTEGER RANGE - Features length to use for the model [x>=1] + Features length to use for the model. + [x>=1] --model_name TEXT The name of the model that is deployed on - Tritonserver + Tritonserver. --iterative Iterative mode will emit dataframes one at a time. Otherwise a list of dataframes is emitted. Iterative mode is good for interleaving source stages. - --server_url TEXT Tritonserver url [required] - --file_type [auto|json|csv] Indicates what type of file to read. + --server_url TEXT Tritonserver url. [required] + --file_type [auto|csv|json] Indicates what type of file to read. Specifying 'auto' will determine the file type from the extension. --help Show this message and exit. ``` -To launch the configured Morpheus pipeline with the sample data that is provided in `examples/data`, from the `examples/abp_pcap_detection` directory run the following: +To launch the configured Morpheus pipeline with the sample data that is provided in `examples/data`, run the following: ```bash -python run.py \ - --input_file ../data/abp_pcap_dump.jsonlines \ - --output_file ./pcap_out.jsonlines \ - --model_name 'abp-pcap-xgb' \ - --server_url localhost:8001 +python examples/abp_pcap_detection/run.py ``` Note: Both Morpheus and Triton Inference Server containers must have access to the same GPUs in order for this example to work. -The pipeline will process the input `pcap_dump.jsonlines` sample data and write it to `pcap_out.jsonlines`. +The pipeline will process the input `abp_pcap_dump.jsonlines` sample data and write it to `pcap_out.jsonlines`. ### CLI Example The above example is illustrative of using the Python API to build a custom Morpheus Pipeline. @@ -123,5 +114,3 @@ morpheus --log_level INFO --plugin "examples/abp_pcap_detection/abp_pcap_preproc to-file --filename "pcap_out.jsonlines" --overwrite \ monitor --description "Write to file rate" --unit "to-file" ``` - -Note: Triton is still needed to be launched from the `examples/abp_pcap_detection` directory. 
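The companion `run.py` diff that follows bakes these defaults into the click options so the common case needs no flags. A minimal sketch of the pattern, with data paths resolved relative to the script so the command works from the repo root (this script is illustrative, not part of the patch):

```python
import os

import click

CUR_DIR = os.path.dirname(__file__)
EX_DATA_DIR = os.path.join(CUR_DIR, "../data")


@click.command()
@click.option("--input_file",
              type=click.Path(exists=True, readable=True),
              default=os.path.join(EX_DATA_DIR, "abp_pcap_dump.jsonlines"),
              help="Input filepath.")
@click.option("--server_url", default="localhost:8001", help="Tritonserver url.")
def main(input_file: str, server_url: str):
    # With defaults in place, running the script with no flags covers the
    # common case from the repository root.
    click.echo(f"reading {input_file}, inferring via {server_url}")


if __name__ == "__main__":
    main()
```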
diff --git a/examples/abp_pcap_detection/run.py b/examples/abp_pcap_detection/run.py index 405f9bde6e..18d5c25e5d 100644 --- a/examples/abp_pcap_detection/run.py +++ b/examples/abp_pcap_detection/run.py @@ -33,6 +33,9 @@ from morpheus.stages.preprocess.deserialize_stage import DeserializeStage from morpheus.utils.logger import configure_logging +CUR_DIR = os.path.dirname(__file__) +EX_DATA_DIR = os.path.join(CUR_DIR, "../data") + @click.command() @click.option( @@ -57,7 +60,7 @@ @click.option( "--input_file", type=click.Path(exists=True, readable=True), - default="pcap.jsonlines", + default=os.path.join(EX_DATA_DIR, "abp_pcap_dump.jsonlines"), required=True, help="Input filepath.", ) @@ -84,7 +87,7 @@ help=("Iterative mode will emit dataframes one at a time. Otherwise a list of dataframes is emitted. " "Iterative mode is good for interleaving source stages."), ) -@click.option("--server_url", required=True, help="Tritonserver url.") +@click.option("--server_url", required=True, help="Tritonserver url.", default="localhost:8001") @click.option( "--file_type", type=click.Choice(FILE_TYPE_NAMES, case_sensitive=False), diff --git a/examples/gnn_fraud_detection_pipeline/README.md b/examples/gnn_fraud_detection_pipeline/README.md index c61f288499..8f05229710 100644 --- a/examples/gnn_fraud_detection_pipeline/README.md +++ b/examples/gnn_fraud_detection_pipeline/README.md @@ -28,17 +28,10 @@ mamba env update \ ``` ## Running - -##### Setup Env Variable -```bash -export MORPHEUS_ROOT=$(pwd) -``` - Use Morpheus to run the GNN fraud detection Pipeline with the transaction data. A pipeline has been configured in `run.py` with several command line options: ```bash -cd ${MORPHEUS_ROOT}/examples/gnn_fraud_detection_pipeline -python run.py --help +python examples/gnn_fraud_detection_pipeline/run.py --help ``` ``` Usage: run.py [OPTIONS] @@ -63,11 +56,10 @@ Options: --help Show this message and exit. 
``` -To launch the configured Morpheus pipeline with the sample data that is provided at `$MORPHEUS_ROOT/models/dataset`, run the following: +To launch the configured Morpheus pipeline, run the following: ```bash -cd ${MORPHEUS_ROOT}/examples/gnn_fraud_detection_pipeline -python run.py +python examples/gnn_fraud_detection_pipeline/run.py ``` ``` ====Registering Pipeline==== @@ -125,6 +117,7 @@ morpheus --log_level INFO \ monitor --description "Graph construction rate" \ gnn-fraud-sage --model_dir examples/gnn_fraud_detection_pipeline/model/ \ monitor --description "Inference rate" \ + gnn-fraud-classification --model_xgb_file examples/gnn_fraud_detection_pipeline/model/xgb.pt \ monitor --description "Add classification rate" \ serialize \ to-file --filename "output.csv" --overwrite diff --git a/examples/gnn_fraud_detection_pipeline/run.py b/examples/gnn_fraud_detection_pipeline/run.py index 58374a8c2b..ae91845b86 100644 --- a/examples/gnn_fraud_detection_pipeline/run.py +++ b/examples/gnn_fraud_detection_pipeline/run.py @@ -32,6 +32,8 @@ from stages.graph_construction_stage import FraudGraphConstructionStage from stages.graph_sage_stage import GraphSAGEStage +CUR_DIR = os.path.dirname(__file__) + @click.command() @click.option( @@ -62,21 +64,21 @@ @click.option( "--input_file", type=click.Path(exists=True, readable=True, dir_okay=False), - default="validation.csv", + default=os.path.join(CUR_DIR, "validation.csv"), required=True, help="Input data filepath.", ) @click.option( "--training_file", type=click.Path(exists=True, readable=True, dir_okay=False), - default="training.csv", + default=os.path.join(CUR_DIR, "training.csv"), required=True, help="Training data filepath.", ) @click.option( "--model_dir", type=click.Path(exists=True, readable=True, file_okay=False, dir_okay=True), - default="model", + default=os.path.join(CUR_DIR, "model"), required=True, help="Path to trained Hinsage & XGB models.", ) diff --git a/examples/log_parsing/README.md b/examples/log_parsing/README.md index ce9790be06..425e1c0b1c 100644 --- a/examples/log_parsing/README.md +++ b/examples/log_parsing/README.md @@ -29,11 +29,6 @@ Example: docker pull nvcr.io/nvidia/tritonserver:23.06-py3 ``` -##### Setup Env Variable -```bash -export MORPHEUS_ROOT=$(pwd) -``` - ##### Start Triton Inference Server Container From the Morpheus repo root directory, run the following to launch Triton and load the `log-parsing-onnx` model: @@ -56,19 +51,15 @@ Once Triton server finishes starting up, it will display the status of all loade ### Run Log Parsing Pipeline -Run the following from the `examples/log_parsing` directory to start the log parsing pipeline: +Run the following from the root of the Morpheus repo to start the log parsing pipeline: ```bash -python run.py \ - --num_threads 1 \ - --input_file ${MORPHEUS_ROOT}/models/datasets/validation-data/log-parsing-validation-data-input.csv \ - --output_file ./log-parsing-output.jsonlines \ +python examples/log_parsing/run.py \ + --input_file=./models/datasets/validation-data/log-parsing-validation-data-input.csv \ --model_vocab_hash_file=data/bert-base-cased-hash.txt \ - --model_vocab_file=${MORPHEUS_ROOT}/models/training-tuning-scripts/sid-models/resources/bert-base-cased-vocab.txt \ - --model_seq_length=256 \ + --model_vocab_file=./models/training-tuning-scripts/sid-models/resources/bert-base-cased-vocab.txt \ --model_name log-parsing-onnx \ - --model_config_file=${MORPHEUS_ROOT}/models/log-parsing-models/log-parsing-config-20220418.json \ - --server_url localhost:8001 + 
--model_config_file=./models/log-parsing-models/log-parsing-config-20220418.json ``` Use `--help` to display information about the command line options: @@ -110,7 +101,7 @@ PYTHONPATH="examples/log_parsing" \ morpheus --log_level INFO \ --plugin "inference" \ --plugin "postprocessing" \ - run --num_threads 1 --pipeline_batch_size 1024 --model_max_batch_size 32 \ + run --pipeline_batch_size 1024 --model_max_batch_size 32 \ pipeline-nlp \ from-file --filename ./models/datasets/validation-data/log-parsing-validation-data-input.csv \ deserialize \ diff --git a/examples/log_parsing/run.py b/examples/log_parsing/run.py index b0dfe76fd3..7fff20bd27 100644 --- a/examples/log_parsing/run.py +++ b/examples/log_parsing/run.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os import click @@ -28,6 +29,7 @@ from morpheus.stages.output.write_to_file_stage import WriteToFileStage from morpheus.stages.preprocess.deserialize_stage import DeserializeStage from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage +from morpheus.utils.logger import configure_logging @click.command() @@ -79,7 +81,7 @@ help="The name of the model that is deployed on Tritonserver.", ) @click.option("--model_config_file", required=True, help="Model config file.") -@click.option("--server_url", required=True, help="Tritonserver url.") +@click.option("--server_url", required=True, help="Tritonserver url.", default="localhost:8001") def run_pipeline( num_threads, pipeline_batch_size, @@ -93,6 +95,10 @@ def run_pipeline( model_config_file, server_url, ): + + # Enable the default logger. + configure_logging(log_level=logging.INFO) + config = Config() config.mode = PipelineModes.NLP config.num_threads = num_threads diff --git a/examples/nlp_si_detection/README.md b/examples/nlp_si_detection/README.md index 32cc2f23b5..33081caf00 100644 --- a/examples/nlp_si_detection/README.md +++ b/examples/nlp_si_detection/README.md @@ -103,11 +103,10 @@ The following command line is the entire command to build and launch the pipelin From the Morpheus repo root directory, run: ```bash -export MORPHEUS_ROOT=$(pwd) # Launch Morpheus printing debug messages morpheus --log_level=DEBUG \ - `# Run a pipeline with 8 threads and a model batch size of 32 (Must match Triton config)` \ - run --num_threads=8 --pipeline_batch_size=1024 --model_max_batch_size=32 \ + `# Run a pipeline with a model batch size of 32 (Must match Triton config)` \ + run --pipeline_batch_size=1024 --model_max_batch_size=32 \ `# Specify a NLP pipeline with 256 sequence length (Must match Triton config)` \ pipeline-nlp --model_seq_length=256 \ `# 1st Stage: Read from file` \ diff --git a/examples/nlp_si_detection/run.sh b/examples/nlp_si_detection/run.sh index f702784968..390418e545 100755 --- a/examples/nlp_si_detection/run.sh +++ b/examples/nlp_si_detection/run.sh @@ -19,7 +19,7 @@ SCRIPT_DIR=${SCRIPT_DIR:-"$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null export MORPHEUS_ROOT=${MORPHEUS_ROOT:-"$(realpath ${SCRIPT_DIR}/../..)"} morpheus --log_level=DEBUG \ - run --num_threads=8 --pipeline_batch_size=1024 --model_max_batch_size=32 \ + run --pipeline_batch_size=1024 --model_max_batch_size=32 \ pipeline-nlp --model_seq_length=256 \ from-file --filename=${MORPHEUS_ROOT}/examples/data/pcap_dump.jsonlines \ deserialize \ diff --git a/examples/ransomware_detection/README.md b/examples/ransomware_detection/README.md index 23f44e4ede..6c04feae46 100644 --- 
a/examples/ransomware_detection/README.md +++ b/examples/ransomware_detection/README.md @@ -35,15 +35,15 @@ export MORPHEUS_ROOT=$(pwd) ``` ##### Start Triton Inference Server Container -Run the following from the `examples/ransomware_detection` directory to launch Triton and load the `ransomw-model-short-rf` model: - +From the Morpheus repo root directory, run the following to launch Triton and load the `ransomw-model-short-rf` model: ```bash # Run Triton in explicit mode -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models/triton-model-repo nvcr.io/nvidia/tritonserver:23.06-py3 \ - tritonserver --model-repository=/models/triton-model-repo \ - --exit-on-error=false \ - --model-control-mode=explicit \ - --load-model ransomw-model-short-rf +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \ + -v $PWD/examples/ransomware_detection/models:/models/triton-model-repo nvcr.io/nvidia/tritonserver:23.06-py3 \ + tritonserver --model-repository=/models/triton-model-repo \ + --exit-on-error=false \ + --model-control-mode=explicit \ + --load-model ransomw-model-short-rf ``` ##### Verify Model Deployment @@ -67,14 +67,13 @@ mamba install 'dask>=2023.1.1' 'distributed>=2023.1.1' ``` ## Run Ransomware Detection Pipeline -Run the following from the `examples/ransomware_detection` directory to start the ransomware detection pipeline: +Run the following from the root of the Morpheus repo to start the ransomware detection pipeline: ```bash -python run.py --server_url=localhost:8001 \ +python examples/ransomware_detection/run.py --server_url=localhost:8001 \ --sliding_window=3 \ --model_name=ransomw-model-short-rf \ - --conf_file=./config/ransomware_detection.yaml \ - --input_glob=${MORPHEUS_ROOT}/examples/data/appshield/*/snapshot-*/*.json \ + --input_glob=./examples/data/appshield/*/snapshot-*/*.json \ --output_file=./ransomware_detection_output.jsonlines ``` diff --git a/examples/ransomware_detection/run.py b/examples/ransomware_detection/run.py index 58296bd2ae..5a80265996 100644 --- a/examples/ransomware_detection/run.py +++ b/examples/ransomware_detection/run.py @@ -33,6 +33,8 @@ from stages.create_features import CreateFeaturesRWStage from stages.preprocessing import PreprocessingRWStage +CUR_DIR = os.path.dirname(__file__) + @click.command() @click.option('--debug', default=False) @@ -64,7 +66,7 @@ @click.option( "--conf_file", type=click.STRING, - default="./config/ransomware_detection.yaml", + default=os.path.join(CUR_DIR, "config/ransomware_detection.yaml"), help="Ransomware detection configuration filepath.", ) @click.option( diff --git a/examples/root_cause_analysis/README.md b/examples/root_cause_analysis/README.md index 2efd63c0ed..b456c3ff72 100644 --- a/examples/root_cause_analysis/README.md +++ b/examples/root_cause_analysis/README.md @@ -98,9 +98,6 @@ From the Morpheus repo root directory, run: ```bash export MORPHEUS_ROOT=$(pwd) -``` - -```bash morpheus --log_level=DEBUG \ `# Run a pipeline with 5 threads and a model batch size of 32 (Must match Triton config)` \ run --num_threads=8 --edge_buffer_size=4 --use_cpp=True --pipeline_batch_size=1024 --model_max_batch_size=32 \ @@ -113,7 +110,7 @@ deserialize \ `# 3rd Stage: Preprocessing converts the input data into BERT tokens` \ preprocess --column=log --vocab_hash_file=./data/bert-base-uncased-hash.txt --truncation=True --do_lower_case=True --add_special_tokens=False \ `# 4th Stage: Send messages to Triton for inference. 
Specify the binary model loaded in Setup` \ -inf-triton --force_convert_inputs=True --model_name=root-cause-binary-onnx --server_url=localhost:8001 \ +inf-triton --model_name=root-cause-binary-onnx --server_url=localhost:8000 --force_convert_inputs=True \ `# 5th Stage: Monitor stage prints throughput information to the console` \ monitor --description='Inference rate' --smoothing=0.001 --unit inf \ `# 6th Stage: Add scores from inference to the messages` \ diff --git a/morpheus/_lib/common/__init__.pyi b/morpheus/_lib/common/__init__.pyi index a5ec2b692c..39e722bdec 100644 --- a/morpheus/_lib/common/__init__.pyi +++ b/morpheus/_lib/common/__init__.pyi @@ -18,6 +18,7 @@ __all__ = [ "TypeId", "determine_file_type", "read_file_to_df", + "typeid_is_fully_supported", "typeid_to_numpy_str", "write_df_to_file" ] @@ -199,6 +200,8 @@ def determine_file_type(filename: str) -> FileTypes: pass def read_file_to_df(filename: str, file_type: FileTypes = FileTypes.Auto) -> object: pass +def typeid_is_fully_supported(arg0: TypeId) -> bool: + pass def typeid_to_numpy_str(arg0: TypeId) -> str: pass def write_df_to_file(df: object, filename: str, file_type: FileTypes = FileTypes.Auto, **kwargs) -> None: diff --git a/morpheus/_lib/common/module.cpp b/morpheus/_lib/common/module.cpp index 0c2ae40914..0bda85b975 100644 --- a/morpheus/_lib/common/module.cpp +++ b/morpheus/_lib/common/module.cpp @@ -129,6 +129,10 @@ PYBIND11_MODULE(common, _module) return DType(tid).type_str(); }); + _module.def("typeid_is_fully_supported", [](TypeId tid) { + return DType(tid).is_fully_supported(); + }); + _module.def( "determine_file_type", py::overload_cast(&determine_file_type), py::arg("filename")); _module.def("determine_file_type", diff --git a/morpheus/_lib/include/morpheus/objects/dtype.hpp b/morpheus/_lib/include/morpheus/objects/dtype.hpp index 63dbd1594a..2297460b52 100644 --- a/morpheus/_lib/include/morpheus/objects/dtype.hpp +++ b/morpheus/_lib/include/morpheus/objects/dtype.hpp @@ -33,6 +33,13 @@ namespace morpheus { */ // Pulled from cuDF + +/** + * @brief Template function to calculate the size in bits of a given type. + * + * @tparam T The type to calculate the size for. + * @return The size in bits of the given type. + */ template constexpr std::size_t size_in_bits() { @@ -40,8 +47,11 @@ constexpr std::size_t size_in_bits() return sizeof(T) * CHAR_BIT; } -// Pulled from cudf #pragma GCC visibility push(default) + +/** + * @brief Enum class for representing data types used in Tensors and DataFrame columns. + */ enum class TypeId : int32_t { EMPTY, ///< Always null with no underlying data @@ -78,40 +88,112 @@ enum class TypeId : int32_t NUM_TYPE_IDS ///< Total number of type ids }; -/****** DType****************************************/ +/** + * @class DType + * @brief This class represents a data type specified by a TypeId. + */ struct DType { + /** + * @brief Construct a DType for a given type specified by a TypeId. + * + * @param tid The TypeId to initialize the DType object with. + */ DType(TypeId tid); + + /** + * @brief Copy constructor. + * + * @param dtype The DType object to copy from. + */ DType(const DType& dtype) = default; + + /** + * @brief Equality operator. + * + * @param other The DType object to compare with. + * @return True if the two DType objects represent the same TypeId, false otherwise. + */ bool operator==(const DType& other) const; + /** + * @brief Get the TypeId of the DType object. + * + * @return The TypeId of the DType object. 
+     */
     TypeId type_id() const;
 
-    // Number of bytes per item
+    /**
+     * @brief Get the number of bytes per item.
+     *
+     * @return The number of bytes per item.
+     */
     size_t item_size() const;
 
-    // Pretty print
+    /**
+     * @brief Get the name of the DType object.
+     *
+     * @return The name of the DType object.
+     */
     std::string name() const;
 
-    // Returns the numpy string representation
+    /**
+     * @brief Get the numpy string representation of the DType object.
+     *
+     * @return The numpy string representation of the DType object.
+     */
     std::string type_str() const;
 
-    // Cudf representation
+    /**
+     * @brief Get the cudf type id of the DType object.
+     *
+     * @return The cudf type id of the DType object.
+     */
     cudf::type_id cudf_type_id() const;
 
-    // Returns the triton string representation
+    /**
+     * @brief Get the triton string representation of the DType object.
+     *
+     * @return The triton string representation of the DType object.
+     */
     std::string triton_str() const;
 
-    // From cudf
+    /**
+     * @brief Create a DType object from a cudf type id.
+     *
+     * @param tid The cudf type id.
+     * @return A DType object.
+     */
     static DType from_cudf(cudf::type_id tid);
 
-    // From numpy
+    /**
+     * @brief Create a DType object from a numpy type string.
+     *
+     * @param numpy_str The numpy type string.
+     * @return A DType object.
+     */
     static DType from_numpy(const std::string& numpy_str);
 
-    // From triton
+    /**
+     * @brief Create a DType object from a triton type string.
+     *
+     * @param type_str The triton type string.
+     * @return A DType object.
+     */
     static DType from_triton(const std::string& type_str);
 
-    // from template
+    /**
+     * @brief Check if the DType object is fully supported.
+     *
+     * @return True if the DType object is fully supported, false otherwise.
+     */
+    bool is_fully_supported() const;
+
+    /**
+     * @brief Construct a DType object from a C++ type.
+     *
+     * @return A DType object.
+     */
     template <typename T>
     static DType create()
     {
diff --git a/morpheus/_lib/include/morpheus/stages/inference_client_stage.hpp b/morpheus/_lib/include/morpheus/stages/inference_client_stage.hpp
index fd115de5af..24f6934fdd 100644
--- a/morpheus/_lib/include/morpheus/stages/inference_client_stage.hpp
+++ b/morpheus/_lib/include/morpheus/stages/inference_client_stage.hpp
@@ -111,6 +111,7 @@ class MORPHEUS_EXPORT InferenceClientStage
      * @param model_name : Name of the model specifies which model can handle the inference requests that are sent to
      * Triton inference
      * @param needs_logits : Determines if logits are required.
+     * @param force_convert_inputs : Determines if inputs should be converted to the model's input format.
      * @param inout_mapping : Dictionary used to map pipeline input/output names to Triton input/output names. Use this
      * if the Morpheus names do not match the model.
* @return std::shared_ptr>> @@ -164,6 +166,7 @@ struct MORPHEUS_EXPORT InferenceClientStageInterfaceProxy std::string model_name, std::string server_url, bool needs_logits, + bool force_convert_inputs, std::map input_mapping, std::map output_mapping); @@ -176,6 +179,7 @@ struct MORPHEUS_EXPORT InferenceClientStageInterfaceProxy * Triton inference * @param server_url : Triton server URL. * @param needs_logits : Determines if logits are required. + * @param force_convert_inputs : Determines if inputs should be converted to the model's input format. * @param inout_mapping : Dictionary used to map pipeline input/output names to Triton input/output names. Use this * if the Morpheus names do not match the model. * @return std::shared_ptr>> @@ -186,6 +190,7 @@ struct MORPHEUS_EXPORT InferenceClientStageInterfaceProxy std::string model_name, std::string server_url, bool needs_logits, + bool force_convert_inputs, std::map input_mapping, std::map output_mapping); }; diff --git a/morpheus/_lib/include/morpheus/stages/triton_inference.hpp b/morpheus/_lib/include/morpheus/stages/triton_inference.hpp index 923a75e2b7..1cc8af06af 100644 --- a/morpheus/_lib/include/morpheus/stages/triton_inference.hpp +++ b/morpheus/_lib/include/morpheus/stages/triton_inference.hpp @@ -153,9 +153,12 @@ class MORPHEUS_EXPORT TritonInferenceClientSession : public IInferenceClientSess std::vector m_model_inputs; std::vector m_model_outputs; std::shared_ptr m_client; + bool m_force_convert_inputs; public: - TritonInferenceClientSession(std::shared_ptr client, std::string model_name); + TritonInferenceClientSession(std::shared_ptr client, + std::string model_name, + bool force_convert_inputs); /** @brief Gets the inference input mappings for Triton @@ -178,9 +181,10 @@ class MORPHEUS_EXPORT TritonInferenceClient : public IInferenceClient private: std::shared_ptr m_client; std::string m_model_name; + bool m_force_convert_inputs; public: - TritonInferenceClient(std::unique_ptr&& client, std::string model_name); + TritonInferenceClient(std::unique_ptr&& client, std::string model_name, bool force_convert_inputs); /** @brief Creates a TritonInferenceClientSession diff --git a/morpheus/_lib/src/objects/dtype.cpp b/morpheus/_lib/src/objects/dtype.cpp index 870cdb8059..3f167b1e01 100644 --- a/morpheus/_lib/src/objects/dtype.cpp +++ b/morpheus/_lib/src/objects/dtype.cpp @@ -357,4 +357,21 @@ char DType::type_char() const } } +bool DType::is_fully_supported() const +{ + try + { + byte_order_char(); + cudf_type_id(); + item_size(); + triton_str(); + type_char(); + } catch (...) 
+ { + return false; + } + + return true; +} + } // namespace morpheus diff --git a/morpheus/_lib/src/stages/inference_client_stage.cpp b/morpheus/_lib/src/stages/inference_client_stage.cpp index 26428aa159..d53364a650 100644 --- a/morpheus/_lib/src/stages/inference_client_stage.cpp +++ b/morpheus/_lib/src/stages/inference_client_stage.cpp @@ -333,6 +333,10 @@ mrc::coroutines::AsyncGenerator> InferenceClientStage input_mappings, std::map output_mappings) { @@ -393,8 +398,9 @@ InferenceClientStageInterfaceProxy::init_mm(mrc::segment::Builder& builder, output_mappings_.emplace_back(TensorModelMapping{mapping.first, mapping.second}); } - auto triton_client = std::make_unique(server_url); - auto triton_inference_client = std::make_unique(std::move(triton_client), model_name); + auto triton_client = std::make_unique(server_url); + auto triton_inference_client = + std::make_unique(std::move(triton_client), model_name, force_convert_inputs); auto stage = builder.construct_object>( name, std::move(triton_inference_client), model_name, needs_logits, input_mappings_, output_mappings_); @@ -408,6 +414,7 @@ InferenceClientStageInterfaceProxy::init_cm(mrc::segment::Builder& builder, std::string server_url, std::string model_name, bool needs_logits, + bool force_convert_inputs, std::map input_mappings, std::map output_mappings) { @@ -424,9 +431,10 @@ InferenceClientStageInterfaceProxy::init_cm(mrc::segment::Builder& builder, output_mappings_.emplace_back(TensorModelMapping{mapping.first, mapping.second}); } - auto triton_client = std::make_unique(server_url); - auto triton_inference_client = std::make_unique(std::move(triton_client), model_name); - auto stage = builder.construct_object>( + auto triton_client = std::make_unique(server_url); + auto triton_inference_client = + std::make_unique(std::move(triton_client), model_name, force_convert_inputs); + auto stage = builder.construct_object>( name, std::move(triton_inference_client), model_name, needs_logits, input_mappings_, output_mappings_); return stage; diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index 30f100e7ea..a78beb5d11 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -258,9 +258,11 @@ triton::client::Error HttpTritonClient::async_infer(triton::client::InferenceSer } TritonInferenceClientSession::TritonInferenceClientSession(std::shared_ptr client, - std::string model_name) : + std::string model_name, + bool force_convert_inputs) : m_client(std::move(client)), - m_model_name(std::move(model_name)) + m_model_name(std::move(model_name)), + m_force_convert_inputs(force_convert_inputs) { // Now load the input/outputs for the model @@ -433,8 +435,24 @@ mrc::coroutines::Task TritonInferenceClientSession::infer(TensorMap&& for (auto model_input : m_model_inputs) { - auto inference_input_slice = - inputs[model_input.name].slice({start, 0}, {stop, -1}).as_type(model_input.datatype); + auto inference_input_slice = inputs.at(model_input.name).slice({start, 0}, {stop, -1}); + + if (inference_input_slice.dtype() != model_input.datatype) + { + if (m_force_convert_inputs) + { + inference_input_slice.swap(inference_input_slice.as_type(model_input.datatype)); + } + else + { + std::string err_msg = MORPHEUS_CONCAT_STR( + "Unexpected dtype for Triton input. Cannot automatically convert dtype due to loss of data." 
+ "Input Name: '" + << model_input.name << ", Expected: " << model_input.datatype.name() + << ", Actual dtype:" << inference_input_slice.dtype().name()); + throw std::invalid_argument(err_msg); + } + } inference_inputs.emplace_back( TritonInferInput{model_input.name, @@ -491,14 +509,17 @@ mrc::coroutines::Task TritonInferenceClientSession::infer(TensorMap&& co_return model_output_tensors; }; -TritonInferenceClient::TritonInferenceClient(std::unique_ptr&& client, std::string model_name) : +TritonInferenceClient::TritonInferenceClient(std::unique_ptr&& client, + std::string model_name, + bool force_convert_inputs) : m_client(std::move(client)), - m_model_name(std::move(model_name)) + m_model_name(std::move(model_name)), + m_force_convert_inputs(force_convert_inputs) {} std::unique_ptr TritonInferenceClient::create_session() { - return std::make_unique(m_client, m_model_name); + return std::make_unique(m_client, m_model_name, m_force_convert_inputs); } } // namespace morpheus diff --git a/morpheus/_lib/stages/__init__.pyi b/morpheus/_lib/stages/__init__.pyi index 85767bdcef..78a0ff8091 100644 --- a/morpheus/_lib/stages/__init__.pyi +++ b/morpheus/_lib/stages/__init__.pyi @@ -71,10 +71,10 @@ class HttpServerSourceStage(mrc.core.segment.SegmentObject): def __init__(self, builder: mrc.core.segment.Builder, name: str, bind_address: str = '127.0.0.1', port: int = 8080, endpoint: str = '/message', method: str = 'POST', accept_status: int = 201, sleep_time: float = 0.10000000149011612, queue_timeout: int = 5, max_queue_size: int = 1024, num_server_threads: int = 1, max_payload_size: int = 10485760, request_timeout: int = 30, lines: bool = False, stop_after: int = 0) -> None: ... pass class InferenceClientStageCM(mrc.core.segment.SegmentObject): - def __init__(self, builder: mrc.core.segment.Builder, name: str, server_url: str, model_name: str, needs_logits: bool, input_mapping: typing.Dict[str, str] = {}, output_mapping: typing.Dict[str, str] = {}) -> None: ... + def __init__(self, builder: mrc.core.segment.Builder, name: str, server_url: str, model_name: str, needs_logits: bool, force_convert_inputs: bool, input_mapping: typing.Dict[str, str] = {}, output_mapping: typing.Dict[str, str] = {}) -> None: ... pass class InferenceClientStageMM(mrc.core.segment.SegmentObject): - def __init__(self, builder: mrc.core.segment.Builder, name: str, server_url: str, model_name: str, needs_logits: bool, input_mapping: typing.Dict[str, str] = {}, output_mapping: typing.Dict[str, str] = {}) -> None: ... + def __init__(self, builder: mrc.core.segment.Builder, name: str, server_url: str, model_name: str, needs_logits: bool, force_convert_inputs: bool, input_mapping: typing.Dict[str, str] = {}, output_mapping: typing.Dict[str, str] = {}) -> None: ... 
pass class KafkaSourceStage(mrc.core.segment.SegmentObject): @typing.overload diff --git a/morpheus/_lib/stages/module.cpp b/morpheus/_lib/stages/module.cpp index 6cdba387f0..1cf57663ac 100644 --- a/morpheus/_lib/stages/module.cpp +++ b/morpheus/_lib/stages/module.cpp @@ -177,6 +177,7 @@ PYBIND11_MODULE(stages, _module) py::arg("server_url"), py::arg("model_name"), py::arg("needs_logits"), + py::arg("force_convert_inputs"), py::arg("input_mapping") = py::dict(), py::arg("output_mapping") = py::dict()); @@ -190,6 +191,7 @@ PYBIND11_MODULE(stages, _module) py::arg("server_url"), py::arg("model_name"), py::arg("needs_logits"), + py::arg("force_convert_inputs"), py::arg("input_mapping") = py::dict(), py::arg("output_mapping") = py::dict()); diff --git a/morpheus/_lib/tests/objects/test_dtype.cpp b/morpheus/_lib/tests/objects/test_dtype.cpp index 230d68dcd6..1f1a70bb51 100644 --- a/morpheus/_lib/tests/objects/test_dtype.cpp +++ b/morpheus/_lib/tests/objects/test_dtype.cpp @@ -22,6 +22,8 @@ #include #include +#include // for int32_t +#include // for set #include using namespace morpheus; @@ -283,4 +285,17 @@ TEST_F(TestDType, FromCudfNotSupported) EXPECT_THROW(DType::from_cudf(cudf::type_id::DECIMAL128), std::invalid_argument); EXPECT_THROW(DType::from_cudf(cudf::type_id::STRUCT), std::invalid_argument); EXPECT_THROW(DType::from_cudf(cudf::type_id::NUM_TYPE_IDS), std::invalid_argument); -} \ No newline at end of file +} + +TEST_F(TestDType, IsFullySupported) +{ + std::set unsupported_types = {TypeId::EMPTY, TypeId::STRING, TypeId::NUM_TYPE_IDS}; + for (auto type_id = static_cast(TypeId::EMPTY); type_id <= static_cast(TypeId::NUM_TYPE_IDS); + ++type_id) + { + auto enum_type_id = static_cast(type_id); + auto dtype = DType(enum_type_id); + + ASSERT_EQ(dtype.is_fully_supported(), !unsupported_types.contains(enum_type_id)); + } +} diff --git a/morpheus/_lib/tests/stages/test_triton_inference_stage.cpp b/morpheus/_lib/tests/stages/test_triton_inference_stage.cpp index df7785d259..170655e8c9 100644 --- a/morpheus/_lib/tests/stages/test_triton_inference_stage.cpp +++ b/morpheus/_lib/tests/stages/test_triton_inference_stage.cpp @@ -22,12 +22,14 @@ #include "morpheus/messages/multi_inference.hpp" #include "morpheus/messages/multi_response.hpp" #include "morpheus/objects/dtype.hpp" +#include "morpheus/objects/memory_descriptor.hpp" // for MemoryDescriptor #include "morpheus/objects/tensor.hpp" #include "morpheus/objects/tensor_object.hpp" #include "morpheus/stages/inference_client_stage.hpp" #include "morpheus/stages/triton_inference.hpp" #include "morpheus/types.hpp" #include "morpheus/utilities/cudf_util.hpp" +#include "morpheus/utilities/matx_util.hpp" #include #include @@ -37,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -44,15 +47,19 @@ #include #include #include +#include // for get_current_device_resource #include #include +#include // for initializer_list #include #include #include +#include // for operator<<, basic_ostream #include #include #include +#include class FakeInferResult : public triton::client::InferResult { @@ -117,6 +124,68 @@ class FakeInferResult : public triton::client::InferResult }; class FakeTritonClient : public morpheus::ITritonClient +{ + public: + triton::client::Error is_server_live(bool* live) override + { + *live = true; + return triton::client::Error::Success; + } + + triton::client::Error is_server_ready(bool* ready) override + { + *ready = true; + return triton::client::Error::Success; + } + + triton::client::Error is_model_ready(bool* 
ready, std::string& model_name) override + { + *ready = true; + return triton::client::Error::Success; + } + + triton::client::Error model_config(std::string* model_config, std::string& model_name) override + { + *model_config = R"({ + "max_batch_size": 100 + })"; + + return triton::client::Error::Success; + } + + triton::client::Error model_metadata(std::string* model_metadata, std::string& model_name) override + { + *model_metadata = R"({ + "inputs":[ + { + "name":"seq_ids", + "shape": [0, 1], + "datatype":"INT32" + } + ], + "outputs":[ + { + "name":"seq_ids", + "shape": [0, 1], + "datatype":"INT32" + } + ]})"; + + return triton::client::Error::Success; + } + + triton::client::Error async_infer(triton::client::InferenceServerHttpClient::OnCompleteFn callback, + const triton::client::InferOptions& options, + const std::vector& inputs, + const std::vector& outputs) override + { + callback(new FakeInferResult({{"seq_ids", std::vector({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})}})); + + return triton::client::Error::Success; + } +}; + +class ErrorProneTritonClient : public FakeTritonClient { private: bool m_is_server_live_has_errored = false; @@ -145,7 +214,7 @@ class FakeTritonClient : public morpheus::ITritonClient m_is_server_live = true; } - return triton::client::Error::Success; + return FakeTritonClient::is_server_live(live); } triton::client::Error is_server_ready(bool* ready) override @@ -192,11 +261,7 @@ class FakeTritonClient : public morpheus::ITritonClient return triton::client::Error("model_config error"); } - *model_config = R"({ - "max_batch_size": 100 - })"; - - return triton::client::Error::Success; + return FakeTritonClient::model_config(model_config, model_name); } triton::client::Error model_metadata(std::string* model_metadata, std::string& model_name) override @@ -207,23 +272,7 @@ class FakeTritonClient : public morpheus::ITritonClient return triton::client::Error("model_metadata error"); } - *model_metadata = R"({ - "inputs":[ - { - "name":"seq_ids", - "shape": [0, 1], - "datatype":"INT32" - } - ], - "outputs":[ - { - "name":"seq_ids", - "shape": [0, 1], - "datatype":"INT32" - } - ]})"; - - return triton::client::Error::Success; + return FakeTritonClient::model_metadata(model_metadata, model_name); } triton::client::Error async_infer(triton::client::InferenceServerHttpClient::OnCompleteFn callback, @@ -237,9 +286,7 @@ class FakeTritonClient : public morpheus::ITritonClient return triton::client::Error("async_infer error"); } - callback(new FakeInferResult({{"seq_ids", std::vector({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})}})); - - return triton::client::Error::Success; + return FakeTritonClient::async_infer(callback, options, inputs, outputs); } }; @@ -307,8 +354,9 @@ TEST_F(TestTritonInferenceStage, SingleRow) auto message = std::make_shared(meta, 0, count, memory); // create the fake triton client used for testing. 
- auto triton_client = std::make_unique(); - auto triton_inference_client = std::make_unique(std::move(triton_client), ""); + auto triton_client = std::make_unique(); + auto triton_inference_client = + std::make_unique(std::move(triton_client), "", true); auto stage = morpheus::InferenceClientStage( std::move(triton_inference_client), "", false, {}, {}); @@ -342,3 +390,90 @@ TEST_F(TestTritonInferenceStage, SingleRow) ASSERT_EQ(results.size(), 1); } + +TEST_F(TestTritonInferenceStage, ForceConvert) +{ + using namespace morpheus; + const TypeId model_type = TypeId::INT32; + const std::size_t count = 10; + + std::vector test_types = {TypeId::INT8, + TypeId::INT16, + TypeId::INT32, + TypeId::INT64, + TypeId::UINT8, + TypeId::UINT16, + TypeId::UINT32, + TypeId::UINT64}; + + for (const auto type_id : test_types) + { + for (bool force_convert_inputs : {true, false}) + { + const bool expect_throw = (type_id != model_type) && !force_convert_inputs; + const auto dtype = DType(type_id); + + DVLOG(10) << "Testing type: " << dtype.name() << " with force_convert_inputs: " << force_convert_inputs + << " and expect_throw: " << expect_throw; + + // Create a seq_id tensor + auto md = + std::make_shared(rmm::cuda_stream_per_thread, rmm::mr::get_current_device_resource()); + auto seq_ids_buffer = MatxUtil::create_seq_ids(count, 1, type_id, md); + + auto tensors = TensorMap(); + tensors["seq_ids"].swap(Tensor::create(seq_ids_buffer, dtype, {count, 3}, {})); + + // create the MultiInferenceMessage using the sequence id tensor. + auto memory = std::make_shared(count, std::move(tensors)); + auto table = create_test_table_with_metadata(count); + auto meta = morpheus::MessageMeta::create_from_cpp(std::move(table), 1); + auto message = std::make_shared(meta, 0, count, memory); + + // create the fake triton client used for testing. 
+ auto triton_client = std::make_unique(); + auto triton_inference_client = + std::make_unique(std::move(triton_client), "", force_convert_inputs); + auto stage = + morpheus::InferenceClientStage( + std::move(triton_inference_client), "", false, {}, {}); + + // manually invoke the stage and iterate through the inference responses + auto on = std::make_shared(); + auto results_task = [](auto& stage, auto message, auto on) + -> mrc::coroutines::Task>> { + std::vector> results; + + auto responses_generator = stage.on_data(std::move(message), on); + + auto iter = co_await responses_generator.begin(); + + while (iter != responses_generator.end()) + { + results.emplace_back(std::move(*iter)); + + co_await ++iter; + } + + co_return results; + }(stage, message, on); + + results_task.resume(); + + while (on->resume_next()) {} + + if (expect_throw) + { + ASSERT_THROW(results_task.promise().result(), std::invalid_argument); + } + else + { + ASSERT_NO_THROW(results_task.promise().result()); + + auto results = results_task.promise().result(); + + ASSERT_EQ(results.size(), 1); + } + } + } +} diff --git a/morpheus/common/__init__.py b/morpheus/common/__init__.py index 01b1d97ba0..3170b82e66 100644 --- a/morpheus/common/__init__.py +++ b/morpheus/common/__init__.py @@ -23,6 +23,7 @@ from morpheus._lib.common import TypeId from morpheus._lib.common import determine_file_type from morpheus._lib.common import read_file_to_df +from morpheus._lib.common import typeid_is_fully_supported from morpheus._lib.common import typeid_to_numpy_str from morpheus._lib.common import write_df_to_file @@ -34,6 +35,7 @@ "HttpServer", "read_file_to_df", "Tensor", + "typeid_is_fully_supported", "typeid_to_numpy_str", "TypeId", "write_df_to_file", diff --git a/morpheus/pipeline/preallocator_mixin.py b/morpheus/pipeline/preallocator_mixin.py index c40ed6be04..acec20b9c7 100644 --- a/morpheus/pipeline/preallocator_mixin.py +++ b/morpheus/pipeline/preallocator_mixin.py @@ -26,6 +26,7 @@ import cudf from morpheus.common import TypeId +from morpheus.common import typeid_is_fully_supported from morpheus.common import typeid_to_numpy_str from morpheus.config import CppConfig from morpheus.messages import ControlMessage @@ -90,6 +91,13 @@ def _preallocate_control(self, msg: ControlMessage) -> ControlMessage: self._preallocate_meta(msg.payload()) return msg + def _all_types_supported_in_cpp(self) -> bool: + for column_type in self._needed_columns.values(): + if not typeid_is_fully_supported(column_type): + return False + + return True + def _post_build_single(self, builder: mrc.Builder, out_node: mrc.SegmentObject) -> mrc.SegmentObject: out_type = self.output_ports[0].output_type pretty_type = pretty_print_type_name(out_type) @@ -99,7 +107,7 @@ def _post_build_single(self, builder: mrc.Builder, out_node: mrc.SegmentObject) if issubclass(out_type, (ControlMessage, MessageMeta, MultiMessage)): # Intentionally not using `_build_cpp_node` because `LinearBoundaryIngressStage` lacks a C++ impl - if CppConfig.get_should_use_cpp(): + if CppConfig.get_should_use_cpp() and self._all_types_supported_in_cpp(): import morpheus._lib.stages as _stages needed_columns = list(self._needed_columns.items()) if issubclass(out_type, ControlMessage): diff --git a/morpheus/stages/inference/triton_inference_stage.py b/morpheus/stages/inference/triton_inference_stage.py index 0b8a79dddf..c46cdcab48 100644 --- a/morpheus/stages/inference/triton_inference_stage.py +++ b/morpheus/stages/inference/triton_inference_stage.py @@ -756,7 +756,14 @@ def __init__(self, def 
supports_cpp_node(self) -> bool: # Get the value from the worker class - return TritonInferenceWorker.supports_cpp_node() + if TritonInferenceWorker.supports_cpp_node(): + if not self._use_shared_memory: + return True + + logger.warning("The C++ implementation of TritonInferenceStage does not support the use_shared_memory " + "option. Falling back to Python implementation.") + + return False def _get_inference_worker(self, inf_queue: ProducerConsumerQueue) -> TritonInferenceWorker: """ @@ -781,6 +788,7 @@ def _get_cpp_inference_node(self, builder: mrc.Builder) -> mrc.SegmentObject: self._server_url, self._model_name, self._needs_logits, + self._force_convert_inputs, self._input_mapping, self._output_mapping) @@ -789,6 +797,7 @@ def _get_cpp_inference_node(self, builder: mrc.Builder) -> mrc.SegmentObject: self._server_url, self._model_name, self._needs_logits, + self._force_convert_inputs, self._input_mapping, self._output_mapping) From fefa1cc5319217daf058e6bcd4a4ca2850cdba33 Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Thu, 2 May 2024 22:46:06 -0700 Subject: [PATCH 20/38] Update dfp-model-card.md Updating to fields and presentation of fields for Model Card++ 3.0 Release. --- models/model-cards/dfp-model-card.md | 56 ++++------------------------ 1 file changed, 7 insertions(+), 49 deletions(-) diff --git a/models/model-cards/dfp-model-card.md b/models/model-cards/dfp-model-card.md index 420ceabfe0..d3d3a381f0 100644 --- a/models/model-cards/dfp-model-card.md +++ b/models/model-cards/dfp-model-card.md @@ -106,36 +106,6 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw ## Model Card ++ Bias Subcard -### What is the gender balance of the model validation data? -* Not Applicable - -### What is the racial/ethnicity balance of the model validation data? -* Not Applicable - -### What is the age balance of the model validation data? -* Not Applicable - -### What is the language balance of the model validation data? -* English (cloudtrail logs): 100% - -### What is the geographic origin language balance of the model validation data? -* Not Applicable - -### What is the educational background balance of the model validation data? -* Not Applicable - -### What is the accent balance of the model validation data? -* Not Applicable - -### What is the face/key point balance of the model validation data? -* Not Applicable - -### What is the skin/tone balance of the model validation data? -* Not Applicable - -### What is the religion balance of the model validation data? -* Not Applicable - ### Individuals from the following adversely impacted (protected classes) groups participate in model design and testing. * Not Applicable @@ -147,7 +117,7 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw ### Name example applications and use cases for this model. * The model is primarily designed for testing purposes and serves as a small pretrained model specifically used to evaluate and validate the DFP pipeline. Its application is focused on assessing the effectiveness of the pipeline rather than being intended for broader use cases or specific applications beyond testing. -### Fill in the blank for the model technique. +### Intended Users. * This model is designed for developers seeking to test the DFP pipeline with a small pretrained model trained on a synthetic dataset. ### Name who is intended to benefit from this model. 
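Stepping back to the inference-stage changes above: the new `force_convert_inputs` argument is surfaced on the Python `TritonInferenceStage`. Below is a minimal usage sketch; the model name, server URL, and input file are placeholder assumptions, not values taken from these patches, and a real pipeline would also include the usual deserialize/preprocess stages.

```python
from morpheus.config import Config, PipelineModes
from morpheus.pipeline.linear_pipeline import LinearPipeline
from morpheus.stages.inference.triton_inference_stage import TritonInferenceStage
from morpheus.stages.input.file_source_stage import FileSourceStage

config = Config()
config.mode = PipelineModes.FIL  # assumed FIL-mode pipeline, for illustration only

pipeline = LinearPipeline(config)
pipeline.set_source(FileSourceStage(config, filename="input.jsonlines"))  # placeholder input file

# With force_convert_inputs=False, a dtype mismatch between the input tensors and
# the model's declared inputs now raises an error instead of converting silently;
# passing True opts back in to the automatic (potentially lossy) conversion.
pipeline.add_stage(
    TritonInferenceStage(config,
                         model_name="example-model",   # placeholder model name
                         server_url="localhost:8000",  # placeholder Triton endpoint
                         force_convert_inputs=True))

pipeline.run()
```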
@@ -157,16 +127,16 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw * The model calculates an anomaly score for each input based on the reconstruction loss obtained from the trained Autoencoder. This score represents the level of anomaly detected in the input data. Higher scores indicate a higher likelihood of anomalous behavior. * The model provides the reconstruction loss of each feature to facilitate further testing and debugging of the pipeline. -### List the steps explaining how this model works. +### Describe how this model works. * The model works by training on baseline behaviors and subsequently detecting deviations from the established baseline, triggering alerts accordingly. * [Training notebook](https://github.com/nv-morpheus/Morpheus/blob/branch-24.06/models/training-tuning-scripts/dfp-models/hammah-20211017.ipynb) -### Name the adversely impacted groups (protected classes) this has been tested to deliver comparable outcomes regardless of: -* Not Applicable - ### List the technical limitations of the model. * The model expects cloudtrail logs with specific features that match the training dataset. Data lacking the required features or requiring a different feature set may not be compatible with the model. +### Has this been verified to have met prescribed NVIDIA quality standards? +* Yes + ### What performance metrics were used to affirm the model's performance? * The model's performance was evaluated based on its ability to correctly identify anomalous behavior in the synthetic dataset during testing. @@ -181,10 +151,7 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw ### Link the location of the training dataset's repository (if able to share). * https://github.com/nv-morpheus/Morpheus/tree/branch-24.06/models/datasets/training-data/cloudtrail -### Is the model used in an application with physical safety impact? -* No - -### Describe physical safety impact (if present). +### Describe the life critical impact (if present). * None ### Was model and dataset assessed for vulnerability for potential form of attack? @@ -196,12 +163,6 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw ### Name use case restrictions for the model. * The model's use case is restricted to testing the Morpheus pipeline and may not be suitable for other applications. -### Has this been verified to have met prescribed quality standards? -* No - -### Name target quality Key Performance Indicators (KPIs) for which this has been tested. -* None - ### Is the model and dataset compliant with National Classification Management Society (NCMS)? * No @@ -236,10 +197,7 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw ### If PII collected for the development of this AI model, was it minimized to only what was required? * Not Applicable (no PII collected) -### Is data in dataset traceable? -* No - -### Are we able to identify and trace source of dataset? +### Is there data provenance? * Yes ([fully synthetic dataset](https://github.com/nv-morpheus/Morpheus/tree/branch-24.06/models/datasets/training-data/cloudtrail)) ### Does data labeling (annotation, metadata) comply with privacy laws? 
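One more note on the stage changes earlier in this series: the preallocator mixin now verifies that every requested column dtype is fully supported before selecting the C++ node. A small sketch of the kind of check the new `typeid_is_fully_supported` helper enables; the column map here is illustrative only.

```python
from morpheus.common import TypeId, typeid_is_fully_supported

# Hypothetical columns a stage might ask to have preallocated.
needed_columns = {"score": TypeId.FLOAT32, "label": TypeId.STRING}

# Mirrors the spirit of PreallocatorMixin._all_types_supported_in_cpp():
# only take the C++ code path when every dtype is fully supported there.
use_cpp_node = all(typeid_is_fully_supported(type_id) for type_id in needed_columns.values())

# STRING is one of the types the new TestDType.IsFullySupported test marks as
# unsupported, so this prints False and the Python node would be used instead.
print(use_cpp_node)
```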
From 33599b8077424ae8ddce03d228fb74dfb93cc58e Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Thu, 2 May 2024 22:46:17 -0700 Subject: [PATCH 21/38] Update root-cause-analysis-model-card.md Updating to fields and presentation of fields for Model Card++ 3.0 Release. --- .../root-cause-analysis-model-card.md | 80 ++----------------- 1 file changed, 8 insertions(+), 72 deletions(-) diff --git a/models/model-cards/root-cause-analysis-model-card.md b/models/model-cards/root-cause-analysis-model-card.md index 0f6a332f52..bd8c301faf 100644 --- a/models/model-cards/root-cause-analysis-model-card.md +++ b/models/model-cards/root-cause-analysis-model-card.md @@ -21,63 +21,49 @@ limitations under the License. # Model Overview ## Description: - * Root cause analysis is a binary classifier differentiating between ordinary logs and errors/problems/root causes in the log files.
## References(s): - * Devlin J. et al. (2018), BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding https://arxiv.org/abs/1810.04805
## Model Architecture: - **Architecture Type:** - * Transformers
**Network Architecture:** - * BERT
## Input: (Enter "None" As Needed) **Input Format:** - * CSV
**Input Parameters:** - * kern.log file contents
**Other Properties Related to Output:** - * N/A
## Output: (Enter "None" As Needed) **Output Format:** - * Binary Results, Root Cause or Ordinary
**Output Parameters:** - * N/A
**Other Properties Related to Output:** - * N/A
## Software Integration: **Runtime(s):** - * Morpheus
**Supported Hardware Platform(s):**
- * Ampere/Turing
**Supported Operating System(s):**
- * Linux
## Model Version(s): @@ -88,67 +74,31 @@ limitations under the License. ## Training Dataset: **Link:** - * https://github.com/nv-morpheus/Morpheus/blob/branch-24.06/models/datasets/training-data/root-cause-training-data.csv
**Properties (Quantity, Dataset Descriptions, Sensor(s)):** - * kern.log files from DGX machines
## Evaluation Dataset: **Link:** - * https://github.com/nv-morpheus/Morpheus/blob/branch-24.06/models/datasets/validation-data/root-cause-validation-data-input.jsonlines
**Properties (Quantity, Dataset Descriptions, Sensor(s)):** - * kern.log files from DGX machines
## Inference: **Engine:** - * Triton
**Test Hardware:**
- * Other
# Subcards ## Model Card ++ Bias Subcard -### What is the gender balance of the model validation data? -* Not Applicable - -### What is the racial/ethnicity balance of the model validation data? -* Not Applicable - -### What is the age balance of the model validation data? -* Not Applicable - -### What is the language balance of the model validation data? -* Not Applicable - -### What is the geographic origin language balance of the model validation data? -* Not Applicable - -### What is the educational background balance of the model validation data? -* Not Applicable - -### What is the accent balance of the model validation data? -* Not Applicable - -### What is the face/key point balance of the model validation data? -* Not Applicable - -### What is the skin/tone balance of the model validation data? -* Not Applicable - -### What is the religion balance of the model validation data? -* Not Applicable - ### Individuals from the following adversely impacted (protected classes) groups participate in model design and testing. * Not Applicable @@ -160,26 +110,24 @@ limitations under the License. ### Name example applications and use cases for this model. * The model is primarily designed for testing purposes and serves as a small pre-trained model specifically used to evaluate and validate the Root Cause Analysis pipeline. This model is an example of customized transformer-based root cause analysis. It can be used for pipeline testing purposes. It needs to be re-trained for specific root cause analysis or predictive maintenance needs with the fine-tuning scripts in the repo. The hyperparameters can be optimised to adjust to get the best results with another dataset. The aim is to get the model to predict some false positives that could be previously unknown error types. Users can use this root cause analysis approach with other log types too. If they have known failures in their logs, they can use them to train along with ordinary logs and can detect other root causes they weren't aware of before. -### Fill in the blank for the model technique. - +### Intended Users. * This model is designed for developers seeking to test the root cause analysis pipeline with a small pre-trained model trained on a very small `kern.log` file from a DGX. ### Name who is intended to benefit from this model. - * The intended beneficiaries of this model are developers who aim to test the functionality of the DFP pipeline using synthetic datasets ### Describe the model output. * This model output can be used as a binary result, Root cause or Ordinary -### List the steps explaining how this model works. +### Describe how this model works. * A BERT model gets fine-tuned with the kern.log dataset and in the inference it predicts one of the binary classes. Root cause or Ordinary. -### Name the adversely impacted groups (protected classes) this has been tested to deliver comparable outcomes regardless of: -* Not Applicable - ### List the technical limitations of the model. * For different log types and content, different models need to be trained. +### Has this been verified to have met prescribed NVIDIA quality standards? +* Yes + ### What performance metrics were used to affirm the model's performance? * F1 @@ -195,10 +143,7 @@ limitations under the License. ### Link the location of the training dataset's repository. * https://github.com/nv-morpheus/Morpheus/blob/branch-24.06/models/datasets/training-data/root-cause-training-data.csv -### Is the model used in an application with physical safety impact? 
-* No - -### Describe physical safety impact (if present). +### Describe the life critical impact (if present). * None ### Was model and dataset assessed for vulnerability for potential form of attack? @@ -210,12 +155,6 @@ limitations under the License. ### Name use case restrictions for the model. * Different models need to be trained depending on the log types. -### Has this been verified to have met prescribed quality standards? -* No - -### Name target quality Key Performance Indicators (KPIs) for which this has been tested. -* N/A - ### Is the model and dataset compliant with National Classification Management Society (NCMS)? * No @@ -232,7 +171,7 @@ limitations under the License. ### Generatable or reverse engineerable personally-identifiable information (PII)? -* Neither +* None ### Was consent obtained for any PII used? * N/A @@ -249,12 +188,9 @@ limitations under the License. ### If PII collected for the development of this AI model, was it minimized to only what was required? * N/A -### Is data in dataset traceable? +### Is there data provenance? * Original raw logs are not saved. The small sample in the repo is saved for testing the pipeline. -### Are we able to identify and trace source of dataset? -* N/A - ### Does data labeling (annotation, metadata) comply with privacy laws? * N/A From eb8036f0577ec8aa4aa5579fd886162b7b4624bc Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Mon, 6 May 2024 16:45:11 -0700 Subject: [PATCH 22/38] Update abp-model-card.md Adding "### Describe access restrictions * The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints adhered to." --- models/model-cards/abp-model-card.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/models/model-cards/abp-model-card.md b/models/model-cards/abp-model-card.md index 3f9043db86..f7e49eed37 100644 --- a/models/model-cards/abp-model-card.md +++ b/models/model-cards/abp-model-card.md @@ -210,13 +210,9 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe * No -### Are there explicit model and dataset restrictions? +### Describe access restrictions -* No - -### Are there access restrictions to systems, model, and data? - -* No +* The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints adhered to. ### Is there a digital signature? From c010017b8a217ca6738704221100bc61cbd5fcf3 Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Mon, 6 May 2024 16:45:32 -0700 Subject: [PATCH 23/38] Update dfp-model-card.md ### Describe access restrictions * The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints adhered to." --- models/model-cards/dfp-model-card.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/models/model-cards/dfp-model-card.md b/models/model-cards/dfp-model-card.md index 420ceabfe0..a049cf18fa 100644 --- a/models/model-cards/dfp-model-card.md +++ b/models/model-cards/dfp-model-card.md @@ -205,11 +205,9 @@ The evaluation dataset consists of AWS CloudTrail logs. 
It contains logs from tw ### Is the model and dataset compliant with National Classification Management Society (NCMS)? * No -### Are there explicit model and dataset restrictions? -* No +### Describe access restrictions -### Are there access restrictions to systems, model, and data? -* No +* The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints adhered to. ### Is there a digital signature? * No From 9108041fbbf51c3ee790de76b547a8e19cf0cf6c Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Mon, 6 May 2024 16:45:59 -0700 Subject: [PATCH 24/38] Update gnn-fsi-model-card.md ### Describe access restrictions * The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints adhered to." --- models/model-cards/gnn-fsi-model-card.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/models/model-cards/gnn-fsi-model-card.md b/models/model-cards/gnn-fsi-model-card.md index ae76cd8edd..84ce630c55 100644 --- a/models/model-cards/gnn-fsi-model-card.md +++ b/models/model-cards/gnn-fsi-model-card.md @@ -169,11 +169,9 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe ### Is the model and dataset compliant with National Classification Management Society (NCMS)? * Not Applicable -### Are there explicit model and dataset restrictions? -* No +### Describe access restrictions -### Are there access restrictions to systems, model, and data? -* No +* The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints adhered to. ### Is there a digital signature? * No From 4e962d0b21cb4a712744e2ed052f065c3e848642 Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Mon, 6 May 2024 16:46:20 -0700 Subject: [PATCH 25/38] Update phishing-model-card.md ### Describe access restrictions * The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints adhered to." --- models/model-cards/phishing-model-card.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/models/model-cards/phishing-model-card.md b/models/model-cards/phishing-model-card.md index 7699c256b2..a902a3fde5 100644 --- a/models/model-cards/phishing-model-card.md +++ b/models/model-cards/phishing-model-card.md @@ -204,11 +204,9 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe ### Is the model and dataset compliant with National Classification Management Society (NCMS)? * No -### Are there explicit model and dataset restrictions? -* No +### Describe access restrictions -### Are there access restrictions to systems, model, and data? -* No +* The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints adhered to. ### Is there a digital signature? 
From 4f2b6c88f645ccb89b4531546ee0d8e0e4611a2b Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Mon, 6 May 2024 16:46:50 -0700 Subject: [PATCH 26/38] Update root-cause-analysis-model-card.md ### Describe access restrictions * The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints adhered to." --- models/model-cards/root-cause-analysis-model-card.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/models/model-cards/root-cause-analysis-model-card.md b/models/model-cards/root-cause-analysis-model-card.md index bd8c301faf..064019756b 100644 --- a/models/model-cards/root-cause-analysis-model-card.md +++ b/models/model-cards/root-cause-analysis-model-card.md @@ -158,11 +158,9 @@ limitations under the License. ### Is the model and dataset compliant with National Classification Management Society (NCMS)? * No -### Are there explicit model and dataset restrictions? -* It is for pipeline testing purposes. +### Describe access restrictions -### Are there access restrictions to systems, model, and data? -* No +* The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. Restrictions enforce dataset access during training, and dataset license constraints adhered to. ### Is there a digital signature? * No From c88527bee55a4e6b634e9a553d4a581a128e05a5 Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Tue, 7 May 2024 07:37:51 -0700 Subject: [PATCH 27/38] Update dfp-model-card.md (#1644) Closes ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - https://github.com/HesAnEasyCoder Approvers: - Devin Robison (https://github.com/drobison00) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1644 --- models/model-cards/dfp-model-card.md | 81 +++++++--------------------- 1 file changed, 20 insertions(+), 61 deletions(-) diff --git a/models/model-cards/dfp-model-card.md b/models/model-cards/dfp-model-card.md index 420ceabfe0..1839daba05 100644 --- a/models/model-cards/dfp-model-card.md +++ b/models/model-cards/dfp-model-card.md @@ -45,13 +45,11 @@ The model architecture consists of an Autoencoder, where the reconstruction loss ## Output: **Output Format:** -* Anomaly score and the reconstruction loss for each feature in a pandas dataframe +* Anomaly score (per feature) +* Reconstruction loss (per feature) **Output Parameters:** -* None - -**Other Properties Related to Output:** -* Not Applicable +* Pandas Dataframe ## Software Integration: **Runtime(s):** @@ -102,45 +100,21 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw **Test Hardware:** * Other +## Ethical Considerations (For NVIDIA Models Only): +NVIDIA believes Trustworthy AI is a shared responsibility and we have established policies and practices to enable development for a wide array of AI applications. 
When downloaded or used in accordance with our terms of service, developers should work with their internal model team to ensure this model meets requirements for the relevant industry and use case and addresses unforeseen product misuse. For more detailed information on ethical considerations for this model, please see the Model Card++ Explainability, Bias, Safety & Security, and Privacy Subcard + # Subcards ## Model Card ++ Bias Subcard -### What is the gender balance of the model validation data? -* Not Applicable - -### What is the racial/ethnicity balance of the model validation data? -* Not Applicable - -### What is the age balance of the model validation data? -* Not Applicable - ### What is the language balance of the model validation data? * English (cloudtrail logs): 100% -### What is the geographic origin language balance of the model validation data? -* Not Applicable - -### What is the educational background balance of the model validation data? -* Not Applicable - -### What is the accent balance of the model validation data? -* Not Applicable - -### What is the face/key point balance of the model validation data? -* Not Applicable - -### What is the skin/tone balance of the model validation data? -* Not Applicable - -### What is the religion balance of the model validation data? -* Not Applicable - ### Individuals from the following adversely impacted (protected classes) groups participate in model design and testing. -* Not Applicable +* None of the Above. ### Describe measures taken to mitigate against unwanted bias. -* Not Applicable +* None of the Above. ## Model Card ++ Explainability Subcard @@ -161,12 +135,12 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw * The model works by training on baseline behaviors and subsequently detecting deviations from the established baseline, triggering alerts accordingly. * [Training notebook](https://github.com/nv-morpheus/Morpheus/blob/branch-24.06/models/training-tuning-scripts/dfp-models/hammah-20211017.ipynb) -### Name the adversely impacted groups (protected classes) this has been tested to deliver comparable outcomes regardless of: -* Not Applicable - ### List the technical limitations of the model. * The model expects cloudtrail logs with specific features that match the training dataset. Data lacking the required features or requiring a different feature set may not be compatible with the model. +### Has this been verified to have met prescribed quality standards? +* Yes + ### What performance metrics were used to affirm the model's performance? * The model's performance was evaluated based on its ability to correctly identify anomalous behavior in the synthetic dataset during testing. @@ -181,10 +155,7 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw ### Link the location of the training dataset's repository (if able to share). * https://github.com/nv-morpheus/Morpheus/tree/branch-24.06/models/datasets/training-data/cloudtrail -### Is the model used in an application with physical safety impact? -* No - -### Describe physical safety impact (if present). +### Describe the life critical impact (if present). * None ### Was model and dataset assessed for vulnerability for potential form of attack? @@ -196,30 +167,18 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw ### Name use case restrictions for the model. * The model's use case is restricted to testing the Morpheus pipeline and may not be suitable for other applications. 
-### Has this been verified to have met prescribed quality standards? -* No - -### Name target quality Key Performance Indicators (KPIs) for which this has been tested. -* None - -### Is the model and dataset compliant with National Classification Management Society (NCMS)? -* No - -### Are there explicit model and dataset restrictions? -* No +### Name explicit model and/or dataset restrictions. +* The Principle of least privilege (PoLP) is applied limiting access for dataset generation and model development. ### Are there access restrictions to systems, model, and data? * No -### Is there a digital signature? -* No - ## Model Card ++ Privacy Subcard ### Generatable or reverse engineerable personally-identifiable information (PII)? -* Neither +* None ### Was consent obtained for any PII used? * The synthetic data used in this model is generated using the [faker](https://github.com/joke2k/faker/blob/master/LICENSE.txt) python package. The user agent field is generated by faker, which pulls items from its own dataset of fictitious values (located in the linked repo). Similarly, the event source field is randomly chosen from a list of event names provided in the AWS documentation. There are no privacy concerns or PII involved in this synthetic data generation process. @@ -228,22 +187,22 @@ The evaluation dataset consists of AWS CloudTrail logs. It contains logs from tw * Not applicable ### How often is dataset reviewed? -* The dataset is initially reviewed upon addition, and subsequent reviews are conducted as needed or upon request for any changes. +* The dataset is initially reviewed upon addition, and subsequent reviews are conducted as needed or upon request for changes. ### Is a mechanism in place to honor data subject right of access or deletion of personal data? -* No (as the dataset is fully synthetic) +* No (dataset is fully synthetic) ### If PII collected for the development of this AI model, was it minimized to only what was required? * Not Applicable (no PII collected) -### Is data in dataset traceable? +### Is there data provenance? * No ### Are we able to identify and trace source of dataset? * Yes ([fully synthetic dataset](https://github.com/nv-morpheus/Morpheus/tree/branch-24.06/models/datasets/training-data/cloudtrail)) ### Does data labeling (annotation, metadata) comply with privacy laws? -* Not applicable (as the dataset is fully synthetic) +* Not applicable (dataset is fully synthetic) ### Is data compliant with data subject requests for data correction or removal, if such a request was made? -* Not applicable (as the dataset is fully synthetic) +* Not applicable (dataset is fully synthetic) From 4c3fc5d99d269f41458d045bc5d935945a1deac3 Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Wed, 8 May 2024 12:04:27 -0700 Subject: [PATCH 28/38] Update phishing-model-card.md (#1680) Closes ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
Authors: - https://github.com/HesAnEasyCoder Approvers: - Devin Robison (https://github.com/drobison00) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1680 --- models/model-cards/phishing-model-card.md | 48 ++--------------------- 1 file changed, 4 insertions(+), 44 deletions(-) diff --git a/models/model-cards/phishing-model-card.md b/models/model-cards/phishing-model-card.md index 7699c256b2..6cb445b8cc 100644 --- a/models/model-cards/phishing-model-card.md +++ b/models/model-cards/phishing-model-card.md @@ -28,60 +28,47 @@ limitations under the License. * Devlin J. et al. (2018), BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding https://arxiv.org/abs/1810.04805
## Model Architecture: - **Architecture Type:** - * Transformers
**Network Architecture:** - * BERT
## Input: (Enter "None" As Needed) **Input Format:** - * Evaluation script downloads the smsspamcollection.zip and extract tabular information into a dataframe
**Input Parameters:** - * SMS/emails
**Other Properties Related to Output:** - * N/A
## Output: (Enter "None" As Needed) **Output Format:** - * Binary Results, Fraudulent or Benign
**Output Parameters:** - * N/A
**Other Properties Related to Output:** - * N/A
## Software Integration: **Runtime(s):** - * Morpheus
**Supported Hardware Platform(s):**
- * Ampere/Turing
**Supported Operating System(s):**
- * Linux
## Model Version(s): - * v1
# Training & Evaluation: @@ -89,31 +76,25 @@ limitations under the License. ## Training Dataset: **Link:** - * http://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip
**Properties (Quantity, Dataset Descriptions, Sensor(s)):** - * Dataset consists of SMSs
## Evaluation Dataset: **Link:** - * https://github.com/nv-morpheus/Morpheus/blob/branch-24.06/models/datasets/validation-data/phishing-email-validation-data.jsonlines
**Properties (Quantity, Dataset Descriptions, Sensor(s)):** - * Dataset consists of SMSs
## Inference: **Engine:** - * Triton
**Test Hardware:**
- * DGX (V100)
## Ethical Considerations: @@ -124,19 +105,15 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe ## Model Card ++ Bias Subcard ### What is the language balance of the model validation data? - * English ### What is the geographic origin language balance of the model validation data? - * UK ### Individuals from the following adversely impacted (protected classes) groups participate in model design and testing. - * Not Applicable ### Describe measures taken to mitigate against unwanted bias. - * Not Applicable ## Model Card ++ Explainability Subcard @@ -144,28 +121,22 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe ### Name example applications and use cases for this model. * The model is primarily designed for testing purposes and serves as a small pre-trained model specifically used to evaluate and validate the phishing detection pipeline. Its application is focused on assessing the effectiveness of the pipeline rather than being intended for broader use cases or specific applications beyond testing. -### Fill in the blank for the model technique. - +### Intended Users. * This model is designed for developers seeking to test the phishing detection pipeline with a small pre-trained model. ### Name who is intended to benefit from this model. - * The intended beneficiaries of this model are developers who aim to test the performance and functionality of the phishing pipeline using synthetic datasets. It may not be suitable or provide significant value for real-world phishing messages. ### Describe the model output. * This model output can be used as a binary result, Phishing/Spam or Benign -### List the steps explaining how this model works. +### Describe how this model works. * A BERT model gets fine-tuned with the dataset and in the inference it predicts one of the binary classes. Phishing/Spam or Benign. -### Name the adversely impacted groups (protected classes) this has been tested to deliver comparable outcomes regardless of: -* Not Applicable - ### List the technical limitations of the model. * For different email/SMS types and content, different models need to be trained. ### Has this been verified to have met prescribed NVIDIA standards? - * Yes ### What performance metrics were used to affirm the model's performance? @@ -182,25 +153,18 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe ### Link the location of the training dataset's repository. * http://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip -### Is the model used in an application with physical safety impact? -* No - -### Describe life-critical impact (if present). +### Describe the life critical impact (if present). * None ### Was model and dataset assessed for vulnerability for potential form of attack? * No ### Name applications for the model. - * The primary application for this model is testing the Morpheus phishing detection pipeline ### Name use case restrictions for the model. * This pretrained model's use case is restricted to testing the Morpheus pipeline and may not be suitable for other applications. -### Name target quality Key Performance Indicators (KPIs) for which this has been tested. -* N/A - ### Is the model and dataset compliant with National Classification Management Society (NCMS)? 
* No @@ -230,16 +194,12 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe * Unknown ### Is a mechanism in place to honor data subject right of access or deletion of personal data? - * N/A ### If PII collected for the development of this AI model, was it minimized to only what was required? * N/A -### Is data in dataset traceable? -* N/A - -### Are we able to identify and trace source of dataset? +### Is there data provenance? * N/A ### Does data labeling (annotation, metadata) comply with privacy laws? From a5a68413cad58c1d2264ca34eb3fa6f70ac32128 Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Wed, 8 May 2024 12:05:03 -0700 Subject: [PATCH 29/38] Update gnn-fsi-model-card.md (#1681) Closes ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - https://github.com/HesAnEasyCoder Approvers: - Devin Robison (https://github.com/drobison00) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1681 --- models/model-cards/gnn-fsi-model-card.md | 27 ++++++------------------ 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/models/model-cards/gnn-fsi-model-card.md b/models/model-cards/gnn-fsi-model-card.md index ae76cd8edd..92dede0b0e 100644 --- a/models/model-cards/gnn-fsi-model-card.md +++ b/models/model-cards/gnn-fsi-model-card.md @@ -115,23 +115,19 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe ### Fill in the blank for the model technique. * This model is designed for developers seeking to test the GNN fraud detection pipeline with a small pretrained model on a synthetic dataset. -### Name who is intended to benefit from this model. +### Intended Users. * The intended beneficiaries of this model are developers who aim to test the performance and functionality of the GNN fraud detection pipeline using synthetic datasets. It may not be suitable or provide significant value for real-world transactions. ### Describe the model output. * This model outputs fraud probability score b/n (0 & 1). -### List the steps explaining how this model works. (e.g., ) -* The model uses a bipartite heterogeneous graph representation as input for `GraphSAGE` for feature learning and `XGBoost` as a classifier. Since the input graph is heterogeneous, a heterogeneous implementation of `GraphSAGE` (HinSAGE) is used for feature embedding.
- -### Name the adversely impacted groups (protected classes) this has been tested to deliver comparable outcomes regardless of: -* Not Applicable +### Describe how this model works. +* The model uses a bipartite heterogeneous graph representation as input for `GraphSAGE` for feature learning and `XGBoost` as a classifier. Since the input graph is heterogeneous, a heterogeneous implementation of `GraphSAGE` (HinSAGE) is used for feature embedding. ### List the technical limitations of the model. * This model version requires a transactional data schema with entities (user, merchant, transaction) as requirement for the model. ### Has this been verified to have met prescribed NVIDIA standards? - * Yes ### What performance metrics were used to affirm the model's performance? @@ -148,11 +144,8 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe ### Link the location of the training dataset's repository (if able to share). * [training dataset](models/datasets/training-data/fraud-detection-training-data.csv) -### Is the model used in an application with physical safety impact? -* No - -### Describe life-critical impact (if present). -* Not Applicable +### Describe the life critical impact (if present). +* None ### Was model and dataset assessed for vulnerability for potential form of attack? * No @@ -163,9 +156,6 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe ### Name use case restrictions for the model. * The model's use case is restricted to testing the Morpheus pipeline and may not be suitable for other applications. -### Name target quality Key Performance Indicators (KPIs) for which this has been tested. -* Not Applicable - ### Is the model and dataset compliant with National Classification Management Society (NCMS)? * Not Applicable @@ -192,16 +182,13 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe ### How often is dataset reviewed? * The dataset is initially reviewed upon addition, and subsequent reviews are conducted as needed or upon request for any changes. -### Is a mechanism in place to honor data +### Is a mechanism in place to honor data subject right of access or deletion of personal data? * Yes ### If PII collected for the development of this AI model, was it minimized to only what was required? * Not applicable -### Is data in dataset traceable? -* No - -### Are we able to identify and trace source of dataset? +### Is there data provenance? * Yes ### Does data labeling (annotation, metadata) comply with privacy laws? From 26eb9f866926f54825c89e699086516873dbcc3d Mon Sep 17 00:00:00 2001 From: HesAnEasyCoder <105108698+HesAnEasyCoder@users.noreply.github.com> Date: Wed, 8 May 2024 12:07:18 -0700 Subject: [PATCH 30/38] Update abp-model-card.md (#1683) Closes ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. 
Authors: - https://github.com/HesAnEasyCoder Approvers: - Devin Robison (https://github.com/drobison00) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1683 --- models/model-cards/abp-model-card.md | 79 +++------------------------- 1 file changed, 6 insertions(+), 73 deletions(-) diff --git a/models/model-cards/abp-model-card.md b/models/model-cards/abp-model-card.md index 3f9043db86..efc48eed7c 100644 --- a/models/model-cards/abp-model-card.md +++ b/models/model-cards/abp-model-card.md @@ -21,31 +21,24 @@ limitations under the License. # Model Overview ## Description: - * This model is an example of a binary XGBoost classifier to differentiate between anomalous GPU behavior, such as crypto mining / GPU malware, and non-anomalous GPU-based workflows (e.g., ML/DL training). This model is for demonstration purposes and not for production usage.
## References(s): - * Chen, Guestrin (2016) XGBoost. A scalable tree boosting system. https://arxiv.org/abs/1603.02754
## Model Architecture: - **Architecture Type:** - * Gradient boosting
**Network Architecture:** - * XGBOOST
## Input: (Enter "None" As Needed) **Input Format:** - * nvidia-smi output
**Input Parameters:** - * GPU statistics that are included in the nvidia-smi output
**Other Properties Related to Output:** N/A
@@ -53,33 +46,26 @@ limitations under the License. ## Output: (Enter "None" As Needed) **Output Format:** - * Binary Results
**Output Parameters:** - * N/A
**Other Properties Related to Output:** - * N/A
## Software Integration: **Runtime(s):** - * Morpheus
**Supported Hardware Platform(s):**
- * Ampere/Turing
**Supported Operating System(s):**
- * Linux
## Model Version(s): - * v1
# Training & Evaluation: @@ -87,31 +73,25 @@ limitations under the License. ## Training Dataset: **Link:** - * https://github.com/nv-morpheus/Morpheus/blob/branch-24.06/models/datasets/training-data/abp-sample-nvsmi-training-data.json
**Properties (Quantity, Dataset Descriptions, Sensor(s)):** - * Sample dataset consists of over 1000 nvidia-smi outputs
## Evaluation Dataset: **Link:** - * https://github.com/nv-morpheus/Morpheus/blob/branch-24.06/models/datasets/validation-data/abp-validation-data.jsonlines
**Properties (Quantity, Dataset Descriptions, Sensor(s)):** - * Sample dataset consists of over 1000 nvidia-smi outputs
## Inference: **Engine:** - * Triton
**Test Hardware:**
- * DGX (V100)
## Ethical Considerations: @@ -122,146 +102,99 @@ NVIDIA believes Trustworthy AI is a shared responsibility and we have establishe ## Model Card ++ Bias Subcard ### Individuals from the following adversely impacted (protected classes) groups participate in model design and testing. - * Not Applicable ### Describe measures taken to mitigate against unwanted bias. - * Not Applicable ## Model Card ++ Explainability Subcard ### Name example applications and use cases for this model. - * The model is primarily designed for testing purposes and serves as a small model specifically used to evaluate and validate the ABP pipeline. Its application is focused on assessing the effectiveness of the pipeline rather than being intended for broader use cases or specific applications beyond testing. -### Fill in the blank for the model technique. - +### Intended Users. * The model is primarily designed for testing purposes. This model is intended to be an example for developers that want to test Morpheus ABP pipeline. ### Name who is intended to benefit from this model. - * The intended beneficiaries of this model are developers who aim to test the functionality of the ABP models for detecting crypto mining. ### Describe the model output. - * This model output can be used as a binary result, Crypto mining or legitimate GPU usage. -### List the steps explaining how this model works. - +### Describe how this model works. * nvidia-smi features are used as the input and the model predicts a label for each row -### Name the adversely impacted groups (protected classes) this has been tested to deliver comparable outcomes regardless of: - -* Not Applicable - ### List the technical limitations of the model. - * For different GPU workloads different models need to be trained. -### Has this been verified to have met prescribed NVIDIA standards? - +### Has this been verified to have met prescribed NVIDIA quality standards? * Yes ### What performance metrics were used to affirm the model's performance? - * Accuracy ### What are the potential known risks to users and stakeholders? - * N/A ### Link the relevant end user license agreement - * [Apache 2.0](http://www.apache.org/licenses/LICENSE-2.0) ## Model Card ++ Saftey & Security Subcard ### Link the location of the training dataset's repository. - * https://github.com/nv-morpheus/Morpheus/blob/branch-24.06/models/datasets/training-data/abp-sample-nvsmi-training-data.json -### Is the model used in an application with physical safety impact? - -* No - -### Describe life-critical impact (if present). - -* N/A +### Describe the life critical impact (if present). +* None ### Was model and dataset assessed for vulnerability for potential form of attack? - * No ### Name applications for the model. - * The primary application for this model is testing the Morpheus pipeline. ### Name use case restrictions for the model. - * The model's use case is restricted to testing the Morpheus pipeline and may not be suitable for other applications. -### Name target quality Key Performance Indicators (KPIs) for which this has been tested. - -* N/A - ### Is the model and dataset compliant with National Classification Management Society (NCMS)? - * No ### Are there explicit model and dataset restrictions? - * No ### Are there access restrictions to systems, model, and data? - * No ### Is there a digital signature? - * No ## Model Card ++ Privacy Subcard ### Generatable or reverse engineerable personally-identifiable information (PII)? - * None ### Was consent obtained for any PII used? 
-
 * N/A
 
 ### Protected classes used to create this model? (The following were used in model the model's training:)
-
 * N/A
-
 
 ### How often is dataset reviewed?
-
 * The dataset is initially reviewed upon addition, and subsequent reviews are conducted as needed or upon request for any changes.
 
 ### Is a mechanism in place to honor data subject right of access or deletion of personal data?
-
 * N/A
 
 ### If PII collected for the development of this AI model, was it minimized to only what was required?
-
-* N/A
-
-### Is data in dataset traceable?
-
 * N/A
 
-### Are we able to identify and trace source of dataset?
-
+### Is there data provenance?
 * Yes
 
 ### Does data labeling (annotation, metadata) comply with privacy laws?
-
 * N/A
 
 ### Is data compliant with data subject requests for data correction or removal, if such a request was made?
-
 * N/A

From 78dab99ddceb8eab500a9abc0cd3ac605b94c4f7 Mon Sep 17 00:00:00 2001
From: David Gardner <96306125+dagardner-nv@users.noreply.github.com>
Date: Wed, 8 May 2024 13:22:53 -0700
Subject: [PATCH 31/38] Enable C++ mode for `abp_pcap_detection` example (#1687)

* Update the `pcap-preprocess` stage to ensure tensors are in row major as required by Triton.
* Update the `pcap-preprocess` stage to cast float64 data to the model's expected input of float32, removing the need to specify `force_convert_inputs`, since this stage is specific to this pipeline & model.

Closes #1675

## By Submitting this PR I confirm:
- I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md).
- When the PR is ready for review, new or existing tests cover these changes.
- When the PR is ready for review, the documentation is up to date with these changes.

Authors:
  - David Gardner (https://github.com/dagardner-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: https://github.com/nv-morpheus/Morpheus/pull/1687
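As a quick illustration of the cast described above, a minimal sketch with CuPy and cuDF; the `merged_df` dataframe and `fea_cols` column list here are hypothetical stand-ins for the stage's internal names:

```python
import cudf
import cupy as cp

# Hypothetical stand-ins for the stage's merged dataframe and feature-column list
merged_df = cudf.DataFrame({"f0": [0.1, 0.2], "f1": [1.0, 2.0]})
fea_cols = ["f0", "f1"]

# cuDF hands back float64 data by default; Triton expects float32 tensors
# in row-major (C) order, so both are requested explicitly in the cast.
data = cp.asarray(merged_df[fea_cols].to_cupy(), order='C', dtype=cp.float32)

assert data.dtype == cp.float32
assert data.flags.c_contiguous  # row-major, as Triton requires
```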
---
 examples/abp_pcap_detection/README.md       |  4 ++--
 .../abp_pcap_preprocessing.py               |  3 ++-
 examples/abp_pcap_detection/run.py          | 17 +++--------------
 .../test_abp_pcap_preprocessing.py          | 12 ++++++++----
 4 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/examples/abp_pcap_detection/README.md b/examples/abp_pcap_detection/README.md
index 3cfae25aa9..440c3fb783 100644
--- a/examples/abp_pcap_detection/README.md
+++ b/examples/abp_pcap_detection/README.md
@@ -99,13 +99,13 @@ Alternately, the Morpheus command line could have been used to accomplish the sa
 From the root of the Morpheus repo, run:
 ```bash
 morpheus --log_level INFO --plugin "examples/abp_pcap_detection/abp_pcap_preprocessing.py" \
-    run --use_cpp False --pipeline_batch_size 100000 --model_max_batch_size 100000 \
+    run --pipeline_batch_size 100000 --model_max_batch_size 100000 \
     pipeline-fil --model_fea_length 13 --label=probs \
     from-file --filename examples/data/abp_pcap_dump.jsonlines --filter_null False \
     deserialize \
     pcap-preprocess \
     monitor --description "Preprocessing rate" \
-    inf-triton --model_name "abp-pcap-xgb" --server_url "localhost:8001" --force_convert_inputs=True \
+    inf-triton --model_name "abp-pcap-xgb" --server_url "localhost:8000" \
     monitor --description "Inference rate" --unit inf \
     add-class --label=probs \
     monitor --description "Add classification rate" --unit "add-class" \
diff --git a/examples/abp_pcap_detection/abp_pcap_preprocessing.py b/examples/abp_pcap_detection/abp_pcap_preprocessing.py
index 453dc2a419..59a8060854 100644
--- a/examples/abp_pcap_detection/abp_pcap_preprocessing.py
+++ b/examples/abp_pcap_detection/abp_pcap_preprocessing.py
@@ -170,7 +170,8 @@ def round_time_kernel(timestamp, rollup_time, secs):
         del df, grouped_df
 
         # Convert the dataframe to cupy the same way cuml does
-        data = cp.asarray(merged_df[fea_cols].to_cupy())
+        # Explicity casting to float32 to match the model's input, and setting row-major as required by Triton
+        data = cp.asarray(merged_df[fea_cols].to_cupy(), order='C', dtype=cp.float32)
         count = data.shape[0]
 
         for col in req_cols:
diff --git a/examples/abp_pcap_detection/run.py b/examples/abp_pcap_detection/run.py
index 18d5c25e5d..8937351d16 100644
--- a/examples/abp_pcap_detection/run.py
+++ b/examples/abp_pcap_detection/run.py
@@ -21,7 +21,6 @@
 from morpheus.cli.commands import FILE_TYPE_NAMES
 from morpheus.cli.utils import str_to_file_type
 from morpheus.config import Config
-from morpheus.config import CppConfig
 from morpheus.config import PipelineModes
 from morpheus.pipeline.linear_pipeline import LinearPipeline
 from morpheus.stages.general.monitor_stage import MonitorStage
@@ -87,7 +86,7 @@
     help=("Iterative mode will emit dataframes one at a time. Otherwise a list of dataframes is emitted. "
           "Iterative mode is good for interleaving source stages."),
 )
-@click.option("--server_url", required=True, help="Tritonserver url.", default="localhost:8001")
+@click.option("--server_url", required=True, help="Tritonserver url.", default="localhost:8000")
 @click.option(
     "--file_type",
     type=click.Choice(FILE_TYPE_NAMES, case_sensitive=False),
@@ -111,8 +110,6 @@ def run_pipeline(
     # Enable the default logger.
     configure_logging(log_level=logging.INFO)
 
-    CppConfig.set_should_use_cpp(False)
-
     # Its necessary to get the global config object and configure it for FIL mode.
     config = Config()
     config.mode = PipelineModes.FIL
@@ -124,8 +121,6 @@ def run_pipeline(
     config.feature_length = model_fea_length
     config.class_labels = ["probs"]
 
-    kwargs = {}
-
     # Create a linear pipeline object.
     pipeline = LinearPipeline(config)
@@ -154,13 +149,7 @@ def run_pipeline(
     # Add a inference stage.
     # This stage sends inference requests to the Tritonserver and captures the response.
-    pipeline.add_stage(
-        TritonInferenceStage(
-            config,
-            model_name=model_name,
-            server_url=server_url,
-            force_convert_inputs=True,
-        ))
+    pipeline.add_stage(TritonInferenceStage(config, model_name=model_name, server_url=server_url))
 
     # Add a monitor stage.
     # This stage logs the metrics (inf/sec) from the above stage.
@@ -176,7 +165,7 @@ def run_pipeline(
     # Add a serialize stage.
     # This stage includes & excludes columns from messages.
-    pipeline.add_stage(SerializeStage(config, **kwargs))
+    pipeline.add_stage(SerializeStage(config))
 
     # Add a monitor stage.
     # This stage logs the metrics (msg/sec) from the above stage.
diff --git a/tests/examples/abp_pcap_detection/test_abp_pcap_preprocessing.py b/tests/examples/abp_pcap_detection/test_abp_pcap_preprocessing.py
index 97443d65d6..90a3c067f4 100755
--- a/tests/examples/abp_pcap_detection/test_abp_pcap_preprocessing.py
+++ b/tests/examples/abp_pcap_detection/test_abp_pcap_preprocessing.py
@@ -62,6 +62,8 @@ def check_inf_message(msg: MultiInferenceFILMessage,
     input__0 = msg.memory.get_tensor('input__0')
     assert input__0.shape == (expected_count, expected_feature_length)
+    assert input__0.dtype == cp.float32
+    assert input__0.strides == (expected_feature_length * 4, 4)
     assert (input__0 == expected_input__0).all()
 
     seq_ids = msg.memory.get_tensor('seq_ids')
@@ -87,10 +89,12 @@ def test_abp_pcap_preprocessing(config: Config, dataset_cudf: DatasetManager,
     input_df = dataset_cudf.get_df(input_file, no_cache=True, filter_nulls=False)
 
     expected_flow_ids = input_df.src_ip + ":" + input_df.src_port + "=" + input_df.dest_ip + ":" + input_df.dest_port
-    expected_input__0 = cp.asarray(
-        np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir, 'examples/abp_pcap_detection/abp_pcap_expected_input_0.csv'),
-                   delimiter=",",
-                   skiprows=0))
+    expected_input__0 = cp.asarray(np.loadtxt(os.path.join(TEST_DIRS.tests_data_dir,
+                                                           'examples/abp_pcap_detection/abp_pcap_expected_input_0.csv'),
+                                              delimiter=",",
+                                              skiprows=0,
+                                              dtype=np.float32),
+                                   order='C')
 
     assert len(input_df) == 20

From bf80d93958e78c7e0312cd75ad71d1b75d0befce Mon Sep 17 00:00:00 2001
From: David Gardner <96306125+dagardner-nv@users.noreply.github.com>
Date: Wed, 8 May 2024 23:12:48 -0700
Subject: [PATCH 32/38] Support the filter_null parameter in the C++ impl of the FileSourceStage (#1689)

* Fixes bug where the `filter_null` constructor argument to the `FileSourceStage` was only implemented in the Python impl of the stage.
* Update the `filter_null` feature to make the column(s) being filtered upon configurable; previously this was hard-coded to `"data"`.
* Add new `get_column_names` helper method to `CuDFTableUtil`.

Closes #1678

## By Submitting this PR I confirm:
- I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md).
- When the PR is ready for review, new or existing tests cover these changes.
- When the PR is ready for review, the documentation is up to date with these changes.
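For illustration, a minimal sketch of driving the updated stage from Python; the file name `input.jsonlines` and the column name `other` are assumed here, not taken from the diff:

```python
from morpheus.config import Config
from morpheus.pipeline import LinearPipeline
from morpheus.stages.input.file_source_stage import FileSourceStage

config = Config()

pipe = LinearPipeline(config)
# Drop any row whose "other" column is null before it enters the pipeline;
# with this change the same filtering is applied by the C++ impl as well.
pipe.set_source(
    FileSourceStage(config, filename="input.jsonlines", filter_null=True, filter_null_columns=["other"]))
pipe.run()
```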
Authors:
  - David Gardner (https://github.com/dagardner-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: https://github.com/nv-morpheus/Morpheus/pull/1689
---
 .../include/morpheus/io/deserializers.hpp     |  2 +-
 .../include/morpheus/stages/file_source.hpp   | 35 +++++---
 .../include/morpheus/utilities/table_util.hpp | 28 ++++++-
 morpheus/_lib/src/io/deserializers.cpp        | 16 ++--
 morpheus/_lib/src/stages/file_source.cpp      | 41 +++++++--
 morpheus/_lib/src/utilities/table_util.cpp    | 45 +++++++++-
 morpheus/_lib/stages/__init__.pyi             |  4 +-
 morpheus/_lib/stages/module.cpp               | 24 ++++--
 morpheus/_lib/tests/CMakeLists.txt            |  6 ++
 .../_lib/tests/utilities/test_table_util.cpp  | 79 +++++++++++++++++
 morpheus/io/deserializers.py                  | 84 +++++++++++--------
 morpheus/io/utils.py                          |  6 +-
 morpheus/stages/input/file_source_stage.py    | 17 +++-
 tests/stages/test_file_source_stage_pipe.py   | 69 +++++++++++++++
 tests/tests_data/file_with_nans.csv           |  3 +
 tests/tests_data/file_with_nans.jsonlines     |  3 +
 tests/tests_data/file_with_nulls.csv          |  3 +
 tests/tests_data/file_with_nulls.jsonlines    |  3 +
 18 files changed, 390 insertions(+), 78 deletions(-)
 create mode 100644 morpheus/_lib/tests/utilities/test_table_util.cpp
 create mode 100755 tests/stages/test_file_source_stage_pipe.py
 create mode 100644 tests/tests_data/file_with_nans.csv
 create mode 100644 tests/tests_data/file_with_nans.jsonlines
 create mode 100644 tests/tests_data/file_with_nulls.csv
 create mode 100644 tests/tests_data/file_with_nulls.jsonlines

diff --git a/morpheus/_lib/include/morpheus/io/deserializers.hpp b/morpheus/_lib/include/morpheus/io/deserializers.hpp
index d98cad6e9a..569d503eb9 100644
--- a/morpheus/_lib/include/morpheus/io/deserializers.hpp
+++ b/morpheus/_lib/include/morpheus/io/deserializers.hpp
@@ -71,7 +71,7 @@ int get_index_col_count(const cudf::io::table_with_metadata& data_table);
 int prepare_df_index(cudf::io::table_with_metadata& data_table);
 
 /**
- * @brief Loads a cudf table from either CSV or JSON file returning the DataFrame as a Python object
+ * @brief Loads a cudf table from either CSV, JSON or Parquet file returning the DataFrame as a Python object
  *
  * @param filename : Name of the file that should be loaded into a table
  * @return pybind11::object
diff --git a/morpheus/_lib/include/morpheus/stages/file_source.hpp b/morpheus/_lib/include/morpheus/stages/file_source.hpp
index 6ed1ea4852..95ec2ebd64 100644
--- a/morpheus/_lib/include/morpheus/stages/file_source.hpp
+++ b/morpheus/_lib/include/morpheus/stages/file_source.hpp
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include <vector>
 
 namespace morpheus {
 /****** Component public implementations *******************/
@@ -61,13 +62,19 @@ class FileSourceStage : public mrc::pymrc::PythonSource<std::shared_ptr<MessageMeta>>
      */
-    FileSourceStage(std::string filename, int repeat = 1, std::optional<bool> json_lines = std::nullopt);
+    FileSourceStage(std::string filename,
+                    int repeat = 1,
+                    bool filter_null = true,
+                    std::vector<std::string> filter_null_columns = {},
+                    std::optional<bool> json_lines = std::nullopt);
 
   private:
     subscriber_fn_t build();
 
     std::string m_filename;
     int m_repeat{1};
+    bool m_filter_null{true};
+    std::vector<std::string> m_filter_null_columns;
     std::optional<bool> m_json_lines;
 };
 
@@ -87,16 +94,22 @@ struct FileSourceStageInterfaceProxy
      * @param parser_kwargs : Optional arguments to pass to the file parser.
      * @return std::shared_ptr<mrc::segment::Object<FileSourceStage>>
      */
-    static std::shared_ptr<mrc::segment::Object<FileSourceStage>> init(mrc::segment::Builder& builder,
-                                                                       const std::string& name,
-                                                                       std::string filename,
-                                                                       int repeat = 1,
-                                                                       pybind11::dict parser_kwargs = pybind11::dict());
-    static std::shared_ptr<mrc::segment::Object<FileSourceStage>> init(mrc::segment::Builder& builder,
-                                                                       const std::string& name,
-                                                                       std::filesystem::path filename,
-                                                                       int repeat = 1,
-                                                                       pybind11::dict parser_kwargs = pybind11::dict());
+    static std::shared_ptr<mrc::segment::Object<FileSourceStage>> init(
+        mrc::segment::Builder& builder,
+        const std::string& name,
+        std::string filename,
+        int repeat = 1,
+        bool filter_null = true,
+        std::vector<std::string> filter_null_columns = {},
+        pybind11::dict parser_kwargs = pybind11::dict());
+    static std::shared_ptr<mrc::segment::Object<FileSourceStage>> init(
+        mrc::segment::Builder& builder,
+        const std::string& name,
+        std::filesystem::path filename,
+        int repeat = 1,
+        bool filter_null = true,
+        std::vector<std::string> filter_null_columns = {},
+        pybind11::dict parser_kwargs = pybind11::dict());
 };
 #pragma GCC visibility pop
 /** @} */  // end of group

diff --git a/morpheus/_lib/include/morpheus/utilities/table_util.hpp b/morpheus/_lib/include/morpheus/utilities/table_util.hpp
index b8797901ea..9cef0ee87b 100644
--- a/morpheus/_lib/include/morpheus/utilities/table_util.hpp
+++ b/morpheus/_lib/include/morpheus/utilities/table_util.hpp
@@ -15,10 +15,13 @@
  * limitations under the License.
  */
 
+#include "morpheus/export.h"  // for MORPHEUS_EXPORT
+
 #include
 #include  // IWYU pragma: keep
 
 #include
+#include <vector>
 
 #pragma once
 
@@ -35,12 +38,33 @@ namespace morpheus {
 /**
  * @brief Structure that encapsulates cuDF table utilities.
  */
-struct CuDFTableUtil
+struct MORPHEUS_EXPORT CuDFTableUtil
 {
     /**
-     * TODO(Documentation)
+     * @brief Load a table from a file.
+     *
+     * @param filename The name of the file to load.
+     * @return cudf::io::table_with_metadata The table loaded from the file.
      */
     static cudf::io::table_with_metadata load_table(const std::string& filename);
+
+    /**
+     * @brief Get the column names from a cudf table_with_metadata.
+     *
+     * @param table The table to get the column names from.
+     * @return std::vector<std::string> The column names.
+     */
+    static std::vector<std::string> get_column_names(const cudf::io::table_with_metadata& table);
+
+    /**
+     * @brief Filters rows from a table that contain null values in a given columns.
+     * null values in columns other than those specified in `filter_columns` are not considered.
+     * Any missing columns are ignored.
+     *
+     * @param table The table to filter
+     * @param filter_columns The name of the columns to filter on
+     */
+    static void filter_null_data(cudf::io::table_with_metadata& table, const std::vector<std::string>& filter_columns);
 };
 /** @} */  // end of group
 }  // namespace morpheus
diff --git a/morpheus/_lib/src/io/deserializers.cpp b/morpheus/_lib/src/io/deserializers.cpp
index 4704b1ba3d..032cffd57b 100644
--- a/morpheus/_lib/src/io/deserializers.cpp
+++ b/morpheus/_lib/src/io/deserializers.cpp
@@ -20,6 +20,7 @@
 #include "morpheus/utilities/cudf_util.hpp"  // for CudfHelper
 #include "morpheus/utilities/stage_util.hpp"
 #include "morpheus/utilities/string_util.hpp"
+#include "morpheus/utilities/table_util.hpp"  // for get_column_names
 
 #include
 #include
@@ -29,8 +30,6 @@
 #include  // for cudf::type_id
 #include  // IWYU pragma: keep
 
-#include
-#include
 #include
 #include
 #include
@@ -50,7 +49,9 @@ namespace morpheus {
 
 std::vector<std::string> get_column_names_from_table(const cudf::io::table_with_metadata& table)
 {
-    return foreach_map(table.metadata.schema_info, [](auto schema) { return schema.name; });
+    return foreach_map(table.metadata.schema_info, [](auto schema) {
+        return schema.name;
+    });
 }
 
 cudf::io::table_with_metadata load_table_from_file(const std::string& filename,
@@ -69,7 +70,7 @@ cudf::io::table_with_metadata load_table_from_file(const std::string& filename,
     case FileTypes::JSON: {
         auto options =
             cudf::io::json_reader_options::builder(cudf::io::source_info{filename}).lines(json_lines.value_or(true));
-        table = cudf::io::read_json(options.build());
+        table        = cudf::io::read_json(options.build());
         break;
     }
     case FileTypes::CSV: {
@@ -106,12 +107,9 @@ pybind11::object read_file_to_df(const std::string& filename, FileTypes file_typ
 
 int get_index_col_count(const cudf::io::table_with_metadata& data_table)
 {
-    int index_col_count = 0;
-    auto const& schema  = data_table.metadata.schema_info;
+    int index_col_count = 0;
 
-    std::vector<std::string> names;
-    names.reserve(schema.size());
-    std::transform(schema.cbegin(), schema.cend(), std::back_inserter(names), [](auto const& c) { return c.name; });
+    std::vector<std::string> names = CuDFTableUtil::get_column_names(data_table);
 
     // Check if we have a first column with INT64 data type
     if (names.size() >= 1 && data_table.tbl->get_column(0).type().id() == cudf::type_id::INT64)
diff --git a/morpheus/_lib/src/stages/file_source.cpp b/morpheus/_lib/src/stages/file_source.cpp
index 84a59f5f12..c3dce33693 100644
--- a/morpheus/_lib/src/stages/file_source.cpp
+++ b/morpheus/_lib/src/stages/file_source.cpp
@@ -24,6 +24,7 @@
 #include "morpheus/objects/file_types.hpp"
 #include "morpheus/objects/table_info.hpp"
 #include "morpheus/utilities/cudf_util.hpp"
+#include "morpheus/utilities/table_util.hpp"  // for filter_null_data
 
 #include
 #include
@@ -37,24 +38,39 @@
 #include
 #include
 #include
+#include <stdexcept>  // for invalid_argument
 #include
-// IWYU thinks we need __alloc_traits<>::value_type for vector assignments
-// IWYU pragma: no_include
 
 namespace morpheus {
 // Component public implementations
 // ************ FileSourceStage ************* //
-FileSourceStage::FileSourceStage(std::string filename, int repeat, std::optional<bool> json_lines) :
+FileSourceStage::FileSourceStage(std::string filename,
+                                 int repeat,
+                                 bool filter_null,
+                                 std::vector<std::string> filter_null_columns,
+                                 std::optional<bool> json_lines) :
   PythonSource(build()),
   m_filename(std::move(filename)),
   m_repeat(repeat),
+  m_filter_null(filter_null),
+  m_filter_null_columns(std::move(filter_null_columns)),
  m_json_lines(json_lines)
-{}
+{
+    if (m_filter_null && m_filter_null_columns.empty())
+    {
+        throw std::invalid_argument("Filter null columns must not be empty if filter_null is true");
+    }
+}
 
 FileSourceStage::subscriber_fn_t FileSourceStage::build()
 {
     return [this](rxcpp::subscriber<source_type_t> output) {
-        auto data_table = load_table_from_file(m_filename, FileTypes::Auto, m_json_lines);
+        auto data_table     = load_table_from_file(m_filename, FileTypes::Auto, m_json_lines);
+        if (m_filter_null)
+        {
+            CuDFTableUtil::filter_null_data(data_table, m_filter_null_columns);
+        }
+
         int index_col_count = prepare_df_index(data_table);
 
         // Next, create the message metadata. This gets reused for repeats
@@ -116,6 +132,8 @@ std::shared_ptr<mrc::segment::Object<FileSourceStage>> FileSourceStageInterfaceP
     const std::string& name,
     std::string filename,
     int repeat,
+    bool filter_null,
+    std::vector<std::string> filter_null_columns,
     pybind11::dict parser_kwargs)
 {
     std::optional<bool> json_lines = std::nullopt;
@@ -125,7 +143,8 @@ std::shared_ptr<mrc::segment::Object<FileSourceStage>> FileSourceStageInterfaceP
         json_lines = parser_kwargs["lines"].cast<bool>();
     }
 
-    auto stage = builder.construct_object<FileSourceStage>(name, filename, repeat, json_lines);
+    auto stage = builder.construct_object<FileSourceStage>(
+        name, filename, repeat, filter_null, std::move(filter_null_columns), json_lines);
 
     return stage;
 }
@@ -135,8 +154,16 @@ std::shared_ptr<mrc::segment::Object<FileSourceStage>> FileSourceStageInterfaceP
     const std::string& name,
     std::filesystem::path filename,
     int repeat,
+    bool filter_null,
+    std::vector<std::string> filter_null_columns,
     pybind11::dict parser_kwargs)
 {
-    return init(builder, name, filename.string(), repeat, std::move(parser_kwargs));
+    return init(builder,
+                name,
+                filename.string(),
+                repeat,
+                filter_null,
+                std::move(filter_null_columns),
+                std::move(parser_kwargs));
 }
 }  // namespace morpheus
diff --git a/morpheus/_lib/src/utilities/table_util.cpp b/morpheus/_lib/src/utilities/table_util.cpp
index 1c93493d92..d6aa159b6d 100644
--- a/morpheus/_lib/src/utilities/table_util.cpp
+++ b/morpheus/_lib/src/utilities/table_util.cpp
@@ -19,17 +19,24 @@
 
 #include
 #include
+#include <cudf/stream_compaction.hpp>  // for drop_nulls
+#include <cudf/types.hpp>              // for size_type
 #include
 #include
+#include <algorithm>  // for find, transform
 #include
+#include <iterator>  // for back_insert_iterator, back_inserter
+#include <memory>    // for unique_ptr
 #include  // needed for logging
 #include  // for runtime_error
 
+namespace {
 namespace fs = std::filesystem;
 namespace py = pybind11;
-
-cudf::io::table_with_metadata morpheus::CuDFTableUtil::load_table(const std::string& filename)
+}  // namespace
+namespace morpheus {
+cudf::io::table_with_metadata CuDFTableUtil::load_table(const std::string& filename)
 {
     auto file_path = fs::path(filename);
 
@@ -52,3 +59,37 @@ cudf::io::table_with_metadata morpheus::CuDFTableUtil::load_table(const std::str
         throw std::runtime_error("Unknown extension");
     }
 }
+
+std::vector<std::string> CuDFTableUtil::get_column_names(const cudf::io::table_with_metadata& table)
+{
+    auto const& schema = table.metadata.schema_info;
+
+    std::vector<std::string> names;
+    names.reserve(schema.size());
+    std::transform(schema.cbegin(), schema.cend(), std::back_inserter(names), [](auto const& c) {
+        return c.name;
+    });
+
+    return names;
+}
+
+void CuDFTableUtil::filter_null_data(cudf::io::table_with_metadata& table,
+                                     const std::vector<std::string>& filter_columns)
+{
+    std::vector<cudf::size_type> filter_keys;
+    auto column_names = get_column_names(table);
+    for (const auto& column_name : filter_columns)
+    {
+        auto found_col = std::find(column_names.cbegin(), column_names.cend(), column_name);
+        if (found_col != column_names.cend())
+        {
+            filter_keys.push_back((found_col - column_names.cbegin()));
+        }
+    }
+
+    auto tv             = table.tbl->view();
+    auto filtered_table = cudf::drop_nulls(tv, filter_keys, filter_keys.size());
+
+    table.tbl.swap(filtered_table);
+}
+}  // namespace morpheus
diff --git a/morpheus/_lib/stages/__init__.pyi b/morpheus/_lib/stages/__init__.pyi
index 78a0ff8091..bfd66dcb64 100644
--- a/morpheus/_lib/stages/__init__.pyi
+++ b/morpheus/_lib/stages/__init__.pyi
@@ -60,9 +60,9 @@ class DeserializeMultiMessageStage(mrc.core.segment.SegmentObject):
     pass
 class FileSourceStage(mrc.core.segment.SegmentObject):
     @typing.overload
-    def __init__(self, builder: mrc.core.segment.Builder, name: str, filename: os.PathLike, repeat: int, parser_kwargs: dict) -> None: ...
+    def __init__(self, builder: mrc.core.segment.Builder, name: str, filename: os.PathLike, repeat: int, filter_null: bool, filter_null_columns: typing.List[str], parser_kwargs: dict) -> None: ...
     @typing.overload
-    def __init__(self, builder: mrc.core.segment.Builder, name: str, filename: str, repeat: int, parser_kwargs: dict) -> None: ...
+    def __init__(self, builder: mrc.core.segment.Builder, name: str, filename: str, repeat: int, filter_null: bool, filter_null_columns: typing.List[str], parser_kwargs: dict) -> None: ...
     pass
 class FilterDetectionsStage(mrc.core.segment.SegmentObject):
     def __init__(self, builder: mrc.core.segment.Builder, name: str, threshold: float, copy: bool, filter_source: morpheus._lib.common.FilterSource, field_name: str = 'probs') -> None: ...
diff --git a/morpheus/_lib/stages/module.cpp b/morpheus/_lib/stages/module.cpp
index 1cf57663ac..32c3c5e030 100644
--- a/morpheus/_lib/stages/module.cpp
+++ b/morpheus/_lib/stages/module.cpp
@@ -52,6 +52,7 @@
 #include
 #include
 #include
+#include <vector>
 
 namespace morpheus {
 namespace py = pybind11;
@@ -138,20 +139,33 @@ PYBIND11_MODULE(stages, _module)
                mrc::segment::ObjectProperties,
                std::shared_ptr<mrc::segment::ObjectProperties>>(
         _module, "FileSourceStage", py::multiple_inheritance())
-        .def(py::init(py::overload_cast<mrc::segment::Builder&, const std::string&, std::string, int, py::dict>(
-                 &FileSourceStageInterfaceProxy::init)),
+        .def(py::init(py::overload_cast<mrc::segment::Builder&,
+                                        const std::string&,
+                                        std::string,
+                                        int,
+                                        bool,
+                                        std::vector<std::string>,
+                                        py::dict>(&FileSourceStageInterfaceProxy::init)),
             py::arg("builder"),
             py::arg("name"),
             py::arg("filename"),
             py::arg("repeat"),
+            py::arg("filter_null"),
+            py::arg("filter_null_columns"),
             py::arg("parser_kwargs"))
-        .def(py::init(
-                 py::overload_cast<mrc::segment::Builder&, const std::string&, std::filesystem::path, int, py::dict>(
-                     &FileSourceStageInterfaceProxy::init)),
+        .def(py::init(py::overload_cast<mrc::segment::Builder&,
+                                        const std::string&,
+                                        std::filesystem::path,
+                                        int,
+                                        bool,
+                                        std::vector<std::string>,
+                                        py::dict>(&FileSourceStageInterfaceProxy::init)),
             py::arg("builder"),
             py::arg("name"),
             py::arg("filename"),
             py::arg("repeat"),
+            py::arg("filter_null"),
+            py::arg("filter_null_columns"),
             py::arg("parser_kwargs"));
 
     py::class_<FilterDetectionsStage,
diff --git a/morpheus/_lib/tests/CMakeLists.txt b/morpheus/_lib/tests/CMakeLists.txt
index a17a297aca..e42e7717e8 100644
--- a/morpheus/_lib/tests/CMakeLists.txt
+++ b/morpheus/_lib/tests/CMakeLists.txt
@@ -188,4 +188,10 @@ add_morpheus_test(
     test_type_util.cpp
 )
 
+add_morpheus_test(
+  NAME table_util
+  FILES
+    utilities/test_table_util.cpp
+)
+
 list(POP_BACK CMAKE_MESSAGE_CONTEXT)
diff --git a/morpheus/_lib/tests/utilities/test_table_util.cpp b/morpheus/_lib/tests/utilities/test_table_util.cpp
new file mode 100644
index 0000000000..021b8a8322
--- /dev/null
+++ b/morpheus/_lib/tests/utilities/test_table_util.cpp
@@ -0,0 +1,79 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../test_utils/common.hpp"  // IWYU pragma: associated
+
+#include "morpheus/io/deserializers.hpp"
+#include "morpheus/utilities/table_util.hpp"  // for filter_null_data
+
+#include <cudf/io/types.hpp>  // for table_with_metadata
+#include <cudf/table/table.hpp>  // for table
+#include
+
+#include
+#include
+#include <utility>  // for pair
+#include
+// IWYU pragma: no_include
+
+using namespace morpheus;
+
+TEST_CLASS(TableUtil);
+
+TEST_F(TestTableUtil, GetColumnNames)
+{
+    auto morpheus_root = test::get_morpheus_root();
+    auto input_files   = {morpheus_root / "tests/tests_data/file_with_nulls.csv",
+                          morpheus_root / "tests/tests_data/file_with_nulls.jsonlines"};
+
+    for (const auto& input_file : input_files)
+    {
+        auto table_w_meta = load_table_from_file(input_file);
+        auto column_names = CuDFTableUtil::get_column_names(table_w_meta);
+
+        EXPECT_EQ(column_names.size(), 2);
+        EXPECT_EQ(column_names[0], "data");
+        EXPECT_EQ(column_names[1], "other");
+    }
+}
+
+TEST_F(TestTableUtil, FilterNullData)
+{
+    auto morpheus_root = test::get_morpheus_root();
+    auto input_files   = {morpheus_root / "tests/tests_data/file_with_nans.csv",
+                          morpheus_root / "tests/tests_data/file_with_nans.jsonlines",
+                          morpheus_root / "tests/tests_data/file_with_nulls.csv",
+                          morpheus_root / "tests/tests_data/file_with_nulls.jsonlines"};
+    std::vector<std::pair<std::vector<std::string>, std::size_t>> expected_row_counts{
+        {{"data"}, 8}, {{"data"}, 8}, {{"other"}, 7}, {{"other"}, 7}, {{"data", "other"}, 5}};
+
+    for (const auto& input_file : input_files)
+    {
+        for (const auto& [filter_columns, expected_row_count] : expected_row_counts)
+        {
+            auto table_w_meta = load_table_from_file(input_file);
+
+            EXPECT_EQ(table_w_meta.tbl->num_columns(), 2);
+            EXPECT_EQ(table_w_meta.tbl->num_rows(), 10);
+
+            CuDFTableUtil::filter_null_data(table_w_meta, filter_columns);
+
+            EXPECT_EQ(table_w_meta.tbl->num_columns(), 2);
+            EXPECT_EQ(table_w_meta.tbl->num_rows(), expected_row_count);
+        }
+    }
+}
diff --git a/morpheus/io/deserializers.py b/morpheus/io/deserializers.py
index 293bc2a303..31499b4359 100644
--- a/morpheus/io/deserializers.py
+++ b/morpheus/io/deserializers.py
@@ -29,40 +29,11 @@
 from morpheus.utils.type_aliases import DataFrameType
 
 
-def read_file_to_df(file_name: typing.Union[str, io.IOBase],
-                    file_type: FileTypes = FileTypes.Auto,
-                    parser_kwargs: dict = None,
-                    filter_nulls: bool = True,
-                    df_type: typing.Literal["cudf", "pandas"] = "pandas") -> DataFrameType:
-    """
-    Reads a file into a dataframe and performs any of the necessary cleanup.
-
-    Parameters
-    ----------
-    file_name : str
-        File to read.
-    file_type : `morpheus.common.FileTypes`
-        Type of file. Leave as Auto to determine from the extension.
-    parser_kwargs : dict, optional
-        Any argument to pass onto the parse, by default {}. Ignored when C++ execution is enabled and `df_type="cudf"`
-    filter_nulls : bool, optional
-        Whether to filter null rows after loading, by default True.
-    df_type : typing.Literal[, optional
-        What type of parser to use. Options are 'cudf' and 'pandas', by default "pandas".
-
-    Returns
-    -------
-    DataFrameType
-        A parsed DataFrame.
- """ - - # The C++ reader only supports cudf dataframes - if (CppConfig.get_should_use_cpp() and df_type == "cudf"): - df = read_file_to_df_cpp(file_name, file_type) - if (filter_nulls): - df = filter_null_data(df) - return df - +def _read_file_to_df_py(*, + file_name: typing.Union[str, io.IOBase], + file_type: FileTypes, + parser_kwargs: dict, + df_type: typing.Literal["cudf", "pandas"]) -> DataFrameType: if (parser_kwargs is None): parser_kwargs = {} @@ -111,7 +82,50 @@ def read_file_to_df(file_name: typing.Union[str, io.IOBase], assert df is not None + return df + + +def read_file_to_df(file_name: typing.Union[str, io.IOBase], + file_type: FileTypes = FileTypes.Auto, + parser_kwargs: dict = None, + filter_nulls: bool = True, + filter_null_columns: list[str] | str = 'data', + df_type: typing.Literal["cudf", "pandas"] = "pandas") -> DataFrameType: + """ + Reads a file into a dataframe and performs any of the necessary cleanup. + + Parameters + ---------- + file_name : str + File to read. + file_type : `morpheus.common.FileTypes` + Type of file. Leave as Auto to determine from the extension. + parser_kwargs : dict, optional + Any argument to pass onto the parse, by default {}. Ignored when C++ execution is enabled and `df_type="cudf"` + filter_nulls : bool, optional + Whether to filter null rows after loading, by default True. + filter_null_columns : list[str]|str, default = 'data' + Column or columns to filter null values from. Ignored when `filter_null` is False. + df_type : typing.Literal[, optional + What type of parser to use. Options are 'cudf' and 'pandas', by default "pandas". + + Returns + ------- + DataFrameType + A parsed DataFrame. + """ + + # The C++ reader only supports cudf dataframes + if (CppConfig.get_should_use_cpp() and df_type == "cudf"): + df = read_file_to_df_cpp(file_name, file_type) + else: + df = _read_file_to_df_py(file_name=file_name, file_type=file_type, parser_kwargs=parser_kwargs, df_type=df_type) + if (filter_nulls): - df = filter_null_data(df) + if isinstance(filter_null_columns, str): + filter_null_columns = [filter_null_columns] + + for col in filter_null_columns: + df = filter_null_data(df, column_name=col) return df diff --git a/morpheus/io/utils.py b/morpheus/io/utils.py index d8b286a8e8..9a20afb4d5 100644 --- a/morpheus/io/utils.py +++ b/morpheus/io/utils.py @@ -26,7 +26,7 @@ logger = logging.getLogger(__name__) -def filter_null_data(x: DataFrameType): +def filter_null_data(x: DataFrameType, column_name: str = "data") -> DataFrameType: """ Filters out null row in a dataframe's 'data' column if it exists. @@ -34,12 +34,14 @@ def filter_null_data(x: DataFrameType): ---------- x : DataFrameType The dataframe to fix. + column_name : str, default 'data' + The column name to filter on. """ if ("data" not in x): return x - return x[~x['data'].isna()] + return x[~x[column_name].isna()] def cudf_string_cols_exceed_max_bytes(df: cudf.DataFrame, column_max_bytes: dict[str, int]) -> bool: diff --git a/morpheus/stages/input/file_source_stage.py b/morpheus/stages/input/file_source_stage.py index eb4630fb3e..9b3551dce6 100644 --- a/morpheus/stages/input/file_source_stage.py +++ b/morpheus/stages/input/file_source_stage.py @@ -57,8 +57,11 @@ class FileSourceStage(PreallocatorMixin, SingleOutputSource): repeat : int, default = 1, min = 1 Repeats the input dataset multiple times. Useful to extend small datasets for debugging. filter_null : bool, default = True - Whether to filter rows with null 'data' column. 
Null values in the 'data' column can cause issues down - the line with processing. Setting this to True is recommended. + Whether to filter rows with null `filter_null_columns` columns. Null values in source data can cause issues + down the line with processing. Setting this to True is recommended. + filter_null_columns : list[str], default = None + Column or columns to filter null values from. Ignored when `filter_null` is False. If None, and `filter_null` + is `True`, this will default to `["data"]` parser_kwargs : dict, default = {} Extra options to pass to the file parser. """ @@ -70,6 +73,7 @@ def __init__(self, file_type: FileTypes = FileTypes.Auto, repeat: int = 1, filter_null: bool = True, + filter_null_columns: list[str] = None, parser_kwargs: dict = None): super().__init__(c) @@ -79,6 +83,12 @@ def __init__(self, self._filename = filename self._file_type = file_type self._filter_null = filter_null + + if filter_null_columns is None or len(filter_null_columns) == 0: + filter_null_columns = ["data"] + + self._filter_null_columns = filter_null_columns + self._parser_kwargs = parser_kwargs or {} self._input_count = None @@ -114,6 +124,8 @@ def _build_source(self, builder: mrc.Builder) -> mrc.SegmentObject: self.unique_name, self._filename, self._repeat_count, + self._filter_null, + self._filter_null_columns, self._parser_kwargs) else: node = builder.make_source(self.unique_name, self._generate_frames()) @@ -126,6 +138,7 @@ def _generate_frames(self) -> typing.Iterable[MessageMeta]: self._filename, self._file_type, filter_nulls=self._filter_null, + filter_null_columns=self._filter_null_columns, parser_kwargs=self._parser_kwargs, df_type="cudf", ) diff --git a/tests/stages/test_file_source_stage_pipe.py b/tests/stages/test_file_source_stage_pipe.py new file mode 100755 index 0000000000..59f9c76d63 --- /dev/null +++ b/tests/stages/test_file_source_stage_pipe.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+import pathlib
+
+import pandas as pd
+import pytest
+
+from _utils import TEST_DIRS
+from _utils import assert_results
+from morpheus.common import FileTypes
+from morpheus.common import determine_file_type
+from morpheus.config import Config
+from morpheus.io.deserializers import read_file_to_df
+from morpheus.pipeline import LinearPipeline
+from morpheus.stages.input.file_source_stage import FileSourceStage
+from morpheus.stages.output.compare_dataframe_stage import CompareDataFrameStage
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("input_file",
+                         [
+                             os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.csv"),
+                             os.path.join(TEST_DIRS.tests_data_dir, "filter_probs.parquet"),
+                             os.path.join(TEST_DIRS.tests_data_dir, 'examples/abp_pcap_detection/abp_pcap.jsonlines')
+                         ],
+                         ids=["csv", "parquet", "jsonlines"])
+@pytest.mark.parametrize("filter_null", [False, True], ids=["no_filter", "filter_null"])
+@pytest.mark.parametrize("use_pathlib", [False, True], ids=["no_pathlib", "pathlib"])
+@pytest.mark.parametrize("repeat", [1, 2, 5], ids=["repeat1", "repeat2", "repeat5"])
+def test_file_source_stage_pipe(config: Config, input_file: str, filter_null: bool, use_pathlib: bool, repeat: int):
+    parser_kwargs = {}
+    if determine_file_type(input_file) == FileTypes.JSON:
+        # kwarg specific to pandas.read_json
+        parser_kwargs['convert_dates'] = False
+
+    expected_df = read_file_to_df(file_name=input_file,
+                                  filter_nulls=filter_null,
+                                  df_type="pandas",
+                                  parser_kwargs=parser_kwargs)
+    expected_df = pd.concat([expected_df for _ in range(repeat)])
+
+    expected_df.reset_index(inplace=True)
+    expected_df.drop('index', axis=1, inplace=True)
+
+    if use_pathlib:
+        input_file = pathlib.Path(input_file)
+
+    pipe = LinearPipeline(config)
+    pipe.set_source(FileSourceStage(config, filename=input_file, repeat=repeat, filter_null=filter_null))
+    comp_stage = pipe.add_stage(
+        CompareDataFrameStage(config, compare_df=expected_df, exclude=["index"], reset_index=True))
+    pipe.run()
+
+    assert_results(comp_stage.get_results())
diff --git a/tests/tests_data/file_with_nans.csv b/tests/tests_data/file_with_nans.csv
new file mode 100644
index 0000000000..ff3a8643fa
--- /dev/null
+++ b/tests/tests_data/file_with_nans.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a173a9d2027a90c7df128dac1f9126160107954fc286a13d04bd94824d668b8
+size 76
diff --git a/tests/tests_data/file_with_nans.jsonlines b/tests/tests_data/file_with_nans.jsonlines
new file mode 100644
index 0000000000..7a9190ce40
--- /dev/null
+++ b/tests/tests_data/file_with_nans.jsonlines
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:559f654cd30742b2fe49ec6f163118b660b61f2c6ebe5acb13bdfeb907fe9865
+size 255
diff --git a/tests/tests_data/file_with_nulls.csv b/tests/tests_data/file_with_nulls.csv
new file mode 100644
index 0000000000..d2416abb19
--- /dev/null
+++ b/tests/tests_data/file_with_nulls.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65dbc84b9c7ebe0132fbcab419fe681a1628cb7b1c08f09ca62c2b46fbd56c59
+size 46
diff --git a/tests/tests_data/file_with_nulls.jsonlines b/tests/tests_data/file_with_nulls.jsonlines
new file mode 100644
index 0000000000..af82d24f9f
--- /dev/null
+++ b/tests/tests_data/file_with_nulls.jsonlines
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1dfca1a616e66ebdcdb87d4adb9b15af594ca6fded67b4d9af8181b061e559f
+size 255

From 9719d9f0f5d368381004635e171399bc07380216 Mon Sep 17 00:00:00 2001
From: Aser Garcia
Date: Thu, 9 May 2024 11:19:48 -0400
Subject: [PATCH 33/38] Ensuring consistent use of the export macro `MORPHEUS_EXPORT` (#1672)

This PR makes exporting symbols consistent across header files. The docs and examples for the developer_guide are also updated.

Closes #1595

## By Submitting this PR I confirm:
- I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md).
- When the PR is ready for review, new or existing tests cover these changes.
- When the PR is ready for review, the documentation is up to date with these changes.

Authors:
  - Aser Garcia (https://github.com/aserGarcia)
  - David Gardner (https://github.com/dagardner-nv)

Approvers:
  - Yuchen Zhang (https://github.com/yuchenz427)
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: https://github.com/nv-morpheus/Morpheus/pull/1672
---
 .../guides/3_simple_cpp_stage.md              | 29 +++++-----
 .../guides/4_source_cpp_stage.md              | 18 +++----
 docs/source/developer_guide/guides/8_cpp_modules.md |  7 +--
 .../src/simple_cpp_stage/_lib/pass_thru.hpp   | 11 ++--
 .../_lib/rabbitmq_source.hpp                  | 11 ++--
 .../include/morpheus/doca/doca_source.hpp     |  9 ++--
 .../_lib/include/morpheus/io/data_loader.hpp  |  8 ++-
 .../morpheus/io/data_loader_registry.hpp      |  7 ++-
 .../include/morpheus/io/deserializers.hpp     | 18 +++----
 .../_lib/include/morpheus/io/loaders/file.hpp |  7 +--
 .../_lib/include/morpheus/io/loaders/grpc.hpp |  7 +--
 .../include/morpheus/io/loaders/lambda.hpp    |  7 +--
 .../include/morpheus/io/loaders/payload.hpp   |  7 +--
 .../_lib/include/morpheus/io/loaders/rest.hpp |  7 +--
 .../_lib/include/morpheus/io/serializers.hpp  | 54 ++++++++++---------
 .../include/morpheus/messages/control.hpp     | 11 ++--
 .../messages/memory/inference_memory.hpp      |  8 ++-
 .../messages/memory/inference_memory_fil.hpp  |  5 +-
 .../messages/memory/inference_memory_nlp.hpp  |  9 ++--
 .../messages/memory/response_memory.hpp       |  9 ++--
 .../messages/memory/response_memory_probs.hpp |  8 +--
 .../messages/memory/tensor_memory.hpp         |  8 ++-
 .../_lib/include/morpheus/messages/meta.hpp   | 12 ++---
 .../_lib/include/morpheus/messages/multi.hpp  | 17 +++---
 .../morpheus/messages/multi_inference.hpp     |  8 +--
 .../morpheus/messages/multi_inference_fil.hpp |  9 ++--
 .../morpheus/messages/multi_inference_nlp.hpp |  8 +--
 .../morpheus/messages/multi_response.hpp      |  8 +--
 .../messages/multi_response_probs.hpp         |  9 ++--
 .../morpheus/messages/multi_tensor.hpp        |  8 ++-
 .../morpheus/modules/data_loader_module.hpp   |  6 +--
 .../_lib/include/morpheus/objects/dtype.hpp   | 10 ++--
 .../morpheus/objects/factory_registry.hpp     |  7 ++-
 .../include/morpheus/objects/fiber_queue.hpp  |  9 ++--
 .../include/morpheus/objects/file_types.hpp   | 14 +++--
 .../morpheus/objects/filter_source.hpp        |  9 ++--
 .../objects/mutable_table_ctx_mgr.hpp         |  6 +--
 .../include/morpheus/objects/rmm_tensor.hpp   |  7 ++-
 .../_lib/include/morpheus/objects/tensor.hpp  |  6 +--
 .../morpheus/objects/wrapped_tensor.hpp       |  5 +-
 .../morpheus/stages/add_classification.hpp    |  8 ++-
 .../include/morpheus/stages/add_scores.hpp    |  7 ++-
 .../morpheus/stages/add_scores_stage_base.hpp |  6 +--
 .../include/morpheus/stages/deserialize.hpp   |  9 ++--
 .../include/morpheus/stages/file_source.hpp   |  7 ++-
 .../morpheus/stages/filter_detection.hpp      |  8 ++-
 .../stages/http_server_source_stage.hpp       |  8 ++-
 .../include/morpheus/stages/kafka_source.hpp  | 10 ++--
 .../include/morpheus/stages/preallocate.hpp   | 10 ++--
 .../morpheus/stages/preprocess_fil.hpp        |  8 +--
 .../morpheus/stages/preprocess_nlp.hpp        |  9 ++--
 .../include/morpheus/stages/serialize.hpp     |  9 ++--
 .../include/morpheus/stages/write_to_file.hpp |  9 ++--
 .../morpheus/utilities/http_server.hpp        | 13 +++--
 .../morpheus/utilities/python_util.hpp        | 10 ++--
 55 files changed, 257 insertions(+), 297 deletions(-)

diff --git a/docs/source/developer_guide/guides/3_simple_cpp_stage.md b/docs/source/developer_guide/guides/3_simple_cpp_stage.md
index 3b0982d21e..678fc3074f 100644
--- a/docs/source/developer_guide/guides/3_simple_cpp_stage.md
+++ b/docs/source/developer_guide/guides/3_simple_cpp_stage.md
@@ -54,7 +54,7 @@ def supports_cpp_node(self):
     return True
 ```
 
-C++ message object declarations can be found in the header files that are located in the `morpheus/_lib/include/morpheus/messages` directory. For example, the `MessageMeta` class declaration is located in `morpheus/_lib/include/morpheus/messages/meta.hpp`. In code this would be included as:
+C++ message object declarations can be found in the header files that are located in the `morpheus/_lib/include/morpheus/messages` directory. For example, the `MessageMeta` class declaration is located in `morpheus/_lib/include/morpheus/messages/meta.hpp`. Since this code is outside of the morpheus directory it would be included as:
 
 ```cpp
 #include <morpheus/messages/meta.hpp>
@@ -89,6 +89,7 @@ While our Python implementation accepts messages of any type (in the form of Pyt
 To start with, we have our Morpheus and MRC-specific includes:
 
 ```cpp
+#include <morpheus/export.h>
 #include <morpheus/messages/multi.hpp>  // for MultiMessage
 #include <mrc/segment/builder.hpp>  // for Segment Builder
 #include <mrc/segment/object.hpp>   // for Segment Object
 #include <pymrc/node.hpp>
 ```
@@ -100,12 +101,10 @@ We'll want to define our stage in its own namespace. In this case, we will name
 ```cpp
 namespace morpheus_example {
 
-// pybind11 sets visibility to hidden by default; we want to export our symbols
-#pragma GCC visibility push(default)
-
 using namespace morpheus;
 
-class PassThruStage : public mrc::pymrc::PythonNode<std::shared_ptr<MultiMessage>, std::shared_ptr<MultiMessage>>
+// pybind11 sets visibility to hidden by default; we want to export our symbols
+class MORPHEUS_EXPORT PassThruStage : public mrc::pymrc::PythonNode<std::shared_ptr<MultiMessage>, std::shared_ptr<MultiMessage>>
 {
   public:
     using base_t = mrc::pymrc::PythonNode<std::shared_ptr<MultiMessage>, std::shared_ptr<MultiMessage>>;
@@ -119,7 +118,13 @@ class PassThruStage : public mrc::pymrc::PythonNode