diff --git a/.devcontainer/cuda12.1-conda/devcontainer.json b/.devcontainer/cuda12.1-conda/devcontainer.json index fa5d24c6d8..ee34c67fa8 100644 --- a/.devcontainer/cuda12.1-conda/devcontainer.json +++ b/.devcontainer/cuda12.1-conda/devcontainer.json @@ -5,7 +5,7 @@ "args": { "CUDA": "12.1", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.06-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.10-cpp-mambaforge-ubuntu22.04" } }, "privileged": true, diff --git a/.github/workflows/ci_pipe.yml b/.github/workflows/ci_pipe.yml index 3482fd2b8d..0d4bb14c7d 100644 --- a/.github/workflows/ci_pipe.yml +++ b/.github/workflows/ci_pipe.yml @@ -21,16 +21,12 @@ on: run_check: required: true type: boolean - conda_core_run_build: - description: 'Runs the stage to build the morpheus-core conda package' - required: true - type: boolean conda_upload_label: description: 'The label to use when uploading the morpheus conda packages. Leave empty to disable uploading' required: true type: string conda_run_build: - description: 'Runs the conda-build stage to build the conda package with all morpheus components' + description: 'Runs the conda-build stage to build the morpheus conda packages' required: true type: boolean container: @@ -204,9 +200,9 @@ jobs: shell: bash run: ./morpheus/ci/scripts/github/docs.sh - package-core: - name: Package Core - if: ${{ inputs.conda_core_run_build }} + package: + name: Conda Package + if: ${{ inputs.conda_run_build }} needs: [documentation, test] runs-on: linux-amd64-cpu16 timeout-minutes: 60 @@ -240,38 +236,4 @@ jobs: CONDA_TOKEN: "${{ secrets.CONDA_TOKEN }}" SCRIPT_ARGS: "${{ inputs.conda_upload_label != '' && 'upload' || '' }}" CONDA_PKG_LABEL: "${{ inputs.conda_upload_label }}" - run: ./morpheus/ci/scripts/github/conda_core.sh $SCRIPT_ARGS - - package: - name: Package All - if: ${{ inputs.conda_run_build }} - needs: [check, documentation, test] - runs-on: linux-amd64-cpu16 - timeout-minutes: 60 - container: - credentials: - username: '$oauthtoken' - password: ${{ secrets.NGC_API_KEY }} - image: ${{ inputs.container }} - strategy: - fail-fast: true - - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - lfs: false - path: 'morpheus' - fetch-depth: 0 - submodules: 'recursive' - - - name: Get AWS credentials using OIDC - uses: aws-actions/configure-aws-credentials@v1-node16 - with: - role-to-assume: ${{ vars.AWS_ROLE_ARN }} - aws-region: ${{ vars.AWS_REGION }} - role-duration-seconds: 43200 # 12h - - - name: conda - shell: bash - run: ./morpheus/ci/scripts/github/conda.sh + run: ./morpheus/ci/scripts/github/conda_libs.sh $SCRIPT_ARGS diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 097d3c9e69..1afe90625c 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -68,7 +68,6 @@ jobs: is_main_branch: ${{ github.ref_name == 'main' }} is_dev_branch: ${{ startsWith(github.ref_name, 'branch-') }} has_conda_build_label: ${{ steps.get-pr-info.outcome == 'success' && contains(fromJSON(steps.get-pr-info.outputs.pr-info).labels.*.name, 'conda-build') || false }} - has_conda_core_build_label: ${{ steps.get-pr-info.outcome == 'success' && contains(fromJSON(steps.get-pr-info.outputs.pr-info).labels.*.name, 'conda-core-build') || false }} has_skip_ci_label: ${{ steps.get-pr-info.outcome == 'success' && contains(fromJSON(steps.get-pr-info.outputs.pr-info).labels.*.name, 'skip-ci') || false }} pr_info: ${{ steps.get-pr-info.outcome == 'success' && steps.get-pr-info.outputs.pr-info || '' }} @@ -90,16 +89,13 @@ 
jobs: with: # Run checks for any PR branch run_check: ${{ fromJSON(needs.prepare.outputs.is_pr) }} - # Build morpheus-core conda package. This is done for main/dev branches and - # for PRs with the conda-core-build label - conda_core_run_build: ${{ !fromJSON(needs.prepare.outputs.is_pr) || fromJSON(needs.prepare.outputs.has_conda_core_build_label) }} - # Upload morpheus-core conda package only for non PR branches. Use 'main' for main branch and 'dev' for all other branches - conda_upload_label: ${{ !fromJSON(needs.prepare.outputs.is_pr) && (fromJSON(needs.prepare.outputs.is_main_branch) && 'main' || 'dev') || '' }} - # Run morpheus conda package, with all components. This is done for main/dev - # branches and for PRs with the conda-build label. + # Build conda packages for all the morpheus libraries - core, dfp, llm. This is + # done for main/dev branches and for PRs with the conda-build label conda_run_build: ${{ !fromJSON(needs.prepare.outputs.is_pr) || fromJSON(needs.prepare.outputs.has_conda_build_label) }} - container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-240614 - test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-240614 + # Upload morpheus conda packages only for non PR branches. Use 'main' for main branch and 'dev' for all other branches + conda_upload_label: ${{ !fromJSON(needs.prepare.outputs.is_pr) && (fromJSON(needs.prepare.outputs.is_main_branch) && 'main' || 'dev') || '' }} + container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-build-241004 + test_container: nvcr.io/ea-nvidia-morpheus/morpheus:morpheus-ci-test-241004 secrets: CONDA_TOKEN: ${{ secrets.CONDA_TOKEN }} NGC_API_KEY: ${{ secrets.NGC_API_KEY }} diff --git a/CMakeLists.txt b/CMakeLists.txt index a295b5ac5d..bd9580ae12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,7 +22,9 @@ option(BUILD_SHARED_LIBS "Default value for whether or not to build shared or st option(MORPHEUS_BUILD_BENCHMARKS "Whether or not to build benchmarks" OFF) option(MORPHEUS_BUILD_DOCS "Enable building of API documentation" OFF) option(MORPHEUS_BUILD_EXAMPLES "Whether or not to build examples" OFF) -option(MORPHEUS_BUILD_MORPHEUS_LLM "Whether or not to build morpheus_llm" OFF) +option(MORPHEUS_BUILD_MORPHEUS_CORE "Whether or not to build morpheus_core" ON) +option(MORPHEUS_BUILD_MORPHEUS_DFP "Whether or not to build morpheus_dfp" ON) +option(MORPHEUS_BUILD_MORPHEUS_LLM "Whether or not to build morpheus_llm" ON) option(MORPHEUS_BUILD_TESTS "Whether or not to build tests" OFF) option(MORPHEUS_ENABLE_DEBUG_INFO "Enable printing debug information" OFF) option(MORPHEUS_PYTHON_BUILD_STUBS "Whether or not to generated .pyi stub files for C++ Python modules. 
Disable to avoid requiring loading the NVIDIA GPU Driver during build" ON) @@ -37,7 +39,7 @@ option(MORPHEUS_USE_IWYU "Enable running include-what-you-use as part of the bui set(MORPHEUS_PY_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/wheel" CACHE STRING "Location to install the python directory") -set(MORPHEUS_RAPIDS_VERSION "24.02" CACHE STRING "Sets default versions for RAPIDS libraries.") +set(MORPHEUS_RAPIDS_VERSION "24.10" CACHE STRING "Sets default versions for RAPIDS libraries.") set(MORPHEUS_CACHE_DIR "${CMAKE_SOURCE_DIR}/.cache" CACHE PATH "Directory to contain all CPM and CCache data") mark_as_advanced(MORPHEUS_CACHE_DIR) diff --git a/ci/conda/recipes/morpheus-core/morpheus_core_build.sh b/ci/conda/recipes/morpheus-libs/cmake_common.sh similarity index 61% rename from ci/conda/recipes/morpheus-core/morpheus_core_build.sh rename to ci/conda/recipes/morpheus-libs/cmake_common.sh index 5ef4920b9e..dcfa69b13f 100644 --- a/ci/conda/recipes/morpheus-core/morpheus_core_build.sh +++ b/ci/conda/recipes/morpheus-libs/cmake_common.sh @@ -1,3 +1,4 @@ + # SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # @@ -16,26 +17,10 @@ # It is assumed that this script is executed from the root of the repo directory by conda-build # (https://conda-forge.org/docs/maintainer/knowledge_base.html#using-cmake) -# Need to ensure this value is set before checking it in the if block -MORPHEUS_SUPPORT_DOCA=-OFF -MORPHEUS_BUILD_MORPHEUS_LLM=-OFF - # This will store all of the cmake args. Make sure to prepend args to allow # incoming values to overwrite them -CMAKE_ARGS=${CMAKE_ARGS:-""} - -export CCACHE_BASEDIR=$(realpath ${SRC_DIR}/..) -export USE_SCCACHE=${USE_SCCACHE:-""} - -# Check for some mrc environment variables. Append to front of args to allow users to overwrite them -if [[ -n "${MORPHEUS_CACHE_DIR}" ]]; then - # Set the cache variable, then set the Staging prefix to allow for host searching - CMAKE_ARGS="-DMORPHEUS_CACHE_DIR=${MORPHEUS_CACHE_DIR} ${CMAKE_ARGS}" - - # Double check that the cache dir has been created - mkdir -p ${MORPHEUS_CACHE_DIR} -fi +# CMake flags common across all libraries CMAKE_ARGS="-DCMAKE_MESSAGE_CONTEXT_SHOW=ON ${CMAKE_ARGS}" CMAKE_ARGS="-DCMAKE_INSTALL_PREFIX=$PREFIX ${CMAKE_ARGS}" CMAKE_ARGS="-DCMAKE_INSTALL_LIBDIR=lib ${CMAKE_ARGS}" @@ -51,38 +36,12 @@ CMAKE_ARGS="-DPython_EXECUTABLE=${PYTHON} ${CMAKE_ARGS}" CMAKE_ARGS="-DPYTHON_EXECUTABLE=${PYTHON} ${CMAKE_ARGS}" # for pybind11 CMAKE_ARGS="--log-level=VERBOSE ${CMAKE_ARGS}" -if [[ "${USE_SCCACHE}" == "1" ]]; then - CMAKE_ARGS="-DCCACHE_PROGRAM_PATH=$(which sccache) ${CMAKE_ARGS}" -fi - -echo "CC : ${CC}" -echo "CXX : ${CXX}" -echo "CUDAHOSTCXX : ${CUDAHOSTCXX}" -echo "CUDA : ${CUDA}" -echo "CMAKE_ARGS : ${CMAKE_ARGS}" - -echo "========Begin Env========" -env -echo "========End Env========" - -BUILD_DIR="build-conda" +# Append to front of args to allow users to overwrite them +if [[ -n "${MORPHEUS_CACHE_DIR}" ]]; then + # Set the cache variable, then set the Staging prefix to allow for host searching + CMAKE_ARGS="-DMORPHEUS_CACHE_DIR=${MORPHEUS_CACHE_DIR} ${CMAKE_ARGS}" -# Check if the build directory already exists. 
And if so, delete the -# CMakeCache.txt and CMakeFiles to ensure a clean configuration -if [[ -d "./${BUILD_DIR}" ]]; then - echo "Deleting old CMake files at ./${BUILD_DIR}" - rm -rf "./${BUILD_DIR}/CMakeCache.txt" - rm -rf "./${BUILD_DIR}/CMakeFiles" + # Double check that the cache dir has been created + mkdir -p ${MORPHEUS_CACHE_DIR} fi -# Run configure -cmake -B ${BUILD_DIR} \ - ${CMAKE_ARGS} \ - --log-level=verbose \ - . - -# Build the components -cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install - -# Install just the mprpheus core python wheel components -${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus/dist/*.whl diff --git a/ci/conda/recipes/morpheus-core/conda_build_config.yaml b/ci/conda/recipes/morpheus-libs/conda_build_config.yaml similarity index 96% rename from ci/conda/recipes/morpheus-core/conda_build_config.yaml rename to ci/conda/recipes/morpheus-libs/conda_build_config.yaml index 4b051dc074..9c88ef46cb 100644 --- a/ci/conda/recipes/morpheus-core/conda_build_config.yaml +++ b/ci/conda/recipes/morpheus-libs/conda_build_config.yaml @@ -14,19 +14,19 @@ # limitations under the License. c_compiler_version: - - 11.2 + - 12.1 cxx_compiler_version: - - 11.2 + - 12.1 cuda_compiler: - cuda-nvcc cuda_compiler_version: - - 12.1 + - 12.5 python: - 3.10 rapids_version: - - 24.02 + - 24.10 diff --git a/ci/conda/recipes/morpheus-core/meta.yaml b/ci/conda/recipes/morpheus-libs/meta.yaml similarity index 60% rename from ci/conda/recipes/morpheus-core/meta.yaml rename to ci/conda/recipes/morpheus-libs/meta.yaml index f72004b95a..dffdffe53e 100644 --- a/ci/conda/recipes/morpheus-core/meta.yaml +++ b/ci/conda/recipes/morpheus-libs/meta.yaml @@ -13,19 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +# This recipe is split into multiple packages - morpheus-core, morpheus-dfp and morpheus-llm {% set version = environ.get('GIT_VERSION', '0.0.0.dev').lstrip('v') + environ.get('VERSION_SUFFIX', '') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} package: - name: morpheus-core + name: morpheus-split version: {{ version }} source: git_url: ../../../..
outputs: - + ############################### morpheus-core ############################# - name: morpheus-core + type: conda_v2 build: number: {{ GIT_DESCRIBE_NUMBER }} string: cuda_{{ cuda_compiler_version }}_py{{ python }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} @@ -35,8 +37,6 @@ outputs: - CMAKE_CUDA_ARCHITECTURES - MORPHEUS_CACHE_DIR - MORPHEUS_PYTHON_BUILD_STUBS - - MORPHEUS_SUPPORT_DOCA - - MORPHEUS_BUILD_MORPHEUS_LLM - PARALLEL_LEVEL run_exports: - {{ pin_subpackage("morpheus-core", max_pin="x.x") }} @@ -47,15 +47,13 @@ outputs: - {{ compiler("c") }} - {{ compiler("cuda") }} - {{ compiler("cxx") }} - - automake =1.16.5 # Needed for DOCA build - ccache - cmake =3.27 - cuda-cudart-dev {{ cuda_compiler_version }}.* # Needed by CMake to compile a test application - cuda-version {{ cuda_compiler_version }}.* - - libtool # Needed for DOCA build - ninja =1.11 - - pkg-config =0.29 # for mrc cmake - - sysroot_linux-64 =2.17 + - pkg-config =0.29 + - sysroot_linux-64 =2.28 host: # CUDA dependencies - cuda-cudart-dev {{ cuda_compiler_version }}.* @@ -67,7 +65,7 @@ outputs: # Non-CUDA dependencies - cudf {{ rapids_version }} - cython 3.0.* - - glog 0.6.* + - glog >=0.7.1,<0.8 - libcudf {{ rapids_version }} - librdkafka >=1.9.2,<1.10.0a0 - mrc {{ minor_version }} @@ -78,7 +76,7 @@ outputs: - rapidjson 1.1.0 - scikit-build 0.17.6 - versioneer-518 - - zlib 1.2.13 # required to build triton client + - zlib 1.3.1 # required to build triton client run: # Runtime only requirements. This + setup.py is the definitive runtime requirement list # This should be synced with `runtime` in dependencies.yaml @@ -93,7 +91,7 @@ outputs: - docker-py =5.0.* - elasticsearch ==8.9.0 - feedparser =6.0.* - - grpcio =1.59.* + - grpcio =1.62.* - mlflow>=2.10.0,<3 - mrc - networkx=2.8.8 @@ -103,8 +101,6 @@ outputs: - python - python-confluent-kafka >=1.9.2,<1.10.0a0 - python-graphviz - - pytorch-cuda - - pytorch * *cuda* - rapids-dask-dependency {{ rapids_version }} # provides dask and distributed - requests - requests-cache =1.1.* @@ -130,11 +126,82 @@ outputs: imports: - morpheus commands: - - echo # make sure test requirements get installed + - echo # pytest will be added post re-factoring + + ############################### morpheus-dfp ############################# + - name: morpheus-dfp + type: conda_v2 + build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda_{{ cuda_compiler_version }}_py{{ python }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + script_env: + - CMAKE_CUDA_ARCHITECTURES + - MORPHEUS_CACHE_DIR + - MORPHEUS_PYTHON_BUILD_STUBS + - PARALLEL_LEVEL + run_exports: + - {{ pin_subpackage("morpheus-dfp", max_pin="x.x") }} + script: morpheus_dfp_build.sh + + requirements: + build: + - ccache + - cmake =3.27 + host: + - {{ pin_subpackage('morpheus-core', exact=True) }} + - pip + - python {{ python }} + - scikit-build 0.17.6 + - versioneer-518 + run: + - {{ pin_subpackage('morpheus-core', exact=True) }} + + #test: Tests will be added post test refactoring + + ############################### morpheus-llm ############################# + - name: morpheus-llm + type: conda_v2 + build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda_{{ cuda_compiler_version }}_py{{ python }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + script_env: + - CMAKE_CUDA_ARCHITECTURES + - MORPHEUS_CACHE_DIR + - MORPHEUS_PYTHON_BUILD_STUBS + - PARALLEL_LEVEL + run_exports: + - {{ pin_subpackage("morpheus-llm", max_pin="x.x") }} + script: morpheus_llm_build.sh + + requirements: + build: + - {{ compiler("c") }} + - {{ 
compiler("cxx") }} + - ccache + - cmake =3.27 + - ninja =1.11 + - pkg-config =0.29 + host: + # morpheus-core has to be at the top. changing that order will result in different + # package versions getting installed creating unexpected version conflicts. + - {{ pin_subpackage('morpheus-core', exact=True) }} + - cython 3.0.* + - glog >=0.7.1,<0.8 + - pip + - pybind11-stubgen 0.10.5 + - python {{ python }} + - rapidjson 1.1.0 + - scikit-build 0.17.6 + - versioneer-518 + - zlib 1.3.1 # required to build triton client + run: + - {{ pin_subpackage('morpheus-core', exact=True) }} + #test: Tests will be added post test refactoring + about: home: https://github.com/nv-morpheus/Morpheus license: Apache-2.0 license_family: Apache license_file: LICENSE - summary: Morpheus Cybersecurity Core Library + summary: Morpheus Cybersecurity Library diff --git a/ci/conda/recipes/morpheus-libs/morpheus_core_build.sh b/ci/conda/recipes/morpheus-libs/morpheus_core_build.sh new file mode 100644 index 0000000000..6f9caf3314 --- /dev/null +++ b/ci/conda/recipes/morpheus-libs/morpheus_core_build.sh @@ -0,0 +1,77 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# It is assumed that this script is executed from the root of the repo directory by conda-build +# (https://conda-forge.org/docs/maintainer/knowledge_base.html#using-cmake) + +# This will store all of the cmake args. Make sure to prepend args to allow +# incoming values to overwrite them + +source $RECIPE_DIR/cmake_common.sh + +CMAKE_ARGS=${CMAKE_ARGS:-""} + +export CCACHE_BASEDIR=$(realpath ${SRC_DIR}/..) +export USE_SCCACHE=${USE_SCCACHE:-""} + +if [[ -n "${MORPHEUS_CACHE_DIR}" ]]; then + # Set the cache variable, then set the Staging prefix to allow for host searching + CMAKE_ARGS="-DMORPHEUS_CACHE_DIR=${MORPHEUS_CACHE_DIR} ${CMAKE_ARGS}" + + # Double check that the cache dir has been created + mkdir -p ${MORPHEUS_CACHE_DIR} +fi + +# Enable core. Core is enabled by default and this is to just highlight that it is on +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_CORE=ON ${CMAKE_ARGS}" + +# Disable dfp, llm and doca +CMAKE_ARGS="-DMORPHEUS_SUPPORT_DOCA=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_DFP=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_LLM=OFF ${CMAKE_ARGS}" + +if [[ "${USE_SCCACHE}" == "1" ]]; then + CMAKE_ARGS="-DCCACHE_PROGRAM_PATH=$(which sccache) ${CMAKE_ARGS}" +fi + +echo "CC : ${CC}" +echo "CXX : ${CXX}" +echo "CUDAHOSTCXX : ${CUDAHOSTCXX}" +echo "CUDA : ${CUDA}" +echo "CMAKE_ARGS : ${CMAKE_ARGS}" + +echo "========Begin Env========" +env +echo "========End Env========" + +BUILD_DIR="build-conda-core" + +# remove the old build directory +if [[ -d "./${BUILD_DIR}" ]]; then + echo "Deleting old build dir at ./${BUILD_DIR}" + rm -rf "./${BUILD_DIR}/" +fi + +# Run configure +cmake -B ${BUILD_DIR} \ + ${CMAKE_ARGS} \ + --log-level=verbose \ + . 
+ +# Build the components +cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install + +# Install just the morpheus core python wheel components +${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus/dist/*.whl diff --git a/ci/conda/recipes/morpheus-libs/morpheus_dfp_build.sh b/ci/conda/recipes/morpheus-libs/morpheus_dfp_build.sh new file mode 100644 index 0000000000..cc837f80b9 --- /dev/null +++ b/ci/conda/recipes/morpheus-libs/morpheus_dfp_build.sh @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# It is assumed that this script is executed from the root of the repo directory by conda-build +# (https://conda-forge.org/docs/maintainer/knowledge_base.html#using-cmake) + +# This will store all of the cmake args. Make sure to prepend args to allow +# incoming values to overwrite them + +source $RECIPE_DIR/cmake_common.sh + +CMAKE_ARGS=${CMAKE_ARGS:-""} + +export CCACHE_BASEDIR=$(realpath ${SRC_DIR}/..) +export USE_SCCACHE=${USE_SCCACHE:-""} + +# Enable DFP +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_DFP=ON ${CMAKE_ARGS}" + +# Disable core, llm and doca +CMAKE_ARGS="-DMORPHEUS_SUPPORT_DOCA=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_CORE=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_LLM=OFF ${CMAKE_ARGS}" + + +if [[ "${USE_SCCACHE}" == "1" ]]; then + CMAKE_ARGS="-DCCACHE_PROGRAM_PATH=$(which sccache) ${CMAKE_ARGS}" +fi + +BUILD_DIR="build-conda-dfp" + +# remove the old build directory +if [[ -d "./${BUILD_DIR}" ]]; then + echo "Deleting old build dir at ./${BUILD_DIR}" + rm -rf "./${BUILD_DIR}/" +fi + +# Run configure +cmake -B ${BUILD_DIR} \ + ${CMAKE_ARGS} \ + --log-level=verbose \ + . + +# Build the components +cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install + +# Install the morpheus dfp python wheel components +${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus_dfp/dist/*.whl diff --git a/ci/conda/recipes/morpheus-libs/morpheus_llm_build.sh b/ci/conda/recipes/morpheus-libs/morpheus_llm_build.sh new file mode 100644 index 0000000000..d29ea0e396 --- /dev/null +++ b/ci/conda/recipes/morpheus-libs/morpheus_llm_build.sh @@ -0,0 +1,60 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +# It is assumed that this script is executed from the root of the repo directory by conda-build +# (https://conda-forge.org/docs/maintainer/knowledge_base.html#using-cmake) + +# This will store all of the cmake args. Make sure to prepend args to allow +# incoming values to overwrite them + +source $RECIPE_DIR/cmake_common.sh + +CMAKE_ARGS=${CMAKE_ARGS:-""} + +export CCACHE_BASEDIR=$(realpath ${SRC_DIR}/..) +export USE_SCCACHE=${USE_SCCACHE:-""} + +# Enable llm +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_LLM=ON ${CMAKE_ARGS}" + +# Disable core, dfp and doca +CMAKE_ARGS="-DMORPHEUS_SUPPORT_DOCA=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_CORE=OFF ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_DFP=OFF ${CMAKE_ARGS}" + +if [[ "${USE_SCCACHE}" == "1" ]]; then + CMAKE_ARGS="-DCCACHE_PROGRAM_PATH=$(which sccache) ${CMAKE_ARGS}" +fi + +BUILD_DIR="build-conda-llm" + +# remove the old build directory +if [[ -d "./${BUILD_DIR}" ]]; then + echo "Deleting old build dir at ./${BUILD_DIR}" + rm -rf "./${BUILD_DIR}/" +fi + + +# Run configure +cmake -B ${BUILD_DIR} \ + ${CMAKE_ARGS} \ + --log-level=verbose \ + . + +# Build the components +cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install + +# Install just the morpheus llm python wheel components +${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus_llm/dist/*.whl diff --git a/ci/conda/recipes/morpheus/conda_build_config.yaml b/ci/conda/recipes/morpheus/conda_build_config.yaml index 4b051dc074..9c88ef46cb 100644 --- a/ci/conda/recipes/morpheus/conda_build_config.yaml +++ b/ci/conda/recipes/morpheus/conda_build_config.yaml @@ -14,19 +14,19 @@ # limitations under the License. c_compiler_version: - - 11.2 + - 12.1 cxx_compiler_version: - - 11.2 + - 12.1 cuda_compiler: - cuda-nvcc cuda_compiler_version: - - 12.1 + - 12.5 python: - 3.10 rapids_version: - - 24.02 + - 24.10 diff --git a/ci/conda/recipes/morpheus/meta.yaml b/ci/conda/recipes/morpheus/meta.yaml index 00b15fe488..23327bcbc7 100644 --- a/ci/conda/recipes/morpheus/meta.yaml +++ b/ci/conda/recipes/morpheus/meta.yaml @@ -36,7 +36,6 @@ outputs: - MORPHEUS_CACHE_DIR - MORPHEUS_PYTHON_BUILD_STUBS - MORPHEUS_SUPPORT_DOCA - - MORPHEUS_BUILD_MORPHEUS_LLM - PARALLEL_LEVEL run_exports: - {{ pin_subpackage("morpheus", max_pin="x.x") }} @@ -57,7 +56,7 @@ outputs: - libtool # Needed for DOCA build - ninja =1.11 - pkg-config =0.29 # for mrc cmake - - sysroot_linux-64 =2.17 + - sysroot_linux-64 =2.28 host: # CUDA dependencies - cuda-cudart-dev {{ cuda_compiler_version }}.* @@ -69,7 +68,7 @@ outputs: # Non-CUDA dependencies - cudf {{ rapids_version }} - cython 3.0.* - - glog 0.6.* + - glog >=0.7.1,<0.8 - libcudf {{ rapids_version }} - librdkafka >=1.9.2,<1.10.0a0 - mrc {{ minor_version }} @@ -81,7 +80,7 @@ outputs: - rdma-core >=48 # Needed for DOCA. - scikit-build 0.17.6 - versioneer-518 - - zlib 1.2.13 # required to build triton client + - zlib 1.3.1 # required to build triton client run: # Runtime only requirements. 
This + setup.py is the definitive runtime requirement list # This should be synced with `runtime` in dependencies.yaml @@ -96,7 +95,7 @@ outputs: - docker-py =5.0.* - elasticsearch ==8.9.0 - feedparser =6.0.* - - grpcio =1.59.* + - grpcio =1.62.* - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863 - mlflow>=2.10.0,<3 - mrc @@ -107,8 +106,6 @@ outputs: - python - python-confluent-kafka >=1.9.2,<1.10.0a0 - python-graphviz - - pytorch-cuda - - pytorch * *cuda* - rapids-dask-dependency {{ rapids_version }} # provides dask and distributed - requests - requests-cache =1.1.* diff --git a/ci/conda/recipes/morpheus/morpheus_build.sh b/ci/conda/recipes/morpheus/morpheus_build.sh index 723559b4af..ad5771566c 100644 --- a/ci/conda/recipes/morpheus/morpheus_build.sh +++ b/ci/conda/recipes/morpheus/morpheus_build.sh @@ -18,7 +18,6 @@ # Need to ensure this value is set before checking it in the if block MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF} -MORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM:-OFF} # This will store all of the cmake args. Make sure to prepend args to allow # incoming values to overwrite them @@ -45,9 +44,10 @@ if [[ ${MORPHEUS_SUPPORT_DOCA} == @(TRUE|ON) ]]; then echo "MORPHEUS_SUPPORT_DOCA is ON. Setting CMAKE_CUDA_ARCHITECTURES to supported values: '${CMAKE_CUDA_ARCHITECTURES}'" fi -if [[ ${MORPHEUS_BUILD_MORPHEUS_LLM} == @(TRUE|ON) ]]; then - CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_LLM=ON ${CMAKE_ARGS}" -fi +# enable all functional blocks +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_CORE=ON ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_LLM=ON ${CMAKE_ARGS}" +CMAKE_ARGS="-DMORPHEUS_BUILD_MORPHEUS_DFP=ON ${CMAKE_ARGS}" CMAKE_ARGS="-DCMAKE_MESSAGE_CONTEXT_SHOW=ON ${CMAKE_ARGS}" CMAKE_ARGS="-DCMAKE_INSTALL_PREFIX=$PREFIX ${CMAKE_ARGS}" @@ -100,3 +100,4 @@ cmake --build ${BUILD_DIR} -j${PARALLEL_LEVEL:-$(nproc)} --target install # Install just the python wheel components ${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus/dist/*.whl ${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus_llm/dist/*.whl +${PYTHON} -m pip install -vv ${BUILD_DIR}/python/morpheus_dfp/dist/*.whl diff --git a/ci/conda/recipes/run_conda_build.sh b/ci/conda/recipes/run_conda_build.sh index 3f96d70736..20eb9092ea 100755 --- a/ci/conda/recipes/run_conda_build.sh +++ b/ci/conda/recipes/run_conda_build.sh @@ -126,7 +126,7 @@ CONDA_ARGS_ARRAY+=("-c" "${CONDA_CHANNEL_ALIAS:+"${CONDA_CHANNEL_ALIAS%/}/"}defa if [[ ${NUMARGS} == 0 ]]; then echo -e "${r}ERROR: No arguments were provided. Please provide at least one package to build. Available packages:${x}" echo -e "${r} morpheus${x}" - echo -e "${r} morpheus-core${x}" + echo -e "${r} morpheus-libs${x}" echo -e "${r} pydebug${x}" echo -e "${r}Exiting...${x}" exit 12 @@ -134,7 +134,6 @@ fi if hasArg morpheus; then export MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF} - export MORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM:-ON} # Set GIT_VERSION to set the project version inside of meta.yaml export GIT_VERSION="$(get_version)" @@ -144,15 +143,13 @@ if hasArg morpheus; then set +x fi -if hasArg morpheus-core; then - export MORPHEUS_SUPPORT_DOCA=-OFF - export MORPHEUS_BUILD_MORPHEUS_LLM=-OFF +if hasArg morpheus-libs; then # Set GIT_VERSION to set the project version inside of meta.yaml export GIT_VERSION="$(get_version)" - echo "Running conda-build for morpheus-core v${GIT_VERSION}..." + echo "Running conda-build for morpheus libraries v${GIT_VERSION}..." 
set -x - conda ${CONDA_COMMAND} "${CONDA_ARGS_ARRAY[@]}" ${CONDA_ARGS} ci/conda/recipes/morpheus-core + conda ${CONDA_COMMAND} "${CONDA_ARGS_ARRAY[@]}" ${CONDA_ARGS} ci/conda/recipes/morpheus-libs set +x fi diff --git a/ci/runner/Dockerfile b/ci/runner/Dockerfile index ed150d0657..e3d7347268 100644 --- a/ci/runner/Dockerfile +++ b/ci/runner/Dockerfile @@ -16,8 +16,8 @@ # Args used in FROM commands must come first ARG FROM_IMAGE="rapidsai/ci-conda" ARG CUDA_PKG_VER=12-0 -ARG CUDA_SHORT_VER=12.1 -ARG CUDA_VER=12.1.1 +ARG CUDA_SHORT_VER=12.5 +ARG CUDA_VER=12.5.1 ARG LINUX_DISTRO=ubuntu ARG LINUX_VER=22.04 ARG PROJ_NAME=morpheus @@ -60,7 +60,6 @@ RUN rapids-dependency-file-generator \ rm -rf /tmp/conda ENV MORPHEUS_SUPPORT_DOCA=ON -ENV MORPHEUS_BUILD_MORPHEUS_LLM=ON COPY ./.devcontainer/docker/optional_deps/doca.sh /tmp/doca/ diff --git a/ci/scripts/github/build.sh b/ci/scripts/github/build.sh index 7babe4a40a..8f75893967 100755 --- a/ci/scripts/github/build.sh +++ b/ci/scripts/github/build.sh @@ -42,7 +42,7 @@ cmake --build ${BUILD_DIR} --parallel ${PARALLEL_LEVEL} log_sccache_stats rapids-logger "Archiving results" -tar cfj "${WORKSPACE_TMP}/wheel.tar.bz" ${BUILD_DIR}/python/morpheus/dist ${BUILD_DIR}/python/morpheus_llm/dist +tar cfj "${WORKSPACE_TMP}/wheel.tar.bz" ${BUILD_DIR}/python/morpheus/dist ${BUILD_DIR}/python/morpheus_llm/dist ${BUILD_DIR}/python/morpheus_dfp/dist MORPHEUS_LIBS=($(find ${MORPHEUS_ROOT}/${BUILD_DIR}/python/morpheus/morpheus/_lib -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;) \ $(find ${MORPHEUS_ROOT}/${BUILD_DIR}/python/morpheus_llm/morpheus_llm/_lib -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;) \ diff --git a/ci/scripts/github/checks.sh b/ci/scripts/github/checks.sh index 6b6e579feb..a00afe718f 100755 --- a/ci/scripts/github/checks.sh +++ b/ci/scripts/github/checks.sh @@ -50,6 +50,7 @@ log_sccache_stats rapids-logger "Installing Morpheus" pip install ./python/morpheus pip install ./python/morpheus_llm +pip install ./python/morpheus_dfp rapids-logger "Checking copyright headers" python ${MORPHEUS_ROOT}/ci/scripts/copyright.py --verify-apache-v2 --git-diff-commits ${CHANGE_TARGET} ${GIT_COMMIT} diff --git a/ci/scripts/github/cmake_all.sh b/ci/scripts/github/cmake_all.sh index 86b0d65dcd..0e378a505a 100644 --- a/ci/scripts/github/cmake_all.sh +++ b/ci/scripts/github/cmake_all.sh @@ -27,6 +27,7 @@ _FLAGS+=("-DMORPHEUS_BUILD_BENCHMARKS=ON") _FLAGS+=("-DMORPHEUS_BUILD_EXAMPLES=ON") _FLAGS+=("-DMORPHEUS_BUILD_TESTS=ON") _FLAGS+=("-DMORPHEUS_BUILD_MORPHEUS_LLM=ON") +_FLAGS+=("-DMORPHEUS_BUILD_MORPHEUS_DFP=ON") if [[ "${LOCAL_CI}" == "" ]]; then _FLAGS+=("-DCCACHE_PROGRAM_PATH=$(which sccache)") fi diff --git a/ci/scripts/github/cmake_core.sh b/ci/scripts/github/cmake_core.sh deleted file mode 100644 index 20d250e45c..0000000000 --- a/ci/scripts/github/cmake_core.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -_FLAGS=() -_FLAGS+=("-B" "${BUILD_DIR}") -_FLAGS+=("-G" "Ninja") -_FLAGS+=("-DCMAKE_MESSAGE_CONTEXT_SHOW=ON") -_FLAGS+=("-DMORPHEUS_CUDA_ARCHITECTURES=RAPIDS") -_FLAGS+=("-DMORPHEUS_USE_CONDA=ON") -_FLAGS+=("-DMORPHEUS_USE_CCACHE=ON") -_FLAGS+=("-DMORPHEUS_PYTHON_INPLACE_BUILD=OFF") -_FLAGS+=("-DMORPHEUS_PYTHON_BUILD_STUBS=ON") -_FLAGS+=("-DMORPHEUS_BUILD_BENCHMARKS=OFF") -_FLAGS+=("-DMORPHEUS_BUILD_EXAMPLES=OFF") -_FLAGS+=("-DMORPHEUS_BUILD_TESTS=OFF") -_FLAGS+=("-DMORPHEUS_BUILD_MORPHEUS_LLM=OFF") -_FLAGS+=("-DMORPHEUS_SUPPORT_DOCA=OFF") -if [[ "${LOCAL_CI}" == "" ]]; then - _FLAGS+=("-DCCACHE_PROGRAM_PATH=$(which sccache)") -fi -export CMAKE_BUILD_ALL_FEATURES="${_FLAGS[@]}" -unset _FLAGS diff --git a/ci/scripts/github/conda_core.sh b/ci/scripts/github/conda_libs.sh similarity index 81% rename from ci/scripts/github/conda_core.sh rename to ci/scripts/github/conda_libs.sh index b2006b6162..7969a30c83 100755 --- a/ci/scripts/github/conda_core.sh +++ b/ci/scripts/github/conda_libs.sh @@ -18,7 +18,6 @@ set -e CI_SCRIPT_ARGS="$@" source ${WORKSPACE}/ci/scripts/github/common.sh -source ${WORKSPACE}/ci/scripts/github/cmake_core.sh cd ${MORPHEUS_ROOT} @@ -42,21 +41,21 @@ fi # Print the info just to be sure base is active conda info -rapids-logger "Building Conda Package morpheus-core" +rapids-logger "Building Morpheus Libraries" # Run the conda build, and upload to conda forge if requested export MORPHEUS_PYTHON_BUILD_STUBS=OFF export CONDA_ARGS="--skip-existing" -${MORPHEUS_ROOT}/ci/conda/recipes/run_conda_build.sh morpheus-core "${CI_SCRIPT_ARGS}" +${MORPHEUS_ROOT}/ci/conda/recipes/run_conda_build.sh morpheus-libs "${CI_SCRIPT_ARGS}" # If we didn't receive the upload argument, upload the artifact to S3 if [[ " ${CI_SCRIPT_ARGS} " =~ " upload " ]]; then - rapids-logger "Building Conda Package morpheus-core... Done" + rapids-logger "Building Morpheus Libraries... Done" else # if we didn't receive the upload argument, we can still upload the artifact to S3 - tar cfj "${WORKSPACE_TMP}/conda_morpheus_core.tar.bz" "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" + tar cfj "${WORKSPACE_TMP}/conda_libs.tar.bz" "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" ls -lh ${WORKSPACE_TMP}/ rapids-logger "Pushing results to ${DISPLAY_ARTIFACT_URL}/" - upload_artifact "${WORKSPACE_TMP}/conda_morpheus_core.tar.bz" + upload_artifact "${WORKSPACE_TMP}/conda_libs.tar.bz" fi diff --git a/ci/scripts/github/docs.sh b/ci/scripts/github/docs.sh index a0e2057fe8..441c8495b0 100755 --- a/ci/scripts/github/docs.sh +++ b/ci/scripts/github/docs.sh @@ -32,6 +32,7 @@ tar xf "${WORKSPACE_TMP}/wheel.tar.bz" pip install ${MORPHEUS_ROOT}/${BUILD_DIR}/python/morpheus/dist/*.whl pip install ${MORPHEUS_ROOT}/${BUILD_DIR}/python/morpheus_llm/dist/*.whl +pip install ${MORPHEUS_ROOT}/${BUILD_DIR}/python/morpheus_dfp/dist/*.whl rapids-logger "Pulling LFS assets" cd ${MORPHEUS_ROOT} diff --git a/ci/scripts/run_ci_local.sh b/ci/scripts/run_ci_local.sh index 8f7df827da..4a2bbe3f23 100755 --- a/ci/scripts/run_ci_local.sh +++ b/ci/scripts/run_ci_local.sh @@ -21,13 +21,13 @@ case "$1" in STAGES=("bash") ;; "all" ) - STAGES=("checks" "build" "docs" "test" "conda_core" "conda") + STAGES=("checks" "build" "docs" "test" "conda_libs" "conda") ;; - "checks" | "build" | "docs" | "test" | "conda_core" | "conda" | "bash" ) + "checks" | "build" | "docs" | "test" | "conda_libs" | "conda" | "bash" ) STAGES=("$1") ;; * ) - echo "Error: Invalid argument \"$1\" provided. 
Expected values: \"all\", \"checks\", \"build\", \"docs\", \"test\", \"conda_core\", \"conda\", or \"bash\"" + echo "Error: Invalid argument \"$1\" provided. Expected values: \"all\", \"checks\", \"build\", \"docs\", \"test\", \"conda_libs\", \"conda\", or \"bash\"" exit 1 ;; esac @@ -58,7 +58,7 @@ GIT_BRANCH=$(git branch --show-current) GIT_COMMIT=$(git log -n 1 --pretty=format:%H) LOCAL_CI_TMP=${LOCAL_CI_TMP:-${MORPHEUS_ROOT}/.tmp/local_ci_tmp} -CONTAINER_VER=${CONTAINER_VER:-240614} +CONTAINER_VER=${CONTAINER_VER:-241004} CUDA_VER=${CUDA_VER:-12.1} DOCKER_EXTRA_ARGS=${DOCKER_EXTRA_ARGS:-""} diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 2b59d30593..b4f428c64e 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -41,13 +41,11 @@ rapids_find_package(ZLIB if(MORPHEUS_BUILD_BENCHMARKS) # google benchmark - # - Expects package to pre-exist in the build environment # ================ - rapids_find_package(benchmark REQUIRED - GLOBAL_TARGETS benchmark::benchmark + include(${rapids-cmake-dir}/cpm/gbench.cmake) + rapids_cpm_gbench( BUILD_EXPORT_SET ${PROJECT_NAME}-core-exports INSTALL_EXPORT_SET ${PROJECT_NAME}-core-exports - FIND_ARGS CONFIG ) endif() @@ -65,46 +63,21 @@ morpheus_utils_configure_glog() if(MORPHEUS_BUILD_TESTS) # google test - # - Expects package to pre-exist in the build environment # =========== - rapids_find_package(GTest REQUIRED - GLOBAL_TARGETS GTest::gtest GTest::gmock GTest::gtest_main GTest::gmock_main + include(${rapids-cmake-dir}/cpm/gtest.cmake) + rapids_cpm_gtest( BUILD_EXPORT_SET ${PROJECT_NAME}-core-exports INSTALL_EXPORT_SET ${PROJECT_NAME}-core-exports - FIND_ARGS CONFIG ) endif() -# cccl -- get an explicit cccl build, matx tries to pull a tag that doesn't exist. -# ========= -morpheus_utils_configure_cccl() - -# matx -# ==== -morpheus_utils_configure_matx() - -# pybind11 -# ========= -morpheus_utils_configure_pybind11() - -# RD-Kafka -# ===== -morpheus_utils_configure_rdkafka() - -# RxCpp -# ===== -morpheus_utils_configure_rxcpp() - -# MRC (Should come after all third party but before NVIDIA repos) -# ===== -morpheus_utils_configure_mrc() - -# CuDF -# ===== -morpheus_utils_configure_cudf() +# Include dependencies based on components being built +if(MORPHEUS_BUILD_MORPHEUS_CORE) + include(dependencies_core) +endif() -# Triton-client -# ===== -morpheus_utils_configure_tritonclient() +if(MORPHEUS_BUILD_MORPHEUS_LLM) + include(dependencies_llm) +endif() list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/cmake/dependencies_core.cmake b/cmake/dependencies_core.cmake new file mode 100644 index 0000000000..b8c5457a69 --- /dev/null +++ b/cmake/dependencies_core.cmake @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +list(APPEND CMAKE_MESSAGE_CONTEXT "dep_core") + +# cccl -- get an explicit cccl build, matx tries to pull a tag that doesn't exist. 
+# ========= +morpheus_utils_configure_cccl() + +# matx +# ==== +morpheus_utils_configure_matx() + +# pybind11 +# ========= +morpheus_utils_configure_pybind11() + +# RD-Kafka +# ===== +morpheus_utils_configure_rdkafka() + +# RxCpp +# ===== +morpheus_utils_configure_rxcpp() + +# MRC (Should come after all third party but before NVIDIA repos) +# ===== +morpheus_utils_configure_mrc() + +# CuDF +# ===== +morpheus_utils_configure_cudf() + +# Triton-client +# ===== +morpheus_utils_configure_tritonclient() + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/cmake/dependencies_llm.cmake b/cmake/dependencies_llm.cmake new file mode 100644 index 0000000000..fc1f062ec9 --- /dev/null +++ b/cmake/dependencies_llm.cmake @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +list(APPEND CMAKE_MESSAGE_CONTEXT "dep_llm") + +# pybind11 +# ========= +morpheus_utils_configure_pybind11() + +# RD-Kafka +# ===== +morpheus_utils_configure_rdkafka() + +# MRC (Should come after all third party but before NVIDIA repos) +# ===== +morpheus_utils_configure_mrc() + +# CuDF +morpheus_utils_configure_cudf() + +# Triton-client +# ===== +morpheus_utils_configure_tritonclient() + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/conda/environments/all_cuda-121_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml similarity index 76% rename from conda/environments/all_cuda-121_arch-x86_64.yaml rename to conda/environments/all_cuda-125_arch-x86_64.yaml index 201b487b98..0281cf6b68 100644 --- a/conda/environments/all_cuda-121_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -22,18 +22,18 @@ dependencies: - clangdev=16 - click>=8 - cmake=3.27 -- cuda-cudart-dev=12.1 -- cuda-cudart=12.1 -- cuda-nvcc=12.1 -- cuda-nvml-dev=12.1 -- cuda-nvrtc-dev=12.1 -- cuda-nvrtc=12.1 -- cuda-nvtx-dev=12.1 -- cuda-nvtx=12.1 -- cuda-tools=12.1 -- cuda-version=12.1 -- cudf=24.02 -- cuml=24.02.* +- cuda-cudart-dev=12.5 +- cuda-cudart=12.5 +- cuda-nvcc=12.5 +- cuda-nvml-dev=12.5 +- cuda-nvrtc-dev=12.5 +- cuda-nvrtc=12.5 +- cuda-nvtx-dev=12.5 +- cuda-nvtx=12.5 +- cuda-sanitizer-api +- cuda-version=12.5 +- cudf=24.10 +- cuml=24.10.* - cupy - cxx-compiler - cython=3.0 @@ -45,23 +45,28 @@ dependencies: - exhale=0.3.6 - feedparser=6.0 - flake8 -- gcc_linux-64=11.2 - git-lfs -- glog=0.6 -- grpcio-status=1.59 -- grpcio=1.59 -- gxx_linux-64=11.2 +- glog>=0.7.1,<0.8 +- grpcio +- grpcio-status +- gtest=1.14 +- gxx=12.1 - huggingface_hub=0.20.2 - include-what-you-use=0.20 - ipython - isort - jsonpatch>=1.33 - kfp -- libcudf=24.02 +- libcublas-dev +- libcudf=24.10 +- libcufft-dev +- libcurand-dev +- libcusolver-dev - librdkafka>=1.9.2,<1.10.0a0 - libtool - libwebp=1.3.2 -- mlflow>=2.10.0,<3 +- libzlib >=1.3.1,<2 +- mlflow - mrc=24.10 - myst-parser=0.18.1 - nbsphinx @@ -81,6 +86,7 @@ dependencies: - pre-commit - pybind11-stubgen=0.10.5 - pydantic +- pylibcudf=24.10 - 
pylint=3.0.3 - pypdf=3.17.4 - pypdfium2=4.30 @@ -92,22 +98,20 @@ dependencies: - python-docx==1.1.0 - python-graphviz - python=3.10 -- pytorch-cuda -- pytorch=*=*cuda* - rapidjson=1.1.0 -- rapids-dask-dependency=24.02 +- rapids-dask-dependency=24.10 - rdma-core>=48 - requests - requests-cache=1.1 - requests-toolbelt=1.0 -- s3fs=2023.12.2 +- s3fs - scikit-build=0.17.6 - scikit-learn=1.3.2 - sentence-transformers=2.7 - sphinx - sphinx_rtd_theme - sqlalchemy<2.0 -- sysroot_linux-64=2.17 +- sysroot_linux-64>=2.28 - tqdm=4 - transformers=4.36.2 - tritonclient=2.34 @@ -120,15 +124,15 @@ dependencies: - watchdog=3.0 - websockets - yapf=0.40.1 -- zlib=1.2.13 - pip: + - --extra-index-url https://download.pytorch.org/whl/cu124 - --find-links https://data.dgl.ai/wheels-test/repo.html - --find-links https://data.dgl.ai/wheels/cu121/repo.html - databricks-cli < 0.100 - databricks-connect - dgl==2.0.0 - dglgo - - faiss-gpu==1.7.* + - faiss-cpu - google-search-results==2.4 - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.16 @@ -136,4 +140,5 @@ dependencies: - nemollm==0.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 -name: all_cuda-121_arch-x86_64 + - torch==2.4.0+cu124 +name: all_cuda-125_arch-x86_64 diff --git a/conda/environments/dev_cuda-121_arch-x86_64.yaml b/conda/environments/dev_cuda-125_arch-x86_64.yaml similarity index 74% rename from conda/environments/dev_cuda-121_arch-x86_64.yaml rename to conda/environments/dev_cuda-125_arch-x86_64.yaml index 2b00aaeab2..af599fb7de 100644 --- a/conda/environments/dev_cuda-121_arch-x86_64.yaml +++ b/conda/environments/dev_cuda-125_arch-x86_64.yaml @@ -19,14 +19,14 @@ dependencies: - clangdev=16 - click>=8 - cmake=3.27 -- cuda-cudart-dev=12.1 -- cuda-nvcc=12.1 -- cuda-nvml-dev=12.1 -- cuda-nvrtc-dev=12.1 -- cuda-nvtx-dev=12.1 -- cuda-tools=12.1 -- cuda-version=12.1 -- cudf=24.02 +- cuda-cudart-dev=12.5 +- cuda-nvcc=12.5 +- cuda-nvml-dev=12.5 +- cuda-nvrtc-dev=12.5 +- cuda-nvtx-dev=12.5 +- cuda-sanitizer-api +- cuda-version=12.5 +- cudf=24.10 - cupy - cxx-compiler - cython=3.0 @@ -38,20 +38,25 @@ dependencies: - exhale=0.3.6 - feedparser=6.0 - flake8 -- gcc_linux-64=11.2 - git-lfs -- glog=0.6 -- grpcio-status=1.59 -- grpcio=1.59 -- gxx_linux-64=11.2 +- glog>=0.7.1,<0.8 +- grpcio +- grpcio-status +- gtest=1.14 +- gxx=12.1 - include-what-you-use=0.20 - ipython - isort -- libcudf=24.02 +- libcublas-dev +- libcudf=24.10 +- libcufft-dev +- libcurand-dev +- libcusolver-dev - librdkafka>=1.9.2,<1.10.0a0 - libtool - libwebp=1.3.2 -- mlflow>=2.10.0,<3 +- libzlib >=1.3.1,<2 +- mlflow - mrc=24.10 - myst-parser=0.18.1 - nbsphinx @@ -66,6 +71,7 @@ dependencies: - pre-commit - pybind11-stubgen=0.10.5 - pydantic +- pylibcudf=24.10 - pylint=3.0.3 - pypdfium2=4.30 - pytest-asyncio @@ -76,10 +82,8 @@ dependencies: - python-docx==1.1.0 - python-graphviz - python=3.10 -- pytorch-cuda -- pytorch=*=*cuda* - rapidjson=1.1.0 -- rapids-dask-dependency=24.02 +- rapids-dask-dependency=24.10 - rdma-core>=48 - requests - requests-cache=1.1 @@ -88,7 +92,7 @@ dependencies: - sphinx - sphinx_rtd_theme - sqlalchemy<2.0 -- sysroot_linux-64=2.17 +- sysroot_linux-64>=2.28 - tqdm=4 - tritonclient=2.34 - typing_utils=0.1 @@ -100,11 +104,12 @@ dependencies: - watchdog=3.0 - websockets - yapf=0.40.1 -- zlib=1.2.13 - pip: + - --extra-index-url https://download.pytorch.org/whl/cu124 - databricks-cli < 0.100 - databricks-connect - milvus==2.3.5 - pymilvus==2.3.6 - pytest-kafka==0.6.0 -name: dev_cuda-121_arch-x86_64 + - torch==2.4.0+cu124 +name: dev_cuda-125_arch-x86_64 diff --git 
a/conda/environments/examples_cuda-121_arch-x86_64.yaml b/conda/environments/examples_cuda-125_arch-x86_64.yaml similarity index 85% rename from conda/environments/examples_cuda-121_arch-x86_64.yaml rename to conda/environments/examples_cuda-125_arch-x86_64.yaml index 66b30db78e..ffcae28e4a 100644 --- a/conda/environments/examples_cuda-121_arch-x86_64.yaml +++ b/conda/environments/examples_cuda-125_arch-x86_64.yaml @@ -15,21 +15,21 @@ dependencies: - beautifulsoup4=4.12 - boto3 - click>=8 -- cudf=24.02 -- cuml=24.02.* +- cudf=24.10 +- cuml=24.10.* - cupy - datacompy=0.10 - dill=0.3.7 - docker-py=5.0 - elasticsearch==8.9.0 - feedparser=6.0 -- grpcio-status=1.59 -- grpcio=1.59 +- grpcio +- grpcio-status - huggingface_hub=0.20.2 - jsonpatch>=1.33 - kfp - libwebp=1.3.2 -- mlflow>=2.10.0,<3 +- mlflow - mrc=24.10 - networkx=2.8.8 - newspaper3k=0.2 @@ -48,13 +48,11 @@ dependencies: - python-docx==1.1.0 - python-graphviz - python=3.10 -- pytorch-cuda -- pytorch=*=*cuda* -- rapids-dask-dependency=24.02 +- rapids-dask-dependency=24.10 - requests - requests-cache=1.1 - requests-toolbelt=1.0 -- s3fs=2023.12.2 +- s3fs - scikit-learn=1.3.2 - sentence-transformers=2.7 - sqlalchemy<2.0 @@ -65,17 +63,19 @@ dependencies: - watchdog=3.0 - websockets - pip: + - --extra-index-url https://download.pytorch.org/whl/cu124 - --find-links https://data.dgl.ai/wheels-test/repo.html - --find-links https://data.dgl.ai/wheels/cu121/repo.html - databricks-cli < 0.100 - databricks-connect - dgl==2.0.0 - dglgo - - faiss-gpu==1.7.* + - faiss-cpu - google-search-results==2.4 - langchain-nvidia-ai-endpoints==0.0.11 - langchain==0.1.16 - milvus==2.3.5 - nemollm==0.3.5 - pymilvus==2.3.6 -name: examples_cuda-121_arch-x86_64 + - torch==2.4.0+cu124 +name: examples_cuda-125_arch-x86_64 diff --git a/conda/environments/model-utils_cuda-121_arch-x86_64.yaml b/conda/environments/model-utils_cuda-125_arch-x86_64.yaml similarity index 88% rename from conda/environments/model-utils_cuda-121_arch-x86_64.yaml rename to conda/environments/model-utils_cuda-125_arch-x86_64.yaml index 761f19aaa0..2957c36473 100644 --- a/conda/environments/model-utils_cuda-121_arch-x86_64.yaml +++ b/conda/environments/model-utils_cuda-125_arch-x86_64.yaml @@ -9,7 +9,7 @@ channels: - nvidia/label/dev - pytorch dependencies: -- cuml=24.02.* +- cuml=24.10.* - jupyterlab - matplotlib - onnx @@ -20,4 +20,4 @@ dependencies: - seqeval=1.2.2 - transformers=4.36.2 - xgboost -name: model-utils_cuda-121_arch-x86_64 +name: model-utils_cuda-125_arch-x86_64 diff --git a/conda/environments/runtime_cuda-121_arch-x86_64.yaml b/conda/environments/runtime_cuda-125_arch-x86_64.yaml similarity index 75% rename from conda/environments/runtime_cuda-121_arch-x86_64.yaml rename to conda/environments/runtime_cuda-125_arch-x86_64.yaml index ea6a442b3a..2551739061 100644 --- a/conda/environments/runtime_cuda-121_arch-x86_64.yaml +++ b/conda/environments/runtime_cuda-125_arch-x86_64.yaml @@ -12,21 +12,21 @@ dependencies: - appdirs - beautifulsoup4=4.12 - click>=8 -- cuda-cudart=12.1 -- cuda-nvrtc=12.1 -- cuda-nvtx=12.1 -- cuda-version=12.1 -- cudf=24.02 +- cuda-cudart=12.5 +- cuda-nvrtc=12.5 +- cuda-nvtx=12.5 +- cuda-version=12.5 +- cudf=24.10 - cupy - datacompy=0.10 - dill=0.3.7 - docker-py=5.0 - elasticsearch==8.9.0 - feedparser=6.0 -- grpcio-status=1.59 -- grpcio=1.59 +- grpcio +- grpcio-status - libwebp=1.3.2 -- mlflow>=2.10.0,<3 +- mlflow - mrc=24.10 - networkx=2.8.8 - numpydoc=1.5 @@ -36,9 +36,7 @@ dependencies: - python-confluent-kafka>=1.9.2,<1.10.0a0 - python-graphviz - python=3.10 
-- pytorch-cuda -- pytorch=*=*cuda* -- rapids-dask-dependency=24.02 +- rapids-dask-dependency=24.10 - requests - requests-cache=1.1 - scikit-learn=1.3.2 @@ -49,8 +47,10 @@ dependencies: - watchdog=3.0 - websockets - pip: + - --extra-index-url https://download.pytorch.org/whl/cu124 - databricks-cli < 0.100 - databricks-connect - milvus==2.3.5 - pymilvus==2.3.6 -name: runtime_cuda-121_arch-x86_64 + - torch==2.4.0+cu124 +name: runtime_cuda-125_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index e8f5525696..5c2eb2a5b2 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -14,12 +14,13 @@ # limitations under the License. # Dependency list for https://github.com/rapidsai/dependency-file-generator + files: # Includes all dependencies together in a single file all: output: conda matrix: - cuda: ["12.1"] + cuda: ["12.5"] arch: [x86_64] includes: - benchmark_cpp @@ -45,7 +46,7 @@ files: dev: output: conda matrix: - cuda: ["12.1"] + cuda: ["12.5"] arch: [x86_64] includes: - benchmark_cpp @@ -67,7 +68,7 @@ files: build: output: none matrix: - cuda: ["12.1"] + cuda: ["12.5"] arch: [x86_64] includes: - benchmark_cpp @@ -87,7 +88,7 @@ files: test: output: none matrix: - cuda: ["12.1"] + cuda: ["12.5"] arch: [x86_64] includes: - benchmark_cpp @@ -108,7 +109,7 @@ files: docs: output: none matrix: - cuda: ["12.1"] + cuda: ["12.5"] arch: [x86_64] includes: - benchmark_cpp @@ -131,7 +132,7 @@ files: runtime: output: conda matrix: - cuda: ["12.1"] + cuda: ["12.5"] arch: [x86_64] includes: - cudatoolkit @@ -145,7 +146,7 @@ files: examples: output: conda matrix: - cuda: ["12.1"] + cuda: ["12.5"] arch: [x86_64] includes: - cve-mitigation @@ -161,7 +162,7 @@ files: model-utils: output: conda matrix: - cuda: ["12.1"] + cuda: ["12.5"] arch: [x86_64] includes: - model-training-tuning @@ -196,26 +197,30 @@ dependencies: - output_types: [conda] matrices: - matrix: - cuda: "12.1" + cuda: "12.5" packages: - - cuda-cudart=12.1 - - cuda-nvrtc=12.1 - - cuda-nvtx=12.1 - - cuda-version=12.1 + - cuda-cudart=12.5 + - cuda-nvrtc=12.5 + - cuda-nvtx=12.5 + - cuda-version=12.5 cudatoolkit-dev: specific: - output_types: [conda] matrices: - matrix: - cuda: "12.1" + cuda: "12.5" packages: - - cuda-cudart-dev=12.1 - - cuda-nvml-dev=12.1 - - cuda-nvrtc-dev=12.1 - - cuda-nvtx-dev=12.1 - - cuda-tools=12.1 - - cuda-version=12.1 + - cuda-cudart-dev=12.5 + - cuda-nvml-dev=12.5 + - cuda-nvrtc-dev=12.5 + - cuda-nvtx-dev=12.5 + - cuda-sanitizer-api + - cuda-version=12.5 + - libcublas-dev # required by matx + - libcufft-dev # required by matx + - libcusolver-dev # required by matx + - libcurand-dev # required by matx @@ -226,22 +231,21 @@ dependencies: - output_types: [conda] packages: # Compilers - - cuda-nvcc=12.1 + - cuda-nvcc=12.5 - cxx-compiler - - gcc_linux-64=11.2 - - gxx_linux-64=11.2 + - gxx=12.1 # Non-Compiler Dependencies - automake=1.16.5 # Needed for DOCA build - c-ares=1.32 # 1.33 causes an undefined symbol error - ccache - cmake=3.27 - - cuda-cudart-dev=12.1 - - cuda-version=12.1 + - cuda-cudart-dev=12.5 + - cuda-version=12.5 - libtool # Needed for DOCA build - ninja=1.11 - pkg-config=0.29 # for mrc cmake - - sysroot_linux-64=2.17 + - sysroot_linux-64>=2.28 # Build dependencies for Morpheus on the host arch. 
Mirrors the `host` section in # ci/conda/recipes/morpheus/meta.yaml @@ -250,19 +254,21 @@ dependencies: - output_types: [conda] packages: # Include: cudatoolkit-dev - - cudf=24.02 + - cudf=24.10 - cython=3.0 - - glog=0.6 - - libcudf=24.02 + - glog>=0.7.1,<0.8 + - gtest=1.14 + - libcudf=24.10 - librdkafka>=1.9.2,<1.10.0a0 + - libzlib >=1.3.1,<2 - mrc=24.10 - nlohmann_json=3.11 - pybind11-stubgen=0.10.5 + - pylibcudf=24.10 - rapidjson=1.1.0 - rdma-core>=48 # Needed for DOCA. - scikit-build=0.17.6 - versioneer-518 - - zlib=1.2.13 checks: common: @@ -323,29 +329,26 @@ dependencies: - appdirs - beautifulsoup4=4.12 - click>=8 - # - cuda-version=12.1 ## - - cudf=24.02 + # - cuda-version=12.5 ## + - cudf=24.10 - cupy # Version determined from cudf - datacompy=0.10 - dill=0.3.7 - docker-py=5.0 - elasticsearch==8.9.0 - feedparser=6.0 - - grpcio=1.59 - - grpcio-status=1.59 + - grpcio + - grpcio-status # - libwebp=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863 ## - - mlflow>=2.10.0,<3 + - mlflow #>=2.10.0,<3 - mrc=24.10 - networkx=2.8.8 - numpydoc=1.5 - pydantic - # - python ## - python-confluent-kafka>=1.9.2,<1.10.0a0 - python-graphviz - - pytorch-cuda - - pytorch=*=*cuda* - pluggy=1.3 - - rapids-dask-dependency=24.02 # provides dask and distributed + - rapids-dask-dependency=24.10 # provides dask and distributed - requests - requests-cache=1.1 - scikit-learn=1.3.2 @@ -357,10 +360,12 @@ dependencies: - websockets - pip - pip: + - --extra-index-url https://download.pytorch.org/whl/cu124 - databricks-cli < 0.100 - databricks-connect - milvus==2.3.5 # update to match pymilvus when available - pymilvus==2.3.6 + - torch==2.4.0+cu124 test_python_morpheus: common: @@ -385,13 +390,13 @@ dependencies: - boto3 - kfp - papermill=2.4.0 - - s3fs=2023.12.2 + - s3fs example-gnn: common: - output_types: [conda] packages: - - &cuml cuml=24.02.* + - &cuml cuml=24.10.* - pip - pip: - --find-links https://data.dgl.ai/wheels/cu121/repo.html @@ -421,7 +426,7 @@ dependencies: - pip: - langchain==0.1.16 - langchain-nvidia-ai-endpoints==0.0.11 - - faiss-gpu==1.7.* + - faiss-cpu - google-search-results==2.4 - nemollm==0.3.5 diff --git a/docker/Dockerfile b/docker/Dockerfile index e9a51ecccf..be44ca1869 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -341,12 +341,13 @@ FROM conda_env_dev as git_clone ARG MORPHEUS_ROOT_HOST # Source the morpheus env to pick up the git-lfs package -RUN --mount=type=bind,source=${MORPHEUS_ROOT_HOST},target=/opt/host_repo \ +RUN --mount=type=bind,source=.,target=/opt/host_repo \ source activate morpheus &&\ - git clone file:///opt/host_repo /tmp/morpheus_repo &&\ + # Use a local clone to allow unpushed commits to be included + git clone file:///opt/host_repo/${MORPHEUS_ROOT_HOST} /tmp/morpheus_repo &&\ cd /tmp/morpheus_repo &&\ git lfs install &&\ - /tmp/morpheus_repo/scripts/fetch_data.py fetch datasets examples + ./scripts/fetch_data.py fetch datasets examples # ============ Stage: runtime ============ # Setup container for runtime environment @@ -357,7 +358,7 @@ ARG MORPHEUS_ROOT_HOST # Only copy specific files/folders over that are necessary for runtime COPY --from=git_clone "/tmp/morpheus_repo/conda/environments/*.yaml" "./conda/environments/" COPY --from=git_clone "/tmp/morpheus_repo/docker" "./docker" -COPY --from=build_docs "/workspace/build/docs/html" "./docs" +COPY --from=build_docs "/workspace/${MORPHEUS_ROOT_HOST}/build/docs/html" "./docs" COPY --from=git_clone "/tmp/morpheus_repo/examples" "./examples" COPY --from=git_clone 
"/tmp/morpheus_repo/models/datasets" "./models/datasets" COPY --from=git_clone "/tmp/morpheus_repo/scripts" "./scripts" diff --git a/docker/build_container.sh b/docker/build_container.sh index ca7f7cad78..85877583f7 100755 --- a/docker/build_container.sh +++ b/docker/build_container.sh @@ -36,6 +36,7 @@ LINUX_DISTRO=${LINUX_DISTRO:-ubuntu} LINUX_VER=${LINUX_VER:-22.04} MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-"OFF"} MORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM:-"ON"} +MORPHEUS_BUILD_MORPHEUS_DFP=${MORPHEUS_BUILD_MORPHEUS_DFP:-"ON"} PYTHON_VER=${PYTHON_VER:-3.10} # Determine the relative path from $PWD to $MORPHEUS_ROOT @@ -53,6 +54,7 @@ DOCKER_ARGS="${DOCKER_ARGS} --build-arg LINUX_VER=${LINUX_VER}" DOCKER_ARGS="${DOCKER_ARGS} --build-arg MORPHEUS_ROOT_HOST=${MORPHEUS_ROOT_HOST}" DOCKER_ARGS="${DOCKER_ARGS} --build-arg MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA}" DOCKER_ARGS="${DOCKER_ARGS} --build-arg MORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM}" +DOCKER_ARGS="${DOCKER_ARGS} --build-arg MORPHEUS_BUILD_MORPHEUS_DFP=${MORPHEUS_BUILD_MORPHEUS_DFP}" DOCKER_ARGS="${DOCKER_ARGS} --build-arg PYTHON_VER=${PYTHON_VER}" DOCKER_ARGS="${DOCKER_ARGS} --network=host" diff --git a/docker/run_container_release.sh b/docker/run_container_release.sh index 42575d394e..dce2132b1a 100755 --- a/docker/run_container_release.sh +++ b/docker/run_container_release.sh @@ -29,6 +29,7 @@ pushd ${SCRIPT_DIR} &> /dev/null MORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF} MORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM:-ON} +MORPHEUS_BUILD_MORPHEUS_DFP=${MORPHEUS_BUILD_MORPHEUS_DFP:-ON} DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME:-"nvcr.io/nvidia/morpheus/morpheus"} DOCKER_IMAGE_TAG=${DOCKER_IMAGE_TAG:-"$(git describe --tags --abbrev=0)-runtime"} diff --git a/docs/README.md b/docs/README.md index 469303430e..4fe4c43e58 100644 --- a/docs/README.md +++ b/docs/README.md @@ -22,7 +22,7 @@ Additional packages required for building the documentation are defined in `./co ## Install Additional Dependencies From the root of the Morpheus repo: ```bash -conda env update --solver=libmamba -n morpheus --file conda/environments/dev_cuda-121_arch-x86_64.yaml --prune +conda env update --solver=libmamba -n morpheus --file conda/environments/dev_cuda-125_arch-x86_64.yaml --prune ``` ## Build Morpheus and Documentation diff --git a/docs/source/developer_guide/contributing.md b/docs/source/developer_guide/contributing.md index 38439e9d43..b31edbfc64 100644 --- a/docs/source/developer_guide/contributing.md +++ b/docs/source/developer_guide/contributing.md @@ -160,15 +160,7 @@ Note: These instructions assume the user is using `mamba` instead of `conda` sin - Volta architecture GPU or better - [CUDA 12.1](https://developer.nvidia.com/cuda-12-1-0-download-archive) - `conda` and `mamba` - - Refer to the [Getting Started Guide](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) if `conda` is not already installed - - Install `mamba`: - - ```bash - conda activate base - conda install -c conda-forge mamba - ``` - - - **Note:** `mamba` should only be installed once in the base environment + - If `conda` and `mamba` are not installed, we recommend using the MiniForge install guide which is located [here](https://github.com/conda-forge/miniforge). This will install both `conda` and `mamba` and set the channel default to use `conda-forge`. 1. 
Set up environment variables and clone the repo: ```bash @@ -177,7 +169,7 @@ Note: These instructions assume the user is using `mamba` instead of `conda` sin cd $MORPHEUS_ROOT ``` -1. Ensure all submodules are checked out: +2. Ensure all submodules are checked out: ```bash git submodule update --init --recursive @@ -185,7 +177,7 @@ git submodule update --init --recursive 1. Create the Morpheus Conda environment ```bash - conda env create --solver=libmamba -n morpheus --file conda/environments/dev_cuda-121_arch-x86_64.yaml + conda env create --solver=libmamba -n morpheus --file conda/environments/dev_cuda-125_arch-x86_64.yaml conda activate morpheus ``` @@ -199,6 +191,7 @@ git submodule update --init --recursive ```bash pip install -e ${MORPHEUS_ROOT}/python/morpheus pip install -e ${MORPHEUS_ROOT}/python/morpheus_llm + pip install -e ${MORPHEUS_ROOT}/python/morpheus_dfp ``` Once Morpheus has been built, it can be installed into the current virtual environment. 1. Test the build (Note: some tests will be skipped)\ diff --git a/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md b/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md index fa3a37dd61..4f60bcf155 100644 --- a/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md +++ b/docs/source/developer_guide/guides/6_digital_fingerprinting_reference.md @@ -209,7 +209,7 @@ For input files containing an ISO 8601 formatted date string the `iso_date_regex from functools import partial from morpheus.utils.file_utils import date_extractor -from dfp.utils.regex_utils import iso_date_regex +from morpheus_dfp.utils.regex_utils import iso_date_regex ``` ```python # Batch files into buckets by time. Use the default ISO date extractor from the filename diff --git a/examples/developer_guide/3_simple_cpp_stage/README.md b/examples/developer_guide/3_simple_cpp_stage/README.md index 6e62534325..51573b0ad4 100644 --- a/examples/developer_guide/3_simple_cpp_stage/README.md +++ b/examples/developer_guide/3_simple_cpp_stage/README.md @@ -21,5 +21,5 @@ limitations under the License. |-------------|-----------|-------| | Conda | ✔ | | | Morpheus Docker Container | ✔ | | -| Morpheus Release Container | ✔ | Requires adding development packages to the container's Conda environment via `conda env update --solver=libmamba -n morpheus --file /workspace/conda/environments/dev_cuda-121_arch-x86_64.yaml` | +| Morpheus Release Container | ✔ | Requires adding development packages to the container's Conda environment via `conda env update --solver=libmamba -n morpheus --file /workspace/conda/environments/dev_cuda-125_arch-x86_64.yaml` | | Dev Container | ✔ | | diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md index 1fba854fde..988381e1c6 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/README.md +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/README.md @@ -25,7 +25,7 @@ This example adds two flags to the `read_simple.py` script. 
A `--use_cpp` flag w |-------------|-----------|-------| | Conda | ✔ | | | Morpheus Docker Container | ✔ | Requires launching the RabbitMQ container on the host | -| Morpheus Release Container | ✔ | Requires launching the RabbitMQ container on the host, and adding development packages to the container's Conda environment via `conda env update --solver=libmamba -n morpheus --file /workspace/conda/environments/dev_cuda-121_arch-x86_64.yaml` | +| Morpheus Release Container | ✔ | Requires launching the RabbitMQ container on the host, and adding development packages to the container's Conda environment via `conda env update --solver=libmamba -n morpheus --file /workspace/conda/environments/dev_cuda-125_arch-x86_64.yaml` | | Dev Container | ✘ | | ## Installing Pika diff --git a/examples/digital_fingerprinting/production/.env.sample b/examples/digital_fingerprinting/production/.env.sample index 3fee3685cc..01d7686c65 100644 --- a/examples/digital_fingerprinting/production/.env.sample +++ b/examples/digital_fingerprinting/production/.env.sample @@ -1,10 +1,4 @@ # NOTE: This file should be copied to `.env` in the same folder and updated for each user -MYSQL_DATABASE="db" -MYSQL_USER="mlflow" -MYSQL_PASSWORD="good_password" -MYSQL_ROOT_PASSWORD="even_better_password" -MYSQL_ROOT_HOST="172.*.*.*" -MYSQL_LOG_CONSOLE=1 # Update these with your own credentials UID=$(id -u) GID=$(id -g) UID=1000 diff --git a/examples/digital_fingerprinting/production/Dockerfile b/examples/digital_fingerprinting/production/Dockerfile index 8b9a884da0..d2e330c944 100644 --- a/examples/digital_fingerprinting/production/Dockerfile +++ b/examples/digital_fingerprinting/production/Dockerfile @@ -28,7 +28,7 @@ WORKDIR /workspace/examples/digital_fingerprinting/ # Install DFP dependencies RUN source activate morpheus \ - && /opt/conda/bin/conda env update --solver=libmamba -n morpheus --file /workspace/conda/environments/examples_cuda-121_arch-x86_64.yaml + && /opt/conda/bin/conda env update --solver=libmamba -n morpheus --file /workspace/conda/environments/examples_cuda-125_arch-x86_64.yaml # Set the tracking URI for mlflow ENV MLFLOW_TRACKING_URI="http://mlflow:5000" diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py b/examples/digital_fingerprinting/production/dfp_azure_pipeline.py similarity index 96% rename from examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py rename to examples/digital_fingerprinting/production/dfp_azure_pipeline.py index e35c3d5f02..dab4122ebd 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py +++ b/examples/digital_fingerprinting/production/dfp_azure_pipeline.py @@ -24,17 +24,6 @@ import click import mlflow import pandas as pd -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import 
get_package_relative_file @@ -59,6 +48,17 @@ from morpheus.utils.file_utils import date_extractor from morpheus.utils.file_utils import load_labels_file from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex def _file_type_name_to_enum(file_type: str) -> FileTypes: diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py b/examples/digital_fingerprinting/production/dfp_duo_pipeline.py similarity index 95% rename from examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py rename to examples/digital_fingerprinting/production/dfp_duo_pipeline.py index 4f8333d632..c1e3e00495 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py +++ b/examples/digital_fingerprinting/production/dfp_duo_pipeline.py @@ -24,17 +24,6 @@ import click import mlflow import pandas as pd -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import get_package_relative_file @@ -60,6 +49,17 @@ from morpheus.utils.file_utils import date_extractor from morpheus.utils.file_utils import load_labels_file from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex def _file_type_name_to_enum(file_type: str) -> FileTypes: diff --git 
a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py b/examples/digital_fingerprinting/production/dfp_integrated_training_batch_pipeline.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py rename to examples/digital_fingerprinting/production/dfp_integrated_training_batch_pipeline.py index 5cd551055d..5e857929f7 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py +++ b/examples/digital_fingerprinting/production/dfp_integrated_training_batch_pipeline.py @@ -17,23 +17,23 @@ from datetime import datetime import click -# When segment modules are imported, they're added to the module registry. -# To avoid flake8 warnings about unused code, the noqa flag is used during import -import dfp.modules # noqa: F401 # pylint:disable=unused-import -from dfp.utils.config_generator import ConfigGenerator -from dfp.utils.config_generator import generate_ae_config -from dfp.utils.dfp_arg_parser import DFPArgParser -from dfp.utils.schema_utils import Schema -from dfp.utils.schema_utils import SchemaBuilder import morpheus.loaders # noqa: F401 # pylint:disable=unused-import import morpheus.modules # noqa: F401 # pylint:disable=unused-import +# When segment modules are imported, they're added to the module registry. +# To avoid flake8 warnings about unused code, the noqa flag is used during import +import morpheus_dfp.modules # noqa: F401 # pylint:disable=unused-import from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import parse_log_level from morpheus.config import Config from morpheus.pipeline.pipeline import Pipeline from morpheus.stages.general.multi_port_modules_stage import MultiPortModulesStage from morpheus.stages.input.control_message_file_source_stage import ControlMessageFileSourceStage +from morpheus_dfp.utils.config_generator import ConfigGenerator +from morpheus_dfp.utils.config_generator import generate_ae_config +from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser +from morpheus_dfp.utils.schema_utils import Schema +from morpheus_dfp.utils.schema_utils import SchemaBuilder @click.command() diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py b/examples/digital_fingerprinting/production/dfp_integrated_training_streaming_pipeline.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py rename to examples/digital_fingerprinting/production/dfp_integrated_training_streaming_pipeline.py index 55ebcf71a9..587dc81358 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py +++ b/examples/digital_fingerprinting/production/dfp_integrated_training_streaming_pipeline.py @@ -17,23 +17,23 @@ from datetime import datetime import click -# When segment modules are imported, they're added to the module registry. -# To avoid flake8 warnings about unused code, the noqa flag is used during import. 
-import dfp.modules # noqa: F401 # pylint:disable=unused-import -from dfp.utils.config_generator import ConfigGenerator -from dfp.utils.config_generator import generate_ae_config -from dfp.utils.dfp_arg_parser import DFPArgParser -from dfp.utils.schema_utils import Schema -from dfp.utils.schema_utils import SchemaBuilder import morpheus.loaders # noqa: F401 # pylint:disable=unused-import import morpheus.modules # noqa: F401 # pylint:disable=unused-import +# When segment modules are imported, they're added to the module registry. +# To avoid flake8 warnings about unused code, the noqa flag is used during import. +import morpheus_dfp.modules # noqa: F401 # pylint:disable=unused-import from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import parse_log_level from morpheus.config import Config from morpheus.pipeline.pipeline import Pipeline from morpheus.stages.general.multi_port_modules_stage import MultiPortModulesStage from morpheus.stages.input.control_message_kafka_source_stage import ControlMessageKafkaSourceStage +from morpheus_dfp.utils.config_generator import ConfigGenerator +from morpheus_dfp.utils.config_generator import generate_ae_config +from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser +from morpheus_dfp.utils.schema_utils import Schema +from morpheus_dfp.utils.schema_utils import SchemaBuilder @click.command() diff --git a/examples/digital_fingerprinting/production/grafana/run.py b/examples/digital_fingerprinting/production/grafana/run.py index 2bb7ade0e4..47d8e927d5 100644 --- a/examples/digital_fingerprinting/production/grafana/run.py +++ b/examples/digital_fingerprinting/production/grafana/run.py @@ -26,17 +26,6 @@ import logging_loki import mlflow import pandas as pd -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import get_package_relative_file @@ -61,6 +50,17 @@ from morpheus.utils.file_utils import date_extractor from morpheus.utils.file_utils import load_labels_file from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex def 
_file_type_name_to_enum(file_type: str) -> FileTypes: diff --git a/examples/digital_fingerprinting/production/mlflow/Dockerfile b/examples/digital_fingerprinting/production/mlflow/Dockerfile index 7398ee0c0c..284f46f28b 100644 --- a/examples/digital_fingerprinting/production/mlflow/Dockerfile +++ b/examples/digital_fingerprinting/production/mlflow/Dockerfile @@ -24,7 +24,7 @@ RUN apt update && \ rm -rf /var/cache/apt/* /var/lib/apt/lists/* # Install python packages -RUN pip install "mlflow >=2.10.0,<3" boto3 pymysql pyyaml +RUN pip install "mlflow >=2.10.0,<3" boto3 pyyaml # We run on port 5000 EXPOSE 5000 diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md b/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md index b002e7fa47..e43755094e 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/README.md @@ -47,7 +47,7 @@ Install additional required dependencies: ```bash mamba env update \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-121_arch-x86_64.yaml + --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml ``` diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py b/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py index 734e3e17ed..480893e8b8 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/benchmark_conf_generator.py @@ -25,10 +25,11 @@ import mlflow import pandas as pd -from dfp.utils.config_generator import ConfigGenerator -from dfp.utils.config_generator import generate_ae_config -from dfp.utils.dfp_arg_parser import DFPArgParser -from dfp.utils.schema_utils import SchemaBuilder + +from morpheus_dfp.utils.config_generator import ConfigGenerator +from morpheus_dfp.utils.config_generator import generate_ae_config +from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser +from morpheus_dfp.utils.schema_utils import SchemaBuilder logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py index 9583821290..051e3b7f25 100644 --- a/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/benchmarks/test_bench_e2e_dfp_pipeline.py @@ -20,23 +20,11 @@ import typing import boto3 -import dfp.modules # noqa: F401 # pylint:disable=unused-import import pytest -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex -from dfp.utils.schema_utils import Schema import morpheus.loaders # 
noqa: F401 # pylint:disable=unused-import import morpheus.modules # noqa: F401 # pylint:disable=unused-import +import morpheus_dfp.modules # noqa: F401 # pylint:disable=unused-import from benchmarks.benchmark_conf_generator import BenchmarkConfGenerator from benchmarks.benchmark_conf_generator import load_json from benchmarks.benchmark_conf_generator import set_mlflow_tracking_uri @@ -53,6 +41,18 @@ from morpheus.utils.column_info import DataFrameInputSchema from morpheus.utils.file_utils import date_extractor from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex +from morpheus_dfp.utils.schema_utils import Schema logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb index 1b40052f04..39be0c336f 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_inference.ipynb @@ -47,15 +47,15 @@ "from datetime import datetime\n", "from functools import partial\n", "\n", - "from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", - "from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", - "from dfp.stages.dfp_inference_stage import DFPInferenceStage\n", - "from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage\n", - "from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", - "from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", - "from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", - "from dfp.stages.multi_file_source import MultiFileSource\n", - "from dfp.utils.regex_utils import iso_date_regex\n", + "from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", + "from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", + "from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage\n", + "from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage\n", + "from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", + "from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", + "from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", + "from morpheus_dfp.stages.multi_file_source import MultiFileSource\n", + "from morpheus_dfp.utils.regex_utils import iso_date_regex\n", "\n", "from morpheus.common import FileTypes\n", "from morpheus.common import FilterSource\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_integrated_training.ipynb 
b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_integrated_training.ipynb index 75c14999a6..0002a318b8 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_integrated_training.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_integrated_training.ipynb @@ -45,12 +45,12 @@ "\n", "# When segment modules are imported, they're added to the module registry.\n", "# To avoid flake8 warnings about unused code, the noqa flag is used during import.\n", - "import dfp.modules # noqa: F401\n", - "from dfp.utils.config_generator import ConfigGenerator\n", - "from dfp.utils.config_generator import generate_ae_config\n", - "from dfp.utils.dfp_arg_parser import DFPArgParser\n", - "from dfp.utils.schema_utils import Schema\n", - "from dfp.utils.schema_utils import SchemaBuilder\n", + "import morpheus_dfp.modules # noqa: F401\n", + "from morpheus_dfp.utils.config_generator import ConfigGenerator\n", + "from morpheus_dfp.utils.config_generator import generate_ae_config\n", + "from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser\n", + "from morpheus_dfp.utils.schema_utils import Schema\n", + "from morpheus_dfp.utils.schema_utils import SchemaBuilder\n", "\n", "import morpheus.loaders # noqa: F401\n", "import morpheus.modules # noqa: F401\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb index acb759341b..4547dea6e9 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_azure_training.ipynb @@ -45,15 +45,15 @@ "import typing\n", "from datetime import datetime\n", "\n", - "from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", - "from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", - "from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage\n", - "from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", - "from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", - "from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", - "from dfp.stages.dfp_training import DFPTraining\n", - "from dfp.stages.multi_file_source import MultiFileSource\n", - "from dfp.utils.regex_utils import iso_date_regex\n", + "from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", + "from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", + "from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage\n", + "from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", + "from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", + "from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", + "from morpheus_dfp.stages.dfp_training import DFPTraining\n", + "from morpheus_dfp.stages.multi_file_source import MultiFileSource\n", + "from morpheus_dfp.utils.regex_utils import iso_date_regex\n", "\n", "from morpheus.common import FileTypes\n", "from morpheus.cli.utils import get_log_levels\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb index c2b126d2cc..675952b652 100644 --- 
a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_inference.ipynb @@ -45,15 +45,15 @@ "import typing\n", "from datetime import datetime\n", "\n", - "from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", - "from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", - "from dfp.stages.dfp_inference_stage import DFPInferenceStage\n", - "from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage\n", - "from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", - "from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", - "from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", - "from dfp.stages.multi_file_source import MultiFileSource\n", - "from dfp.utils.regex_utils import iso_date_regex\n", + "from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", + "from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", + "from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage\n", + "from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage\n", + "from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", + "from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", + "from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", + "from morpheus_dfp.stages.multi_file_source import MultiFileSource\n", + "from morpheus_dfp.utils.regex_utils import iso_date_regex\n", "\n", "from morpheus.common import FileTypes\n", "from morpheus.common import FilterSource\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_integrated_training.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_integrated_training.ipynb index db57a85a1e..086786e9a1 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_integrated_training.ipynb +++ b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_integrated_training.ipynb @@ -45,14 +45,14 @@ "\n", "# When segment modules are imported, they're added to the module registry.\n", "# To avoid flake8 warnings about unused code, the noqa flag is used during import.\n", - "import dfp.modules # noqa: F401\n", + "import morpheus_dfp.modules # noqa: F401\n", "from morpheus import modules # noqa: F401\n", "from morpheus import loaders # noqa: F401\n", - "from dfp.utils.config_generator import ConfigGenerator\n", - "from dfp.utils.config_generator import generate_ae_config\n", - "from dfp.utils.dfp_arg_parser import DFPArgParser\n", - "from dfp.utils.schema_utils import Schema\n", - "from dfp.utils.schema_utils import SchemaBuilder\n", + "from morpheus_dfp.utils.config_generator import ConfigGenerator\n", + "from morpheus_dfp.utils.config_generator import generate_ae_config\n", + "from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser\n", + "from morpheus_dfp.utils.schema_utils import Schema\n", + "from morpheus_dfp.utils.schema_utils import SchemaBuilder\n", "\n", "import morpheus.loaders # noqa: F401\n", "import morpheus.modules # noqa: F401\n", diff --git a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb index 598903af35..a0a30e2c07 100644 --- a/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb +++ 
b/examples/digital_fingerprinting/production/morpheus/notebooks/dfp_duo_training.ipynb @@ -45,15 +45,15 @@ "import typing\n", "from datetime import datetime\n", "\n", - "from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", - "from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", - "from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage\n", - "from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", - "from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", - "from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", - "from dfp.stages.dfp_training import DFPTraining\n", - "from dfp.stages.multi_file_source import MultiFileSource\n", - "from dfp.utils.regex_utils import iso_date_regex\n", + "from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage\n", + "from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage\n", + "from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage\n", + "from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage\n", + "from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage\n", + "from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage\n", + "from morpheus_dfp.stages.dfp_training import DFPTraining\n", + "from morpheus_dfp.stages.multi_file_source import MultiFileSource\n", + "from morpheus_dfp.utils.regex_utils import iso_date_regex\n", "\n", "from morpheus.common import FileTypes\n", "from morpheus.cli.utils import get_log_levels\n", diff --git a/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py b/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py index afa9706ff7..09d6304042 100644 --- a/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py +++ b/examples/digital_fingerprinting/visualization/dfp_viz_azure_pipeline.py @@ -23,17 +23,6 @@ import click import mlflow import pandas as pd -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import get_package_relative_file @@ -54,6 +43,17 @@ from morpheus.utils.file_utils import date_extractor from morpheus.utils.file_utils import load_labels_file from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training 
import DFPTraining +from morpheus_dfp.stages.dfp_viz_postproc import DFPVizPostprocStage +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex @click.command() diff --git a/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py b/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py index 28f36995c1..f039644b77 100644 --- a/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py +++ b/examples/digital_fingerprinting/visualization/dfp_viz_duo_pipeline.py @@ -23,17 +23,6 @@ import click import mlflow import pandas as pd -from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage -from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage -from dfp.stages.dfp_inference_stage import DFPInferenceStage -from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage -from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage -from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage -from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage -from dfp.stages.dfp_training import DFPTraining -from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage -from dfp.stages.multi_file_source import MultiFileSource -from dfp.utils.regex_utils import iso_date_regex from morpheus.cli.utils import get_log_levels from morpheus.cli.utils import get_package_relative_file @@ -55,6 +44,17 @@ from morpheus.utils.file_utils import date_extractor from morpheus.utils.file_utils import load_labels_file from morpheus.utils.logger import configure_logging +from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage +from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage +from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage +from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage +from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage +from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage +from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage +from morpheus_dfp.stages.dfp_training import DFPTraining +from morpheus_dfp.stages.dfp_viz_postproc import DFPVizPostprocStage +from morpheus_dfp.stages.multi_file_source import MultiFileSource +from morpheus_dfp.utils.regex_utils import iso_date_regex @click.command() diff --git a/examples/doca/vdb_realtime/README.md b/examples/doca/vdb_realtime/README.md index 6633957106..64dabdb459 100644 --- a/examples/doca/vdb_realtime/README.md +++ b/examples/doca/vdb_realtime/README.md @@ -98,7 +98,7 @@ export NGC_API_KEY="" Then install basic requirements: ```bash -conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-121_arch-x86_64.yaml --prune +conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-125_arch-x86_64.yaml --prune ``` Run the RAG example to query the Milvus database: diff --git a/examples/gnn_fraud_detection_pipeline/README.md b/examples/gnn_fraud_detection_pipeline/README.md index 8bb1ab1570..3945eced97 100644 --- a/examples/gnn_fraud_detection_pipeline/README.md +++ b/examples/gnn_fraud_detection_pipeline/README.md @@ -17,7 +17,7 @@ limitations under the License. 
# GNN Fraud Detection Pipeline ## Supported Environments -All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-121_arch-x86_64.yaml` or `conda/environments/examples_cuda-121_arch-x86_64.yaml` environment files. Refer to the [Requirements](#requirements) section for more information. +All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-x86_64.yaml` or `conda/environments/examples_cuda-125_arch-x86_64.yaml` environment files. Refer to the [Requirements](#requirements) section for more information. | Environment | Supported | Notes | |-------------|-----------|-------| | Conda | ✔ | | @@ -32,7 +32,7 @@ Prior to running the GNN fraud detection pipeline, additional requirements must ```bash mamba env update \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-121_arch-x86_64.yaml + --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml ``` ## Running diff --git a/examples/gnn_fraud_detection_pipeline/stages/classification_stage.py b/examples/gnn_fraud_detection_pipeline/stages/classification_stage.py index 013034dcef..3a5845f6e6 100644 --- a/examples/gnn_fraud_detection_pipeline/stages/classification_stage.py +++ b/examples/gnn_fraud_detection_pipeline/stages/classification_stage.py @@ -46,7 +46,7 @@ def __init__(self, c: Config, model_xgb_file: str): super().__init__(c) - self._xgb_model = cuml.ForestInference.load(model_xgb_file, output_class=True) + self._xgb_model = cuml.ForestInference.load(model_xgb_file, output_class=True, model_type="xgboost") self._needed_columns.update({'node_id': TypeId.INT64, 'prediction': TypeId.FLOAT32}) @property diff --git a/examples/llm/agents/README.md b/examples/llm/agents/README.md index d8c2944b26..2721452a93 100644 --- a/examples/llm/agents/README.md +++ b/examples/llm/agents/README.md @@ -35,7 +35,7 @@ limitations under the License. - [Run example (Kafka Pipeline)](#run-example-kafka-pipeline) ## Supported Environments -All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-121_arch-x86_64.yaml` or `conda/environments/examples_cuda-121_arch-x86_64.yaml` environment files. Refer to the [Install Dependencies](#install-dependencies) section for more information. +All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-x86_64.yaml` or `conda/environments/examples_cuda-125_arch-x86_64.yaml` environment files. Refer to the [Install Dependencies](#install-dependencies) section for more information. | Environment | Supported | Notes | |-------------|-----------|-------| | Conda | ✔ | | @@ -106,7 +106,7 @@ Install the required dependencies. ```bash mamba env update \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-121_arch-x86_64.yaml + --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml ``` diff --git a/examples/llm/completion/README.md b/examples/llm/completion/README.md index c619546c47..e72ffe1ce6 100644 --- a/examples/llm/completion/README.md +++ b/examples/llm/completion/README.md @@ -31,7 +31,7 @@ limitations under the License. 
- [Running the Morpheus Pipeline](#running-the-morpheus-pipeline) ## Supported Environments -All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-121_arch-x86_64.yaml` or `conda/environments/examples_cuda-121_arch-x86_64.yaml` environment files. Refer to the [Install Dependencies](#install-dependencies) section for more information. +All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-x86_64.yaml` or `conda/environments/examples_cuda-125_arch-x86_64.yaml` environment files. Refer to the [Install Dependencies](#install-dependencies) section for more information. | Environment | Supported | Notes | |-------------|-----------|-------| | Conda | ✔ | | @@ -80,7 +80,7 @@ Install the required dependencies. ```bash mamba env update \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-121_arch-x86_64.yaml + --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml ``` diff --git a/examples/llm/rag/README.md b/examples/llm/rag/README.md index f21531ac05..f7c0863b5e 100644 --- a/examples/llm/rag/README.md +++ b/examples/llm/rag/README.md @@ -18,7 +18,7 @@ limitations under the License. # Retrieval Augmented Generation (RAG) Pipeline ## Supported Environments -All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-121_arch-x86_64.yaml` or `conda/environments/examples_cuda-121_arch-x86_64.yaml` environment files. This example also requires the [VDB upload](../vdb_upload/README.md) pipeline to have been run previously. +All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-x86_64.yaml` or `conda/environments/examples_cuda-125_arch-x86_64.yaml` environment files. This example also requires the [VDB upload](../vdb_upload/README.md) pipeline to have been run previously. | Environment | Supported | Notes | |-------------|-----------|-------| | Conda | ✔ | | diff --git a/examples/llm/vdb_upload/README.md b/examples/llm/vdb_upload/README.md index 7348a9cde6..b892de2159 100644 --- a/examples/llm/vdb_upload/README.md +++ b/examples/llm/vdb_upload/README.md @@ -34,7 +34,7 @@ limitations under the License. - [Exporting and Deploying a Different Model from Hugging Face](#exporting-and-deploying-a-different-model-from-hugging-face) ## Supported Environments -All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-121_arch-x86_64.yaml` or `conda/environments/examples_cuda-121_arch-x86_64.yaml` environment files. +All environments require additional Conda packages which can be installed with either the `conda/environments/all_cuda-125_arch-x86_64.yaml` or `conda/environments/examples_cuda-125_arch-x86_64.yaml` environment files. 
| Environment | Supported | Notes | |-------------|-----------|-------| | Conda | ✔ | | diff --git a/external/utilities b/external/utilities index fb2c9503fb..85f8f7af2e 160000 --- a/external/utilities +++ b/external/utilities @@ -1 +1 @@ -Subproject commit fb2c9503fbfdd08503013f712b8bc1e4d9869933 +Subproject commit 85f8f7af2e8d9bc7bde978cd40c40297b1116957 diff --git a/manifest.yaml b/manifest.yaml index f81e333671..8646890c13 100644 --- a/manifest.yaml +++ b/manifest.yaml @@ -23,7 +23,7 @@ repos: -DMORPHEUS_PYTHON_INPLACE_BUILD=ON python: - name: morpheus - sub_dir: "" + sub_dir: python/morpheus git: host: github tag: branch-24.10 diff --git a/models/training-tuning-scripts/fraud-detection-models/README.md b/models/training-tuning-scripts/fraud-detection-models/README.md index 025d871ceb..219121b91a 100644 --- a/models/training-tuning-scripts/fraud-detection-models/README.md +++ b/models/training-tuning-scripts/fraud-detection-models/README.md @@ -26,7 +26,7 @@ Install packages for training GNN model. ```bash mamba env update \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/model-utils-121_arch-x86_64.yaml + --file ./conda/environments/model-utils-125_arch-x86_64.yaml ``` ### Options for training and tuning models. diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b41fabadf0..1a90518cbe 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -15,10 +15,16 @@ list(APPEND CMAKE_MESSAGE_CONTEXT "python") -add_subdirectory(morpheus) +if (MORPHEUS_BUILD_MORPHEUS_CORE) + add_subdirectory(morpheus) +endif() if (MORPHEUS_BUILD_MORPHEUS_LLM) -add_subdirectory(morpheus_llm) + add_subdirectory(morpheus_llm) +endif() + +if (MORPHEUS_BUILD_MORPHEUS_DFP) + add_subdirectory(morpheus_dfp) endif() list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/python/morpheus/morpheus/_lib/cmake/libmorpheus.cmake b/python/morpheus/morpheus/_lib/cmake/libmorpheus.cmake index 62c33b96c4..7d07b41bbd 100644 --- a/python/morpheus/morpheus/_lib/cmake/libmorpheus.cmake +++ b/python/morpheus/morpheus/_lib/cmake/libmorpheus.cmake @@ -83,8 +83,9 @@ target_link_libraries(morpheus $<$:ZLIB::ZLIB> PUBLIC $ - cudf::cudf CUDA::nvtx3 + cudf::cudf + glog::glog mrc::pymrc RDKAFKA::RDKAFKA TritonClient::httpclient_static diff --git a/python/morpheus/morpheus/_lib/cudf_helpers.pyx b/python/morpheus/morpheus/_lib/cudf_helpers.pyx index 0940fd8f18..2345978b92 100644 --- a/python/morpheus/morpheus/_lib/cudf_helpers.pyx +++ b/python/morpheus/morpheus/_lib/cudf_helpers.pyx @@ -19,18 +19,32 @@ from cudf.core.dtypes import StructDtype from libcpp.string cimport string from libcpp.utility cimport move from libcpp.vector cimport vector +from pylibcudf.libcudf.io.types cimport column_name_info +from pylibcudf.libcudf.io.types cimport table_metadata +from pylibcudf.libcudf.io.types cimport table_with_metadata +from pylibcudf.libcudf.table.table_view cimport table_view +from pylibcudf.libcudf.types cimport size_type from cudf._lib.column cimport Column -from cudf._lib.cpp.io.types cimport column_name_info -from cudf._lib.cpp.io.types cimport table_metadata -from cudf._lib.cpp.io.types cimport table_with_metadata -from cudf._lib.cpp.table.table_view cimport table_view -from cudf._lib.cpp.types cimport size_type from cudf._lib.utils cimport data_from_unique_ptr -from cudf._lib.utils cimport get_column_names from cudf._lib.utils cimport table_view_from_table +cdef vector[string] get_column_names(object tbl, object index): + cdef vector[string] column_names + if index is not False: + if isinstance(tbl._index, 
cudf.core.multiindex.MultiIndex): + for idx_name in tbl._index.names: + column_names.push_back(str.encode(idx_name)) + else: + if tbl._index.name is not None: + column_names.push_back(str.encode(tbl._index.name)) + + for col_name in tbl._column_names: + column_names.push_back(str.encode(col_name)) + + return column_names + cdef extern from "morpheus/objects/table_info.hpp" namespace "morpheus" nogil: diff --git a/python/morpheus/morpheus/_lib/src/utilities/matx_util.cu b/python/morpheus/morpheus/_lib/src/utilities/matx_util.cu index a1dc626242..b5bf6c6b22 100644 --- a/python/morpheus/morpheus/_lib/src/utilities/matx_util.cu +++ b/python/morpheus/morpheus/_lib/src/utilities/matx_util.cu @@ -274,7 +274,7 @@ struct MatxUtil__MatxThreshold auto output_tensor = matx::make_tensor(static_cast(output_data), output_shape); // Convert max value to bool - (output_tensor = matx::rmax(input_tensor, {1}) > (InputT)threshold).run(stream.value()); + (output_tensor = matx::max(input_tensor, {1}) > (InputT)threshold).run(stream.value()); } /** @@ -362,7 +362,7 @@ struct MatxUtil__MatxReduceMax auto output_slice = output_tensor.template Slice<1>({output_idx, 0}, {matx::matxDropDim, matx::matxEnd}); - (output_slice = matx::rmax(input_slice.Permute({1, 0}))).run(stream.value()); + (output_slice = matx::max(input_slice.Permute({1, 0}))).run(stream.value()); } }; } // namespace diff --git a/python/morpheus/morpheus/_lib/tests/CMakeLists.txt b/python/morpheus/morpheus/_lib/tests/CMakeLists.txt index 0dab45cbc6..215b595576 100644 --- a/python/morpheus/morpheus/_lib/tests/CMakeLists.txt +++ b/python/morpheus/morpheus/_lib/tests/CMakeLists.txt @@ -26,6 +26,7 @@ add_executable(test_cuda target_link_libraries(test_cuda PRIVATE + glog::glog GTest::gtest GTest::gtest_main matx::matx diff --git a/python/morpheus/morpheus/controllers/mlflow_model_writer_controller.py b/python/morpheus/morpheus/controllers/mlflow_model_writer_controller.py index 2f81401e94..8bc1be6829 100644 --- a/python/morpheus/morpheus/controllers/mlflow_model_writer_controller.py +++ b/python/morpheus/morpheus/controllers/mlflow_model_writer_controller.py @@ -19,6 +19,8 @@ import urllib.parse import mlflow +import mlflow.types +import pandas as pd import requests from mlflow.exceptions import MlflowException from mlflow.models.signature import ModelSignature @@ -28,7 +30,7 @@ from mlflow.tracking import MlflowClient from mlflow.types import ColSpec from mlflow.types import Schema -from mlflow.types.utils import _infer_pandas_column +from mlflow.types.utils import _infer_pandas_column as _mlflow_infer_pandas_column from mlflow.types.utils import _infer_schema import cudf @@ -39,6 +41,19 @@ logger = logging.getLogger(__name__) +def _infer_pandas_column(col: pd.Series): + try: + return _mlflow_infer_pandas_column(col) + except MlflowException as ex: + # mlflow internally calls `pd.api.types.is_string_dtype(col)`, whose behavior changed in Pandas 2: + # passing a series such as `[None]` will return False even when the dtype is string, so + # it is necessary to pass the dtype of the series rather than the series itself for backwards + # compatibility. + if "Failed to infer schema for pandas.Series" in ex.message and pd.api.types.is_string_dtype(col.dtype): + return mlflow.types.DataType.string + raise ex + + class MLFlowModelWriterController: """ Controller class for writing machine learning models to MLflow with optional permissions and configurations.
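The `_infer_pandas_column` wrapper above relies on `pd.api.types.is_string_dtype` treating a series and its dtype differently under Pandas 2. A minimal sketch of that behavior (assuming Pandas 2.x semantics; the all-`None` series mirrors the `[None]` case named in the code comment):

```python
import pandas as pd

# An object-dtype column holding only missing values, mirroring the `[None]`
# case described in the comment above.
col = pd.Series([None])

# Pandas 2 infers from the values of an object-dtype series, so an all-None
# column is no longer reported as a string column:
print(pd.api.types.is_string_dtype(col))        # False under Pandas 2

# The dtype-level check keeps the older object-dtype behavior, which is why
# the wrapper falls back to `col.dtype`:
print(pd.api.types.is_string_dtype(col.dtype))  # True
```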
diff --git a/python/morpheus/morpheus/parsers/event_parser.py b/python/morpheus/morpheus/parsers/event_parser.py index 998232c130..a82785b48a 100644 --- a/python/morpheus/morpheus/parsers/event_parser.py +++ b/python/morpheus/morpheus/parsers/event_parser.py @@ -98,7 +98,7 @@ def parse_raw_event(self, text: cudf.Series, event_regex: typing.Dict[str, any]) # Applies regex pattern for each expected output column to raw data for col in event_specific_columns: regex_pattern = event_regex.get(col) - extracted_gdf = text.str.extract(regex_pattern) + extracted_gdf = text.str.extract(regex_pattern).reset_index() if not extracted_gdf.empty: parsed_gdf[col] = extracted_gdf[0] diff --git a/python/morpheus/morpheus/parsers/ip.py b/python/morpheus/morpheus/parsers/ip.py index 814d46f9dd..1fcb75ee81 100644 --- a/python/morpheus/morpheus/parsers/ip.py +++ b/python/morpheus/morpheus/parsers/ip.py @@ -39,9 +39,9 @@ def ip_to_int(values): >>> ip.ip_to_int(cudf.Series(["192.168.0.1","10.0.0.1"])) 0 3232235521 1 167772161 - dtype: int64 + dtype: uint32 """ - return cudf.Series(values.str.ip2int()) + return values.str.ip2int() def int_to_ip(values): @@ -52,7 +52,7 @@ def int_to_ip(values): Parameters ---------- values : cudf.Series - Integer representations of IP addresses + uint32 representations of IP addresses Returns ------- @@ -63,12 +63,12 @@ def int_to_ip(values): -------- >>> import morpheus.parsers.ip as ip >>> import cudf - >>> ip.int_to_ip(cudf.Series([3232235521, 167772161])) + >>> ip.int_to_ip(cudf.Series([3232235521, 167772161], dtype=cudf.api.types.dtype("uint32"))) 0 192.168.0.1 1 10.0.0.1 dtype: object """ - return cudf.Series(values._column.int2ip()) + return cudf.Series._from_column(values._column.int2ip()) def is_ip(ips: str): diff --git a/python/morpheus/morpheus/stages/general/multi_processing_stage.py b/python/morpheus/morpheus/stages/general/multi_processing_stage.py new file mode 100644 index 0000000000..f96939841a --- /dev/null +++ b/python/morpheus/morpheus/stages/general/multi_processing_stage.py @@ -0,0 +1,200 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import functools +import inspect +import typing +from abc import abstractmethod + +import mrc +import mrc.core.operators as ops + +from morpheus.config import Config +from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema +from morpheus.utils.shared_process_pool import SharedProcessPool + +InputT = typing.TypeVar('InputT') +OutputT = typing.TypeVar('OutputT') + + +class MultiProcessingBaseStage(SinglePortStage, typing.Generic[InputT, OutputT]): + + def __init__(self, *, c: Config, process_pool_usage: float, max_in_flight_messages: int = None): + super().__init__(c=c) + + if not 0 <= process_pool_usage <= 1: + raise ValueError("process_pool_usage must be between 0 and 1.") + self._process_pool_usage = process_pool_usage + + self._shared_process_pool = SharedProcessPool() + self._shared_process_pool.wait_until_ready() + + if max_in_flight_messages is None: + # set the multiplier to 1.5 to keep the workers busy + self._max_in_flight_messages = int(self._shared_process_pool.total_max_workers * 1.5) + else: + self._max_in_flight_messages = max_in_flight_messages + + def accepted_types(self) -> typing.Tuple: + """ + There are two approaches to inherit from this class: + - With generic types: MultiProcessingDerivedStage(MultiProcessingBaseStage[InputT, OutputT]) + - With concrete types: MultiProcessingDerivedStage(MultiProcessingBaseStage[int, str]) + + When inheriting with generic types, the derived class can be instantiated like this: + + stage = MultiProcessingDerivedStage[int, str]() + + In this case, typing.Generic stores the stage type in stage.__orig_class__, and the concrete types can be accessed + as below: + + input_type = typing.get_args(stage.__orig_class__)[0] # int + output_type = typing.get_args(stage.__orig_class__)[1] # str + + However, when instantiating a stage which inherits with concrete types: + + stage = MultiProcessingDerivedStage() + + The stage instance does not have an __orig_class__ attribute (since it is not a generic type). Thus, the concrete + types need to be retrieved from its base class (which is a generic type): + + input_type = typing.get_args(stage.__orig_bases__[0])[0] # int + output_type = typing.get_args(stage.__orig_bases__[0])[1] # str + + Raises: + RuntimeError: if the accepted types cannot be deduced from either __orig_class__ or __orig_bases__ + + Returns: + typing.Tuple: accepted input types + """ + if hasattr(self, "__orig_class__"): + # inherited with generic types + input_type = typing.get_args(self.__orig_class__)[0] # pylint: disable=no-member + + elif hasattr(self, "__orig_bases__"): + # inherited with concrete types + input_type = typing.get_args(self.__orig_bases__[0])[0] # pylint: disable=no-member + + else: + raise RuntimeError("Could not deduce input type") + + return (input_type, ) + + def compute_schema(self, schema: StageSchema): + """ + See the comment on `accepted_types` for more information on accessing the input and output types.
+ + Args: + schema (StageSchema): StageSchema + + Raises: + RuntimeError: if the output type cannot be deduced from either __orig_class__ or __orig_bases__ + """ + if hasattr(self, "__orig_class__"): + # inherited with generic types + output_type = typing.get_args(self.__orig_class__)[1] # pylint: disable=no-member + + elif hasattr(self, "__orig_bases__"): + # inherited with concrete types + output_type = typing.get_args(self.__orig_bases__[0])[1] + + else: + raise RuntimeError("Could not deduce output type") + + schema.output_schema.set_type(output_type) + + def supports_cpp_node(self): + return False + + @abstractmethod + def _on_data(self, data: InputT) -> OutputT: + pass + + def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: + node = builder.make_node(self.name, ops.map(self._on_data)) + node.launch_options.pe_count = self._max_in_flight_messages + + builder.make_edge(input_node, node) + + return node + + +def _get_func_signature(func: typing.Callable[[InputT], OutputT]) -> tuple[type, type]: + signature = inspect.signature(func) + + if isinstance(func, functools.partial): + # If the function is a partial, find the type of the first unbound argument + bound_args = func.keywords + input_arg = None + + for param in signature.parameters.values(): + if param.name not in bound_args: + if input_arg is not None: + raise ValueError("Found more than one unbound argument in partial function") + input_arg = param + + if input_arg is None: + raise ValueError("Cannot find unbound argument in partial function") + input_t = input_arg.annotation + + else: + if len(signature.parameters) != 1: + raise ValueError("Function must have exactly one argument") + + input_t = next(iter(signature.parameters.values())).annotation + + output_t = signature.return_annotation + + return (input_t, output_t) + + +class MultiProcessingStage(MultiProcessingBaseStage[InputT, OutputT]): + + def __init__(self, + *, + c: Config, + unique_name: str, + process_fn: typing.Callable[[InputT], OutputT], + process_pool_usage: float, + max_in_flight_messages: int = None): + super().__init__(c=c, process_pool_usage=process_pool_usage, max_in_flight_messages=max_in_flight_messages) + + self._name = unique_name + self._process_fn = process_fn + self._shared_process_pool.set_usage(self.name, self._process_pool_usage) + + @property + def name(self) -> str: + return self._name + + def _on_data(self, data: InputT) -> OutputT: + task = self._shared_process_pool.submit_task(self.name, self._process_fn, data) + result = task.result() + + return result + + @staticmethod + def create(*, + c: Config, + unique_name: str, + process_fn: typing.Callable[[InputT], OutputT], + process_pool_usage: float): + + input_t, output_t = _get_func_signature(process_fn) + return MultiProcessingStage[input_t, output_t](c=c, + unique_name=unique_name, + process_pool_usage=process_pool_usage, + process_fn=process_fn) diff --git a/python/morpheus/morpheus/stages/input/arxiv_source.py b/python/morpheus/morpheus/stages/input/arxiv_source.py index 0e987da229..1e24fb855f 100644 --- a/python/morpheus/morpheus/stages/input/arxiv_source.py +++ b/python/morpheus/morpheus/stages/input/arxiv_source.py @@ -37,7 +37,7 @@ IMPORT_ERROR_MESSAGE = ( "ArxivSource requires additional dependencies to be installed.
diff --git a/python/morpheus/morpheus/stages/input/arxiv_source.py b/python/morpheus/morpheus/stages/input/arxiv_source.py
index 0e987da229..1e24fb855f 100644
--- a/python/morpheus/morpheus/stages/input/arxiv_source.py
+++ b/python/morpheus/morpheus/stages/input/arxiv_source.py
@@ -37,7 +37,7 @@
 IMPORT_ERROR_MESSAGE = (
     "ArxivSource requires additional dependencies to be installed. Install them by running the following command: "
     "`conda env update --solver=libmamba -n morpheus"
-    "--file conda/environments/all_cuda-121_arch-x86_64.yaml --prune`")
+    " --file conda/environments/all_cuda-125_arch-x86_64.yaml --prune`")
@@ -47,7 +47,7 @@ class ArxivSource(PreallocatorMixin, SingleOutputSource):
     This stage requires several additional dependencies to be installed. Install them by running the following command:
     `conda env update --solver=libmamba -n morpheus "
-    "--file conda/environments/all_cuda-121_arch-x86_64.yaml --prune`
+    "--file conda/environments/all_cuda-125_arch-x86_64.yaml --prune`
 
     Parameters
     ----------
diff --git a/python/morpheus/morpheus/stages/input/http_server_source_stage.py b/python/morpheus/morpheus/stages/input/http_server_source_stage.py
index 2afdd07efe..54e78793fc 100644
--- a/python/morpheus/morpheus/stages/input/http_server_source_stage.py
+++ b/python/morpheus/morpheus/stages/input/http_server_source_stage.py
@@ -277,7 +277,6 @@ def _generate_frames(self, subscription: mrc.Subscription) -> typing.Iterator[Co
                     or not subscription.is_subscribed()):
                 self._processing = False
             else:
-                logger.debug("Queue empty, sleeping ...")
                 time.sleep(self._sleep_time)
         except Closed:
             logger.error("Queue closed unexpectedly, shutting down")
diff --git a/python/morpheus/morpheus/stages/postprocess/generate_viz_frames_stage.py b/python/morpheus/morpheus/stages/postprocess/generate_viz_frames_stage.py
index 97ead48749..7e62870138 100644
--- a/python/morpheus/morpheus/stages/postprocess/generate_viz_frames_stage.py
+++ b/python/morpheus/morpheus/stages/postprocess/generate_viz_frames_stage.py
@@ -281,8 +281,8 @@ def write_batch(msg: ControlMessage):
         out_df = cudf.DataFrame()
         out_df["dt"] = (df["timestamp"] - time0).astype(np.int32)
-        out_df["src"] = df["src_ip"].str.ip_to_int().astype(np.int32)
-        out_df["dst"] = df["dest_ip"].str.ip_to_int().astype(np.int32)
+        out_df["src"] = df["src_ip"].str.ip_to_int().astype(np.uint32)
+        out_df["dst"] = df["dest_ip"].str.ip_to_int().astype(np.uint32)
         out_df["lvl"] = df["secret_keys"].astype(np.int32)
         out_df["data"] = df["data"]
diff --git a/python/morpheus/morpheus/utils/shared_process_pool.py b/python/morpheus/morpheus/utils/shared_process_pool.py
new file mode 100644
index 0000000000..1abfe06d30
--- /dev/null
+++ b/python/morpheus/morpheus/utils/shared_process_pool.py
@@ -0,0 +1,434 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
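+
+# Illustrative usage sketch (comments only, not executed; `my_fn` and
+# "my_stage" are placeholder names). A caller reserves a share of the pool's
+# workers for a stage, then submits tasks and waits on the returned Task:
+#
+#     pool = SharedProcessPool()        # singleton; workers start on first use
+#     pool.wait_until_ready()
+#     pool.set_usage("my_stage", 0.5)   # reserve up to 50% of the workers
+#     task = pool.submit_task("my_stage", my_fn, some_arg)
+#     result = task.result()            # blocks until a worker finishes the task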
+
+import logging
+import math
+import multiprocessing as mp
+import os
+import queue
+import threading
+from enum import Enum
+from typing import Callable
+
+logger = logging.getLogger(__name__)
+
+
+class PoolStatus(Enum):
+    INITIALIZING = 0
+    RUNNING = 1
+    STOPPED = 2
+    SHUTDOWN = 3
+
+
+class SimpleFuture:
+
+    def __init__(self, manager):
+        self._result = manager.Value("i", None)
+        self._exception = manager.Value("i", None)
+        self._done = manager.Event()
+
+    def set_result(self, result):
+        self._result.value = result
+        self._done.set()
+
+    def set_exception(self, exception):
+        self._exception.value = exception
+        self._done.set()
+
+    def result(self):
+        self._done.wait()
+        if self._exception.value is not None:
+            raise self._exception.value
+        return self._result.value
+
+    def done(self):
+        return self._done.is_set()
+
+
+class Task(SimpleFuture):
+
+    def __init__(self, manager, process_fn: Callable, args, kwargs):
+        super().__init__(manager)
+        self._process_fn = process_fn
+        self._args = args
+        self._kwargs = kwargs
+
+    @property
+    def process_fn(self):
+        return self._process_fn
+
+    @property
+    def args(self):
+        return self._args
+
+    @property
+    def kwargs(self):
+        return self._kwargs
+
+
+class CancellationToken:
+
+    def __init__(self, manager):
+        self._cancel_event = manager.Event()
+
+    def cancel(self):
+        self._cancel_event.set()
+
+    def is_cancelled(self):
+        return self._cancel_event.is_set()
+
+
+# pylint: disable=W0201
+class SharedProcessPool:
+
+    _instance = None
+    _lock = threading.Lock()
+
+    def __new__(cls):
+        """
+        Singleton pattern for SharedProcessPool.
+
+        Returns
+        -------
+        cls._instance : SharedProcessPool
+            The SharedProcessPool instance.
+        """
+
+        with cls._lock:
+            if cls._instance is None:
+                logger.debug("SharedProcessPool.__new__: Creating a new instance...")
+                cls._instance = super().__new__(cls)
+                cls._instance._initialize()
+                logger.debug("SharedProcessPool.__new__: SharedProcessPool has been initialized.")
+
+            else:
+                logger.debug("SharedProcessPool.__new__: instance already exists.")
+
+        return cls._instance
+
+    def _initialize(self):
+        self._status = PoolStatus.INITIALIZING
+
+        cpu_usage = os.environ.get("MORPHEUS_SHARED_PROCESS_POOL_CPU_USAGE", None)
+        if cpu_usage is not None:
+            cpu_usage = float(cpu_usage)
+            if not 0 <= cpu_usage <= 1:
+                raise ValueError("CPU usage must be between 0 and 1.")
+        else:
+            cpu_usage = 0.5
+        self._total_max_workers = math.floor(max(1, len(os.sched_getaffinity(0)) * cpu_usage))
+        self._processes = []
+
+        self._context = mp.get_context("fork")
+        self._manager = self._context.Manager()
+        self._task_queues = self._manager.dict()
+        self._stage_semaphores = self._manager.dict()
+        self._total_usage = 0.0
+        self._stage_usage = {}
+
+        self._cancellation_token = CancellationToken(self._manager)
+        self._launch_condition = threading.Condition()
+        self._join_condition = threading.Condition()
+
+        self.start()
+
+    def _launch_workers(self):
+        for i in range(self.total_max_workers):
+            process = self._context.Process(target=self._worker,
+                                            args=(self._cancellation_token, self._task_queues, self._stage_semaphores))
+            process.start()
+            self._processes.append(process)
+            logger.debug("SharedProcessPool._launch_workers(): Process %s/%s has been started.",
+                         i + 1,
+                         self.total_max_workers)
+
+        with self._launch_condition:
+            self._launch_condition.notify_all()
+
+        self._status = PoolStatus.RUNNING
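+
+    # Illustrative sizing note (the 8-CPU affinity mask is an assumption for
+    # the example): with the default cpu_usage of 0.5, _initialize() computes
+    # total_max_workers == floor(8 * 0.5) == 4. Exporting
+    # MORPHEUS_SHARED_PROCESS_POOL_CPU_USAGE=0.25 would yield 2 workers instead.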
+
+    @property
+    def total_max_workers(self):
+        return self._total_max_workers
+
+    @property
+    def status(self) -> PoolStatus:
+        return self._status
+
+    @staticmethod
+    def _worker(cancellation_token, task_queues, stage_semaphores):
+        logger.debug("SharedProcessPool._worker: Worker process %s has been started.", os.getpid())
+
+        while True:
+            if cancellation_token.is_cancelled():
+                logger.debug("SharedProcessPool._worker: Worker process %s has terminated the worker loop.",
+                             os.getpid())
+                return
+
+            for stage_name, task_queue in task_queues.items():
+                semaphore = stage_semaphores[stage_name]
+
+                if not semaphore.acquire(blocking=False):
+                    # The stage has reached its limit of concurrent processes
+                    continue
+
+                try:
+                    task = task_queue.get(timeout=0.1)
+                except queue.Empty:
+                    semaphore.release()
+                    continue
+
+                if task is None:
+                    logger.warning("SharedProcessPool._worker: Worker process %s has received a None task.",
+                                   os.getpid())
+                    semaphore.release()
+                    continue
+
+                process_fn = task.process_fn
+                args = task.args
+                kwargs = task.kwargs
+
+                try:
+                    result = process_fn(*args, **kwargs)
+                    task.set_result(result)
+                except Exception as e:
+                    task.set_exception(e)
+
+                task_queue.task_done()
+
+                semaphore.release()
+
+    def _join_process_pool(self):
+        for task_queue in self._task_queues.values():
+            task_queue.join()
+
+        self._cancellation_token.cancel()
+        for i, p in enumerate(self._processes):
+            p.join()
+            logger.debug("Process %s/%s has been joined.", i + 1, len(self._processes))
+
+        with self._join_condition:
+            self._join_condition.notify_all()
+
+    def submit_task(self, stage_name: str, process_fn: Callable, *args, **kwargs) -> Task:
+        """
+        Submit a task to the SharedProcessPool.
+
+        Parameters
+        ----------
+        stage_name : str
+            The unique name of the stage.
+        process_fn : Callable
+            The function to be executed in the process pool.
+        args : Any
+            Arbitrary arguments for the process_fn.
+        kwargs : Any
+            Arbitrary keyword arguments for the process_fn.
+
+        Returns
+        -------
+        Task
+            The task object that includes the result of the process_fn.
+
+        Raises
+        ------
+        RuntimeError
+            If the SharedProcessPool is not running.
+        ValueError
+            If the stage_name has not been set in the SharedProcessPool.
+        """
+        if self._status != PoolStatus.RUNNING:
+            raise RuntimeError("Cannot submit a task to a SharedProcessPool that is not running.")
+
+        if stage_name not in self._stage_usage:
+            raise ValueError(f"Stage {stage_name} has not been set in SharedProcessPool.")
+
+        task = Task(self._manager, process_fn, args, kwargs)
+        self._task_queues[stage_name].put(task)
+
+        return task
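+
+    # Illustrative sketch (comments only): exceptions raised by process_fn
+    # inside a worker are captured via set_exception() and re-raised in the
+    # submitting process when Task.result() is called.
+    #
+    #     def fail():
+    #         return 1 / 0
+    #
+    #     task = pool.submit_task("my_stage", fail)
+    #     task.result()  # re-raises ZeroDivisionError here, in the caller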
+ """ + if self._status != PoolStatus.RUNNING: + raise RuntimeError("Cannot set usage to a SharedProcessPool that is not running.") + + if not 0 <= percentage <= 1: + raise ValueError("Percentage must be between 0 and 1.") + + new_total_usage = self._total_usage - self._stage_usage.get(stage_name, 0.0) + percentage + + if new_total_usage > 1.0: + raise ValueError("Total percentage cannot exceed 1.0.") + + self._stage_usage[stage_name] = percentage + self._total_usage = new_total_usage + + allowed_processes_num = max(1, int(self._total_max_workers * percentage)) + self._stage_semaphores[stage_name] = self._manager.Semaphore(allowed_processes_num) + + if stage_name not in self._task_queues: + self._task_queues[stage_name] = self._manager.Queue() + + logger.debug("SharedProcessPool.set_usage(): stage_usage: %s", self._stage_usage) + logger.debug("SharedProcessPool.set_usage(): stage semaphores: %s", allowed_processes_num) + + def start(self): + """ + Start the SharedProcessPool that is currently stopped and keep the settings before last shutdown. + + Raises + ------ + RuntimeError + If the SharedProcessPool is not shutdown. + """ + if self._status == PoolStatus.RUNNING: + logger.warning("SharedProcessPool.start(): process pool is already running.") + return + + process_launcher = threading.Thread(target=self._launch_workers) + process_launcher.start() + process_launcher.join() + + def wait_until_ready(self, timeout: float | None = None): + """ + Wait until the SharedProcessPool is running and ready to accept tasks. + + Parameters + ---------- + timeout : _type_, optional + timeout in seconds to wait for the SharedProcessPool to be ready, by default None. + If None, it will wait indefinitely. + + Raises + ------ + RuntimeError + If the SharedProcessPool is not initializing or running. + TimeoutError + If has been waiting more than the timeout. + """ + if self._status == PoolStatus.RUNNING: + logger.debug("SharedProcessPool.wait_until_ready(): SharedProcessPool is already running.") + return + + if self._status == PoolStatus.INITIALIZING: + with self._launch_condition: + launched = self._launch_condition.wait(timeout) + if not launched: + raise TimeoutError("Time out.") + else: + raise RuntimeError("Cannot wait for a pool that is not initializing or running.") + + logger.debug("SharedProcessPool.wait_until_ready(): SharedProcessPool is ready.") + + def reset(self): + """ + Clear all the previous settings and restart the SharedProcessPool. + + Raises + ------ + RuntimeError + If the SharedProcessPool is not already shut down. + """ + if self._status != PoolStatus.SHUTDOWN: + raise RuntimeError("Cannot reset a SharedProcessPool that is not already shut down.") + + self._initialize() + + def stop(self): + """ + Stop receiving any new tasks. + """ + if self._status not in (PoolStatus.RUNNING, PoolStatus.INITIALIZING): + logger.warning("SharedProcessPool.stop(): Cannot stop a SharedProcessPool that is not running.") + return + + # No new tasks will be accepted from this point + self._status = PoolStatus.STOPPED + + def join(self, timeout: float | None = None): + """ + Wait until the SharedProcessPool is terminated. + + Parameters + ---------- + timeout : _type_, optional + timeout in seconds to wait for the SharedProcessPool to be terminated, by default None. + If None, it will wait indefinitely. + + Raises + ------ + RuntimeError + If is called on a SharedProcessPool that is not stopped. + + TimeoutError + If has been waiting more than the timeout. 
+ """ + + if self._status != PoolStatus.STOPPED: + if self._status == PoolStatus.SHUTDOWN: + logging.warning("SharedProcessPool.join(): process pool is already shut down.") + return + + raise RuntimeError("Cannot join SharedProcessPool that is not stopped.") + + process_joiner = threading.Thread(target=self._join_process_pool) + process_joiner.start() + + with self._join_condition: + joined = self._join_condition.wait(timeout) + if not joined: + raise TimeoutError("time out.") + + process_joiner.join() + + self._status = PoolStatus.SHUTDOWN + + logger.debug("SharedProcessPool.join(): SharedProcessPool has been joined.") + + def terminate(self): + """ + Terminate all processes and shutdown the SharedProcessPool immediately. + """ + for i, p in enumerate(self._processes): + p.terminate() + logger.debug("Process %s/%s has been terminated.", i + 1, self._total_max_workers) + + logger.debug("SharedProcessPool.terminate(): SharedProcessPool has been terminated.") + self._status = PoolStatus.SHUTDOWN + + def __del__(self): + self.terminate() diff --git a/python/morpheus_dfp/CMakeLists.txt b/python/morpheus_dfp/CMakeLists.txt new file mode 100644 index 0000000000..fbb1afe6ad --- /dev/null +++ b/python/morpheus_dfp/CMakeLists.txt @@ -0,0 +1,46 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +list(APPEND CMAKE_MESSAGE_CONTEXT "morpheus_dfp") + +# Save the root of the python for relative paths +set(MORPHEUS_DFP_PY_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..) 
+ + +# ##################################### +# #### Morpheus DFP Python Setup ###### +# ##################################### +morpheus_utils_python_configure() + +morpheus_utils_create_python_package(morpheus_dfp + PROJECT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + SOURCE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/morpheus_dfp" +) + +if(MORPHEUS_PYTHON_INPLACE_BUILD) + list(APPEND extra_args "IS_INPLACE") +endif() + +if(MORPHEUS_PYTHON_BUILD_WHEEL) + list(APPEND extra_args "BUILD_WHEEL") +endif() + +if(MORPHEUS_PYTHON_PERFORM_INSTALL) + list(APPEND extra_args "INSTALL_WHEEL") +endif() + +morpheus_utils_build_python_package(morpheus_dfp ${extra_args}) + +list(POP_BACK CMAKE_MESSAGE_CONTEXT) diff --git a/python/morpheus_dfp/MANIFEST.in b/python/morpheus_dfp/MANIFEST.in new file mode 100644 index 0000000000..807dc3f1b0 --- /dev/null +++ b/python/morpheus_dfp/MANIFEST.in @@ -0,0 +1,2 @@ +include morpheus_dfp/_version.py +recursive-include morpheus_dfp py.typed diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/__init__.py b/python/morpheus_dfp/morpheus_dfp/__init__.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/__init__.py rename to python/morpheus_dfp/morpheus_dfp/__init__.py diff --git a/python/morpheus_dfp/morpheus_dfp/_version.py b/python/morpheus_dfp/morpheus_dfp/_version.py new file mode 100644 index 0000000000..af416901c7 --- /dev/null +++ b/python/morpheus_dfp/morpheus_dfp/_version.py @@ -0,0 +1,685 @@ +# SPDX-FileCopyrightText: Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.22 (https://github.com/python-versioneer/python-versioneer) +"""Git implementation of _version.py.""" + +import errno +import functools +import os +import re +import subprocess +import sys +from typing import Callable +from typing import Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "v" + cfg.parentdir_prefix = "morpheus-" + cfg.versionfile_source = "morpheus/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands, )) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return { + "version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None + } + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. 
When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return { + "version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date + } + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None + } + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + MATCH_ARGS = ["--match", "%s*" % tag_prefix] if tag_prefix else [] + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", "--always", "--long", *MATCH_ARGS], cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. 
Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None + } + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date") + } + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
+ for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None + } + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None + } diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/messages/__init__.py b/python/morpheus_dfp/morpheus_dfp/messages/__init__.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/messages/__init__.py rename to python/morpheus_dfp/morpheus_dfp/messages/__init__.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/messages/dfp_message_meta.py b/python/morpheus_dfp/morpheus_dfp/messages/dfp_message_meta.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/messages/dfp_message_meta.py rename to python/morpheus_dfp/morpheus_dfp/messages/dfp_message_meta.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/__init__.py b/python/morpheus_dfp/morpheus_dfp/modules/__init__.py similarity index 68% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/__init__.py rename to python/morpheus_dfp/morpheus_dfp/modules/__init__.py index f274245601..cf1e5741ba 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/__init__.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/__init__.py @@ -17,16 +17,16 @@ # When segment modules are imported, they're added to the module registry. # To avoid flake8 warnings about unused code, the noqa flag is used during import. 
-from dfp.modules import dfp_data_prep -from dfp.modules import dfp_deployment -from dfp.modules import dfp_inference -from dfp.modules import dfp_inference_pipe -from dfp.modules import dfp_postprocessing -from dfp.modules import dfp_preproc -from dfp.modules import dfp_rolling_window -from dfp.modules import dfp_split_users -from dfp.modules import dfp_training -from dfp.modules import dfp_training_pipe +from morpheus_dfp.modules import dfp_data_prep +from morpheus_dfp.modules import dfp_deployment +from morpheus_dfp.modules import dfp_inference +from morpheus_dfp.modules import dfp_inference_pipe +from morpheus_dfp.modules import dfp_postprocessing +from morpheus_dfp.modules import dfp_preproc +from morpheus_dfp.modules import dfp_rolling_window +from morpheus_dfp.modules import dfp_split_users +from morpheus_dfp.modules import dfp_training +from morpheus_dfp.modules import dfp_training_pipe __all__ = [ "dfp_split_users", diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_data_prep.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_data_prep.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_data_prep.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_data_prep.py index 0a91a21b2e..dfaad30e87 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_data_prep.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_data_prep.py @@ -24,8 +24,7 @@ from morpheus.utils.column_info import process_dataframe from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_DATA_PREP +from morpheus_dfp.utils.module_ids import DFP_DATA_PREP logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_deployment.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_deployment.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_deployment.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_deployment.py index e4744c32b6..05611fbca0 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_deployment.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_deployment.py @@ -22,10 +22,9 @@ from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import merge_dictionaries from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_DEPLOYMENT -from ..utils.module_ids import DFP_INFERENCE_PIPE -from ..utils.module_ids import DFP_TRAINING_PIPE +from morpheus_dfp.utils.module_ids import DFP_DEPLOYMENT +from morpheus_dfp.utils.module_ids import DFP_INFERENCE_PIPE +from morpheus_dfp.utils.module_ids import DFP_TRAINING_PIPE logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference.py similarity index 96% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_inference.py index 48e6e03568..2a66a04d9d 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference.py @@ -16,8 +16,6 @@ import time import mrc -from dfp.utils.model_cache import ModelCache -from 
dfp.utils.model_cache import ModelManager from mlflow.tracking.client import MlflowClient from mrc.core import operators as ops @@ -26,9 +24,10 @@ from morpheus.messages import ControlMessage from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..messages.dfp_message_meta import DFPMessageMeta -from ..utils.module_ids import DFP_INFERENCE +from morpheus_dfp.messages.dfp_message_meta import DFPMessageMeta +from morpheus_dfp.utils.model_cache import ModelCache +from morpheus_dfp.utils.model_cache import ModelManager +from morpheus_dfp.utils.module_ids import DFP_INFERENCE logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference_pipe.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference_pipe.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference_pipe.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_inference_pipe.py index 24f72c278a..26c36ca763 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_inference_pipe.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_inference_pipe.py @@ -23,13 +23,12 @@ from morpheus.utils.module_ids import WRITE_TO_FILE from morpheus.utils.module_utils import merge_dictionaries from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_DATA_PREP -from ..utils.module_ids import DFP_INFERENCE -from ..utils.module_ids import DFP_INFERENCE_PIPE -from ..utils.module_ids import DFP_POST_PROCESSING -from ..utils.module_ids import DFP_PREPROC -from ..utils.module_ids import DFP_ROLLING_WINDOW +from morpheus_dfp.utils.module_ids import DFP_DATA_PREP +from morpheus_dfp.utils.module_ids import DFP_INFERENCE +from morpheus_dfp.utils.module_ids import DFP_INFERENCE_PIPE +from morpheus_dfp.utils.module_ids import DFP_POST_PROCESSING +from morpheus_dfp.utils.module_ids import DFP_PREPROC +from morpheus_dfp.utils.module_ids import DFP_ROLLING_WINDOW logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_postprocessing.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_postprocessing.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_postprocessing.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_postprocessing.py index 908c0d61c5..3375fc40af 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_postprocessing.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_postprocessing.py @@ -22,8 +22,7 @@ from morpheus.messages import ControlMessage from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_POST_PROCESSING +from morpheus_dfp.utils.module_ids import DFP_POST_PROCESSING logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_preproc.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_preproc.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_preproc.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_preproc.py index 4dd89334dc..54f934495f 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_preproc.py +++ 
b/python/morpheus_dfp/morpheus_dfp/modules/dfp_preproc.py @@ -24,9 +24,8 @@ from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import merge_dictionaries from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_PREPROC -from ..utils.module_ids import DFP_SPLIT_USERS +from morpheus_dfp.utils.module_ids import DFP_PREPROC +from morpheus_dfp.utils.module_ids import DFP_SPLIT_USERS logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_rolling_window.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_rolling_window.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_rolling_window.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_rolling_window.py index bfdbe13e2c..ea9b1fede8 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_rolling_window.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_rolling_window.py @@ -19,8 +19,6 @@ import mrc import pandas as pd -from dfp.utils.cached_user_window import CachedUserWindow -from dfp.utils.logging_timer import log_time from mrc.core import operators as ops import cudf @@ -29,8 +27,9 @@ from morpheus.messages import MessageMeta from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_ROLLING_WINDOW +from morpheus_dfp.utils.cached_user_window import CachedUserWindow +from morpheus_dfp.utils.logging_timer import log_time +from morpheus_dfp.utils.module_ids import DFP_ROLLING_WINDOW logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_split_users.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_split_users.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_split_users.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_split_users.py index a5757ac407..7ec8c7f0f4 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_split_users.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_split_users.py @@ -17,7 +17,6 @@ import mrc import pandas as pd -from dfp.utils.logging_timer import log_time from mrc.core import operators as ops import cudf @@ -26,8 +25,8 @@ from morpheus.messages import MessageMeta from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_SPLIT_USERS +from morpheus_dfp.utils.logging_timer import log_time +from morpheus_dfp.utils.module_ids import DFP_SPLIT_USERS logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_training.py index 388eba25ec..234855d9f1 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training.py @@ -24,9 +24,8 @@ from morpheus.models.dfencoder import AutoEncoder from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import register_module - -from 
..messages.dfp_message_meta import DFPMessageMeta -from ..utils.module_ids import DFP_TRAINING +from morpheus_dfp.messages.dfp_message_meta import DFPMessageMeta +from morpheus_dfp.utils.module_ids import DFP_TRAINING logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training_pipe.py b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training_pipe.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training_pipe.py rename to python/morpheus_dfp/morpheus_dfp/modules/dfp_training_pipe.py index 9fcbd946af..cfa4c49fdc 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/modules/dfp_training_pipe.py +++ b/python/morpheus_dfp/morpheus_dfp/modules/dfp_training_pipe.py @@ -21,12 +21,11 @@ from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE from morpheus.utils.module_utils import merge_dictionaries from morpheus.utils.module_utils import register_module - -from ..utils.module_ids import DFP_DATA_PREP -from ..utils.module_ids import DFP_PREPROC -from ..utils.module_ids import DFP_ROLLING_WINDOW -from ..utils.module_ids import DFP_TRAINING -from ..utils.module_ids import DFP_TRAINING_PIPE +from morpheus_dfp.utils.module_ids import DFP_DATA_PREP +from morpheus_dfp.utils.module_ids import DFP_PREPROC +from morpheus_dfp.utils.module_ids import DFP_ROLLING_WINDOW +from morpheus_dfp.utils.module_ids import DFP_TRAINING +from morpheus_dfp.utils.module_ids import DFP_TRAINING_PIPE logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/__init__.py b/python/morpheus_dfp/morpheus_dfp/py.typed similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/__init__.py rename to python/morpheus_dfp/morpheus_dfp/py.typed diff --git a/python/morpheus_dfp/morpheus_dfp/stages/__init__.py b/python/morpheus_dfp/morpheus_dfp/stages/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_file_batcher_stage.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_file_batcher_stage.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_to_df.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_file_to_df.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_to_df.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_file_to_df.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_inference_stage.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_inference_stage.py index 7d37c9514d..0324a5e67f 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py +++ b/python/morpheus_dfp/morpheus_dfp/stages/dfp_inference_stage.py @@ -25,9 +25,8 @@ from morpheus.messages import ControlMessage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stage_schema import StageSchema - -from ..utils.model_cache import ModelCache -from 
..utils.model_cache import ModelManager +from morpheus_dfp.utils.model_cache import ModelCache +from morpheus_dfp.utils.model_cache import ModelManager logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_mlflow_model_writer.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_mlflow_model_writer.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_mlflow_model_writer.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_mlflow_model_writer.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_postprocessing_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_postprocessing_stage.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_postprocessing_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_postprocessing_stage.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_preprocessing_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_preprocessing_stage.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_preprocessing_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_preprocessing_stage.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_rolling_window_stage.py similarity index 97% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_rolling_window_stage.py index 59b98a57df..f9233c6f89 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py +++ b/python/morpheus_dfp/morpheus_dfp/stages/dfp_rolling_window_stage.py @@ -26,10 +26,9 @@ from morpheus.messages import ControlMessage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stage_schema import StageSchema - -from ..messages.dfp_message_meta import DFPMessageMeta -from ..utils.cached_user_window import CachedUserWindow -from ..utils.logging_timer import log_time +from morpheus_dfp.messages.dfp_message_meta import DFPMessageMeta +from morpheus_dfp.utils.cached_user_window import CachedUserWindow +from morpheus_dfp.utils.logging_timer import log_time logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_split_users_stage.py similarity index 98% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_split_users_stage.py index 9a6a448bd5..2a40b4521e 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py +++ b/python/morpheus_dfp/morpheus_dfp/stages/dfp_split_users_stage.py @@ -27,9 +27,8 @@ from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.type_aliases import DataFrameType - -from ..messages.dfp_message_meta import DFPMessageMeta -from ..utils.logging_timer import log_time +from morpheus_dfp.messages.dfp_message_meta import DFPMessageMeta +from morpheus_dfp.utils.logging_timer import log_time logger = logging.getLogger(f"morpheus.{__name__}") diff --git 
a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_training.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_training.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_viz_postproc.py b/python/morpheus_dfp/morpheus_dfp/stages/dfp_viz_postproc.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_viz_postproc.py rename to python/morpheus_dfp/morpheus_dfp/stages/dfp_viz_postproc.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/multi_file_source.py b/python/morpheus_dfp/morpheus_dfp/stages/multi_file_source.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/multi_file_source.py rename to python/morpheus_dfp/morpheus_dfp/stages/multi_file_source.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/write_to_s3_stage.py b/python/morpheus_dfp/morpheus_dfp/stages/write_to_s3_stage.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/stages/write_to_s3_stage.py rename to python/morpheus_dfp/morpheus_dfp/stages/write_to_s3_stage.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/__init__.py b/python/morpheus_dfp/morpheus_dfp/utils/__init__.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/__init__.py rename to python/morpheus_dfp/morpheus_dfp/utils/__init__.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/cached_user_window.py b/python/morpheus_dfp/morpheus_dfp/utils/cached_user_window.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/cached_user_window.py rename to python/morpheus_dfp/morpheus_dfp/utils/cached_user_window.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/config_generator.py b/python/morpheus_dfp/morpheus_dfp/utils/config_generator.py similarity index 96% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/config_generator.py rename to python/morpheus_dfp/morpheus_dfp/utils/config_generator.py index e6726b18c4..9e3e2d904c 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/utils/config_generator.py +++ b/python/morpheus_dfp/morpheus_dfp/utils/config_generator.py @@ -14,12 +14,6 @@ import os -from dfp.utils.dfp_arg_parser import DFPArgParser -from dfp.utils.dfp_arg_parser import pyobj2str -from dfp.utils.module_ids import DFP_DEPLOYMENT -from dfp.utils.regex_utils import iso_date_regex_pattern -from dfp.utils.schema_utils import Schema - from morpheus.cli.utils import get_package_relative_file from morpheus.config import Config from morpheus.config import ConfigAutoEncoder @@ -27,6 +21,11 @@ from morpheus.messages import ControlMessage from morpheus.utils.file_utils import load_labels_file from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE +from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser +from morpheus_dfp.utils.dfp_arg_parser import pyobj2str +from morpheus_dfp.utils.module_ids import DFP_DEPLOYMENT +from morpheus_dfp.utils.regex_utils import iso_date_regex_pattern +from morpheus_dfp.utils.schema_utils import Schema class ConfigGenerator: diff --git 
a/examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py b/python/morpheus_dfp/morpheus_dfp/utils/dfp_arg_parser.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py rename to python/morpheus_dfp/morpheus_dfp/utils/dfp_arg_parser.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/logging_timer.py b/python/morpheus_dfp/morpheus_dfp/utils/logging_timer.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/logging_timer.py rename to python/morpheus_dfp/morpheus_dfp/utils/logging_timer.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/model_cache.py b/python/morpheus_dfp/morpheus_dfp/utils/model_cache.py similarity index 99% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/model_cache.py rename to python/morpheus_dfp/morpheus_dfp/utils/model_cache.py index 54b7c57d11..e3150725d5 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/utils/model_cache.py +++ b/python/morpheus_dfp/morpheus_dfp/utils/model_cache.py @@ -26,8 +26,7 @@ from mlflow.tracking.client import MlflowClient from morpheus.models.dfencoder import AutoEncoder - -from .logging_timer import log_time +from morpheus_dfp.utils.logging_timer import log_time logger = logging.getLogger(f"morpheus.{__name__}") diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/module_ids.py b/python/morpheus_dfp/morpheus_dfp/utils/module_ids.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/module_ids.py rename to python/morpheus_dfp/morpheus_dfp/utils/module_ids.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/regex_utils.py b/python/morpheus_dfp/morpheus_dfp/utils/regex_utils.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/regex_utils.py rename to python/morpheus_dfp/morpheus_dfp/utils/regex_utils.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/schema_utils.py b/python/morpheus_dfp/morpheus_dfp/utils/schema_utils.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/schema_utils.py rename to python/morpheus_dfp/morpheus_dfp/utils/schema_utils.py diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/user_model_manager.py b/python/morpheus_dfp/morpheus_dfp/utils/user_model_manager.py similarity index 100% rename from examples/digital_fingerprinting/production/morpheus/dfp/utils/user_model_manager.py rename to python/morpheus_dfp/morpheus_dfp/utils/user_model_manager.py diff --git a/python/morpheus_dfp/setup.cfg b/python/morpheus_dfp/setup.cfg new file mode 100644 index 0000000000..ade54b665b --- /dev/null +++ b/python/morpheus_dfp/setup.cfg @@ -0,0 +1,26 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +# ===== versioneer Config ===== +[versioneer] +VCS = git +style = pep440 +versionfile_source = morpheus_dfp/_version.py +versionfile_build = morpheus_dfp/_version.py +tag_prefix = v +parentdir_prefix = morpheus_dfp- + diff --git a/python/morpheus_dfp/setup.py b/python/morpheus_dfp/setup.py new file mode 100644 index 0000000000..37e5aecbfb --- /dev/null +++ b/python/morpheus_dfp/setup.py @@ -0,0 +1,45 @@ +# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +import versioneer +from setuptools import find_packages # noqa: E402 +from setuptools import setup # noqa: E402 + +setup( + name="morpheus_dfp", + version=versioneer.get_version(), # pylint: disable=no-member + description="Morpheus Digital Fingerprinting", + classifiers=[ + "Development Status :: 3 - Alpha", + "Environment :: GPU :: NVIDIA CUDA", + "Intended Audience :: Developers", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Security", + "Topic :: System :: Networking :: Monitoring", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3 :: Only", + ], + author="NVIDIA Corporation", + include_package_data=True, + packages=find_packages(), + install_requires=[], + license="Apache", + python_requires='>=3.10, <4', + cmdclass=versioneer.get_cmdclass(), # pylint: disable=no-member +) diff --git a/python/morpheus_dfp/todo.md b/python/morpheus_dfp/todo.md new file mode 100644 index 0000000000..08f15bbf70 --- /dev/null +++ b/python/morpheus_dfp/todo.md @@ -0,0 +1,55 @@ + + +# Tasks +- [X] Refactor Morpheus production stages + +- [X] Add morpheus_dfp Conda recipe + +- [X] Update CI to build and upload morpheus_dfp Conda package + +- [ ] Update docs to include the DFP APIs and a README for using the DFP library + +- [ ] Move DFP unit tests from tests/examples/digital_fingerprinting to tests/morpheus_dfp + +- [ ] Refactor DFP benchmarks + +- [ ] Update the DFP example Dockerfile to install the morpheus_dfp package instead of using the Morpheus image as the base + +- [ ] Consolidate the version file used in setup.py across all Morpheus packages + + +# Q&A for future reference +1. Do we refactor sample pipelines to python/morpheus_dfp/morpheus_dfp/pipeline? + No. They are not part of the library. They are just examples. + +2. Do we refactor data (just the script for pulling the data, fetch_example_data.py) used for running the sample DFP pipelines? + No. Same as above. + +3. Do we refactor the Morpheus DFP starter example? + No. Starter will be dropped, #1715 + +4. Visualizations? + No. Sample pipeline. + +5. Demo? + No. Sample pipeline.
+ +6. Refactor notebooks? + No. Sample only. + +7. Refactor DFP example control messages? + No. diff --git a/python/morpheus_llm/CMakeLists.txt b/python/morpheus_llm/CMakeLists.txt index 2383ee8ec3..d2093e759c 100644 --- a/python/morpheus_llm/CMakeLists.txt +++ b/python/morpheus_llm/CMakeLists.txt @@ -16,7 +16,6 @@ list(APPEND CMAKE_MESSAGE_CONTEXT "morpheus_llm") # Save the root of the python for relative paths -# Where is this used, fixme set(MORPHEUS_LLM_PY_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..) @@ -25,8 +24,7 @@ set(MORPHEUS_LLM_PY_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..) # ################################# morpheus_utils_python_configure() -# Include the main morpheus code -# fixme: add another subdirectory src and change SOURCE_DIRECTORY to ${CMAKE_CURRENT_SOURCE_DIR}/src +# Include the main morpheus_llm code morpheus_utils_create_python_package(morpheus_llm PROJECT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" SOURCE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/morpheus_llm" @@ -34,7 +32,7 @@ morpheus_utils_create_python_package(morpheus_llm add_subdirectory(morpheus_llm) -# Complete the python package - fixme do these need to be redefined +# Complete the python package if(MORPHEUS_PYTHON_INPLACE_BUILD) list(APPEND extra_args "IS_INPLACE") endif() diff --git a/python/morpheus_llm/morpheus_llm/_lib/cmake/libmorpheus_llm.cmake b/python/morpheus_llm/morpheus_llm/_lib/cmake/libmorpheus_llm.cmake index d4377c64db..83ba243398 100644 --- a/python/morpheus_llm/morpheus_llm/_lib/cmake/libmorpheus_llm.cmake +++ b/python/morpheus_llm/morpheus_llm/_lib/cmake/libmorpheus_llm.cmake @@ -27,42 +27,32 @@ add_library(morpheus_llm add_library(${PROJECT_NAME}::morpheus_llm ALIAS morpheus_llm) -# fixme check if all these are needed +# morpheus_llm can be built in two ways: +# 1. For development purposes (e.g. scripts/compile.sh) all the functional blocks are built. +#    This includes morpheus (core), morpheus_llm, morpheus_dfp, etc. In this case we +#    set dependencies on build targets across components. +# 2. For conda packaging purposes morpheus_llm is built on its own. In this case +#    the dependencies (including morpheus-core) are loaded from the conda environment.
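The flag that selects between these two modes is threaded through scripts/compile.sh later in this diff. The following is a hypothetical sketch of the two configure invocations; the flag names and ON defaults come from the compile.sh change, while the build directory and generator are illustrative only:

```cmake
# Mode 1 - development build: all components are configured in-tree, so
# morpheus_llm can depend on the morpheus (core) build targets directly:
#   cmake -S . -B build -GNinja \
#     -DMORPHEUS_BUILD_MORPHEUS_CORE=ON -DMORPHEUS_BUILD_MORPHEUS_LLM=ON
#
# Mode 2 - standalone conda-package build: core is not built here, so the
# else() branch below locates a pre-built morpheus in the active conda
# environment via rapids_find_package(morpheus REQUIRED):
#   cmake -S . -B build -GNinja \
#     -DMORPHEUS_BUILD_MORPHEUS_CORE=OFF -DMORPHEUS_BUILD_MORPHEUS_LLM=ON
```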
+if (MORPHEUS_BUILD_MORPHEUS_CORE) + # Add a dependency on the morpheus cpython libraries + get_property(py_morpheus_target GLOBAL PROPERTY py_morpheus_target_property) + add_dependencies(morpheus_llm ${py_morpheus_target}) +else() + rapids_find_package(morpheus REQUIRED) +endif() + target_link_libraries(morpheus_llm PRIVATE - matx::matx $<$:ZLIB::ZLIB> PUBLIC $ cudf::cudf - CUDA::nvtx3 mrc::pymrc ${PROJECT_NAME}::morpheus - ) -# Add the include directories of the cudf_helpers_project since we dont want to link directly to it -get_property(cudf_helpers_target GLOBAL PROPERTY cudf_helpers_target_property) -get_target_property(cudf_helpers_include ${cudf_helpers_target} INTERFACE_INCLUDE_DIRECTORIES) - -target_include_directories(morpheus - PRIVATE - ${cudf_helpers_include} -) - -# Also add a dependency to the target so that the headers are generated before the target is built -add_dependencies(morpheus_llm ${cudf_helpers_target}) -message("add_dependencies morpheus_llm ${cudf_helpers_target}") - -# Add a dependency on the morpheus cpython libraries -get_property(py_morpheus_target GLOBAL PROPERTY py_morpheus_target_property) -add_dependencies(morpheus_llm ${py_morpheus_target}) -message("add_dependencies morpheus_llm ${py_morpheus_target}") - -# fixme: find another way to include morpheus headers target_include_directories(morpheus_llm PUBLIC - $ $ $ ) diff --git a/python/morpheus_llm/morpheus_llm/error.py b/python/morpheus_llm/morpheus_llm/error.py new file mode 100644 index 0000000000..02a2694ed3 --- /dev/null +++ b/python/morpheus_llm/morpheus_llm/error.py @@ -0,0 +1,18 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +IMPORT_ERROR_MESSAGE = ( + "{package} not found. 
Install it and other additional dependencies by running the following command:\n" + "`conda env update --solver=libmamba -n morpheus " + "--file conda/environments/examples_cuda-125_arch-x86_64.yaml`") diff --git a/python/morpheus_llm/morpheus_llm/llm/nodes/langchain_agent_node.py b/python/morpheus_llm/morpheus_llm/llm/nodes/langchain_agent_node.py index e63b1d351c..0e96c600fd 100644 --- a/python/morpheus_llm/morpheus_llm/llm/nodes/langchain_agent_node.py +++ b/python/morpheus_llm/morpheus_llm/llm/nodes/langchain_agent_node.py @@ -16,13 +16,19 @@ import logging import typing -from langchain_core.exceptions import OutputParserException - +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.llm import LLMContext from morpheus_llm.llm import LLMNodeBase logger = logging.getLogger(__name__) +IMPORT_EXCEPTION = None + +try: + from langchain_core.exceptions import OutputParserException +except ImportError as import_exc: + IMPORT_EXCEPTION = import_exc + if typing.TYPE_CHECKING: from langchain.agents import AgentExecutor @@ -47,6 +53,9 @@ def __init__(self, agent_executor: "AgentExecutor", replace_exceptions: bool = False, replace_exceptions_value: typing.Optional[str] = None): + if IMPORT_EXCEPTION is not None: + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='langchain_core')) from IMPORT_EXCEPTION + super().__init__() self._agent_executor = agent_executor
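The langchain_agent_node.py hunk above is the template the rest of this PR follows for optional dependencies: the module itself always imports, the shared message from morpheus_llm/error.py is formatted with the missing package's name, and the ImportError is raised only when an object that actually needs the package is constructed. A minimal self-contained sketch of the pattern (illustrative names, not the actual Morpheus classes):

```python
# Sketch of the deferred-import guard used throughout morpheus_llm.
IMPORT_ERROR_MESSAGE = "{package} not found. Install the optional dependencies to use this feature."

IMPORT_EXCEPTION = None

try:
    # Optional dependency: importing THIS module must still succeed without it.
    from langchain_core.exceptions import OutputParserException  # noqa: F401
except ImportError as import_exc:
    IMPORT_EXCEPTION = import_exc


class NeedsLangchainCore:
    """Constructing an instance is the first point that truly needs the package."""

    def __init__(self) -> None:
        if IMPORT_EXCEPTION is not None:
            # Chain the original error so the root cause stays visible.
            raise ImportError(IMPORT_ERROR_MESSAGE.format(package="langchain_core")) from IMPORT_EXCEPTION
```

diff --git a/python/morpheus_llm/morpheus_llm/llm/services/nemo_llm_service.py b/python/morpheus_llm/morpheus_llm/llm/services/nemo_llm_service.py index ef80814929..30cda8e02c 100644 --- a/python/morpheus_llm/morpheus_llm/llm/services/nemo_llm_service.py +++ b/python/morpheus_llm/morpheus_llm/llm/services/nemo_llm_service.py @@ -18,16 +18,13 @@ import warnings from morpheus.utils.env_config_value import EnvConfigValue +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.llm.services.llm_service import LLMClient from morpheus_llm.llm.services.llm_service import LLMService logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = ( - "NemoLLM not found.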
Install it and other additional dependencies by running the following command:\n" - "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/examples_cuda-121_arch-x86_64.yaml --prune`") try: import nemollm @@ -53,7 +50,7 @@ class NeMoLLMClient(LLMClient): def __init__(self, parent: "NeMoLLMService", *, model_name: str, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='nemollm')) from IMPORT_EXCEPTION super().__init__() @@ -231,7 +228,7 @@ def __init__(self, """ if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='nemollm')) from IMPORT_EXCEPTION super().__init__() diff --git a/python/morpheus_llm/morpheus_llm/llm/services/nvfoundation_llm_service.py b/python/morpheus_llm/morpheus_llm/llm/services/nvfoundation_llm_service.py index d1e706b7c2..709f394712 100644 --- a/python/morpheus_llm/morpheus_llm/llm/services/nvfoundation_llm_service.py +++ b/python/morpheus_llm/morpheus_llm/llm/services/nvfoundation_llm_service.py @@ -16,17 +16,13 @@ import typing from morpheus.utils.env_config_value import EnvConfigValue +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.llm.services.llm_service import LLMClient from morpheus_llm.llm.services.llm_service import LLMService logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = ( - "The `langchain-nvidia-ai-endpoints` package was not found. Install it and other additional dependencies by " - "running the following command:" - "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/examples_cuda-121_arch-x86_64.yaml`") try: from langchain_core.prompt_values import StringPromptValue @@ -52,7 +48,8 @@ class NVFoundationLLMClient(LLMClient): def __init__(self, parent: "NVFoundationLLMService", *, model_name: str, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError( + IMPORT_ERROR_MESSAGE.format(package='langchain-nvidia-ai-endpoints')) from IMPORT_EXCEPTION super().__init__() @@ -218,7 +215,8 @@ class BaseURL(EnvConfigValue): def __init__(self, *, api_key: APIKey | str = None, base_url: BaseURL | str = None, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError( + IMPORT_ERROR_MESSAGE.format(package='langchain-nvidia-ai-endpoints')) from IMPORT_EXCEPTION super().__init__() diff --git a/python/morpheus_llm/morpheus_llm/llm/services/openai_chat_service.py b/python/morpheus_llm/morpheus_llm/llm/services/openai_chat_service.py index d4eaac4503..2df6048d5a 100644 --- a/python/morpheus_llm/morpheus_llm/llm/services/openai_chat_service.py +++ b/python/morpheus_llm/morpheus_llm/llm/services/openai_chat_service.py @@ -23,16 +23,13 @@ import appdirs from morpheus.utils.env_config_value import EnvConfigValue +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.llm.services.llm_service import LLMClient from morpheus_llm.llm.services.llm_service import LLMService logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = ("OpenAIChatService & OpenAIChatClient require the openai package to be installed. 
" - "Install it by running the following command:\n" - "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/examples_cuda-121_arch-x86_64.yaml --prune`") try: import openai @@ -107,7 +104,7 @@ def __init__(self, json=False, **model_kwargs) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='openai')) from IMPORT_EXCEPTION super().__init__() @@ -400,7 +397,7 @@ def __init__(self, default_model_kwargs: dict = None) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='openai')) from IMPORT_EXCEPTION super().__init__() diff --git a/python/morpheus_llm/morpheus_llm/service/vdb/faiss_vdb_service.py b/python/morpheus_llm/morpheus_llm/service/vdb/faiss_vdb_service.py index 82e6c146d2..0197f3071d 100644 --- a/python/morpheus_llm/morpheus_llm/service/vdb/faiss_vdb_service.py +++ b/python/morpheus_llm/morpheus_llm/service/vdb/faiss_vdb_service.py @@ -21,13 +21,13 @@ import cudf +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.service.vdb.vector_db_service import VectorDBResourceService from morpheus_llm.service.vdb.vector_db_service import VectorDBService logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = "FaissDBResourceService requires the FAISS library to be installed." try: from langchain.embeddings.base import Embeddings @@ -50,7 +50,7 @@ class FaissVectorDBResourceService(VectorDBResourceService): def __init__(self, parent: "FaissVectorDBService", *, name: str) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='langchain and faiss-gpu')) from IMPORT_EXCEPTION super().__init__() @@ -285,7 +285,7 @@ class FaissVectorDBService(VectorDBService): def __init__(self, local_dir: str, embeddings: "Embeddings"): if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='langchain and faiss-gpu')) from IMPORT_EXCEPTION self._local_dir = local_dir self._embeddings = embeddings diff --git a/python/morpheus_llm/morpheus_llm/service/vdb/milvus_vector_db_service.py b/python/morpheus_llm/morpheus_llm/service/vdb/milvus_vector_db_service.py index f43fbbf79c..71df614b23 100644 --- a/python/morpheus_llm/morpheus_llm/service/vdb/milvus_vector_db_service.py +++ b/python/morpheus_llm/morpheus_llm/service/vdb/milvus_vector_db_service.py @@ -25,13 +25,13 @@ from morpheus.io.utils import cudf_string_cols_exceed_max_bytes from morpheus.io.utils import truncate_string_cols_by_bytes from morpheus.utils.type_aliases import DataFrameType +from morpheus_llm.error import IMPORT_ERROR_MESSAGE from morpheus_llm.service.vdb.vector_db_service import VectorDBResourceService from morpheus_llm.service.vdb.vector_db_service import VectorDBService logger = logging.getLogger(__name__) IMPORT_EXCEPTION = None -IMPORT_ERROR_MESSAGE = "MilvusVectorDBResourceService requires the milvus and pymilvus packages to be installed." # Milvus has a max string length in bytes of 65,535. 
Multi-byte characters like "ñ" will have a string length of 1, the # byte length encoded as UTF-8 will be 2 @@ -234,7 +234,7 @@ class MilvusVectorDBResourceService(VectorDBResourceService): def __init__(self, name: str, client: "MilvusClient", truncate_long_strings: bool = False) -> None: if IMPORT_EXCEPTION is not None: - raise ImportError(IMPORT_ERROR_MESSAGE) from IMPORT_EXCEPTION + raise ImportError(IMPORT_ERROR_MESSAGE.format(package='pymilvus')) from IMPORT_EXCEPTION super().__init__() diff --git a/scripts/compile.sh b/scripts/compile.sh index cf8c628515..f0fc750b75 100755 --- a/scripts/compile.sh +++ b/scripts/compile.sh @@ -33,7 +33,9 @@ cmake -S . -B ${BUILD_DIR} -GNinja \ -DMORPHEUS_USE_CCACHE=ON \ -DMORPHEUS_USE_CONDA=${MORPHEUS_USE_CONDA:-ON} \ -DMORPHEUS_SUPPORT_DOCA=${MORPHEUS_SUPPORT_DOCA:-OFF} \ + -DMORPHEUS_BUILD_MORPHEUS_CORE=${MORPHEUS_BUILD_MORPHEUS_CORE:-ON} \ -DMORPHEUS_BUILD_MORPHEUS_LLM=${MORPHEUS_BUILD_MORPHEUS_LLM:-ON} \ + -DMORPHEUS_BUILD_MORPHEUS_DFP=${MORPHEUS_BUILD_MORPHEUS_DFP:-ON} \ ${INSTALL_PREFIX:+-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX}} \ ${CMAKE_ARGS:+${CMAKE_ARGS}} \ ${CMAKE_CONFIGURE_EXTRA_ARGS:+${CMAKE_CONFIGURE_EXTRA_ARGS}} diff --git a/tests/_utils/__init__.py b/tests/_utils/__init__.py index c11afb671b..b116dffb69 100644 --- a/tests/_utils/__init__.py +++ b/tests/_utils/__init__.py @@ -25,7 +25,6 @@ import numpy as np import pytest - from morpheus.io.deserializers import read_file_to_df from .test_directories import TestDirectories diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index 9aa0bd105a..148dbb3d44 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -200,5 +200,5 @@ You can use the same Dev container created here to run the Production DFP benchm ```bash mamba env update \ -n ${CONDA_DEFAULT_ENV} \ - --file ./conda/environments/examples_cuda-121_arch-x86_64.yaml + --file ./conda/environments/examples_cuda-125_arch-x86_64.yaml ``` diff --git a/tests/benchmarks/conftest.py b/tests/benchmarks/conftest.py index 1e21affaa8..607febf434 100644 --- a/tests/benchmarks/conftest.py +++ b/tests/benchmarks/conftest.py @@ -20,8 +20,13 @@ from unittest import mock import pytest -from pynvml.smi import NVSMI_QUERY_GPU -from pynvml.smi import nvidia_smi + +try: + from pynvml.smi import NVSMI_QUERY_GPU + from pynvml.smi import nvidia_smi +except ImportError: + print("pynvml is not installed") + from test_bench_e2e_pipelines import E2E_TEST_CONFIGS diff --git a/tests/benchmarks/test_bench_agents_simple_pipeline.py b/tests/benchmarks/test_bench_agents_simple_pipeline.py index 85202fdc02..cbd83e3cae 100644 --- a/tests/benchmarks/test_bench_agents_simple_pipeline.py +++ b/tests/benchmarks/test_bench_agents_simple_pipeline.py @@ -19,13 +19,17 @@ import typing from unittest import mock -import langchain import pytest -from langchain.agents import AgentType -from langchain.agents import initialize_agent -from langchain.agents import load_tools -from langchain.agents.tools import Tool -from langchain.utilities import serpapi + +try: + import langchain + from langchain.agents import AgentType + from langchain.agents import initialize_agent + from langchain.agents import load_tools + from langchain.agents.tools import Tool + from langchain.utilities import serpapi +except ImportError: + print("langchain is not installed") import cudf diff --git a/tests/conftest.py b/tests/conftest.py index 952142f249..6eafa0d80c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -47,6 +47,10 @@ from _utils.kafka import kafka_server 
# noqa: F401 pylint:disable=unused-import from _utils.kafka import zookeeper_proc # noqa: F401 pylint:disable=unused-import +OPT_DEP_SKIP_REASON = ( + "This test requires the {package} package to be installed, to install this run:\n" + "`conda env update --solver=libmamba -n morpheus --file conda/environments/examples_cuda-125_arch-x86_64.yaml`") + def pytest_addoption(parser: pytest.Parser): """ @@ -1064,33 +1068,53 @@ def nemollm_fixture(fail_missing: bool): """ Fixture to ensure nemollm is installed """ - skip_reason = ("Tests for the NeMoLLMService require the nemollm package to be installed, to install this run:\n" - "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/all_cuda-121_arch-x86_64.yaml --prune`") - yield import_or_skip("nemollm", reason=skip_reason, fail_missing=fail_missing) + yield import_or_skip("nemollm", reason=OPT_DEP_SKIP_REASON.format(package="nemollm"), fail_missing=fail_missing) -@pytest.fixture(name="nvfoundationllm", scope='session') -def nvfoundationllm_fixture(fail_missing: bool): +@pytest.fixture(name="openai", scope='session') +def openai_fixture(fail_missing: bool): """ - Fixture to ensure nvfoundationllm is installed + Fixture to ensure openai is installed """ - skip_reason = ( - "Tests for NVFoundation require the langchain-nvidia-ai-endpoints package to be installed, to install this " - "run:\n `conda env update --solver=libmamba -n morpheus " - "--file conda/environments/all_cuda-121_arch-x86_64.yaml --prune`") - yield import_or_skip("langchain_nvidia_ai_endpoints", reason=skip_reason, fail_missing=fail_missing) + yield import_or_skip("openai", reason=OPT_DEP_SKIP_REASON.format(package="openai"), fail_missing=fail_missing) -@pytest.fixture(name="openai", scope='session') -def openai_fixture(fail_missing: bool): +@pytest.fixture(name="langchain", scope='session') +def langchain_fixture(fail_missing: bool): """ - Fixture to ensure openai is installed + Fixture to ensure langchain is installed + """ + yield import_or_skip("langchain", reason=OPT_DEP_SKIP_REASON.format(package="langchain"), fail_missing=fail_missing) + + +@pytest.fixture(name="langchain_core", scope='session') +def langchain_core_fixture(fail_missing: bool): + """ + Fixture to ensure langchain_core is installed + """ + yield import_or_skip("langchain_core", + reason=OPT_DEP_SKIP_REASON.format(package="langchain_core"), + fail_missing=fail_missing) + + +@pytest.fixture(name="langchain_community", scope='session') +def langchain_community_fixture(fail_missing: bool): + """ + Fixture to ensure langchain_community is installed + """ + yield import_or_skip("langchain_community", + reason=OPT_DEP_SKIP_REASON.format(package="langchain_community"), + fail_missing=fail_missing) + + +@pytest.fixture(name="langchain_nvidia_ai_endpoints", scope='session') +def langchain_nvidia_ai_endpoints_fixture(fail_missing: bool): + """ + Fixture to ensure langchain_nvidia_ai_endpoints is installed """ - skip_reason = ("Tests for the OpenAIChatService require the openai package to be installed, to install this run:\n" - "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/all_cuda-121_arch-x86_64.yaml --prune`") - yield import_or_skip("openai", reason=skip_reason, fail_missing=fail_missing) + yield import_or_skip("langchain_nvidia_ai_endpoints", + reason=OPT_DEP_SKIP_REASON.format(package="langchain_nvidia_ai_endpoints"), + fail_missing=fail_missing) @pytest.mark.usefixtures("openai") diff --git a/tests/dfencoder/test_autoencoder.py 
b/tests/dfencoder/test_autoencoder.py index 43a1b7574b..bd02907f92 100755 --- a/tests/dfencoder/test_autoencoder.py +++ b/tests/dfencoder/test_autoencoder.py @@ -503,4 +503,4 @@ def test_auto_encoder_num_only_convergence(train_ae: autoencoder.AutoEncoder): for loss in train_ae.logger.train_fts.values()], axis=0) / len(train_ae.logger.train_fts) # Make sure the model converges with numerical feats only - assert avg_loss[-1] < avg_loss[0] / 2 + assert avg_loss[-1] < avg_loss[0] * 0.75 diff --git a/tests/examples/digital_fingerprinting/conftest.py b/tests/examples/digital_fingerprinting/conftest.py index 25fb2b54c0..c441a24e6b 100644 --- a/tests/examples/digital_fingerprinting/conftest.py +++ b/tests/examples/digital_fingerprinting/conftest.py @@ -91,7 +91,8 @@ def dfp_prod_in_sys_path( @pytest.fixture(name="dfp_message_meta") def dfp_message_meta_fixture(config, dataset_pandas: DatasetManager): import pandas as pd - from dfp.messages.dfp_message_meta import DFPMessageMeta + + from morpheus_dfp.messages.dfp_message_meta import DFPMessageMeta user_id = 'test_user' df = dataset_pandas['filter_probs.csv'] diff --git a/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py b/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py index 6120867926..fd94312f97 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_file_batcher_stage.py @@ -48,7 +48,7 @@ def file_specs_fixture(test_data_dir: str): def test_constructor(config: Config): - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage def date_conversion_func(x): return x @@ -71,7 +71,7 @@ def date_conversion_func(x): def test_constructor_deprecated_args(config: Config): """Test that the deprecated sampling_rate_s arg is still supported""" - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage with pytest.deprecated_call(): stage = DFPFileBatcherStage(config, lambda x: x, sampling_rate_s=55) @@ -82,14 +82,14 @@ def test_constructor_deprecated_args(config: Config): def test_constructor_both_sample_args_error(config: Config): """Test that an error is raised if both sampling and sampling_rate_s are specified""" - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage with pytest.raises(AssertionError): DFPFileBatcherStage(config, lambda x: x, sampling=55, sampling_rate_s=20) def test_on_data(config: Config, date_conversion_func: typing.Callable, file_specs: typing.List[fsspec.core.OpenFile]): - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage stage = DFPFileBatcherStage(config, date_conversion_func) assert not stage.on_data([]) @@ -108,7 +108,7 @@ def test_on_data_two_batches(config: Config, file_specs: typing.List[fsspec.core.OpenFile], test_data_dir: str): # Test with a one-minute window which should split the data into two batches - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage stage = DFPFileBatcherStage(config, date_conversion_func, period='min') batches = stage.on_data(file_specs) assert len(batches) == 2 @@ -131,7 +131,7 @@ def test_on_data_start_time(config: Config, file_specs: 
typing.List[fsspec.core.OpenFile], test_data_dir: str): # Test with a start time that excludes some files - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage stage = DFPFileBatcherStage(config, date_conversion_func, period='min', @@ -151,7 +151,7 @@ def test_on_data_end_time(config: Config, file_specs: typing.List[fsspec.core.OpenFile], test_data_dir: str): # Test with a end time that excludes some files - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage stage = DFPFileBatcherStage(config, date_conversion_func, period='min', @@ -172,7 +172,7 @@ def test_on_data_start_time_end_time(config: Config, file_specs: typing.List[fsspec.core.OpenFile], test_data_dir: str): # Test with a start & end time that excludes some files - from dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage + from morpheus_dfp.stages.dfp_file_batcher_stage import DFPFileBatcherStage stage = DFPFileBatcherStage(config, date_conversion_func, period='min', diff --git a/tests/examples/digital_fingerprinting/test_dfp_file_to_df.py b/tests/examples/digital_fingerprinting/test_dfp_file_to_df.py index 19fa6add61..557818d8c7 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_file_to_df.py +++ b/tests/examples/digital_fingerprinting/test_dfp_file_to_df.py @@ -77,7 +77,7 @@ def test_single_object_to_dataframe_timeout(): @pytest.mark.usefixtures("restore_environ") def test_constructor(config: Config): - from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage + from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage # The user may have this already set, ensure it is undefined os.environ.pop('MORPHEUS_FILE_DOWNLOAD_TYPE', None) @@ -118,7 +118,7 @@ def test_get_or_create_dataframe_from_batch_cache_miss(mock_proc_df: mock.MagicM tmp_path: str, single_file_obj: fsspec.core.OpenFile, dataset_pandas: DatasetManager): - from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage + from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage config.ae.timestamp_column_name = 'v1' mock_dask_cluster.return_value = mock_dask_cluster mock_dask_client.return_value = mock_dask_client @@ -195,7 +195,7 @@ def test_get_or_create_dataframe_from_batch_cache_hit(mock_obf_to_df: mock.Magic use_convert_to_dataframe: bool, tmp_path: str, dataset_pandas: DatasetManager): - from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage + from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage config.ae.timestamp_column_name = 'v1' mock_dask_cluster.return_value = mock_dask_cluster mock_dask_client.return_value = mock_dask_client @@ -253,7 +253,7 @@ def test_get_or_create_dataframe_from_batch_none_noop(mock_obf_to_df: mock.Magic dl_type: str, use_convert_to_dataframe: bool, tmp_path: str): - from dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage + from morpheus_dfp.stages.dfp_file_to_df import DFPFileToDataFrameStage mock_dask_cluster.return_value = mock_dask_cluster mock_dask_client.return_value = mock_dask_client diff --git a/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py b/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py index 1175d0a61e..722fadb993 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_inference_stage.py @@ -30,20 +30,20 @@ @pytest.fixture(name="mock_mlflow_client", 
autouse=True) def mock_mlflow_client_fixture(): - with mock.patch("dfp.stages.dfp_inference_stage.MlflowClient") as mock_mlflow_client: + with mock.patch("morpheus_dfp.stages.dfp_inference_stage.MlflowClient") as mock_mlflow_client: mock_mlflow_client.return_value = mock_mlflow_client yield mock_mlflow_client @pytest.fixture(name="mock_model_manager", autouse=True) def mock_model_manager_fixture(): - with mock.patch("dfp.stages.dfp_inference_stage.ModelManager") as mock_model_manager: + with mock.patch("morpheus_dfp.stages.dfp_inference_stage.ModelManager") as mock_model_manager: mock_model_manager.return_value = mock_model_manager yield mock_model_manager def test_constructor(config: Config, mock_mlflow_client: mock.MagicMock, mock_model_manager: mock.MagicMock): - from dfp.stages.dfp_inference_stage import DFPInferenceStage + from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage stage = DFPInferenceStage(config, model_name_formatter="test_model_name-{user_id}-{user_md5}") @@ -58,7 +58,7 @@ def test_constructor(config: Config, mock_mlflow_client: mock.MagicMock, mock_mo def test_get_model(config: Config, mock_mlflow_client: mock.MagicMock, mock_model_manager: mock.MagicMock): - from dfp.stages.dfp_inference_stage import DFPInferenceStage + from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage mock_model_cache = mock.MagicMock() mock_model_manager.load_user_model.return_value = mock_model_cache @@ -80,7 +80,7 @@ def test_on_data( control_message: "ControlMessage", # noqa: F821 log_level: int, dataset_pandas: DatasetManager): - from dfp.stages.dfp_inference_stage import DFPInferenceStage + from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage set_log_level(log_level) @@ -116,7 +116,7 @@ def test_on_data_get_model_error( mock_model_manager: mock.MagicMock, control_message: "ControlMessage", # noqa: F821 raise_error: bool): - from dfp.stages.dfp_inference_stage import DFPInferenceStage + from morpheus_dfp.stages.dfp_inference_stage import DFPInferenceStage # There are two error conditions that can occur in get_model can return None or raise an error if raise_error: diff --git a/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py b/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py index 49fc093ba4..b39e05a03d 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py +++ b/tests/examples/digital_fingerprinting/test_dfp_mlflow_model_writer.py @@ -110,7 +110,7 @@ def mock_mlflow(): def test_constructor(config: Config): - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage stage = DFPMLFlowModelWriterStage(config, model_name_formatter="test_model_name-{user_id}-{user_md5}", @@ -131,7 +131,7 @@ def test_constructor(config: Config): ("test_model_name-{user_id}-{user_md5}", 'test_城安宮川', "test_model_name-test_城安宮川-c9acc3dec97777c8b6fd8ae70a744ea8") ]) def test_user_id_to_model(config: Config, model_name_formatter: str, user_id: str, expected_val: str): - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage stage = DFPMLFlowModelWriterStage(config, model_name_formatter=model_name_formatter) assert stage._controller.user_id_to_model(user_id) == expected_val @@ -149,7 +149,7 @@ def test_user_id_to_model(config: Config, model_name_formatter: str, user_id: st 'test_城安宮川', 
"/test/expr/dfp-test_城安宮川-test_城安宮川-c9acc3dec97777c8b6fd8ae70a744ea8")]) def test_user_id_to_experiment(config: Config, experiment_name_formatter: str, user_id: str, expected_val: str): - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage stage = DFPMLFlowModelWriterStage(config, model_name_formatter="dfp-{user_id}", @@ -178,7 +178,7 @@ def verify_apply_model_permissions(mock_requests: MockedRequests, def test_apply_model_permissions(config: Config, databricks_env: dict, mock_requests: MockedRequests): - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage databricks_permissions = OrderedDict([('group1', 'CAN_READ'), ('group2', 'CAN_WRITE')]) stage = DFPMLFlowModelWriterStage(config, databricks_permissions=databricks_permissions, timeout=10) stage._controller._apply_model_permissions("test_experiment") @@ -206,7 +206,7 @@ def test_apply_model_permissions_no_perms_error(config: Config, else: os.environ.pop("DATABRICKS_TOKEN", None) - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage stage = DFPMLFlowModelWriterStage(config) with pytest.raises(RuntimeError): stage._controller._apply_model_permissions("test_experiment") @@ -217,7 +217,7 @@ def test_apply_model_permissions_no_perms_error(config: Config, @pytest.mark.usefixtures("databricks_env") def test_apply_model_permissions_requests_error(config: Config, mock_requests: MockedRequests): - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage mock_requests.get.side_effect = RuntimeError("test error") stage = DFPMLFlowModelWriterStage(config, timeout=10) @@ -238,9 +238,9 @@ def test_on_data( databricks_env: dict, databricks_permissions: dict, tracking_uri: str): - from dfp.messages.dfp_message_meta import DFPMessageMeta - from dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage - from dfp.stages.dfp_mlflow_model_writer import conda_env + from morpheus_dfp.messages.dfp_message_meta import DFPMessageMeta + from morpheus_dfp.stages.dfp_mlflow_model_writer import DFPMLFlowModelWriterStage + from morpheus_dfp.stages.dfp_mlflow_model_writer import conda_env should_apply_permissions = (databricks_permissions is not None and tracking_uri == "databricks") diff --git a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py index b173c145dc..31176221a9 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_postprocessing_stage.py @@ -27,7 +27,7 @@ def test_constructor(config: Config): - from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage + from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage stage = DFPPostprocessingStage(config) assert isinstance(stage, SinglePortStage) assert stage._needed_columns['event_time'] == TypeId.STRING @@ -36,13 +36,13 @@ def test_constructor(config: Config): @pytest.mark.usefixtures("reset_loglevel") @pytest.mark.parametrize('use_on_data', [True, False]) @pytest.mark.parametrize('log_level', [logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]) 
-@mock.patch('dfp.stages.dfp_postprocessing_stage.datetime') +@mock.patch('morpheus_dfp.stages.dfp_postprocessing_stage.datetime') def test_process_events_on_data(mock_datetime: mock.MagicMock, config: Config, control_message: ControlMessage, use_on_data: bool, log_level: int): - from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage + from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage mock_dt_obj = mock.MagicMock() mock_dt_obj.strftime.return_value = '2021-01-01T00:00:00Z' @@ -68,7 +68,7 @@ def test_process_events_on_data(mock_datetime: mock.MagicMock, def test_on_data_none(config: Config): - from dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage + from morpheus_dfp.stages.dfp_postprocessing_stage import DFPPostprocessingStage stage = DFPPostprocessingStage(config) assert stage.on_data(None) is None mock_payload = mock.MagicMock() diff --git a/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py b/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py index 538e20425e..49a2feea90 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_preprocessing_stage.py @@ -28,7 +28,7 @@ def test_constructor(config: Config): - from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage + from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage schema = DataFrameInputSchema() stage = DFPPreprocessingStage(config, input_schema=schema) @@ -43,7 +43,7 @@ def test_process_features( control_message: "ControlMessage", # noqa: F821 dataset_pandas: DatasetManager, log_level: int): - from dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage + from morpheus_dfp.stages.dfp_preprocessing_stage import DFPPreprocessingStage set_log_level(log_level) diff --git a/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py b/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py index 01504d7d47..b8f7e8cd18 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_rolling_window_stage.py @@ -41,7 +41,7 @@ def build_mock_user_cache(user_id: str = 'test_user', def test_constructor(config: Config): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') assert isinstance(stage, SinglePortStage) @@ -53,7 +53,7 @@ def test_constructor(config: Config): def test_get_user_cache_hit(config: Config): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -65,8 +65,8 @@ def test_get_user_cache_hit(config: Config): def test_get_user_cache_miss(config: Config): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage - from dfp.utils.cached_user_window import CachedUserWindow + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.utils.cached_user_window import CachedUserWindow config.ae.timestamp_column_name = 'test_timestamp_col' stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, 
cache_dir='/test/path/cache') @@ -85,7 +85,7 @@ def test_build_window_no_new( config: Config, dfp_message_meta: "DFPMessageMeta" # noqa: F821 ): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -99,7 +99,7 @@ def test_build_window_not_enough_data( config: Config, dfp_message_meta: "DFPMessageMeta" # noqa: F821 ): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -112,7 +112,7 @@ def test_build_window_min_increment( config: Config, dfp_message_meta: "DFPMessageMeta" # noqa: F821 ): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -125,7 +125,7 @@ def test_build_window_invalid( config: Config, dfp_message_meta: "DFPMessageMeta" # noqa: F821 ): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -144,7 +144,7 @@ def test_build_window_overlap( config: Config, dfp_message_meta: "DFPMessageMeta" # noqa: F821 ): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') @@ -165,9 +165,8 @@ def test_build_window( use_on_data: bool, dfp_message_meta: "DFPMessageMeta", # noqa: F821 dataset_pandas: DatasetManager): - from dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage - from morpheus.messages import ControlMessage + from morpheus_dfp.stages.dfp_rolling_window_stage import DFPRollingWindowStage stage = DFPRollingWindowStage(config, min_history=5, min_increment=7, max_history=100, cache_dir='/test/path/cache') diff --git a/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py b/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py index 8189df73fe..8563fd7f9a 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py +++ b/tests/examples/digital_fingerprinting/test_dfp_split_users_stage.py @@ -27,7 +27,7 @@ def test_constructor(config: Config): - from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage + from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage stage = DFPSplitUsersStage(config, include_generic=False, include_individual=True) assert isinstance(stage, SinglePortStage) @@ -64,7 +64,7 @@ def test_extract_users(config: Config, include_individual: bool, skip_users: typing.List[str], only_users: typing.List[str]): - from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage + from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage config.ae.userid_column_name = "From" config.ae.fallback_username = "testy_testerson" @@ -120,7 +120,7 @@ def test_extract_users(config: Config, def 
test_extract_users_none_to_empty(config: Config): - from dfp.stages.dfp_split_users_stage import DFPSplitUsersStage + from morpheus_dfp.stages.dfp_split_users_stage import DFPSplitUsersStage stage = DFPSplitUsersStage(config, include_generic=True, include_individual=True) assert not stage.extract_users(None) diff --git a/tests/examples/digital_fingerprinting/test_dfp_training.py b/tests/examples/digital_fingerprinting/test_dfp_training.py index 60cd545eab..e4683c1ea2 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_training.py +++ b/tests/examples/digital_fingerprinting/test_dfp_training.py @@ -25,7 +25,7 @@ def test_constructor(config: Config): - from dfp.stages.dfp_training import DFPTraining + from morpheus_dfp.stages.dfp_training import DFPTraining stage = DFPTraining(config, model_kwargs={'test': 'this'}, epochs=40, validation_size=0.5) assert isinstance(stage, SinglePortStage) @@ -36,24 +36,23 @@ def test_constructor(config: Config): @pytest.mark.parametrize('validation_size', [-1, -0.2, 1, 5]) def test_constructor_bad_validation_size(config: Config, validation_size: float): - from dfp.stages.dfp_training import DFPTraining + from morpheus_dfp.stages.dfp_training import DFPTraining with pytest.raises(ValueError): DFPTraining(config, validation_size=validation_size) @pytest.mark.parametrize('validation_size', [0., 0.2]) -@mock.patch('dfp.stages.dfp_training.AutoEncoder') -@mock.patch('dfp.stages.dfp_training.train_test_split') +@mock.patch('morpheus_dfp.stages.dfp_training.AutoEncoder') +@mock.patch('morpheus_dfp.stages.dfp_training.train_test_split') def test_on_data(mock_train_test_split: mock.MagicMock, mock_ae: mock.MagicMock, config: Config, dataset_pandas: DatasetManager, validation_size: float): - from dfp.messages.dfp_message_meta import DFPMessageMeta - from dfp.stages.dfp_training import DFPTraining - from morpheus.messages import ControlMessage + from morpheus_dfp.messages.dfp_message_meta import DFPMessageMeta + from morpheus_dfp.stages.dfp_training import DFPTraining mock_ae.return_value = mock_ae diff --git a/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py b/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py index 571f976712..ea15b2caee 100644 --- a/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py +++ b/tests/examples/digital_fingerprinting/test_dfp_viz_postproc.py @@ -54,7 +54,7 @@ def expected_df_fixture(config: Config, control_message: "ControlMessage"): # n def test_constructor(config: Config): - from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage + from morpheus_dfp.stages.dfp_viz_postproc import DFPVizPostprocStage stage = DFPVizPostprocStage(config, period='M', output_dir='/fake/test/dir', output_prefix='test_prefix') assert isinstance(stage, SinglePortStage) @@ -72,7 +72,7 @@ def test_postprocess( control_message: "ControlMessage", # noqa: F821 expected_df: pd.DataFrame, dataset_pandas: DatasetManager): - from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage + from morpheus_dfp.stages.dfp_viz_postproc import DFPVizPostprocStage # _postprocess doesn't write to disk, so the fake output_dir, shouldn't be an issue stage = DFPVizPostprocStage(config, period='min', output_dir='/fake/test/dir', output_prefix='test_prefix') @@ -88,7 +88,7 @@ def test_write_to_files( control_message: "ControlMessage", # noqa: F821 expected_df: pd.DataFrame, dataset_pandas: DatasetManager): - from dfp.stages.dfp_viz_postproc import DFPVizPostprocStage + from morpheus_dfp.stages.dfp_viz_postproc import DFPVizPostprocStage 
stage = DFPVizPostprocStage(config, period='min', output_dir=tmp_path, output_prefix='test_prefix_') assert stage._write_to_files(control_message) is control_message diff --git a/tests/examples/digital_fingerprinting/test_multifile_source.py b/tests/examples/digital_fingerprinting/test_multifile_source.py index c430e10c12..e0f982ce60 100644 --- a/tests/examples/digital_fingerprinting/test_multifile_source.py +++ b/tests/examples/digital_fingerprinting/test_multifile_source.py @@ -25,7 +25,7 @@ def test_constructor(config: Config): - from dfp.stages.multi_file_source import MultiFileSource + from morpheus_dfp.stages.multi_file_source import MultiFileSource batch_size = 1234 n_threads = 13 @@ -43,7 +43,7 @@ def test_constructor(config: Config): def test_generate_frames_fsspec(config: Config, tmp_path: str): - from dfp.stages.multi_file_source import MultiFileSource + from morpheus_dfp.stages.multi_file_source import MultiFileSource file_glob = os.path.join(TEST_DIRS.tests_data_dir, 'appshield', 'snapshot-1', '*.json') temp_glob = os.path.join(tmp_path, '*.json') # this won't match anything @@ -65,7 +65,7 @@ def test_generate_frames_fsspec(config: Config, tmp_path: str): @mock.patch('time.sleep') def test_polling_generate_frames_fsspec(amock_time: mock.MagicMock, config: Config, tmp_path: str): - from dfp.stages.multi_file_source import MultiFileSource + from morpheus_dfp.stages.multi_file_source import MultiFileSource file_glob = os.path.join(TEST_DIRS.tests_data_dir, 'appshield', 'snapshot-1', '*.json') temp_glob = os.path.join(tmp_path, '*.json') # this won't match anything @@ -88,7 +88,7 @@ def test_polling_generate_frames_fsspec(amock_time: mock.MagicMock, config: Conf def test_generate_frames_fsspec_no_files(config: Config, tmp_path: str): - from dfp.stages.multi_file_source import MultiFileSource + from morpheus_dfp.stages.multi_file_source import MultiFileSource assert os.listdir(tmp_path) == [] diff --git a/tests/examples/digital_fingerprinting/test_write_to_s3_stage.py b/tests/examples/digital_fingerprinting/test_write_to_s3_stage.py index 6c951ff175..beab5e3f2a 100644 --- a/tests/examples/digital_fingerprinting/test_write_to_s3_stage.py +++ b/tests/examples/digital_fingerprinting/test_write_to_s3_stage.py @@ -20,7 +20,7 @@ def test_constructor(config: Config): - from dfp.stages.write_to_s3_stage import WriteToS3Stage + from morpheus_dfp.stages.write_to_s3_stage import WriteToS3Stage mock_s3_writer = mock.MagicMock() stage = WriteToS3Stage(config, s3_writer=mock_s3_writer) diff --git a/tests/examples/digital_fingerprinting/utils/test_config_generator.py b/tests/examples/digital_fingerprinting/utils/test_config_generator.py index 40d4f37b67..eb4c850f94 100644 --- a/tests/examples/digital_fingerprinting/utils/test_config_generator.py +++ b/tests/examples/digital_fingerprinting/utils/test_config_generator.py @@ -23,7 +23,7 @@ @pytest.fixture(name="dfp_arg_parser") def dfp_arg_parser_fixture(): - from dfp.utils.dfp_arg_parser import DFPArgParser + from morpheus_dfp.utils.dfp_arg_parser import DFPArgParser dfp_arg_parser = DFPArgParser(skip_user=["unittest-skip-user"], only_user=["unittest-only-user"], start_time=datetime(1993, 4, 5, 6, 7, 8), @@ -43,13 +43,13 @@ def dfp_arg_parser_fixture(): @pytest.fixture(name="schema") def schema_fixture(config: Config): - from dfp.utils.schema_utils import SchemaBuilder + from morpheus_dfp.utils.schema_utils import SchemaBuilder schema_builder = SchemaBuilder(config, "duo") yield schema_builder.build_schema() def test_constructor(config: Config, 
dfp_arg_parser: "DFPArgParser", schema: "Schema"): # noqa: F821 - from dfp.utils.config_generator import ConfigGenerator + from morpheus_dfp.utils.config_generator import ConfigGenerator config_generator = ConfigGenerator(config=config, dfp_arg_parser=dfp_arg_parser, schema=schema, encoding="latin1") diff --git a/tests/examples/llm/common/conftest.py b/tests/examples/llm/common/conftest.py index 259b535304..591ed21cba 100644 --- a/tests/examples/llm/common/conftest.py +++ b/tests/examples/llm/common/conftest.py @@ -60,5 +60,5 @@ def langchain_fixture(fail_missing: bool): skip_reason = ("Tests for the WebScraperStage require the langchain package to be installed, to install this run:\n" "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`") + "--file conda/environments/dev_cuda-125_arch-x86_64.yaml --prune`") yield import_or_skip("langchain", reason=skip_reason, fail_missing=fail_missing) diff --git a/tests/llm/conftest.py b/tests/llm/conftest.py index 3519166635..94658863c5 100644 --- a/tests/llm/conftest.py +++ b/tests/llm/conftest.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import types from unittest import mock import pytest @@ -20,6 +21,54 @@ from _utils import require_env_variable +@pytest.fixture(name="nemollm", scope='session', autouse=True) +def nemollm_fixture(nemollm: types.ModuleType): + """ + Fixture to ensure nemollm is installed + """ + yield nemollm + + +@pytest.fixture(name="openai", scope='session', autouse=True) +def openai_fixture(openai: types.ModuleType): + """ + Fixture to ensure openai is installed + """ + yield openai + + +@pytest.fixture(name="langchain", scope='session', autouse=True) +def langchain_fixture(langchain: types.ModuleType): + """ + Fixture to ensure langchain is installed + """ + yield langchain + + +@pytest.fixture(name="langchain_core", scope='session', autouse=True) +def langchain_core_fixture(langchain_core: types.ModuleType): + """ + Fixture to ensure langchain_core is installed + """ + yield langchain_core + + +@pytest.fixture(name="langchain_community", scope='session', autouse=True) +def langchain_community_fixture(langchain_community: types.ModuleType): + """ + Fixture to ensure langchain_community is installed + """ + yield langchain_community + + +@pytest.fixture(name="langchain_nvidia_ai_endpoints", scope='session', autouse=True) +def langchain_nvidia_ai_endpoints_fixture(langchain_nvidia_ai_endpoints: types.ModuleType): + """ + Fixture to ensure langchain_nvidia_ai_endpoints is installed + """ + yield langchain_nvidia_ai_endpoints + + @pytest.fixture(name="countries") def countries_fixture(): yield [ diff --git a/tests/llm/nodes/test_langchain_agent_node.py b/tests/llm/nodes/test_langchain_agent_node.py index 033e978402..0779b11604 100644 --- a/tests/llm/nodes/test_langchain_agent_node.py +++ b/tests/llm/nodes/test_langchain_agent_node.py @@ -19,14 +19,6 @@ from unittest import mock import pytest -from langchain.agents import AgentType -from langchain.agents import Tool -from langchain.agents import initialize_agent -from langchain.callbacks.manager import AsyncCallbackManagerForToolRun -from langchain.callbacks.manager import CallbackManagerForToolRun -from langchain_community.chat_models.openai import ChatOpenAI -from langchain_core.exceptions import OutputParserException -from langchain_core.tools import BaseTool from _utils.llm import execute_node from _utils.llm import 
mk_mock_langchain_tool @@ -34,6 +26,26 @@ from morpheus_llm.llm import LLMNodeBase from morpheus_llm.llm.nodes.langchain_agent_node import LangChainAgentNode +try: + from langchain.agents import AgentType + from langchain.agents import Tool + from langchain.agents import initialize_agent + from langchain.callbacks.manager import AsyncCallbackManagerForToolRun + from langchain.callbacks.manager import CallbackManagerForToolRun + from langchain_community.chat_models.openai import ChatOpenAI + from langchain_core.tools import BaseTool +except ImportError: + pass + + +class OutputParserExceptionStandin(Exception): + """ + Stand-in for the OutputParserException class to avoid importing the actual class from the langchain_core.exceptions. + There is a need to have OutputParserException objects appear in test parameters, but we don't want to import + langchain_core at the top of the test as it is an optional dependency. + """ + pass + def test_constructor(mock_agent_executor: mock.MagicMock): node = LangChainAgentNode(agent_executor=mock_agent_executor) @@ -156,32 +168,6 @@ def test_execute_error(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMoc assert isinstance(execute_node(node, input="input1"), RuntimeError) -class MetadataSaverTool(BaseTool): - # The base class defines *args and **kwargs in the signature for _run and _arun requiring the arguments-differ - # pylint: disable=arguments-differ - name: str = "MetadataSaverTool" - description: str = "useful for when you need to know the name of a reptile" - - saved_metadata: list[dict] = [] - - def _run( - self, - query: str, - run_manager: typing.Optional[CallbackManagerForToolRun] = None, - ) -> str: - raise NotImplementedError("This tool only supports async") - - async def _arun( - self, - query: str, - run_manager: typing.Optional[AsyncCallbackManagerForToolRun] = None, - ) -> str: - assert query is not None # avoiding unused-argument - assert run_manager is not None - self.saved_metadata.append(run_manager.metadata.copy()) - return "frog" - - @pytest.mark.parametrize("metadata", [{ "morpheus": "unittest" @@ -192,6 +178,32 @@ async def _arun( }], ids=["single-metadata", "single-metadata-list", "multiple-metadata-list"]) def test_metadata(mock_chat_completion: tuple[mock.MagicMock, mock.MagicMock], metadata: dict): + + class MetadataSaverTool(BaseTool): + # The base class defines *args and **kwargs in the signature for _run and _arun requiring the arguments-differ + # pylint: disable=arguments-differ + name: str = "MetadataSaverTool" + description: str = "useful for when you need to know the name of a reptile" + + saved_metadata: list[dict] = [] + + def _run( + self, + query: str, + run_manager: typing.Optional[CallbackManagerForToolRun] = None, + ) -> str: + raise NotImplementedError("This tool only supports async") + + async def _arun( + self, + query: str, + run_manager: typing.Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + assert query is not None # avoiding unused-argument + assert run_manager is not None + self.saved_metadata.append(run_manager.metadata.copy()) + return "frog" + if isinstance(metadata['morpheus'], list): num_meta = len(metadata['morpheus']) input_data = [f"input_{i}" for i in range(num_meta)] @@ -271,7 +283,7 @@ def mock_llm_chat(*_, messages, **__): "arun_return,replace_value,expected_output", [ ( - [[OutputParserException("Parsing Error"), "A valid result."]], + [[OutputParserExceptionStandin("Parsing Error"), "A valid result."]], "Default error message.", [["Default error message.", "A valid 
result."]], ), @@ -282,7 +294,7 @@ def mock_llm_chat(*_, messages, **__): ), ( [ - ["A valid result.", OutputParserException("Parsing Error")], + ["A valid result.", OutputParserExceptionStandin("Parsing Error")], [Exception("General error"), "Another valid result."], ], None, @@ -297,6 +309,22 @@ def test_execute_replaces_exceptions( replace_value: str, expected_output: list, ): + # We couldn't import OutputParserException at the module level, so we need to replace instances of + # OutputParserExceptionStandin with OutputParserException + from langchain_core.exceptions import OutputParserException + + arun_return_tmp = [] + for values in arun_return: + values_tmp = [] + for value in values: + if isinstance(value, OutputParserExceptionStandin): + values_tmp.append(OutputParserException(*value.args)) + else: + values_tmp.append(value) + arun_return_tmp.append(values_tmp) + + arun_return = arun_return_tmp + placeholder_input_values = {"foo": "bar"} # a non-empty placeholder input for the context mock_agent_executor.arun.return_value = arun_return diff --git a/tests/llm/services/conftest.py b/tests/llm/services/conftest.py index a802c6ec84..88f30e76ba 100644 --- a/tests/llm/services/conftest.py +++ b/tests/llm/services/conftest.py @@ -36,12 +36,12 @@ def openai_fixture(openai): yield openai -@pytest.fixture(name="nvfoundationllm", autouse=True, scope='session') -def nvfoundationllm_fixture(nvfoundationllm): +@pytest.fixture(name="langchain_nvidia_ai_endpoints", autouse=True, scope='session') +def langchain_nvidia_ai_endpoints_fixture(langchain_nvidia_ai_endpoints): """ - All of the tests in this subdir require nvfoundationllm + All of the tests in this subdir require langchain_nvidia_ai_endpoints """ - yield nvfoundationllm + yield langchain_nvidia_ai_endpoints @pytest.fixture(name="mock_chat_completion", autouse=True) diff --git a/tests/llm/services/test_nvfoundation_llm_service.py b/tests/llm/services/test_nvfoundation_llm_service.py index f139ddacde..35a6a66f2b 100644 --- a/tests/llm/services/test_nvfoundation_llm_service.py +++ b/tests/llm/services/test_nvfoundation_llm_service.py @@ -17,13 +17,17 @@ from unittest import mock import pytest -from langchain_core.messages import ChatMessage -from langchain_core.outputs import ChatGeneration -from langchain_core.outputs import LLMResult from morpheus_llm.llm.services.nvfoundation_llm_service import NVFoundationLLMClient from morpheus_llm.llm.services.nvfoundation_llm_service import NVFoundationLLMService +try: + from langchain_core.messages import ChatMessage + from langchain_core.outputs import ChatGeneration + from langchain_core.outputs import LLMResult +except ImportError: + pass + @pytest.fixture(name="set_default_nvidia_api_key", autouse=True, scope="function") def set_default_nvidia_api_key_fixture(): @@ -34,7 +38,7 @@ def set_default_nvidia_api_key_fixture(): @pytest.mark.parametrize("api_key", ["nvapi-12345", None]) @pytest.mark.parametrize("base_url", ["http://test.nvidia.com/v1", None]) -def test_constructor(api_key: str, base_url: bool): +def test_constructor(api_key: str | None, base_url: bool | None): service = NVFoundationLLMService(api_key=api_key, base_url=base_url) diff --git a/tests/llm/test_agents_simple_pipe.py b/tests/llm/test_agents_simple_pipe.py index 61fa7f8d84..5d33dacb03 100644 --- a/tests/llm/test_agents_simple_pipe.py +++ b/tests/llm/test_agents_simple_pipe.py @@ -18,12 +18,6 @@ from unittest import mock import pytest -from langchain.agents import AgentType -from langchain.agents import initialize_agent -from 
langchain.agents import load_tools -from langchain.agents.tools import Tool -from langchain_community.llms import OpenAI # pylint: disable=no-name-in-module -from langchain_community.utilities import serpapi import cudf @@ -41,6 +35,18 @@ from morpheus_llm.llm.task_handlers.simple_task_handler import SimpleTaskHandler from morpheus_llm.stages.llm.llm_engine_stage import LLMEngineStage +try: + from langchain.agents import AgentType + from langchain.agents import initialize_agent + from langchain.agents import load_tools + from langchain.agents.tools import Tool + from langchain.schema import Generation + from langchain.schema import LLMResult + from langchain_community.llms import OpenAI # pylint: disable=no-name-in-module + from langchain_community.utilities import serpapi +except ImportError: + pass + @pytest.fixture(name="questions") def questions_fixture(): @@ -48,7 +54,6 @@ def questions_fixture(): def _build_agent_executor(model_name: str): - llm = OpenAI(model=model_name, temperature=0, cache=False) # Explicitly construct the serpapi tool, loading it via load_tools makes it too difficult to mock @@ -132,9 +137,6 @@ def test_agents_simple_pipe(mock_openai_agenerate: mock.AsyncMock, questions: list[str]): os.environ.update({'OPENAI_API_KEY': 'test_api_key', 'SERPAPI_API_KEY': 'test_api_key'}) - from langchain.schema import Generation - from langchain.schema import LLMResult - assert serpapi.SerpAPIWrapper().aresults is mock_serpapi_aresults model_name = "test_model" diff --git a/tests/stages/arxiv/conftest.py b/tests/stages/arxiv/conftest.py index 0865faada0..6166cb1250 100644 --- a/tests/stages/arxiv/conftest.py +++ b/tests/stages/arxiv/conftest.py @@ -23,7 +23,7 @@ SKIP_REASON = ("Tests for the arxiv_source require a number of packages not installed in the Morpheus development " "environment. To install these run:\n" "`conda env update --solver=libmamba -n morpheus " - "--file conda/environments/dev_cuda-121_arch-x86_64.yaml --prune`") + "--file conda/environments/dev_cuda-125_arch-x86_64.yaml --prune`") @pytest.fixture(name="arxiv", autouse=True, scope='session') diff --git a/tests/test_ip.py b/tests/test_ip.py index a8875939b1..5899c810ae 100644 --- a/tests/test_ip.py +++ b/tests/test_ip.py @@ -19,13 +19,13 @@ def test_ip_to_int(): input_df = cudf.Series(["5.79.97.178", "94.130.74.45"]) - expected = cudf.Series([89088434, 1585596973]) + expected = cudf.Series([89088434, 1585596973], dtype=cudf.api.types.dtype("uint32")) actual = ip.ip_to_int(input_df) assert actual.equals(expected) def test_int_to_ip(): - input_df = cudf.Series([89088434, 1585596973]) + input_df = cudf.Series([89088434, 1585596973], dtype=cudf.api.types.dtype("uint32")) expected = cudf.Series(["5.79.97.178", "94.130.74.45"]) actual = ip.int_to_ip(input_df) assert actual.equals(expected) diff --git a/tests/test_multi_processing_stage.py b/tests/test_multi_processing_stage.py new file mode 100644 index 0000000000..d83e9b8d3d --- /dev/null +++ b/tests/test_multi_processing_stage.py @@ -0,0 +1,232 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import typing
+from functools import partial
+from typing import Generator
+
+import pandas as pd
+import pytest
+
+import cudf
+
+from _utils import assert_results
+from _utils.dataset_manager import DatasetManager
+from morpheus.config import Config
+from morpheus.messages import ControlMessage
+from morpheus.messages import MessageMeta
+from morpheus.pipeline import LinearPipeline
+from morpheus.pipeline.stage_decorator import stage
+from morpheus.stages.general.multi_processing_stage import MultiProcessingBaseStage
+from morpheus.stages.general.multi_processing_stage import MultiProcessingStage
+from morpheus.stages.input.in_memory_data_generation_stage import InMemoryDataGenStage
+from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage
+from morpheus.stages.output.compare_dataframe_stage import CompareDataFrameStage
+from morpheus.stages.output.in_memory_sink_stage import InMemorySinkStage
+from morpheus.stages.postprocess.serialize_stage import SerializeStage
+from morpheus.stages.preprocess.deserialize_stage import DeserializeStage
+
+
+def _create_df(count: int) -> pd.DataFrame:
+    return pd.DataFrame({"a": range(count), "b": range(count)})
+
+
+def _process_df(df: pd.DataFrame, column: str, value: str) -> pd.DataFrame:
+    df[column] = value
+    return df
+
+
+def test_create_stage_type_deduction(config: Config, dataset_pandas: DatasetManager):
+
+    # Test create() with a normal function
+    mp_stage = MultiProcessingStage.create(c=config,
+                                           unique_name="multi-processing-stage-1",
+                                           process_fn=_create_df,
+                                           process_pool_usage=0.1)
+    assert mp_stage.name == "multi-processing-stage-1"
+    input_t, output_t = typing.get_args(mp_stage.__orig_class__)  # pylint: disable=no-member
+    assert input_t == int
+    assert output_t == pd.DataFrame
+
+    # Test create() with a partial function that has one unbound argument
+    df = dataset_pandas["csv_sample.csv"]
+    partial_fn = partial(_process_df, df=df, value="new_value")
+
+    mp_stage = MultiProcessingStage.create(c=config,
+                                           unique_name="multi-processing-stage-2",
+                                           process_fn=partial_fn,
+                                           process_pool_usage=0.1)
+
+    assert mp_stage.name == "multi-processing-stage-2"
+    input_t, output_t = typing.get_args(mp_stage.__orig_class__)  # pylint: disable=no-member
+    assert mp_stage.accepted_types() == (str, )
+    assert input_t == str
+    assert output_t == pd.DataFrame
+
+    # Invalid case: create() with a partial function that has no unbound arguments
+    invalid_partial_fn = partial(_process_df, df=df, column="new_column", value="new_value")
+    with pytest.raises(ValueError):
+        MultiProcessingStage.create(c=config,
+                                    unique_name="multi-processing-stage-3",
+                                    process_fn=invalid_partial_fn,
+                                    process_pool_usage=0.1)
+
+    # Invalid case: create() with a partial function that has more than one unbound argument
+    invalid_partial_fn = partial(_process_df, df=df)
+    with pytest.raises(ValueError):
+        MultiProcessingStage.create(c=config,
+                                    unique_name="multi-processing-stage-4",
+                                    process_fn=invalid_partial_fn,
+                                    process_pool_usage=0.1)
+
+
+class DerivedMultiProcessingStage(MultiProcessingBaseStage[ControlMessage, ControlMessage]):
+
+    def __init__(self,
+                 *,
+                 c: Config,
process_pool_usage: float, + add_column_name: str, + max_in_flight_messages: int = None): + super().__init__(c=c, process_pool_usage=process_pool_usage, max_in_flight_messages=max_in_flight_messages) + + self._add_column_name = add_column_name + self._shared_process_pool.set_usage(self.name, self._process_pool_usage) + + @property + def name(self) -> str: + return "derived-multi-processing-stage" + + def _on_data(self, data: ControlMessage) -> ControlMessage: + + input_df = data.payload().copy_dataframe() + pdf = input_df.to_pandas() + partial_process_fn = partial(_process_df, column=self._add_column_name, value="Hello") + + task = self._shared_process_pool.submit_task(self.name, partial_process_fn, pdf) + + df = cudf.DataFrame.from_pandas(task.result()) + meta = MessageMeta(df) + data.payload(meta) + + return data + + +def test_derived_stage_type_deduction(config: Config): + + mp_stage = DerivedMultiProcessingStage(c=config, process_pool_usage=0.1, add_column_name="new_column") + assert mp_stage.name == "derived-multi-processing-stage" + assert mp_stage.accepted_types() == (ControlMessage, ) + + input_t, output_t = typing.get_args(mp_stage.__orig_bases__[0]) # pylint: disable=no-member + assert input_t == ControlMessage + assert output_t == ControlMessage + + +def pandas_dataframe_generator(dataset_pandas: DatasetManager, count: int) -> Generator[pd.DataFrame, None, None]: + + df = dataset_pandas["csv_sample.csv"] + for _ in range(count): + yield df + + +def test_created_stage_pipe(config: Config, dataset_pandas: DatasetManager): + + config.num_threads = os.cpu_count() + + input_df = dataset_pandas["csv_sample.csv"] + + expected_df = input_df.copy() + expected_df["new_column"] = "Hello" + + df_count = 100 + df_generator = partial(pandas_dataframe_generator, dataset_pandas, df_count) + + partial_fn = partial(_process_df, column="new_column", value="Hello") + + pipe = LinearPipeline(config) + pipe.set_source(InMemoryDataGenStage(config, df_generator, output_data_type=pd.DataFrame)) + pipe.add_stage(MultiProcessingStage[pd.DataFrame, pd.DataFrame].create(c=config, + unique_name="multi-processing-stage-5", + process_fn=partial_fn, + process_pool_usage=0.1)) + sink_stage = pipe.add_stage(InMemorySinkStage(config)) + + pipe.run() + + for df in sink_stage.get_messages(): + assert df.equals(expected_df) + + +def test_derived_stage_pipe(config: Config, dataset_pandas: DatasetManager): + + config.num_threads = os.cpu_count() + + input_df = dataset_pandas["csv_sample.csv"] + add_column_name = "new_column" + expected_df = input_df.copy() + expected_df[add_column_name] = "Hello" + + pipe = LinearPipeline(config) + pipe.set_source(InMemorySourceStage(config, [cudf.DataFrame(input_df)])) + pipe.add_stage(DeserializeStage(config, ensure_sliceable_index=True)) + pipe.add_stage(DerivedMultiProcessingStage(c=config, process_pool_usage=0.1, add_column_name=add_column_name)) + pipe.add_stage(SerializeStage(config)) + comp_stage = pipe.add_stage(CompareDataFrameStage(config, expected_df)) + + pipe.run() + + assert_results(comp_stage.get_results()) + + +def test_multiple_stages_pipe(config: Config, dataset_pandas: DatasetManager): + config.num_threads = os.cpu_count() + + input_df = dataset_pandas["csv_sample.csv"] + + expected_df = input_df.copy() + expected_df["new_column_1"] = "new_value" + expected_df["new_column_2"] = "Hello" + + df_count = 100 + df_generator = partial(pandas_dataframe_generator, dataset_pandas, df_count) + + partial_fn = partial(_process_df, column="new_column_1", value="new_value") + + 
@stage + def pdf_to_control_message_stage(pdf: pd.DataFrame) -> ControlMessage: + df = cudf.DataFrame.from_pandas(pdf) + meta = MessageMeta(df) + msg = ControlMessage() + msg.payload(meta) + + return msg + + pipe = LinearPipeline(config) + pipe.set_source(InMemoryDataGenStage(config, df_generator, output_data_type=pd.DataFrame)) + pipe.add_stage( + MultiProcessingStage.create(c=config, + unique_name="multi-processing-stage-6", + process_fn=partial_fn, + process_pool_usage=0.1)) + pipe.add_stage(pdf_to_control_message_stage(config)) + pipe.add_stage(DerivedMultiProcessingStage(c=config, process_pool_usage=0.1, add_column_name="new_column_2")) + pipe.add_stage(SerializeStage(config)) + comp_stage = pipe.add_stage(CompareDataFrameStage(config, expected_df)) + + pipe.run() + + assert_results(comp_stage.get_results()) diff --git a/tests/utils/test_shared_process_pool.py b/tests/utils/test_shared_process_pool.py new file mode 100644 index 0000000000..3ac3669c16 --- /dev/null +++ b/tests/utils/test_shared_process_pool.py @@ -0,0 +1,305 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import multiprocessing as mp +import os +import threading +from decimal import Decimal +from fractions import Fraction + +import pytest + +from morpheus.utils.shared_process_pool import PoolStatus +from morpheus.utils.shared_process_pool import SharedProcessPool + +logger = logging.getLogger(__name__) + +# This test has issues with joining processes when testing with pytest `-s` option. 
Run pytest without `-s` flag + + +@pytest.fixture(scope="session", autouse=True) +def setup_and_teardown(): + # Set lower CPU usage for unit test to avoid slowing down the test + os.environ["MORPHEUS_SHARED_PROCESS_POOL_CPU_USAGE"] = "0.1" + + pool = SharedProcessPool() + + # Since SharedProcessPool might be used in other tests, stop and reset the pool before the test starts + pool.stop() + pool.join() + pool.reset() + yield + + # Stop the pool after all tests are done + pool.stop() + pool.join() + os.environ.pop("MORPHEUS_SHARED_PROCESS_POOL_CPU_USAGE", None) + + +@pytest.fixture(name="shared_process_pool") +def shared_process_pool_fixture(): + + pool = SharedProcessPool() + pool.wait_until_ready() + yield pool + + # Stop and reset the pool after each test + pool.stop() + pool.join() + pool.reset() + + +def _add_task(x, y): + return x + y + + +def _blocked_until_signaled_task(q: mp.Queue): + return q.get() + + +def _function_raises_exception(): + raise RuntimeError("Exception is raised in the process.") + + +def _function_returns_unserializable_result(): + return threading.Lock() + + +def _arbitrary_function(*args, **kwargs): + return args, kwargs + + +def _check_pool_stage_settings(pool: SharedProcessPool, stage_name: str, usage: float): + + assert pool._stage_usage.get(stage_name) == usage + assert stage_name in pool._stage_semaphores + assert stage_name in pool._task_queues + + +def test_singleton(): + + pool_1 = SharedProcessPool() + pool_2 = SharedProcessPool() + + assert pool_1 is pool_2 + + +@pytest.mark.slow +def test_pool_status(shared_process_pool): + + pool = shared_process_pool + assert pool.status == PoolStatus.RUNNING + + pool.set_usage("test_stage", 0.5) + + pool.stop() + pool.join() + assert pool.status == PoolStatus.SHUTDOWN + + # After pool.start(), the pool should have the same status as before stopping + pool.start() + pool.wait_until_ready() + assert pool.status == PoolStatus.RUNNING + assert pool._total_usage == 0.5 + _check_pool_stage_settings(pool, "test_stage", 0.5) + + pool.stop() + pool.join() + assert pool.status == PoolStatus.SHUTDOWN + + # After pool.reset(), the pool should reset all the status + pool.reset() + pool.wait_until_ready() + assert pool.status == PoolStatus.RUNNING + assert pool._total_usage == 0.0 + assert not pool._stage_usage + assert not pool._stage_semaphores + assert not pool._task_queues + + +@pytest.mark.slow +@pytest.mark.parametrize( + "a, b, expected", + [ + (1, 2, 3), # Integers + (complex(1, 2), complex(3, 4), complex(4, 6)), # Complex numbers + (Decimal('1.1'), Decimal('2.2'), Decimal('3.3')), # Decimal numbers + (Fraction(1, 2), Fraction(1, 3), Fraction(5, 6)), # Fractions + ("Hello, ", "world!", "Hello, world!"), # Strings + ([1, 2, 3], [4, 5, 6], [1, 2, 3, 4, 5, 6]), # Lists + ((1, 2, 3), (4, 5, 6), (1, 2, 3, 4, 5, 6)), # Tuples + ]) +def test_submit_single_task(shared_process_pool, a, b, expected): + + pool = shared_process_pool + pool.set_usage("test_stage", 0.5) + + task = pool.submit_task("test_stage", _add_task, a, b) + assert task.result() == expected + + task = pool.submit_task("test_stage", _add_task, x=a, y=b) + assert task.result() == expected + + task = pool.submit_task("test_stage", _add_task, a, y=b) + assert task.result() == expected + + pool.stop() + + # After the pool is stopped, it should not accept any new tasks + with pytest.raises(RuntimeError): + pool.submit_task("test_stage", _add_task, 10, 20) + + +@pytest.mark.slow +def test_submit_task_with_invalid_stage(shared_process_pool): + + pool = 
shared_process_pool + + with pytest.raises(ValueError): + pool.submit_task("stage_does_not_exist", _add_task, 10, 20) + + +@pytest.mark.slow +def test_submit_task_raises_exception(shared_process_pool): + + pool = shared_process_pool + pool.set_usage("test_stage", 0.5) + + task = pool.submit_task("test_stage", _function_raises_exception) + with pytest.raises(RuntimeError): + task.result() + + +@pytest.mark.slow +def test_submit_task_with_unserializable_result(shared_process_pool): + + pool = shared_process_pool + pool.set_usage("test_stage", 0.5) + + task = pool.submit_task("test_stage", _function_returns_unserializable_result) + with pytest.raises(TypeError): + task.result() + + +@pytest.mark.slow +def test_submit_task_with_unserializable_arg(shared_process_pool): + + pool = shared_process_pool + pool.set_usage("test_stage", 0.5) + + # Unserializable arguments cannot be submitted to the pool + with pytest.raises(TypeError): + pool.submit_task("test_stage", _arbitrary_function, threading.Lock()) + + +@pytest.mark.slow +@pytest.mark.parametrize( + "a, b, expected", + [ + (1, 2, 3), # Integers + (complex(1, 2), complex(3, 4), complex(4, 6)), # Complex numbers + (Decimal('1.1'), Decimal('2.2'), Decimal('3.3')), # Decimal numbers + (Fraction(1, 2), Fraction(1, 3), Fraction(5, 6)), # Fractions + ("Hello, ", "world!", "Hello, world!"), # Strings + ([1, 2, 3], [4, 5, 6], [1, 2, 3, 4, 5, 6]), # Lists + ((1, 2, 3), (4, 5, 6), (1, 2, 3, 4, 5, 6)), # Tuples + ]) +def test_submit_multiple_tasks(shared_process_pool, a, b, expected): + + pool = shared_process_pool + pool.set_usage("test_stage", 0.5) + + num_tasks = 100 + tasks = [] + for _ in range(num_tasks): + tasks.append(pool.submit_task("test_stage", _add_task, a, b)) + + for future in tasks: + assert future.result() == expected + + +@pytest.mark.slow +def test_set_usage(shared_process_pool): + + pool = shared_process_pool + + pool.set_usage("test_stage_1", 0.5) + assert pool._total_usage == 0.5 + _check_pool_stage_settings(pool, "test_stage_1", 0.5) + + pool.set_usage("test_stage_2", 0.3) + assert pool._total_usage == 0.8 + _check_pool_stage_settings(pool, "test_stage_2", 0.3) + + # valid update to the usage of an existing stage + pool.set_usage("test_stage_1", 0.6) + assert pool._total_usage == 0.9 + _check_pool_stage_settings(pool, "test_stage_1", 0.6) + + # invalid update to the usage of an existing stage, exceeding the total usage limit + with pytest.raises(ValueError): + pool.set_usage("test_stage_1", 0.8) + + # adding a new stage usage, exceeding the total usage limit + with pytest.raises(ValueError): + pool.set_usage("test_stage_3", 0.2) + + with pytest.raises(ValueError): + pool.set_usage("test_stage_1", 1.1) + + with pytest.raises(ValueError): + pool.set_usage("test_stage_1", -0.1) + + # invalid settings should not change the pool status + _check_pool_stage_settings(pool, "test_stage_1", 0.6) + assert pool._total_usage == 0.9 + + +@pytest.mark.slow +def test_task_completion_with_early_stop(shared_process_pool): + + pool = shared_process_pool + pool.set_usage("test_stage_1", 0.1) + pool.set_usage("test_stage_2", 0.3) + pool.set_usage("test_stage_3", 0.5) + + manager = mp.Manager() + queue = manager.Queue() + + tasks = [] + + task_num = 10 + + for _ in range(task_num): + tasks.append(pool.submit_task("test_stage_1", _blocked_until_signaled_task, queue)) + tasks.append(pool.submit_task("test_stage_2", _blocked_until_signaled_task, queue)) + tasks.append(pool.submit_task("test_stage_3", _blocked_until_signaled_task, queue)) + + pool.stop() + + 
# No tasks have been completed since they have not been signaled yet
+    for task in tasks:
+        assert not task.done()
+
+    for i in range(len(tasks)):
+        queue.put(i)
+
+    pool.join()
+
+    # All tasks should have completed before the pool shut down
+    assert len(tasks) == 3 * task_num
+    for task in tasks:
+        assert task.done()
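A distilled sketch of the optional-dependency pattern applied across the LLM test changes above: guard the langchain imports with try/except ImportError, keep a stand-in exception type so parametrize lists import cleanly, and swap in the real class at test runtime. This is illustrative only and not part of the patch; the test name and parameters are hypothetical, and it assumes (as in tests/llm/conftest.py above) that session fixtures skip these tests when the dependency is missing.

import pytest

try:
    # Optional dependency: may be absent in minimal environments, so never import it unguarded.
    from langchain_core.exceptions import OutputParserException  # noqa: F401
except ImportError:
    pass


class OutputParserExceptionStandin(Exception):
    """Stand-in so test parameters never require the optional import at module level."""


@pytest.mark.parametrize("error", [OutputParserExceptionStandin("Parsing Error")])
def test_replaces_standin(error: Exception):
    # Defer the real import to test runtime, then convert the stand-in to the real type.
    from langchain_core.exceptions import OutputParserException

    if isinstance(error, OutputParserExceptionStandin):
        error = OutputParserException(*error.args)

    assert isinstance(error, OutputParserException)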
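Likewise, a minimal usage sketch of SharedProcessPool, based only on the API exercised by tests/utils/test_shared_process_pool.py above (wait_until_ready, set_usage, submit_task, stop, join); the stage name and work function are illustrative, and this is not a definitive reference for the class.

from morpheus.utils.shared_process_pool import SharedProcessPool


def _add(x: int, y: int) -> int:
    # Work submitted to the pool must be a picklable, module-level callable.
    return x + y


def main():
    pool = SharedProcessPool()  # singleton: repeated construction returns the same pool
    pool.wait_until_ready()

    # Reserve a fraction of the pool for this named stage; per the tests above,
    # the fractions across all stages may not exceed 1.0 or set_usage raises ValueError.
    pool.set_usage("example_stage", 0.5)

    task = pool.submit_task("example_stage", _add, 1, 2)
    assert task.result() == 3

    # Stop accepting new tasks, then wait for in-flight tasks to finish.
    pool.stop()
    pool.join()


if __name__ == "__main__":
    main()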