From 90f0c4588394af439d6ae5d272cb28f8a3c1ef0c Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Wed, 27 Nov 2024 15:08:35 -0800
Subject: [PATCH 1/7] Increase time limit for conda builds to 90 minutes

---
 .github/workflows/ci_pipe.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci_pipe.yml b/.github/workflows/ci_pipe.yml
index 1b6391119d..269b111733 100644
--- a/.github/workflows/ci_pipe.yml
+++ b/.github/workflows/ci_pipe.yml
@@ -209,7 +209,7 @@ jobs:
     if: ${{ inputs.conda_run_build }}
     needs: [documentation, test]
     runs-on: linux-amd64-gpu-v100-latest-1
-    timeout-minutes: 60
+    timeout-minutes: 90
     container:
       image: ${{ inputs.base_container }}
       options: --cap-add=sys_nice

From 485547c9fb18e118033584b567c35923d4955049 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Mon, 16 Dec 2024 16:47:39 -0800
Subject: [PATCH 2/7] Fix documentation link

---
 docs/source/developer_guide/guides/2_real_world_phishing.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/developer_guide/guides/2_real_world_phishing.md b/docs/source/developer_guide/guides/2_real_world_phishing.md
index 61ba218fd5..65bb6bd58b 100644
--- a/docs/source/developer_guide/guides/2_real_world_phishing.md
+++ b/docs/source/developer_guide/guides/2_real_world_phishing.md
@@ -413,7 +413,7 @@ else:
 pipeline.add_stage(RecipientFeaturesStage(config))
 ```
 
-To tokenize the input data we will use Morpheus' `PreprocessNLPStage`. This stage uses the [cuDF subword tokenizer](https://docs.rapids.ai/api/cudf/stable/user_guide/api_docs/subword_tokenize/#subwordtokenizer) to transform strings into a tensor of numbers to be fed into the neural network model. Rather than split the string by characters or whitespaces, we split them into meaningful subwords based upon the occurrence of the subwords in a large training corpus. You can find more details here: [https://arxiv.org/abs/1810.04805v2](https://arxiv.org/abs/1810.04805v2). All we need to know for now is that the text will be converted to subword token ids based on the vocabulary file that we provide (`vocab_hash_file=vocab file`).
+To tokenize the input data we will use Morpheus' `PreprocessNLPStage`. This stage uses the [cuDF subword tokenizer](https://docs.rapids.ai/api/cudf/legacy/user_guide/api_docs/subword_tokenize/#subwordtokenizer) to transform strings into a tensor of numbers to be fed into the neural network model. Rather than split the string by characters or whitespaces, we split them into meaningful subwords based upon the occurrence of the subwords in a large training corpus. You can find more details here: [https://arxiv.org/abs/1810.04805v2](https://arxiv.org/abs/1810.04805v2). All we need to know for now is that the text will be converted to subword token ids based on the vocabulary file that we provide (`vocab_hash_file=vocab file`).
 
 Let's go ahead and instantiate our `PreprocessNLPStage` and add it to the pipeline:
 

From b35d86143ec9a0f5a9b44a0b9017f68bd5603ada Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Dec 2024 08:29:31 -0800
Subject: [PATCH 3/7] Update stub

---
 python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi
index f4acdbedb6..bece30f67e 100644
--- a/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi
+++ b/python/morpheus/morpheus/_lib/cudf_helpers/__init__.pyi
@@ -1,20 +1,24 @@
 from __future__ import annotations
 import morpheus._lib.cudf_helpers
 import typing
+from cudf.core.column.column import ColumnBase
 from cudf.core.buffer.exposure_tracked_buffer import ExposureTrackedBuffer
 from cudf.core.buffer.spillable_buffer import SpillableBuffer
 from cudf.core.dtypes import StructDtype
 import _cython_3_0_11
 import cudf
+import itertools
 import rmm
 
 __all__ = [
+    "ColumnBase",
     "ExposureTrackedBuffer",
     "SpillableBuffer",
     "StructDtype",
     "as_buffer",
     "bitmask_allocation_size_bytes",
     "cudf",
+    "itertools",
     "rmm"
 ]
 

From 9d5e6a3bf3447ffe4fba89de59c9397cf2b8b514 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Dec 2024 08:37:05 -0800
Subject: [PATCH 4/7] Update test to skip when optional dependencies are not installed

---
 tests/conftest.py                                 | 10 ++++++++++
 tests/morpheus_llm/llm/conftest.py                |  8 ++++++++
 tests/morpheus_llm/llm/test_agents_simple_pipe.py |  2 +-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 093eada5c1..5babac6e1f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1125,6 +1125,16 @@ def langchain_community_fixture(fail_missing: bool):
                          fail_missing=fail_missing)
 
 
+@pytest.fixture(name="langchain_openai", scope='session')
+def langchain_community_fixture(fail_missing: bool):
+    """
+    Fixture to ensure langchain_openai is installed
+    """
+    yield import_or_skip("langchain_openai",
+                         reason=OPT_DEP_SKIP_REASON.format(package="langchain_openai"),
+                         fail_missing=fail_missing)
+
+
 @pytest.fixture(name="langchain_nvidia_ai_endpoints", scope='session')
 def langchain_nvidia_ai_endpoints_fixture(fail_missing: bool):
     """
diff --git a/tests/morpheus_llm/llm/conftest.py b/tests/morpheus_llm/llm/conftest.py
index 427b8b46c7..8d196720aa 100644
--- a/tests/morpheus_llm/llm/conftest.py
+++ b/tests/morpheus_llm/llm/conftest.py
@@ -61,6 +61,14 @@ def langchain_community_fixture(langchain_community: types.ModuleType):
     yield langchain_community
 
 
+@pytest.fixture(name="langchain_openai", scope='session', autouse=True)
+def langchain_community_fixture(langchain_openai: types.ModuleType):
+    """
+    Fixture to ensure langchain_openai is installed
+    """
+    yield langchain_openai
+
+
 @pytest.fixture(name="langchain_nvidia_ai_endpoints", scope='session', autouse=True)
 def langchain_nvidia_ai_endpoints_fixture(langchain_nvidia_ai_endpoints: types.ModuleType):
     """
diff --git a/tests/morpheus_llm/llm/test_agents_simple_pipe.py b/tests/morpheus_llm/llm/test_agents_simple_pipe.py
index d219a9780e..8aa9e0d0e1 100644
--- a/tests/morpheus_llm/llm/test_agents_simple_pipe.py
+++ b/tests/morpheus_llm/llm/test_agents_simple_pipe.py
@@ -127,7 +127,7 @@ def test_agents_simple_pipe_integration_openai(config: Config, questions: list[s
     assert float(response_match.group(1)) >= 3.7
 
 
-@pytest.mark.usefixtures("openai", "restore_environ")
+@pytest.mark.usefixtures("langchain_community", "langchain_openai", "openai", "restore_environ")
 @mock.patch("langchain_community.utilities.serpapi.SerpAPIWrapper.aresults")
 @mock.patch("langchain_openai.OpenAI._agenerate",
             autospec=True)  # autospec is needed as langchain will inspect the function

From f1ed8df44769ec31b64c1bd211541842e347acfc Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Dec 2024 08:40:31 -0800
Subject: [PATCH 5/7] Fix copy/paste error

---
 tests/conftest.py                  | 2 +-
 tests/morpheus_llm/llm/conftest.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 5babac6e1f..acf60bc7cf 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1126,7 +1126,7 @@ def langchain_community_fixture(fail_missing: bool):
 
 
 @pytest.fixture(name="langchain_openai", scope='session')
-def langchain_community_fixture(fail_missing: bool):
+def langchain_openai_fixture(fail_missing: bool):
     """
     Fixture to ensure langchain_openai is installed
     """
diff --git a/tests/morpheus_llm/llm/conftest.py b/tests/morpheus_llm/llm/conftest.py
index 8d196720aa..b316c26498 100644
--- a/tests/morpheus_llm/llm/conftest.py
+++ b/tests/morpheus_llm/llm/conftest.py
@@ -62,7 +62,7 @@ def langchain_community_fixture(langchain_community: types.ModuleType):
 
 
 @pytest.fixture(name="langchain_openai", scope='session', autouse=True)
-def langchain_community_fixture(langchain_openai: types.ModuleType):
+def langchain_openai_fixture(langchain_openai: types.ModuleType):
     """
     Fixture to ensure langchain_openai is installed
     """

From bd24d981e6ada8be549233a29ca2b733eb7751c5 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Dec 2024 11:38:44 -0800
Subject: [PATCH 6/7] Skip if numexpr is not installed

---
 tests/conftest.py                                 | 8 ++++++++
 tests/morpheus_llm/llm/test_agents_simple_pipe.py | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index acf60bc7cf..63f1d6aba2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1155,6 +1155,14 @@ def databricks_fixture(fail_missing: bool):
                          fail_missing=fail_missing)
 
 
+@pytest.fixture(name="numexpr", scope='session')
+def numexpr_fixture(fail_missing: bool):
+    """
+    Fixture to ensure numexpr is installed
+    """
+    yield import_or_skip("numexpr", reason=OPT_DEP_SKIP_REASON.format(package="numexpr"), fail_missing=fail_missing)
+
+
 @pytest.mark.usefixtures("openai")
 @pytest.fixture(name="mock_chat_completion")
 def mock_chat_completion_fixture():
diff --git a/tests/morpheus_llm/llm/test_agents_simple_pipe.py b/tests/morpheus_llm/llm/test_agents_simple_pipe.py
index 8aa9e0d0e1..b6105f4be1 100644
--- a/tests/morpheus_llm/llm/test_agents_simple_pipe.py
+++ b/tests/morpheus_llm/llm/test_agents_simple_pipe.py
@@ -127,7 +127,7 @@ def test_agents_simple_pipe_integration_openai(config: Config, questions: list[s
     assert float(response_match.group(1)) >= 3.7
 
 
-@pytest.mark.usefixtures("langchain_community", "langchain_openai", "openai", "restore_environ")
+@pytest.mark.usefixtures("langchain_community", "langchain_openai", "numexpr", "openai", "restore_environ")
 @mock.patch("langchain_community.utilities.serpapi.SerpAPIWrapper.aresults")
 @mock.patch("langchain_openai.OpenAI._agenerate",
             autospec=True)  # autospec is needed as langchain will inspect the function

From 5eaec88a0e147027e0e92c97832f2304ce60d42e Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Tue, 17 Dec 2024 12:29:55 -0800
Subject: [PATCH 7/7] Include openai in the pip requirements for morpheus_llm

---
 conda/environments/all_cuda-125_arch-x86_64.yaml               | 2 +-
 conda/environments/examples_cuda-125_arch-x86_64.yaml          | 2 +-
 dependencies.yaml                                              | 3 ++-
 python/morpheus_llm/morpheus_llm/requirements_morpheus_llm.txt | 1 +
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 513b2dd157..75aa6a0a93 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -80,7 +80,7 @@ dependencies:
 - numexpr
 - numpydoc=1.5
 - onnx=1.15
-- openai=1.13
+- openai==1.13.*
 - papermill=2.4.0
 - pip
 - pkg-config=0.29
diff --git a/conda/environments/examples_cuda-125_arch-x86_64.yaml b/conda/environments/examples_cuda-125_arch-x86_64.yaml
index 646069c124..67b5e80b2e 100644
--- a/conda/environments/examples_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/examples_cuda-125_arch-x86_64.yaml
@@ -38,7 +38,7 @@ dependencies:
 - numexpr
 - numpydoc=1.5
 - onnx=1.15
-- openai=1.13
+- openai==1.13.*
 - papermill=2.4.0
 - pip
 - pluggy=1.3
diff --git a/dependencies.yaml b/dependencies.yaml
index 477633593d..bd81cdd1b3 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -436,6 +436,7 @@ dependencies:
           - &langchain-nvidia-ai-endpoints langchain-nvidia-ai-endpoints==0.0.11
           - &langchain-openai langchain-openai==0.1.3
           - milvus==2.3.5 # update to match pymilvus when available
+          - &openai openai==1.13.*
           - pymilvus==2.3.6
           - &nemollm nemollm==0.3.5
 
@@ -494,7 +495,7 @@ dependencies:
           - newspaper3k=0.2
           - numexpr
           - onnx=1.15
-          - openai=1.13
+          - *openai
           - pypdf=3.17.4
           - *pypdfium2
           - *python-docx
diff --git a/python/morpheus_llm/morpheus_llm/requirements_morpheus_llm.txt b/python/morpheus_llm/morpheus_llm/requirements_morpheus_llm.txt
index d8f16a5a37..c0537c403b 100644
--- a/python/morpheus_llm/morpheus_llm/requirements_morpheus_llm.txt
+++ b/python/morpheus_llm/morpheus_llm/requirements_morpheus_llm.txt
@@ -8,5 +8,6 @@ langchain-openai==0.1.3
 langchain==0.1.16
 milvus==2.3.5
 nemollm==0.3.5
+openai==1.13.*
 pymilvus==2.3.6
 torch==2.4.0+cu124
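
Patches 4-6 follow a common optional-dependency pattern: a session-scoped fixture wraps `import_or_skip`, and tests opt in via `@pytest.mark.usefixtures`, so a missing extra package causes a clean skip instead of an `ImportError`. The sketch below shows how a test might consume the new fixtures; the test name and body are hypothetical, and only the fixture names `numexpr` and `langchain_openai` come from the patches above.

import pytest


# Hypothetical test module illustrating the fixtures added in tests/conftest.py.
# If numexpr or langchain_openai is not installed, the corresponding fixture
# calls pytest.skip(), so this test is skipped rather than erroring at import time.
@pytest.mark.usefixtures("numexpr", "langchain_openai")
def test_optional_dependency_smoke():
    # Imports are safe here because the fixtures already verified availability.
    import numexpr
    from langchain_openai import OpenAI  # noqa: F401  (import check only)

    # numexpr.evaluate returns a NumPy scalar/array; "2 + 2" should evaluate to 4.
    assert numexpr.evaluate("2 + 2") == 4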