diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 568df49ce2..720df1508d 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -119,7 +119,6 @@
       ],
       "settings": {
         "cmake.cmakePath": "/tmp/.current-conda-env/bin/cmake",
-        // "cmake.languageSupport.dotnetPath": "/usr/bin/dotnet",
         "C_Cpp.intelliSenseEngine": "disabled",
         "python.terminal.activateEnvironment": false
       }
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 02f97d6847..77fd97a444 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -74,7 +74,14 @@ morpheus_utils_initialize_package_manager(
 )
 
 # Initialize CUDA
-# NOTE: This MUST occur before any 'project' calls because of rapids_cmake requirements.
+# This is a two-step process. We need to call morpheus_utils_initialize_cuda_arch, which in turn calls
+# rapids_cuda_init_architectures, prior to calling project(). This is because rapids_cuda_init_architectures defines a
+# `CMAKE_PROJECT_<PROJECT-NAME>_INCLUDE` hook which is invoked by the project() call. This hook is what allows us to
+# set `CMAKE_CUDA_ARCHITECTURES=rapids` when performing a release build; that value is expanded to the current list of
+# architectures supported by our version of rapids.
+#
+# After the call to project() we can then call morpheus_utils_enable_cuda(), which applies some CUDA+clang settings
+# that can only be set after calling project(), but which must be in place prior to calling enable_language(CUDA).
 if(DEFINED MORPHEUS_CUDA_ARCHITECTURES)
   set(CMAKE_CUDA_ARCHITECTURES "${MORPHEUS_CUDA_ARCHITECTURES}")
 endif()
@@ -89,7 +96,7 @@ project(morpheus
   LANGUAGES C CXX
 )
 
-# Configure CUDA architecture
+# This applies some clang-specific settings for CUDA prior to calling enable_language(CUDA)
 morpheus_utils_enable_cuda()
 
 rapids_cmake_write_version_file(${CMAKE_BINARY_DIR}/autogenerated/include/morpheus/version.hpp)
@@ -178,8 +185,6 @@ if(MORPHEUS_ENABLE_DEBUG_INFO)
   morpheus_utils_print_target_properties(
     TARGETS
-      morpheus._lib.cudf_helpers
-      morpheus
       morpheus._lib.llm
       morpheus
     WRITE_TO_FILE
   )
diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake
index e22889979d..09032a6b46 100644
--- a/cmake/dependencies.cmake
+++ b/cmake/dependencies.cmake
@@ -50,10 +50,6 @@ endif()
 # ====
 morpheus_utils_configure_glog()
 
-# grpc
-# =========
-morpheus_utils_configure_grpc()
-
 if(MORPHEUS_BUILD_TESTS)
   # google test
   # - Expects package to pre-exist in the build environment
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index c2034f8a60..4a9df33f32 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -17,6 +17,7 @@ dependencies:
 - boost-cpp=1.82
 - boto3
 - breathe=4.34.0
+- ccache>=3.7
 - clangdev=16
 - click>=8
 - cmake=3.25
@@ -70,6 +71,7 @@ dependencies:
 - openai=0.28
 - papermill=2.3.4
 - pip
+- pkg-config
 - pluggy=1.0
 - pre-commit
 - protobuf=4.21.*
diff --git a/conda/environments/dev_cuda-118_arch-x86_64.yaml b/conda/environments/dev_cuda-118_arch-x86_64.yaml
index 786d8010a3..7d14c179f8 100644
--- a/conda/environments/dev_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/dev_cuda-118_arch-x86_64.yaml
@@ -15,6 +15,7 @@ dependencies:
 - benchmark=1.6.0
 - boost-cpp=1.82
 - breathe=4.34.0
+- ccache>=3.7
 - clangdev=16
 - click>=8
 - cmake=3.25
@@ -55,6 +56,7 @@ dependencies:
 - numpydoc=1.4
 - nvtabular=23.06
 - pip
+- pkg-config
 - pluggy=1.0
 - pre-commit
 - protobuf=4.21.*
diff --git a/dependencies.yaml b/dependencies.yaml
index 2918d61a06..b24ad121eb 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -137,6 +137,7 @@ dependencies:
       - gxx_linux-64=11.2
       - cmake=3.25
       - boost-cpp=1.82
+      - ccache>=3.7
       - cuda-nvcc
       - cudf=23.06
       - cxx-compiler
@@ -145,6 +146,7 @@
       - librdkafka=1.9.2
       - ninja=1.10
       - nlohmann_json=3.9
+      - pkg-config # for mrc cmake
       - protobuf=4.21.*
       - pybind11-stubgen=0.10
       - rapidjson=1.1.0
diff --git a/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb b/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb
index d66234974d..7decd59636 100644
--- a/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb
+++ b/models/training-tuning-scripts/fraud-detection-models/gnn-fraud-detection-training.ipynb
@@ -50,16 +50,16 @@
    "source": [
     "%load_ext autoreload\n",
     "%autoreload 2\n",
-    "import pandas as pd\n",
-    "import numpy as np\n",
     "import os\n",
+    "\n",
     "import dgl\n",
+    "import matplotlib.pylab as plt\n",
     "import numpy as np\n",
-    "import pandas as pd\n",
     "import torch\n",
     "import torch.nn as nn\n",
     "from model import HeteroRGCN\n",
     "from model import HinSAGE\n",
+    "from model import prepare_data\n",
     "from sklearn.metrics import accuracy_score\n",
     "from sklearn.metrics import auc\n",
     "from sklearn.metrics import average_precision_score\n",
@@ -68,9 +68,15 @@
     "from sklearn.metrics import roc_curve\n",
     "from torchmetrics.functional import accuracy\n",
     "from tqdm import trange\n",
+    "from training import build_fsi_graph\n",
+    "from training import evaluate\n",
+    "from training import get_metrics\n",
+    "from training import init_loaders\n",
+    "from training import save_model\n",
+    "from training import train\n",
     "from xgboost import XGBClassifier\n",
-    "from training import (get_metrics, evaluate, init_loaders, build_fsi_graph,\n",
-    "                      map_node_id, prepare_data, save_model, train)\n"
+    "\n",
+    "import cudf"
    ]
   },
   {
@@ -85,26 +91,6 @@
    "metadata": {},
    "source": [
     "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 73,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "device(type='cuda', index=0)"
-      ]
-     },
-     "execution_count": 73,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#device "
-   ]
-  },
   {
    "attachments": {},
    "cell_type": "markdown",
   "metadata": {},
@@ -122,8 +108,8 @@
    "# Replace training-data.csv and validation-data.csv with training & validation csv in dataset file.\n",
    "TRAINING_DATA ='../../datasets/training-data/fraud-detection-training-data.csv'\n",
    "VALIDATION_DATA = '../../datasets/validation-data/fraud-detection-validation-data.csv'\n",
-    "train_data = pd.read_csv(TRAINING_DATA)\n",
-    "inductive_data = pd.read_csv(VALIDATION_DATA)"
+    "train_data = cudf.read_csv(TRAINING_DATA)\n",
+    "inductive_data = cudf.read_csv(VALIDATION_DATA)"
   ]
  },
  {
@@ -141,16 +127,15 @@
   "outputs": [],
   "source": [
    "# Increase number of samples.\n",
-   "def augement_data(train_data=train_data, n=20):\n",
-   "    max_id = inductive_data.index.max()\n",
+   "def augment_data(train_data=train_data, n=20):\n",
+   "    train_data.drop(columns=['index'], inplace=True, axis=1)\n",
    "    non_fraud = train_data[train_data['fraud_label'] == 0]\n",
-   "    \n",
-   "    non_fraud = non_fraud.drop(['index'], axis=1)\n",
-   "    df_fraud = pd.concat([non_fraud for i in range(n)])\n",
-   "    df_fraud.index = np.arange(1076, 1076 + df_fraud.shape[0])\n",
-   "    df_fraud['index'] = df_fraud.index\n",
-   "    \n",
-   "    return pd.concat((train_data, df_fraud))"
+   "    df_fraud = cudf.concat([non_fraud for _ in range(n)])\n",
+   "    df_train = cudf.concat([train_data, df_fraud])\n",
+   "    df_train.reset_index(inplace=True)\n",
+   "    df_train['index'] = df_train.index\n",
+   "\n",
+   "    return df_train"
   ]
  },
  {
@@ -159,7 +144,19 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "train_data = augement_data(train_data, n=20)"
+   "train_data = augment_data(train_data, n=20)"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": 6,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "# re-arrange test data index\n",
+   "last_train_index = train_data.index.max()+1\n",
+   "inductive_data.index = np.arange(last_train_index, last_train_index + inductive_data.shape[0])\n",
+   "inductive_data['index'] = inductive_data.index"
+  ]
+ },
 {
@@ -173,7 +170,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 6,
+  "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The distribution of fraud for the train data is:\n",
      " 0    11865\n",
      "1      188\n",
-     "Name: fraud_label, dtype: int64\n",
+     "Name: fraud_label, dtype: int32\n",
      "The distribution of fraud for the inductive data is:\n",
      " 0    244\n",
      "1     21\n",
-     "Name: fraud_label, dtype: int64\n"
+     "Name: fraud_label, dtype: int32\n"
     ]
    }
   ],
   "source": [
@@ -196,38 +193,13 @@
    "print('The distribution of fraud for the train data is:\\n', train_data['fraud_label'].value_counts())\n",
    "print('The distribution of fraud for the inductive data is:\\n', inductive_data['fraud_label'].value_counts())"
   ]
  },
- {
-  "cell_type": "code",
-  "execution_count": 7,
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "# split train, test and create nodes index\n",
-   "def prepare_data(df_train, df_test):\n",
-   "    \n",
-   "    train_idx_ = df_train.shape[0]\n",
-   "    df = pd.concat([df_train, df_test], axis=0)\n",
-   "    df['tran_id'] = df['index']\n",
-   "\n",
-   "    meta_cols = ['tran_id', 'client_node', 'merchant_node']\n",
-   "    for col in meta_cols:\n",
-   "        map_node_id(df, col)\n",
-   "\n",
-   "    train_idx = df['tran_id'][:train_idx_]\n",
-   "    test_idx = df['tran_id'][train_idx_:]\n",
-   "\n",
-   "    df['index'] = df['tran_id']\n",
-   "    df.index = df['index']\n",
-   "\n",
-   "    return (df.iloc[train_idx, :], df.iloc[test_idx, :], train_idx, test_idx, df['fraud_label'].values, df)"
-  ]
- },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
+   "# Split into training, testing datasets\n",
    "train_data, test_data, train_idx, inductive_idx, labels, df = prepare_data(train_data, inductive_data)"
   ]
  },
 {
@@ -236,7 +208,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-   "### 3. Construct transasction graph network"
+   "### 3. Construct transaction graph network"
   ]
  },
 {
@@ -253,45 +225,17 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "meta_cols = [\"client_node\", \"merchant_node\", \"fraud_label\", \"index\", \"tran_id\"]\n",
+   "meta_cols = [\"client_node\", \"merchant_node\", \"index\"]\n",
    "\n",
    "# Build graph\n",
    "whole_graph, feature_tensors = build_fsi_graph(df, meta_cols)\n",
    "train_graph, _ = build_fsi_graph(train_data, meta_cols)\n",
-   "whole_graph = whole_graph.to(device)"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": 10,
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "# Dataset to tensors\n",
-   "feature_tensors = feature_tensors.to(device)\n",
-   "train_idx = torch.from_numpy(train_idx.values).to(device)\n",
-   "inductive_idx = torch.from_numpy(inductive_idx.values).to(device)\n",
-   "labels = torch.LongTensor(labels).to(device)\n"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": 11,
-  "metadata": {},
-  "outputs": [
-   {
-    "name": "stdout",
-    "output_type": "stream",
-    "text": [
-     "Graph(num_nodes={'client': 623, 'merchant': 388, 'transaction': 12053},\n",
-     "      num_edges={('client', 'buy', 'transaction'): 12053, ('merchant', 'sell', 'transaction'): 12053, ('transaction', 'bought', 'client'): 12053, ('transaction', 'issued', 'merchant'): 12053},\n",
-     "      metagraph=[('client', 'transaction', 'buy'), ('transaction', 'client', 'bought'), ('transaction', 'merchant', 'issued'), ('merchant', 'transaction', 'sell')])\n"
-    ]
-   }
-  ],
-  "source": [
-   "# Show structure of training graph.\n",
-   "print(train_graph)"
+   "\n",
+   "# Dataset\n",
+   "feature_tensors = feature_tensors.float()\n",
+   "train_idx = torch.from_dlpack(train_idx.values.toDlpack()).long()\n",
+   "inductive_idx = torch.from_dlpack(inductive_idx.values.toDlpack()).long()\n",
+   "labels = torch.from_dlpack(labels.toDlpack()).long()"
   ]
  },
 {
@@ -312,31 +256,34 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 21,
+  "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hyperparameters\n",
    "target_node = \"transaction\"\n",
-   "epochs = 20\n",
+   "epochs = 25\n",
    "in_size, hidden_size, out_size, n_layers,\\\n",
    "    embedding_size = 111, 64, 2, 2, 1\n",
-   "batch_size = 100\n",
-   "hyperparameters = {\"in_size\": in_size, \"hidden_size\": hidden_size,\n",
-   "                   \"out_size\": out_size, \"n_layers\": n_layers,\n",
-   "                   \"embedding_size\": embedding_size,\n",
-   "                   \"target_node\": target_node,\n",
-   "                   \"epoch\": epochs}\n",
-   "\n",
+   "batch_size = 256\n",
+   "in_size, hidden_size, out_size, n_layers, embedding_size = 111, 64, 2, 2, 1\n",
+   "hyperparameters = {\n",
+   "    \"in_size\": in_size,\n",
+   "    \"hidden_size\": hidden_size,\n",
+   "    \"out_size\": out_size,\n",
+   "    \"n_layers\": n_layers,\n",
+   "    \"embedding_size\": embedding_size,\n",
+   "    \"target_node\": target_node,\n",
+   "    \"epoch\": epochs\n",
+   "}\n",
    "\n",
-   "scale_pos_weight = train_data['fraud_label'].sum() / train_data.shape[0]\n",
-   "scale_pos_weight = torch.tensor(\n",
-   "    [scale_pos_weight, 1-scale_pos_weight]).to(device)"
+   "scale_pos_weight = (labels[train_idx].sum() / train_data.shape[0]).item()\n",
+   "scale_pos_weight = torch.FloatTensor([scale_pos_weight, 1 - scale_pos_weight]).to(device)"
   ]
  },
 {
  "cell_type": "code",
- "execution_count": 22,
+ "execution_count": 11,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -345,7 +292,6 @@
    "train_loader, val_loader, test_loader = init_loaders(train_graph.to(\n",
    "    device), train_idx, test_idx=inductive_idx,\n",
    "    val_idx=inductive_idx, g_test=whole_graph, batch_size=batch_size)\n",
-   "\n",
    "# Set model variables\n",
    "model = HinSAGE(train_graph, in_size, hidden_size, out_size, n_layers, embedding_size).to(device)\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)\n",
@@ -354,314 +300,384 @@
  },
 {
  "cell_type": "code",
- "execution_count": 23,
+ "execution_count": 12,
  "metadata": {},
  "outputs": [
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-     "  0%|          | 0/20 [00:00
-       "XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+       "XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
        "              colsample_bylevel=None, colsample_bynode=None,\n",
        "              colsample_bytree=None, early_stopping_rounds=None,\n",
        "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
@@ -806,7 +820,7 @@
        "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
        "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
        "              n_estimators=100, n_jobs=None, num_parallel_tree=None,\n",
-       "              predictor=None, random_state=None, ...)
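The notebook changes above move the fraud-detection workflow from pandas/NumPy onto an end-to-end GPU path: data is loaded with cudf, and index/label columns are handed to PyTorch through DLPack instead of torch.from_numpy. A minimal sketch of that handoff, assuming a CUDA-capable machine with compatible cudf and PyTorch builds (the frame and column name are illustrative, not taken from the repo):

import cudf
import torch

# Toy stand-in for the transaction frame used in the notebook.
df = cudf.DataFrame({"fraud_label": [0, 1, 0, 1]})

# .values exposes the column as a CuPy array backed by device memory;
# .toDlpack() wraps that buffer in a DLPack capsule, which torch.from_dlpack
# adopts without a copy through host RAM.
labels = torch.from_dlpack(df["fraud_label"].values.toDlpack()).long()

print(labels.device, labels.dtype)  # cuda:0 torch.int64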