diff --git a/.gitignore b/.gitignore index 66dd292..32b2b4b 100644 --- a/.gitignore +++ b/.gitignore @@ -160,14 +160,14 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +# miscellaneous +.gitpod.yml +**/development/ # Data and models data/*/* !.gitkeep -#devops -.gitpod.yml - # Logs Directories outputs mlruns diff --git a/README.md b/README.md index 85d993a..d934173 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,23 @@ -# PROJECT / BUSINESS UNDERSTANDING +# Log Anomaly Detection + + +
+ Table of Contents +
    +
  1. About The Project
  2. +
  3. + Getting Started + +
  4. +
  5. Analysis and Report
  6. +
  7. Contact
  8. +
  9. Acknowledgments
  10. +
+
+ +## About The Project In the realm of computing, logging involves the process of creating a record detailing events that transpire within a computer system. These events encompass issues, errors, or even informative updates about ongoing operations. These occurrences might manifest within the operating system or other software components. For every such event, a message or entry is documented. @@ -16,6 +35,54 @@ The data containing the logs was provided. The dataset was in **JSON** format wh The labels for logs are **"abnormal" and "normal"** -## Project Objectives +### Project Objectives 1. To train a machine learning model that can predict whether a given log is an anomaly or normal + +### Dataset +about the dataset +pic of samples of data + + +## Getting Started + +To get a local copy up and running, follow these simple example steps. + +### Prerequisites + +In + +```bash +# Clone this repository +$ git clone + +# Go into the repository +$ cd + +# Install dependencies +$ make setup +``` + +### Running the program + +train.py +different models +evaluate.py + +## Analysis and Report + +

(back to top)

+ + +## Contact + +If you have questions or need assistance, feel free to reach out to: + +**Name:** **Ipadeola Ezekiel Ladipo** +**Email:** +**GitHub:** [@rileydrizzy](https://github.com/rileydrizzy) +**LinkedIn:** [Ipadeola Ladipo](https://www.linkedin.com/in/ladipo-ipadeola/) + +

(back to top)

+ +--- diff --git a/dev.txt b/dev.txt new file mode 100644 index 0000000..913981e --- /dev/null +++ b/dev.txt @@ -0,0 +1,7 @@ +evaluate and analysis c # +pretrained embedding # +learning rate scheduler # +class weight +github repo +Confusion Matrix for error analysis c # +Hyper-tunning of the model to be done \ No newline at end of file diff --git a/development/devscript.py b/development/devscript.py deleted file mode 100644 index 393d90c..0000000 --- a/development/devscript.py +++ /dev/null @@ -1,60 +0,0 @@ -## Heruristic Benchmark -# TODO implement HB pr Dummy Classifier -import numpy as np -from sklearn.model_selection import train_test_split -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import ( - accuracy_score, - precision_score, - recall_score, - f1_score, - roc_auc_score, -) - -# Generate synthetic imbalanced data (replace this with your actual data) - -minority_class_size = 100 -majority_class_size = 1000 -minority_class = np.random.rand(minority_class_size, 2) + np.array([1, 1]) -majority_class = np.random.rand(majority_class_size, 2) - -# Calculate class proportion -class_proportion = majority_class_size // minority_class_size - -# Randomly sample majority class instances -sampled_majority_class_indices = np.random.choice( - majority_class_size, minority_class_size * class_proportion, replace=False -) -sampled_majority_class = majority_class[sampled_majority_class_indices] - -# Combine minority and sampled majority class instances -balanced_data = np.vstack((minority_class, sampled_majority_class)) -labels = np.hstack( - (np.ones(minority_class_size), np.zeros(minority_class_size * class_proportion)) -) - -# Split data into training and testing sets -X_train, X_test, y_train, y_test = train_test_split( - balanced_data, labels, test_size=0.2, random_state=42 -) - -# Train a logistic regression model -model = LogisticRegression() -model.fit(X_train, y_train) - -# Predictions -y_pred = model.predict(X_test) - -# Evaluate model 
performance -accuracy = accuracy_score(y_test, y_pred) -precision = precision_score(y_test, y_pred) -recall = recall_score(y_test, y_pred) -f1 = f1_score(y_test, y_pred) -roc_auc = roc_auc_score(y_test, y_pred) - -# Print results -print(f"Accuracy: {accuracy:.2f}") -print(f"Precision: {precision:.2f}") -print(f"Recall: {recall:.2f}") -print(f"F1-Score: {f1:.2f}") -print(f"AUC-ROC: {roc_auc:.2f}") diff --git a/development/new_dev.ipynb b/development/new_dev.ipynb deleted file mode 100644 index 1c5bc9a..0000000 --- a/development/new_dev.ipynb +++ /dev/null @@ -1,396 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "from time import strftime\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import mlflow\n", - "import numpy as np\n", - "import seaborn as sns\n", - "import tensorflow as tf\n", - "from sklearn.metrics import auc, confusion_matrix, precision_recall_curve\n", - "from sklearn.model_selection import train_test_split\n", - "from tensorflow.keras.layers import Dense\n", - "from tensorflow.keras.models import Sequential" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Generate some example data (replace this with your actual data)\n", - "X, y = np.random.rand(1000, 10), np.random.randint(2, size=(1000,))\n", - "\n", - "# Split the data into training and testing sets\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "25/25 [==============================] - 0s 6ms/step - loss: 0.7012 - accuracy: 0.4888 - val_loss: 0.6993 - val_accuracy: 0.5200\n", - "7/7 [==============================] - 0s 939us/step\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ 
- "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def plot_confusion_matrix(model, X_test, y_test, threshold=0.5, save_path=None):\n", - " \"\"\"\n", - " Generate and plot the confusion matrix for a TensorFlow model.\n", - "\n", - " Parameters:\n", - " - model: The trained TensorFlow model.\n", - " - X_test: Test features.\n", - " - y_test: True labels for the test set.\n", - " - threshold: Decision threshold for binary classification.\n", - " - save_path: Optional path to save the plot as an image.\n", - "\n", - " Returns:\n", - " - None\n", - " \"\"\"\n", - " # Make predictions on the test set\n", - " y_pred = model.predict(X_test)\n", - "\n", - " # Apply threshold for binary classification\n", - " y_pred_binary = (y_pred > threshold).astype(int)\n", - "\n", - " # For binary classification, flatten the true labels\n", - " y_test = np.reshape(y_test, (-1,))\n", - "\n", - " # Calculate the confusion matrix\n", - " cm = confusion_matrix(y_test, y_pred_binary)\n", - "\n", - " # Plot the confusion matrix\n", - " plt.figure(figsize=(8, 6))\n", - " sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, annot_kws={\"size\": 14})\n", - " plt.xlabel('Predicted')\n", - " plt.ylabel('True')\n", - " plt.title('Confusion Matrix')\n", - " plt.xticks([0, 1], ['Predicted Normal', 'Predicted Abr'])\n", - " plt.yticks([0, 1], ['True 0', 'True 1'])\n", - " plt.show()\n", - "\n", - " return cm\n", - "\n", - "# Example usage:\n", - "# Assuming you have a trained model, test data (X_test, y_test), and a directory to save the plot\n", - "# Replace placeholders with your actual data and paths\n", - "\n", - "# Build a simple binary classification model (replace this with your actual model)\n", - "model = Sequential([\n", - " Dense(64, activation='relu', input_shape=(10,)),\n", - " Dense(1, activation='sigmoid')\n", - "])\n", - "\n", - "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n", - "\n", - "# Train the 
model (replace this with your actual training process)\n", - "model.fit(X_train, y_train, epochs=1, batch_size=32, validation_data=(X_test, y_test))\n", - "\n", - "save_directory = \"path/to/save/directory\"\n", - "\n", - "# Call the function to generate and save the confusion matrix\n", - "anw = plot_confusion_matrix(model, X_test, y_test, save_path=f\"{save_directory}/confusion_matrix.png\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[101, 8],\n", - " [ 68, 23]])" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "anw" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10\n", - "25/25 [==============================] - 2s 23ms/step - loss: 0.6976 - accuracy: 0.5188 - val_loss: 0.7043 - val_accuracy: 0.3650\n", - "Epoch 2/10\n", - "25/25 [==============================] - 0s 6ms/step - loss: 0.6897 - accuracy: 0.5462 - val_loss: 0.7046 - val_accuracy: 0.4050\n", - "Epoch 3/10\n", - "25/25 [==============================] - 0s 7ms/step - loss: 0.6882 - accuracy: 0.5575 - val_loss: 0.7025 - val_accuracy: 0.4450\n", - "Epoch 4/10\n", - "25/25 [==============================] - 0s 8ms/step - loss: 0.6871 - accuracy: 0.5550 - val_loss: 0.7036 - val_accuracy: 0.4300\n", - "Epoch 5/10\n", - "25/25 [==============================] - 0s 9ms/step - loss: 0.6859 - accuracy: 0.5713 - val_loss: 0.7053 - val_accuracy: 0.4300\n", - "Epoch 6/10\n", - "25/25 [==============================] - 0s 9ms/step - loss: 0.6847 - accuracy: 0.5900 - val_loss: 0.7054 - val_accuracy: 0.4350\n", - "Epoch 7/10\n", - "25/25 [==============================] - 0s 8ms/step - loss: 0.6839 - accuracy: 0.5775 - val_loss: 0.7060 - val_accuracy: 0.4400\n", - "Epoch 8/10\n", - "25/25 [==============================] - 0s 8ms/step - loss: 
0.6837 - accuracy: 0.5838 - val_loss: 0.7075 - val_accuracy: 0.4500\n", - "Epoch 9/10\n", - "25/25 [==============================] - 0s 9ms/step - loss: 0.6824 - accuracy: 0.5775 - val_loss: 0.7055 - val_accuracy: 0.4450\n", - "Epoch 10/10\n", - "25/25 [==============================] - 0s 8ms/step - loss: 0.6814 - accuracy: 0.5750 - val_loss: 0.7079 - val_accuracy: 0.4450\n", - "7/7 [==============================] - 0s 2ms/step\n" - ] - }, - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'path/to/save/directory/precision_recall_curve.png'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[2], line 72\u001b[0m\n\u001b[0;32m 69\u001b[0m save_directory \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpath/to/save/directory\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;66;03m# Call the function to generate and save the precision-recall curve\u001b[39;00m\n\u001b[1;32m---> 72\u001b[0m \u001b[43mplot_precision_recall_curve\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msave_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43msave_directory\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/precision_recall_curve.png\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "Cell \u001b[1;32mIn[2], line 46\u001b[0m, in \u001b[0;36mplot_precision_recall_curve\u001b[1;34m(model, X_test, y_test, save_path)\u001b[0m\n\u001b[0;32m 44\u001b[0m 
\u001b[38;5;66;03m# Save the plot if save_path is provided\u001b[39;00m\n\u001b[0;32m 45\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m save_path:\n\u001b[1;32m---> 46\u001b[0m \u001b[43mplt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msavefig\u001b[49m\u001b[43m(\u001b[49m\u001b[43msave_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 47\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrecision-Recall Curve saved at: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msave_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 48\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\pyplot.py:1023\u001b[0m, in \u001b[0;36msavefig\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 1020\u001b[0m \u001b[38;5;129m@_copy_docstring_and_deprecators\u001b[39m(Figure\u001b[38;5;241m.\u001b[39msavefig)\n\u001b[0;32m 1021\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msavefig\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 1022\u001b[0m fig \u001b[38;5;241m=\u001b[39m gcf()\n\u001b[1;32m-> 1023\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msavefig\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1024\u001b[0m fig\u001b[38;5;241m.\u001b[39mcanvas\u001b[38;5;241m.\u001b[39mdraw_idle() \u001b[38;5;66;03m# Need this if 'transparent=True', to reset colors.\u001b[39;00m\n\u001b[0;32m 1025\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n", - "File 
\u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\figure.py:3343\u001b[0m, in \u001b[0;36mFigure.savefig\u001b[1;34m(self, fname, transparent, **kwargs)\u001b[0m\n\u001b[0;32m 3339\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ax \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes:\n\u001b[0;32m 3340\u001b[0m stack\u001b[38;5;241m.\u001b[39menter_context(\n\u001b[0;32m 3341\u001b[0m ax\u001b[38;5;241m.\u001b[39mpatch\u001b[38;5;241m.\u001b[39m_cm_set(facecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m, edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m-> 3343\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcanvas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprint_figure\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backend_bases.py:2366\u001b[0m, in \u001b[0;36mFigureCanvasBase.print_figure\u001b[1;34m(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)\u001b[0m\n\u001b[0;32m 2362\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 2363\u001b[0m \u001b[38;5;66;03m# _get_renderer may change the figure dpi (as vector formats\u001b[39;00m\n\u001b[0;32m 2364\u001b[0m \u001b[38;5;66;03m# force the figure dpi to 72), so we need to set it again here.\u001b[39;00m\n\u001b[0;32m 2365\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m 
cbook\u001b[38;5;241m.\u001b[39m_setattr_cm(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure, dpi\u001b[38;5;241m=\u001b[39mdpi):\n\u001b[1;32m-> 2366\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mprint_method\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2367\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2368\u001b[0m \u001b[43m \u001b[49m\u001b[43mfacecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfacecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2369\u001b[0m \u001b[43m \u001b[49m\u001b[43medgecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43medgecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2370\u001b[0m \u001b[43m \u001b[49m\u001b[43morientation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morientation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2371\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox_inches_restore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_bbox_inches_restore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2372\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2373\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 2374\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m bbox_inches \u001b[38;5;129;01mand\u001b[39;00m restore_bbox:\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backend_bases.py:2232\u001b[0m, in \u001b[0;36mFigureCanvasBase._switch_canvas_and_return_print_method..\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 2228\u001b[0m optional_kws \u001b[38;5;241m=\u001b[39m { \u001b[38;5;66;03m# Passed by print_figure for other renderers.\u001b[39;00m\n\u001b[0;32m 2229\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdpi\u001b[39m\u001b[38;5;124m\"\u001b[39m, 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124medgecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morientation\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 2230\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbbox_inches_restore\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[0;32m 2231\u001b[0m skip \u001b[38;5;241m=\u001b[39m optional_kws \u001b[38;5;241m-\u001b[39m {\u001b[38;5;241m*\u001b[39minspect\u001b[38;5;241m.\u001b[39msignature(meth)\u001b[38;5;241m.\u001b[39mparameters}\n\u001b[1;32m-> 2232\u001b[0m print_method \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mwraps(meth)(\u001b[38;5;28;01mlambda\u001b[39;00m \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: \u001b[43mmeth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2233\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mskip\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 2234\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: 
\u001b[38;5;66;03m# Let third-parties do as they see fit.\u001b[39;00m\n\u001b[0;32m 2235\u001b[0m print_method \u001b[38;5;241m=\u001b[39m meth\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backends\\backend_agg.py:509\u001b[0m, in \u001b[0;36mFigureCanvasAgg.print_png\u001b[1;34m(self, filename_or_obj, metadata, pil_kwargs)\u001b[0m\n\u001b[0;32m 462\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprint_png\u001b[39m(\u001b[38;5;28mself\u001b[39m, filename_or_obj, \u001b[38;5;241m*\u001b[39m, metadata\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, pil_kwargs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 463\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 464\u001b[0m \u001b[38;5;124;03m Write the figure to a PNG file.\u001b[39;00m\n\u001b[0;32m 465\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 507\u001b[0m \u001b[38;5;124;03m *metadata*, including the default 'Software' key.\u001b[39;00m\n\u001b[0;32m 508\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 509\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_print_pil\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpng\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backends\\backend_agg.py:458\u001b[0m, in \u001b[0;36mFigureCanvasAgg._print_pil\u001b[1;34m(self, filename_or_obj, fmt, pil_kwargs, metadata)\u001b[0m\n\u001b[0;32m 453\u001b[0m 
\u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 454\u001b[0m \u001b[38;5;124;03mDraw the canvas, then save it using `.image.imsave` (to which\u001b[39;00m\n\u001b[0;32m 455\u001b[0m \u001b[38;5;124;03m*pil_kwargs* and *metadata* are forwarded).\u001b[39;00m\n\u001b[0;32m 456\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 457\u001b[0m FigureCanvasAgg\u001b[38;5;241m.\u001b[39mdraw(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m--> 458\u001b[0m \u001b[43mmpl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimsave\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuffer_rgba\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfmt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morigin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mupper\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[43mdpi\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdpi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpil_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\image.py:1689\u001b[0m, in 
\u001b[0;36mimsave\u001b[1;34m(fname, arr, vmin, vmax, cmap, format, origin, dpi, metadata, pil_kwargs)\u001b[0m\n\u001b[0;32m 1687\u001b[0m pil_kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mformat\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mformat\u001b[39m)\n\u001b[0;32m 1688\u001b[0m pil_kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdpi\u001b[39m\u001b[38;5;124m\"\u001b[39m, (dpi, dpi))\n\u001b[1;32m-> 1689\u001b[0m \u001b[43mimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\PIL\\Image.py:2428\u001b[0m, in \u001b[0;36mImage.save\u001b[1;34m(self, fp, format, **params)\u001b[0m\n\u001b[0;32m 2426\u001b[0m fp \u001b[38;5;241m=\u001b[39m builtins\u001b[38;5;241m.\u001b[39mopen(filename, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr+b\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 2427\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 2428\u001b[0m fp \u001b[38;5;241m=\u001b[39m \u001b[43mbuiltins\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mw+b\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2430\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 2431\u001b[0m save_handler(\u001b[38;5;28mself\u001b[39m, fp, filename)\n", - "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'path/to/save/directory/precision_recall_curve.png'" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from sklearn.metrics import auc, precision_recall_curve\n", - "from sklearn.model_selection import train_test_split\n", - "from tensorflow.keras.layers import Dense\n", - "from tensorflow.keras.models import Sequential\n", - "\n", - "\n", - "def plot_precision_recall_curve(model, X_test, y_test, save_path=None):\n", - " \"\"\"\n", - " Generate and plot the precision-recall curve for a TensorFlow model.\n", - "\n", - " Parameters:\n", - " - model: The trained TensorFlow model.\n", - " - X_test: Test features.\n", - " - y_test: True labels for the test set.\n", - " - save_path: Optional path to save the plot as an image.\n", - "\n", - " Returns:\n", - " - None\n", - " \"\"\"\n", - " # Make predictions on the test set\n", - " y_pred = model.predict(X_test)\n", - "\n", - " # For binary classification, flatten the true labels\n", - " y_test = np.reshape(y_test, (-1,))\n", - " y_pred = np.reshape(y_pred, (-1,))\n", - "\n", - " # Calculate precision and recall\n", - " precision, recall, thresholds = precision_recall_curve(y_test, y_pred)\n", - "\n", - " # Calculate the area under the precision-recall curve\n", - " auc_score = auc(recall, precision)\n", - "\n", - " # Plot the precision-recall curve\n", - " plt.figure(figsize=(8, 6))\n", - " plt.plot(recall, precision, label=f'Precision-Recall Curve (AUC = {auc_score:.2f})', color='b')\n", - " plt.xlabel('Recall')\n", - " plt.ylabel('Precision')\n", - " plt.title('Precision-Recall Curve')\n", - " plt.legend(loc='upper right')\n", - " plt.grid(True)\n", - "\n", - " # Save the plot if save_path is provided\n", - " if save_path:\n", - " plt.savefig(save_path)\n", - " print(f\"Precision-Recall Curve saved at: {save_path}\")\n", - " else:\n", - " plt.show()\n", - "\n", - "# Generate some example data (replace this with your actual data)\n", - "X, y 
= np.random.rand(1000, 10), np.random.randint(2, size=(1000,))\n", - "\n", - "# Split the data into training and testing sets\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", - "\n", - "# Build a simple binary classification model (replace this with your actual model)\n", - "model = Sequential([\n", - " Dense(64, activation='relu', input_shape=(10,)),\n", - " Dense(1, activation='sigmoid')\n", - "])\n", - "\n", - "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n", - "\n", - "# Train the model (replace this with your actual training process)\n", - "model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))\n", - "\n", - "# Specify the directory to save the plot\n", - "save_directory = \"path/to/save/directory\"\n", - "\n", - "# Call the function to generate and save the precision-recall curve\n", - "plot_precision_recall_curve(model, X_test, y_test, save_path=f\"{save_directory}/precision_recall_curve.png\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/10\n", - "25/25 [==============================] - 3s 38ms/step - loss: 0.7014 - accuracy: 0.4863 - val_loss: 0.7011 - val_accuracy: 0.4850\n", - "Epoch 2/10\n", - "25/25 [==============================] - 0s 10ms/step - loss: 0.6968 - accuracy: 0.4888 - val_loss: 0.6990 - val_accuracy: 0.4650\n", - "Epoch 3/10\n", - "25/25 [==============================] - 0s 9ms/step - loss: 0.6937 - accuracy: 0.5188 - val_loss: 0.6979 - val_accuracy: 0.4550\n", - "Epoch 4/10\n", - "25/25 [==============================] - 0s 9ms/step - loss: 0.6936 - accuracy: 0.4975 - val_loss: 0.6979 - val_accuracy: 0.4750\n", - "Epoch 5/10\n", - "25/25 [==============================] - 0s 9ms/step - loss: 0.6907 - accuracy: 0.5250 - val_loss: 0.6979 - val_accuracy: 0.4950\n", - "Epoch 6/10\n", - "25/25 
[==============================] - 0s 9ms/step - loss: 0.6892 - accuracy: 0.5337 - val_loss: 0.7022 - val_accuracy: 0.4900\n", - "Epoch 7/10\n", - "25/25 [==============================] - 0s 9ms/step - loss: 0.6874 - accuracy: 0.5375 - val_loss: 0.6984 - val_accuracy: 0.4850\n", - "Epoch 8/10\n", - "25/25 [==============================] - 0s 9ms/step - loss: 0.6860 - accuracy: 0.5512 - val_loss: 0.7012 - val_accuracy: 0.4750\n", - "Epoch 9/10\n", - "25/25 [==============================] - 0s 8ms/step - loss: 0.6863 - accuracy: 0.5462 - val_loss: 0.7008 - val_accuracy: 0.4850\n", - "Epoch 10/10\n", - "25/25 [==============================] - 0s 7ms/step - loss: 0.6846 - accuracy: 0.5362 - val_loss: 0.7001 - val_accuracy: 0.4900\n", - "7/7 [==============================] - 0s 3ms/step\n" - ] - }, - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'path/to/save/directory/precision_recall_curve_with_thresholds.png'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[3], line 76\u001b[0m\n\u001b[0;32m 73\u001b[0m save_directory \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpath/to/save/directory\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 75\u001b[0m \u001b[38;5;66;03m# Call the function to generate and save the precision-recall curve with threshold points\u001b[39;00m\n\u001b[1;32m---> 76\u001b[0m \u001b[43mplot_precision_recall_curve\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43msave_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43msave_directory\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/precision_recall_curve_with_thresholds.png\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "Cell \u001b[1;32mIn[3], line 52\u001b[0m, in \u001b[0;36mplot_precision_recall_curve\u001b[1;34m(model, X_test, y_test, save_path)\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[38;5;66;03m# Save the plot if save_path is provided\u001b[39;00m\n\u001b[0;32m 51\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m save_path:\n\u001b[1;32m---> 52\u001b[0m \u001b[43mplt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msavefig\u001b[49m\u001b[43m(\u001b[49m\u001b[43msave_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 53\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrecision-Recall Curve saved at: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msave_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 54\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\pyplot.py:1023\u001b[0m, in \u001b[0;36msavefig\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 1020\u001b[0m \u001b[38;5;129m@_copy_docstring_and_deprecators\u001b[39m(Figure\u001b[38;5;241m.\u001b[39msavefig)\n\u001b[0;32m 1021\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msavefig\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 1022\u001b[0m fig \u001b[38;5;241m=\u001b[39m gcf()\n\u001b[1;32m-> 1023\u001b[0m res \u001b[38;5;241m=\u001b[39m 
\u001b[43mfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msavefig\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1024\u001b[0m fig\u001b[38;5;241m.\u001b[39mcanvas\u001b[38;5;241m.\u001b[39mdraw_idle() \u001b[38;5;66;03m# Need this if 'transparent=True', to reset colors.\u001b[39;00m\n\u001b[0;32m 1025\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\figure.py:3343\u001b[0m, in \u001b[0;36mFigure.savefig\u001b[1;34m(self, fname, transparent, **kwargs)\u001b[0m\n\u001b[0;32m 3339\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ax \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes:\n\u001b[0;32m 3340\u001b[0m stack\u001b[38;5;241m.\u001b[39menter_context(\n\u001b[0;32m 3341\u001b[0m ax\u001b[38;5;241m.\u001b[39mpatch\u001b[38;5;241m.\u001b[39m_cm_set(facecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m, edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m-> 3343\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcanvas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprint_figure\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backend_bases.py:2366\u001b[0m, in \u001b[0;36mFigureCanvasBase.print_figure\u001b[1;34m(self, filename, 
dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)\u001b[0m\n\u001b[0;32m 2362\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 2363\u001b[0m \u001b[38;5;66;03m# _get_renderer may change the figure dpi (as vector formats\u001b[39;00m\n\u001b[0;32m 2364\u001b[0m \u001b[38;5;66;03m# force the figure dpi to 72), so we need to set it again here.\u001b[39;00m\n\u001b[0;32m 2365\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m cbook\u001b[38;5;241m.\u001b[39m_setattr_cm(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure, dpi\u001b[38;5;241m=\u001b[39mdpi):\n\u001b[1;32m-> 2366\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mprint_method\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2367\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2368\u001b[0m \u001b[43m \u001b[49m\u001b[43mfacecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfacecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2369\u001b[0m \u001b[43m \u001b[49m\u001b[43medgecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43medgecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2370\u001b[0m \u001b[43m \u001b[49m\u001b[43morientation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morientation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2371\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox_inches_restore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_bbox_inches_restore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2372\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2373\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 2374\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m bbox_inches \u001b[38;5;129;01mand\u001b[39;00m restore_bbox:\n", - "File 
\u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backend_bases.py:2232\u001b[0m, in \u001b[0;36mFigureCanvasBase._switch_canvas_and_return_print_method..\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 2228\u001b[0m optional_kws \u001b[38;5;241m=\u001b[39m { \u001b[38;5;66;03m# Passed by print_figure for other renderers.\u001b[39;00m\n\u001b[0;32m 2229\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdpi\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124medgecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morientation\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 2230\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbbox_inches_restore\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[0;32m 2231\u001b[0m skip \u001b[38;5;241m=\u001b[39m optional_kws \u001b[38;5;241m-\u001b[39m {\u001b[38;5;241m*\u001b[39minspect\u001b[38;5;241m.\u001b[39msignature(meth)\u001b[38;5;241m.\u001b[39mparameters}\n\u001b[1;32m-> 2232\u001b[0m print_method \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mwraps(meth)(\u001b[38;5;28;01mlambda\u001b[39;00m \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: \u001b[43mmeth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2233\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mskip\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 2234\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# Let third-parties do as they see fit.\u001b[39;00m\n\u001b[0;32m 2235\u001b[0m print_method \u001b[38;5;241m=\u001b[39m meth\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backends\\backend_agg.py:509\u001b[0m, in \u001b[0;36mFigureCanvasAgg.print_png\u001b[1;34m(self, filename_or_obj, metadata, pil_kwargs)\u001b[0m\n\u001b[0;32m 462\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprint_png\u001b[39m(\u001b[38;5;28mself\u001b[39m, filename_or_obj, \u001b[38;5;241m*\u001b[39m, metadata\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, pil_kwargs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 463\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 464\u001b[0m \u001b[38;5;124;03m Write the figure to a PNG file.\u001b[39;00m\n\u001b[0;32m 465\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 507\u001b[0m \u001b[38;5;124;03m *metadata*, including the default 'Software' key.\u001b[39;00m\n\u001b[0;32m 508\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 509\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_print_pil\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpng\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backends\\backend_agg.py:458\u001b[0m, in \u001b[0;36mFigureCanvasAgg._print_pil\u001b[1;34m(self, filename_or_obj, fmt, pil_kwargs, metadata)\u001b[0m\n\u001b[0;32m 453\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 454\u001b[0m \u001b[38;5;124;03mDraw the canvas, then save it using `.image.imsave` (to which\u001b[39;00m\n\u001b[0;32m 455\u001b[0m \u001b[38;5;124;03m*pil_kwargs* and *metadata* are forwarded).\u001b[39;00m\n\u001b[0;32m 456\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 457\u001b[0m FigureCanvasAgg\u001b[38;5;241m.\u001b[39mdraw(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m--> 458\u001b[0m \u001b[43mmpl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimsave\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuffer_rgba\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfmt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morigin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mupper\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 460\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdpi\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdpi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpil_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\image.py:1689\u001b[0m, in \u001b[0;36mimsave\u001b[1;34m(fname, arr, vmin, vmax, cmap, format, origin, dpi, metadata, pil_kwargs)\u001b[0m\n\u001b[0;32m 1687\u001b[0m pil_kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mformat\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mformat\u001b[39m)\n\u001b[0;32m 1688\u001b[0m pil_kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdpi\u001b[39m\u001b[38;5;124m\"\u001b[39m, (dpi, dpi))\n\u001b[1;32m-> 1689\u001b[0m \u001b[43mimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\PIL\\Image.py:2428\u001b[0m, in \u001b[0;36mImage.save\u001b[1;34m(self, fp, format, **params)\u001b[0m\n\u001b[0;32m 2426\u001b[0m fp \u001b[38;5;241m=\u001b[39m builtins\u001b[38;5;241m.\u001b[39mopen(filename, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr+b\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 2427\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 2428\u001b[0m fp \u001b[38;5;241m=\u001b[39m 
\u001b[43mbuiltins\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mw+b\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2430\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 2431\u001b[0m save_handler(\u001b[38;5;28mself\u001b[39m, fp, filename)\n", - "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'path/to/save/directory/precision_recall_curve_with_thresholds.png'" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "# Example usage:\n", - "# Assuming you have a trained model, test data (X_test, y_test), and a directory to save the plot\n", - "# Replace placeholders with your actual data and paths\n", - "\n", - "# Build a simple binary classification model (replace this with your actual model)\n", - "model = Sequential([\n", - " Dense(64, activation='relu', input_shape=(10,)),\n", - " Dense(1, activation='sigmoid')\n", - "])\n", - "\n", - "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n", - "\n", - "# Train the model (replace this with your actual training process)\n", - "model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))\n", - "\n", - "# Specify the directory to save the plot\n", - "save_directory = \"path/to/save/directory\"\n", - "\n", - "# Call the function to generate and save the precision-recall curve with threshold points\n", - "plot_precision_recall_curve(model, X_test, y_test, save_path=f\"{save_directory}/precision_recall_curve_with_thresholds.png\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/development/train_dev.ipynb b/development/train_dev.ipynb deleted file mode 100644 index 26c0369..0000000 --- a/development/train_dev.ipynb +++ /dev/null @@ -1,568 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pathlib 
import Path\n", - "from time import strftime\n", - "\n", - "import mlflow\n", - "import pandas as pd\n", - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'c:\\\\Main Workspace\\\\LogAnomalyDetect\\\\src'" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pwd()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "c:\\Main Workspace\\LogAnomalyDetect\\src\n" - ] - } - ], - "source": [ - "%cd src" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[nltk_data] Downloading package stopwords to\n", - "[nltk_data] C:\\Users\\Yinka\\AppData\\Roaming\\nltk_data...\n", - "[nltk_data] Package stopwords is already up-to-date!\n" - ] - } - ], - "source": [ - "from dataset_loader import get_dataset, preprocess_and_encode, get_vectorization_layer\n", - "from utils.common_utils import get_device_strategy\n", - "from src.utils.common_utils import set_seed" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Random seed set as 42\n" - ] - } - ], - "source": [ - "set_seed()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "data_path = 'development/dev.gzip'\n", - "clean_dev = \"development/clean_dev.gzip\"" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
LogTarget
01119803499 2005.06.26 R03-M0-NC-C:J03-U01 200...normal
11119803105 2005.06.26 R04-M0-NE-C:J06-U11 200...normal
21121496169 2005.07.15 R06-M0-N6-C:J03-U01 200...normal
31120968564 2005.07.09 R26-M0-N0-C:J03-U01 200...normal
41120953205 2005.07.09 R27-M0-N7-C:J11-U01 200...normal
\n", - "
" - ], - "text/plain": [ - " Log Target\n", - "0 1119803499 2005.06.26 R03-M0-NC-C:J03-U01 200... normal\n", - "1 1119803105 2005.06.26 R04-M0-NE-C:J06-U11 200... normal\n", - "2 1121496169 2005.07.15 R06-M0-N6-C:J03-U01 200... normal\n", - "3 1120968564 2005.07.09 R26-M0-N0-C:J03-U01 200... normal\n", - "4 1120953205 2005.07.09 R27-M0-N7-C:J11-U01 200... normal" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.read_parquet(data_path)\n", - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1000, 2)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "#preprocess_and_encode(data_path, \"development/clean_dev.gzip\")" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "df_dataset = get_dataset(data_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(, )\n", - "(, )\n", - "(, )\n" - ] - } - ], - "source": [ - "for idx in df_dataset.take(3):\n", - " print(idx)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "clean_dataset = get_dataset(clean_dev)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(, )\n", - "(, )\n" - ] - } - ], - "source": [ - "for idx in clean_dataset.take(2):\n", - " print(idx)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "token_layer, vocab_s = get_vectorization_layer(clean_dataset)" - ] - }, - { 
- "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "420" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vocab_s" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "a = token_layer(\"k e r n e l i n f o\")" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "S = get_device_strategy()\n", - "S.num_replicas_in_sync" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "from src.models.model_loader import ModelLoader" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "loader = ModelLoader()\n", - "test_model_loader = loader.get_model(\"cnn\")" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "test_model = test_model_loader(embedding_vocab= vocab_s, embedding_dim= 5,\n", - " vectorization_layer=token_layer)" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [], - "source": [ - "loss = tf.keras.losses.BinaryCrossentropy()\n", - "optim = tf.keras.optimizers.Adadelta(learning_rate=0.01)\n", - "tensorb = tf.keras.callbacks.TensorBoard()\n", - "f1_score = tf.keras.metrics.F1Score()" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "test_model.compile(loss=loss, 
optimizer=optim)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/20\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "500/500 [==============================] - 6s 9ms/step - loss: 0.6863 - val_loss: 0.6790\n", - "Epoch 2/20\n", - "500/500 [==============================] - 4s 7ms/step - loss: 0.6715 - val_loss: 0.6627\n", - "Epoch 3/20\n", - "500/500 [==============================] - 3s 7ms/step - loss: 0.6536 - val_loss: 0.6418\n", - "Epoch 4/20\n", - "500/500 [==============================] - 3s 7ms/step - loss: 0.6304 - val_loss: 0.6179\n", - "Epoch 5/20\n", - "500/500 [==============================] - 3s 7ms/step - loss: 0.6084 - val_loss: 0.5926\n", - "Epoch 6/20\n", - "500/500 [==============================] - 4s 7ms/step - loss: 0.5784 - val_loss: 0.5634\n", - "Epoch 7/20\n", - "500/500 [==============================] - 4s 7ms/step - loss: 0.5538 - val_loss: 0.5329\n", - "Epoch 8/20\n", - "500/500 [==============================] - 3s 6ms/step - loss: 0.5198 - val_loss: 0.4995\n", - "Epoch 9/20\n", - "500/500 [==============================] - 3s 6ms/step - loss: 0.4875 - val_loss: 0.4642\n", - "Epoch 10/20\n", - "500/500 [==============================] - 3s 7ms/step - loss: 0.4586 - val_loss: 0.4288\n", - "Epoch 11/20\n", - "500/500 [==============================] - 3s 6ms/step - loss: 0.4300 - val_loss: 0.3940\n", - "Epoch 12/20\n", - "500/500 [==============================] - 4s 9ms/step - loss: 0.3902 - val_loss: 0.3593\n", - "Epoch 13/20\n", - "500/500 [==============================] - 3s 6ms/step - loss: 0.3676 - val_loss: 0.3279\n", - "Epoch 14/20\n", - "500/500 [==============================] - 3s 6ms/step - loss: 0.3465 - val_loss: 0.3010\n", - "Epoch 15/20\n", - "500/500 [==============================] - 3s 6ms/step - loss: 0.3201 - val_loss: 0.2765\n", - "Epoch 16/20\n", - "500/500 
[==============================] - 3s 7ms/step - loss: 0.3088 - val_loss: 0.2555\n", - "Epoch 17/20\n", - "500/500 [==============================] - 5s 10ms/step - loss: 0.2840 - val_loss: 0.2389\n", - "Epoch 18/20\n", - "500/500 [==============================] - 4s 9ms/step - loss: 0.2773 - val_loss: 0.2236\n", - "Epoch 19/20\n", - "500/500 [==============================] - 4s 8ms/step - loss: 0.2587 - val_loss: 0.2123\n", - "Epoch 20/20\n", - "500/500 [==============================] - 5s 10ms/step - loss: 0.2585 - val_loss: 0.2023\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_model.fit(clean_dataset,epochs=20, validation_data= clean_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/2\n", - "500/500 [==============================] - 10s 15ms/step - loss: 0.6919 - val_loss: 0.6830\n", - "Epoch 2/2\n", - "500/500 [==============================] - 4s 8ms/step - loss: 0.6743 - val_loss: 0.6653\n" - ] - } - ], - "source": [ - "with S.scope():\n", - " token_layer, vocab_s = get_vectorization_layer(clean_dataset)\n", - " new_test_model = test_model_loader(embedding_vocab= vocab_s, embedding_dim= 5,\n", - " vectorization_layer=token_layer)\n", - " new_test_model.compile(loss=loss, optimizer=optim)\n", - " new_test_model.fit(clean_dataset,epochs=2, validation_data= clean_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "test_model.fit()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(, )\n" - ] - } - ], - "source": [ - "\"\"\"def vectorize_text(text, label):\n", - " text = tf.expand_dims(text, -1)\n", - " return tokenizer(text), 
label\n", - "final_dataset = dataset.map(vectorize_text)\n", - "for sample in final_dataset.take(1):\n", - " print(sample)\"\"\"" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.2" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/config/config.yaml b/src/config/config.yaml index 0514019..53af5d0 100644 --- a/src/config/config.yaml +++ b/src/config/config.yaml @@ -23,6 +23,8 @@ params: total_epochs: 1 learning_rate: 0.01 cm_threshold: 0.5 + majority_class_weight : 0 + minority_class_weight : 0 model_name: "11DCNN" diff --git a/src/main.py b/src/main.py index 86e1456..c73c33f 100644 --- a/src/main.py +++ b/src/main.py @@ -34,9 +34,14 @@ # Importing functions and classes from other modules from dataset_loader import get_dataset, get_vectorization_layer from models.model_loader import ModelLoader -from utils.common_utils import (get_device_strategy, plot_confusion_matrix, - plot_precision_recall_curve, - set_mlflow_tracking, set_seed, tensorboard_dir) +from utils.common_utils import ( + get_device_strategy, + plot_confusion_matrix, + plot_precision_recall_curve, + set_mlflow_tracking, + set_seed, + tensorboard_dir, +) from utils.logging import logger @@ -58,6 +63,7 @@ def main(cfg: DictConfig): # Set up MLflow tracking for the experiment experiment_id = set_mlflow_tracking(cfg.model_name) + # Callbacks for model training checkpoint_path = f"artifacts/{cfg.model_name}/model_checkpoints" @@ -95,7 +101,7 @@ def main(cfg: DictConfig): ) logger.info(f"Retrieving the model: {cfg.model_name}") - load_model_func = ModelLoader().get_model(cfg.model_name) + build_model_func = ModelLoader().get_model(cfg.model_name) 
loss_func = tf.keras.losses.BinaryCrossentropy() optim = tf.keras.optimizers.Adam(learning_rate=cfg.params.learning_rate) f1_score_metrics = tf.keras.metrics.F1Score( @@ -113,15 +119,20 @@ def main(cfg: DictConfig): ): mlflow.set_tag("model_name", cfg.model_name) + # Class weigth + class_weight = { + 0: cfg.params.majority_class_weight, + 1: cfg.params.minority_class_weight, + } + # Training the model within the distributed strategy scope with strategy.scope(): tokenizer, vocab_size = get_vectorization_layer(dataset=train_data) - model = load_model_func( + model = build_model_func( vectorization_layer=tokenizer, embedding_vocab=vocab_size ) model.compile( - loss=loss_func, - optimizer=optim, + loss=loss_func, optimizer=optim, metrics=[f1_score_metrics] ) logger.info( f" Training {cfg.model_name} for {cfg.params.total_epochs} epochs"