diff --git a/.gitignore b/.gitignore
index 66dd292..32b2b4b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,14 +160,14 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
+# miscellaneous
+.gitpod.yml
+**/development/
# Data and models
data/*/*
!.gitkeep
-#devops
-.gitpod.yml
-
# Logs Directories
outputs
mlruns
diff --git a/README.md b/README.md
index 85d993a..d934173 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,23 @@
-# PROJECT / BUSINESS UNDERSTANDING
+# Log Anomaly Detection
+
+
+
+ Table of Contents
+
+ About The Project
+
+ Getting Started
+
+
+ Analysis and Report
+ Contact
+ Acknowledgments
+
+
+
+## About The Project
In the realm of computing, logging involves the process of creating a record detailing events that transpire within a computer system. These events encompass issues, errors, or even informative updates about ongoing operations. These occurrences might manifest within the operating system or other software components. For every such event, a message or entry is documented.
@@ -16,6 +35,54 @@ The data containing the logs was provided. The dataset was in **JSON** format wh
The labels for logs are **"abnormal" and "normal"**
-## Project Objectives
+### Project Objectives
1. To train a machine learning model that can predict whether a given log is an anomaly or normal
+
+### Dataset
+about the dataset
+pic of samples of data
+
+
+## Getting Started
+
+To get a local copy up and running, follow these simple steps.
+
+### Prerequisites
+
+In
+
+```bash
+# Clone this repository
+$ git clone
+
+# Go into the repository
+$ cd
+
+# Install dependencies
+$ make setup
+```
+
+### Running the program
+
+train.py
+different models
+evaluate.py
+
+## Analysis and Report
+
+
(back to top )
+
+
+## Contact
+
+If you have questions or need assistance, feel free to reach out to:
+
+**Name:** **Ipadeola Ezekiel Ladipo**
+**Email:**
+**GitHub:** [@rileydrizzy](https://github.com/rileydrizzy)
+**LinkedIn:** [Ipadeola Ladipo](https://www.linkedin.com/in/ladipo-ipadeola/)
+
+(back to top )
+
+---
diff --git a/dev.txt b/dev.txt
new file mode 100644
index 0000000..913981e
--- /dev/null
+++ b/dev.txt
@@ -0,0 +1,7 @@
+evaluate and analysis c #
+pretrained embedding #
+learning rate scheduler #
+class weight
+github repo
+Confusion Matrix for error analysis c #
+Hyperparameter tuning of the model to be done
\ No newline at end of file
diff --git a/development/devscript.py b/development/devscript.py
deleted file mode 100644
index 393d90c..0000000
--- a/development/devscript.py
+++ /dev/null
@@ -1,60 +0,0 @@
-## Heruristic Benchmark
-# TODO implement HB pr Dummy Classifier
-import numpy as np
-from sklearn.model_selection import train_test_split
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import (
- accuracy_score,
- precision_score,
- recall_score,
- f1_score,
- roc_auc_score,
-)
-
-# Generate synthetic imbalanced data (replace this with your actual data)
-
-minority_class_size = 100
-majority_class_size = 1000
-minority_class = np.random.rand(minority_class_size, 2) + np.array([1, 1])
-majority_class = np.random.rand(majority_class_size, 2)
-
-# Calculate class proportion
-class_proportion = majority_class_size // minority_class_size
-
-# Randomly sample majority class instances
-sampled_majority_class_indices = np.random.choice(
- majority_class_size, minority_class_size * class_proportion, replace=False
-)
-sampled_majority_class = majority_class[sampled_majority_class_indices]
-
-# Combine minority and sampled majority class instances
-balanced_data = np.vstack((minority_class, sampled_majority_class))
-labels = np.hstack(
- (np.ones(minority_class_size), np.zeros(minority_class_size * class_proportion))
-)
-
-# Split data into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(
- balanced_data, labels, test_size=0.2, random_state=42
-)
-
-# Train a logistic regression model
-model = LogisticRegression()
-model.fit(X_train, y_train)
-
-# Predictions
-y_pred = model.predict(X_test)
-
-# Evaluate model performance
-accuracy = accuracy_score(y_test, y_pred)
-precision = precision_score(y_test, y_pred)
-recall = recall_score(y_test, y_pred)
-f1 = f1_score(y_test, y_pred)
-roc_auc = roc_auc_score(y_test, y_pred)
-
-# Print results
-print(f"Accuracy: {accuracy:.2f}")
-print(f"Precision: {precision:.2f}")
-print(f"Recall: {recall:.2f}")
-print(f"F1-Score: {f1:.2f}")
-print(f"AUC-ROC: {roc_auc:.2f}")
diff --git a/development/new_dev.ipynb b/development/new_dev.ipynb
deleted file mode 100644
index 1c5bc9a..0000000
--- a/development/new_dev.ipynb
+++ /dev/null
@@ -1,396 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "from pathlib import Path\n",
- "from time import strftime\n",
- "\n",
- "import matplotlib.pyplot as plt\n",
- "import mlflow\n",
- "import numpy as np\n",
- "import seaborn as sns\n",
- "import tensorflow as tf\n",
- "from sklearn.metrics import auc, confusion_matrix, precision_recall_curve\n",
- "from sklearn.model_selection import train_test_split\n",
- "from tensorflow.keras.layers import Dense\n",
- "from tensorflow.keras.models import Sequential"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Generate some example data (replace this with your actual data)\n",
- "X, y = np.random.rand(1000, 10), np.random.randint(2, size=(1000,))\n",
- "\n",
- "# Split the data into training and testing sets\n",
- "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "25/25 [==============================] - 0s 6ms/step - loss: 0.7012 - accuracy: 0.4888 - val_loss: 0.6993 - val_accuracy: 0.5200\n",
- "7/7 [==============================] - 0s 939us/step\n"
- ]
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "def plot_confusion_matrix(model, X_test, y_test, threshold=0.5, save_path=None):\n",
- " \"\"\"\n",
- " Generate and plot the confusion matrix for a TensorFlow model.\n",
- "\n",
- " Parameters:\n",
- " - model: The trained TensorFlow model.\n",
- " - X_test: Test features.\n",
- " - y_test: True labels for the test set.\n",
- " - threshold: Decision threshold for binary classification.\n",
- " - save_path: Optional path to save the plot as an image.\n",
- "\n",
- " Returns:\n",
- " - None\n",
- " \"\"\"\n",
- " # Make predictions on the test set\n",
- " y_pred = model.predict(X_test)\n",
- "\n",
- " # Apply threshold for binary classification\n",
- " y_pred_binary = (y_pred > threshold).astype(int)\n",
- "\n",
- " # For binary classification, flatten the true labels\n",
- " y_test = np.reshape(y_test, (-1,))\n",
- "\n",
- " # Calculate the confusion matrix\n",
- " cm = confusion_matrix(y_test, y_pred_binary)\n",
- "\n",
- " # Plot the confusion matrix\n",
- " plt.figure(figsize=(8, 6))\n",
- " sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, annot_kws={\"size\": 14})\n",
- " plt.xlabel('Predicted')\n",
- " plt.ylabel('True')\n",
- " plt.title('Confusion Matrix')\n",
- " plt.xticks([0, 1], ['Predicted Normal', 'Predicted Abr'])\n",
- " plt.yticks([0, 1], ['True 0', 'True 1'])\n",
- " plt.show()\n",
- "\n",
- " return cm\n",
- "\n",
- "# Example usage:\n",
- "# Assuming you have a trained model, test data (X_test, y_test), and a directory to save the plot\n",
- "# Replace placeholders with your actual data and paths\n",
- "\n",
- "# Build a simple binary classification model (replace this with your actual model)\n",
- "model = Sequential([\n",
- " Dense(64, activation='relu', input_shape=(10,)),\n",
- " Dense(1, activation='sigmoid')\n",
- "])\n",
- "\n",
- "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
- "\n",
- "# Train the model (replace this with your actual training process)\n",
- "model.fit(X_train, y_train, epochs=1, batch_size=32, validation_data=(X_test, y_test))\n",
- "\n",
- "save_directory = \"path/to/save/directory\"\n",
- "\n",
- "# Call the function to generate and save the confusion matrix\n",
- "anw = plot_confusion_matrix(model, X_test, y_test, save_path=f\"{save_directory}/confusion_matrix.png\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([[101, 8],\n",
- " [ 68, 23]])"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "anw"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Epoch 1/10\n",
- "25/25 [==============================] - 2s 23ms/step - loss: 0.6976 - accuracy: 0.5188 - val_loss: 0.7043 - val_accuracy: 0.3650\n",
- "Epoch 2/10\n",
- "25/25 [==============================] - 0s 6ms/step - loss: 0.6897 - accuracy: 0.5462 - val_loss: 0.7046 - val_accuracy: 0.4050\n",
- "Epoch 3/10\n",
- "25/25 [==============================] - 0s 7ms/step - loss: 0.6882 - accuracy: 0.5575 - val_loss: 0.7025 - val_accuracy: 0.4450\n",
- "Epoch 4/10\n",
- "25/25 [==============================] - 0s 8ms/step - loss: 0.6871 - accuracy: 0.5550 - val_loss: 0.7036 - val_accuracy: 0.4300\n",
- "Epoch 5/10\n",
- "25/25 [==============================] - 0s 9ms/step - loss: 0.6859 - accuracy: 0.5713 - val_loss: 0.7053 - val_accuracy: 0.4300\n",
- "Epoch 6/10\n",
- "25/25 [==============================] - 0s 9ms/step - loss: 0.6847 - accuracy: 0.5900 - val_loss: 0.7054 - val_accuracy: 0.4350\n",
- "Epoch 7/10\n",
- "25/25 [==============================] - 0s 8ms/step - loss: 0.6839 - accuracy: 0.5775 - val_loss: 0.7060 - val_accuracy: 0.4400\n",
- "Epoch 8/10\n",
- "25/25 [==============================] - 0s 8ms/step - loss: 0.6837 - accuracy: 0.5838 - val_loss: 0.7075 - val_accuracy: 0.4500\n",
- "Epoch 9/10\n",
- "25/25 [==============================] - 0s 9ms/step - loss: 0.6824 - accuracy: 0.5775 - val_loss: 0.7055 - val_accuracy: 0.4450\n",
- "Epoch 10/10\n",
- "25/25 [==============================] - 0s 8ms/step - loss: 0.6814 - accuracy: 0.5750 - val_loss: 0.7079 - val_accuracy: 0.4450\n",
- "7/7 [==============================] - 0s 2ms/step\n"
- ]
- },
- {
- "ename": "FileNotFoundError",
- "evalue": "[Errno 2] No such file or directory: 'path/to/save/directory/precision_recall_curve.png'",
- "output_type": "error",
- "traceback": [
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[1;32mIn[2], line 72\u001b[0m\n\u001b[0;32m 69\u001b[0m save_directory \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpath/to/save/directory\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 71\u001b[0m \u001b[38;5;66;03m# Call the function to generate and save the precision-recall curve\u001b[39;00m\n\u001b[1;32m---> 72\u001b[0m \u001b[43mplot_precision_recall_curve\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msave_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43msave_directory\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/precision_recall_curve.png\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
- "Cell \u001b[1;32mIn[2], line 46\u001b[0m, in \u001b[0;36mplot_precision_recall_curve\u001b[1;34m(model, X_test, y_test, save_path)\u001b[0m\n\u001b[0;32m 44\u001b[0m \u001b[38;5;66;03m# Save the plot if save_path is provided\u001b[39;00m\n\u001b[0;32m 45\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m save_path:\n\u001b[1;32m---> 46\u001b[0m \u001b[43mplt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msavefig\u001b[49m\u001b[43m(\u001b[49m\u001b[43msave_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 47\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrecision-Recall Curve saved at: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msave_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 48\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\pyplot.py:1023\u001b[0m, in \u001b[0;36msavefig\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 1020\u001b[0m \u001b[38;5;129m@_copy_docstring_and_deprecators\u001b[39m(Figure\u001b[38;5;241m.\u001b[39msavefig)\n\u001b[0;32m 1021\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msavefig\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 1022\u001b[0m fig \u001b[38;5;241m=\u001b[39m gcf()\n\u001b[1;32m-> 1023\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msavefig\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1024\u001b[0m fig\u001b[38;5;241m.\u001b[39mcanvas\u001b[38;5;241m.\u001b[39mdraw_idle() \u001b[38;5;66;03m# Need this if 'transparent=True', to reset colors.\u001b[39;00m\n\u001b[0;32m 1025\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\figure.py:3343\u001b[0m, in \u001b[0;36mFigure.savefig\u001b[1;34m(self, fname, transparent, **kwargs)\u001b[0m\n\u001b[0;32m 3339\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ax \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes:\n\u001b[0;32m 3340\u001b[0m stack\u001b[38;5;241m.\u001b[39menter_context(\n\u001b[0;32m 3341\u001b[0m ax\u001b[38;5;241m.\u001b[39mpatch\u001b[38;5;241m.\u001b[39m_cm_set(facecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m, edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m-> 3343\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcanvas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprint_figure\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backend_bases.py:2366\u001b[0m, in \u001b[0;36mFigureCanvasBase.print_figure\u001b[1;34m(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)\u001b[0m\n\u001b[0;32m 2362\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 2363\u001b[0m \u001b[38;5;66;03m# _get_renderer may change the figure dpi (as vector formats\u001b[39;00m\n\u001b[0;32m 2364\u001b[0m \u001b[38;5;66;03m# force the figure dpi to 72), so we need to set it again here.\u001b[39;00m\n\u001b[0;32m 2365\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m cbook\u001b[38;5;241m.\u001b[39m_setattr_cm(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure, dpi\u001b[38;5;241m=\u001b[39mdpi):\n\u001b[1;32m-> 2366\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mprint_method\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2367\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2368\u001b[0m \u001b[43m \u001b[49m\u001b[43mfacecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfacecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2369\u001b[0m \u001b[43m \u001b[49m\u001b[43medgecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43medgecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2370\u001b[0m \u001b[43m \u001b[49m\u001b[43morientation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morientation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2371\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox_inches_restore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_bbox_inches_restore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2372\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2373\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 
2374\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m bbox_inches \u001b[38;5;129;01mand\u001b[39;00m restore_bbox:\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backend_bases.py:2232\u001b[0m, in \u001b[0;36mFigureCanvasBase._switch_canvas_and_return_print_method..\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 2228\u001b[0m optional_kws \u001b[38;5;241m=\u001b[39m { \u001b[38;5;66;03m# Passed by print_figure for other renderers.\u001b[39;00m\n\u001b[0;32m 2229\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdpi\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124medgecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morientation\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 2230\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbbox_inches_restore\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[0;32m 2231\u001b[0m skip \u001b[38;5;241m=\u001b[39m optional_kws \u001b[38;5;241m-\u001b[39m {\u001b[38;5;241m*\u001b[39minspect\u001b[38;5;241m.\u001b[39msignature(meth)\u001b[38;5;241m.\u001b[39mparameters}\n\u001b[1;32m-> 2232\u001b[0m print_method \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mwraps(meth)(\u001b[38;5;28;01mlambda\u001b[39;00m \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: \u001b[43mmeth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2233\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mskip\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 2234\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# Let third-parties do as they see fit.\u001b[39;00m\n\u001b[0;32m 2235\u001b[0m print_method \u001b[38;5;241m=\u001b[39m meth\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backends\\backend_agg.py:509\u001b[0m, in \u001b[0;36mFigureCanvasAgg.print_png\u001b[1;34m(self, filename_or_obj, metadata, pil_kwargs)\u001b[0m\n\u001b[0;32m 462\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprint_png\u001b[39m(\u001b[38;5;28mself\u001b[39m, filename_or_obj, \u001b[38;5;241m*\u001b[39m, metadata\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, pil_kwargs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 463\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 464\u001b[0m \u001b[38;5;124;03m Write the figure to a PNG file.\u001b[39;00m\n\u001b[0;32m 465\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 507\u001b[0m \u001b[38;5;124;03m *metadata*, including the default 'Software' key.\u001b[39;00m\n\u001b[0;32m 508\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 509\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_print_pil\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpng\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backends\\backend_agg.py:458\u001b[0m, in \u001b[0;36mFigureCanvasAgg._print_pil\u001b[1;34m(self, filename_or_obj, fmt, pil_kwargs, metadata)\u001b[0m\n\u001b[0;32m 453\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 454\u001b[0m \u001b[38;5;124;03mDraw the canvas, then save it using `.image.imsave` (to which\u001b[39;00m\n\u001b[0;32m 455\u001b[0m \u001b[38;5;124;03m*pil_kwargs* and *metadata* are forwarded).\u001b[39;00m\n\u001b[0;32m 456\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 457\u001b[0m FigureCanvasAgg\u001b[38;5;241m.\u001b[39mdraw(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m--> 458\u001b[0m \u001b[43mmpl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimsave\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuffer_rgba\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfmt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morigin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mupper\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[43mdpi\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdpi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mpil_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\image.py:1689\u001b[0m, in \u001b[0;36mimsave\u001b[1;34m(fname, arr, vmin, vmax, cmap, format, origin, dpi, metadata, pil_kwargs)\u001b[0m\n\u001b[0;32m 1687\u001b[0m pil_kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mformat\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mformat\u001b[39m)\n\u001b[0;32m 1688\u001b[0m pil_kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdpi\u001b[39m\u001b[38;5;124m\"\u001b[39m, (dpi, dpi))\n\u001b[1;32m-> 1689\u001b[0m \u001b[43mimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\PIL\\Image.py:2428\u001b[0m, in \u001b[0;36mImage.save\u001b[1;34m(self, fp, format, **params)\u001b[0m\n\u001b[0;32m 2426\u001b[0m fp \u001b[38;5;241m=\u001b[39m builtins\u001b[38;5;241m.\u001b[39mopen(filename, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr+b\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 2427\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 2428\u001b[0m fp \u001b[38;5;241m=\u001b[39m \u001b[43mbuiltins\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mw+b\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2430\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 2431\u001b[0m save_handler(\u001b[38;5;28mself\u001b[39m, fp, filename)\n",
- "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'path/to/save/directory/precision_recall_curve.png'"
- ]
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "import matplotlib.pyplot as plt\n",
- "import numpy as np\n",
- "import tensorflow as tf\n",
- "from sklearn.metrics import auc, precision_recall_curve\n",
- "from sklearn.model_selection import train_test_split\n",
- "from tensorflow.keras.layers import Dense\n",
- "from tensorflow.keras.models import Sequential\n",
- "\n",
- "\n",
- "def plot_precision_recall_curve(model, X_test, y_test, save_path=None):\n",
- " \"\"\"\n",
- " Generate and plot the precision-recall curve for a TensorFlow model.\n",
- "\n",
- " Parameters:\n",
- " - model: The trained TensorFlow model.\n",
- " - X_test: Test features.\n",
- " - y_test: True labels for the test set.\n",
- " - save_path: Optional path to save the plot as an image.\n",
- "\n",
- " Returns:\n",
- " - None\n",
- " \"\"\"\n",
- " # Make predictions on the test set\n",
- " y_pred = model.predict(X_test)\n",
- "\n",
- " # For binary classification, flatten the true labels\n",
- " y_test = np.reshape(y_test, (-1,))\n",
- " y_pred = np.reshape(y_pred, (-1,))\n",
- "\n",
- " # Calculate precision and recall\n",
- " precision, recall, thresholds = precision_recall_curve(y_test, y_pred)\n",
- "\n",
- " # Calculate the area under the precision-recall curve\n",
- " auc_score = auc(recall, precision)\n",
- "\n",
- " # Plot the precision-recall curve\n",
- " plt.figure(figsize=(8, 6))\n",
- " plt.plot(recall, precision, label=f'Precision-Recall Curve (AUC = {auc_score:.2f})', color='b')\n",
- " plt.xlabel('Recall')\n",
- " plt.ylabel('Precision')\n",
- " plt.title('Precision-Recall Curve')\n",
- " plt.legend(loc='upper right')\n",
- " plt.grid(True)\n",
- "\n",
- " # Save the plot if save_path is provided\n",
- " if save_path:\n",
- " plt.savefig(save_path)\n",
- " print(f\"Precision-Recall Curve saved at: {save_path}\")\n",
- " else:\n",
- " plt.show()\n",
- "\n",
- "# Generate some example data (replace this with your actual data)\n",
- "X, y = np.random.rand(1000, 10), np.random.randint(2, size=(1000,))\n",
- "\n",
- "# Split the data into training and testing sets\n",
- "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
- "\n",
- "# Build a simple binary classification model (replace this with your actual model)\n",
- "model = Sequential([\n",
- " Dense(64, activation='relu', input_shape=(10,)),\n",
- " Dense(1, activation='sigmoid')\n",
- "])\n",
- "\n",
- "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
- "\n",
- "# Train the model (replace this with your actual training process)\n",
- "model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))\n",
- "\n",
- "# Specify the directory to save the plot\n",
- "save_directory = \"path/to/save/directory\"\n",
- "\n",
- "# Call the function to generate and save the precision-recall curve\n",
- "plot_precision_recall_curve(model, X_test, y_test, save_path=f\"{save_directory}/precision_recall_curve.png\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Epoch 1/10\n",
- "25/25 [==============================] - 3s 38ms/step - loss: 0.7014 - accuracy: 0.4863 - val_loss: 0.7011 - val_accuracy: 0.4850\n",
- "Epoch 2/10\n",
- "25/25 [==============================] - 0s 10ms/step - loss: 0.6968 - accuracy: 0.4888 - val_loss: 0.6990 - val_accuracy: 0.4650\n",
- "Epoch 3/10\n",
- "25/25 [==============================] - 0s 9ms/step - loss: 0.6937 - accuracy: 0.5188 - val_loss: 0.6979 - val_accuracy: 0.4550\n",
- "Epoch 4/10\n",
- "25/25 [==============================] - 0s 9ms/step - loss: 0.6936 - accuracy: 0.4975 - val_loss: 0.6979 - val_accuracy: 0.4750\n",
- "Epoch 5/10\n",
- "25/25 [==============================] - 0s 9ms/step - loss: 0.6907 - accuracy: 0.5250 - val_loss: 0.6979 - val_accuracy: 0.4950\n",
- "Epoch 6/10\n",
- "25/25 [==============================] - 0s 9ms/step - loss: 0.6892 - accuracy: 0.5337 - val_loss: 0.7022 - val_accuracy: 0.4900\n",
- "Epoch 7/10\n",
- "25/25 [==============================] - 0s 9ms/step - loss: 0.6874 - accuracy: 0.5375 - val_loss: 0.6984 - val_accuracy: 0.4850\n",
- "Epoch 8/10\n",
- "25/25 [==============================] - 0s 9ms/step - loss: 0.6860 - accuracy: 0.5512 - val_loss: 0.7012 - val_accuracy: 0.4750\n",
- "Epoch 9/10\n",
- "25/25 [==============================] - 0s 8ms/step - loss: 0.6863 - accuracy: 0.5462 - val_loss: 0.7008 - val_accuracy: 0.4850\n",
- "Epoch 10/10\n",
- "25/25 [==============================] - 0s 7ms/step - loss: 0.6846 - accuracy: 0.5362 - val_loss: 0.7001 - val_accuracy: 0.4900\n",
- "7/7 [==============================] - 0s 3ms/step\n"
- ]
- },
- {
- "ename": "FileNotFoundError",
- "evalue": "[Errno 2] No such file or directory: 'path/to/save/directory/precision_recall_curve_with_thresholds.png'",
- "output_type": "error",
- "traceback": [
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[1;32mIn[3], line 76\u001b[0m\n\u001b[0;32m 73\u001b[0m save_directory \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpath/to/save/directory\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 75\u001b[0m \u001b[38;5;66;03m# Call the function to generate and save the precision-recall curve with threshold points\u001b[39;00m\n\u001b[1;32m---> 76\u001b[0m \u001b[43mplot_precision_recall_curve\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_test\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msave_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43msave_directory\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m/precision_recall_curve_with_thresholds.png\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
- "Cell \u001b[1;32mIn[3], line 52\u001b[0m, in \u001b[0;36mplot_precision_recall_curve\u001b[1;34m(model, X_test, y_test, save_path)\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[38;5;66;03m# Save the plot if save_path is provided\u001b[39;00m\n\u001b[0;32m 51\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m save_path:\n\u001b[1;32m---> 52\u001b[0m \u001b[43mplt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msavefig\u001b[49m\u001b[43m(\u001b[49m\u001b[43msave_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 53\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrecision-Recall Curve saved at: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00msave_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 54\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\pyplot.py:1023\u001b[0m, in \u001b[0;36msavefig\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 1020\u001b[0m \u001b[38;5;129m@_copy_docstring_and_deprecators\u001b[39m(Figure\u001b[38;5;241m.\u001b[39msavefig)\n\u001b[0;32m 1021\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msavefig\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m 1022\u001b[0m fig \u001b[38;5;241m=\u001b[39m gcf()\n\u001b[1;32m-> 1023\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msavefig\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1024\u001b[0m fig\u001b[38;5;241m.\u001b[39mcanvas\u001b[38;5;241m.\u001b[39mdraw_idle() \u001b[38;5;66;03m# Need this if 'transparent=True', to reset colors.\u001b[39;00m\n\u001b[0;32m 1025\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\figure.py:3343\u001b[0m, in \u001b[0;36mFigure.savefig\u001b[1;34m(self, fname, transparent, **kwargs)\u001b[0m\n\u001b[0;32m 3339\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ax \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes:\n\u001b[0;32m 3340\u001b[0m stack\u001b[38;5;241m.\u001b[39menter_context(\n\u001b[0;32m 3341\u001b[0m ax\u001b[38;5;241m.\u001b[39mpatch\u001b[38;5;241m.\u001b[39m_cm_set(facecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m, edgecolor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m-> 3343\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcanvas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprint_figure\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backend_bases.py:2366\u001b[0m, in \u001b[0;36mFigureCanvasBase.print_figure\u001b[1;34m(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)\u001b[0m\n\u001b[0;32m 2362\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 2363\u001b[0m \u001b[38;5;66;03m# _get_renderer may change the figure dpi (as vector formats\u001b[39;00m\n\u001b[0;32m 2364\u001b[0m \u001b[38;5;66;03m# force the figure dpi to 72), so we need to set it again here.\u001b[39;00m\n\u001b[0;32m 2365\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m cbook\u001b[38;5;241m.\u001b[39m_setattr_cm(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure, dpi\u001b[38;5;241m=\u001b[39mdpi):\n\u001b[1;32m-> 2366\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mprint_method\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2367\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2368\u001b[0m \u001b[43m \u001b[49m\u001b[43mfacecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfacecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2369\u001b[0m \u001b[43m \u001b[49m\u001b[43medgecolor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43medgecolor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2370\u001b[0m \u001b[43m \u001b[49m\u001b[43morientation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43morientation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2371\u001b[0m \u001b[43m \u001b[49m\u001b[43mbbox_inches_restore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_bbox_inches_restore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 2372\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2373\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 
2374\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m bbox_inches \u001b[38;5;129;01mand\u001b[39;00m restore_bbox:\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backend_bases.py:2232\u001b[0m, in \u001b[0;36mFigureCanvasBase._switch_canvas_and_return_print_method..\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 2228\u001b[0m optional_kws \u001b[38;5;241m=\u001b[39m { \u001b[38;5;66;03m# Passed by print_figure for other renderers.\u001b[39;00m\n\u001b[0;32m 2229\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdpi\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124medgecolor\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morientation\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 2230\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbbox_inches_restore\u001b[39m\u001b[38;5;124m\"\u001b[39m}\n\u001b[0;32m 2231\u001b[0m skip \u001b[38;5;241m=\u001b[39m optional_kws \u001b[38;5;241m-\u001b[39m {\u001b[38;5;241m*\u001b[39minspect\u001b[38;5;241m.\u001b[39msignature(meth)\u001b[38;5;241m.\u001b[39mparameters}\n\u001b[1;32m-> 2232\u001b[0m print_method \u001b[38;5;241m=\u001b[39m functools\u001b[38;5;241m.\u001b[39mwraps(meth)(\u001b[38;5;28;01mlambda\u001b[39;00m \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: \u001b[43mmeth\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 2233\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m{\u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mskip\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 2234\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m: \u001b[38;5;66;03m# Let third-parties do as they see fit.\u001b[39;00m\n\u001b[0;32m 2235\u001b[0m print_method \u001b[38;5;241m=\u001b[39m meth\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backends\\backend_agg.py:509\u001b[0m, in \u001b[0;36mFigureCanvasAgg.print_png\u001b[1;34m(self, filename_or_obj, metadata, pil_kwargs)\u001b[0m\n\u001b[0;32m 462\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprint_png\u001b[39m(\u001b[38;5;28mself\u001b[39m, filename_or_obj, \u001b[38;5;241m*\u001b[39m, metadata\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, pil_kwargs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m 463\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 464\u001b[0m \u001b[38;5;124;03m Write the figure to a PNG file.\u001b[39;00m\n\u001b[0;32m 465\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 507\u001b[0m \u001b[38;5;124;03m *metadata*, including the default 'Software' key.\u001b[39;00m\n\u001b[0;32m 508\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 509\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_print_pil\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpng\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\backends\\backend_agg.py:458\u001b[0m, in \u001b[0;36mFigureCanvasAgg._print_pil\u001b[1;34m(self, filename_or_obj, fmt, pil_kwargs, metadata)\u001b[0m\n\u001b[0;32m 453\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 454\u001b[0m \u001b[38;5;124;03mDraw the canvas, then save it using `.image.imsave` (to which\u001b[39;00m\n\u001b[0;32m 455\u001b[0m \u001b[38;5;124;03m*pil_kwargs* and *metadata* are forwarded).\u001b[39;00m\n\u001b[0;32m 456\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 457\u001b[0m FigureCanvasAgg\u001b[38;5;241m.\u001b[39mdraw(\u001b[38;5;28mself\u001b[39m)\n\u001b[1;32m--> 458\u001b[0m \u001b[43mmpl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimsave\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename_or_obj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuffer_rgba\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfmt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morigin\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mupper\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[43mdpi\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdpi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mpil_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\matplotlib\\image.py:1689\u001b[0m, in \u001b[0;36mimsave\u001b[1;34m(fname, arr, vmin, vmax, cmap, format, origin, dpi, metadata, pil_kwargs)\u001b[0m\n\u001b[0;32m 1687\u001b[0m pil_kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mformat\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mformat\u001b[39m)\n\u001b[0;32m 1688\u001b[0m pil_kwargs\u001b[38;5;241m.\u001b[39msetdefault(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdpi\u001b[39m\u001b[38;5;124m\"\u001b[39m, (dpi, dpi))\n\u001b[1;32m-> 1689\u001b[0m \u001b[43mimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mpil_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
- "File \u001b[1;32mc:\\Users\\Yinka\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\PIL\\Image.py:2428\u001b[0m, in \u001b[0;36mImage.save\u001b[1;34m(self, fp, format, **params)\u001b[0m\n\u001b[0;32m 2426\u001b[0m fp \u001b[38;5;241m=\u001b[39m builtins\u001b[38;5;241m.\u001b[39mopen(filename, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr+b\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 2427\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 2428\u001b[0m fp \u001b[38;5;241m=\u001b[39m \u001b[43mbuiltins\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopen\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mw+b\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2430\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 2431\u001b[0m save_handler(\u001b[38;5;28mself\u001b[39m, fp, filename)\n",
- "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'path/to/save/directory/precision_recall_curve_with_thresholds.png'"
- ]
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "# Example usage:\n",
- "# Assuming you have a trained model, test data (X_test, y_test), and a directory to save the plot\n",
- "# Replace placeholders with your actual data and paths\n",
- "\n",
- "# Build a simple binary classification model (replace this with your actual model)\n",
- "model = Sequential([\n",
- " Dense(64, activation='relu', input_shape=(10,)),\n",
- " Dense(1, activation='sigmoid')\n",
- "])\n",
- "\n",
- "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
- "\n",
- "# Train the model (replace this with your actual training process)\n",
- "model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))\n",
- "\n",
- "# Specify the directory to save the plot\n",
- "save_directory = \"path/to/save/directory\"\n",
- "\n",
- "# Call the function to generate and save the precision-recall curve with threshold points\n",
- "plot_precision_recall_curve(model, X_test, y_test, save_path=f\"{save_directory}/precision_recall_curve_with_thresholds.png\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.2"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/development/train_dev.ipynb b/development/train_dev.ipynb
deleted file mode 100644
index 26c0369..0000000
--- a/development/train_dev.ipynb
+++ /dev/null
@@ -1,568 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "from pathlib import Path\n",
- "from time import strftime\n",
- "\n",
- "import mlflow\n",
- "import pandas as pd\n",
- "import tensorflow as tf"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'c:\\\\Main Workspace\\\\LogAnomalyDetect\\\\src'"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pwd()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "c:\\Main Workspace\\LogAnomalyDetect\\src\n"
- ]
- }
- ],
- "source": [
- "%cd src"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "[nltk_data] Downloading package stopwords to\n",
- "[nltk_data] C:\\Users\\Yinka\\AppData\\Roaming\\nltk_data...\n",
- "[nltk_data] Package stopwords is already up-to-date!\n"
- ]
- }
- ],
- "source": [
- "from dataset_loader import get_dataset, preprocess_and_encode, get_vectorization_layer\n",
- "from utils.common_utils import get_device_strategy\n",
- "from src.utils.common_utils import set_seed"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Random seed set as 42\n"
- ]
- }
- ],
- "source": [
- "set_seed()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [],
- "source": [
- "data_path = 'development/dev.gzip'\n",
- "clean_dev = \"development/clean_dev.gzip\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Log \n",
- " Target \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 \n",
- " 1119803499 2005.06.26 R03-M0-NC-C:J03-U01 200... \n",
- " normal \n",
- " \n",
- " \n",
- " 1 \n",
- " 1119803105 2005.06.26 R04-M0-NE-C:J06-U11 200... \n",
- " normal \n",
- " \n",
- " \n",
- " 2 \n",
- " 1121496169 2005.07.15 R06-M0-N6-C:J03-U01 200... \n",
- " normal \n",
- " \n",
- " \n",
- " 3 \n",
- " 1120968564 2005.07.09 R26-M0-N0-C:J03-U01 200... \n",
- " normal \n",
- " \n",
- " \n",
- " 4 \n",
- " 1120953205 2005.07.09 R27-M0-N7-C:J11-U01 200... \n",
- " normal \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Log Target\n",
- "0 1119803499 2005.06.26 R03-M0-NC-C:J03-U01 200... normal\n",
- "1 1119803105 2005.06.26 R04-M0-NE-C:J06-U11 200... normal\n",
- "2 1121496169 2005.07.15 R06-M0-N6-C:J03-U01 200... normal\n",
- "3 1120968564 2005.07.09 R26-M0-N0-C:J03-U01 200... normal\n",
- "4 1120953205 2005.07.09 R27-M0-N7-C:J11-U01 200... normal"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df = pd.read_parquet(data_path)\n",
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(1000, 2)"
- ]
- },
- "execution_count": 10,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [],
- "source": [
- "#preprocess_and_encode(data_path, \"development/clean_dev.gzip\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
- "outputs": [],
- "source": [
- "df_dataset = get_dataset(data_path)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(, )\n",
- "(, )\n",
- "(, )\n"
- ]
- }
- ],
- "source": [
- "for idx in df_dataset.take(3):\n",
- " print(idx)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [],
- "source": [
- "clean_dataset = get_dataset(clean_dev)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(, )\n",
- "(, )\n"
- ]
- }
- ],
- "source": [
- "for idx in clean_dataset.take(2):\n",
- " print(idx)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "metadata": {},
- "outputs": [],
- "source": [
- "token_layer, vocab_s = get_vectorization_layer(clean_dataset)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "420"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "vocab_s"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "metadata": {},
- "outputs": [],
- "source": [
- "a = token_layer(\"k e r n e l i n f o\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "a"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "1"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "S = get_device_strategy()\n",
- "S.num_replicas_in_sync"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "metadata": {},
- "outputs": [],
- "source": [
- "from src.models.model_loader import ModelLoader"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "metadata": {},
- "outputs": [],
- "source": [
- "loader = ModelLoader()\n",
- "test_model_loader = loader.get_model(\"cnn\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "metadata": {},
- "outputs": [],
- "source": [
- "test_model = test_model_loader(embedding_vocab= vocab_s, embedding_dim= 5,\n",
- " vectorization_layer=token_layer)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 44,
- "metadata": {},
- "outputs": [],
- "source": [
- "loss = tf.keras.losses.BinaryCrossentropy()\n",
- "optim = tf.keras.optimizers.Adadelta(learning_rate=0.01)\n",
- "tensorb = tf.keras.callbacks.TensorBoard()\n",
- "f1_score = tf.keras.metrics.F1Score()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
- "metadata": {},
- "outputs": [],
- "source": [
- "test_model.compile(loss=loss, optimizer=optim)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Epoch 1/20\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "500/500 [==============================] - 6s 9ms/step - loss: 0.6863 - val_loss: 0.6790\n",
- "Epoch 2/20\n",
- "500/500 [==============================] - 4s 7ms/step - loss: 0.6715 - val_loss: 0.6627\n",
- "Epoch 3/20\n",
- "500/500 [==============================] - 3s 7ms/step - loss: 0.6536 - val_loss: 0.6418\n",
- "Epoch 4/20\n",
- "500/500 [==============================] - 3s 7ms/step - loss: 0.6304 - val_loss: 0.6179\n",
- "Epoch 5/20\n",
- "500/500 [==============================] - 3s 7ms/step - loss: 0.6084 - val_loss: 0.5926\n",
- "Epoch 6/20\n",
- "500/500 [==============================] - 4s 7ms/step - loss: 0.5784 - val_loss: 0.5634\n",
- "Epoch 7/20\n",
- "500/500 [==============================] - 4s 7ms/step - loss: 0.5538 - val_loss: 0.5329\n",
- "Epoch 8/20\n",
- "500/500 [==============================] - 3s 6ms/step - loss: 0.5198 - val_loss: 0.4995\n",
- "Epoch 9/20\n",
- "500/500 [==============================] - 3s 6ms/step - loss: 0.4875 - val_loss: 0.4642\n",
- "Epoch 10/20\n",
- "500/500 [==============================] - 3s 7ms/step - loss: 0.4586 - val_loss: 0.4288\n",
- "Epoch 11/20\n",
- "500/500 [==============================] - 3s 6ms/step - loss: 0.4300 - val_loss: 0.3940\n",
- "Epoch 12/20\n",
- "500/500 [==============================] - 4s 9ms/step - loss: 0.3902 - val_loss: 0.3593\n",
- "Epoch 13/20\n",
- "500/500 [==============================] - 3s 6ms/step - loss: 0.3676 - val_loss: 0.3279\n",
- "Epoch 14/20\n",
- "500/500 [==============================] - 3s 6ms/step - loss: 0.3465 - val_loss: 0.3010\n",
- "Epoch 15/20\n",
- "500/500 [==============================] - 3s 6ms/step - loss: 0.3201 - val_loss: 0.2765\n",
- "Epoch 16/20\n",
- "500/500 [==============================] - 3s 7ms/step - loss: 0.3088 - val_loss: 0.2555\n",
- "Epoch 17/20\n",
- "500/500 [==============================] - 5s 10ms/step - loss: 0.2840 - val_loss: 0.2389\n",
- "Epoch 18/20\n",
- "500/500 [==============================] - 4s 9ms/step - loss: 0.2773 - val_loss: 0.2236\n",
- "Epoch 19/20\n",
- "500/500 [==============================] - 4s 8ms/step - loss: 0.2587 - val_loss: 0.2123\n",
- "Epoch 20/20\n",
- "500/500 [==============================] - 5s 10ms/step - loss: 0.2585 - val_loss: 0.2023\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- ""
- ]
- },
- "execution_count": 42,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "test_model.fit(clean_dataset,epochs=20, validation_data= clean_dataset)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Epoch 1/2\n",
- "500/500 [==============================] - 10s 15ms/step - loss: 0.6919 - val_loss: 0.6830\n",
- "Epoch 2/2\n",
- "500/500 [==============================] - 4s 8ms/step - loss: 0.6743 - val_loss: 0.6653\n"
- ]
- }
- ],
- "source": [
- "with S.scope():\n",
- " token_layer, vocab_s = get_vectorization_layer(clean_dataset)\n",
- " new_test_model = test_model_loader(embedding_vocab= vocab_s, embedding_dim= 5,\n",
- " vectorization_layer=token_layer)\n",
- " new_test_model.compile(loss=loss, optimizer=optim)\n",
- " new_test_model.fit(clean_dataset,epochs=2, validation_data= clean_dataset)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "test_model.fit()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(, )\n"
- ]
- }
- ],
- "source": [
- "\"\"\"def vectorize_text(text, label):\n",
- " text = tf.expand_dims(text, -1)\n",
- " return tokenizer(text), label\n",
- "final_dataset = dataset.map(vectorize_text)\n",
- "for sample in final_dataset.take(1):\n",
- " print(sample)\"\"\""
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "env",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.2"
- },
- "orig_nbformat": 4
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/src/config/config.yaml b/src/config/config.yaml
index 0514019..53af5d0 100644
--- a/src/config/config.yaml
+++ b/src/config/config.yaml
@@ -23,6 +23,8 @@ params:
total_epochs: 1
learning_rate: 0.01
cm_threshold: 0.5
+ majority_class_weight : 0
+ minority_class_weight : 0
model_name:
"11DCNN"
diff --git a/src/main.py b/src/main.py
index 86e1456..c73c33f 100644
--- a/src/main.py
+++ b/src/main.py
@@ -34,9 +34,14 @@
# Importing functions and classes from other modules
from dataset_loader import get_dataset, get_vectorization_layer
from models.model_loader import ModelLoader
-from utils.common_utils import (get_device_strategy, plot_confusion_matrix,
- plot_precision_recall_curve,
- set_mlflow_tracking, set_seed, tensorboard_dir)
+from utils.common_utils import (
+ get_device_strategy,
+ plot_confusion_matrix,
+ plot_precision_recall_curve,
+ set_mlflow_tracking,
+ set_seed,
+ tensorboard_dir,
+)
from utils.logging import logger
@@ -58,6 +63,7 @@ def main(cfg: DictConfig):
# Set up MLflow tracking for the experiment
experiment_id = set_mlflow_tracking(cfg.model_name)
+
# Callbacks for model training
checkpoint_path = f"artifacts/{cfg.model_name}/model_checkpoints"
@@ -95,7 +101,7 @@ def main(cfg: DictConfig):
)
logger.info(f"Retrieving the model: {cfg.model_name}")
- load_model_func = ModelLoader().get_model(cfg.model_name)
+ build_model_func = ModelLoader().get_model(cfg.model_name)
loss_func = tf.keras.losses.BinaryCrossentropy()
optim = tf.keras.optimizers.Adam(learning_rate=cfg.params.learning_rate)
f1_score_metrics = tf.keras.metrics.F1Score(
@@ -113,15 +119,20 @@ def main(cfg: DictConfig):
):
mlflow.set_tag("model_name", cfg.model_name)
+ # Class weights (0: majority class, 1: minority class)
+ class_weight = {
+ 0: cfg.params.majority_class_weight,
+ 1: cfg.params.minority_class_weight,
+ }
+
# Training the model within the distributed strategy scope
with strategy.scope():
tokenizer, vocab_size = get_vectorization_layer(dataset=train_data)
- model = load_model_func(
+ model = build_model_func(
vectorization_layer=tokenizer, embedding_vocab=vocab_size
)
model.compile(
- loss=loss_func,
- optimizer=optim,
+ loss=loss_func, optimizer=optim, metrics=[f1_score_metrics]
)
logger.info(
f" Training {cfg.model_name} for {cfg.params.total_epochs} epochs"