From d977d1dd0e6e2e6040c981848dc34763d59357cd Mon Sep 17 00:00:00 2001
From: Sasi Bonu <37780704+sasibonu@users.noreply.github.com>
Date: Sat, 4 May 2024 16:36:00 -0600
Subject: [PATCH] Delete .ipynb_checkpoints directory

---
 .../SasiBonuA04-checkpoint.ipynb              | 627 ------------------
 1 file changed, 627 deletions(-)
 delete mode 100644 .ipynb_checkpoints/SasiBonuA04-checkpoint.ipynb

diff --git a/.ipynb_checkpoints/SasiBonuA04-checkpoint.ipynb b/.ipynb_checkpoints/SasiBonuA04-checkpoint.ipynb
deleted file mode 100644
index e88ea75..0000000
--- a/.ipynb_checkpoints/SasiBonuA04-checkpoint.ipynb
+++ /dev/null
@@ -1,627 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "from datasets import load_dataset\n",
-    "import matplotlib.pyplot as plt\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "import seaborn as sns\n",
-    "\n",
-    "from sklearn.feature_extraction.text import CountVectorizer\n",
-    "from sklearn.svm import SVC\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "from sklearn.metrics import confusion_matrix"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset_en_hi = load_dataset(\"open_subtitles\", \"en-hi\")\n",
-    "dataset_da_ru = load_dataset(\"open_subtitles\", \"da-ru\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_en_hi = dataset_en_hi[\"train\"]\n",
-    "data_da_ru = dataset_da_ru[\"train\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': ['0', '1'],\n",
-       " 'meta': [{'year': 1948,\n",
-       "   'imdbId': 40522,\n",
-       "   'subtitleId': {'en': 4180294, 'hi': 4239106},\n",
-       "   'sentenceIds': {'en': [1], 'hi': [1]}},\n",
-       "  {'year': 1948,\n",
-       "   'imdbId': 40522,\n",
-       "   'subtitleId': {'en': 4180294, 'hi': 4239106},\n",
-       "   'sentenceIds': {'en': [2], 'hi': [2]}}],\n",
-       " 'translation': [{'en': 'THE BICYCLE THIEF', 'hi': 'साइकिल चोर'},\n",
-       "  {'en': 'Ricci?', 'hi': 'रिच्ची?'}]}"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "data_en_hi[0:2]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_en_hi = data_en_hi[:len(data_en_hi)//4]  # Get the first half of the data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn.feature_extraction.text import CountVectorizer\n",
-    "from sklearn.svm import SVC\n",
-    "from sklearn.model_selection import GridSearchCV\n",
-    "\n",
-    "# Extract English subtitles and their corresponding Hindi translations\n",
-    "english_subtitles = [item['en'] for item in data_en_hi['translation']]\n",
-    "hindi_translations = [item['hi'] for item in data_en_hi['translation']]\n",
-    "merged_subtitles = english_subtitles + hindi_translations\n",
-    "labels = [0] * len(english_subtitles) + [1] * len(hindi_translations)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Convert text data into numerical features using CountVectorizer\n",
-    "vectorizer = CountVectorizer()\n",
-    "X = vectorizer.fit_transform(merged_subtitles)\n",
-    "y = labels"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = SVC(C=1, kernel='linear')  # You can experiment with different kernels\n",
-    "model.fit(X_train, y_train)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "IOPub data rate exceeded.\n",
-      "The Jupyter server will temporarily stop sending output\n",
-      "to the client in order to avoid crashing it.\n",
-      "To change this limit, set the config variable\n",
-      "`--ServerApp.iopub_data_rate_limit`.\n",
-      "\n",
-      "Current values:\n",
-      "ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)\n",
-      "ServerApp.rate_limit_window=3.0 (secs)\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Check the unique classes in Hindi translations\n",
-    "unique_classes = set(hindi_translations)\n",
-    "print(\"Unique classes:\", unique_classes)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def prepare_labels(data):\n",
-    "  \"\"\"\n",
-    "  This function assigns labels based on the presence of keys 'en' and 'hi'.\n",
-    "  \"\"\"\n",
-    "  labels = []\n",
-    "  for item in data['translation']:\n",
-    "    if 'en' in item:\n",
-    "      labels.append(1)  # Label 1 if 'en' key exists\n",
-    "    elif 'hi' in item and 'en' not in item:\n",
-    "      labels.append(0)  # Label 0 if 'hi' exists (but not 'en')\n",
-    "    else:\n",
-    "      # Handle cases where neither 'en' nor 'hi' exist (modify as needed)\n",
-    "      labels.append(-1)  # Example: assign -1 for undefined label\n",
-    "  return labels\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "text = []\n",
-    "\n",
-    "for item in data_en_hi['translation']:\n",
-    "  text.append(item['en'])  \n",
-    "\n",
-    "labels = prepare_labels(data_en_hi)\n",
-    "vectorizer = CountVectorizer()\n",
-    "\n",
-    "if len(text) != len(labels):\n",
-    "    raise ValueError(\"Number of sentences and labels don't match!\")\n",
-    "\n",
-    "X = vectorizer.fit_transform(text)\n",
-    "\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "ValueError",
-     "evalue": "The number of classes has to be greater than one; got 1 class",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[13], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m model \u001b[38;5;241m=\u001b[39m SVC(C\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, kernel\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrbf\u001b[39m\u001b[38;5;124m'\u001b[39m)  \u001b[38;5;66;03m# You can experiment with different kernels\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;66;03m# Use the model for prediction on the testing set\u001b[39;00m\n\u001b[1;32m      5\u001b[0m y_pred \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mpredict(X_test)\n",
-      "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py:1152\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[0;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1145\u001b[0m     estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[1;32m   1147\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[1;32m   1148\u001b[0m     skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m   1149\u001b[0m         prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[1;32m   1150\u001b[0m     )\n\u001b[1;32m   1151\u001b[0m ):\n\u001b[0;32m-> 1152\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/svm/_base.py:199\u001b[0m, in \u001b[0;36mBaseLibSVM.fit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m    189\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    190\u001b[0m     X, y \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_data(\n\u001b[1;32m    191\u001b[0m         X,\n\u001b[1;32m    192\u001b[0m         y,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    196\u001b[0m         accept_large_sparse\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m    197\u001b[0m     )\n\u001b[0;32m--> 199\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_targets\u001b[49m\u001b[43m(\u001b[49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    201\u001b[0m sample_weight \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39masarray(\n\u001b[1;32m    202\u001b[0m     [] \u001b[38;5;28;01mif\u001b[39;00m sample_weight \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m sample_weight, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mfloat64\n\u001b[1;32m    203\u001b[0m )\n\u001b[1;32m    204\u001b[0m solver_type \u001b[38;5;241m=\u001b[39m LIBSVM_IMPL\u001b[38;5;241m.\u001b[39mindex(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_impl)\n",
-      "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/svm/_base.py:747\u001b[0m, in \u001b[0;36mBaseSVC._validate_targets\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m    745\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclass_weight_ \u001b[38;5;241m=\u001b[39m compute_class_weight(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclass_weight, classes\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mcls\u001b[39m, y\u001b[38;5;241m=\u001b[39my_)\n\u001b[1;32m    746\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mcls\u001b[39m) \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m--> 747\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    748\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe number of classes has to be greater than one; got \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m class\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    749\u001b[0m         \u001b[38;5;241m%\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m    750\u001b[0m     )\n\u001b[1;32m    752\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclasses_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\n\u001b[1;32m    754\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39masarray(y, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mfloat64, order\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mC\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
-      "\u001b[0;31mValueError\u001b[0m: The number of classes has to be greater than one; got 1 class"
-     ]
-    }
-   ],
-   "source": [
-    "model = SVC(C=1, kernel='rbf')  # You can experiment with different kernels\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Use the model for prediction on the testing set\n",
-    "y_pred = model.predict(X_test)\n",
-    "\n",
-    "# Evaluate model performance (e.g., accuracy)\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(f\"Model accuracy on testing data: {accuracy:.4f}\")\n",
-    "\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "\n",
-    "# Print the confusion matrix\n",
-    "print(\"Confusion Matrix:\")\n",
-    "print(conf_matrix)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "ValueError",
-     "evalue": "The number of classes has to be greater than one; got 1 class",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[10], line 7\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;66;03m# Step 5: Train the SVM model\u001b[39;00m\n\u001b[1;32m      6\u001b[0m model \u001b[38;5;241m=\u001b[39m SVC(C\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, kernel\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrbf\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 7\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[38;5;66;03m# Step 6: Evaluate the model\u001b[39;00m\n\u001b[1;32m     10\u001b[0m y_pred \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mpredict(X_test)\n",
-      "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/base.py:1152\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[0;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1145\u001b[0m     estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[1;32m   1147\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[1;32m   1148\u001b[0m     skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m   1149\u001b[0m         prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[1;32m   1150\u001b[0m     )\n\u001b[1;32m   1151\u001b[0m ):\n\u001b[0;32m-> 1152\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/svm/_base.py:199\u001b[0m, in \u001b[0;36mBaseLibSVM.fit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m    189\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    190\u001b[0m     X, y \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_data(\n\u001b[1;32m    191\u001b[0m         X,\n\u001b[1;32m    192\u001b[0m         y,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    196\u001b[0m         accept_large_sparse\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m    197\u001b[0m     )\n\u001b[0;32m--> 199\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_targets\u001b[49m\u001b[43m(\u001b[49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    201\u001b[0m sample_weight \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39masarray(\n\u001b[1;32m    202\u001b[0m     [] \u001b[38;5;28;01mif\u001b[39;00m sample_weight \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m sample_weight, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mfloat64\n\u001b[1;32m    203\u001b[0m )\n\u001b[1;32m    204\u001b[0m solver_type \u001b[38;5;241m=\u001b[39m LIBSVM_IMPL\u001b[38;5;241m.\u001b[39mindex(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_impl)\n",
-      "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/sklearn/svm/_base.py:747\u001b[0m, in \u001b[0;36mBaseSVC._validate_targets\u001b[0;34m(self, y)\u001b[0m\n\u001b[1;32m    745\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclass_weight_ \u001b[38;5;241m=\u001b[39m compute_class_weight(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclass_weight, classes\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mcls\u001b[39m, y\u001b[38;5;241m=\u001b[39my_)\n\u001b[1;32m    746\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mcls\u001b[39m) \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m--> 747\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    748\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe number of classes has to be greater than one; got \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m class\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    749\u001b[0m         \u001b[38;5;241m%\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mcls\u001b[39m)\n\u001b[1;32m    750\u001b[0m     )\n\u001b[1;32m    752\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclasses_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\n\u001b[1;32m    754\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39masarray(y, dtype\u001b[38;5;241m=\u001b[39mnp\u001b[38;5;241m.\u001b[39mfloat64, order\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mC\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
-      "\u001b[0;31mValueError\u001b[0m: The number of classes has to be greater than one; got 1 class"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "\n",
-    "# Step 4: Split the data into training and testing sets\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)\n",
-    "\n",
-    "\n",
-    "# Step 5: Train the SVM model\n",
-    "model = SVC(C=1, kernel='rbf')\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Step 6: Evaluate the model\n",
-    "y_pred = model.predict(X_test)\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(f\"Model accuracy on testing data: {accuracy:.4f}\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Number of samples in X: 186032\n",
-      "Number of samples in labels: 186032\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Check the number of samples in X and labels\n",
-    "print(\"Number of samples in X:\", X.shape[0])\n",
-    "print(\"Number of samples in labels:\", len(labels))\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model accuracy on testing data: 0.9999\n",
-      "Confusion Matrix:\n",
-      "[[    0     1]\n",
-      " [    0 18603]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Train the SVM model\n",
-    "model = SVC(C=1, kernel='poly', degree=3)  # You can experiment with different kernels\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Use the model for prediction on the testing set\n",
-    "y_pred = model.predict(X_test)\n",
-    "\n",
-    "# Evaluate model performance (e.g., accuracy)\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(f\"Model accuracy on testing data: {accuracy:.4f}\")\n",
-    "\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "\n",
-    "# Print the confusion matrix\n",
-    "print(\"Confusion Matrix:\")\n",
-    "print(conf_matrix)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model accuracy on testing data: 1.0000\n",
-      "Confusion Matrix:\n",
-      "[[    1     0]\n",
-      " [    0 18603]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Train the SVM model\n",
-    "model = SVC(C=1, kernel='linear')  # You can experiment with different kernels\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Use the model for prediction on the testing set\n",
-    "y_pred = model.predict(X_test)\n",
-    "\n",
-    "# Evaluate model performance (e.g., accuracy)\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(f\"Model accuracy on testing data: {accuracy:.4f}\")\n",
-    "\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "\n",
-    "# Print the confusion matrix\n",
-    "print(\"Confusion Matrix:\")\n",
-    "print(conf_matrix)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model accuracy on testing data: 0.9999\n",
-      "Confusion Matrix:\n",
-      "[[    0     1]\n",
-      " [    0 18603]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "# Train the SVM model\n",
-    "model = SVC(C=2, kernel='rbf')  # You can experiment with different kernels\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Use the model for prediction on the testing set\n",
-    "y_pred = model.predict(X_test)\n",
-    "\n",
-    "# Evaluate model performance (e.g., accuracy)\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(f\"Model accuracy on testing data: {accuracy:.4f}\")\n",
-    "\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "\n",
-    "# Print the confusion matrix\n",
-    "print(\"Confusion Matrix:\")\n",
-    "print(conf_matrix)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model accuracy on testing data: 0.9999\n",
-      "Confusion Matrix:\n",
-      "[[    0     1]\n",
-      " [    0 18603]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Train the SVM model\n",
-    "model = SVC(C=2, kernel='poly', degree=3)  # You can experiment with different kernels\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Use the model for prediction on the testing set\n",
-    "y_pred = model.predict(X_test)\n",
-    "\n",
-    "# Evaluate model performance (e.g., accuracy)\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(f\"Model accuracy on testing data: {accuracy:.4f}\")\n",
-    "\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "\n",
-    "# Print the confusion matrix\n",
-    "print(\"Confusion Matrix:\")\n",
-    "print(conf_matrix)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model accuracy on testing data: 1.0000\n",
-      "Confusion Matrix:\n",
-      "[[    1     0]\n",
-      " [    0 18603]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Train the SVM model\n",
-    "model = SVC(C=2, kernel='linear')  # You can experiment with different kernels\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Use the model for prediction on the testing set\n",
-    "y_pred = model.predict(X_test)\n",
-    "\n",
-    "# Evaluate model performance (e.g., accuracy)\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(f\"Model accuracy on testing data: {accuracy:.4f}\")\n",
-    "\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "\n",
-    "# Print the confusion matrix\n",
-    "print(\"Confusion Matrix:\")\n",
-    "print(conf_matrix)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model accuracy on testing data: 0.9999\n",
-      "Confusion Matrix:\n",
-      "[[    0     1]\n",
-      " [    0 18603]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "# Train the SVM model\n",
-    "model = SVC(C=5, kernel='rbf')  # You can experiment with different kernels\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Use the model for prediction on the testing set\n",
-    "y_pred = model.predict(X_test)\n",
-    "\n",
-    "# Evaluate model performance (e.g., accuracy)\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(f\"Model accuracy on testing data: {accuracy:.4f}\")\n",
-    "\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "\n",
-    "# Print the confusion matrix\n",
-    "print(\"Confusion Matrix:\")\n",
-    "print(conf_matrix)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "NameError",
-     "evalue": "name 'SVC' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[1], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# Train the SVM model\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[43mSVC\u001b[49m(C\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m, kernel\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpoly\u001b[39m\u001b[38;5;124m'\u001b[39m, degree\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m)  \u001b[38;5;66;03m# You can experiment with different kernels\u001b[39;00m\n\u001b[1;32m      3\u001b[0m model\u001b[38;5;241m.\u001b[39mfit(X_train, y_train)\n\u001b[1;32m      5\u001b[0m \u001b[38;5;66;03m# Use the model for prediction on the testing set\u001b[39;00m\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'SVC' is not defined"
-     ]
-    }
-   ],
-   "source": [
-    "# Train the SVM model\n",
-    "model = SVC(C=5, kernel='poly', degree=3)  # You can experiment with different kernels\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Use the model for prediction on the testing set\n",
-    "y_pred = model.predict(X_test)\n",
-    "\n",
-    "# Evaluate model performance (e.g., accuracy)\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(f\"Model accuracy on testing data: {accuracy:.4f}\")\n",
-    "\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "\n",
-    "# Print the confusion matrix\n",
-    "print(\"Confusion Matrix:\")\n",
-    "print(conf_matrix)\n",
-    "\n",
-    "plt.figure(figsize=(8, 6))\n",
-    "sns.heatmap(cm, annot=True, cmap='Blues', fmt='g', xticklabels=['Negative', 'Positive'], yticklabels=['Negative', 'Positive'])\n",
-    "plt.xlabel('Predicted')\n",
-    "plt.ylabel('True')\n",
-    "plt.title('Confusion Matrix')\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model accuracy on testing data: 1.0000\n",
-      "Confusion Matrix:\n",
-      "[[    1     0]\n",
-      " [    0 18603]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Train the SVM model\n",
-    "model = SVC(C=5, kernel='linear')  # You can experiment with different kernels\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "# Use the model for prediction on the testing set\n",
-    "y_pred = model.predict(X_test)\n",
-    "\n",
-    "# Evaluate model performance (e.g., accuracy)\n",
-    "from sklearn.metrics import accuracy_score\n",
-    "accuracy = accuracy_score(y_test, y_pred)\n",
-    "print(f\"Model accuracy on testing data: {accuracy:.4f}\")\n",
-    "\n",
-    "conf_matrix = confusion_matrix(y_test, y_pred)\n",
-    "\n",
-    "# Print the confusion matrix\n",
-    "print(\"Confusion Matrix:\")\n",
-    "print(conf_matrix)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}