Skip to content

Commit

Permalink
Adding Experiment 1
Browse files Browse the repository at this point in the history
  • Loading branch information
jcollopy-tulane committed Apr 25, 2024
1 parent 1651eb1 commit fdf7c5d
Showing 1 changed file with 142 additions and 0 deletions.
142 changes: 142 additions & 0 deletions notebooks/Experiment-Naive_Bayes.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f7c7072e-41a8-4a68-816a-f138dfe2f713",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
"from sklearn.metrics import f1_score, precision_score, recall_score\n",
"from sklearn.naive_bayes import BernoulliNB\n",
"from sklearn.pipeline import make_pipeline"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2007cdd-38d5-40a5-9ed7-4f593d081a2e",
"metadata": {},
"outputs": [],
"source": [
"# Read the train / validation / test CSVs (paths are relative to the working directory)\n",
"train_df, val_df, test_df = (\n",
"    pd.read_csv(\"train.csv\"),\n",
"    pd.read_csv(\"validation.csv\"),\n",
"    pd.read_csv(\"test.csv\"),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "aa429827-8d41-44eb-a206-a5e5531ef105",
"metadata": {},
"source": [
"### Evaluating Naive Bayes on Validation Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2c29088-90a7-4084-8b2c-be0651c36c20",
"metadata": {},
"outputs": [],
"source": [
"# Token counts -> Bernoulli Naive Bayes; the pipeline is fit on the training split only\n",
"BNB = make_pipeline(CountVectorizer(), BernoulliNB())\n",
"BNB.fit(train_df[\"Stemmed\"], train_df[\"Result_Bin\"])\n",
"\n",
"# Predict on the validation set\n",
"y_pred = BNB.predict(val_df[\"Stemmed\"])\n",
"y_val = val_df[\"Result_Bin\"]\n",
"# Calculate F1\n",
"f1 = f1_score(y_val, y_pred)\n",
"print(\"F1 Score:\", f1)\n",
"# Calculate Precision\n",
"precision = precision_score(y_val, y_pred)\n",
"print(\"Precision:\", round(precision, 3))\n",
"# Calculate Recall\n",
"recall = recall_score(y_val, y_pred)\n",
"print(\"Recall:\", round(recall, 3))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48c88dff-14a5-4975-9670-1cc33cc611df",
"metadata": {},
"outputs": [],
"source": [
"# Confusion matrix for the validation-set predictions\n",
"conf_matrix = confusion_matrix(y_val, y_pred)\n",
"\n",
"cm_display = ConfusionMatrixDisplay(\n",
"    confusion_matrix=conf_matrix,\n",
"    display_labels=[\"Loss\", \"Win\"],\n",
")\n",
"cm_display.plot()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "ae96b678-a0ba-46be-9759-b3ecb78c37fc",
"metadata": {},
"source": [
"### Evaluating Naive Bayes on Testing Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "649a1ffa-5717-4efa-ba97-6d6109e7cac7",
"metadata": {},
"outputs": [],
"source": [
"# Score the already-fitted pipeline on the test split (no refitting here)\n",
"y_test = test_df[\"Result_Bin\"]\n",
"y_pred = BNB.predict(test_df[\"Stemmed\"])\n",
"\n",
"# F1\n",
"f1 = f1_score(y_test, y_pred)\n",
"print(\"F1 Score:\", f1)\n",
"\n",
"# Precision\n",
"precision = precision_score(y_test, y_pred)\n",
"print(\"Precision:\", round(precision, 3))\n",
"\n",
"# Recall\n",
"recall = recall_score(y_test, y_pred)\n",
"print(\"Recall:\", round(recall, 3))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fcf8fd82-2256-465c-bf87-fffefdf87e6d",
"metadata": {},
"outputs": [],
"source": [
"# Confusion matrix for the test-set predictions.\n",
"# Class names match the validation plot so the two figures can be compared directly.\n",
"conf_matrix = confusion_matrix(y_test, y_pred)\n",
"\n",
"cm_display = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=[\"Loss\", \"Win\"])\n",
"cm_display.plot()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit fdf7c5d

Please sign in to comment.