From 97aad0915e0b158f5f0389e84ff861c266249961 Mon Sep 17 00:00:00 2001 From: jcollopy-tulane Date: Wed, 1 May 2024 15:02:34 -0500 Subject: [PATCH] Figures --- notebooks/Experiment-Error_Analysis.ipynb | 910 ++++++++++------------ notebooks/Experiments-BERT.ipynb | 244 ------ notebooks/functions/__init__.py | 0 notebooks/functions/functions_utils.py | 79 -- 4 files changed, 425 insertions(+), 808 deletions(-) delete mode 100644 notebooks/Experiments-BERT.ipynb delete mode 100644 notebooks/functions/__init__.py delete mode 100644 notebooks/functions/functions_utils.py diff --git a/notebooks/Experiment-Error_Analysis.ipynb b/notebooks/Experiment-Error_Analysis.ipynb index 5d5db26..e24ae5e 100644 --- a/notebooks/Experiment-Error_Analysis.ipynb +++ b/notebooks/Experiment-Error_Analysis.ipynb @@ -2,31 +2,202 @@ "cells": [ { "cell_type": "code", - "execution_count": 43, + "execution_count": 150, "id": "723a4fc3-1fe3-4e6c-a7a7-b48cf9aadba7", "metadata": {}, "outputs": [], "source": [ - "import pandas as pd" + "import pandas as pd\n", + "import numpy as np\n", + "import re\n", + "import pickle\n", + "from sklearn.naive_bayes import BernoulliNB\n", + "from sklearn.feature_extraction.text import CountVectorizer\n", + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.linear_model import LogisticRegression\n", + "from nltk.corpus import stopwords\n", + "from nltk.stem import PorterStemmer" ] }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 151, "id": "7a164824-3b43-459e-bdc6-e6ff06e75265", "metadata": {}, "outputs": [], "source": [ + "train_df = pd.read_csv(\"../data/train.csv\")\n", + "val_df = pd.read_csv(\"../data/validation.csv\")\n", + "test_df = pd.read_csv(\"../data/test.csv\")\n", + "\n", "bnb = pd.read_csv(\"bnb_results.csv\")\n", "lr = pd.read_csv(\"lr_results.csv\")\n", "cnn = pd.read_csv(\"cnn_results.csv\")\n", "bert = pd.read_csv(\"bert_results.csv\")" ] }, + { + "cell_type": "markdown", + "id": "12b98887-31f1-44b5-b08d-842924010bd6", + "metadata": {}, + "source": [ + "### Get Models" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "id": "06a86d5c-a86d-43af-bd0c-21da92400556", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
BernoulliNB()
" + ], + "text/plain": [ + "BernoulliNB()" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nb_vectorizer = CountVectorizer(analyzer='word', ngram_range=(1, 2), binary=True)\n", + "basic_vectorizer = CountVectorizer()\n", + "train_x = nb_vectorizer.fit_transform(train_df[\"Stemmed\"])\n", + "train_y = train_df[\"Result_Bin\"]\n", + "\n", + "nb_model = BernoulliNB()\n", + "nb_model.fit(train_x, train_y)" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "id": "0dfaabbb-1a1e-417b-ba78-a088c2b4bfb8", + "metadata": {}, + "outputs": [], + "source": [ + "# Find Post. Dist.\n", + "def find_prob_nb(text, label):\n", + " words = process_text(text).split()\n", + " print(words)\n", + " evidence = dict()\n", + " df = train_df[train_df[\"Result\"] == label]\n", + " for word in words:\n", + " word_count = train_df['Stemmed'].str.contains(word, case=False).sum()\n", + " evidence[word] = word_count/len(train_df)\n", + "\n", + " likelihood = dict()\n", + " for word in words:\n", + " word_count = df['Stemmed'].str.contains(word, case=False).sum()\n", + " likelihood[word] = word_count/len(df)\n", + " prior = len(df)/len(train_df)\n", + " \n", + " ratio = dict()\n", + "\n", + " for key, value in likelihood.items():\n", + " if key in evidence:\n", + " ratio[key] = value / evidence[key]\n", + " post = dict()\n", + " post = {key: value * prior for key, value in ratio.items()}\n", + "\n", + " print(post)\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "id": "9b7427af-3d09-4d88-ad2d-af7c25dcaad0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LogisticRegression()
" + ], + "text/plain": [ + "LogisticRegression()" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lr_vectorizer = CountVectorizer(analyzer='word', ngram_range=(1, 2))\n", + "train_x = lr_vectorizer.fit_transform(train_df[\"Stemmed\"])\n", + "train_y = train_df[\"Result_Bin\"]\n", + "\n", + "lr_model = LogisticRegression()\n", + "lr_model.fit(train_x, train_y)" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "id": "bc8284f4-a2c2-4386-a6f8-2355c42e2def", + "metadata": {}, + "outputs": [], + "source": [ + "# Gets Coefficients for LR\n", + "def return_coef_lr(input):\n", + "\n", + " model = lr_model\n", + " text = process_text(input)\n", + " text_vec = lr_vectorizer.transform([text])\n", + "\n", + " vocabulary = lr_vectorizer.get_feature_names_out()\n", + " coefficients = lr_model.coef_[0]\n", + "\n", + " \n", + " word_coefficient_map = {word: coef for word, coef in zip(vocabulary, coefficients)}\n", + "\n", + " \n", + " for word in text.split():\n", + " if word in word_coefficient_map:\n", + " print(f\"Word: {word}, Coefficient: {word_coefficient_map[word]}\")\n", + " else:\n", + " print(f\"Word: {word}, Coefficient: 0\") " + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "id": "c628ef6b-588e-451a-889a-75f030550ee5", + "metadata": {}, + "outputs": [], + "source": [ + "# Processing Text\n", + "\n", + "def process_text(document):\n", + " # Tokenize the document\n", + " tokens = document.split()\n", + " tokens = [re.sub(r'^\\W+|\\W+$', '', token) for token in tokens]\n", + " tokens = [token.lower() for token in tokens]\n", + "\n", + " # Remove stopwords\n", + " stop_words = set(stopwords.words('english'))\n", + " tokens = [token for token in tokens if token not in stop_words]\n", + "\n", + " # Stem the tokens\n", + " stemmer = PorterStemmer()\n", + " stemmed_tokens = [stemmer.stem(token) for token in tokens]\n", + "\n", + " # Return the processed text\n", + " return ' '.join(stemmed_tokens)" + ] + }, { "cell_type": "code", - "execution_count": 45, - "id": "fe41ab19-b216-4c5c-b344-96c87b25ffd1", + "execution_count": 157, + "id": "510953a7-1313-4499-b35d-2c16ba936469", "metadata": {}, "outputs": [ { @@ -163,7 +334,7 @@ "4 1 0.999648 " ] }, - "execution_count": 45, + "execution_count": 157, "metadata": {}, "output_type": "execute_result" } @@ -174,8 +345,8 @@ }, { "cell_type": "code", - "execution_count": 46, - "id": "07b5bb6b-d6e6-454e-852a-cc53b9c28571", + "execution_count": 158, + "id": "091b9b0b-fc61-405f-8a03-342f810b1bd2", "metadata": {}, "outputs": [ { @@ -312,7 +483,7 @@ "4 1 0.958090 " ] }, - "execution_count": 46, + "execution_count": 158, "metadata": {}, "output_type": "execute_result" } @@ -323,8 +494,8 @@ }, { "cell_type": "code", - "execution_count": 47, - "id": "3503eb65-a580-4284-b73a-8e8b72473589", + "execution_count": 159, + "id": "a0e64e88-bf4f-4965-82d7-fe1f08b10ee2", "metadata": {}, "outputs": [ { @@ -461,7 +632,7 @@ "4 1 1 " ] }, - "execution_count": 47, + "execution_count": 159, "metadata": {}, "output_type": "execute_result" } @@ -472,191 +643,8 @@ }, { "cell_type": "code", - "execution_count": 67, - "id": "37790c3f-056a-4426-bdb8-e622b1adbc6a", - "metadata": {}, - "outputs": [], - "source": [ - "bnb_tp = bnb[(bnb[\"Result_Bin\"] == 1) & (bnb[\"Predicted_Result\"] == 1)]\n", - "lr_fn = lr[(lr[\"Result_Bin\"] == 1) & (lr[\"Predicted_Result\"] == 0)]\n", - "cnn_fn = cnn[(cnn[\"Result_Bin\"] == 1) & (cnn[\"Predicted_Label\"] == 0)]\n", - "bert_fn = bert[(bert[\"Result_Bin\"] == 1) & 
(bert[\"Predicted\"] == 0)]" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "id": "62175a58-e883-445e-b282-2a05505f07ae", - "metadata": {}, - "outputs": [], - "source": [ - "first_200_df = bnb_tp.sort_values(\"Predicted_Probability\", ascending = True)\n", - "first_200 = first_200_df['Comment'].head(200)" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "4424f031-2a2c-4c25-a7e7-81b983f1da89", - "metadata": {}, - "outputs": [], - "source": [ - "comments_lr = first_200.isin(lr_fn['Comment'])\n", - "comments_cnn = first_200.isin(cnn_fn['Comment'])\n", - "\n", - "comments_both = first_200[comments_lr & comments_cnn]" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "id": "b9747982-b602-4034-9fbf-a3408da57c2c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "19 Big response against one of the hottest teams ...\n", - "1132 This is my last remaining team-specific subred...\n", - "2114 I point out when he plays well, and I’m gonna ...\n", - "2180 I think what's happening is coaching is trying...\n", - "1489 Problem with being the best is every single te...\n", - "962 Dame’s game is really unbelievable… He’s the b...\n", - "1655 He was questionable with non-covid illness. I ...\n", - "2068 I’m going to keep saying this, Doc is just Coa...\n", - "567 B2B and all those excuses aside, this goes to ...\n", - "647 So we're going to beat the good teams & lose t...\n", - "1629 Fr his playmaking has been nothing short of am...\n", - "2234 Yeah, we were 19-6 at this point last season a...\n", - "446 If dame played like this in Portland, we would...\n", - "2016 I will take the L, but you still think this te...\n", - "2236 Correction: Giannis would’ve bullied AG into g...\n", - "1044 It's because of the strength of schedule i gue...\n", - "544 Teams already sag off him. They celebrate if h...\n", - "1243 Both giannis and dame turned up when it matter...\n", - "1789 Yep people still calling for trades and firing\n", - "2057 Without Giannis or Khris, we beat a Kawhi/PG/H...\n", - "26 But it’s never an excuse when teams have to go...\n", - "1068 some people here complaining we act like the B...\n", - "1430 Doc Rivers > a tree stump > Adrian Griffin.\n", - "342 We shot great from 3, but we shot like ass fro...\n", - "2229 whenever a team beats miami it feels like Chri...\n", - "101 Yeah will be fun seeing him play more. I know ...\n", - "28 He commented on someones ig midway through the...\n", - "33 We've been trying to replace PJ Tucker since h...\n", - "14 End of the bench playing basketball happened lol\n", - "1609 Yep. And if he gets that first one he gets ano...\n", - "133 Super happy with the result. Really good team ...\n", - "986 Playing 3rd game in 4 nights and the first two...\n", - "1693 I'm pretty use to it by now but this game was ...\n", - "1196 Still looking for that sweet spot but I’m fine...\n", - "1962 You are right if we are aiming for winning in ...\n", - "Name: Comment, dtype: object" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "comments_both" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "3c85b034-378e-4c46-8754-1f18cffa7f18", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "19 Big response against one of the hottest teams in the Association right now. Cavs will cause a lot of problems especially if fully healthy. 
We’ve got a lot of uncertainties with how we will organize as a team but we have the answer to the biggest question: do we have the talent to hang with any team? Good W!\n", - "1132 This is my last remaining team-specific subreddit, the others became too toxic so I unsubbed. This one is heading there too, sadly. I think the Dame trade brought in a bunch of sad Packers fans (there are many) and spoiled what used to be my favorite sports subreddit. I'm still hopeful it'll bounce back ...\n", - "2114 I point out when he plays well, and I’m gonna do the same when he plays like shit. 4/18 isn’t good enough he should know when his shot is off and stop chucking\n", - "2180 I think what's happening is coaching is trying to use a scheme to cater to the defenders. Dame, Payne and Beasley aren't one on one stoppers. So using a switching zone seems effective and Brook can still troll the paint.\n", - "1489 Problem with being the best is every single team brings their best against you. Think we're gonna have our mettle tested very often this year.\n", - "962 Dame’s game is really unbelievable… He’s the best logo shooter of all time statistically. If you come out too far, he’ll explode right past you and shoot or get a dunk/layup/foul. If you grab at him, he’s the best freethrow shooter in the league this season (and headed to 4th career all-time). If your teammate comes over to help, he is an elite playmaker and will make the right pass more often than not. He’s like prime Harden but much smaller, better at shooting, and way more clutch.\n", - "1655 He was questionable with non-covid illness. I thought he looked off but knowing he was sick made me give him a pass, which I doubt many on this sub will do. Wouldn't surprise me if he was under the weather versus Boston too.\n", - "2068 I’m going to keep saying this, Doc is just Coach Bud and that’s fine. Not ideal but it’s fine.\n", - "567 B2B and all those excuses aside, this goes to show that the Bucks are still a threat. Yeah, there's some problems but they still have the second best record in the east and this game shows why.\n", - "647 So we're going to beat the good teams & lose to the bad teams? The opposite of last season. I'll take it.\n", - "1629 Fr his playmaking has been nothing short of amazing. Last game there was a sequence where 2-3 times in a row dame got the ball, drove and made the most perfect right play pass but all the players he passed it to missed. And i was just thinking i really hope this doesnt discourage him from keeping to make the right pass\n", - "2234 Yeah, we were 19-6 at this point last season and that was after the torrid 9-0 start. We are winning a lot more consistently this season and it looks like we’re starting to exit the clutch time purgatory we’ve been stuck in to start the season finally (knock on wood lol)\n", - "446 If dame played like this in Portland, we would have been pissed to trade jrue. Stop moving the goalposts, we expected a superstar and this ain't it\n", - "2016 I will take the L, but you still think this team has nothing to concern with and don’t need trade? Don’t act like Blazers is a good team and this is a convincing win\n", - "2236 Correction: Giannis would’ve bullied AG into getting him back in the game. He knows Doc won’t go for that shit lol. This is why we needed a veteran coach\n", - "1044 It's because of the strength of schedule i guess, but that's something out of the players' control anyways\n", - "544 Teams already sag off him. They celebrate if he takes a 3. 
I don't disagree with Giannis taking one or two 3's a game, but it just can't be more than that.\n", - "1243 Both giannis and dame turned up when it mattered but I can't trust this defence to save a single basket anymore. Struggling against these lot tells us more about us tbh\n", - "1789 Yep people still calling for trades and firing\n", - "2057 Without Giannis or Khris, we beat a Kawhi/PG/Harden Clippers team and held them to 106 points. Imagine saying that two months ago.\n", - "26 But it’s never an excuse when teams have to go to Denver and play in that altitude with only a day to adjust to it. Media is trash. Anything to protect their darlings.\n", - "1068 some people here complaining we act like the Bucks has lost (they did lose b2b to the Pacers) while they just act like the Spurs is a top 3 team so we barely beating them should be viewed as an accomplishment\n", - "1430 Doc Rivers > a tree stump > Adrian Griffin.\n", - "342 We shot great from 3, but we shot like ass from 2, which is also out of the norm. Most of those looks from 3 were wide open too\n", - "2229 whenever a team beats miami it feels like Christmas morning\n", - "101 Yeah will be fun seeing him play more. I know he’s good but didn’t watch many Blazer games since the Bucks hardly ever play them. Mostly pay attention to the Eastern conference most the year.\n", - "28 He commented on someones ig midway through the game saying he was alright haha i think hell be back pretty soon\n", - "33 We've been trying to replace PJ Tucker since he left, we might have to finally done it.\n", - "14 End of the bench playing basketball happened lol\n", - "1609 Yep. And if he gets that first one he gets another one. Basically he stops shooting those whenever he misses one.\n", - "133 Super happy with the result. Really good team win. Really sad about the lack of MarJon minutes. I have a feeling he's going to be shipped out before the deadline.\n", - "986 Playing 3rd game in 4 nights and the first two against the team with the 2nd fastest pace and the 3rd game against a team with the 3rd fastest pace was obviously an issue for our old asses.\n", - "1693 I'm pretty use to it by now but this game was another one of those that my mind can't wrap around the fact that we have Damian Lillard\n", - "1196 Still looking for that sweet spot but I’m fine with all the experimenting with lineups and schemes early on. 
Reserving serious judgement for 2024.\n", - "1962 You are right if we are aiming for winning in the regular season and getting into the playoffs only Insert \"This is fine\" meme\n", - "Name: Comment, dtype: object\n" - ] - } - ], - "source": [ - "with pd.option_context('display.max_colwidth', None):\n", - " print(comments_both)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2cf46d1c-4eb8-44ab-8e40-a81094e6188e", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c6479afe-f5f2-4fd5-b87a-4a878475b085", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "711e3fb8-bb68-4ce4-ae0e-c958b2cc5b32", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "846f9cbd-b0ed-48b0-b772-c8cd238334f5", - "metadata": {}, - "outputs": [], - "source": [ - "bnb_fp = bnb[(bnb[\"Result_Bin\"] == 0) & (bnb[\"Predicted_Result\"] == 1)]\n", - "lr_tn = lr[(lr[\"Result_Bin\"] == 0) & (lr[\"Predicted_Result\"] == 0)]\n", - "cnn_tn = cnn[(cnn[\"Result_Bin\"] == 0) & (cnn[\"Predicted_Label\"] == 0)]" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "179a9408-e2df-475a-9e42-db6041d14608", + "execution_count": 160, + "id": "175ff3dc-900c-42c4-bf7e-a948ddcb0b96", "metadata": {}, "outputs": [ { @@ -687,8 +675,7 @@ " No_Stop\n", " Stemmed\n", " Result_Bin\n", - " Predicted_Result\n", - " Predicted_Probability\n", + " Predicted\n", " \n", " \n", " \n", @@ -701,8 +688,18 @@ " feel like became dependent recent late-game he...\n", " feel like becam depend recent late-gam heroic ...\n", " 0\n", - " 0\n", - " 0.225801\n", + " 1\n", + " \n", + " \n", + " 1\n", + " 1\n", + " I like it even more when I don't think we're g...\n", + " Win\n", + " i like it even more when i don't think we're g...\n", + " like even n't think 're gon na win win anyways\n", + " like even n't think 're gon na win win anyway\n", + " 1\n", + " 1\n", " \n", " \n", " 2\n", @@ -714,43 +711,28 @@ " game confirm everyth alreadi knew bobbi ’ play...\n", " 0\n", " 0\n", - " 0.305607\n", " \n", " \n", - " 5\n", - " 5\n", - " Good fight on the road against the defending c...\n", - " Loss\n", - " good fight on the road against the defending c...\n", - " good fight road defending champs bad first gam...\n", - " good fight road defend champ bad first game doc\n", - " 0\n", - " 0\n", - " 0.061192\n", - " \n", - " \n", - " 6\n", - " 6\n", - " Doc is overrated. Major L recruiting him\n", + " 3\n", + " 3\n", + " I’m in shock as a Blazers fan. I know for a fa...\n", " Loss\n", - " doc is overrated major l recruiting him\n", - " doc overrated major l recruiting\n", - " doc overr major l recruit\n", + " i’m in shock as a blazers fan i know for a fac...\n", + " ’ shock blazers fan know fact lillard play way...\n", + " ’ shock blazer fan know fact lillard play way ...\n", " 0\n", " 0\n", - " 0.343441\n", " \n", " \n", - " 8\n", - " 8\n", - " Yep! 
We got a lot of bucks basketball left woo...\n", - " Loss\n", - " yep we got a lot of bucks basketball left woot...\n", - " yep got lot bucks basketball left woot hope do...\n", - " yep got lot buck basketbal left woot hope dont...\n", - " 0\n", + " 4\n", + " 4\n", + " Can we please change the banner to our current...\n", + " Win\n", + " can we please change the banner to our current...\n", + " please change banner current bucks roster ’ wa...\n", + " pleas chang banner current buck roster ’ want ...\n", + " 1\n", " 0\n", - " 0.436663\n", " \n", " \n", "\n", @@ -759,274 +741,200 @@ "text/plain": [ " Unnamed: 0 Comment Result \\\n", "0 0 I feel like we became too dependent on our rec... Loss \n", + "1 1 I like it even more when I don't think we're g... Win \n", "2 2 This game confirmed everything I already knew ... Loss \n", - "5 5 Good fight on the road against the defending c... Loss \n", - "6 6 Doc is overrated. Major L recruiting him Loss \n", - "8 8 Yep! We got a lot of bucks basketball left woo... Loss \n", + "3 3 I’m in shock as a Blazers fan. I know for a fa... Loss \n", + "4 4 Can we please change the banner to our current... Win \n", "\n", " Comment_Adj \\\n", "0 i feel like we became too dependent on our rec... \n", + "1 i like it even more when i don't think we're g... \n", "2 this game confirmed everything i already knew ... \n", - "5 good fight on the road against the defending c... \n", - "6 doc is overrated major l recruiting him \n", - "8 yep we got a lot of bucks basketball left woot... \n", + "3 i’m in shock as a blazers fan i know for a fac... \n", + "4 can we please change the banner to our current... \n", "\n", " No_Stop \\\n", "0 feel like became dependent recent late-game he... \n", + "1 like even n't think 're gon na win win anyways \n", "2 game confirmed everything already knew bobby ’... \n", - "5 good fight road defending champs bad first gam... \n", - "6 doc overrated major l recruiting \n", - "8 yep got lot bucks basketball left woot hope do... \n", - "\n", - " Stemmed Result_Bin \\\n", - "0 feel like becam depend recent late-gam heroic ... 0 \n", - "2 game confirm everyth alreadi knew bobbi ’ play... 0 \n", - "5 good fight road defend champ bad first game doc 0 \n", - "6 doc overr major l recruit 0 \n", - "8 yep got lot buck basketbal left woot hope dont... 0 \n", + "3 ’ shock blazers fan know fact lillard play way... \n", + "4 please change banner current bucks roster ’ wa... \n", "\n", - " Predicted_Result Predicted_Probability \n", - "0 0 0.225801 \n", - "2 0 0.305607 \n", - "5 0 0.061192 \n", - "6 0 0.343441 \n", - "8 0 0.436663 " + " Stemmed Result_Bin Predicted \n", + "0 feel like becam depend recent late-gam heroic ... 0 1 \n", + "1 like even n't think 're gon na win win anyway 1 1 \n", + "2 game confirm everyth alreadi knew bobbi ’ play... 0 0 \n", + "3 ’ shock blazer fan know fact lillard play way ... 0 0 \n", + "4 pleas chang banner current buck roster ’ want ... 
1 0 " ] }, - "execution_count": 49, + "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "lr_tn.head()" + "bert.head()" ] }, { - "cell_type": "code", - "execution_count": 50, - "id": "00b14cb7-40b5-49b3-949c-6bc69c61c3a8", + "cell_type": "markdown", + "id": "f2e8667b-9b6d-4b12-9b00-d0c6ab2ea194", "metadata": {}, - "outputs": [], "source": [ - "first_200_df = bnb_fp.sort_values(\"Predicted_Probability\", ascending = False)\n", - "first_200 = first_200_df['Comment'].head(200)" + "## Evaluating False Negatives for LR, BERT, and CNN" ] }, { "cell_type": "code", - "execution_count": 51, - "id": "03ad08ff-5653-4404-a3b4-b5ff1e473030", + "execution_count": 161, + "id": "ee1e09c8-4a76-4e4d-bf52-5ef417d41680", "metadata": {}, "outputs": [], "source": [ - "comments_lr = first_200.isin(lr_tn['Comment'])\n", - "comments_cnn = first_200.isin(cnn_tn['Comment'])\n", - "\n", - "comments_both = first_200[comments_lr & comments_cnn]" + "bnb_tp = bnb[(bnb[\"Result\"] == \"Win\") & (bnb[\"Predicted_Result\"] == 1)]\n", + "lr_fn = lr[(lr[\"Result\"] == \"Win\") & (lr[\"Predicted_Result\"] == 0)]\n", + "cnn_fn = cnn[(cnn[\"Result\"] == \"Win\") & (cnn[\"Predicted_Label\"] == 0)]\n", + "bert_fn = bert[(bert[\"Result\"] == \"Win\") & (bert[\"Predicted\"] == 0)]" ] }, { "cell_type": "code", - "execution_count": 52, - "id": "7993b8fe-2a25-4563-a001-725984d45602", + "execution_count": 162, + "id": "7d5a379e-df32-461d-8783-7e911d437a84", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2223 Livingston had some decent moments on both ends tonight. Hopefully can see some improvement with more minutes\n", - "1373 They used to be bad… really bad…\n", - "1876 Rather him than 3-13 dame tbh\n", - "1213 Having watched this game: Yes. The Bucks are really fucking stupid.\n", - "580 If you can't suggest players in the proposed trade, then this is useless.\n", - "Name: Comment, dtype: object\n" + "Yep people still calling for trades and firing\n", + "Dame’s game is really unbelievable… He’s the best logo shooter of all time statistically. If you come out too far, he’ll explode right past you and shoot or get a dunk/layup/foul. If you grab at him, he’s the best freethrow shooter in the league this season (and headed to 4th career all-time). If your teammate comes over to help, he is an elite playmaker and will make the right pass more often than not. He’s like prime Harden but much smaller, better at shooting, and way more clutch.\n", + "on ESPN they said could be the next game or the game after next\n", + "Malik is playing well, sending him to the bench now will throw him off\n" ] } ], "source": [ - "with pd.option_context('display.max_colwidth', None):\n", - " print(comments_both)" + "bnb_comments = set(bnb_tp[\"Comment\"])\n", + "lr_comments = set(lr_fn[\"Comment\"])\n", + "cnn_comments = set(cnn_fn[\"Comment\"])\n", + "bert_comments = set(bert_fn[\"Comment\"])\n", + "\n", + "common_comments = lr_comments.intersection(cnn_comments, bert_comments, bnb_comments)\n", + "\n", + "for i, comment in enumerate(common_comments):\n", + " if i < 4:\n", + " print(comment)\n", + " else:\n", + " break" ] }, { "cell_type": "code", - "execution_count": 54, - "id": "f3d5f504-c9cb-44ea-9343-ede7c9462493", + "execution_count": 163, + "id": "6540bfc4-669c-470d-9847-1d4a94003940", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " Unnamed: 0 Comment Result \\\n", - "892 892 If the scheme that you designed is so bad that... 
Loss \n", - "921 921 I can't much blame Livingston for his offense ... Loss \n", - "929 929 I think there's probs some hesitation to shoot... Loss \n", - "1809 1809 MarJon was already out of the rotation - and I... Loss \n", - "2223 2223 Livingston had some decent moments on both end... Loss \n", - "\n", - " Comment_Adj \\\n", - "892 if the scheme that you designed is so bad that... \n", - "921 i can't much blame livingston for his offense ... \n", - "929 i think there's probs some hesitation to shoot... \n", - "1809 marjon was already out of the rotation and i ... \n", - "2223 livingston had some decent moments on both end... \n", - "\n", - " No_Stop \\\n", - "892 scheme designed bad chris livingston best play... \n", - "921 ca n't much blame livingston offense game cons... \n", - "929 think 's probs hesitation shoot specifically a... \n", - "1809 marjon already rotation n't know 've noticed '... \n", - "2223 livingston decent moments ends tonight hopeful... \n", - "\n", - " Stemmed Result_Bin \\\n", - "892 scheme design bad chri livingston best player ... 0 \n", - "921 ca n't much blame livingston offens game consi... 0 \n", - "929 think 's prob hesit shoot specif ajj livingsto... 0 \n", - "1809 marjon alreadi rotat n't know 've notic 's lin... 0 \n", - "2223 livingston decent moment end tonight hope see ... 0 \n", - "\n", - " Predicted_Result Predicted_Probability \n", - "892 0 0.486864 \n", - "921 0 0.263094 \n", - "929 0 0.295788 \n", - "1809 0 0.158327 \n", - "2223 0 0.382528 \n" + "Word: malik, Coefficient: -0.5406748829539177\n", + "Word: play, Coefficient: -0.056864372549471444\n", + "Word: well, Coefficient: -0.24822874624598537\n", + "Word: send, Coefficient: 0.4619140404395336\n", + "Word: bench, Coefficient: -0.3270444798255762\n", + "Word: throw, Coefficient: -0.0845347188654442\n" ] } ], "source": [ - "target_comment = \"Livingston\"\n", - "\n", - "# Find the comment in the dataframe\n", - "found_comment = lr_tn[lr_tn['Comment'].str.contains(target_comment)]\n", - "\n", - "# Print the found comment\n", - "print(found_comment)" + "target_comment = \"Malik is playing well, sending him to the bench now will throw him off\"\n", + "return_coef_lr(target_comment)" ] }, { "cell_type": "code", - "execution_count": 55, - "id": "05162f82-3e62-4153-9480-5642d5cb1218", + "execution_count": 164, + "id": "39955afd-8b36-43f6-b434-8127598342ba", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " Unnamed: 0 Comment Result \\\n", - "892 892 If the scheme that you designed is so bad that... Loss \n", - "921 921 I can't much blame Livingston for his offense ... Loss \n", - "929 929 I think there's probs some hesitation to shoot... Loss \n", - "1075 1075 Wiggins doesn’t think he’s a superstar and is ... Win \n", - "1209 1209 My dream of Chris Livingston Jr becoming a rot... Win \n", - "1235 1235 The Blazers are good at their developing their... Loss \n", - "1809 1809 MarJon was already out of the rotation - and I... Loss \n", - "2223 2223 Livingston had some decent moments on both end... Loss \n", - "\n", - " Comment_Adj \\\n", - "892 if the scheme that you designed is so bad that... \n", - "921 i can't much blame livingston for his offense ... \n", - "929 i think there's probs some hesitation to shoot... \n", - "1075 wiggins doesn’t think he’s a superstar and is ... \n", - "1209 my dream of chris livingston jr becoming a rot... \n", - "1235 the blazers are good at their developing their... \n", - "1809 marjon was already out of the rotation and i ... 
\n", - "2223 livingston had some decent moments on both end... \n", - "\n", - " No_Stop \\\n", - "892 scheme designed bad chris livingston best play... \n", - "921 ca n't much blame livingston offense game cons... \n", - "929 think 's probs hesitation shoot specifically a... \n", - "1075 wiggins ’ think ’ superstar smart enough make ... \n", - "1209 dream chris livingston jr becoming rotation pl... \n", - "1235 blazers good developing players simply underst... \n", - "1809 marjon already rotation n't know 've noticed '... \n", - "2223 livingston decent moments ends tonight hopeful... \n", - "\n", - " Stemmed Result_Bin \\\n", - "892 scheme design bad chri livingston best player ... 0 \n", - "921 ca n't much blame livingston offens game consi... 0 \n", - "929 think 's prob hesit shoot specif ajj livingsto... 0 \n", - "1075 wiggin ’ think ’ superstar smart enough make e... 1 \n", - "1209 dream chri livingston jr becom rotat player al... 1 \n", - "1235 blazer good develop player simpli understand y... 0 \n", - "1809 marjon alreadi rotat n't know 've notic 's lin... 0 \n", - "2223 livingston decent moment end tonight hope see ... 0 \n", - "\n", - " Predicted_Result Predicted_Probability \n", - "892 0 0.486864 \n", - "921 0 0.263094 \n", - "929 0 0.295788 \n", - "1075 0 0.022897 \n", - "1209 1 0.827561 \n", - "1235 1 0.558184 \n", - "1809 0 0.158327 \n", - "2223 0 0.382528 \n" + "['malik', 'play', 'well', 'send', 'bench', 'throw']\n", + "{'malik': 0.43478260869565216, 'play': 0.4826001313197636, 'well': 0.4766666666666667, 'send': 0.6428571428571427, 'bench': 0.5251396648044693, 'throw': 0.5132743362831859}\n" ] } ], "source": [ - "target_comment = \"Livingston\"\n", - "\n", - "# Find the comment in the dataframe\n", - "found_comment = lr[lr['Comment'].str.contains(target_comment)]\n", - "\n", - "# Print the found comment\n", - "print(found_comment)" + "find_prob_nb(target_comment, \"Win\")" + ] + }, + { + "cell_type": "markdown", + "id": "845efb3e-655c-43c6-950d-0a19f01eae3b", + "metadata": {}, + "source": [ + "## Evaluating False Positives" ] }, { "cell_type": "code", - "execution_count": 41, - "id": "283aeffc-9189-486c-b3e3-7d007750ff64", + "execution_count": 165, + "id": "846f9cbd-b0ed-48b0-b772-c8cd238334f5", + "metadata": {}, + "outputs": [], + "source": [ + "bnb_fp = bnb[(bnb[\"Result\"] == \"Loss\") & (bnb[\"Predicted_Result\"] == 1)].sort_values(\"Predicted_Probability\", ascending = False)\n", + "lr_tn = lr[(lr[\"Result\"] == \"Loss\") & (lr[\"Predicted_Result\"] == 0)]\n", + "cnn_tn = cnn[(cnn[\"Result\"] == \"Loss\") & (cnn[\"Predicted_Label\"] == 0)]\n", + "bert_tn = bert[(bert[\"Result\"] == \"Loss\") & (bert[\"Predicted\"] == 0)]" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "id": "597cb4c9-131d-465f-a457-bdc1da3c25d4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " Unnamed: 0 Comment Result \\\n", - "2223 2223 Livingston had some decent moments on both end... Loss \n", - "\n", - " Comment_Adj \\\n", - "2223 livingston had some decent moments on both end... \n", - "\n", - " No_Stop \\\n", - "2223 livingston decent moments ends tonight hopeful... \n", - "\n", - " Stemmed Result_Bin \\\n", - "2223 livingston decent moment end tonight hope see ... 
0 \n", - "\n", - " Predicted_Result Predicted_Probability \n", - "2223 1 0.999957 \n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/hs/br_4rpdj68nc3sfdpgv0xgn80000gn/T/ipykernel_24579/3657591789.py:4: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n", - " found_comment = bnb_fp[bnb['Comment'].str.contains(target_comment)]\n" + "None of us know what was at the root of the tension between these guys. They might not even understand it themselves.\n", + "Dame is checked out and is perpetually lazy, apathetic, and downright stupid on the court. It’s miserable to watch. I miss Jrue, and Khris is better than Dame right now.\n", + "Don’t let a regular season game in November fool you if both teams matched up in the playoffs the Bucks beat this team in 5\n", + "2 FOR 19 FROM 3 for Lopez/Dame/Khris…unacceptable\n", + "I’ll do u one better. The guy hasn’t made it out of the 2nd round since the big three celtics.\n", + "The Bucks have played the easiest schedule in the NBA so far by a pretty decent margin too.\n", + "Doc has to.be trolling us right? His post game press conference he talked about getting Pat more minutes so he's ready and confident down the stretch.\n", + "Both Pacers and Heat have the shooters to have decent odds of a hot streak from 3 taking some games they shouldnt.\n", + "We won’t see the second round lmao\n", + "If you blame the bench, you don't know basketball. The real problem is the starting 5. They don't click together.\n" ] } ], "source": [ - "target_comment = \"Livingston had some decent moments on both ends tonight. Hopefully can see some improvement with more minutes\"\n", + "bnb_comments = set(bnb_fp[\"Comment\"])\n", + "lr_comments = set(lr_tn[\"Comment\"])\n", + "cnn_comments = set(cnn_tn[\"Comment\"])\n", + "bert_comments = set(bert_tn[\"Comment\"])\n", "\n", - "# Find the comment in the dataframe\n", - "found_comment = bnb_fp[bnb['Comment'].str.contains(target_comment)]\n", + "common_comments = lr_comments.intersection(cnn_comments, bert_comments, bnb_comments)\n", "\n", - "# Print the found comment\n", - "print(found_comment)" + "for i, comment in enumerate(common_comments):\n", + " if i < 10:\n", + " print(comment)\n", + " else:\n", + " break" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 171, "id": "322fdf5b-4f85-4f4a-84c0-45f0a6f14f1a", "metadata": {}, "outputs": [ @@ -1034,105 +942,137 @@ "name": "stdout", "output_type": "stream", "text": [ - " Unnamed: 0 Comment Result \\\n", - "2223 2223 Livingston had some decent moments on both end... Loss \n", - "\n", - " Comment_Adj \\\n", - "2223 livingston had some decent moments on both end... \n", - "\n", - " No_Stop \\\n", - "2223 livingston decent moments ends tonight hopeful... \n", - "\n", - " Stemmed Result_Bin \\\n", - "2223 livingston decent moment end tonight hope see ... 
0 \n", - "\n", - " Predicted_Result Predicted_Probability \n", - "2223 0 0.382528 \n" + "Word: dame, Coefficient: -0.1700790422176605\n", + "Word: check, Coefficient: 0.24744862055832584\n", + "Word: perpetu, Coefficient: -0.027956271452183712\n", + "Word: lazi, Coefficient: -0.23844876824699507\n", + "Word: apathet, Coefficient: 0\n", + "Word: downright, Coefficient: 0\n", + "Word: stupid, Coefficient: -0.6631070629775302\n", + "Word: court, Coefficient: 0.146007882187147\n", + "Word: it’, Coefficient: 0\n", + "Word: miser, Coefficient: 0.11285656253966668\n", + "Word: watch, Coefficient: -0.25341505528863745\n", + "Word: miss, Coefficient: -0.4716422624688569\n", + "Word: jrue, Coefficient: -0.33698888558356804\n", + "Word: khri, Coefficient: 0.34432296915886257\n", + "Word: better, Coefficient: 0.15519712399002292\n", + "Word: dame, Coefficient: -0.1700790422176605\n", + "Word: right, Coefficient: -0.2621512216885956\n" ] } ], "source": [ - "target_comment = \"Livingston had some decent moments on both ends tonight. Hopefully can see some improvement with more minutes\"\n", + "target_comment = \"Dame is checked out and is perpetually lazy, apathetic, and downright stupid on the court. It’s miserable to watch. I miss Jrue, and Khris is better than Dame right now\"\n", "\n", - "# Find the comment in the dataframe\n", - "found_comment = lr_tn[lr_tn['Comment'].str.contains(target_comment)]\n", - "\n", - "# Print the found comment\n", - "print(found_comment)" + "return_coef_lr(target_comment)" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "b5d582d2", + "execution_count": 170, + "id": "0f2667ff", "metadata": {}, "outputs": [ { - "ename": "ImportError", - "evalue": "attempted relative import with no known parent package", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpickle\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mfunctions_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m process_text, basic_process, cnn_process\n", - "\u001b[0;31mImportError\u001b[0m: attempted relative import with no known parent package" + "name": "stdout", + "output_type": "stream", + "text": [ + "['dame', 'check', 'perpetu', 'lazi', 'apathet', 'downright', 'stupid', 'court', 'it’', 'miser', 'watch', 'miss', 'jrue', 'khri', 'better', 'dame', 'right']\n", + "{'dame': 0.4727272727272728, 'check': 0.31034482758620685, 'perpetu': 0.49999999999999994, 'lazi': 0.49999999999999994, 'apathet': nan, 'downright': nan, 'stupid': 0.6086956521739131, 'court': 0.4726027397260274, 'it’': nan, 'miser': 0.5555555555555556, 'watch': 0.541958041958042, 'miss': 0.5688888888888889, 'jrue': 0.58, 'khri': 0.40449438202247184, 'better': 0.44941176470588234, 'right': 0.5503875968992248}\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/hs/br_4rpdj68nc3sfdpgv0xgn80000gn/T/ipykernel_5441/1060855778.py:21: RuntimeWarning: invalid value encountered in double_scalars\n", + " ratio[key] = value / evidence[key]\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0CommentResultComment_AdjNo_StopStemmedResult_BinPredicted_ResultPredicted_Probability
12821282Dame is checked out and is perpetually lazy, a...Lossdame is checked out and is perpetually lazy ap...dame checked perpetually lazy apathetic downri...dame check perpetu lazi apathet downright stup...010.995509
\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 Comment Result \\\n", + "1282 1282 Dame is checked out and is perpetually lazy, a... Loss \n", + "\n", + " Comment_Adj \\\n", + "1282 dame is checked out and is perpetually lazy ap... \n", + "\n", + " No_Stop \\\n", + "1282 dame checked perpetually lazy apathetic downri... \n", + "\n", + " Stemmed Result_Bin \\\n", + "1282 dame check perpetu lazi apathet downright stup... 0 \n", + "\n", + " Predicted_Result Predicted_Probability \n", + "1282 1 0.995509 " + ] + }, + "execution_count": 170, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "import pickle\n", - "from ...functions.functions_utils import process_text, basic_process, cnn_process" + "find_prob_nb(target_comment, \"Loss\")\n", + "result = bnb[bnb['Comment'].str.contains(target_comment)]\n", + "result" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0423d0d1", - "metadata": {}, - "outputs": [], - "source": [ - "def return_coef_bnb(text):\n", - "\n", - " model = bnb\n", - " text = process_text(input_field)\n", - " text = bnb_vectorizer.transform([text])\n", - "\n", - " vocabulary = vectorizer.get_feature_names_out()\n", - " coefficients = model.coef_[0]\n", - "\n", - " # Map coefficients to words\n", - " word_coefficient_map = {word: coef for word, coef in zip(vocabulary, coefficients)}\n", - "\n", - " # Print coefficients for words in the input text\n", - " for word in input_text.split():\n", - " if word in word_coefficient_map:\n", - " print(f\"Word: {word}, Coefficient: {word_coefficient_map[word]}\")\n", - " else:\n", - " print(f\"Word: {word}, Coefficient: 0\") " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ddb1cd57", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d6f2f431", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python (testenv)", "language": "python", - "name": "python3" + "name": "testenv" }, "language_info": { "codemirror_mode": { @@ -1144,7 +1084,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/notebooks/Experiments-BERT.ipynb b/notebooks/Experiments-BERT.ipynb deleted file mode 100644 index e42126c..0000000 --- a/notebooks/Experiments-BERT.ipynb +++ /dev/null @@ -1,244 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "e55024f6-48aa-46ae-ae28-c8e342b41d05", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/anaconda3/envs/testenv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "from transformers import DistilBertTokenizerFast\n", - "from transformers import TFDistilBertForSequenceClassification\n", - "from transformers import set_seed\n", - "import tensorflow as tf\n", - "from tqdm import tqdm" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "81a58130-e8c9-4e5a-8080-8e0a90e98171", - "metadata": {}, - "outputs": [], - "source": [ - "train_df = pd.read_csv(\"../data/train.csv\")\n", - "val_df = pd.read_csv(\"../data/validation.csv\")\n", - "test_df = pd.read_csv(\"../data/test.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "56356a8e-bc73-432d-902e-70f7c6125df7", - "metadata": {}, - "outputs": [], - "source": [ - "tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "1ba9fa14-53a1-44c4-9d13-1baed5ba3c36", - "metadata": {}, - "outputs": [], - "source": [ - "def tokenize(sentences, max_length=100, padding='max_length'):\n", - " return tokenizer(\n", - " sentences,\n", - " truncation=True,\n", - " padding=padding,\n", - " max_length=max_length,\n", - " return_tensors=\"tf\" \n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "ac239be7-6e4d-4ceb-854d-2031c1c9db7c", - "metadata": {}, - "outputs": [], - "source": [ - "bert_x_train = train_df[\"Comment_Adj\"].tolist()\n", - "bert_y_train = train_df[\"Result_Bin\"].tolist()\n", - "bert_x_val = val_df[\"Comment_Adj\"].tolist()\n", - "bert_y_val = val_df[\"Result_Bin\"].tolist()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "5c468de7-c416-4ec6-8a86-0984f0671c7b", - "metadata": {}, - "outputs": [], - "source": [ - "train_encodings = tokenize(bert_x_train)\n", - "val_encodings = tokenize(bert_x_val)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "b4ace8c8-91e6-4a5d-bc5e-2a2dccfebcd8", - "metadata": {}, - "outputs": [], - "source": [ - "train_labels = tf.convert_to_tensor(bert_y_train, dtype=tf.int32)\n", - "val_labels = tf.convert_to_tensor(bert_y_val, dtype=tf.int32)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "783c1b81-4d0b-40bb-9c05-3faa24a9c911", - "metadata": {}, - "outputs": [], - "source": [ - "seed_value = 42\n", - "set_seed(seed_value)\n", - "\n", - "train_dataset = tf.data.Dataset.from_tensor_slices((\n", - " dict(train_encodings), \n", - " train_labels\n", - ")).shuffle(1000).batch(30).prefetch(1)\n", - "\n", - "validation_dataset = tf.data.Dataset.from_tensor_slices((\n", - " dict(val_encodings), \n", - " val_labels\n", - ")).batch(30).prefetch(1)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "d46d7bd2-808e-415b-878b-5e0146cb2705", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight']\n", - "- This IS expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. 
initializing a TFBertForSequenceClassification model from a BertForPreTraining model).\n", - "- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).\n", - "Some weights or buffers of the TF 2.0 model TFDistilBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classifier.weight', 'classifier.bias']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/2\n", - "228/228 [==============================] - 976s 4s/step - loss: 0.6641 - accuracy: 0.5987 - val_loss: 0.6537 - val_accuracy: 0.5980\n", - "Epoch 2/2\n", - "228/228 [==============================] - 1016s 4s/step - loss: 0.5443 - accuracy: 0.7240 - val_loss: 0.6240 - val_accuracy: 0.6652\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "seed_value = 42\n", - "set_seed(seed_value)\n", - "\n", - "model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased',num_labels=2)\n", - "optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=5e-5)\n", - "model.compile(\n", - " optimizer=optimizer,\n", - " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", - " metrics = [\"accuracy\")\n", - "\n", - "model.fit(\n", - " x=train_dataset,\n", - " y=None,\n", - " validation_data=validation_dataset,\n", - " batch_size=30,\n", - " epochs=2\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "b4882ab2-48f9-472b-b5eb-b817f48c7418", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2024-04-28 13:32:35.788501: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype int32 and shape [2276,100]\n", - "\t [[{{node Placeholder/_1}}]]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "76/76 [==============================] - 93s 1s/step - loss: 0.6230 - accuracy: 0.6586\n", - "Loss: 0.622992217540741\n", - "Accuracy: 0.6586115956306458\n" - ] - } - ], - "source": [ - "bert_x_test = test_df[\"Comment_Adj\"].tolist()\n", - "bert_y_test = test_df[\"Result_Bin\"].tolist()\n", - "\n", - "test_encodings = tokenize(bert_x_test)\n", - "\n", - "test_labels = tf.convert_to_tensor(bert_y_test, dtype=tf.int32)\n", - "\n", - "test_dataset = tf.data.Dataset.from_tensor_slices((\n", - " dict(test_encodings), \n", - " test_labels\n", - ")).shuffle(1000).batch(30).prefetch(1)\n", - "\n", - "results = model.evaluate(test_dataset)\n", - "print(\"Loss:\", results[0])\n", - "print(\"Accuracy:\", results[1])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python (testenv)", - "language": "python", - "name": "testenv" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": 
"3.9.18" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/functions/__init__.py b/notebooks/functions/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/notebooks/functions/functions_utils.py b/notebooks/functions/functions_utils.py deleted file mode 100644 index 8971909..0000000 --- a/notebooks/functions/functions_utils.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import re -import pandas as pd -from nltk.corpus import stopwords -from nltk.stem import PorterStemmer -from tensorflow.keras.preprocessing.text import Tokenizer -from tensorflow.keras.preprocessing.sequence import pad_sequences - -def process_text(document): - # Tokenize the document - tokens = document.split() - tokens = [re.sub(r'^\W+|\W+$', '', token) for token in tokens] - tokens = [token.lower() for token in tokens] - - # Remove stopwords - stop_words = set(stopwords.words('english')) - tokens = [token for token in tokens if token not in stop_words] - - # Stem the tokens - stemmer = PorterStemmer() - stemmed_tokens = [stemmer.stem(token) for token in tokens] - - # Return the processed text - return ' '.join(stemmed_tokens) - - -tokenizer = Tokenizer() -train_df = pd.read_csv("/Users/jackiecollopy/Downloads/project-reddit/data/train.csv") -val_df = pd.read_csv("/Users/jackiecollopy/Downloads/project-reddit/data/train.csv") -test_df = pd.read_csv("/Users/jackiecollopy/Downloads/project-reddit/data/train.csv") - -def basic_process(document): - # Tokenize the document - tokens = document.split() - # Remove punctuation at the start and end of each token and convert to lowercase - tokens = [re.sub(r'^\W+|\W+$', '', token).lower() for token in tokens] - # Join processed tokens back into a string - processed_text = ' '.join(tokens) - return processed_text - -def cnn_process(document): - - processed_document = basic_process(document) - tokenizer = Tokenizer() - - texts = pd.concat([train_df["Comment_Adj"], val_df["Comment_Adj"], test_df["Comment_Adj"]]) - tokenizer.fit_on_texts(texts) - - all_sequences = tokenizer.texts_to_sequences(texts) - sequences = tokenizer.texts_to_sequences([processed_document]) - - padded_sequences = pad_sequences(sequences, maxlen=87, padding='post') - return padded_sequences - - -def bert_process(document): - tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased') - inputs = tokenizer.encode_plus( - comment, - add_special_tokens=True, - max_length=128, - padding='max_length', - return_attention_mask=True, - truncation=True, - return_tensors='tf' - ) - - input_ids = inputs['input_ids'] - attention_mask = inputs['attention_mask'] - - return input_ids, attention_mask - - - - - - \ No newline at end of file