Skip to content

Commit

Permalink
Ideas
Browse files Browse the repository at this point in the history
  • Loading branch information
jcollopy-tulane committed Apr 30, 2024
1 parent 39c7fe2 commit ed2617c
Show file tree
Hide file tree
Showing 8 changed files with 253 additions and 69 deletions.
4 changes: 2 additions & 2 deletions nlp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,13 @@ def train_nb():
Naive Bayes Model (Do Third)
"""
train_df = pd.read_csv(config.get('data', 'file1'))
val_df = pd.read_csv(config.get('data', 'file2'))
test_df = pd.read_csv(config.get('data', 'file3'))

train_df["Comment"] = train_df["Comment"].apply(process_text)
bnb_vectorizer = CountVectorizer()
X_train = bnb_vectorizer.fit_transform(train_df["Comment"])
y_train = train_df["Result_Bin"]
val_df["Comment"] = val_df["Comment"].apply(process_text)
test_df["Comment"] = val_df["Comment"].apply(process_text)
X_val = bnb_vectorizer.transform(val_df["Comment"])
y_val = val_df["Result_Bin"]

Expand Down
166 changes: 150 additions & 16 deletions notebooks/Experiment-CNN-1.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 34,
"id": "1eac01e8-3c62-469b-96ea-d4babd8f9348",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -72,8 +72,8 @@
"X_train = pad_sequences(X_train_sequences, padding='post', maxlen=87)\n",
"X_val = pad_sequences(X_val_sequences, padding='post', maxlen=87)\n",
"label_encoder = LabelEncoder()\n",
"y_train = label_encoder.fit_transform(train_df[\"Result_Bin\"])\n",
"y_val = label_encoder.fit_transform(val_df[\"Result_Bin\"])"
"y_train = train_df[\"Result_Bin\"]\n",
"y_val = val_df[\"Result_Bin\"]"
]
},
{
Expand Down Expand Up @@ -444,7 +444,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 32,
"id": "3a809d75-24a8-4e56-8776-ac4899c95e15",
"metadata": {},
"outputs": [],
Expand All @@ -454,12 +454,12 @@
"X_test = pad_sequences(X_test_sequences, padding='post', maxlen=87)\n",
"label_encoder = LabelEncoder()\n",
"\n",
"y_test = label_encoder.fit_transform(test_df[\"Result_Bin\"])"
"y_test = test_df[\"Result_Bin\"]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 33,
"id": "9612ac53-cc11-4ee6-acff-d33cabf98e73",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -561,20 +561,154 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 30,
"id": "e96b0c61-e235-4fb2-88af-6149c17184a6",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'cnn_model' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[28], line 6\u001b[0m\n\u001b[1;32m 4\u001b[0m results_df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mActual_Label\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m y_test\n\u001b[1;32m 5\u001b[0m results_df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPredicted_Label\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m preds\n\u001b[0;32m----> 6\u001b[0m probs \u001b[38;5;241m=\u001b[39m \u001b[43mcnn_model\u001b[49m\u001b[38;5;241m.\u001b[39mpredict(X_test)\n\u001b[1;32m 7\u001b[0m results_df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpredict_proba\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m probs\n\u001b[1;32m 8\u001b[0m results_df\u001b[38;5;241m.\u001b[39mhead()\n",
"\u001b[0;31mNameError\u001b[0m: name 'cnn_model' is not defined"
"name": "stdout",
"output_type": "stream",
"text": [
"72/72 [==============================] - 0s 2ms/step\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Comment</th>\n",
" <th>Result</th>\n",
" <th>Comment_Adj</th>\n",
" <th>No_Stop</th>\n",
" <th>Stemmed</th>\n",
" <th>Result_Bin</th>\n",
" <th>Actual_Label</th>\n",
" <th>Predicted_Label</th>\n",
" <th>predict_proba</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>I feel like we became too dependent on our rec...</td>\n",
" <td>Loss</td>\n",
" <td>i feel like we became too dependent on our rec...</td>\n",
" <td>feel like became dependent recent late-game he...</td>\n",
" <td>feel like becam depend recent late-gam heroic ...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0.416957</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>I like it even more when I don't think we're g...</td>\n",
" <td>Win</td>\n",
" <td>i like it even more when i don't think we're g...</td>\n",
" <td>like even n't think 're gon na win win anyways</td>\n",
" <td>like even n't think 're gon na win win anyway</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.844099</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>This game confirmed everything I already knew ...</td>\n",
" <td>Loss</td>\n",
" <td>this game confirmed everything i already knew ...</td>\n",
" <td>game confirmed everything already knew bobby ’...</td>\n",
" <td>game confirm everyth alreadi knew bobbi ’ play...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.533056</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>I’m in shock as a Blazers fan. I know for a fa...</td>\n",
" <td>Loss</td>\n",
" <td>i’m in shock as a blazers fan i know for a fac...</td>\n",
" <td>’ shock blazers fan know fact lillard play way...</td>\n",
" <td>’ shock blazer fan know fact lillard play way ...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0.542013</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Can we please change the banner to our current...</td>\n",
" <td>Win</td>\n",
" <td>can we please change the banner to our current...</td>\n",
" <td>please change banner current bucks roster ’ wa...</td>\n",
" <td>pleas chang banner current buck roster ’ want ...</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0.706892</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Comment Result \\\n",
"0 I feel like we became too dependent on our rec... Loss \n",
"1 I like it even more when I don't think we're g... Win \n",
"2 This game confirmed everything I already knew ... Loss \n",
"3 I’m in shock as a Blazers fan. I know for a fa... Loss \n",
"4 Can we please change the banner to our current... Win \n",
"\n",
" Comment_Adj \\\n",
"0 i feel like we became too dependent on our rec... \n",
"1 i like it even more when i don't think we're g... \n",
"2 this game confirmed everything i already knew ... \n",
"3 i’m in shock as a blazers fan i know for a fac... \n",
"4 can we please change the banner to our current... \n",
"\n",
" No_Stop \\\n",
"0 feel like became dependent recent late-game he... \n",
"1 like even n't think 're gon na win win anyways \n",
"2 game confirmed everything already knew bobby ’... \n",
"3 ’ shock blazers fan know fact lillard play way... \n",
"4 please change banner current bucks roster ’ wa... \n",
"\n",
" Stemmed Result_Bin \\\n",
"0 feel like becam depend recent late-gam heroic ... 0 \n",
"1 like even n't think 're gon na win win anyway 1 \n",
"2 game confirm everyth alreadi knew bobbi ’ play... 0 \n",
"3 ’ shock blazer fan know fact lillard play way ... 0 \n",
"4 pleas chang banner current buck roster ’ want ... 1 \n",
"\n",
" Actual_Label Predicted_Label predict_proba \n",
"0 0 1 0.416957 \n",
"1 1 1 0.844099 \n",
"2 0 0 0.533056 \n",
"3 0 0 0.542013 \n",
"4 1 1 0.706892 "
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
Expand All @@ -583,7 +717,7 @@
"results_df = test_df.copy()\n",
"results_df[\"Actual_Label\"] = y_test\n",
"results_df[\"Predicted_Label\"] = preds\n",
"probs = cnn_model.predict(X_test)\n",
"probs = model.predict(X_test)\n",
"results_df['predict_proba'] = probs\n",
"results_df.head()"
]
Expand Down
64 changes: 30 additions & 34 deletions notebooks/Experiment-Logistic_Regression.ipynb

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions notebooks/Experiment-Naive_Bayes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "f7c7072e-41a8-4a68-816a-f138dfe2f713",
"metadata": {},
"outputs": [],
Expand All @@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "a2007cdd-38d5-40a5-9ed7-4f593d081a2e",
"metadata": {},
"outputs": [],
Expand All @@ -39,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "f2c29088-90a7-4084-8b2c-be0651c36c20",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -73,7 +73,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 4,
"id": "48c88dff-14a5-4975-9670-1cc33cc611df",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -122,7 +122,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"id": "649a1ffa-5717-4efa-ba97-6d6109e7cac7",
"metadata": {},
"outputs": [
Expand Down
78 changes: 66 additions & 12 deletions notebooks/New_Bert.ipynb

Large diffs are not rendered by default.

Binary file added notebooks/lrt_confusion_matrix.pdf
Binary file not shown.
Binary file added notebooks/lrv_confusion_matrix.pdf
Binary file not shown.
Binary file modified notebooks/nbv_confusion_matrix.pdf
Binary file not shown.

0 comments on commit ed2617c

Please sign in to comment.