From dcf0ee3da16febe115667560f7e006e2a9800838 Mon Sep 17 00:00:00 2001 From: jcollopy-tulane Date: Mon, 29 Apr 2024 16:55:32 -0500 Subject: [PATCH] IDK --- notebooks/New_Bert.ipynb | 99 +++++++--------------------------------- 1 file changed, 16 insertions(+), 83 deletions(-) diff --git a/notebooks/New_Bert.ipynb b/notebooks/New_Bert.ipynb index 47ec344..e97e746 100644 --- a/notebooks/New_Bert.ipynb +++ b/notebooks/New_Bert.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 34, + "execution_count": 48, "id": "9a807a6a-bfb2-44f7-ac15-24043f0b388f", "metadata": {}, "outputs": [], @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 50, "id": "883141b4-cd6d-45d0-ad6b-caa3b905ae10", "metadata": {}, "outputs": [], @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 53, "id": "74ac3c58-156e-4f6e-9fa0-a522f4f21449", "metadata": {}, "outputs": [], @@ -45,16 +45,8 @@ " padding=\"max_length\",\n", " max_length=max_length,\n", " return_tensors=\"pt\"\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "6457cfad-6088-4f25-b30e-7105e5aee114", - "metadata": {}, - "outputs": [], - "source": [ + " )\n", + "\n", "class CommentsDataset(Dataset):\n", " def __init__(self, encodings, labels):\n", " self.encodings = encodings\n", @@ -66,32 +58,16 @@ " return item\n", "\n", " def __len__(self):\n", - " return len(self.labels)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "514cb603-8131-4195-b297-aae6169d9424", - "metadata": {}, - "outputs": [], - "source": [ + " return len(self.labels)\n", + "\n", "train_encodings = tokenize(train_df)\n", "val_encodings = tokenize(val_df)\n", "test_encodings = tokenize(test_df)\n", "\n", "train_dataset = CommentsDataset(train_encodings, train_df['Result_Bin'])\n", "val_dataset = CommentsDataset(val_encodings, val_df['Result_Bin'])\n", - "test_dataset = CommentsDataset(test_encodings, test_df['Result_Bin'])" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "b2171c6a-a7ea-4d98-815f-e04d5e428a7a", - "metadata": {}, - "outputs": [], - "source": [ + "test_dataset = CommentsDataset(test_encodings, test_df['Result_Bin'])\n", + "\n", "train_loader = DataLoader(train_dataset, batch_size=10, shuffle=True)\n", "val_loader = DataLoader(val_dataset, batch_size=10)\n", "test_loader = DataLoader(test_dataset, batch_size=10)" @@ -99,43 +75,10 @@ }, { "cell_type": "code", - "execution_count": 40, - "id": "8d1851fc-f761-45ad-a7fa-c779fa6a3ecd", + "execution_count": null, + "id": "3a7f59a5-371e-4c5c-9c6c-6a60448bf365", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-mini and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", - "/Users/jackiecollopy/nlp-virtual/lib/python3.11/site-packages/transformers/optimization.py:521: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", - " warnings.warn(\n", - "Epoch 1/6: 0%| | 0/683 [00:00