diff --git a/notebooks/New_Bert.ipynb b/notebooks/New_Bert.ipynb index c2b76cb..d9b636a 100644 --- a/notebooks/New_Bert.ipynb +++ b/notebooks/New_Bert.ipynb @@ -1,5 +1,21 @@ { "cells": [ + { + "cell_type": "markdown", + "id": "d9210791-41e0-42f3-99b4-a93439662152", + "metadata": {}, + "source": [ + "### BERT Experiment" + ] + }, + { + "cell_type": "markdown", + "id": "d36e1d24-9ab5-41e7-be19-54b2d319363c", + "metadata": {}, + "source": [ + "Below, the necessary libraries are imported and the data is loaded." + ] + }, { "cell_type": "code", "execution_count": 39, @@ -33,6 +49,14 @@ "test_df = pd.read_csv(\"../data/test.csv\")" ] }, + { + "cell_type": "markdown", + "id": "24896d87-bc73-4cf4-a0dd-ee788685f056", + "metadata": {}, + "source": [ + "Below, the MiniBERT tokenizer is initialized. Then, a custom dataset class is defined that wraps the texts and labels in PyTorch-compatible datasets." + ] + }, { "cell_type": "code", "execution_count": 41, @@ -77,139 +101,20 @@ "test_loader = DataLoader(test_dataset, batch_size=10)" ] }, + { + "cell_type": "markdown", + "id": "121b1db7-6173-4310-bdf5-6836a5cfe0e3", + "metadata": {}, + "source": [ + "Below, the model is trained. Training and validation accuracy are tracked and plotted as well." + ] + }, { "cell_type": "code", - "execution_count": 42, - "id": "3a7f59a5-371e-4c5c-9c6c-6a60448bf365", + "execution_count": null, + "id": "316b3c35-3982-46a4-a3ab-62da1203d6ac", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-mini and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", - "/Users/jackiecollopy/nlp-virtual/lib/python3.11/site-packages/transformers/optimization.py:521: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", - " warnings.warn(\n", - "Epoch 1/6 - Training:   0%|          | 0/683 [00:00<?, ?it/s]\n",
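The tokenizer and custom dataset class described in the added markdown sit outside the hunks of this diff, so they are not visible here. For review context, a minimal sketch of what that setup typically looks like follows; the class name `ReviewDataset`, the `text`/`label` column names, and `max_length=128` are assumptions rather than values taken from the notebook, while the returned dictionary keys mirror the `batch['input_ids']` / `batch['attention_mask']` / `batch['label']` accesses in the training cell, and the `prajjwal1/bert-mini` checkpoint comes from the removed stderr output.

```python
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer

# Tokenizer for the bert-mini checkpoint named in the removed warning.
tokenizer = AutoTokenizer.from_pretrained("prajjwal1/bert-mini")

class ReviewDataset(Dataset):  # hypothetical name for illustration
    """Wraps texts/labels so the DataLoader yields model-ready tensors."""

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = list(texts)
        self.labels = list(labels)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Pad/truncate each example to a fixed length.
        enc = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
            return_tensors="pt",
        )
        # Keys match the batch[...] accesses in the training loop below.
        return {
            "input_ids": enc["input_ids"].squeeze(0),
            "attention_mask": enc["attention_mask"].squeeze(0),
            "label": torch.tensor(self.labels[idx], dtype=torch.long),
        }

# Mirrors the visible context line: DataLoader(test_dataset, batch_size=10)
test_dataset = ReviewDataset(test_df["text"], test_df["label"], tokenizer)
test_loader = DataLoader(test_dataset, batch_size=10)
```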