diff --git a/README.md b/README.md index 59f56a2..c0e5f8a 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ In the Notebooks module you can view all of the different experiments we ran. ### Demo To run the demo: +Clone the repository using ```git clone https://github.com/tulane-cmps6730/project-legal.git``` +Open your terminal and cd into ```project-legal``` Move into the app directory with by using ```cd app``` in your command line To run the app use ```flask run``` This should run the flask app locally on your computer, so you should be able to use the demo in your web browser at http://127.0.0.1:5000 (the port number may differ depending on your computer) diff --git a/notebooks/BoWExperiments(2).ipynb b/notebooks/BoWExperiments(2).ipynb index f3ee364..9298987 100644 --- a/notebooks/BoWExperiments(2).ipynb +++ b/notebooks/BoWExperiments(2).ipynb @@ -776,32 +776,31 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "metadata": {}, "outputs": [ { - "ename": "TypeError", - "evalue": "sparse array length is ambiguous; use getnnz() or shape[0]", + "ename": "NameError", + "evalue": "name 'bow_classifier' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[17], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m bow_tensor \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtensor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbow_text\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/school/NLP/project-legal/nlp-virtual/lib/python3.10/site-packages/scipy/sparse/_base.py:395\u001b[0m, in \u001b[0;36m_spbase.__len__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 394\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__len__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 395\u001b[0m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msparse array length is ambiguous; use getnnz()\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 396\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m or shape[0]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[0;31mTypeError\u001b[0m: sparse array length is ambiguous; use getnnz() or shape[0]" + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[19], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m logits \u001b[38;5;241m=\u001b[39m \u001b[43mbow_classifier\u001b[49m\u001b[38;5;241m.\u001b[39mdecision_function(bow_text)\n\u001b[1;32m 2\u001b[0m logits\n", + "\u001b[0;31mNameError\u001b[0m: name 'bow_classifier' is not defined" ] } ], "source": [ - "import scipy.sparse as sp\n", - "sparse_matrix = sp.coo_matrix(bow_text)\n", - "values = torch.tensor(sparse_matrix.data)\n", - "indices = torch.tensor(np.vstack((sparse_matrix.row, sparse_matrix.col)), dtype=torch.long)\n", - "shape = torch.Size(sparse_matrix.shape)\n", - "\n", - "# Create sparse tensor\n", - "torch_sparse_tensor = torch.sparse_coo_tensor(indices, values, shape)\n", - "torch_sparse_tensor" + "logits = classifier.decision_function(bow_text)\n", + "logits" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we want to build an ensemble method, we will have to use TensorFlow rather than PyTorch, because it has better support for sparse matrices, which we need for the bag-of-words method." ] }, { diff --git a/requirements.txt b/requirements.txt index 8890ab3..6a22b81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ absl-py==2.1.0 +accelerate==0.29.2 aiohttp==3.9.3 aiosignal==1.3.1 appdirs==1.4.4 @@ -70,6 +71,7 @@ multiprocess==0.70.16 namex==0.0.7 nest-asyncio==1.6.0 networkx==3.3 +-e git+ssh://git@github.com/tulane-cmps6730/project-legal.git@d3d959cb3a9baec00d1c388539dd18e5dcceaea9#egg=nlp 
nltk==3.8.1 numpy==1.26.4 nvidia-cublas-cu12==12.1.3.1 @@ -129,6 +131,7 @@ threadpoolctl==3.3.0 tokenizers==0.15.2 torch==2.2.2 torchinfo==1.8.0 +torchtext==0.17.2 tornado==6.4 tqdm==4.66.2 traitlets==5.14.2