-
Notifications
You must be signed in to change notification settings - Fork 0
/
data-to-pandas.py
37 lines (32 loc) · 1.13 KB
/
data-to-pandas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
from hidden import filePath
import numpy as np
import pandas as pd
def read_labelled_files(directory, label):
    """Read every regular file in *directory* and tag it with *label*.

    Args:
        directory: Path of the folder holding one text file per sample.
        label: Sentiment label to attach to every sample that is read.

    Returns:
        A ``(names, texts, labels)`` tuple of three equally long lists:
        the file names, the file contents, and *label* repeated once per
        file.  All three are empty when the folder holds no usable files.

    Raises:
        OSError: If *directory* cannot be listed or a file cannot be read.
    """
    names = []
    texts = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # resulting row order reproducible between runs and machines.
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        # NOTE(review): the earlier loop started at index 1, which dropped an
        # arbitrary entry of the unsorted listing.  Presumably that was meant
        # to dodge a stray hidden file (e.g. .DS_Store) - confirm.  Dot-files
        # and non-file entries are skipped explicitly here instead, and every
        # remaining file is read.
        if name.startswith(".") or not os.path.isfile(path):
            continue
        # The context manager closes each handle as soon as it has been read.
        with open(path, 'r') as handle:
            texts.append(handle.read())
        names.append(name)
    return names, texts, [label] * len(names)


def main():
    """Collect the negative and positive test samples and write them to CSV.

    Prints the number of negative and positive samples, then the index and
    columns of the assembled frame, and finally writes "testSentences.csv"
    to the current working directory.
    """
    # obtain all instances of test data with negative sentiment
    negIds, negSentences, negLabels = read_labelled_files(filePath + "test/neg/", "0")
    print(len(negSentences))

    # obtain all instances of test data with positive sentiment
    posIds, posSentences, posLabels = read_labelled_files(filePath + "test/pos/", "1")
    print(len(posSentences))

    # assemble everything into a pandas DataFrame, negatives first
    d = {
        'id': pd.Series(negIds + posIds),
        'sentence': pd.Series(negSentences + posSentences),
        'sentiment': pd.Series(negLabels + posLabels),
    }
    df = pd.DataFrame(d)
    print(df.index)
    print(df.columns)
    df.to_csv("testSentences.csv")


if __name__ == "__main__":
    main()