Skip to content

Commit

Permalink
support for non numeric ids
Browse files Browse the repository at this point in the history
  • Loading branch information
felipebravom committed Oct 4, 2017
1 parent 244ae23 commit 6fe0726
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 4 deletions.
8 changes: 4 additions & 4 deletions evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,20 @@ def evaluate(pred,gold):
for line in gold_lines:
parts=line.split('\t')
if len(parts)==4:
data_dic[int(parts[0])]=[float(line.split('\t')[3])]
data_dic[parts[0]]=[float(line.split('\t')[3])]
else:
raise ValueError('Format problem.')


for line in pred_lines:
parts=line.split('\t')
if len(parts)==4:
if int(parts[0]) in data_dic:
if parts[0] in data_dic:
try:
data_dic[int(parts[0])].append(float(line.split('\t')[3]))
data_dic[parts[0]].append(float(line.split('\t')[3]))
except ValueError:
# Invalid predictions are replaced by a default value
data_dic[int(parts[0])].append(0.5)
data_dic[parts[0]].append(0.5)
else:
raise ValueError('Invalid tweet id.')
else:
Expand Down
86 changes: 86 additions & 0 deletions tweets_to_arff_disc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# tweets_to_arff_disc.py
# felipebravom
# Running example: python tweets_to_arff_disc.py data/anger-ratings-0to1.test.target.tsv data/anger-ratings-0to1.test.target.arff
import sys


def create_arff(input_file,output_file):
    """
    Convert a tab-separated tweet file into an ARFF dataset whose class
    attribute is a discretised intensity.

    Every input line is expected to hold exactly four tab-separated fields:
    id, tweet, emotion and score.  A score of ``NONE`` is written as the ARFF
    missing value ``?``; any other score is parsed as a float and mapped to
    ``low`` (<= 1/3), ``medium`` (<= 2/3) or ``high``.  Lines with a different
    number of fields are reported on stdout as "Wrong format" and skipped.

    The data is handled as raw bytes from read to write, so tweets are copied
    unchanged whatever their encoding, and the function runs on both
    Python 2.7 and Python 3.

    Args:
        input_file: path of the TSV file to read; it is also used verbatim as
            the ``@relation`` name in the ARFF header.
        output_file: path of the ARFF file to create (overwritten if present).

    Raises:
        ValueError: if a score is neither ``NONE`` nor parseable as a float.
        IOError / OSError: if either file cannot be opened.
    """
    # The relation name is embedded in a bytes header, so a text path has to
    # be encoded first (a Python 2 ``str`` path already is bytes).
    if isinstance(input_file, bytes):
        relation = input_file
    else:
        relation = input_file.encode("utf-8")

    # NOTE(review): ``id`` is declared numeric although nothing here checks
    # that the ids are numbers -- confirm against the data being converted.
    header = (
        b'@relation ' + relation + b'\n\n'
        b'@attribute id numeric \n'
        b'@attribute tweet string\n'
        b'@attribute emotion string\n'
        b'@attribute intensity {low,medium,high} \n'
        b'\n@data\n'
    )

    # The input is opened first so that an unreadable input does not truncate
    # an existing output file; ``with`` closes both handles even when a score
    # fails to parse.
    with open(input_file, "rb") as src, open(output_file, "wb") as out:
        out.write(header)

        for line in src:
            parts = line.split(b"\t")
            if len(parts) != 4:
                print("Wrong format")
                continue

            tweet_id, tweet, emotion = parts[0], parts[1], parts[2]
            score = parts[3].strip()

            if score == b"NONE":
                label = b"?"
            else:
                score_val = float(score)
                if score_val <= 1.0 / 3:
                    label = b"low"
                elif score_val <= 2.0 / 3:
                    label = b"medium"
                else:
                    label = b"high"

            # NOTE(review): double quotes inside the tweet or emotion text
            # are written as-is, without any escaping.
            out.write(
                tweet_id + b',"' + tweet + b'","' + emotion + b'",'
                + label + b'\n'
            )





def main(argv):
    """
    Command-line entry point.

    ``argv`` holds the program arguments without the script name: the first
    item is the input TSV path, the second the output ARFF path.
    """
    source_path = argv[0]
    target_path = argv[1]
    create_arff(source_path, target_path)


if __name__ == "__main__":
    # Run the converter only when executed as a script; drop the script
    # name so main() receives just the input and output paths.
    cli_args = sys.argv[1:]
    main(cli_args)









0 comments on commit 6fe0726

Please sign in to comment.