# SennaParser.py
## @package SennaParser
# This script parses the output file produced by SENNA (a semantic role labeling toolkit, see http://ml.nec-labs.com/senna/).
# @author Wencan Luo ([email protected])
#
# Usage:
#   python SennaParser.py filename
#
# Usage example:
#   python SennaParser.py ../data/outputFromSenna.txt
#
import sys
import fio
from SennaUnit import *
def SennaParseWithCountDict(filename):
    """
    @function: Read a SENNA output file and index where each sentence starts.
    @param filename: string, the filename of the sennafile, the sennafile is an output file given by SENNA;
        sentences are separated by blank (empty or whitespace-only) lines
    @return: <list, dict>, the lines exactly as returned by fio.ReadFile, and a dict that maps the
        0-based sentence number to the index (in that list) of the sentence's first line
    """
    lines = fio.ReadFile(filename)
    # Single-argument print(...) produces the same output under Python 2 and 3.
    print("nLine= %d" % len(lines))
    sys.stdout.flush()

    CountDict = {}
    nLast = -1  # index of the most recent blank separator line; -1 before any is seen
    for i, line in enumerate(lines):
        if not line.strip():  # a blank line closes the current sentence
            CountDict[len(CountDict)] = nLast + 1
            nLast = i

    # If the file does not end with a blank line, the lines after the last
    # separator still form a sentence; record its start as well.
    if nLast + 1 < len(lines):
        CountDict[len(CountDict)] = nLast + 1

    print("nCount= %d" % len(CountDict))
    sys.stdout.flush()
    return lines, CountDict
def SennaParse(filename):
    """
    @function: Parse the file and return a list of sentence. Each sentence is a SennaSentence
    @param filename: string, the filename of the sennafile, the sennafile is an output file given by SENNA;
        sentences are separated by blank (empty or whitespace-only) lines
    @return: list, Each item is a SennaSentence constructed from the rows of one sentence, where a row
        is the list of stripped, tab-separated fields of one non-blank line
    """
    lines = fio.ReadFile(filename)

    sentences = []
    tm = []  # rows collected for the sentence currently being read
    for line in lines:
        line = line.strip()
        if not line:  # a blank line closes the current sentence
            sentences.append(SennaSentence(tm))
            tm = []
            continue
        tm.append([field.strip() for field in line.split("\t")])

    # Keep a final sentence that is not followed by a blank line instead of
    # silently dropping its rows.
    if tm:
        sentences.append(SennaSentence(tm))
    return sentences
if __name__ == "__main__":
    # With exactly one command-line argument, parse that file; with any other
    # argument count, fall back to the hard-coded default path rather than
    # exiting with a usage message.
    if len(sys.argv) == 2:
        filename = sys.argv[1]
    else:
        filename = '../data/senna/senna.2.MP.output'

    sentences = SennaParse(filename)
    # Single-argument print(...) produces the same output under Python 2 and 3.
    print(len(sentences))
    if sentences:  # a file with no sentences has no first sentence to show
        print(sentences[0])
    # SennaParseWithCountDict(filename) can be called here instead to inspect
    # the start line of each sentence.