-
Notifications
You must be signed in to change notification settings - Fork 1
/
bankInfoExtractor.py
114 lines (104 loc) · 2.58 KB
/
bankInfoExtractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import taggedToTree as ttt
# Every adjective spelling recognised in a query, mapped to the short code
# used by the rest of the extractor (avg / max / min / curr).
adjd = {
    "average": "avg",
    "avg": "avg",
    "maximum": "max",
    "max": "max",
    "minimum": "min",
    "min": "min",
    "latest": "curr",
    "last": "curr",
    "current": "curr",
}

# Banking keywords that bankKeyWord() accepts as the root noun of a query.
keywordl = ["balance", "debit", "credit"]
def cleanTree(t):
    """Remove question-word lead-ins from the first level of a parsed tree.

    Every direct child tagged 'WP' (as in "What is", "Who are") is dropped
    together with the child that immediately follows it (the VBZ / VBP verb
    in those phrases).  The follower is dropped whatever its tag is; a 'WP'
    that is the last child is dropped on its own.

    Input: Parsed tree dict; its 'children' entry (a list of node dicts) is
           updated in place.  A missing or empty 'children' is left alone.
    Output: The same tree object that was passed in.
    """
    children = t.get('children')
    if not children:
        return t

    # Build the surviving list in a single pass instead of calling remove()
    # while iterating: removing during iteration skipped the node after each
    # removed pair, and indexing "next sibling" past the end raised
    # IndexError when the 'WP' node was the last child.
    kept = []
    drop_next = False
    for node in children:
        if drop_next:
            # This node directly follows a 'WP' node.
            drop_next = False
            continue
        if node.get('pos') == 'WP':
            drop_next = True
            continue
        kept.append(node)

    # Slice-assign so anyone holding a reference to the list sees the change.
    children[:] = kept
    return t
def bankKeyWord(t):
    """Extract the banking keyword and its optional adjective from a tree.

    Input: Parsed Tree obj (dict with 'pos', 'key' and 'children')
    Output Tuple: (keyword, adj) -- always two elements
        keyword: the root key when the root is an 'NN' listed in keywordl
                 (balance, debit, credit); "" otherwise
        adj:     the adjd code (max, min, avg, curr) of the first child, or
                 of the second child when the first is a determiner;
                 "" when there is none
    Eg: ('balance', 'avg')
        ('debit', 'max')
        ('balance', '')
    """
    # If the root word is an 'NN' it is most likely the keyword; any other
    # root is not recognised.
    if t['pos'] != 'NN' or t['key'] not in keywordl:
        return ("", "")
    keyword = t['key']

    # avg / max / min sits at the first-level child, unless a determiner
    # ('DT') comes first, in which case it is the child after it.
    children = t.get('children') or []
    if children and children[0]['pos'] == 'DT':
        children = children[1:]

    # A bare keyword, or one with only a determiner, has no adjective;
    # indexing the children unconditionally raised IndexError here.
    if not children:
        return (keyword, "")

    adj = adjd.get(children[0]['key'], "")
    return (keyword, adj)
# TODO: the time-related code below should probably live in its own module.
from datetime import datetime
from timestring import Date
from timestring import Range
def timeLine(t):
    """Build a timestring Range from the time expression of a parsed tree.

    The phrase handed to Range is chosen from the nodes ttt.getSubTree
    collects for the 'IN' tag, looking at the key of the first one:
        in    -> its flattened keys                (Eg: in may 2016)
        since -> its flattened keys + ' to now'    (Eg: since feb 2016)
        from  -> its flattened keys, followed by those of the second node
                 when exactly two were collected
                 (Eg: from january 2016 to may 2016)
    When no 'IN' node is found, the first 'NN' node keyed 'month' or 'year'
    is used with its flattened keys reversed (Eg: "this month", "last month").
    When none of the above yields a phrase, "in last month" is used.

    Input: Parsed tree object
    Output: timestring Range built from the phrase
    """
    in_nodes = []
    ttt.getSubTree(t, 'IN', in_nodes)

    # Debug trace of every collected 'IN' node.  Single-argument print(...)
    # produces the same output on Python 2 and also parses on Python 3.
    for node in in_nodes:
        print("Tree form")
        ttt.print_node(node, 0)
        print("-----")
        keys = []
        ttt.flattenTreeKeys(node, keys)
        print(' '.join(keys[1:]))

    s = ""
    if not in_nodes:
        # Try to see if "this month" or "last month"
        nn_nodes = []
        ttt.getSubTree(t, 'NN', nn_nodes)
        for node in nn_nodes:
            if node['key'] in ('month', 'year'):
                keys = []
                ttt.flattenTreeKeys(node, keys)
                keys.reverse()
                s = ' '.join(keys)
                break
    else:
        first = in_nodes[0]
        lead = first['key']
        if lead in ('in', 'since', 'from'):
            keys = []
            ttt.flattenTreeKeys(first, keys)
            if lead == 'from' and len(in_nodes) == 2:
                # Distinct from and to nodes: append the second one.
                ttt.flattenTreeKeys(in_nodes[1], keys)
            s = ' '.join(keys)
            if lead == 'since':
                s += ' to now'

    # Default period.  This also covers an 'IN' node with any other key,
    # which previously left the phrase empty and passed "" to Range.
    if s == '':
        s = "in last month"

    print(s)
    r = Range(s)
    print(r)
    return r