Skip to content

Commit

Permalink
fetch pattern score
Browse files Browse the repository at this point in the history
  • Loading branch information
doug919 committed Feb 23, 2015
1 parent 21e57c1 commit fafb368
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 9 deletions.
16 changes: 12 additions & 4 deletions batch/batchFetchPatterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,13 @@

def get_arguments(argv):

parser = argparse.ArgumentParser(description='fetch patterns from MongoDB')
parser = argparse.ArgumentParser(description='fetch patterns from MongoDB and sum up all vectors')
parser.add_argument('output_file', metavar='output_file',
help='File name of the ouput .npa file')
help='file name of the ouput .npa file')
parser.add_argument('-s', '--scoring', action='store_true', default=False,
help='use scored pattern emotion array')
parser.add_argument('-l', '--vlambda', metavar='LAMBDA', type=float, default=1.0,
help='a scoring parameter lambda which is useful when "-s" is set (DEFAULT: 1.0)')
parser.add_argument('-v', '--verbose', action='store_true', default=False,
help='show messages')
parser.add_argument('-d', '--debug', action='store_true', default=False,
Expand Down Expand Up @@ -54,12 +58,16 @@ def update_progress_bar(n_cur, n_total, bar_length=50):
weighted = True
for udocId, emotion in docs:

update_progress_bar(udocId, len(docs))
if loglevel <= logging.INFO:
update_progress_bar(udocId, len(docs))

pattern_freq_vec = pf.get_pattern_freq_by_udocId(udocId, min_count, weighted)

# sum vectors horizontally
sum_vec = pf.sum_pattern_freq_vector(pattern_freq_vec)
if args.scoring:
sum_vec = pf.sum_pattern_score_vector(pattern_freq_vec, args.vlambda)
else:
sum_vec = pf.sum_pattern_freq_vector(pattern_freq_vec)

X.append(sum_vec)
y.append(emotion)
Expand Down
63 changes: 59 additions & 4 deletions feelit/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,20 +181,75 @@ def get_pattern_freq_by_udocId(self, udocId, min_count=1, weighted=True):

return pattern_freq_vec

def sum_pattern_freq_vector(self, pf):
def _sum_pattern_vector(self, pf, use_score=False, vlambda=1.0):
    """
    Sum the per-pattern emotion vectors of ``pf`` element-wise.

    pf         -- dict whose values are per-pattern dicts (key -> number)
    use_score  -- when true, every per-pattern dict is first converted with
                  ``self.pattern_score(freq_vec, vlambda)``; otherwise the
                  raw values are summed
    vlambda    -- forwarded to ``self.pattern_score``; unused when
                  ``use_score`` is false

    Returns a list with ``len(self.emotion_list)`` entries (all zeros when
    ``pf`` is empty).

    NOTE(review): positions follow the iteration order of each per-pattern
    dict, so this presumably relies on every dict sharing one key order and
    on that order matching ``self.emotion_list`` -- confirm against the
    code that builds ``pf``.
    """
    sum_vec = [0] * len(self.emotion_list)

    for freq_vec in pf.values():

        if use_score:
            temp_vec = self.pattern_score(freq_vec, vlambda).values()
        else:
            temp_vec = freq_vec.values()

        # list(...) keeps the running total a real list: on Python 3 a bare
        # map() is a lazy iterator, which callers cannot index or compare.
        sum_vec = list(map(add, sum_vec, temp_vec))

    return sum_vec

def sum_pattern_freq_vector(self, pf):
    """
    Sum the pattern emotion arrays in ``pf`` by raw occurrence frequency.

    Delegates to ``self._sum_pattern_vector`` with scoring switched off.
    """
    summed = self._sum_pattern_vector(pf, use_score=False)
    return summed

def sum_pattern_score_vector(self, pf, vlambda=1.0):
    """
    Sum the scored pattern emotion arrays in ``pf``.

    Delegates to ``self._sum_pattern_vector`` with scoring switched on and
    ``vlambda`` passed through as the scoring parameter.
    """
    summed = self._sum_pattern_vector(pf, use_score=True, vlambda=vlambda)
    return summed

def pattern_score(self, freq_vec, vlambda):
    """
    Score a pattern emotion array.

    freq_vec -- dict mapping each key ``e`` to its frequency ``f(p, e)``
    vlambda  -- the scoring parameter lambda

    For every key ``e`` the score is computed from the entry of ``e``
    itself and from all the other entries, written ``f(p, -e)``:

        s(p, e)  = f(p, e)
        beta     = vlambda ^ L2_Norm(f(p, -e))
        s(p, -e) = L2_Norm(f(p, -e)) ^ 2 / (L1_Norm(f(p, -e)) + beta)
        S(p, e)  = s(p, e) / (s(p, e) + s(p, -e))

    The "L1 norm" here is the plain sum of the other entries (no absolute
    value is taken), which equals the L1 norm for non-negative input.

    Returns a new dict with the same keys, in the same iteration order as
    ``freq_vec``, holding ``S(p, e)``. A key whose final denominator is
    zero (e.g. an all-zero ``freq_vec``) scores 0.0 instead of raising
    ZeroDivisionError.
    """
    S_vec = {}

    # Loop over the dict itself so the result keeps freq_vec's key order.
    for e in freq_vec:

        ## s(p, e) = f(p, e)
        s_e = freq_vec[e]

        # Every entry except e, taken in dict order so that the float
        # sums below are reproducible from run to run.
        others = [freq_vec[k] for k in freq_vec if k != e]
        sum_l1 = sum(others)
        sum_l2 = sum(pow(v, 2) for v in others)

        ## beta = lambda ^ L2_Norm(f(p, -e))
        # pow(..., 0.5) is always a float, so beta is a float as well and
        # the divisions below are true divisions on Python 2 too.
        beta = pow(vlambda, pow(sum_l2, 0.5))

        ## s(p, -e) = L2_Norm(f(p, -e)) ^ 2 / (L1_Norm(f(p, -e)) + beta)
        s_not_e = sum_l2 / (sum_l1 + beta)

        ## final score S(p, e) = s(p, e) / (s(p, e) + s(p, -e))
        denominator = s_e + s_not_e
        S_vec[e] = s_e / denominator if denominator else 0.0

    return S_vec


class FileSplitter(object):
"""
Expand Down
16 changes: 15 additions & 1 deletion feelit/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,4 +364,18 @@ def dump_list_to_csv(file_name, data):
import csv
w = csv.writer(open(file_name, 'w'))
for row in data:
w.writerow(row)
w.writerow(row)

############################################## arguments parsing
def parse_range(astr):
    """
    Expand a comma-separated list of integers and ranges into a sorted list.

    Each comma-separated piece is either a single integer ("7") or an
    inclusive "first-last" range ("1-3"); duplicates are collapsed.
    For example "1-3,5" gives [1, 2, 3, 5].
    """
    numbers = set()
    for chunk in astr.split(','):
        bounds = chunk.split('-')
        # A single number has one piece, so first and last coincide.
        first = int(bounds[0])
        last = int(bounds[-1])
        numbers.update(range(first, last + 1))
    return sorted(numbers)

def parse_list(astr):
    """
    Parse a comma-separated string of numbers into a sorted list of floats.

    Duplicate values are collapsed, e.g. "0.5,1,0.5" gives [0.5, 1.0].
    """
    unique_values = set(float(token) for token in astr.split(','))
    return sorted(unique_values)

0 comments on commit fafb368

Please sign in to comment.