-
Notifications
You must be signed in to change notification settings - Fork 0
/
significance.py
36 lines (28 loc) · 1.12 KB
/
significance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# coding: utf-8
import math
import re

from scipy import stats
def parse(path):
    """Read the score column from a tab-separated evaluation file.

    Every non-blank line is split on runs of tab characters and its third
    field is converted to ``float``.  The value from the last data line is
    dropped before returning (presumably that line is an aggregate/summary
    row rather than a per-query score -- confirm against the tool that
    writes these files).

    Args:
        path: Path of the evaluation file to read.

    Returns:
        list[float]: The parsed values in file order, without the last one.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If the third field cannot be converted to ``float``.
    """
    maps = []
    with open(path) as evalfile:
        for line in evalfile:
            # A blank line (e.g. a trailing newline at end of file) has no
            # third field; skip it instead of failing with IndexError.
            if not line.strip():
                continue
            maps.append(float(re.split(r'\t+', line)[2].strip()))
    return maps[:-1]
def significance_test(a, b, threshold=0.05):
    """Run a paired t-test on two score sequences and print the verdict.

    Calls ``scipy.stats.ttest_rel(a, b)`` and compares the resulting p-value
    with ``threshold``.  One line describing the outcome is printed, followed
    by an empty line.

    Args:
        a: First sequence of scores.
        b: Second sequence of scores, paired element-wise with ``a``.
        threshold: Significance level.  A p-value above it means the null
            hypothesis cannot be rejected; otherwise it is rejected.

    Returns:
        None.  The result is only printed.
    """
    pvalue = stats.ttest_rel(a, b).pvalue
    if math.isnan(pvalue):
        # NaN compares False against any number, so without this guard an
        # undefined test would fall through to the "can be rejected" branch.
        print("PValue = {} --> The test is undefined (for example when both samples are identical), so no conclusion can be drawn (threshold {})".format(pvalue, threshold))
    elif pvalue > threshold:
        print("PValue = {} --> The null hypothesis, that the distributions are equal cannot be rejected (threshold {})".format(pvalue, threshold))
    else:
        print("PValue = {} --> The null hypothesis can be rejected (threshold {})".format(pvalue, threshold))
    print()
# Load the parsed scores from each evaluation file.
maps_tf_idf = parse("evaluation_results/evaluation_tf-idf.txt")
maps_bm25 = parse("evaluation_results/evaluation_bm25.txt")
maps_bm25va = parse("evaluation_results/evaluation_bm25va.txt")

# Run the pairwise comparisons in a fixed order, printing a heading before
# each test result.
for _heading, _left, _right in (
    ("TF-IDF vs BM25", maps_tf_idf, maps_bm25),
    ("TF-IDF vs BM25VA", maps_tf_idf, maps_bm25va),
    ("BM25 vs BM25VA", maps_bm25, maps_bm25va),
):
    print(_heading)
    significance_test(_left, _right)