-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathevaluate.py
66 lines (60 loc) · 1.88 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from hotpot_evaluate_v1 import f1_score, normalize_answer
import csv
def get(filename):
    """Load answer rows from a comma-delimited CSV file.

    Every non-empty row is mapped, by column position, to a dict with the
    keys ``question``, ``answer``, ``bridge_answer``, ``bridge_score``,
    ``intersec_answer``, ``intersec_score`` and ``ultimate_answer``.
    Column 0 is not read.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one dict per non-empty row, in file order. All values
        are the raw strings produced by ``csv.reader``.

    Raises:
        IndexError: If a non-empty row has fewer than seven columns.
    """
    data = []
    with open(filename, newline='') as file:
        reader = csv.reader(file, delimiter=",")
        for row in reader:
            # csv.reader yields an empty list for a blank line; skip those
            # rather than failing with IndexError on row[1].
            if not row:
                continue
            data.append({
                'question': row[1],
                'answer': row[2],
                'bridge_answer': row[3],
                # NOTE(review): bridge_score reads the same column as
                # bridge_answer, which looks like a copy-paste slip. The
                # intended column is not determinable from this file, so the
                # index is left as-is -- confirm against the CSV producer.
                'bridge_score': row[3],
                'intersec_answer': row[4],
                'intersec_score': row[5],
                'ultimate_answer': row[6],
            })
    return data
# Load both answer dumps and evaluate them as a single data set.
first = get("dev-answers-old.csv")
second = get("dev-answers.csv")
data = first + second

# Running totals, one slot per answer source:
# index 0 = bridge, 1 = intersec, 2 = ultimate.
fscores = [0, 0, 0]
ems = [0, 0, 0]
precision = [0, 0, 0]
recall = [0, 0, 0]

for i, d in enumerate(data):
    a = d['answer']
    bridge_answer = d['bridge_answer']
    intersec_answer = d['intersec_answer']
    ultimate_answer = d['ultimate_answer']

    # Score each candidate against the reference answer, in slot order.
    exact_flags = []
    candidates = (bridge_answer, intersec_answer, ultimate_answer)
    for slot, candidate in enumerate(candidates):
        f1, prcsn, rcll = f1_score(candidate, a)
        fscores[slot] += f1
        precision[slot] += prcsn
        recall[slot] += rcll
        # NOTE(review): exact match compares the raw strings; the
        # normalize_answer helper imported at the top of the file is never
        # applied -- confirm whether EM should use normalized text.
        is_exact = candidate == a
        ems[slot] += is_exact
        exact_flags.append(is_exact)

    # Per-row trace: row index followed by the three exact-match flags.
    print("{:5d} : {:5s} : {:5s} : {:5s}".format(i, *map(str, exact_flags)))
# Average the accumulated totals over all rows and append them to the
# evaluation CSV as four rows: F1, precision, recall, exact match. Each row
# holds the three per-source values (bridge, intersec, ultimate).
eval_file_name = "dev-eval.csv"
N = len(data)
if N == 0:
    # Averages are undefined for an empty data set; stop with a clear
    # message instead of a bare ZeroDivisionError.
    raise SystemExit("no rows loaded from the answer files; nothing to average")
fscores = [total / N for total in fscores]
ems = [total / N for total in ems]
# Each metric is averaged from its own totals. Previously precision was
# derived from the already-averaged F1 list and recall from the
# already-averaged EM list, so both written rows were wrong.
precision = [total / N for total in precision]
recall = [total / N for total in recall]
# newline='' lets the csv writer control line endings itself, which avoids
# blank lines between rows on platforms that translate "\n".
with open(eval_file_name, mode='a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(fscores)
    writer.writerow(precision)
    writer.writerow(recall)
    writer.writerow(ems)