# utils.py (forked from EMNLP2019LSAN/LSAN)

import os
import csv
import codecs
import yaml
import time
import numpy as np
from sklearn import metrics

class AttrDict(dict):
    """A dict whose keys are also readable as attributes (e.g. config.key)."""
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

def read_config(path):
    # safe_load avoids constructing arbitrary objects; yaml.load without an
    # explicit Loader is deprecated since PyYAML 5.1.
    with open(path, 'r') as f:
        return AttrDict(yaml.safe_load(f))
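
# Minimal usage sketch (the file name and the batch_size key are illustrative,
# not part of this repo):
#
#   config = read_config('config.yaml')  # YAML containing e.g. "batch_size: 32"
#   config.batch_size                    # -> 32, same as config['batch_size']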

def read_datas(filename, trans_to_num=False):
    # Read a whitespace-separated file, one token sequence per line;
    # optionally cast every token to int (for pre-indexed data).
    with open(filename, 'r') as f:
        lines = [line.split() for line in f]
    if trans_to_num:
        lines = [list(map(int, line)) for line in lines]
    return lines

def save_datas(data, filename, trans_to_str=False):
    # Inverse of read_datas: write one space-joined sequence per line.
    if trans_to_str:
        data = [list(map(str, line)) for line in data]
    lines = [" ".join(line) for line in data]
    with open(filename, 'w') as f:
        f.write("\n".join(lines))
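
# Round-trip sketch (the path 'tokens.txt' is illustrative):
#
#   save_datas([[1, 2], [3]], 'tokens.txt', trans_to_str=True)
#   read_datas('tokens.txt', trans_to_num=True)  # -> [[1, 2], [3]]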

def logging(file):
    # Return a closure that echoes a string to stdout and appends it to `file`.
    def write_log(s):
        print(s, end='')
        with open(file, 'a') as f:
            f.write(s)
    return write_log

def logging_csv(file):
    # Return a closure that appends one row (a sequence of fields) to a CSV file.
    def write_csv(s):
        with open(file, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(s)
    return write_csv
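
# Usage sketch for both loggers (file names are illustrative):
#
#   log = logging('train.log')
#   log('epoch 1: loss=0.42\n')           # printed and appended to train.log
#   log_csv = logging_csv('metrics.csv')
#   log_csv([1, 0.42, 0.81])              # appended as the row "1,0.42,0.81"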

def format_time(t):
    # t is a time.struct_time, e.g. the result of time.localtime().
    return time.strftime("%Y-%m-%d-%H:%M:%S", t)
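
# Example: format_time(time.localtime()) -> e.g. '2019-11-03-14:07:25'.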

def eval_metrics(reference, candidate, label_dict, log_path):
    # Dump the gold and predicted label sequences to per-sample files under
    # log_path, then score the prediction with multi-label metrics.
    ref_dir = log_path + 'reference/'
    cand_dir = log_path + 'candidate/'
    if not os.path.exists(ref_dir):
        os.mkdir(ref_dir)
    if not os.path.exists(cand_dir):
        os.mkdir(cand_dir)
    ref_file = ref_dir + 'reference'
    cand_file = cand_dir + 'candidate'
    for i in range(len(reference)):
        with codecs.open(ref_file + str(i), 'w', 'utf-8') as f:
            f.write("".join(reference[i]) + '\n')
        with codecs.open(cand_file + str(i), 'w', 'utf-8') as f:
            f.write("".join(candidate[i]) + '\n')

    def make_label(l, label_dict):
        # Turn a list of label strings into a binary indicator vector.
        # Unknown labels fall back to index 0 via dict.get's default.
        length = len(label_dict)
        result = np.zeros(length)
        indices = [label_dict.get(label.strip().lower(), 0) for label in l]
        result[indices] = 1
        return result

    def prepare_label(y_list, y_pre_list, label_dict):
        reference = np.array([make_label(y, label_dict) for y in y_list])
        candidate = np.array([make_label(y_pre, label_dict) for y_pre in y_pre_list])
        return reference, candidate

    def get_metrics(y, y_pre):
        hamming_loss = metrics.hamming_loss(y, y_pre)
        macro_f1 = metrics.f1_score(y, y_pre, average='macro')
        macro_precision = metrics.precision_score(y, y_pre, average='macro')
        macro_recall = metrics.recall_score(y, y_pre, average='macro')
        micro_f1 = metrics.f1_score(y, y_pre, average='micro')
        micro_precision = metrics.precision_score(y, y_pre, average='micro')
        micro_recall = metrics.recall_score(y, y_pre, average='micro')
        return hamming_loss, macro_f1, macro_precision, macro_recall, micro_f1, micro_precision, micro_recall

    y, y_pre = prepare_label(reference, candidate, label_dict)
    hamming_loss, macro_f1, macro_precision, macro_recall, micro_f1, micro_precision, micro_recall = get_metrics(y, y_pre)
    return {'hamming_loss': hamming_loss,
            'macro_f1': macro_f1,
            'macro_precision': macro_precision,
            'macro_recall': macro_recall,
            'micro_f1': micro_f1,
            'micro_precision': micro_precision,
            'micro_recall': micro_recall}
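
# A minimal smoke-test sketch. The labels, samples, and log path below are
# illustrative; eval_metrics writes per-sample files under log_path, so that
# directory must already exist and end with a separator:
#
#   label_dict = {'sports': 0, 'politics': 1, 'tech': 2}
#   reference = [['sports'], ['politics', 'tech']]
#   candidate = [['sports'], ['tech']]
#   scores = eval_metrics(reference, candidate, label_dict, './log/')
#   print(scores['micro_f1'])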