1
+ # -*- coding: utf-8 -*-
2
+ import numpy as np
3
+ import random
4
+ import codecs
5
+ """
6
+ compute a single evaluation metric for task1, task2 and task3:
7
+ compute f1 score (micro, macro) for accusation & relevant article, and score for penalty
8
+ """
9
+
10
# Smoothing term added to the precision/recall/F1 denominators in compute_f1
# so that all-zero counts do not divide by zero.
small_value = 0.00001
# Debug prints fire only when a random draw from range(random_number) equals 1,
# i.e. roughly once every random_number calls.
random_number = 500
12
def compute_confuse_matrix_batch(y_targetlabel_list, y_logits_array, label_dict, name='default'):
    """
    Accumulate per-label (TP, FP, FN) counts over every example of a batch.

    :param y_targetlabel_list: a list; each element is a multi-hot target, e.g. [1,0,0,1,...]
    :param y_logits_array: indexable by example position; entry i is scored against target i
    :param label_dict: running counts, {label: (TP, FP, FN)}
    :param name: a string forwarded to compute_confuse_matrix for its debug output
    :return: label_dict: the counts after the last example (the input itself for an empty batch)
    """
    counts = label_dict
    for row, target_multihot in enumerate(y_targetlabel_list):
        # Each call hands back the dict that the next example keeps updating.
        counts = compute_confuse_matrix(target_multihot, y_logits_array[row], counts, name=name)
    return counts
24
+
25
def compute_confuse_matrix(y_targetlabel_list_single, y_logit_array_single, label_dict, name='default'):
    """
    Update the per-label true positive (TP), false positive (FP) and false
    negative (FN) counts with a single example.

    A label counts as predicted when its score is >= 0.50. When no score reaches
    that threshold, the single highest-scoring label is used instead, so every
    example contributes at least one prediction.

    :param y_targetlabel_list_single: multi-hot target of one example, e.g. [0,0,1,0,1,...]
    :param y_logit_array_single: per-class scores of the same example; position i is the score of label i
    :param label_dict: {label: (TP, FP, FN)}; updated in place. It must already hold an
                       entry for every label index that occurs (KeyError otherwise).
    :param name: a string prefixed to the occasional debug print
    :return: label_dict: the same dict object, with updated counts
    """
    # 1. get target labels and predicted labels (as index lists).
    y_target_labels = get_target_label_short(y_targetlabel_list_single)  # e.g. [2,12,88]
    y_predict_labels = [i for i, score in enumerate(y_logit_array_single) if score >= 0.50]  # e.g. [2,12,13,10]
    if not y_predict_labels:
        y_predict_labels = [np.argmax(y_logit_array_single)]

    # Sampled debug output: fires about once every random_number calls.
    if random.randrange(random_number) == 1:
        print(name + ".y_target_labels:", y_target_labels, ";y_predict_labels:", y_predict_labels)

    # 2. count TP, FP, FN for every label that is either expected or predicted.
    target_set = set(y_target_labels)
    predict_set = set(y_predict_labels)
    for label in target_set | predict_set:
        TP, FP, FN = label_dict[label]
        if label in predict_set:
            if label in target_set:  # predict=1, truth=1
                TP = TP + 1
            else:  # predict=1, truth=0
                FP = FP + 1
        else:  # predict=0, truth=1 (label came from the target set)
            FN = FN + 1
        label_dict[label] = (TP, FP, FN)
    return label_dict
57
+
58
+
59
def compute_penalty_score_batch(target_deaths, predict_deaths, target_lifeimprisons, predict_lifeimprisons, target_imprsions, predict_imprisons):
    """
    Average the penalty score (task 3) over a batch.

    Element i of every argument is passed to compute_penalty_score; the number
    of examples is taken from len(target_deaths).

    :param target_deaths: per-example death targets
    :param predict_deaths: per-example death predictions
    :param target_lifeimprisons: per-example life-imprisonment targets
    :param predict_lifeimprisons: per-example life-imprisonment predictions
    :param target_imprsions: per-example imprisonment targets
    :param predict_imprisons: per-example imprisonment predictions
    :return: score_batch: a scalar, mean score of the batch
             (an empty batch divides by zero)
    """
    batch_size = len(target_deaths)
    accumulated = 0.0
    for idx in range(batch_size):
        accumulated = accumulated + compute_penalty_score(
            target_deaths[idx], predict_deaths[idx],
            target_lifeimprisons[idx], predict_lifeimprisons[idx],
            target_imprsions[idx], predict_imprisons[idx])
    return accumulated / float(batch_size)
77
+
78
def compute_penalty_score(target_death, predict_death, target_lifeimprison, predict_lifeimprison, target_imprsion, predict_imprison):
    """
    Penalty score (task 3) of a single example: the mean of the death,
    life-imprisonment and imprisonment sub-scores, scaled by 100.

    :param target_death: target passed to compute_death_lifeimprisonment_score
    :param predict_death: prediction passed to compute_death_lifeimprisonment_score
    :param target_lifeimprison: target passed to compute_death_lifeimprisonment_score
    :param predict_lifeimprison: prediction passed to compute_death_lifeimprisonment_score
    :param target_imprsion: target passed to compute_imprisonment_score
    :param predict_imprison: prediction passed to compute_imprisonment_score
    :return: score: a scalar, score for this example
    """
    death_part = compute_death_lifeimprisonment_score(target_death, predict_death)
    life_part = compute_death_lifeimprisonment_score(target_lifeimprison, predict_lifeimprison)
    prison_part = compute_imprisonment_score(target_imprsion, predict_imprison)
    mean_part = (death_part + life_part + prison_part) / 3.0
    return mean_part * (100.0)
94
+
95
def compute_death_lifeimprisonment_score(target, predict):
    """
    Score a death or life-imprisonment prediction: 1.0 when the argmax of the
    prediction equals the argmax of the target, otherwise 0.0.

    :param target: a list
    :param predict: an array
    :return: score: a scalar
    """
    target_index = np.argmax(target)
    predict_index = np.argmax(predict)
    # Sampled debug output, about once every random_number calls.
    if random.randrange(random_number) == 1:
        print("death_lifeimprisonment_score.target:", target_index, ";predict:", predict_index)
    score = 1.0 if target_index == predict_index else 0.0
    if random.randrange(random_number) == 1:
        print("death_lifeimprisonment_score:", score)
    return score
111
+
112
def compute_imprisonment_score(target_value, predict_value):
    """
    Score an imprisonment prediction from the gap
    v = |log(predict_value + 1) - log(target_value + 1)|.

    The score falls in steps of 0.2: v <= 0.2 gives 1.0, v <= 0.4 gives 0.8,
    v <= 0.6 gives 0.6, v <= 0.8 gives 0.4, v <= 1.0 gives 0.2, anything else 0.0.

    :param target_value: a scalar
    :param predict_value: a scalar
    :return: score: a scalar
    """
    # Sampled debug output, about once every random_number calls.
    if random.randrange(random_number) == 1:
        print("x.imprisonment_score.target_value:", target_value, ";predict_value:", predict_value)
    log_gap = np.abs(np.log(predict_value + 1.0) - np.log(target_value + 1.0))
    # (upper bound of the gap, score awarded), checked from tightest to loosest.
    bands = ((0.2, 1.0), (0.4, 0.8), (0.6, 0.6), (0.8, 0.4), (1.0, 0.2))
    score = 0.0
    for upper_bound, banded_score in bands:
        if log_gap <= upper_bound:
            score = banded_score
            break
    if random.randrange(random_number) == 1:
        print("imprisonment_score:", score)
    return score
136
+
137
def compute_micro_macro(label_dict):
    """
    Compute micro-averaged and macro-averaged f1 from the per-label counts.

    :param label_dict: {label: (TP, FP, FN)}
    :return: f1_micro, f1_macro: scalar, scalar
    """
    # The tuple is evaluated left to right: micro first, then macro.
    return (compute_f1_micro_use_TFFPFN(label_dict),
            compute_f1_macro_use_TFFPFN(label_dict))
146
+
147
def compute_f1_micro_use_TFFPFN(label_dict):
    """
    Compute micro f1: pool TP, FP and FN over all labels, then take one f1
    from the pooled counts.

    :param label_dict: {label: (TP, FP, FN)}
    :return: f1_micro: a scalar
    """
    pooled_tp, pooled_fp, pooled_fn = compute_TF_FP_FN_micro(label_dict)
    return compute_f1(pooled_tp, pooled_fp, pooled_fn, 'micro')
156
+
157
def compute_f1_macro_use_TFFPFN(label_dict):
    """
    Compute macro f1: the unweighted mean of the per-label f1 scores.

    :param label_dict: {label: (TP, FP, FN)}
    :return: f1_macro: a scalar (an empty label_dict divides by zero)
    """
    class_count = len(label_dict)
    running_total = 0.0
    for counts in label_dict.values():
        tp, fp, fn = counts
        running_total = running_total + compute_f1(tp, fp, fn, 'macro')
    return running_total / float(class_count)
174
+
175
+ #[this function is for debug purpose only]
176
def compute_f1_score_write_for_debug(label_dict, label2index):
    """
    Compute the f1 score of every label and append the scores, sorted from
    lowest to highest, to 'debug_accuracy.txt' (debug helper).

    :param label_dict: {label_index: (TP, FP, FN)}
    :param label2index: {label_name: label_index}; must cover every index in label_dict
                        (KeyError otherwise)
    :return: a dict. key is label name, value is f1 score.
    """
    # Invert label2index once up front rather than once per label.
    index2label = {index: label_name for label_name, index in label2index.items()}

    # 1. compute f1 score for each label, keyed by label name.
    f1score_dict = {}
    for label, (TP, FP, FN) in label_dict.items():
        f1_score_single = compute_f1(TP, FP, FN, 'normal_f1_score')
        f1score_dict[index2label[label]] = f1_score_single

    # 2. append to the debug file, worst-scoring labels first.
    tuple_list = sorted(f1score_dict.items(), key=lambda item: item[1], reverse=False)
    # The with-block closes the file even if a write fails.
    with codecs.open('debug_accuracy.txt', mode='a', encoding='utf-8') as write_object:
        write_object.write("\n \n ")
        for label_name, f1_score in tuple_list:
            write_object.write(label_name + ":" + str(f1_score) + "\n ")
    return f1score_dict
204
+
205
def compute_f1(TP, FP, FN, compute_type):
    """
    Compute f1 from true-positive, false-positive and false-negative counts.

    small_value is added to every denominator, so all-zero counts yield 0.0
    instead of a division by zero (and the result is marginally below the
    exact f1).

    :param TP: number of true positives. e.g. 200
    :param FP: number of false positives. e.g. 200
    :param FN: number of false negatives. e.g. 200
    :param compute_type: a string printed in the occasional debug line
                         (e.g. 'micro', 'macro'); it does not affect the result
    :return: f1_score: a scalar
    """
    precision = TP / (TP + FP + small_value)
    recall = TP / (TP + FN + small_value)
    f1_score = (2 * precision * recall) / (precision + recall + small_value)

    # Sampled debug output, using the same module-level rate as the other helpers.
    if random.randrange(random_number) == 1:
        print(compute_type, "precison:", str(precision), ";recall:", str(recall), ";f1_score:", f1_score)

    return f1_score
220
+
221
def compute_TF_FP_FN_micro(label_dict):
    """
    Pool TP, FP and FN over all labels (the inputs of micro f1).

    :param label_dict: a dict. {label: (TP, FP, FN)}
    :return: TP_micro, FP_micro, FN_micro: the three totals as floats
    """
    total_tp = 0.0
    total_fp = 0.0
    total_fn = 0.0
    for counts in label_dict.values():
        tp, fp, fn = counts
        total_tp += tp
        total_fp += fp
        total_fn += fn
    return total_tp, total_fp, total_fn
234
+
235
def init_label_dict(num_classes):
    """
    Build the dict that stores TP, FP, FN per label, with every count at zero.

    :param num_classes: number of labels; keys are 0 .. num_classes-1
    :return: label_dict: a dict. {label_index: (0,0,0)}
    """
    return {label_index: (0, 0, 0) for label_index in range(num_classes)}
245
+
246
def get_target_label_short(y_mulitihot):
    """
    Convert a multi-hot vector into the list of positions whose value equals 1.

    :param y_mulitihot: [0,0,1,0,1,0,...]
    :return: target_list: indices of the set positions, in ascending order. e.g. [2,4]
    """
    return [position for position, flag in enumerate(y_mulitihot) if flag == 1]
0 commit comments