-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheva.py
executable file
·99 lines (88 loc) · 2.7 KB
/
eva.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# -*- coding: utf-8 -*-
# @Time : 2021/7/3 21:49
import numpy as np
from sklearn.metrics import roc_auc_score
def recall(rank, ground_truth, N):
    """Return the share of ground-truth items that appear in the top-N of ``rank``.

    Args:
        rank: Ranked, sliceable sequence of hashable item ids (best first).
        ground_truth: Iterable of relevant item ids; duplicates are ignored.
        N: Number of leading entries of ``rank`` to inspect.

    Returns:
        ``|set(rank[:N]) & set(ground_truth)| / |set(ground_truth)|`` as a
        float, or ``0.0`` when ``ground_truth`` is empty.
    """
    relevant = set(ground_truth)
    if not relevant:
        # Nothing can be recalled; avoid a ZeroDivisionError.
        return 0.0
    return len(relevant.intersection(rank[:N])) / float(len(relevant))
def precision_at_k(r, k):
    """Compute precision over the first ``k`` entries of a relevance list.

    The value is the arithmetic mean of ``r[:k]``, so it equals the usual
    precision@k when ``r`` holds binary (0/1) relevance flags. When ``r`` has
    fewer than ``k`` entries, the mean is taken over the entries available.

    Args:
        r: Relevance scores in rank order.
        k: Cut-off rank; must be at least 1.

    Returns:
        Mean of the first ``k`` relevance scores.

    Raises:
        AssertionError: If ``k < 1`` (only while assertions are enabled).
    """
    assert k >= 1
    top_k = np.asarray(r)[:k]
    return top_k.mean()
def average_precision(r, cut):
    """Compute average precision of a relevance list truncated at ``cut``.

    Precision@(i+1) is accumulated for every position ``i < cut`` whose
    relevance is nonzero, then divided by ``min(cut, sum(r))``.

    Args:
        r: Relevance scores in rank order (binary: nonzero is relevant).
        cut: Maximum rank to consider. Values larger than ``len(r)`` are
            clamped to the list length instead of indexing out of range.

    Returns:
        The average precision, or ``0.`` when no relevant item is found
        within the cut-off.
    """
    r = np.asarray(r)
    # Clamp the depth so that cut > len(r) (or cut <= 0) cannot index or
    # slice outside the list.
    depth = max(0, min(cut, r.size))
    head = r[:depth]
    hits = head != 0
    if not hits.any():
        return 0.
    # Running sum / rank gives precision@(i+1) for every position at once.
    precisions = np.cumsum(head) / np.arange(1, depth + 1)
    return np.sum(precisions[hits]) / float(min(cut, np.sum(r)))
def mean_average_precision(rs, cut=None):
    """Compute the mean of ``average_precision`` over several relevance lists.

    Args:
        rs: Iterable of relevance lists, each in rank order (binary: nonzero
            is relevant).
        cut: Cut-off rank forwarded to ``average_precision``. When ``None``
            (the default) each list is evaluated over its full length.

    Returns:
        Mean average precision. An empty ``rs`` yields NaN, as ``np.mean``
        does for an empty list.
    """
    # average_precision requires a cut-off; the previous one-argument call
    # raised TypeError for every non-empty input.
    return np.mean([
        average_precision(r, len(r) if cut is None else cut) for r in rs
    ])
def dcg_at_k(r, k, method=1):
    """Compute discounted cumulative gain (DCG) over the first ``k`` entries.

    Args:
        r: Relevance scores in rank order (real-valued or binary).
        k: Number of leading entries to consider.
        method: Discount scheme.
            ``0`` - weights are ``[1.0, 1.0, 1/log2(3), 1/log2(4), ...]``
            (the first two positions are undiscounted).
            ``1`` - weights are ``[1.0, 1/log2(3), 1/log2(4), ...]``.

    Returns:
        The DCG value, or ``0.`` when ``r[:k]`` is empty.

    Raises:
        ValueError: If ``method`` is neither 0 nor 1 and ``r[:k]`` is
            non-empty.
    """
    # np.asfarray was removed in NumPy 2.0; asarray with dtype=float is the
    # documented replacement and behaves the same on older releases.
    r = np.asarray(r, dtype=float)[:k]
    if not r.size:
        return 0.
    if method == 0:
        return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
    if method == 1:
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
    raise ValueError('method must be 0 or 1.')
def ndcg_at_k(r, k, ground_truth, method=1):
    """Compute normalized discounted cumulative gain (NDCG) at rank ``k``.

    The ideal ranking used for normalization is built from binary gains: it
    places ``min(len(set(ground_truth)), k)`` ones first and pads with zeros
    up to length ``k``.

    Args:
        r: Relevance scores in rank order.
        k: Cut-off rank.
        ground_truth: Iterable of relevant item ids; only the number of
            distinct ids is used.
        method: Discount scheme forwarded to ``dcg_at_k``.

    Returns:
        ``dcg_at_k(r, k) / ideal_dcg``, or ``0.`` when the ideal DCG is zero.
    """
    n_relevant = min(len(set(ground_truth)), k)
    ideal_gains = [1.0] * n_relevant + [0.0] * (k - n_relevant)
    ideal_dcg = dcg_at_k(ideal_gains, k, method)
    if ideal_dcg:
        return dcg_at_k(r, k, method) / ideal_dcg
    return 0.
def recall_at_k(r, k, all_pos_num):
    """Compute recall over the first ``k`` entries of a relevance list.

    Args:
        r: Relevance scores in rank order (binary: 1 marks a hit).
        k: Cut-off rank.
        all_pos_num: Total number of relevant items for this query; the
            caller must pass a nonzero value.

    Returns:
        Sum of the first ``k`` relevance scores divided by ``all_pos_num``.
    """
    # np.asfarray was removed in NumPy 2.0; asarray with dtype=float is the
    # documented replacement and behaves the same on older releases.
    top_k = np.asarray(r, dtype=float)[:k]
    return np.sum(top_k) / all_pos_num
def hit_at_k(r, k):
    """Report whether the first ``k`` entries contain any positive relevance.

    Args:
        r: Relevance scores in rank order.
        k: Cut-off rank.

    Returns:
        ``1.`` if the sum of ``r[:k]`` is greater than zero, else ``0.``.
    """
    top_k = np.array(r)[:k]
    return 1. if top_k.sum() > 0 else 0.
def F1(pre, rec):
    """Combine precision and recall into their harmonic mean (F1 score).

    Args:
        pre: Precision value.
        rec: Recall value.

    Returns:
        ``2 * pre * rec / (pre + rec)`` when ``pre + rec`` is greater than
        zero, otherwise ``0.``.
    """
    total = pre + rec
    # Written as "not > 0" so any non-positive (or NaN) sum yields 0.
    if not total > 0:
        return 0.
    return (2.0 * pre * rec) / total
def auc(ground_truth, prediction):
    """Compute ROC AUC for the given labels and scores, best-effort.

    Args:
        ground_truth: True labels, passed to ``roc_auc_score`` as ``y_true``.
        prediction: Predicted scores, passed as ``y_score``.

    Returns:
        The value returned by ``roc_auc_score``, or ``0.`` if that call
        raises any ``Exception``.
    """
    try:
        return roc_auc_score(y_true=ground_truth, y_score=prediction)
    except Exception:
        # Deliberate fallback: any scoring failure is reported as 0.
        return 0.