forked from TheAlgorithms/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloss_functions.py
477 lines (382 loc) · 17.4 KB
/
loss_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
import numpy as np
def binary_cross_entropy(
y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15
) -> float:
"""
Calculate the mean binary cross-entropy (BCE) loss between true labels and predicted
probabilities.
BCE loss quantifies dissimilarity between true labels (0 or 1) and predicted
probabilities. It's widely used in binary classification tasks.
BCE = -Σ(y_true * ln(y_pred) + (1 - y_true) * ln(1 - y_pred))
Reference: https://en.wikipedia.org/wiki/Cross_entropy
Parameters:
- y_true: True binary labels (0 or 1)
- y_pred: Predicted probabilities for class 1
- epsilon: Small constant to avoid numerical instability
>>> true_labels = np.array([0, 1, 1, 0, 1])
>>> predicted_probs = np.array([0.2, 0.7, 0.9, 0.3, 0.8])
>>> binary_cross_entropy(true_labels, predicted_probs)
0.2529995012327421
>>> true_labels = np.array([0, 1, 1, 0, 1])
>>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
>>> binary_cross_entropy(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
y_pred = np.clip(y_pred, epsilon, 1 - epsilon) # Clip predictions to avoid log(0)
bce_loss = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
return np.mean(bce_loss)
def binary_focal_cross_entropy(
y_true: np.ndarray,
y_pred: np.ndarray,
gamma: float = 2.0,
alpha: float = 0.25,
epsilon: float = 1e-15,
) -> float:
"""
Calculate the mean binary focal cross-entropy (BFCE) loss between true labels
and predicted probabilities.
BFCE loss quantifies dissimilarity between true labels (0 or 1) and predicted
probabilities. It's a variation of binary cross-entropy that addresses class
imbalance by focusing on hard examples.
BCFE = -Σ(alpha * (1 - y_pred)**gamma * y_true * log(y_pred)
+ (1 - alpha) * y_pred**gamma * (1 - y_true) * log(1 - y_pred))
Reference: [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf)
Parameters:
- y_true: True binary labels (0 or 1).
- y_pred: Predicted probabilities for class 1.
- gamma: Focusing parameter for modulating the loss (default: 2.0).
- alpha: Weighting factor for class 1 (default: 0.25).
- epsilon: Small constant to avoid numerical instability.
>>> true_labels = np.array([0, 1, 1, 0, 1])
>>> predicted_probs = np.array([0.2, 0.7, 0.9, 0.3, 0.8])
>>> binary_focal_cross_entropy(true_labels, predicted_probs)
0.008257977659239775
>>> true_labels = np.array([0, 1, 1, 0, 1])
>>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
>>> binary_focal_cross_entropy(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
# Clip predicted probabilities to avoid log(0)
y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
bcfe_loss = -(
alpha * (1 - y_pred) ** gamma * y_true * np.log(y_pred)
+ (1 - alpha) * y_pred**gamma * (1 - y_true) * np.log(1 - y_pred)
)
return np.mean(bcfe_loss)
def categorical_cross_entropy(
y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15
) -> float:
"""
Calculate categorical cross-entropy (CCE) loss between true class labels and
predicted class probabilities.
CCE = -Σ(y_true * ln(y_pred))
Reference: https://en.wikipedia.org/wiki/Cross_entropy
Parameters:
- y_true: True class labels (one-hot encoded)
- y_pred: Predicted class probabilities
- epsilon: Small constant to avoid numerical instability
>>> true_labels = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1], [0.0, 0.1, 0.9]])
>>> categorical_cross_entropy(true_labels, pred_probs)
0.567395975254385
>>> true_labels = np.array([[1, 0], [0, 1]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same shape.
>>> true_labels = np.array([[2, 0, 1], [1, 0, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: y_true must be one-hot encoded.
>>> true_labels = np.array([[1, 0, 1], [1, 0, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.0], [0.2, 0.7, 0.1]])
>>> categorical_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: y_true must be one-hot encoded.
>>> true_labels = np.array([[1, 0, 0], [0, 1, 0]])
>>> pred_probs = np.array([[0.9, 0.1, 0.1], [0.2, 0.7, 0.1]])
>>> categorical_cross_entropy(true_labels, pred_probs)
Traceback (most recent call last):
...
ValueError: Predicted probabilities must sum to approximately 1.
"""
if y_true.shape != y_pred.shape:
raise ValueError("Input arrays must have the same shape.")
if np.any((y_true != 0) & (y_true != 1)) or np.any(y_true.sum(axis=1) != 1):
raise ValueError("y_true must be one-hot encoded.")
if not np.all(np.isclose(np.sum(y_pred, axis=1), 1, rtol=epsilon, atol=epsilon)):
raise ValueError("Predicted probabilities must sum to approximately 1.")
y_pred = np.clip(y_pred, epsilon, 1) # Clip predictions to avoid log(0)
return -np.sum(y_true * np.log(y_pred))
def hinge_loss(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculate the mean hinge loss for between true labels and predicted probabilities
for training support vector machines (SVMs).
Hinge loss = max(0, 1 - true * pred)
Reference: https://en.wikipedia.org/wiki/Hinge_loss
Args:
- y_true: actual values (ground truth) encoded as -1 or 1
- y_pred: predicted values
>>> true_labels = np.array([-1, 1, 1, -1, 1])
>>> pred = np.array([-4, -0.3, 0.7, 5, 10])
>>> hinge_loss(true_labels, pred)
1.52
>>> true_labels = np.array([-1, 1, 1, -1, 1, 1])
>>> pred = np.array([-4, -0.3, 0.7, 5, 10])
>>> hinge_loss(true_labels, pred)
Traceback (most recent call last):
...
ValueError: Length of predicted and actual array must be same.
>>> true_labels = np.array([-1, 1, 10, -1, 1])
>>> pred = np.array([-4, -0.3, 0.7, 5, 10])
>>> hinge_loss(true_labels, pred)
Traceback (most recent call last):
...
ValueError: y_true can have values -1 or 1 only.
"""
if len(y_true) != len(y_pred):
raise ValueError("Length of predicted and actual array must be same.")
if np.any((y_true != -1) & (y_true != 1)):
raise ValueError("y_true can have values -1 or 1 only.")
hinge_losses = np.maximum(0, 1.0 - (y_true * y_pred))
return np.mean(hinge_losses)
def huber_loss(y_true: np.ndarray, y_pred: np.ndarray, delta: float) -> float:
"""
Calculate the mean Huber loss between the given ground truth and predicted values.
The Huber loss describes the penalty incurred by an estimation procedure, and it
serves as a measure of accuracy for regression models.
Huber loss =
0.5 * (y_true - y_pred)^2 if |y_true - y_pred| <= delta
delta * |y_true - y_pred| - 0.5 * delta^2 otherwise
Reference: https://en.wikipedia.org/wiki/Huber_loss
Parameters:
- y_true: The true values (ground truth)
- y_pred: The predicted values
>>> true_values = np.array([0.9, 10.0, 2.0, 1.0, 5.2])
>>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
>>> np.isclose(huber_loss(true_values, predicted_values, 1.0), 2.102)
True
>>> true_labels = np.array([11.0, 21.0, 3.32, 4.0, 5.0])
>>> predicted_probs = np.array([8.3, 20.8, 2.9, 11.2, 5.0])
>>> np.isclose(huber_loss(true_labels, predicted_probs, 1.0), 1.80164)
True
>>> true_labels = np.array([11.0, 21.0, 3.32, 4.0])
>>> predicted_probs = np.array([8.3, 20.8, 2.9, 11.2, 5.0])
>>> huber_loss(true_labels, predicted_probs, 1.0)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
huber_mse = 0.5 * (y_true - y_pred) ** 2
huber_mae = delta * (np.abs(y_true - y_pred) - 0.5 * delta)
return np.where(np.abs(y_true - y_pred) <= delta, huber_mse, huber_mae).mean()
def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculate the mean squared error (MSE) between ground truth and predicted values.
MSE measures the squared difference between true values and predicted values, and it
serves as a measure of accuracy for regression models.
MSE = (1/n) * Σ(y_true - y_pred)^2
Reference: https://en.wikipedia.org/wiki/Mean_squared_error
Parameters:
- y_true: The true values (ground truth)
- y_pred: The predicted values
>>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
>>> np.isclose(mean_squared_error(true_values, predicted_values), 0.028)
True
>>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
>>> mean_squared_error(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
squared_errors = (y_true - y_pred) ** 2
return np.mean(squared_errors)
def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculates the Mean Absolute Error (MAE) between ground truth (observed)
and predicted values.
MAE measures the absolute difference between true values and predicted values.
Equation:
MAE = (1/n) * Σ(abs(y_true - y_pred))
Reference: https://en.wikipedia.org/wiki/Mean_absolute_error
Parameters:
- y_true: The true values (ground truth)
- y_pred: The predicted values
>>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
>>> np.isclose(mean_absolute_error(true_values, predicted_values), 0.16)
True
>>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
>>> np.isclose(mean_absolute_error(true_values, predicted_values), 2.16)
False
>>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_probs = np.array([0.3, 0.8, 0.9, 5.2])
>>> mean_absolute_error(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
return np.mean(abs(y_true - y_pred))
def mean_squared_logarithmic_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Calculate the mean squared logarithmic error (MSLE) between ground truth and
predicted values.
MSLE measures the squared logarithmic difference between true values and predicted
values for regression models. It's particularly useful for dealing with skewed or
large-value data, and it's often used when the relative differences between
predicted and true values are more important than absolute differences.
MSLE = (1/n) * Σ(log(1 + y_true) - log(1 + y_pred))^2
Reference: https://insideaiml.com/blog/MeanSquared-Logarithmic-Error-Loss-1035
Parameters:
- y_true: The true values (ground truth)
- y_pred: The predicted values
>>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2])
>>> mean_squared_logarithmic_error(true_values, predicted_values)
0.0030860877925181344
>>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
>>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2])
>>> mean_squared_logarithmic_error(true_labels, predicted_probs)
Traceback (most recent call last):
...
ValueError: Input arrays must have the same length.
"""
if len(y_true) != len(y_pred):
raise ValueError("Input arrays must have the same length.")
squared_logarithmic_errors = (np.log1p(y_true) - np.log1p(y_pred)) ** 2
return np.mean(squared_logarithmic_errors)
def mean_absolute_percentage_error(
y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-15
) -> float:
"""
Calculate the Mean Absolute Percentage Error between y_true and y_pred.
Mean Absolute Percentage Error calculates the average of the absolute
percentage differences between the predicted and true values.
Formula = (Σ|y_true[i]-Y_pred[i]/y_true[i]|)/n
Source: https://stephenallwright.com/good-mape-score/
Parameters:
y_true (np.ndarray): Numpy array containing true/target values.
y_pred (np.ndarray): Numpy array containing predicted values.
Returns:
float: The Mean Absolute Percentage error between y_true and y_pred.
Examples:
>>> y_true = np.array([10, 20, 30, 40])
>>> y_pred = np.array([12, 18, 33, 45])
>>> mean_absolute_percentage_error(y_true, y_pred)
0.13125
>>> y_true = np.array([1, 2, 3, 4])
>>> y_pred = np.array([2, 3, 4, 5])
>>> mean_absolute_percentage_error(y_true, y_pred)
0.5208333333333333
>>> y_true = np.array([34, 37, 44, 47, 48, 48, 46, 43, 32, 27, 26, 24])
>>> y_pred = np.array([37, 40, 46, 44, 46, 50, 45, 44, 34, 30, 22, 23])
>>> mean_absolute_percentage_error(y_true, y_pred)
0.064671076436071
"""
if len(y_true) != len(y_pred):
raise ValueError("The length of the two arrays should be the same.")
y_true = np.where(y_true == 0, epsilon, y_true)
absolute_percentage_diff = np.abs((y_true - y_pred) / y_true)
return np.mean(absolute_percentage_diff)
def perplexity_loss(
y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = 1e-7
) -> float:
"""
Calculate the perplexity for the y_true and y_pred.
Compute the Perplexity which useful in predicting language model
accuracy in Natural Language Processing (NLP.)
Perplexity is measure of how certain the model in its predictions.
Perplexity Loss = exp(-1/N (Σ ln(p(x)))
Reference:
https://en.wikipedia.org/wiki/Perplexity
Args:
y_true: Actual label encoded sentences of shape (batch_size, sentence_length)
y_pred: Predicted sentences of shape (batch_size, sentence_length, vocab_size)
epsilon: Small floating point number to avoid getting inf for log(0)
Returns:
Perplexity loss between y_true and y_pred.
>>> y_true = np.array([[1, 4], [2, 3]])
>>> y_pred = np.array(
... [[[0.28, 0.19, 0.21 , 0.15, 0.15],
... [0.24, 0.19, 0.09, 0.18, 0.27]],
... [[0.03, 0.26, 0.21, 0.18, 0.30],
... [0.28, 0.10, 0.33, 0.15, 0.12]]]
... )
>>> perplexity_loss(y_true, y_pred)
5.0247347775367945
>>> y_true = np.array([[1, 4], [2, 3]])
>>> y_pred = np.array(
... [[[0.28, 0.19, 0.21 , 0.15, 0.15],
... [0.24, 0.19, 0.09, 0.18, 0.27],
... [0.30, 0.10, 0.20, 0.15, 0.25]],
... [[0.03, 0.26, 0.21, 0.18, 0.30],
... [0.28, 0.10, 0.33, 0.15, 0.12],
... [0.30, 0.10, 0.20, 0.15, 0.25]],]
... )
>>> perplexity_loss(y_true, y_pred)
Traceback (most recent call last):
...
ValueError: Sentence length of y_true and y_pred must be equal.
>>> y_true = np.array([[1, 4], [2, 11]])
>>> y_pred = np.array(
... [[[0.28, 0.19, 0.21 , 0.15, 0.15],
... [0.24, 0.19, 0.09, 0.18, 0.27]],
... [[0.03, 0.26, 0.21, 0.18, 0.30],
... [0.28, 0.10, 0.33, 0.15, 0.12]]]
... )
>>> perplexity_loss(y_true, y_pred)
Traceback (most recent call last):
...
ValueError: Label value must not be greater than vocabulary size.
>>> y_true = np.array([[1, 4]])
>>> y_pred = np.array(
... [[[0.28, 0.19, 0.21 , 0.15, 0.15],
... [0.24, 0.19, 0.09, 0.18, 0.27]],
... [[0.03, 0.26, 0.21, 0.18, 0.30],
... [0.28, 0.10, 0.33, 0.15, 0.12]]]
... )
>>> perplexity_loss(y_true, y_pred)
Traceback (most recent call last):
...
ValueError: Batch size of y_true and y_pred must be equal.
"""
vocab_size = y_pred.shape[2]
if y_true.shape[0] != y_pred.shape[0]:
raise ValueError("Batch size of y_true and y_pred must be equal.")
if y_true.shape[1] != y_pred.shape[1]:
raise ValueError("Sentence length of y_true and y_pred must be equal.")
if np.max(y_true) > vocab_size:
raise ValueError("Label value must not be greater than vocabulary size.")
# Matrix to select prediction value only for true class
filter_matrix = np.array(
[[list(np.eye(vocab_size)[word]) for word in sentence] for sentence in y_true]
)
# Getting the matrix containing prediction for only true class
true_class_pred = np.sum(y_pred * filter_matrix, axis=2).clip(epsilon, 1)
# Calculating perplexity for each sentence
perp_losses = np.exp(np.negative(np.mean(np.log(true_class_pred), axis=1)))
return np.mean(perp_losses)
if __name__ == "__main__":
import doctest
doctest.testmod()