-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheoc.py
117 lines (80 loc) · 3.1 KB
/
eoc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""Compute EOC (taken from the GitHub of the article).
https://github.com/soufiane001/impact_of_init_and_activation/blob/master/eoc_curves.py
"""
import argparse
import numpy as np
# Defining some activation functions and their derivatives
def relu(x):
    """Implement ReLU activation function.

    Parameters:
    -----------
    x : float or np.ndarray
        Pre-activation value(s).

    Returns:
    --------
    float or np.ndarray
        ``max(x, 0)`` evaluated elementwise.
    """
    # np.maximum is used instead of the masking form ``x * (x > 0)``, which
    # yields nan for -inf (``-inf * 0``) and -0.0 for negative inputs.
    return np.maximum(x, 0)
def relu_dash(x):
    """Implement ReLU derivative.

    Parameters:
    -----------
    x : float or np.ndarray
        Pre-activation value(s).

    Returns:
    --------
    integer scalar or np.ndarray of int
        1 where ``x > 0`` and 0 elsewhere (the value at ``x == 0`` is 0).
    """
    # np.asarray lets plain Python scalars through as well: a Python bool
    # (the result of ``2.0 > 0``) has no ``astype`` method.
    positive = np.asarray(x) > 0
    return positive.astype(int)
def sigmoid(x):
    """Implement sigmoid activation function.

    Parameters:
    -----------
    x : float or np.ndarray
        Pre-activation value(s).

    Returns:
    --------
    float or np.ndarray
        ``1 / (1 + exp(-x))`` evaluated elementwise.
    """
    # sigmoid(x) = exp(-log(1 + exp(-x))). np.logaddexp(0, -x) evaluates the
    # log term without overflowing exp(-x) when x is very negative.
    return np.exp(-np.logaddexp(0, -x))
def ssoftplus(x):
    """Implement shifted softplus activation function.

    The softplus ``log(1 + exp(x))`` is shifted down by ``log(2)`` so that
    the function is 0 at ``x = 0``.

    Parameters:
    -----------
    x : float or np.ndarray
        Pre-activation value(s).

    Returns:
    --------
    float or np.ndarray
        ``log(1 + exp(x)) - log(2)`` evaluated elementwise.
    """
    # np.logaddexp(0, x) == log(exp(0) + exp(x)) but stays finite for large
    # x, where exp(x) itself overflows to inf.
    return np.logaddexp(0, x) - np.log(2)
def tanh(x):
    """Apply the hyperbolic tangent activation.

    Thin wrapper that delegates to ``np.tanh``, so scalars and arrays are
    handled elementwise.
    """
    activated = np.tanh(x)
    return activated
def tanh_dash(x):
    """Implement tanh derivative.

    Parameters:
    -----------
    x : float or np.ndarray
        Pre-activation value(s).

    Returns:
    --------
    float or np.ndarray
        ``4 / (exp(x) + exp(-x))**2`` evaluated elementwise.
    """
    # Multiplying numerator and denominator by exp(-2|x|) gives the same
    # quantity using only t = exp(-2|x|) in (0, 1], so nothing overflows
    # for large |x|.
    t = np.exp(-2 * np.abs(x))
    return 4 * t / (1 + t) ** 2
def tanh_2dash(x):
    """Implement tanh second derivative.

    Parameters:
    -----------
    x : float or np.ndarray
        Pre-activation value(s).

    Returns:
    --------
    float or np.ndarray
        ``8 * (exp(-x) - exp(x)) / (exp(x) + exp(-x))**3`` evaluated
        elementwise.
    """
    # The expression above equals -2 * tanh(x) * 4 / (exp(x) + exp(-x))**2.
    # The second factor is rewritten with t = exp(-2|x|) in (0, 1] so that
    # large |x| gives 0 instead of inf / inf = nan.
    t = np.exp(-2 * np.abs(x))
    first_derivative = 4 * t / (1 + t) ** 2
    return -2 * np.tanh(x) * first_derivative
def elu(x):
    """Implement ELU activation function.

    Parameters:
    -----------
    x : float or np.ndarray
        Pre-activation value(s).

    Returns:
    --------
    float or np.ndarray
        ``x`` where ``x > 0`` and ``exp(x) - 1`` where ``x <= 0``.
    """
    # Each branch only sees its own half of the input: the exponential is
    # applied to min(x, 0), so a large positive x can no longer produce
    # ``inf * 0 = nan`` as it did with the masking form.
    positive_part = np.maximum(x, 0)
    negative_part = np.expm1(np.minimum(x, 0))
    return positive_part + negative_part
def elu_dash(x):
    """Implement ELU derivative.

    Parameters:
    -----------
    x : float or np.ndarray
        Pre-activation value(s).

    Returns:
    --------
    float or np.ndarray
        1 where ``x > 0`` and ``exp(x)`` where ``x <= 0``.
    """
    # exp(min(x, 0)) is exp(0) = 1 for positive x and exp(x) otherwise; it
    # never evaluates exp on a large positive value, which used to give
    # ``inf * 0 = nan``.
    return np.exp(np.minimum(x, 0))
def get_eoc(act, act_dash, sigma_bs, N):
    """Compute sigma_w on the EOC given sigma_b.

    For each sigma_b, q is obtained by applying the update
    ``q <- sigma_b**2 + mean(act(sqrt(q) * z)**2) / mean(act_dash(sqrt(q) * z)**2)``
    200 times starting from ``q = 0``, where ``z`` holds N standard normal
    samples. sigma_w is then ``1 / sqrt(mean(act_dash(sqrt(q) * z)**2))``.

    Parameters:
    -----------
    act : callable
        Activation function
    act_dash : callable
        Activation derivative
    sigma_bs : float, or list / tuple / float array of shape (n,)
        Standard deviation of the bias
    N : int
        Number of samples to draw to do the computation

    Returns:
    --------
    np.array of shape (n, 3)
        Contains (sigma_b, sigma_w, q) for each of the input sigma_b.
        The result is squeezed, so a single sigma_b gives shape (3,).
    """
    # Simulate gaussian variables for mean calculations. The same draws are
    # reused for every sigma_b and every iteration; they come from the global
    # NumPy RNG, so np.random.seed controls reproducibility.
    z1 = np.random.randn(N)

    # Normalise the input to a 1-D float array so that scalars, lists,
    # tuples and ndarrays are all iterated one sigma_b at a time.
    sigmas = np.atleast_1d(np.asarray(sigma_bs, dtype=float))

    eoc = []
    for sigma in sigmas:
        # NOTE: q starts at 0, so the first update evaluates act and act_dash
        # at 0. If act_dash(0) == 0 (as with relu_dash) the ratio is 0 / 0
        # and q becomes nan for all remaining iterations.
        q = 0
        for _ in range(200):
            pre_act = np.sqrt(q) * z1
            q = sigma**2 + np.mean(act(pre_act)**2) / np.mean(act_dash(pre_act)**2)
        sigma_w = 1 / np.sqrt(np.mean(act_dash(np.sqrt(q) * z1)**2))
        eoc.append([sigma, sigma_w, q])
    return np.squeeze(np.array(eoc))
def get_eoc_by_name(act_name, sigma_bs, N):
    """Compute same values as get_eoc but by specifying activation name.

    Parameters:
    -----------
    act_name : str
        One of 'relu', 'elu' or 'tanh'
    sigma_bs : float or float array of shape (n,)
        Standard deviation of the bias, forwarded to get_eoc
    N : int
        Number of samples to draw, forwarded to get_eoc

    Returns:
    --------
    np.array
        Whatever get_eoc returns for the selected activation

    Raises:
    -------
    ValueError
        If act_name is not a known activation name
    """
    # Each name maps to an (activation, derivative) pair defined in this
    # module. Only activations whose derivative is available are listed.
    activations = {
        'relu': (relu, relu_dash),
        'elu': (elu, elu_dash),
        'tanh': (tanh, tanh_dash),
    }
    try:
        act, act_dash = activations[act_name]
    except (KeyError, TypeError):
        # TypeError covers unhashable names, which previously also ended in
        # the ValueError below.
        raise ValueError(f'Unknown activation function {act_name}') from None
    return get_eoc(act, act_dash, sigma_bs, N)
if __name__ == '__main__':
    # An example of computation: print sigma_w on the EOC for one sigma_b
    # and one activation chosen on the command line.
    parser = argparse.ArgumentParser(description='fig1')
    parser.add_argument('--sigb', type=float, default=0, help='Sigma bias')
    parser.add_argument('--act', type=str, default='elu', help='Which activation to use')
    parser.add_argument('--rs', type=int, default=0, help='Random state')
    parser.add_argument('--n', type=int, default=500000, help='Number of samples to draw for the computation')
    args = parser.parse_args()

    # get_eoc draws from the global NumPy RNG, so seed it for reproducibility.
    np.random.seed(args.rs)
    eoc = get_eoc_by_name(args.act, args.sigb, args.n)

    # For a single sigma_b the result is (sigma_b, sigma_w, q).
    print(f'EOC for sigma_b={args.sigb} and act="{args.act}" is:')
    print(f'\tsigma_w={eoc[1]:.3f}')