# worker.py

import copy

import numpy as np
import ray


@ray.remote
class Worker:
    """Ray actor that runs local ProxSkip-style steps on its own loss function."""

    def __init__(self, method, loss=None, it_local=None, batch_size=1):
        self.loss = loss
        self.prox_skip, self.sprox_skip, self.vr_prox_skip = False, False, False
        if method == 'prox_skip':
            self.prox_skip = True
        elif method == 'sprox_skip':
            self.sprox_skip = True
        elif method == 'vr_prox_skip':
            self.vr_prox_skip = True
        else:
            raise ValueError(f'Unknown method {method}!')
        self.it_local = it_local
        self.batch_size = batch_size
        self.c = None  # control variate; not updated by the methods below
        self.h = None  # gradient-shift estimate, initialized on the first local run
        # Random number generators for the random synchronization times.
        # All workers use the same seeds, so they draw identical values.
        self.rng_skip = np.random.default_rng(42)
        self.rng_mskip = np.random.default_rng(45)
        self.rng_vr_skip = np.random.default_rng(50)

    def run_local(self, x, lr):
        """Run one block of local steps starting from the server iterate x."""
        self.x = x * 1.  # copy, so the server's array is not modified in place
        if self.prox_skip:
            self.run_prox_skip(lr)
        elif self.sprox_skip:
            self.run_sprox_skip(lr)
        elif self.vr_prox_skip:
            self.run_vr_prox_skip(lr)
        return self.x

    def run_prox_skip(self, lr):
        """ProxSkip with full local gradients."""
        p = 1 / self.it_local  # synchronization probability; expected local steps = it_local
        # Alternatively, p = 1 / np.sqrt(kappa), where kappa = L / l2 is the condition number.
        if self.h is None:
            # first iteration
            self.h = self.x * 0.  # initialize a zero vector of the same dimension
        else:
            # update the gradient-shift estimate
            self.h += p / lr * (self.x - self.x_before_averaging)
        # Since all workers use the same random seed, this number is the same for all of them.
        it_local = self.rng_skip.geometric(p=p)
        for i in range(it_local):
            g = self.loss.gradient(self.x)
            self.x -= lr * (g - self.h)
        self.x_before_averaging = self.x * 1.

    def run_sprox_skip(self, lr):
        """Stochastic ProxSkip: the same update with mini-batch gradients."""
        p = 1 / self.it_local
        # Alternatively, p = 1 / np.sqrt(kappa), where kappa = L / l2 is the condition number.
        if self.h is None:
            # first iteration
            self.h = self.x * 0.  # initialize a zero vector of the same dimension
        else:
            # update the gradient-shift estimate
            self.h += p / lr * (self.x - self.x_before_averaging)
        it_local = self.rng_skip.geometric(p=p)
        for i in range(it_local):
            g = self.loss.stochastic_gradient(self.x, batch_size=self.batch_size)
            self.x -= lr * (g - self.h)
        self.x_before_averaging = self.x * 1.

    def run_vr_prox_skip(self, lr):
        """Variance-reduced ProxSkip.

        lr: learning rate
        """
        lr = lr / 6  # the variance-reduced variant runs with a smaller effective stepsize
        # The reference point y is refreshed with probability q = 2 / kappa, where
        # kappa = L / l2 is the condition number. Assumption (the original referenced
        # undefined globals L and l2): the loss exposes its smoothness constant and
        # l2 regularization as loss.smoothness() and loss.l2.
        kappa = self.loss.smoothness() / self.loss.l2
        q = 2 / kappa
        p = 1 / self.it_local
        if self.h is None:
            # first iteration
            self.h = self.x * 0.  # initialize a zero vector of the same dimension
            self.y = copy.deepcopy(self.x)  # reference point for variance reduction
        else:
            # update the gradient-shift estimate
            self.h += p / lr * (self.x - self.x_before_averaging)
            # with probability q, move the reference point to the current iterate
            mchoice = self.rng_vr_skip.choice(2, p=[q, 1 - q])
            if mchoice == 0:
                self.y = copy.deepcopy(self.x)
        # Since all workers use the same random seed, this number is the same for all of them.
        it_local = self.rng_skip.geometric(p=p)
        for i in range(it_local):
            # variance-reduced estimator: stochastic gradients at x and the reference
            # point y, corrected by the full gradient at y (SVRG-style)
            g_x = self.loss.stochastic_gradient(self.x, batch_size=self.batch_size)
            g_y = self.loss.stochastic_gradient(self.y, batch_size=self.batch_size)
            full_g_y = self.loss.gradient(self.y)
            g = g_x - g_y + full_g_y
            self.x -= lr * (g - self.h)
        self.x_before_averaging = self.x * 1.

    def get_control_var(self):
        return self.c
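

# ---------------------------------------------------------------------------
# Minimal usage sketch: a simple parameter-server loop that drives the workers.
# Each round, every worker runs a random number of local ProxSkip steps and the
# server averages the returned iterates (the prox step for the consensus
# constraint). ToyQuadratic, the dimension, and the hyperparameters below are
# hypothetical placeholders; substitute the loss objects used in this repository,
# which only need to provide gradient() and stochastic_gradient().
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    class ToyQuadratic:
        """Placeholder loss f(x) = 0.5 * ||x - b||^2 with a fixed target b."""

        def __init__(self, b):
            self.b = b

        def gradient(self, x):
            return x - self.b

        def stochastic_gradient(self, x, batch_size=1):
            # For this toy loss the stochastic gradient equals the full gradient.
            return self.gradient(x)

    ray.init(ignore_reinit_error=True)
    rng = np.random.default_rng(0)
    dim, n_workers = 10, 4
    workers = [
        Worker.remote('prox_skip', loss=ToyQuadratic(rng.standard_normal(dim)),
                      it_local=10, batch_size=1)
        for _ in range(n_workers)
    ]

    x, lr = np.zeros(dim), 0.5
    for _ in range(50):  # communication rounds
        local_iterates = ray.get([w.run_local.remote(x, lr) for w in workers])
        x = np.mean(local_iterates, axis=0)  # server averaging (consensus prox)
    print('final iterate:', x)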