-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmultistepfwd.py
90 lines (62 loc) · 2.62 KB
/
multistepfwd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import sys
sys.path.insert(1, '/home/home3/abef/research/control/LinearizingStateRepresentation/')
from lib.restartable_pendulum import RestartablePendulumEnv
from lib import state_rep_torch as srt
import gym
import numpy as np
from matplotlib import pyplot as plt
import itertools
import sys
import torch
def main():
    """Train a multi-step forward-prediction encoder on the pixel pendulum.

    An optional ``--job=<int>`` command-line argument selects one entry of a
    hyperparameter grid (architecture x trajectory length x learning rate).
    NOTE: that selection is currently overridden by a hardcoded configuration
    further down, so the job index only matters for bounds checking.

    After training, the network, the training parameters, the tracked losses
    and a loss plot are written to files whose names start with ``save_path``.

    Raises:
        ValueError: if the value after ``--job=`` is not an integer.
        IndexError: if the job index is outside the hyperparameter grid.
    """
    # Default to the first grid entry so that running the script without a
    # --job= argument does not fail on an unbound index.
    job_index = 0
    for arg in sys.argv:
        if arg.startswith('--job='):
            job_index = int(arg.split('--job=')[1])

    # specify environment information
    env = RestartablePendulumEnv(repeats=3, pixels=True)

    # specify training details to loop over
    archs = [[50], [100], [50, 50], [100, 100]]  # fully-connected layers of the encoder
    traj_lens = [10]  # trajectory length
    lrs = [.0001, .0005, .001, .005]  # learning rate
    grid = list(itertools.product(archs, traj_lens, lrs))
    widths, traj_len, learning_rate = grid[job_index]
    # this just hardcodes the hyperparameters (overrides the grid selection)
    widths, traj_len, learning_rate = [50, 50], 7, .001

    parameters = {
        "n_episodes": 80000,
        "batch_size": 25,  # was 50...
        "learning_rate": learning_rate,
        "widths": widths,
        "traj_len": traj_len,
    }

    layers = parameters["widths"]
    T = parameters["traj_len"]
    n_episodes = parameters["n_episodes"]
    batch_size = parameters["batch_size"]
    learning_rate = parameters["learning_rate"]
    save_every = int(n_episodes / 5)

    save_path = "./multifwd"
    # All output files use save_path as a plain filename prefix (no path
    # separator is inserted), the same way the "net" checkpoint is named.
    save_dir = save_path

    # The encoder is built from the observation shape without its first axis.
    encnet = srt.ConvEncoderNet(layers, env.observation_space.shape[1:])
    # NOTE(review): the trailing 1 is presumably the action dimension -- confirm
    # against srt.MultiStepForward.
    prednet = srt.MultiStepForward(encnet, T, layers[-1], 1)
    traj_sampler = srt.TrajectorySampler(env,
                                         srt.sample_pendulum_action_batch,
                                         srt.sample_pendulum_state_batch_old,
                                         T,
                                         device=torch.device("cpu"))

    net, losses = srt.train_encoder(prednet, traj_sampler, n_episodes,
                                    batch_size=batch_size,
                                    track_loss_every=int(n_episodes / 100),
                                    lr=learning_rate,
                                    save_every=save_every,
                                    save_path=save_path)

    torch.save(net, save_path + "net")

    # save the training params, one "<name> <value>" pair per line
    with open(save_dir + "train_params.txt", "w") as f:
        for key, value in parameters.items():
            f.write(f"{key} {value}\n")

    np.savetxt(save_dir + "losses.txt", np.array(losses))

    plt.plot(losses)
    plt.savefig(save_dir + "losses.png")
    plt.clf()
# Script entry point: start the training run only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()