Skip to content

Commit

Permalink
fix issue #20
Browse files (browse the repository at this point in the history)
  • Loading branch information
uidilr committed Nov 14, 2018
1 parent 1dc3c34 commit 0bdee5a
Show file tree
Hide file tree
Showing 9 changed files with 14 additions and 15 deletions.
Binary file modified algo/__pycache__/ppo.cpython-36.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion algo/ppo.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -74,7 +74,7 @@ def __init__(self, Policy, Old_Policy, gamma=0.95, clip_value=0.2, c_1=1, c_2=0.
tf.summary.scalar('total', loss)

self.merged = tf.summary.merge_all()
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4, epsilon=1e-5)
optimizer = tf.train.AdamOptimizer(learning_rate=5e-5, epsilon=1e-5)
self.gradients = optimizer.compute_gradients(loss, var_list=pi_trainable)
self.train_op = optimizer.minimize(loss, var_list=pi_trainable)

Expand Down
Binary file not shown.
Binary file not shown.
11 changes: 5 additions & 6 deletions run_gail.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -36,35 +36,34 @@ def main(args):
sess.run(tf.global_variables_initializer())

obs = env.reset()
reward = 0 # do NOT use rewards to update policy
success_num = 0

for iteration in range(args.iteration):
observations = []
actions = []
# do NOT use rewards to update policy
rewards = []
v_preds = []
run_policy_steps = 0
while True:
run_policy_steps += 1
obs = np.stack([obs]).astype(dtype=np.float32) # prepare to feed placeholder Policy.obs

act, v_pred = Policy.act(obs=obs, stochastic=True)

act = np.asscalar(act)
v_pred = np.asscalar(v_pred)
next_obs, reward, done, info = env.step(act)

observations.append(obs)
actions.append(act)
rewards.append(reward)
v_preds.append(v_pred)

next_obs, reward, done, info = env.step(act)

if done:
v_preds_next = v_preds[1:] + [0] # next state of terminate state has 0 state value
next_obs = np.stack([next_obs]).astype(dtype=np.float32) # prepare to feed placeholder Policy.obs
_, v_pred = Policy.act(obs=next_obs, stochastic=True)
v_preds_next = v_preds[1:] + [np.asscalar(v_pred)]
obs = env.reset()
reward = -1
break
else:
obs = next_obs
Expand Down
16 changes: 8 additions & 8 deletions run_ppo.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,14 +29,13 @@ def main(args):
writer = tf.summary.FileWriter(args.logdir, sess.graph)
sess.run(tf.global_variables_initializer())
obs = env.reset()
reward = 0
success_num = 0

for iteration in range(args.iteration):
observations = []
actions = []
v_preds = []
rewards = []
v_preds = []
episode_length = 0
while True: # run policy RUN_POLICY_STEPS which is much less than episode length
episode_length += 1
Expand All @@ -46,17 +45,18 @@ def main(args):
act = np.asscalar(act)
v_pred = np.asscalar(v_pred)

next_obs, reward, done, info = env.step(act)

observations.append(obs)
actions.append(act)
v_preds.append(v_pred)
rewards.append(reward)

next_obs, reward, done, info = env.step(act)
v_preds.append(v_pred)

if done:
v_preds_next = v_preds[1:] + [0] # next state of terminate state has 0 state value
next_obs = np.stack([next_obs]).astype(dtype=np.float32) # prepare to feed placeholder Policy.obs
_, v_pred = Policy.act(obs=next_obs, stochastic=True)
v_preds_next = v_preds[1:] + [np.asscalar(v_pred)]
obs = env.reset()
reward = -1
break
else:
obs = next_obs
Expand All @@ -78,7 +78,7 @@ def main(args):
gaes = PPO.get_gaes(rewards=rewards, v_preds=v_preds, v_preds_next=v_preds_next)

# convert list to numpy array for feeding tf.placeholder
observations = np.reshape(observations, newshape=[-1] + list(ob_space.shape))
observations = np.reshape(observations, newshape=(-1,) + ob_space.shape)
actions = np.array(actions).astype(dtype=np.int32)
gaes = np.array(gaes).astype(dtype=np.float32)
gaes = (gaes - gaes.mean()) / gaes.std()
Expand Down
Binary file modified trained_models/ppo/model.ckpt.data-00000-of-00001
Binary file not shown.
Binary file modified trained_models/ppo/model.ckpt.index
Binary file not shown.
Binary file modified trained_models/ppo/model.ckpt.meta
Binary file not shown.

0 comments on commit 0bdee5a

Please sign in to comment.