run.py
from unityagents import UnityEnvironment
import numpy as np
import random
import torch
from collections import deque
from ddpg_agent import Agent
import matplotlib.pyplot as plt
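# If this script runs on a headless machine (as the NoVis environment build
# suggests), a non-interactive matplotlib backend may be needed before
# pyplot is used, e.g.:
#   import matplotlib
#   matplotlib.use('Agg')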
# Single-agent version of the Reacher environment (uncomment to use):
# env = UnityEnvironment(file_name='Reacher_Linux_NoVis/Reacher.x86_64')
# 20-agent, headless (no visualization) version:
env = UnityEnvironment(file_name='Reacher_Linux_NoVis_20/Reacher.x86_64')

# Get the default brain, which exposes the observation and action spaces.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# DDPG agent for the Reacher task: 33-dimensional states and 4-dimensional
# continuous actions per agent.
agent = Agent(state_size=33, action_size=4, random_seed=1)
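# Note: the sizes above are hardcoded for Reacher. A sketch of deriving them
# from the environment instead (assuming the standard unityagents API already
# used in this script; the hardcoded values remain the source of truth here):
#
#   probe_info = env.reset(train_mode=True)[brain_name]
#   state_size = probe_info.vector_observations.shape[1]  # 33 for Reacher
#   action_size = brain.vector_action_space_size          # 4 for Reacher
#   agent = Agent(state_size=state_size, action_size=action_size, random_seed=1)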
def ddpg(n_episodes=3000, max_t=2000, print_every=100):
    """Train the DDPG agent; return the per-episode scores (mean over agents)."""
    scores_deque = deque(maxlen=print_every)   # scores of the last `print_every` episodes
    scores = []
    for i_episode in range(1, n_episodes+1):
        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations   # one state per agent
        agent.reset()                           # reset the exploration noise process
        score = np.zeros(len(env_info.agents))  # one running score per agent
        # The episode runs until the environment reports done; max_t is kept
        # in the signature but unused (see the commented-out loop below).
        # for t in range(max_t):
        while True:
            actions = agent.act(states)               # select actions (with noise)
            env_info = env.step(actions)[brain_name]  # send actions to the environment
            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done
            agent.step(states, actions, rewards, next_states, dones)  # store experience and learn
            score += rewards                    # update the score (for each agent)
            states = next_states                # roll over states to next time step
            if np.any(dones):                   # exit loop if episode finished
                break
        scores_deque.append(np.mean(score))
        scores.append(np.mean(score))
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)), end="")
        # Checkpoint the networks every episode so a crash loses little progress.
        torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
        torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
        if i_episode % print_every == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))
        # The environment counts as solved at an average score of +30 over
        # the last `print_every` (100) episodes.
        if np.mean(scores_deque) >= 30.0:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode, np.mean(scores_deque)))
            torch.save(agent.actor_local.state_dict(), 'checkpoint_actor.pth')
            torch.save(agent.critic_local.state_dict(), 'checkpoint_critic.pth')
            break
    return scores
scores = ddpg()
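# A minimal sketch (not part of the original script) of reloading the saved
# checkpoint to evaluate the trained actor without exploration noise,
# assuming Agent.act accepts an add_noise flag as in the Udacity DDPG
# template this agent appears to follow:
#
#   agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
#   env_info = env.reset(train_mode=False)[brain_name]
#   states = env_info.vector_observations
#   while True:
#       actions = agent.act(states, add_noise=False)
#       env_info = env.step(actions)[brain_name]
#       states = env_info.vector_observations
#       if np.any(env_info.local_done):
#           break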
# Plot the per-episode scores (mean over all agents) and save to score.png.
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig("score.png")
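# Optional additions (sketches, not in the original script): overlay the
# 100-episode moving average used by the solve criterion, and close the
# Unity environment to release the simulator process.
#
#   rolling = np.convolve(scores, np.ones(100) / 100, mode='valid')
#   plt.plot(np.arange(99, len(scores)), rolling)
#   plt.savefig("score.png")
#
#   env.close()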