AI gym.py
import gym
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
# TODO: Load an environment
env = gym.make("CartPole-v1")
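# Inspect the spaces; for CartPole-v1 these should be a 4-dimensional Box and Discrete(2)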
print(env.observation_space)
print(env.action_space)
# TODO Make a random agent
games_to_play = 10
for i in range(games_to_play):
    # Reset the environment
    obs = env.reset()
    episode_rewards = 0
    done = False

    while not done:
        # Render the environment so we can watch
        env.render()
        # Choose a random action
        action = env.action_space.sample()
        # Take a step in the environment with the chosen action
        obs, reward, done, info = env.step(action)
        episode_rewards += reward

    # Print episode total rewards when done
    print(episode_rewards)
# Close the environment
env.close()
# TODO Build the policy gradient neural network
class Agent:
    def __init__(self, num_actions, state_size):
        initializer = tf.contrib.layers.xavier_initializer()

        self.input_layer = tf.placeholder(dtype=tf.float32, shape=[None, state_size])

        # Two fully connected hidden layers
        hidden_layer = tf.layers.dense(self.input_layer, 8, activation=tf.nn.relu, kernel_initializer=initializer)
        hidden_layer_2 = tf.layers.dense(hidden_layer, 8, activation=tf.nn.relu, kernel_initializer=initializer)

        # Output of neural net: one logit per action
        out = tf.layers.dense(hidden_layer_2, num_actions, activation=None)
        self.outputs = tf.nn.softmax(out)
        self.choice = tf.argmax(self.outputs, axis=1)
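
        # --- Sketch of the missing training pieces (not in the original file). ---
        # A policy gradient agent also needs a loss and a train op; the placeholder
        # names (rewards_, actions_), the learning rate, and the helper below are
        # assumptions for illustration, not the author's final design.
        self.rewards_ = tf.placeholder(dtype=tf.float32, shape=[None])
        self.actions_ = tf.placeholder(dtype=tf.int32, shape=[None])

        # Weight each action's cross-entropy by the discounted return that followed it
        one_hot_actions = tf.one_hot(self.actions_, num_actions)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=one_hot_actions, logits=out)
        self.loss = tf.reduce_mean(cross_entropy * self.rewards_)
        self.train_op = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(self.loss)


# Hypothetical helper (an assumption, not from the original file): converts raw
# per-step rewards into discounted returns, the usual weighting for the loss above.
def discount_rewards(rewards, gamma=0.99):
    discounted = np.zeros_like(rewards, dtype=np.float32)
    running_total = 0.0
    for t in reversed(range(len(rewards))):
        running_total = running_total * gamma + rewards[t]
        discounted[t] = running_total
    return discounted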