# File: 112065506_hw2_test.py (93 lines, 77 loc, 3.05 KB)
# Repository forked from HuskyKingdom/NTHU_DRL_HW2
import torch
import torch.nn as nn
import torch.nn.functional as F
import cv2
import numpy as np
from collections import deque
class model(nn.Module):
    """Convolutional network mapping an image stack to one score per action.

    The network is three ReLU-activated convolutions followed by two linear
    layers.

    Args:
        num_inputs: Number of channels in the input tensor.
        num_actions: Length of the output vector.
    """

    def __init__(self, num_inputs, num_actions):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(num_inputs, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        # Collapses everything after the batch dimension into one vector.
        self.flatten = nn.Flatten()
        # Fully connected head. 3136 = 64 * 7 * 7, which is what the three
        # convolutions above produce for an 84x84 input.
        self.linear = nn.Linear(3136, 512)
        self.fc = nn.Linear(512, num_actions)

    def forward(self, x):
        """Return the ``(batch, num_actions)`` output for input batch ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.relu(conv(x))
        hidden = self.linear(self.flatten(x))
        # No activation is applied between the two linear layers.
        return self.fc(hidden)
class Agent:
    """Agent that selects actions with a pre-trained network over stacked frames.

    Each call to ``act`` preprocesses one observation and buffers it. Once
    ``num_stack`` frames are buffered they are stacked, passed through the
    network, and the buffer is emptied. On every other call the most recently
    chosen action is returned again.
    """

    def __init__(self):
        # Number of input channels and number of outputs of the network.
        self.obs_dim = 4
        self.action_dim = 12
        # Build the network and load its weights from ``model_path`` on the CPU.
        self.Q = model(self.obs_dim, self.action_dim)
        self.model_path = "112065506_hw2_data"
        self.Q.load_state_dict(torch.load(self.model_path, map_location=torch.device('cpu')))
        # The network is only used for inference here.
        self.Q.eval()
        # Not updated anywhere in this class; kept for compatibility.
        self.frame_count = 0
        # Number of frames collected before the network is queried.
        self.num_stack = 4
        # Bounded buffer holding the preprocessed frames.
        self.frames = deque(maxlen=self.num_stack)
        # Size each frame is resized to.
        self.shape = (84, 84)
        # Action returned until the first network prediction is available.
        self.last_action = 1

    def GrayScale(self, observation):
        """Convert an RGB image to a single-channel grayscale image."""
        return cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)

    def Resize(self, observation):
        """Resize ``observation`` to ``self.shape``.

        A 2-D result gets a trailing channel axis added.
        """
        # cv2.resize takes its size as (width, height), hence the reversal.
        observation = cv2.resize(observation, self.shape[::-1], interpolation=cv2.INTER_AREA)
        if observation.ndim == 2:
            observation = np.expand_dims(observation, -1)
        return observation

    def preprocess(self, observation):
        """Resize the observation, then convert it to grayscale."""
        observation = self.Resize(observation)
        observation = self.GrayScale(observation)
        return observation

    def act(self, obs):
        """Return the action to take for observation ``obs``.

        Args:
            obs: Raw RGB observation; it is passed through ``preprocess``.

        Returns:
            int: Index of the selected action. A new action is computed only
            on calls that complete a stack of ``num_stack`` frames; all other
            calls repeat the previous action.
        """
        self.frames.append(self.preprocess(obs))
        if len(self.frames) < self.num_stack:
            return self.last_action

        # Stack to (num_stack, H, W), then add a leading batch axis.
        stacked = np.stack(list(self.frames), axis=0)
        # Empty the buffer in place so it stays a bounded deque.
        self.frames.clear()
        batch = torch.tensor(np.expand_dims(stacked, axis=0), dtype=torch.float32)

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            q_values = self.Q(batch)
        # Plain int keeps the return type the same as the initial last_action.
        self.last_action = int(np.argmax(q_values.numpy()))
        return self.last_action