-
Notifications
You must be signed in to change notification settings - Fork 7
/
value_functions.py
42 lines (31 loc) · 1.31 KB
/
value_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import torch
import torch.nn as nn
from util import mlp
class TwinQ(nn.Module):
    """Two independently constructed Q-heads over concatenated (state, action).

    Both heads are built by ``mlp`` from the same layer sizes:
    ``state_dim + action_dim`` inputs, ``n_hidden`` hidden layers of width
    ``hidden_dim``, and a single output unit.

    Args:
        state_dim: Size of the state feature axis.
        action_dim: Size of the action feature axis.
        hidden_dim: Width of every hidden layer.
        n_hidden: Number of hidden layers.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=256, n_hidden=2):
        super().__init__()
        dims = [state_dim + action_dim, *([hidden_dim] * n_hidden), 1]
        # NOTE(review): squeeze_output=True presumably drops the trailing
        # size-1 output axis -- confirm against util.mlp.
        self.q1 = mlp(dims, squeeze_output=True)
        self.q2 = mlp(dims, squeeze_output=True)

    def both(self, state, action):
        """Return the outputs of both heads as a ``(q1, q2)`` tuple.

        ``state`` and ``action`` are concatenated along their last axis, so
        they must agree on every other axis.
        """
        # Join on the last (feature) axis rather than a fixed axis 1: the
        # result is the same for 2-D batches, and unbatched 1-D inputs or
        # inputs with extra leading axes are handled as well.
        sa = torch.cat([state, action], dim=-1)
        return self.q1(sa), self.q2(sa)

    def forward(self, state, action):
        """Return the element-wise minimum of the two heads' outputs."""
        q1, q2 = self.both(state, action)
        return torch.min(q1, q2)
class ValueFunction(nn.Module):
    """Single-head state-value network built by ``mlp``.

    The head takes ``state_dim`` inputs, passes them through ``n_hidden``
    hidden layers of width ``hidden_dim``, and ends in one output unit.

    Args:
        state_dim: Size of the state feature axis.
        hidden_dim: Width of every hidden layer.
        n_hidden: Number of hidden layers.
    """

    def __init__(self, state_dim, hidden_dim=256, n_hidden=2):
        super().__init__()
        hidden_sizes = [hidden_dim] * n_hidden
        layer_sizes = [state_dim] + hidden_sizes + [1]
        self.v = mlp(layer_sizes, squeeze_output=True)

    def forward(self, state):
        """Return the head's output for ``state``."""
        value = self.v(state)
        return value
class TwinV(nn.Module):
    """Two independently constructed state-value heads.

    Both heads are built by ``mlp`` from the same layer sizes
    (``state_dim`` inputs, ``n_hidden`` hidden layers of width
    ``hidden_dim``, one output unit) and the same ``layer_norm`` flag.

    Args:
        state_dim: Size of the state feature axis.
        layer_norm: Forwarded unchanged to ``mlp`` for both heads.
        hidden_dim: Width of every hidden layer.
        n_hidden: Number of hidden layers.
    """

    def __init__(self, state_dim, layer_norm=False, hidden_dim=256, n_hidden=2):
        super().__init__()
        layer_sizes = [state_dim] + [hidden_dim] * n_hidden + [1]
        head_kwargs = {"layer_norm": layer_norm, "squeeze_output": True}
        self.v1 = mlp(layer_sizes, **head_kwargs)
        self.v2 = mlp(layer_sizes, **head_kwargs)

    def both(self, state):
        """Return the outputs of both heads as a ``(v1, v2)`` tuple."""
        first = self.v1(state)
        second = self.v2(state)
        return first, second

    def forward(self, state):
        """Return the element-wise minimum of the two heads' outputs."""
        first, second = self.both(state)
        return torch.min(first, second)