-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGridworlds.py
118 lines (109 loc) · 3.06 KB
/
Gridworlds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
'''
File name: Gridworlds.py
Author: Eosandra Grund
Date created: 18.07.2022
Date last modified: 20.07.2022
Python Version: 3.10.4
'''
class Gridworlds:
    """Predefined gridworld layouts for creating a Grid object.

    Each layout is a plain dictionary with the following keys:

    x_dim (int > 0): x dimension of the gridworld.
    y_dim (int > 0): y dimension of the gridworld.
    epsilon (0 < float < 1): for the epsilon-greedy state transition function.
    start ([x, y]): starting state of the agent for each episode.
    terminal ([x, y]): terminal state with a positive reward.
    neg_reward ([[x, y, reward], ...]): fields with negative rewards.
    barrier ([[x, y], ...]): fields that are barriers.

    Legend for the sketches drawn in the comments next to each layout:

    s   = starting state
    X   = barrier
    int = value / reward of the state
    10  = terminal state

    In the sketches x grows to the right and y grows downwards, so the
    field [0, 0] is the top-left corner.
    """

    # GRIDWORLD0
    #    s   0   X   0  10    |
    #    0   0  -1   0   0    V
    #    0   X   0   0   0    y
    #    0   0   X   0   0
    #   -1   0   0   0  -1
    #   -> x dimension
    GRIDWORLD0 = {
        "x_dim": 5,
        "y_dim": 5,
        "epsilon": 0.1,
        "start": [0, 0],
        "terminal": [4, 0],
        "neg_reward": [[0, 4, -1], [2, 1, -1], [4, 4, -1]],
        "barrier": [[1, 2], [2, 0], [2, 3]],
    }

    # GRIDWORLD1
    #    0   0   0    |
    #    0   X   0    V
    #    s   X  10    y
    #   -> x dimension
    GRIDWORLD1 = {
        "x_dim": 3,
        "y_dim": 3,
        "epsilon": 0.1,
        "start": [0, 2],
        "terminal": [2, 2],
        "neg_reward": [],
        "barrier": [[1, 2], [1, 1]],
    }

    # GRIDWORLD2
    #    s   0   0   X   0   0   0   0    |
    #    0   X   0   0   0   X   X   0    V
    #    0   0   X   0   X   0   0   0    y
    #    0   0   0   X   0   0   0   0
    #   -1   0   X   0   X   0   0   0
    #    0   0   0   0   0   X   0   0
    #    0   0   0   0   0  -1  10   0
    #    0   0   0   0   0  -1   0   0
    #   -> x dimension
    GRIDWORLD2 = {
        "x_dim": 8,
        "y_dim": 8,
        "epsilon": 0.1,
        "start": [0, 0],
        "terminal": [6, 6],
        "neg_reward": [[0, 4, -1], [5, 6, -1], [5, 7, -1]],
        "barrier": [
            [1, 1], [2, 2], [3, 3], [4, 4], [5, 5],
            [2, 4], [3, 0], [4, 2], [5, 1], [6, 1],
        ],
    }

    # GRIDWORLD3
    #   -3   0  10    |
    #    0   0   0    V
    #    s   0   0    y
    #   -> x dimension
    GRIDWORLD3 = {
        "x_dim": 3,
        "y_dim": 3,
        "epsilon": 0.1,
        "start": [0, 2],
        "terminal": [2, 0],
        "neg_reward": [[0, 0, -3]],
        "barrier": [],
    }

    # GRIDWORLD4
    #    0   X  10   0    |
    #    0   s   X   0    V
    #    0  -1   X   0    y
    #    0   0   0   0
    #   -> x dimension
    GRIDWORLD4 = {
        "x_dim": 4,
        "y_dim": 4,
        "epsilon": 0.1,
        "start": [1, 1],
        "terminal": [2, 0],
        "neg_reward": [[1, 2, -1]],
        "barrier": [[1, 0], [2, 1], [2, 2]],
    }

    # All layouts in order, so that a gridworld can be selected by its number
    # (GRIDWORLD[i] is the very same dictionary object as GRIDWORLDi).
    GRIDWORLD = [GRIDWORLD0, GRIDWORLD1, GRIDWORLD2, GRIDWORLD3, GRIDWORLD4]