-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathalgorithm_3.py
executable file
·140 lines (110 loc) · 5.92 KB
/
algorithm_3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import numpy as np
from functions import identify_nearest_centroid_for_multiple_tables
from algorithm_2 import algorithm_2
'''
for testing
import numpy as np
import numpy
import sys
numpy.set_printoptions(threshold=sys.maxsize)
from double_Q import DQPID
from algorithm_2 import algorithm_2
from algorithm_4 import algorithm_4
from algorithm_3 import algorithm_3
Q_index = 0
from collections import deque
Q_arrange = deque()
Q_arrange.append(DQPID(np.array([[0., 0.]]),0.,1.,[0.05, 0.06, 0.05, 0.05, 0.06, 0.05],3., 7., 0., 0. ))
Q_arrange, Q_index, next_Q_index = algorithm_3(Q_arrange,np.array([0, 0, 0, 0.05, 0.06, 0.05, 0.05, 0.06, 0.05]),[0.1, 0.2],1.,'A')
Q_arrange, Q_index, next_Q_index = algorithm_3(Q_arrange,np.array([0, 0, 0, 0.05, 0.06, 0.05, 0.05, 0.06, 0.05]),[0.05, 0.05],1.,'A')
'''
def _within_centroid(distance, radius):
    """Return True when ``distance`` does not exceed ``radius`` in magnitude.

    Both values are compared through ``np.abs`` so that every centroid
    membership test in this module uses the same rule.
    """
    return np.abs(distance) <= np.abs(radius)


def _double_q_update(Q_arrange, Q_index, state_index, action_index, reward,
                     next_Q_index, next_state_index, flag_ab):
    """Update table ``Q_index`` using the opposite estimator of the next table.

    When ``flag_ab`` is ``'A'`` the bootstrap value is the maximum of
    ``Q_B[next_state_index]`` of table ``next_Q_index``; for any other flag
    it is the maximum of ``Q_A[next_state_index]``.  The value is handed to
    ``update_Q`` of table ``Q_index`` together with ``flag_ab``.
    """
    next_table = Q_arrange[next_Q_index]
    other_estimator = next_table.Q_B if flag_ab == 'A' else next_table.Q_A
    max_next_value = np.max(other_estimator[next_state_index])
    Q_arrange[Q_index].update_Q(
        state_index, action_index, reward, max_next_value, flag_ab)


def algorithm_3(Q_arrange, Mt, next_state, reward, flag_ab):
    """Locate the table/centroid holding ``next_state`` and update the Q value.

    Parameters
    ----------
    Q_arrange : indexable, mutable sequence of table objects
        Each element must expose ``identify_nearest_centroid(state)``
        (returning ``(state_index, distance)``), ``max_state``, ``depth``,
        ``Q_A``, ``Q_B`` and ``update_Q(...)``.
    Mt : sequence of NumPy scalars (e.g. a 1-D ``np.ndarray``)
        Memory entry: ``Mt[0]`` is the state index, ``Mt[1]`` the action
        index and ``Mt[2]`` the index of the table in ``Q_arrange``.  The
        remaining entries are not used here.  Elements must support
        ``.astype(int)``.
    next_state
        State reached after the action; passed unchanged to the centroid
        look-ups and to ``algorithm_2``.
    reward
        Reward forwarded to ``update_Q`` / ``algorithm_2``.
    flag_ab : str
        ``'A'`` bootstraps from ``Q_B``; any other value bootstraps from
        ``Q_A``.

    Returns
    -------
    tuple
        ``(Q_arrange, Q_index, next_Q_index)``.  ``Q_index`` is the table
        that was updated (reset to ``0`` when the depth-1 fallback ran) and
        ``next_Q_index`` the table selected for ``next_state``.
    """
    # Unpack the memory entry.
    state_index = Mt[0].astype(int)
    action_index = Mt[1].astype(int)
    Q_index = Mt[2].astype(int)

    # Start by looking for the nearest centroid in the table we came from.
    next_Q_index = Q_index
    current_table = Q_arrange[Q_index]
    next_state_index, min_distance_to_centroid = (
        current_table.identify_nearest_centroid(next_state))

    if _within_centroid(min_distance_to_centroid, current_table.max_state):
        # next_state falls inside a centroid of the current table: try to
        # refine the match by climbing to greater depths, one level at a time.
        l_depth = current_table.depth
        L_max = max(table.depth for table in Q_arrange)
        if len(Q_arrange) > 1:
            while l_depth < L_max:
                l_depth = l_depth + 1
                (temp_min_distance_to_centroid,
                 temp_state_index,
                 temp_Q_index) = identify_nearest_centroid_for_multiple_tables(
                    Q_arrange, l_depth, next_state)
                if not _within_centroid(temp_min_distance_to_centroid,
                                        Q_arrange[temp_Q_index].max_state):
                    # Not inside at this depth: keep the last match found.
                    break
                # Inside at this depth: remember it and keep climbing.
                next_state_index = temp_state_index
                next_Q_index = temp_Q_index

        _double_q_update(Q_arrange, Q_index, state_index, action_index,
                         reward, next_Q_index, next_state_index, flag_ab)
        return Q_arrange, Q_index, next_Q_index

    # next_state is outside every centroid of the current table: walk back
    # towards depth 1 until some table contains it.
    l_depth = current_table.depth
    while True:
        l_depth = l_depth - 1
        if l_depth <= 0:
            l_depth = 1  # depth can never drop below 1
        (temp_min_distance_to_centroid,
         temp_state_index,
         temp_Q_index) = identify_nearest_centroid_for_multiple_tables(
            Q_arrange, l_depth, next_state)

        if (_within_centroid(temp_min_distance_to_centroid,
                             Q_arrange[temp_Q_index].max_state)
                and l_depth > 1):
            # Found a containing centroid at a shallower (but not the
            # shallowest) depth: bootstrap from it.
            next_state_index = temp_state_index
            next_Q_index = temp_Q_index
            _double_q_update(Q_arrange, Q_index, state_index, action_index,
                             reward, next_Q_index, next_state_index, flag_ab)
            break

        if l_depth == 1:
            # Reached the shallowest depth: delegate to algorithm_2 on the
            # first table, which replaces that table in place.
            # NOTE(review): state_index/action_index were recorded for the
            # original table but are applied to table 0 here -- confirm
            # this is intended.
            Q_index = 0
            Q_arrange[Q_index] = algorithm_2(
                Q_arrange[Q_index], state_index, next_state, reward,
                action_index, flag_ab)
            next_Q_index = Q_index
            break

    return Q_arrange, Q_index, next_Q_index