reward_func.py (forked from AnyLeoPeace/DURLECA)
import numpy as np

'''Reward Func'''

def get_reward_func_dict(args):
    print('Using reward func', args.reward_func)
    def ac_power_exp_reward_func(SIRH, OD, current_m, accumlated_m, accumlated_d):
        h = SIRH[:, 3].mean()
        accumlated_ratio = accumlated_m / (accumlated_d + 1e-7)
        ri = args.lamda * (np.exp(h / args.H0) - 1)  # norm to 0~1 (when h < H0)
        rm = np.power(accumlated_ratio, args.L0).mean()  # norm to 0~1
        r = (args.base_score + rm - ri) / args.total_divide
        return r
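
    # For h in [0, args.H0], np.exp(h / args.H0) - 1 spans [0, e - 1] ≈ [0, 1.72],
    # so the "norm to 0~1" comment on ri holds only approximately, up to the
    # args.lamda scaling, as long as mean hospitalizations stay below args.H0.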
    def ac_exp_reward_func(SIRH, OD, current_m, accumlated_m, accumlated_d):
        # Note: mobility is counted as a cost here, and accumlated_d is fixed
        # as the mean outflow of each region.
        h = SIRH[:, 3].mean()
        accumlated_ratio = accumlated_m / (accumlated_d + 1e-7) / args.L0
        ri = args.lamda * (np.exp(h / args.H0) - 1)  # norm to 0~1 (when h < H0)
        rm = 1 - np.exp(accumlated_ratio).mean()  # norm to 0~1
        r = (args.base_score + rm - ri) / args.total_divide
        return r
    def ac_exp_weight_avg_reward_func(SIRH, OD, current_m, accumlated_m, accumlated_d):
        # Note: mobility is counted as a cost here, and accumlated_d is fixed
        # as the mean outflow of each region.
        h = SIRH[:, 3].mean()
        current_lost = OD.sum(0).sum(-1) - current_m.sum(0).sum(-1)
        current_ratio = current_lost[:, np.newaxis] / (accumlated_d + 1e-7)
        accumlated_ratio = (accumlated_m - current_lost[:, np.newaxis]) / args.mobility_decay / (accumlated_d + 1e-7) / args.L0
        ri = args.lamda * (np.exp(h / args.H0) - 1)  # norm to 0~1 (when h < H0)
        rm = -(np.exp(accumlated_ratio) * current_ratio).mean()  # norm to 0~1
        r = (args.base_score + rm - ri) / args.total_divide
        return r
    def ac_power_weight_avg_reward_func(SIRH, OD, current_m, accumlated_m, accumlated_d):
        # Note: mobility is counted as a cost here, and accumlated_d is fixed
        # as the mean outflow of each region.
        h = SIRH[:, 3].mean()
        current_lost = OD.sum(0).sum(-1) - current_m.sum(0).sum(-1)
        current_ratio = current_lost[:, np.newaxis] / (accumlated_d + 1e-7)
        accumlated_ratio = (accumlated_m - current_lost[:, np.newaxis]) / args.mobility_decay / (accumlated_d + 1e-7) / args.L0
        ri = args.lamda * (np.exp(h / args.H0) - 1)  # norm to 0~1 (when h < H0)
        rm = -(np.power(accumlated_ratio, 2) * current_ratio).mean()  # norm to 0~1
        r = (args.base_score + rm - ri) / args.total_divide
        return r
    def ac_exp_weight_norm_reward_func(SIRH, OD, current_m, accumlated_m, accumlated_d):
        # Note: mobility is counted as a cost here, and accumlated_d is fixed
        # as the mean outflow of each region.
        OD_all = OD.sum(0).sum(-1)
        h = SIRH[:, 3].mean()
        current_lost = OD_all - current_m.sum(0).sum(-1)
        current_ratio = current_lost[:, np.newaxis] / (OD_all + 1e-7)
        accumlated_ratio = (accumlated_m - current_lost[:, np.newaxis]) / args.mobility_decay / (accumlated_d + 1e-7) / args.L0
        ri = args.lamda * (np.exp(h / args.H0) - 1)  # norm to 0~1 (when h < H0)
        rm = -(np.exp(accumlated_ratio) * current_ratio).mean()  # norm to 0~1
        r = (args.base_score + rm - ri) / args.total_divide
        return r
    def ac_power_weight_norm_reward_func(SIRH, OD, current_m, accumlated_m, accumlated_d):
        # Note: mobility is counted as a cost here, and accumlated_d is fixed
        # as the mean outflow of each region.
        OD_all = OD.sum(0).sum(-1)
        h = SIRH[:, 3].mean()
        current_lost = OD_all - current_m.sum(0).sum(-1)
        current_ratio = current_lost[:, np.newaxis] / (OD_all + 1e-7)
        accumlated_ratio = (accumlated_m - current_lost[:, np.newaxis]) / args.mobility_decay / (accumlated_d + 1e-7) / args.L0
        ri = args.lamda * (np.exp(h / args.H0) - 1)  # norm to 0~1 (when h < H0)
        rm = -(np.power(accumlated_ratio, 2) * current_ratio).mean()  # norm to 0~1
        r = (args.base_score + rm - ri) / args.total_divide
        return r
    reward_func_dict = {
        'ac_exp_norm': ac_exp_reward_func,
        'ac_power_exp_norm': ac_power_exp_reward_func,
        'ac_exp_weight_avg': ac_exp_weight_avg_reward_func,
        'ac_power_weight_norm': ac_power_weight_norm_reward_func,
        'ac_power_weight_avg': ac_power_weight_avg_reward_func,
        'ac_exp_weight_norm': ac_exp_weight_norm_reward_func,
    }
    return reward_func_dict
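
A minimal usage sketch follows. The attribute names on args match the ones read above; the hyper-parameter values, the array shapes, and the use of argparse.Namespace are illustrative assumptions, not taken from the DURLECA repository.

# Hypothetical demo; values and shapes below are assumptions for illustration only.
import argparse

args = argparse.Namespace(
    reward_func='ac_exp_norm',        # key into the dict returned above
    lamda=2.0, H0=5.0, L0=1.0,        # assumed hyper-parameter values
    base_score=0.0, total_divide=1.0, mobility_decay=1.0,
)
reward_func = get_reward_func_dict(args)[args.reward_func]

N = 4                                           # assumed number of regions
SIRH = np.random.rand(N, 4)                     # columns assumed to be S, I, R, H
OD = np.random.rand(24, N, N)                   # assumed (time, origin, destination) demand
current_m = 0.8 * OD                            # mobility actually allowed this step
accumlated_d = OD.sum(0).sum(-1, keepdims=True)         # (N, 1) reference outflow per region
accumlated_m = current_m.sum(0).sum(-1, keepdims=True)  # (N, 1) retained outflow per region

print(reward_func(SIRH, OD, current_m, accumlated_m, accumlated_d))

In training, these arrays would presumably come from the SIRH simulation environment rather than from random data; the sketch only checks that the chosen reward function runs end to end with consistently shaped inputs.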