-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy patheval_baseline.py
113 lines (85 loc) · 3.91 KB
/
eval_baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from simulator import CampaignSimulatorEnv
from baselines import HumanPolicy, GPUCBPolicyWithSlidingWindow
import pandas as pd
import wandb
import configparser
def load_config(path='./configpolicy.ini'):
    """Read the INI configuration that drives the baseline evaluation.

    ``ConfigParser.read`` silently skips files it cannot open and returns the
    list of files it parsed, so that list is checked explicitly instead of
    relying on an exception. Parsing errors raised by ``configparser`` are
    allowed to propagate rather than continuing with a partial config.

    Args:
        path: Location of the INI file.

    Returns:
        The populated ``configparser.ConfigParser``.

    Raises:
        FileNotFoundError: If ``path`` could not be read.
    """
    config = configparser.ConfigParser()
    if not config.read(path):
        raise FileNotFoundError(f'Error reading the config file: {path}')
    return config


def load_channel_data(config):
    """Load the campaign data for the ``data_channel`` named in ``config``.

    Supported channels are ``google``, ``smn`` and ``criterio``; each reads
    its CSV path from the ``DEFAULT`` section. For ``smn`` the file is
    tab-separated and is filtered down to the configured ``advertiser_id``.

    Args:
        config: Parsed configuration (``DEFAULT`` section is consulted).

    Returns:
        A ``(data, advertiser_id)`` tuple: the loaded ``DataFrame`` and the
        identifier string (``campaign_name`` for google/criterio,
        ``advertiser_id`` for smn).

    Raises:
        RuntimeError: If ``data_channel`` is not a supported channel.
    """
    defaults = config['DEFAULT']
    data_channel = defaults['data_channel']

    # A single if/elif chain: the fallback must only trigger when *no*
    # channel matched, and the error has to actually be raised.
    if data_channel == "google":
        advertiser_id = defaults['campaign_name']
        data = pd.read_csv(defaults['data_google'])
    elif data_channel == "smn":
        advertiser_id = defaults['advertiser_id']
        data = pd.read_csv(defaults['data_smn'], sep='\t')
        # Filter data as per specified advertiser ID
        data = data[data['advertiser_id'] == int(advertiser_id)]
    elif data_channel == "criterio":
        advertiser_id = defaults['campaign_name']
        data = pd.read_csv(defaults['data_criterio'])
    else:
        raise RuntimeError("Invalid data channel")
    return data, advertiser_id


def build_policy(policy_name, data, config, exploration_strategy, adaptation_strategy):
    """Instantiate the baseline policy selected by ``policy_name``.

    Args:
        policy_name: ``'Human_Policy'`` or ``'GPUCBBaseline_Policy'``.
        data: Campaign data handed to the policy constructor.
        config: Parsed configuration (used by the GP-UCB baseline).
        exploration_strategy: Passed through to the GP-UCB baseline.
        adaptation_strategy: Passed through to the GP-UCB baseline.

    Returns:
        The constructed policy object.

    Raises:
        ValueError: If ``policy_name`` is not a known baseline, instead of
            leaving the policy as ``None`` and failing later on first use.
    """
    if policy_name == 'Human_Policy':
        return HumanPolicy(data)
    if policy_name == 'GPUCBBaseline_Policy':
        return GPUCBPolicyWithSlidingWindow(
            data, config, exploration_strategy, adaptation_strategy)
    raise ValueError(f'Unknown baseline policy: {policy_name!r}')


def main():
    """Simulate the configured number of months with a baseline policy.

    Reads ``./configpolicy.ini``, loads the channel data, builds the
    simulator environment and the policy, then steps the environment day by
    day while accumulating reward, regret, cost and CPC. Metrics are sent to
    Weights & Biases when ``use_wandb`` is enabled.
    """
    config = load_config('./configpolicy.ini')

    # Num months to simulate the campaign
    num_month = config.getint('DEFAULT', 'num_month')
    use_wandb = config.getboolean('DEFAULT', 'use_wandb')
    data_channel = config['DEFAULT']['data_channel']
    policy_name = config['BASELINE']['baseline_name']
    exploration_strategy = config['BASELINE']['exploration_strategy']
    adaptation_strategy = config['BASELINE']['adaptation_strategy']

    # The same frame feeds both the simulator and the policy.
    data_simulator, advertiser_id = load_channel_data(config)

    # Initialize the environment
    env = CampaignSimulatorEnv(data_simulator, config, False)
    # Initialize the baseline policy (fails fast on an unknown name, before
    # a wandb run is started).
    policy = build_policy(policy_name, data_simulator, config,
                          exploration_strategy, adaptation_strategy)

    if use_wandb:
        run_name = (f'{policy_name}-{exploration_strategy}-{adaptation_strategy}'
                    f'-{advertiser_id}-{data_channel}')
        wandb.init(project='Campaign Simulator New', name=run_name,
                   group=adaptation_strategy)
        # Save the configuration parameters to wandb
        wandb.config.update(
            {section: dict(config[section]) for section in config.sections()})

    cumulative_reward = 0
    cumulative_regret = 0
    cumulative_cost = 0
    cumulative_cpc = 0

    for month in range(num_month):
        print(f'Simulating month {month+1}')
        observation = env.reset()
        # set policy parameters as per env data
        policy.reset(env)

        for day in range(env.current_days):
            # The human baseline is queried by date/campaigns; the other
            # baseline takes the observation and day index and returns a
            # pair whose second element is unused here.
            if policy_name == 'Human_Policy':
                action = policy.get_action(env.current_date, env.campaigns)
            else:
                action, _ = policy.get_action(observation, day)

            observation, reward, regret, done, _, _ = env.step(action)

            # Add the latest cost incurred by every campaign to the
            # cumulative cost (entry [0][-1] of each campaign observation).
            cumulative_cost += sum(
                campaign_obs[0][-1] for campaign_obs in observation.values())

            # Accumulate the reward *before* deriving the ratio so cost and
            # reward cover the same steps; previously the denominator lagged
            # one step behind and the ratio was always 0 on the first step.
            # NOTE(review): presumably reward is the click count — confirm.
            cumulative_reward += reward
            cumulative_regret += regret
            cpc = cumulative_cost / cumulative_reward if cumulative_reward > 0 else 0
            cumulative_cpc += cpc

            if use_wandb:
                wandb.log({
                    'reward': reward,
                    'regret': regret,
                    'cumulative_reward': cumulative_reward,
                    'cumulative_regret': cumulative_regret,
                    'cumulative_cpc': cumulative_cpc
                })
            if done:
                break


if __name__ == '__main__':
    main()