-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmain.py
263 lines (224 loc) · 11.3 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
'''
Main program file.
func:
- train_ppo
- train_td3
- test
- baseline
'''
import os, shutil
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandapower as pp
import pandapower.timeseries as ts
from pandapower.timeseries.data_sources.frame_data import DFData
from pandapower.timeseries.output_writer import OutputWriter
import utils
from cigre_mv_microgrid import create_cigre_mv_microgrid
from controllers.baseline_controller import RandomControl, SimpleControl
from controllers.td3_controller import TD3Agent
from controllers.ppo_controller import PPOAgent
from setting import *
def train_ppo(n_runs, n_epochs, start, train_length, pv_profile, wt_profile, load_profile, price_profile,
sequence_model_type='none', noise_type='action'):
# env
assert(start >= 0 and start < pv_profile.shape[0])
assert(train_length >= 0 and train_length <= pv_profile.shape[0] - start)
time_steps = range(start, start + train_length)
pv_ds = DFData(pv_profile.iloc[start: start+train_length])
wt_ds = DFData(wt_profile.iloc[start: start+train_length])
load_ds = DFData(load_profile.iloc[start: start+train_length])
# history
history_dir = os.path.join('.', 'history', 'train', 'PPO')
if os.path.isdir(history_dir):
shutil.rmtree(history_dir)
# run
ep_return_list = np.zeros((n_runs, n_epochs))
ep_cost_list = np.zeros((n_runs, n_epochs))
for run in range(n_runs):
net, ids = create_cigre_mv_microgrid(pv_ds, wt_ds, load_ds)
agent = PPOAgent(net, ids, pv_profile, wt_profile, load_profile, price_profile, sequence_model_type)
best_cost = train_length * MAX_COST
for epoch in range(n_epochs):
agent.training = True
ts.run_timeseries(net, time_steps=time_steps, continue_on_divergence=False)
ep_return_list[run, epoch] = np.sum(agent.rewards)
np.save(os.path.join('.', 'plot', 'ep_return_list.npy'), ep_return_list)
ep_cost_list[run, epoch] = np.sum(agent.costs)
np.save(os.path.join('.', 'plot', 'ep_cost_list.npy'), ep_cost_list)
print(f'Run: {run + 1}, epoch: {epoch + 1}, return = {ep_return_list[run, epoch]:.3f}, cost = {ep_cost_list[run, epoch]:.3f}')
# history & best models
if epoch >= 20:
cost = np.sum(agent.costs)
if cost < best_cost or (epoch % 20 == 0):
# log history
dir = os.path.join(history_dir,str(run+1), 'best_avg_cost')
if not os.path.isdir(dir):
os.makedirs(dir)
pd.DataFrame(agent.history).to_csv(os.path.join(dir, f'[{epoch}]_cost{cost:.3f}.csv'))
# save best cost model
if cost < best_cost:
agent.save(run)
best_cost = cost
agent.reset()
# plot
print(f'Epoch return: \n {np.mean(ep_return_list, axis=0)}')
print(f'Epoch cost: \n {np.mean(ep_cost_list, axis=0)}')
utils.plot_ep_values(ep_return_list, train_length, n_epochs, ylabel='Return')
utils.plot_ep_values(ep_cost_list, train_length, n_epochs, ylabel='Cost')
def train_td3(n_runs, n_epochs, start, train_length, pv_profile, wt_profile, load_profile, price_profile,
verbose=True, sequence_model_type='rnn', use_pretrained_sequence_model=False,
noise_type='action', retrain=False, run=1):
# env
assert(start >= 0 and start < pv_profile.shape[0])
assert(train_length >= 0 and train_length <= pv_profile.shape[0] - start)
time_steps = range(start, start + train_length)
pv_ds = DFData(pv_profile.iloc[start: start+train_length])
wt_ds = DFData(wt_profile.iloc[start: start+train_length])
load_ds = DFData(load_profile.iloc[start: start+train_length])
# history
history_dir = os.path.join('.', 'history', 'train', 'TD3')
if os.path.isdir(history_dir):
shutil.rmtree(history_dir)
# run
ep_return_list = np.zeros((n_runs, n_epochs))
ep_cost_list = np.zeros((n_runs, n_epochs))
for run in range(n_runs):
net, ids = create_cigre_mv_microgrid(pv_ds, wt_ds, load_ds)
agent = TD3Agent(net, ids, pv_profile, wt_profile, load_profile, price_profile,
training=True, n_epochs=n_epochs,
sequence_model_type=sequence_model_type, use_pretrained_sequence_model=use_pretrained_sequence_model,
buffer_size=BUFFER_SIZE, noise_type=noise_type, batch_size=BATCH_SIZE)
if retrain:
agent.load_models(run=run)
# run
best_cost = train_length * MAX_COST
for epoch in range(n_epochs):
# train
agent.training = True
ts.run_timeseries(net, time_steps=time_steps, verbose=verbose, continue_on_divergence=False)
ep_return_list[run, epoch] = np.sum(agent.rewards)
ep_cost_list[run, epoch] = np.sum(agent.costs)
print(f'Run: {run + 1}, episode: {epoch + 1}, return = {ep_return_list[run, epoch]:.3f}, cost = {ep_cost_list[run, epoch]:.3f}')
# agent.reset()
# test
# agent.training = False
# ts.run_timeseries(net, time_steps=time_steps, verbose=verbose, continue_on_divergence=False)
test_cost = np.sum(agent.costs)
if (epoch >= 20) and ((epoch % 20 == 0) or test_cost < best_cost):
# log history
dir = os.path.join(history_dir,str(run+1), 'best_avg_cost')
if not os.path.isdir(dir):
os.makedirs(dir)
pd.DataFrame(agent.history).to_csv(os.path.join(dir, f'[{epoch}]_cost{ep_cost_list[run, epoch]:.3f}.csv'))
# save best cost model
# if test_cost < best_cost:
# agent.save_models(run=run+1)
# best_cost = test_cost
agent.reset()
# plot
print(f'Episode return: \n {np.mean(ep_return_list, axis=0)}')
print(f'Episode cost: \n {np.mean(ep_cost_list, axis=0)}')
utils.plot_ep_values(ep_return_list, train_length, n_epochs, ylabel='Return')
utils.plot_ep_values(ep_cost_list, train_length, n_epochs, ylabel='Cost')
def test(n_runs, start, test_length, pv_profile, wt_profile, load_profile, price_profile, run, sequence_model_type='rnn', log=False, log_path=None):
assert(start >= 0 and start < pv_profile.shape[0])
assert(test_length >= 0 and test_length <= pv_profile.shape[0] - start)
time_steps=range(start, start+test_length)
# env
pv_ds = DFData(pv_profile.iloc[start: start+test_length])
wt_ds = DFData(wt_profile.iloc[start: start+test_length])
load_ds = DFData(load_profile.iloc[start: start+test_length])
net, ids = create_cigre_mv_microgrid(pv_ds, wt_ds, load_ds)
# log pf results
if log:
n_runs = 1
ow = OutputWriter(net, time_steps, output_path=log_path, output_file_type='.csv', csv_separator=',')
ow.log_variable('res_sgen', 'p_mw')
ow.log_variable('res_load', 'p_mw')
ow.log_variable('res_storage', 'p_mw')
ow.log_variable('res_trafo', 'p_lv_mw', index=[ids['trafo0']])
# agent
agent = PPOAgent(net, ids, pv_profile, wt_profile, load_profile, price_profile, sequence_model_type, training=False)
agent.load(run)
# agent = TD3Agent(net, ids, pv_profile, wt_profile, load_profile, price_profile,
# training=False, sequence_model_type=sequence_model_type)
# agent.load_models(run=run)
# run
ep_cost_list = []
for _ in range(n_runs):
ts.run_timeseries(net, time_steps=time_steps, verbose=False, continue_on_divergence=False)
ep_cost_list.append(np.sum(agent.costs))
agent.reset()
print(f'Avg cost = {np.mean(ep_cost_list)}')
def baseline(n_runs, start, test_length, pv_profile, wt_profile, load_profile, price_profile, Control, log=False, log_path=None):
assert(start >= 0 and start < pv_profile.shape[0])
assert(test_length >= 0 and test_length <= pv_profile.shape[0] - start)
time_steps=range(start, start+test_length)
# env
pv_ds = DFData(pv_profile.iloc[start: start+test_length])
wt_ds = DFData(wt_profile.iloc[start: start+test_length])
load_ds = DFData(load_profile.iloc[start: start+test_length])
net, ids = create_cigre_mv_microgrid(pv_ds, wt_ds, load_ds)
# log pf results
if log:
n_runs = 1
ow = OutputWriter(net, time_steps, output_path=log_path, output_file_type='.csv', csv_separator=',')
ow.log_variable('res_sgen', 'p_mw')
ow.log_variable('res_load', 'p_mw')
ow.log_variable('res_storage', 'p_mw')
ow.log_variable('res_trafo', 'p_lv_mw', index=[ids['trafo0']])
# controller
controller = Control(net, ids, price_profile=price_profile)
# run
ep_cost_list = []
for _ in range(n_runs):
ts.run_timeseries(net, time_steps=time_steps, continue_on_divergence=False, verbose=True)
ep_cost_list.append(np.sum(controller.costs))
controller.reset()
print(f'Avg cost = {np.mean(ep_cost_list)}')
if __name__ == '__main__':
# --- configurations ---
logging.basicConfig(level=logging.INFO)
algo = 'ppo'
sequence_model_type = 'rnn' # ['none', 'conv1d', 'rnn', 'transformer']
sequence_length = 1 if (sequence_model_type == 'none') else SEQ_LENGTH
# train configs
n_train_runs = 10
n_epochs = 500
train_start = 0
train_length = 30 * 24
noise_type = 'action' # ['action', 'param']
use_pretrained_sequence_model = False
# test configs
n_test_runs = 1
# test_start = train_start + train_length
# test_length = 7 * 24
test_start = train_start
test_length = train_length
log = True
log_path = os.path.join('.', 'pf_res', algo, sequence_model_type)
log_path_baseline = os.path.join('.', 'pf_res', 'baseline', 'simple')
# --- profile ---
pv_profile = pd.read_csv('./data/profile/pv_profile.csv')
wt_profile = pd.read_csv('./data/profile/wt_profile.csv')
load_profile = pd.read_csv('./data/profile/load_profile.csv')
price_profile = pd.read_csv('./data/profile/price_profile.csv')
# --- train, test ---
train_ppo(n_train_runs, n_epochs, train_start, train_length,
pv_profile, wt_profile, load_profile, price_profile, sequence_model_type)
# train_td3(n_runs=n_train_runs, n_epochs=n_epochs, start=train_start, train_length=train_length,
# pv_profile=pv_profile, wt_profile=wt_profile, load_profile=load_profile, price_profile=price_profile,
# sequence_model_type=sequence_model_type, use_pretrained_sequence_model=use_pretrained_sequence_model, noise_type=noise_type)
# test(n_runs=n_test_runs, start=test_start, test_length=test_length,
# pv_profile=pv_profile, wt_profile=wt_profile, load_profile=load_profile, price_profile=price_profile,
# run=0, sequence_model_type=sequence_model_type, log=log, log_path=log_path)
# baseline(n_runs=n_test_runs, start=test_start, test_length=test_length,
# pv_profile=pv_profile, wt_profile=wt_profile, load_profile=load_profile, price_profile=price_profile,
# Control=SimpleControl,
# log=log, log_path=log_path_baseline)
# --- plot pf results ---
# utils.plot_pf_results(log_path, test_start, test_length)
# utils.plot_pf_results(dir=log_path_baseline)