-
Notifications
You must be signed in to change notification settings - Fork 0
/
UNI_MULTI_LSTM.py
315 lines (256 loc) · 12.2 KB
/
UNI_MULTI_LSTM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
"""This code was adapted from a tutorial exercise on TensorFlow's own website by KUTAY DÖNMEZ."""
"""It uses two years (2017-2018) of ERA5 data for Samsun Merkez"""
"""to predict the temperature one step ahead with a univariate and a multivariate model."""
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import matplotlib as mpl
import os
# ---------------------------------------------------------------------------
# Data preparation
# ---------------------------------------------------------------------------
# Two NetCDF files are opened: one provides evaporation ('e') and total
# precipitation ('tp'), the other the 2 m temperature ('t2m').
data = xr.open_dataset(r'era5_2017_2018.nc')
data_temp = xr.open_dataset(r'ERA5_2MTEMP_2017_2018.nc')

# Pick out the variables used further down.
temperature = data_temp['t2m']
precipitation = data['tp']
evaporation = data['e']

# Interpolate every field to a single point so that each becomes a 1-D,
# time-only series.
# 41.344167 | 36.256389 | Samsun regional station 17030
lat = 41.344167
lon = 36.256389
p = precipitation.interp(latitude=lat, longitude=lon).values * 1000  # mm unit
e = evaporation.interp(latitude=lat, longitude=lon).values * 1000  # mm unit
t = temperature.interp(latitude=lat, longitude=lon).values - 273.15  # Celsius unit

# Time axis taken from the evaporation/precipitation file.
dates = data['time'].values

# Assemble a date-indexed pandas table whose columns are, in this order,
# Evap, Temp, Prec (later code relies on Temp being column 1).
instance = {
    'Date': dates,
    'Evap': e,
    'Temp': t,
    'Prec': p,
}
pd_data = pd.DataFrame(instance).set_index('Date')
"""This code was built with a great deal of help from the TensorFlow tutorial exercise."""
class lstm():
    """Helper that bundles the steps of a small LSTM forecasting workflow.

    The methods cover, in the order they are normally called:
    standardising a series, cutting it into (history window, label) pairs,
    wrapping the arrays in ``tf.data`` pipelines, building and fitting a
    one-layer Keras LSTM, and plotting a window together with its true and
    predicted future value.

    Most methods also store their intermediate results on the instance
    (``self.data``, ``self.labels``, ``self.train_mean``, ...). Note that
    ``self.data`` and ``self.labels`` are reused by several methods, so they
    always reflect only the most recent call.
    """

    def __init__(self):
        """Create an empty helper; all state is attached by the other methods."""
        # Intentionally empty (the original body, ``self = self``, was a no-op).

    def prepare_data(self, data, target, init_index, finit_index, size_unit_window_history,
                     size_unit_window_target, step, single_step, univariate):
        """Cut ``data`` into sliding history windows with matching labels.

        Parameters
        ----------
        data : array-like
            Source series (1-D for univariate use, 2-D rows=time otherwise).
        target : array-like
            Series the labels are read from.
        init_index : int
            First usable row; the first window *ends* just before
            ``init_index + size_unit_window_history``.
        finit_index : int or None
            Exclusive end of the loop over window end positions. ``None``
            means ``len(data) - size_unit_window_target`` so the last label
            is still inside ``target``.
        size_unit_window_history : int
            Number of raw rows each window looks back.
        size_unit_window_target : int
            Offset of the label (single step) or length of the label slice
            (multi step).
        step : int
            Sampling stride inside the history window.
        single_step : bool
            True: label is ``target[i + size_unit_window_target]``;
            False: label is ``target[i:i + size_unit_window_target]``.
        univariate : bool
            True: each window is reshaped to a column ``(n_samples, 1)``.

        Returns
        -------
        tuple of numpy.ndarray
            ``(windows, labels)``.
        """
        self.data = []  # X
        self.labels = []  # Y
        self.finit_index = finit_index
        # The first window needs a full history behind it.
        self.init_index = init_index + size_unit_window_history
        # Without an explicit end, stop at the last index that can still
        # provide a label.
        if self.finit_index is None:
            self.finit_index = len(data) - size_unit_window_target
        for i in range(self.init_index, self.finit_index):
            self.indices = range(i - size_unit_window_history, i, step)
            window = data[self.indices]
            if univariate:
                # (-1, 1) instead of (size_unit_window_history, 1): the window
                # holds fewer rows than the history size whenever step > 1,
                # which made the fixed-size reshape raise.
                window = np.reshape(window, (-1, 1))
            self.data.append(window)
            if single_step:
                self.labels.append(target[i + size_unit_window_target])
            else:
                self.labels.append(target[i:i + size_unit_window_target])
        return np.array(self.data), np.array(self.labels)

    def standardize_data(self, data, TRAIN_SPLIT):
        """Standardise ``data`` with statistics of its first ``TRAIN_SPLIT`` rows.

        Mean and standard deviation are taken along axis 0, i.e. one value
        per column for 2-D input, so features with different units are each
        scaled by their own statistics. For 1-D input this is the same scalar
        mean/std as before. The statistics are kept on ``self.train_mean`` and
        ``self.train_std``.

        Returns the standardised data (also stored on ``self.data``).
        """
        train_part = data[:TRAIN_SPLIT]
        self.train_mean = train_part.mean(axis=0)
        self.train_std = train_part.std(axis=0)
        self.data = (data - self.train_mean) / self.train_std
        return self.data

    def train_val_to_tfdata(self, x_train, y_train, x_val, y_val,
                            BATCH_SIZE, BUFFER_SIZE):
        """Wrap the training and validation arrays in ``tf.data`` pipelines.

        Training data is cached, shuffled with ``BUFFER_SIZE``, batched and
        repeated; validation data is cached, batched and repeated (no
        shuffle). Because both pipelines call ``repeat()``, callers pass
        explicit step counts when fitting.

        Returns ``(train_dataset, val_dataset)``; both are also stored on the
        instance as ``self.train_data`` / ``self.val_data``.
        """
        self.train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
        self.train_data = self.train_data.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
        self.val_data = tf.data.Dataset.from_tensor_slices((x_val, y_val))
        self.val_data = self.val_data.cache().batch(BATCH_SIZE).repeat()
        return self.train_data, self.val_data

    def check_single_window_shape(self, train_data):
        """Print the shape of the first window in ``train_data``.

        ``train_data`` is the array of prepared windows (anything indexable
        whose items have a ``.shape``). The original read ``self.train_data``
        instead of the argument, which is either unset or a non-indexable
        ``tf.data.Dataset``.
        """
        print('Single window of past history : {}'.format(train_data[0].shape))

    def build_lstm_model(self, train_data, units, target_size, single_step):
        """Build and compile a one-layer LSTM followed by a Dense output.

        Parameters
        ----------
        train_data : numpy.ndarray
            Prepared windows; only ``train_data.shape[-2:]`` is used, as the
            model's input shape.
        units : int
            Number of LSTM units.
        target_size : int
            Width of the Dense output for multi-step models; ignored (forced
            to 1) when ``single_step`` is true.
        single_step : bool
            Whether the model predicts a single value.

        Returns the compiled ``tf.keras`` model (RMSprop optimizer, MAE loss).
        """
        self.target_size = 1 if single_step else target_size
        model = tf.keras.models.Sequential()
        # The LSTM returns only its last output in both modes. The former
        # multi-step branch used return_sequences=True, which makes the Dense
        # layer emit one vector per time step, (batch, time, target_size),
        # and that cannot be matched against (batch, target_size) labels.
        model.add(tf.keras.layers.LSTM(units, input_shape=train_data.shape[-2:]))
        model.add(tf.keras.layers.Dense(self.target_size))
        model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='mae')
        return model

    def fit_lstm_model(self, model, train_data, val_data, evaluation_interval, epochs, val_step):
        """Fit ``model`` and return the Keras history object.

        ``evaluation_interval`` is passed as ``steps_per_epoch`` and
        ``val_step`` as ``validation_steps``. The model and the history are
        kept on ``self.model`` / ``self.history``.
        """
        self.model = model
        self.history = self.model.fit(train_data, epochs=epochs,
                                      steps_per_epoch=evaluation_interval,
                                      validation_data=val_data,
                                      validation_steps=val_step)
        return self.history

    def create_time_steps(self, length):
        """Return ``[-length, ..., -1]``, the x positions of a history window."""
        return list(range(-length, 0))

    def show_plot(self, plot_data, delta, title):
        """Plot a history window with its true and/or predicted future value.

        Parameters
        ----------
        plot_data : sequence
            ``plot_data[0]`` is the history (flattened before plotting); each
            further entry is drawn as a marker at x = ``delta``, labelled in
            order 'True Future' and 'Model Prediction'.
        delta : int
            x position of the future point(s); a falsy value means 0.
        title : str
            Plot title.

        Returns the ``matplotlib.pyplot`` module so the caller can ``show()``.
        """
        # NOTE: this overwrites the self.labels list filled by prepare_data.
        self.labels = ['History', 'True Future', 'Model Prediction']
        self.marker = ['.-', 'rx', 'go']
        self.time_steps = self.create_time_steps(plot_data[0].shape[0])
        self.future = delta if delta else 0
        plt.title(title)
        for i, series in enumerate(plot_data):
            if i:
                plt.plot(self.future, series, self.marker[i], markersize=10,
                         label=self.labels[i])
            else:
                plt.plot(self.time_steps, series.flatten(), self.marker[i],
                         label=self.labels[i])
        plt.legend()
        plt.xlim([self.time_steps[0], (self.future + 5) * 2])
        plt.xlabel('Time-Step')
        return plt
# ---------------------------------------------------------------------------
# Univariate LSTM: predict the temperature value that directly follows a
# window of the previous `univariate_past_history` temperature values.
# ---------------------------------------------------------------------------
# Use the first 80 % of the record for training. The length is taken from the
# table itself instead of the previously hard-coded 17520 rows, so the split
# stays correct if the input files cover a different period.
TRAIN_SPLIT = int(len(pd_data) * 80 / 100)
temp_dt = pd_data['Temp'].values

# Start model instance (this rebinds `p`, which held the precipitation array
# during data preparation).
p = lstm()
std_temp = p.standardize_data(data=temp_dt, TRAIN_SPLIT=TRAIN_SPLIT)

univariate_past_history = 20
# Offset 0: the label is the value right after the end of the window.
univariate_future_target = 0

# Prepare data for input to the model.
x_train_uni, y_train_uni = p.prepare_data(std_temp, std_temp, 0, TRAIN_SPLIT,
                                          univariate_past_history,
                                          univariate_future_target, step=1,
                                          single_step=True, univariate=True)
x_val_uni, y_val_uni = p.prepare_data(std_temp, std_temp, TRAIN_SPLIT, None,
                                      univariate_past_history,
                                      univariate_future_target, step=1,
                                      single_step=True, univariate=True)

# Optional sanity checks:
# print('Single window of past history')
# print(x_train_uni[0])
# print('\n Target temperature to predict')
# print(y_train_uni[0])
# Look at the real history:
# p.show_plot([x_train_uni[1], y_train_uni[1]], 0, 'Sample Example')

BATCH_SIZE = 256
BUFFER_SIZE = 10000

# numpy data to tf data
tf_train, tf_val = p.train_val_to_tfdata(x_train_uni, y_train_uni,
                                         x_val_uni, y_val_uni, BATCH_SIZE,
                                         BUFFER_SIZE)

# Define the LSTM model and check the output shape on one validation batch.
model = p.build_lstm_model(x_train_uni, 32, 1, True)
for x, y in tf_val.take(1):
    print(model.predict(x).shape)

EVALUATION_INTERVAL = 200
EPOCHS = 10

# Let's fit the model.
p.fit_lstm_model(model, tf_train, tf_val, EVALUATION_INTERVAL,
                 EPOCHS, val_step=50)

# Make predictions using validation data. Index 10 picks one sample out of
# each batch of BATCH_SIZE windows; change it to inspect another sample.
for x, y in tf_val.take(2):
    plot = p.show_plot([x[10].numpy(), y[10].numpy(),
                        model.predict(x)[10]], 0, 'Simple LSTM model')
    plot.show()

# The model can also predict directly from numpy arrays instead of a TF
# dataset; the input must then be 3-D: (samples, time steps, features).
model.predict(np.reshape(x_val_uni[0], (1, univariate_past_history, 1)))
#History
#Train for 200 steps, validate for 50 steps
#Epoch 1/10
#200/200 [==============================] - 11s 54ms/step - loss: 0.2227 - val_loss: 0.0876
#Epoch 2/10
#200/200 [==============================] - 5s 27ms/step - loss: 0.0766 - val_loss: 0.0648
#Epoch 3/10
#200/200 [==============================] - 5s 27ms/step - loss: 0.0640 - val_loss: 0.0493
#Epoch 4/10
#200/200 [==============================] - 5s 27ms/step - loss: 0.0577 - val_loss: 0.0468
#Epoch 5/10
#200/200 [==============================] - 5s 27ms/step - loss: 0.0547 - val_loss: 0.0454
#Epoch 6/10
#200/200 [==============================] - 5s 27ms/step - loss: 0.0527 - val_loss: 0.0459
#Epoch 7/10
#200/200 [==============================] - 5s 27ms/step - loss: 0.0518 - val_loss: 0.0482
#Epoch 8/10
#200/200 [==============================] - 5s 27ms/step - loss: 0.0507 - val_loss: 0.0469
#Epoch 9/10
#200/200 [==============================] - 5s 27ms/step - loss: 0.0505 - val_loss: 0.0458
#Epoch 10/10
#200/200 [==============================] - 5s 26ms/step - loss: 0.0499 - val_loss: 0.0433
# ---------------------------------------------------------------------------
# Multivariate LSTM: predict temperature from all columns of pd_data
# (Evap, Temp, Prec).
# NOTE: the `univariate_*` / `*_uni` names are kept from the section above
# even though the inputs here are multivariate.
# ---------------------------------------------------------------------------
# Use the first 80 % of the record for training; derived from the table
# length instead of the previously hard-coded 17520 rows.
TRAIN_SPLIT = int(len(pd_data) * 80 / 100)
multi_data = pd_data.values

# Start a fresh helper instance.
p = lstm()
std_temp = p.standardize_data(data=multi_data, TRAIN_SPLIT=TRAIN_SPLIT)

univariate_past_history = 720  # rows of history behind each window
univariate_future_target = 72  # predict the value 72 rows after the window
STEP = 6  # keep every 6th row of the history window

# Column 1 of the standardised table is 'Temp', the prediction target.
x_train_uni, y_train_uni = p.prepare_data(std_temp, std_temp[:, 1], 0, TRAIN_SPLIT,
                                          univariate_past_history,
                                          univariate_future_target, step=STEP,
                                          single_step=True, univariate=False)
x_val_uni, y_val_uni = p.prepare_data(std_temp, std_temp[:, 1], TRAIN_SPLIT, None,
                                      univariate_past_history,
                                      univariate_future_target, step=STEP,
                                      single_step=True, univariate=False)

# Optional sanity checks:
# print('Single window of past history')
# print(x_train_uni[0])
# print('\n Target temperature to predict')
# print(y_train_uni[0])
# Look at the real history:
# p.show_plot([x_train_uni[1][:,1], y_train_uni[1]], 0, 'Sample Example')

BATCH_SIZE = 256
BUFFER_SIZE = 10000
tf_train, tf_val = p.train_val_to_tfdata(x_train_uni, y_train_uni,
                                         x_val_uni, y_val_uni, BATCH_SIZE,
                                         BUFFER_SIZE)

# Build the LSTM model and check the output shape on one validation batch.
model = p.build_lstm_model(x_train_uni, 32, 1, True)
for x, y in tf_val.take(1):
    print(model.predict(x).shape)

EVALUATION_INTERVAL = 200
EPOCHS = 10

# Fit the model.
p.fit_lstm_model(model, tf_train, tf_val, EVALUATION_INTERVAL,
                 EPOCHS, val_step=50)
#Train for 200 steps, validate for 50 steps
#Epoch 1/10
#200/200 [==============================] - 35s 174ms/step - loss: 0.3674 - val_loss: 0.3238
#Epoch 2/10
#200/200 [==============================] - 31s 156ms/step - loss: 0.3219 - val_loss: 0.3274
#Epoch 3/10
#200/200 [==============================] - 31s 155ms/step - loss: 0.3177 - val_loss: 0.3639
#Epoch 4/10
#200/200 [==============================] - 31s 156ms/step - loss: 0.3097 - val_loss: 0.3452
#Epoch 5/10
#200/200 [==============================] - 30s 152ms/step - loss: 0.2964 - val_loss: 0.3239
#Epoch 6/10
#200/200 [==============================] - 30s 150ms/step - loss: 0.2770 - val_loss: 0.3159
#Epoch 7/10
#200/200 [==============================] - 31s 154ms/step - loss: 0.2710 - val_loss: 0.3635
#Epoch 8/10
#200/200 [==============================] - 31s 157ms/step - loss: 0.2551 - val_loss: 0.2954
#Epoch 9/10
#200/200 [==============================] - 32s 161ms/step - loss: 0.2399 - val_loss: 0.2871
#Epoch 10/10
#200/200 [==============================] - 31s 157ms/step - loss: 0.2338 - val_loss: 0.2798
# Now let's predict validation data using the multivariate LSTM.
# The label lies `univariate_future_target` (72) raw rows after the window,
# and the plotted history keeps every 6th row (the `step=6` used when the
# windows were prepared), so the future point belongs 72 // 6 = 12 plot steps
# to the right of the history instead of at x = 0.
for x, y in tf_val.take(2):
    plot = p.show_plot([x[-1][:, 1].numpy(), y[-1].numpy(),
                        model.predict(x)[-1]],
                       univariate_future_target // 6, 'Simple LSTM model')
    plot.show()