/*
Multilayer Perceptron library for ESP32
Inspired by:
https://courses.cs.washington.edu/courses/cse599/01wi/admin/Assignments/bpn.html
http://neuralnetworksanddeeplearning.com/chap2.html
(c) 2020 Lesept
contact: lesept777@gmail.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef MLP_h
#define MLP_h
#include <Arduino.h>
#include "FS.h"
#include "SPIFFS.h"
#define MAX_LAYERS 8 // Maximum number of layers
#ifndef MAX_INPUT
#define MAX_INPUT 70 // Maximum number of neurons in input layer
#endif
// Heuristics options: set a bit to enable the corresponding behaviour
#define H_INIT_OPTIM 0x01 // to initialize the network before optimizing
#define H_CHAN_WEIGH 0x02 // to draw brand new random weights
#define H_MUTA_WEIGH 0x04 // to slightly change the weights
#define H_CHAN_BATCH 0x08 // to change batch size
#define H_CHAN_LRATE 0x10 // to change the learning rate
#define H_CHAN_SGAIN 0x20 // to change the sigmoid gain
#define H_CHAN_ALPHA 0x40 // to change the momentum (alpha)
#define H_SHUF_DATAS 0x80 // to shuffle the dataset
#define H_ZERO_WEIGH 0x100 // to force low weights to 0
#define H_STOP_TOTER 0x200 // stop optimization if test + train Error < threshold
#define H_SELE_WEIGH 0x400 // select best weights over 10 random sets
#define H_FORC_S_G_D 0x800 // force stochastic gradient descent for faster optimization
#define H_REG1_WEIGH 0x1000 // use L1 weight regularization
#define H_REG2_WEIGH 0x2000 // use L2 weight regularization
// Activation functions
enum ACTIVATION {
SIGMOID,
SIGMOID2, /* Sigmoid-like function between -1 and 1 */
IDENTITY,
RELU,
LEAKYRELU, /* RELU with a small slope for negative values */
ELU, /* Similar to SIGMOID2 for <0, and RELU for >0 */
SELU,
TANH,
SOFTMAX
};
typedef struct { /* A layer of the network: */
int Number; /* - number of the layer in the network */
int Units; /* - number of neurons in this layer */
int Activation; /* - the number of the activation function */
float* Output; /* - output of ith neuron */
float* Error; /* - error term of ith neuron */
float** Weight; /* - connection weights to ith neuron */
float** WeightSave; /* - saved weights for stopped training */
float** dWeight; /* - weight deltas */
float** dWeightOld; /* - last weight deltas for momentum */
} LAYER;
typedef struct
{
float *In; // dynamic array of input data
float Out; // output
} Data;
typedef struct
{
Data *data; // dynamic array of data
size_t nData; // number of records
size_t nInput; // number of inputs per record (size of In)
} DATASET;
class MLP
{
public:
MLP(int, int*, int = 1, bool = false);
~MLP();
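/*
Construction sketch (hypothetical: the first two arguments are assumed
to be the number of layers and an array of neurons per layer; the two
optional arguments keep their default values):
int units[] = {4, 8, 3}; // 4 inputs, 8 hidden neurons, 3 outputs
MLP net(3, units); // 3-layer network
*/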
/*
Functions related to dataset and network saving on SPIFFS
*/
// netLoad (filename), netSave (filename)
bool netLoad (const char* const);
void netSave (const char* const);
/*
readCsvFromSpiffs (filename, dataset, nData, coeff)
Reads the dataset from a csv file on SPIFFS
nData : number of lines of the file
A line is made of: x1, x2, x3 ... xN, Out
where N is the number of neurons of the input layer
coeff : a coefficient used to divide the output values if they are too large
*/
int readCsvFromSpiffs (const char* const, DATASET*, int, float);
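/*
Example (a sketch: the file name "/data.csv" and the line count are
illustrative; coeff = 1.0f means the outputs need no scaling):
DATASET dataset;
int nData = net.readCsvFromSpiffs("/data.csv", &dataset, 200, 1.0f);
*/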
/*
createDataset (dataset, nData)
nData: number of records in the dataset
A record is made of: x1, x2, x3 ... xN, Out
where N is the number of neurons of the input layer
Keep 'Out' near the range 0 .. 10
*/
int createDataset (DATASET*, int);
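/*
Example (a sketch, given the MLP instance 'net' from above; error
checking omitted):
DATASET dataset;
net.createDataset(&dataset, 100); // a dataset of 100 records
// ... train on the dataset ...
net.destroyDataset(&dataset); // free the memory when done
*/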
// process the dataset (minimum value and range)
void processDataset(DATASET*);
void destroyDataset(DATASET*);
// Display the network parameters
void displayNetwork();
/*
The following methods are required
begin (ratio): ratio of the training data in the complete dataset
initLearn (alpha, eta, gain, anneal)
4 parameters of the training phase:
alpha : initial momentum
eta : initial learning rate
gain : initial gain of the sigmoid activation function
anneal: rate of change of the learning rate
*/
void begin (float);
void initLearn (float, float, float, float);
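/*
Typical call sequence (a sketch; the values are illustrative only):
net.begin(0.8f); // use 80% of the dataset for training
net.initLearn(0.5f, 0.5f, 1.0f, 0.8f); // alpha, eta, gain, anneal
*/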
/*
Methods to set various parameters, if you need to change
them anytime during the training phase
*/
void setIterations (int);
void setEpochs (int);
void setBatchSize (int);
void setAlpha (float);
void setEta (float);
void setGain (float);
void setAnneal (float);
// void setActivation (int activation[]);
void setActivation (int*);
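/*
Example (a sketch: the array is assumed to hold one ACTIVATION enum
value per layer):
int activ[] = {SIGMOID, RELU, SIGMOID};
net.setActivation(activ);
*/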
/*
set the verbose level
0: mute
1: minimal information (recommended)
2: show all steps
3: level 2 plus display weight values (in displayNetwork and netLoad)
*/
void setVerbose (int);
/*
Methods to get various parameters
*/
int getIterations ();
int getEpochs ();
int getBatchSize ();
float getAlpha ();
float getEta ();
float getGain ();
float getAnneal ();
// getNeuronNumbers (layer): get the number of neurons in a specific layer
int getNeuronNumbers (int);
// estimateNetSize : estimate the memory size of the network in bytes
uint32_t estimateNetSize ();
/*
getWeight(layer, upperNeuron, lowerNeuron): get the value of a
specific weight
setWeight(layer, upperNeuron, lowerNeuron, value): set any weight value
*/
float getWeight (int, int, int);
int setWeight (int, int, int, float);
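/*
Example (a sketch): read a weight and scale it down:
float w = net.getWeight(1, 2, 3); // layer 1, upper neuron 2, lower neuron 3
net.setWeight(1, 2, 3, w * 0.5f);
*/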
// Allocates memory for the network
void generateNetwork ();
/*
The optimize method is an all-in-one training routine
Parameters:
- dataset
- number of iterations
- number of epochs per iteration
- number of samples in a batch
Set the heuristics options before calling 'optimize',
otherwise the default options are used (see the sketch below)
*/
float optimize (DATASET*, int, int, int);
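/*
Example (a sketch; the numbers are illustrative only):
float err = net.optimize(&dataset, 5, 100, 16);
// 5 iterations of 100 epochs each, with batches of 16 samples
*/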
/*
Define the various heuristics used in the 'optimize' method
The heuristics are passed as an integer whose bits enable the
various options (see the #define list above)
*/
void setHeuristics (long);
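/*
Example: combine options by OR-ing the bit masks defined at the top
of this file:
net.setHeuristics(H_INIT_OPTIM | H_CHAN_LRATE | H_SHUF_DATAS);
*/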
/*
Methods to set the options one by one
bool : true / false to enable or disable the option
float, float: set a range (minimum and maximum values) or probability
*/
void setHeurInitialize (bool);
void setHeurZeroWeights (bool, float);
// first float is the weight range, second is the probability of setting a weight to 0 (for sparsity)
void setHeurChangeWeights (bool, float, float);
// first float is the mutation probability, second is the percent of change
void setHeurMutateWeights (bool, float, float);
void setHeurChangeBatch (bool);
// in the following methods, the float arguments are min and max values of the range
void setHeurChangeEta (bool, float, float);
void setHeurChangeGain (bool, float, float);
void setHeurChangeAlpha (bool, float, float);
void setHeurShuffleDataset (bool);
void setHeurTotalError (bool);
void setHeurSelectWeights (bool);
// in the two following methods, the float argument is the value of lambda (the regularization parameter)
void setHeurRegulL1 (bool, float);
void setHeurRegulL2 (bool, float);
// Display the summary of the heuristics options
void displayHeuristics ();
// Methods to force a change of the Alpha, Eta, Gain and BatchSize values
void changeEta ();
void changeGain ();
void changeAlpha ();
void changeBatchSize ();
// setMaxError (maxError): set the criterion for stopping the learning phase
void setMaxError (float);
// select the best set of weights over 20 random ones
void selectWeights(DATASET*);
// weight regularization terms (L1 & L2)
float regulL1Weights();
float regulL2Weights();
int numberOfWeights();
/*
If you want to program your own optimization function, use the following
methods
trainNetSGD: does the complete propagation + backpropagation
+ weight update process
testNet: computes the current error in the training and testing sets
getError: returns the values of the errors
*/
void trainNetSGD (DATASET*);
void testNet (DATASET*, bool);
void trainAndTest (DATASET*);
void evaluateNet (DATASET*, float);
void getError (float*, float*, int*, int*);
float getTrainSetError (DATASET*);
float getTestSetError (DATASET*);
int getTotalEpochs ();
float* getSoftmaxValues ();
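/*
Sketch of a custom training loop built on these methods (hypothetical:
the epoch count and stopping threshold are arbitrary):
for (int epoch = 0; epoch < 100; epoch++) {
net.trainNetSGD(&dataset); // propagation + backprop + weight update
net.testNet(&dataset, false); // compute current errors
float trainErr, testErr;
int nTrainErr, nTestErr;
net.getError(&trainErr, &testErr, &nTrainErr, &nTestErr);
if (testErr < 0.05f) break; // stop when good enough
}
*/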
/*
Once the net is trained and optimized, use the predict method
for inference
Parameter:
input: a pointer to the array of input data (in the format of the dataset)
Returns the predicted output value
*/
float predict (float*);
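/*
Inference example (a sketch; the input array must provide one value
per input-layer neuron):
float input[4] = {5.1f, 3.5f, 1.4f, 0.2f};
float result = net.predict(input);
*/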
/*
Various useful functions
estimateDuration: provides an estimate of the training duration (in ms);
requires that the dataset is created and that the numbers of
iterations and epochs are set
randomWeights: assigns random values to the weights (parameter: range)
weightMutation: slightly changes the weights
parameters: probability of change, range of change (in %)
saveWeights and restoreWeights: useful when a good error level is reached
*/
uint32_t estimateDuration (DATASET*);
void shuffleDataset (DATASET*, int, int);
void randomWeights(float);
void saveWeights();
void restoreWeights();
void weightMutation (float, float);
/*
Method to control parallel execution of the learning phase:
if enabled, learning is shared between both cores, for up to a 2x speedup
*/
void setParallel (bool);
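/*
Example: net.setParallel(true); // share training between both cores
*/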
void dispWeights();
private:
struct argsStruct { // structure to pass arguments to parallel tasks
int start;
int end;
int layer;
MLP * instance;
SemaphoreHandle_t * semaphore;
};
// Task for parallel computing of forward propagation
static void forwardTask (void * parameters) {
argsStruct* args = (argsStruct*) parameters; // unpack the task arguments once
int start = args->start;
int end = args->end;
int l = args->layer;
LAYER* L = args->instance->Layer[l]; // source layer
LAYER* LP = args->instance->Layer[l + 1]; // destination layer
float Sum;
for (int i = start; i <= end; i++) { // neurons of layer l+1 assigned to this task
Sum = 0;
for (int j = 0; j <= L->Units; j++) {
Sum += LP->Weight[i][j] * L->Output[j];
}
LP->Output[i] = args->instance->activation (Sum, LP);
}
xSemaphoreGive(*args->semaphore); // signal completion to the caller
vTaskDelete(NULL); // a FreeRTOS task must delete itself when done
}
// Task for parallel computing of backward propagation
static void backwardTask (void * parameters) {
argsStruct* args = (argsStruct*) parameters; // unpack the task arguments once
int start = args->start;
int end = args->end;
int l = args->layer;
LAYER* L = args->instance->Layer[l]; // current layer
LAYER* LP = args->instance->Layer[l - 1]; // previous layer
float Out, Err;
for (int i = start; i <= end; i++) { // neurons of layer l-1 assigned to this task
Out = LP->Output[i];
Err = 0;
for (int j = 1; j <= L->Units; j++)
Err += L->Weight[j][i] * L->Error[j]; // W^l (T) . delta^l
LP->Error[i] = args->instance->derivActiv (Out, LP) * Err; // delta^(l-1)
}
xSemaphoreGive(*args->semaphore); // signal completion to the caller
vTaskDelete(NULL); // a FreeRTOS task must delete itself when done
}
// Parameters of the network
LAYER** Layer; /* - layers of this net */
LAYER* InputLayer; /* - input layer */
LAYER* OutputLayer; /* - output layer */
float Alpha; /* - momentum factor */
float Eta; /* - learning rate */
float Gain; /* - gain of sigmoid function */
float Error; /* - total net error */
float AlphaSave; /* - saved momentum factor */
float EtaSave; /* - saved learning rate */
float GainSave; /* - saved gain */
float TrainErrorSave; /* - saved best train error */
float TestErrorSave; /* - saved best test error */
// Private variables
bool _enableSkip = false;
bool _parallelRun = false;
int _units[MAX_LAYERS], _numLayers;
int _activations[MAX_LAYERS] = {0};
int _nData, _nTrain, _nTest;
int _nTrainError, _nTestError;
float _ratio = 0.8f;
int _iters, _epochs, _batchSize;
float _anneal = 0.8f;
float _trainError, _testError, _criterion;
float _maxErr = 0.05f;
uint8_t _verbose;
float _minError;
bool _datasetProcessed = false;
float _inMinVal[MAX_INPUT], _inDelta[MAX_INPUT];
float _outMinVal, _outDelta;
float _alphaELU = 1.0f;
float _lambdaRegulL1 = 0.0f;
float _lambdaRegulL2 = 0.0f;
char ActivNames[9][10] = {"SIGMOID", "SIGMOID2", "IDENTITY",
"RELU", "LEAKYRELU", "ELU", "SELU",
"TANH", "SOFTMAX"};
// Booleans for the heuristics
long _heuristics = 0;
bool _initialize = true;
bool _changeWeights = false;
bool _mutateWeights = false;
bool _changeBatch = false;
bool _changeEta = false;
bool _changeGain = false;
bool _shuffleDataset = false;
bool _zeroWeights = false;
bool _changeAlpha = false;
bool _stopTotalError = false;
bool _selectWeights = false;
bool _forceSGD = false;
bool _regulL1 = false;
bool _regulL2 = false;
float _proba = 0.05f, _percent = 0.15f;
float _range = 1.0f;
float _minEta = 0.35f, _maxEta = 1.1f;
float _minGain = 0.5f, _maxGain = 2.0f;
float _minAlpha = 0.5f, _maxAlpha = 1.5f;
float _zeroThreshold = 0.1f;
int _totalEpochs;
bool _eval = false, _predict = false;
float _probaZeroWeight = 0.0f;
// Private methods
void simulateNet(float*, float*, float*, bool);
void process(float*, float*, float*, int);
void propagateNet();
void backpropagateNet();
float computeOutputError(float*, int, int);
void initBatch();
void setInput(float*, int);
void getOutput(float*);
void adjustWeights();
int randomInt(int, int);
float randomFloat(float, float);
int readIntFile (File);
float readFloatFile (File);
float activation (float, LAYER*);
float derivActiv (float, LAYER*);
void softmax ();
// void forwardTask(void*);
// static void startForwardTask(void*);
};
#endif