Commit 41b3e64

New ANN functionality - error function, storing errors, weight decay, momentum term

alexvakimov committed Apr 14, 2021
1 parent bf4452a commit 41b3e64
Showing 3 changed files with 122 additions and 9 deletions.
3 changes: 2 additions & 1 deletion src/math_ann/NeuralNetwork.h
@@ -148,14 +148,15 @@ class NeuralNetwork{
vector<MATRIX> propagate(MATRIX& input);
vector<MATRIX> derivatives(MATRIX& input);
double back_propagate(vector<MATRIX>& Y, MATRIX& target);
double error(MATRIX& input, MATRIX& target);

// Init weights and biases
void init_weights_biases_uniform(Random& rnd, double left_w, double right_w, double left_b, double right_b);
void init_weights_biases_normal(Random& rnd, double scaling_w, double shift_w, double scaling_b, double shift_b);


// Training
- void train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets);
+ vector<double> train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets);


// Methods
124 changes: 117 additions & 7 deletions src/math_ann/NeuralNetwork_Algorithms.cpp
@@ -259,6 +259,69 @@ double NeuralNetwork::back_propagate(vector<MATRIX>& Y, MATRIX& target){
}




double NeuralNetwork::error(MATRIX& inputs, MATRIX& target){
/**
This function computes the error of the ANN prediction
Args:
inputs - Npe[0] x num_patterns matrix of the inputs
target - Npe[Nlayers-1] x num_patterns matrix of the expected outputs of the ANN
Returns:
the error averaged over the patterns (one half of the mean squared error)
*/

int i, j, L;

if(target.n_cols!=inputs.n_cols){
std::cout<<"Error: The number of patterns is different for inputs "<<inputs.n_cols<<" and targets "<<target.n_cols<<std::endl;
exit(0);
}

int sz = target.n_cols; // number of patterns to handle at the same time

/**
Layer layout (NL = Nlayers - 1):

L        0               1                            ....   NL
W        [junk]          W[1]                                 W[NL]
B        [junk]          B[1]                                 B[NL]
Y        Y[0] = input    f(W[1]*Y[0] + B[1])                  output = f(W[NL]*Y[NL-1] + B[NL])
deltas   [junk]          W^T[2]*delta[2] * f'(Y[1])           target - output[NL]
*/

vector<MATRIX> Y;
Y = propagate(inputs);


MATRIX delta(Npe[Nlayers-1], sz);

/// L = Nlayers-1
delta = target - Y[Nlayers-1];

// Compute error
double err = 0.0;
for(i=0; i<Npe[Nlayers-1]; i++){
for(j=0; j<sz; j++){
err += delta.get(i,j) * delta.get(i,j);
}// for j
}// for i
err *= (0.5/double(sz));

return err;

}

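/*
  For reference: written out, the quantity computed by error() above is one
  half of the mean squared error over the S = target.n_cols patterns (a
  reading aid in LaTeX notation, not part of the commit; NL = Nlayers - 1):

    E = \frac{1}{2S} \sum_{p=1}^{S} \sum_{i=1}^{Npe[NL]} \left( target_{i,p} - Y^{(NL)}_{i,p} \right)^2

  summed over the output nodes i and averaged over the patterns p.
*/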



void NeuralNetwork::init_weights_biases_normal(Random& rnd, double scaling_w, double shift_w, double scaling_b, double shift_b){

for(int L =1; L < Nlayers; L++){
@@ -290,13 +353,13 @@ void NeuralNetwork::init_weights_biases_uniform(Random& rnd, double left_w, double right_w, double left_b, double right_b){
}


- void NeuralNetwork::train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets){
+ vector<double> NeuralNetwork::train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets){
/**
See more details here:
http://page.mi.fu-berlin.de/rojas/neural/chapter/K8.pdf
*/

- int i, epoch, L;
+ int i, j, epoch, L;

///============ Get the parameters ==================
learning_rate = 0.0;
@@ -306,6 +369,10 @@ void NeuralNetwork::train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets){
int epoch_size = 1;
int n_patterns = inputs.n_cols;
int verbosity = 0;
int is_error_collect_frequency = 0;
int error_collect_frequency = 1;
vector<double> weight_decay(1, 0.0); int is_weight_decay = 0;
vector<double> bias_decay(1, 0.0); int is_bias_decay = 0;

std::string key;
for(int i=0;i<len(params.values());i++){
@@ -318,17 +385,50 @@ void NeuralNetwork::train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets){
else if(key=="steps_per_epoch") { steps_per_epoch = bp::extract<int>(params.values()[i]); }
else if(key=="epoch_size") { epoch_size = bp::extract<int>(params.values()[i]); }
else if(key=="verbosity") { verbosity = bp::extract<int>(params.values()[i]); }
else if(key=="error_collect_frequency") {
is_error_collect_frequency = 1;
error_collect_frequency = bp::extract<int>(params.values()[i]);
}
else if(key=="weight_decay"){
is_weight_decay = 1;
boost::python::list tmp = extract<boost::python::list>(params.values()[i]);
for(j=0; j<len(tmp); j++){ weight_decay.push_back( extract<double>(tmp[j]) ); }

if(weight_decay.size()!=Nlayers){
std::cout<<"Error: The number of weight decay constants should be equal to the number of weight matrices \n";
std::cout<<"Now exiting...\n";
exit(0);
}// if
}
else if(key=="bias_decay"){
is_bias_decay = 1;
boost::python::list tmp = extract<boost::python::list>(params.values()[i]);
for(j=0; j<len(tmp); j++){ bias_decay.push_back( extract<double>(tmp[j]) ); }

if(bias_decay.size()!=Nlayers){
std::cout<<"Error: The number of bias decay constants should be equal to the number of bias matrices \n";
std::cout<<"Now exiting...\n";
exit(0);
}// if
}

}


if(!is_error_collect_frequency){ error_collect_frequency = steps_per_epoch; }
if(!is_weight_decay){ for(j=0; j<Nlayers-1; j++){ weight_decay.push_back(0.0); } }
if(!is_bias_decay){ for(j=0; j<Nlayers-1; j++){ bias_decay.push_back(0.0); } }
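// Note: with the defaults above, the error is collected once per epoch
// (error_collect_frequency = steps_per_epoch) and all decay constants are
// zero, so the weight update below reduces to the plain momentum update.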

MATRIX input_subset(sz_x, epoch_size);
MATRIX target_subset(sz_y, epoch_size);
vector<int> subset(epoch_size);
vector<int> inp_dim(sz_x); for(i=0; i<sz_x; i++){ inp_dim[i] = i; }
vector<int> tar_dim(sz_y); for(i=0; i<sz_y; i++){ tar_dim[i] = i; }
vector<MATRIX> Y;

- double err = 0.0;
+ int counter = 0;
+ double err_loc = 0.0;
+ vector<double> err;

for(epoch = 0; epoch < num_epochs; epoch++){

@@ -343,26 +443,34 @@ void NeuralNetwork::train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets){

// Update gradients and outputs
Y = propagate(input_subset);
- err = back_propagate( Y, target_subset);
+ err_loc = back_propagate( Y, target_subset);

+ if(counter % error_collect_frequency ==0){ err.push_back( err_loc); }


// Update weights and biases
// According to: http://page.mi.fu-berlin.de/rojas/neural/chapter/K8.pdf
for(L = 0; L < Nlayers; L++){

- dWold[L] = (learning_rate * dW[L] + momentum_term * dWold[L]);
- dBold[L] = (learning_rate * dB[L] + momentum_term * dBold[L]);
+ dWold[L] = (learning_rate * (dW[L] - weight_decay[L]*W[L]) + momentum_term * dWold[L]);
+ dBold[L] = (learning_rate * (dB[L] - bias_decay[L]*B[L]) + momentum_term * dBold[L]);

W[L] += dWold[L];
B[L] += dBold[L];

}


counter++;

}// for i

- if(verbosity>=1){ cout<<"epoch = "<<epoch<<" (local) error = "<<err<<"\n"; }
+ if(verbosity>=1){ cout<<"epoch = "<<epoch<<" (local) error = "<<err_loc<<"\n"; }

}// for epoch

return err;

}
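/*
  In equation form, the update applied inside the epoch loop above is (a
  reading aid, not part of the commit; \eta = learning_rate, \mu =
  momentum_term, \lambda_L = weight_decay[L], and dW[L] is the step
  direction produced by back_propagate):

    \Delta W^{(L)}_t = \eta \left( dW^{(L)} - \lambda_L W^{(L)} \right) + \mu \, \Delta W^{(L)}_{t-1}
    W^{(L)} \leftarrow W^{(L)} + \Delta W^{(L)}_t

  and analogously for the biases with \lambda_L = bias_decay[L]. With all
  decay constants equal to zero this reduces to the plain momentum update
  used before this commit.
*/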


@@ -877,6 +985,8 @@ void NeuralNetwork::ANNTrain(){
// Now add momentum term (if it is not zero)
if(learning_method=="BackProp"){



for(L=1;L<=NL;L++){
/** TESTING ON 4/3/2021!!! : should be "+", but we try - */
dW[L] = dWcurr[L] + momentum_term*dW[L];
4 changes: 3 additions & 1 deletion src/math_ann/libann.cpp
@@ -167,14 +167,15 @@ ExportANN_docstring += tmp;
vector<MATRIX> (NeuralNetwork::*expt_propagate_v1)(MATRIX& input) = &NeuralNetwork::propagate;
vector<MATRIX> (NeuralNetwork::*expt_derivatives_v1)(MATRIX& input) = &NeuralNetwork::derivatives;
double (NeuralNetwork::*expt_back_propagate_v1)(vector<MATRIX>& Y, MATRIX& target) = &NeuralNetwork::back_propagate;
double (NeuralNetwork::*expt_error_v1)(MATRIX& input, MATRIX& target) = &NeuralNetwork::error;


void (NeuralNetwork::*expt_init_weights_biases_uniform_v1)
(Random& rnd, double left_w, double right_w, double left_b, double right_b) = &NeuralNetwork::init_weights_biases_uniform;
void (NeuralNetwork::*expt_init_weights_biases_normal_v1)
(Random& rnd, double scaling_w, double shift_w, double scaling_b, double shift_b) = &NeuralNetwork::init_weights_biases_normal;

- void (NeuralNetwork::*expt_train_v1)
+ vector<double> (NeuralNetwork::*expt_train_v1)
(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets) = &NeuralNetwork::train;


@@ -210,6 +211,7 @@ ExportANN_docstring += tmp;
.def("propagate",expt_propagate_v1)
.def("derivatives",expt_derivatives_v1)
.def("back_propagate",expt_back_propagate_v1)
.def("error",expt_error_v1)

.def("init_weights_biases_uniform",expt_init_weights_biases_uniform_v1)
.def("init_weights_biases_normal",expt_init_weights_biases_normal_v1)

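For orientation, a minimal sketch of how the extended interface could be driven
from the C++ side. The network dimensions (Nlayers = 3), the parameter values,
and the assumption that the hidden parsing lines read learning_rate,
momentum_term and num_epochs the same way as the keys shown above are all
illustrative, not part of the commit:

#include "NeuralNetwork.h"   // assumed to define NeuralNetwork, MATRIX, Random
#include <boost/python.hpp>
namespace bp = boost::python;

void example(NeuralNetwork& ann, Random& rnd, MATRIX& inputs, MATRIX& targets){
  // assumes the Python runtime is already initialized (Py_Initialize),
  // since the parameters travel through a boost::python dict
  bp::dict params;
  params["learning_rate"] = 0.01;
  params["momentum_term"] = 0.9;
  params["num_epochs"] = 100;
  params["steps_per_epoch"] = 50;
  params["epoch_size"] = 10;
  params["error_collect_frequency"] = 5;  // store the error every 5 steps

  // one decay constant per weight matrix W[1..Nlayers-1]; here Nlayers = 3
  bp::list wd; wd.append(1.e-4); wd.append(1.e-4);
  params["weight_decay"] = wd;

  // train() now returns the errors collected every error_collect_frequency steps
  vector<double> errs = ann.train(rnd, params, inputs, targets);

  // the new error() method evaluates the current half-MSE on any dataset
  double final_err = ann.error(inputs, targets);
  (void) errs; (void) final_err;
}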