Commit 41b3e64

New ANN functionality - error function, storing errors, weight decay, momentum term

alexvakimov committed Apr 14, 2021
1 parent bf4452a commit 41b3e64
Showing 3 changed files with 122 additions and 9 deletions.
3 changes: 2 additions & 1 deletion src/math_ann/NeuralNetwork.h
@@ -148,14 +148,15 @@ class NeuralNetwork{
vector<MATRIX> propagate(MATRIX& input);
vector<MATRIX> derivatives(MATRIX& input);
double back_propagate(vector<MATRIX>& Y, MATRIX& target);
double error(MATRIX& input, MATRIX& target);

// Init weights and biases
void init_weights_biases_uniform(Random& rnd, double left_w, double right_w, double left_b, double right_b);
void init_weights_biases_normal(Random& rnd, double scaling_w, double shift_w, double scaling_b, double shift_b);


// Training
- void train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets);
+ vector<double> train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets);


// Methods
124 changes: 117 additions & 7 deletions src/math_ann/NeuralNetwork_Algorithms.cpp
@@ -259,6 +259,69 @@ double NeuralNetwork::back_propagate(vector<MATRIX>& Y, MATRIX& target){
}




double NeuralNetwork::error(MATRIX& inputs, MATRIX& target){
/**
This function computes the error of the ANN prediction
Args:
inputs - Npe[0] x num_patterns matrix of the inputs
target - Npe[Nlayers-1] x num_patterns matrix of the expected outputs of the ANN
Returns:
the error averaged over the patterns (one half of the mean squared error)
*/

int i, j, L;

if(target.n_cols!=inputs.n_cols){
std::cout<<"Error: The number of patterns is different for inputs "<<inputs.n_cols<<" and targets "<<target.n_cols<<std::endl;
exit(0);
}

int sz = target.n_cols; // number of patterns to handle at the same time

/**
Layer layout (NL = Nlayers - 1):

L        0               1                            ....   NL
W        [junk]          W[1]                                 W[NL]
B        [junk]          B[1]                                 B[NL]
Y        Y[0] = input    f(W[1]*Y[0] + B[1])                  output = f(W[NL]*Y[NL-1] + B[NL])
deltas   [junk]          W^T[2]*delta[2] * f'(Y[1])           target - output[NL]
*/

vector<MATRIX> Y;
Y = propagate(inputs);


MATRIX delta(Npe[Nlayers-1], sz);

/// L = Nlayers-1
delta = target - Y[Nlayers-1];

// Compute error
double err = 0.0;
for(i=0; i<Npe[Nlayers-1]; i++){
for(j=0; j<sz; j++){
err += delta.get(i,j) * delta.get(i,j);
}// for j
}// for i
err *= (0.5/double(sz));

return err;

}

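/*
  For reference: written out, the quantity computed by error() above is one
  half of the mean squared error over the S = target.n_cols patterns (a
  reading aid in LaTeX notation, not part of the commit; NL = Nlayers - 1):

    E = \frac{1}{2S} \sum_{p=1}^{S} \sum_{i=1}^{Npe[NL]} \left( target_{i,p} - Y^{(NL)}_{i,p} \right)^2

  summed over the output nodes i and averaged over the patterns p.
*/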



void NeuralNetwork::init_weights_biases_normal(Random& rnd, double scaling_w, double shift_w, double scaling_b, double shift_b){

for(int L =1; L < Nlayers; L++){
@@ -290,13 +353,13 @@ void NeuralNetwork::init_weights_biases_uniform(Random& rnd, double left_w, double right_w, double left_b, double right_b){
}


- void NeuralNetwork::train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets){
+ vector<double> NeuralNetwork::train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets){
/**
See more details here:
http://page.mi.fu-berlin.de/rojas/neural/chapter/K8.pdf
*/

- int i, epoch, L;
+ int i, j, epoch, L;

///============ Get the parameters ==================
learning_rate = 0.0;
@@ -306,6 +369,10 @@ void NeuralNetwork::train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets){
int epoch_size = 1;
int n_patterns = inputs.n_cols;
int verbosity = 0;
int is_error_collect_frequency = 0;
int error_collect_frequency = 1;
vector<double> weight_decay(1, 0.0); int is_weight_decay = 0;
vector<double> bias_decay(1, 0.0); int is_bias_decay = 0;

std::string key;
for(int i=0;i<len(params.values());i++){
@@ -318,17 +385,50 @@ void NeuralNetwork::train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets){
else if(key=="steps_per_epoch") { steps_per_epoch = bp::extract<int>(params.values()[i]); }
else if(key=="epoch_size") { epoch_size = bp::extract<int>(params.values()[i]); }
else if(key=="verbosity") { verbosity = bp::extract<int>(params.values()[i]); }
else if(key=="error_collect_frequency") {
is_error_collect_frequency = 1;
error_collect_frequency = bp::extract<int>(params.values()[i]);
}
else if(key=="weight_decay"){
is_weight_decay = 1;
boost::python::list tmp = extract<boost::python::list>(params.values()[i]);
for(j=0; j<len(tmp); j++){ weight_decay.push_back( extract<double>(tmp[j]) ); }

if(weight_decay.size()!=Nlayers){
std::cout<<"Error: The number of weight decay constants should be equal to the number of weight matrices \n";
std::cout<<"Now exiting...\n";
exit(0);
}// if
}
else if(key=="bias_decay"){
is_bias_decay = 1;
boost::python::list tmp = extract<boost::python::list>(params.values()[i]);
for(j=0; j<len(tmp); j++){ bias_decay.push_back( extract<double>(tmp[j]) ); }

if(bias_decay.size()!=Nlayers){
std::cout<<"Error: The number of bias decay constants should be equal to the number of bias matrices \n";
std::cout<<"Now exiting...\n";
exit(0);
}// if
}

}


if(!is_error_collect_frequency){ error_collect_frequency = steps_per_epoch; }
if(!is_weight_decay){ for(j=0; j<Nlayers-1; j++){ weight_decay.push_back(0.0); } }
if(!is_bias_decay){ for(j=0; j<Nlayers-1; j++){ bias_decay.push_back(0.0); } }
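// Note: with the defaults above, the error is collected once per epoch
// (error_collect_frequency = steps_per_epoch) and all decay constants are
// zero, so the weight update below reduces to the plain momentum update.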

MATRIX input_subset(sz_x, epoch_size);
MATRIX target_subset(sz_y, epoch_size);
vector<int> subset(epoch_size);
vector<int> inp_dim(sz_x); for(i=0; i<sz_x; i++){ inp_dim[i] = i; }
vector<int> tar_dim(sz_y); for(i=0; i<sz_y; i++){ tar_dim[i] = i; }
vector<MATRIX> Y;

- double err = 0.0;
+ int counter = 0;
+ double err_loc = 0.0;
+ vector<double> err;

for(epoch = 0; epoch < num_epochs; epoch++){

@@ -343,26 +443,34 @@ void NeuralNetwork::train(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets){

// Update gradients and outputs
Y = propagate(input_subset);
- err = back_propagate( Y, target_subset);
+ err_loc = back_propagate( Y, target_subset);

+ if(counter % error_collect_frequency ==0){ err.push_back( err_loc); }


// Update weights and biases
// According to: http://page.mi.fu-berlin.de/rojas/neural/chapter/K8.pdf
for(L = 0; L < Nlayers; L++){

- dWold[L] = (learning_rate * dW[L] + momentum_term * dWold[L]);
- dBold[L] = (learning_rate * dB[L] + momentum_term * dBold[L]);
+ dWold[L] = (learning_rate * (dW[L] - weight_decay[L]*W[L]) + momentum_term * dWold[L]);
+ dBold[L] = (learning_rate * (dB[L] - bias_decay[L]*B[L]) + momentum_term * dBold[L]);

W[L] += dWold[L];
B[L] += dBold[L];

}


counter++;

}// for i

- if(verbosity>=1){ cout<<"epoch = "<<epoch<<" (local) error = "<<err<<"\n"; }
+ if(verbosity>=1){ cout<<"epoch = "<<epoch<<" (local) error = "<<err_loc<<"\n"; }

}// for epoch

return err;

}
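/*
  In equation form, the update applied inside the epoch loop above is (a
  reading aid, not part of the commit; \eta = learning_rate, \mu =
  momentum_term, \lambda_L = weight_decay[L], and dW[L] is the step
  direction produced by back_propagate):

    \Delta W^{(L)}_t = \eta \left( dW^{(L)} - \lambda_L W^{(L)} \right) + \mu \, \Delta W^{(L)}_{t-1}
    W^{(L)} \leftarrow W^{(L)} + \Delta W^{(L)}_t

  and analogously for the biases with \lambda_L = bias_decay[L]. With all
  decay constants equal to zero this reduces to the plain momentum update
  used before this commit.
*/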


@@ -877,6 +985,8 @@ void NeuralNetwork::ANNTrain(){
// Now add momentum term (if it is not zero)
if(learning_method=="BackProp"){



for(L=1;L<=NL;L++){
/** TESTING ON 4/3/2021!!! : should be "+", but we try - */
dW[L] = dWcurr[L] + momentum_term*dW[L];
4 changes: 3 additions & 1 deletion src/math_ann/libann.cpp
@@ -167,14 +167,15 @@ ExportANN_docstring += tmp;
vector<MATRIX> (NeuralNetwork::*expt_propagate_v1)(MATRIX& input) = &NeuralNetwork::propagate;
vector<MATRIX> (NeuralNetwork::*expt_derivatives_v1)(MATRIX& input) = &NeuralNetwork::derivatives;
double (NeuralNetwork::*expt_back_propagate_v1)(vector<MATRIX>& Y, MATRIX& target) = &NeuralNetwork::back_propagate;
double (NeuralNetwork::*expt_error_v1)(MATRIX& input, MATRIX& target) = &NeuralNetwork::error;


void (NeuralNetwork::*expt_init_weights_biases_uniform_v1)
(Random& rnd, double left_w, double right_w, double left_b, double right_b) = &NeuralNetwork::init_weights_biases_uniform;
void (NeuralNetwork::*expt_init_weights_biases_normal_v1)
(Random& rnd, double scaling_w, double shift_w, double scaling_b, double shift_b) = &NeuralNetwork::init_weights_biases_normal;

- void (NeuralNetwork::*expt_train_v1)
+ vector<double> (NeuralNetwork::*expt_train_v1)
(Random& rnd, bp::dict params, MATRIX& inputs, MATRIX& targets) = &NeuralNetwork::train;


@@ -210,6 +211,7 @@ ExportANN_docstring += tmp;
.def("propagate",expt_propagate_v1)
.def("derivatives",expt_derivatives_v1)
.def("back_propagate",expt_back_propagate_v1)
.def("error",expt_error_v1)

.def("init_weights_biases_uniform",expt_init_weights_biases_uniform_v1)
.def("init_weights_biases_normal",expt_init_weights_biases_normal_v1)

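For orientation, a minimal sketch of how the extended interface could be driven
from the C++ side. The network dimensions (Nlayers = 3), the parameter values,
and the assumption that the hidden parsing lines read learning_rate,
momentum_term and num_epochs the same way as the keys shown above are all
illustrative, not part of the commit:

#include "NeuralNetwork.h"   // assumed to define NeuralNetwork, MATRIX, Random
#include <boost/python.hpp>
namespace bp = boost::python;

void example(NeuralNetwork& ann, Random& rnd, MATRIX& inputs, MATRIX& targets){
  // assumes the Python runtime is already initialized (Py_Initialize),
  // since the parameters travel through a boost::python dict
  bp::dict params;
  params["learning_rate"] = 0.01;
  params["momentum_term"] = 0.9;
  params["num_epochs"] = 100;
  params["steps_per_epoch"] = 50;
  params["epoch_size"] = 10;
  params["error_collect_frequency"] = 5;  // store the error every 5 steps

  // one decay constant per weight matrix W[1..Nlayers-1]; here Nlayers = 3
  bp::list wd; wd.append(1.e-4); wd.append(1.e-4);
  params["weight_decay"] = wd;

  // train() now returns the errors collected every error_collect_frequency steps
  vector<double> errs = ann.train(rnd, params, inputs, targets);

  // the new error() method evaluates the current half-MSE on any dataset
  double final_err = ann.error(inputs, targets);
  (void) errs; (void) final_err;
}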