diff --git a/README.md b/README.md index d3971af..2155cb9 100644 --- a/README.md +++ b/README.md @@ -5,14 +5,193 @@ ![License](https://img.shields.io/badge/license-MIT-blue.svg) ![Platform](https://img.shields.io/badge/platform-MacOS-lightgrey.svg) +Netapix is a neural network framework written in pure C. For now it supports CPU mode only. The purpose of the project is to investigate the behavior of deep neural networks and to design new and effective architectures of convolutional networks. -# Requirements +# Installation +``` +make +``` +# Usage -# Installation +### Train brand new model +``` +./example/bin/netapix train [NPX_PATH] [NPT_PATH] +``` +Folders `weights` and `weights/params` will be created at the `[NPX_PATH]`. +- `weights` for trained weights `.npw` file. +- `weights/params` for copied configuration `.npx` file. -# Usage +### Continue training existing model +``` +./example/bin/netapix train [NPX_PATH] [NPT_PATH] [NPW_PATH] +``` +Folders `weights` and `weights/params` will be created at the `[NPW_PATH]`. +- `weights` for new trained weights `.npw` file. +- `weights/params` for copied configuration `.npx` and `.npw` files that were used to continue training. + + +### Test the model +``` +./example/bin/netapix run [NPI_PATH] [NPW_PATH] [OUTPUT_PATH] +``` +Output file `.npo` will be created under `[OUTPUT_PATH]` folder. +`[OUTPUT_PATH]` is optional. Default value at executable filepath – `./example/bin/output/`. # Documentation + +## Formats + +Netapix works with custom formats. There are a number of tools for conversion available [here](https://github.com/touchlane/NetapixTools). + +### .npx +Special format to define network structure and learning policy.
See an example of a simple convolutional network for the MNIST dataset below: + +``` +[config] +batch=32 +threads=4 +channels=1 +width=28 +height=28 +init=xavier +validation=10 +backup=5000 +learning=gradient +regularization=L2 +accuracy=0.00001 +eta=0.01 +momentum=0 +lambda=0 +alpha=0.99 +beta=1.01 +gamma=1.01 + +[convolutional] +width=14 +height=14 +channels=100 +stride=1 +padding=0 +activation=relu + +[convolutional] +width=14 +height=14 +channels=10 +stride=1 +padding=0 +activation=relu + +[convolutional] +width=2 +height=2 +channels=10 +stride=1 +padding=0 +activation=relu + +[loss] +input=10 +activation=msqe +``` + +### .npw + +Particular format for binary files with weights. Every .npw file follows the same structure: +``` +[number of layers][4 bytes] + +[layer config] [4 bytes * 15] +[weights] [4 bytes * n] +[biases] [4 bytes * m] + +[layer config] [4 bytes * 15] +[weights] [4 bytes * n_1] +[biases] [4 bytes * m_1] +. +. +. +[layer config] [4 bytes * 15] +[weights] [4 bytes * n_k] +[biases] [4 bytes * m_k] +``` + +### .npt + +Distinct file format for training binary files of the [Netapix](https://github.com/touchlane/Netapix/) framework. It consists of two consecutive one-dimensional arrays of float32 values. + +### .npi + +Custom format for input binary files of the [Netapix](https://github.com/touchlane/Netapix/) framework. Contains two consecutive one-dimensional arrays of float32 numbers. + +### .npo + +Peculiar format for output binary files of the [Netapix](https://github.com/touchlane/Netapix/) framework. This format contains a one-dimensional array of 4-byte floats.
+ +## Config + +| Key | Comment | +| ------------- | ------------- | +|**threads** | number of available CPU threads | +|**batch** | total number of training examples present in a single batch | +|**channels** | the depth of the input tensor (for networks having the first layer as convolutional) | +|**width** | the width of the input tensor (for networks having the first layer as convolutional) | +|**height** | the height of the input tensor (for networks having the first layer as convolutional) | +|**init** | the weights initialization type | +|**validation** | indicates the part of the training set reserved for the cross validation | +|**backup** | weights save rate | +|**learning** | supported optimizers (**gradient**) | +|**regularization** |regularization type (**L1** or **L2**)| +|**accuracy** | the target accuracy | +|**eta** | start learning rate | +|**momentum** | momentum coefficient| +|**lambda** | regularization coefficient | +|**alpha** | decrease learning rate coefficient| +|**beta** | increase learning rate coefficient | +|**gamma** | delta error correction coefficient | + +## Layers + +### Connected +| Key | Comment | +| ------------- | ------------- | +| input | the output size of the previous layer | +| activation | the type of the activation function | + +### Convolutional +| Key | Comment | +| ------------- | ------------- | +| width | filter width | +| height | filter height | +| channels | number of filters | +| stride | controls how the filter convolves around the input volume | +| padding | positive integer to define the central kernel element | +| activation | the type of the activation function | + +### Loss +| Key | Comment | +| ------------- | ------------- | +| **input** | the size of the network's output | +| **activation** | the type of a loss function | + +## Math + +### Activation + +| Key | Comment | +| ------------- | ------------- | +| **linear** | *f(x) = x* | +| **relu** | *f(x) = x > 0 ?
x : 0* | +| **logistic**| the standard logistic function| +| **th**| the hyperbolic tangent | +| **softmax**| the normalized exponential function | + +### Loss + +| Key | Comment | +| ------------- | ------------- | +| **msqe** | mean squared error | +| **entropy** | cross entropy | diff --git a/example/config.mk b/example/config.mk index 1fbcfc7..727a1b1 100644 --- a/example/config.mk +++ b/example/config.mk @@ -11,7 +11,7 @@ EXEC = $(EXEC_PATH)/netapix # Compiler, Include, Linker Defines. CC = gcc APP_INCLUDE = -I./include/ -I.$(APP_SRC_DIR) -APP_CFLAGS = $(APP_INCLUDE) -w +APP_CFLAGS = $(APP_INCLUDE) -Wall -O3 LIBPATH = -L./lib -lnetapix LFLAGS = -o $(EXEC) $(LIBPATH) @@ -32,7 +32,7 @@ example_mkdir: # Clean Up Exectuable, Objects, Library, Coverage files d example_clean: - rm -rf $(EXEC) $(APP_OBJ_DIR) + rm -rf $(EXEC_PATH) $(APP_OBJ_DIR) rm -rf $(APP_SRC:.c=.gcda) $(APP_SRC:.c=.gcno) .PHONY: example_all example_clean diff --git a/example/src/netapix.c b/example/src/netapix.c index 14b3ee8..2487549 100644 --- a/example/src/netapix.c +++ b/example/src/netapix.c @@ -50,7 +50,7 @@ int train_mode(int argc, char *argv[]) { params_save_path = make_output_save_path(weights_path, DEFAULT_OUTPUT_WEIGHTS_PARAMS_DIRECTORY_NAME); } else { output_path = make_output_save_path(npx_path, DEFAULT_OUTPUT_WEIGHTS_DIRECTORY_NAME); - params_save_path = output_path; + params_save_path = make_output_save_path(npx_path, DEFAULT_OUTPUT_WEIGHTS_PARAMS_DIRECTORY_NAME); } if (prepare_output_path(output_path, 0) || prepare_output_path(params_save_path, 1)) { @@ -71,18 +71,21 @@ int run_mode(int argc, char *argv[]) { } char *input_path = argv[2]; char *weights_path = argv[3]; - char *output_path = (argc > 4) ? argv[4] : NULL; - - output_path = make_output_save_path(output_path ? output_path : "./", DEFAULT_OUTPUT_RUN_DIRECTORY_NAME); + char *output_path = (argc > 4) ? 
strdup(argv[4]) : NULL; + + if (!output_path) { + output_path = make_output_save_path("./", DEFAULT_OUTPUT_RUN_DIRECTORY_NAME); + } + if (prepare_output_path(output_path, 1)) { return 0; } - + char *input_name = remove_ext(last_path_component(input_path)); output_path = realloc(output_path, (strlen(output_path) + strlen(input_name) + strlen(".npo") + 1) * sizeof(*output_path)); - sprintf(output_path, "%s%s.npo", output_path, input_name); + sprintf(output_path, "%s/%s.npo", output_path, input_name); run(input_path, weights_path, output_path); return 0; diff --git a/makefile b/makefile index d4b2090..6f3e52a 100644 --- a/makefile +++ b/makefile @@ -9,7 +9,7 @@ ALIB = libnetapix.a # Compiler, Include, Linker Defines. CC = gcc LIB_INCLUDE = -I./include/ -I./src/ -LIB_CFLAGS = $(LIB_INCLUDE) -w +LIB_CFLAGS = $(LIB_INCLUDE) -Wall -O3 all: example diff --git a/src/convolutional.c b/src/convolutional.c index d232f1e..a2444ee 100644 --- a/src/convolutional.c +++ b/src/convolutional.c @@ -145,8 +145,12 @@ void calc_сonvolutional_сorrections(convolutional_layer *layer) { int free_convolutional_layer(convolutional_layer *layer, int is_first_layer) { if (is_first_layer) { free(layer->input); - free_tensor(layer->previous_gradients_tensor, layer->h, layer->z); - free_tensor(layer->input_derivative_tensor, layer->h, layer->z); +// if (layer->previous_gradients_tensor != NULL) { +// free_tensor(layer->previous_gradients_tensor, layer->h, layer->z); +// } +// if (layer->input_derivative_tensor != NULL) { +// free_tensor(layer->input_derivative_tensor, layer->h, layer->z); +// } } free_tensor(layer->input_tensor , layer->h, layer->z); free_array_of_tensors(layer->weights, layer->h1, layer->z1, layer->z2); diff --git a/src/math.c b/src/math.c index bbf4188..7b1c954 100644 --- a/src/math.c +++ b/src/math.c @@ -74,7 +74,7 @@ float msqe(float *vector, float *target, int lenght) { tmp = target[i] - vector[i]; error = error + tmp*tmp; } - return error/2; + return error / lenght; } float 
cross_entropy(float *vector, float *target, int lenght) { diff --git a/src/run.c b/src/run.c index dd0f3f9..e77e767 100644 --- a/src/run.c +++ b/src/run.c @@ -255,7 +255,20 @@ int read_layer_configs_from_npw(char *path, layer_config **configs, int *layers_ fclose(file); return ERROR_FORMAT; } - int values_count = ((*configs)[i].input_length + 1) * (*configs)[i].output_length; + int values_count = 0; + layer_config config = (*configs)[i]; + layer_type type = (*configs)[i].type; + switch (type) { + case CONNECTED: + values_count = ((*configs)[i].input_length + 1) * (*configs)[i].output_length; + break; + case CONVOLUTIONAL: + values_count = config.width * config.height * config.input_depth * config.channels + config.channels; + break; + default: + printf(FATAL_ERROR_MAKE_WEIGHTS_FAIL_MSG); + break; + } fseek(file, values_count * sizeof(float), SEEK_CUR); } fclose(file); diff --git a/src/train.c b/src/train.c index beeff89..6fb8a78 100644 --- a/src/train.c +++ b/src/train.c @@ -183,9 +183,7 @@ int train(char *npx_path, char *train_path, char *weights_path, char *output_pat int i, j; int epoch = 0; - printf("Initial validation...\n"); - root->params->prev_error = cross_validation(validation, 0); - printf("Epoch:%d Iteration:0 Cross Validation Error:%f\n", epoch, root->params->prev_error); + root->params->prev_error = -1; if (output_path != NULL) { char *initial_weights_path = malloc((strlen(output_path) + strlen("0.npw") + 1) * sizeof(*initial_weights_path)); @@ -195,7 +193,7 @@ int train(char *npx_path, char *train_path, char *weights_path, char *output_pat free(initial_weights_path); } int iteration = 0; - while (root->params->prev_error > root->npx->settings->accuracy) { + while (root->params->prev_error > root->npx->settings->accuracy || root->params->prev_error == -1) { root->params->train_set_number = epoch % root->data_set->count; shuffle(root->data_set->training_set[root->params->train_set_number]); for (i = 0; i < iterations_in_seen; i++) { @@ -226,7 +224,11 
@@ int train(char *npx_path, char *train_path, char *weights_path, char *output_pat sprintf(buffer_path, "%s%d.npw", output_path, iteration); write_npw(buffer_path, root->weights, root->npx->net, root->npx->size - 1); } - printf("Iteration:%d Error:%f\n", iteration, average(root->params->batch_errors, root->npx->settings->batch)); + float avgError = average(root->params->batch_errors, root->npx->settings->batch); + if(root->params->prev_error == -1) { + root->params->prev_error = avgError; + } + printf("Iteration:%d Error:%f\n", iteration, avgError); } float error = cross_validation(validation, root->params->train_set_number); epoch = epoch + 1; diff --git a/tests/src/test_math.c b/tests/src/test_math.c index b0774e7..870840d 100644 --- a/tests/src/test_math.c +++ b/tests/src/test_math.c @@ -161,8 +161,8 @@ int test_math_msqe(void) { float error3 = msqe(vector3, target, length); assert_equal_float(error1, 0); - assert_equal_float(error2, 1.5); - assert_equal_float(error3, 2); + assert_equal_float(error2, 0.75); + assert_equal_float(error3, 1); return 0; }