Skip to content

Commit

Permalink
dsc2d not validated
Browse files Browse the repository at this point in the history
  • Loading branch information
i-evi committed Sep 21, 2020
1 parent 8428dd0 commit eb7dc1a
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 2 deletions.
5 changes: 3 additions & 2 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ endif
ALL_O += \
catcoon.o cc_tensor.o cc_dtype.o cc_tsrmgr.o cc_fmap2d.o cc_pool2d.o \
cc_array.o cc_basic.o cc_actfn.o cc_fullycon.o cc_pad2d.o cc_cpufn.o \
cc_conv2d.o cc_normfn.o cc_image.o util_rbt.o util_list.o util_log.o \
util_image.o global_fn_cfg.o
cc_conv2d.o cc_dsc2d.o cc_normfn.o cc_image.o util_rbt.o util_list.o \
util_log.o util_image.o global_fn_cfg.o

CATCOON_A = libcatcoon.a

Expand Down Expand Up @@ -117,6 +117,7 @@ cc_actfn.o : $(patsubst %, ./src/%, cc_actfn.h cc_actfn.c)
cc_array.o : $(patsubst %, ./src/%, cc_array.h cc_array.c)
cc_basic.o : $(patsubst %, ./src/%, cc_basic.h cc_basic.c)
cc_conv2d.o : $(patsubst %, ./src/%, cc_conv2d.h cc_conv2d.c)
cc_dsc2d.o : $(patsubst %, ./src/%, cc_dsc2d.h cc_dsc2d.c)
cc_cpufn.o : $(patsubst %, ./src/%, cc_cpufn.h cc_cpufn.c)
cc_dtype.o : $(patsubst %, ./src/%, cc_dtype.h cc_dtype.c)
cc_fmap2d.o : $(patsubst %, ./src/%, cc_fmap2d.h cc_fmap2d.c)
Expand Down
1 change: 1 addition & 0 deletions src/catcoon.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "cc_assert.h"
#include "cc_basic.h"
#include "cc_conv2d.h"
#include "cc_dsc2d.h"
#include "cc_fmap2d.h"
#include "cc_fullycon.h"
#include "cc_image.h"
Expand Down
186 changes: 186 additions & 0 deletions src/cc_dsc2d.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef ENABLE_OPENMP
#include <omp.h>
#endif

#include "cc_assert.h"
#include "cc_basic.h"
#include "cc_fmap2d.h"
#include "cc_pad2d.h"
#include "cc_tsrmgr.h"
#include "util_list.h"
#include "util_log.h"
#include "cc_conv2d.h"
#include "cc_dsc2d.h"

#include "global_fn_cfg.h"
extern fn_conv2d _conv2d;
extern fn_array_add_ew _array_add_ew;
extern fn_array_mul_by _array_add_by;

cc_tensor_t *cc_dw_conv2d(cc_tensor_t *inp,
cc_tensor_t *kernel, cc_tensor_t *bias, cc_int32 s,
cc_int32 p, cc_int32 off, const char *name)
{
cc_uint8 *omp_out_buf = NULL;
cc_tensor_t *inp_pad, *oup = NULL;
cc_int32 o_ch_size, p_ch_mem_size, o_ch_mem_size,
k_ch_mem_size, k_mem_size, num_omp_threads, i;
cc_int32 shape[CC_CNN2D_SHAPE_LEN] = {0};
char pad_name[CC_CONV2D_PAD_NAME_LEN];
#ifdef ENABLE_CC_ASSERT
cc_assert_zero(cc_tensor_dimension(inp) - CC_CNN2D_DIM);
cc_assert_zero(cc_tensor_dimension(kernel) - CC_CONV2D_KERNEL_DIM);
cc_assert_zero(*inp->dtype - *kernel->dtype);
cc_assert_zero(inp->shape[CC_CNN2D_SHAPE_C]
- kernel->shape[CC_CONV2D_KERNEL_I]);
#endif
if (p) {
sprintf(pad_name, "%s%s",
inp->name, CC_CONV2D_PAD_NAME_SURFFIX);
inp_pad = cc_pad2d(inp, p, off, pad_name);
}
else
inp_pad = inp;
#ifdef AUTO_TSRMGR
oup = cc_tsrmgr_get(name);
#endif
if (!oup) {
shape[CC_CNN2D_SHAPE_C] = kernel->shape[CC_CONV2D_KERNEL_I];
shape[CC_CNN2D_SHAPE_H] = cc_conv2d_shape_calc(
inp->shape[CC_CNN2D_SHAPE_H],
kernel->shape[CC_CONV2D_KERNEL_H], s, p);
shape[CC_CNN2D_SHAPE_W] = cc_conv2d_shape_calc(
inp->shape[CC_CNN2D_SHAPE_W],
kernel->shape[CC_CONV2D_KERNEL_W], s, p);
oup = cc_create_tensor(shape, *inp->dtype, name);
}
o_ch_size = oup->shape[CC_CNN2D_SHAPE_W] *
oup->shape[CC_CNN2D_SHAPE_H];
o_ch_mem_size = o_ch_size * cc_dtype_size(*oup->dtype);
p_ch_mem_size = inp_pad->shape[CC_CNN2D_SHAPE_W] *
inp_pad->shape[CC_CNN2D_SHAPE_H] *
cc_dtype_size(*inp->dtype);
k_ch_mem_size = kernel->shape[CC_CONV2D_KERNEL_W] *
kernel->shape[CC_CONV2D_KERNEL_H] *
cc_dtype_size(*kernel->dtype);
k_mem_size = k_ch_mem_size * kernel->shape[CC_CONV2D_KERNEL_I];
num_omp_threads = 1;
#ifdef ENABLE_OPENMP
num_omp_threads = omp_get_max_threads();
#endif
cc_assert_alloc(omp_out_buf =
(cc_uint8*)malloc(o_ch_mem_size * num_omp_threads));
#ifdef AUTO_TSRMGR
memset(oup->data, 0,
list_getlen(oup->container, CC_TENSOR_DATA));
#endif
#ifdef ENABLE_OPENMP
#pragma omp parallel for private(i)
#endif
for (i = 0; i < kernel->shape[CC_CONV2D_KERNEL_I]; ++i) {
_conv2d((inp_pad->data + i * p_ch_mem_size),
oup->data + i * o_ch_mem_size,
inp_pad->shape[CC_CNN2D_SHAPE_W],
inp_pad->shape[CC_CNN2D_SHAPE_H],
oup->shape[CC_CNN2D_SHAPE_W],
oup->shape[CC_CNN2D_SHAPE_H], s, s,
kernel->data + (k_mem_size * i),
kernel->shape[CC_CONV2D_KERNEL_W],
*kernel->dtype);
}
free(omp_out_buf);
if (!bias){
#ifndef AUTO_TSRMGR
if (p)
cc_free_tensor(inp_pad);
#endif
return oup;
} else {
oup = cc_fmap2d_bias(oup, bias, oup->name);
}
#ifndef AUTO_TSRMGR
if (p)
cc_free_tensor(inp_pad);
#endif
return oup;
}

cc_tensor_t *cc_pw_conv2d(cc_tensor_t *inp,
cc_tensor_t *kernel, cc_tensor_t *bias, const char *name)
{
cc_uint8 *omp_out_buf = NULL;
cc_tensor_t *oup = NULL;
cc_int32 o_ch_size, o_ch_mem_size,
k_ch_mem_size, k_mem_size, num_omp_threads, i, j;
cc_int32 shape[CC_CNN2D_SHAPE_LEN] = {0};
#ifdef ENABLE_CC_ASSERT
cc_assert_zero(cc_tensor_dimension(inp) - CC_CNN2D_DIM);
cc_assert_zero(cc_tensor_dimension(kernel) - CC_CONV2D_KERNEL_DIM);
cc_assert_zero(*inp->dtype - *kernel->dtype);
cc_assert_zero(inp->shape[CC_CNN2D_SHAPE_C]
- kernel->shape[CC_CONV2D_KERNEL_I]);
#endif
#ifdef AUTO_TSRMGR
oup = cc_tsrmgr_get(name);
#endif
if (!oup) {
shape[CC_CNN2D_SHAPE_C] = kernel->shape[CC_CONV2D_KERNEL_O];
shape[CC_CNN2D_SHAPE_H] = inp->shape[CC_CNN2D_SHAPE_H];
shape[CC_CNN2D_SHAPE_W] = inp->shape[CC_CNN2D_SHAPE_W];
oup = cc_create_tensor(shape, *inp->dtype, name);
}
o_ch_size = oup->shape[CC_CNN2D_SHAPE_W] *
oup->shape[CC_CNN2D_SHAPE_H];
o_ch_mem_size = o_ch_size * cc_dtype_size(*oup->dtype);
k_ch_mem_size = kernel->shape[CC_CONV2D_KERNEL_W] *
kernel->shape[CC_CONV2D_KERNEL_H] *
cc_dtype_size(*kernel->dtype);
k_mem_size = k_ch_mem_size * kernel->shape[CC_CONV2D_KERNEL_I];
num_omp_threads = 1;
#ifdef ENABLE_OPENMP
num_omp_threads = omp_get_max_threads();
#endif
cc_assert_alloc(omp_out_buf =
(cc_uint8*)malloc(o_ch_mem_size * num_omp_threads));
#ifdef AUTO_TSRMGR
memset(oup->data, 0,
list_getlen(oup->container, CC_TENSOR_DATA));
#endif
#ifdef ENABLE_OPENMP
#pragma omp parallel for private(i, j)
#endif
for (i = 0; i < kernel->shape[CC_CONV2D_KERNEL_O]; ++i) {
for (j = 0; j < kernel->shape[CC_CONV2D_KERNEL_I]; ++j)
{
#ifdef ENABLE_OPENMP
_array_add_by(
omp_out_buf + omp_get_thread_num() * o_ch_mem_size,
o_ch_size, inp->data + o_ch_mem_size * j,
kernel->data + k_mem_size * i + k_ch_mem_size * j,
*oup->dtype);
_array_add_ew(oup->data + o_ch_mem_size * i,
o_ch_size, oup->data + o_ch_mem_size * i,
omp_out_buf + omp_get_thread_num() * o_ch_mem_size,
*oup->dtype);
#else
_array_add_by(omp_out_buf, o_ch_size,
inp->data + o_ch_mem_size * j,
kernel->data + k_mem_size * i + k_ch_mem_size * j,
*oup->dtype);
_array_add_ew(oup->data + o_ch_mem_size * i, o_ch_size,
oup->data + o_ch_mem_size * i, omp_out_buf,
*oup->dtype);
#endif
}
}
free(omp_out_buf);
if (!bias)
return oup;
else
oup = cc_fmap2d_bias(oup, bias, oup->name);
return oup;
}
21 changes: 21 additions & 0 deletions src/cc_dsc2d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#ifndef _CC_DSC2D_H_
#define _CC_DSC2D_H_

#ifdef __cplusplus
extern "C" {
#endif

/* Depth-wise convolution 2d */
cc_tensor_t *cc_dw_conv2d(cc_tensor_t *inp,
cc_tensor_t *kernel, cc_tensor_t *bias, cc_int32 s,
cc_int32 p, cc_int32 off, const char *name);

/* Point-wise convolution 2d */
cc_tensor_t *cc_pw_conv2d(cc_tensor_t *inp,
cc_tensor_t *kernel, cc_tensor_t *bias, const char *name);

#ifdef __cplusplus
}
#endif

#endif /* _CC_DSC2D_H_ */

0 comments on commit eb7dc1a

Please sign in to comment.