From 15633730bf7e636bda98a38a87a7daf6ed74093e Mon Sep 17 00:00:00 2001
From: i-evi
Date: Sun, 4 Oct 2020 17:11:38 +0800
Subject: [PATCH] fix bug

---
 demo/lenet.lua  |  2 +-
 src/cc_basic.c  |  5 +--
 src/cc_dsc2d.c  | 26 ++++---------
 src/cc_normfn.c |  4 +-
 util/lua2cc.lua | 97 ++++++++++++++++++++++---------------------
 5 files changed, 55 insertions(+), 79 deletions(-)

diff --git a/demo/lenet.lua b/demo/lenet.lua
index 38a44b9..079ed19 100644
--- a/demo/lenet.lua
+++ b/demo/lenet.lua
@@ -12,7 +12,7 @@
 	l4 = conv2d ({input = "l3", stride = 1, padding = 2}),
 	l5 = relu ({input = "l4"}),
 	l6 = maxPool2d ({input = "l5", stride = 2}),
-	l7 = reshape ({input = "l6", shape = {-1, 1}}),
+	l7 = reshape ({input = "l6", shape = {-1, 1, 1}}),
 	l8 = fullyConnected ({input = "l7"}),
 	l9 = relu ({input = "l8"}),
 	l10 = fullyConnected ({input = "l9"}),
diff --git a/src/cc_basic.c b/src/cc_basic.c
index 88188f3..df319bf 100644
--- a/src/cc_basic.c
+++ b/src/cc_basic.c
@@ -72,10 +72,7 @@ void cc_tensor_shape_fix(cc_int32 *shape, cc_int32 elems)
 		s *= v;
 		sptr++;
 	}
-	if (s != elems) {
-#ifdef ENABLE_CC_ASSERT
-		cc_assert(f);
-#endif
+	if (s != elems || f) {
 		shape[i] = elems / s;
 	}
 }
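NOTE: the lenet.lua and cc_basic.c hunks above are linked: l7 now reshapes to
the rank-3 {-1, 1, 1} shape that the CNN2D tensors carry, and
cc_tensor_shape_fix() fills the wildcard slot whenever a -1 was seen (f), not
only when the partial product disagrees with the element count. A standalone
sketch of that inference (shape_fix and the sizes below are hypothetical, not
the library's API):

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Infer a single -1 ("wildcard") entry from the element count:
	 * divide by the product of the known dimensions. */
	static void shape_fix(int32_t *shape, int32_t ndim, int32_t elems)
	{
		int32_t i, wild = -1, s = 1;
		for (i = 0; i < ndim; ++i) {
			if (shape[i] == -1)
				wild = i;        /* remember the wildcard slot */
			else
				s *= shape[i];   /* product of the known dims */
		}
		if (wild >= 0)
			shape[wild] = elems / s;
		assert(s * (wild >= 0 ? shape[wild] : 1) == elems);
	}

	int main(void)
	{
		int32_t shape[3] = {-1, 1, 1}; /* shape lenet.lua now requests */
		shape_fix(shape, 3, 400);      /* e.g. a 400-element activation */
		printf("%d %d %d\n", shape[0], shape[1], shape[2]); /* 400 1 1 */
		return 0;
	}

Dividing the element count by the product of the known dimensions is what
makes a {-1, 1, 1} request valid for any flattened layer size.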
diff --git a/src/cc_dsc2d.c b/src/cc_dsc2d.c
index 946fb71..101d329 100644
--- a/src/cc_dsc2d.c
+++ b/src/cc_dsc2d.c
@@ -19,16 +19,14 @@
 #include "global_fn_cfg.h"
 extern fn_conv2d _conv2d;
 extern fn_array_add_ew _array_add_ew;
-extern fn_array_mul_by _array_add_by;
+extern fn_array_mul_by _array_mul_by;
 
 cc_tensor_t *cc_dw_conv2d(cc_tensor_t *inp,
 	const cc_tensor_t *kernel, const cc_tensor_t *bias,
 	cc_int32 s, cc_int32 p, cc_int32 off, const char *name)
 {
-	cc_uint8 *omp_out_buf = NULL;
 	cc_tensor_t *inp_pad, *oup = NULL;
-	cc_int32 o_ch_size, p_ch_mem_size, o_ch_mem_size,
-		k_ch_mem_size, k_mem_size, num_omp_threads, i;
+	cc_int32 o_ch_size, p_ch_mem_size, o_ch_mem_size, k_ch_mem_size, i;
 	cc_int32 shape[CC_CNN2D_SHAPE] = {0};
 	char pad_name[CC_CONV2D_PAD_NAME_LEN];
 #ifdef ENABLE_CC_ASSERT
@@ -36,7 +34,7 @@ cc_tensor_t *cc_dw_conv2d(cc_tensor_t *inp,
 	cc_assert_zero(cc_tensor_dimension(kernel) - CC_CONV2D_KERNEL_DIM);
 	cc_assert_zero(*inp->dtype - *kernel->dtype);
 	cc_assert_zero(inp->shape[CC_CNN2D_SHAPE_C]
-		- kernel->shape[CC_CONV2D_KERNEL_I]);
+		- kernel->shape[CC_CONV2D_KERNEL_O]);
 #endif
 	if (p) {
 		sprintf(pad_name, "%s%s",
@@ -49,7 +47,7 @@ cc_tensor_t *cc_dw_conv2d(cc_tensor_t *inp,
 	oup = cc_tsrmgr_get(name);
 #endif
 	if (!oup) {
-		shape[CC_CNN2D_SHAPE_C] = kernel->shape[CC_CONV2D_KERNEL_I];
+		shape[CC_CNN2D_SHAPE_C] = kernel->shape[CC_CONV2D_KERNEL_O];
 		shape[CC_CNN2D_SHAPE_H] = cc_conv2d_shape_calc(
 			inp->shape[CC_CNN2D_SHAPE_H],
 			kernel->shape[CC_CONV2D_KERNEL_H], s, p);
@@ -67,13 +65,6 @@ cc_tensor_t *cc_dw_conv2d(cc_tensor_t *inp,
 	k_ch_mem_size = kernel->shape[CC_CONV2D_KERNEL_W] *
 		kernel->shape[CC_CONV2D_KERNEL_H] *
 		cc_dtype_size(*kernel->dtype);
-	k_mem_size = k_ch_mem_size * kernel->shape[CC_CONV2D_KERNEL_I];
-	num_omp_threads = 1;
-#ifdef ENABLE_OPENMP
-	num_omp_threads = omp_get_max_threads();
-#endif
-	cc_assert_alloc(omp_out_buf =
-		(cc_uint8*)malloc(o_ch_mem_size * num_omp_threads));
 #ifdef AUTO_TSRMGR
 	memset(oup->data, 0,
 		list_getlen(oup->container, CC_TENSOR_DATA));
@@ -81,18 +72,17 @@ cc_tensor_t *cc_dw_conv2d(cc_tensor_t *inp,
 #ifdef ENABLE_OPENMP
 #pragma omp parallel for private(i)
 #endif
-	for (i = 0; i < kernel->shape[CC_CONV2D_KERNEL_I]; ++i) {
+	for (i = 0; i < kernel->shape[CC_CONV2D_KERNEL_O]; ++i) {
 		_conv2d((inp_pad->data + i * p_ch_mem_size),
 			oup->data + i * o_ch_mem_size,
 			inp_pad->shape[CC_CNN2D_SHAPE_W],
 			inp_pad->shape[CC_CNN2D_SHAPE_H],
 			oup->shape[CC_CNN2D_SHAPE_W],
 			oup->shape[CC_CNN2D_SHAPE_H], s, s,
-			kernel->data + (k_mem_size * i),
+			kernel->data + (k_ch_mem_size * i),
 			kernel->shape[CC_CONV2D_KERNEL_W],
 			*kernel->dtype);
 	}
-	free(omp_out_buf);
 	if (!bias){
 #ifndef AUTO_TSRMGR
 		if (p)
@@ -157,7 +147,7 @@ cc_tensor_t *cc_pw_conv2d(cc_tensor_t *inp, const cc_tensor_t *kernel,
 		for (j = 0; j < kernel->shape[CC_CONV2D_KERNEL_I]; ++j) {
 #ifdef ENABLE_OPENMP
-			_array_add_by(
+			_array_mul_by(
 				omp_out_buf + omp_get_thread_num() * o_ch_mem_size,
 				o_ch_size, inp->data + o_ch_mem_size * j,
 				kernel->data + k_mem_size * i + k_ch_mem_size * j,
@@ -167,7 +157,7 @@ cc_tensor_t *cc_pw_conv2d(cc_tensor_t *inp, const cc_tensor_t *kernel,
 				omp_out_buf + omp_get_thread_num() * o_ch_mem_size,
 				*oup->dtype);
 #else
-			_array_add_by(omp_out_buf, o_ch_size,
+			_array_mul_by(omp_out_buf, o_ch_size,
 				inp->data + o_ch_mem_size * j,
 				kernel->data + k_mem_size * i + k_ch_mem_size * j,
 				*oup->dtype);
diff --git a/src/cc_normfn.c b/src/cc_normfn.c
index fede905..337e219 100644
--- a/src/cc_normfn.c
+++ b/src/cc_normfn.c
@@ -64,8 +64,8 @@ cc_tensor_t *cc_batch_norm2d(cc_tensor_t *inp,
 	ch_size = inp->shape[CC_CNN2D_SHAPE_H] *
 		inp->shape[CC_CNN2D_SHAPE_W];
 	ch_mem_size = ch_size * dt_size;
-	for (i = 0; i < inp->shape[CC_CNN2D_SHAPE_C]; ++i) {
-		_batch_norm(inp->data + ch_mem_size * i, ch_size,
+	for (i = 0; i < oup->shape[CC_CNN2D_SHAPE_C]; ++i) {
+		_batch_norm(oup->data + ch_mem_size * i, ch_size,
 			para->data + CC_NORM_PARAMETERS * dt_size * i,
 			*para->dtype);
 	}
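NOTE: on the cc_dsc2d.c hunks above: a depthwise convolution gives every
input channel its own single-channel kh*kw filter, so channel i's weights
begin at k_ch_mem_size * i. The old stride, k_mem_size * i (the size of the
whole kernel buffer), read out of bounds for every i > 0, and the per-thread
scratch buffer it sized was never used in this path, so it could be dropped
along with it. The arithmetic, with made-up sizes:

	#include <stdint.h>
	#include <stdio.h>

	/* Kernel addressing in a depthwise conv2d (hypothetical sizes).
	 * Each of the O channels owns one kh*kw filter, so filter i
	 * starts at i * k_ch_mem_size; stepping by k_mem_size instead
	 * jumps a whole kernel buffer per channel. */
	int main(void)
	{
		int32_t kw = 3, kh = 3, o_channels = 8, dtype_size = 4;
		int32_t k_ch_mem_size = kw * kh * dtype_size;    /* one filter   */
		int32_t k_mem_size = k_ch_mem_size * o_channels; /* whole buffer */
		int32_t i;
		for (i = 0; i < o_channels; ++i)
			printf("ch %d: ok offset %5d, buggy offset %5d (buffer: %d bytes)\n",
				i, i * k_ch_mem_size, i * k_mem_size, k_mem_size);
		return 0;
	}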
diff --git a/util/lua2cc.lua b/util/lua2cc.lua
index 81b6328..37a22ac 100644
--- a/util/lua2cc.lua
+++ b/util/lua2cc.lua
@@ -1,4 +1,4 @@
-local parameterLv = 0 -- PKG: Begin at 0
+local parameterLv = 0 -- PKG: Begin at 0
 local shapeCounter = 0
 local layerCounter = 1 -- MUST Begin at 1
 local parameterCnt = 1 -- MUST Begin at 1
@@ -70,8 +70,8 @@ dwConv2d = function(args)
 		if type(scope) == "string" then
 			name = string.format("%s/%s", scope, name)
 		end
-		parals[info.paraId + 0] = string.format("%03d_w", info.paraLv)
-		parals[info.paraId + 1] = string.format("%03d_b", info.paraLv)
+		parals[info.paraId + 0] = string.format("%03d.w", info.paraLv)
+		parals[info.paraId + 1] = string.format("%03d.b", info.paraLv)
 		layerOutputs[ret.layerId] = output
 		if info.input == nil then
 			if info.layerId - 1 < 1 then
@@ -80,7 +80,7 @@ dwConv2d = function(args)
 			info.input = string.format("@%d", info.layerId - 1)
 		end
 		return string.format(
-			"%s = cc_conv2d(%s, __pls[%d], __pls[%d], %d, %d, %d, \"%s\");",
+			"%s = cc_dw_conv2d(%s, __pls[%d], __pls[%d], %d, %d, %d, \"%s\");",
 			output, info.input, info.paraId - 1, info.paraId,
 			info.stride, info.padding, info.offset, name)
 	end
@@ -100,8 +100,8 @@ pwConv2d = function(args)
 		if type(scope) == "string" then
 			name = string.format("%s/%s", scope, name)
 		end
-		parals[info.paraId + 0] = string.format("%03d_w", info.paraLv)
-		parals[info.paraId + 1] = string.format("%03d_b", info.paraLv)
+		parals[info.paraId + 0] = string.format("%03d.w", info.paraLv)
+		parals[info.paraId + 1] = string.format("%03d.b", info.paraLv)
 		layerOutputs[ret.layerId] = output
 		if info.input == nil then
 			if info.layerId - 1 < 1 then
@@ -129,8 +129,8 @@ fullyConnected = function(args)
 		if type(scope) == "string" then
 			name = string.format("%s/%s", scope, name)
 		end
-		parals[info.paraId + 0] = string.format("%03d_w", info.paraLv)
-		parals[info.paraId + 1] = string.format("%03d_b", info.paraLv)
+		parals[info.paraId + 0] = string.format("%03d.w", info.paraLv)
+		parals[info.paraId + 1] = string.format("%03d.b", info.paraLv)
 		layerOutputs[ret.layerId] = output
 		if info.input == nil then
 			if info.layerId - 1 < 1 then
@@ -243,7 +243,7 @@ batchNorm2d = function(args)
 		if type(scope) == "string" then
 			name = string.format("%s/%s", scope, name)
 		end
-		parals[info.paraId] = string.format("%03d_n", info.paraLv)
+		parals[info.paraId] = string.format("%03d.n", info.paraLv)
 		layerOutputs[ret.layerId] = output
 		if info.input == nil then
 			if info.layerId - 1 < 1 then
@@ -252,7 +252,7 @@ batchNorm2d = function(args)
 			info.input = string.format("@%d", info.layerId - 1)
 		end
 		return string.format(
-			"%s = cc_batch_norm(%s, __pls[%d], \"%s\");",
+			"%s = cc_batch_norm2d(%s, __pls[%d], \"%s\");",
 			output, info.input, info.paraId - 1, name)
 	end
 	return ret
@@ -288,63 +288,50 @@ reshape = function(args)
 	return ret
 end
 
-local fprint = function(fp, ...)
+local fputs = function(fp, ...)
 	local args = { ... }
-	local flag = false
 	for k, v in pairs(args) do
-		if not flag then
-			flag = true
-		else
-			fp:write('\t')
-		end
 		fp:write(v)
 	end
-	fp:write('\n')
 end
 
 local printLine = function(line, indent)
	if indent == nil then
 		indent = 0
 	end
+	local lineLimit = 80
 	local indentOff = indent * 8
-	local llen = #line + indentOff
 	local indentStr = string.rep("\t", indent)
-	line = indentStr..line
-	if llen <= 80 then
-		fprint(_ctrlfp, line)
-		return
-	end
-	local prev = 0
-	local curr = 0
+	local csr = 1
+	local brk = 0
+	local pos = 0
+	local nextword = ""
 	repeat
-		curr, _ = string.find(line, ',', prev + 1)
-		if curr == nil then
-			break
-		end
-		if (curr + indentOff) >= 80 then
-			local buf = string.sub(line, 1, prev)
-			fprint(_ctrlfp, buf)
-			line = string.sub(line, prev + 1)
-			if string.byte(line, 1) == 32 then
-				line = string.sub(line, 2)
-			end
+		if pos == 0 then
+			pos = indentOff
+			fputs(_ctrlfp, indentStr)
 		end
+		brk, _ = string.find(line, ',', csr)
+		if brk ~= nil then
+			nextword = string.sub(line, csr, brk)
+		else
+			nextword = string.sub(line, csr)
+		end
+		csr = csr + #nextword
+		if pos + #nextword >= lineLimit then
+			fputs(_ctrlfp, '\n')
+			pos = indentOff
+			fputs(_ctrlfp, indentStr)
 		end
-		prev = curr
-	until (#line + indentOff) < 80
-	if #line > 80 then
-		local looking = string.byte(',', 1)
-		for i = #line, 60, -1 do
-			if string.byte(line, i) == looking then
-				fprint(_ctrlfp, string.sub(line, 1, i))
-				line = string.sub(line, i + 1)
-				break
+		if pos == indentOff then
+			local off, _ = string.find(nextword, ' ')
+			if off == 1 then
+				nextword = string.sub(nextword, 2)
 			end
 		end
-	end
-	curr, _ = string.find(line, ' ')
-	if curr == 1 then
-		line = string.sub(line, 2)
-	end
-	line = indentStr..line
-	fprint(_ctrlfp, line)
+		fputs(_ctrlfp, string.format("%s", nextword))
+		pos = pos + #nextword
+	until csr > #line
+	fputs(_ctrlfp, '\n')
 end
 
 local runningFlag = true
@@ -418,7 +405,8 @@ ccCodeTranslator = function(net, cfg)
 		"static cc_tensor_t *__pls[%d];", #paraxList),
 		indentOff + 0)
 	local layerDef = "cc_tensor_t "
-	for k, v in pairs(createTsr) do
+	for k = 1, #createTsr do
+		local v = createTsr[k]
 		layerDef = layerDef..string.format("*%s, ", v)
 	end
 	layerDef = string.sub(layerDef, 1, #layerDef - 2)..";"
@@ -431,7 +419,8 @@ ccCodeTranslator = function(net, cfg)
 		"__pls[i] = cc_tsrmgr_get(p_namels[i]);"),
 		indentOff + 1)
 	printLine("}", indentOff + 0)
-	for k, v in pairs(codeLines) do
+	for k = 1, #codeLines do
+		local v = codeLines[k]
 		v = string.gsub(v, "@%d*,", function(s)
 			return string.format("%s,",
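NOTE: on the printLine() rewrite above: it streams comma-terminated words and
opens a fresh indented line before any word that would cross column 80; the
loop has to run until the cursor is strictly past the end of the string
(until csr > #line), otherwise a short trailing word after the last comma is
silently dropped. A rough C counterpart (print_wrapped is a hypothetical
helper, not part of the translator):

	#include <stdio.h>
	#include <string.h>

	/* Greedy wrap at commas: a "word" runs up to and including the
	 * next ','; break the line before a word that would pass the
	 * limit, and strip one leading space at each line start. */
	static void print_wrapped(FILE *fp, const char *line, int indent)
	{
		const int limit = 80;
		int i, off = indent * 8, pos = 0;
		const char *csr = line;

		while (*csr) {
			const char *brk = strchr(csr, ',');
			size_t len = brk ? (size_t)(brk - csr) + 1 : strlen(csr);
			if (pos == 0) {          /* first line: indent once */
				for (i = 0; i < indent; ++i)
					fputc('\t', fp);
				pos = off;
			}
			if (pos + (int)len >= limit && pos > off) {
				fputc('\n', fp); /* word would cross the limit */
				for (i = 0; i < indent; ++i)
					fputc('\t', fp);
				pos = off;
			}
			if (pos == off && *csr == ' ') {
				csr++;           /* strip one leading space */
				len--;
			}
			fwrite(csr, 1, len, fp);
			pos += (int)len;
			csr += len;
		}
		fputc('\n', fp);
	}

	int main(void)
	{
		print_wrapped(stdout, "cc_tensor_t *l1, *l2, *l3, *l4, *l5, "
			"*l6, *l7, *l8, *l9, *l10, *l11, *l12, *l13, *l14;", 1);
		return 0;
	}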