feat(kernel): support asymmetric padding in pooling
Signed-off-by: YdrMaster <ydrml@hotmail.com>
YdrMaster committed Dec 4, 2023
1 parent 74f1ec4 commit 0e47987
Showing 6 changed files with 140 additions and 89 deletions.
73 changes: 10 additions & 63 deletions src/04kernel/src/kernels/conv/cudnn_kernel.cu
@@ -1,68 +1,14 @@
#include "../../utilities/cuda/cudnn_context.hh"
#include "../../utilities/cuda/cudnn_functions.h"
#include "../expand/cuda_kernel.hh"
#include "../extra_padding/extra_padding.cuh"
#include "cudnn_kernel.hh"
#include "hardware/functions.h"
#include <thrust/execution_policy.h>
#include <thrust/tabulate.h>

namespace refactor::kernel {
using namespace cudnn;
using namespace runtime;

struct ExtraPadding {
DataType dt;
int nc, sohw, sow, h, w, padH, padW;

static std::optional<ExtraPadding> build(DataType dt, int const *shape, int const *pads) {
if (pads[0] == pads[2] && pads[1] == pads[3]) {
return std::nullopt;
}
int padH = pads[0] - pads[2], padW = pads[1] - pads[3];
return ExtraPadding{
dt,
shape[0] * shape[1],
(shape[2] + std::abs(padH)) * (shape[3] + std::abs(padW)),
shape[3] + std::abs(padW),
shape[2],
shape[3],
padH,
padW};
}

size_t workspace() const {
return nc * sohw * dt.size();
}
};

template<class T>
struct ExtraPaddingFunctor {
ExtraPadding info;
void const *src;

__device__ T operator()(size_t i) const noexcept {
auto h = i / info.sow,
w = i % info.sow;
if (0 < info.padH) {
if (h < info.padH) {
return 0;
}
h -= info.padH;
} else if (h >= info.h) {
return 0;
}
if (0 < info.padW) {
if (w < info.padW) {
return 0;
}
w -= info.padW;
} else if (w >= info.w) {
return 0;
}
return reinterpret_cast<T const *>(src)[i / info.sohw * info.h * info.w + h * info.w + w];
}
};

auto ConvCudnn::lower(Resources &res) const -> RoutineWorkspace {
// RAII for closure
struct Descriptors {
@@ -152,14 +98,12 @@ namespace refactor::kernel {
             // nvcc at c++11 doesn't support real move capture
             auto routine = [d, workspaceSize](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
                 void const *x = inputs[0], *w = inputs[1];
-                if (d->extraPadding) {
-                    auto extra = reinterpret_cast<uint8_t *>(workspace) + workspaceSize;
-                    thrust::tabulate(thrust::device,
-                                     extra, extra + d->extraPadding->workspace(),
-                                     ExtraPaddingFunctor<float>{*d->extraPadding, x});
-                    x = extra;
+                if (auto f = d->extraPadding; f) {
+                    x = (*f)(x, reinterpret_cast<uint8_t *>(workspace) + workspaceSize);
                 }
+                if (auto f = d->biasExpand; f) {
+                    (*f)(res, workspace, inputs + 2, outputs);
+                }
-                if (d->biasExpand) { (*(d->biasExpand))(res, workspace, inputs + 2, outputs); }
                 // build alpha/beta for double
                 union {
                     float f32[2];
@@ -187,7 +131,10 @@ namespace refactor::kernel {
                     beta,
                     d->y, outputs[0]));
             };
-            return {std::move(routine), d->extraPadding ? workspaceSize + d->extraPadding->workspace() : workspaceSize};
+            return {
+                std::move(routine),
+                workspaceSize + (d->extraPadding ? d->extraPadding->workspace() : 0),
+            };
         }
 
 }// namespace refactor::kernel
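Note: cuDNN descriptors accept only one padding value per spatial dimension, so asymmetric pads are split into a symmetric part (handed to cuDNN) and a signed remainder that ExtraPadding materializes into the workspace. A minimal sketch of that arithmetic, assuming the ONNX pads layout {beginH, beginW, endH, endW} implied by the pads[0]/pads[2] and pads[1]/pads[3] pairings above; the names are illustrative, not from this repo:

#include <algorithm>
#include <cassert>

struct PadSplit {
    int symH, symW;    // symmetric part, goes into the cuDNN descriptor
    int extraH, extraW;// signed remainder; > 0 pads the begin side
};

PadSplit split(int const pads[4]) {// pads = {beginH, beginW, endH, endW}
    return {std::min(pads[0], pads[2]),
            std::min(pads[1], pads[3]),
            pads[0] - pads[2],
            pads[1] - pads[3]};
}

int main() {
    int pads[]{2, 1, 0, 1};
    auto s = split(pads);
    // cuDNN pads 0 rows and 1 column symmetrically; ExtraPadding prepends
    // the remaining 2 rows, matching build()'s (H + |padH|) buffer size.
    assert(s.symH == 0 && s.symW == 1 && s.extraH == 2 && s.extraW == 0);
}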
50 changes: 50 additions & 0 deletions src/04kernel/src/kernels/extra_padding/extra_padding.cu
@@ -0,0 +1,50 @@
#include "extra_padding.cuh"

namespace refactor::kernel {

std::optional<ExtraPadding>
ExtraPadding::build(DataType dt, int const *shape, int const *pads) {
if (pads[0] == pads[2] && pads[1] == pads[3]) {
return std::nullopt;
}
int padH = pads[0] - pads[2], padW = pads[1] - pads[3];
return ExtraPadding{
dt,
shape[0] * shape[1],
(shape[2] + std::abs(padH)) * (shape[3] + std::abs(padW)),
shape[3] + std::abs(padW),
shape[2],
shape[3],
padH,
padW};
}

size_t
ExtraPadding::workspace() const {
return nc * sohw * dt.size();
}


void const *
ExtraPadding::operator()(void const *src, void *workspace_) const {
auto extra = reinterpret_cast<uint8_t *>(workspace_);

#define CASE(T) \
case DataType::T: { \
using T_ = primitive<DataType::T>::type; \
thrust::tabulate(thrust::device, \
reinterpret_cast<T_ *>(extra), \
reinterpret_cast<T_ *>(extra + workspace()), \
ExtraPaddingFunctor<T_>{*this, src}); \
} break;

switch (dt) {
CASE(F32)
CASE(F64)
default:
UNREACHABLE();
}
return workspace_;
}

}// namespace refactor::kernel
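Note: thrust::tabulate(policy, first, last, f) assigns f(i) to first[i] for each i in [0, last - first), so ExtraPaddingFunctor runs once per element of the enlarged plane and returns either 0 (border) or the matching source element. A standalone illustration of the call shape (hypothetical example, not from this repo):

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/tabulate.h>

// Each output element is computed from its flat index alone, which is what
// lets the padding functor decide "zero border or source element" per index.
struct IndexAsFloat {
    __device__ float operator()(size_t i) const noexcept {
        return static_cast<float>(i);
    }
};

int main() {
    thrust::device_vector<float> v(8);
    thrust::tabulate(thrust::device, v.begin(), v.end(), IndexAsFloat{});
    // v now holds 0, 1, 2, ..., 7
}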
52 changes: 52 additions & 0 deletions src/04kernel/src/kernels/extra_padding/extra_padding.cuh
@@ -0,0 +1,52 @@
#ifndef KERNEL_EXTRA_PADDING_CUH
#define KERNEL_EXTRA_PADDING_CUH

#include "common.h"
#include <optional>
#include <thrust/execution_policy.h>
#include <thrust/tabulate.h>

namespace refactor::kernel {

    struct ExtraPadding {
        DataType dt;
        int nc, sohw, sow, h, w, padH, padW;

        static std::optional<ExtraPadding> build(DataType dt, int const *shape, int const *pads);

        size_t workspace() const;

        void const *operator()(void const *src, void *workspace) const;
    };

    template<class T>
    struct ExtraPaddingFunctor {
        ExtraPadding info;
        void const *src;

        __device__ T operator()(size_t i) const noexcept {
            auto h = i / info.sow,
                 w = i % info.sow;
            if (0 < info.padH) {
                if (h < info.padH) {
                    return 0;
                }
                h -= info.padH;
            } else if (h >= info.h) {
                return 0;
            }
            if (0 < info.padW) {
                if (w < info.padW) {
                    return 0;
                }
                w -= info.padW;
            } else if (w >= info.w) {
                return 0;
            }
            return reinterpret_cast<T const *>(src)[i / info.sohw * info.h * info.w + h * info.w + w];
        }
    };

}// namespace refactor::kernel

#endif// KERNEL_EXTRA_PADDING_CUH
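Note: the functor maps each flat index of the enlarged plane back into the source: a positive padH shifts reads up by padH rows (zeros appear at the top), a negative padH leaves the extra rows at the bottom (caught by h >= info.h), and the same logic applies along W. A host-side sanity sketch of the same index math for a single 2x3 plane with padH = 1, padW = 0 (illustrative only):

#include <cstdio>

int main() {
    int h0 = 2, w0 = 3, padH = 1;  // one extra row on the begin side
    int sow = w0, rows = h0 + padH;// enlarged plane is 3 x 3
    float src[]{1, 2, 3, 4, 5, 6};
    for (int i = 0; i < rows * sow; ++i) {
        int h = i / sow, w = i % sow;
        // same branch structure as ExtraPaddingFunctor, padW == 0 case
        float out = h < padH ? 0 : src[(h - padH) * w0 + w];
        std::printf("%g%c", out, w == sow - 1 ? '\n' : ' ');
    }
    // prints: 0 0 0 / 1 2 3 / 4 5 6
}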
10 changes: 3 additions & 7 deletions src/04kernel/src/kernels/pool/cudnn_kernel.cc
@@ -18,16 +18,12 @@ namespace refactor::kernel {
 #endif
 
         // TODO check data type
-        auto pb = poolAttributes.padsBegin(),
-             pe = poolAttributes.padsEnd(),
+        auto p = poolAttributes.pads(),
              d = poolAttributes.dilations(),
              s = poolAttributes.strides();
         if (x.rank() != 4 ||
             poolType == PoolType::Lp ||
-            d[0] != 1 ||
-            d[1] != 1 ||
-            pb[0] != pe[0] ||
-            pb[1] != pe[1]) {
+            d[0] != 1 || d[1] != 1) {
             return nullptr;
         }
         return std::make_unique<K>(decltype(info){
@@ -49,7 +45,7 @@
                 static_cast<int>(kernelShape[0]),
                 static_cast<int>(kernelShape[1]),
             },
-            {pb[0], pb[1]},
+            {p[0], p[1], p[2], p[3]},
             {s[0], s[1]},
         });
     }
42 changes: 24 additions & 18 deletions src/04kernel/src/kernels/pool/cudnn_kernel.cu
@@ -1,5 +1,6 @@
#include "../../utilities/cuda/cudnn_context.hh"
#include "../../utilities/cuda/cudnn_functions.h"
#include "../extra_padding/extra_padding.cuh"
#include "cudnn_kernel.hh"

namespace refactor::kernel {
@@ -12,6 +13,7 @@ namespace refactor::kernel {
         struct Descriptors {
             cudnnTensorDescriptor_t x, y;
             cudnnPoolingDescriptor_t pooling;
+            std::optional<ExtraPadding> extraPadding;
 
             Descriptors() {
                 CUDNN_ASSERT(cudnnCreateTensorDescriptor(&x));
@@ -28,12 +30,17 @@ namespace refactor::kernel {
             Descriptors(Descriptors &&) = delete;
         };
         auto d = std::make_shared<Descriptors>();
-
-        auto cudnnDataType = cudnnDataTypeConvert(info.dt);
-        auto xs = info.xShape;
-        auto ys = info.yShape;
-        CUDNN_ASSERT(cudnnSetTensor4dDescriptor(d->x, CUDNN_TENSOR_NCHW, cudnnDataType, xs[0], xs[1], xs[2], xs[3]));
-        CUDNN_ASSERT(cudnnSetTensor4dDescriptor(d->y, CUDNN_TENSOR_NCHW, cudnnDataType, ys[0], ys[1], ys[2], ys[3]));
+        d->extraPadding = ExtraPadding::build(info.dt, info.xShape, info.pads);
+        int const
+            xs[]{
+                info.xShape[0],
+                info.xShape[1],
+                info.xShape[2] + std::abs(info.pads[0] - info.pads[2]),
+                info.xShape[3] + std::abs(info.pads[1] - info.pads[3]),
+            },
+            *ys = info.yShape;
+        setCudnnTensor(d->x, info.dt, slice(xs, 4));
+        setCudnnTensor(d->y, info.dt, slice(ys, 4));
 
         // clang-format off
         auto mode = info.poolType == Ty::Average ? CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING
@@ -47,28 +54,27 @@
             d->pooling,
             mode, CUDNN_PROPAGATE_NAN,
             kk[0], kk[1],
-            pp[0], pp[1],
+            std::min(pp[0], pp[2]), std::min(pp[1], pp[3]),
             ss[0], ss[1]));
 
         res.fetchOrStore<CudnnContext>();
         // nvcc at c++11 doesn't support real move capture
-        return [d_ = std::move(d)](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
-            // fetch cudnn handle from resources
-            auto handle = res.fetchOrStore<CudnnContext>()->handle;
-            auto const &d = *d_;
-            // name inputs and outputs
-            auto x = inputs[0];
-            auto y = outputs[0];
+        auto routine = [d](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
+            void const *x = inputs[0];
+            if (auto f = d->extraPadding; f) {
+                x = (*f)(x, workspace);
+            }
             // TODO? build alpha/beta for double
             float alpha = 1, beta = 0;
             CUDNN_ASSERT(cudnnPoolingForward(
-                handle,
-                d.pooling,
+                res.fetchOrStore<CudnnContext>()->handle,
+                d->pooling,
                 &alpha,
-                d.x, x,
+                d->x, x,
                 &beta,
-                d.y, y));
+                d->y, outputs[0]));
         };
+        return {std::move(routine), d->extraPadding ? d->extraPadding->workspace() : 0};
     }
 
 }// namespace refactor::kernel
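Note: unlike the conv kernel, which appends the padded copy after cuDNN's own workspace, pooling needs no cuDNN workspace of its own, so the padded input is written at offset 0 and the returned size is just extraPadding->workspace() (or 0 for symmetric pads). A sketch of that sizing, mirroring ExtraPadding::workspace() with example values (assumed, not from this diff):

#include <cstddef>
#include <cstdio>
#include <cstdlib>

int main() {
    int n = 1, c = 3, h = 224, w = 224;
    int pads[]{1, 1, 0, 1};// {beginH, beginW, endH, endW}, asymmetric along H
    int padH = pads[0] - pads[2], padW = pads[1] - pads[3];
    size_t bytes = size_t(n * c)
                   * (h + std::abs(padH)) * (w + std::abs(padW))
                   * sizeof(float);// dt.size() for F32
    std::printf("workspace: %zu bytes\n", bytes);// 3 * 225 * 224 * 4 = 604800
}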
2 changes: 1 addition & 1 deletion src/04kernel/src/kernels/pool/cudnn_kernel.hh
@@ -17,7 +17,7 @@ namespace refactor::kernel {
         int xShape[4],
             yShape[4],
             kernelShape[2],
-            pads[2],
+            pads[4],
             strides[2];
     } info;

