test(kernel): test DynamicQuantizeLinear
Signed-off-by: YdrMaster <ydrml@hotmail.com>
YdrMaster committed Dec 18, 2023
1 parent d6b4952 commit ee14c96
Showing 4 changed files with 98 additions and 3 deletions.
12 changes: 10 additions & 2 deletions src/04kernel/src/kernels/dynamic_quantize_linear/cpu_kernel.cc
@@ -22,6 +22,14 @@ namespace refactor::kernel {
         return "Performing dynamic quantize linear using CPU";
     }

+    template<class TO, class TI>
+    static TO saturate(TI x) {
+        constexpr static auto
+            QMIN = static_cast<TI>(std::numeric_limits<TO>::min()),
+            QMAX = static_cast<TI>(std::numeric_limits<TO>::max());
+        return static_cast<TO>(std::round(std::clamp(x, QMIN, QMAX)));
+    }
+
     auto K::lower(Resources &) const noexcept -> RoutineWorkspace {
         using namespace runtime;
         return [size = size](Resources &, void *, void const *const *inputs, void *const *outputs) {
@@ -49,13 +57,13 @@
             });
             auto len = std::max(ZERO, max) - std::min(ZERO, min);
             auto scale = len / QLEN;
-            auto zp = static_cast<TO>(std::round(QMIN - min * QLEN / len));
+            auto zp = saturate<TO>(QMIN - min * QLEN / len);

             std::transform(
                 std::execution::par_unseq,
                 x, x + size,
                 reinterpret_cast<TO *>(outputs[0]),
-                [=](auto it) { return static_cast<TO>(std::round(it / scale) + zp); });
+                [=](auto it) { return saturate<TO>(std::round(it / scale) + zp); });
             *reinterpret_cast<TI *>(outputs[1]) = scale;
             *reinterpret_cast<TO *>(outputs[2]) = zp;
         };
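For reference, the CPU kernel follows the standard uint8 dynamic-quantization recipe: extend the data range so it covers zero, take the scale as that range divided by 255, then round and saturate both the zero point and the quantized values into [0, 255]. Below is a minimal standalone sketch of that recipe; the free function dynamicQuantizeU8 and its layout are illustrative, not code from the repository.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

// Round to nearest and clamp into the representable range of TO,
// mirroring the saturate<TO, TI> helper added in this commit.
template<class TO, class TI>
static TO saturate(TI x) {
    constexpr auto QMIN = static_cast<TI>(std::numeric_limits<TO>::min()),
                   QMAX = static_cast<TI>(std::numeric_limits<TO>::max());
    return static_cast<TO>(std::round(std::clamp(x, QMIN, QMAX)));
}

// Illustrative uint8 dynamic quantization: the range is extended to include 0,
// scale = range / 255, and zero point and outputs are saturated to [0, 255].
static void dynamicQuantizeU8(std::vector<float> const &x,
                              std::vector<uint8_t> &y,
                              float &scale, uint8_t &zeroPoint) {
    auto [minIt, maxIt] = std::minmax_element(x.begin(), x.end());
    float min = std::min(0.f, *minIt), max = std::max(0.f, *maxIt);
    scale = (max - min) / 255.f;
    zeroPoint = saturate<uint8_t>(0.f - min / scale);
    y.resize(x.size());
    std::transform(x.begin(), x.end(), y.begin(),
                   [=](float v) { return saturate<uint8_t>(std::round(v / scale) + zeroPoint); });
}

int main() {
    std::vector<float> x{0, 2, -3, -2.5, 1.34, 0.5};
    std::vector<uint8_t> y;
    float scale;
    uint8_t zp;
    dynamicQuantizeU8(x, y, scale, zp);
    // Expected for this input: scale ≈ 0.019608, zeroPoint = 153.
    std::printf("scale = %f, zeroPoint = %u\n", scale, static_cast<unsigned>(zp));
    return 0;
}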
@@ -124,7 +124,7 @@ namespace refactor::kernel {
                 tempStorage, tempStorageSize_,
                 doubled, minmax, params.n,
                 QuantizeReduceMinMaxFunctor<TI>{},
-                QuantizeMinMax<TI>{_MIN, _MAX});
+                QuantizeMinMax<TI>{_MAX, _MIN});

             kernel<<<params.gridSize, params.blockSize>>>(
                 params.n, minmax, x, y, scale, zp);
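The remaining change is a one-liner in the CUDA implementation of the same kernel: it swaps the two fields of the seed value passed to the min/max reduction. A running minimum has to start at the largest representable value and a running maximum at the smallest; otherwise the seed itself wins every comparison and the reduction just returns the seed. A minimal sketch of that identity element, assuming QuantizeMinMax stores the running minimum first and the running maximum second, and that _MIN/_MAX are the type's extreme values (the names below are illustrative, not the repository's):

#include <limits>

// Illustrative pair of running extrema for a min/max reduction
// (stands in for QuantizeMinMax<TI>; field order assumed to be {min, max}).
template<class T>
struct MinMax {
    T min, max;
};

// Identity element: seed the running min with the largest value and the
// running max with the lowest, so any real element replaces both.
template<class T>
constexpr MinMax<T> identity() {
    return {std::numeric_limits<T>::max(),      // running min starts "too high"
            std::numeric_limits<T>::lowest()};  // running max starts "too low"
}

// Combine step of the reduction (stands in for QuantizeReduceMinMaxFunctor).
template<class T>
constexpr MinMax<T> combine(MinMax<T> a, MinMax<T> b) {
    return {a.min < b.min ? a.min : b.min,
            a.max > b.max ? a.max : b.max};
}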
31 changes: 31 additions & 0 deletions src/04kernel/test/kernels/dynamic_quantize_linear/test_cpu.cpp
@@ -0,0 +1,31 @@
#include "../../../src/kernels/dynamic_quantize_linear/cpu_kernel.hh"
#include <gtest/gtest.h>
#include <numeric>

using namespace refactor;
using namespace kernel;

TEST(kernel, DynamicQuantizeLinearCpu) {
    // build routine
    auto kernel = DynamicQuantizeLinearCpu::build(6);
    ASSERT_TRUE(kernel);
    auto res = runtime::Resources();
    auto routine = kernel->lower(res).routine;
    // put input data
    std::vector<float> x{0, 2, -3, -2.5, 1.34, 0.5};
    std::vector<uint8_t> y(x.size());
    float scale;
    uint8_t zeroPoint;
    // inference
    {
        void const *inputs[]{x.data()};
        void *outputs[]{y.data(), &scale, &zeroPoint};
        routine(res, nullptr, inputs, outputs);
    }
    // check
    EXPECT_FLOAT_EQ(scale, (2 + 3) / 255.f);
    EXPECT_EQ(zeroPoint, 153);
    for (auto i : range0_(y.size())) {
        EXPECT_EQ(y[i], static_cast<uint8_t>(std::round(x[i] / scale) + zeroPoint));
    }
}
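The expected constants in the assertions can be worked out by hand from the recipe above:

    min(x) = -3, max(x) = 2
    range  = max(0, 2) - min(0, -3) = 5
    scale  = 5 / 255 ≈ 0.019608                     // matches (2 + 3) / 255.f
    zp     = round(0 - (-3) / scale) = round(3 * 255 / 5) = 153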
56 changes: 56 additions & 0 deletions src/04kernel/test/kernels/dynamic_quantize_linear/test_cuda.cpp
@@ -0,0 +1,56 @@
#ifdef USE_CUDA

#include "../../../src/kernels/dynamic_quantize_linear/cpu_kernel.hh"
#include "../../../src/kernels/dynamic_quantize_linear/cuda_kernel.hh"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>

using namespace refactor;
using namespace kernel;
using namespace hardware;

TEST(kernel, DynamicQuantizeLinearCuda) {
    auto size = 20;
    // build routine
    auto kernel = DynamicQuantizeLinearCuda::build(size),
         kCpu = DynamicQuantizeLinearCpu::build(size);
    ASSERT_TRUE(kernel && kCpu);
    auto res = runtime::Resources();
    auto [routine, workspaceSize] = kernel->lower(res);
    auto rCpu = kCpu->lower(res).routine;
    // malloc
    auto &dev = *device::init(Device::Type::Nvidia, 0, "");
    auto xGpu = dev.malloc(size * sizeof(float)),
         yGpu = dev.malloc(size * sizeof(uint8_t)),
         scaleGpu = dev.malloc(sizeof(float)),
         zpGpu = dev.malloc(sizeof(uint8_t)),
         workspace = dev.malloc(workspaceSize);
    // put input data
    std::vector<float> x(size);
    std::vector<uint8_t> y(size);
    float scale;
    uint8_t zeroPoint;
    for (auto i : range0_(size)) {
        x[i] = i * 3 + 15;
    }
    xGpu->copyFromHost(x.data());
    // inference
    {
        void const *inputs[]{*xGpu};
        void *outputs[]{*yGpu, *scaleGpu, *zpGpu};
        routine(res, *workspace, inputs, outputs);
    }
    {
        void const *inputs[]{x.data()};
        void *outputs[]{y.data(), &scale, &zeroPoint};
        rCpu(res, nullptr, inputs, outputs);
    }
    // check
    {
        std::vector<uint8_t> result(size);
        yGpu->copyToHost(result.data());
        EXPECT_EQ(result, y);
    }
}

#endif
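The CUDA test compares only the quantized tensor against the CPU reference. If one also wanted to verify the device-side scale and zero point, the same copy-back pattern used for yGpu above should apply; a sketch, assuming copyToHost behaves the same for these one-element buffers:

    // Sketch: fetch the device-side scale and zero point and compare them
    // with the CPU reference values computed by rCpu above.
    float scaleFromGpu;
    uint8_t zpFromGpu;
    scaleGpu->copyToHost(&scaleFromGpu);
    zpGpu->copyToHost(&zpFromGpu);
    EXPECT_FLOAT_EQ(scaleFromGpu, scale);
    EXPECT_EQ(zpFromGpu, zeroPoint);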
