test(kernel): test DynamicQuantizeLinear
Signed-off-by: YdrMaster <ydrml@hotmail.com>
YdrMaster committed Dec 18, 2023
1 parent d6b4952 commit ee14c96
Showing 4 changed files with 98 additions and 3 deletions.
12 changes: 10 additions & 2 deletions src/04kernel/src/kernels/dynamic_quantize_linear/cpu_kernel.cc
@@ -22,6 +22,14 @@ namespace refactor::kernel {
         return "Performing dynamic quantize linear using CPU";
     }

+    template<class TO, class TI>
+    static TO saturate(TI x) {
+        constexpr static auto
+            QMIN = static_cast<TI>(std::numeric_limits<TO>::min()),
+            QMAX = static_cast<TI>(std::numeric_limits<TO>::max());
+        return static_cast<TO>(std::round(std::clamp(x, QMIN, QMAX)));
+    }
+
     auto K::lower(Resources &) const noexcept -> RoutineWorkspace {
         using namespace runtime;
         return [size = size](Resources &, void *, void const *const *inputs, void *const *outputs) {
@@ -49,13 +57,13 @@
             });
             auto len = std::max(ZERO, max) - std::min(ZERO, min);
             auto scale = len / QLEN;
-            auto zp = static_cast<TO>(std::round(QMIN - min * QLEN / len));
+            auto zp = saturate<TO>(QMIN - min * QLEN / len);

             std::transform(
                 std::execution::par_unseq,
                 x, x + size,
                 reinterpret_cast<TO *>(outputs[0]),
-                [=](auto it) { return static_cast<TO>(std::round(it / scale) + zp); });
+                [=](auto it) { return saturate<TO>(std::round(it / scale) + zp); });
             *reinterpret_cast<TI *>(outputs[1]) = scale;
             *reinterpret_cast<TO *>(outputs[2]) = zp;
         };
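For reference, the CPU kernel follows the standard uint8 dynamic-quantization recipe: extend the data range so it covers zero, take the scale as that range divided by 255, then round and saturate both the zero point and the quantized values into [0, 255]. Below is a minimal standalone sketch of that recipe; the free function dynamicQuantizeU8 and its layout are illustrative, not code from the repository.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

// Round to nearest and clamp into the representable range of TO,
// mirroring the saturate<TO, TI> helper added in this commit.
template<class TO, class TI>
static TO saturate(TI x) {
    constexpr auto QMIN = static_cast<TI>(std::numeric_limits<TO>::min()),
                   QMAX = static_cast<TI>(std::numeric_limits<TO>::max());
    return static_cast<TO>(std::round(std::clamp(x, QMIN, QMAX)));
}

// Illustrative uint8 dynamic quantization: the range is extended to include 0,
// scale = range / 255, and zero point and outputs are saturated to [0, 255].
static void dynamicQuantizeU8(std::vector<float> const &x,
                              std::vector<uint8_t> &y,
                              float &scale, uint8_t &zeroPoint) {
    auto [minIt, maxIt] = std::minmax_element(x.begin(), x.end());
    float min = std::min(0.f, *minIt), max = std::max(0.f, *maxIt);
    scale = (max - min) / 255.f;
    zeroPoint = saturate<uint8_t>(0.f - min / scale);
    y.resize(x.size());
    std::transform(x.begin(), x.end(), y.begin(),
                   [=](float v) { return saturate<uint8_t>(std::round(v / scale) + zeroPoint); });
}

int main() {
    std::vector<float> x{0, 2, -3, -2.5, 1.34, 0.5};
    std::vector<uint8_t> y;
    float scale;
    uint8_t zp;
    dynamicQuantizeU8(x, y, scale, zp);
    // Expected for this input: scale ≈ 0.019608, zeroPoint = 153.
    std::printf("scale = %f, zeroPoint = %u\n", scale, static_cast<unsigned>(zp));
    return 0;
}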
@@ -124,7 +124,7 @@ namespace refactor::kernel {
                 tempStorage, tempStorageSize_,
                 doubled, minmax, params.n,
                 QuantizeReduceMinMaxFunctor<TI>{},
-                QuantizeMinMax<TI>{_MIN, _MAX});
+                QuantizeMinMax<TI>{_MAX, _MIN});

             kernel<<<params.gridSize, params.blockSize>>>(
                 params.n, minmax, x, y, scale, zp);
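The remaining change is a one-liner in the CUDA implementation of the same kernel: it swaps the two fields of the seed value passed to the min/max reduction. A running minimum has to start at the largest representable value and a running maximum at the smallest; otherwise the seed itself wins every comparison and the reduction just returns the seed. A minimal sketch of that identity element, assuming QuantizeMinMax stores the running minimum first and the running maximum second, and that _MIN/_MAX are the type's extreme values (the names below are illustrative, not the repository's):

#include <limits>

// Illustrative pair of running extrema for a min/max reduction
// (stands in for QuantizeMinMax<TI>; field order assumed to be {min, max}).
template<class T>
struct MinMax {
    T min, max;
};

// Identity element: seed the running min with the largest value and the
// running max with the lowest, so any real element replaces both.
template<class T>
constexpr MinMax<T> identity() {
    return {std::numeric_limits<T>::max(),      // running min starts "too high"
            std::numeric_limits<T>::lowest()};  // running max starts "too low"
}

// Combine step of the reduction (stands in for QuantizeReduceMinMaxFunctor).
template<class T>
constexpr MinMax<T> combine(MinMax<T> a, MinMax<T> b) {
    return {a.min < b.min ? a.min : b.min,
            a.max > b.max ? a.max : b.max};
}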
31 changes: 31 additions & 0 deletions src/04kernel/test/kernels/dynamic_quantize_linear/test_cpu.cpp
@@ -0,0 +1,31 @@
#include "../../../src/kernels/dynamic_quantize_linear/cpu_kernel.hh"
#include <gtest/gtest.h>
#include <numeric>

using namespace refactor;
using namespace kernel;

TEST(kernel, DynamicQuantizeLinearCpu) {
    // build routine
    auto kernel = DynamicQuantizeLinearCpu::build(6);
    ASSERT_TRUE(kernel);
    auto res = runtime::Resources();
    auto routine = kernel->lower(res).routine;
    // put input data
    std::vector<float> x{0, 2, -3, -2.5, 1.34, 0.5};
    std::vector<uint8_t> y(x.size());
    float scale;
    uint8_t zeroPoint;
    // inference
    {
        void const *inputs[]{x.data()};
        void *outputs[]{y.data(), &scale, &zeroPoint};
        routine(res, nullptr, inputs, outputs);
    }
    // check
    EXPECT_FLOAT_EQ(scale, (2 + 3) / 255.f);
    EXPECT_EQ(zeroPoint, 153);
    for (auto i : range0_(y.size())) {
        EXPECT_EQ(y[i], static_cast<uint8_t>(std::round(x[i] / scale) + zeroPoint));
    }
}
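The expected constants in the assertions can be worked out by hand from the recipe above:

    min(x) = -3, max(x) = 2
    range  = max(0, 2) - min(0, -3) = 5
    scale  = 5 / 255 ≈ 0.019608                     // matches (2 + 3) / 255.f
    zp     = round(0 - (-3) / scale) = round(3 * 255 / 5) = 153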
56 changes: 56 additions & 0 deletions src/04kernel/test/kernels/dynamic_quantize_linear/test_cuda.cpp
@@ -0,0 +1,56 @@
#ifdef USE_CUDA

#include "../../../src/kernels/dynamic_quantize_linear/cpu_kernel.hh"
#include "../../../src/kernels/dynamic_quantize_linear/cuda_kernel.hh"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>

using namespace refactor;
using namespace kernel;
using namespace hardware;

TEST(kernel, DynamicQuantizeLinearCuda) {
    auto size = 20;
    // build routine
    auto kernel = DynamicQuantizeLinearCuda::build(size),
         kCpu = DynamicQuantizeLinearCpu::build(size);
    ASSERT_TRUE(kernel && kCpu);
    auto res = runtime::Resources();
    auto [routine, workspaceSize] = kernel->lower(res);
    auto rCpu = kCpu->lower(res).routine;
    // malloc
    auto &dev = *device::init(Device::Type::Nvidia, 0, "");
    auto xGpu = dev.malloc(size * sizeof(float)),
         yGpu = dev.malloc(size * sizeof(uint8_t)),
         scaleGpu = dev.malloc(sizeof(float)),
         zpGpu = dev.malloc(sizeof(uint8_t)),
         workspace = dev.malloc(workspaceSize);
    // put input data
    std::vector<float> x(size);
    std::vector<uint8_t> y(size);
    float scale;
    uint8_t zeroPoint;
    for (auto i : range0_(size)) {
        x[i] = i * 3 + 15;
    }
    xGpu->copyFromHost(x.data());
    // inference
    {
        void const *inputs[]{*xGpu};
        void *outputs[]{*yGpu, *scaleGpu, *zpGpu};
        routine(res, *workspace, inputs, outputs);
    }
    {
        void const *inputs[]{x.data()};
        void *outputs[]{y.data(), &scale, &zeroPoint};
        rCpu(res, nullptr, inputs, outputs);
    }
    // check
    {
        std::vector<uint8_t> result(size);
        yGpu->copyToHost(result.data());
        EXPECT_EQ(result, y);
    }
}

#endif
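The CUDA test compares only the quantized tensor against the CPU reference. If one also wanted to verify the device-side scale and zero point, the same copy-back pattern used for yGpu above should apply; a sketch, assuming copyToHost behaves the same for these one-element buffers:

    // Sketch: fetch the device-side scale and zero point and compare them
    // with the CPU reference values computed by rCpu above.
    float scaleFromGpu;
    uint8_t zpFromGpu;
    scaleGpu->copyToHost(&scaleFromGpu);
    zpGpu->copyToHost(&zpFromGpu);
    EXPECT_FLOAT_EQ(scaleFromGpu, scale);
    EXPECT_EQ(zpFromGpu, zeroPoint);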
