diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/grid_sample_opt_bilinear.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/grid_sample_opt_bilinear.cl new file mode 100644 index 00000000000000..710d443266ee2e --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/grid_sample_opt_bilinear.cl @@ -0,0 +1,246 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +typedef __global INPUT0_TYPE data_t; +typedef __global INPUT1_TYPE grid_t; +typedef __global OUTPUT_TYPE output_t; + +typedef INPUT0_TYPE data_et; +typedef float grid_et; +typedef OUTPUT_TYPE output_et; + +inline const data_et FUNC( + get_data_single_value)(const data_t* buffer, const size_t n, const size_t c, const size_t h, const size_t w) { + const size_t idx = INPUT0_GET_INDEX(n, c, h, w); + return buffer[idx]; +} +#define get_data_single_value FUNC_CALL(get_data_single_value) + +inline const grid_et FUNC( + get_grid_single_value)(const grid_t* buffer, const size_t n, const size_t c, const size_t h, const size_t w) { + const size_t idx = INPUT1_GET_INDEX(n, h, w, c); + return buffer[idx]; +} +#define get_grid_single_value FUNC_CALL(get_grid_single_value) + +inline void FUNC(set_output_single_value)(const output_et value, + output_t* buffer, + const size_t n, + const size_t c, + const size_t h, + const size_t w) { + const size_t idx = OUTPUT_GET_INDEX(n, c, h, w); + buffer[idx] = value; +} +#define set_output_single_value FUNC_CALL(set_output_single_value) + +#if defined(ALIGN_CORNERS) +#define rescale_align FUNC(denormalize) +inline grid_et rescale_align(const grid_et value, const size_t range) { + return (value + 1) * ((grid_et)(range)-1) / 2; +} +#else +#define rescale_noalign FUNC(denormalize) +inline grid_et rescale_noalign(const grid_et value, const size_t range) { + return ((value + 1) * (grid_et)(range)-1) / 2; +} +#endif +#define denormalize FUNC_CALL(denormalize) + +#if defined(PADDING_MODE_ZEROS) +#define zeros_padding FUNC(get_padded) +inline data_et zeros_padding(const data_t* data, const size_t n, const size_t c, const long y_d, const long x_d) { + const long H = convert_long(INPUT0_SIZE_Y); + const long W = convert_long(INPUT0_SIZE_X); + if (y_d < 0 || x_d < 0 || y_d >= H || x_d >= W) { + return 0; + } else { + const size_t y = (size_t)(y_d); + const size_t x = (size_t)(x_d); + return get_data_single_value(data, n, c, y, x); + } +} +#undef zeros_padding + +#elif defined(PADDING_MODE_BORDER) +#define border_padding FUNC(get_padded) +inline data_et border_padding(const data_t* data, const size_t n, const size_t c, const long y_d, const long x_d) { + const long H = INPUT0_SIZE_Y; + const long W = INPUT0_SIZE_X; + const size_t y = (size_t)(clamp(y_d, 0l, H - 1)); + const size_t x = (size_t)(clamp(x_d, 0l, W - 1)); + return get_data_single_value(data, n, c, y, x); +} +#undef border_padding + +#elif defined(PADDING_MODE_REFLECTION) + +#if defined(ALIGN_CORNERS) +#define reflection_data_with_align FUNC(get_padded) +inline data_et reflection_data_with_align(const data_t* data, const size_t n, const size_t c, long y_d, long x_d) { + const long H = convert_long(INPUT0_SIZE_Y); + const long W = convert_long(INPUT0_SIZE_X); + const long H_2_2 = H == 1 ? 1 : 2 * (H - 1); + const long W_2_2 = W == 1 ? 1 : 2 * (W - 1); + y_d = abs(y_d) % H_2_2; + x_d = abs(x_d) % W_2_2; + const size_t y = (size_t)(y_d >= H ? H_2_2 - y_d : y_d); + const size_t x = (size_t)(x_d >= W ? 
W_2_2 - x_d : x_d); + return get_data_single_value(data, n, c, y, x); +} +#undef reflection_data_with_align + +#else +#define reflection_data_no_align FUNC(get_padded) +inline data_et reflection_data_no_align(const data_t* data, const size_t n, const size_t c, long y_d, long x_d) { + const long H = convert_long(INPUT0_SIZE_Y); + const long W = convert_long(INPUT0_SIZE_X); + const long H_2 = convert_long(INPUT0_SIZE_Y) * 2l; + const long W_2 = convert_long(INPUT0_SIZE_X) * 2l; + y_d = (y_d % H_2 + H_2) % H_2; + x_d = (x_d % W_2 + W_2) % W_2; + const size_t y = (size_t)(y_d >= H ? H_2 - 1 - y_d : y_d); + const size_t x = (size_t)(x_d >= W ? W_2 - 1 - x_d : x_d); + return get_data_single_value(data, n, c, y, x); +} +#undef reflection_data_no_align +#endif +#else +#error [clDNN grid_sample_ref.cl]: undefined padding mode +#endif + +#define get_padded FUNC_CALL(get_padded) + +#if defined(INTERPOLATION_MODE_BILINEAR) +#define bilinear FUNC(interpolate) +inline data_et bilinear(const data_t* data, const size_t n, const size_t c, const grid_et y_n, const grid_et x_n) { + const grid_et y_d = denormalize(y_n, INPUT0_SIZE_Y); + const grid_et x_d = denormalize(x_n, INPUT0_SIZE_X); + const grid_et y_topleft = floor(y_d); + const grid_et x_topleft = floor(x_d); + const grid_et dy = y_d - y_topleft; + const grid_et dx = x_d - x_topleft; + const data_et v00 = get_padded(data, n, c, y_topleft, x_topleft); + const data_et v01 = get_padded(data, n, c, y_topleft, x_topleft + 1); + const data_et v10 = get_padded(data, n, c, y_topleft + 1, x_topleft); + const data_et v11 = get_padded(data, n, c, y_topleft + 1, x_topleft + 1); + + const data_et q0 = (1 - dx) * v00 + dx * v01; + const data_et q1 = (1 - dx) * v10 + dx * v11; + return dy * q1 + (1 - dy) * q0; +} +#undef bilinear + +#elif defined(INTERPOLATION_MODE_NEAREST) +#define nearest FUNC(interpolate) +inline data_et nearest(const data_t* data, const size_t n, const size_t c, const grid_et y_n, const grid_et x_n) { + const grid_et y_nearest = rint(denormalize(y_n, INPUT0_SIZE_Y)); + const grid_et x_nearest = rint(denormalize(x_n, INPUT0_SIZE_X)); + return get_padded(data, n, c, y_nearest, x_nearest); +} +#undef nearest + +#elif defined(INTERPOLATION_MODE_BICUBIC) + +typedef MAKE_VECTOR_TYPE(INPUT1_TYPE, 4) vector_grid_4_t; +typedef MAKE_VECTOR_TYPE(INPUT0_TYPE, 4) vector_data_4_t; +typedef MAKE_VECTOR_TYPE(INPUT0_TYPE, 16) matrix_data_4x4_t; + +inline vector_grid_4_t FUNC(cubic_coeffs)(const data_et r, const data_et A) { + vector_grid_4_t v; + v[0] = ((A * (r + 1) - 5 * A) * (r + 1) + 8 * A) * (r + 1) - 4 * A; + v[1] = ((A + 2) * r - (A + 3)) * r * r + 1; + v[2] = ((A + 2) * (1 - r) - (A + 3)) * (1 - r) * (1 - r) + 1; + v[3] = ((A * (2 - r) - 5 * A) * (2 - r) + 8 * A) * (2 - r) - 4 * A; + return v; +} +#define cubic_coeffs FUNC_CALL(cubic_coeffs) + +inline matrix_data_4x4_t FUNC( + gather_4x4)(const data_t* data, const size_t n, const size_t c, const long y_topleft, const long x_topleft) { + matrix_data_4x4_t s; + for (int j = 0; j < 4; ++j) { + for (int i = 0; i < 4; ++i) { + s[j * 4 + i] = get_padded(data, n, c, y_topleft + j, x_topleft + i); + } + } + return s; +} +#define gather_4x4 FUNC_CALL(gather_4x4) + +inline data_et FUNC(inner_product_v4_v4)(const vector_grid_4_t v1, const vector_data_4_t v2) { + return v1[0] * v2[0] + v1[1] * v2[1] + v1[2] * v2[2] + v1[3] * v2[3]; +} +#define inner_product_v4_v4 FUNC_CALL(inner_product_v4_v4) + +inline vector_data_4_t FUNC(inner_product_m4_v4)(const matrix_data_4x4_t m, const vector_grid_4_t v) { + vector_data_4_t 
p = {
+        m[0 * 4 + 0] * v[0] + m[0 * 4 + 1] * v[1] + m[0 * 4 + 2] * v[2] + m[0 * 4 + 3] * v[3],
+        m[1 * 4 + 0] * v[0] + m[1 * 4 + 1] * v[1] + m[1 * 4 + 2] * v[2] + m[1 * 4 + 3] * v[3],
+        m[2 * 4 + 0] * v[0] + m[2 * 4 + 1] * v[1] + m[2 * 4 + 2] * v[2] + m[2 * 4 + 3] * v[3],
+        m[3 * 4 + 0] * v[0] + m[3 * 4 + 1] * v[1] + m[3 * 4 + 2] * v[2] + m[3 * 4 + 3] * v[3],
+    };
+
+    return p;
+}
+#define inner_product_m4_v4 FUNC_CALL(inner_product_m4_v4)
+
+#define bicubic FUNC(interpolate)
+inline data_et bicubic(const data_t* data, const size_t n, const size_t c, const grid_et y_n, const grid_et x_n) {
+    const grid_et y_d = denormalize(y_n, INPUT0_SIZE_Y);
+    const grid_et x_d = denormalize(x_n, INPUT0_SIZE_X);
+    const grid_et y_topleft = floor(y_d);
+    const grid_et x_topleft = floor(x_d);
+    const grid_et dy = y_d - y_topleft;
+    const grid_et dx = x_d - x_topleft;
+    matrix_data_4x4_t s = gather_4x4(data, n, c, y_topleft - 1, x_topleft - 1);
+
+    vector_grid_4_t cy = cubic_coeffs(dy, -0.75);
+    vector_grid_4_t cx = cubic_coeffs(dx, -0.75);
+    vector_data_4_t p;
+    p = inner_product_m4_v4(s, cx);
+    return inner_product_v4_v4(cy, p);
+}
+#undef bicubic
+#undef inner_product_m4_v4
+#undef inner_product_v4_v4
+#undef cubic_coeffs
+#undef gather_4x4
+#else
+#error [clDNN grid_sample_ref.cl]: undefined interpolation mode
+#endif
+
+#define interpolate FUNC_CALL(interpolate)
+
+KERNEL(grid_sample_ref)(const data_t * data,
+                        const grid_t * grid,
+                        output_t * output
+                        #if HAS_FUSED_OPS_DECLS
+                        , FUSED_OPS_DECLS
+                        #endif
+                        ) {
+#if INPUT0_BATCH_NUM != INPUT1_BATCH_NUM
+#error [clDNN grid_sample_ref.cl]: the batch dimension in the input data tensor's shape doesn't match the batch dimension in the grid tensor's shape
+#endif
+
+#if INPUT1_SIZE_X != 2
+#error [clDNN grid_sample_ref.cl]: the last dimension of the grid tensor must be 2
+#endif
+
+    const uint nc = get_global_id(0);
+    const uint n = nc % OUTPUT_BATCH_NUM;
+    const uint c = nc / OUTPUT_BATCH_NUM;
+    const uint h = get_global_id(1);
+    const uint w = get_global_id(2);
+
+    const grid_et y_n = get_grid_single_value(grid, n, 1, h, w);
+    const grid_et x_n = get_grid_single_value(grid, n, 0, h, w);
+
+    const output_et out = interpolate(data, n, c, y_n, x_n);
+
+    set_output_single_value(out, output, n, c, h, w);
+}
+#undef interpolate
+#undef get_padded
+#undef denormalize
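For reference, the bilinear path of the kernel above reduces to the following host-side sketch (plain C++, float data, zeros padding, a single (n, c) plane; denormalize_ref, get_padded_zeros and bilinear_ref are illustrative helper names, not part of this patch):

#include <cmath>
#include <cstddef>
#include <vector>

// Maps a normalized coordinate in [-1, 1] to a pixel coordinate, mirroring the kernel's denormalize().
static float denormalize_ref(float v, std::size_t range, bool align_corners) {
    return align_corners ? (v + 1.f) * (range - 1) / 2.f
                         : ((v + 1.f) * range - 1.f) / 2.f;
}

// Zeros padding: out-of-bounds reads return 0, mirroring the PADDING_MODE_ZEROS branch.
static float get_padded_zeros(const std::vector<float>& plane, long H, long W, long y, long x) {
    if (y < 0 || x < 0 || y >= H || x >= W)
        return 0.f;
    return plane[y * W + x];  // row-major (y, x) within one (n, c) plane
}

// Bilinear sample at normalized (x_n, y_n), matching the INTERPOLATION_MODE_BILINEAR branch.
static float bilinear_ref(const std::vector<float>& plane, long H, long W, float y_n, float x_n, bool align_corners) {
    const float y_d = denormalize_ref(y_n, H, align_corners);
    const float x_d = denormalize_ref(x_n, W, align_corners);
    const long y0 = static_cast<long>(std::floor(y_d));
    const long x0 = static_cast<long>(std::floor(x_d));
    const float dy = y_d - y0;
    const float dx = x_d - x0;
    const float v00 = get_padded_zeros(plane, H, W, y0, x0);
    const float v01 = get_padded_zeros(plane, H, W, y0, x0 + 1);
    const float v10 = get_padded_zeros(plane, H, W, y0 + 1, x0);
    const float v11 = get_padded_zeros(plane, H, W, y0 + 1, x0 + 1);
    const float q0 = (1 - dx) * v00 + dx * v01;
    const float q1 = (1 - dx) * v10 + dx * v11;
    return (1 - dy) * q0 + dy * q1;
}

With align_corners, the extremes -1/+1 map exactly onto the first/last pixel centers; without it they map onto the outer pixel edges, which is why the two denormalize variants in the kernel differ.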
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_base.cpp
new file mode 100644
index 00000000000000..abdf3903bcd2e6
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_base.cpp
@@ -0,0 +1,110 @@
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "grid_sample_kernel_base.hpp"
+
+#include "kernel_selector_utils.h"
+
+namespace kernel_selector {
+
+KernelsData GridSampleKernelBase::GetKernelsData(const Params& params) const {
+    if (!Validate(params)) {
+        return {};
+    }
+
+    auto kernel_data = KernelData::Default<grid_sample_params>(params);
+    const auto& kernel_params = dynamic_cast<const grid_sample_params&>(*kernel_data.params);
+    const auto dispatch_data = CalcDispatch(kernel_params);
+    const auto entry_point = GetEntryPoint(kernelName, kernel_params.layerID, params);
+    const auto jit_constants = GetJitConstants(kernel_params);
+    const auto jit = CreateJit(kernelName, jit_constants, entry_point);
+    auto& kernel = kernel_data.kernels.front();
+
+    FillCLKernelData(kernel, dispatch_data, params.engineInfo, kernelName, jit, entry_point, {}, false, false, 2);
+
+    return {kernel_data};
+}
+
+ParamsKey GridSampleKernelBase::GetSupportedKey() const {
+    ParamsKey key;
+    key.EnableAllInputDataType();
+    key.EnableAllOutputDataType();
+    key.EnableDifferentTypes();
+    key.EnableInputLayout(DataLayout::bfyx);
+    key.EnableOutputLayout(DataLayout::bfyx);
+    key.EnableInputLayout(DataLayout::b_fs_yx_fsv32);
+    key.EnableOutputLayout(DataLayout::b_fs_yx_fsv32);
+    key.EnableInputLayout(DataLayout::b_fs_yx_fsv16);
+    key.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+    key.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
+    key.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
+    key.EnableInputLayout(DataLayout::bs_fs_yx_bsv32_fsv32);
+    key.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv32);
+    key.EnableInputLayout(DataLayout::bs_fs_yx_bsv32_fsv16);
+    key.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv16);
+    key.EnableTensorOffset();
+    key.EnableTensorPitches();
+    key.EnableBatching();
+    return key;
+}
+
+bool GridSampleKernelBase::Validate(const Params& params) const {
+    if (params.GetType() != KernelType::GRID_SAMPLE) {
+        return false;
+    }
+
+    const auto& kernel_params = dynamic_cast<const grid_sample_params&>(params);
+    if (kernel_params.inputs.size() != 2) {
+        return false;
+    }
+
+    return true;
+}
+
+JitConstants GridSampleKernelBase::GetJitConstants(const grid_sample_params& kernel_params) const {
+    auto jit_constants = MakeBaseParamsJitConstants(kernel_params);
+
+    jit_constants.AddConstants({
+        MakeJitConstant("INTERPOLATION_MODE_" + ov::as_string(kernel_params.interpolation_mode), true),
+        MakeJitConstant("PADDING_MODE_" + ov::as_string(kernel_params.padding_mode), true),
+    });
+
+    if (kernel_params.align_corners) {
+        jit_constants.AddConstant(MakeJitConstant("ALIGN_CORNERS", true));
+    }
+
+    return jit_constants;
+}
+
+}  // namespace kernel_selector
+
+namespace ov {
+
+template <>
+ov::EnumNames<kernel_selector::grid_sample_params::InterpolationMode>& ::ov::EnumNames<
+    kernel_selector::grid_sample_params::InterpolationMode>::get() {
+    static auto enum_names = EnumNames<kernel_selector::grid_sample_params::InterpolationMode>(
+        "kernel_selector::grid_sample_params::InterpolationMode",
+        {
+            {"BILINEAR", kernel_selector::grid_sample_params::InterpolationMode::BILINEAR},
+            {"BICUBIC", kernel_selector::grid_sample_params::InterpolationMode::BICUBIC},
+            {"NEAREST", kernel_selector::grid_sample_params::InterpolationMode::NEAREST},
+        });
+    return enum_names;
+}
+
+template <>
+ov::EnumNames<kernel_selector::grid_sample_params::PaddingMode>& ::ov::EnumNames<
+    kernel_selector::grid_sample_params::PaddingMode>::get() {
+    static auto enum_names = EnumNames<kernel_selector::grid_sample_params::PaddingMode>(
+        "kernel_selector::grid_sample_params::PaddingMode",
+        {
+            {"ZEROS", kernel_selector::grid_sample_params::PaddingMode::ZEROS},
+            {"BORDER", kernel_selector::grid_sample_params::PaddingMode::BORDER},
+            {"REFLECTION", kernel_selector::grid_sample_params::PaddingMode::REFLECTION},
+        });
+    return enum_names;
+}
+
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_base.hpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_base.hpp
new file mode 100644
index 00000000000000..5e4a616d53e228
--- /dev/null
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_base.hpp
@@ -0,0 +1,46 @@
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "kernel_base_opencl.h"
+
+namespace kernel_selector {
+
+/**
+ * GridSample kernel parameters.
+ */ +struct grid_sample_params : public base_params { + grid_sample_params() : base_params(KernelType::GRID_SAMPLE) {} + bool align_corners = false; + enum class InterpolationMode { + BILINEAR, + BICUBIC, + NEAREST, + } interpolation_mode = InterpolationMode::BILINEAR; + enum class PaddingMode { + ZEROS, + BORDER, + REFLECTION, + } padding_mode = PaddingMode::ZEROS; +}; + +/** + * GridSampleKernelBase. + */ +class GridSampleKernelBase : public KernelBaseOpenCL { +public: + using KernelBaseOpenCL::KernelBaseOpenCL; + + KernelsData GetKernelsData(const Params& params) const override; + ParamsKey GetSupportedKey() const override; + +protected: + virtual CommonDispatchData CalcDispatch(const grid_sample_params& kernel_params) const = 0; + virtual JitConstants GetJitConstants(const grid_sample_params& kernel_params) const; + + bool Validate(const Params& params) const override; +}; + +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_opt_bilinear.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_opt_bilinear.cpp new file mode 100644 index 00000000000000..69c620dff8905a --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_opt_bilinear.cpp @@ -0,0 +1,25 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "grid_sample_kernel_opt_bilinear.hpp" + +#include "kernel_selector_utils.h" + +namespace kernel_selector { + +CommonDispatchData GridSampleKernelOptBilinear::CalcDispatch(const grid_sample_params& kernel_params) const { + CommonDispatchData dispatch_data; + const auto& output = kernel_params.outputs.front(); + + dispatch_data.gws = {output.Batch().v * output.Feature().v, output.Y().v, output.X().v}; + dispatch_data.lws = GetOptimalLocalWorkGroupSizes(dispatch_data.gws, kernel_params.engineInfo); + + return dispatch_data; +} + +KernelsPriority GridSampleKernelOptBilinear::GetKernelsPriority(const Params& /*params*/) const { + return FORCE_PRIORITY_8; +} + +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_opt_bilinear.hpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_opt_bilinear.hpp new file mode 100644 index 00000000000000..be9d38fd1828ee --- /dev/null +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_opt_bilinear.hpp @@ -0,0 +1,20 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "grid_sample_kernel_base.hpp" + +namespace kernel_selector { + +class GridSampleKernelOptBilinear : public GridSampleKernelBase { +public: + GridSampleKernelOptBilinear() : GridSampleKernelBase("grid_sample_opt_bilinear") {} + +protected: + CommonDispatchData CalcDispatch(const grid_sample_params& kernel_params) const override; + KernelsPriority GetKernelsPriority(const Params& /*params*/) const override; +}; + +} // namespace kernel_selector diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_ref.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_ref.cpp index 533ab13ba9a8f8..d22c3b8b51daf1 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_ref.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_ref.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 
Intel Corporation +// Copyright (C) 2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -8,9 +8,7 @@ namespace kernel_selector { -namespace { - -CommonDispatchData SetDefault(const grid_sample_params& kernel_params) { +CommonDispatchData GridSampleKernelRef::CalcDispatch(const grid_sample_params& kernel_params) const { CommonDispatchData dispatch_data; const auto& output = kernel_params.outputs.front(); @@ -20,105 +18,4 @@ CommonDispatchData SetDefault(const grid_sample_params& kernel_params) { return dispatch_data; } -} // namespace - -KernelsData GridSampleKernelRef::GetKernelsData(const Params& params) const { - if (!Validate(params)) { - return {}; - } - - auto kernel_data = KernelData::Default(params); - const auto& kernel_params = dynamic_cast(*kernel_data.params); - const auto dispatch_data = SetDefault(kernel_params); - const auto entry_point = GetEntryPoint(kernelName, kernel_params.layerID, params); - const auto jit_constants = GetJitConstants(kernel_params); - const auto jit = CreateJit(kernelName, jit_constants, entry_point); - auto& kernel = kernel_data.kernels.front(); - - FillCLKernelData(kernel, dispatch_data, params.engineInfo, kernelName, jit, entry_point, {}, false, false, 2); - - return {kernel_data}; -} - -ParamsKey GridSampleKernelRef::GetSupportedKey() const { - ParamsKey key; - key.EnableAllInputDataType(); - key.EnableAllOutputDataType(); - key.EnableDifferentTypes(); - key.EnableInputLayout(DataLayout::bfyx); - key.EnableOutputLayout(DataLayout::bfyx); - key.EnableInputLayout(DataLayout::b_fs_yx_fsv32); - key.EnableOutputLayout(DataLayout::b_fs_yx_fsv32); - key.EnableInputLayout(DataLayout::b_fs_yx_fsv16); - key.EnableOutputLayout(DataLayout::b_fs_yx_fsv16); - key.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16); - key.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16); - key.EnableInputLayout(DataLayout::bs_fs_yx_bsv32_fsv32); - key.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv32); - key.EnableInputLayout(DataLayout::bs_fs_yx_bsv32_fsv16); - key.EnableOutputLayout(DataLayout::bs_fs_yx_bsv32_fsv16); - key.EnableTensorOffset(); - key.EnableTensorPitches(); - key.EnableBatching(); - return key; -} - -bool GridSampleKernelRef::Validate(const Params& params) const { - if (params.GetType() != KernelType::GRID_SAMPLE) { - return false; - } - - const auto& kernel_params = dynamic_cast(params); - if (kernel_params.inputs.size() != 2) { - return false; - } - - return true; -} - -JitConstants GridSampleKernelRef::GetJitConstants(const grid_sample_params& kernel_params) const { - auto jit_constants = MakeBaseParamsJitConstants(kernel_params); - - jit_constants.AddConstants({ - MakeJitConstant("INTERPOLATION_MODE_" + ov::as_string(kernel_params.interpolation_mode), true), - MakeJitConstant("PADDING_MODE_" + ov::as_string(kernel_params.padding_mode), true), - }); - - if (kernel_params.align_corners) { - jit_constants.AddConstant(MakeJitConstant("ALIGN_CORNERS", true)); - } - - return jit_constants; -} - } // namespace kernel_selector - -namespace ov { - -template <> -ov::EnumNames& ::ov::EnumNames< - kernel_selector::grid_sample_params::InterpolationMode>::get() { - static auto enum_names = EnumNames( - "kernel_selector::grid_sample_params::InterpolationMode", - { - {"BILINEAR", kernel_selector::grid_sample_params::InterpolationMode::BILINEAR}, - {"BICUBIC", kernel_selector::grid_sample_params::InterpolationMode::BICUBIC}, - {"NEAREST", kernel_selector::grid_sample_params::InterpolationMode::NEAREST}, - }); - return enum_names; -} - -template <> 
-ov::EnumNames<kernel_selector::grid_sample_params::PaddingMode>& ::ov::EnumNames<
-    kernel_selector::grid_sample_params::PaddingMode>::get() {
-    static auto enum_names = EnumNames<kernel_selector::grid_sample_params::PaddingMode>(
-        "kernel_selector::grid_sample_params::PaddingMode",
-        {
-            {"ZEROS", kernel_selector::grid_sample_params::PaddingMode::ZEROS},
-            {"BORDER", kernel_selector::grid_sample_params::PaddingMode::BORDER},
-            {"REFLECTION", kernel_selector::grid_sample_params::PaddingMode::REFLECTION},
-        });
-    return enum_names;
-}
-
-}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_ref.hpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_ref.hpp
index 78fce8b7e3d561..3a2776b63ee19a 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_ref.hpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_ref.hpp
@@ -1,44 +1,22 @@
-// Copyright (C) 2022 Intel Corporation
+// Copyright (C) 2025 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0
 //
 
 #pragma once
 
-#include "kernel_base_opencl.h"
+#include "grid_sample_kernel_base.hpp"
 
 namespace kernel_selector {
 
-/**
- * GridSample reference kernel parameters.
- */
-struct grid_sample_params : public base_params {
-    grid_sample_params() : base_params(KernelType::GRID_SAMPLE) {}
-    bool align_corners = false;
-    enum class InterpolationMode {
-        BILINEAR,
-        BICUBIC,
-        NEAREST,
-    } interpolation_mode = InterpolationMode::BILINEAR;
-    enum class PaddingMode {
-        ZEROS,
-        BORDER,
-        REFLECTION,
-    } padding_mode = PaddingMode::ZEROS;
-};
-
 /**
  * Reference kernel for GridSample.
  */
-class GridSampleKernelRef : public KernelBaseOpenCL {
+class GridSampleKernelRef : public GridSampleKernelBase {
 public:
-    GridSampleKernelRef() : KernelBaseOpenCL{"grid_sample_ref"} {}
-
-    KernelsData GetKernelsData(const Params& params) const override;
-    ParamsKey GetSupportedKey() const override;
+    GridSampleKernelRef() : GridSampleKernelBase("grid_sample_ref") {}
 
 protected:
-    bool Validate(const Params& params) const override;
-    JitConstants GetJitConstants(const grid_sample_params& kernel_params) const;
+    CommonDispatchData CalcDispatch(const grid_sample_params& kernel_params) const override;
 };
 
 }  // namespace kernel_selector
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_selector.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_selector.cpp
index 2cd2e3ffde3623..b3960828b4cb7d 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_selector.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/grid_sample/grid_sample_kernel_selector.cpp
@@ -4,12 +4,14 @@
 
 #include "grid_sample_kernel_selector.hpp"
 
+#include "grid_sample_kernel_opt_bilinear.hpp"
 #include "grid_sample_kernel_ref.hpp"
 
 namespace kernel_selector {
 
 grid_sample_kernel_selector::grid_sample_kernel_selector() {
     Attach<GridSampleKernelRef>();
+    Attach<GridSampleKernelOptBilinear>();
 }
 
 KernelsData grid_sample_kernel_selector::GetBestKernels(const Params& params) const {
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/data/grid.bin b/src/plugins/intel_gpu/tests/unit/test_cases/data/grid.bin
new file mode 100644
index 00000000000000..b46392a6ce55dc
Binary files /dev/null and b/src/plugins/intel_gpu/tests/unit/test_cases/data/grid.bin differ
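The preprocessor branches in the new .cl kernel (INTERPOLATION_MODE_*, PADDING_MODE_*, ALIGN_CORNERS) are keyed on jit constants built by GridSampleKernelBase::GetJitConstants above. A simplified standalone sketch of that mapping; params_view and jit_defines are illustrative stand-ins for grid_sample_params and the JitConstants machinery, not the actual API:

#include <string>

struct params_view {
    std::string interpolation;  // "BILINEAR", "BICUBIC" or "NEAREST"
    std::string padding;        // "ZEROS", "BORDER" or "REFLECTION"
    bool align_corners;
};

// Produces the defines the kernel's #if branches select on.
static std::string jit_defines(const params_view& p) {
    std::string jit = "#define INTERPOLATION_MODE_" + p.interpolation + " 1\n"
                      "#define PADDING_MODE_" + p.padding + " 1\n";
    if (p.align_corners)
        jit += "#define ALIGN_CORNERS 1\n";
    return jit;
}

For the benchmark below (BILINEAR, ZEROS, align_corners=true) this would enable the bilinear interpolation branch, zeros padding and the align-corners denormalization.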
diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/grid_sample_gpu_benchmark.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/grid_sample_gpu_benchmark.cpp
new file mode 100644
index 00000000000000..fa09b62fdc1300
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/unit/test_cases/grid_sample_gpu_benchmark.cpp
@@ -0,0 +1,214 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include <fstream>
+#include <random>
+
+#include "intel_gpu/primitives/grid_sample.hpp"
+#include "test_utils.h"
+
+using namespace cldnn;
+using namespace ::tests;
+
+// PK: TEMPORARY BENCHMARK, WILL BE REMOVED BEFORE MERGING TO MASTER.
+
+namespace {
+
+static constexpr int WARMUPS = 10;
+static constexpr int RUNS = 100;
+static constexpr int SEED = 7877;
+
+namespace helpers {
+// TODO: Move to common place.
+
+// Converts a float vector to a vector of another element type.
+template <typename T>
+std::vector<T> ConvertFloatVector(const std::vector<float>& vec) {
+    std::vector<T> ret;
+    ret.reserve(vec.size());
+    for (const auto& val : vec) {
+        ret.push_back(T(val));
+    }
+    return ret;
+}
+
+// Allocates a tensor with the given shape and data.
+template <typename T>
+memory::ptr AllocateTensor(ov::PartialShape shape, const std::vector<T>& data) {
+    const layout lo = {shape, ov::element::from<T>(), cldnn::format::bfyx};
+    EXPECT_EQ(lo.get_linear_size(), data.size());
+    memory::ptr tensor = get_test_engine().allocate_memory(lo);
+    set_values(tensor, data);
+    return tensor;
+}
+
+static std::vector<float> RandomFloatVector(size_t size, float low, float high) {
+    std::vector<float> vec;
+    vec.resize(size);
+    static std::default_random_engine engine(SEED);
+    std::uniform_real_distribution<float> dis(low, high);
+
+    for (size_t i = 0; i < vec.size(); ++i) {
+        vec[i] = dis(engine);
+    }
+
+    return vec;
+}
+
+}  // namespace helpers
+
+struct GridSampleTestParams {
+    ov::PartialShape inputShape;
+    ov::PartialShape gridShape;
+    ov::PartialShape outputShape;
+    GridSampleOp::Attributes attributes;
+    std::vector<float> inputData;
+    std::vector<float> gridData;
+};
+
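The grid tensors built by these helpers have shape {N, H_out, W_out, 2}, with the last dimension holding normalized (x, y) coordinates in [-1, 1] (the kernel reads x from index 0 and y from index 1 of that dimension). As a sanity check, an identity grid built as below should reproduce the input exactly under BILINEAR interpolation with align_corners=true; MakeIdentityGrid is an illustrative helper, not part of this patch:

#include <cstddef>
#include <vector>

// Builds a grid whose normalized coordinates point at each pixel center of an h x w input,
// laid out as {n, h, w, 2} with (x, y) in the last dimension.
static std::vector<float> MakeIdentityGrid(std::size_t n, std::size_t h, std::size_t w) {
    std::vector<float> grid;
    grid.reserve(n * h * w * 2);
    for (std::size_t b = 0; b < n; ++b) {
        for (std::size_t y = 0; y < h; ++y) {
            for (std::size_t x = 0; x < w; ++x) {
                grid.push_back(w > 1 ? 2.f * x / (w - 1) - 1.f : 0.f);  // x in [-1, 1]
                grid.push_back(h > 1 ? 2.f * y / (h - 1) - 1.f : 0.f);  // y in [-1, 1]
            }
        }
    }
    return grid;
}

Such a grid could be passed as gridData to PrepareParams below in place of the random or constant grids.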
+class gridSample_benchmark : public ::testing::Test {
+public:
+    struct GridSampleInferenceParams {
+        GridSampleOp::Attributes attributes;
+        memory::ptr input;
+        memory::ptr grid;
+        memory::ptr expectedOutput;
+    };
+
+    template <ov::element::Type_t ET>
+    GridSampleInferenceParams PrepareInferenceParams(const GridSampleTestParams& testParam) {
+        using T = typename ov::element_type_traits<ET>::value_type;
+        GridSampleInferenceParams ret;
+
+        ret.attributes = testParam.attributes;
+
+        ret.input =
+            helpers::AllocateTensor(testParam.inputShape, helpers::ConvertFloatVector<T>(testParam.inputData));
+        ret.grid = helpers::AllocateTensor(testParam.gridShape, helpers::ConvertFloatVector<T>(testParam.gridData));
+        return ret;
+    }
+
+    void Execute(const GridSampleInferenceParams& params) {
+        // Prepare the network.
+        topology topology;
+        topology.add(input_layout("input", params.input->get_layout()));
+        topology.add(input_layout("grid", params.grid->get_layout()));
+
+        topology.add(reorder("reordered_data", input_info("input"), format::bfyx, data_types::f16));
+        topology.add(reorder("reordered_grid", input_info("grid"), format::bfyx, data_types::f16));
+        topology.add(grid_sample("grid_sample",
+                                 {input_info("reordered_data"), input_info("reordered_grid")},
+                                 params.attributes));
+        topology.add(reorder("plane_grid_sample", input_info("grid_sample"), format::bfyx, data_types::f16));
+
+        auto stream = get_test_stream_ptr(get_test_default_config(engine_));
+        cldnn::network::ptr network = get_network(engine_, topology, get_test_default_config(engine_), stream, false);
+
+        network->set_input_data("input", params.input);
+        network->set_input_data("grid", params.grid);
+
+        // Run and check results.
+        const int warmup = WARMUPS;
+        const int run = RUNS;
+
+        std::map<primitive_id, network_output> outputs;
+        for (int i = 0; i < warmup; ++i)
+            network->execute();
+        network->reset_execution(true);
+
+        // Note: should be based on events; this one
+        // also adds up kernel launch time and GPU idle time.
+        auto start = std::chrono::system_clock::now();
+        for (int i = 0; i < run; ++i)
+            network->execute();
+        network->reset_execution(true);
+        auto stop = std::chrono::system_clock::now();
+
+        const auto d_actual = std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
+
+        outputs = network->execute();
+        auto output = outputs.at("plane_grid_sample").get_memory();
+        auto outputShape = output->get_layout().get_shape();
+        std::cout << "Avg Time for output shape " << outputShape << ": " << d_actual / run << " microseconds\n\n";
+    }
+
+    GridSampleTestParams PrepareParams(const ov::PartialShape& inputShape,
+                                       const std::vector<float>& inputData,
+                                       const ov::PartialShape& gridShape,
+                                       const std::vector<float>& gridData) {
+        EXPECT_EQ(ov::shape_size(inputShape.get_shape()), inputData.size());
+        EXPECT_EQ(ov::shape_size(gridShape.get_shape()), gridData.size());
+        GridSampleTestParams params;
+        params.inputShape = inputShape;
+        params.gridShape = gridShape;
+        params.inputData = inputData;
+        params.gridData = gridData;
+        params.attributes.mode = GridSampleOp::InterpolationMode::BILINEAR;
+        params.attributes.align_corners = true;
+        params.attributes.padding_mode = GridSampleOp::PaddingMode::ZEROS;
+        return params;
+    }
+
+    GridSampleTestParams PrepareRandomDataParams(const ov::PartialShape& inputShape,
+                                                 const ov::PartialShape& gridShape) {
+        return PrepareParams(inputShape,
+                             helpers::RandomFloatVector(ov::shape_size(inputShape.get_shape()), -1000.0f, 1000.0f),
+                             gridShape,
+                             helpers::RandomFloatVector(ov::shape_size(gridShape.get_shape()), -1.0f, 1.0f));
+    }
+
+    GridSampleTestParams PrepareGridDataStaticParams(const ov::PartialShape& inputShape,
+                                                     const ov::PartialShape& gridShape) {
+        return PrepareParams(inputShape,
+                             std::vector<float>(ov::shape_size(inputShape.get_shape()), 0),
+                             gridShape,
+                             std::vector<float>(ov::shape_size(gridShape.get_shape()), 0));
+    }
+
+    GridSampleTestParams PrepareGridDataFileParams(const std::string& gridFilePath,
+                                                   const ov::PartialShape& inputShape,
+                                                   const ov::PartialShape& gridShape) {
+        std::streampos gridFileSize;
+        std::ifstream gridFile(gridFilePath, std::ios::binary);
+
+        // get its size:
+        gridFile.seekg(0, std::ios::end);
+        gridFileSize = gridFile.tellg();
+        gridFile.seekg(0, std::ios::beg);
+
+        // read the data:
+        std::vector<float> gridData(gridFileSize / sizeof(float));
+        gridFile.read((char*)&gridData[0], gridFileSize);
+        gridFile.close();
+
+        return PrepareParams(inputShape,
+                             helpers::RandomFloatVector(ov::shape_size(inputShape.get_shape()), -1000.0f, 1000.0f),
+                             gridShape,
+                             gridData);
+    }
+
+    template <ov::element::Type_t ET>
+    void RunBenchmark(const std::string& name, const GridSampleTestParams& params) {
+        std::cout << "Benchmark(" << name << "): input shape: " << params.inputShape
+                  << ", grid shape: " << params.gridShape << std::endl;
+        Execute(PrepareInferenceParams<ET>(params));
+    }
+
+private:
+    engine& engine_ = get_test_engine();
+};
+}  // namespace
+
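As the note in Execute() says, wall-clock timing around network->execute() also counts host-side launch overhead and GPU idle time; device profiling events would be more precise. A minimal sketch of a reusable wall-clock helper that at least uses a monotonic clock and avoids integer truncation of the average (TimeAvgUs is an illustrative name, not an existing utility in this test suite):

#include <chrono>

// Runs fn() `runs` times and returns the average wall-clock time per run in microseconds.
template <typename F>
static double TimeAvgUs(F&& fn, int runs) {
    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < runs; ++i)
        fn();
    const auto stop = std::chrono::steady_clock::now();
    return std::chrono::duration<double, std::micro>(stop - start).count() / runs;
}

// Possible usage inside Execute():
//   const double avg_us = TimeAvgUs([&] { network->execute(); }, RUNS);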
+TEST_F(gridSample_benchmark, benchmarks) {
+    RunBenchmark<ov::element::Type_t::f16>("random access", PrepareRandomDataParams({1, 128, 120, 216}, {1, 120, 216, 2}));
+    RunBenchmark<ov::element::Type_t::f16>("constant access",
+                                           PrepareGridDataStaticParams({1, 128, 120, 216}, {1, 120, 216, 2}));
+
+    RunBenchmark<ov::element::Type_t::f16>("random access", PrepareRandomDataParams({2, 128, 80, 144}, {2, 11520, 81, 2}));
+    RunBenchmark<ov::element::Type_t::f16>("constant access",
+                                           PrepareGridDataStaticParams({2, 128, 80, 144}, {2, 11520, 81, 2}));
+
+    RunBenchmark<ov::element::Type_t::f16>("access exported from real model",
+                                           PrepareGridDataFileParams("src/plugins/intel_gpu/tests/unit/test_cases/data/grid.bin",
+                                                                     {2, 128, 80, 144},
+                                                                     {2, 11520, 81, 2}));
+}
\ No newline at end of file
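For scale, the optimized kernel launches one work item per output element (CalcDispatch sets gws = {N * C, H_out, W_out}, where H_out and W_out come from the grid shape), so the largest case above dispatches roughly 239 million work items. A purely illustrative check of that arithmetic:

#include <cstdint>
#include <iostream>

int main() {
    // Shapes of the {2, 128, 80, 144} / {2, 11520, 81, 2} benchmark case.
    const std::uint64_t n = 2, c = 128, h_out = 11520, w_out = 81;
    const std::uint64_t work_items = n * c * h_out * w_out;  // one work item per output element
    std::cout << work_items << " work items\n";              // prints 238878720
    return 0;
}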