From c2f8a3cc72fd2bee4ba48915b06ffa83f1ba28b8 Mon Sep 17 00:00:00 2001 From: Taylor Yeonbok Lee Date: Tue, 14 Jan 2025 00:40:26 -0800 Subject: [PATCH] [GPU] Fix conv kernel to select opt kernel even though filter size is large (#28421) ### Details: - Since https://github.com/openvinotoolkit/openvino/pull/23621, conv has been selecting the ref kernel when the filter size is large - Reverted the change from 23621 and instead fixed the opt kernel implementation so that it does not use manual unrolling when the filter size is large ### Tickets: - CVS-157998 --- .../convolution_gpu_bfyx_os_iyx_osv16.cl | 21 +++++++++++++++++-- .../convolution_kernel_bfyx_os_iyx_osv16.cpp | 14 ++++++------- .../unit/test_cases/convolution_gpu_test.cpp | 6 +++--- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl index 1ccaf35cf46fd0..576e05dfa91d0f 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/convolution_gpu_bfyx_os_iyx_osv16.cl @@ -171,13 +171,24 @@ KERNEL(convolution_gpu_bfyx_os_iyx_osv16)( uint wi = 0; uint kr = 0; // kr = Kernel Row +#ifdef DISABLE_MANUAL_UNROLL + unroll_for (; kr < FILTER_SIZE_Y; ++kr) +#else LOOP(FILTER_SIZE_Y, kr, // LOOP is a macro that unrolls the loop. 
+#endif { uint kc = 0; // kc = Kernel Column +#ifdef DISABLE_MANUAL_UNROLL + unroll_for (; kc < FILTER_SIZE_X; ++kc) + { + unroll_for (uint br = 0; br < OUTPUT_BLOCK_HEIGHT; br++) { + unroll_for(uint bc = 0; bc < OUTPUT_BLOCK_WIDTH; bc++) { +#else LOOP(FILTER_SIZE_X, kc, { - for(uint br=0; br(p); - auto filter_size = params.filterSize.x * params.filterSize.y; - if (filter_size >= acceptable_filter_size) { - return false; - } - return true; } @@ -245,6 +237,12 @@ JitConstants ConvolutionKernel_bfyx_os_iyx_osv16::GetJitConstants(const convolut jit.AddConstant(MakeJitConstant("IN_BLOCK_WIDTH", dispatchData.cldnnStyle.inputBlockWidth)); jit.AddConstant(MakeJitConstant("PREFETCH", dispatchData.cldnnStyle.prefetch)); + const size_t large_filter_size = 1024; // This acceptable size was decided by heuristics + auto filter_size = params.filterSize.x * params.filterSize.y; + if (filter_size >= large_filter_size) { + jit.AddConstant(MakeJitConstant("DISABLE_MANUAL_UNROLL", 1)); + } + if (leftovers) { jit.AddConstant(MakeJitConstant("LEFTOVERS", leftovers)); } diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp index b7cac423cd16b0..bbfe7224b4a328 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/convolution_gpu_test.cpp @@ -1689,7 +1689,7 @@ TEST(convolution_f32_fw_gpu, convolution_big_size_weights) { }; const std::vector impl_kernel_data = { - "convolution_gpu_ref__f32" + "convolution_gpu_bfyx_os_iyx_osv16__f32" }; for (size_t m = 0 ; m < filter_size_data.size() / 2; m++) { @@ -1767,8 +1767,8 @@ TEST(convolution_f16_fw_gpu, convolution_big_size_weights) { }; const std::vector impl_kernel_data = { - "convolution_gpu_ref__f16", - "convolution_gpu_bfyx_gemm_like__f16", + "convolution_gpu_bfyx_os_iyx_osv16__f16", + "convolution_gpu_bfyx_os_iyx_osv16__f16", }; for (size_t m = 0 ; m < 
filter_size_data.size() / 2; m++) {