From c6b611d3cdd862bfd996d429f417c5dbcd2f7e48 Mon Sep 17 00:00:00 2001 From: fineg74 <61437305+fineg74@users.noreply.github.com> Date: Thu, 29 Aug 2024 08:48:59 -0700 Subject: [PATCH] [SYCL][ESIMD][NFC] Optimize reduction implementation and add more test cases (#15215) --- sycl/include/sycl/ext/intel/esimd/math.hpp | 27 +------ sycl/test-e2e/ESIMD/reduction.cpp | 88 ++++++++++++---------- 2 files changed, 51 insertions(+), 64 deletions(-) diff --git a/sycl/include/sycl/ext/intel/esimd/math.hpp b/sycl/include/sycl/ext/intel/esimd/math.hpp index 6f0188111e534..5a1006896ae80 100644 --- a/sycl/include/sycl/ext/intel/esimd/math.hpp +++ b/sycl/include/sycl/ext/intel/esimd/math.hpp @@ -1476,35 +1476,16 @@ template struct esimd_apply_prod { template struct esimd_apply_reduced_max { template simd operator()(simd v1, simd v2) { - if constexpr (detail::is_generic_floating_point_v) { - using CppT = __ESIMD_DNS::element_type_traits::EnclosingCppT; - return __ESIMD_DNS::convert_vector( - __spirv_ocl_fmax( - __ESIMD_DNS::convert_vector(v1.data()), - __ESIMD_DNS::convert_vector(v2.data()))); - } else if constexpr (std::is_unsigned::value) { - return __esimd_umax(v1.data(), v2.data()); - } else { - return __esimd_smax(v1.data(), v2.data()); - } + return __ESIMD_DNS::convert_vector( + __ESIMD_NS::max(v1, v2).data()); } }; template struct esimd_apply_reduced_min { template simd operator()(simd v1, simd v2) { - - if constexpr (detail::is_generic_floating_point_v) { - using CppT = __ESIMD_DNS::element_type_traits::EnclosingCppT; - return __ESIMD_DNS::convert_vector( - __spirv_ocl_fmin( - __ESIMD_DNS::convert_vector(v1.data()), - __ESIMD_DNS::convert_vector(v2.data()))); - } else if constexpr (std::is_unsigned::value) { - return __esimd_umin(v1.data(), v2.data()); - } else { - return __esimd_smin(v1.data(), v2.data()); - } + return __ESIMD_DNS::convert_vector( + __ESIMD_NS::min(v1, v2).data()); } }; diff --git a/sycl/test-e2e/ESIMD/reduction.cpp b/sycl/test-e2e/ESIMD/reduction.cpp index 5e219f2aa548a..454cb4049e9fa 100644 --- a/sycl/test-e2e/ESIMD/reduction.cpp +++ b/sycl/test-e2e/ESIMD/reduction.cpp @@ -12,21 +12,16 @@ #include "esimd_test_utils.hpp" using namespace sycl; +constexpr unsigned InputSize = 32; +constexpr unsigned OutputSize = 4; +constexpr unsigned VL = 32; +constexpr unsigned GroupSize = 1; -typedef short TYPE; - -int main(void) { - constexpr unsigned InputSize = 32; - constexpr unsigned OutputSize = 4; - constexpr unsigned VL = 32; - constexpr unsigned GroupSize = 1; - - queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); - - auto dev = q.get_device(); - std::cout << "Running on " << dev.get_info() << "\n"; - TYPE *A = malloc_shared(InputSize, q); - int *B = malloc_shared(OutputSize, q); +template bool test(sycl::queue &Queue) { + std::cout << "TIn= " << esimd_test::type_name() + << " TOut= " << esimd_test::type_name() << std::endl; + TIn *A = malloc_shared(InputSize, Queue); + TOut *B = malloc_shared(OutputSize, Queue); for (unsigned i = 0; i < InputSize; ++i) { if (i == 19) { @@ -41,49 +36,48 @@ int main(void) { range<1> TaskRange{GroupSize}; nd_range<1> Range(GroupRange, TaskRange); - auto e = q.submit([&](handler &cgh) { - cgh.parallel_for( - GroupRange * TaskRange, [=](id<1> i) SYCL_ESIMD_KERNEL { - using namespace sycl::ext::intel::esimd; + auto e = Queue.submit([&](handler &cgh) { + cgh.parallel_for(GroupRange * TaskRange, [=](id<1> i) SYCL_ESIMD_KERNEL { + using namespace sycl::ext::intel::esimd; - simd va; - va.copy_from(A + i * VL); - simd vb; + simd va; + va.copy_from(A + i * VL); + simd vb; - vb[0] = reduce(va, std::plus<>()); - vb[1] = reduce(va, std::multiplies<>()); - vb[2] = hmax(va); - vb[3] = hmin(va); + vb[0] = reduce(va, std::plus<>()); + vb[1] = reduce(va, std::multiplies<>()); + vb[2] = hmax(va); + vb[3] = hmin(va); - vb.copy_to(B + i * VL); - }); + vb.copy_to(B + i * VL); + }); }); e.wait(); } catch (sycl::exception const &e) { std::cout << "SYCL exception caught: " << e.what() << '\n'; - free(A, q); - free(B, q); - return 1; + free(A, Queue); + free(B, Queue); + return 0; } - auto compute_reduce_sum = [](TYPE A[InputSize]) -> int { - int retv = A[0]; + auto compute_reduce_sum = [](TIn A[InputSize]) -> TOut { + TOut retv = A[0]; for (int i = 1; i < InputSize; i++) { retv += A[i]; } return retv; }; - auto compute_reduce_prod = [](TYPE A[InputSize]) -> int { - int retv = A[0]; + auto compute_reduce_prod = [](TIn A[InputSize]) -> TOut { + TOut retv = A[0]; for (int i = 1; i < InputSize; i++) { retv *= A[i]; } return retv; }; - auto compute_reduce_max = [](TYPE A[InputSize]) -> int { - int retv = A[0]; + auto compute_reduce_max = [](TIn A[InputSize]) -> TOut { + TOut retv = A[0]; for (int i = 1; i < InputSize; i++) { if (A[i] > retv) { retv = A[i]; @@ -92,8 +86,8 @@ int main(void) { return retv; }; - auto compute_reduce_min = [](TYPE A[InputSize]) -> int { - int retv = A[0]; + auto compute_reduce_min = [](TIn A[InputSize]) -> TOut { + TOut retv = A[0]; for (int i = 1; i < InputSize; i++) { if (A[i] < retv) { retv = A[i]; @@ -103,7 +97,7 @@ int main(void) { }; bool TestPass = true; - int ref = compute_reduce_sum(A); + TOut ref = compute_reduce_sum(A); if (B[0] != ref) { std::cout << "Incorrect sum " << B[0] << ", expected " << ref << "\n"; TestPass = false; @@ -127,8 +121,20 @@ int main(void) { TestPass = false; } - free(A, q); - free(B, q); + free(A, Queue); + free(B, Queue); + return TestPass; +} + +int main(void) { + + queue Queue(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); + + esimd_test::printTestLabel(Queue); + + bool TestPass = test(Queue); + TestPass &= test(Queue); + TestPass &= test(Queue); if (!TestPass) { std::cout << "Failed\n";