From 91ee67206289ec80c3c02f063040fa4638be0d63 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Mon, 25 Jul 2022 12:24:31 -0400 Subject: [PATCH] [libc] Use nearest_integer instructions to improve expf performance. Use nearest_integer instructions to improve expf performance. Performance tests with CORE-MATH's perf tool: Before the patch: ``` $ ./perf.sh expf LIBC-location: /home/lnt/experiment/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.31 GNU libc release: stable CORE-MATH reciprocal throughput : 9.860 System LIBC reciprocal throughput : 7.728 LIBC reciprocal throughput : 12.363 $ ./perf.sh expf --latency LIBC-location: /home/lnt/experiment/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.31 GNU libc release: stable CORE-MATH latency : 42.802 System LIBC latency : 35.941 LIBC latency : 49.808 ``` After the patch: ``` $ ./perf.sh expf LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.31 GNU libc release: stable CORE-MATH reciprocal throughput : 9.441 System LIBC reciprocal throughput : 7.382 LIBC reciprocal throughput : 8.843 $ ./perf.sh expf --latency LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.31 GNU libc release: stable CORE-MATH latency : 44.192 System LIBC latency : 37.693 LIBC latency : 44.145 ``` Reviewed By: zimmermann6 Differential Revision: https://reviews.llvm.org/D130498 --- libc/docs/math.rst | 2 +- libc/src/math/generic/CMakeLists.txt | 2 ++ libc/src/math/generic/expf.cpp | 12 +++++------- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libc/docs/math.rst b/libc/docs/math.rst index 6480a981acb025..42d09617d6bb75 100644 --- a/libc/docs/math.rst +++ b/libc/docs/math.rst @@ -199,7 +199,7 @@ Performance 
+==============+===========+===================+===========+===================+=====================================+============+=========================+==============+===============+ | cosf | 37 | 32 | 73 | 72 | :math:`[0, 2\pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ -| expf | 14 | 9 | 58 | 42 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +| expf | 9 | 7 | 44 | 38 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ | exp2f | 25 | 8 | 81 | 37 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA | +--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+ diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 7b6a378b4616c5..2658b9691d2211 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -483,6 +483,8 @@ add_entrypoint_object( DEPENDS .common_constants libc.src.__support.FPUtil.fputil + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer libc.src.__support.FPUtil.polyeval libc.include.math COMPILE_OPTIONS diff --git a/libc/src/math/generic/expf.cpp b/libc/src/math/generic/expf.cpp index 948b101b755ebd..1003699cbf5f11 100644 --- a/libc/src/math/generic/expf.cpp +++ b/libc/src/math/generic/expf.cpp @@ -10,9 +10,10 @@ #include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2. 
#include "src/__support/FPUtil/BasicOperations.h" #include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FMA.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/common.h" #include @@ -80,13 +81,10 @@ LLVM_LIBC_FUNCTION(float, expf, (float x)) { // generated by Sollya. // x_hi = (hi + mid) * 2^7 = round(x * 2^7). - // The default rounding mode for float-to-int conversion in C++ is - // round-toward-zero. To make it round-to-nearest, we add (-1)^sign(x) * 0.5 - // before conversion. - int x_hi = static_cast<int>(x * 0x1.0p7f + (xbits.get_sign() ? -0.5f : 0.5f)); + float kf = fputil::nearest_integer(x * 0x1.0p7f); // Subtract (hi + mid) from x to get lo. - x -= static_cast<float>(x_hi) * 0x1.0p-7f; - double xd = static_cast<double>(x); + double xd = static_cast<double>(fputil::multiply_add(kf, -0x1.0p-7f, x)); + int x_hi = static_cast<int>(kf); x_hi += 104 << 7; // hi = x_hi >> 7 double exp_hi = EXP_M1[x_hi >> 7];