Skip to content

Commit 628fbbe

Browse files
committed
[libc] Use nearest_integer instructions to improve expm1f performance.
Use nearest_integer instructions to improve expm1f performance.

Performance tests with CORE-MATH's perf tool:

Before the patch:
```
$ ./perf.sh expm1f
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.31
GNU libc release: stable
CORE-MATH reciprocal throughput : 10.096
System LIBC reciprocal throughput : 44.036
LIBC reciprocal throughput : 11.575

$ ./perf.sh expm1f --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.31
GNU libc release: stable
CORE-MATH latency : 42.239
System LIBC latency : 122.815
LIBC latency : 50.122
```

After the patch:
```
$ ./perf.sh expm1f
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.31
GNU libc release: stable
CORE-MATH reciprocal throughput : 10.046
System LIBC reciprocal throughput : 43.899
LIBC reciprocal throughput : 9.179

$ ./perf.sh expm1f --latency
LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a
GNU libc version: 2.31
GNU libc release: stable
CORE-MATH latency : 42.078
System LIBC latency : 120.488
LIBC latency : 41.528
```

Reviewed By: zimmermann6

Differential Revision: https://reviews.llvm.org/D130502
1 parent 91ee672 commit 628fbbe

File tree

3 files changed

+7
-4
lines changed

3 files changed

+7
-4
lines changed

libc/docs/math.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ Performance
203203
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
204204
| exp2f | 25 | 8 | 81 | 37 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
205205
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
206-
| expm1f | 14 | 53 | 59 | 146 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
206+
| expm1f | 9 | 44 | 42 | 121 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
207207
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
208208
| fmodf | 73 | 263 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
209209
| +-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+

libc/src/math/generic/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,7 @@ add_entrypoint_object(
515515
.common_constants
516516
libc.src.__support.FPUtil.fputil
517517
libc.src.__support.FPUtil.multiply_add
518+
libc.src.__support.FPUtil.nearest_integer
518519
libc.src.__support.FPUtil.polyeval
519520
libc.include.math
520521
COMPILE_OPTIONS

libc/src/math/generic/expm1f.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#include "src/__support/FPUtil/FMA.h"
1414
#include "src/__support/FPUtil/FPBits.h"
1515
#include "src/__support/FPUtil/PolyEval.h"
16+
#include "src/__support/FPUtil/multiply_add.h"
17+
#include "src/__support/FPUtil/nearest_integer.h"
1618
#include "src/__support/common.h"
1719

1820
#include <errno.h>
@@ -133,10 +135,10 @@ LLVM_LIBC_FUNCTION(float, expm1f, (float x)) {
133135
// generated by Sollya.
134136

135137
// x_hi = hi + mid.
136-
int x_hi = static_cast<int>(x * 0x1.0p7f + (xbits.get_sign() ? -0.5f : 0.5f));
138+
float kf = fputil::nearest_integer(x * 0x1.0p7f);
139+
int x_hi = static_cast<int>(kf);
137140
// Subtract (hi + mid) from x to get lo.
138-
x -= static_cast<float>(x_hi) * 0x1.0p-7f;
139-
double xd = static_cast<double>(x);
141+
double xd = static_cast<double>(fputil::multiply_add(kf, -0x1.0p-7f, x));
140142
x_hi += 104 << 7;
141143
// hi = x_hi >> 7
142144
double exp_hi = EXP_M1[x_hi >> 7];

0 commit comments

Comments
 (0)