Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support native conversions without __fp16/_Float16 types #29

Merged
merged 1 commit into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,27 @@ jobs:
run: cmake --build build --parallel
- name: Test
run: ctest --test-dir build --parallel --output-on-failure
# CI job: configure, build and test a 32-bit x86 Linux build with F16C enabled,
# using the Ninja generator and a Release build type.
cmake-linux-x86-f16c:
runs-on: ubuntu-20.04
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Update apt
run: sudo apt update
# The multilib packages provide the 32-bit toolchain support needed by -m32 below.
- name: Install multilib gcc
run: sudo apt install gcc-multilib g++-multilib
- name: Install ninja
run: sudo apt install ninja-build
- name: Configure
run: cmake -Bbuild -S. -G Ninja -DCMAKE_BUILD_TYPE=Release -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON
env:
# -m32 targets 32-bit x86; -mf16c enables the F16C instruction set so the
# compiler defines __F16C__ and the F16C-gated conversion code is compiled.
CFLAGS: "-m32 -mf16c"
CXXFLAGS: "-m32 -mf16c"
# The link step must also target 32-bit to match the object files.
LDFLAGS: "-m32"
- name: Build
run: cmake --build build --parallel
- name: Test
run: ctest --test-dir build --parallel --output-on-failure
cmake-macos-x86_64:
runs-on: macos-12
timeout-minutes: 15
Expand Down Expand Up @@ -115,6 +136,20 @@ jobs:
run: cmake --build build --config Release --parallel
- name: Test
run: ctest --test-dir build --build-config Release --parallel --output-on-failure
# CI job: configure, build and test a 32-bit (Win32) Visual Studio 2019 build
# compiled with /arch:AVX2.
cmake-windows-x86-avx2:
runs-on: windows-2019
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Configure
run: cmake -Bbuild -S. -G "Visual Studio 16 2019" -A Win32 -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON
env:
# /arch:AVX2 makes MSVC define __AVX2__, which the headers check (together
# with _M_IX86/_M_X64) before using the hardware half-precision intrinsics.
CFLAGS: "/arch:AVX2"
CXXFLAGS: "/arch:AVX2"
# The Visual Studio generator is multi-config, so the configuration is chosen
# at build and test time rather than at configure time.
- name: Build
run: cmake --build build --config Release --parallel
- name: Test
run: ctest --test-dir build --build-config Release --parallel --output-on-failure
cmake-windows-x64:
runs-on: windows-2019
timeout-minutes: 15
Expand All @@ -126,6 +161,20 @@ jobs:
run: cmake --build build --config Release --parallel
- name: Test
run: ctest --test-dir build --build-config Release --parallel --output-on-failure
# CI job: configure, build and test a 64-bit (x64) Visual Studio 2019 build
# compiled with /arch:AVX2.
cmake-windows-x64-avx2:
runs-on: windows-2019
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Configure
run: cmake -Bbuild -S. -G "Visual Studio 16 2019" -A x64 -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON
env:
# /arch:AVX2 makes MSVC define __AVX2__, which the headers check (together
# with _M_IX86/_M_X64) before using the hardware half-precision intrinsics.
CFLAGS: "/arch:AVX2"
CXXFLAGS: "/arch:AVX2"
# The Visual Studio generator is multi-config, so the configuration is chosen
# at build and test time rather than at configure time.
- name: Build
run: cmake --build build --config Release --parallel
- name: Test
run: ctest --test-dir build --build-config Release --parallel --output-on-failure
cmake-windows-arm64:
runs-on: windows-2019
timeout-minutes: 15
Expand Down
86 changes: 58 additions & 28 deletions include/fp16/fp16.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@
#include <math.h>
#endif

#ifdef _MSC_VER
#include <intrin.h>
#endif

#include <fp16/bitcasts.h>
#include <fp16/macros.h>

#if defined(_MSC_VER)
#include <intrin.h>
#endif
#if defined(__F16C__) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE
#include <immintrin.h>
#endif
#if (defined(__aarch64__) || defined(_M_ARM64)) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE
#include <arm_neon.h>
#endif


/*
* Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to
Expand Down Expand Up @@ -107,18 +113,30 @@ static inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) {
* floating-point operations and bitcasts between integer and floating-point variables.
*/
static inline float fp16_ieee_to_fp32_value(uint16_t h) {
#if FP16_USE_FLOAT16_TYPE
union {
uint16_t as_bits;
_Float16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#elif FP16_USE_FP16_TYPE
union {
uint16_t as_bits;
__fp16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#if FP16_USE_NATIVE_CONVERSION
#if FP16_USE_FLOAT16_TYPE
union {
uint16_t as_bits;
_Float16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#elif FP16_USE_FP16_TYPE
union {
uint16_t as_bits;
__fp16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#else
#if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)
return _cvtsh_ss((unsigned short) h);
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128((int) (unsigned int) h)));
#elif defined(_M_ARM64) || defined(__aarch64__)
return vgetq_lane_f32(vcvt_f32_f16(vreinterpret_f16_u16(vdup_n_u16(h))), 0);
#else
#error "Archtecture- or compiler-specific implementation required"
#endif
#endif
#else
/*
* Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
Expand Down Expand Up @@ -236,18 +254,30 @@ static inline float fp16_ieee_to_fp32_value(uint16_t h) {
* floating-point operations and bitcasts between integer and floating-point variables.
*/
static inline uint16_t fp16_ieee_from_fp32_value(float f) {
#if FP16_USE_FLOAT16_TYPE
union {
_Float16 as_value;
uint16_t as_bits;
} fp16 = { (_Float16) f };
return fp16.as_bits;
#elif FP16_USE_FP16_TYPE
union {
__fp16 as_value;
uint16_t as_bits;
} fp16 = { (__fp16) f };
return fp16.as_bits;
#if FP16_USE_NATIVE_CONVERSION
#if FP16_USE_FLOAT16_TYPE
union {
_Float16 as_value;
uint16_t as_bits;
} fp16 = { (_Float16) f };
return fp16.as_bits;
#elif FP16_USE_FP16_TYPE
union {
__fp16 as_value;
uint16_t as_bits;
} fp16 = { (__fp16) f };
return fp16.as_bits;
#else
#if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)
return _cvtss_sh(f, _MM_FROUND_CUR_DIRECTION);
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
return (uint16_t) _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(f), _MM_FROUND_CUR_DIRECTION));
#elif defined(_M_ARM64) || defined(__aarch64__)
return vget_lane_u16(vcvt_f16_f32(vdupq_n_f32(f)), 0);
#else
#error "Archtecture- or compiler-specific implementation required"
#endif
#endif
#else
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
const float scale_to_inf = 0x1.0p+112f;
Expand Down
14 changes: 14 additions & 0 deletions include/fp16/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@
#ifndef FP16_MACROS_H
#define FP16_MACROS_H

/*
 * FP16_USE_NATIVE_CONVERSION selects whether the conversion routines use
 * hardware half-precision conversion instructions (1) or the portable
 * software implementation (0).
 *
 * A value supplied by the build (e.g. -DFP16_USE_NATIVE_CONVERSION=0) is left
 * untouched. Otherwise it defaults to 1 on any of these toolchain/target pairs:
 *   - GCC-compatible or Intel compiler with F16C enabled (__F16C__)
 *   - MSVC targeting x86 or x64 with AVX2 enabled (__AVX2__)
 *   - MSVC targeting ARM64
 *   - GCC-compatible compiler targeting AArch64
 * and to 0 everywhere else.
 */
#if !defined(FP16_USE_NATIVE_CONVERSION)
#if ((defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)) || \
    (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)) || \
    (defined(_MSC_VER) && defined(_M_ARM64)) || \
    (defined(__GNUC__) && defined(__aarch64__))
#define FP16_USE_NATIVE_CONVERSION 1
#else
#define FP16_USE_NATIVE_CONVERSION 0
#endif
#endif  // !defined(FP16_USE_NATIVE_CONVERSION)

#ifndef FP16_USE_FLOAT16_TYPE
#if !defined(__clang__) && !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ >= 12)
Expand Down