Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: option to turn on sse2 optimization #4666

Merged
merged 20 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: Install dependencies
run: brew install cmake boost flatbuffers ninja
- name: Configure
run: cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DWARNINGS=Off -DVW_BUILD_VW_C_WRAPPER=Off -DBUILD_TESTING=On -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On -DVW_INSTALL=Off
run: cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=${{matrix.build_type}} -DWARNINGS=Off -DVW_BUILD_VW_C_WRAPPER=Off -DBUILD_TESTING=On -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On -DVW_INSTALL=Off -DSTD_INV_SQRT=ON
- name: Build
run: cmake --build build --target all
- name: Unit tests
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/valgrind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
submodules: recursive
- name: Build C++ VW binary
run: |
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On -DSTD_INV_SQRT=ON
cmake --build build
- name: Upload vw binary
uses: actions/upload-artifact@v2
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/vendor_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ jobs:
-DWARNINGS=On
-DWARNING_AS_ERROR=On
-DVW_CXX_STANDARD=17
-DSTD_INV_SQRT=ON
- name: Build
run: cmake --build build
- name: Unit tests
Expand Down Expand Up @@ -85,6 +86,7 @@ jobs:
-DVW_ZLIB_SYS_DEP=Off
-DVW_BOOST_MATH_SYS_DEP=Off
-DVW_INSTALL=Off
-DSTD_INV_SQRT=ON
- name: Build
run: cmake --build "${{ env.CMAKE_BUILD_DIR }}" --config ${{ matrix.build_type }}
- name: Test run_tests.py
Expand Down Expand Up @@ -118,6 +120,7 @@ jobs:
-DVW_ZLIB_SYS_DEP=Off
-DVW_BOOST_MATH_SYS_DEP=Off
-DVW_INSTALL=Off
-DSTD_INV_SQRT=ON
- name: Build
run: cmake --build build
- name: Unit tests
Expand Down
2 changes: 1 addition & 1 deletion .scripts/linux/build-static-java.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ cd build
# /usr/local/bin/gcc + g++ is 9.2.0 version
cmake -E env LDFLAGS="-Wl,--exclude-libs,ALL -static-libgcc -static-libstdc++" cmake .. -DCMAKE_BUILD_TYPE=Release -DWARNINGS=Off -DBUILD_JAVA=On -DBUILD_DOCS=Off -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On\
-DBUILD_PYTHON=Off -DSTATIC_LINK_VW_JAVA=On -DCMAKE_C_COMPILER=/usr/local/bin/gcc -DCMAKE_CXX_COMPILER=/usr/local/bin/g++ \
-DBUILD_TESTING=Off -DVW_ZLIB_SYS_DEP=Off -DBUILD_SHARED_LIBS=Off -DVW_BUILD_LAS_WITH_SIMD=Off
-DBUILD_TESTING=Off -DVW_ZLIB_SYS_DEP=Off -DBUILD_SHARED_LIBS=Off -DVW_BUILD_LAS_WITH_SIMD=Off -DSTD_INV_SQRT=ON
NUM_PROCESSORS=$(nproc)
make vw_jni -j ${NUM_PROCESSORS}
2 changes: 1 addition & 1 deletion .scripts/linux/build-with-coverage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_DIR=$SCRIPT_DIR/../../
cd $REPO_DIR

cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DVW_GCOV=ON -DWARNINGS=OFF -DBUILD_JAVA=Off -DBUILD_PYTHON=Off -DBUILD_TESTING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DVW_GCOV=ON -DWARNINGS=OFF -DBUILD_JAVA=Off -DBUILD_PYTHON=Off -DBUILD_TESTING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On -DSTD_INV_SQRT=ON
cmake --build build
2 changes: 1 addition & 1 deletion .scripts/linux/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ cd $REPO_DIR
# If parameter 1 is not supplied, it defaults to Release
BUILD_CONFIGURATION=${1:-Release}

cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=${BUILD_CONFIGURATION} -DWARNINGS=Off -DWARNING_AS_ERROR=On -DVW_BUILD_VW_C_WRAPPER=Off -DBUILD_JAVA=On -DBUILD_PYTHON=Off -DBUILD_TESTING=On -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=${BUILD_CONFIGURATION} -DWARNINGS=Off -DWARNING_AS_ERROR=On -DVW_BUILD_VW_C_WRAPPER=Off -DBUILD_JAVA=On -DBUILD_PYTHON=Off -DBUILD_TESTING=On -DBUILD_EXPERIMENTAL_BINDING=On -DVW_FEAT_FLATBUFFERS=On -DVW_FEAT_CSV=On -DVW_FEAT_CB_GRAPH_FEEDBACK=On -DSTD_INV_SQRT=ON
cmake --build build --target all
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ option(VW_BUILD_VW_C_WRAPPER "Enable building the c_wrapper project" ON)
option(vw_BUILD_NET_CORE "Build .NET Core targets" OFF)
option(vw_BUILD_NET_FRAMEWORK "Build .NET Framework targets" OFF)
option(VW_BUILD_WASM "Add WASM target" OFF)
option(STD_INV_SQRT "Use standard library inverse square root" OFF)

if(VW_INSTALL AND NOT VW_ZLIB_SYS_DEP)
message(WARNING "Installing with a vendored version of zlib is not recommended. Use VW_ZLIB_SYS_DEP to use a system dependency or specify VW_INSTALL=OFF to silence this warning.")
Expand Down
8 changes: 8 additions & 0 deletions CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@
"VW_FEAT_CB_GRAPH_FEEDBACK": {
"type": "BOOL",
"value": "On"
},
"STD_INV_SQRT": {
"type": "BOOL",
"value": "On"
}
}
},
Expand Down Expand Up @@ -208,6 +212,10 @@
"VCPKG_TARGET_TRIPLET": {
"type": "STRING",
"value": "wasm32-emscripten"
},
"STD_INV_SQRT": {
"type": "BOOL",
"value": "On"
}
}
}
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def build_cmake(self, ext):
"-DBUILD_TESTING=Off",
"-DWARNINGS=Off",
"-DVW_FEAT_CB_GRAPH_FEEDBACK=On",
"-DSTD_INV_SQRT=On",
]

# This doesn't work as expected for Python3.6 and 3.7 on Windows.
Expand Down
4 changes: 4 additions & 0 deletions vowpalwabbit/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,10 @@ if (MSVC_IDE)
target_sources(vw_core PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_LIST_DIR}/vw_types.natvis> )
endif()

# When the STD_INV_SQRT option is enabled, expose it to the C++ sources as a
# preprocessor definition so they can test it with `#if defined(STD_INV_SQRT)`.
# PUBLIC scope means the definition is applied to vw_core itself and is also
# propagated to every target that links against vw_core.
if(STD_INV_SQRT)
target_compile_definitions(vw_core PUBLIC STD_INV_SQRT)
endif()

# Clang-cl on Windows has issues with our usage of SIMD types. Turn it off explicitly for Windows + clang-cl to mitigate.
# See issue #
if(WIN32 AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")
Expand Down
48 changes: 47 additions & 1 deletion vowpalwabbit/core/src/reductions/gd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,52 @@ void sync_weights(VW::workspace& all)
all.sd->contraction = 1.;
}

VW_WARNING_STATE_PUSH
VW_WARNING_DISABLE_UNUSED_FUNCTION
inline float quake_inv_sqrt(float x)
{
  // Fast approximate reciprocal square root (the Carmack/Quake/SGI bit trick):
  // derive a first guess by manipulating the float's bit pattern as an integer,
  // then sharpen it with a single Newton step. The result is an approximation of
  // 1/sqrt(x), not the exactly rounded value.
  static_assert(sizeof(int) == sizeof(float), "Floats and ints are converted between, they must be the same size.");

  const float half_x = x * 0.5f;

  // NOTE(review): reading a float object through an int lvalue (and back) is a
  // type pun that the C++ aliasing rules do not sanction; it is kept as-is to
  // preserve behaviour. A memcpy-based bit copy would be the portable form.
  int bits = *reinterpret_cast<int*>(&x);  // raw bit pattern of x
  bits = 0x5f3759d5 - (bits >> 1);         // magic-constant initial estimate
  const float guess = *reinterpret_cast<float*>(&bits);

  // One Newton iteration: y <- y * (1.5 - (x / 2) * y * y)
  return guess * (1.5f - half_x * guess * guess);
}
VW_WARNING_STATE_POP

// Returns 1/sqrt(x), or an approximation of it, using exactly one of four
// implementations selected at compile time (checked in this order):
//   1. STD_INV_SQRT defined                  -> 1.f / std::sqrt(x)
//   2. inline SIMD allowed and __ARM_NEON__  -> NEON estimate + two Newton-Raphson steps
//   3. inline SIMD allowed and __SSE2__      -> SSE reciprocal-sqrt estimate, no refinement
//   4. otherwise (incl. VW_NO_INLINE_SIMD)   -> quake_inv_sqrt(x)
// Only path 1 goes through std::sqrt; the other paths are hardware/bit-trick
// approximations, so results can differ slightly between build configurations.
//
// The branches are chained with #elif so that precisely one implementation is
// compiled. Previously the STD_INV_SQRT branch was closed with #endif, which
// left the SIMD/quake code compiled as unreachable statements after the return
// (dead code, unreachable-code warnings, and a needless dependency on the SIMD
// intrinsics being declared).
static inline float inv_sqrt(float x)
{
#if defined(STD_INV_SQRT)
  return 1.f / std::sqrt(x);
#elif !defined(VW_NO_INLINE_SIMD) && defined(__ARM_NEON__)
  // Propagate into vector
  float32x2_t v1 = vdup_n_f32(x);
  // Estimate
  float32x2_t e1 = vrsqrte_f32(v1);
  // N-R iteration 1
  float32x2_t e2 = vmul_f32(e1, vrsqrts_f32(v1, vmul_f32(e1, e1)));
  // N-R iteration 2
  float32x2_t e3 = vmul_f32(e2, vrsqrts_f32(v1, vmul_f32(e2, e2)));
  // Extract result
  return vget_lane_f32(e3, 0);
#elif !defined(VW_NO_INLINE_SIMD) && defined(__SSE2__)
  // Scalar reciprocal-sqrt estimate; x is reused as the load/store slot.
  __m128 eta = _mm_load_ss(&x);
  eta = _mm_rsqrt_ss(eta);
  _mm_store_ss(&x, eta);
  return x;
#else
  // No usable inline SIMD: fall back to the portable bit-trick approximation.
  return quake_inv_sqrt(x);
#endif
}

VW_WARNING_STATE_PUSH
VW_WARNING_DISABLE_COND_CONST_EXPR
template <bool sqrt_rate, bool feature_mask_off, size_t adaptive, size_t normalized, size_t spare>
Expand Down Expand Up @@ -580,7 +626,7 @@ inline float compute_rate_decay(power_data& s, float& fw)
float rate_decay = 1.f;
if (adaptive)
{
if (sqrt_rate) { rate_decay = 1.0f / std::sqrt(w[adaptive]); }
if (sqrt_rate) { rate_decay = inv_sqrt(w[adaptive]); }
else { rate_decay = powf(w[adaptive], s.minus_power_t); }
}
if VW_STD17_CONSTEXPR (normalized != 0)
Expand Down
Loading