Skip to content

Commit

Permalink
Attempt to enable SIMD if possible (#28)
Browse files Browse the repository at this point in the history
Signed-off-by: Juan Cruz Viotti <jv@jviotti.com>
  • Loading branch information
jviotti authored Jan 23, 2025
1 parent 2ce51f8 commit 1827a68
Showing 1 changed file with 39 additions and 0 deletions.
39 changes: 39 additions & 0 deletions cmake/noa/defaults.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,42 @@ if(NOA_COMPILER_LLVM AND CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT BUILD_SHARE
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto=full")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -flto=full")
endif()

# Attempt to enable SIMD (SSE/AVX/NEON)
#
# The probes below only establish that the *compiler* accepts a given flag;
# they say nothing about the CPU that will eventually run the binaries. A
# build that picks up AVX2 here can therefore fail with an illegal
# instruction on a machine without AVX2. NOA_SIMD defaults to ON, which keeps
# the historical behaviour, and can be turned off (-DNOA_SIMD=OFF) by
# packagers, cross-compilers, or anyone targeting a wider range of CPUs.
option(NOA_SIMD "Attempt to enable SIMD compiler flags (SSE/AVX/NEON)" ON)

include(CheckCXXCompilerFlag)
if(NOT NOA_SIMD)
  message(STATUS "SIMD compiler flags disabled (NOA_SIMD=OFF)")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86|AMD64")
  if(MSVC)
    # MSVC-style command line: prefer AVX2 and otherwise fall back to SSE2.
    check_cxx_compiler_flag("/arch:AVX2" COMPILER_SUPPORTS_AVX2)
    if(COMPILER_SUPPORTS_AVX2)
      message(STATUS "Enabling SIMD AVX2")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
    elseif(NOT CMAKE_CL_64)
      # The SSE2 fallback is only applied when cl is not targeting a 64-bit
      # architecture; 64-bit targets are left with the compiler's defaults.
      message(STATUS "Enabling SIMD SSE2")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2")
    endif()
  else()
    # GCC/Clang-style command line: try the widest instruction set first and
    # step down. Each narrower probe only runs when the wider one failed, so
    # at most one of these flags is ever appended.
    check_cxx_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)
    if(COMPILER_SUPPORTS_AVX2)
      message(STATUS "Enabling SIMD AVX2")
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2")
    else()
      check_cxx_compiler_flag("-msse4.2" COMPILER_SUPPORTS_SSE42)
      if(COMPILER_SUPPORTS_SSE42)
        message(STATUS "Enabling SIMD SSE4.2")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
      else()
        check_cxx_compiler_flag("-msse2" COMPILER_SUPPORTS_SSE2)
        if(COMPILER_SUPPORTS_SSE2)
          message(STATUS "Enabling SIMD SSE2")
          set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
        endif()
      endif()
    endif()
  endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64" AND NOT MSVC)
  # 64-bit ARM with a non-MSVC compiler: request the ARMv8-A baseline with
  # floating-point and Advanced SIMD (NEON) explicitly enabled.
  # NOTE(review): this appends a -march after any user-provided flags in
  # CMAKE_CXX_FLAGS, so it presumably takes precedence over an earlier
  # user-supplied -march - confirm this is intended.
  check_cxx_compiler_flag("-march=armv8-a+fp+simd" COMPILER_SUPPORTS_NEON)
  if(COMPILER_SUPPORTS_NEON)
    message(STATUS "Enabling SIMD NEON")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+fp+simd")
  endif()
endif()

8 comments on commit 1827a68

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/llvm)

Benchmark suite Current: 1827a68 Previous: 2ce51f8 Ratio
Regex_Lower_S_Or_Upper_S_Asterisk 2.2117420016113742 ns/iter 2.199307607881867 ns/iter 1.01
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 2.1856870917154474 ns/iter 2.1968042504597416 ns/iter 0.99
Regex_Period_Asterisk 2.2080451144387294 ns/iter 2.197131998395807 ns/iter 1.00
Regex_Group_Period_Asterisk_Group 2.1885726952136406 ns/iter 2.222642229197202 ns/iter 0.98
Regex_Period_Plus 2.4890536570416035 ns/iter 2.486941810793137 ns/iter 1.00
Regex_Period 2.254275859716617 ns/iter 2.487411261677186 ns/iter 0.91
Regex_Caret_Period_Plus_Dollar 2.2051475353382446 ns/iter 2.4932047878982577 ns/iter 0.88
Regex_Caret_Group_Period_Plus_Group_Dollar 2.249321076219711 ns/iter 2.4881038443075294 ns/iter 0.90
Regex_Caret_Period_Asterisk_Dollar 2.48697385709702 ns/iter 3.419975495537027 ns/iter 0.73
Regex_Caret_Group_Period_Asterisk_Group_Dollar 2.4877295228283076 ns/iter 3.4183240475531855 ns/iter 0.73
Regex_Caret_X_Hyphen 12.489500915661909 ns/iter 13.058553294204932 ns/iter 0.96
Regex_Period_Md_Dollar 73.24219732286956 ns/iter 73.44419609288495 ns/iter 1.00
Regex_Caret_Slash_Period_Asterisk 7.149563577958759 ns/iter 7.158550399903598 ns/iter 1.00
Regex_Caret_Period_Range_Dollar 3.7309168534879475 ns/iter 4.041534228320043 ns/iter 0.92
Regex_Nested_Backtrack 493.29701815372545 ns/iter 474.83861242869085 ns/iter 1.04

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (macos/llvm)

Benchmark suite Current: 1827a68 Previous: 2ce51f8 Ratio
Regex_Lower_S_Or_Upper_S_Asterisk 2.0292583240443385 ns/iter 1.5700487812441373 ns/iter 1.29
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 1.7211370678939077 ns/iter 1.5831157713256598 ns/iter 1.09
Regex_Period_Asterisk 1.7013607896039016 ns/iter 1.6137278910273962 ns/iter 1.05
Regex_Group_Period_Asterisk_Group 1.7134345782992109 ns/iter 1.567847283893261 ns/iter 1.09
Regex_Period_Plus 2.0253652580294386 ns/iter 1.8859542892340735 ns/iter 1.07
Regex_Period 2.0521606991779366 ns/iter 1.8825247508224847 ns/iter 1.09
Regex_Caret_Period_Plus_Dollar 2.4371348035374303 ns/iter 1.9015837522468932 ns/iter 1.28
Regex_Caret_Group_Period_Plus_Group_Dollar 2.03678016439832 ns/iter 1.8902626676425072 ns/iter 1.08
Regex_Caret_Period_Asterisk_Dollar 1.786334985753373 ns/iter 1.5701413612268826 ns/iter 1.14
Regex_Caret_Group_Period_Asterisk_Group_Dollar 1.711683057968091 ns/iter 1.5733172588936801 ns/iter 1.09
Regex_Caret_X_Hyphen 7.134352031312454 ns/iter 6.585900479371276 ns/iter 1.08
Regex_Period_Md_Dollar 78.93839819181102 ns/iter 67.57618586760034 ns/iter 1.17
Regex_Caret_Slash_Period_Asterisk 7.353400839082598 ns/iter 5.016110988008529 ns/iter 1.47
Regex_Caret_Period_Range_Dollar 2.626148495250189 ns/iter 2.193084784589622 ns/iter 1.20
Regex_Nested_Backtrack 820.4259600058755 ns/iter 728.014448737903 ns/iter 1.13

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/gcc)

Benchmark suite Current: 1827a68 Previous: 2ce51f8 Ratio
Regex_Lower_S_Or_Upper_S_Asterisk 3.1249287837788944 ns/iter 3.770786870485235 ns/iter 0.83
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 3.119018875195122 ns/iter 3.735314059678621 ns/iter 0.84
Regex_Period_Asterisk 2.8015428769508715 ns/iter 3.727509428353618 ns/iter 0.75
Regex_Group_Period_Asterisk_Group 2.798729127164951 ns/iter 3.7295070585594408 ns/iter 0.75
Regex_Period_Plus 3.420409035207511 ns/iter 3.728761856055969 ns/iter 0.92
Regex_Period 3.729862031355302 ns/iter 3.728782825792606 ns/iter 1.00
Regex_Caret_Period_Plus_Dollar 3.420930968479916 ns/iter 3.7316578515933205 ns/iter 0.92
Regex_Caret_Group_Period_Plus_Group_Dollar 3.7304511730561876 ns/iter 3.7284569965221452 ns/iter 1.00
Regex_Caret_Period_Asterisk_Dollar 4.351715551275188 ns/iter 3.737368029768578 ns/iter 1.16
Regex_Caret_Group_Period_Asterisk_Group_Dollar 4.037628557356127 ns/iter 3.72916489460547 ns/iter 1.08
Regex_Caret_X_Hyphen 13.047107498504454 ns/iter 12.43627743188768 ns/iter 1.05
Regex_Period_Md_Dollar 90.095425579982 ns/iter 89.74251022654886 ns/iter 1.00
Regex_Caret_Slash_Period_Asterisk 7.152522430265397 ns/iter 7.467573820468832 ns/iter 0.96
Regex_Caret_Period_Range_Dollar 4.042552699930239 ns/iter 4.352194698386374 ns/iter 0.93
Regex_Nested_Backtrack 820.6497638430028 ns/iter 829.1964580087097 ns/iter 0.99

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (windows/msvc)

Benchmark suite Current: 1827a68 Previous: 2ce51f8 Ratio
Regex_Lower_S_Or_Upper_S_Asterisk 6.5684910714287925 ns/iter 7.0940446428569 ns/iter 0.93
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 6.843912946428483 ns/iter 7.175428571428403 ns/iter 0.95
Regex_Period_Asterisk 6.660643973214673 ns/iter 6.967736607142529 ns/iter 0.96
Regex_Group_Period_Asterisk_Group 6.949111607143114 ns/iter 6.954904017858001 ns/iter 1.00
Regex_Period_Plus 7.109005580356557 ns/iter 7.5241127232139595 ns/iter 0.94
Regex_Period 6.894577008929354 ns/iter 7.3141104910717 ns/iter 0.94
Regex_Caret_Period_Plus_Dollar 7.207696428571506 ns/iter 7.178859342950812 ns/iter 1.00
Regex_Caret_Group_Period_Plus_Group_Dollar 7.089810267856426 ns/iter 7.439654017857527 ns/iter 0.95
Regex_Caret_Period_Asterisk_Dollar 6.510492857142733 ns/iter 7.190148437499846 ns/iter 0.91
Regex_Caret_Group_Period_Asterisk_Group_Dollar 6.6405223214286275 ns/iter 6.911563616071196 ns/iter 0.96
Regex_Caret_X_Hyphen 14.349668239510743 ns/iter 14.30859151785567 ns/iter 1.00
Regex_Period_Md_Dollar 148.97986611696336 ns/iter 139.81977500804607 ns/iter 1.07
Regex_Caret_Slash_Period_Asterisk 9.680611563926808 ns/iter 10.40948392857136 ns/iter 0.93
Regex_Caret_Period_Range_Dollar 7.079476562500885 ns/iter 7.639246651785265 ns/iter 0.93
Regex_Nested_Backtrack 590.8766964286169 ns/iter 601.5742857142317 ns/iter 0.98

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/llvm)

Benchmark suite Current: 1827a68 Previous: 2ce51f8 Ratio
Regex_Lower_S_Or_Upper_S_Asterisk 2.2499235959921773 ns/iter 2.199307607881867 ns/iter 1.02
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 2.2060396860885505 ns/iter 2.1968042504597416 ns/iter 1.00
Regex_Period_Asterisk 2.208958779685412 ns/iter 2.197131998395807 ns/iter 1.01
Regex_Group_Period_Asterisk_Group 2.189405208109492 ns/iter 2.222642229197202 ns/iter 0.99
Regex_Period_Plus 2.4854403152427045 ns/iter 2.486941810793137 ns/iter 1.00
Regex_Period 2.4893515990261252 ns/iter 2.487411261677186 ns/iter 1.00
Regex_Caret_Period_Plus_Dollar 2.4858889317818442 ns/iter 2.4932047878982577 ns/iter 1.00
Regex_Caret_Group_Period_Plus_Group_Dollar 2.486585320665194 ns/iter 2.4881038443075294 ns/iter 1.00
Regex_Caret_Period_Asterisk_Dollar 3.4203180200805154 ns/iter 3.419975495537027 ns/iter 1.00
Regex_Caret_Group_Period_Asterisk_Group_Dollar 3.4180964722911624 ns/iter 3.4183240475531855 ns/iter 1.00
Regex_Caret_X_Hyphen 12.532071199785058 ns/iter 13.058553294204932 ns/iter 0.96
Regex_Period_Md_Dollar 73.21780860447687 ns/iter 73.44419609288495 ns/iter 1.00
Regex_Caret_Slash_Period_Asterisk 7.149685542982376 ns/iter 7.158550399903598 ns/iter 1.00
Regex_Caret_Period_Range_Dollar 3.730564743668316 ns/iter 4.041534228320043 ns/iter 0.92
Regex_Nested_Backtrack 494.39881138612026 ns/iter 474.83861242869085 ns/iter 1.04

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (linux/gcc)

Benchmark suite Current: 1827a68 Previous: 2ce51f8 Ratio
Regex_Lower_S_Or_Upper_S_Asterisk 3.1174134557305466 ns/iter 3.770786870485235 ns/iter 0.83
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 3.1151028120611066 ns/iter 3.735314059678621 ns/iter 0.83
Regex_Period_Asterisk 2.796373340332538 ns/iter 3.727509428353618 ns/iter 0.75
Regex_Group_Period_Asterisk_Group 2.796204388117546 ns/iter 3.7295070585594408 ns/iter 0.75
Regex_Period_Plus 3.4199081244947207 ns/iter 3.728761856055969 ns/iter 0.92
Regex_Period 3.731521728928112 ns/iter 3.728782825792606 ns/iter 1.00
Regex_Caret_Period_Plus_Dollar 3.4202277649928323 ns/iter 3.7316578515933205 ns/iter 0.92
Regex_Caret_Group_Period_Plus_Group_Dollar 3.7328730782460786 ns/iter 3.7284569965221452 ns/iter 1.00
Regex_Caret_Period_Asterisk_Dollar 4.351309184848928 ns/iter 3.737368029768578 ns/iter 1.16
Regex_Caret_Group_Period_Asterisk_Group_Dollar 4.041458569886005 ns/iter 3.72916489460547 ns/iter 1.08
Regex_Caret_X_Hyphen 13.051231052360245 ns/iter 12.43627743188768 ns/iter 1.05
Regex_Period_Md_Dollar 92.96953215671292 ns/iter 89.74251022654886 ns/iter 1.04
Regex_Caret_Slash_Period_Asterisk 7.150552286374502 ns/iter 7.467573820468832 ns/iter 0.96
Regex_Caret_Period_Range_Dollar 3.5754496964930325 ns/iter 4.352194698386374 ns/iter 0.82
Regex_Nested_Backtrack 914.056690844017 ns/iter 829.1964580087097 ns/iter 1.10

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (macos/llvm)

Benchmark suite Current: 1827a68 Previous: 2ce51f8 Ratio
Regex_Lower_S_Or_Upper_S_Asterisk 1.6820928349963942 ns/iter 1.5700487812441373 ns/iter 1.07
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 1.6841538556789666 ns/iter 1.5831157713256598 ns/iter 1.06
Regex_Period_Asterisk 1.6814032703079635 ns/iter 1.6137278910273962 ns/iter 1.04
Regex_Group_Period_Asterisk_Group 1.686648928007517 ns/iter 1.567847283893261 ns/iter 1.08
Regex_Period_Plus 2.0955225869682144 ns/iter 1.8859542892340735 ns/iter 1.11
Regex_Period 2.372724226203843 ns/iter 1.8825247508224847 ns/iter 1.26
Regex_Caret_Period_Plus_Dollar 2.2704014096279033 ns/iter 1.9015837522468932 ns/iter 1.19
Regex_Caret_Group_Period_Plus_Group_Dollar 2.2705007399860495 ns/iter 1.8902626676425072 ns/iter 1.20
Regex_Caret_Period_Asterisk_Dollar 1.8234157881316468 ns/iter 1.5701413612268826 ns/iter 1.16
Regex_Caret_Group_Period_Asterisk_Group_Dollar 1.7834759081846323 ns/iter 1.5733172588936801 ns/iter 1.13
Regex_Caret_X_Hyphen 7.664126007106659 ns/iter 6.585900479371276 ns/iter 1.16
Regex_Period_Md_Dollar 83.70844307358703 ns/iter 67.57618586760034 ns/iter 1.24
Regex_Caret_Slash_Period_Asterisk 6.36774776376604 ns/iter 5.016110988008529 ns/iter 1.27
Regex_Caret_Period_Range_Dollar 2.562488044850213 ns/iter 2.193084784589622 ns/iter 1.17
Regex_Nested_Backtrack 815.5803431345256 ns/iter 728.014448737903 ns/iter 1.12

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark (windows/msvc)

Benchmark suite Current: 1827a68 Previous: 2ce51f8 Ratio
Regex_Lower_S_Or_Upper_S_Asterisk 6.610266741071362 ns/iter 7.0940446428569 ns/iter 0.93
Regex_Caret_Lower_S_Or_Upper_S_Asterisk_Dollar 6.871774553571797 ns/iter 7.175428571428403 ns/iter 0.96
Regex_Period_Asterisk 6.623035714285257 ns/iter 6.967736607142529 ns/iter 0.95
Regex_Group_Period_Asterisk_Group 6.763704464285679 ns/iter 6.954904017858001 ns/iter 0.97
Regex_Period_Plus 7.163338169642636 ns/iter 7.5241127232139595 ns/iter 0.95
Regex_Period 6.978045758928734 ns/iter 7.3141104910717 ns/iter 0.95
Regex_Caret_Period_Plus_Dollar 6.836851562500103 ns/iter 7.178859342950812 ns/iter 0.95
Regex_Caret_Group_Period_Plus_Group_Dollar 7.071349522002914 ns/iter 7.439654017857527 ns/iter 0.95
Regex_Caret_Period_Asterisk_Dollar 6.6464732142864795 ns/iter 7.190148437499846 ns/iter 0.92
Regex_Caret_Group_Period_Asterisk_Group_Dollar 6.597947544643949 ns/iter 6.911563616071196 ns/iter 0.95
Regex_Caret_X_Hyphen 12.02299285714251 ns/iter 14.30859151785567 ns/iter 0.84
Regex_Period_Md_Dollar 149.73754464285042 ns/iter 139.81977500804607 ns/iter 1.07
Regex_Caret_Slash_Period_Asterisk 9.751679687498793 ns/iter 10.40948392857136 ns/iter 0.94
Regex_Caret_Period_Range_Dollar 6.91976648696527 ns/iter 7.639246651785265 ns/iter 0.91
Regex_Nested_Backtrack 590.7044642856566 ns/iter 601.5742857142317 ns/iter 0.98

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.