Skip to content

Commit

Permalink
Merge branch 'main' into ci-for-nvhpc-debug-build
Browse files Browse the repository at this point in the history
  • Loading branch information
ericniebler authored Sep 25, 2023
2 parents 507d6cd + 9939910 commit cd1ebb4
Show file tree
Hide file tree
Showing 73 changed files with 3,254 additions and 1,635 deletions.
4 changes: 4 additions & 0 deletions .github/copy-pr-bot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Configuration file for `copy-pr-bot` GitHub App
# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/

enabled: true
4 changes: 0 additions & 4 deletions .github/ops-bot.yaml

This file was deleted.

14 changes: 12 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ target_compile_options(stdexec_executable_flags INTERFACE
-ferror-limit=0
-fmacro-backtrace-limit=0
-ftemplate-backtrace-limit=0>
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<VERSION_GREATER:$<CXX_COMPILER_VERSION>,23.3.0>>:
-ftemplate-backtrace-limit 0>
)

# # Always enable colored output
Expand All @@ -197,6 +199,14 @@ target_compile_options(stdexec_executable_flags INTERFACE
-include stdexec/__detail/__force_include.hpp>
)

# Support target for examples and tests
add_library(nvexec_executable_flags INTERFACE)

target_compile_options(nvexec_executable_flags INTERFACE
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-gpu=nomanaged>)
target_link_options(nvexec_executable_flags INTERFACE
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-gpu=nomanaged>)

# Set up nvexec library
option(STDEXEC_ENABLE_CUDA "Enable CUDA targets for non-nvc++ compilers" OFF)
if(CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC")
Expand All @@ -218,9 +228,9 @@ if(STDEXEC_ENABLE_CUDA)
target_link_libraries(nvexec INTERFACE STDEXEC::stdexec)

target_compile_options(nvexec INTERFACE
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=nomanaged -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)
target_link_options(nvexec INTERFACE
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=nomanaged -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)
$<$<AND:$<CXX_COMPILER_ID:NVHPC>,$<COMPILE_LANGUAGE:CXX>>:-stdpar -gpu=cc${CMAKE_CUDA_ARCHITECTURES}>)

if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "NVHPC"))
include(rapids-cuda)
Expand Down
4 changes: 3 additions & 1 deletion examples/algorithms/retry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,9 @@ struct _retry_sender {
Env,
stdexec::completion_signatures<stdexec::set_error_t(std::exception_ptr)>,
_value,
_error>;
_error> {
return {};
}

template <stdexec::receiver R>
friend _op<S, R> tag_invoke(stdexec::connect_t, _retry_sender&& self, R r) {
Expand Down
4 changes: 3 additions & 1 deletion examples/algorithms/then.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ struct _then_sender {

template <class Env>
friend auto tag_invoke(stdexec::get_completion_signatures_t, _then_sender&&, Env)
-> _completions_t<Env>;
-> _completions_t<Env> {
return {};
}

// Connect:
template <class R>
Expand Down
4 changes: 4 additions & 0 deletions examples/nvexec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ add_library(nvexec_example INTERFACE)
target_include_directories(nvexec_example
INTERFACE ${CMAKE_CURRENT_LIST_DIR}
)
target_link_libraries(nvexec_example
INTERFACE nvexec_executable_flags
)

add_library(stdpar_multicore INTERFACE)
target_include_directories(stdpar_multicore
Expand Down Expand Up @@ -103,6 +106,7 @@ set(nvexec_gpu_examples
" example.nvexec.reduce : reduce.cpp"
" example.nvexec.split : split.cpp"
" example.nvexec.nvtx : nvtx.cpp"
" example.nvexec.launch : launch.cpp"
"example.nvexec.maxwell_gpu_s : maxwell_gpu_s.cpp"
"example.nvexec.maxwell_gpu_m : maxwell_gpu_m.cpp"
)
Expand Down
65 changes: 65 additions & 0 deletions examples/nvexec/launch.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright (c) 2022 NVIDIA Corporation
*
* Licensed under the Apache License Version 2.0 with LLVM Exceptions
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* https://llvm.org/LICENSE.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <nvexec/stream_context.cuh>
#include <stdexec/execution.hpp>

#include <cub/cub.cuh>
#include <thrust/device_vector.h>

#include <algorithm>
#include <cassert>
#include <iostream>
#include <numeric>
#include <vector>

// Problem size and launch geometry shared by the host reference and the
// GPU pipeline in main().
constexpr std::size_t N = 2 * 1024;
constexpr std::size_t THREAD_BLOCK_SIZE = 128u;
constexpr std::size_t NUM_BLOCKS = (N + THREAD_BLOCK_SIZE - 1) / THREAD_BLOCK_SIZE;

#define scaling 2

/// Host-side reference computation: the sum of `scaling * i` for
/// i in [1, N]. Used to validate the GPU result printed by main().
int bench() {
  std::vector<int> values(N);
  std::iota(values.begin(), values.end(), 1);
  int total = 0;
  for (int v : values) {
    total += v * scaling;
  }
  return total;
}

int main() {
  // Device-side buffer of N ints, zero-initialized, then filled with 1..N.
  // thrust::device_vector iterators dispatch iota to the device.
  thrust::device_vector<int> input(N, 0);
  std::iota(input.begin(), input.end(), 1);
  // Raw device pointers handed through the sender pipeline; they stay valid
  // because `input` outlives the sync_wait below.
  int* first = thrust::raw_pointer_cast(input.data());
  int* last = first + input.size();

  nvexec::stream_context stream{};

  // Pipeline: transfer (first, last) onto the CUDA stream scheduler, scale
  // every element in a custom kernel launch, then reduce in a then-stage.
  auto snd = stdexec::transfer_just(stream.get_scheduler(), first, last)
           | nvexec::launch(
               {NUM_BLOCKS, THREAD_BLOCK_SIZE},
               [](cudaStream_t stm, int* first, int* last) {
                 assert(nvexec::is_on_gpu());
                 // One thread per element; the bounds check guards the
                 // partially-filled tail block.
                 int32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
                 if (idx < (last - first)) {
                   first[idx] *= scaling;
                 }
               })
           | stdexec::then([](int* first, int* last) {
               assert(nvexec::is_on_gpu());
               // NOTE(review): std::accumulate inside then() appears to run
               // on the device as a single-threaded reduction — confirm this
               // is intended rather than nvexec::reduce.
               return std::accumulate(first, last, 0);
             });

  // Block until the pipeline completes; value() throws/aborts if the
  // sender completed with an error or was stopped.
  auto [result] = stdexec::sync_wait(std::move(snd)).value();

  // Both lines should print the same number (see bench() above).
  std::cout << "result: " << result << std::endl;
  std::cout << "benchmark: " << bench() << std::endl;
}
Loading

0 comments on commit cd1ebb4

Please sign in to comment.