Skip to content

Commit

Permalink
use direct methods
Browse files Browse the repository at this point in the history
  • Loading branch information
artv3 committed Feb 4, 2025
1 parent 5a2b824 commit 9d810b6
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 27 deletions.
52 changes: 26 additions & 26 deletions src/apps/DIFFUSION3DPA-Hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace rajaperf {
namespace apps {

template < size_t block_size >
__launch_bounds__(block_size)
__launch_bounds__(block_size)
__global__ void Diffusion3DPA(const Real_ptr Basis,
const Real_ptr dBasis, const Real_ptr D,
const Real_ptr X, Real_ptr Y, bool symmetric) {
Expand All @@ -32,73 +32,73 @@ __global__ void Diffusion3DPA(const Real_ptr Basis,

DIFFUSION3DPA_0_GPU;

GPU_FOREACH_THREAD(dz, z, DPA_D1D) {
GPU_FOREACH_THREAD(dy, y, DPA_D1D) {
GPU_FOREACH_THREAD(dx, x, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(dz, z, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(dy, y, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(dx, x, DPA_D1D) {
DIFFUSION3DPA_1;
}
}
}

if (threadIdx.z == 0) {
GPU_FOREACH_THREAD(dy, y, DPA_D1D) {
GPU_FOREACH_THREAD(qx, x, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(dy, y, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(qx, x, DPA_Q1D) {
DIFFUSION3DPA_2;
}
}
}
__syncthreads();
GPU_FOREACH_THREAD(dz, z, DPA_D1D) {
GPU_FOREACH_THREAD(dy, y, DPA_D1D) {
GPU_FOREACH_THREAD(qx, x, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(dz, z, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(dy, y, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(qx, x, DPA_Q1D) {
DIFFUSION3DPA_3;
}
}
}
__syncthreads();
GPU_FOREACH_THREAD(dz, z, DPA_D1D) {
GPU_FOREACH_THREAD(qy, y, DPA_Q1D) {
GPU_FOREACH_THREAD(qx, x, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(dz, z, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(qy, y, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(qx, x, DPA_Q1D) {
DIFFUSION3DPA_4;
}
}
}
__syncthreads();
GPU_FOREACH_THREAD(qz, z, DPA_Q1D) {
GPU_FOREACH_THREAD(qy, y, DPA_Q1D) {
GPU_FOREACH_THREAD(qx, x, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(qz, z, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(qy, y, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(qx, x, DPA_Q1D) {
DIFFUSION3DPA_5;
}
}
}
__syncthreads();
if (threadIdx.z == 0) {
GPU_FOREACH_THREAD(d, y, DPA_D1D) {
GPU_FOREACH_THREAD(q, x, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(d, y, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(q, x, DPA_Q1D) {
DIFFUSION3DPA_6;
}
}
}
__syncthreads();
GPU_FOREACH_THREAD(qz, z, DPA_Q1D) {
GPU_FOREACH_THREAD(qy, y, DPA_Q1D) {
GPU_FOREACH_THREAD(dx, x, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(qz, z, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(qy, y, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(dx, x, DPA_D1D) {
DIFFUSION3DPA_7;
}
}
}
__syncthreads();
GPU_FOREACH_THREAD(qz, z, DPA_Q1D) {
GPU_FOREACH_THREAD(dy, y, DPA_D1D) {
GPU_FOREACH_THREAD(dx, x, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(qz, z, DPA_Q1D) {
GPU_FOREACH_THREAD_DIRECT(dy, y, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(dx, x, DPA_D1D) {
DIFFUSION3DPA_8;
}
}
}
__syncthreads();
GPU_FOREACH_THREAD(dz, z, DPA_D1D) {
GPU_FOREACH_THREAD(dy, y, DPA_D1D) {
GPU_FOREACH_THREAD(dx, x, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(dz, z, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(dy, y, DPA_D1D) {
GPU_FOREACH_THREAD_DIRECT(dx, x, DPA_D1D) {
DIFFUSION3DPA_9;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/apps/FEM_MACROS.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

// The macro below is only defined when the CUDA or HIP back end is enabled.
#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP)
// GPU_FOREACH_THREAD_DIRECT(i, k, N)
//   i - name of the int variable declared for the guarded statement
//   k - threadIdx component to read (written as threadIdx.k in the expansion)
//   N - exclusive upper bound that i is compared against
//
// Expands to an `if` with an init-statement rather than a loop: the calling
// thread binds i to its own threadIdx.k and runs the statement that follows
// only when i < N. Each thread therefore handles at most one index, and
// indices not equal to some thread's threadIdx.k are never visited.
// NOTE(review): presumably callers must launch with blockDim.k >= N so that
// every index in [0, N) is covered - confirm against the launch configuration.
// NOTE(review): N is used unparenthesized in `i < N`; pass a simple
// expression or a parenthesized one.
#define GPU_FOREACH_THREAD_DIRECT(i, k, N) \
if(int i = threadIdx.k; i < N)
// NOTE(review): the next line repeats the macro body. In the diff view this
// hunk is reported as one addition and one deletion, so it appears to be the
// post-change copy of the line above (whitespace-only change), not a second
// statement - confirm against the actual header.
if(int i = threadIdx.k; i < N)
#endif

#if defined(RAJA_ENABLE_SYCL)
Expand Down

0 comments on commit 9d810b6

Please sign in to comment.