Skip to content

Commit

Permalink
Added barriers to NVIDIA and AMD kernels: local inside the main loop …
Browse files Browse the repository at this point in the history
…and global at the end after writing the result

refs #12
  • Loading branch information
olepoeschl committed Jun 21, 2023
1 parent de58953 commit 16f92a4
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/main/resources/kernels.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ kernel void nqfaf_nvidia(global int *ld_arr, global int *rd_arr, global int *col
ld_mem >>= 1;
rd_mem <<= 1;
}
barrier(CLK_LOCAL_MEM_FENCE);
free = ~(jkl_queens[row] | ld | rd | col); // calculate the occupancy of the next row
free &= ~(queen + direction-1); // occupy all bits right from the last queen in order to not place the same queen again
col ^= queen; // free up the column AFTER calculating free in order to not place the same queen again
Expand All @@ -103,6 +104,7 @@ kernel void nqfaf_nvidia(global int *ld_arr, global int *rd_arr, global int *col
solutions++;
}
result[g_id] = solutions; // number of solutions of the work item
barrier(CLK_GLOBAL_MEM_FENCE);
}

// AMD kernel
Expand Down Expand Up @@ -194,13 +196,15 @@ kernel void nqfaf_amd(global int *ld_arr, global int *rd_arr, global int *col_ar
ld_mem >>= 1;
rd_mem <<= 1;
}
barrier(CLK_LOCAL_MEM_FENCE);
free = ~(jkl_queens[row] | ld | rd | col); // calculate the occupancy of the next row
free &= ~(queen + direction-1); // occupy all bits right from the last queen in order to not place the same queen again
col ^= queen; // free up the column AFTER calculating free in order to not place the same queen again

solutions += (row == N-1); // increase the solutions, if we are in the last row
}
result[g_id] = solutions; // number of solutions of the work item
barrier(CLK_GLOBAL_MEM_FENCE);
}

// Intel kernel
Expand Down

0 comments on commit 16f92a4

Please sign in to comment.