Skip to content

Commit

Permalink
Adjust the launch bounds to get_json_object to avoid spilling (#2015)
Browse files Browse the repository at this point in the history
Signed-off-by: Robert (Bobby) Evans <bobby@apache.org>
  • Loading branch information
revans2 authored May 7, 2024
1 parent dcaf803 commit 980633d
Show file tree
Hide file tree
Showing 3 changed files with 410 additions and 470 deletions.
26 changes: 15 additions & 11 deletions src/main/cpp/src/get_json_object.cu
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,6 @@ class json_generator {
}
}

__device__ void reset() { output_len = 0; }

__device__ inline size_t get_output_len() const { return output_len; }
__device__ inline char* get_output_start_position() const { return output; }
__device__ inline char* get_current_output_position() const { return output + output_len; }
Expand Down Expand Up @@ -371,7 +369,6 @@ __device__ bool evaluate_path(json_parser& p,
write_style style;

cudf::device_span<path_instruction const> path;

// is this context task is done
bool task_is_done;

Expand Down Expand Up @@ -840,13 +837,12 @@ rmm::device_uvector<path_instruction> construct_path_commands(
* @returns A pair containing the result code and the output buffer.
*/
__device__ thrust::pair<bool, size_t> get_json_object_single(
char const* input,
cudf::size_type input_len,
char_range input,
cudf::device_span<path_instruction const> path_commands,
char* out_buf,
size_t out_buf_size)
{
json_parser j_parser(input, input_len);
json_parser j_parser(input);
j_parser.next_token();
// JSON validation check
if (json_token::ERROR == j_parser.get_current_token()) { return {false, 0}; }
Expand Down Expand Up @@ -878,16 +874,24 @@ __device__ thrust::pair<bool, size_t> get_json_object_single(
* (chars and validity).
*
* @param col Device view of the incoming string
* @param commands JSONPath command buffer
* @param path_commands JSONPath command buffer
* @param d_sizes a buffer used to write the output sizes in the first pass,
* and is read back in on the second pass to compute offsets.
* @param output_offsets Buffer used to store the string offsets for the results
* of the query
* @param out_buf Buffer used to store the results of the query
* @param out_validity Output validity buffer
* @param out_valid_count Output count of # of valid bits
* @param options Options controlling behavior
*/
template <int block_size>
__launch_bounds__(block_size) CUDF_KERNEL
// We have 1 for the minBlocksPerMultiprocessor in the launch bounds to avoid spilling from
// the kernel itself. By default NVCC uses a heuristic to find a balance between the
// maximum number of registers used by a kernel and the parallelism of the kernel.
// If lots of registers are used the parallelism may suffer. But in our case
// NVCC gets this wrong and we want to avoid spilling all the time or else
// the performance is really bad. This essentially tells NVCC to prefer using lots
// of registers over spilling.
__launch_bounds__(block_size, 1) CUDF_KERNEL
void get_json_object_kernel(cudf::column_device_view col,
cudf::device_span<path_instruction const> path_commands,
cudf::size_type* d_sizes,
Expand All @@ -911,8 +915,8 @@ __launch_bounds__(block_size) CUDF_KERNEL
out_buf != nullptr ? output_offsets[tid + 1] - output_offsets[tid] : 0;

// process one single row
auto [result, output_size] = get_json_object_single(
str.data(), str.size_bytes(), {path_commands.data(), path_commands.size()}, dst, dst_size);
auto [result, output_size] =
get_json_object_single(str, {path_commands.data(), path_commands.size()}, dst, dst_size);
if (result) { is_valid = true; }

// filled in only during the precompute step. during the compute step, the
Expand Down
Loading

0 comments on commit 980633d

Please sign in to comment.