diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 4a97f0251..0c95c1374 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -30,6 +30,10 @@ cmake_policy(SET CMP0112 NEW) add_subdirectory(util) +if (CUTLASS_ENABLE_SYCL) + add_subdirectory(copy_debug) +endif() + if (CUTLASS_ENABLE_LIBRARY) add_subdirectory(library) endif() diff --git a/tools/copy_debug/CMakeLists.txt b/tools/copy_debug/CMakeLists.txt new file mode 100644 index 000000000..628ff774c --- /dev/null +++ b/tools/copy_debug/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (c) 2024 - 2025 Codeplay Software Ltd. All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +if (CUTLASS_ENABLE_SYCL AND SYCL_INTEL_TARGET) + cutlass_add_executable( + copy_debug + copy_debug.cpp + ) + + target_link_libraries( + copy_debug + PRIVATE + CUTLASS + cutlass_tools_util_includes + ) + + add_sycl_to_target(TARGET copy_debug) + +endif() diff --git a/tools/copy_debug/copy_debug.cpp b/tools/copy_debug/copy_debug.cpp new file mode 100644 index 000000000..31702ac8c --- /dev/null +++ b/tools/copy_debug/copy_debug.cpp @@ -0,0 +1,132 @@ +/*************************************************************************************************** + * Copyright (c) 2025 - 2025 Codeplay Software Ltd. All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ + +#include +#include +#include + +#include +#include +#include + +using namespace syclcompat::experimental; +using namespace cute; + +#define SUBGROUP_SIZE (16) + +template +void copy_kernel(TensorS S) { + using namespace cute; + using Element = typename TensorS::value_type; + + // initialization + if(thread(0)){ + for(int i=0; i(i); + } + } + syncthreads(); + + using CopyThreadShape = Shape<_1, Int>; + using traits_load = Copy_Traits; + using Atom_load = Copy_Atom; + auto tiled_copy_load = make_tiled_copy(Atom_load{}.with(S), + Layout{}, + make_layout(shape_div(typename traits_load::BlockShape{}, CopyThreadShape{}))); + + auto thr_copy_load = tiled_copy_load.get_slice(ThreadIdxX()); + + using actual_fragment_size = std::conditional_t, C, fragment_size>; + Tensor fragment = make_tensor(make_shape(actual_fragment_size{},_1{},_1{})); + clear(fragment); + + static_assert(actual_fragment_size::value >= Atom_load::NumValDst, "fragment is too small to hold all results!"); + Tensor fragment_copy_view = make_tensor(fragment.data(), make_shape(C{},_1{},_1{})); + auto blk_load_S = tiled_copy_load.get_pvc_tensor(make_coord(0, 0, 0),make_shape(_1{},_1{},_1{})); + copy(tiled_copy_load, blk_load_S, fragment_copy_view); + + if(thread(0)){ + print("fragment: "); print(fragment); print("\n"); + } + + for(int i=0;i(fragment(i))); print(" "); + } + } + } + if(thread(0)){ + print("\n"); + } +} + +// by default select fragment size to match copy size. This can be set manually to a bigger value as copy size might be wrong +template +void copy(int global_M, int global_N) { + using namespace cute; + + auto tensor_shape = make_shape(global_M, global_N); + int tensor_size = size(tensor_shape); + cutlass::DeviceAllocation src(tensor_size); + + Tensor tensor_S = make_tensor(make_gmem_ptr(src.get()), make_layout(tensor_shape, LayoutLeft{})); + + auto gridDim = syclcompat::dim3(SUBGROUP_SIZE); + auto blockDim = syclcompat::dim3(SUBGROUP_SIZE); + launch>( + launch_policy{gridDim, blockDim, + kernel_properties{sycl_exp::sub_group_size}}, + tensor_S); + + syclcompat::wait_and_throw(); +} + +int main(){ + // for 16b copies use integers as floating point types could lose precision for bigger indices + // for 8b copies you have to work with overflow + copy(256, 256); + copy(256, 256); + return 0; +}