-
Notifications
You must be signed in to change notification settings - Fork 458
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
2024-04-30 nightly release (38fad23)
- Loading branch information
pytorchbot
committed
Apr 30, 2024
1 parent
8ba0eff
commit 2a03600
Showing
33 changed files
with
1,494 additions
and
593 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
78 changes: 78 additions & 0 deletions
78
backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.glsl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#version 450 core | ||
|
||
#define PRECISION ${PRECISION} | ||
|
||
#define VEC4_T ${texel_type(DTYPE)} | ||
|
||
layout(std430) buffer; | ||
|
||
#include "indexing_utils.h" | ||
|
||
layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out; | ||
layout(set = 0, binding = 1) uniform PRECISION sampler3D existing_out; | ||
layout(set = 0, binding = 2) uniform PRECISION sampler3D image_in; | ||
|
||
// Parameters for the channel-offset copy performed by main(). | ||
layout(set = 0, binding = 3) uniform PRECISION restrict CopyArgs { | ||
// Sizes of the output and input tensors; main() passes them to the indexing | ||
// helpers when converting between texture positions and tensor indices. | ||
ivec4 out_sizes; | ||
ivec4 in_sizes; | ||
// Analogous to the range variable in copy. It defines the number of channels | ||
// being copied. | ||
int channel_range; | ||
// Added to the relative channel index to pick the channel read from the input. | ||
int src_channel_offset; | ||
// Subtracted from the output channel index to get the relative channel index. | ||
int dst_channel_offset; | ||
// Not read by main(); presumably padding for uniform-block alignment - TODO confirm. | ||
int unused; | ||
// Operates on (x, y, z) extents. | ||
ivec3 range; | ||
int unused1; | ||
// Added to the invocation position to obtain the output texel position. | ||
ivec3 dst_offset; | ||
int unused2; | ||
}; | ||
|
||
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in; | ||
|
||
layout(constant_id = 3) const int packed_dim = C_DIM; | ||
|
||
/*
 * Copies up to `channel_range` channels from image_in into the output texel at
 * (invocation position + dst_offset). Lanes of the output texel that fall
 * outside the copied channel span keep the value read from existing_out.
 */
void main() {
  // Unlike other shaders, the dispatched range is often smaller than the
  // destination texture extent, so bound-check against `range`.
  const ivec3 pos = ivec3(gl_GlobalInvocationID);
  if (any(greaterThanEqual(pos, range))) {
    return;
  }

  const ivec3 out_pos = pos + dst_offset;
  const ivec4 out_whcn = to_tensor_idx(out_pos, out_sizes, packed_dim);

  // Start from the texel already present in the output so that elements this
  // invocation does not overwrite are preserved.
  VEC4_T texel = VEC4_T(texelFetch(existing_out, out_pos, 0));

  for (int lane = 0; lane < 4; lane++) {
    // Channel index relative to the beginning of the copied span.
    const int rel_channel = out_whcn.z - dst_channel_offset + lane;

    // Only lanes inside [0, channel_range) are updated; this handles partial
    // updates at the beginning and end of the span in an existing tensor.
    if (rel_channel >= 0 && rel_channel < channel_range) {
      // Same w/h/n as the output element, with the channel shifted by the
      // source offset.
      ivec4 src_whcn = out_whcn;
      src_whcn.z = rel_channel + src_channel_offset;

      const ivec4 src_elem_pos =
          to_texture_elem_pos(src_whcn, in_sizes, packed_dim);
      texel[lane] =
          VEC4_T(texelFetch(image_in, src_elem_pos.xyz, 0))[src_elem_pos.w];
    }
  }

  imageStore(image_out, out_pos, texel);
}
10 changes: 10 additions & 0 deletions
10
backends/vulkan/runtime/graph/ops/glsl/copy_channel_offset.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
copy_channel_offset: | ||
parameter_names_with_default_values: | ||
DTYPE: float | ||
NDIM: 3 | ||
generate_variant_forall: | ||
DTYPE: | ||
- VALUE: half | ||
- VALUE: float | ||
shader_variants: | ||
- NAME: copy_channel_offset |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
/* | ||
* Copyright (c) Meta Platforms, Inc. and affiliates. | ||
* All rights reserved. | ||
* | ||
* This source code is licensed under the BSD-style license found in the | ||
* LICENSE file in the root directory of this source tree. | ||
*/ | ||
|
||
#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h> | ||
|
||
#include <executorch/backends/vulkan/runtime/graph/ops/impl/Copy.h> | ||
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h> | ||
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/KernelUtils.h> | ||
#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h> | ||
#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h> | ||
|
||
namespace vkcompute { | ||
|
||
/*
 * Adds the copy nodes that implement concatenation of a list of tensors into
 * `out` along one dimension.
 *
 * graph:       compute graph the nodes are added to.
 * in_list_ref: reference to the value list holding the input tensors. Every
 *              input must use the channels-packed memory layout.
 * dim_ref:     reference to the int64 scalar holding the concatenation dim; it
 *              is normalized against `out` via normalize_to_nchw_dim.
 * out:         reference to the output tensor.
 *
 * For width, height and batch, each input's full texture extent
 * (texture_limits()) is copied with add_copy_offset_node while the destination
 * offset advances along the x, y or z texture axis respectively. For channel,
 * add_copy_channel_offset_node is used and the destination offset advances by
 * each input's channel count. Any other normalized dim triggers VK_THROW.
 */
void add_cat_default_node(
    ComputeGraph& graph,
    ValueRef in_list_ref,
    ValueRef dim_ref,
    ValueRef out) {
  ValueListPtr input_list = graph.get_value_list(in_list_ref);

  for (ValueRef input_ref : *input_list) {
    vTensorPtr t_in = graph.get_tensor(input_ref);
    VK_CHECK_COND(check_memory_layout_is(*t_in, api::kChannelsPacked));
  }

  int64_t dim = graph.extract_scalar<int64_t>(dim_ref);
  vTensorPtr t_out = graph.get_tensor(out);

  NchwDim nchw_dim = normalize_to_nchw_dim(*t_out, dim);

  const bool is_width = (nchw_dim == DimWidth);
  const bool is_height = (nchw_dim == DimHeight);
  const bool is_batch = (nchw_dim == DimBatch);

  if (is_width || is_height || is_batch) {
    // Width, height and batch concatenation differ only in which texture axis
    // the destination offset advances along: x (0), y (1) or z (2).
    const int texture_axis = is_width ? 0 : (is_height ? 1 : 2);

    api::utils::ivec3 src_offset = api::utils::make_ivec3({0, 0, 0}, false);
    api::utils::ivec3 dst_offset = api::utils::make_ivec3({0, 0, 0}, false);

    for (ValueRef input_ref : *input_list) {
      vTensorPtr t_in = graph.get_tensor(input_ref);
      api::utils::ivec3 range = t_in->texture_limits();
      add_copy_offset_node(
          graph, input_ref, range, src_offset, dst_offset, out);
      // The next input is placed right after the one just copied.
      dst_offset.data[texture_axis] += range.data[texture_axis];
    }
  } else if (nchw_dim == DimChannel) {
    // Channel concatenation works in units of channels rather than texels.
    int32_t src_offset = 0;
    int32_t dst_offset = 0;

    for (ValueRef input_ref : *input_list) {
      vTensorPtr t_in = graph.get_tensor(input_ref);
      int32_t range = dim_at<Dim4D::Channel>(t_in->sizes());
      add_copy_channel_offset_node(
          graph, input_ref, range, src_offset, dst_offset, out);
      dst_offset += range;
    }
  } else {
    VK_THROW("Unexpected value of nchw_dim=", nchw_dim);
  }
}
|
||
/*
 * Operator entry point for aten.cat.default. Unpacks the positional argument
 * list and forwards to add_cat_default_node.
 *
 * args[0]: reference to the list of input tensors.
 * args[1]: reference to the concatenation dim.
 * args[2]: reference to the output tensor.
 */
void cat_default(ComputeGraph& graph, const std::vector<ValueRef>& args) {
  const ValueRef in_list_ref = args[0];
  const ValueRef dim_ref = args[1];
  const ValueRef out = args[2];

  add_cat_default_node(graph, in_list_ref, dim_ref, out);
}
|
||
// Associates the operator name aten.cat.default with cat_default through | ||
// VK_REGISTER_OP (both macros are defined outside this file). | ||
REGISTER_OPERATORS { | ||
VK_REGISTER_OP(aten.cat.default, cat_default); | ||
} | ||
|
||
} // namespace vkcompute |
Oops, something went wrong.