Enable XeTLA LSTM for GPU #28817
Open

jkasprza wants to merge 13 commits into openvinotoolkit:master from jkasprza:enable_xetla
+16,313 −7
Commits (13) — changes from all commits:
4507c03  Add XeTLA source (jkasprza)
04a03f5  Enable XeTLA LSTM implementation (jkasprza)
b348dbe  Merge branch 'openvinotoolkit:master' into enable_xetla (jkasprza)
8333fb2  Change lstm shape dimensions to size_t (jkasprza)
843fa75  Avoid type conversion when calculating lws, gws (jkasprza)
29e4a0e  Fix num_dir index (jkasprza)
928acaa  Merge branch 'openvinotoolkit:master' into enable_xetla (jkasprza)
136c16e  Change to cmake copy_directory (jkasprza)
e1b7113  Remove extra rm command (jkasprza)
a9eefba  Revert changes in LSTM test setup (jkasprza)
9b9f929  Improve kernel performance (jkasprza)
a508078  Merge branch 'openvinotoolkit:master' into enable_xetla (jkasprza)
4fcf716  Update copyright notice to 2025 (jkasprza)
@@ -0,0 +1,96 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "impls/ocl/primitive_base.hpp"

#include "lstm_seq.hpp"
#include "lstm/cm/lstm_seq_cm_kernel_selector.h"
#include "lstm/lstm_kernel_base.h"
#include "openvino/op/lstm_sequence.hpp"
#include "impls/registry/implementation_manager.hpp"

namespace cldnn {
namespace cm {

struct lstm_seq_impl : ocl::typed_primitive_impl_ocl<lstm_seq> {
    using parent = ocl::typed_primitive_impl_ocl<lstm_seq>;
    using parent::parent;
    using kernel_selector_t = kernel_selector::lstm_seq_cm_kernel_selector;
    using kernel_params_t = kernel_selector::lstm_params;

    DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::cm::lstm_seq_impl)

    std::unique_ptr<primitive_impl> clone() const override {
        return std::make_unique<lstm_seq_impl>(*this);
    }

protected:
    kernel_arguments_data get_arguments(const typed_primitive_inst<lstm_seq>& instance) const override {
        kernel_arguments_data args;
        for (size_t i = 0; i < instance.inputs_memory_count(); i++) {
            args.inputs.push_back(instance.input_memory_ptr(i));
        }

        for (size_t i = 0; i < instance.outputs_memory_count(); i++) {
            args.outputs.push_back(instance.output_memory_ptr(i));
        }
        return args;
    }

public:
    static kernel_params_t get_kernel_params(const kernel_impl_params& impl_param) {
        const auto& primitive = impl_param.typed_desc<lstm_seq>();
        auto params = get_default_params<kernel_selector::lstm_params>(impl_param);
        for (size_t i = 1; i < impl_param.input_layouts.size(); ++i) {
            params.inputs.push_back(convert_data_tensor(impl_param.get_input_layout(i)));
        }

        if (!primitive->activations.empty()) {
            auto a_sz = primitive->activations.size();
            auto param_sz = primitive->activation_params.size();
            OPENVINO_ASSERT(param_sz == 0 || a_sz == param_sz, "[GPU] Unexpected activation params count in lstm_seq impl: ", param_sz);
            for (size_t i = 0; i < a_sz; i++) {
                params.activations.emplace_back(get_kernel_selector_activation_param(primitive->activations[i]),
                                                param_sz ? primitive->activation_params[i].a : 0.0f,
                                                param_sz ? primitive->activation_params[i].b : 0.0f);
            }
        }

        if (primitive->clip > 0.0f) {
            params.activations.emplace_back(get_kernel_selector_activation_param(activation_func::clamp), -primitive->clip, primitive->clip);
        }

        params.SetOffsetOrder(static_cast<int32_t>(primitive->offset_order));
        params.clip = primitive->clip;
        params.direction = primitive->direction;
        // Legacy multi-output
        params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[1]));
        if (!primitive->initial_cell_state.pid.empty()) {
            params.outputs.push_back(convert_data_tensor(impl_param.input_layouts[1]));
        }
        return params;
    }

    static kernel_impl_params static_canonicalize_shapes(const kernel_impl_params& impl_params) {
        if (impl_params.get_input_layout().get_partial_shape().size() != 3) {
            return primitive_impl::static_canonicalize_shapes(impl_params);
        }
        auto updated_impl_params = canonicalize_fused_shapes(impl_params);
        return updated_impl_params;
    }

    kernel_impl_params canonicalize_shapes(const kernel_impl_params& impl_params) const override {
        return static_canonicalize_shapes(impl_params);
    }
};

std::unique_ptr<primitive_impl> LSTMSeqImplementationManager::create_impl(const program_node& node, const kernel_impl_params& params) const {
    OPENVINO_ASSERT(node.is_type<lstm_seq>());
    return ocl::typed_primitive_impl_ocl<lstm_seq>::create<lstm_seq_impl>(static_cast<const lstm_seq_node&>(node), params);
}

}  // namespace cm
}  // namespace cldnn

BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cm::lstm_seq_impl)
@@ -0,0 +1,102 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include <memory>

#include "impls/registry/implementation_manager.hpp"
#include "intel_gpu/runtime/layout.hpp"
#include "lstm_seq_inst.h"

namespace cldnn {
namespace cm {

struct LSTMSeqImplementationManager : public ImplementationManager {
    OV_GPU_PRIMITIVE_IMPL("cm::lstm_seq")
    LSTMSeqImplementationManager(shape_types shape_type, ValidateFunc vf = nullptr)
        : ImplementationManager(impl_types::cm, shape_type, vf) {}

    in_out_fmts_t query_formats(const program_node& node) const override {
        assert(node.is_type<lstm_seq>());
        std::vector<format::type> in_fmts(node.get_dependencies().size(), format::any);
        std::vector<format::type> out_fmts(node.get_outputs_count(), format::any);

        for (size_t idx = 0; idx < node.get_dependencies().size(); idx++) {
            in_fmts[idx] = format::bfyx;
        }
        out_fmts[0] = format::byfx;
        for (size_t idx = 1; idx < node.get_outputs_count(); idx++) {
            out_fmts[idx] = format::bfyx;
        }

        return {in_fmts, out_fmts};
    }

    std::unique_ptr<primitive_impl> create_impl(const program_node& node,
                                                const kernel_impl_params& params) const override;

    bool validate_impl(const program_node& node) const override {
        assert(node.is_type<lstm_seq>());

        auto& engine = node.get_program().get_engine();
        auto& config = node.get_program().get_config();
        const auto& info = engine.get_device_info();

        // XeTLA LSTM is optimized for Xe2 architectures
        if (!check_cm_jit_support(engine, config) || info.arch != gpu_arch::xe2) {
            return false;
        }

        const auto& lstm_node = node.as<lstm_seq>();
        const auto& lstm_prim = lstm_node.get_primitive();
        if (lstm_prim->clip > 0.0f) {
            return false;
        }

        if (lstm_prim->activations.size() != 3 ||
            lstm_prim->activations[0] != activation_func::logistic ||
            lstm_prim->activations[1] != activation_func::hyperbolic_tan ||
            lstm_prim->activations[2] != activation_func::hyperbolic_tan) {
            return false;
        }

        auto in_layouts = node.get_input_layouts();
        unsigned int expected_inputs = 7;
        if (in_layouts.size() != expected_inputs) {
            return false;
        }
        {
            auto& seq_lengths = in_layouts[expected_inputs - 1];
            if (seq_lengths.format != format::bfyx || seq_lengths.data_type != data_types::i32) {
                return false;
            }
            in_layouts.pop_back();
        }

        auto out_layouts = node.get_output_layouts();
        for (auto& layout : in_layouts) {
            if (layout.format != format::bfyx || layout.data_type != data_types::f16) {
                return false;
            }
        }
        for (auto& layout : out_layouts) {
            if (layout.data_type != data_types::f16) {
                return false;
            }
        }

        auto num_gates = 4;
        auto batch_size = in_layouts[0].get_dim(0);
        auto input_size = in_layouts[0].get_dim(2);

        auto hidden_size = in_layouts[3].get_dim(1) / num_gates;
        auto num_dir = in_layouts[3].get_dim(0);
        if (hidden_size != 128 || batch_size != 1 || num_dir != 2 || (input_size != 64 && input_size != 256)) {
            return false;
        }

        return true;
    }
};

}  // namespace cm
}  // namespace cldnn
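For reference, the configuration that validate_impl accepts can be summarized in a small standalone sketch. This is illustration only, not code from the PR: the function name is made up, and the dimension meanings (X as [batch, seq_len, input_size], W as [num_dir, 4 * hidden_size, input_size]) are inferred from the get_dim() indices used above.

// Standalone sketch (not part of the PR) of the dimension checks performed by
// validate_impl, using plain arrays instead of cldnn layouts.
#include <array>
#include <cstdint>
#include <iostream>

namespace {
bool is_xetla_lstm_supported(const std::array<int64_t, 3>& x_dims,   // X: [batch, seq_len, input_size]
                             const std::array<int64_t, 3>& w_dims) { // W: [num_dir, 4 * hidden_size, input_size]
    const int64_t num_gates = 4;
    const int64_t batch_size = x_dims[0];
    const int64_t input_size = x_dims[2];
    const int64_t num_dir = w_dims[0];
    const int64_t hidden_size = w_dims[1] / num_gates;
    return hidden_size == 128 && batch_size == 1 && num_dir == 2 &&
           (input_size == 64 || input_size == 256);
}
}  // namespace

int main() {
    // Bidirectional, batch 1, hidden size 128, input size 64: accepted.
    std::cout << is_xetla_lstm_supported({1, 10, 64}, {2, 4 * 128, 64}) << "\n";    // prints 1
    // input_size 512 is outside the supported set: rejected.
    std::cout << is_xetla_lstm_supported({1, 10, 512}, {2, 4 * 128, 512}) << "\n";  // prints 0
    return 0;
}

Anything outside this envelope (f16 bfyx tensors, batch 1, two directions, hidden size 128, input size 64 or 256, default activations, no clip) makes validate_impl return false, so that a different implementation can be selected instead.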
src/plugins/intel_gpu/src/kernel_selector/cm_kernels/include/batch_headers/cm_xetla.h (5 changes: 5 additions & 0 deletions)

@@ -0,0 +1,5 @@
// This file is to be preprocessed by the cpp compiler before adding to batch headers db for cm

#define XETLA_CODE_BASE __CM__
#define XETLA_NO_CM_INCLUDE
#include <xetla.hpp>
Review comment: Please check that none of the input/output tensors have dynamic shapes; otherwise this call will trigger an exception.
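If the comment refers to the get_dim() calls in validate_impl, a guard along the following lines could address it. This is only a sketch of the reviewer's suggestion, not code from the PR; it assumes cldnn::layout::is_dynamic() is available, as used elsewhere in the GPU plugin.

// Sketch: placed near the top of validate_impl, before any get_dim() calls,
// reject nodes whose input or output layouts are not fully static.
for (const auto& layout : node.get_input_layouts()) {
    if (layout.is_dynamic()) {
        return false;
    }
}
for (const auto& layout : node.get_output_layouts()) {
    if (layout.is_dynamic()) {
        return false;
    }
}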