Skip to content

Commit

Permalink
[GPU] Add check to fall back to permute_ref if the format differs for oneDNN (openvinotoolkit#27402)
Browse files Browse the repository at this point in the history

### Details:
If the input and output formats of a permute preceding a oneDNN
convolution are not the same, permute_kernel_f_y_axes does not support
it, so we need to fall back to permute_ref.


### Tickets:
 - *CVS-155933*
  • Loading branch information
clee30 authored Nov 8, 2024
1 parent b416fb0 commit fb5b5ed
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ bool PermuteKernel_f_y_axes::Validate(const Params& p) const {
const auto& params = dynamic_cast<const permute_params&>(p);
const auto& in = params.inputs[0];
const auto in_layout = in.GetLayout();
const auto& out = params.outputs[0];
const auto& out_layout = out.GetLayout();

const auto feature_div = GetDivisor(in.Feature().v);
const auto y_div = GetDivisor(in.Y().v);
Expand All @@ -227,6 +229,10 @@ bool PermuteKernel_f_y_axes::Validate(const Params& p) const {
return false;
}

if (in_layout != out_layout) {
return false;
}

// Accept only supported blocked layouts and SIMD sizes.
if (!SimpleLayout(in_layout)) {
const auto feature_block_size = GetFeatureBlockSize(params);
Expand Down
70 changes: 70 additions & 0 deletions src/plugins/intel_gpu/tests/unit/test_cases/permute_gpu_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2175,6 +2175,76 @@ TEST(permute_gpu_f32_dynamic, bfyx_0_2_3_1) {
}
}

TEST(permute_f_y_axes_fallback, b_fs_yx_fsv16) {
constexpr size_t array_size = 128;

auto& engine = get_test_engine();
if (!engine.get_device_info().supports_immad)
return;

auto input_layout_static = layout{ov::PartialShape{1, 8, 16, 1}, data_types::f32, format::bfyx};
auto input = engine.allocate_memory(input_layout_static);

std::vector<float> input_data;
input_data.reserve(array_size);
for (size_t i = 0; i < array_size; ++i)
input_data.push_back(static_cast<float>(i));

auto weights = engine.allocate_memory({ data_types::f32, format::bfyx, { 8, 16, 1, 1 } });

std::vector<float> weights_data;
weights_data.reserve(array_size);
for (size_t i = 0; i < array_size; ++i)
weights_data.push_back(static_cast<float>(1.0));

set_values(weights, weights_data);
set_values(input, input_data);

auto impl_desc_onednn = ov::intel_gpu::ImplementationDesc{format::b_fs_yx_fsv16, "", impl_types::onednn};
auto impl_forcing_map = ov::intel_gpu::ImplForcingMap{{"conv", impl_desc_onednn}};

topology topology;
topology.add(input_layout("input", input_layout_static));
topology.add(permute("permute", input_info("input"), { 0, 2, 1, 3 }));
topology.add(data("weights", weights));
topology.add(convolution("conv", input_info("permute"), "weights", "", 1, {1,1}, {1,1}, {0,0}, {0,0}, false));

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::force_implementations(impl_forcing_map));

network network(engine, topology, config);
network.set_input_data("input", input);
auto outputs = network.execute();
ASSERT_EQ(outputs.size(), size_t(1));
ASSERT_EQ(outputs.begin()->first, "conv");

auto output = outputs.begin()->second.get_memory();

float answers[] = {
120.f, 120.f, 120.f, 120.f, 120.f, 120.f, 120.f, 120.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
376.f, 376.f, 376.f, 376.f, 376.f, 376.f, 376.f, 376.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
632.f, 632.f, 632.f, 632.f, 632.f, 632.f, 632.f, 632.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
888.f, 888.f, 888.f, 888.f, 888.f, 888.f, 888.f, 888.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1144.f, 1144.f, 1144.f, 1144.f, 1144.f, 1144.f, 1144.f, 1144.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1400.f, 1400.f, 1400.f, 1400.f, 1400.f, 1400.f, 1400.f, 1400.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1656.f, 1656.f, 1656.f, 1656.f, 1656.f, 1656.f, 1656.f, 1656.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
1912.f, 1912.f, 1912.f, 1912.f, 1912.f, 1912.f, 1912.f, 1912.f,
0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
};

cldnn::mem_lock<float> output_ptr(output, get_test_stream());
for (size_t i = 0; i < array_size; i++) {
ASSERT_FLOAT_EQ(answers[i], output_ptr[i]);
}
}

class permute_bfzyx_to_bfyxz: public TiledPermuteTest {};

INSTANTIATE_TEST_SUITE_P(, permute_bfzyx_to_bfyxz,
Expand Down

0 comments on commit fb5b5ed

Please sign in to comment.