From 38aceafa031556e545f781e9d35a91e0bf5da221 Mon Sep 17 00:00:00 2001 From: Tomasz Krupa Date: Tue, 28 Jan 2025 14:09:39 +0000 Subject: [PATCH] Remove ConvertPrecision WA and use Convert op + ConstantFolding for both paths --- .../rt_info/weightless_caching_attributes.hpp | 4 +-- .../op/util/weightless_caching_attributes.cpp | 8 +---- .../include/intel_gpu/primitives/data.hpp | 30 ++++--------------- .../intel_gpu/src/plugin/program_builder.cpp | 3 +- 4 files changed, 8 insertions(+), 37 deletions(-) diff --git a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp index 6c71e644886c9d..a3708983b0aa12 100644 --- a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp +++ b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp @@ -10,9 +10,7 @@ namespace ov { -OPENVINO_API void copy_weightless_cache_attr(const std::shared_ptr& from, - const std::shared_ptr& to, - bool set_by_precision_conversion = false); +OPENVINO_API void copy_weightless_cache_attr(const std::shared_ptr& from, const std::shared_ptr& to); /** * @brief Holds weightless caching attributes of a single constant. diff --git a/src/core/src/op/util/weightless_caching_attributes.cpp b/src/core/src/op/util/weightless_caching_attributes.cpp index 3f1be0c8327731..1b2745ce0ae7ec 100644 --- a/src/core/src/op/util/weightless_caching_attributes.cpp +++ b/src/core/src/op/util/weightless_caching_attributes.cpp @@ -9,17 +9,11 @@ bool ov::WeightlessCacheAttribute::is_copyable() const { } OPENVINO_API void ov::copy_weightless_cache_attr(const std::shared_ptr& from, - const std::shared_ptr& to, - bool set_by_convert_precision) { + const std::shared_ptr& to) { const auto& rt_info = from->get_rt_info(); auto weightless_caching_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); if (weightless_caching_attr != rt_info.end()) { to->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] = weightless_caching_attr->second; - if (set_by_convert_precision) { - to->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] - .as() - .set_by_convert_precision = true; - } } } \ No newline at end of file diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp index db6f088947cdca..166d6ad56b01f1 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp @@ -54,13 +54,12 @@ struct weightless_cache_manager { size_t original_size, ov::element::Type original_dtype, ov::element::Type curr_dtype, - ov::Shape shape, bool precision_conversion_set_by_transformation) { + ov::Shape shape) { this->bin_offset = bin_offset; this->original_size = original_size; this->original_dtype = original_dtype; this->curr_dtype = curr_dtype; this->shape = shape; - this->precision_conversion_set_by_transformation = precision_conversion_set_by_transformation; do_weightless_caching = true; if (original_dtype != curr_dtype) { @@ -155,7 +154,6 @@ struct weightless_cache_manager { ov::element::Type original_dtype = ov::element::Type_t::undefined; ov::element::Type curr_dtype = ov::element::Type_t::undefined; ov::Shape shape{}; - bool precision_conversion_set_by_transformation = false; bool should_run_transformations() { return do_precision_conversion || reorder_rep.do_reorder; @@ -193,28 +191,10 @@ struct weightless_cache_manager { ov::pass::Manager manager("Plugin:GPU:weightless_cache_transformations"); std::shared_ptr model = nullptr; - if (precision_conversion_set_by_transformation) { - results.push_back(std::make_shared(orig_constant->output(0))); - model = std::make_shared(results, inputParams, "aux"); - - - precisions_map fp_convert_precision_map = {{original_dtype, curr_dtype}}; - type_to_fuse_map empty_fuse_map = {}; - const bool keep_precision_sensitive_in_fp32 = false; - const bool convert_input_output_precision = false; - const bool store_original_precision_as_rt_attribute = true; - manager.register_pass(fp_convert_precision_map, - empty_fuse_map, - keep_precision_sensitive_in_fp32, - convert_input_output_precision, - store_original_precision_as_rt_attribute); - } else { - auto convert_op = std::make_shared(orig_constant, curr_dtype); - results.push_back(std::make_shared(convert_op->output(0))); - model = std::make_shared(results, inputParams, "aux"); - - manager.register_pass(); - } + auto convert_op = std::make_shared(orig_constant, curr_dtype); + results.push_back(std::make_shared(convert_op->output(0))); + model = std::make_shared(results, inputParams, "aux"); + manager.register_pass(); manager.run_passes(model); const auto& ops = model->get_ops(); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 8bded8ccb028d2..b1d43fe20b11ae 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -321,8 +321,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr