Remove ConvertPrecision WA and use Convert op + ConstantFolding for both paths
tkrupa-intel committed Jan 28, 2025
1 parent f741706 commit 38aceaf
Showing 4 changed files with 8 additions and 37 deletions.
@@ -10,9 +10,7 @@
 
 namespace ov {
 
-OPENVINO_API void copy_weightless_cache_attr(const std::shared_ptr<Node>& from,
-                                             const std::shared_ptr<Node>& to,
-                                             bool set_by_precision_conversion = false);
+OPENVINO_API void copy_weightless_cache_attr(const std::shared_ptr<Node>& from, const std::shared_ptr<Node>& to);
 
 /**
  * @brief Holds weightless caching attributes of a single constant.
src/core/src/op/util/weightless_caching_attributes.cpp (1 addition, 7 deletions)
@@ -9,17 +9,11 @@ bool ov::WeightlessCacheAttribute::is_copyable() const {
 }
 
 OPENVINO_API void ov::copy_weightless_cache_attr(const std::shared_ptr<ov::Node>& from,
-                                                 const std::shared_ptr<ov::Node>& to,
-                                                 bool set_by_convert_precision) {
+                                                 const std::shared_ptr<ov::Node>& to) {
     const auto& rt_info = from->get_rt_info();
     auto weightless_caching_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
 
     if (weightless_caching_attr != rt_info.end()) {
         to->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] = weightless_caching_attr->second;
-        if (set_by_convert_precision) {
-            to->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()]
-                .as<ov::WeightlessCacheAttribute>()
-                .set_by_convert_precision = true;
-        }
     }
 }
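
With the flag gone, the helper now does nothing beyond mirroring the rt_info entry from one node to another. A minimal usage sketch with an illustrative caller (the function and node names below are not part of this commit, and the header that declares the helper is the first changed file above, whose path is not captured in this diff view):

```cpp
#include <memory>

#include "openvino/op/constant.hpp"
// ...plus the header declaring ov::copy_weightless_cache_attr
// (the first changed file above; its path is not captured here).

// Illustrative helper: `old_const` may carry a WeightlessCacheAttribute,
// `new_const` is its replacement produced by some transformation.
void keep_weightless_cache_info(const std::shared_ptr<ov::op::v0::Constant>& old_const,
                                const std::shared_ptr<ov::op::v0::Constant>& new_const) {
    // Copies the WeightlessCacheAttribute rt_info entry if present;
    // a no-op when the source node has no such attribute.
    ov::copy_weightless_cache_attr(old_const, new_const);
}
```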
src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp (5 additions, 25 deletions)
@@ -54,13 +54,12 @@ struct weightless_cache_manager {
                            size_t original_size,
                            ov::element::Type original_dtype,
                            ov::element::Type curr_dtype,
-                           ov::Shape shape, bool precision_conversion_set_by_transformation) {
+                           ov::Shape shape) {
         this->bin_offset = bin_offset;
         this->original_size = original_size;
         this->original_dtype = original_dtype;
         this->curr_dtype = curr_dtype;
         this->shape = shape;
-        this->precision_conversion_set_by_transformation = precision_conversion_set_by_transformation;
         do_weightless_caching = true;
 
         if (original_dtype != curr_dtype) {
@@ -155,7 +154,6 @@
     ov::element::Type original_dtype = ov::element::Type_t::undefined;
     ov::element::Type curr_dtype = ov::element::Type_t::undefined;
     ov::Shape shape{};
-    bool precision_conversion_set_by_transformation = false;
 
     bool should_run_transformations() {
         return do_precision_conversion || reorder_rep.do_reorder;
@@ -193,28 +191,10 @@
         ov::pass::Manager manager("Plugin:GPU:weightless_cache_transformations");
         std::shared_ptr<ov::Model> model = nullptr;
 
-        if (precision_conversion_set_by_transformation) {
-            results.push_back(std::make_shared<ov::op::v0::Result>(orig_constant->output(0)));
-            model = std::make_shared<ov::Model>(results, inputParams, "aux");
-
-
-            precisions_map fp_convert_precision_map = {{original_dtype, curr_dtype}};
-            type_to_fuse_map empty_fuse_map = {};
-            const bool keep_precision_sensitive_in_fp32 = false;
-            const bool convert_input_output_precision = false;
-            const bool store_original_precision_as_rt_attribute = true;
-            manager.register_pass<ov::pass::ConvertPrecision>(fp_convert_precision_map,
-                                                              empty_fuse_map,
-                                                              keep_precision_sensitive_in_fp32,
-                                                              convert_input_output_precision,
-                                                              store_original_precision_as_rt_attribute);
-        } else {
-            auto convert_op = std::make_shared<ov::op::v0::Convert>(orig_constant, curr_dtype);
-            results.push_back(std::make_shared<ov::op::v0::Result>(convert_op->output(0)));
-            model = std::make_shared<ov::Model>(results, inputParams, "aux");
-
-            manager.register_pass<ov::pass::ConstantFolding>();
-        }
+        auto convert_op = std::make_shared<ov::op::v0::Convert>(orig_constant, curr_dtype);
+        results.push_back(std::make_shared<ov::op::v0::Result>(convert_op->output(0)));
+        model = std::make_shared<ov::Model>(results, inputParams, "aux");
+        manager.register_pass<ov::pass::ConstantFolding>();
 
         manager.run_passes(model);
         const auto& ops = model->get_ops();
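
The deleted branch was the ConvertPrecision workaround: it re-ran a full precision-conversion pass over the auxiliary model. Both paths now funnel into the same pattern, wrapping the original constant in a Convert op and letting ConstantFolding collapse the pair into a constant of the target type. A standalone sketch of that pattern using the standard OpenVINO core API (the function name and the constant-extraction loop are illustrative, not copied from data.hpp):

```cpp
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/core/type.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/result.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"

// Fold Convert(orig_constant -> curr_dtype) into a new constant via a throwaway model.
std::shared_ptr<ov::op::v0::Constant> fold_converted_constant(
    const std::shared_ptr<ov::op::v0::Constant>& orig_constant,
    ov::element::Type curr_dtype) {
    // Build an auxiliary model: Constant -> Convert -> Result.
    auto convert_op = std::make_shared<ov::op::v0::Convert>(orig_constant, curr_dtype);
    auto result = std::make_shared<ov::op::v0::Result>(convert_op->output(0));
    auto model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{}, "aux");

    // ConstantFolding evaluates Convert over its constant input and replaces the
    // pair with a single constant of the target element type.
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::ConstantFolding>();
    manager.run_passes(model);

    // After folding, the only Constant left in the auxiliary model is the converted one.
    for (const auto& op : model->get_ops()) {
        if (auto folded = ov::as_type_ptr<ov::op::v0::Constant>(op)) {
            return folded;
        }
    }
    return nullptr;
}
```

This is the simplification the commit title refers to: one folding path for both cases instead of a transformation-level flag plus a separate ConvertPrecision branch.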
src/plugins/intel_gpu/src/plugin/program_builder.cpp (1 addition, 2 deletions)
@@ -321,8 +321,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::pr
                                     attr.original_size,
                                     attr.original_dtype,
                                     op.get_output_element_type(0),
-                                    op.get_output_shape(0),
-                                    attr.set_by_convert_precision);
+                                    op.get_output_shape(0));
         }
     }
 }
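
On the caller side, `attr` appears to be the WeightlessCacheAttribute read from the node's rt_info, using the same key as the core helper above. A rough sketch of that lookup (the surrounding ProgramBuilder code and exact call site are assumed; only members visible in this diff, original_size and original_dtype, are mentioned):

```cpp
#include "openvino/core/node.hpp"
// ...plus the header declaring ov::WeightlessCacheAttribute (the first changed file above).

// Sketch only: returns the attribute if the node carries one, so callers such as
// ProgramBuilder::add_primitive can forward original_size / original_dtype to the
// weightless cache manager alongside the node's current output type and shape.
const ov::WeightlessCacheAttribute* find_weightless_cache_attr(const ov::Node& op) {
    const auto& rt_info = op.get_rt_info();
    auto it = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
    if (it == rt_info.end()) {
        return nullptr;
    }
    return &it->second.as<ov::WeightlessCacheAttribute>();
}
```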
