Remove ConvertPrecision WA and use Convert op + ConstantFolding for both paths
tkrupa-intel committed Jan 28, 2025
1 parent f741706 commit 38aceaf
Showing 4 changed files with 8 additions and 37 deletions.
@@ -10,9 +10,7 @@
 
 namespace ov {
 
-OPENVINO_API void copy_weightless_cache_attr(const std::shared_ptr<Node>& from,
-                                             const std::shared_ptr<Node>& to,
-                                             bool set_by_precision_conversion = false);
+OPENVINO_API void copy_weightless_cache_attr(const std::shared_ptr<Node>& from, const std::shared_ptr<Node>& to);
 
 /**
  * @brief Holds weightless caching attributes of a single constant.
src/core/src/op/util/weightless_caching_attributes.cpp (1 addition, 7 deletions)
@@ -9,17 +9,11 @@ bool ov::WeightlessCacheAttribute::is_copyable() const {
 }
 
 OPENVINO_API void ov::copy_weightless_cache_attr(const std::shared_ptr<ov::Node>& from,
-                                                 const std::shared_ptr<ov::Node>& to,
-                                                 bool set_by_convert_precision) {
+                                                 const std::shared_ptr<ov::Node>& to) {
     const auto& rt_info = from->get_rt_info();
     auto weightless_caching_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
 
     if (weightless_caching_attr != rt_info.end()) {
         to->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] = weightless_caching_attr->second;
-        if (set_by_convert_precision) {
-            to->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()]
-                .as<ov::WeightlessCacheAttribute>()
-                .set_by_convert_precision = true;
-        }
     }
 }
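
With the flag gone, the helper now does nothing beyond mirroring the rt_info entry from one node to another. A minimal usage sketch with an illustrative caller (the function and node names below are not part of this commit, and the header that declares the helper is the first changed file above, whose path is not captured in this diff view):

```cpp
#include <memory>

#include "openvino/op/constant.hpp"
// ...plus the header declaring ov::copy_weightless_cache_attr
// (the first changed file above; its path is not captured here).

// Illustrative helper: `old_const` may carry a WeightlessCacheAttribute,
// `new_const` is its replacement produced by some transformation.
void keep_weightless_cache_info(const std::shared_ptr<ov::op::v0::Constant>& old_const,
                                const std::shared_ptr<ov::op::v0::Constant>& new_const) {
    // Copies the WeightlessCacheAttribute rt_info entry if present;
    // a no-op when the source node has no such attribute.
    ov::copy_weightless_cache_attr(old_const, new_const);
}
```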
src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp (5 additions, 25 deletions)
@@ -54,13 +54,12 @@ struct weightless_cache_manager {
                            size_t original_size,
                            ov::element::Type original_dtype,
                            ov::element::Type curr_dtype,
-                           ov::Shape shape, bool precision_conversion_set_by_transformation) {
+                           ov::Shape shape) {
         this->bin_offset = bin_offset;
         this->original_size = original_size;
         this->original_dtype = original_dtype;
         this->curr_dtype = curr_dtype;
         this->shape = shape;
-        this->precision_conversion_set_by_transformation = precision_conversion_set_by_transformation;
         do_weightless_caching = true;
 
         if (original_dtype != curr_dtype) {
@@ -155,7 +154,6 @@
     ov::element::Type original_dtype = ov::element::Type_t::undefined;
     ov::element::Type curr_dtype = ov::element::Type_t::undefined;
     ov::Shape shape{};
-    bool precision_conversion_set_by_transformation = false;
 
     bool should_run_transformations() {
         return do_precision_conversion || reorder_rep.do_reorder;
@@ -193,28 +191,10 @@
         ov::pass::Manager manager("Plugin:GPU:weightless_cache_transformations");
         std::shared_ptr<ov::Model> model = nullptr;
 
-        if (precision_conversion_set_by_transformation) {
-            results.push_back(std::make_shared<ov::op::v0::Result>(orig_constant->output(0)));
-            model = std::make_shared<ov::Model>(results, inputParams, "aux");
-
-
-            precisions_map fp_convert_precision_map = {{original_dtype, curr_dtype}};
-            type_to_fuse_map empty_fuse_map = {};
-            const bool keep_precision_sensitive_in_fp32 = false;
-            const bool convert_input_output_precision = false;
-            const bool store_original_precision_as_rt_attribute = true;
-            manager.register_pass<ov::pass::ConvertPrecision>(fp_convert_precision_map,
-                                                              empty_fuse_map,
-                                                              keep_precision_sensitive_in_fp32,
-                                                              convert_input_output_precision,
-                                                              store_original_precision_as_rt_attribute);
-        } else {
-            auto convert_op = std::make_shared<ov::op::v0::Convert>(orig_constant, curr_dtype);
-            results.push_back(std::make_shared<ov::op::v0::Result>(convert_op->output(0)));
-            model = std::make_shared<ov::Model>(results, inputParams, "aux");
-
-            manager.register_pass<ov::pass::ConstantFolding>();
-        }
+        auto convert_op = std::make_shared<ov::op::v0::Convert>(orig_constant, curr_dtype);
+        results.push_back(std::make_shared<ov::op::v0::Result>(convert_op->output(0)));
+        model = std::make_shared<ov::Model>(results, inputParams, "aux");
+        manager.register_pass<ov::pass::ConstantFolding>();
 
         manager.run_passes(model);
         const auto& ops = model->get_ops();
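
The deleted branch was the ConvertPrecision workaround: it re-ran a full precision-conversion pass over the auxiliary model. Both paths now funnel into the same pattern, wrapping the original constant in a Convert op and letting ConstantFolding collapse the pair into a constant of the target type. A standalone sketch of that pattern using the standard OpenVINO core API (the function name and the constant-extraction loop are illustrative, not copied from data.hpp):

```cpp
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/core/type.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/result.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"

// Fold Convert(orig_constant -> curr_dtype) into a new constant via a throwaway model.
std::shared_ptr<ov::op::v0::Constant> fold_converted_constant(
    const std::shared_ptr<ov::op::v0::Constant>& orig_constant,
    ov::element::Type curr_dtype) {
    // Build an auxiliary model: Constant -> Convert -> Result.
    auto convert_op = std::make_shared<ov::op::v0::Convert>(orig_constant, curr_dtype);
    auto result = std::make_shared<ov::op::v0::Result>(convert_op->output(0));
    auto model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{}, "aux");

    // ConstantFolding evaluates Convert over its constant input and replaces the
    // pair with a single constant of the target element type.
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::ConstantFolding>();
    manager.run_passes(model);

    // After folding, the only Constant left in the auxiliary model is the converted one.
    for (const auto& op : model->get_ops()) {
        if (auto folded = ov::as_type_ptr<ov::op::v0::Constant>(op)) {
            return folded;
        }
    }
    return nullptr;
}
```

This is the simplification the commit title refers to: one folding path for both cases instead of a transformation-level flag plus a separate ConvertPrecision branch.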
src/plugins/intel_gpu/src/plugin/program_builder.cpp (1 addition, 2 deletions)
@@ -321,8 +321,7 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::pr
                                     attr.original_size,
                                     attr.original_dtype,
                                     op.get_output_element_type(0),
-                                    op.get_output_shape(0),
-                                    attr.set_by_convert_precision);
+                                    op.get_output_shape(0));
         }
     }
 }
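
On the caller side, `attr` appears to be the WeightlessCacheAttribute read from the node's rt_info, using the same key as the core helper above. A rough sketch of that lookup (the surrounding ProgramBuilder code and exact call site are assumed; only members visible in this diff, original_size and original_dtype, are mentioned):

```cpp
#include "openvino/core/node.hpp"
// ...plus the header declaring ov::WeightlessCacheAttribute (the first changed file above).

// Sketch only: returns the attribute if the node carries one, so callers such as
// ProgramBuilder::add_primitive can forward original_size / original_dtype to the
// weightless cache manager alongside the node's current output type and shape.
const ov::WeightlessCacheAttribute* find_weightless_cache_attr(const ov::Node& op) {
    const auto& rt_info = op.get_rt_info();
    auto it = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
    if (it == rt_info.end()) {
        return nullptr;
    }
    return &it->second.as<ov::WeightlessCacheAttribute>();
}
```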
