From 40b19c83f60bd7416da7339caae3d9a4fcf2537f Mon Sep 17 00:00:00 2001
From: Alexey Smirnov
Date: Wed, 15 Jan 2025 19:49:28 +0000
Subject: [PATCH] [NPUW] Serialization fixes (#28442)

Co-authored-by: Dmitry Matveev
---
 .../src/plugin/npuw/compiled_model.cpp        | 29 +++++-
 .../src/plugin/npuw/llm_compiled_model.cpp    |  9 +-
 .../src/plugin/npuw/serialization.cpp         | 92 +++++++++++++++++++
 .../src/plugin/npuw/serialization.hpp         |  5 +
 .../src/plugin/npuw/weights_bank.cpp          |  7 +-
 .../intel_npu/src/plugin/src/plugin.cpp       |  4 -
 6 files changed, 136 insertions(+), 10 deletions(-)

diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index 6ec6e047dddb8d..f0d9950c2e3520 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -133,9 +133,18 @@ std::shared_ptr<ov::npuw::ICompiledModel> ov::npuw::ICompiledModel::create(
     auto use_llm_key = ov::intel_npu::npuw::llm::enabled.name();
     if (properties.count(use_llm_key) && properties.at(use_llm_key).as<bool>() == true) {
         LOG_INFO("ov::npuw::LLMCompiledModel will be created.");
-        compiled_model = std::make_shared<ov::npuw::LLMCompiledModel>(model, plugin, properties);
+        // Drop CACHE_DIR from the config
+        // If it's present we will be utilizing LLMCompiledModel's import
+        // and not the underlying models and submodels
+        auto config = properties;
+        config.erase(ov::cache_dir.name());
+        compiled_model = std::make_shared<ov::npuw::LLMCompiledModel>(model, plugin, config);
     } else {
         LOG_INFO("ov::npuw::CompiledModel will be created.");
+        // CACHE_DIR isn't supported with NPU_USE_NPUW
+        if (properties.count(ov::cache_dir.name())) {
+            OPENVINO_THROW("Option 'CACHE_DIR' is not supported with configuration: NPU_USE_NPUW : YES, NPUW_LLM : NO");
+        }
         pre_load_transform(model, properties);
         compiled_model = std::make_shared<ov::npuw::CompiledModel>(model, plugin, properties);
     }
@@ -611,6 +620,12 @@ void ov::npuw::CompiledModel::serialize(std::ostream& stream) const {

     // Write config
     write(stream, m_cfg);
+    // FIXME: utilize overload instead
+    write(stream, m_non_npuw_props.size());
+    for (const auto& p : m_non_npuw_props) {
+        write(stream, p.first);
+        write_any(stream, p.second);
+    }

     // Serialize compiled submodels
     write(stream, m_compiled_submodels.size());
@@ -671,6 +686,18 @@ std::shared_ptr<ov::npuw::CompiledModel> ov::npuw::CompiledModel::deserialize(

     // Deserialize config
     read(stream, compiled->m_cfg);
+    compiled->m_cfg.parseEnvVars();
+    // FIXME: utilize overload instead
+    std::size_t props_size;
+    read(stream, props_size);
+    for (std::size_t i = 0; i < props_size; ++i) {
+        std::string key;
+        read(stream, key);
+        ov::Any val;
+        read_any(stream, val);
+        compiled->m_non_npuw_props[key] = val;
+    }
+    compiled->implement_properties();

     // Deserialize compiled submodels
     std::size_t subm_size = 0;
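A note on the serialize()/deserialize() hunks above: the non-NPUW properties travel as a size-prefixed sequence of (key, value) records, so the read loop must mirror the write loop field for field. The stand-alone sketch below shows that framing; std::map<std::string, std::string> and the write_str/read_str helpers are simplifications invented for illustration, not the plugin's actual types (the real map stores ov::Any and reuses the s11n overloads):

    // Sketch of the size-prefixed (key, value) framing; simplified types.
    #include <cassert>
    #include <map>
    #include <sstream>
    #include <string>

    void write_str(std::ostream& s, const std::string& v) {
        std::size_t n = v.size();
        s.write(reinterpret_cast<const char*>(&n), sizeof n);  // length first
        s.write(v.data(), n);                                  // then the bytes
    }

    std::string read_str(std::istream& s) {
        std::size_t n = 0;
        s.read(reinterpret_cast<char*>(&n), sizeof n);
        std::string v(n, '\0');
        s.read(&v[0], n);
        return v;
    }

    void write_props(std::ostream& s, const std::map<std::string, std::string>& m) {
        std::size_t n = m.size();
        s.write(reinterpret_cast<const char*>(&n), sizeof n);  // record count
        for (const auto& p : m) {
            write_str(s, p.first);
            write_str(s, p.second);
        }
    }

    std::map<std::string, std::string> read_props(std::istream& s) {
        std::size_t n = 0;
        s.read(reinterpret_cast<char*>(&n), sizeof n);
        std::map<std::string, std::string> m;
        for (std::size_t i = 0; i < n; ++i) {
            std::string key = read_str(s);  // read key before value on purpose
            m[key] = read_str(s);
        }
        return m;
    }

    int main() {
        std::stringstream ss;
        write_props(ss, {{"PERF_COUNT", "YES"}, {"LOG_LEVEL", "INFO"}});
        std::map<std::string, std::string> back = read_props(ss);
        assert(back.at("PERF_COUNT") == "YES");  // round-trips intact
        return 0;
    }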
                       OPENVINO_VERSION_MAJOR,
                       '.',
                       OPENVINO_VERSION_MINOR,
                       '.',
                       OPENVINO_VERSION_PATCH,
-                       " NPUW serialized by version ",
+                       "\nNPUW serialized by version ",
                        s11n_version,
-                       " NPUW current serialization version ",
+                       "\nNPUW current serialization version ",
                        NPUW_SERIALIZATION_VERSION);
     }

@@ -653,6 +653,7 @@ std::shared_ptr<ov::npuw::LLMCompiledModel> ov::npuw::LLMCompiledModel::deserial

     // Deserialize config
     read(stream, compiled->m_cfg);
+    compiled->implement_properties();

     // Deserialize CompiledModels
     compiled->m_kvcache_compiled = ov::npuw::CompiledModel::deserialize(stream, plugin);
diff --git a/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp b/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp
index 550a1fdd384499..60417f9f241732 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/serialization.cpp
@@ -23,6 +23,10 @@ void ov::npuw::s11n::write(std::ostream& stream, const bool& var) {
     stream.write(reinterpret_cast<const char*>(&var), sizeof var);
 }

+void ov::npuw::s11n::write(std::ostream& stream, const float& var) {
+    stream.write(reinterpret_cast<const char*>(&var), sizeof var);
+}
+
 void ov::npuw::s11n::write(std::ostream& stream, const ov::npuw::compiled::Spatial& var) {
     using ov::npuw::s11n::write;

@@ -74,6 +78,43 @@ void ov::npuw::s11n::write(std::ostream& stream, const ov::Output<const ov::Node
 }

+void ov::npuw::s11n::write_any(std::ostream& stream, const ov::Any& var) {
+    if (var.is<std::string>()) {
+        write(stream, static_cast<int>(AnyType::STRING));
+        write(stream, var.as<std::string>());
+    } else if (var.is<const char*>()) {
+        // FIXME: handle properly
+        write(stream, static_cast<int>(AnyType::CHARS));
+        write(stream, std::string(var.as<const char*>()));
+    } else if (var.is<std::size_t>()) {
+        write(stream, static_cast<int>(AnyType::SIZET));
+        write(stream, var.as<std::size_t>());
+    } else if (var.is<int>()) {
+        write(stream, static_cast<int>(AnyType::INT));
+        write(stream, var.as<int>());
+    } else if (var.is<int64_t>()) {
+        write(stream, static_cast<int>(AnyType::INT64));
+        write(stream, var.as<int64_t>());
+    } else if (var.is<uint32_t>()) {
+        write(stream, static_cast<int>(AnyType::UINT32));
+        write(stream, var.as<uint32_t>());
+    } else if (var.is<uint64_t>()) {
+        write(stream, static_cast<int>(AnyType::UINT64));
+        write(stream, var.as<uint64_t>());
+    } else if (var.is<float>()) {
+        write(stream, static_cast<int>(AnyType::FLOAT));
+        write(stream, var.as<float>());
+    } else if (var.is<bool>()) {
+        write(stream, static_cast<int>(AnyType::BOOL));
+        write(stream, var.as<bool>());
+    } else {
+        NPUW_ASSERT(false && "Unsupported type");
+    }
+}
+
 void ov::npuw::s11n::read(std::istream& stream, std::streampos& var) {
     stream.read(reinterpret_cast<char*>(&var), sizeof var);
 }
@@ -89,6 +130,10 @@ void ov::npuw::s11n::read(std::istream& stream, bool& var) {
     stream.read(reinterpret_cast<char*>(&var), sizeof var);
 }

+void ov::npuw::s11n::read(std::istream& stream, float& var) {
+    stream.read(reinterpret_cast<char*>(&var), sizeof var);
+}
+
 void ov::npuw::s11n::read(std::istream& stream, ov::npuw::compiled::Spatial& var) {
     using ov::npuw::s11n::read;

@@ -169,3 +214,50 @@ void ov::npuw::s11n::read(std::istream& stream, std::shared_ptr<ov::Node>& var)
     var->output(0).set_tensor_ptr(tensor_dummy);
     var->set_friendly_name(*names.begin());  // any_name ?
 }
+
+void ov::npuw::s11n::read_any(std::istream& stream, ov::Any& var) {
+    // FIXME: ugly, but cannot use .read(stream) here due to its usage of operator>>()
+    int type_int;
+    read(stream, type_int);
+    AnyType type = static_cast<AnyType>(type_int);
+    if (type == AnyType::STRING) {
+        std::string val;
+        read(stream, val);
+        var = std::move(val);
+    } else if (type == AnyType::CHARS) {
+        // FIXME: handle properly
+        std::string val;
+        read(stream, val);
+        var = std::move(val);
+    } else if (type == AnyType::SIZET) {
+        std::size_t val;
+        read(stream, val);
+        var = val;
+    } else if (type == AnyType::INT) {
+        int val;
+        read(stream, val);
+        var = val;
+    } else if (type == AnyType::INT64) {
+        int64_t val;
+        read(stream, val);
+        var = val;
+    } else if (type == AnyType::UINT32) {
+        uint32_t val;
+        read(stream, val);
+        var = val;
+    } else if (type == AnyType::UINT64) {
+        uint64_t val;
+        read(stream, val);
+        var = val;
+    } else if (type == AnyType::FLOAT) {
+        float val;
+        read(stream, val);
+        var = val;
+    } else if (type == AnyType::BOOL) {
+        bool val;
+        read(stream, val);
+        var = val;
+    } else {
+        NPUW_ASSERT(false && "Unsupported type");
+    }
+}
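write_any()/read_any() above implement a hand-rolled tagged union: an int tag derived from the AnyType enum is written first, and the reader branches on that tag to decode the payload that follows. Below is a compilable stand-alone illustration of the same tag-then-payload pattern; std::variant stands in for ov::Any, the Tag enum and helper names are invented for this sketch, and only three representative types are covered:

    // Stand-alone illustration of the tag-then-payload pattern.
    #include <cassert>
    #include <cstdint>
    #include <sstream>
    #include <string>
    #include <variant>

    enum class Tag : int { STRING = 0, INT64 = 1, BOOL = 2 };
    using Value = std::variant<std::string, int64_t, bool>;

    template <typename T>
    void write_pod(std::ostream& s, const T& v) {
        s.write(reinterpret_cast<const char*>(&v), sizeof v);
    }

    template <typename T>
    void read_pod(std::istream& s, T& v) {
        s.read(reinterpret_cast<char*>(&v), sizeof v);
    }

    void write_value(std::ostream& s, const Value& v) {
        if (auto ps = std::get_if<std::string>(&v)) {
            write_pod(s, static_cast<int>(Tag::STRING));  // tag goes first
            std::size_t n = ps->size();
            write_pod(s, n);
            s.write(ps->data(), n);
        } else if (auto pi = std::get_if<int64_t>(&v)) {
            write_pod(s, static_cast<int>(Tag::INT64));
            write_pod(s, *pi);
        } else if (auto pb = std::get_if<bool>(&v)) {
            write_pod(s, static_cast<int>(Tag::BOOL));
            write_pod(s, *pb);
        }
    }

    Value read_value(std::istream& s) {
        int tag = 0;
        read_pod(s, tag);  // the tag tells the reader which branch to take
        switch (static_cast<Tag>(tag)) {
        case Tag::STRING: {
            std::size_t n = 0;
            read_pod(s, n);
            std::string v(n, '\0');
            s.read(&v[0], n);
            return v;
        }
        case Tag::INT64: {
            int64_t v = 0;
            read_pod(s, v);
            return v;
        }
        case Tag::BOOL: {
            bool v = false;
            read_pod(s, v);
            return v;
        }
        }
        assert(false && "unsupported tag");
        return {};
    }

    int main() {
        std::stringstream ss;
        write_value(ss, Value{std::string("NPU")});
        write_value(ss, Value{int64_t{42}});
        assert(std::get<std::string>(read_value(ss)) == "NPU");
        assert(std::get<int64_t>(read_value(ss)) == 42);
        return 0;
    }

The same design constraint applies to the real code: any value type the writer can emit must have a matching branch in the reader, which is why both functions end in NPUW_ASSERT for types neither side knows about.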
diff --git a/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp b/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp
index 77a6b3aa865254..170631f644da12 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/serialization.hpp
@@ -27,6 +27,7 @@ class Config;
 namespace ov {

 // Forward declaration
+class Any;
 class Node;
 class Tensor;
 template <typename T>
@@ -52,19 +53,23 @@ namespace s11n {
 void write(std::ostream& stream, const std::streampos& var);
 void write(std::ostream& stream, const std::string& var);
 void write(std::ostream& stream, const bool& var);
+void write(std::ostream& stream, const float& var);
 void write(std::ostream& stream, const ov::npuw::compiled::Spatial& var);
 void write(std::ostream& stream, const ov::Tensor& var);
 void write(std::ostream& stream, const ::intel_npu::Config& var);
 void write(std::ostream& stream, const ov::Output<const ov::Node>& var);
+void write_any(std::ostream& stream, const ov::Any& var);

 void read(std::istream& stream, std::streampos& var);
 void read(std::istream& stream, std::string& var);
 void read(std::istream& stream, bool& var);
+void read(std::istream& stream, float& var);
 void read(std::istream& stream, ov::npuw::compiled::Spatial& var);
 void read(std::istream& stream, ov::Tensor& var);
 void read(std::istream& stream, ::intel_npu::Config& var);
 void read(std::istream& stream, std::shared_ptr<ov::op::v0::Parameter>& var);
 void read(std::istream& stream, std::shared_ptr<ov::Node>& var);
+void read_any(std::istream& stream, ov::Any& var);

 // Forward declaration
 template <typename T>
diff --git a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
index c554d1dcb81b18..125c8b3ab52f99 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/weights_bank.cpp
@@ -219,7 +219,8 @@ void Bank::read_and_add_tensor(std::istream& stream, int64_t uid, const std::str
     auto iter_device = device_bank.storage.find(uid);

     if (iter_device != device_bank.storage.end()) {
-        // Already allocated
+        // Shouldn't be possible
+        NPUW_ASSERT(false);
         return;
     }
@@ -234,6 +235,10 @@
     ov::Tensor allocated_tensor;

     // FIXME: reading not via a dedicated function
+    bool is_intialized = false;
+    read(stream, is_intialized);
+    NPUW_ASSERT(is_intialized);
+
     std::string type_str;
     read(stream, type_str);
     ov::element::Type type(type_str);
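The weights_bank.cpp hunks above tighten the reader's assumptions: hitting an already-present uid is now treated as a logic error, and each tensor record is preceded by an "initialized" marker that is asserted before the payload is decoded. A minimal stand-alone sketch of that marker-then-payload guard follows; the names and byte layout here are illustrative only, not the bank's actual record format:

    // Sketch of a marker-then-payload guard: assert on the flag before
    // trusting the bytes that follow it.
    #include <cassert>
    #include <sstream>
    #include <vector>

    void write_blob(std::ostream& s, const std::vector<char>& data) {
        bool initialized = !data.empty();
        s.write(reinterpret_cast<const char*>(&initialized), sizeof initialized);
        std::size_t n = data.size();
        s.write(reinterpret_cast<const char*>(&n), sizeof n);
        s.write(data.data(), n);
    }

    std::vector<char> read_blob(std::istream& s) {
        bool initialized = false;
        s.read(reinterpret_cast<char*>(&initialized), sizeof initialized);
        assert(initialized && "blob must have been serialized as initialized");
        std::size_t n = 0;
        s.read(reinterpret_cast<char*>(&n), sizeof n);
        std::vector<char> data(n);
        s.read(data.data(), n);
        return data;
    }

    int main() {
        std::stringstream ss;
        write_blob(ss, {'a', 'b', 'c'});
        std::vector<char> back = read_blob(ss);
        assert(back.size() == 3 && back[0] == 'a');
        return 0;
    }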
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 5d5f666f5119ec..301a71887054f2 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -622,10 +622,6 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr
     ov::AnyMap localProperties = properties;
     if (localProperties.count(useNpuwKey)) {
         if (localProperties.at(useNpuwKey).as<bool>() == true) {
-            // CACHE_DIR isn't supported with NPU_USE_NPUW
-            if (localProperties.count(ov::cache_dir.name()) || !_globalConfig.get<CACHE_DIR>().empty()) {
-                OPENVINO_THROW("Option 'CACHE_DIR' is not supported with NPU_USE_NPUW!");
-            }
             return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties);
         } else {
             // NPUW is disabled, remove the key from the properties
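With this hunk the CACHE_DIR rejection is gone from plugin.cpp; the equivalent gate now lives in ov::npuw::ICompiledModel::create (first hunk of this patch), where the LLM and non-LLM paths diverge: the LLM path drops the option and relies on LLMCompiledModel's own import, while the plain NPUW path still throws. A rough sketch of that decision, with std::map standing in for ov::AnyMap and a literal string for ov::cache_dir.name():

    // Sketch of the relocated CACHE_DIR gate; simplified types and key name.
    #include <map>
    #include <stdexcept>
    #include <string>

    using AnyMap = std::map<std::string, std::string>;
    const std::string CACHE_DIR_KEY = "CACHE_DIR";

    AnyMap prepare_npuw_config(const AnyMap& properties, bool llm_enabled) {
        AnyMap config = properties;
        if (llm_enabled) {
            // LLMCompiledModel handles import itself, so the option is dropped.
            config.erase(CACHE_DIR_KEY);
        } else if (config.count(CACHE_DIR_KEY)) {
            throw std::runtime_error(
                "Option 'CACHE_DIR' is not supported with NPU_USE_NPUW : YES, NPUW_LLM : NO");
        }
        return config;
    }

    int main() {
        AnyMap props{{"CACHE_DIR", "/tmp/cache"}, {"NPUW_LLM", "YES"}};
        AnyMap llm_cfg = prepare_npuw_config(props, /*llm_enabled=*/true);
        // The LLM path drops the option; the non-LLM path would throw instead.
        return llm_cfg.count(CACHE_DIR_KEY) ? 1 : 0;
    }

One difference worth noting: the removed plugin.cpp check also consulted _globalConfig.get<CACHE_DIR>(); after this patch, only the per-call properties are inspected, at create() time.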