From 2995fd4ade33384374cbd090ab35614d36e507e3 Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Fri, 10 Jan 2025 17:02:38 +0200 Subject: [PATCH] Defer weights loading by default in case of cip Signed-off-by: Bogdan Pereanu --- .../common/include/intel_npu/common/icompiler_adapter.hpp | 7 +++---- .../compiler_adapter/include/driver_compiler_adapter.hpp | 6 +++--- .../compiler_adapter/include/plugin_compiler_adapter.hpp | 6 +++--- .../src/compiler_adapter/include/plugin_graph.hpp | 2 +- .../src/compiler_adapter/src/driver_compiler_adapter.cpp | 7 +++---- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 7 +++---- .../intel_npu/src/compiler_adapter/src/plugin_graph.cpp | 6 +++++- 7 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp index 6e585299d68a1d..d5ae4313ece6b7 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp @@ -10,10 +10,9 @@ namespace intel_npu { class ICompilerAdapter { public: - virtual std::shared_ptr compile(const std::shared_ptr& model, - const Config& config) const = 0; - virtual std::shared_ptr parse(std::vector network, const Config& config) const = 0; - virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; + virtual std::shared_ptr compile(const std::shared_ptr& model, Config& config) const = 0; + virtual std::shared_ptr parse(std::vector network, Config& config) const = 0; + virtual ov::SupportedOpsMap query(const std::shared_ptr& model, Config& config) const = 0; virtual ~ICompilerAdapter() = default; }; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index 3fb0ea8937da60..f86c5d8ccd6910 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -21,11 +21,11 @@ class DriverCompilerAdapter final : public ICompilerAdapter { public: DriverCompilerAdapter(const std::shared_ptr& zeroInitStruct); - std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; + std::shared_ptr compile(const std::shared_ptr& model, Config& config) const override; - std::shared_ptr parse(std::vector network, const Config& config) const override; + std::shared_ptr parse(std::vector network, Config& config) const override; - ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; + ov::SupportedOpsMap query(const std::shared_ptr& model, Config& config) const override; private: /** diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 96c71d9f80c668..5e49843619bbcf 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -19,11 +19,11 @@ class PluginCompilerAdapter final : public ICompilerAdapter { public: PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct); - std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; + std::shared_ptr compile(const std::shared_ptr& model, Config& config) const override; - std::shared_ptr parse(std::vector network, const Config& config) const override; + std::shared_ptr parse(std::vector network, Config& config) const override; - ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; + ov::SupportedOpsMap query(const std::shared_ptr& model, Config& config) const override; private: std::shared_ptr _zeroInitStruct; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp index 2d7d9bfd429e47..b16977adc02733 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp @@ -24,7 +24,7 @@ class PluginGraph final : public IGraph { ze_graph_handle_t graphHandle, NetworkMetadata metadata, std::vector blob, - const Config& config); + Config& config); void export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index b17148c6411936..548b4d73f9d38a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -160,7 +160,7 @@ DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr& model, - const Config& config) const { + Config& config) const { OV_ITT_TASK_CHAIN(COMPILE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "compile"); const ze_graph_compiler_version_info_t& compilerVersion = _deviceGraphProperties.compilerVersion; @@ -203,7 +203,7 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(std::vector network, const Config& config) const { +std::shared_ptr DriverCompilerAdapter::parse(std::vector network, Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); _logger.debug("parse start"); @@ -221,8 +221,7 @@ std::shared_ptr DriverCompilerAdapter::parse(std::vector networ std::optional>(std::move(network))); } -ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, - const Config& config) const { +ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, Config& config) const { OV_ITT_TASK_CHAIN(query_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "query"); const ze_graph_compiler_version_info_t& compilerVersion = _deviceGraphProperties.compilerVersion; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 6d67f544db2c17..a92c68135e88b5 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -75,7 +75,7 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr& model, - const Config& config) const { + Config& config) const { OV_ITT_TASK_CHAIN(COMPILE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "compile"); _logger.debug("compile start"); @@ -103,7 +103,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr PluginCompilerAdapter::parse(std::vector network, const Config& config) const { +std::shared_ptr PluginCompilerAdapter::parse(std::vector network, Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); _logger.debug("parse start"); @@ -125,8 +125,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::vector networ config); } -ov::SupportedOpsMap PluginCompilerAdapter::query(const std::shared_ptr& model, - const Config& config) const { +ov::SupportedOpsMap PluginCompilerAdapter::query(const std::shared_ptr& model, Config& config) const { OV_ITT_TASK_CHAIN(QUERY_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "query"); return _compiler->query(model, config); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp index b1658e7e0582e0..a9acb445917a80 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp @@ -16,12 +16,16 @@ PluginGraph::PluginGraph(const std::shared_ptr& zeGraphExt, ze_graph_handle_t graphHandle, NetworkMetadata metadata, std::vector blob, - const Config& config) + Config& config) : IGraph(graphHandle, std::move(metadata), config, std::optional>(std::move(blob))), _zeGraphExt(zeGraphExt), _zeroInitStruct(zeroInitStruct), _compiler(compiler), _logger("PluginGraph", config.get()) { + if (!(config.has() || config.has())) { + config.update({{ov::intel_npu::defer_weights_load.name(), "YES"}}); + } + if (!config.get() || config.get()) { _logger.info("Graph initialize is deferred from the \"Graph\" constructor"); return;