Update code according to review
pereanub committed Apr 23, 2024
1 parent 2314468 commit c99a0a9
Showing 10 changed files with 66 additions and 62 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/al/include/npu.hpp
@@ -34,7 +34,7 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
/** @brief Get name of backend */
virtual const std::string getName() const = 0;
/** @brief Backend has support for concurrency batching */
virtual bool backendSupportBatching() const = 0;
virtual bool isBatchingSupported() const = 0;
/** @brief Register backend-specific options */
virtual void registerOptions(OptionsDesc& options) const;

2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/backend/include/zero_backend.hpp
@@ -24,7 +24,7 @@ class ZeroEngineBackend final : public IEngineBackend {
uint32_t getDriverVersion() const override;
uint32_t getDriverExtVersion() const override;

bool backendSupportBatching() const override;
bool isBatchingSupported() const override;

private:
std::shared_ptr<ZeroInitStructsHolder> _instance;
8 changes: 4 additions & 4 deletions src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
@@ -42,14 +42,14 @@ class ZeroInferRequest final : public SyncInferRequest {
const Config _config;
Logger _logger;

zeroProfiling::ProfilingPool _profiling_pool;
zeroProfiling::ProfilingQuery _profiling_query;
std::shared_ptr<zeroProfiling::NpuInferProfiling> _npu_profiling;
zeroProfiling::ProfilingPool _profilingPool;
zeroProfiling::ProfilingQuery _profilingQuery;
std::shared_ptr<zeroProfiling::NpuInferProfiling> _npuProfiling;
std::unique_ptr<Pipeline> _pipeline;

// If batching is handled on the compiler side then batching on the plugin shall be set to 1, we don't do any
// specific operations on the plugin in this case.
size_t _batch_size = 1;
size_t _batchSize = 1;
};

} // namespace intel_npu
6 changes: 3 additions & 3 deletions src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
@@ -20,9 +20,9 @@ struct Pipeline {
Pipeline& operator=(Pipeline&&) = delete;
virtual ~Pipeline() = default;

virtual void push(const size_t batch_index) = 0;
virtual void pull(const size_t batch_index) = 0;
virtual void reset(const size_t batch_index) const = 0;
virtual void push(size_t batch_index) = 0;
virtual void pull(size_t batch_index) = 0;
virtual void reset(size_t batch_index) const = 0;

protected:
zeroMemory::MemoryManagementUnit _deviceInputs;
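
A note on the push/pull/reset signature change above: top-level const on a by-value parameter is not part of a function's type, so it adds nothing to a declaration; it only prevents reassignment inside a definition. A minimal sketch of the distinction, with an illustrative function name that is not part of the plugin:

    #include <cstddef>

    // Declaration: a top-level "const" on a by-value parameter would be ignored
    // by the compiler, so the cleaner style is to omit it in the interface.
    void process_batch(size_t batch_index);

    // Definition: the implementer may still mark the parameter const locally to
    // guard against accidental reassignment inside the body.
    void process_batch(const size_t batch_index) {
        // batch_index = 0;  // would not compile: batch_index is const here
        (void)batch_index;
    }

    int main() {
        process_batch(0);  // both lines above refer to the same function
        return 0;
    }
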
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/backend/src/zero_backend.cpp
@@ -28,7 +28,7 @@ uint32_t ZeroEngineBackend::getDriverExtVersion() const {
return _instance->getDriverExtVersion();
}

bool ZeroEngineBackend::backendSupportBatching() const {
bool ZeroEngineBackend::isBatchingSupported() const {
if (_instance->getDriverExtVersion() < ZE_GRAPH_EXT_VERSION_1_6) {
return false;
}
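
The isBatchingSupported() implementation above gates plugin-side batching on the graph extension version reported by the driver. A rough standalone sketch of the same capability-check pattern; the driver struct and the numeric version constant below are hypothetical stand-ins for the Level Zero types, not the real API:

    #include <cstdint>
    #include <iostream>

    // Hypothetical minimum extension version required for the capability.
    constexpr uint32_t kRequiredGraphExtVersion = 0x00010006;

    struct FakeDriver {
        uint32_t graph_ext_version;  // version the driver reports at runtime
    };

    // Advertise the capability only when the driver extension is new enough.
    bool is_batching_supported(const FakeDriver& driver) {
        return driver.graph_ext_version >= kRequiredGraphExtVersion;
    }

    int main() {
        FakeDriver old_driver{0x00010005};
        FakeDriver new_driver{0x00010006};
        std::cout << std::boolalpha
                  << is_batching_supported(old_driver) << '\n'   // false
                  << is_batching_supported(new_driver) << '\n';  // true
        return 0;
    }
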
58 changes: 32 additions & 26 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -69,7 +69,7 @@ size_t get_batch_size_for_node(const IONodeDescriptor& nodeDescriptor,
return 1;
}

size_t get_batch_size(
std::optional<size_t> get_batch_size(
const NetworkMetadata& metadata,
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorInputDescriptors,
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorOutputDescriptors) {
@@ -96,7 +96,11 @@ size_t get_batch_size(
}

auto it = batch_size.begin();
return *it;
if (*it) {
return *it;
}

return std::nullopt;
}

} // namespace
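
Changing the return type to std::optional<size_t> lets get_batch_size() distinguish "no plugin-side batch size detected" from an actual value. A simplified sketch of the producer/consumer pattern; DummyMetadata below is a toy stand-in for the real NetworkMetadata and descriptor maps:

    #include <cstddef>
    #include <iostream>
    #include <optional>
    #include <set>

    // Toy stand-in for the shapes inspected by the real get_batch_size().
    struct DummyMetadata {
        std::set<size_t> candidate_batch_sizes;  // batch dims gathered from all I/O nodes
    };

    // Return a batch size only when all nodes agree on a single non-zero value.
    std::optional<size_t> get_batch_size(const DummyMetadata& metadata) {
        if (metadata.candidate_batch_sizes.size() != 1) {
            return std::nullopt;  // missing or mismatching batch dimensions
        }
        const size_t candidate = *metadata.candidate_batch_sizes.begin();
        return candidate ? std::optional<size_t>(candidate) : std::nullopt;
    }

    int main() {
        DummyMetadata meta{{4}};
        size_t batch_size = 1;  // default: the plugin performs no extra batching
        if (auto detected = get_batch_size(meta); detected.has_value()) {
            batch_size = *detected;
        }
        std::cout << "batch size: " << batch_size << '\n';  // prints 4
        return 0;
    }
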
@@ -111,22 +115,20 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
_executor(static_cast<const ZeroExecutor*>(_executorPtr.get())),
_config(config),
_logger("ZeroInferRequest", config.get<LOG_LEVEL>()),
_profiling_pool(_executor->graph(),
zeroProfiling::POOL_SIZE,
_executor->getInitStructs()->getProfilingDdiTable()),
_profiling_query(0,
_executor->getInitStructs()->getDevice(),
_executor->getInitStructs()->getProfilingDdiTable()) {
_profilingPool(_executor->graph(), zeroProfiling::POOL_SIZE, _executor->getInitStructs()->getProfilingDdiTable()),
_profilingQuery(0,
_executor->getInitStructs()->getDevice(),
_executor->getInitStructs()->getProfilingDdiTable()) {
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorInputDescriptors =
_executor->inputs_desc_map();
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorOutputDescriptors =
_executor->outputs_desc_map();

auto proftype = config.get<PROFILING_TYPE>();
if (proftype == ov::intel_npu::ProfilingType::INFER) {
_npu_profiling = std::make_shared<zeroProfiling::NpuInferProfiling>(_executor->getInitStructs()->getContext(),
_executor->getInitStructs()->getDevice(),
_config.get<LOG_LEVEL>());
_npuProfiling = std::make_shared<zeroProfiling::NpuInferProfiling>(_executor->getInitStructs()->getContext(),
_executor->getInitStructs()->getDevice(),
_config.get<LOG_LEVEL>());
}

ze_device_properties_t properties = {};
@@ -142,7 +144,11 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

if (config.get<BATCH_MODE>() != ov::intel_npu::BatchMode::COMPILER) {
try {
_batch_size = get_batch_size(_metadata, executorInputDescriptors, executorOutputDescriptors);
auto batchSize = get_batch_size(_metadata, executorInputDescriptors, executorOutputDescriptors);

if (batchSize.has_value()) {
_batchSize = *batchSize;
}
} catch (const std::exception& ex) {
_logger.info("Got an error when checking the batch size: \n%s", ex.what());
}
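
A side note on the unwrap above: when falling back to the current default is acceptable, std::optional::value_or collapses the has_value()/dereference pair into one call (the surrounding try/catch would still be needed for the exceptions get_batch_size may throw). A tiny illustration, not the plugin's actual code:

    #include <cstddef>
    #include <iostream>
    #include <optional>

    int main() {
        std::optional<size_t> not_detected;   // empty: no plugin-side batch size found
        std::optional<size_t> detected = 8;   // a detected batch size

        // value_or returns the contained value, or the given default when empty.
        std::cout << not_detected.value_or(1) << '\n';  // prints 1
        std::cout << detected.value_or(1) << '\n';      // prints 8
        return 0;
    }
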
@@ -165,8 +171,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

// When batching is handled by the plugin we need to modify transposed shape with the original batch size since
// it will be forced to 1 at the compilation time
if (_batch_size > 1) {
parameterDescriptor.transposedShape[0] = _batch_size;
if (_batchSize > 1) {
parameterDescriptor.transposedShape[0] = _batchSize;
}

// The I/O buffers already allocated using the Level Zero API are being reused here
@@ -194,8 +200,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

// When batching is handled by the plugin we need to modify transposed shape with the original batch size since
// it will be forced to 1 at the compilation time
if (_batch_size > 1) {
resultDescriptor.transposedShape[0] = _batch_size;
if (_batchSize > 1) {
resultDescriptor.transposedShape[0] = _batchSize;
}

allocate_tensor(outputName, resultDescriptor, TensorType::InputOrOutput, allocator);
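
Both the parameter and result branches above write the plugin-side batch size back into element 0 of the transposed shape, because the compiler was given a batch of 1 when batching is handled by the plugin. A small sketch of that patch-up; a plain vector stands in for the descriptor's shape type, and dimension 0 is assumed to be the batch axis:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Re-insert the batch size the plugin will drive, assuming dim 0 is the batch axis.
    void restore_batch_dim(std::vector<size_t>& transposed_shape, size_t batch_size) {
        if (batch_size > 1 && !transposed_shape.empty()) {
            transposed_shape[0] = batch_size;  // was forced to 1 at compile time
        }
    }

    int main() {
        std::vector<size_t> shape{1, 3, 224, 224};  // shape as compiled
        restore_batch_dim(shape, 4);                // plugin-side batch of 4
        std::cout << shape[0] << '\n';              // prints 4
        return 0;
    }
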
@@ -242,11 +248,11 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
/// Construct pipeline
_pipeline = makePipeline(_executorPtr,
_config,
_profiling_pool,
_profiling_query,
_npu_profiling,
_profilingPool,
_profilingQuery,
_npuProfiling,
_copyAllTensors,
_batch_size);
_batchSize);
}

void ZeroInferRequest::infer() {
@@ -284,15 +290,15 @@ void ZeroInferRequest::infer_async() {
}
}

for (size_t i = 0; i < _batch_size; i++) {
for (size_t i = 0; i < _batchSize; i++) {
_pipeline->push(i);
}
}

void ZeroInferRequest::get_result() {
OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "get_result");

for (size_t i = 0; i < _batch_size; i++) {
for (size_t i = 0; i < _batchSize; i++) {
_pipeline->pull(i);
}

@@ -328,7 +334,7 @@ void ZeroInferRequest::get_result() {
}
}

for (size_t i = 0; i < _batch_size; i++) {
for (size_t i = 0; i < _batchSize; i++) {
_pipeline->reset(i);
}
_logger.debug("InferRequest::get_result finished");
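
The loops in infer_async() and get_result() above drive the pipeline once per batch element: push() submits the work for each index, pull() waits for each, and reset() makes the slots reusable for the next inference. A condensed sketch of that lifecycle; FakePipeline is a trivial stand-in for the Pipeline interface, and the logging is purely illustrative:

    #include <cstddef>
    #include <iostream>

    // Minimal stand-in for the Pipeline interface from zero_pipeline.hpp.
    struct FakePipeline {
        void push(size_t batch_index) { std::cout << "push " << batch_index << '\n'; }
        void pull(size_t batch_index) { std::cout << "pull " << batch_index << '\n'; }
        void reset(size_t batch_index) const { std::cout << "reset " << batch_index << '\n'; }
    };

    int main() {
        const size_t batch_size = 3;
        FakePipeline pipeline;

        for (size_t i = 0; i < batch_size; ++i) pipeline.push(i);   // submit every batch slice
        for (size_t i = 0; i < batch_size; ++i) pipeline.pull(i);   // wait for every slice
        for (size_t i = 0; i < batch_size; ++i) pipeline.reset(i);  // prepare for the next infer
        return 0;
    }
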
@@ -381,13 +387,13 @@ std::vector<ov::ProfilingInfo> ZeroInferRequest::get_profiling_info() const {
} else {
auto proftype = _config.get<PROFILING_TYPE>();
if (proftype == ov::intel_npu::ProfilingType::INFER) {
return _npu_profiling->getNpuInferStatistics();
return _npuProfiling->getNpuInferStatistics();
} else { /// proftype = MODEL or undefined = fallback to model profiling
return _profiling_query.getLayerStatistics();
return _profilingQuery.getLayerStatistics();
}
}
}
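
get_profiling_info() above picks its statistics source from the PROFILING_TYPE option: INFER returns the NPU inference statistics, anything else falls back to per-layer statistics from the profiling query. A toy sketch of that dispatch, using placeholder enum and string types rather than the real OpenVINO ones:

    #include <iostream>
    #include <string>

    // Placeholder for ov::intel_npu::ProfilingType.
    enum class ProfilingType { MODEL, INFER };

    std::string get_profiling_info(ProfilingType type) {
        if (type == ProfilingType::INFER) {
            return "npu inference statistics";  // cf. _npuProfiling->getNpuInferStatistics()
        }
        return "per-layer statistics";          // cf. _profilingQuery.getLayerStatistics()
    }

    int main() {
        std::cout << get_profiling_info(ProfilingType::INFER) << '\n';
        std::cout << get_profiling_info(ProfilingType::MODEL) << '\n';
        return 0;
    }
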

std::vector<uint8_t> ZeroInferRequest::get_raw_profiling_data() const {
return _profiling_query.getData<uint8_t>();
return _profilingQuery.getData<uint8_t>();
}
12 changes: 6 additions & 6 deletions src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
@@ -94,7 +94,7 @@ struct DiscretePipeline final : public Pipeline {
DiscretePipeline& operator=(const DiscretePipeline&) = delete;
virtual ~DiscretePipeline() = default;

void push(const size_t) override {
void push(size_t) override {
OV_ITT_TASK_CHAIN(ZERO_INFER_REQUEST_DP_PUSH,
itt::domains::LevelZeroBackend,
"DiscretePipeline::push",
@@ -107,7 +107,7 @@ struct DiscretePipeline final : public Pipeline {
_command_queues[stage::EXECUTE]->executeCommandList(_command_list[stage::EXECUTE], _fence[stage::EXECUTE]);
};

void pull(const size_t) override {
void pull(size_t) override {
OV_ITT_TASK_CHAIN(ZERO_INFER_REQUEST_DP_PULL,
itt::domains::LevelZeroBackend,
"DiscretePipeline::pull",
@@ -122,7 +122,7 @@ struct DiscretePipeline final : public Pipeline {
_fence[stage::READBACK].hostSynchronize();
};

void reset(const size_t) const override {
void reset(size_t) const override {
// Reset the fence objects
for (auto& fence : _fence) {
fence.reset();
@@ -215,7 +215,7 @@ struct IntegratedPipeline final : public Pipeline {
IntegratedPipeline& operator=(const IntegratedPipeline&) = delete;
virtual ~IntegratedPipeline() = default;

void push(const size_t batch_index) override {
void push(size_t batch_index) override {
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PUSH, itt::domains::LevelZeroBackend, "IntegratedPipeline", "push");
if (sync_output_with_fences_) {
_command_queue.executeCommandList(*_command_lists.at(batch_index), *_fences.at(batch_index));
@@ -224,7 +224,7 @@ struct IntegratedPipeline final : public Pipeline {
}
};

void pull(const size_t batch_index) override {
void pull(size_t batch_index) override {
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PULL, itt::domains::LevelZeroBackend, "IntegratedPipeline", "pull");
if (sync_output_with_fences_) {
_fences.at(batch_index)->hostSynchronize();
@@ -237,7 +237,7 @@ struct IntegratedPipeline final : public Pipeline {
}
};

void reset(const size_t batch_index) const override {
void reset(size_t batch_index) const override {
if (sync_output_with_fences_) {
_fences.at(batch_index)->reset();
} else {
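
push(), pull() and reset() in IntegratedPipeline above each branch on sync_output_with_fences_: with fences, the command queue signals a per-batch-slot fence that the host synchronizes on; without them, an event recorded in the command list is waited on instead. A rough sketch of that branching with dummy fence/event types in place of the Level Zero wrappers:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Dummy synchronization primitives standing in for the Level Zero wrappers.
    struct DummyFence { void hostSynchronize() const { std::cout << "fence wait\n"; } };
    struct DummyEvent { void hostSynchronize() const { std::cout << "event wait\n"; } };

    struct SketchPipeline {
        bool sync_output_with_fences;
        std::vector<DummyFence> fences;  // one per batch slot
        std::vector<DummyEvent> events;  // one per batch slot

        // Wait for the work submitted for one batch slice to finish.
        void pull(size_t batch_index) const {
            if (sync_output_with_fences) {
                fences.at(batch_index).hostSynchronize();
            } else {
                events.at(batch_index).hostSynchronize();
            }
        }
    };

    int main() {
        SketchPipeline with_fences{true, std::vector<DummyFence>(2), std::vector<DummyEvent>(2)};
        SketchPipeline with_events{false, std::vector<DummyFence>(2), std::vector<DummyEvent>(2)};
        with_fences.pull(0);  // "fence wait"
        with_events.pull(1);  // "event wait"
        return 0;
    }
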
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/include/backends.hpp
@@ -31,7 +31,7 @@ class NPUBackends final {
std::string getBackendName() const;
uint32_t getDriverVersion() const;
uint32_t getDriverExtVersion() const;
bool backendSupportBatching() const;
bool isBatchingSupported() const;
void registerOptions(OptionsDesc& options) const;
std::string getCompilationPlatform(const std::string_view platform, const std::string& deviceId) const;

4 changes: 2 additions & 2 deletions src/plugins/intel_npu/src/plugin/src/backends.cpp
@@ -155,9 +155,9 @@ uint32_t NPUBackends::getDriverExtVersion() const {
OPENVINO_THROW("No available backend");
}

bool NPUBackends::backendSupportBatching() const {
bool NPUBackends::isBatchingSupported() const {
if (_backend != nullptr) {
return _backend->backendSupportBatching();
return _backend->isBatchingSupported();
}

OPENVINO_THROW("No available backend");
32 changes: 15 additions & 17 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -43,23 +43,20 @@ std::shared_ptr<ov::Model> create_dummy_model(const IONodeDescriptorMap& paramet
const IONodeDescriptorMap& resultDescriptors,
const std::vector<std::string>& inputNames,
const std::vector<std::string>& outputNames,
const Config& config) {
bool isBatchingSupported) {
ov::ParameterVector parameters;
ov::NodeVector results;

const auto& batchMode = config.get<BATCH_MODE>();

for (const std::string& inputName : inputNames) {
const IONodeDescriptor& parameterDescriptor = parameterDescriptors.at(inputName);

std::shared_ptr<ov::op::v0::Parameter> parameter = [&] {
if (batchMode != ov::intel_npu::BatchMode::COMPILER) {
if (isBatchingSupported) {
return std::make_shared<ov::op::v0::Parameter>(parameterDescriptor.precision,
parameterDescriptor.originalShape);
} else {
return std::make_shared<ov::op::v0::Parameter>(parameterDescriptor.precision,
parameterDescriptor.transposedShape);
}
return std::make_shared<ov::op::v0::Parameter>(parameterDescriptor.precision,
parameterDescriptor.transposedShape);
}();

parameter->set_friendly_name(parameterDescriptor.currentNodeName);
@@ -79,15 +76,14 @@ std::shared_ptr<ov::Model> create_dummy_model(const IONodeDescriptorMap& paramet
constantDummy->set_friendly_name(resultDescriptor.legacyName);

const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy = [&] {
if (batchMode != ov::intel_npu::BatchMode::COMPILER) {
if (isBatchingSupported) {
return std::make_shared<ov::descriptor::Tensor>(resultDescriptor.precision,
resultDescriptor.originalShape,
resultDescriptor.outputTensorNames);
} else {
return std::make_shared<ov::descriptor::Tensor>(resultDescriptor.precision,
resultDescriptor.transposedShape,
resultDescriptor.outputTensorNames);
}
return std::make_shared<ov::descriptor::Tensor>(resultDescriptor.precision,
resultDescriptor.transposedShape,
resultDescriptor.outputTensorNames);
}();

std::shared_ptr<ov::Node> result = std::make_shared<ov::op::v0::Result>(constantDummy);
@@ -99,8 +95,8 @@ std::shared_ptr<ov::Model> create_dummy_model(const IONodeDescriptorMap& paramet
return std::make_shared<ov::Model>(results, parameters);
}
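
create_dummy_model() above initializes parameter and tensorDummy through an immediately-invoked lambda, so the value chosen by the isBatchingSupported branch can stay single-assignment (and const if desired). A small generic sketch of the idiom, deliberately using plain strings instead of the OpenVINO node types:

    #include <iostream>
    #include <memory>
    #include <string>

    std::shared_ptr<std::string> make_label(bool use_original_shape) {
        // Immediately-invoked lambda: the branch picks the value exactly once,
        // and the resulting variable never needs to be reassigned afterwards.
        const std::shared_ptr<std::string> label = [&] {
            if (use_original_shape) {
                return std::make_shared<std::string>("original shape");
            }
            return std::make_shared<std::string>("transposed shape");
        }();
        return label;
    }

    int main() {
        std::cout << *make_label(true) << '\n';   // "original shape"
        std::cout << *make_label(false) << '\n';  // "transposed shape"
        return 0;
    }
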

void set_batch_config(std::shared_ptr<NPUBackends> backends, Config& config) {
if (!backends->backendSupportBatching() || config.get<PLATFORM>() == ov::intel_npu::Platform::NPU3700) {
void set_batch_config(bool isBatchingSupported, Config& config) {
if (!isBatchingSupported || config.get<PLATFORM>() == ov::intel_npu::Platform::NPU3700) {
if (config.has<BATCH_MODE>() && config.get<BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN) {
OPENVINO_THROW("Batching on plugin is not supported with this driver version");
}
@@ -523,7 +519,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());
localConfig.update({{ov::intel_npu::platform.name(), platform}});

set_batch_config(_backends, localConfig);
auto isBatchingSupported = _backends->isBatchingSupported();
set_batch_config(isBatchingSupported, localConfig);

if (model->get_variables().size()) {
if (localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN) {
@@ -616,7 +613,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
localConfig.update({{ov::intel_npu::platform.name(), platform}});
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());

set_batch_config(_backends, localConfig);
auto isBatchingSupported = _backends->isBatchingSupported();
set_batch_config(isBatchingSupported, localConfig);

Logger logger("NPUPlugin", localConfig.get<LOG_LEVEL>());

@@ -643,7 +641,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
meta.name = "net" + std::to_string(_compiledModelLoadCounter++);

const std::shared_ptr<ov::Model> modelDummy =
create_dummy_model(meta.parameters, meta.results, meta.inputNames, meta.outputNames, localConfig);
create_dummy_model(meta.parameters, meta.results, meta.inputNames, meta.outputNames, isBatchingSupported);

bool profiling = localConfig.get<PERF_COUNT>();

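
After this change, compile_model() and import_model() query the backend once and pass the resulting boolean to both set_batch_config() and create_dummy_model(), so those helpers no longer need the backend object or the full Config to answer the batching question. A hedged sketch of that call-site shape; all types and signatures below are simplified stand-ins, not the real plugin API:

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Simplified stand-ins for the plugin types.
    struct SketchBackend {
        bool isBatchingSupported() const { return true; }
    };

    enum class BatchMode { AUTO, COMPILER, PLUGIN };

    void set_batch_config(bool is_batching_supported, BatchMode mode) {
        if (!is_batching_supported && mode == BatchMode::PLUGIN) {
            throw std::runtime_error("Batching on plugin is not supported with this driver version");
        }
    }

    std::string create_dummy_model(bool is_batching_supported) {
        return is_batching_supported ? "model with original shapes" : "model with transposed shapes";
    }

    int main() {
        SketchBackend backend;
        const BatchMode mode = BatchMode::PLUGIN;

        // Query the capability once, then hand the plain bool to every consumer.
        const bool is_batching_supported = backend.isBatchingSupported();
        set_batch_config(is_batching_supported, mode);
        std::cout << create_dummy_model(is_batching_supported) << '\n';
        return 0;
    }
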
