Skip to content

Commit

Permalink
Update code according to review
Browse files Browse the repository at this point in the history
  • Loading branch information
pereanub committed Apr 24, 2024
1 parent 01c56ca commit f90a0fb
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 111 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/al/src/config/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ ov::intel_npu::BatchMode intel_npu::BATCH_MODE::parse(std::string_view val) {
return ov::intel_npu::BatchMode::PLUGIN;
}

OPENVINO_THROW("Value '{0}' is not a valid BATCH_TYPE option", val);
OPENVINO_THROW("Value '", val, "' is not a valid BATCH_MODE option");
}

std::string intel_npu::BATCH_MODE::toString(const ov::intel_npu::BatchMode& val) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
#include "zero_utils.hpp"
#include "zero_wrappers.hpp"

namespace {
constexpr std::size_t DEFAULT_BATCH_SIZE = 1;
} // namespace

namespace intel_npu {

class ZeroInferRequest final : public SyncInferRequest {
Expand Down Expand Up @@ -49,7 +53,7 @@ class ZeroInferRequest final : public SyncInferRequest {

// If batching is handled on the compiler side then batching on the plugin shall be set to 1, we don't do any
// specific operations on the plugin in this case.
size_t _batchSize = 1;
size_t _batchSize = DEFAULT_BATCH_SIZE;
};

} // namespace intel_npu
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/backend/include/zero_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class HostMemAllocator final {
static const std::size_t _alignment = STANDARD_PAGE_SIZE;
};

// Graph arguments (inputs and output) need to be allocated in the host memory.
// Graph arguments (inputs and outputs) need to be allocated in the host memory.
// For discrete platforms, graph arguments need to be copied into the device memory.
// MemoryManagementUnit is used to allocate memory in the device memory.
// Usage: we should append graph arguments with corresponding names with `appendArgument` call to prepare size
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_npu/src/backend/src/zero_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@ uint32_t ZeroEngineBackend::getDriverExtVersion() const {
}

bool ZeroEngineBackend::isBatchingSupported() const {
if (_instance->getDriverExtVersion() < ZE_GRAPH_EXT_VERSION_1_6) {
return false;
}
return true;
return _instance->getDriverExtVersion() >= ZE_GRAPH_EXT_VERSION_1_6;
}

ZeroEngineBackend::~ZeroEngineBackend() = default;
Expand Down
138 changes: 86 additions & 52 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,26 @@ using namespace intel_npu;

namespace {

constexpr std::size_t BATCH_AXIS = 0;

/**
* @brief Checks that the metadata of the provided descriptor corresponds to the values registered in the Level Zero
* structure.
* @param nodeDescriptor The OpenVINO API specific I/O descriptor which shall be compared.
* @param zeDescriptor The Level Zero specific structure used for comparison.
* @param name Tensor identifier used for error logging.
*/
void check_level_zero_attributes_match(const IONodeDescriptor& nodeDescriptor,
const ZeroExecutor::ArgumentDescriptor& zeDescriptor,
const std::string& name) {
void checkLevelZeroAttributesMatch(const IONodeDescriptor& nodeDescriptor,
const ZeroExecutor::ArgumentDescriptor& zeDescriptor,
const std::string& name) {
const ov::element::Type_t ovPrecision = nodeDescriptor.precision;
const ze_graph_argument_precision_t zePrecision = zeDescriptor.info.devicePrecision;

if (zeroUtils::getZePrecision(ovPrecision) != zePrecision) {
OPENVINO_THROW("Precision mismatch for parameter " + name);
}

const std::vector<size_t>& ovDimensions = nodeDescriptor.originalShape.get_max_shape();
const std::vector<size_t>& ovDimensions = nodeDescriptor.transposedShape.get_max_shape();

if (ovDimensions.size() > ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE) {
OPENVINO_THROW(
Expand All @@ -45,54 +47,86 @@ void check_level_zero_attributes_match(const IONodeDescriptor& nodeDescriptor,
OPENVINO_THROW("Shape mismatch for parameter " + name);
}
}

for (size_t index = 1; index < ovDimensions.size(); ++index) {
if (ovDimensions[index] != zeDescriptor.info.dims[index] && !nodeDescriptor.transposedShape.is_dynamic()) {
OPENVINO_THROW("Shape mismatch for parameter " + name);
}
}
}

size_t get_batch_size_for_node(const IONodeDescriptor& nodeDescriptor,
const ZeroExecutor::ArgumentDescriptor& zeDescriptor) {
std::optional<size_t> getBatchSizeForNode(const IONodeDescriptor& nodeDescriptor,
const ZeroExecutor::ArgumentDescriptor& zeDescriptor) {
Logger logger("GetBatchSizeForNode", Logger::global().level());

const std::vector<size_t>& ovDimensions = nodeDescriptor.originalShape.get_shape();
switch (zeDescriptor.info.deviceLayout) {
case ZE_GRAPH_ARGUMENT_LAYOUT_NCHW:
case ZE_GRAPH_ARGUMENT_LAYOUT_NHWC:
case ZE_GRAPH_ARGUMENT_LAYOUT_NCDHW:
case ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC:
case ZE_GRAPH_ARGUMENT_LAYOUT_NC:
if ((ovDimensions[0] == zeDescriptor.info.dims[0]) && (ovDimensions[0] != 1)) {
OPENVINO_THROW("Batching on the plugin is not used, batching is handled by the compiler");
if ((ovDimensions[BATCH_AXIS] == zeDescriptor.info.dims[BATCH_AXIS]) &&
(ovDimensions[BATCH_AXIS] != DEFAULT_BATCH_SIZE)) {
logger.info("Batching on the plugin is not used, batching is handled by the compiler");
return std::nullopt;
} else {
return ovDimensions[0];
return ovDimensions[BATCH_AXIS];
}
break;
default:
OPENVINO_THROW("Batching on the plugin is working only when batching is found on 0th dimension");
logger.info("Batching on the plugin is working only when batching is found on 0th dimension");
return std::nullopt;
}

return 1;
return DEFAULT_BATCH_SIZE;
}

std::optional<size_t> get_batch_size(
/**
* @brief Get the batch size to be handled on the plugin.
* @details Analyze the shape from the compiled model with the shape from the originalShape and get the originalShape if
* it is different.
* @param metadata A map to represent descriptions for inputs and outputs of a network.
* @param executorInputDescriptors A map to represent Level zero inputs descriptors.
* @param executorOutputDescriptors A map to represent Level zero outputs descriptors.
*/

std::optional<size_t> getBatchSize(
const NetworkMetadata& metadata,
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorInputDescriptors,
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorOutputDescriptors) {
std::set<size_t> batch_size;

Logger logger("getBatchSize", Logger::global().level());

for (const std::string& inputName : metadata.inputNames) {
if (!executorInputDescriptors.count(inputName)) {
OPENVINO_THROW("Invalid graph input descriptor key: " + inputName);
auto batchSizeForNode =
getBatchSizeForNode(metadata.parameters.at(inputName), executorInputDescriptors.at(inputName));

if (batchSizeForNode.has_value()) {
batch_size.insert(*batchSizeForNode);
} else {
return std::nullopt;
}
batch_size.insert(
get_batch_size_for_node(metadata.parameters.at(inputName), executorInputDescriptors.at(inputName)));
}

for (const std::string& outputName : metadata.outputNames) {
if (!executorOutputDescriptors.count(outputName)) {
OPENVINO_THROW("Invalid graph output descriptor key: " + outputName);
}
batch_size.insert(
get_batch_size_for_node(metadata.results.at(outputName), executorOutputDescriptors.at(outputName)));
auto batchSizeForNode =
getBatchSizeForNode(metadata.results.at(outputName), executorOutputDescriptors.at(outputName));

if (batchSizeForNode.has_value()) {
batch_size.insert(*batchSizeForNode);
} else {
return std::nullopt;
}
}

if (batch_size.size() != 1) {
OPENVINO_THROW("Batching on the plugin is working only when have same value for all tensors!");
logger.info("Batching works only when we have the same batch size for all tensors!");
return std::nullopt;
}

auto it = batch_size.begin();
Expand Down Expand Up @@ -142,25 +176,29 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

auto allocator = zeroMemory::HostMemAllocator(backendPtr);

if (config.get<BATCH_MODE>() != ov::intel_npu::BatchMode::COMPILER) {
try {
auto batchSize = get_batch_size(_metadata, executorInputDescriptors, executorOutputDescriptors);
for (const std::string& inputName : _metadata.inputNames) {
if (!executorInputDescriptors.count(inputName)) {
OPENVINO_THROW("Invalid graph input descriptor key: " + inputName);
}
}

if (batchSize.has_value()) {
_batchSize = *batchSize;
}
} catch (const std::exception& ex) {
_logger.info("Got an error when checking the batch size: \n%s", ex.what());
for (const std::string& outputName : _metadata.outputNames) {
if (!executorOutputDescriptors.count(outputName)) {
OPENVINO_THROW("Invalid graph output descriptor key: " + outputName);
}
}

for (const std::string& inputName : _metadata.inputNames) {
if (!executorInputDescriptors.count(inputName)) {
OPENVINO_THROW("Invalid graph input descriptor key: " + inputName);
if (config.get<BATCH_MODE>() != ov::intel_npu::BatchMode::COMPILER) {
auto batchSize = getBatchSize(_metadata, executorInputDescriptors, executorOutputDescriptors);

if (batchSize.has_value()) {
_batchSize = *batchSize;
}
}

for (const std::string& inputName : _metadata.inputNames) {
IONodeDescriptor& parameterDescriptor = _metadata.parameters.at(inputName);
check_level_zero_attributes_match(parameterDescriptor, executorInputDescriptors.at(inputName), inputName);
checkLevelZeroAttributesMatch(parameterDescriptor, executorInputDescriptors.at(inputName), inputName);

ov::Allocator inputAllocator;
if (properties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) {
Expand All @@ -171,8 +209,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

// When batching is handled by the plugin we need to modify transposed shape with the original batch size since
// it will be forced to 1 at the compilation time
if (_batchSize > 1) {
parameterDescriptor.transposedShape[0] = _batchSize;
if (_batchSize > DEFAULT_BATCH_SIZE) {
parameterDescriptor.transposedShape[BATCH_AXIS] = _batchSize;
}

// The I/O buffers already allocated using the Level Zero API are being reused here
Expand All @@ -182,26 +220,22 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + inputName;
const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(inputName);

check_level_zero_attributes_match(shapeDescriptor,
executorInputDescriptors.at(shapeBufferName),
shapeBufferName);
checkLevelZeroAttributesMatch(shapeDescriptor,
executorInputDescriptors.at(shapeBufferName),
shapeBufferName);

allocate_tensor(inputName, shapeDescriptor, TensorType::Shape, inputAllocator);
}
}

for (const std::string& outputName : _metadata.outputNames) {
if (!executorOutputDescriptors.count(outputName)) {
OPENVINO_THROW("Invalid graph output descriptor key: " + outputName);
}

IONodeDescriptor& resultDescriptor = _metadata.results.at(outputName);
check_level_zero_attributes_match(resultDescriptor, executorOutputDescriptors.at(outputName), outputName);
checkLevelZeroAttributesMatch(resultDescriptor, executorOutputDescriptors.at(outputName), outputName);

// When batching is handled by the plugin we need to modify transposed shape with the original batch size since
// it will be forced to 1 at the compilation time
if (_batchSize > 1) {
resultDescriptor.transposedShape[0] = _batchSize;
if (_batchSize > DEFAULT_BATCH_SIZE) {
resultDescriptor.transposedShape[BATCH_AXIS] = _batchSize;
}

allocate_tensor(outputName, resultDescriptor, TensorType::InputOrOutput, allocator);
Expand All @@ -212,9 +246,9 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + shapeNameMatch->second;
const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(shapeNameMatch->second);

check_level_zero_attributes_match(shapeDescriptor,
executorOutputDescriptors.at(shapeBufferName),
shapeBufferName);
checkLevelZeroAttributesMatch(shapeDescriptor,
executorOutputDescriptors.at(shapeBufferName),
shapeBufferName);

allocate_tensor(shapeNameMatch->second, shapeDescriptor, TensorType::Shape, allocator);
}
Expand All @@ -233,12 +267,12 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
}

const IONodeDescriptor& stateDescriptor = _metadata.states.at(stateName);
check_level_zero_attributes_match(stateDescriptor,
executorInputDescriptors.at(stateInputBufferName),
stateInputBufferName);
check_level_zero_attributes_match(stateDescriptor,
executorOutputDescriptors.at(stateOutputBufferName),
stateOutputBufferName);
checkLevelZeroAttributesMatch(stateDescriptor,
executorInputDescriptors.at(stateInputBufferName),
stateInputBufferName);
checkLevelZeroAttributesMatch(stateDescriptor,
executorOutputDescriptors.at(stateOutputBufferName),
stateOutputBufferName);

// Only one buffer per state variable is required, we'll use the "output" one since this one captures the latest
// tensor value
Expand Down
43 changes: 0 additions & 43 deletions src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,49 +56,6 @@ void checkedMemcpy(void* destination, size_t destinationSize, void const* source
memcpy(destination, source, numberOfBytes);
}

/**
 * @brief Translates a Level Zero graph metadata precision into the matching OpenVINO element type.
 * @param zeElementType Level Zero metadata precision value reported by the driver.
 * @return The equivalent ov::element::Type_t; unrecognized values map to "undefined".
 */
ov::element::Type_t toOVElementType(const ze_graph_metadata_type zeElementType) {
    using OVType = ov::element::Type_t;

    switch (zeElementType) {
    case ZE_GRAPH_METADATA_TYPE_DYNAMIC:
        return OVType::dynamic;
    case ZE_GRAPH_METADATA_TYPE_BOOLEAN:
        return OVType::boolean;
    case ZE_GRAPH_METADATA_TYPE_BF16:
        return OVType::bf16;
    case ZE_GRAPH_METADATA_TYPE_F16:
        return OVType::f16;
    case ZE_GRAPH_METADATA_TYPE_F32:
        return OVType::f32;
    case ZE_GRAPH_METADATA_TYPE_F64:
        return OVType::f64;
    case ZE_GRAPH_METADATA_TYPE_I4:
        return OVType::i4;
    case ZE_GRAPH_METADATA_TYPE_I8:
        return OVType::i8;
    case ZE_GRAPH_METADATA_TYPE_I16:
        return OVType::i16;
    case ZE_GRAPH_METADATA_TYPE_I32:
        return OVType::i32;
    case ZE_GRAPH_METADATA_TYPE_I64:
        return OVType::i64;
    case ZE_GRAPH_METADATA_TYPE_U1:
        return OVType::u1;
    case ZE_GRAPH_METADATA_TYPE_U4:
        return OVType::u4;
    case ZE_GRAPH_METADATA_TYPE_U8:
        return OVType::u8;
    case ZE_GRAPH_METADATA_TYPE_U16:
        return OVType::u16;
    case ZE_GRAPH_METADATA_TYPE_U32:
        return OVType::u32;
    case ZE_GRAPH_METADATA_TYPE_U64:
        return OVType::u64;
    case ZE_GRAPH_METADATA_TYPE_UNDEFINED:
    default:
        // UNDEFINED and any value added by a newer driver both fall back to "undefined".
        return OVType::undefined;
    }
}

ov::element::Type_t toOVElementType(const ze_graph_argument_precision_t zeElementType) {
switch (zeElementType) {
case ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN:
Expand Down
Loading

0 comments on commit f90a0fb

Please sign in to comment.