Skip to content

Commit

Permalink
Update code according to review
Browse files Browse the repository at this point in the history
  • Loading branch information
pereanub committed Apr 24, 2024
1 parent 01c56ca commit f90a0fb
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 111 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/al/src/config/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ ov::intel_npu::BatchMode intel_npu::BATCH_MODE::parse(std::string_view val) {
return ov::intel_npu::BatchMode::PLUGIN;
}

OPENVINO_THROW("Value '{0}' is not a valid BATCH_TYPE option", val);
OPENVINO_THROW("Value '", val, "' is not a valid BATCH_MODE option");
}

std::string intel_npu::BATCH_MODE::toString(const ov::intel_npu::BatchMode& val) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
#include "zero_utils.hpp"
#include "zero_wrappers.hpp"

namespace {
constexpr std::size_t DEFAULT_BATCH_SIZE = 1;
} // namespace

namespace intel_npu {

class ZeroInferRequest final : public SyncInferRequest {
Expand Down Expand Up @@ -49,7 +53,7 @@ class ZeroInferRequest final : public SyncInferRequest {

// If batching is handled on the compiler side then batching on the plugin shall be set to 1, we don't do any
// specific operations on the plugin in this case.
size_t _batchSize = 1;
size_t _batchSize = DEFAULT_BATCH_SIZE;
};

} // namespace intel_npu
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/backend/include/zero_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class HostMemAllocator final {
static const std::size_t _alignment = STANDARD_PAGE_SIZE;
};

// Graph arguments (inputs and output) need to be allocated in the host memory.
// Graph arguments (inputs and outputs) need to be allocated in the host memory.
// For discrete platforms, graph arguments need to be copied into the device memory.
// MemoryManagementUnit is used to allocate memory in the device memory.
// Usage: we should append graph arguments with corresponding names with `appendArgument` call to prepare size
Expand Down
5 changes: 1 addition & 4 deletions src/plugins/intel_npu/src/backend/src/zero_backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,7 @@ uint32_t ZeroEngineBackend::getDriverExtVersion() const {
}

bool ZeroEngineBackend::isBatchingSupported() const {
if (_instance->getDriverExtVersion() < ZE_GRAPH_EXT_VERSION_1_6) {
return false;
}
return true;
return _instance->getDriverExtVersion() >= ZE_GRAPH_EXT_VERSION_1_6;
}

ZeroEngineBackend::~ZeroEngineBackend() = default;
Expand Down
138 changes: 86 additions & 52 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,26 @@ using namespace intel_npu;

namespace {

constexpr std::size_t BATCH_AXIS = 0;

/**
* @brief Checks that the metadata of the provided descriptor corresponds to the values registered in the Level Zero
* structure.
* @param nodeDescriptor The OpenVINO API specific I/O descriptor which shall be compared.
* @param zeDescriptor The Level Zero specific structure used for comparison.
* @param name Tensor identifier used for error logging.
*/
void check_level_zero_attributes_match(const IONodeDescriptor& nodeDescriptor,
const ZeroExecutor::ArgumentDescriptor& zeDescriptor,
const std::string& name) {
void checkLevelZeroAttributesMatch(const IONodeDescriptor& nodeDescriptor,
const ZeroExecutor::ArgumentDescriptor& zeDescriptor,
const std::string& name) {
const ov::element::Type_t ovPrecision = nodeDescriptor.precision;
const ze_graph_argument_precision_t zePrecision = zeDescriptor.info.devicePrecision;

if (zeroUtils::getZePrecision(ovPrecision) != zePrecision) {
OPENVINO_THROW("Precision mismatch for parameter " + name);
}

const std::vector<size_t>& ovDimensions = nodeDescriptor.originalShape.get_max_shape();
const std::vector<size_t>& ovDimensions = nodeDescriptor.transposedShape.get_max_shape();

if (ovDimensions.size() > ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE) {
OPENVINO_THROW(
Expand All @@ -45,54 +47,86 @@ void check_level_zero_attributes_match(const IONodeDescriptor& nodeDescriptor,
OPENVINO_THROW("Shape mismatch for parameter " + name);
}
}

for (size_t index = 1; index < ovDimensions.size(); ++index) {
if (ovDimensions[index] != zeDescriptor.info.dims[index] && !nodeDescriptor.transposedShape.is_dynamic()) {
OPENVINO_THROW("Shape mismatch for parameter " + name);
}
}
}

size_t get_batch_size_for_node(const IONodeDescriptor& nodeDescriptor,
const ZeroExecutor::ArgumentDescriptor& zeDescriptor) {
std::optional<size_t> getBatchSizeForNode(const IONodeDescriptor& nodeDescriptor,
const ZeroExecutor::ArgumentDescriptor& zeDescriptor) {
Logger logger("GetBatchSizeForNode", Logger::global().level());

const std::vector<size_t>& ovDimensions = nodeDescriptor.originalShape.get_shape();
switch (zeDescriptor.info.deviceLayout) {
case ZE_GRAPH_ARGUMENT_LAYOUT_NCHW:
case ZE_GRAPH_ARGUMENT_LAYOUT_NHWC:
case ZE_GRAPH_ARGUMENT_LAYOUT_NCDHW:
case ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC:
case ZE_GRAPH_ARGUMENT_LAYOUT_NC:
if ((ovDimensions[0] == zeDescriptor.info.dims[0]) && (ovDimensions[0] != 1)) {
OPENVINO_THROW("Batching on the plugin is not used, batching is handled by the compiler");
if ((ovDimensions[BATCH_AXIS] == zeDescriptor.info.dims[BATCH_AXIS]) &&
(ovDimensions[BATCH_AXIS] != DEFAULT_BATCH_SIZE)) {
logger.info("Batching on the plugin is not used, batching is handled by the compiler");
return std::nullopt;
} else {
return ovDimensions[0];
return ovDimensions[BATCH_AXIS];
}
break;
default:
OPENVINO_THROW("Batching on the plugin is working only when batching is found on 0th dimension");
logger.info("Batching on the plugin is working only when batching is found on 0th dimension");
return std::nullopt;
}

return 1;
return DEFAULT_BATCH_SIZE;
}

std::optional<size_t> get_batch_size(
/**
* @brief Get the batch size to be handled on the plugin.
* @details Analyze the shape from the compiled model with the shape from the originalShape and get the originalShape if
* it is different.
* @param metadata A map to represent descriptions for inputs and outputs of a network.
* @param executorInputDescriptors A map to represent Level zero inputs descriptors.
* @param executorOutputDescriptors A map to represent Level zero outputs descriptors.
*/

std::optional<size_t> getBatchSize(
const NetworkMetadata& metadata,
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorInputDescriptors,
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorOutputDescriptors) {
std::set<size_t> batch_size;

Logger logger("getBatchSize", Logger::global().level());

for (const std::string& inputName : metadata.inputNames) {
if (!executorInputDescriptors.count(inputName)) {
OPENVINO_THROW("Invalid graph input descriptor key: " + inputName);
auto batchSizeForNode =
getBatchSizeForNode(metadata.parameters.at(inputName), executorInputDescriptors.at(inputName));

if (batchSizeForNode.has_value()) {
batch_size.insert(*batchSizeForNode);
} else {
return std::nullopt;
}
batch_size.insert(
get_batch_size_for_node(metadata.parameters.at(inputName), executorInputDescriptors.at(inputName)));
}

for (const std::string& outputName : metadata.outputNames) {
if (!executorOutputDescriptors.count(outputName)) {
OPENVINO_THROW("Invalid graph output descriptor key: " + outputName);
}
batch_size.insert(
get_batch_size_for_node(metadata.results.at(outputName), executorOutputDescriptors.at(outputName)));
auto batchSizeForNode =
getBatchSizeForNode(metadata.results.at(outputName), executorOutputDescriptors.at(outputName));

if (batchSizeForNode.has_value()) {
batch_size.insert(*batchSizeForNode);
} else {
return std::nullopt;
}
}

if (batch_size.size() != 1) {
OPENVINO_THROW("Batching on the plugin is working only when have same value for all tensors!");
logger.info("Batching works only when we have the same batch size for all tensors!");
return std::nullopt;
}

auto it = batch_size.begin();
Expand Down Expand Up @@ -142,25 +176,29 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

auto allocator = zeroMemory::HostMemAllocator(backendPtr);

if (config.get<BATCH_MODE>() != ov::intel_npu::BatchMode::COMPILER) {
try {
auto batchSize = get_batch_size(_metadata, executorInputDescriptors, executorOutputDescriptors);
for (const std::string& inputName : _metadata.inputNames) {
if (!executorInputDescriptors.count(inputName)) {
OPENVINO_THROW("Invalid graph input descriptor key: " + inputName);
}
}

if (batchSize.has_value()) {
_batchSize = *batchSize;
}
} catch (const std::exception& ex) {
_logger.info("Got an error when checking the batch size: \n%s", ex.what());
for (const std::string& outputName : _metadata.outputNames) {
if (!executorOutputDescriptors.count(outputName)) {
OPENVINO_THROW("Invalid graph output descriptor key: " + outputName);
}
}

for (const std::string& inputName : _metadata.inputNames) {
if (!executorInputDescriptors.count(inputName)) {
OPENVINO_THROW("Invalid graph input descriptor key: " + inputName);
if (config.get<BATCH_MODE>() != ov::intel_npu::BatchMode::COMPILER) {
auto batchSize = getBatchSize(_metadata, executorInputDescriptors, executorOutputDescriptors);

if (batchSize.has_value()) {
_batchSize = *batchSize;
}
}

for (const std::string& inputName : _metadata.inputNames) {
IONodeDescriptor& parameterDescriptor = _metadata.parameters.at(inputName);
check_level_zero_attributes_match(parameterDescriptor, executorInputDescriptors.at(inputName), inputName);
checkLevelZeroAttributesMatch(parameterDescriptor, executorInputDescriptors.at(inputName), inputName);

ov::Allocator inputAllocator;
if (properties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) {
Expand All @@ -171,8 +209,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

// When batching is handled by the plugin we need to modify transposed shape with the original batch size since
// it will be forced to 1 at the compilation time
if (_batchSize > 1) {
parameterDescriptor.transposedShape[0] = _batchSize;
if (_batchSize > DEFAULT_BATCH_SIZE) {
parameterDescriptor.transposedShape[BATCH_AXIS] = _batchSize;
}

// The I/O buffers already allocated using the Level Zero API are being reused here
Expand All @@ -182,26 +220,22 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + inputName;
const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(inputName);

check_level_zero_attributes_match(shapeDescriptor,
executorInputDescriptors.at(shapeBufferName),
shapeBufferName);
checkLevelZeroAttributesMatch(shapeDescriptor,
executorInputDescriptors.at(shapeBufferName),
shapeBufferName);

allocate_tensor(inputName, shapeDescriptor, TensorType::Shape, inputAllocator);
}
}

for (const std::string& outputName : _metadata.outputNames) {
if (!executorOutputDescriptors.count(outputName)) {
OPENVINO_THROW("Invalid graph output descriptor key: " + outputName);
}

IONodeDescriptor& resultDescriptor = _metadata.results.at(outputName);
check_level_zero_attributes_match(resultDescriptor, executorOutputDescriptors.at(outputName), outputName);
checkLevelZeroAttributesMatch(resultDescriptor, executorOutputDescriptors.at(outputName), outputName);

// When batching is handled by the plugin we need to modify transposed shape with the original batch size since
// it will be forced to 1 at the compilation time
if (_batchSize > 1) {
resultDescriptor.transposedShape[0] = _batchSize;
if (_batchSize > DEFAULT_BATCH_SIZE) {
resultDescriptor.transposedShape[BATCH_AXIS] = _batchSize;
}

allocate_tensor(outputName, resultDescriptor, TensorType::InputOrOutput, allocator);
Expand All @@ -212,9 +246,9 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + shapeNameMatch->second;
const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(shapeNameMatch->second);

check_level_zero_attributes_match(shapeDescriptor,
executorOutputDescriptors.at(shapeBufferName),
shapeBufferName);
checkLevelZeroAttributesMatch(shapeDescriptor,
executorOutputDescriptors.at(shapeBufferName),
shapeBufferName);

allocate_tensor(shapeNameMatch->second, shapeDescriptor, TensorType::Shape, allocator);
}
Expand All @@ -233,12 +267,12 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
}

const IONodeDescriptor& stateDescriptor = _metadata.states.at(stateName);
check_level_zero_attributes_match(stateDescriptor,
executorInputDescriptors.at(stateInputBufferName),
stateInputBufferName);
check_level_zero_attributes_match(stateDescriptor,
executorOutputDescriptors.at(stateOutputBufferName),
stateOutputBufferName);
checkLevelZeroAttributesMatch(stateDescriptor,
executorInputDescriptors.at(stateInputBufferName),
stateInputBufferName);
checkLevelZeroAttributesMatch(stateDescriptor,
executorOutputDescriptors.at(stateOutputBufferName),
stateOutputBufferName);

// Only one buffer per state variable is required, we'll use the "output" one since this one captures the latest
// tensor value
Expand Down
43 changes: 0 additions & 43 deletions src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,49 +56,6 @@ void checkedMemcpy(void* destination, size_t destinationSize, void const* source
memcpy(destination, source, numberOfBytes);
}

/**
 * @brief Translates a Level Zero graph metadata precision into the matching OpenVINO element type.
 * @param zeElementType Level Zero metadata precision value reported by the driver.
 * @return The equivalent ov::element::Type_t; unrecognized values map to "undefined".
 */
ov::element::Type_t toOVElementType(const ze_graph_metadata_type zeElementType) {
    using OVType = ov::element::Type_t;

    switch (zeElementType) {
    case ZE_GRAPH_METADATA_TYPE_DYNAMIC:
        return OVType::dynamic;
    case ZE_GRAPH_METADATA_TYPE_BOOLEAN:
        return OVType::boolean;
    case ZE_GRAPH_METADATA_TYPE_BF16:
        return OVType::bf16;
    case ZE_GRAPH_METADATA_TYPE_F16:
        return OVType::f16;
    case ZE_GRAPH_METADATA_TYPE_F32:
        return OVType::f32;
    case ZE_GRAPH_METADATA_TYPE_F64:
        return OVType::f64;
    case ZE_GRAPH_METADATA_TYPE_I4:
        return OVType::i4;
    case ZE_GRAPH_METADATA_TYPE_I8:
        return OVType::i8;
    case ZE_GRAPH_METADATA_TYPE_I16:
        return OVType::i16;
    case ZE_GRAPH_METADATA_TYPE_I32:
        return OVType::i32;
    case ZE_GRAPH_METADATA_TYPE_I64:
        return OVType::i64;
    case ZE_GRAPH_METADATA_TYPE_U1:
        return OVType::u1;
    case ZE_GRAPH_METADATA_TYPE_U4:
        return OVType::u4;
    case ZE_GRAPH_METADATA_TYPE_U8:
        return OVType::u8;
    case ZE_GRAPH_METADATA_TYPE_U16:
        return OVType::u16;
    case ZE_GRAPH_METADATA_TYPE_U32:
        return OVType::u32;
    case ZE_GRAPH_METADATA_TYPE_U64:
        return OVType::u64;
    case ZE_GRAPH_METADATA_TYPE_UNDEFINED:
    default:
        // UNDEFINED and any value added by a newer driver both fall back to "undefined".
        return OVType::undefined;
    }
}

ov::element::Type_t toOVElementType(const ze_graph_argument_precision_t zeElementType) {
switch (zeElementType) {
case ZE_GRAPH_ARGUMENT_PRECISION_UNKNOWN:
Expand Down
Loading

0 comments on commit f90a0fb

Please sign in to comment.