diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
index 0fe26c7560ac55..eb103c493e4ef4 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
@@ -15,14 +15,10 @@
 #include "zero_pipeline.hpp"
 #include "zero_profiling.hpp"
 #include "zero_remote_tensor.hpp"
+#include "zero_tensor.hpp"
 
 namespace intel_npu {
 
-struct TensorInfo {
-    bool tensorCreatedLocally;
-    uint64_t originalMemoryId;
-};
-
 class ZeroInferRequest final : public SyncInferRequest {
 public:
     explicit ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
@@ -67,12 +63,9 @@ class ZeroInferRequest final : public SyncInferRequest {
     std::shared_ptr<ov::ITensor>& get_level_zero_input(size_t index, size_t tensorNo = 0) const;
     std::vector<std::shared_ptr<ov::ITensor>>& get_level_zero_inputs(size_t index) const;
 
-    std::shared_ptr<ov::ITensor> allocate_tensor(
-        const IODescriptor& descriptor,
-        const size_t index,
-        const bool isInput,
-        const ov::Allocator& allocator = {},
-        const std::optional<std::size_t> batchSize = std::nullopt) const override;
+    std::shared_ptr<ov::ITensor> create_tensor(ov::element::Type type,
+                                               const ov::Shape& shape,
+                                               const ov::Allocator& allocator = {}) const override;
 
     const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
     const std::shared_ptr<IGraph> _graph;
@@ -84,9 +77,6 @@ class ZeroInferRequest final : public SyncInferRequest {
     mutable std::vector<std::vector<std::shared_ptr<ov::ITensor>>> _levelZeroInputTensors;
     mutable std::vector<std::shared_ptr<ov::ITensor>> _levelZeroOutputTensors;
 
-    mutable std::vector<TensorInfo> _levelZeroInputTensorInfo;
-    mutable std::vector<TensorInfo> _levelZeroOutputTensorInfo;
-
     ze_device_properties_t _properties = {};
     std::shared_ptr<const zeroMemory::HostMemAllocator> _inputAllocator;
     std::shared_ptr<const zeroMemory::HostMemAllocator> _outputAllocator;
diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
index bfea560e907967..24ea567a4a5f73 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
@@ -7,22 +7,22 @@
 #include "intel_npu/common/igraph.hpp"
 #include "intel_npu/utils/zero/zero_utils.hpp"
 #include "intel_npu/utils/zero/zero_wrappers.hpp"
-#include "openvino/runtime/itensor.hpp"
 #include "zero_memory.hpp"
 #include "zero_profiling.hpp"
+#include "zero_tensor.hpp"
 
 namespace intel_npu {
 
 struct Pipeline {
 public:
     Pipeline(const Config& config,
-             const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
+             const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
              const std::shared_ptr<IGraph>& graph,
              zeroProfiling::ProfilingPool& profiling_pool,
             zeroProfiling::ProfilingQuery& profiling_query,
             const std::shared_ptr<zeroProfiling::NpuInferProfiling>& npu_profiling,
-             const std::vector<std::vector<std::shared_ptr<ov::ITensor>>>& inputTensorsData,
-             const std::vector<std::shared_ptr<ov::ITensor>>& outputTensorsData,
+             const std::vector<std::vector<std::shared_ptr<ov::ITensor>>>& input_tensors_data,
+             const std::vector<std::shared_ptr<ov::ITensor>>& output_tensors_data,
              uint32_t group_ordinal);
 
     Pipeline(const Pipeline&) = delete;
@@ -33,8 +33,8 @@ struct Pipeline {
     void pull();
     void reset() const;
 
-    void updateCommandList(const void* data, size_t byte_size, uint32_t index);
-    void updateCommandList(const void* data, uint32_t index, size_t commandListIndex);
+    void updateCommandList(uint32_t arg_index, const void* arg_data, size_t byte_size);
+    void updateCommandListIndex(uint32_t arg_index, const void* arg_data, size_t command_list_index);
 
 protected:
     std::shared_ptr<IGraph> _graph;
diff --git a/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp
index 8eedda4475b38a..9cae2b98425a37 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_tensor.hpp
@@ -22,7 +22,7 @@ class ZeroTensor final : public ov::ITensor {
                const ov::Shape& shape,
                const ov::Allocator& allocator);
 
-    void* data(const ov::element::Type& element_type) const override;
+    void* data(const ov::element::Type& type = {}) const override;
 
     const ov::element::Type& get_element_type() const override;
 
@@ -32,6 +32,9 @@ class ZeroTensor final : public ov::ITensor {
 
     const ov::Strides& get_strides() const override;
 
+    bool memory_address_changed();
+    void reset_memory_flag();
+
     ~ZeroTensor();
 
 private:
@@ -51,6 +54,7 @@ class ZeroTensor final : public ov::ITensor {
     mutable std::once_flag _strides_once;
     ov::Allocator _allocator;
     void* _ptr = nullptr;
+    bool _reset_tensor_memory = false;
 };
 
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index af35229903de5d..d0d18f55f9dcd8 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -13,7 +13,6 @@
 #include "openvino/op/util/op_types.hpp"
 #include "openvino/runtime/intel_npu/remote_properties.hpp"
 #include "zero_memory.hpp"
-#include "zero_tensor.hpp"
 
 using namespace intel_npu;
 
@@ -92,17 +91,6 @@ bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, c
     return false;
 }
 
-uint64_t get_memory_id(ze_context_handle_t hContext, const void* ptr) {
-    ze_memory_allocation_properties_t desc = {};
-    desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
-    auto res = intel_npu::zeMemGetAllocProperties(hContext, ptr, &desc, nullptr);
-    if (res != ZE_RESULT_SUCCESS) {
-        return 0;
-    }
-
-    return desc.id;
-}
-
 }  // namespace
 
 //------------------------------------------------------------------------------
@@ -116,8 +104,6 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>
       _logger("ZeroInferRequest", config.get<LOG_LEVEL>()),
       _levelZeroInputTensors(_metadata.inputs.size(), std::vector<std::shared_ptr<ov::ITensor>>(1, nullptr)),
       _levelZeroOutputTensors(_metadata.outputs.size(), nullptr),
-      _levelZeroInputTensorInfo(_metadata.inputs.size(), TensorInfo{false, 0}),
-      _levelZeroOutputTensorInfo(_metadata.outputs.size(), TensorInfo{false, 0}),
       _profilingPool(_initStructs, _graph, zeroProfiling::POOL_SIZE),
       _profilingQuery(_initStructs, 0) {
     _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");
@@ -196,7 +182,6 @@ void ZeroInferRequest::create_pipeline() {
                                                            INPUT,
                                                            *_inputAllocator,
                                                            _graph->get_batch_size());
-        _levelZeroInputTensorInfo.at(inputIndex).tensorCreatedLocally = true;
     }
 
     for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) {
@@ -212,41 +197,6 @@ void ZeroInferRequest::create_pipeline() {
                                                                OUTPUT,
                                                                *_outputAllocator,
                                                                _graph->get_batch_size());
-        _levelZeroOutputTensorInfo.at(outputIndex).tensorCreatedLocally = true;
-    }
-
-    if (_initStructs->getMutableCommandListVersion()) {
-        for (size_t inputIndex = 0; inputIndex < _metadata.inputs.size(); ++inputIndex) {
-            if (is_batched_input(inputIndex)) {
-                continue;
-            }
-
-            const IODescriptor inputDescriptor = _metadata.inputs.at(inputIndex);
-            if (inputDescriptor.isShapeTensor || inputDescriptor.isStateInput) {
-                continue;
-            }
-
-            if (std::dynamic_pointer_cast<ZeroRemoteTensor>(get_level_zero_input(inputIndex)) != nullptr) {
-                continue;
-            }
-
-            _levelZeroInputTensorInfo.at(inputIndex).originalMemoryId =
-                get_memory_id(_initStructs->getContext(), get_level_zero_input(inputIndex)->data());
-        }
-
-        for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) {
-            const IODescriptor outputDescriptor = _metadata.outputs.at(outputIndex);
-            if (outputDescriptor.isShapeTensor || outputDescriptor.isStateOutput) {
-                continue;
-            }
-
-            if (std::dynamic_pointer_cast<ZeroRemoteTensor>(_levelZeroOutputTensors.at(outputIndex)) != nullptr) {
-                continue;
-            }
-
-            _levelZeroOutputTensorInfo.at(outputIndex).originalMemoryId =
-                get_memory_id(_initStructs->getContext(), _levelZeroOutputTensors.at(outputIndex)->data());
-        }
     }
 
     // Find the corresponding command queue group.
@@ -275,24 +225,15 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso
                                        const bool isInput) {
     OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data");
     auto& levelZeroTensors = isInput ? get_level_zero_input(index) : _levelZeroOutputTensors.at(index);
-    auto& tensorCreatedLocally = isInput ? _levelZeroInputTensorInfo.at(index).tensorCreatedLocally
-                                         : _levelZeroOutputTensorInfo.at(index).tensorCreatedLocally;
-
-    bool setTensorData = false;
-    bool levelZeroTensorCreatedLocally = true;
-
-    OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "check_data_allocation");
-    if (memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensor->data())) {
-        _logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context");
-        levelZeroTensors = tensor;
-        levelZeroTensorCreatedLocally = false;
-        setTensorData = true;
-    }
-    if (!setTensorData) {
-        // make sure that the L0 tensor was allocated locally and is not received from the user when receiving
-        // random tensor
-        if (!tensorCreatedLocally) {
+    const auto& zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(tensor);
+
+    if (zeroTensor == nullptr) {
+        OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "check_data_allocation");
+        if (memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensor->data())) {
+            _logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context");
+            levelZeroTensors = tensor;
+        } else {
             _logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor");
             OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor");
 
@@ -301,28 +242,16 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso
                                                isInput,
                                                isInput ? *_inputAllocator : *_outputAllocator,
                                                _graph->get_batch_size());
-
-            setTensorData = true;
-            levelZeroTensorCreatedLocally = true;
         }
-    }
-
-    if (setTensorData) {
-        tensorCreatedLocally = levelZeroTensorCreatedLocally;
 
         if (_pipelineIsCreated) {
             _logger.debug("ZeroInferRequest::infer_async - update command list");
 
-            auto& updateOriginalAddress = isInput ? _levelZeroInputTensorInfo.at(index).originalMemoryId
-                                                  : _levelZeroOutputTensorInfo.at(index).originalMemoryId;
-
             OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList");
-            _pipeline->updateCommandList(levelZeroTensors->data(),
-                                         levelZeroTensors->get_byte_size(),
-                                         isInput ? _graph->get_input_descriptors().at(index).idx
-                                                 : _graph->get_output_descriptors().at(index).idx);
-
-            updateOriginalAddress = get_memory_id(_initStructs->getContext(), levelZeroTensors->data());
+            _pipeline->updateCommandList(isInput ? _graph->get_input_descriptors().at(index).idx
+                                                 : _graph->get_output_descriptors().at(index).idx,
+                                         levelZeroTensors->data(),
+                                         levelZeroTensors->get_byte_size());
         }
     }
 }
@@ -344,20 +273,16 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr<ZeroRemoteT
     }
 
     auto data = extract_object(tensor->get_properties(), ov::intel_npu::mem_handle);
     OPENVINO_ASSERT(data, "Empty buffer");
 
     auto& levelZeroTensors = isInput ? get_level_zero_input(index) : _levelZeroOutputTensors.at(index);
-    auto& tensorCreatedLocally = isInput ? _levelZeroInputTensorInfo.at(index).tensorCreatedLocally
-                                         : _levelZeroOutputTensorInfo.at(index).tensorCreatedLocally;
-
     levelZeroTensors = tensor;
-    tensorCreatedLocally = false;
 
     if (_pipelineIsCreated) {
         OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList");
         _pipeline->updateCommandList(
+            isInput ? _graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx,
             data,
-            tensor->get_byte_size(),
-            isInput ? _graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx);
+            tensor->get_byte_size());
     }
 }
@@ -468,7 +393,7 @@ void ZeroInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
 
         if (_pipelineIsCreated) {
             OV_ITT_TASK_NEXT(SET_TENSORS, "updateCommandList");
-            _pipeline->updateCommandList(data, _graph->get_input_descriptors().at(foundPort.idx).idx, i);
+            _pipeline->updateCommandListIndex(_graph->get_input_descriptors().at(foundPort.idx).idx, data, i);
         }
     }
 }
@@ -500,8 +425,6 @@ ov::SoPtr<ov::ITensor> ZeroInferRequest::get_tensor(const ov::Output<const ov::
                                        ioIndex,
                                        isInput,
                                        isInput ? *_inputAllocator : *_outputAllocator,
                                        _graph->get_batch_size());
-    tensorCreatedLocally = true;
-
     return levelZeroTensors;
 }
@@ -630,31 +551,23 @@ void ZeroInferRequest::infer_async() {
     inputIndex = 0;
 
     for (const auto& levelZeroTensor : _levelZeroInputTensors) {
-        if (is_batched_input(inputIndex)) {
-            ++inputIndex;
-            continue;
-        }
-
         const auto inputDescriptor = _metadata.inputs.at(inputIndex);
-        if (inputDescriptor.isShapeTensor || inputDescriptor.isStateInput) {
-            ++inputIndex;
-            continue;
-        }
+        auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor.at(SINGLE_TENSOR));
 
-        if (std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor.at(SINGLE_TENSOR)) != nullptr) {
+        if (is_batched_input(inputIndex) || inputDescriptor.isShapeTensor || inputDescriptor.isStateInput ||
+            std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor.at(SINGLE_TENSOR)) != nullptr ||
+            zeroTensor == nullptr) {
             ++inputIndex;
             continue;
         }
 
-        auto memoryId = get_memory_id(_initStructs->getContext(), levelZeroTensor.at(SINGLE_TENSOR)->data());
-
-        if (_levelZeroInputTensorInfo.at(inputIndex).originalMemoryId != memoryId) {
+        if (zeroTensor->memory_address_changed()) {
             _logger.debug("Update input graph descriptor with the new tensor");
-            _pipeline->updateCommandList(levelZeroTensor.at(SINGLE_TENSOR)->data(),
-                                         levelZeroTensor.at(SINGLE_TENSOR)->get_byte_size(),
-                                         _graph->get_input_descriptors().at(inputIndex).idx);
+            _pipeline->updateCommandList(_graph->get_input_descriptors().at(inputIndex).idx,
+                                         zeroTensor->data(),
+                                         zeroTensor->get_byte_size());
 
-            _levelZeroInputTensorInfo.at(inputIndex).originalMemoryId = memoryId;
+            zeroTensor->reset_memory_flag();
         }
 
         ++inputIndex;
@@ -664,25 +577,21 @@
 
     for (const auto& levelZeroTensor : _levelZeroOutputTensors) {
         const auto outputDescriptor = _metadata.outputs.at(outputIndex);
-        if (outputDescriptor.isShapeTensor || outputDescriptor.isStateOutput) {
-            ++outputIndex;
-            continue;
-        }
+        auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor);
 
-        if (std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor) != nullptr) {
+        if (outputDescriptor.isShapeTensor || outputDescriptor.isStateOutput ||
+            std::dynamic_pointer_cast<ZeroRemoteTensor>(levelZeroTensor) != nullptr || zeroTensor == nullptr) {
             ++outputIndex;
             continue;
         }
 
-        auto memoryId = get_memory_id(_initStructs->getContext(), levelZeroTensor->data());
-
-        if (_levelZeroOutputTensorInfo.at(outputIndex).originalMemoryId != memoryId) {
+        if (zeroTensor->memory_address_changed()) {
             _logger.debug("Update output graph descriptor with the new tensor");
-            _pipeline->updateCommandList(levelZeroTensor->data(),
-                                         levelZeroTensor->get_byte_size(),
-                                         _graph->get_output_descriptors().at(outputIndex).idx);
+            _pipeline->updateCommandList(_graph->get_output_descriptors().at(outputIndex).idx,
+                                         zeroTensor->data(),
+                                         zeroTensor->get_byte_size());
 
-            _levelZeroOutputTensorInfo.at(outputIndex).originalMemoryId = memoryId;
+            zeroTensor->reset_memory_flag();
         }
 
         ++outputIndex;
@@ -810,45 +719,12 @@ std::vector<ov::ProfilingInfo> ZeroInferRequest::get_profiling_info() const {
     }
 }
 
-std::shared_ptr<ov::ITensor> ZeroInferRequest::allocate_tensor(const IODescriptor& descriptor,
-                                                               const size_t index,
-                                                               const bool isInput,
-                                                               const ov::Allocator& allocator,
-                                                               const std::optional<std::size_t> batchSize) const {
-    check_network_precision(descriptor.precision);
-
-    std::shared_ptr<ov::ITensor> tensor;
-    ov::Shape allocatedTensorShape = descriptor.shapeFromCompiler.get_max_shape();
-
-    if (batchSize.has_value()) {
-        allocatedTensorShape[BATCH_AXIS] = *batchSize;
-    }
-
-    if (descriptor.isStateOutput) {
-        // Only one buffer is required for each (state input, state output) pair, acting as an input before running the
-        // inference and as an output after performing it. Thus both the "state input" and "state output" entries shall
-        // point to the same buffer.
-        OPENVINO_ASSERT(descriptor.relatedDescriptorIndex.has_value(),
-                        "The link between state descriptors is missing, state name: ",
-                        descriptor.nameFromCompiler);
-        tensor = get_user_input(*descriptor.relatedDescriptorIndex)._ptr;
-    } else {
-        tensor = std::make_shared<ZeroTensor>(_initStructs, descriptor.precision, allocatedTensorShape, allocator);
-    }
-
-    if (isInput) {
-        if (get_user_input(index) == nullptr) {
-            get_user_input(index) = tensor;
-        }
-
-        if (descriptor.isStateInput) {
-            _variableStates.push_back(std::make_shared<VariableState>(descriptor.nameFromCompiler, tensor));
-        }
-    } else if (_userOutputTensors.at(index) == nullptr) {
-        _userOutputTensors.at(index) = tensor;
-    }
+std::shared_ptr<ov::ITensor> ZeroInferRequest::create_tensor(ov::element::Type type,
+                                                             const ov::Shape& shape,
+                                                             const ov::Allocator& allocator) const {
+    OPENVINO_ASSERT(allocator, "Allocator must be provided when creating a zero tensor!");
 
-    return tensor;
+    return std::make_shared<ZeroTensor>(_initStructs, type, shape, allocator);
 }
 
 std::vector<uint8_t> ZeroInferRequest::get_raw_profiling_data() const {
diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
index 81d3f30e20b548..455fe25e158b46 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
@@ -33,21 +33,21 @@ Type extract_object(const ov::AnyMap& params, const ov::Property<Type>& p) {
 namespace intel_npu {
 
 Pipeline::Pipeline(const Config& config,
-                   const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
+                   const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
                    const std::shared_ptr<IGraph>& graph,
                    zeroProfiling::ProfilingPool& profiling_pool,
                    zeroProfiling::ProfilingQuery& profiling_query,
                    const std::shared_ptr<zeroProfiling::NpuInferProfiling>& npu_profiling,
-                   const std::vector<std::vector<std::shared_ptr<ov::ITensor>>>& inputTensorsData,
-                   const std::vector<std::shared_ptr<ov::ITensor>>& outputTensorsData,
+                   const std::vector<std::vector<std::shared_ptr<ov::ITensor>>>& input_tensors_data,
+                   const std::vector<std::shared_ptr<ov::ITensor>>& output_tensors_data,
                    uint32_t group_ordinal)
     : _graph(graph),
      _config(config),
      _id(_graph->get_unique_id()),
      _number_of_command_lists(_graph->get_batch_size().has_value() ? *_graph->get_batch_size() : 1),
      _event_pool{
-          std::make_shared<EventPool>(initStructs->getDevice(),
-                                      initStructs->getContext(),
+          std::make_shared<EventPool>(init_structs->getDevice(),
+                                      init_structs->getContext(),
                                       _number_of_command_lists ? static_cast<uint32_t>(_number_of_command_lists) : 1)},
      _npu_profiling(npu_profiling),
      _logger("Pipeline", _config.get<LOG_LEVEL>()) {
@@ -64,65 +64,64 @@ Pipeline::Pipeline(const Config& config,
     _logger.debug("Pipeline - emplace_back _event_pool and _command_queue");
     for (size_t i = 0; i < _number_of_command_lists; i++) {
         _command_lists.emplace_back(
-            std::make_unique<CommandList>(initStructs,
+            std::make_unique<CommandList>(init_structs,
                                           group_ordinal,
-                                          initStructs->getMutableCommandListVersion() ? true : false));
+                                          init_structs->getMutableCommandListVersion() ? true : false));
         _events.emplace_back(std::make_shared<Event>(_event_pool, static_cast<uint32_t>(i)));
         _fences.emplace_back(std::make_unique<Fence>(*_graph->get_command_queue()));
     }
 
     for (size_t i = 0; i < _number_of_command_lists; i++) {
-        size_t ioIndex = 0;
+        size_t io_index = 0;
         for (const auto& desc : graph->get_input_descriptors()) {
-            if (inputTensorsData.at(ioIndex).size() > 1) {
+            if (input_tensors_data.at(io_index).size() > 1) {
                 void* data = nullptr;
-                auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(inputTensorsData.at(ioIndex).at(i));
-                if (remoteTensor == nullptr) {
-                    data = inputTensorsData.at(ioIndex).at(i)->data();
+                auto remote_tensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(input_tensors_data.at(io_index).at(i));
+                if (remote_tensor == nullptr) {
+                    data = input_tensors_data.at(io_index).at(i)->data();
                 } else {
-                    data = extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
+                    data = extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
                 }
                 graph->set_argument_value(desc.idx, data);
 
-                ++ioIndex;
+                ++io_index;
                 continue;
             }
 
             void* data = nullptr;
-            auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(inputTensorsData.at(ioIndex).at(0));
-            if (remoteTensor == nullptr) {
-                data = inputTensorsData.at(ioIndex).at(0)->data();
+            auto remote_tensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(input_tensors_data.at(io_index).at(0));
+            if (remote_tensor == nullptr) {
+                data = input_tensors_data.at(io_index).at(0)->data();
             } else {
-                data = extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
+                data = extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
             }
 
             graph->set_argument_value(
                 desc.idx,
                 static_cast<unsigned char*>(data) +
-                    (i * inputTensorsData.at(ioIndex).at(0)->get_byte_size()) / _number_of_command_lists);
+                    (i * input_tensors_data.at(io_index).at(0)->get_byte_size()) / _number_of_command_lists);
 
-            ++ioIndex;
+            ++io_index;
         }
 
-        ioIndex = 0;
+        io_index = 0;
 
         for (const auto& desc : graph->get_output_descriptors()) {
             void* data = nullptr;
-            auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(outputTensorsData.at(ioIndex));
-            if (remoteTensor == nullptr) {
-                data = outputTensorsData.at(ioIndex)->data();
-
+            auto remote_tensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(output_tensors_data.at(io_index));
+            if (remote_tensor == nullptr) {
+                data = output_tensors_data.at(io_index)->data();
             } else {
-                data = extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
+                data = extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
             }
 
             graph->set_argument_value(
                 desc.idx,
                 static_cast<unsigned char*>(data) +
-                    (i * outputTensorsData.at(ioIndex)->get_byte_size()) / _number_of_command_lists);
-            ++ioIndex;
+                    (i * output_tensors_data.at(io_index)->get_byte_size()) / _number_of_command_lists);
+            ++io_index;
         }
 
         if (_config.get<RUN_INFERENCES_SEQUENTIALLY>()) {
@@ -225,7 +224,7 @@ void Pipeline::reset() const {
     _logger.debug("Pipeline - rest() completed");
 };
 
-void Pipeline::updateCommandList(const void* data, size_t byte_size, uint32_t index) {
+void Pipeline::updateCommandList(uint32_t arg_index, const void* arg_data, size_t byte_size) {
     OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandList");
     _logger.debug("Pipeline - updateCommandList");
 
@@ -233,24 +232,24 @@ void Pipeline::updateCommandList(const void* data, size_t byte_size, uint32_t in
 
     for (size_t i = 0; i < _number_of_command_lists; i++) {
         _command_lists.at(i)->updateMutableCommandList(
-            index,
-            static_cast<const unsigned char*>(data) + (i * byte_size) / _number_of_command_lists);
+            arg_index,
+            static_cast<const unsigned char*>(arg_data) + (i * byte_size) / _number_of_command_lists);
         _command_lists.at(i)->close();
     }
 };
 
-void Pipeline::updateCommandList(const void* data, uint32_t index, size_t commandListIndex) {
+void Pipeline::updateCommandListIndex(uint32_t arg_index, const void* arg_data, size_t command_list_index) {
     OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "Pipeline", "updateCommandList");
     _logger.debug("Pipeline - updateCommandList");
 
     const size_t _number_of_command_lists = _command_lists.size();
 
-    OPENVINO_ASSERT(commandListIndex < _number_of_command_lists,
+    OPENVINO_ASSERT(command_list_index < _number_of_command_lists,
                     "Command list index is higgher than the number of Command lists ",
-                    commandListIndex);
+                    command_list_index);
 
-    _command_lists.at(commandListIndex)->updateMutableCommandList(index, data);
-    _command_lists.at(commandListIndex)->close();
+    _command_lists.at(command_list_index)->updateMutableCommandList(arg_index, arg_data);
+    _command_lists.at(command_list_index)->close();
 };
 
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp b/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp
index b98628f3f23c93..b2b5cc7c9b166e 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_tensor.cpp
@@ -53,8 +53,9 @@ const ov::Shape& ZeroTensor::get_shape() const {
 }
 
 void ZeroTensor::update_strides() const {
-    if (_element_type.bitwidth() < 8)
+    if (_element_type.bitwidth() < 8) {
         return;
+    }
 
     auto& shape = get_shape();
     if (_strides.empty() && !shape.empty()) {
@@ -110,19 +111,16 @@ void ZeroTensor::destroy_memory() {
 }
 
 void ZeroTensor::set_shape(ov::Shape new_shape) {
-    if (_shape == new_shape)
+    if (_shape == new_shape) {
         return;
+    }
 
     _shape = std::move(new_shape);
 
     if (get_size() > get_capacity()) {
-#ifdef __linux__
-        OPENVINO_THROW("Re-shaping the tensor with a larger shape is not available.");
-#endif
-
         if (!_init_structs->getMutableCommandListVersion()) {
             OPENVINO_THROW("Re-shaping the tensor with a larger shape is not available using this driver version. "
-                           "Please update the driver.");
+                           "Please update the driver to the latest version.");
         }
 
         destroy_memory();
@@ -131,12 +129,22 @@ void ZeroTensor::set_shape(ov::Shape new_shape) {
         _capacity = _shape;
         _ptr = _allocator.allocate(get_bytes_capacity());
         initialize_elements(_ptr, _element_type, _shape);
+
+        _reset_tensor_memory = true;
     }
 
     _strides.clear();
     update_strides();
 }
 
+bool ZeroTensor::memory_address_changed() {
+    return _reset_tensor_memory;
+}
+
+void ZeroTensor::reset_memory_flag() {
+    _reset_tensor_memory = false;
+}
+
 ZeroTensor::~ZeroTensor() {
     destroy_memory();
 }
diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp
index dfcb37a0043ce6..635802900d3a12 100644
--- a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp
+++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp
@@ -11,12 +11,6 @@
 #include "openvino/runtime/iinfer_request.hpp"
 #include "openvino/runtime/iplugin.hpp"
 
-namespace {
-
-constexpr size_t BATCH_AXIS = 0;
-
-}
-
 namespace intel_npu {
 
 /**
@@ -163,12 +157,15 @@ class SyncInferRequest : public ov::IInferRequest {
     * @param batchSize If provided, the value of the shape on the 0th axis is overriden with this value.
     * @return Pointer towards the allocated tensor
     */
-    virtual std::shared_ptr<ov::ITensor> allocate_tensor(
-        const IODescriptor& descriptor,
-        const size_t index,
-        const bool isInput,
-        const ov::Allocator& allocator = {},
-        const std::optional<std::size_t> batchSize = std::nullopt) const;
+    std::shared_ptr<ov::ITensor> allocate_tensor(const IODescriptor& descriptor,
+                                                 const size_t index,
+                                                 const bool isInput,
+                                                 const ov::Allocator& allocator = {},
+                                                 const std::optional<std::size_t> batchSize = std::nullopt) const;
+
+    virtual std::shared_ptr<ov::ITensor> create_tensor(ov::element::Type type,
+                                                       const ov::Shape& shape,
+                                                       const ov::Allocator& allocator = {}) const;
 
     bool is_batched_input(size_t idx) const;
 
diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
index 09f00b43c840c1..fe331a3c6dada0 100644
--- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
+++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
@@ -11,6 +11,12 @@
 #include "openvino/util/common_util.hpp"
 #include "transformations/utils/utils.hpp"
 
+namespace {
+
+constexpr size_t BATCH_AXIS = 0;
+
+}
+
 namespace intel_npu {
 
 SyncInferRequest::SyncInferRequest(const std::shared_ptr<ICompiledModel>& compiledModel, const Config& config)
@@ -310,10 +316,8 @@ std::shared_ptr<ov::ITensor> SyncInferRequest::allocate_tensor(const IODescripto
                         "The link between state descriptors is missing, state name: ",
                         descriptor.nameFromCompiler);
         tensor = get_user_input(*descriptor.relatedDescriptorIndex)._ptr;
-    } else if (allocator) {
-        tensor = ov::make_tensor(descriptor.precision, allocatedTensorShape, allocator);
     } else {
-        tensor = ov::make_tensor(descriptor.precision, allocatedTensorShape);
+        tensor = create_tensor(descriptor.precision, allocatedTensorShape, allocator);
     }
 
     if (isInput) {
@@ -331,6 +335,12 @@ std::shared_ptr<ov::ITensor> SyncInferRequest::allocate_tensor(const IODescripto
     return tensor;
 }
 
+std::shared_ptr<ov::ITensor> SyncInferRequest::create_tensor(ov::element::Type type,
+                                                             const ov::Shape& shape,
+                                                             const ov::Allocator& allocator) const {
+    return ov::make_tensor(type, shape, allocator);
+}
+
 bool SyncInferRequest::is_batched_input(size_t idx) const {
     return _userInputTensors.at(idx).size() > 1;
 }