diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
index aaaa128518b34f..c40142c75608b8 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
@@ -67,11 +67,19 @@ class ZeroInferRequest final : public SyncInferRequest {
                                               const ov::Shape& shape,
                                               const ov::Allocator& allocator = {}) const override;
 
+    void add_state(const IODescriptor& descriptor, size_t tensorIndex) const override;
+
+    void update_pipeline_if_memory_changed();
+    void update_states_if_memory_changed();
+
     const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
     const std::shared_ptr<IGraph> _graph;
     const Config _config;
     Logger _logger;
 
+    const std::vector<ArgumentDescriptor>& _graphInputDescriptors;
+    const std::vector<ArgumentDescriptor>& _graphOutputDescriptors;
+
     // A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another
     // memory area for the tensor.
     mutable std::vector<std::vector<std::shared_ptr<ov::ITensor>>> _levelZeroInputTensors;
diff --git a/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp b/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp
new file mode 100644
index 00000000000000..c7c03bcfe4c8d8
--- /dev/null
+++ b/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp
@@ -0,0 +1,77 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "intel_npu/config/config.hpp"
+#include "intel_npu/utils/logger/logger.hpp"
+#include "intel_npu/utils/zero/zero_init.hpp"
+#include "openvino/runtime/ivariable_state.hpp"
+
+namespace intel_npu {
+
+/**
+ * @brief Interface for zero variable state implementation
+ * @note In case the memory was allocated in the same level zero context use that memory, otherwise use memcpy at infer
+ * time. Also, get correct data if remote tensor is used.
+ */
+class ZeroVariableState final : public ov::IVariableState {
+public:
+    explicit ZeroVariableState(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
+                               const std::string& name,
+                               const ov::SoPtr<ov::ITensor>& tensor,
+                               size_t tensor_index,
+                               size_t related_tensor_index,
+                               const Config& config);
+
+    void set_state(const ov::SoPtr<ov::ITensor>& new_state) override;
+
+    void reset() override;
+
+    /**
+     * @brief Get input tensor index used internally for the state
+     */
+    size_t get_tensor_index() const;
+
+    /**
+     * @brief Get output tensor index used internally for the state
+     * @details The related tensors are defined by state input, state output pairs.
+     */
+    size_t get_related_tensor_index() const;
+
+    /**
+     * @brief Get acknowledge if the tensor was updated
+     */
+    bool tensor_was_updated() const;
+
+    /**
+     * @brief Reset tensor updated flag
+     */
+    void reset_tensor_updated_flag();
+
+    /**
+     * @brief Get acknowledge if the zero tensor was updated
+     * @details In case the memory was allocated in the same level zero context update the zero tensor
+     */
+    bool zero_tensor_should_be_updated() const;
+
+    /**
+     * @brief Reset zero tensor updated flag
+     */
+    void reset_zero_tensor_updated_flag();
+
+    ~ZeroVariableState() override = default;
+
+private:
+    std::shared_ptr<ZeroInitStructsHolder> _init_structs;
+    size_t _tensor_index;
+    size_t _related_tensor_index;
+
+    bool _tensor_updated = false;
+    bool _zero_tensor_updated = false;
+
+    Logger _logger;
+};
+
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index b7049f62af6d31..034f69f63e4158 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -13,6 +13,7 @@
 #include "openvino/op/util/op_types.hpp"
 #include "openvino/runtime/intel_npu/remote_properties.hpp"
 #include "zero_memory.hpp"
+#include "zero_variable_state.hpp"
 
 using namespace intel_npu;
 
@@ -63,33 +64,6 @@ void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, const A
     }
 }
 
-template <typename Type>
-Type extract_object(const ov::AnyMap& params, const ov::Property<Type>& p) {
-    auto itrHandle = params.find(p.name());
-    ov::Any res = nullptr;
-    if (itrHandle == params.end()) {
-        OPENVINO_THROW("No parameter ", p.name(), " found in parameters map");
-    }
-    res = itrHandle->second;
-    return res.as<Type>();
-}
-
-bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, const void* ptr) {
-    ze_memory_allocation_properties_t desc = {};
-    desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
-    auto res = intel_npu::zeMemGetAllocProperties(hContext, ptr, &desc, nullptr);
-    if (res == ZE_RESULT_SUCCESS) {
-        if (desc.id) {
-            if ((desc.type & ZE_MEMORY_TYPE_HOST) || (desc.type & ZE_MEMORY_TYPE_DEVICE) ||
-                (desc.type & ZE_MEMORY_TYPE_SHARED)) {
-                return true;
-            }
-        }
-    }
-
-    return false;
-}
-
 }  // namespace
 
 //------------------------------------------------------------------------------
@@ -101,13 +75,13 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
       _graph(compiledModel->get_graph()),
       _config(config),
       _logger("ZeroInferRequest", config.get<LOG_LEVEL>()),
+      _graphInputDescriptors(_graph->get_input_descriptors()),
+      _graphOutputDescriptors(_graph->get_output_descriptors()),
       _levelZeroInputTensors(_metadata.inputs.size(), std::vector<std::shared_ptr<ov::ITensor>>(1, nullptr)),
       _levelZeroOutputTensors(_metadata.outputs.size(), nullptr),
       _profilingPool(_initStructs, _graph, zeroProfiling::POOL_SIZE),
       _profilingQuery(_initStructs, 0) {
     _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");
-    const std::vector<ArgumentDescriptor>& executorInputDescriptors = _graph->get_input_descriptors();
-    const std::vector<ArgumentDescriptor>& executorOutputDescriptors = _graph->get_output_descriptors();
 
     auto proftype = config.get<PROFILING_TYPE>();
     if (proftype == ov::intel_npu::ProfilingType::INFER) {
@@ -127,7 +101,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
     size_t ioIndex = 0;
     for (const IODescriptor& inputDescriptor : _metadata.inputs) {
-        check_level_zero_attributes_match(inputDescriptor, executorInputDescriptors.at(ioIndex));
+        check_level_zero_attributes_match(inputDescriptor,
_graphInputDescriptors.at(ioIndex)); if (!(inputDescriptor.isStateInput || inputDescriptor.isShapeTensor)) { ++ioIndex; @@ -142,7 +116,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& ioIndex = 0; for (const IODescriptor& outputDescriptor : _metadata.outputs) { - check_level_zero_attributes_match(outputDescriptor, executorOutputDescriptors.at(ioIndex)); + check_level_zero_attributes_match(outputDescriptor, _graphOutputDescriptors.at(ioIndex)); if (!(outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor)) { ++ioIndex; @@ -203,6 +177,29 @@ void ZeroInferRequest::create_pipeline() { auto groupOrdinal = zeroUtils::findGroupOrdinal(_initStructs->getDevice(), _properties); _logger.debug("ZeroInferRequest::create_pipeline - init completed"); + // Set new tensors and reset variable state flag if memory updated before creating the pipeline + _logger.debug("ZeroInferRequest::create_pipeline - set new tensors and reset variable state flag if memory updated " + "before creating the pipeline"); + for (const auto& variableState : _variableStates) { + auto zeroState = std::dynamic_pointer_cast(variableState._ptr); + + OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin"); + + if (zeroState->tensor_was_updated()) { + get_user_input(zeroState->get_tensor_index()) = zeroState->get_state(); + _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state(); + + zeroState->reset_tensor_updated_flag(); + + if (zeroState->zero_tensor_should_be_updated()) { + zeroState->reset_zero_tensor_updated_flag(); + + get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_state()._ptr; + _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state()._ptr; + } + } + } + _logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline"); // Construct pipeline @@ -228,7 +225,7 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr& tenso bool updateCommandListArg = false; OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "check_data_allocation"); - if (memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensor->data())) { + if (zeroUtils::memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensor->data())) { _logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context"); levelZeroTensors = tensor; updateCommandListArg = true; @@ -268,7 +265,7 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr( - extract_object(tensor->get_context()->get_property(), ov::intel_npu::l0_context)); + zeroUtils::extract_object(tensor->get_context()->get_property(), ov::intel_npu::l0_context)); if (_initStructs->getContext() != l0_context) { OPENVINO_THROW("Using different context for creating the tensor is not supported"); } @@ -279,7 +276,7 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptrget_properties(), ov::intel_npu::mem_handle); + auto data = zeroUtils::extract_object(tensor->get_properties(), ov::intel_npu::mem_handle); OPENVINO_ASSERT(data, "Empty buffer"); OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList"); @@ -371,7 +368,8 @@ void ZeroInferRequest::set_tensors(const ov::Output& port, bool tensorHasSameL0Context = false; OV_ITT_TASK_NEXT(SET_TENSORS, "check_data_allocation"); - if (memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), tensors[i]->data())) { + if (zeroUtils::memory_was_allocated_in_the_same_l0_context(_initStructs->getContext(), + tensors[i]->data())) { 
_logger.debug("ZeroInferRequest::set_tensors - tensor was created in the same L0 context"); get_level_zero_input(foundPort.idx, i) = tensors.at(i)._ptr; @@ -390,7 +388,7 @@ void ZeroInferRequest::set_tensors(const ov::Output& port, } else { _logger.debug("ZeroInferRequest::set_tensors - remote tensor is used"); - data = extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle); + data = zeroUtils::extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle); get_level_zero_input(foundPort.idx, i) = tensors.at(i)._ptr; } @@ -453,6 +451,112 @@ ov::SoPtr ZeroInferRequest::get_tensor(const ov::Output(levelZeroTensor.at(SINGLE_TENSOR)); + + if (is_batched_input(ioIndex) || inputDescriptor.isShapeTensor || + is_remote_tensor(levelZeroTensor.at(SINGLE_TENSOR)) || zeroTensor == nullptr) { + ++ioIndex; + continue; + } + + if (zeroTensor->memory_address_changed()) { + _logger.debug("Update input graph descriptor with the new tensor"); + OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer"); + + _pipeline->updateCommandList(_graph->get_input_descriptors().at(ioIndex).idx, + zeroTensor->data(), + zeroTensor->get_byte_size()); + closePipeline = true; + + if (!inputDescriptor.isStateInput) { + zeroTensor->reset_memory_flag(); + } + } + + ++ioIndex; + } + + ioIndex = 0; + + for (const auto& levelZeroTensor : _levelZeroOutputTensors) { + const auto outputDescriptor = _metadata.outputs.at(ioIndex); + auto zeroTensor = std::dynamic_pointer_cast(levelZeroTensor); + + if (outputDescriptor.isShapeTensor || is_remote_tensor(levelZeroTensor) || zeroTensor == nullptr) { + ++ioIndex; + continue; + } + + if (zeroTensor->memory_address_changed()) { + _logger.debug("Update output graph descriptor with the new tensor"); + OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer"); + + _pipeline->updateCommandList(_graph->get_output_descriptors().at(ioIndex).idx, + zeroTensor->data(), + zeroTensor->get_byte_size()); + closePipeline = true; + + zeroTensor->reset_memory_flag(); + } + + ++ioIndex; + } + + if (closePipeline) { + _pipeline->closeCommandList(); + } +} + +void ZeroInferRequest::update_states_if_memory_changed() { + bool closePipeline = false; + + for (const auto& variableState : _variableStates) { + auto zeroState = std::dynamic_pointer_cast(variableState._ptr); + + OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin"); + + if (zeroState->tensor_was_updated()) { + get_user_input(zeroState->get_tensor_index()) = zeroState->get_state(); + _userOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state(); + + zeroState->reset_tensor_updated_flag(); + + if (zeroState->zero_tensor_should_be_updated()) { + auto remoteTensor = std::dynamic_pointer_cast(zeroState->get_state()._ptr); + + void* userBuffer = !remoteTensor ? 
zeroState->get_state()->data() + : zeroUtils::extract_object(remoteTensor->get_properties(), + ov::intel_npu::mem_handle); + + _pipeline->updateCommandList(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx, + userBuffer, + zeroState->get_state()->get_byte_size()); + + _pipeline->updateCommandList(_graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx, + userBuffer, + zeroState->get_state()->get_byte_size()); + + zeroState->reset_zero_tensor_updated_flag(); + + get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_state()._ptr; + _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state()._ptr; + + closePipeline = true; + } + } + } + + if (closePipeline) { + _pipeline->closeCommandList(); + } +} + void ZeroInferRequest::infer() { if (_config.get()) { OPENVINO_THROW("Only start async is supported when RUN_INFERENCES_SEQUENTIALLY is enabled!"); @@ -476,64 +580,8 @@ void ZeroInferRequest::infer_async() { _pipelineIsCreated = true; } else { if (_initStructs->getMutableCommandListVersion()) { - bool closePipeline = false; - size_t ioIndex = 0; - - for (const auto& levelZeroTensor : _levelZeroInputTensors) { - const auto inputDescriptor = _metadata.inputs.at(ioIndex); - auto zeroTensor = std::dynamic_pointer_cast(levelZeroTensor.at(SINGLE_TENSOR)); - - if (is_batched_input(ioIndex) || inputDescriptor.isShapeTensor || inputDescriptor.isStateInput || - is_remote_tensor(levelZeroTensor.at(SINGLE_TENSOR)) || zeroTensor == nullptr) { - ++ioIndex; - continue; - } - - if (zeroTensor->memory_address_changed()) { - _logger.debug("Update input graph descriptor with the new tensor"); - OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer"); - - _pipeline->updateCommandList(_graph->get_input_descriptors().at(ioIndex).idx, - zeroTensor->data(), - zeroTensor->get_byte_size()); - closePipeline = true; - - zeroTensor->reset_memory_flag(); - } - - ++ioIndex; - } - - ioIndex = 0; - - for (const auto& levelZeroTensor : _levelZeroOutputTensors) { - const auto outputDescriptor = _metadata.outputs.at(ioIndex); - auto zeroTensor = std::dynamic_pointer_cast(levelZeroTensor); - - if (outputDescriptor.isShapeTensor || outputDescriptor.isStateOutput || - is_remote_tensor(levelZeroTensor) || zeroTensor == nullptr) { - ++ioIndex; - continue; - } - - if (zeroTensor->memory_address_changed()) { - _logger.debug("Update output graph descriptor with the new tensor"); - OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer"); - - _pipeline->updateCommandList(_graph->get_output_descriptors().at(ioIndex).idx, - zeroTensor->data(), - zeroTensor->get_byte_size()); - closePipeline = true; - - zeroTensor->reset_memory_flag(); - } - - ++ioIndex; - } - - if (closePipeline) { - _pipeline->closeCommandList(); - } + update_pipeline_if_memory_changed(); + update_states_if_memory_changed(); } } } @@ -561,10 +609,10 @@ void ZeroInferRequest::infer_async() { auto userBatchRemoteTensor = std::dynamic_pointer_cast(userTensor.at(i)._ptr); - void* userBuffer = - !userBatchRemoteTensor - ? userTensor.at(i)->data() - : extract_object(userBatchRemoteTensor->get_properties(), ov::intel_npu::mem_handle); + void* userBuffer = !userBatchRemoteTensor + ? 
userTensor.at(i)->data() + : zeroUtils::extract_object(userBatchRemoteTensor->get_properties(), + ov::intel_npu::mem_handle); if (userBuffer != levelZeroBuffer) { if (userBuffer == nullptr || levelZeroBuffer == nullptr) { @@ -586,9 +634,10 @@ void ZeroInferRequest::infer_async() { for (size_t i = 0; i < userTensor.size(); i++) { auto userBatchRemoteTensor = std::dynamic_pointer_cast(userTensor.at(i)._ptr); - void* userBuffer = !userBatchRemoteTensor ? userTensor.at(i)->data() - : extract_object(userBatchRemoteTensor->get_properties(), - ov::intel_npu::mem_handle); + void* userBuffer = !userBatchRemoteTensor + ? userTensor.at(i)->data() + : zeroUtils::extract_object(userBatchRemoteTensor->get_properties(), + ov::intel_npu::mem_handle); std::memcpy(static_cast(levelZeroBuffer) + (i * userTensor.at(i)->get_byte_size()), userBuffer, @@ -601,9 +650,9 @@ void ZeroInferRequest::infer_async() { } auto userRemoteTensor = std::dynamic_pointer_cast(userTensor.at(SINGLE_TENSOR)._ptr); - void* userBuffer = !userRemoteTensor - ? userTensor.at(SINGLE_TENSOR)->data() - : extract_object(userRemoteTensor->get_properties(), ov::intel_npu::mem_handle); + void* userBuffer = !userRemoteTensor ? userTensor.at(SINGLE_TENSOR)->data() + : zeroUtils::extract_object(userRemoteTensor->get_properties(), + ov::intel_npu::mem_handle); const auto& levelZeroTensor = get_level_zero_input(inputIndex); if (!is_remote_tensor(levelZeroTensor)) { @@ -652,9 +701,9 @@ void ZeroInferRequest::get_result() { } auto userRemoteTensor = std::dynamic_pointer_cast(userTensor._ptr); - void* userBuffer = !userRemoteTensor - ? userTensor->data() - : extract_object(userRemoteTensor->get_properties(), ov::intel_npu::mem_handle); + void* userBuffer = !userRemoteTensor ? userTensor->data() + : zeroUtils::extract_object(userRemoteTensor->get_properties(), + ov::intel_npu::mem_handle); const std::shared_ptr& levelZeroTensor = _levelZeroOutputTensors.at(outputIndex); if (!is_remote_tensor(levelZeroTensor)) { @@ -751,6 +800,19 @@ std::shared_ptr ZeroInferRequest::create_tensor(ov::element::Type t return std::make_shared(_initStructs, type, shape, allocator); } +void ZeroInferRequest::add_state(const IODescriptor& descriptor, size_t tensorIndex) const { + OPENVINO_ASSERT(descriptor.relatedDescriptorIndex.has_value(), + "The link between state descriptors is missing, state name: ", + descriptor.nameFromCompiler); + + _variableStates.push_back(std::make_shared(_initStructs, + descriptor.nameFromCompiler, + get_user_input(tensorIndex), + tensorIndex, + *descriptor.relatedDescriptorIndex, + _config)); +} + std::vector ZeroInferRequest::get_raw_profiling_data() const { return _profilingQuery.getData(); } diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 7ada704c9969d8..a01238a899e0dc 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -15,21 +15,6 @@ #include "intel_npu/utils/zero/zero_types.hpp" #include "zero_remote_tensor.hpp" -namespace { - -template -Type extract_object(const ov::AnyMap& params, const ov::Property& p) { - auto itrHandle = params.find(p.name()); - ov::Any res = nullptr; - if (itrHandle == params.end()) { - OPENVINO_THROW("No parameter ", p.name(), " found in parameters map"); - } - res = itrHandle->second; - return res.as(); -} - -} // namespace - namespace intel_npu { Pipeline::Pipeline(const Config& config, @@ -80,7 +65,7 @@ Pipeline::Pipeline(const Config& config, if 
(remote_tensor == nullptr) {
                     data = input_tensors.at(io_index).at(i)->data();
                 } else {
-                    data = extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
+                    data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
                 }
 
                 graph->set_argument_value(desc.idx, data);
 
@@ -94,7 +79,7 @@ Pipeline::Pipeline(const Config& config,
             if (remote_tensor == nullptr) {
                 data = input_tensors.at(io_index).at(0)->data();
             } else {
-                data = extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
+                data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
             }
 
             graph->set_argument_value(
@@ -112,7 +97,7 @@ Pipeline::Pipeline(const Config& config,
         if (remote_tensor == nullptr) {
             data = output_tensors.at(io_index)->data();
         } else {
-            data = extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
+            data = zeroUtils::extract_object(remote_tensor->get_properties(), ov::intel_npu::mem_handle);
        }
 
         graph->set_argument_value(
diff --git a/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp
new file mode 100644
index 00000000000000..19cabfb4246e5d
--- /dev/null
+++ b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp
@@ -0,0 +1,80 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "zero_variable_state.hpp"
+
+#include "intel_npu/config/common.hpp"
+#include "intel_npu/utils/zero/zero_utils.hpp"
+#include "zero_remote_tensor.hpp"
+
+namespace intel_npu {
+
+ZeroVariableState::ZeroVariableState(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
+                                     const std::string& name,
+                                     const ov::SoPtr<ov::ITensor>& tensor,
+                                     size_t tensor_index,
+                                     size_t related_tensor_index,
+                                     const Config& config)
+    : ov::IVariableState(name),
+      _init_structs(init_structs),
+      _tensor_index(tensor_index),
+      _related_tensor_index(related_tensor_index),
+      _logger("ZeroVariableState", config.get<LOG_LEVEL>()) {
+    m_state = tensor;
+}
+
+void ZeroVariableState::set_state(const ov::SoPtr<ov::ITensor>& new_state) {
+    m_state = new_state;
+    _tensor_updated = true;
+
+    if (_init_structs->getMutableCommandListVersion()) {
+        if (!is_remote_tensor(new_state._ptr)) {
+            if (zeroUtils::memory_was_allocated_in_the_same_l0_context(_init_structs->getContext(),
+                                                                       new_state->data())) {
+                _logger.debug("ZeroVariableState::set_state - tensor was created in the same L0 context");
+                _zero_tensor_updated = true;
+            }
+
+            return;
+        }
+
+        _zero_tensor_updated = true;
+    }
+}
+
+void ZeroVariableState::reset() {
+    auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(m_state._ptr);
+
+    void* userBuffer = !remoteTensor
+                           ? m_state->data()
+                           : zeroUtils::extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
+
+    std::memset(userBuffer, 0, m_state->get_byte_size());
+}
+
+size_t ZeroVariableState::get_tensor_index() const {
+    return _tensor_index;
+}
+
+size_t ZeroVariableState::get_related_tensor_index() const {
+    return _related_tensor_index;
+}
+
+bool ZeroVariableState::tensor_was_updated() const {
+    return _tensor_updated;
+}
+
+void ZeroVariableState::reset_tensor_updated_flag() {
+    _tensor_updated = false;
+}
+
+bool ZeroVariableState::zero_tensor_should_be_updated() const {
+    return _zero_tensor_updated;
+}
+
+void ZeroVariableState::reset_zero_tensor_updated_flag() {
+    _zero_tensor_updated = false;
+}
+
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp
index 3c772168c0c93f..f7406413c9f197 100644
--- a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp
+++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp
@@ -167,6 +167,8 @@ class SyncInferRequest : public ov::IInferRequest {
                                               const ov::Shape& shape,
                                               const ov::Allocator& allocator = {}) const;
 
+    virtual void add_state(const IODescriptor& descriptor, const size_t tensorIndex) const;
+
     bool is_batched_input(size_t idx) const;
 
     ov::SoPtr<ov::ITensor>& get_user_input(size_t index) const;
diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp
index acb83d5b718033..0987f2b44bbb04 100644
--- a/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp
+++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp
@@ -11,12 +11,11 @@ namespace intel_npu {
 
 class VariableState final : public ov::IVariableState {
 public:
-    explicit VariableState(const std::string& name, const std::shared_ptr<ov::ITensor>& tensor)
-        : ov::IVariableState(name) {
+    explicit VariableState(const std::string& name, const ov::SoPtr<ov::ITensor>& tensor) : ov::IVariableState(name) {
         m_state = tensor;
     }
 
-    void set_state(const ov::SoPtr<ov::ITensor>& newState) override {
+    virtual void set_state(const ov::SoPtr<ov::ITensor>& newState) override {
         if (newState->get_byte_size() != m_state->get_byte_size()) {
             OPENVINO_THROW("Byte size mismatch");
         }
@@ -24,7 +23,7 @@ class VariableState final : public ov::IVariableState {
         std::memcpy(m_state->data(), newState->data(), newState->get_byte_size());
     }
 
-    void reset() override {
+    virtual void reset() override {
         std::memset(m_state->data(), 0, m_state->get_byte_size());
     }
 
diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
index 17dc6391761e5c..775113ef0d39bf 100644
--- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
+++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
@@ -326,7 +326,7 @@ std::shared_ptr<ov::ITensor> SyncInferRequest::allocate_tensor(const IODescripto
         }
 
         if (descriptor.isStateInput) {
-            _variableStates.push_back(std::make_shared<VariableState>(descriptor.nameFromCompiler, tensor));
+            add_state(descriptor, index);
         }
     } else if (_userOutputTensors.at(index) == nullptr) {
         _userOutputTensors.at(index) = tensor;
@@ -341,6 +341,11 @@ std::shared_ptr<ov::ITensor> SyncInferRequest::create_tensor(ov::element::Type t
     return ov::make_tensor(type, shape, allocator);
 }
 
+void SyncInferRequest::add_state(const IODescriptor& descriptor, const size_t tensorIndex) const
{ + _variableStates.push_back( + std::make_shared(descriptor.nameFromCompiler, get_user_input(tensorIndex))); +} + bool SyncInferRequest::is_batched_input(size_t idx) const { return _userInputTensors.at(idx).size() > 1; } diff --git a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp index db9dc1c9f51d34..0c2367b680851e 100644 --- a/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp +++ b/src/plugins/intel_npu/src/utils/include/intel_npu/utils/zero/zero_utils.hpp @@ -277,5 +277,32 @@ static inline std::string getLatestBuildError(ze_graph_dditable_ext_curr_t& _gra } } +template +static inline Type extract_object(const ov::AnyMap& params, const ov::Property& p) { + auto itrHandle = params.find(p.name()); + ov::Any res = nullptr; + if (itrHandle == params.end()) { + OPENVINO_THROW("No parameter ", p.name(), " found in parameters map"); + } + res = itrHandle->second; + return res.as(); +} + +static inline bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, const void* ptr) { + ze_memory_allocation_properties_t desc = {}; + desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES; + auto res = intel_npu::zeMemGetAllocProperties(hContext, ptr, &desc, nullptr); + if (res == ZE_RESULT_SUCCESS) { + if (desc.id) { + if ((desc.type & ZE_MEMORY_TYPE_HOST) || (desc.type & ZE_MEMORY_TYPE_DEVICE) || + (desc.type & ZE_MEMORY_TYPE_SHARED)) { + return true; + } + } + } + + return false; +} + } // namespace zeroUtils } // namespace intel_npu diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp index f45e30bb109849..f30fa2bb1416a3 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp @@ -11,7 +11,7 @@ using namespace ov::test::behavior; -const std::vector configsInferRequestRunTests = {{ov::log::level(ov::log::Level::ERR)}}; +const std::vector configsInferRequestRunTests = {{}}; INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest, InferRequestRunTests, diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp index 31b55704757b01..ab53a442c16cda 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp @@ -10,6 +10,7 @@ #include #include +#include #include #include "base/ov_behavior_test_utils.hpp" @@ -962,6 +963,104 @@ TEST_P(SetShapeInferRunTests, checkResultsAfterIOBlobReallocation) { } } +TEST_P(SetShapeInferRunTests, checkResultsAfterStateTensorsReallocation) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + testing::internal::Random random(1); + ov::Tensor input_tensor; + + auto original_shape = Shape{1, 10, 10, 10}; + auto dummy_shape = Shape{1, 50, 100, 100}; + auto shape_size = ov::shape_size(original_shape); + auto model = createModelWithStates(element::f32, original_shape); + + auto context = core->get_default_context(target_device); + + compiled_model = core->compile_model(model, target_device, configuration); + ov::InferRequest inference_request; + inference_request = compiled_model.create_infer_request(); + + auto input = compiled_model.input(); + 
OV_ASSERT_NO_THROW(input_tensor = inference_request.get_tensor(input)); + auto* input_data = input_tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = static_cast(random.Generate(10)); + } + + for (auto&& state : inference_request.query_state()) { + state.reset(); + } + + OV_ASSERT_NO_THROW(inference_request.infer()); + + auto output_tensor = inference_request.get_tensor("sigmod_state"); + auto output_data = output_tensor.data(); + for (size_t i = 0; i < output_tensor.get_size(); i++) { + EXPECT_NEAR(0.5f, output_data[i], 1e-5); + } + + auto states = inference_request.query_state(); + for (auto state : states) { + auto last_state = state.get_state(); + auto last_state_size = last_state.get_size(); + auto last_state_data = static_cast(last_state.data()); + + ASSERT_TRUE(last_state_size != 0) << "State size should not be 0"; + + for (size_t i = 0; i < last_state_size; ++i) { + EXPECT_NEAR(0.0, last_state_data[i], 1e-5); + } + } + + // create dummy Tensors to force the driver to allocate memory for the initial tensor somewhere else + [[maybe_unused]] auto l0_host_dummy_tensor_0 = context.create_host_tensor(ov::element::f32, dummy_shape); + [[maybe_unused]] auto l0_host_dummy_tensor_1 = context.create_host_tensor(ov::element::f32, dummy_shape); + [[maybe_unused]] auto l0_host_dummy_tensor_2 = context.create_host_tensor(ov::element::f32, dummy_shape); + [[maybe_unused]] auto l0_host_dummy_tensor_3 = context.create_host_tensor(ov::element::f32, dummy_shape); + [[maybe_unused]] auto l0_host_dummy_tensor_4 = context.create_host_tensor(ov::element::f32, dummy_shape); + [[maybe_unused]] auto l0_host_dummy_tensor_5 = context.create_host_tensor(ov::element::f32, dummy_shape); + [[maybe_unused]] auto l0_host_dummy_tensor_6 = context.create_host_tensor(ov::element::f32, dummy_shape); + [[maybe_unused]] auto l0_host_dummy_tensor_7 = context.create_host_tensor(ov::element::f32, dummy_shape); + + for (auto item : inference_request.query_state()) { + auto tensor_state = item.get_state(); + auto original_shape = tensor_state.get_shape(); + OV_ASSERT_NO_THROW(tensor_state.set_shape({1, 50, 20, 20})); + OV_ASSERT_NO_THROW(tensor_state.set_shape(original_shape)); + } + + for (auto&& state : inference_request.query_state()) { + state.reset(); + } + + for (auto state : states) { + auto last_state = state.get_state(); + auto last_state_size = last_state.get_size(); + auto last_state_data = static_cast(last_state.data()); + + ASSERT_TRUE(last_state_size != 0) << "State size should not be 0"; + + for (size_t i = 0; i < last_state_size; ++i) { + last_state_data[i] = 1.0f; + } + } + + OV_ASSERT_NO_THROW(inference_request.infer()); + + for (auto state : states) { + auto last_state = state.get_state(); + auto last_state_size = last_state.get_size(); + auto last_state_data = static_cast(last_state.data()); + + ASSERT_TRUE(last_state_size != 0) << "State size should not be 0"; + + for (size_t i = 0; i < last_state_size; ++i) { + EXPECT_NEAR(input_data[i], last_state_data[i], 1e-5); + } + } +} + } // namespace behavior } // namespace test } // namespace ov diff --git a/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.cpp b/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.cpp index 870f6596dca9ce..d3e537863227e4 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.cpp +++ b/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.cpp @@ -10,7 +10,7 @@ using namespace 
ov::test::behavior; -const std::vector remoteConfigs = {{ov::log::level(ov::log::Level::ERR)}}; +const std::vector remoteConfigs = {{}}; INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest, RemoteRunTests, diff --git a/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp index fa58d4270889ad..c1992b3047996d 100644 --- a/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp +++ b/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp @@ -434,6 +434,380 @@ TEST_P(RemoteRunTests, CheckOutputDataFromTwoRunsInOutRemoteTensorsHostTensor2) 0); } +TEST_P(RemoteRunTests, checkResultsAfterChangingStateTensors) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + testing::internal::Random random(1); + ov::Tensor input_tensor; + + auto original_shape = Shape{1, 10, 10, 10}; + auto shape_size = ov::shape_size(original_shape); + auto model = createModelWithStates(element::f32, original_shape); + + auto context = core->get_default_context(target_device); + + compiled_model = core->compile_model(model, target_device, configuration); + ov::InferRequest inference_request; + inference_request = compiled_model.create_infer_request(); + + auto input = compiled_model.input(); + OV_ASSERT_NO_THROW(input_tensor = inference_request.get_tensor(input)); + auto* input_data = input_tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = static_cast(random.Generate(10)); + } + + auto states = inference_request.query_state(); + + auto tensor_state = states[0].get_state(); + auto tensor_state_shape = tensor_state.get_shape(); + auto l0_host_tensor0 = context.create_host_tensor(ov::element::f32, tensor_state_shape); + + tensor_state = states[1].get_state(); + tensor_state_shape = tensor_state.get_shape(); + auto l0_host_tensor1 = context.create_host_tensor(ov::element::f32, tensor_state_shape); + + states[0].set_state(l0_host_tensor0); + states[0].reset(); + states[1].set_state(l0_host_tensor1); + states[1].reset(); + + OV_ASSERT_NO_THROW(inference_request.infer()); + + auto output_tensor = inference_request.get_tensor("sigmod_state"); + auto output_data = output_tensor.data(); + for (size_t i = 0; i < output_tensor.get_size(); i++) { + EXPECT_NEAR(0.5f, output_data[i], 1e-5); + } + + auto tensor_size = l0_host_tensor0.get_size(); + auto state_data = static_cast(l0_host_tensor0.data()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(0.0, state_data[i], 1e-5); + } + + tensor_size = l0_host_tensor1.get_size(); + state_data = static_cast(l0_host_tensor1.data()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(0.0, state_data[i], 1e-5); + } + + tensor_state = states[0].get_state(); + tensor_state_shape = tensor_state.get_shape(); + auto l0_host_tensor2 = context.create_host_tensor(ov::element::f32, tensor_state_shape); + + tensor_state = states[1].get_state(); + tensor_state_shape = tensor_state.get_shape(); + auto l0_host_tensor3 = context.create_host_tensor(ov::element::f32, tensor_state_shape); + + states[0].set_state(l0_host_tensor2); + states[1].set_state(l0_host_tensor3); + + tensor_size = l0_host_tensor2.get_size(); + state_data = static_cast(l0_host_tensor2.data()); + for (size_t i = 0; i < tensor_size; ++i) { + state_data[i] = 1.0f; + } + + tensor_size = l0_host_tensor3.get_size(); + state_data = static_cast(l0_host_tensor3.data()); + for (size_t i 
= 0; i < tensor_size; ++i) { + state_data[i] = 1.0f; + } + + OV_ASSERT_NO_THROW(inference_request.infer()); + + tensor_size = l0_host_tensor2.get_size(); + state_data = static_cast(l0_host_tensor2.data()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(input_data[i], state_data[i], 1e-5); + } + + tensor_size = l0_host_tensor3.get_size(); + state_data = static_cast(l0_host_tensor3.data()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(input_data[i], state_data[i], 1e-5); + } +} + +TEST_P(RemoteRunTests, checkResultsAfterChangingStateTensorsWithRemoteTensors) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + testing::internal::Random random(1); + ov::Tensor input_tensor; + + auto original_shape = Shape{1, 2, 2, 2}; + auto shape_size = ov::shape_size(original_shape); + auto model = createModelWithStates(element::f32, original_shape); + + auto context = core->get_default_context(target_device).as(); + ; + + compiled_model = core->compile_model(model, target_device, configuration); + ov::InferRequest inference_request; + inference_request = compiled_model.create_infer_request(); + + auto input = compiled_model.input(); + OV_ASSERT_NO_THROW(input_tensor = inference_request.get_tensor(input)); + auto* input_data = input_tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = static_cast(random.Generate(10)); + } + + auto states = inference_request.query_state(); + + auto tensor_state = states[0].get_state(); + auto tensor_state_shape = tensor_state.get_shape(); + auto l0_host_tensor0 = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape); + + tensor_state = states[1].get_state(); + tensor_state_shape = tensor_state.get_shape(); + auto l0_host_tensor1 = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape); + + states[0].set_state(l0_host_tensor0); + states[0].reset(); + states[1].set_state(l0_host_tensor1); + states[1].reset(); + + OV_ASSERT_NO_THROW(inference_request.infer()); + + auto output_tensor = inference_request.get_tensor("sigmod_state"); + auto output_data = output_tensor.data(); + for (size_t i = 0; i < output_tensor.get_size(); i++) { + EXPECT_NEAR(0.5f, output_data[i], 1e-5); + } + + auto tensor_size = l0_host_tensor0.get_size(); + auto state_data = static_cast(l0_host_tensor0.get()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(0.0, state_data[i], 1e-5); + } + + tensor_size = l0_host_tensor1.get_size(); + state_data = static_cast(l0_host_tensor1.get()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(0.0, state_data[i], 1e-5); + } + + tensor_state = states[0].get_state(); + tensor_state_shape = tensor_state.get_shape(); + auto l0_host_tensor2 = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape); + + tensor_state = states[1].get_state(); + tensor_state_shape = tensor_state.get_shape(); + auto l0_host_tensor3 = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape); + + states[0].set_state(l0_host_tensor2); + states[1].set_state(l0_host_tensor3); + + tensor_size = l0_host_tensor2.get_size(); + state_data = static_cast(l0_host_tensor2.get()); + for (size_t i = 0; i < tensor_size; ++i) { + state_data[i] = 1.0f; + } + + tensor_size = l0_host_tensor3.get_size(); + state_data = static_cast(l0_host_tensor3.get()); + for (size_t i = 0; i < tensor_size; ++i) { + state_data[i] = 1.0f; + } + + OV_ASSERT_NO_THROW(inference_request.infer()); + + tensor_size = l0_host_tensor2.get_size(); + state_data = 
static_cast(l0_host_tensor2.get()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(input_data[i], state_data[i], 1e-5); + } + + tensor_size = l0_host_tensor3.get_size(); + state_data = static_cast(l0_host_tensor3.get()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(input_data[i], state_data[i], 1e-5); + } +} + +TEST_P(RemoteRunTests, checkResultsAfterChangingStateDataWithRemoteAndRandomTensors0) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + testing::internal::Random random(1); + ov::Tensor input_tensor; + + auto original_shape = Shape{1, 10, 10, 10}; + auto shape_size = ov::shape_size(original_shape); + auto model = createModelWithStates(element::f32, original_shape); + + auto context = core->get_default_context(target_device).as(); + ; + + compiled_model = core->compile_model(model, target_device, configuration); + ov::InferRequest inference_request; + inference_request = compiled_model.create_infer_request(); + + auto input = compiled_model.input(); + OV_ASSERT_NO_THROW(input_tensor = inference_request.get_tensor(input)); + auto* input_data = input_tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = static_cast(random.Generate(10)); + } + + auto states = inference_request.query_state(); + + auto tensor_state = states[0].get_state(); + auto tensor_state_shape = tensor_state.get_shape(); + auto l0_host_tensor = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape); + + tensor_state = states[1].get_state(); + tensor_state_shape = tensor_state.get_shape(); + auto byte_size = tensor_state.get_byte_size(); + float* data = new float[byte_size / sizeof(float)]; + ov::Tensor random_tensor{ov::element::f32, tensor_state_shape, data}; + + states[0].set_state(l0_host_tensor); + states[0].reset(); + states[1].set_state(random_tensor); + states[1].reset(); + + OV_ASSERT_NO_THROW(inference_request.infer()); + + auto output_tensor = inference_request.get_tensor("sigmod_state"); + auto output_data = output_tensor.data(); + for (size_t i = 0; i < output_tensor.get_size(); i++) { + EXPECT_NEAR(0.5f, output_data[i], 1e-5); + } + + auto tensor_size = l0_host_tensor.get_size(); + auto state_data = static_cast(l0_host_tensor.get()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(0.0, state_data[i], 1e-5); + } + + tensor_size = random_tensor.get_size(); + state_data = static_cast(random_tensor.data()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(0.0, state_data[i], 1e-5); + } + + tensor_size = l0_host_tensor.get_size(); + state_data = static_cast(l0_host_tensor.get()); + for (size_t i = 0; i < tensor_size; ++i) { + state_data[i] = 1.0f; + } + + tensor_size = random_tensor.get_size(); + state_data = static_cast(random_tensor.data()); + for (size_t i = 0; i < tensor_size; ++i) { + state_data[i] = 1.0f; + } + + OV_ASSERT_NO_THROW(inference_request.infer()); + + tensor_size = l0_host_tensor.get_size(); + state_data = static_cast(l0_host_tensor.get()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(input_data[i], state_data[i], 1e-5); + } + + tensor_size = random_tensor.get_size(); + state_data = static_cast(random_tensor.data()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(input_data[i], state_data[i], 1e-5); + } +} + +TEST_P(RemoteRunTests, checkResultsAfterChangingStateDataWithRemoteAndRandomTensors1) { + // Skip test according to plugin specific disabledTestPatterns() (if any) + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + 
testing::internal::Random random(1); + ov::Tensor input_tensor; + + auto original_shape = Shape{1, 10, 10, 10}; + auto shape_size = ov::shape_size(original_shape); + auto model = createModelWithStates(element::f32, original_shape); + + auto context = core->get_default_context(target_device).as(); + ; + + compiled_model = core->compile_model(model, target_device, configuration); + ov::InferRequest inference_request; + inference_request = compiled_model.create_infer_request(); + + auto input = compiled_model.input(); + OV_ASSERT_NO_THROW(input_tensor = inference_request.get_tensor(input)); + auto* input_data = input_tensor.data(); + for (size_t i = 0; i < shape_size; ++i) { + input_data[i] = static_cast(random.Generate(10)); + } + + auto states = inference_request.query_state(); + + auto tensor_state = states[0].get_state(); + auto tensor_state_shape = tensor_state.get_shape(); + auto l0_host_tensor = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape); + + tensor_state = states[1].get_state(); + tensor_state_shape = tensor_state.get_shape(); + auto byte_size = tensor_state.get_byte_size(); + float* data = new float[byte_size / sizeof(float)]; + ov::Tensor random_tensor{ov::element::f32, tensor_state_shape, data}; + + auto tensor_size = l0_host_tensor.get_size(); + auto state_data = static_cast(l0_host_tensor.get()); + for (size_t i = 0; i < tensor_size; ++i) { + state_data[i] = 1.0f; + } + + tensor_size = random_tensor.get_size(); + state_data = static_cast(random_tensor.data()); + for (size_t i = 0; i < tensor_size; ++i) { + state_data[i] = 1.0f; + } + + states[0].set_state(l0_host_tensor); + states[1].set_state(random_tensor); + + OV_ASSERT_NO_THROW(inference_request.infer()); + + tensor_size = l0_host_tensor.get_size(); + state_data = static_cast(l0_host_tensor.get()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(input_data[i], state_data[i], 1e-5); + } + + tensor_size = random_tensor.get_size(); + state_data = static_cast(random_tensor.data()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(input_data[i], state_data[i], 1e-5); + } + + states[0].reset(); + states[1].reset(); + + OV_ASSERT_NO_THROW(inference_request.infer()); + + auto output_tensor = inference_request.get_tensor("sigmod_state"); + auto output_data = output_tensor.data(); + for (size_t i = 0; i < output_tensor.get_size(); i++) { + EXPECT_NEAR(0.5f, output_data[i], 1e-5); + } + + tensor_size = l0_host_tensor.get_size(); + state_data = static_cast(l0_host_tensor.get()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(0.0, state_data[i], 1e-5); + } + + tensor_size = random_tensor.get_size(); + state_data = static_cast(random_tensor.data()); + for (size_t i = 0; i < tensor_size; ++i) { + EXPECT_NEAR(0.0, state_data[i], 1e-5); + } +} + } // namespace behavior } // namespace test } // namespace ov diff --git a/src/plugins/intel_npu/tests/functional/common/utils.cpp b/src/plugins/intel_npu/tests/functional/common/utils.cpp index 91f78487934e38..b041e694b19ad0 100644 --- a/src/plugins/intel_npu/tests/functional/common/utils.cpp +++ b/src/plugins/intel_npu/tests/functional/common/utils.cpp @@ -7,6 +7,10 @@ #include #include "intel_npu/npu_private_properties.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/op.hpp" +#include "openvino/op/sigmoid.hpp" std::string getBackendName(const ov::Core& core) { return core.get_property("NPU", ov::intel_npu::backend_name.name()).as(); @@ -99,3 +103,32 @@ std::vector 
getRWMandatoryPropertiesValues(std::vector p } return props; } + +std::shared_ptr createModelWithStates(ov::element::Type type, const ov::Shape& shape) { + auto input = std::make_shared(type, shape); + auto mem_i1 = std::make_shared(type, shape, 0); + auto mem_r1 = std::make_shared(mem_i1, "r_1-3"); + auto mul1 = std::make_shared(mem_r1, input); + + auto mem_i2 = std::make_shared(type, shape, 0); + auto mem_r2 = std::make_shared(mem_i2, "c_1-3"); + auto mul2 = std::make_shared(mem_r2, mul1); + auto mem_w2 = std::make_shared(mul2, "c_1-3"); + + auto mem_w1 = std::make_shared(mul2, "r_1-3"); + auto sigm = std::make_shared(mul2); + sigm->set_friendly_name("sigmod_state"); + sigm->get_output_tensor(0).set_names({"sigmod_state"}); + mem_r1->set_friendly_name("Memory_1"); + mem_r1->get_output_tensor(0).set_names({"Memory_1"}); + mem_w1->add_control_dependency(mem_r1); + sigm->add_control_dependency(mem_w1); + + mem_r2->set_friendly_name("Memory_2"); + mem_r2->get_output_tensor(0).set_names({"Memory_2"}); + mem_w2->add_control_dependency(mem_r2); + sigm->add_control_dependency(mem_w2); + + auto function = std::make_shared(ov::NodeVector{sigm}, ov::ParameterVector{input}, "add_output"); + return function; +} diff --git a/src/plugins/intel_npu/tests/functional/common/utils.hpp b/src/plugins/intel_npu/tests/functional/common/utils.hpp index 4ad54cc016302c..40ac987bd25487 100644 --- a/src/plugins/intel_npu/tests/functional/common/utils.hpp +++ b/src/plugins/intel_npu/tests/functional/common/utils.hpp @@ -6,6 +6,7 @@ #include #include + #include "common_test_utils/unicode_utils.hpp" std::string getBackendName(const ov::Core& core); @@ -18,6 +19,8 @@ std::string removeDeviceNameOnlyID(const std::string& device_name_id); std::vector getRWMandatoryPropertiesValues(std::vector props); +std::shared_ptr createModelWithStates(ov::element::Type type, const ov::Shape& shape); + template ::value || std::is_same::value)>::type> void removeDirFilesRecursive(const std::basic_string& path) { @@ -72,6 +75,8 @@ struct GenericTestCaseNameClass { }; template -constexpr bool GenericTestCaseNameClass::hasGetTestCaseName< - T, std::void_t().getTestCaseName( - std::declval>()))>> = true; +constexpr bool + GenericTestCaseNameClass::hasGetTestCaseName().getTestCaseName( + std::declval>()))>> = + true;
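
For reference, the flow that ZeroVariableState enables can be exercised entirely through the public OpenVINO API, as the new functional tests do. The sketch below is illustrative only and is not part of the patch: it assumes an NPU device is available and reuses the createModelWithStates() helper added to the functional test utilities in this diff. Host tensors obtained from the NPU remote context are level-zero allocations, so set_state() can bind them directly to the command list instead of falling back to a memcpy at infer time.

// Illustrative usage sketch (assumes an NPU device and the test-only
// createModelWithStates() helper from tests/functional/common/utils.hpp).
#include <openvino/openvino.hpp>

#include "common/utils.hpp"  // createModelWithStates()

int main() {
    ov::Core core;
    auto model = createModelWithStates(ov::element::f32, ov::Shape{1, 10, 10, 10});
    auto compiled_model = core.compile_model(model, "NPU");
    auto request = compiled_model.create_infer_request();

    // Host tensors created through the NPU remote context live in the same
    // level-zero context as the pipeline, so ZeroVariableState flags them as
    // "zero tensor updated" and the graph arguments are rebound without a copy.
    auto context = core.get_default_context("NPU");
    for (auto&& state : request.query_state()) {
        auto host_tensor = context.create_host_tensor(ov::element::f32, state.get_state().get_shape());
        state.set_state(host_tensor);  // picked up when the pipeline is created or its command list is updated
        state.reset();                 // zero-fills the bound buffer
    }

    request.infer();
    return 0;
}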