Skip to content

Commit

Permalink
[NPU] Adding extra features for the state tensors (#28414)
Browse files Browse the repository at this point in the history
### Details:
Add the remaining features for state tensors:
- Update the MutableCommandList instead of using memcpy if the memory was allocated in
the same L0 context.
- Make set_shape available for state tensors as well.

### Tickets:
 - *CVS-160364*

---------

Signed-off-by: Bogdan Pereanu <bogdan.pereanu@intel.com>
  • Loading branch information
pereanub authored Jan 21, 2025
1 parent 8d5f583 commit 6aab9cc
Show file tree
Hide file tree
Showing 15 changed files with 891 additions and 135 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,19 @@ class ZeroInferRequest final : public SyncInferRequest {
const ov::Shape& shape,
const ov::Allocator& allocator = {}) const override;

// Overrides the base-class hook that registers a variable state for the given I/O descriptor and tensor index.
// NOTE(review): presumably this is where a ZeroVariableState gets created - confirm against the .cpp.
void add_state(const IODescriptor& descriptor, size_t tensorIndex) const override;

// NOTE(review): judging by the names, these two helpers refresh the pipeline / the state tensors when the memory
// backing a tensor was replaced - confirm the exact trigger and effect in the implementation.
void update_pipeline_if_memory_changed();
void update_states_if_memory_changed();

const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
const std::shared_ptr<IGraph> _graph;
const Config _config;
Logger _logger;

const std::vector<ArgumentDescriptor>& _graphInputDescriptors;
const std::vector<ArgumentDescriptor>& _graphOutputDescriptors;

// A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another
// memory area for the tensor.
mutable std::vector<std::vector<std::shared_ptr<ov::ITensor>>> _levelZeroInputTensors;
Expand Down
77 changes: 77 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "intel_npu/config/config.hpp"
#include "intel_npu/utils/logger/logger.hpp"
#include "intel_npu/utils/zero/zero_init.hpp"
#include "openvino/runtime/ivariable_state.hpp"

namespace intel_npu {

/**
* @brief Variable state implementation used by the Level Zero backend of the NPU plugin
* @details Implements ov::IVariableState and additionally exposes the indices of the input/output tensor pair that
* backs the state, together with two flags that report whether the state tensor changed.
* @note In case the memory was allocated in the same level zero context use that memory, otherwise use memcpy at infer
* time. Also, get correct data if remote tensor is used.
*/
class ZeroVariableState final : public ov::IVariableState {
public:
/**
* @brief Create a variable state bound to an input/output tensor pair
* @param init_structs Shared holder of the Level Zero init structures
* @param name Name of the state
* @param tensor Tensor initially holding the state data
* @param tensor_index Input tensor index used internally for the state
* @param related_tensor_index Output tensor index used internally for the state
* @param config Plugin configuration (NOTE(review): presumably used to set up the logger - confirm)
*/
explicit ZeroVariableState(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const std::string& name,
const ov::SoPtr<ov::ITensor>& tensor,
size_t tensor_index,
size_t related_tensor_index,
const Config& config);

/**
* @brief Set a new tensor as the state
* @note NOTE(review): presumably this is what raises the "updated" flags queried below - confirm in the
* implementation.
*/
void set_state(const ov::SoPtr<ov::ITensor>& new_state) override;

/**
* @brief Reset the state
* @note NOTE(review): the value the state is reset to is defined by the implementation, which is not visible here.
*/
void reset() override;

/**
* @brief Get input tensor index used internally for the state
*/
size_t get_tensor_index() const;

/**
* @brief Get output tensor index used internally for the state
* @details The related tensors are defined by state input, state output pairs.
*/
size_t get_related_tensor_index() const;

/**
* @brief Check whether the state tensor was updated
* @return true if an update is pending, i.e. the flag was not cleared by reset_tensor_updated_flag()
*/
bool tensor_was_updated() const;

/**
* @brief Clear the flag reported by tensor_was_updated()
*/
void reset_tensor_updated_flag();

/**
* @brief Check whether the zero tensor should be updated
* @details In case the memory was allocated in the same level zero context update the zero tensor
*/
bool zero_tensor_should_be_updated() const;

/**
* @brief Clear the flag reported by zero_tensor_should_be_updated()
*/
void reset_zero_tensor_updated_flag();

~ZeroVariableState() override = default;

private:
// Shared Level Zero init structures handed over at construction.
std::shared_ptr<ZeroInitStructsHolder> _init_structs;
// Index of the input tensor backing the state (see get_tensor_index()).
size_t _tensor_index;
// Index of the paired output tensor (see get_related_tensor_index()).
size_t _related_tensor_index;

// Update flags; both start cleared. NOTE(review): presumably they back tensor_was_updated() and
// zero_tensor_should_be_updated() respectively - confirm in the implementation.
bool _tensor_updated = false;
bool _zero_tensor_updated = false;

Logger _logger;
};

} // namespace intel_npu
Loading

0 comments on commit 6aab9cc

Please sign in to comment.