[NPU] Adding extra features for the state tensors #28414

Merged: 4 commits, Jan 21, 2025
@@ -67,11 +67,19 @@ class ZeroInferRequest final : public SyncInferRequest {
const ov::Shape& shape,
const ov::Allocator& allocator = {}) const override;

void add_state(const IODescriptor& descriptor, size_t tensorIndex) const override;

void update_pipeline_if_memory_changed();
void update_states_if_memory_changed();

const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
const std::shared_ptr<IGraph> _graph;
const Config _config;
Logger _logger;

const std::vector<ArgumentDescriptor>& _graphInputDescriptors;
const std::vector<ArgumentDescriptor>& _graphOutputDescriptors;

// A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another
// memory area for the tensor.
mutable std::vector<std::vector<std::shared_ptr<ov::ITensor>>> _levelZeroInputTensors;
@@ -0,0 +1,77 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "intel_npu/config/config.hpp"
#include "intel_npu/utils/logger/logger.hpp"
#include "intel_npu/utils/zero/zero_init.hpp"
#include "openvino/runtime/ivariable_state.hpp"

namespace intel_npu {

/**
* @brief Level Zero implementation of the variable state interface
* @note If the state memory was allocated in the same Level Zero context, that memory is used directly; otherwise the
* data is copied at infer time. The correct data is also retrieved when a remote tensor is used.
*/
class ZeroVariableState final : public ov::IVariableState {
public:
explicit ZeroVariableState(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
const std::string& name,
const ov::SoPtr<ov::ITensor>& tensor,
size_t tensor_index,
size_t related_tensor_index,
const Config& config);

void set_state(const ov::SoPtr<ov::ITensor>& new_state) override;

void reset() override;

/**
* @brief Get the input tensor index used internally for the state
*/
size_t get_tensor_index() const;

/**
* @brief Get the output tensor index used internally for the state
* @details Related tensors are defined by (state input, state output) pairs.
*/
size_t get_related_tensor_index() const;

/**
* @brief Check whether the state tensor was updated
*/
bool tensor_was_updated() const;

/**
* @brief Reset the tensor-updated flag
*/
void reset_tensor_updated_flag();

/**
* @brief Check whether the Level Zero tensor should be updated
* @details The Level Zero tensor is updated only if the new memory was allocated in the same Level Zero context
*/
bool zero_tensor_should_be_updated() const;

/**
* @brief Reset the Level Zero tensor-updated flag
*/
void reset_zero_tensor_updated_flag();

~ZeroVariableState() override = default;

private:
std::shared_ptr<ZeroInitStructsHolder> _init_structs;
size_t _tensor_index;
size_t _related_tensor_index;

bool _tensor_updated = false;
bool _zero_tensor_updated = false;

Logger _logger;
};

} // namespace intel_npu
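To show how the flag protocol above is meant to be consumed, for instance by update_states_if_memory_changed(), here is a small, hypothetical sketch; the free function and its flow are illustrative assumptions, not code from this pull request:

// Hypothetical consumer of the ZeroVariableState flag protocol (illustration only).
// Assumes the new zero_variable_state.hpp header shown above is included.
void refresh_state(const std::shared_ptr<intel_npu::ZeroVariableState>& state) {
    if (!state->tensor_was_updated()) {
        return;  // set_state() has not been called since the last inference
    }
    if (state->zero_tensor_should_be_updated()) {
        // The new tensor was allocated in the same Level Zero context, so the
        // pipeline argument can be re-bound to it directly, without a copy.
    } else {
        // Foreign memory: the plugin keeps its own Level Zero allocation and
        // copies the user's data into it at infer time.
    }
    // Acknowledge that the change has been handled before the next inference.
    state->reset_tensor_updated_flag();
    state->reset_zero_tensor_updated_flag();
}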