Use friendly name instead of legacy name
pereanub committed Apr 17, 2024
1 parent a7c7de4 commit e9f6d99
Showing 10 changed files with 88 additions and 53 deletions.
@@ -201,6 +201,7 @@ class SyncInferRequest : public ov::IInferRequest {
     std::vector<std::string> _outputAndStateOutputNames;
 
     std::unordered_map<std::string, std::string> _nodeNameToLegacyName;
+    std::unordered_map<std::string, std::string> _legacyNameToNodeName;
 };
 
 } // namespace intel_npu
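
Note: the request still has to translate between the two naming schemes, because the compiled model's metadata keeps using the compiler's legacy names while the OpenVINO ports expose the node's friendly name. A minimal sketch of how the two tables relate (the NameMaps type below is hypothetical, not part of the plugin):

#include <string>
#include <unordered_map>

// Hypothetical illustration of the two lookup directions kept by the request:
//   friendly (node) name -> legacy compiler name   (_nodeNameToLegacyName)
//   legacy compiler name -> friendly (node) name   (_legacyNameToNodeName)
struct NameMaps {
    std::unordered_map<std::string, std::string> nodeNameToLegacyName;
    std::unordered_map<std::string, std::string> legacyNameToNodeName;

    void add(const std::string& friendlyName, const std::string& legacyName) {
        nodeNameToLegacyName[friendlyName] = legacyName;
        legacyNameToNodeName[legacyName] = friendlyName;
    }
};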
31 changes: 14 additions & 17 deletions src/plugins/intel_npu/src/al/src/sync_infer_request.cpp
@@ -28,11 +28,9 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr<const ICompiledModel>&
 
     // Map the node names to the legacy ones used by the I/O tensors in order to allow an easier access to the tensors'
     // contents
-    for (const auto& [legacyName, parameterDescriptor] : _metadata.parameters) {
-        _nodeNameToLegacyName[parameterDescriptor.currentNodeName] = legacyName;
-    }
-    for (const auto& [legacyName, resultDescriptor] : _metadata.results) {
-        _nodeNameToLegacyName[resultDescriptor.currentNodeName] = legacyName;
+    for (const auto& [name, resultDescriptor] : _metadata.results) {
+        _nodeNameToLegacyName[name] = resultDescriptor.legacyName;
+        _legacyNameToNodeName[resultDescriptor.legacyName] = name;
     }
 
     _inputAndStateInputNames = _metadata.inputNames;
@@ -52,8 +50,12 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr<const ICompiledModel>&
         if (contains(_inputAndStateInputNames, shapeName)) {
             _inputAndStateInputNames.push_back(SHAPE_TENSOR_PREFIX + shapeName);
         }
-        if (contains(_outputAndStateOutputNames, shapeName)) {
-            _outputAndStateOutputNames.push_back(SHAPE_TENSOR_PREFIX + shapeName);
+
+        const auto& shapeNameMatch = _legacyNameToNodeName.find(shapeName);
+        if (shapeNameMatch != _legacyNameToNodeName.end()) {
+            if (contains(_outputAndStateOutputNames, shapeNameMatch->second)) {
+                _outputAndStateOutputNames.push_back(SHAPE_TENSOR_PREFIX + shapeName);
+            }
         }
     }
 }
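
A rough sketch of the output branch added above, under the same assumption about the data flow: shape names arrive under their legacy spelling, the output list now stores friendly names, so membership has to be checked through the legacy-to-friendly map before the prefixed entry is pushed. The helper and parameter names below are illustrative only:

#include <algorithm>
#include <string>
#include <unordered_map>
#include <vector>

// Hypothetical helper mirroring the output branch: returns true when the
// prefixed shape-tensor entry was registered for a known output node.
bool registerOutputShapeTensor(const std::string& legacyShapeName,
                               const std::string& shapeTensorPrefix,
                               const std::unordered_map<std::string, std::string>& legacyToFriendly,
                               std::vector<std::string>& outputAndStateOutputNames) {
    const auto match = legacyToFriendly.find(legacyShapeName);
    if (match == legacyToFriendly.end()) {
        return false;  // no output node corresponds to this shape tensor
    }
    const auto& friendlyName = match->second;
    if (std::find(outputAndStateOutputNames.begin(), outputAndStateOutputNames.end(), friendlyName) ==
        outputAndStateOutputNames.end()) {
        return false;  // the matching node is not among the outputs
    }
    outputAndStateOutputNames.push_back(shapeTensorPrefix + legacyShapeName);
    return true;
}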
@@ -87,10 +89,7 @@ std::vector<ov::SoPtr<ov::IVariableState>> SyncInferRequest::query_state() const
 }
 
 ov::SoPtr<ov::ITensor> SyncInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
-    const auto& nodeNameMatch = _nodeNameToLegacyName.find(port.get_node()->get_friendly_name());
-    OPENVINO_ASSERT(nodeNameMatch != _nodeNameToLegacyName.end(), "Cannot find tensor for port ", port);
-
-    return _allTensors.at(nodeNameMatch->second);
+    return _allTensors.at(port.get_node()->get_friendly_name());
 }
 
 void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const ov::SoPtr<ov::ITensor>& tensor) {
@@ -101,8 +100,7 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const
         OPENVINO_THROW("Failed to set tensor. ", ex.what());
     }
 
-    const std::string& legacyName = _nodeNameToLegacyName.at(port.get_node()->get_friendly_name());
-    _allTensors[legacyName] = tensor._ptr;
+    _allTensors[port.get_node()->get_friendly_name()] = tensor._ptr;
 }
 
 std::vector<ov::SoPtr<ov::ITensor>> SyncInferRequest::get_tensors(const ov::Output<const ov::Node>& /*port*/) const {
@@ -155,14 +153,12 @@ void SyncInferRequest::check_tensor(const ov::Output<const ov::Node>& port,
 void SyncInferRequest::check_tensors() const {
     const auto& inputs = _compiledModel->inputs();
     for (size_t i = 0; i < inputs.size(); i++) {
-        const std::string& legacyName = _nodeNameToLegacyName.at(inputs[i].get_node()->get_friendly_name());
-        check_tensor(inputs[i], _allTensors.at(legacyName));
+        check_tensor(inputs[i], _allTensors.at(inputs[i].get_node()->get_friendly_name()));
     }
 
     const auto& outputs = _compiledModel->outputs();
     for (size_t i = 0; i < outputs.size(); i++) {
-        const std::string& legacyName = _nodeNameToLegacyName.at(outputs[i].get_node()->get_friendly_name());
-        check_tensor(outputs[i], _allTensors.at(legacyName));
+        check_tensor(outputs[i], _allTensors.at(outputs[i].get_node()->get_friendly_name()));
     }
 }
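
With the translation map gone from these accessors, the tensor storage is keyed directly by the port's friendly name. One side effect worth noting: get_tensor no longer carries a dedicated OPENVINO_ASSERT, so an unknown port now surfaces as the std::out_of_range thrown by .at(). A condensed stand-in for the shared lookup (FakeTensor and the free functions are placeholders, not plugin types):

#include <map>
#include <memory>
#include <string>

struct FakeTensor {};  // stands in for ov::ITensor

using TensorMap = std::map<std::string, std::shared_ptr<FakeTensor>>;

// Both accessors use the port's friendly name directly as the storage key.
std::shared_ptr<FakeTensor> getTensor(const TensorMap& allTensors, const std::string& friendlyName) {
    return allTensors.at(friendlyName);  // throws std::out_of_range for unknown ports
}

void setTensor(TensorMap& allTensors, const std::string& friendlyName, std::shared_ptr<FakeTensor> tensor) {
    allTensors[friendlyName] = std::move(tensor);
}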

@@ -184,6 +180,7 @@ void SyncInferRequest::allocate_tensor(std::string tensorName,
         _shapesTensors[tensorName] = tensor;
         tensorName = SHAPE_TENSOR_PREFIX + tensorName;
     }
+
     if (tensorType == TensorType::State) {
         _variableStates[tensorName] = std::make_shared<VariableState>(tensorName, tensor);
 
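
For orientation, the branches visible in this hunk do two things: shape tensors are recorded under their plain name and then renamed with the shape prefix for the rest of the bookkeeping, and state tensors get wrapped in a VariableState. A rough, self-contained sketch under those assumptions (all types, container choices, and the prefix value here are placeholders; the final insertion into the request's tensor map is outside the excerpt):

#include <map>
#include <memory>
#include <string>

enum class TensorType { InputOrOutput, Shape, State };

struct Tensor {};
struct VariableState {
    VariableState(std::string n, std::shared_ptr<Tensor> t) : name(std::move(n)), tensor(std::move(t)) {}
    std::string name;
    std::shared_ptr<Tensor> tensor;
};

void registerTensor(std::string tensorName,
                    TensorType tensorType,
                    std::shared_ptr<Tensor> tensor,
                    const std::string& shapeTensorPrefix,
                    std::map<std::string, std::shared_ptr<Tensor>>& shapesTensors,
                    std::map<std::string, std::shared_ptr<VariableState>>& variableStates) {
    if (tensorType == TensorType::Shape) {
        shapesTensors[tensorName] = tensor;             // keep the un-prefixed entry
        tensorName = shapeTensorPrefix + tensorName;    // further bookkeeping uses the prefixed key
    }
    if (tensorType == TensorType::State) {
        variableStates[tensorName] = std::make_shared<VariableState>(tensorName, tensor);
    }
}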
3 changes: 2 additions & 1 deletion src/plugins/intel_npu/src/backend/include/zero_executor.hpp
@@ -27,7 +27,7 @@ class ZeroExecutor final : public IExecutor {
     ~ZeroExecutor() override;
 
     struct ArgumentDescriptor {
-        ze_graph_argument_properties_t info;
+        ze_graph_argument_properties_3_t info;
         uint32_t idx;
     };
 
@@ -67,6 +67,7 @@ class ZeroExecutor final : public IExecutor {
 
     ze_graph_handle_t _graph = nullptr;
     ze_graph_properties_t _props{};
+
     std::unordered_map<std::string, ArgumentDescriptor> _inputs_desc_map;
     std::unordered_map<std::string, ArgumentDescriptor> _outputs_desc_map;
 
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/backend/include/zero_memory.hpp
@@ -96,7 +96,7 @@ class HostMemAllocator final {
 struct MemoryManagementUnit {
     MemoryManagementUnit() = default;
 
-    void appendArgument(const std::string& name, const ze_graph_argument_properties_t& argument);
+    void appendArgument(const std::string& name, const std::size_t argSize);
     /* Allocate Device memories */
     void allocate(const ze_device_handle_t device_handle, const ze_context_handle_t context);
 
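
The new appendArgument(name, size) overload only changes how the size reaches MemoryManagementUnit; the offset bookkeeping itself (shown in zero_memory.cpp further down) still rounds every argument up to the allocator alignment, and, as the in-code comment hints, an already aligned size still grows by one full block. A small worked example, assuming a 4096-byte alignment (the real constant lives in the plugin sources):

#include <cstddef>
#include <iostream>

constexpr std::size_t alignment = 4096;  // assumed value for illustration

// Matches the expression in the diff: argSize + alignment - (argSize % alignment).
std::size_t appendedSize(std::size_t argSize) {
    return argSize + alignment - (argSize % alignment);
}

int main() {
    std::cout << appendedSize(100) << "\n";   // 4096  (rounded up to one block)
    std::cout << appendedSize(5000) << "\n";  // 8192  (rounded up to two blocks)
    std::cout << appendedSize(4096) << "\n";  // 8192  (already aligned, still grows by a block)
}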
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/backend/include/zero_utils.hpp
@@ -164,7 +164,7 @@ static inline std::size_t layoutCount(const ze_graph_argument_layout_t val) {
     }
 }
 
-static inline std::size_t getSizeIOBytes(const ze_graph_argument_properties_t& argument) {
+static inline std::size_t getSizeIOBytes(const ze_graph_argument_properties_3_t& argument) {
     std::size_t num_elements = 1;
     for (std::size_t i = 0; i < layoutCount(argument.deviceLayout); ++i) {
         num_elements *= argument.dims[i];
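
Only the element-count part of getSizeIOBytes is visible in this hunk; converting elements to bytes additionally depends on the argument precision, which is outside the excerpt. A hypothetical stand-in for the visible loop, with a worked value:

#include <array>
#include <cstddef>
#include <iostream>

// Illustrative only: multiplies the dims covered by the layout's rank,
// exactly as the loop above does for the Level Zero argument properties.
std::size_t numElements(const std::array<std::size_t, 4>& dims, std::size_t layoutRank) {
    std::size_t n = 1;
    for (std::size_t i = 0; i < layoutRank; ++i) {
        n *= dims[i];
    }
    return n;
}

int main() {
    // e.g. an NCHW tensor of shape 1x3x224x224 -> 150528 elements
    std::cout << numElements({1, 3, 224, 224}, 4) << "\n";
}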
39 changes: 29 additions & 10 deletions src/plugins/intel_npu/src/backend/src/zero_executor.cpp
@@ -13,6 +13,7 @@
 
 #include "intel_npu/al/config/common.hpp"
 #include "intel_npu/al/itt.hpp"
+#include "intel_npu/al/prefix.hpp"
 #include "zero_device.hpp"
 #include "zero_utils.hpp"
 
@@ -55,11 +56,9 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
                                          _config,
                                          _group_ordinal);
     Fence fence(graph_command_queue, _config);
-    ze_device_properties_t properties = {};
-    properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
-    zeroUtils::throwOnFail("zeDeviceGetProperties", zeDeviceGetProperties(_initStructs->getDevice(), &properties));
 
     OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, itt::domains::LevelZeroBackend, "Executor::ZeroExecutor", "graphCreate");
+
     ze_graph_desc_t desc{ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES,
                          nullptr,
                          ZE_GRAPH_FORMAT_NATIVE,
@@ -73,17 +72,37 @@
     OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetProperties");
     zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext->pfnGetProperties(_graph, &_props));
 
-    OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties");
+    auto targetDriverExtVersion = _initStructs->getDriverExtVersion();
+    if (targetDriverExtVersion <= ZE_GRAPH_EXT_VERSION_1_1) {
+        OPENVINO_THROW("Incompatibility between the NPU plugin and driver! The driver version is too old, please "
+                       "update the driver version");
+    }
+
+    OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties3");
     for (uint32_t index = 0; index < _props.numGraphArgs; ++index) {
-        ze_graph_argument_properties_t arg;
-        zeroUtils::throwOnFail("pfnGetArgumentProperties",
-                               _graph_ddi_table_ext->pfnGetArgumentProperties(_graph, index, &arg));
-        if (ZE_GRAPH_ARGUMENT_TYPE_INPUT == arg.type) {
-            _inputs_desc_map.emplace(std::make_pair(std::string(arg.name), ArgumentDescriptor{arg, index}));
+        ze_graph_argument_properties_3_t arg3;
+        zeroUtils::throwOnFail("pfnGetArgumentProperties3",
+                               _graph_ddi_table_ext->pfnGetArgumentProperties3(_graph, index, &arg3));
+
+        if (ZE_GRAPH_ARGUMENT_TYPE_INPUT == arg3.type) {
+            if (isStateInputName(arg3.name) || isShapeTensorName(arg3.name)) {
+                _inputs_desc_map.emplace(std::make_pair(std::string(arg3.name), ArgumentDescriptor{arg3, index}));
+
+            } else {
+                _inputs_desc_map.emplace(
+                    std::make_pair(std::string(arg3.debug_friendly_name), ArgumentDescriptor{arg3, index}));
+            }
         } else {
-            _outputs_desc_map.emplace(std::make_pair(std::string(arg.name), ArgumentDescriptor{arg, index}));
+            if (isStateOutputName(arg3.name) || isShapeTensorName(arg3.name)) {
+                _outputs_desc_map.emplace(std::make_pair(std::string(arg3.name), ArgumentDescriptor{arg3, index}));
+
+            } else {
+                _outputs_desc_map.emplace(
+                    std::make_pair(std::string(arg3.debug_friendly_name), ArgumentDescriptor{arg3, index}));
+            }
         }
     }
 
     OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "appendGraphInitialize");
     graph_command_list.appendGraphInitialize(_graph);
     graph_command_list.close();
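
The net effect of this hunk is a keying rule: state and shape-tensor arguments keep their prefixed legacy names, so the existing prefix-based matching still finds them, while every other argument is registered under debug_friendly_name. A condensed, illustrative form of that rule (the helper below is not part of the plugin; the isState*/isShapeTensorName checks come from intel_npu/al/prefix.hpp):

#include <string>

// Illustrative only: mirrors the branches added in ZeroExecutor::ZeroExecutor.
// `stateOrShape` stands for isStateInputName/isStateOutputName/isShapeTensorName
// applied to the raw Level Zero argument name.
std::string descriptorKey(const std::string& rawArgName,
                          const std::string& debugFriendlyName,
                          bool stateOrShape) {
    // Prefixed legacy names are kept for state and shape-tensor arguments;
    // everything else is keyed by the node's friendly name.
    return stateOrShape ? rawArgName : debugFriendlyName;
}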
52 changes: 35 additions & 17 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -117,7 +117,13 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
                                               executorInputDescriptors.at(shapeBufferName),
                                               shapeBufferName);
 
-            auto allocator = zeroMemory::HostMemAllocator(backendPtr);
+            ov::Allocator allocator;
+            if (properties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) {
+                allocator = zeroMemory::HostMemAllocator(backendPtr, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED);
+            } else {
+                allocator = zeroMemory::HostMemAllocator(backendPtr);
+            }
+
             allocate_tensor(inputName, shapeDescriptor, TensorType::Shape, allocator);
         }
     }
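
The allocator selection above relies on the ze_device_properties_t queried elsewhere in this constructor (outside the excerpt): integrated devices get a write-combined host allocation, discrete ones the default. A hedged sketch of the same decision, assuming the plugin and Level Zero headers are available; the helper itself is hypothetical:

// Illustrative helper, not plugin code; `backendPtr` is whatever the plugin
// already passes to zeroMemory::HostMemAllocator in the hunk above.
template <typename BackendPtr>
ov::Allocator makeHostAllocator(const ze_device_properties_t& properties, const BackendPtr& backendPtr) {
    ov::Allocator allocator;
    if (properties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) {
        // Integrated devices share system memory with the host, so a
        // write-combined host allocation is requested for these buffers.
        allocator = zeroMemory::HostMemAllocator(backendPtr, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED);
    } else {
        allocator = zeroMemory::HostMemAllocator(backendPtr);
    }
    return allocator;
}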
@@ -134,16 +140,25 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
 
         allocate_tensor(outputName, resultDescriptor, TensorType::InputOrOutput, allocator);
 
-        if (contains(_metadata.shapeNames, outputName)) {
-            const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + outputName;
-            const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(outputName);
+        const auto& shapeNameMatch = _nodeNameToLegacyName.find(outputName);
+        if (shapeNameMatch != _nodeNameToLegacyName.end()) {
+            if (contains(_metadata.shapeNames, shapeNameMatch->second)) {
+                const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + shapeNameMatch->second;
+                const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(shapeNameMatch->second);
 
-            check_level_zero_attributes_match(shapeDescriptor,
-                                              executorOutputDescriptors.at(shapeBufferName),
-                                              shapeBufferName);
+                check_level_zero_attributes_match(shapeDescriptor,
                                                   executorOutputDescriptors.at(shapeBufferName),
                                                   shapeBufferName);
 
-            auto allocator = zeroMemory::HostMemAllocator(backendPtr);
-            allocate_tensor(outputName, shapeDescriptor, TensorType::Shape, allocator);
+                ov::Allocator allocator;
+                if (properties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) {
+                    allocator = zeroMemory::HostMemAllocator(backendPtr, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED);
+                } else {
+                    allocator = zeroMemory::HostMemAllocator(backendPtr);
+                }
+
+                allocate_tensor(shapeNameMatch->second, shapeDescriptor, TensorType::Shape, allocator);
+            }
         }
     }

@@ -226,15 +241,18 @@ void ZeroInferRequest::get_result() {
 
         if (isShapeTensorName(name)) {
             const auto actualTensorName = name.substr(SHAPE_TENSOR_PREFIX.size());
-            ov::Shape actualDims;
-            actualDims.reserve(outputTensor->get_size());
-
-            for (size_t i = 0; i < outputTensor->get_size(); ++i) {
-                const auto reverseIdx = outputTensor->get_size() - 1 - i;
-                actualDims.push_back(outputTensor->data<uint32_t>()[reverseIdx]);
+            const auto& shapeNameMatch = _legacyNameToNodeName.find(actualTensorName);
+            if (shapeNameMatch != _legacyNameToNodeName.end()) {
+                ov::Shape actualDims;
+                actualDims.reserve(outputTensor->get_size());
+
+                for (size_t i = 0; i < outputTensor->get_size(); ++i) {
+                    const auto reverseIdx = outputTensor->get_size() - 1 - i;
+                    actualDims.push_back(outputTensor->data<uint32_t>()[reverseIdx]);
+                }
+                auto& tensorToBeReshaped = _allTensors.at(shapeNameMatch->second);
+                tensorToBeReshaped->set_shape(actualDims);
             }
-            auto& tensorToBeReshaped = _allTensors.at(actualTensorName);
-            tensorToBeReshaped->set_shape(actualDims);
         }
 
         uint8_t* tensorBuffer = reinterpret_cast<uint8_t*>(outputTensor->data());
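
One detail of the read-back above: the device-side shape tensor stores the dimensions back to front, so the loop walks it in reverse before set_shape is called on the tensor found via the legacy-to-friendly match. A plain-vector illustration of that reversal (no plugin types involved):

#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the reversal performed in get_result(): the shape tensor holds the
// dims in reverse order relative to the ov::Shape that is finally applied.
std::vector<std::size_t> readActualDims(const std::vector<uint32_t>& shapeTensor) {
    std::vector<std::size_t> actualDims;
    actualDims.reserve(shapeTensor.size());
    for (std::size_t i = 0; i < shapeTensor.size(); ++i) {
        const auto reverseIdx = shapeTensor.size() - 1 - i;
        actualDims.push_back(shapeTensor[reverseIdx]);
    }
    return actualDims;
}

int main() {
    // A shape tensor containing {224, 224, 3, 1} is reported as shape {1, 3, 224, 224}.
    for (auto d : readActualDims({224, 224, 3, 1})) {
        std::cout << d << " ";
    }
    std::cout << "\n";
}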
3 changes: 1 addition & 2 deletions src/plugins/intel_npu/src/backend/src/zero_memory.cpp
@@ -69,10 +69,9 @@ bool HostMemAllocator::is_equal(const HostMemAllocator& other) const {
     return other._data != nullptr && _data != nullptr && other._data == _data;
 }
 
-void MemoryManagementUnit::appendArgument(const std::string& name, const ze_graph_argument_properties_t& argument) {
+void MemoryManagementUnit::appendArgument(const std::string& name, const std::size_t argSize) {
     _offsets.emplace(std::make_pair(name, _size));
 
-    const std::size_t argSize = zeroUtils::getSizeIOBytes(argument);
     _size += argSize + alignment -
              (argSize % alignment);  // is this really necessary? if 0==argSize%alignment -> add 1 * alignment
 }
4 changes: 2 additions & 2 deletions src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
@@ -41,7 +41,7 @@ struct DiscretePipeline final : public Pipeline {
 
     OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::DiscretePipeline::DiscretePipeline");
     for (const auto& desc : executor->inputs_desc_map()) {
-        _deviceInputs.appendArgument(desc.first, desc.second.info);
+        _deviceInputs.appendArgument(desc.first, zeroUtils::getSizeIOBytes(desc.second.info));
     }
     _deviceInputs.allocate(device_handle, context);
 
@@ -61,7 +61,7 @@ struct DiscretePipeline final : public Pipeline {
     _event[stage::UPLOAD].AppendSignalEvent(_command_list[stage::UPLOAD]);
 
     for (const auto& desc : executor->outputs_desc_map()) {
-        _deviceOutputs.appendArgument(desc.first, desc.second.info);
+        _deviceOutputs.appendArgument(desc.first, zeroUtils::getSizeIOBytes(desc.second.info));
    }
     _deviceOutputs.allocate(device_handle, context);
 
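
The pipeline keeps its two-phase pattern: every argument is appended first, recording an offset while the total size accumulates, and a single device allocation is made afterwards; the diff only changes what is passed in (a precomputed byte size instead of the whole properties struct). A toy model of that pattern, with alignment handling omitted and no plugin types:

#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>

// Toy stand-in for MemoryManagementUnit: append all arguments, then allocate once.
class ScratchArena {
public:
    void appendArgument(const std::string& name, std::size_t argSize) {
        _offsets.emplace(name, _size);  // offset of this argument inside the arena
        _size += argSize;
    }
    void allocate() { _storage.resize(_size); }
    void* at(const std::string& name) { return _storage.data() + _offsets.at(name); }

private:
    std::size_t _size = 0;
    std::unordered_map<std::string, std::size_t> _offsets;
    std::vector<std::byte> _storage;
};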
@@ -993,8 +993,8 @@ static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors,
     }
     const std::string& legacyName = arg.name;
 
-    names.push_back(legacyName);
-    nodeDescriptors[legacyName] =
+    names.push_back(arg.debug_friendly_name);
+    nodeDescriptors[arg.debug_friendly_name] =
         {legacyName, arg.debug_friendly_name, std::move(outputTensorNames), precision, shape, shape};
 }
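
Finally, the compiler adapter registers each I/O node under its debug_friendly_name while the descriptor still records the legacy name, so either spelling can be recovered later. A stripped-down stand-in for that record (the real IONodeDescriptor lives in the plugin's AL headers and carries more fields, including precision and shapes):

#include <string>
#include <unordered_map>
#include <vector>

struct NodeDescriptorSketch {
    std::string legacyName;
    std::string friendlyName;
    std::vector<std::string> outputTensorNames;
    // precision and shape information omitted for brevity
};

using NodeDescriptorMapSketch = std::unordered_map<std::string, NodeDescriptorSketch>;

// Keyed by the friendly name, but the legacy name stays inside the descriptor.
void registerNode(NodeDescriptorMapSketch& nodeDescriptors,
                  std::vector<std::string>& names,
                  const std::string& legacyName,
                  const std::string& friendlyName) {
    names.push_back(friendlyName);
    nodeDescriptors[friendlyName] = {legacyName, friendlyName, {}};
}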

