Update code according to review
pereanub committed Apr 23, 2024
1 parent 2314468 commit c99a0a9
Showing 10 changed files with 66 additions and 62 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/al/include/npu.hpp
@@ -34,7 +34,7 @@ class IEngineBackend : public std::enable_shared_from_this<IEngineBackend> {
/** @brief Get name of backend */
virtual const std::string getName() const = 0;
/** @brief Backend has support for concurrency batching */
virtual bool backendSupportBatching() const = 0;
virtual bool isBatchingSupported() const = 0;
/** @brief Register backend-specific options */
virtual void registerOptions(OptionsDesc& options) const;

2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/backend/include/zero_backend.hpp
@@ -24,7 +24,7 @@ class ZeroEngineBackend final : public IEngineBackend {
uint32_t getDriverVersion() const override;
uint32_t getDriverExtVersion() const override;

bool backendSupportBatching() const override;
bool isBatchingSupported() const override;

private:
std::shared_ptr<ZeroInitStructsHolder> _instance;
8 changes: 4 additions & 4 deletions src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
@@ -42,14 +42,14 @@ class ZeroInferRequest final : public SyncInferRequest {
const Config _config;
Logger _logger;

zeroProfiling::ProfilingPool _profiling_pool;
zeroProfiling::ProfilingQuery _profiling_query;
std::shared_ptr<zeroProfiling::NpuInferProfiling> _npu_profiling;
zeroProfiling::ProfilingPool _profilingPool;
zeroProfiling::ProfilingQuery _profilingQuery;
std::shared_ptr<zeroProfiling::NpuInferProfiling> _npuProfiling;
std::unique_ptr<Pipeline> _pipeline;

// If batching is handled on the compiler side then batching on the plugin shall be set to 1, we don't do any
// specific operations on the plugin in this case.
size_t _batch_size = 1;
size_t _batchSize = 1;
};

} // namespace intel_npu
6 changes: 3 additions & 3 deletions src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp
@@ -20,9 +20,9 @@ struct Pipeline {
Pipeline& operator=(Pipeline&&) = delete;
virtual ~Pipeline() = default;

virtual void push(const size_t batch_index) = 0;
virtual void pull(const size_t batch_index) = 0;
virtual void reset(const size_t batch_index) const = 0;
virtual void push(size_t batch_index) = 0;
virtual void pull(size_t batch_index) = 0;
virtual void reset(size_t batch_index) const = 0;

protected:
zeroMemory::MemoryManagementUnit _deviceInputs;
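
A note on the push/pull/reset signature change above: top-level const on a by-value parameter is not part of a function's type, so it adds nothing to a declaration; it only prevents reassignment inside a definition. A minimal sketch of the distinction, with an illustrative function name that is not part of the plugin:

    #include <cstddef>

    // Declaration: a top-level "const" on a by-value parameter would be ignored
    // by the compiler, so the cleaner style is to omit it in the interface.
    void process_batch(size_t batch_index);

    // Definition: the implementer may still mark the parameter const locally to
    // guard against accidental reassignment inside the body.
    void process_batch(const size_t batch_index) {
        // batch_index = 0;  // would not compile: batch_index is const here
        (void)batch_index;
    }

    int main() {
        process_batch(0);  // both lines above refer to the same function
        return 0;
    }
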
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/backend/src/zero_backend.cpp
@@ -28,7 +28,7 @@ uint32_t ZeroEngineBackend::getDriverExtVersion() const {
return _instance->getDriverExtVersion();
}

bool ZeroEngineBackend::backendSupportBatching() const {
bool ZeroEngineBackend::isBatchingSupported() const {
if (_instance->getDriverExtVersion() < ZE_GRAPH_EXT_VERSION_1_6) {
return false;
}
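
The isBatchingSupported() implementation above gates plugin-side batching on the graph extension version reported by the driver. A rough standalone sketch of the same capability-check pattern; the driver struct and the numeric version constant below are hypothetical stand-ins for the Level Zero types, not the real API:

    #include <cstdint>
    #include <iostream>

    // Hypothetical minimum extension version required for the capability.
    constexpr uint32_t kRequiredGraphExtVersion = 0x00010006;

    struct FakeDriver {
        uint32_t graph_ext_version;  // version the driver reports at runtime
    };

    // Advertise the capability only when the driver extension is new enough.
    bool is_batching_supported(const FakeDriver& driver) {
        return driver.graph_ext_version >= kRequiredGraphExtVersion;
    }

    int main() {
        FakeDriver old_driver{0x00010005};
        FakeDriver new_driver{0x00010006};
        std::cout << std::boolalpha
                  << is_batching_supported(old_driver) << '\n'   // false
                  << is_batching_supported(new_driver) << '\n';  // true
        return 0;
    }
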
58 changes: 32 additions & 26 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -69,7 +69,7 @@ size_t get_batch_size_for_node(const IONodeDescriptor& nodeDescriptor,
return 1;
}

size_t get_batch_size(
std::optional<size_t> get_batch_size(
const NetworkMetadata& metadata,
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorInputDescriptors,
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorOutputDescriptors) {
@@ -96,7 +96,11 @@ size_t get_batch_size(
}

auto it = batch_size.begin();
return *it;
if (*it) {
return *it;
}

return std::nullopt;
}

} // namespace
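
Changing the return type to std::optional<size_t> lets get_batch_size() distinguish "no plugin-side batch size detected" from an actual value. A simplified sketch of the producer/consumer pattern; DummyMetadata below is a toy stand-in for the real NetworkMetadata and descriptor maps:

    #include <cstddef>
    #include <iostream>
    #include <optional>
    #include <set>

    // Toy stand-in for the shapes inspected by the real get_batch_size().
    struct DummyMetadata {
        std::set<size_t> candidate_batch_sizes;  // batch dims gathered from all I/O nodes
    };

    // Return a batch size only when all nodes agree on a single non-zero value.
    std::optional<size_t> get_batch_size(const DummyMetadata& metadata) {
        if (metadata.candidate_batch_sizes.size() != 1) {
            return std::nullopt;  // missing or mismatching batch dimensions
        }
        const size_t candidate = *metadata.candidate_batch_sizes.begin();
        return candidate ? std::optional<size_t>(candidate) : std::nullopt;
    }

    int main() {
        DummyMetadata meta{{4}};
        size_t batch_size = 1;  // default: the plugin performs no extra batching
        if (auto detected = get_batch_size(meta); detected.has_value()) {
            batch_size = *detected;
        }
        std::cout << "batch size: " << batch_size << '\n';  // prints 4
        return 0;
    }
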
@@ -111,22 +115,20 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
_executor(static_cast<const ZeroExecutor*>(_executorPtr.get())),
_config(config),
_logger("ZeroInferRequest", config.get<LOG_LEVEL>()),
_profiling_pool(_executor->graph(),
zeroProfiling::POOL_SIZE,
_executor->getInitStructs()->getProfilingDdiTable()),
_profiling_query(0,
_executor->getInitStructs()->getDevice(),
_executor->getInitStructs()->getProfilingDdiTable()) {
_profilingPool(_executor->graph(), zeroProfiling::POOL_SIZE, _executor->getInitStructs()->getProfilingDdiTable()),
_profilingQuery(0,
_executor->getInitStructs()->getDevice(),
_executor->getInitStructs()->getProfilingDdiTable()) {
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorInputDescriptors =
_executor->inputs_desc_map();
const std::unordered_map<std::string, ZeroExecutor::ArgumentDescriptor>& executorOutputDescriptors =
_executor->outputs_desc_map();

auto proftype = config.get<PROFILING_TYPE>();
if (proftype == ov::intel_npu::ProfilingType::INFER) {
_npu_profiling = std::make_shared<zeroProfiling::NpuInferProfiling>(_executor->getInitStructs()->getContext(),
_executor->getInitStructs()->getDevice(),
_config.get<LOG_LEVEL>());
_npuProfiling = std::make_shared<zeroProfiling::NpuInferProfiling>(_executor->getInitStructs()->getContext(),
_executor->getInitStructs()->getDevice(),
_config.get<LOG_LEVEL>());
}

ze_device_properties_t properties = {};
@@ -142,7 +144,11 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

if (config.get<BATCH_MODE>() != ov::intel_npu::BatchMode::COMPILER) {
try {
_batch_size = get_batch_size(_metadata, executorInputDescriptors, executorOutputDescriptors);
auto batchSize = get_batch_size(_metadata, executorInputDescriptors, executorOutputDescriptors);

if (batchSize.has_value()) {
_batchSize = *batchSize;
}
} catch (const std::exception& ex) {
_logger.info("Got an error when checking the batch size: \n%s", ex.what());
}
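
A side note on the unwrap above: when falling back to the current default is acceptable, std::optional::value_or collapses the has_value()/dereference pair into one call (the surrounding try/catch would still be needed for the exceptions get_batch_size may throw). A tiny illustration, not the plugin's actual code:

    #include <cstddef>
    #include <iostream>
    #include <optional>

    int main() {
        std::optional<size_t> not_detected;   // empty: no plugin-side batch size found
        std::optional<size_t> detected = 8;   // a detected batch size

        // value_or returns the contained value, or the given default when empty.
        std::cout << not_detected.value_or(1) << '\n';  // prints 1
        std::cout << detected.value_or(1) << '\n';      // prints 8
        return 0;
    }
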
@@ -165,8 +171,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

// When batching is handled by the plugin we need to modify transposed shape with the original batch size since
// it will be forced to 1 at the compilation time
if (_batch_size > 1) {
parameterDescriptor.transposedShape[0] = _batch_size;
if (_batchSize > 1) {
parameterDescriptor.transposedShape[0] = _batchSize;
}

// The I/O buffers already allocated using the Level Zero API are being reused here
@@ -194,8 +200,8 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

// When batching is handled by the plugin we need to modify transposed shape with the original batch size since
// it will be forced to 1 at the compilation time
if (_batch_size > 1) {
resultDescriptor.transposedShape[0] = _batch_size;
if (_batchSize > 1) {
resultDescriptor.transposedShape[0] = _batchSize;
}

allocate_tensor(outputName, resultDescriptor, TensorType::InputOrOutput, allocator);
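
Both the parameter and result branches above write the plugin-side batch size back into element 0 of the transposed shape, because the compiler was given a batch of 1 when batching is handled by the plugin. A small sketch of that patch-up; a plain vector stands in for the descriptor's shape type, and dimension 0 is assumed to be the batch axis:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Re-insert the batch size the plugin will drive, assuming dim 0 is the batch axis.
    void restore_batch_dim(std::vector<size_t>& transposed_shape, size_t batch_size) {
        if (batch_size > 1 && !transposed_shape.empty()) {
            transposed_shape[0] = batch_size;  // was forced to 1 at compile time
        }
    }

    int main() {
        std::vector<size_t> shape{1, 3, 224, 224};  // shape as compiled
        restore_batch_dim(shape, 4);                // plugin-side batch of 4
        std::cout << shape[0] << '\n';              // prints 4
        return 0;
    }
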
@@ -242,11 +248,11 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
/// Construct pipeline
_pipeline = makePipeline(_executorPtr,
_config,
_profiling_pool,
_profiling_query,
_npu_profiling,
_profilingPool,
_profilingQuery,
_npuProfiling,
_copyAllTensors,
_batch_size);
_batchSize);
}

void ZeroInferRequest::infer() {
@@ -284,15 +290,15 @@ void ZeroInferRequest::infer_async() {
}
}

for (size_t i = 0; i < _batch_size; i++) {
for (size_t i = 0; i < _batchSize; i++) {
_pipeline->push(i);
}
}

void ZeroInferRequest::get_result() {
OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "get_result");

for (size_t i = 0; i < _batch_size; i++) {
for (size_t i = 0; i < _batchSize; i++) {
_pipeline->pull(i);
}

@@ -328,7 +334,7 @@ void ZeroInferRequest::get_result() {
}
}

for (size_t i = 0; i < _batch_size; i++) {
for (size_t i = 0; i < _batchSize; i++) {
_pipeline->reset(i);
}
_logger.debug("InferRequest::get_result finished");
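
The loops in infer_async() and get_result() above drive the pipeline once per batch element: push() submits the work for each index, pull() waits for each, and reset() makes the slots reusable for the next inference. A condensed sketch of that lifecycle; FakePipeline is a trivial stand-in for the Pipeline interface, and the logging is purely illustrative:

    #include <cstddef>
    #include <iostream>

    // Minimal stand-in for the Pipeline interface from zero_pipeline.hpp.
    struct FakePipeline {
        void push(size_t batch_index) { std::cout << "push " << batch_index << '\n'; }
        void pull(size_t batch_index) { std::cout << "pull " << batch_index << '\n'; }
        void reset(size_t batch_index) const { std::cout << "reset " << batch_index << '\n'; }
    };

    int main() {
        const size_t batch_size = 3;
        FakePipeline pipeline;

        for (size_t i = 0; i < batch_size; ++i) pipeline.push(i);   // submit every batch slice
        for (size_t i = 0; i < batch_size; ++i) pipeline.pull(i);   // wait for every slice
        for (size_t i = 0; i < batch_size; ++i) pipeline.reset(i);  // prepare for the next infer
        return 0;
    }
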
@@ -381,13 +387,13 @@ std::vector<ov::ProfilingInfo> ZeroInferRequest::get_profiling_info() const {
} else {
auto proftype = _config.get<PROFILING_TYPE>();
if (proftype == ov::intel_npu::ProfilingType::INFER) {
return _npu_profiling->getNpuInferStatistics();
return _npuProfiling->getNpuInferStatistics();
} else { /// proftype = MODEL or undefined = fallback to model profiling
return _profiling_query.getLayerStatistics();
return _profilingQuery.getLayerStatistics();
}
}
}
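
get_profiling_info() above picks its statistics source from the PROFILING_TYPE option: INFER returns the NPU inference statistics, anything else falls back to per-layer statistics from the profiling query. A toy sketch of that dispatch, using placeholder enum and string types rather than the real OpenVINO ones:

    #include <iostream>
    #include <string>

    // Placeholder for ov::intel_npu::ProfilingType.
    enum class ProfilingType { MODEL, INFER };

    std::string get_profiling_info(ProfilingType type) {
        if (type == ProfilingType::INFER) {
            return "npu inference statistics";  // cf. _npuProfiling->getNpuInferStatistics()
        }
        return "per-layer statistics";          // cf. _profilingQuery.getLayerStatistics()
    }

    int main() {
        std::cout << get_profiling_info(ProfilingType::INFER) << '\n';
        std::cout << get_profiling_info(ProfilingType::MODEL) << '\n';
        return 0;
    }
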

std::vector<uint8_t> ZeroInferRequest::get_raw_profiling_data() const {
return _profiling_query.getData<uint8_t>();
return _profilingQuery.getData<uint8_t>();
}
12 changes: 6 additions & 6 deletions src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp
@@ -94,7 +94,7 @@ struct DiscretePipeline final : public Pipeline {
DiscretePipeline& operator=(const DiscretePipeline&) = delete;
virtual ~DiscretePipeline() = default;

void push(const size_t) override {
void push(size_t) override {
OV_ITT_TASK_CHAIN(ZERO_INFER_REQUEST_DP_PUSH,
itt::domains::LevelZeroBackend,
"DiscretePipeline::push",
@@ -107,7 +107,7 @@ struct DiscretePipeline final : public Pipeline {
_command_queues[stage::EXECUTE]->executeCommandList(_command_list[stage::EXECUTE], _fence[stage::EXECUTE]);
};

void pull(const size_t) override {
void pull(size_t) override {
OV_ITT_TASK_CHAIN(ZERO_INFER_REQUEST_DP_PULL,
itt::domains::LevelZeroBackend,
"DiscretePipeline::pull",
@@ -122,7 +122,7 @@ struct DiscretePipeline final : public Pipeline {
_fence[stage::READBACK].hostSynchronize();
};

void reset(const size_t) const override {
void reset(size_t) const override {
// Reset the fence objects
for (auto& fence : _fence) {
fence.reset();
@@ -215,7 +215,7 @@ struct IntegratedPipeline final : public Pipeline {
IntegratedPipeline& operator=(const IntegratedPipeline&) = delete;
virtual ~IntegratedPipeline() = default;

void push(const size_t batch_index) override {
void push(size_t batch_index) override {
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PUSH, itt::domains::LevelZeroBackend, "IntegratedPipeline", "push");
if (sync_output_with_fences_) {
_command_queue.executeCommandList(*_command_lists.at(batch_index), *_fences.at(batch_index));
@@ -224,7 +224,7 @@ struct IntegratedPipeline final : public Pipeline {
}
};

void pull(const size_t batch_index) override {
void pull(size_t batch_index) override {
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PULL, itt::domains::LevelZeroBackend, "IntegratedPipeline", "pull");
if (sync_output_with_fences_) {
_fences.at(batch_index)->hostSynchronize();
@@ -237,7 +237,7 @@ struct IntegratedPipeline final : public Pipeline {
}
};

void reset(const size_t batch_index) const override {
void reset(size_t batch_index) const override {
if (sync_output_with_fences_) {
_fences.at(batch_index)->reset();
} else {
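
push(), pull() and reset() in IntegratedPipeline above each branch on sync_output_with_fences_: with fences, the command queue signals a per-batch-slot fence that the host synchronizes on; without them, an event recorded in the command list is waited on instead. A rough sketch of that branching with dummy fence/event types in place of the Level Zero wrappers:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Dummy synchronization primitives standing in for the Level Zero wrappers.
    struct DummyFence { void hostSynchronize() const { std::cout << "fence wait\n"; } };
    struct DummyEvent { void hostSynchronize() const { std::cout << "event wait\n"; } };

    struct SketchPipeline {
        bool sync_output_with_fences;
        std::vector<DummyFence> fences;  // one per batch slot
        std::vector<DummyEvent> events;  // one per batch slot

        // Wait for the work submitted for one batch slice to finish.
        void pull(size_t batch_index) const {
            if (sync_output_with_fences) {
                fences.at(batch_index).hostSynchronize();
            } else {
                events.at(batch_index).hostSynchronize();
            }
        }
    };

    int main() {
        SketchPipeline with_fences{true, std::vector<DummyFence>(2), std::vector<DummyEvent>(2)};
        SketchPipeline with_events{false, std::vector<DummyFence>(2), std::vector<DummyEvent>(2)};
        with_fences.pull(0);  // "fence wait"
        with_events.pull(1);  // "event wait"
        return 0;
    }
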
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/include/backends.hpp
@@ -31,7 +31,7 @@ class NPUBackends final {
std::string getBackendName() const;
uint32_t getDriverVersion() const;
uint32_t getDriverExtVersion() const;
bool backendSupportBatching() const;
bool isBatchingSupported() const;
void registerOptions(OptionsDesc& options) const;
std::string getCompilationPlatform(const std::string_view platform, const std::string& deviceId) const;

4 changes: 2 additions & 2 deletions src/plugins/intel_npu/src/plugin/src/backends.cpp
@@ -155,9 +155,9 @@ uint32_t NPUBackends::getDriverExtVersion() const {
OPENVINO_THROW("No available backend");
}

bool NPUBackends::backendSupportBatching() const {
bool NPUBackends::isBatchingSupported() const {
if (_backend != nullptr) {
return _backend->backendSupportBatching();
return _backend->isBatchingSupported();
}

OPENVINO_THROW("No available backend");
32 changes: 15 additions & 17 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -43,23 +43,20 @@ std::shared_ptr<ov::Model> create_dummy_model(const IONodeDescriptorMap& paramet
const IONodeDescriptorMap& resultDescriptors,
const std::vector<std::string>& inputNames,
const std::vector<std::string>& outputNames,
const Config& config) {
bool isBatchingSupported) {
ov::ParameterVector parameters;
ov::NodeVector results;

const auto& batchMode = config.get<BATCH_MODE>();

for (const std::string& inputName : inputNames) {
const IONodeDescriptor& parameterDescriptor = parameterDescriptors.at(inputName);

std::shared_ptr<ov::op::v0::Parameter> parameter = [&] {
if (batchMode != ov::intel_npu::BatchMode::COMPILER) {
if (isBatchingSupported) {
return std::make_shared<ov::op::v0::Parameter>(parameterDescriptor.precision,
parameterDescriptor.originalShape);
} else {
return std::make_shared<ov::op::v0::Parameter>(parameterDescriptor.precision,
parameterDescriptor.transposedShape);
}
return std::make_shared<ov::op::v0::Parameter>(parameterDescriptor.precision,
parameterDescriptor.transposedShape);
}();

parameter->set_friendly_name(parameterDescriptor.currentNodeName);
@@ -79,15 +76,14 @@ std::shared_ptr<ov::Model> create_dummy_model(const IONodeDescriptorMap& paramet
constantDummy->set_friendly_name(resultDescriptor.legacyName);

const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy = [&] {
if (batchMode != ov::intel_npu::BatchMode::COMPILER) {
if (isBatchingSupported) {
return std::make_shared<ov::descriptor::Tensor>(resultDescriptor.precision,
resultDescriptor.originalShape,
resultDescriptor.outputTensorNames);
} else {
return std::make_shared<ov::descriptor::Tensor>(resultDescriptor.precision,
resultDescriptor.transposedShape,
resultDescriptor.outputTensorNames);
}
return std::make_shared<ov::descriptor::Tensor>(resultDescriptor.precision,
resultDescriptor.transposedShape,
resultDescriptor.outputTensorNames);
}();

std::shared_ptr<ov::Node> result = std::make_shared<ov::op::v0::Result>(constantDummy);
@@ -99,8 +95,8 @@ std::shared_ptr<ov::Model> create_dummy_model(const IONodeDescriptorMap& paramet
return std::make_shared<ov::Model>(results, parameters);
}
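
create_dummy_model() above initializes parameter and tensorDummy through an immediately-invoked lambda, so the value chosen by the isBatchingSupported branch can stay single-assignment (and const if desired). A small generic sketch of the idiom, deliberately using plain strings instead of the OpenVINO node types:

    #include <iostream>
    #include <memory>
    #include <string>

    std::shared_ptr<std::string> make_label(bool use_original_shape) {
        // Immediately-invoked lambda: the branch picks the value exactly once,
        // and the resulting variable never needs to be reassigned afterwards.
        const std::shared_ptr<std::string> label = [&] {
            if (use_original_shape) {
                return std::make_shared<std::string>("original shape");
            }
            return std::make_shared<std::string>("transposed shape");
        }();
        return label;
    }

    int main() {
        std::cout << *make_label(true) << '\n';   // "original shape"
        std::cout << *make_label(false) << '\n';  // "transposed shape"
        return 0;
    }
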

void set_batch_config(std::shared_ptr<NPUBackends> backends, Config& config) {
if (!backends->backendSupportBatching() || config.get<PLATFORM>() == ov::intel_npu::Platform::NPU3700) {
void set_batch_config(bool isBatchingSupported, Config& config) {
if (!isBatchingSupported || config.get<PLATFORM>() == ov::intel_npu::Platform::NPU3700) {
if (config.has<BATCH_MODE>() && config.get<BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN) {
OPENVINO_THROW("Batching on plugin is not supported with this driver version");
}
@@ -523,7 +519,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());
localConfig.update({{ov::intel_npu::platform.name(), platform}});

set_batch_config(_backends, localConfig);
auto isBatchingSupported = _backends->isBatchingSupported();
set_batch_config(isBatchingSupported, localConfig);

if (model->get_variables().size()) {
if (localConfig.get<BATCH_MODE>() == ov::intel_npu::BatchMode::PLUGIN) {
@@ -616,7 +613,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
localConfig.update({{ov::intel_npu::platform.name(), platform}});
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());

set_batch_config(_backends, localConfig);
auto isBatchingSupported = _backends->isBatchingSupported();
set_batch_config(isBatchingSupported, localConfig);

Logger logger("NPUPlugin", localConfig.get<LOG_LEVEL>());

@@ -643,7 +641,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
meta.name = "net" + std::to_string(_compiledModelLoadCounter++);

const std::shared_ptr<ov::Model> modelDummy =
create_dummy_model(meta.parameters, meta.results, meta.inputNames, meta.outputNames, localConfig);
create_dummy_model(meta.parameters, meta.results, meta.inputNames, meta.outputNames, isBatchingSupported);

bool profiling = localConfig.get<PERF_COUNT>();

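
After this change, compile_model() and import_model() query the backend once and pass the resulting boolean to both set_batch_config() and create_dummy_model(), so those helpers no longer need the backend object or the full Config to answer the batching question. A hedged sketch of that call-site shape; all types and signatures below are simplified stand-ins, not the real plugin API:

    #include <iostream>
    #include <stdexcept>
    #include <string>

    // Simplified stand-ins for the plugin types.
    struct SketchBackend {
        bool isBatchingSupported() const { return true; }
    };

    enum class BatchMode { AUTO, COMPILER, PLUGIN };

    void set_batch_config(bool is_batching_supported, BatchMode mode) {
        if (!is_batching_supported && mode == BatchMode::PLUGIN) {
            throw std::runtime_error("Batching on plugin is not supported with this driver version");
        }
    }

    std::string create_dummy_model(bool is_batching_supported) {
        return is_batching_supported ? "model with original shapes" : "model with transposed shapes";
    }

    int main() {
        SketchBackend backend;
        const BatchMode mode = BatchMode::PLUGIN;

        // Query the capability once, then hand the plain bool to every consumer.
        const bool is_batching_supported = backend.isBatchingSupported();
        set_batch_config(is_batching_supported, mode);
        std::cout << create_dummy_model(is_batching_supported) << '\n';
        return 0;
    }
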
