From 11a51151e137566a4bbda24e74571f9e4879bcfc Mon Sep 17 00:00:00 2001 From: Egor Duplensky Date: Mon, 25 Nov 2024 13:23:57 +0100 Subject: [PATCH] Apply review comments 1 --- src/plugins/intel_cpu/src/edge.cpp | 6 +- src/plugins/intel_cpu/src/edge.h | 2 +- src/plugins/intel_cpu/src/graph.cpp | 65 ++++++++----------- src/plugins/intel_cpu/src/graph.h | 8 +-- src/plugins/intel_cpu/src/memory_control.hpp | 2 +- src/plugins/intel_cpu/src/node.h | 8 +-- src/plugins/intel_cpu/src/nodes/composite.cpp | 6 +- src/plugins/intel_cpu/src/nodes/lora.cpp | 2 +- .../functional/cmake/target_per_test.cmake | 1 - 9 files changed, 42 insertions(+), 58 deletions(-) diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp index 82cf2ea4c631dd..1e0051a683f0b3 100644 --- a/src/plugins/intel_cpu/src/edge.cpp +++ b/src/plugins/intel_cpu/src/edge.cpp @@ -411,10 +411,10 @@ const MemoryDesc& Edge::getOutputDesc() const { const MemoryDesc& Edge::getDesc() const { OPENVINO_ASSERT(!one_of(status, Status::Validated, Status::Allocated), - "Desc of an Allocated edge ", name(), " must be accessed through the memory object"); + "Desc of an Allocated edge ", *this, " must be accessed through the memory object"); if (!getInputDesc().isCompatible(getOutputDesc())) - OPENVINO_THROW("Cannot get descriptor for edge: ", getParent()->getName(), "->", getChild()->getName()); + OPENVINO_THROW("Cannot get descriptor for edge: ", *this); return getInputDesc(); } @@ -443,7 +443,7 @@ void Edge::validate() { getChild(); if (status != Status::Allocated || !memoryPtr) { - OPENVINO_THROW("Error memory is not allocated for edge: ", name()); + OPENVINO_THROW("Error memory is not allocated for edge: ", *this); } status = Status::Validated; } diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index e42c0a7aaf51af..4d34c915dc53d8 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -31,7 +31,7 @@ class Edge { enum class Status { Uninitialized, // base edge is unknown yet NeedAllocation, // edge is the base edge - NotAllocated, // edge is a referencing edge + NotAllocated, // edge references another edge Allocated, // edge memory is allocated Validated // edge is validated }; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 9937345f3f2f0f..fe68360eb30a6e 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -717,25 +717,6 @@ void Graph::ResolveComplexInplaceConflicts() { */ static size_t AllocateStringsAndConstants(EdgeClusters& clusters, const GraphContext::CPtr context) { - auto allocateStringMemory = [context](const EdgePtr& edge) { - if (edge->getParent()->isConstant()) { - if (edge->getParent()->getType() == Type::Input) { - auto constNode = static_cast(edge->getParent().get()); - edge->reuse(std::const_pointer_cast(constNode->getMemoryPtr())); - } else { - edge->externalAllocate(context->getWeightsCache()); - } - auto stringMemory = dynamic_cast(edge->getMemoryPtr().get()); - OPENVINO_ASSERT(stringMemory, "[CPU] Edge between nodes '", - edge->getParent()->getName(), "' and '", edge->getChild()->getName(), "' must have StringMemory."); - return stringMemory->getStringMemoryBlockPtr(); - } - - auto memory = std::make_shared(context->getEngine(), edge->getDesc()); - edge->reuse(memory); - return memory->getStringMemoryBlockPtr(); - }; - auto allocateConstantEdge = [context](const EdgePtr& edge) { if (edge->getParent()->getType() == Type::Input) { auto constNode = std::static_pointer_cast(edge->getParent()); @@ -745,6 +726,12 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters, } }; + auto allocateStringMemory = [context](const EdgePtr& edge) { + auto memory = std::make_shared(context->getEngine(), edge->getDesc()); + edge->reuse(memory); + return memory->getStringMemoryBlockPtr(); + }; + auto notAllocatedPartitionEnd = std::partition(clusters.begin(), clusters.end(), [&allocateStringMemory, &allocateConstantEdge, &context](const EdgeCluster& cluster) { @@ -766,7 +753,18 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters, return true; } - // Allocate a string cluster + // Allocate a cluster of the constants + if (baseEdge->getParent()->isConstant()) { + // @todo can we add some meaningful assert here? + for (auto &edge : cluster) { + if (edge->getParent()->isConstant() && edge->getStatus() == Edge::Status::NeedAllocation) { + allocateConstantEdge(edge); + } + } + return false; + } + + // Allocate a non-constant string cluster if (baseEdge->getDesc().getPrecision() == element::string) { OPENVINO_ASSERT(std::all_of(cluster.begin(), cluster.end(), [](const EdgePtr& edge) { @@ -781,17 +779,6 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters, return false; } - // Allocate a constant cluster - if (baseEdge->getParent()->isConstant()) { - // @todo can we add some meaningful assert here? - for (auto &edge : cluster) { - if (edge->getParent()->isConstant() && edge->getStatus() == Edge::Status::NeedAllocation) { - allocateConstantEdge(edge); - } - } - return false; - } - return true; }); @@ -932,13 +919,13 @@ static void ValidateEdgeStatus(const std::vector& edges) { /** * Forms clusters of edges. - * An edge cluster is a collection of edges, so: + * An edge cluster is a collection of edges, with the following properties: * - base edge is an edge with a Memory which other edges point to by means of inplace logic * - first edge of a cluster is a base edge with a status either NeedAllocation or Allocated - * - rest of the edges in a cluster are NotAllocated ones, since they point to their base edge + * - rest of the edges in a cluster are NotAllocated ones, since they point to another edge */ static EdgeClusters FormEdgeClusters(const std::vector& graphEdges) { - typedef std::unordered_map EdgeClusterIdxMap; + using EdgeClusterIdxMap = std::unordered_map; EdgeClusters edgeClusters; EdgeClusterIdxMap edgeClusterIndices; @@ -1058,10 +1045,10 @@ static MemoryRegions FormMemoryRegions(const EdgeClusters& clusters, return memoryRegions; } -static OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegions, - const EdgeClusters& clusters, - const std::map& outputNodes) { - OutputMemoryBlocks outputMemBlocks; +static Graph::OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegions, + const EdgeClusters& clusters, + const std::map& outputNodes) { + Graph::OutputMemoryBlocks outputMemBlocks; memoryRegions.erase(std::remove_if(memoryRegions.begin(), memoryRegions.end(), [&](const MemoryRegion& region) { if (region.size >= 0 || !one_of(region.type, MemoryRegion::RegionType::OUTPUT, MemoryRegion::RegionType::IO)) { return false; @@ -1103,7 +1090,7 @@ static OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegio * 1) EdgeClusters - to propagate the solution through the graph * 2) OutputMemoryBlocks - to allow memory sharing between graph and infer request */ -static std::tuple +static std::tuple SolveMemoryReuse(const std::shared_ptr& memoryControl, const AllocationContext& allocationContext, const GraphContext::CPtr graphContext, diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 5d4805c15fb52c..d73cf6e6f3bdb6 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -30,11 +30,10 @@ namespace node { class MemoryStateNode; } // namespace node -using OutputMemoryBlocks = std::unordered_map; - class Graph { public: - typedef std::shared_ptr Ptr; + using Ptr = std::shared_ptr; + using OutputMemoryBlocks = std::unordered_map; enum class Status { NotReady = 0, @@ -237,8 +236,6 @@ class Graph { void Activate(const std::vector& externalInputMemory = {}, const std::vector& externalOutputMemory = {}); - void Allocate(); - /** * Register the graph in the global allocation context by transforming * local execution data into the global one: @@ -290,6 +287,7 @@ class Graph { const std::vector& outputConfigs = {}); void Configure(bool optimize = true); + void Allocate(); void InitNodes(); void InitDescriptors(); diff --git a/src/plugins/intel_cpu/src/memory_control.hpp b/src/plugins/intel_cpu/src/memory_control.hpp index 23f65a0fe3f326..9a6d14d82ecaff 100644 --- a/src/plugins/intel_cpu/src/memory_control.hpp +++ b/src/plugins/intel_cpu/src/memory_control.hpp @@ -48,7 +48,7 @@ class MemoryControl { void releaseMemory(); private: - explicit MemoryControl(); + MemoryControl(); void insert(const MemoryRegion& region, const std::vector& syncInds); friend class NetworkMemoryControl; diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 88d69fe39a9d20..78e47ed60c816a 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -114,7 +114,6 @@ class NodeDesc { bool hasZeroInputDims() const { const auto& inputConfigs = getConfig().inConfs; - return std::any_of(inputConfigs.begin(), inputConfigs.end(), [](const PortConfig& portConfig) { return portConfig.hasZeroDims(); }); @@ -122,13 +121,13 @@ class NodeDesc { bool hasZeroInputDimsAtPort(size_t portIdx) const { const auto& inputConfigs = getConfig().inConfs; - OPENVINO_ASSERT("Attempt to get NodeDesc input configuration for port " , portIdx, ". Number of inputs is ", inputConfigs.size()); + OPENVINO_ASSERT(portIdx < inputConfigs.size(), "Attempt to get NodeDesc input configuration for port ", + portIdx, ". Number of inputs is ", inputConfigs.size()); return inputConfigs[portIdx].hasZeroDims(); } bool hasZeroOutputDims() const { const auto& outputConfigs = getConfig().outConfs; - return std::any_of(outputConfigs.begin(), outputConfigs.end(), [](const PortConfig& portConfig) { return portConfig.hasZeroDims(); }); @@ -136,7 +135,8 @@ class NodeDesc { bool hasZeroOutputDimsAtPort(size_t portIdx) const { const auto& outputConfigs = getConfig().outConfs; - OPENVINO_ASSERT("Attempt to get NodeDesc output configuration for port " , portIdx, ". Number of outputs is ", outputConfigs.size()); + OPENVINO_ASSERT(portIdx < outputConfigs.size(), "Attempt to get NodeDesc output configuration for port ", + portIdx, ". Number of outputs is ", outputConfigs.size()); return outputConfigs[portIdx].hasZeroDims(); } diff --git a/src/plugins/intel_cpu/src/nodes/composite.cpp b/src/plugins/intel_cpu/src/nodes/composite.cpp index bcb142df51dcb5..ee5ee346d9d52c 100644 --- a/src/plugins/intel_cpu/src/nodes/composite.cpp +++ b/src/plugins/intel_cpu/src/nodes/composite.cpp @@ -42,7 +42,7 @@ void Composite::selectOptimalPrimitiveDescriptor() { std::vector inConfs; std::vector graphInputConfig; - const bool isInPlace = true; + constexpr bool isInPlace = true; for (size_t i = 0; i < getParentEdges().size(); i++) { auto desc = getParentOutputMemDesc(getParentEdgeAt(i)); @@ -82,7 +82,7 @@ int Composite::registerToAllocationContext(int offset, AllocationContext& contex auto inputEdges = m_graph.GetInputNodesMap().at(i)->getChildEdgesAtPort(0); for (const auto& inputEdge : inputEdges) { OPENVINO_ASSERT(inputEdge->getStatus() == Edge::Status::Uninitialized, - "Expected Uninitialized state for edge: ", inputEdge->name()); + "Expected Uninitialized state for edge: ", *this); inputEdge->sharedMemFrom(parentEdge); } } @@ -91,7 +91,7 @@ int Composite::registerToAllocationContext(int offset, AllocationContext& contex auto childEdge = getChildEdgeAt(i); auto outputEdge = m_graph.GetOutputNodesMap().at(i)->getParentEdgeAt(0); OPENVINO_ASSERT(outputEdge->getStatus() == Edge::Status::Uninitialized, - "Expected Uninitialized state for edge: ", outputEdge->name()); + "Expected Uninitialized state for edge: ", *outputEdge); outputEdge->sharedMemFrom(childEdge); } diff --git a/src/plugins/intel_cpu/src/nodes/lora.cpp b/src/plugins/intel_cpu/src/nodes/lora.cpp index 832ece7cd1f153..3237c9de450adf 100644 --- a/src/plugins/intel_cpu/src/nodes/lora.cpp +++ b/src/plugins/intel_cpu/src/nodes/lora.cpp @@ -52,7 +52,7 @@ void LoRA::selectOptimalPrimitiveDescriptor() { inConfs.emplace_back(mainInputDesc); - const bool isInPlace = true; + constexpr bool isInPlace = true; graphInputConfig.emplace_back(node::Input::InputConfig{mainInputDesc, isInPlace}); for (size_t i = 1; i < getParentEdges().size(); i++) { diff --git a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake index 9d7fa9f9d9a365..5b8614ae9b63bb 100644 --- a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake +++ b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake @@ -96,7 +96,6 @@ endif() endfunction() if(ENABLE_CPU_SPECIFIC_TARGET_PER_TEST) - # create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src ov_cpu_func_subgraph) create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/common ov_cpu_func_subgraph) create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests ov_cpu_func_slt) endif()