diff --git a/src/plugins/intel_cpu/src/edge.h b/src/plugins/intel_cpu/src/edge.h index b2890f3f477de1..af67e8a09a9168 100644 --- a/src/plugins/intel_cpu/src/edge.h +++ b/src/plugins/intel_cpu/src/edge.h @@ -30,7 +30,7 @@ class Edge { enum class Status { Uninitialized, // base edge is unknown yet NeedAllocation, // edge is the base edge - NotAllocated, // edge is a referencing edge + NotAllocated, // edge references another edge Allocated, // edge memory is allocated Validated // edge is validated }; diff --git a/src/plugins/intel_cpu/src/graph.cpp b/src/plugins/intel_cpu/src/graph.cpp index 50fee2d9444c2b..219d8c48dae321 100644 --- a/src/plugins/intel_cpu/src/graph.cpp +++ b/src/plugins/intel_cpu/src/graph.cpp @@ -722,25 +722,6 @@ void Graph::ResolveComplexInplaceConflicts() { */ static size_t AllocateStringsAndConstants(EdgeClusters& clusters, const GraphContext::CPtr context) { - auto allocateStringMemory = [context](const EdgePtr& edge) { - if (edge->getParent()->isConstant()) { - if (edge->getParent()->getType() == Type::Input) { - auto constNode = static_cast(edge->getParent().get()); - edge->reuse(std::const_pointer_cast(constNode->getMemoryPtr())); - } else { - edge->externalAllocate(context->getWeightsCache()); - } - auto stringMemory = dynamic_cast(edge->getMemoryPtr().get()); - OPENVINO_ASSERT(stringMemory, "[CPU] Edge between nodes '", - edge->getParent()->getName(), "' and '", edge->getChild()->getName(), "' must have StringMemory."); - return stringMemory->getStringMemoryBlockPtr(); - } - - auto memory = std::make_shared(context->getEngine(), edge->getDesc()); - edge->reuse(memory); - return memory->getStringMemoryBlockPtr(); - }; - auto allocateConstantEdge = [context](const EdgePtr& edge) { if (edge->getParent()->getType() == Type::Input) { auto constNode = std::static_pointer_cast(edge->getParent()); @@ -750,6 +731,12 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters, } }; + auto allocateStringMemory = [context](const EdgePtr& edge) { + auto memory = std::make_shared(context->getEngine(), edge->getDesc()); + edge->reuse(memory); + return memory->getStringMemoryBlockPtr(); + }; + auto notAllocatedPartitionEnd = std::partition(clusters.begin(), clusters.end(), [&allocateStringMemory, &allocateConstantEdge, &context](const EdgeCluster& cluster) { @@ -771,7 +758,18 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters, return true; } - // Allocate a string cluster + // Allocate a cluster of the constants + if (baseEdge->getParent()->isConstant()) { + // @todo can we add some meaningful assert here? + for (auto &edge : cluster) { + if (edge->getParent()->isConstant() && edge->getStatus() == Edge::Status::NeedAllocation) { + allocateConstantEdge(edge); + } + } + return false; + } + + // Allocate a non-constant string cluster if (baseEdge->getDesc().getPrecision() == element::string) { OPENVINO_ASSERT(std::all_of(cluster.begin(), cluster.end(), [](const EdgePtr& edge) { @@ -786,17 +784,6 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters, return false; } - // Allocate a constant cluster - if (baseEdge->getParent()->isConstant()) { - // @todo can we add some meaningful assert here? - for (auto &edge : cluster) { - if (edge->getParent()->isConstant() && edge->getStatus() == Edge::Status::NeedAllocation) { - allocateConstantEdge(edge); - } - } - return false; - } - return true; }); @@ -937,13 +924,13 @@ static void ValidateEdgeStatus(const std::vector& edges) { /** * Forms clusters of edges. - * An edge cluster is a collection of edges, so: + * An edge cluster is a collection of edges, with the following properties: * - base edge is an edge with a Memory which other edges point to by means of inplace logic * - first edge of a cluster is a base edge with a status either NeedAllocation or Allocated - * - rest of the edges in a cluster are NotAllocated ones, since they point to their base edge + * - rest of the edges in a cluster are NotAllocated ones, since they point to another edge */ static EdgeClusters FormEdgeClusters(const std::vector& graphEdges) { - typedef std::unordered_map EdgeClusterIdxMap; + using EdgeClusterIdxMap = std::unordered_map; EdgeClusters edgeClusters; EdgeClusterIdxMap edgeClusterIndices; @@ -1063,10 +1050,10 @@ static MemoryRegions FormMemoryRegions(const EdgeClusters& clusters, return memoryRegions; } -static OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegions, - const EdgeClusters& clusters, - const std::map& outputNodes) { - OutputMemoryBlocks outputMemBlocks; +static Graph::OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegions, + const EdgeClusters& clusters, + const std::map& outputNodes) { + Graph::OutputMemoryBlocks outputMemBlocks; memoryRegions.erase(std::remove_if(memoryRegions.begin(), memoryRegions.end(), [&](const MemoryRegion& region) { if (region.size >= 0 || !one_of(region.type, MemoryRegion::RegionType::OUTPUT, MemoryRegion::RegionType::IO)) { return false; @@ -1108,7 +1095,7 @@ static OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegio * 1) EdgeClusters - to propagate the solution through the graph * 2) OutputMemoryBlocks - to allow memory sharing between graph and infer request */ -static std::tuple +static std::tuple SolveMemoryReuse(const std::shared_ptr& memoryControl, const AllocationContext& allocationContext, const GraphContext::CPtr graphContext, diff --git a/src/plugins/intel_cpu/src/graph.h b/src/plugins/intel_cpu/src/graph.h index 5d4805c15fb52c..d73cf6e6f3bdb6 100644 --- a/src/plugins/intel_cpu/src/graph.h +++ b/src/plugins/intel_cpu/src/graph.h @@ -30,11 +30,10 @@ namespace node { class MemoryStateNode; } // namespace node -using OutputMemoryBlocks = std::unordered_map; - class Graph { public: - typedef std::shared_ptr Ptr; + using Ptr = std::shared_ptr; + using OutputMemoryBlocks = std::unordered_map; enum class Status { NotReady = 0, @@ -237,8 +236,6 @@ class Graph { void Activate(const std::vector& externalInputMemory = {}, const std::vector& externalOutputMemory = {}); - void Allocate(); - /** * Register the graph in the global allocation context by transforming * local execution data into the global one: @@ -290,6 +287,7 @@ class Graph { const std::vector& outputConfigs = {}); void Configure(bool optimize = true); + void Allocate(); void InitNodes(); void InitDescriptors(); diff --git a/src/plugins/intel_cpu/src/memory_control.hpp b/src/plugins/intel_cpu/src/memory_control.hpp index 23f65a0fe3f326..9a6d14d82ecaff 100644 --- a/src/plugins/intel_cpu/src/memory_control.hpp +++ b/src/plugins/intel_cpu/src/memory_control.hpp @@ -48,7 +48,7 @@ class MemoryControl { void releaseMemory(); private: - explicit MemoryControl(); + MemoryControl(); void insert(const MemoryRegion& region, const std::vector& syncInds); friend class NetworkMemoryControl; diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 88d69fe39a9d20..78e47ed60c816a 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -114,7 +114,6 @@ class NodeDesc { bool hasZeroInputDims() const { const auto& inputConfigs = getConfig().inConfs; - return std::any_of(inputConfigs.begin(), inputConfigs.end(), [](const PortConfig& portConfig) { return portConfig.hasZeroDims(); }); @@ -122,13 +121,13 @@ class NodeDesc { bool hasZeroInputDimsAtPort(size_t portIdx) const { const auto& inputConfigs = getConfig().inConfs; - OPENVINO_ASSERT("Attempt to get NodeDesc input configuration for port " , portIdx, ". Number of inputs is ", inputConfigs.size()); + OPENVINO_ASSERT(portIdx < inputConfigs.size(), "Attempt to get NodeDesc input configuration for port ", + portIdx, ". Number of inputs is ", inputConfigs.size()); return inputConfigs[portIdx].hasZeroDims(); } bool hasZeroOutputDims() const { const auto& outputConfigs = getConfig().outConfs; - return std::any_of(outputConfigs.begin(), outputConfigs.end(), [](const PortConfig& portConfig) { return portConfig.hasZeroDims(); }); @@ -136,7 +135,8 @@ class NodeDesc { bool hasZeroOutputDimsAtPort(size_t portIdx) const { const auto& outputConfigs = getConfig().outConfs; - OPENVINO_ASSERT("Attempt to get NodeDesc output configuration for port " , portIdx, ". Number of outputs is ", outputConfigs.size()); + OPENVINO_ASSERT(portIdx < outputConfigs.size(), "Attempt to get NodeDesc output configuration for port ", + portIdx, ". Number of outputs is ", outputConfigs.size()); return outputConfigs[portIdx].hasZeroDims(); } diff --git a/src/plugins/intel_cpu/src/nodes/composite.cpp b/src/plugins/intel_cpu/src/nodes/composite.cpp index bcb142df51dcb5..d459d5526abd9d 100644 --- a/src/plugins/intel_cpu/src/nodes/composite.cpp +++ b/src/plugins/intel_cpu/src/nodes/composite.cpp @@ -42,7 +42,7 @@ void Composite::selectOptimalPrimitiveDescriptor() { std::vector inConfs; std::vector graphInputConfig; - const bool isInPlace = true; + constexpr bool isInPlace = true; for (size_t i = 0; i < getParentEdges().size(); i++) { auto desc = getParentOutputMemDesc(getParentEdgeAt(i)); diff --git a/src/plugins/intel_cpu/src/nodes/lora.cpp b/src/plugins/intel_cpu/src/nodes/lora.cpp index 832ece7cd1f153..3237c9de450adf 100644 --- a/src/plugins/intel_cpu/src/nodes/lora.cpp +++ b/src/plugins/intel_cpu/src/nodes/lora.cpp @@ -52,7 +52,7 @@ void LoRA::selectOptimalPrimitiveDescriptor() { inConfs.emplace_back(mainInputDesc); - const bool isInPlace = true; + constexpr bool isInPlace = true; graphInputConfig.emplace_back(node::Input::InputConfig{mainInputDesc, isInPlace}); for (size_t i = 1; i < getParentEdges().size(); i++) { diff --git a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake index 9d7fa9f9d9a365..5b8614ae9b63bb 100644 --- a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake +++ b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake @@ -96,7 +96,6 @@ endif() endfunction() if(ENABLE_CPU_SPECIFIC_TARGET_PER_TEST) - # create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src ov_cpu_func_subgraph) create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/common ov_cpu_func_subgraph) create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests ov_cpu_func_slt) endif()