Skip to content

Commit

Permalink
Apply review comments 1
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorDuplensky committed Nov 25, 2024
1 parent ae29fda commit 408e748
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 53 deletions.
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/edge.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Edge {
/**
 * Status of an edge with respect to its memory.
 * Each enumerator is declared exactly once; the per-value comments describe its meaning.
 */
enum class Status {
    Uninitialized,   // base edge is not known yet
    NeedAllocation,  // edge is the base edge
    NotAllocated,    // edge references another edge
    Allocated,       // edge memory is allocated
    Validated        // edge is validated
};
Expand Down
65 changes: 26 additions & 39 deletions src/plugins/intel_cpu/src/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -722,25 +722,6 @@ void Graph::ResolveComplexInplaceConflicts() {
*/
static size_t AllocateStringsAndConstants(EdgeClusters& clusters,
const GraphContext::CPtr context) {
auto allocateStringMemory = [context](const EdgePtr& edge) {
if (edge->getParent()->isConstant()) {
if (edge->getParent()->getType() == Type::Input) {
auto constNode = static_cast<node::Input *>(edge->getParent().get());
edge->reuse(std::const_pointer_cast<IMemory>(constNode->getMemoryPtr()));
} else {
edge->externalAllocate(context->getWeightsCache());
}
auto stringMemory = dynamic_cast<StringMemory *>(edge->getMemoryPtr().get());
OPENVINO_ASSERT(stringMemory, "[CPU] Edge between nodes '",
edge->getParent()->getName(), "' and '", edge->getChild()->getName(), "' must have StringMemory.");
return stringMemory->getStringMemoryBlockPtr();
}

auto memory = std::make_shared<StringMemory>(context->getEngine(), edge->getDesc());
edge->reuse(memory);
return memory->getStringMemoryBlockPtr();
};

auto allocateConstantEdge = [context](const EdgePtr& edge) {
if (edge->getParent()->getType() == Type::Input) {
auto constNode = std::static_pointer_cast<node::Input>(edge->getParent());
Expand All @@ -750,6 +731,12 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters,
}
};

auto allocateStringMemory = [context](const EdgePtr& edge) {
auto memory = std::make_shared<StringMemory>(context->getEngine(), edge->getDesc());
edge->reuse(memory);
return memory->getStringMemoryBlockPtr();
};

auto notAllocatedPartitionEnd =
std::partition(clusters.begin(), clusters.end(),
[&allocateStringMemory, &allocateConstantEdge, &context](const EdgeCluster& cluster) {
Expand All @@ -771,7 +758,18 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters,
return true;
}

// Allocate a string cluster
// Allocate a cluster of the constants
if (baseEdge->getParent()->isConstant()) {
// @todo can we add some meaningful assert here?
for (auto &edge : cluster) {
if (edge->getParent()->isConstant() && edge->getStatus() == Edge::Status::NeedAllocation) {
allocateConstantEdge(edge);
}
}
return false;
}

// Allocate a non-constant string cluster
if (baseEdge->getDesc().getPrecision() == element::string) {
OPENVINO_ASSERT(std::all_of(cluster.begin(), cluster.end(),
[](const EdgePtr& edge) {
Expand All @@ -786,17 +784,6 @@ static size_t AllocateStringsAndConstants(EdgeClusters& clusters,
return false;
}

// Allocate a constant cluster
if (baseEdge->getParent()->isConstant()) {
// @todo can we add some meaningful assert here?
for (auto &edge : cluster) {
if (edge->getParent()->isConstant() && edge->getStatus() == Edge::Status::NeedAllocation) {
allocateConstantEdge(edge);
}
}
return false;
}

return true;
});

Expand Down Expand Up @@ -937,13 +924,13 @@ static void ValidateEdgeStatus(const std::vector<EdgePtr>& edges) {

/**
* Forms clusters of edges.
* An edge cluster is a collection of edges with the following properties:
* - base edge is an edge with a Memory which other edges point to by means of inplace logic
* - first edge of a cluster is a base edge with a status either NeedAllocation or Allocated
* - the rest of the edges in a cluster are NotAllocated ones, since they point to another edge
*/
static EdgeClusters FormEdgeClusters(const std::vector<EdgePtr>& graphEdges) {
typedef std::unordered_map<EdgePtr, size_t> EdgeClusterIdxMap;
using EdgeClusterIdxMap = std::unordered_map<EdgePtr, size_t>;
EdgeClusters edgeClusters;
EdgeClusterIdxMap edgeClusterIndices;

Expand Down Expand Up @@ -1063,10 +1050,10 @@ static MemoryRegions FormMemoryRegions(const EdgeClusters& clusters,
return memoryRegions;
}

static OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegions,
const EdgeClusters& clusters,
const std::map<std::size_t, NodePtr>& outputNodes) {
OutputMemoryBlocks outputMemBlocks;
static Graph::OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegions,
const EdgeClusters& clusters,
const std::map<std::size_t, NodePtr>& outputNodes) {
Graph::OutputMemoryBlocks outputMemBlocks;
memoryRegions.erase(std::remove_if(memoryRegions.begin(), memoryRegions.end(), [&](const MemoryRegion& region) {
if (region.size >= 0 || !one_of(region.type, MemoryRegion::RegionType::OUTPUT, MemoryRegion::RegionType::IO)) {
return false;
Expand Down Expand Up @@ -1108,7 +1095,7 @@ static OutputMemoryBlocks FilterOutDynamicOutputEdges(MemoryRegions& memoryRegio
* 1) EdgeClusters - to propagate the solution through the graph
* 2) OutputMemoryBlocks - to allow memory sharing between graph and infer request
*/
static std::tuple<MemoryControl::MemorySolution, EdgeClusters, OutputMemoryBlocks>
static std::tuple<MemoryControl::MemorySolution, EdgeClusters, Graph::OutputMemoryBlocks>
SolveMemoryReuse(const std::shared_ptr<MemoryControl>& memoryControl,
const AllocationContext& allocationContext,
const GraphContext::CPtr graphContext,
Expand Down
8 changes: 3 additions & 5 deletions src/plugins/intel_cpu/src/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,10 @@ namespace node {
class MemoryStateNode;
} // namespace node

using OutputMemoryBlocks = std::unordered_map<std::size_t, ProxyMemoryBlockPtr>;

class Graph {
public:
typedef std::shared_ptr<Graph> Ptr;
using Ptr = std::shared_ptr<Graph>;
using OutputMemoryBlocks = std::unordered_map<std::size_t, ProxyMemoryBlockPtr>;

enum class Status {
NotReady = 0,
Expand Down Expand Up @@ -237,8 +236,6 @@ class Graph {
void Activate(const std::vector<MemoryPtr>& externalInputMemory = {},
const std::vector<MemoryPtr>& externalOutputMemory = {});

void Allocate();

/**
* Register the graph in the global allocation context by transforming
* local execution data into the global one:
Expand Down Expand Up @@ -290,6 +287,7 @@ class Graph {
const std::vector<node::Input::OutputConfig>& outputConfigs = {});

void Configure(bool optimize = true);
void Allocate();

void InitNodes();
void InitDescriptors();
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/memory_control.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class MemoryControl {
void releaseMemory();

private:
explicit MemoryControl();
MemoryControl();
void insert(const MemoryRegion& region, const std::vector<size_t>& syncInds);

friend class NetworkMemoryControl;
Expand Down
8 changes: 4 additions & 4 deletions src/plugins/intel_cpu/src/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,29 +114,29 @@ class NodeDesc {

bool hasZeroInputDims() const {
const auto& inputConfigs = getConfig().inConfs;

return std::any_of(inputConfigs.begin(), inputConfigs.end(), [](const PortConfig& portConfig) {
return portConfig.hasZeroDims();
});
}

/**
 * Tells whether the input port configuration at the given index has zero dims.
 * @param portIdx index into getConfig().inConfs; must be less than the number of input configurations
 * @return result of hasZeroDims() for the selected input port configuration
 * The index is validated with OPENVINO_ASSERT before the element is accessed.
 */
bool hasZeroInputDimsAtPort(size_t portIdx) const {
    const auto& inputConfigs = getConfig().inConfs;
    // The condition must be the first argument: a bare string literal there is always true
    // and would never catch an out-of-range index.
    OPENVINO_ASSERT(portIdx < inputConfigs.size(), "Attempt to get NodeDesc input configuration for port ",
                    portIdx, ". Number of inputs is ", inputConfigs.size());
    return inputConfigs[portIdx].hasZeroDims();
}

bool hasZeroOutputDims() const {
const auto& outputConfigs = getConfig().outConfs;

return std::any_of(outputConfigs.begin(), outputConfigs.end(), [](const PortConfig& portConfig) {
return portConfig.hasZeroDims();
});
}

/**
 * Tells whether the output port configuration at the given index has zero dims.
 * @param portIdx index into getConfig().outConfs; must be less than the number of output configurations
 * @return result of hasZeroDims() for the selected output port configuration
 * The index is validated with OPENVINO_ASSERT before the element is accessed.
 */
bool hasZeroOutputDimsAtPort(size_t portIdx) const {
    const auto& outputConfigs = getConfig().outConfs;
    // The condition must be the first argument: a bare string literal there is always true
    // and would never catch an out-of-range index.
    OPENVINO_ASSERT(portIdx < outputConfigs.size(), "Attempt to get NodeDesc output configuration for port ",
                    portIdx, ". Number of outputs is ", outputConfigs.size());
    return outputConfigs[portIdx].hasZeroDims();
}

Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/nodes/composite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void Composite::selectOptimalPrimitiveDescriptor() {
std::vector<PortConfig> inConfs;
std::vector<Input::InputConfig> graphInputConfig;

const bool isInPlace = true;
constexpr bool isInPlace = true;

for (size_t i = 0; i < getParentEdges().size(); i++) {
auto desc = getParentOutputMemDesc(getParentEdgeAt(i));
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/nodes/lora.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void LoRA::selectOptimalPrimitiveDescriptor() {

inConfs.emplace_back(mainInputDesc);

const bool isInPlace = true;
constexpr bool isInPlace = true;
graphInputConfig.emplace_back(node::Input::InputConfig{mainInputDesc, isInPlace});

for (size_t i = 1; i < getParentEdges().size(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ endif()
endfunction()

# When ENABLE_CPU_SPECIFIC_TARGET_PER_TEST is set, call create_target_per_test_for_directory
# for the common subgraph tests directory and for the single layer tests directory.
if(ENABLE_CPU_SPECIFIC_TARGET_PER_TEST)
# NOTE(review): the call for the whole custom/subgraph_tests/src directory is disabled;
# only custom/subgraph_tests/src/common is registered below.
# create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src ov_cpu_func_subgraph)
create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/common ov_cpu_func_subgraph)
create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests ov_cpu_func_slt)
endif()
Expand Down

0 comments on commit 408e748

Please sign in to comment.