From 0438fe15005a0cc44f52f1dd1e9958234d2bbb94 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Thu, 30 Nov 2023 17:20:09 +0100 Subject: [PATCH 01/19] manage components as a LongUnaryOperator mapping a nodeId to an internal componentId range 0...c-1, c = sum(comp) --- .../similarity/nodesim/NodeSimilarity.java | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java index a787ff5f91..4ff9000301 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java @@ -24,10 +24,10 @@ import org.neo4j.gds.api.Graph; import org.neo4j.gds.api.RelationshipConsumer; import org.neo4j.gds.api.properties.nodes.NodePropertyValues; -import org.neo4j.gds.collections.ha.HugeLongArray; import org.neo4j.gds.collections.ha.HugeObjectArray; import org.neo4j.gds.core.concurrency.ParallelUtil; import org.neo4j.gds.core.utils.SetBitsIterable; +import org.neo4j.gds.core.utils.paged.HugeLongLongMap; import org.neo4j.gds.core.utils.paged.dss.DisjointSetStruct; import org.neo4j.gds.core.utils.progress.BatchingProgressLogger; import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker; @@ -43,6 +43,8 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.ExecutorService; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.LongUnaryOperator; import java.util.stream.LongStream; import java.util.stream.Stream; @@ -62,7 +64,7 @@ public class NodeSimilarity extends Algorithm { private final MetricSimilarityComputer similarityComputer; private HugeObjectArray neighbors; private HugeObjectArray weights; - private HugeLongArray components; + private LongUnaryOperator components; private SimilarityPairTriConsumer similarityConsumer; private final boolean weighted; @@ -263,9 +265,9 @@ private Stream computeParallel() { } private void initComponents() { - components = HugeLongArray.newArray(graph.nodeCount()); if (!config.isEnableComponentOptimization()) { // considering everything as within the same component + components = n -> 0; similarityConsumer = this::computeSimilarityForSingleComponent; return; } @@ -273,12 +275,9 @@ private void initComponents() { similarityConsumer = this::computeSimilarityForComponents; if (config.componentProperty() != null) { - NodePropertyValues nodeProperties = graph.nodeProperties(config.componentProperty()); // extract component info from property - graph.forEachNode(n -> { - components.set(n, nodeProperties.longValue(n)); - return true; - }); + NodePropertyValues nodeProperties = graph.nodeProperties(config.componentProperty()); + components = initComponentIdMapping(graph, nodeProperties::longValue); return; } @@ -293,10 +292,7 @@ private void initComponents() { Wcc wcc = new WccAlgorithmFactory<>().build(graph, wccConfig, ProgressTracker.NULL_TRACKER); DisjointSetStruct disjointSets = wcc.compute(); - graph.forEachNode(n -> { - components.set(n, disjointSets.setIdOf(n)); - return true; - }); + components = initComponentIdMapping(graph, disjointSets::setIdOf); progressTracker.endSubTask(); } @@ -436,6 +432,19 @@ private Stream computeSimilaritiesForNode(long sourceNodeId) { .filter(Objects::nonNull); } + private static LongUnaryOperator initComponentIdMapping(Graph graph, LongUnaryOperator originComponentIdMapper) { + HugeLongLongMap componentIdMappings = new HugeLongLongMap(); + AtomicLong mappedComponentId = new AtomicLong(0L); + graph.forEachNode(n -> { + long originComponentId = originComponentIdMapper.applyAsLong(n); + if (!componentIdMappings.containsKey(originComponentId)) { + componentIdMappings.put(originComponentId, mappedComponentId.getAndIncrement()); + } + return true; + }); + return n -> componentIdMappings.getOrDefault(originComponentIdMapper.applyAsLong(n), 0L); + } + interface SimilarityConsumer { void accept(long sourceNodeId, long targetNodeId, double similarity); } @@ -457,7 +466,7 @@ private void computeSimilarityForSingleComponent(long sourceNodeId, long targetN } private void computeSimilarityForComponents(long sourceNodeId, long targetNodeId, SimilarityConsumer consumer) { - if (components.get(sourceNodeId) != components.get(targetNodeId)) { + if (components.applyAsLong(sourceNodeId) != components.applyAsLong(targetNodeId)) { consumer.accept(sourceNodeId, targetNodeId, 0); return; } From f9bdd61ef757fccbd6d009f77f09daa91c677e30 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Wed, 15 Nov 2023 16:36:57 +0100 Subject: [PATCH 02/19] introduce ComponentIterator --- .../similarity/nodesim/ComponentIterator.java | 104 ++++++++++++++++++ .../similarity/nodesim/NodeSimilarity.java | 14 +-- .../nodesim/ComponentIteratorTest.java | 67 +++++++++++ 3 files changed, 178 insertions(+), 7 deletions(-) create mode 100644 algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java create mode 100644 algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java new file mode 100644 index 0000000000..8abbb21d77 --- /dev/null +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java @@ -0,0 +1,104 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.gds.similarity.nodesim; + +import org.neo4j.gds.collections.ha.HugeLongArray; +import org.neo4j.gds.collections.haa.HugeAtomicLongArray; +import org.neo4j.gds.core.concurrency.ParallelUtil; +import org.neo4j.gds.core.utils.paged.ParalleLongPageCreator; +import org.neo4j.gds.termination.TerminationFlag; + +import java.util.Iterator; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Iterator over all nodes in a given component. Contains utility functions to determine a unique index per + * node and the index bounds per component. + */ +public class ComponentIterator implements Iterator { + private long endIdx; + private long runningIdx; + private HugeLongArray nodesSortedByComponent; + + public ComponentIterator(long componentId, HugeLongArray nodesSortedByComponent, + HugeAtomicLongArray upperBoundPerComponent) { + + this.nodesSortedByComponent = nodesSortedByComponent; + runningIdx = upperBoundPerComponent.get(componentId - 1); + endIdx = upperBoundPerComponent.get(componentId); + } + + @Override + public boolean hasNext() { + return runningIdx < endIdx; + } + + @Override + public Long next() { + runningIdx++; + return nodesSortedByComponent.get(runningIdx); + } + + public static HugeAtomicLongArray getIndexUpperBoundPerComponent(HugeLongArray components, int concurrency) { + long nodeCount = components.size(); + var upperBoundPerComponent = HugeAtomicLongArray.of(nodeCount, ParalleLongPageCreator.passThrough(concurrency)); + + // init coordinate array to contain the nr of nodes per component + // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes: {(comp1, 3), (comp2, 20)} + ParallelUtil.parallelForEachNode(nodeCount, concurrency, TerminationFlag.RUNNING_TRUE, nodeId -> { + { + long communityId = components.get(nodeId); + upperBoundPerComponent.getAndAdd(communityId, 1); + } + }); + AtomicLong atomicNodeSum = new AtomicLong(); + // modify coordinate array to contain the end of a component's range in the order called + // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes: {(comp1, 3), (comp2, 23)} + ParallelUtil.parallelForEachNode(nodeCount, concurrency, TerminationFlag.RUNNING_TRUE, componentId -> + { + if (upperBoundPerComponent.get(componentId) > 0) { + var upperIndex = atomicNodeSum.addAndGet(upperBoundPerComponent.get(componentId)); + upperBoundPerComponent.set(componentId, upperIndex); + } + }); + + return upperBoundPerComponent; + } + + public static HugeLongArray getNodesSortedByComponent(HugeLongArray components, + HugeAtomicLongArray componentCoordinateArray, int concurrency) { + + long nodeCount = components.size(); + var nodesSortedByComponent = HugeLongArray.newArray(nodeCount); + + // fill nodesSortedByComponent with nodeId per unique index + // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes, named in their order of processing: + // {(0, n3), (1, n2), (2, n1), (3, n23), .., (22, n4)} + ParallelUtil.parallelForEachNode(nodeCount, concurrency, TerminationFlag.RUNNING_TRUE, indexId -> + { + long nodeId = nodeCount - indexId - 1; + long componentId = components.get(nodeId); + long coordinate = componentCoordinateArray.getAndAdd(componentId, -1); + nodesSortedByComponent.set(coordinate - 1, nodeId); + }); + + return nodesSortedByComponent; + } +} diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java index 4ff9000301..9f9123c836 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java @@ -199,7 +199,7 @@ private SimilarityGraphResult computeToGraph() { private void prepare() { progressTracker.beginSubTask(); - initComponents(); + components = initComponents(); if (config.runWCC()) { progressTracker.beginSubTask(); @@ -264,12 +264,11 @@ private Stream computeParallel() { } } - private void initComponents() { + private LongUnaryOperator initComponents() { if (!config.isEnableComponentOptimization()) { // considering everything as within the same component - components = n -> 0; similarityConsumer = this::computeSimilarityForSingleComponent; - return; + return n -> 0; } similarityConsumer = this::computeSimilarityForComponents; @@ -277,8 +276,8 @@ private void initComponents() { if (config.componentProperty() != null) { // extract component info from property NodePropertyValues nodeProperties = graph.nodeProperties(config.componentProperty()); - components = initComponentIdMapping(graph, nodeProperties::longValue); - return; + return initComponentIdMapping(graph, nodeProperties::longValue); + } // run WCC to determine components @@ -292,8 +291,9 @@ private void initComponents() { Wcc wcc = new WccAlgorithmFactory<>().build(graph, wccConfig, ProgressTracker.NULL_TRACKER); DisjointSetStruct disjointSets = wcc.compute(); - components = initComponentIdMapping(graph, disjointSets::setIdOf); + var components = initComponentIdMapping(graph, disjointSets::setIdOf); progressTracker.endSubTask(); + return components; } private Stream computeAll() { diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java new file mode 100644 index 0000000000..bd19d6dd73 --- /dev/null +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java @@ -0,0 +1,67 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.gds.similarity.nodesim; + +import org.junit.jupiter.api.Test; +import org.neo4j.gds.collections.ha.HugeLongArray; +import org.neo4j.gds.collections.haa.HugeAtomicLongArray; +import org.neo4j.gds.core.utils.paged.ParalleLongPageCreator; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class ComponentIteratorTest { + @Test + void shouldReturnNodesForComponent() { + var components = HugeLongArray.newArray(8); + // nodeId, componentId + for (int nodeId = 0; nodeId < 3; nodeId++) { + components.set(nodeId, 0); + } + for (int nodeId = 3; nodeId < 8; nodeId++) { + components.set(nodeId, 1); + } + + var nodesSortedByComponent = HugeLongArray.newArray(8); + // uniqueIdx, nodeId + nodesSortedByComponent.set(0, 2); + nodesSortedByComponent.set(1, 1); + nodesSortedByComponent.set(2, 0); + nodesSortedByComponent.set(3, 7); + nodesSortedByComponent.set(4, 6); + nodesSortedByComponent.set(5, 5); + nodesSortedByComponent.set(6, 4); + nodesSortedByComponent.set(7, 3); + + var upperBoundPerComponent = HugeAtomicLongArray.of(8, ParalleLongPageCreator.passThrough(4)); + // componentId, upperBound + upperBoundPerComponent.set(0, 2); + upperBoundPerComponent.set(1, 7); + + ComponentIterator iterator = new ComponentIterator(1, nodesSortedByComponent, upperBoundPerComponent); + + for (int nodeId = 7; nodeId > 2; nodeId--) { + assertTrue(iterator.hasNext()); + assertEquals(nodeId, iterator.next()); + } + assertFalse(iterator.hasNext()); + } +} From d7e822d67ee615dac48349b4bbb5a3e4272bb7b2 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Thu, 16 Nov 2023 11:46:53 +0100 Subject: [PATCH 03/19] remove dependency to other components parallel processing give no guarantee on which component follows another --- .../similarity/nodesim/ComponentIterator.java | 19 ++++++++++--------- .../nodesim/ComponentIteratorTest.java | 5 +++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java index 8abbb21d77..085a8faf98 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java @@ -33,27 +33,28 @@ * node and the index bounds per component. */ public class ComponentIterator implements Iterator { - private long endIdx; private long runningIdx; + private long componentId; + private HugeLongArray components; private HugeLongArray nodesSortedByComponent; - public ComponentIterator(long componentId, HugeLongArray nodesSortedByComponent, + public ComponentIterator(long componentId, HugeLongArray components, HugeLongArray nodesSortedByComponent, HugeAtomicLongArray upperBoundPerComponent) { + this.componentId = componentId; + this.components = components; this.nodesSortedByComponent = nodesSortedByComponent; - runningIdx = upperBoundPerComponent.get(componentId - 1); - endIdx = upperBoundPerComponent.get(componentId); + this.runningIdx = upperBoundPerComponent.get(componentId); } @Override public boolean hasNext() { - return runningIdx < endIdx; + return components.get(nodesSortedByComponent.get(runningIdx)) == componentId; } @Override public Long next() { - runningIdx++; - return nodesSortedByComponent.get(runningIdx); + return nodesSortedByComponent.get(runningIdx--); } public static HugeAtomicLongArray getIndexUpperBoundPerComponent(HugeLongArray components, int concurrency) { @@ -64,8 +65,8 @@ public static HugeAtomicLongArray getIndexUpperBoundPerComponent(HugeLongArray c // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes: {(comp1, 3), (comp2, 20)} ParallelUtil.parallelForEachNode(nodeCount, concurrency, TerminationFlag.RUNNING_TRUE, nodeId -> { { - long communityId = components.get(nodeId); - upperBoundPerComponent.getAndAdd(communityId, 1); + long componentId = components.get(nodeId); + upperBoundPerComponent.getAndAdd(componentId, 1); } }); AtomicLong atomicNodeSum = new AtomicLong(); diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java index bd19d6dd73..0cbaee7fb9 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java @@ -56,9 +56,10 @@ void shouldReturnNodesForComponent() { upperBoundPerComponent.set(0, 2); upperBoundPerComponent.set(1, 7); - ComponentIterator iterator = new ComponentIterator(1, nodesSortedByComponent, upperBoundPerComponent); + ComponentIterator iterator = new ComponentIterator(1, components, nodesSortedByComponent, + upperBoundPerComponent); - for (int nodeId = 7; nodeId > 2; nodeId--) { + for (int nodeId = 3; nodeId < 8; nodeId++) { assertTrue(iterator.hasNext()); assertEquals(nodeId, iterator.next()); } From e270a67eef42cd0f2f79c1816c68b492da97645d Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Thu, 16 Nov 2023 15:21:53 +0100 Subject: [PATCH 04/19] reorganize to an object that produces an iterator --- ...rator.java => NodesSortedByComponent.java} | 61 ++++++--- .../nodesim/ComponentIteratorTest.java | 68 ---------- .../nodesim/NodesSortedByComponentTest.java | 124 ++++++++++++++++++ 3 files changed, 164 insertions(+), 89 deletions(-) rename algo/src/main/java/org/neo4j/gds/similarity/nodesim/{ComponentIterator.java => NodesSortedByComponent.java} (63%) delete mode 100644 algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java create mode 100644 algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java similarity index 63% rename from algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java rename to algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java index 085a8faf98..98e6b31131 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentIterator.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java @@ -26,38 +26,57 @@ import org.neo4j.gds.termination.TerminationFlag; import java.util.Iterator; +import java.util.NoSuchElementException; import java.util.concurrent.atomic.AtomicLong; /** - * Iterator over all nodes in a given component. Contains utility functions to determine a unique index per - * node and the index bounds per component. + * Manages nodes sorted by component. Produces an iterator over all nodes in a given component. */ -public class ComponentIterator implements Iterator { - private long runningIdx; - private long componentId; - private HugeLongArray components; - private HugeLongArray nodesSortedByComponent; +public class NodesSortedByComponent { + private final HugeLongArray components; + private final HugeAtomicLongArray upperBoundPerComponent; + private final HugeLongArray nodesSorted; - public ComponentIterator(long componentId, HugeLongArray components, HugeLongArray nodesSortedByComponent, - HugeAtomicLongArray upperBoundPerComponent) { - - this.componentId = componentId; + public NodesSortedByComponent(HugeLongArray components, int concurrency) { this.components = components; - this.nodesSortedByComponent = nodesSortedByComponent; - this.runningIdx = upperBoundPerComponent.get(componentId); + this.upperBoundPerComponent = computeIndexUpperBoundPerComponent(components, concurrency); + var componentCoordinateArray = HugeAtomicLongArray.of(components.size(), ParalleLongPageCreator.passThrough(concurrency)); + upperBoundPerComponent.copyTo(componentCoordinateArray, components.size()); + this.nodesSorted = computeNodesSortedByComponent(components, componentCoordinateArray, concurrency); } - @Override - public boolean hasNext() { - return components.get(nodesSortedByComponent.get(runningIdx)) == componentId; + public Iterator iterator(long componentId) { + return new Iterator<>() { + long runningIdx = getUpperBoundPerComponent().get(componentId); + @Override + public boolean hasNext() { + return getComponents().get(getNodesSorted().get(runningIdx)) == componentId; + } + + @Override + public Long next() { + try { + return getNodesSorted().get(runningIdx--); + } catch (ArrayIndexOutOfBoundsException ex) { + throw new NoSuchElementException(); + } + } + }; + } + + HugeLongArray getComponents() { + return components; + } + + HugeAtomicLongArray getUpperBoundPerComponent() { + return upperBoundPerComponent; } - @Override - public Long next() { - return nodesSortedByComponent.get(runningIdx--); + HugeLongArray getNodesSorted() { + return nodesSorted; } - public static HugeAtomicLongArray getIndexUpperBoundPerComponent(HugeLongArray components, int concurrency) { + static HugeAtomicLongArray computeIndexUpperBoundPerComponent(HugeLongArray components, int concurrency) { long nodeCount = components.size(); var upperBoundPerComponent = HugeAtomicLongArray.of(nodeCount, ParalleLongPageCreator.passThrough(concurrency)); @@ -83,7 +102,7 @@ public static HugeAtomicLongArray getIndexUpperBoundPerComponent(HugeLongArray c return upperBoundPerComponent; } - public static HugeLongArray getNodesSortedByComponent(HugeLongArray components, + static HugeLongArray computeNodesSortedByComponent(HugeLongArray components, HugeAtomicLongArray componentCoordinateArray, int concurrency) { long nodeCount = components.size(); diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java deleted file mode 100644 index 0cbaee7fb9..0000000000 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentIteratorTest.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) "Neo4j" - * Neo4j Sweden AB [http://neo4j.com] - * - * This file is part of Neo4j. - * - * Neo4j is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package org.neo4j.gds.similarity.nodesim; - -import org.junit.jupiter.api.Test; -import org.neo4j.gds.collections.ha.HugeLongArray; -import org.neo4j.gds.collections.haa.HugeAtomicLongArray; -import org.neo4j.gds.core.utils.paged.ParalleLongPageCreator; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; - -class ComponentIteratorTest { - @Test - void shouldReturnNodesForComponent() { - var components = HugeLongArray.newArray(8); - // nodeId, componentId - for (int nodeId = 0; nodeId < 3; nodeId++) { - components.set(nodeId, 0); - } - for (int nodeId = 3; nodeId < 8; nodeId++) { - components.set(nodeId, 1); - } - - var nodesSortedByComponent = HugeLongArray.newArray(8); - // uniqueIdx, nodeId - nodesSortedByComponent.set(0, 2); - nodesSortedByComponent.set(1, 1); - nodesSortedByComponent.set(2, 0); - nodesSortedByComponent.set(3, 7); - nodesSortedByComponent.set(4, 6); - nodesSortedByComponent.set(5, 5); - nodesSortedByComponent.set(6, 4); - nodesSortedByComponent.set(7, 3); - - var upperBoundPerComponent = HugeAtomicLongArray.of(8, ParalleLongPageCreator.passThrough(4)); - // componentId, upperBound - upperBoundPerComponent.set(0, 2); - upperBoundPerComponent.set(1, 7); - - ComponentIterator iterator = new ComponentIterator(1, components, nodesSortedByComponent, - upperBoundPerComponent); - - for (int nodeId = 3; nodeId < 8; nodeId++) { - assertTrue(iterator.hasNext()); - assertEquals(nodeId, iterator.next()); - } - assertFalse(iterator.hasNext()); - } -} diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java new file mode 100644 index 0000000000..0a4f0f9651 --- /dev/null +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java @@ -0,0 +1,124 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.gds.similarity.nodesim; + +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import org.neo4j.gds.collections.ha.HugeLongArray; +import org.neo4j.gds.collections.haa.HugeAtomicLongArray; +import org.neo4j.gds.core.utils.paged.ParalleLongPageCreator; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class NodesSortedByComponentTest { + + @Test + void shouldDetermineIndexUpperBound() { + var components = HugeLongArray.newArray(28); + // nodeId, componentId + for (int nodeId = 0; nodeId < 3; nodeId++) { + components.set(nodeId, 0); // size 3 + } + for (int nodeId = 3; nodeId < 8; nodeId++) { + components.set(nodeId, 1); // size 5 + } + for (int nodeId = 8; nodeId < 12; nodeId++) { + components.set(nodeId, 2); // size 4 + } + for (int nodeId = 12; nodeId < 18; nodeId++) { + components.set(nodeId, 3); // size 6 + } + components.set(18, 4); // size 1 + for (int nodeId = 19; nodeId < 21; nodeId++) { + components.set(nodeId, 5); // size 2 + } + for (int nodeId = 21; nodeId < 28; nodeId++) { + components.set(nodeId, 6); // size 7 + } + + var idxUpperBoundPerComponent = NodesSortedByComponent.computeIndexUpperBoundPerComponent(components, 4); + // we cannot infer which component follows another, but the size must match for the component + Map componentPerIdxUpperBound = new HashMap<>(7); + for (int i = 0; i < 7; i++) { + componentPerIdxUpperBound.put(idxUpperBoundPerComponent.get(i), (long) i); + } + assertEquals(0, idxUpperBoundPerComponent.get(7)); + int lowerBound = 0; + for (long key : componentPerIdxUpperBound.keySet().stream().sorted().toList()) { + switch ((int) (key - lowerBound)) { + case 1: assertEquals(4, componentPerIdxUpperBound.get(key));break; + case 2: assertEquals(5, componentPerIdxUpperBound.get(key));break; + case 3: assertEquals(0, componentPerIdxUpperBound.get(key));break; + case 4: assertEquals(2, componentPerIdxUpperBound.get(key));break; + case 5: assertEquals(1, componentPerIdxUpperBound.get(key));break; + case 6: assertEquals(3, componentPerIdxUpperBound.get(key));break; + case 7: assertEquals(6, componentPerIdxUpperBound.get(key));break; + } + lowerBound = (int) (key); + } + } + + @Test + void shouldReturnNodesForComponent() { + var components = HugeLongArray.newArray(8); + // nodeId, componentId + for (int nodeId = 0; nodeId < 3; nodeId++) { + components.set(nodeId, 0); + } + for (int nodeId = 3; nodeId < 8; nodeId++) { + components.set(nodeId, 1); + } + + var nodesSorted = HugeLongArray.newArray(8); + // uniqueIdx, nodeId + nodesSorted.set(0, 2); + nodesSorted.set(1, 1); + nodesSorted.set(2, 0); + nodesSorted.set(3, 7); + nodesSorted.set(4, 6); + nodesSorted.set(5, 5); + nodesSorted.set(6, 4); + nodesSorted.set(7, 3); + + var upperBoundPerComponent = HugeAtomicLongArray.of(8, ParalleLongPageCreator.passThrough(4)); + // componentId, upperBound + upperBoundPerComponent.set(0, 2); + upperBoundPerComponent.set(1, 7); + + NodesSortedByComponent nodesSortedByComponentMock = Mockito.mock(NodesSortedByComponent.class); + Mockito.doReturn(components).when(nodesSortedByComponentMock).getComponents(); + Mockito.doReturn(upperBoundPerComponent).when(nodesSortedByComponentMock).getUpperBoundPerComponent(); + Mockito.doReturn(nodesSorted).when(nodesSortedByComponentMock).getNodesSorted(); + Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(1L); + + Iterator iterator = nodesSortedByComponentMock.iterator(1L); + for (int nodeId = 3; nodeId < 8; nodeId++) { + assertTrue(iterator.hasNext()); + assertEquals(nodeId, iterator.next()); + } + assertFalse(iterator.hasNext()); + } +} From 4a0bf0c22c339d8d5e263c3a1500d17954bd08a4 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Mon, 20 Nov 2023 11:50:51 +0100 Subject: [PATCH 05/19] handle runningIdx < 0 --- .../neo4j/gds/similarity/nodesim/NodesSortedByComponent.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java index 98e6b31131..f2736e3c98 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java @@ -50,7 +50,7 @@ public Iterator iterator(long componentId) { long runningIdx = getUpperBoundPerComponent().get(componentId); @Override public boolean hasNext() { - return getComponents().get(getNodesSorted().get(runningIdx)) == componentId; + return runningIdx > -1 && getComponents().get(getNodesSorted().get(runningIdx)) == componentId; } @Override From dd7ef89be3f3f02c2f5215fba667a037dd8d2f15 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Mon, 20 Nov 2023 16:21:35 +0100 Subject: [PATCH 06/19] stream only target nodes in same component --- .../similarity/nodesim/NodeSimilarity.java | 27 +++-- .../nodesim/NodesSortedByComponent.java | 96 +++++++++++------ .../nodesim/NodeSimilarityTest.java | 5 +- .../nodesim/NodesSortedByComponentTest.java | 101 ++++++++++++------ 4 files changed, 148 insertions(+), 81 deletions(-) diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java index 9f9123c836..5cee19e585 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java @@ -44,9 +44,11 @@ import java.util.Optional; import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiFunction; import java.util.function.LongUnaryOperator; import java.util.stream.LongStream; import java.util.stream.Stream; +import java.util.stream.StreamSupport; public class NodeSimilarity extends Algorithm { @@ -68,6 +70,7 @@ public class NodeSimilarity extends Algorithm { private SimilarityPairTriConsumer similarityConsumer; private final boolean weighted; + private BiFunction targetNodeStream; public static NodeSimilarity create( Graph graph, @@ -200,6 +203,10 @@ private void prepare() { progressTracker.beginSubTask(); components = initComponents(); + var nodesByComponent = new NodesSortedByComponent(components, graph.nodeCount(), concurrency); + targetNodeStream = (componentId, offset) -> StreamSupport.longStream( + nodesByComponent.spliterator(componentId, offset), true) + .filter(targetNodes::get); if (config.runWCC()) { progressTracker.beginSubTask(); @@ -325,7 +332,7 @@ private TopKMap computeTopKMap() { loggableAndTerminatableSourceNodeStream() .forEach(sourceNodeId -> { if (sourceNodeFilter.equals(NodeFilter.noOp)) { - targetNodesStream(sourceNodeId + 1) + targetNodeStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) .forEach(targetNodeId -> similarityConsumer.accept(sourceNodeId, targetNodeId, (source, target, similarity) -> { topKMap.put(source, target, similarity); @@ -333,7 +340,7 @@ private TopKMap computeTopKMap() { } )); } else { - targetNodesStream() + targetNodeStream.apply(components.applyAsLong(sourceNodeId), -1L) .filter(targetNodeId -> sourceNodeId != targetNodeId) .forEach(targetNodeId -> similarityConsumer.accept(sourceNodeId, targetNodeId, topKMap::put)); } @@ -362,7 +369,7 @@ private TopKMap computeTopKMapParallel() { // into these queues is not considered to be thread-safe. // Hence, we need to ensure that down the stream, exactly one queue // within the TopKMap processes all pairs for a single node. - targetNodesStream() + targetNodeStream.apply(components.applyAsLong(sourceNodeId), -1L) .filter(targetNodeId -> sourceNodeId != targetNodeId) .forEach(targetNodeId -> similarityConsumer.accept(sourceNodeId, targetNodeId, topKMap::put)) ) @@ -379,10 +386,10 @@ private Stream computeTopN() { loggableAndTerminatableSourceNodeStream() .forEach(sourceNodeId -> { if (sourceNodeFilter.equals(NodeFilter.noOp)) { - targetNodesStream(sourceNodeId + 1) + targetNodeStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) .forEach(targetNodeId -> similarityConsumer.accept(sourceNodeId, targetNodeId, topNList::add)); } else { - targetNodesStream() + targetNodeStream.apply(components.applyAsLong(sourceNodeId), -1L) .filter(targetNodeId -> sourceNodeId != targetNodeId) .forEach(targetNodeId -> similarityConsumer.accept(sourceNodeId, targetNodeId, topNList::add)); } @@ -410,16 +417,8 @@ private LongStream loggableAndTerminatableSourceNodeStream() { return checkProgress(sourceNodesStream()); } - private LongStream targetNodesStream(long offset) { - return new SetBitsIterable(targetNodes, offset).stream(); - } - - private LongStream targetNodesStream() { - return targetNodesStream(0); - } - private Stream computeSimilaritiesForNode(long sourceNodeId) { - return targetNodesStream(sourceNodeId + 1) + return targetNodeStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) .mapToObj(targetNodeId -> { var resultHolder = new SimilarityResult[]{null}; similarityConsumer.accept( diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java index f2736e3c98..7ad77d7766 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java @@ -25,46 +25,41 @@ import org.neo4j.gds.core.utils.paged.ParalleLongPageCreator; import org.neo4j.gds.termination.TerminationFlag; -import java.util.Iterator; import java.util.NoSuchElementException; +import java.util.PrimitiveIterator; +import java.util.Spliterator; +import java.util.Spliterators; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.LongUnaryOperator; /** * Manages nodes sorted by component. Produces an iterator over all nodes in a given component. */ public class NodesSortedByComponent { - private final HugeLongArray components; + private final LongUnaryOperator components; private final HugeAtomicLongArray upperBoundPerComponent; private final HugeLongArray nodesSorted; - public NodesSortedByComponent(HugeLongArray components, int concurrency) { + public NodesSortedByComponent(LongUnaryOperator components, long nodeCount, int concurrency) { this.components = components; - this.upperBoundPerComponent = computeIndexUpperBoundPerComponent(components, concurrency); - var componentCoordinateArray = HugeAtomicLongArray.of(components.size(), ParalleLongPageCreator.passThrough(concurrency)); - upperBoundPerComponent.copyTo(componentCoordinateArray, components.size()); + this.upperBoundPerComponent = computeIndexUpperBoundPerComponent(components, nodeCount, concurrency); + var componentCoordinateArray = HugeAtomicLongArray.of(nodeCount, ParalleLongPageCreator.passThrough(concurrency)); + upperBoundPerComponent.copyTo(componentCoordinateArray, nodeCount); this.nodesSorted = computeNodesSortedByComponent(components, componentCoordinateArray, concurrency); } - public Iterator iterator(long componentId) { - return new Iterator<>() { - long runningIdx = getUpperBoundPerComponent().get(componentId); - @Override - public boolean hasNext() { - return runningIdx > -1 && getComponents().get(getNodesSorted().get(runningIdx)) == componentId; - } + public PrimitiveIterator.OfLong iterator(long componentId, long offset) { + return new Iterator(componentId, offset); + } - @Override - public Long next() { - try { - return getNodesSorted().get(runningIdx--); - } catch (ArrayIndexOutOfBoundsException ex) { - throw new NoSuchElementException(); - } - } - }; + public Spliterator.OfLong spliterator(long componentId, long offset) { + return Spliterators.spliteratorUnknownSize( + iterator(componentId, offset), + Spliterator.ORDERED | Spliterator.SORTED | Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.DISTINCT + ); } - HugeLongArray getComponents() { + LongUnaryOperator getComponents() { return components; } @@ -76,15 +71,16 @@ HugeLongArray getNodesSorted() { return nodesSorted; } - static HugeAtomicLongArray computeIndexUpperBoundPerComponent(HugeLongArray components, int concurrency) { - long nodeCount = components.size(); + static HugeAtomicLongArray computeIndexUpperBoundPerComponent(LongUnaryOperator components, long nodeCount, + int concurrency) { + var upperBoundPerComponent = HugeAtomicLongArray.of(nodeCount, ParalleLongPageCreator.passThrough(concurrency)); // init coordinate array to contain the nr of nodes per component // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes: {(comp1, 3), (comp2, 20)} ParallelUtil.parallelForEachNode(nodeCount, concurrency, TerminationFlag.RUNNING_TRUE, nodeId -> { { - long componentId = components.get(nodeId); + long componentId = components.applyAsLong(nodeId); upperBoundPerComponent.getAndAdd(componentId, 1); } }); @@ -94,31 +90,67 @@ static HugeAtomicLongArray computeIndexUpperBoundPerComponent(HugeLongArray comp ParallelUtil.parallelForEachNode(nodeCount, concurrency, TerminationFlag.RUNNING_TRUE, componentId -> { if (upperBoundPerComponent.get(componentId) > 0) { - var upperIndex = atomicNodeSum.addAndGet(upperBoundPerComponent.get(componentId)); - upperBoundPerComponent.set(componentId, upperIndex); + var nodeSum = atomicNodeSum.addAndGet(upperBoundPerComponent.get(componentId)); + upperBoundPerComponent.set(componentId, nodeSum); } }); return upperBoundPerComponent; } - static HugeLongArray computeNodesSortedByComponent(HugeLongArray components, + static HugeLongArray computeNodesSortedByComponent(LongUnaryOperator components, HugeAtomicLongArray componentCoordinateArray, int concurrency) { - long nodeCount = components.size(); + long nodeCount = componentCoordinateArray.size(); var nodesSortedByComponent = HugeLongArray.newArray(nodeCount); // fill nodesSortedByComponent with nodeId per unique index - // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes, named in their order of processing: + // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes, named in their order of seq processing: // {(0, n3), (1, n2), (2, n1), (3, n23), .., (22, n4)} ParallelUtil.parallelForEachNode(nodeCount, concurrency, TerminationFlag.RUNNING_TRUE, indexId -> { long nodeId = nodeCount - indexId - 1; - long componentId = components.get(nodeId); + long componentId = components.applyAsLong(nodeId); long coordinate = componentCoordinateArray.getAndAdd(componentId, -1); nodesSortedByComponent.set(coordinate - 1, nodeId); }); return nodesSortedByComponent; } + + private final class Iterator implements PrimitiveIterator.OfLong { + private final long offset; + long runningIdx; + final long componentId; + + Iterator(long componentId, long offset) { + this.componentId = componentId; + this.runningIdx = getUpperBoundPerComponent().get(componentId); + this.offset = offset; + } + + @Override + public boolean hasNext() { + if (offset < 0L) { + return runningIdx > 0 && getComponents().applyAsLong(getNodesSorted().get(runningIdx - 1)) == componentId; + } else { + while (runningIdx > 0 && getComponents().applyAsLong(getNodesSorted().get(runningIdx - 1)) == componentId) { + if (getNodesSorted().get(runningIdx - 1) < offset) { + runningIdx--; + } else { + return true; + } + } + return false; + } + } + @Override + public long nextLong() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + return getNodesSorted().get(--runningIdx); + } + + } } diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java index 21f630ccfc..dc6180f7ca 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java @@ -212,6 +212,8 @@ private static Stream concurrencies() { EXPECTED_OUTGOING_TOP_K_1.add(resultString(0, 3, 1.0)); EXPECTED_OUTGOING_TOP_K_1.add(resultString(1, 0, 2 / 3.0)); EXPECTED_OUTGOING_TOP_K_1.add(resultString(2, 0, 1 / 3.0)); + EXPECTED_OUTGOING_TOP_K_1.add(resultString(1, 3, 2 / 3.0)); + EXPECTED_OUTGOING_TOP_K_1.add(resultString(2, 3, 1 / 3.0)); EXPECTED_OUTGOING_TOP_K_1.add(resultString(3, 0, 1.0)); EXPECTED_OUTGOING_SIMILARITY_CUTOFF.add(resultString(0, 1, 2 / 3.0)); @@ -255,6 +257,7 @@ private static Stream concurrencies() { EXPECTED_INCOMING_TOP_K_1.add(resultString(4, 5, 1.0)); EXPECTED_INCOMING_TOP_K_1.add(resultString(5, 4, 1.0)); EXPECTED_INCOMING_TOP_K_1.add(resultString(6, 4, 1 / 2.0)); + EXPECTED_INCOMING_TOP_K_1.add(resultString(6, 5, 1 / 2.0)); EXPECTED_INCOMING_SIMILARITY_CUTOFF.add(resultString(4, 5, 1.0)); EXPECTED_INCOMING_SIMILARITY_CUTOFF.add(resultString(4, 6, 1 / 2.0)); @@ -394,7 +397,7 @@ void shouldComputeTopKForSupportedDirections(Orientation orientation, int concur .map(NodeSimilarityTest::resultString) .collect(Collectors.toSet()); - assertEquals(orientation == REVERSE ? EXPECTED_INCOMING_TOP_K_1 : EXPECTED_OUTGOING_TOP_K_1, result); + assertThat(result).containsAnyElementsOf(orientation == REVERSE ? EXPECTED_INCOMING_TOP_K_1 : EXPECTED_OUTGOING_TOP_K_1); } @ParameterizedTest(name = "orientation: {0}, concurrency: {1}") diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java index 0a4f0f9651..fb9652683d 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java @@ -24,11 +24,17 @@ import org.neo4j.gds.collections.ha.HugeLongArray; import org.neo4j.gds.collections.haa.HugeAtomicLongArray; import org.neo4j.gds.core.utils.paged.ParalleLongPageCreator; +import org.neo4j.gds.core.utils.shuffle.ShuffleUtil; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.Map; +import java.util.Set; +import java.util.SplittableRandom; +import java.util.function.LongUnaryOperator; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -37,29 +43,30 @@ class NodesSortedByComponentTest { @Test void shouldDetermineIndexUpperBound() { - var components = HugeLongArray.newArray(28); - // nodeId, componentId - for (int nodeId = 0; nodeId < 3; nodeId++) { - components.set(nodeId, 0); // size 3 - } - for (int nodeId = 3; nodeId < 8; nodeId++) { - components.set(nodeId, 1); // size 5 - } - for (int nodeId = 8; nodeId < 12; nodeId++) { - components.set(nodeId, 2); // size 4 - } - for (int nodeId = 12; nodeId < 18; nodeId++) { - components.set(nodeId, 3); // size 6 - } - components.set(18, 4); // size 1 - for (int nodeId = 19; nodeId < 21; nodeId++) { - components.set(nodeId, 5); // size 2 - } - for (int nodeId = 21; nodeId < 28; nodeId++) { - components.set(nodeId, 6); // size 7 - } + // nodeId -> componentId + var components = new LongUnaryOperator() { + @Override + public long applyAsLong(long nodeId) { + if (nodeId < 3) { + return 0L; // size 3 + } else if (nodeId < 8) { + return 1L; // size 5 + } else if (nodeId < 12) { + return 2L; // size 4 + } else if (nodeId < 18) { + return 3L; // size 6 + } else if (nodeId < 19) { + return 4L; // size 1 + } else if (nodeId < 21) { + return 5L; // size 2 + } else { + return 6L; // size 7 + } + } + }; + - var idxUpperBoundPerComponent = NodesSortedByComponent.computeIndexUpperBoundPerComponent(components, 4); + var idxUpperBoundPerComponent = NodesSortedByComponent.computeIndexUpperBoundPerComponent(components, 28, 4); // we cannot infer which component follows another, but the size must match for the component Map componentPerIdxUpperBound = new HashMap<>(7); for (int i = 0; i < 7; i++) { @@ -83,14 +90,17 @@ void shouldDetermineIndexUpperBound() { @Test void shouldReturnNodesForComponent() { - var components = HugeLongArray.newArray(8); - // nodeId, componentId - for (int nodeId = 0; nodeId < 3; nodeId++) { - components.set(nodeId, 0); - } - for (int nodeId = 3; nodeId < 8; nodeId++) { - components.set(nodeId, 1); - } + // nodeId -> componentId + var components = new LongUnaryOperator() { + @Override + public long applyAsLong(long nodeId) { + if (nodeId < 3) { + return 0L; // size 3 + } else { + return 1L; // size 5 + } + } + }; var nodesSorted = HugeLongArray.newArray(8); // uniqueIdx, nodeId @@ -105,20 +115,43 @@ void shouldReturnNodesForComponent() { var upperBoundPerComponent = HugeAtomicLongArray.of(8, ParalleLongPageCreator.passThrough(4)); // componentId, upperBound - upperBoundPerComponent.set(0, 2); - upperBoundPerComponent.set(1, 7); + upperBoundPerComponent.set(0, 3); + upperBoundPerComponent.set(1, 8); NodesSortedByComponent nodesSortedByComponentMock = Mockito.mock(NodesSortedByComponent.class); Mockito.doReturn(components).when(nodesSortedByComponentMock).getComponents(); Mockito.doReturn(upperBoundPerComponent).when(nodesSortedByComponentMock).getUpperBoundPerComponent(); Mockito.doReturn(nodesSorted).when(nodesSortedByComponentMock).getNodesSorted(); - Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(1L); + Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(1L,-1L); - Iterator iterator = nodesSortedByComponentMock.iterator(1L); + Iterator iterator = nodesSortedByComponentMock.iterator(1L, -1L); for (int nodeId = 3; nodeId < 8; nodeId++) { assertTrue(iterator.hasNext()); assertEquals(nodeId, iterator.next()); } assertFalse(iterator.hasNext()); } + + @Test + void shouldRespectOffset() { + LongUnaryOperator components = nodeId -> 0L; + + var nodesSorted = HugeLongArray.newArray(20); + nodesSorted.setAll(x -> x); + ShuffleUtil.shuffleArray(nodesSorted, new SplittableRandom(92)); + + var upperBoundPerComponent = HugeAtomicLongArray.of(1, ParalleLongPageCreator.passThrough(4)); + upperBoundPerComponent.set(0, 20); + + NodesSortedByComponent nodesSortedByComponentMock = Mockito.mock(NodesSortedByComponent.class); + Mockito.doReturn(components).when(nodesSortedByComponentMock).getComponents(); + Mockito.doReturn(upperBoundPerComponent).when(nodesSortedByComponentMock).getUpperBoundPerComponent(); + Mockito.doReturn(nodesSorted).when(nodesSortedByComponentMock).getNodesSorted(); + Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(0L,11L); + + Set resultingNodes = new HashSet<>(); + Iterator iterator = nodesSortedByComponentMock.iterator(0L, 11L); + iterator.forEachRemaining(resultingNodes::add); + assertThat(resultingNodes).containsExactlyInAnyOrder(11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L); + } } From 3575d13587035a2381dba8292cbdff749d894225 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Fri, 8 Dec 2023 16:06:51 +0100 Subject: [PATCH 07/19] remove map from components op (not thread-safe) --- .../similarity/nodesim/NodeSimilarity.java | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java index 5cee19e585..eba4a4f8ca 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java @@ -24,6 +24,7 @@ import org.neo4j.gds.api.Graph; import org.neo4j.gds.api.RelationshipConsumer; import org.neo4j.gds.api.properties.nodes.NodePropertyValues; +import org.neo4j.gds.collections.ha.HugeLongArray; import org.neo4j.gds.collections.ha.HugeObjectArray; import org.neo4j.gds.core.concurrency.ParallelUtil; import org.neo4j.gds.core.utils.SetBitsIterable; @@ -284,7 +285,6 @@ private LongUnaryOperator initComponents() { // extract component info from property NodePropertyValues nodeProperties = graph.nodeProperties(config.componentProperty()); return initComponentIdMapping(graph, nodeProperties::longValue); - } // run WCC to determine components @@ -432,16 +432,22 @@ private Stream computeSimilaritiesForNode(long sourceNodeId) { } private static LongUnaryOperator initComponentIdMapping(Graph graph, LongUnaryOperator originComponentIdMapper) { - HugeLongLongMap componentIdMappings = new HugeLongLongMap(); - AtomicLong mappedComponentId = new AtomicLong(0L); + var componentIdMappings = new HugeLongLongMap(); + var mappedComponentId = new AtomicLong(0L); + var mappedComponentIdPerNode = HugeLongArray.newArray(graph.nodeCount()); graph.forEachNode(n -> { - long originComponentId = originComponentIdMapper.applyAsLong(n); - if (!componentIdMappings.containsKey(originComponentId)) { - componentIdMappings.put(originComponentId, mappedComponentId.getAndIncrement()); + long originComponentIdForNode = originComponentIdMapper.applyAsLong(n); + long mappedComponentIdForNode = componentIdMappings.getOrDefault(originComponentIdMapper.applyAsLong(n), + mappedComponentId.getAndIncrement()); + + if (!componentIdMappings.containsKey(originComponentIdForNode)) { + componentIdMappings.put(originComponentIdForNode, mappedComponentIdForNode); } + mappedComponentIdPerNode.set(n, mappedComponentIdForNode); return true; }); - return n -> componentIdMappings.getOrDefault(originComponentIdMapper.applyAsLong(n), 0L); + + return mappedComponentIdPerNode::get; } interface SimilarityConsumer { From 9eb847f83a186db91747a0388d524d962d233cc3 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Fri, 8 Dec 2023 17:27:06 +0100 Subject: [PATCH 08/19] add + refactor tests - make upper bound the real highest global idx for the given component - copy array internally instead of modifying passed argument --- .../nodesim/NodesSortedByComponent.java | 34 ++--- .../nodesim/NodesSortedByComponentTest.java | 120 ++++++++++++------ 2 files changed, 98 insertions(+), 56 deletions(-) diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java index 7ad77d7766..247e447d09 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java @@ -43,9 +43,7 @@ public class NodesSortedByComponent { public NodesSortedByComponent(LongUnaryOperator components, long nodeCount, int concurrency) { this.components = components; this.upperBoundPerComponent = computeIndexUpperBoundPerComponent(components, nodeCount, concurrency); - var componentCoordinateArray = HugeAtomicLongArray.of(nodeCount, ParalleLongPageCreator.passThrough(concurrency)); - upperBoundPerComponent.copyTo(componentCoordinateArray, nodeCount); - this.nodesSorted = computeNodesSortedByComponent(components, componentCoordinateArray, concurrency); + this.nodesSorted = computeNodesSortedByComponent(components, upperBoundPerComponent, concurrency); } public PrimitiveIterator.OfLong iterator(long componentId, long offset) { @@ -85,13 +83,14 @@ static HugeAtomicLongArray computeIndexUpperBoundPerComponent(LongUnaryOperator } }); AtomicLong atomicNodeSum = new AtomicLong(); - // modify coordinate array to contain the end of a component's range in the order called - // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes: {(comp1, 3), (comp2, 23)} + // modify coordinate array to contain the upper bound of the global index for each component + // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes, comp1 randomly accessed prior to comp2: + // {(comp1, 2), (comp2, 22)} ParallelUtil.parallelForEachNode(nodeCount, concurrency, TerminationFlag.RUNNING_TRUE, componentId -> { if (upperBoundPerComponent.get(componentId) > 0) { var nodeSum = atomicNodeSum.addAndGet(upperBoundPerComponent.get(componentId)); - upperBoundPerComponent.set(componentId, nodeSum); + upperBoundPerComponent.set(componentId, nodeSum - 1); } }); @@ -99,20 +98,23 @@ static HugeAtomicLongArray computeIndexUpperBoundPerComponent(LongUnaryOperator } static HugeLongArray computeNodesSortedByComponent(LongUnaryOperator components, - HugeAtomicLongArray componentCoordinateArray, int concurrency) { + HugeAtomicLongArray idxUpperBoundPerComponent, int concurrency) { - long nodeCount = componentCoordinateArray.size(); + // initialized to its max possible size of 1 node <=> 1 component in a disconnected graph + long nodeCount = idxUpperBoundPerComponent.size(); var nodesSortedByComponent = HugeLongArray.newArray(nodeCount); + var nodeIdxProviderArray = HugeAtomicLongArray.of(nodeCount, ParalleLongPageCreator.passThrough(concurrency)); + idxUpperBoundPerComponent.copyTo(nodeIdxProviderArray, nodeCount); - // fill nodesSortedByComponent with nodeId per unique index - // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes, named in their order of seq processing: + // fill nodesSortedByComponent with nodeId per component-sorted, unique index + // i.e. comp1 containing 3 nodes, comp2 containing 20 nodes, named in order of processing: // {(0, n3), (1, n2), (2, n1), (3, n23), .., (22, n4)} ParallelUtil.parallelForEachNode(nodeCount, concurrency, TerminationFlag.RUNNING_TRUE, indexId -> { long nodeId = nodeCount - indexId - 1; long componentId = components.applyAsLong(nodeId); - long coordinate = componentCoordinateArray.getAndAdd(componentId, -1); - nodesSortedByComponent.set(coordinate - 1, nodeId); + long nodeIdx = nodeIdxProviderArray.getAndAdd(componentId, -1); + nodesSortedByComponent.set(nodeIdx, nodeId); }); return nodesSortedByComponent; @@ -132,10 +134,10 @@ private final class Iterator implements PrimitiveIterator.OfLong { @Override public boolean hasNext() { if (offset < 0L) { - return runningIdx > 0 && getComponents().applyAsLong(getNodesSorted().get(runningIdx - 1)) == componentId; + return runningIdx > -1 && getComponents().applyAsLong(getNodesSorted().get(runningIdx)) == componentId; } else { - while (runningIdx > 0 && getComponents().applyAsLong(getNodesSorted().get(runningIdx - 1)) == componentId) { - if (getNodesSorted().get(runningIdx - 1) < offset) { + while (runningIdx > -1 && getComponents().applyAsLong(getNodesSorted().get(runningIdx)) == componentId) { + if (getNodesSorted().get(runningIdx) < offset) { runningIdx--; } else { return true; @@ -149,7 +151,7 @@ public long nextLong() { if (!hasNext()) { throw new NoSuchElementException(); } - return getNodesSorted().get(--runningIdx); + return getNodesSorted().get(runningIdx--); } } diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java index fb9652683d..a576df9d1f 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java @@ -41,50 +41,81 @@ class NodesSortedByComponentTest { - @Test - void shouldDetermineIndexUpperBound() { - // nodeId -> componentId - var components = new LongUnaryOperator() { - @Override - public long applyAsLong(long nodeId) { - if (nodeId < 3) { - return 0L; // size 3 - } else if (nodeId < 8) { - return 1L; // size 5 - } else if (nodeId < 12) { - return 2L; // size 4 - } else if (nodeId < 18) { - return 3L; // size 6 - } else if (nodeId < 19) { - return 4L; // size 1 - } else if (nodeId < 21) { - return 5L; // size 2 - } else { - return 6L; // size 7 - } + private LongUnaryOperator prepare7DistinctSizeComponents() { + return nodeId -> { + if (nodeId < 3) { + return 0L; // size 3 + } else if (nodeId < 8) { + return 1L; // size 5 + } else if (nodeId < 12) { + return 2L; // size 4 + } else if (nodeId < 18) { + return 3L; // size 6 + } else if (nodeId < 19) { + return 4L; // size 1 + } else if (nodeId < 21) { + return 5L; // size 2 + } else { + return 6L; // size 7 } }; + } - + @Test + void shouldDetermineIndexUpperBound() { + // nodeId -> componentId + var components = prepare7DistinctSizeComponents(); + // componentId, upperBound var idxUpperBoundPerComponent = NodesSortedByComponent.computeIndexUpperBoundPerComponent(components, 28, 4); - // we cannot infer which component follows another, but the size must match for the component + // we cannot infer which component follows another, but the range must match in size for the component Map componentPerIdxUpperBound = new HashMap<>(7); for (int i = 0; i < 7; i++) { componentPerIdxUpperBound.put(idxUpperBoundPerComponent.get(i), (long) i); } - assertEquals(0, idxUpperBoundPerComponent.get(7)); - int lowerBound = 0; + assertThat(idxUpperBoundPerComponent.get(7)).isEqualTo(0L); + int previousUpperBound = -1; for (long key : componentPerIdxUpperBound.keySet().stream().sorted().toList()) { - switch ((int) (key - lowerBound)) { - case 1: assertEquals(4, componentPerIdxUpperBound.get(key));break; - case 2: assertEquals(5, componentPerIdxUpperBound.get(key));break; - case 3: assertEquals(0, componentPerIdxUpperBound.get(key));break; - case 4: assertEquals(2, componentPerIdxUpperBound.get(key));break; - case 5: assertEquals(1, componentPerIdxUpperBound.get(key));break; - case 6: assertEquals(3, componentPerIdxUpperBound.get(key));break; - case 7: assertEquals(6, componentPerIdxUpperBound.get(key));break; + switch ((int) (key - previousUpperBound)) { + case 1: assertThat(componentPerIdxUpperBound.get(key)).isEqualTo(4);break; + case 2: assertThat(componentPerIdxUpperBound.get(key)).isEqualTo(5);break; + case 3: assertThat(componentPerIdxUpperBound.get(key)).isEqualTo(0);break; + case 4: assertThat(componentPerIdxUpperBound.get(key)).isEqualTo(2);break; + case 5: assertThat(componentPerIdxUpperBound.get(key)).isEqualTo(1);break; + case 6: assertThat(componentPerIdxUpperBound.get(key)).isEqualTo(3);break; + case 7: assertThat(componentPerIdxUpperBound.get(key)).isEqualTo(6);break; } - lowerBound = (int) (key); + previousUpperBound = (int) (key); + } + } + + @Test + void shouldComputeNodesSortedByComponent() { + // nodeId -> componentId + var components = prepare7DistinctSizeComponents(); + // componentId, upperIdx of component + var upperBoundPerComponent = HugeAtomicLongArray.of(28, ParalleLongPageCreator.passThrough(4)); + upperBoundPerComponent.set(0, 2); + upperBoundPerComponent.set(1, 7); + upperBoundPerComponent.set(2, 11); + upperBoundPerComponent.set(3, 17); + upperBoundPerComponent.set(4, 18); + upperBoundPerComponent.set(5, 20); + upperBoundPerComponent.set(6, 27); + var componentCoordinateArray = HugeAtomicLongArray.of(28, ParalleLongPageCreator.passThrough(4)); + upperBoundPerComponent.copyTo(componentCoordinateArray, 28); + + var nodesSortedByComponent = NodesSortedByComponent.computeNodesSortedByComponent(components, + componentCoordinateArray, 4); + + // nodes may occur in arbitrary order within components, but with the given assignment, nodeIds must be within + // component index bounds + assertEquals(28, nodesSortedByComponent.size()); + for (int i = 0; i < 27; i++) { + var currentComp = components.applyAsLong(nodesSortedByComponent.get(i)); + + assertThat(nodesSortedByComponent.get(i)).isGreaterThan(currentComp == 0 ? + -1 : upperBoundPerComponent.get(currentComp - 1)); + assertThat(nodesSortedByComponent.get(i)).isLessThanOrEqualTo(upperBoundPerComponent.get(currentComp)); } } @@ -115,19 +146,28 @@ public long applyAsLong(long nodeId) { var upperBoundPerComponent = HugeAtomicLongArray.of(8, ParalleLongPageCreator.passThrough(4)); // componentId, upperBound - upperBoundPerComponent.set(0, 3); - upperBoundPerComponent.set(1, 8); + upperBoundPerComponent.set(0, 2); + upperBoundPerComponent.set(1, 7); NodesSortedByComponent nodesSortedByComponentMock = Mockito.mock(NodesSortedByComponent.class); Mockito.doReturn(components).when(nodesSortedByComponentMock).getComponents(); Mockito.doReturn(upperBoundPerComponent).when(nodesSortedByComponentMock).getUpperBoundPerComponent(); Mockito.doReturn(nodesSorted).when(nodesSortedByComponentMock).getNodesSorted(); - Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(1L,-1L); - Iterator iterator = nodesSortedByComponentMock.iterator(1L, -1L); + // first component + Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(0L,-1L); + Iterator iterator = nodesSortedByComponentMock.iterator(0L, -1L); + for (int nodeId = 0; nodeId < 3; nodeId++) { + assertTrue(iterator.hasNext()); + assertThat(iterator.next()).isEqualTo(nodeId); + } + assertFalse(iterator.hasNext()); + // second component + Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(1L,-1L); + iterator = nodesSortedByComponentMock.iterator(1L, -1L); for (int nodeId = 3; nodeId < 8; nodeId++) { assertTrue(iterator.hasNext()); - assertEquals(nodeId, iterator.next()); + assertThat(iterator.next()).isEqualTo(nodeId); } assertFalse(iterator.hasNext()); } @@ -141,7 +181,7 @@ void shouldRespectOffset() { ShuffleUtil.shuffleArray(nodesSorted, new SplittableRandom(92)); var upperBoundPerComponent = HugeAtomicLongArray.of(1, ParalleLongPageCreator.passThrough(4)); - upperBoundPerComponent.set(0, 20); + upperBoundPerComponent.set(0, 19); NodesSortedByComponent nodesSortedByComponentMock = Mockito.mock(NodesSortedByComponent.class); Mockito.doReturn(components).when(nodesSortedByComponentMock).getComponents(); From 59018c2bc46e25f01928723584a5a7d1b131eb15 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Tue, 19 Dec 2023 17:57:05 +0100 Subject: [PATCH 09/19] clean-up - remove SimilarityPairTriConsumer and redundant field - reintroduce targetNodesStream(offset) to run wo component optimization - rename test --- .../similarity/nodesim/NodeSimilarity.java | 76 ++++++++---------- .../nodesim/NodesSortedByComponent.java | 2 +- .../nodesim/SimilarityPairTriConsumer.java | 27 ------- .../nodesim/NodeSimilarityTest.java | 80 ++++++++++--------- 4 files changed, 77 insertions(+), 108 deletions(-) delete mode 100644 algo/src/main/java/org/neo4j/gds/similarity/nodesim/SimilarityPairTriConsumer.java diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java index eba4a4f8ca..873a1ee92f 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java @@ -68,7 +68,6 @@ public class NodeSimilarity extends Algorithm { private HugeObjectArray neighbors; private HugeObjectArray weights; private LongUnaryOperator components; - private SimilarityPairTriConsumer similarityConsumer; private final boolean weighted; private BiFunction targetNodeStream; @@ -204,10 +203,6 @@ private void prepare() { progressTracker.beginSubTask(); components = initComponents(); - var nodesByComponent = new NodesSortedByComponent(components, graph.nodeCount(), concurrency); - targetNodeStream = (componentId, offset) -> StreamSupport.longStream( - nodesByComponent.spliterator(componentId, offset), true) - .filter(targetNodes::get); if (config.runWCC()) { progressTracker.beginSubTask(); @@ -275,31 +270,35 @@ private Stream computeParallel() { private LongUnaryOperator initComponents() { if (!config.isEnableComponentOptimization()) { // considering everything as within the same component - similarityConsumer = this::computeSimilarityForSingleComponent; + targetNodeStream = (componentId, offset) -> targetNodesStream(offset); return n -> 0; } - similarityConsumer = this::computeSimilarityForComponents; - + LongUnaryOperator components; if (config.componentProperty() != null) { // extract component info from property NodePropertyValues nodeProperties = graph.nodeProperties(config.componentProperty()); - return initComponentIdMapping(graph, nodeProperties::longValue); + components = initComponentIdMapping(graph, nodeProperties::longValue); + } else { + // run WCC to determine components + progressTracker.beginSubTask(); + WccStreamConfig wccConfig = ImmutableWccStreamConfig + .builder() + .concurrency(concurrency) + .addAllRelationshipTypes(config.relationshipTypes()) + .addAllNodeLabels(config.nodeLabels()) + .build(); + + Wcc wcc = new WccAlgorithmFactory<>().build(graph, wccConfig, ProgressTracker.NULL_TRACKER); + DisjointSetStruct disjointSets = wcc.compute(); + components = initComponentIdMapping(graph, disjointSets::setIdOf); + progressTracker.endSubTask(); } - // run WCC to determine components - progressTracker.beginSubTask(); - WccStreamConfig wccConfig = ImmutableWccStreamConfig - .builder() - .concurrency(concurrency) - .addAllRelationshipTypes(config.relationshipTypes()) - .addAllNodeLabels(config.nodeLabels()) - .build(); - - Wcc wcc = new WccAlgorithmFactory<>().build(graph, wccConfig, ProgressTracker.NULL_TRACKER); - DisjointSetStruct disjointSets = wcc.compute(); - var components = initComponentIdMapping(graph, disjointSets::setIdOf); - progressTracker.endSubTask(); + var nodesByComponent = new NodesSortedByComponent(components, graph.nodeCount(), concurrency); + targetNodeStream = (componentId, offset) -> StreamSupport.longStream( + nodesByComponent.spliterator(componentId, offset), true) + .filter(targetNodes::get); return components; } @@ -333,16 +332,16 @@ private TopKMap computeTopKMap() { .forEach(sourceNodeId -> { if (sourceNodeFilter.equals(NodeFilter.noOp)) { targetNodeStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) - .forEach(targetNodeId -> similarityConsumer.accept(sourceNodeId, targetNodeId, + .forEach(targetNodeId -> computeSimilarityFor(sourceNodeId, targetNodeId, (source, target, similarity) -> { topKMap.put(source, target, similarity); topKMap.put(target, source, similarity); } )); } else { - targetNodeStream.apply(components.applyAsLong(sourceNodeId), -1L) + targetNodeStream.apply(components.applyAsLong(sourceNodeId), 0L) .filter(targetNodeId -> sourceNodeId != targetNodeId) - .forEach(targetNodeId -> similarityConsumer.accept(sourceNodeId, targetNodeId, topKMap::put)); + .forEach(targetNodeId -> computeSimilarityFor(sourceNodeId, targetNodeId, topKMap::put)); } }); progressTracker.endSubTask(); @@ -369,9 +368,9 @@ private TopKMap computeTopKMapParallel() { // into these queues is not considered to be thread-safe. // Hence, we need to ensure that down the stream, exactly one queue // within the TopKMap processes all pairs for a single node. - targetNodeStream.apply(components.applyAsLong(sourceNodeId), -1L) + targetNodeStream.apply(components.applyAsLong(sourceNodeId), 0L) .filter(targetNodeId -> sourceNodeId != targetNodeId) - .forEach(targetNodeId -> similarityConsumer.accept(sourceNodeId, targetNodeId, topKMap::put)) + .forEach(targetNodeId -> computeSimilarityFor(sourceNodeId, targetNodeId, topKMap::put)) ) ); @@ -387,11 +386,11 @@ private Stream computeTopN() { .forEach(sourceNodeId -> { if (sourceNodeFilter.equals(NodeFilter.noOp)) { targetNodeStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) - .forEach(targetNodeId -> similarityConsumer.accept(sourceNodeId, targetNodeId, topNList::add)); + .forEach(targetNodeId -> computeSimilarityFor(sourceNodeId, targetNodeId, topNList::add)); } else { - targetNodeStream.apply(components.applyAsLong(sourceNodeId), -1L) + targetNodeStream.apply(components.applyAsLong(sourceNodeId), 0L) .filter(targetNodeId -> sourceNodeId != targetNodeId) - .forEach(targetNodeId -> similarityConsumer.accept(sourceNodeId, targetNodeId, topNList::add)); + .forEach(targetNodeId -> computeSimilarityFor(sourceNodeId, targetNodeId, topNList::add)); } }); @@ -413,6 +412,10 @@ private LongStream sourceNodesStream() { return sourceNodesStream(0); } + private LongStream targetNodesStream(long offset) { + return new SetBitsIterable(targetNodes, offset).stream(); + } + private LongStream loggableAndTerminatableSourceNodeStream() { return checkProgress(sourceNodesStream()); } @@ -421,7 +424,7 @@ private Stream computeSimilaritiesForNode(long sourceNodeId) { return targetNodeStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) .mapToObj(targetNodeId -> { var resultHolder = new SimilarityResult[]{null}; - similarityConsumer.accept( + computeSimilarityFor( sourceNodeId, targetNodeId, (source, target, similarity) -> resultHolder[0] = new SimilarityResult(source, target, similarity) @@ -454,7 +457,7 @@ interface SimilarityConsumer { void accept(long sourceNodeId, long targetNodeId, double similarity); } - private void computeSimilarityForSingleComponent(long sourceNodeId, long targetNodeId, SimilarityConsumer consumer) { + private void computeSimilarityFor(long sourceNodeId, long targetNodeId, SimilarityConsumer consumer) { double similarity; var sourceNodeNeighbors = neighbors.get(sourceNodeId); var targetNodeNeighbors = neighbors.get(targetNodeId); @@ -470,15 +473,6 @@ private void computeSimilarityForSingleComponent(long sourceNodeId, long targetN } } - private void computeSimilarityForComponents(long sourceNodeId, long targetNodeId, SimilarityConsumer consumer) { - if (components.applyAsLong(sourceNodeId) != components.applyAsLong(targetNodeId)) { - consumer.accept(sourceNodeId, targetNodeId, 0); - return; - } - - computeSimilarityForSingleComponent(sourceNodeId, targetNodeId, consumer); - } - private double computeWeightedSimilarity( long[] sourceNodeNeighbors, long[] targetNodeNeighbors, diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java index 247e447d09..df0c4d2719 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java @@ -133,7 +133,7 @@ private final class Iterator implements PrimitiveIterator.OfLong { @Override public boolean hasNext() { - if (offset < 0L) { + if (offset < 1L) { return runningIdx > -1 && getComponents().applyAsLong(getNodesSorted().get(runningIdx)) == componentId; } else { while (runningIdx > -1 && getComponents().applyAsLong(getNodesSorted().get(runningIdx)) == componentId) { diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/SimilarityPairTriConsumer.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/SimilarityPairTriConsumer.java deleted file mode 100644 index 52a7a1bbfb..0000000000 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/SimilarityPairTriConsumer.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) "Neo4j" - * Neo4j Sweden AB [http://neo4j.com] - * - * This file is part of Neo4j. - * - * Neo4j is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -package org.neo4j.gds.similarity.nodesim; - -@FunctionalInterface -public interface SimilarityPairTriConsumer { - - void accept(long k, long v, NodeSimilarity.SimilarityConsumer similarityConsumer); - -} diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java index dc6180f7ca..724404db48 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java @@ -74,7 +74,7 @@ @GdlExtension final class NodeSimilarityTest { - // fixing idOffset to 0 as the expecatations hard-code ids + // fixing idOffset to 0 as the expectations hard-code ids @GdlGraph(graphNamePrefix = "natural", orientation = NATURAL, idOffset = 0) @GdlGraph(graphNamePrefix = "reverse", orientation = REVERSE, idOffset = 0) @GdlGraph(graphNamePrefix = "undirected", orientation = UNDIRECTED, idOffset = 0) @@ -884,6 +884,46 @@ void shouldLogProgress(int concurrency) { ); } + @Test + void shouldLogProgressForWccOptimization() { + var graph = naturalGraph; + var config = ImmutableNodeSimilarityStreamConfig.builder() + .isEnableComponentOptimization(true) + .concurrency(4) + .build(); + var progressTask = new NodeSimilarityFactory<>().progressTask(graph, config); + TestLog log = Neo4jProxy.testLog(); + var progressTracker = new TestProgressTracker( + progressTask, + log, + 4, + EmptyTaskRegistryFactory.INSTANCE + ); + + NodeSimilarity.create( + graph, + config, + DefaultPool.INSTANCE, + progressTracker + ).compute().streamResult().count(); + + List progresses = progressTracker.getProgresses(); + + // Should log progress for prepare and actual comparisons + assertThat(progresses).hasSize(6); + + assertThat(log.getMessages(INFO)) + .extracting(removingThreadId()) + .contains( + "NodeSimilarity :: prepare :: WCC :: Start", + "NodeSimilarity :: prepare :: WCC :: Finished", + "NodeSimilarity :: prepare :: Start", + "NodeSimilarity :: prepare :: Finished", + "NodeSimilarity :: compare node pairs :: Start", + "NodeSimilarity :: compare node pairs :: Finished" + ); + } + @Test void shouldGiveCorrectResultsWithOverlap() { var gdl = @@ -984,42 +1024,4 @@ void shouldThrowIfUpperIsSmaller() { assertThatThrownBy(streamConfigBuilder().upperDegreeCutoff(3).degreeCutoff(4)::build) .hasMessageContaining("upperDegreeCutoff cannot be smaller than degreeCutoff"); } - - - @Test - void shouldOptimizeForDistinctComponents() { - var graph = naturalGraph; - var config = ImmutableNodeSimilarityStreamConfig.builder().isEnableComponentOptimization(true).degreeCutoff(0).concurrency(4).build(); - var progressTask = new NodeSimilarityFactory<>().progressTask(graph, config); - TestLog log = Neo4jProxy.testLog(); - var progressTracker = new TestProgressTracker( - progressTask, - log, - 4, - EmptyTaskRegistryFactory.INSTANCE - ); - - NodeSimilarity.create( - graph, - config, - DefaultPool.INSTANCE, - progressTracker - ).compute().streamResult().count(); - - List progresses = progressTracker.getProgresses(); - - // Should log progress for prepare and actual comparisons - assertThat(progresses).hasSize(6); - - assertThat(log.getMessages(INFO)) - .extracting(removingThreadId()) - .contains( - "NodeSimilarity :: prepare :: WCC :: Start", - "NodeSimilarity :: prepare :: WCC :: Finished", - "NodeSimilarity :: prepare :: Start", - "NodeSimilarity :: prepare :: Finished", - "NodeSimilarity :: compare node pairs :: Start", - "NodeSimilarity :: compare node pairs :: Finished" - ); - } } From 8641c76271a40b069020e85e0529d34bc8b28d4f Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Tue, 19 Dec 2023 17:59:07 +0100 Subject: [PATCH 10/19] test for component optimization with property set --- .../ComponentPropertyNodeSimilarityTest.java | 157 ++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java new file mode 100644 index 0000000000..4ff275f1d9 --- /dev/null +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java @@ -0,0 +1,157 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.gds.similarity.nodesim; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.neo4j.gds.Orientation; +import org.neo4j.gds.api.Graph; +import org.neo4j.gds.core.concurrency.DefaultPool; +import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker; +import org.neo4j.gds.extension.GdlExtension; +import org.neo4j.gds.extension.GdlGraph; +import org.neo4j.gds.extension.Inject; +import org.neo4j.gds.extension.TestGraph; +import org.neo4j.gds.similarity.SimilarityResult; + +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.params.provider.Arguments.arguments; +import static org.neo4j.gds.Orientation.NATURAL; +import static org.neo4j.gds.Orientation.REVERSE; +import static org.neo4j.gds.TestSupport.crossArguments; +import static org.neo4j.gds.TestSupport.toArguments; +import static org.neo4j.gds.utils.StringFormatting.formatWithLocale; + +@GdlExtension +public class ComponentPropertyNodeSimilarityTest { + + @GdlGraph(graphNamePrefix = "natural", orientation = NATURAL, idOffset = 0) + @GdlGraph(graphNamePrefix = "reverse", orientation = REVERSE, idOffset = 0) + private static final String DB_CYPHER = + "CREATE" + + " (a:Person {compid: 0})" + + ", (b:Person {compid: 0})" + + ", (c:Person {compid: 0})" + + ", (d:Person {compid: 0})" + + ", (e:Person {compid: 1})" + + ", (i1:Item {compid: 0})" + + ", (i2:Item {compid: 0})" + + ", (i3:Item {compid: 0})" + + ", (i4:Item {compid: 1})" + + ", (i5:Item {compid: 1})" + + ", (a)-[:LIKES {prop: 1.0}]->(i1)" + + ", (a)-[:LIKES {prop: 1.0}]->(i2)" + + ", (a)-[:LIKES {prop: 2.0}]->(i3)" + + ", (b)-[:LIKES {prop: 1.0}]->(i1)" + + ", (b)-[:LIKES {prop: 1.0}]->(i2)" + + ", (c)-[:LIKES {prop: 1.0}]->(i3)" + + ", (d)-[:LIKES {prop: 0.5}]->(i1)" + + ", (d)-[:LIKES {prop: 1.0}]->(i2)" + + ", (d)-[:LIKES {prop: 1.0}]->(i3)" + + ", (e)-[:LIKES {prop: 1.0}]->(i4)" + + ", (e)-[:LIKES {prop: 1.0}]->(i5)"; + private static final Collection EXPECTED_OUTGOING_COMP_OPT = new HashSet<>(); + private static final Collection EXPECTED_INCOMING_COMP_OPT = new HashSet<>(); + + static { + EXPECTED_OUTGOING_COMP_OPT.add(resultString(0, 1, 2 / 3.0)); + EXPECTED_OUTGOING_COMP_OPT.add(resultString(0, 2, 1 / 3.0)); + EXPECTED_OUTGOING_COMP_OPT.add(resultString(0, 3, 1.0)); + EXPECTED_OUTGOING_COMP_OPT.add(resultString(1, 2, 0.0)); + EXPECTED_OUTGOING_COMP_OPT.add(resultString(1, 3, 2 / 3.0)); + EXPECTED_OUTGOING_COMP_OPT.add(resultString(2, 3, 1 / 3.0)); + // Add results in reverse direction because topK + EXPECTED_OUTGOING_COMP_OPT.add(resultString(1, 0, 2 / 3.0)); + EXPECTED_OUTGOING_COMP_OPT.add(resultString(2, 0, 1 / 3.0)); + EXPECTED_OUTGOING_COMP_OPT.add(resultString(3, 0, 1.0)); + EXPECTED_OUTGOING_COMP_OPT.add(resultString(2, 1, 0.0)); + EXPECTED_OUTGOING_COMP_OPT.add(resultString(3, 1, 2 / 3.0)); + EXPECTED_OUTGOING_COMP_OPT.add(resultString(3, 2, 1 / 3.0)); + + EXPECTED_INCOMING_COMP_OPT.add(resultString(9, 8, 1.0)); + EXPECTED_INCOMING_COMP_OPT.add(resultString(5, 6, 1.0)); + EXPECTED_INCOMING_COMP_OPT.add(resultString(5, 7, 1 / 2.0)); + EXPECTED_INCOMING_COMP_OPT.add(resultString(6, 7, 1 / 2.0)); + // Add results in reverse direction because topK + EXPECTED_INCOMING_COMP_OPT.add(resultString(8, 9, 1.0)); + EXPECTED_INCOMING_COMP_OPT.add(resultString(6, 5, 1.0)); + EXPECTED_INCOMING_COMP_OPT.add(resultString(7, 5, 1 / 2.0)); + EXPECTED_INCOMING_COMP_OPT.add(resultString(7, 6, 1 / 2.0)); + } + + @Inject + private TestGraph naturalGraph; + @Inject + private TestGraph reverseGraph; + + private static String resultString(long node1, long node2, double similarity) { + return formatWithLocale("%d,%d %f", node1, node2, similarity); + } + + private static String resultString(SimilarityResult result) { + return resultString(result.node1, result.node2, result.similarity); + } + + private static Stream concurrencies() { + return Stream.of(1, 4); + } + + static Stream supportedLoadAndComputeDirections() { + Stream directions = Stream.of( + arguments(NATURAL), + arguments(REVERSE) + ); + return crossArguments(() -> directions, toArguments(ComponentPropertyNodeSimilarityTest::concurrencies)); + } + + @ParameterizedTest(name = "orientation: {0}, concurrency: {1}") + @MethodSource("supportedLoadAndComputeDirections") + void shouldOptimizeForDistinctComponentsProperty(Orientation orientation, int concurrency) { + Graph graph = orientation == NATURAL ? naturalGraph : reverseGraph; + var config = ImmutableNodeSimilarityStreamConfig.builder() + .similarityCutoff(0.0) + .isEnableComponentOptimization(true) + .componentProperty("compid") + .concurrency(concurrency) + .build(); + + var nodeSimilarity = NodeSimilarity.create( + graph, + config, + DefaultPool.INSTANCE, + ProgressTracker.NULL_TRACKER + ); + + Set result = nodeSimilarity + .compute() + .streamResult() + .map(ComponentPropertyNodeSimilarityTest::resultString) + .collect(Collectors.toSet()); + + assertEquals(orientation == REVERSE ? EXPECTED_INCOMING_COMP_OPT : EXPECTED_OUTGOING_COMP_OPT, result); + } +} From f6f5c4919bae16ca7c469587718fe8750f520932 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Wed, 20 Dec 2023 10:24:58 +0100 Subject: [PATCH 11/19] revert expected test results extended for wcc --- .../org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java index 724404db48..d48826d02a 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java @@ -212,8 +212,6 @@ private static Stream concurrencies() { EXPECTED_OUTGOING_TOP_K_1.add(resultString(0, 3, 1.0)); EXPECTED_OUTGOING_TOP_K_1.add(resultString(1, 0, 2 / 3.0)); EXPECTED_OUTGOING_TOP_K_1.add(resultString(2, 0, 1 / 3.0)); - EXPECTED_OUTGOING_TOP_K_1.add(resultString(1, 3, 2 / 3.0)); - EXPECTED_OUTGOING_TOP_K_1.add(resultString(2, 3, 1 / 3.0)); EXPECTED_OUTGOING_TOP_K_1.add(resultString(3, 0, 1.0)); EXPECTED_OUTGOING_SIMILARITY_CUTOFF.add(resultString(0, 1, 2 / 3.0)); @@ -257,7 +255,6 @@ private static Stream concurrencies() { EXPECTED_INCOMING_TOP_K_1.add(resultString(4, 5, 1.0)); EXPECTED_INCOMING_TOP_K_1.add(resultString(5, 4, 1.0)); EXPECTED_INCOMING_TOP_K_1.add(resultString(6, 4, 1 / 2.0)); - EXPECTED_INCOMING_TOP_K_1.add(resultString(6, 5, 1 / 2.0)); EXPECTED_INCOMING_SIMILARITY_CUTOFF.add(resultString(4, 5, 1.0)); EXPECTED_INCOMING_SIMILARITY_CUTOFF.add(resultString(4, 6, 1 / 2.0)); @@ -397,7 +394,7 @@ void shouldComputeTopKForSupportedDirections(Orientation orientation, int concur .map(NodeSimilarityTest::resultString) .collect(Collectors.toSet()); - assertThat(result).containsAnyElementsOf(orientation == REVERSE ? EXPECTED_INCOMING_TOP_K_1 : EXPECTED_OUTGOING_TOP_K_1); + assertEquals(orientation == REVERSE ? EXPECTED_INCOMING_TOP_K_1 : EXPECTED_OUTGOING_TOP_K_1, result); } @ParameterizedTest(name = "orientation: {0}, concurrency: {1}") From 408dc260b3e75a18b5604bfff70bff9ec89e57a5 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Wed, 20 Dec 2023 10:40:37 +0100 Subject: [PATCH 12/19] preserve java 11 compatibility --- .../gds/similarity/nodesim/NodesSortedByComponentTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java index a576df9d1f..3ab1c9c4a7 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java @@ -33,6 +33,7 @@ import java.util.Set; import java.util.SplittableRandom; import java.util.function.LongUnaryOperator; +import java.util.stream.Collectors; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -74,7 +75,7 @@ void shouldDetermineIndexUpperBound() { } assertThat(idxUpperBoundPerComponent.get(7)).isEqualTo(0L); int previousUpperBound = -1; - for (long key : componentPerIdxUpperBound.keySet().stream().sorted().toList()) { + for (long key : componentPerIdxUpperBound.keySet().stream().sorted().collect(Collectors.toList())) { switch ((int) (key - previousUpperBound)) { case 1: assertThat(componentPerIdxUpperBound.get(key)).isEqualTo(4);break; case 2: assertThat(componentPerIdxUpperBound.get(key)).isEqualTo(5);break; From c2046dcd4485b674ebcc272713ab7daf1e1a4a19 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Wed, 20 Dec 2023 12:08:54 +0100 Subject: [PATCH 13/19] test valid sorting for inconsecutive ids --- .../nodesim/NodesSortedByComponentTest.java | 86 +++++++++++++++++-- 1 file changed, 78 insertions(+), 8 deletions(-) diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java index 3ab1c9c4a7..fd731805cf 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java @@ -26,9 +26,12 @@ import org.neo4j.gds.core.utils.paged.ParalleLongPageCreator; import org.neo4j.gds.core.utils.shuffle.ShuffleUtil; +import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.SplittableRandom; @@ -102,16 +105,14 @@ void shouldComputeNodesSortedByComponent() { upperBoundPerComponent.set(4, 18); upperBoundPerComponent.set(5, 20); upperBoundPerComponent.set(6, 27); - var componentCoordinateArray = HugeAtomicLongArray.of(28, ParalleLongPageCreator.passThrough(4)); - upperBoundPerComponent.copyTo(componentCoordinateArray, 28); var nodesSortedByComponent = NodesSortedByComponent.computeNodesSortedByComponent(components, - componentCoordinateArray, 4); + upperBoundPerComponent, 4); // nodes may occur in arbitrary order within components, but with the given assignment, nodeIds must be within // component index bounds assertEquals(28, nodesSortedByComponent.size()); - for (int i = 0; i < 27; i++) { + for (int i = 0; i < 28; i++) { var currentComp = components.applyAsLong(nodesSortedByComponent.get(i)); assertThat(nodesSortedByComponent.get(i)).isGreaterThan(currentComp == 0 ? @@ -120,6 +121,75 @@ void shouldComputeNodesSortedByComponent() { } } + @Test + void shouldComputeNodesSortedByComponentsNotConsecutive() { + // nodeId -> componentId + LongUnaryOperator components = nodeId -> { + if (nodeId < 4) { + return 3; // size 4 + } else if (nodeId < 6) { + return 5; // size 2 + } else { + return 1; // size 5 + } + }; + // componentId, upperIdx of component + var upperBoundPerComponent = HugeAtomicLongArray.of(11, ParalleLongPageCreator.passThrough(4)); + upperBoundPerComponent.set(3, 3); + upperBoundPerComponent.set(5, 10); + upperBoundPerComponent.set(1, 8); + + var nodesSortedByComponent = NodesSortedByComponent.computeNodesSortedByComponent(components, + upperBoundPerComponent, 4); + + // nodes may occur in arbitrary order within components, but with the given assignment, nodeIds must be within + // component index bounds + assertEquals(11, nodesSortedByComponent.size()); + Collection values = new ArrayList<>(); + int end = 0; + while (end < 11) { + int start = end; + if (nodesSortedByComponent.get(start) < 4) { + // next 4 nodes must be of component 3 + end += 4; + values.addAll(List.of(0L, 1L, 2L, 3L)); + } else if (nodesSortedByComponent.get(start) < 6) { + // next 2 nodes must be of component 5 + end += 2; + values.addAll(List.of(4L, 5L)); + } else { + // next 5 nodes must be of component 1 + end += 5; + values.addAll(List.of(6L, 7L, 8L, 9L, 10L)); + } + for (int i = start; i < end; i++) { + long nodeId = nodesSortedByComponent.get(i); + assertTrue(values.remove(nodeId)); + } + } + + NodesSortedByComponent nodesSortedByComponentMock = Mockito.mock(NodesSortedByComponent.class); + Mockito.doReturn(components).when(nodesSortedByComponentMock).getComponents(); + Mockito.doReturn(upperBoundPerComponent).when(nodesSortedByComponentMock).getUpperBoundPerComponent(); + Mockito.doReturn(nodesSortedByComponent).when(nodesSortedByComponentMock).getNodesSorted(); + + // no component with id 0 + Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(0L,0L); + Iterator iterator = nodesSortedByComponentMock.iterator(0L, 0L); + assertFalse(iterator.hasNext()); + + // 5 nodes for component with id 1 + Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(1L,0L); + iterator = nodesSortedByComponentMock.iterator(1L, 0L); + values.addAll(List.of(6L, 7L, 8L, 9L, 10L)); + for (int i = 0; i < 5; i++) { + assertTrue(iterator.hasNext()); + long nodeId = iterator.next(); + assertTrue(values.remove(nodeId)); + } + assertFalse(iterator.hasNext()); + } + @Test void shouldReturnNodesForComponent() { // nodeId -> componentId @@ -156,16 +226,16 @@ public long applyAsLong(long nodeId) { Mockito.doReturn(nodesSorted).when(nodesSortedByComponentMock).getNodesSorted(); // first component - Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(0L,-1L); - Iterator iterator = nodesSortedByComponentMock.iterator(0L, -1L); + Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(0L,0L); + Iterator iterator = nodesSortedByComponentMock.iterator(0L, 0L); for (int nodeId = 0; nodeId < 3; nodeId++) { assertTrue(iterator.hasNext()); assertThat(iterator.next()).isEqualTo(nodeId); } assertFalse(iterator.hasNext()); // second component - Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(1L,-1L); - iterator = nodesSortedByComponentMock.iterator(1L, -1L); + Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(1L,0L); + iterator = nodesSortedByComponentMock.iterator(1L, 0L); for (int nodeId = 3; nodeId < 8; nodeId++) { assertTrue(iterator.hasNext()); assertThat(iterator.next()).isEqualTo(nodeId); From 615fc94fbf1634da7779e23716df90f23311ef06 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Wed, 20 Dec 2023 17:19:33 +0100 Subject: [PATCH 14/19] component id mapping only for provided property --- .../FilteredNodeSimilarityFactory.java | 9 ++- .../similarity/nodesim/NodeSimilarity.java | 2 +- .../nodesim/NodeSimilarityFactory.java | 9 ++- .../ComponentPropertyNodeSimilarityTest.java | 59 +++++++++++++++++++ .../nodesim/NodeSimilarityTest.java | 6 +- 5 files changed, 78 insertions(+), 7 deletions(-) diff --git a/algo/src/main/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityFactory.java b/algo/src/main/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityFactory.java index 95c0b5bd4f..f3a519df39 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityFactory.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityFactory.java @@ -24,6 +24,7 @@ import org.neo4j.gds.api.Graph; import org.neo4j.gds.collections.ha.HugeLongArray; import org.neo4j.gds.collections.ha.HugeObjectArray; +import org.neo4j.gds.collections.haa.HugeAtomicLongArray; import org.neo4j.gds.core.concurrency.DefaultPool; import org.neo4j.gds.core.utils.mem.MemoryEstimation; import org.neo4j.gds.core.utils.mem.MemoryEstimations; @@ -74,7 +75,6 @@ public MemoryEstimation memoryEstimation(CONFIG config) { int topK = Math.abs(config.normalizedK()); MemoryEstimations.Builder builder = MemoryEstimations.builder(NodeSimilarity.class.getSimpleName()) - .perNode("components", HugeLongArray::memoryEstimation) .perNode("node filter", nodeCount -> sizeOfLongArray(BitSet.bits2words(nodeCount))) .add( "vectors", @@ -97,6 +97,13 @@ public MemoryEstimation memoryEstimation(CONFIG config) { .rangePerNode("array", nodeCount -> MemoryRange.of(0, nodeCount * averageVectorSize)) .build(); })); + if (config.isEnableComponentOptimization()) { + builder.perNode("nodes sorted by component", HugeLongArray::memoryEstimation); + builder.perNode("upper bound per component", HugeAtomicLongArray::memoryEstimation); + } + if (config.isEnableComponentOptimization() && config.componentProperty() != null) { + builder.perNode("component mapping", HugeLongArray::memoryEstimation); + } if (config.computeToGraph() && !config.hasTopK()) { builder.add( "similarity graph", diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java index 873a1ee92f..e9ce177f5e 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java @@ -291,7 +291,7 @@ private LongUnaryOperator initComponents() { Wcc wcc = new WccAlgorithmFactory<>().build(graph, wccConfig, ProgressTracker.NULL_TRACKER); DisjointSetStruct disjointSets = wcc.compute(); - components = initComponentIdMapping(graph, disjointSets::setIdOf); + components = disjointSets::setIdOf; progressTracker.endSubTask(); } diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityFactory.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityFactory.java index ca51f5bbff..9698a9e716 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityFactory.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityFactory.java @@ -24,6 +24,7 @@ import org.neo4j.gds.api.Graph; import org.neo4j.gds.collections.ha.HugeLongArray; import org.neo4j.gds.collections.ha.HugeObjectArray; +import org.neo4j.gds.collections.haa.HugeAtomicLongArray; import org.neo4j.gds.core.concurrency.DefaultPool; import org.neo4j.gds.core.utils.mem.MemoryEstimation; import org.neo4j.gds.core.utils.mem.MemoryEstimations; @@ -69,7 +70,6 @@ public MemoryEstimation memoryEstimation(CONFIG config) { int topK = Math.abs(config.normalizedK()); MemoryEstimations.Builder builder = MemoryEstimations.builder(NodeSimilarity.class.getSimpleName()) - .perNode("components", HugeLongArray::memoryEstimation) .perNode("node filter", nodeCount -> sizeOfLongArray(BitSet.bits2words(nodeCount))) .add( "vectors", @@ -92,6 +92,13 @@ public MemoryEstimation memoryEstimation(CONFIG config) { .rangePerNode("array", nodeCount -> MemoryRange.of(0, nodeCount * averageVectorSize)) .build(); })); + if (config.isEnableComponentOptimization()) { + builder.perNode("nodes sorted by component", HugeLongArray::memoryEstimation); + builder.perNode("upper bound per component", HugeAtomicLongArray::memoryEstimation); + } + if (config.isEnableComponentOptimization() && config.componentProperty() != null) { + builder.perNode("component mapping", HugeLongArray::memoryEstimation); + } if (config.computeToGraph() && !config.hasTopK()) { builder.add( "similarity graph", diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java index 4ff275f1d9..6f13b9f595 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java @@ -22,9 +22,15 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import org.neo4j.gds.Orientation; import org.neo4j.gds.api.Graph; +import org.neo4j.gds.core.GraphDimensions; +import org.neo4j.gds.core.ImmutableGraphDimensions; import org.neo4j.gds.core.concurrency.DefaultPool; +import org.neo4j.gds.core.utils.mem.MemoryEstimations; +import org.neo4j.gds.core.utils.mem.MemoryRange; +import org.neo4j.gds.core.utils.mem.MemoryTree; import org.neo4j.gds.core.utils.progress.tasks.ProgressTracker; import org.neo4j.gds.extension.GdlExtension; import org.neo4j.gds.extension.GdlGraph; @@ -44,6 +50,7 @@ import static org.neo4j.gds.Orientation.REVERSE; import static org.neo4j.gds.TestSupport.crossArguments; import static org.neo4j.gds.TestSupport.toArguments; +import static org.neo4j.gds.similarity.nodesim.NodeSimilarityBaseConfig.TOP_K_DEFAULT; import static org.neo4j.gds.utils.StringFormatting.formatWithLocale; @GdlExtension @@ -128,6 +135,58 @@ static Stream supportedLoadAndComputeDirections() { return crossArguments(() -> directions, toArguments(ComponentPropertyNodeSimilarityTest::concurrencies)); } + @ParameterizedTest(name = "componentProperty = {0}") + @ValueSource(booleans = {true, false}) + void shouldComputeMemrecWithOrWithoutComponentMapping(boolean componentPropertySet) { + GraphDimensions dimensions = ImmutableGraphDimensions.builder() + .nodeCount(1_000_000) + .relCountUpperBound(5_000_000) + .build(); + + NodeSimilarityWriteConfig config = ImmutableNodeSimilarityWriteConfig + .builder() + .similarityCutoff(0.0) + .topK(TOP_K_DEFAULT) + .writeProperty("writeProperty") + .writeRelationshipType("writeRelationshipType") + .isEnableComponentOptimization(true) + .componentProperty(componentPropertySet ? "compid" : null) + .build(); + + MemoryTree actual = new NodeSimilarityFactory<>().memoryEstimation(config).estimate(dimensions, 1); + + long nodeFilterRangeMin = 125_016L; + long nodeFilterRangeMax = 125_016L; + MemoryRange nodeFilterRange = MemoryRange.of(nodeFilterRangeMin, nodeFilterRangeMax); + + long vectorsRangeMin = 56_000_016L; + long vectorsRangeMax = 56_000_016L; + MemoryRange vectorsRange = MemoryRange.of(vectorsRangeMin, vectorsRangeMax); + + long weightsRangeMin = 16L; + long weightsRangeMax = 56_000_016L; + MemoryRange weightsRange = MemoryRange.of(weightsRangeMin, weightsRangeMax); + + MemoryEstimations.Builder builder = MemoryEstimations.builder() + .fixed("upper bound per component", 8000040) + .fixed("nodes sorted by component", 8000040) + .fixed("node filter", nodeFilterRange) + .fixed("vectors", vectorsRange) + .fixed("weights", weightsRange) + .fixed("similarityComputer", 8); + if (componentPropertySet) { + builder.fixed("component mapping", 8000040); + } + + long topKMapRangeMin = 248_000_016L; + long topKMapRangeMax = 248_000_016L; + builder.fixed("topK map", MemoryRange.of(topKMapRangeMin, topKMapRangeMax)); + + MemoryTree expected = builder.build().estimate(dimensions, 1); + + assertEquals(expected.memoryUsage(), actual.memoryUsage()); + } + @ParameterizedTest(name = "orientation: {0}, concurrency: {1}") @MethodSource("supportedLoadAndComputeDirections") void shouldOptimizeForDistinctComponentsProperty(Orientation orientation, int concurrency) { diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java index d48826d02a..54603e8b3c 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java @@ -685,7 +685,6 @@ void shouldComputeMemrec(int topK) { MemoryRange weightsRange = MemoryRange.of(weightsRangeMin, weightsRangeMax); MemoryEstimations.Builder builder = MemoryEstimations.builder() - .fixed("components", 8000040) .fixed("node filter", nodeFilterRange) .fixed("vectors", vectorsRange) .fixed("weights", weightsRange) @@ -743,7 +742,6 @@ void shouldComputeMemrecWithTop(int topK) { MemoryRange topNListRange = MemoryRange.of(topNListMin, topNListMax); MemoryEstimations.Builder builder = MemoryEstimations.builder() - .fixed("components", 8000040) .fixed("node filter", nodeFilterRange) .fixed("vectors", vectorsRange) .fixed("weights", weightsRange) @@ -784,8 +782,8 @@ void shouldComputeMemrecWithTopKAndTopNGreaterThanNodeCount() { MemoryTree actual = new NodeSimilarityFactory<>().memoryEstimation(config).estimate(dimensions, 1); - assertEquals(571432, actual.memoryUsage().min); - assertEquals(733032, actual.memoryUsage().max); + assertEquals(570592, actual.memoryUsage().min); + assertEquals(732192, actual.memoryUsage().max); } From 217c003263e59d9a2b5ff0f32cd7a424114d1079 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Wed, 20 Dec 2023 17:21:37 +0100 Subject: [PATCH 15/19] adjust documentation --- .../algorithms/node-similarity/specific-configuration.adoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/modules/ROOT/partials/algorithms/node-similarity/specific-configuration.adoc b/doc/modules/ROOT/partials/algorithms/node-similarity/specific-configuration.adoc index 99ee46363a..ee349d95b3 100644 --- a/doc/modules/ROOT/partials/algorithms/node-similarity/specific-configuration.adoc +++ b/doc/modules/ROOT/partials/algorithms/node-similarity/specific-configuration.adoc @@ -21,3 +21,5 @@ If unspecified, the algorithm runs unweighted. | similarityMetric | String | JACCARD | yes | The metric used to compute similarity. Can be either `JACCARD`, `OVERLAP` or `COSINE`. +| [[is-enable-component-optimization]] isEnableComponentOptimization | Boolean | false | yes | If enabled applies an optimization which can increase performance for multi-component graphs. Makes use of the fact that nodes of distinct components always have a similarity of 0. If not already provided through xref:#component-property [componentProperty], internally runs xref:algorithms/wcc.adoc[WCC]. +| [[component-property]] componentProperty | String | null | yes | Name of the pre-computed node property to use for enabled xref:#is-enable-component-optimization [component optimization] in case pre-computed values are available. From 8ed20acd5a6956ab79a544d38a52c6b23365e78a Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Thu, 21 Dec 2023 11:49:16 +0100 Subject: [PATCH 16/19] refactor rename config variable --- .../FilteredNodeSimilarityFactory.java | 4 ++-- .../gds/similarity/nodesim/NodeSimilarity.java | 2 +- .../nodesim/NodeSimilarityBaseConfig.java | 13 +++++-------- .../similarity/nodesim/NodeSimilarityFactory.java | 4 ++-- .../ComponentPropertyNodeSimilarityTest.java | 4 ++-- .../gds/similarity/nodesim/NodeSimilarityTest.java | 2 +- .../node-similarity/specific-configuration.adoc | 4 ++-- 7 files changed, 15 insertions(+), 18 deletions(-) diff --git a/algo/src/main/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityFactory.java b/algo/src/main/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityFactory.java index f3a519df39..710de17fe9 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityFactory.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/filterednodesim/FilteredNodeSimilarityFactory.java @@ -97,11 +97,11 @@ public MemoryEstimation memoryEstimation(CONFIG config) { .rangePerNode("array", nodeCount -> MemoryRange.of(0, nodeCount * averageVectorSize)) .build(); })); - if (config.isEnableComponentOptimization()) { + if (config.considerComponents()) { builder.perNode("nodes sorted by component", HugeLongArray::memoryEstimation); builder.perNode("upper bound per component", HugeAtomicLongArray::memoryEstimation); } - if (config.isEnableComponentOptimization() && config.componentProperty() != null) { + if (config.considerComponents() && config.componentProperty() != null) { builder.perNode("component mapping", HugeLongArray::memoryEstimation); } if (config.computeToGraph() && !config.hasTopK()) { diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java index e9ce177f5e..ac93e2a07b 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java @@ -268,7 +268,7 @@ private Stream computeParallel() { } private LongUnaryOperator initComponents() { - if (!config.isEnableComponentOptimization()) { + if (!config.considerComponents()) { // considering everything as within the same component targetNodeStream = (componentId, offset) -> targetNodesStream(offset); return n -> 0; diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityBaseConfig.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityBaseConfig.java index dd18f23245..74d3df8383 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityBaseConfig.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityBaseConfig.java @@ -52,8 +52,8 @@ public interface NodeSimilarityBaseConfig extends AlgoBaseConfig, RelationshipWe String COMPONENT_PROPERTY_KEY = "componentProperty"; - String ENABLE_COMPONENT_OPTIMIZATION_KEY = "enableComponentOptimization"; - boolean ENABLE_COMPONENT_OPTIMIZATION = false; + String CONSIDER_COMPONENTS_KEY = "considerComponents"; + boolean CONSIDER_COMPONENTS = false; @Value.Default @Configuration.DoubleRange(min = 0, max = 1) @@ -114,8 +114,8 @@ default int bottomN() { default @Nullable String componentProperty() { return null; } @Value.Default - @Configuration.Key(ENABLE_COMPONENT_OPTIMIZATION_KEY) - default boolean isEnableComponentOptimization() { return ENABLE_COMPONENT_OPTIMIZATION; } + @Configuration.Key(CONSIDER_COMPONENTS_KEY) + default boolean considerComponents() { return CONSIDER_COMPONENTS; } @Configuration.Ignore @Value.Derived @@ -203,10 +203,7 @@ default void validateComponentProperty( @Value.Derived default boolean runWCC() { - if (isEnableComponentOptimization() && componentProperty() == null) { - return true; - } - return false; + return considerComponents() && componentProperty() == null; } } diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityFactory.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityFactory.java index 9698a9e716..eb5ff2aaae 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityFactory.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityFactory.java @@ -92,11 +92,11 @@ public MemoryEstimation memoryEstimation(CONFIG config) { .rangePerNode("array", nodeCount -> MemoryRange.of(0, nodeCount * averageVectorSize)) .build(); })); - if (config.isEnableComponentOptimization()) { + if (config.considerComponents()) { builder.perNode("nodes sorted by component", HugeLongArray::memoryEstimation); builder.perNode("upper bound per component", HugeAtomicLongArray::memoryEstimation); } - if (config.isEnableComponentOptimization() && config.componentProperty() != null) { + if (config.considerComponents() && config.componentProperty() != null) { builder.perNode("component mapping", HugeLongArray::memoryEstimation); } if (config.computeToGraph() && !config.hasTopK()) { diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java index 6f13b9f595..ff71e618dc 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentPropertyNodeSimilarityTest.java @@ -149,7 +149,7 @@ void shouldComputeMemrecWithOrWithoutComponentMapping(boolean componentPropertyS .topK(TOP_K_DEFAULT) .writeProperty("writeProperty") .writeRelationshipType("writeRelationshipType") - .isEnableComponentOptimization(true) + .considerComponents(true) .componentProperty(componentPropertySet ? "compid" : null) .build(); @@ -193,7 +193,7 @@ void shouldOptimizeForDistinctComponentsProperty(Orientation orientation, int co Graph graph = orientation == NATURAL ? naturalGraph : reverseGraph; var config = ImmutableNodeSimilarityStreamConfig.builder() .similarityCutoff(0.0) - .isEnableComponentOptimization(true) + .considerComponents(true) .componentProperty("compid") .concurrency(concurrency) .build(); diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java index 54603e8b3c..030c7aaacd 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodeSimilarityTest.java @@ -883,7 +883,7 @@ void shouldLogProgress(int concurrency) { void shouldLogProgressForWccOptimization() { var graph = naturalGraph; var config = ImmutableNodeSimilarityStreamConfig.builder() - .isEnableComponentOptimization(true) + .considerComponents(true) .concurrency(4) .build(); var progressTask = new NodeSimilarityFactory<>().progressTask(graph, config); diff --git a/doc/modules/ROOT/partials/algorithms/node-similarity/specific-configuration.adoc b/doc/modules/ROOT/partials/algorithms/node-similarity/specific-configuration.adoc index ee349d95b3..b4e0ff5475 100644 --- a/doc/modules/ROOT/partials/algorithms/node-similarity/specific-configuration.adoc +++ b/doc/modules/ROOT/partials/algorithms/node-similarity/specific-configuration.adoc @@ -21,5 +21,5 @@ If unspecified, the algorithm runs unweighted. | similarityMetric | String | JACCARD | yes | The metric used to compute similarity. Can be either `JACCARD`, `OVERLAP` or `COSINE`. -| [[is-enable-component-optimization]] isEnableComponentOptimization | Boolean | false | yes | If enabled applies an optimization which can increase performance for multi-component graphs. Makes use of the fact that nodes of distinct components always have a similarity of 0. If not already provided through xref:#component-property [componentProperty], internally runs xref:algorithms/wcc.adoc[WCC]. -| [[component-property]] componentProperty | String | null | yes | Name of the pre-computed node property to use for enabled xref:#is-enable-component-optimization [component optimization] in case pre-computed values are available. +| [[consider-components]] considerComponents | Boolean | false | yes | If enabled applies an optimization which can increase performance for multi-component graphs. Makes use of the fact that nodes of distinct components always have a similarity of 0. If not already provided through xref:#component-property [componentProperty], internally runs xref:algorithms/wcc.adoc[WCC]. +| [[component-property]] componentProperty | String | null | yes | Name of the pre-computed node property to use for enabled xref:#consider-components [component optimization] in case pre-computed values are available. From 68a4d0be234ce303f4eb328c27e0dc406d0e6086 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Thu, 21 Dec 2023 13:53:52 +0100 Subject: [PATCH 17/19] refactor for consistent init in prepare --- .../similarity/nodesim/NodeSimilarity.java | 165 +++++++++--------- 1 file changed, 85 insertions(+), 80 deletions(-) diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java index ac93e2a07b..3febe1f95d 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java @@ -46,6 +46,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicLong; import java.util.function.BiFunction; +import java.util.function.Function; import java.util.function.LongUnaryOperator; import java.util.stream.LongStream; import java.util.stream.Stream; @@ -54,8 +55,9 @@ public class NodeSimilarity extends Algorithm { private final Graph graph; - private final boolean sortVectors; private final NodeSimilarityBaseConfig config; + private final boolean sortVectors; + private final boolean weighted; private final BitSet sourceNodes; private final BitSet targetNodes; @@ -65,12 +67,12 @@ public class NodeSimilarity extends Algorithm { private final ExecutorService executorService; private final int concurrency; private final MetricSimilarityComputer similarityComputer; + private HugeObjectArray neighbors; private HugeObjectArray weights; private LongUnaryOperator components; - - private final boolean weighted; - private BiFunction targetNodeStream; + private Function sourceNodesStream; + private BiFunction targetNodesStream; public static NodeSimilarity create( Graph graph, @@ -203,46 +205,15 @@ private void prepare() { progressTracker.beginSubTask(); components = initComponents(); + sourceNodesStream = initSourceNodesStream(); + targetNodesStream = initTargetNodesStream(); if (config.runWCC()) { progressTracker.beginSubTask(); } - neighbors = HugeObjectArray.newArray(long[].class, graph.nodeCount()); - if (weighted) { - weights = HugeObjectArray.newArray(double[].class, graph.nodeCount()); - } - DegreeComputer degreeComputer = new DegreeComputer(); - VectorComputer vectorComputer = VectorComputer.of(graph, weighted); - DegreeFilter degreeFilter = new DegreeFilter(config.degreeCutoff(), config.upperDegreeCutoff()); - neighbors.setAll(node -> { - graph.forEachRelationship(node, degreeComputer); - int degree = degreeComputer.degree; - degreeComputer.reset(); - vectorComputer.reset(degree); + initNodeSpecificFields(); - progressTracker.logProgress(graph.degree(node)); - if (degreeFilter.apply(degree)) { - if (sourceNodeFilter.test(node)) { - sourceNodes.set(node); - } - if (targetNodeFilter.test(node)) { - targetNodes.set(node); - } - - // TODO: we don't need to do the rest of the prepare for a node that isn't going to be used in the computation - vectorComputer.forEachRelationship(node); - - if (sortVectors) { - vectorComputer.sortTargetIds(); - } - if (weighted) { - weights.set(node, vectorComputer.getWeights()); - } - return vectorComputer.targetIds.buffer; - } - return null; - }); if (config.runWCC()) { progressTracker.endSubTask(); } @@ -270,42 +241,73 @@ private Stream computeParallel() { private LongUnaryOperator initComponents() { if (!config.considerComponents()) { // considering everything as within the same component - targetNodeStream = (componentId, offset) -> targetNodesStream(offset); return n -> 0; } - LongUnaryOperator components; if (config.componentProperty() != null) { // extract component info from property NodePropertyValues nodeProperties = graph.nodeProperties(config.componentProperty()); - components = initComponentIdMapping(graph, nodeProperties::longValue); - } else { - // run WCC to determine components - progressTracker.beginSubTask(); - WccStreamConfig wccConfig = ImmutableWccStreamConfig - .builder() - .concurrency(concurrency) - .addAllRelationshipTypes(config.relationshipTypes()) - .addAllNodeLabels(config.nodeLabels()) - .build(); - - Wcc wcc = new WccAlgorithmFactory<>().build(graph, wccConfig, ProgressTracker.NULL_TRACKER); - DisjointSetStruct disjointSets = wcc.compute(); - components = disjointSets::setIdOf; - progressTracker.endSubTask(); + return initComponentIdMapping(graph, nodeProperties::longValue); } - var nodesByComponent = new NodesSortedByComponent(components, graph.nodeCount(), concurrency); - targetNodeStream = (componentId, offset) -> StreamSupport.longStream( - nodesByComponent.spliterator(componentId, offset), true) - .filter(targetNodes::get); - return components; + // run WCC to determine components + progressTracker.beginSubTask(); + WccStreamConfig wccConfig = ImmutableWccStreamConfig + .builder() + .concurrency(concurrency) + .addAllRelationshipTypes(config.relationshipTypes()) + .addAllNodeLabels(config.nodeLabels()) + .build(); + + Wcc wcc = new WccAlgorithmFactory<>().build(graph, wccConfig, ProgressTracker.NULL_TRACKER); + DisjointSetStruct disjointSets = wcc.compute(); + progressTracker.endSubTask(); + return disjointSets::setIdOf; + } + + private void initNodeSpecificFields() { + neighbors = HugeObjectArray.newArray(long[].class, graph.nodeCount()); + if (weighted) { + weights = HugeObjectArray.newArray(double[].class, graph.nodeCount()); + } + + DegreeComputer degreeComputer = new DegreeComputer(); + VectorComputer vectorComputer = VectorComputer.of(graph, weighted); + DegreeFilter degreeFilter = new DegreeFilter(config.degreeCutoff(), config.upperDegreeCutoff()); + neighbors.setAll(node -> { + graph.forEachRelationship(node, degreeComputer); + int degree = degreeComputer.degree; + degreeComputer.reset(); + vectorComputer.reset(degree); + + progressTracker.logProgress(graph.degree(node)); + if (degreeFilter.apply(degree)) { + if (sourceNodeFilter.test(node)) { + sourceNodes.set(node); + } + if (targetNodeFilter.test(node)) { + targetNodes.set(node); + } + + // TODO: we don't need to do the rest of the prepare for a node that isn't going to be used in the computation + vectorComputer.forEachRelationship(node); + + if (sortVectors) { + vectorComputer.sortTargetIds(); + } + if (weighted) { + weights.set(node, vectorComputer.getWeights()); + } + return vectorComputer.targetIds.buffer; + } + return null; + }); } private Stream computeAll() { progressTracker.beginSubTask(calculateWorkload()); - var similarityResultStream = loggableAndTerminatableSourceNodeStream() + var similarityResultStream = loggableAndTerminableSourceNodeStream() .boxed() .flatMap(this::computeSimilaritiesForNode); progressTracker.endSubTask(); @@ -314,7 +316,7 @@ private Stream computeAll() { private Stream computeAllParallel() { return ParallelUtil.parallelStream( - loggableAndTerminatableSourceNodeStream(), concurrency, stream -> stream + loggableAndTerminableSourceNodeStream(), concurrency, stream -> stream .boxed() .flatMap(this::computeSimilaritiesForNode) ); @@ -328,10 +330,10 @@ private TopKMap computeTopKMap() { : SimilarityResult.ASCENDING; var topKMap = new TopKMap(neighbors.size(), sourceNodes, Math.abs(config.normalizedK()), comparator); - loggableAndTerminatableSourceNodeStream() + loggableAndTerminableSourceNodeStream() .forEach(sourceNodeId -> { if (sourceNodeFilter.equals(NodeFilter.noOp)) { - targetNodeStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) + targetNodesStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) .forEach(targetNodeId -> computeSimilarityFor(sourceNodeId, targetNodeId, (source, target, similarity) -> { topKMap.put(source, target, similarity); @@ -339,7 +341,7 @@ private TopKMap computeTopKMap() { } )); } else { - targetNodeStream.apply(components.applyAsLong(sourceNodeId), 0L) + targetNodesStream.apply(components.applyAsLong(sourceNodeId), 0L) .filter(targetNodeId -> sourceNodeId != targetNodeId) .forEach(targetNodeId -> computeSimilarityFor(sourceNodeId, targetNodeId, topKMap::put)); } @@ -357,7 +359,7 @@ private TopKMap computeTopKMapParallel() { var topKMap = new TopKMap(neighbors.size(), sourceNodes, Math.abs(config.normalizedK()), comparator); ParallelUtil.parallelStreamConsume( - loggableAndTerminatableSourceNodeStream(), + loggableAndTerminableSourceNodeStream(), concurrency, terminationFlag, stream -> stream @@ -368,7 +370,7 @@ private TopKMap computeTopKMapParallel() { // into these queues is not considered to be thread-safe. // Hence, we need to ensure that down the stream, exactly one queue // within the TopKMap processes all pairs for a single node. - targetNodeStream.apply(components.applyAsLong(sourceNodeId), 0L) + targetNodesStream.apply(components.applyAsLong(sourceNodeId), 0L) .filter(targetNodeId -> sourceNodeId != targetNodeId) .forEach(targetNodeId -> computeSimilarityFor(sourceNodeId, targetNodeId, topKMap::put)) ) @@ -382,13 +384,13 @@ private Stream computeTopN() { progressTracker.beginSubTask(calculateWorkload()); var topNList = new TopNList(config.normalizedN()); - loggableAndTerminatableSourceNodeStream() + loggableAndTerminableSourceNodeStream() .forEach(sourceNodeId -> { if (sourceNodeFilter.equals(NodeFilter.noOp)) { - targetNodeStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) + targetNodesStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) .forEach(targetNodeId -> computeSimilarityFor(sourceNodeId, targetNodeId, topNList::add)); } else { - targetNodeStream.apply(components.applyAsLong(sourceNodeId), 0L) + targetNodesStream.apply(components.applyAsLong(sourceNodeId), 0L) .filter(targetNodeId -> sourceNodeId != targetNodeId) .forEach(targetNodeId -> computeSimilarityFor(sourceNodeId, targetNodeId, topNList::add)); } @@ -404,24 +406,27 @@ private Stream computeTopN(TopKMap topKMap) { return topNList.stream(); } - private LongStream sourceNodesStream(long offset) { - return new SetBitsIterable(sourceNodes, offset).stream(); + private Function initSourceNodesStream() { + return offset -> new SetBitsIterable(sourceNodes, offset).stream(); } - private LongStream sourceNodesStream() { - return sourceNodesStream(0); - } + private BiFunction initTargetNodesStream() { + if (!config.considerComponents()) { + return (componentId, offset) -> new SetBitsIterable(targetNodes, offset).stream(); + } - private LongStream targetNodesStream(long offset) { - return new SetBitsIterable(targetNodes, offset).stream(); + var nodesByComponent = new NodesSortedByComponent(components, graph.nodeCount(), concurrency); + return (componentId, offset) -> StreamSupport + .longStream(nodesByComponent.spliterator(componentId, offset), true) + .filter(targetNodes::get); } - private LongStream loggableAndTerminatableSourceNodeStream() { - return checkProgress(sourceNodesStream()); + private LongStream loggableAndTerminableSourceNodeStream() { + return checkProgress(sourceNodesStream.apply(0L)); } private Stream computeSimilaritiesForNode(long sourceNodeId) { - return targetNodeStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) + return targetNodesStream.apply(components.applyAsLong(sourceNodeId), sourceNodeId + 1) .mapToObj(targetNodeId -> { var resultHolder = new SimilarityResult[]{null}; computeSimilarityFor( From a5355d1d0e4f50922eed340cd0bbd731b31f09d4 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Thu, 21 Dec 2023 15:11:38 +0100 Subject: [PATCH 18/19] refactor separate complex init from construction --- ...edByComponent.java => ComponentNodes.java} | 20 +++++-- .../similarity/nodesim/NodeSimilarity.java | 4 +- ...onentTest.java => ComponentNodesTest.java} | 52 +++++++++---------- 3 files changed, 44 insertions(+), 32 deletions(-) rename algo/src/main/java/org/neo4j/gds/similarity/nodesim/{NodesSortedByComponent.java => ComponentNodes.java} (89%) rename algo/src/test/java/org/neo4j/gds/similarity/nodesim/{NodesSortedByComponentTest.java => ComponentNodesTest.java} (79%) diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentNodes.java similarity index 89% rename from algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java rename to algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentNodes.java index df0c4d2719..8c8683464a 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponent.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/ComponentNodes.java @@ -35,15 +35,27 @@ /** * Manages nodes sorted by component. Produces an iterator over all nodes in a given component. */ -public class NodesSortedByComponent { +public final class ComponentNodes { private final LongUnaryOperator components; private final HugeAtomicLongArray upperBoundPerComponent; private final HugeLongArray nodesSorted; - public NodesSortedByComponent(LongUnaryOperator components, long nodeCount, int concurrency) { + private ComponentNodes(LongUnaryOperator components, HugeAtomicLongArray upperBoundPerComponent, + HugeLongArray nodesSorted) { + this.components = components; - this.upperBoundPerComponent = computeIndexUpperBoundPerComponent(components, nodeCount, concurrency); - this.nodesSorted = computeNodesSortedByComponent(components, upperBoundPerComponent, concurrency); + this.upperBoundPerComponent = upperBoundPerComponent; + this.nodesSorted = nodesSorted; + } + + public static ComponentNodes create(LongUnaryOperator components, long nodeCount, int concurrency) { + var upperBoundPerComponent = computeIndexUpperBoundPerComponent(components, nodeCount, concurrency); + var nodesSorted = computeNodesSortedByComponent(components, upperBoundPerComponent, concurrency); + return new ComponentNodes( + components, + upperBoundPerComponent, + nodesSorted + ); } public PrimitiveIterator.OfLong iterator(long componentId, long offset) { diff --git a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java index 3febe1f95d..4c1a0b43f3 100644 --- a/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java +++ b/algo/src/main/java/org/neo4j/gds/similarity/nodesim/NodeSimilarity.java @@ -415,9 +415,9 @@ private BiFunction initTargetNodesStream() { return (componentId, offset) -> new SetBitsIterable(targetNodes, offset).stream(); } - var nodesByComponent = new NodesSortedByComponent(components, graph.nodeCount(), concurrency); + var componentNodes = ComponentNodes.create(components, graph.nodeCount(), concurrency); return (componentId, offset) -> StreamSupport - .longStream(nodesByComponent.spliterator(componentId, offset), true) + .longStream(componentNodes.spliterator(componentId, offset), true) .filter(targetNodes::get); } diff --git a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentNodesTest.java similarity index 79% rename from algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java rename to algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentNodesTest.java index fd731805cf..54f0a32c76 100644 --- a/algo/src/test/java/org/neo4j/gds/similarity/nodesim/NodesSortedByComponentTest.java +++ b/algo/src/test/java/org/neo4j/gds/similarity/nodesim/ComponentNodesTest.java @@ -43,7 +43,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; -class NodesSortedByComponentTest { +class ComponentNodesTest { private LongUnaryOperator prepare7DistinctSizeComponents() { return nodeId -> { @@ -70,7 +70,7 @@ void shouldDetermineIndexUpperBound() { // nodeId -> componentId var components = prepare7DistinctSizeComponents(); // componentId, upperBound - var idxUpperBoundPerComponent = NodesSortedByComponent.computeIndexUpperBoundPerComponent(components, 28, 4); + var idxUpperBoundPerComponent = ComponentNodes.computeIndexUpperBoundPerComponent(components, 28, 4); // we cannot infer which component follows another, but the range must match in size for the component Map componentPerIdxUpperBound = new HashMap<>(7); for (int i = 0; i < 7; i++) { @@ -106,7 +106,7 @@ void shouldComputeNodesSortedByComponent() { upperBoundPerComponent.set(5, 20); upperBoundPerComponent.set(6, 27); - var nodesSortedByComponent = NodesSortedByComponent.computeNodesSortedByComponent(components, + var nodesSortedByComponent = ComponentNodes.computeNodesSortedByComponent(components, upperBoundPerComponent, 4); // nodes may occur in arbitrary order within components, but with the given assignment, nodeIds must be within @@ -139,7 +139,7 @@ void shouldComputeNodesSortedByComponentsNotConsecutive() { upperBoundPerComponent.set(5, 10); upperBoundPerComponent.set(1, 8); - var nodesSortedByComponent = NodesSortedByComponent.computeNodesSortedByComponent(components, + var nodesSortedByComponent = ComponentNodes.computeNodesSortedByComponent(components, upperBoundPerComponent, 4); // nodes may occur in arbitrary order within components, but with the given assignment, nodeIds must be within @@ -168,19 +168,19 @@ void shouldComputeNodesSortedByComponentsNotConsecutive() { } } - NodesSortedByComponent nodesSortedByComponentMock = Mockito.mock(NodesSortedByComponent.class); - Mockito.doReturn(components).when(nodesSortedByComponentMock).getComponents(); - Mockito.doReturn(upperBoundPerComponent).when(nodesSortedByComponentMock).getUpperBoundPerComponent(); - Mockito.doReturn(nodesSortedByComponent).when(nodesSortedByComponentMock).getNodesSorted(); + ComponentNodes componentNodesMock = Mockito.mock(ComponentNodes.class); + Mockito.doReturn(components).when(componentNodesMock).getComponents(); + Mockito.doReturn(upperBoundPerComponent).when(componentNodesMock).getUpperBoundPerComponent(); + Mockito.doReturn(nodesSortedByComponent).when(componentNodesMock).getNodesSorted(); // no component with id 0 - Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(0L,0L); - Iterator iterator = nodesSortedByComponentMock.iterator(0L, 0L); + Mockito.doCallRealMethod().when(componentNodesMock).iterator(0L,0L); + Iterator iterator = componentNodesMock.iterator(0L, 0L); assertFalse(iterator.hasNext()); // 5 nodes for component with id 1 - Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(1L,0L); - iterator = nodesSortedByComponentMock.iterator(1L, 0L); + Mockito.doCallRealMethod().when(componentNodesMock).iterator(1L,0L); + iterator = componentNodesMock.iterator(1L, 0L); values.addAll(List.of(6L, 7L, 8L, 9L, 10L)); for (int i = 0; i < 5; i++) { assertTrue(iterator.hasNext()); @@ -220,22 +220,22 @@ public long applyAsLong(long nodeId) { upperBoundPerComponent.set(0, 2); upperBoundPerComponent.set(1, 7); - NodesSortedByComponent nodesSortedByComponentMock = Mockito.mock(NodesSortedByComponent.class); - Mockito.doReturn(components).when(nodesSortedByComponentMock).getComponents(); - Mockito.doReturn(upperBoundPerComponent).when(nodesSortedByComponentMock).getUpperBoundPerComponent(); - Mockito.doReturn(nodesSorted).when(nodesSortedByComponentMock).getNodesSorted(); + ComponentNodes componentNodesMock = Mockito.mock(ComponentNodes.class); + Mockito.doReturn(components).when(componentNodesMock).getComponents(); + Mockito.doReturn(upperBoundPerComponent).when(componentNodesMock).getUpperBoundPerComponent(); + Mockito.doReturn(nodesSorted).when(componentNodesMock).getNodesSorted(); // first component - Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(0L,0L); - Iterator iterator = nodesSortedByComponentMock.iterator(0L, 0L); + Mockito.doCallRealMethod().when(componentNodesMock).iterator(0L,0L); + Iterator iterator = componentNodesMock.iterator(0L, 0L); for (int nodeId = 0; nodeId < 3; nodeId++) { assertTrue(iterator.hasNext()); assertThat(iterator.next()).isEqualTo(nodeId); } assertFalse(iterator.hasNext()); // second component - Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(1L,0L); - iterator = nodesSortedByComponentMock.iterator(1L, 0L); + Mockito.doCallRealMethod().when(componentNodesMock).iterator(1L,0L); + iterator = componentNodesMock.iterator(1L, 0L); for (int nodeId = 3; nodeId < 8; nodeId++) { assertTrue(iterator.hasNext()); assertThat(iterator.next()).isEqualTo(nodeId); @@ -254,14 +254,14 @@ void shouldRespectOffset() { var upperBoundPerComponent = HugeAtomicLongArray.of(1, ParalleLongPageCreator.passThrough(4)); upperBoundPerComponent.set(0, 19); - NodesSortedByComponent nodesSortedByComponentMock = Mockito.mock(NodesSortedByComponent.class); - Mockito.doReturn(components).when(nodesSortedByComponentMock).getComponents(); - Mockito.doReturn(upperBoundPerComponent).when(nodesSortedByComponentMock).getUpperBoundPerComponent(); - Mockito.doReturn(nodesSorted).when(nodesSortedByComponentMock).getNodesSorted(); - Mockito.doCallRealMethod().when(nodesSortedByComponentMock).iterator(0L,11L); + ComponentNodes componentNodesMock = Mockito.mock(ComponentNodes.class); + Mockito.doReturn(components).when(componentNodesMock).getComponents(); + Mockito.doReturn(upperBoundPerComponent).when(componentNodesMock).getUpperBoundPerComponent(); + Mockito.doReturn(nodesSorted).when(componentNodesMock).getNodesSorted(); + Mockito.doCallRealMethod().when(componentNodesMock).iterator(0L,11L); Set resultingNodes = new HashSet<>(); - Iterator iterator = nodesSortedByComponentMock.iterator(0L, 11L); + Iterator iterator = componentNodesMock.iterator(0L, 11L); iterator.forEachRemaining(resultingNodes::add); assertThat(resultingNodes).containsExactlyInAnyOrder(11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L); } From 42dd15349d1da220c0a67df18a4abef59d6d2513 Mon Sep 17 00:00:00 2001 From: Christina Eiba Date: Thu, 21 Dec 2023 16:22:43 +0100 Subject: [PATCH 19/19] fix tests --- .../ROOT/pages/algorithms/filtered-node-similarity.adoc | 2 +- doc/modules/ROOT/pages/algorithms/node-similarity.adoc | 2 +- .../org/neo4j/gds/beta/generator/GraphGenerateProcTest.java | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/modules/ROOT/pages/algorithms/filtered-node-similarity.adoc b/doc/modules/ROOT/pages/algorithms/filtered-node-similarity.adoc index 0b39d0794e..078e55e454 100644 --- a/doc/modules/ROOT/pages/algorithms/filtered-node-similarity.adoc +++ b/doc/modules/ROOT/pages/algorithms/filtered-node-similarity.adoc @@ -327,7 +327,7 @@ YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory [opts="header",cols="1,1,1,1,1"] |=== | nodeCount | relationshipCount | bytesMin | bytesMax | requiredMemory -| 9 | 9 | 2496 | 2712 | "[2496 Bytes \... 2712 Bytes]" +| 9 | 9 | 2384 | 2600 | "[2384 Bytes \... 2600 Bytes]" |=== -- [[algorithms-filtered-node-similarity-examples-stream]] diff --git a/doc/modules/ROOT/pages/algorithms/node-similarity.adoc b/doc/modules/ROOT/pages/algorithms/node-similarity.adoc index 72cc1f5bd7..a3e10434e7 100644 --- a/doc/modules/ROOT/pages/algorithms/node-similarity.adoc +++ b/doc/modules/ROOT/pages/algorithms/node-similarity.adoc @@ -333,7 +333,7 @@ YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory [opts="header",cols="1,1,1,1,1"] |=== | nodeCount | relationshipCount | bytesMin | bytesMax | requiredMemory -| 9 | 9 | 2496 | 2712 | "[2496 Bytes \... 2712 Bytes]" +| 9 | 9 | 2384 | 2600 | "[2384 Bytes \... 2600 Bytes]" |=== -- diff --git a/proc/catalog/src/test/java/org/neo4j/gds/beta/generator/GraphGenerateProcTest.java b/proc/catalog/src/test/java/org/neo4j/gds/beta/generator/GraphGenerateProcTest.java index 233ce61920..f232bed834 100644 --- a/proc/catalog/src/test/java/org/neo4j/gds/beta/generator/GraphGenerateProcTest.java +++ b/proc/catalog/src/test/java/org/neo4j/gds/beta/generator/GraphGenerateProcTest.java @@ -188,9 +188,9 @@ void shouldGenerateGraphWithRelationshipProperty() { private static Stream estimations() { return Stream.of( - Arguments.of(100, 2, MemoryRange.of(28_928, 32_128)), - Arguments.of(100, 4, MemoryRange.of(30_528, 35_328)), - Arguments.of(200, 4, MemoryRange.of(60_944, 70_544)) + Arguments.of(100, 2, MemoryRange.of(28_088, 31_288)), + Arguments.of(100, 4, MemoryRange.of(29_688, 34_488)), + Arguments.of(200, 4, MemoryRange.of(59_304, 68_904)) ); } }