From 2c482f8a71096c61e8052c2b105555f92185fdf4 Mon Sep 17 00:00:00 2001 From: yuval Date: Sun, 22 Oct 2023 18:03:29 +0200 Subject: [PATCH 1/3] updated longest path docs with extension --- .../doc/syntax/DagLongestPathSyntaxTest.java | 38 ++++++++++++++ .../pages/algorithms/dag/longest-path.adoc | 51 ++++++++++++------- 2 files changed, 72 insertions(+), 17 deletions(-) create mode 100644 doc-test/src/test/java/org/neo4j/gds/doc/syntax/DagLongestPathSyntaxTest.java diff --git a/doc-test/src/test/java/org/neo4j/gds/doc/syntax/DagLongestPathSyntaxTest.java b/doc-test/src/test/java/org/neo4j/gds/doc/syntax/DagLongestPathSyntaxTest.java new file mode 100644 index 0000000000..e3f6b9ff20 --- /dev/null +++ b/doc-test/src/test/java/org/neo4j/gds/doc/syntax/DagLongestPathSyntaxTest.java @@ -0,0 +1,38 @@ + /* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +package org.neo4j.gds.doc.syntax; + +import java.util.List; + +import static org.neo4j.gds.doc.syntax.SyntaxMode.STREAM; + + class DagLongestPathSyntaxTest extends SyntaxTestBase { + + protected Iterable<SyntaxModeMeta> syntaxModes() { + return List.of( + SyntaxModeMeta.of(STREAM) + ); + } + + @Override + protected String adocFile() { + return "pages/algorithms/dag/longest-path.adoc"; + } + } diff --git a/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc b/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc index 5d7e150a28..172dd01282 100644 --- a/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc +++ b/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc @@ -20,7 +20,7 @@ Finding the longest path that leads to a node in a graph is possible to do in li GDS implementation for this algorithm is based on topological sort and takes linear time. If the graph is not a DAG, the runtime is still linear, but the results cannot be trusted. You can use xref:algorithms/dag/topological-sort.adoc[topological sort] to make sure the graph is a DAG. -The algorithm supports unweighted graphs and graphs with non negative weights. +The algorithm supports weighted and unweighted graphs. Negative weights are currently unsupported. === Usage @@ -46,7 +46,13 @@ CALL gds.dag.longestPath.stream( graphName: String, configuration: Map ) YIELD - nodeId: Integer + index: Integer, + sourceNode: Integer, + targetNode: Integer, + totalCost: Float, + nodeIds: List of Integer, + costs: List of Float, + path: Path ---- include::partial$/algorithms/common-configuration/common-parameters.adoc[] @@ -62,9 +68,15 @@ include::partial$/algorithms/common-configuration/common-stream-stats-configurat .Results [opts="header"] |=== -| Name | Type | Description -| targetNodeId | Integer | The ID of the node for which the path is computed -| distance | Double | The distance of the longest path that ends in the target node +| Name | Type | Description +| index | Integer | 0-based index of the found path. 
+| sourceNode | Integer | Source node of the path. +| targetNode | Integer | Target node of the path. +| totalCost | Float | Total cost from source to target. +| nodeIds | List of Integer | Node ids on the path in traversal order. +| costs | List of Float | Accumulated costs for each node on the path. +| path | Path | The path represented as Cypher entity. + |=== // include-with-stream @@ -73,9 +85,6 @@ include::partial$/algorithms/common-configuration/common-stream-stats-configurat // tabbed-example ==== -[NOTE] -Current result does not contain the longest path itself, but only the distance of the path. - == Examples @@ -91,7 +100,7 @@ CREATE (n0:Goods {name: 'Timber'}), (n1:Goods {name: 'Lumber'}), (n2:Goods {name: 'Screws'}), - (n3:Workshop {name: 'Table Maker Inc.'}), + (n3:Workshop {name: 'Table Maker'}), (n4:Product {name: 'Table'}), (n0)-[:Processing {time: 1}]->(n1), @@ -127,19 +136,27 @@ For more details on the stream mode in general, see xref:common-usage/running-al [source, cypher, role=noplay setup-query] ---- CALL gds.dag.longestPath.stream("g", {relationshipWeightProperty: "time"}) -YIELD targetNodeId as target, distance -RETURN gds.util.asNode(target).name, distance +YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path +RETURN + index, + gds.util.asNode(sourceNode).name AS sourceNode, + gds.util.asNode(targetNode).name AS targetNode, + totalCost, + [nodeId IN nodeIds | gds.util.asNode(nodeId).name] AS nodeNames, + costs, + nodes(path) as path +ORDER BY index ---- We use the utility function asNode to return the name of node instead of its ID to make results more readable. 
.Results [opts="header"] |=== -| name | distance -|"Timber" | 0.0 -|"Screws" | 0.0 -|"Lumber" | 1.0 -|"Table Maker" | 3.0 -|"Table" | 4.0 +| index | sourceNode | targetNode | totalCost | nodeNames | costs | path +| 0 | "Timber" | "Timber" | 0.0 | ["Timber"] | [0.0] | [Node[0]] +| 1 | "Timber" | "Lumber" | 1.0 | ["Timber", "Lumber"] | [0.0, 1.0] | [Node[0], Node[1]] +| 2 | "Screws" | "Table Maker" | 3.0 | ["Screws", "Table Maker"] | [0.0, 3.0] | [Node[2], Node[3]] +| 3 | "Screws" | "Screws" | 0.0 | ["Screws"] | [0.0] | [Node[2]] +| 4 | "Screws" | "Table" | 4.0 | ["Screws", "Table Maker", "Table"] | [0.0, 3.0, 4.0] | [Node[2], Node[3], Node[4]] |=== -- From 56d224d7455ef84aed99ab96d39bfcac2473a941 Mon Sep 17 00:00:00 2001 From: ioannispan Date: Mon, 23 Oct 2023 09:04:01 +0200 Subject: [PATCH 2/3] Small doc fixes --- .../neo4j/gds/doc/DagLongestPathDocTest.java | 46 +++++++++++++++++++ .../pages/algorithms/dag/longest-path.adoc | 12 ++--- 2 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 doc-test/src/test/java/org/neo4j/gds/doc/DagLongestPathDocTest.java diff --git a/doc-test/src/test/java/org/neo4j/gds/doc/DagLongestPathDocTest.java b/doc-test/src/test/java/org/neo4j/gds/doc/DagLongestPathDocTest.java new file mode 100644 index 0000000000..7fa3b03adb --- /dev/null +++ b/doc-test/src/test/java/org/neo4j/gds/doc/DagLongestPathDocTest.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +package org.neo4j.gds.doc; + +import org.neo4j.gds.functions.AsNodeFunc; +import org.neo4j.gds.paths.dag.longestPath.DagLongestPathStreamProc; + +import java.util.List; + +class DagLongestPathDocTest extends SingleFileDocTestBase { + + @Override + protected List<Class<?>> functions() { + return List.of(AsNodeFunc.class); + } + + @Override + protected List<Class<?>> procedures() { + return List.of( + DagLongestPathStreamProc.class + ); + } + + @Override + protected String adocFile() { + return "pages/algorithms/dag/longest-path.adoc"; + } + +} diff --git a/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc b/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc index 172dd01282..23b20601ab 100644 --- a/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc +++ b/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc @@ -147,16 +147,16 @@ RETURN nodes(path) as path ORDER BY index ---- -We use the utility function asNode to return the name of node instead of its ID to make results more readable. +We use the utility function `asNode` to return the name of node instead of its ID to make results more readable. 
.Results [opts="header"] |=== | index | sourceNode | targetNode | totalCost | nodeNames | costs | path -| 0 | "Timber" | "Timber" | 0.0 | ["Timber"] | [0.0] | [Node[0]] -| 1 | "Timber" | "Lumber" | 1.0 | ["Timber", "Lumber"] | [0.0, 1.0] | [Node[0], Node[1]] -| 2 | "Screws" | "Table Maker" | 3.0 | ["Screws", "Table Maker"] | [0.0, 3.0] | [Node[2], Node[3]] -| 3 | "Screws" | "Screws" | 0.0 | ["Screws"] | [0.0] | [Node[2]] -| 4 | "Screws" | "Table" | 4.0 | ["Screws", "Table Maker", "Table"] | [0.0, 3.0, 4.0] | [Node[2], Node[3], Node[4]] +| 0 | "Timber" | "Timber" | 0.0 | [Timber] | [0.0] | [Node[0]] +| 1 | "Timber" | "Lumber" | 1.0 | [Timber, Lumber] | [0.0, 1.0] | [Node[0], Node[1]] +| 2 | "Screws" | "Table Maker" | 3.0 | [Screws, Table Maker] | [0.0, 3.0] | [Node[2], Node[3]] +| 3 | "Screws" | "Screws" | 0.0 | [Screws] | [0.0] | [Node[2]] +| 4 | "Screws" | "Table" | 4.0 | [Screws, Table Maker, Table] | [0.0, 3.0, 4.0] | [Node[2], Node[3], Node[4]] |=== -- From d492135c9e2f7a03cfd65753c916c7d1c71061f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonatan=20J=C3=A4derberg?= Date: Tue, 24 Oct 2023 11:50:05 +0200 Subject: [PATCH 3/3] One sentence per line and other doc formatting --- .../pages/algorithms/dag/dag-algorithms.adoc | 6 ++-- .../pages/algorithms/dag/longest-path.adoc | 30 ++++++++++++------- .../algorithms/dag/topological-sort.adoc | 26 ++++++++++++---- 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/doc/modules/ROOT/pages/algorithms/dag/dag-algorithms.adoc b/doc/modules/ROOT/pages/algorithms/dag/dag-algorithms.adoc index 429f2939d5..adb7ebeead 100644 --- a/doc/modules/ROOT/pages/algorithms/dag/dag-algorithms.adoc +++ b/doc/modules/ROOT/pages/algorithms/dag/dag-algorithms.adoc @@ -6,9 +6,11 @@ Directed Acyclic Graphs (DAGs) are directed graphs that do not contain cycles. These kind of graphs are commonly used to model dependencies between entities. 
-The canonical algorithm that goes hand in hand with DAGs is topological sort, for which GDS provides an efficient parallel implementation. Running topological sort is the best way to make sure the graph is a DAG. +The canonical algorithm that goes hand in hand with DAGs is topological sort, for which GDS provides an efficient parallel implementation. +Running topological sort is the best way to make sure the graph is a DAG. -Some of the problems that are computationally hard to solve in the general case can be solved efficiently when the scope is limited to DAGs. One of these is the longest path problem, for which GDS provides an efficient algorithm. +Some of the problems that are computationally hard to solve in the general case can be solved efficiently when the scope is limited to DAGs. +One of these is the longest path problem, for which GDS provides an efficient algorithm. The Neo4j GDS library includes the following DAG algorithms: diff --git a/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc b/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc index 23b20601ab..04d0d03a85 100644 --- a/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc +++ b/doc/modules/ROOT/pages/algorithms/dag/longest-path.adoc @@ -18,14 +18,18 @@ include::partial$/operations-reference/alpha-note.adoc[] Finding the longest path that leads to a node in a graph is possible to do in linear time for the special case of DAGs. -GDS implementation for this algorithm is based on topological sort and takes linear time. If the graph is not a DAG, the runtime is still linear, but the results cannot be trusted. You can use xref:algorithms/dag/topological-sort.adoc[topological sort] to make sure the graph is a DAG. +GDS implementation for this algorithm is based on topological sort and takes linear time. +If the graph is not a DAG, the runtime is still linear, but the results cannot be trusted. 
+You can use xref:algorithms/dag/topological-sort.adoc[topological sort] to make sure the graph is a DAG. -The algorithm supports weighted and unweighted graphs. Negative weights are currently unsupported. +The algorithm supports weighted and unweighted graphs. +Negative weights are currently unsupported. === Usage -One example for usage of this algorithm is in the context of a supply chain graph. If edges indicate the time to supply, then the distance of the longest path to a target node is the time required to manufacture the node from decision to completion. +One example for usage of this algorithm is in the context of a supply chain graph. +If edges indicate the time to supply, then the distance of the longest path to a target node is the time required to manufacture the node from decision to completion. == Syntax @@ -76,7 +80,6 @@ include::partial$/algorithms/common-configuration/common-stream-stats-configurat | nodeIds | List of Integer | Node ids on the path in traversal order. | costs | List of Float | Accumulated costs for each node on the path. | path | Path | The path represented as Cypher entity. - |=== // include-with-stream @@ -110,9 +113,13 @@ CREATE ---- This graph describes a simple supply chain of constructing a table in the Table Maker workshop. -In order to have lumber for the table, the workshop processes timber, which takes 1 day to complete. Once the lumber is ready, it is already in the workshop, therefor it takes zero time to ship it. However, the screws take 3 days to be shipped to the workshop. Only after the workshop has all the requirements met, the table can be constructed, a process that takes 1 day. +In order to have lumber for the table, the workshop processes timber, which takes 1 day to complete. +Once the lumber is ready, it is already in the workshop, therefore it takes zero time to ship it. +However, the screws take 3 days to be shipped to the workshop. 
+Only after the workshop has all the requirements met, the table can be constructed, a process that takes 1 day. -The longest path to the table node starts with the screws, then the workshop and then the table, in total: 4 days. This is the bottleneck path, and total time that takes to manufacture the table. +The longest path to the table node starts with the screws, then the workshop and then the table, in total: 4 days. +This is the bottleneck path, and the total time it takes to manufacture the table. .The following Cypher statement will project the graph to GDS: [source, cypher, role=noplay setup-query] @@ -147,16 +154,17 @@ RETURN nodes(path) as path ORDER BY index ---- + We use the utility function `asNode` to return the name of node instead of its ID to make results more readable. .Results [opts="header"] |=== -| index | sourceNode | targetNode | totalCost | nodeNames | costs | path -| 0 | "Timber" | "Timber" | 0.0 | [Timber] | [0.0] | [Node[0]] -| 1 | "Timber" | "Lumber" | 1.0 | [Timber, Lumber] | [0.0, 1.0] | [Node[0], Node[1]] -| 2 | "Screws" | "Table Maker" | 3.0 | [Screws, Table Maker] | [0.0, 3.0] | [Node[2], Node[3]] -| 3 | "Screws" | "Screws" | 0.0 | [Screws] | [0.0] | [Node[2]] +| index | sourceNode | targetNode | totalCost | nodeNames | costs | path +| 0 | "Timber" | "Timber" | 0.0 | [Timber] | [0.0] | [Node[0]] +| 1 | "Timber" | "Lumber" | 1.0 | [Timber, Lumber] | [0.0, 1.0] | [Node[0], Node[1]] +| 2 | "Screws" | "Table Maker" | 3.0 | [Screws, Table Maker] | [0.0, 3.0] | [Node[2], Node[3]] +| 3 | "Screws" | "Screws" | 0.0 | [Screws] | [0.0] | [Node[2]] | 4 | "Screws" | "Table" | 4.0 | [Screws, Table Maker, Table] | [0.0, 3.0, 4.0] | [Node[2], Node[3], Node[4]] |=== -- diff --git a/doc/modules/ROOT/pages/algorithms/dag/topological-sort.adoc b/doc/modules/ROOT/pages/algorithms/dag/topological-sort.adoc index 6f0e50b797..7f41ce2a8e 100644 --- a/doc/modules/ROOT/pages/algorithms/dag/topological-sort.adoc +++ 
b/doc/modules/ROOT/pages/algorithms/dag/topological-sort.adoc @@ -17,7 +17,8 @@ include::partial$/operations-reference/alpha-note.adoc[] A topological sorting of nodes in a graph is an ordering of the nodes in the graph where every node appears only after all the nodes pointing to it have appeared. For example, for a graph with 4 nodes and these relations: `a->b`, `a->c`, `b->d`, `c->d`, there are two acceptable topological sorts: `a, b, c, d` and `a, c, b, d`. -The topological order of the nodes is defined only for directed acyclic graphs (DAGs). See xref:#topological-sort-cycles[below] for the expected result for graphs with cycles. +The topological order of the nodes is defined only for directed acyclic graphs (DAGs). +See xref:#topological-sort-cycles[below] for the expected result for graphs with cycles. GDS provides an efficient parallel implementation for this algorithm. @@ -25,7 +26,8 @@ GDS provides an efficient parallel implementation for this algorithm. [[topological-sort-cycles]] === Cycles -Running the algorithm on a graph with cycles will cause the omitting of part of the nodes from the sorting. The omitted nodes are: +Running the algorithm on a graph with cycles will cause the omitting of part of the nodes from the sorting. + The omitted nodes are: 1. Nodes that are part of a cycle (including self cycles) @@ -43,11 +45,17 @@ image::example-graphs/{image-file}[Visualization of the example graph,align="cen === Usage -Topological ordering of the nodes is beneficial when you want to guarantee a node will only be processed after its dependencies were processed. This is very useful for dependency related tasks such as scheduling or calculations that derive values from their dependencies. +Topological ordering of the nodes is beneficial when you want to guarantee a node will only be processed after its dependencies were processed. +This is very useful for dependency related tasks such as scheduling or calculations that derive values from their dependencies. 
+ ==== Cycles detection -The algorithm can also be used to determine if the graph contains a cycle or not. If all the nodes in the graph appear in the sorting, there is no cycle in the graph. If some of the nodes are missing from the sorting, there is a cycle. It does not tell which nodes constitute the cycle, but it does give a clue, as described in the xref:#topological-sort-cycles[cycles] section. +The algorithm can also be used to determine if the graph contains a cycle or not. +If all the nodes in the graph appear in the sorting, there is no cycle in the graph. +If some of the nodes are missing from the sorting, there is a cycle. +It does not tell which nodes constitute the cycle, but it does give a clue, as described in the xref:#topological-sort-cycles[cycles] section. + ==== Maximum distance from source @@ -106,6 +114,7 @@ include::partial$/algorithms/topological-sort/specific-configuration.adoc[] // tabbed-example ==== + == Examples :algorithm-name: Topological Sort @@ -133,7 +142,8 @@ CREATE (n5)-[:REQUIRED]->(n6) ---- -This graph describes a simplified supply chain of building a house. Each part of the house cannot be worked on before its requirements are met. +This graph describes a simplified supply chain of building a house. +Each part of the house cannot be worked on before its requirements are met. For example, we cannot build support before getting the steel, the skeleton is not ready until both support and base are ready. .The following Cypher statement will project the graph to GDS: @@ -145,8 +155,11 @@ WITH gds.graph.project("g", n, target, {}) AS g RETURN g ---- + === Stream -The stream procedure streams the nodes in the graph ordered by a valid topological order. The nodes can then be processed one by one, guaranteeing that each node is processed only after its dependencies were processed. + +The stream procedure streams the nodes in the graph ordered by a valid topological order. 
+The nodes can then be processed one by one, guaranteeing that each node is processed only after its dependencies were processed. For more details on the stream mode in general, see xref:common-usage/running-algos.adoc#running-algos-stream[Stream]. @@ -160,6 +173,7 @@ YIELD nodeId, maxDistanceFromSource RETURN gds.util.asNode(nodeId).name AS name, maxDistanceFromSource ORDER BY maxDistanceFromSource, name ---- + We use the utility function `asNode` to return the name of node instead of its ID to make results more readable. .Results