diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 0bf6deaaba49..48005ffb4d27 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1268,9 +1268,7 @@ def get_homogeneous(g, balance_ntypes): int(F.as_scalar(inner_nids[-1])) + 1, ] ) - val = np.cumsum( - val - ).tolist() # note computing the cumulative sum of array elements. + val = np.cumsum(val).tolist() assert val[-1] == g.num_nodes(ntype) for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) diff --git a/tools/distpartitioning/dataset_utils.py b/tools/distpartitioning/dataset_utils.py index f6b542f34b53..8567988f192f 100644 --- a/tools/distpartitioning/dataset_utils.py +++ b/tools/distpartitioning/dataset_utils.py @@ -547,6 +547,8 @@ def get_dataset( autogenerate_column_names=True, ) parse_options = pyarrow.csv.ParseOptions(delimiter=" ") + # if getsize() == 0, the file is empty, indicating that the partition doesn't have this attribute. + # The src_ids and dst_ids should remain empty. if os.path.getsize(edge_file) != 0: with pyarrow.csv.open_csv( edge_file,