From b4bd5092266e8c6aeed7739ff2965dceac5e3ceb Mon Sep 17 00:00:00 2001 From: Bowen Yao <112051015+BowenYao18@users.noreply.github.com> Date: Thu, 29 Aug 2024 18:15:29 -0500 Subject: [PATCH] [Dataset] Contribute IGB-Homo dataset to node_classification.py (#7717) Co-authored-by: Muhammed Fatih BALIN --- examples/graphbolt/node_classification.py | 12 ++++++++++-- python/dgl/graphbolt/impl/ondisk_dataset.py | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/examples/graphbolt/node_classification.py b/examples/graphbolt/node_classification.py index a4a8be298d2c..6b0dee719746 100644 --- a/examples/graphbolt/node_classification.py +++ b/examples/graphbolt/node_classification.py @@ -363,9 +363,17 @@ def parse_args(): "--dataset", type=str, default="ogbn-products", - choices=["ogbn-arxiv", "ogbn-products", "ogbn-papers100M"], + choices=[ + "ogbn-arxiv", + "ogbn-products", + "ogbn-papers100M", + "igb-hom-tiny", + "igb-hom-small", + "igb-hom-medium", + ], help="The dataset we can use for node classification example. Currently" - " ogbn-products, ogbn-arxiv, ogbn-papers100M datasets are supported.", + " ogbn-products, ogbn-arxiv, ogbn-papers100M and" + " igb-hom-[tiny|small|medium] datasets are supported.", ) parser.add_argument( "--mode", diff --git a/python/dgl/graphbolt/impl/ondisk_dataset.py b/python/dgl/graphbolt/impl/ondisk_dataset.py index d669dc825509..df3b51f8b074 100644 --- a/python/dgl/graphbolt/impl/ondisk_dataset.py +++ b/python/dgl/graphbolt/impl/ondisk_dataset.py @@ -979,6 +979,16 @@ class BuiltinDataset(OnDiskDataset): .. note:: Reverse edges are added to the original graph. + **igb-hom-[tiny|small|medium]** + The igb-hom-[tiny|small|medium] dataset is a homogeneous citation network, + which is designed for developers to train and evaluate GNN models with + high fidelity. See more details in `igb-hom-[tiny|small|medium] + <https://github.com/IllinoisGraphBenchmark/IGB-Datasets>`_. + + .. note:: + Self edges are added to the original graph. + Node features are stored as float32. 
+ Parameters ---------- name : str @@ -1004,12 +1014,18 @@ class BuiltinDataset(OnDiskDataset): "ogbn-products-seeds", "ogbn-arxiv", "ogbn-arxiv-seeds", + "igb-hom-tiny", + "igb-hom-tiny-seeds", + "igb-hom-small", + "igb-hom-small-seeds", ] _large_datasets = [ "ogb-lsc-mag240m", "ogb-lsc-mag240m-seeds", "ogbn-papers100M", "ogbn-papers100M-seeds", + "igb-hom-medium", + "igb-hom-medium-seeds", ] _all_datasets = _datasets + _large_datasets