diff --git a/examples/graphbolt/pyg/hetero/node_classification.py b/examples/graphbolt/pyg/hetero/node_classification.py
index fb46a0ade970..922ff15562ea 100644
--- a/examples/graphbolt/pyg/hetero/node_classification.py
+++ b/examples/graphbolt/pyg/hetero/node_classification.py
@@ -344,8 +344,11 @@ def parse_args():
             "igb-het-tiny",
             "igb-het-small",
             "igb-het-medium",
+            "igb-het-large",
+            "igb-het-mlperf",
+            "igb-het",
         ],
-        help="Dataset name. Possible values: ogb-lsc-mag240m, igb-het-[tiny|small|medium].",
+        help="Dataset name. Possible values: ogb-lsc-mag240m, igb-het, and igb-het-[tiny|small|medium|large|mlperf].",
     )
     parser.add_argument(
         "--fanout",
diff --git a/examples/graphbolt/rgcn/evaluator.py b/examples/graphbolt/rgcn/evaluator.py
new file mode 100644
index 000000000000..d003c5b62ede
--- /dev/null
+++ b/examples/graphbolt/rgcn/evaluator.py
@@ -0,0 +1,37 @@
+import numpy as np
+
+try:
+    import torch
+except ImportError:
+    torch = None
+
+
+class IGB_Evaluator:
+    def __init__(self, name, num_tasks):
+        self.name = name
+        self.num_tasks = num_tasks
+
+    def _parse_input(self, input_dict):
+        y_true, y_pred = input_dict["y_true"], input_dict["y_pred"]
+
+        if torch and isinstance(y_true, torch.Tensor):
+            y_true = y_true.cpu().numpy()
+        if torch and isinstance(y_pred, torch.Tensor):
+            y_pred = y_pred.cpu().numpy()
+
+        if not isinstance(y_true, np.ndarray) or not isinstance(
+            y_pred, np.ndarray
+        ):
+            raise RuntimeError("Arguments must be numpy arrays")
+
+        if y_true.shape != y_pred.shape or y_true.ndim != 2:
+            raise RuntimeError("Shape mismatch between y_true and y_pred")
+
+        return y_true, y_pred
+
+    def _eval_acc(self, y_true, y_pred):
+        return {"acc": np.mean(np.all(y_true == y_pred, axis=1))}
+
+    def eval(self, input_dict):
+        y_true, y_pred = self._parse_input(input_dict)
+        return self._eval_acc(y_true, y_pred)
diff --git a/examples/graphbolt/rgcn/hetero_rgcn.py b/examples/graphbolt/rgcn/hetero_rgcn.py
index 60ab51602ca1..71e05b4e4bfd 100644
--- a/examples/graphbolt/rgcn/hetero_rgcn.py
+++ b/examples/graphbolt/rgcn/hetero_rgcn.py
@@ -58,6 +58,7 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from dgl.nn import HeteroEmbedding
+from evaluator import IGB_Evaluator
 from ogb.lsc import MAG240MEvaluator
 from ogb.nodeproppred import Evaluator
 from tqdm import tqdm
@@ -141,6 +142,10 @@ def create_dataloader(
     if name == "ogb-lsc-mag240m":
         node_feature_keys["author"] = ["feat"]
         node_feature_keys["institution"] = ["feat"]
+    if "igb-het" in name:
+        node_feature_keys["author"] = ["feat"]
+        node_feature_keys["institute"] = ["feat"]
+        node_feature_keys["fos"] = ["feat"]
     datapipe = datapipe.fetch_feature(features, node_feature_keys)

     # Create a DataLoader from the datapipe.
@@ -283,13 +288,6 @@ def __init__(
             }
         )

-        self.loop_weights = nn.ModuleDict(
-            {
-                ntype: nn.Linear(in_size, out_size, bias=True)
-                for ntype in self.ntypes
-            }
-        )
-
         self.dropout = nn.Dropout(dropout)
         # Initialize parameters of the model.
         self.reset_parameters()
@@ -424,7 +422,9 @@ def evaluate(
     model.eval()
     category = "paper"
     # An evaluator for the dataset.
-    if name == "ogbn-mag":
+    if "igb-het" in name:
+        evaluator = IGB_Evaluator(name=name, num_tasks=1)
+    elif name == "ogbn-mag":
         evaluator = Evaluator(name=name)
     else:
         evaluator = MAG240MEvaluator()
@@ -588,7 +588,7 @@ def main(args):
     # `institution` are generated in advance and stored in the feature store.
     # For `ogbn-mag`, we generate the features on the fly.
     embed_layer = None
-    if args.dataset == "ogbn-mag":
+    if args.dataset == "ogbn-mag" or "igb-het" in args.dataset:
         # Create the embedding layer and move it to the appropriate device.
         embed_layer = rel_graph_embed(g, feat_size).to(device)
         print(
@@ -663,8 +663,18 @@
         "--dataset",
         type=str,
         default="ogbn-mag",
-        choices=["ogbn-mag", "ogb-lsc-mag240m"],
-        help="Dataset name. Possible values: ogbn-mag, ogb-lsc-mag240m",
+        choices=[
+            "ogbn-mag",
+            "ogb-lsc-mag240m",
+            "igb-het-tiny",
+            "igb-het-small",
+            "igb-het-medium",
+            "igb-het-large",
+            "igb-het",
+            "igb-het-mlperf",
+        ],
+        help="Dataset name. Possible values: ogbn-mag, ogb-lsc-mag240m, "
+        "igb-het, and igb-het-[tiny|small|medium|large|mlperf].",
     )
     parser.add_argument("--num_epochs", type=int, default=3)
     parser.add_argument("--num_workers", type=int, default=0)
diff --git a/python/dgl/graphbolt/impl/ondisk_dataset.py b/python/dgl/graphbolt/impl/ondisk_dataset.py
index 303a423a853b..a8a3972fdb6d 100644
--- a/python/dgl/graphbolt/impl/ondisk_dataset.py
+++ b/python/dgl/graphbolt/impl/ondisk_dataset.py
@@ -983,17 +983,18 @@ class BuiltinDataset(OnDiskDataset):
         The igb-hom-[tiny|small|medium|large] and igb-hom dataset is a
         homogeneous citation network, which is designed for developers to train
         and evaluate GNN models with high fidelity. See more details in
-        `igb-hom-[tiny|small|medium|large]
+        `igb-hom-[tiny|small|medium|large] and igb-hom
         `_.

         .. note::
             Self edges are added to the original graph. Node features are
             stored as float32.

-    **igb-het-[tiny|small|medium]**
-        The igb-hom-[tiny|small|medium] dataset is a heterogeneous citation network,
-        which is designed for developers to train and evaluate GNN models with
-        high fidelity. See more details in `igb-het-[tiny|small|medium]
+    **igb-het and igb-het-[tiny|small|medium|large|mlperf]**
+        The igb-het-[tiny|small|medium|large|mlperf] and igb-het dataset is a
+        heterogeneous citation network, which is designed for developers to train
+        and evaluate GNN models with high fidelity. See more details in
+        `igb-het-[tiny|small|medium|large|mlperf] and igb-het
         `_.

         .. note::
@@ -1047,6 +1048,12 @@ class BuiltinDataset(OnDiskDataset):
         "igb-hom-seeds",
         "igb-het-medium",
         "igb-het-medium-seeds",
+        "igb-het-large",
+        "igb-het-large-seeds",
+        "igb-het",
+        "igb-het-seeds",
+        "igb-het-mlperf",
+        "igb-het-mlperf-seeds",
     ]

     _all_datasets = _datasets + _large_datasets
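
A minimal usage sketch for the new IGB_Evaluator, mirroring how hetero_rgcn.py drives the OGB evaluators. The dataset name, the 19-class label space, and the tensor shapes below are illustrative assumptions rather than values taken from this patch; run it from examples/graphbolt/rgcn/ so that evaluator.py is importable.

    import torch

    from evaluator import IGB_Evaluator

    # num_tasks=1 matches how evaluate() constructs the evaluator; the name
    # and the 19-class label space are arbitrary choices for this sketch.
    evaluator = IGB_Evaluator(name="igb-het-tiny", num_tasks=1)

    # Dummy ground truth and predictions of shape (num_nodes, 1). Real code
    # would pass the labels and logits.argmax(dim=1, keepdim=True).
    y_true = torch.randint(0, 19, (100, 1))
    y_pred = y_true.clone()
    y_pred[:10] = (y_pred[:10] + 1) % 19  # make the first 10 rows wrong

    # Torch tensors are converted to numpy internally; both inputs must be
    # 2-D arrays of identical shape. The result is a dict with key "acc".
    result = evaluator.eval({"y_true": y_true, "y_pred": y_pred})
    print(result["acc"])  # 0.9 for this example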
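
A similar sketch, under the same caveats, for loading one of the registered igb-het dataset names through GraphBolt's BuiltinDataset, which is the path the RGCN example takes before building its dataloaders; the root directory is a placeholder and the first load triggers a download and preprocessing step.

    from dgl import graphbolt as gb

    # Any of the igb-het names registered in BuiltinDataset (including the
    # newly added "igb-het-large", "igb-het-mlperf", and "igb-het") can be
    # used here; "igb-het-tiny" keeps the download small. `root` is a
    # placeholder path for this sketch.
    dataset = gb.BuiltinDataset("igb-het-tiny", root="datasets").load()

    # The task metadata (e.g. the number of classes) and the sampling graph
    # are then available for create_dataloader() in hetero_rgcn.py.
    print(dataset.tasks[0].metadata)
    print(dataset.graph)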