# main.py

# -*- coding: utf-8 -*-

'''
Transferable Graph Generation algorithm.
By @zcrwind (chenrui.zhang@pku.edu.cn).

'''

import os
import numpy as np
import time
import scipy.sparse as sp
import argparse
import random
from sklearn.metrics import f1_score
from collections import defaultdict
from termcolor import cprint

import torch
import torch.nn as nn
import torch.utils.data as data
from torch.optim import lr_scheduler
from torch.nn import init

from utils.data_utils import ZSL_Dataset
from utils.graph_utils import load_graph, adjMatrix2adjLists
from utils.kNN import kNNClassify
from utils.tools import nets_weights_init, reset_grad, print_args, print_nets, setup_seed

from models.graphsage_visual import MeanAggregator, Encoder, SupervisedGraphSage
from models.gan import _netG, _netG2, _netD
from models.label_propagation import label_propagation as LP

def arg_parse(argv=None):
    '''
        Build the command-line parser of the script and parse the options.
        Args:
            argv: list of str or None
                The argument vector to parse. With the default (None) argparse reads
                `sys.argv[1:]`, i.e. the behaviour is the same as before this parameter
                existed. Passing an explicit list makes the function usable from other
                code and from tests.
        Returns:
            argparse.Namespace holding one attribute per option declared below.
        Note:
            The boolean-like switches (`--use_pca`, `--use_z`, `--use_LP_eval`) are plain
            strings; the rest of this file compares them against the literal 'true'.
    '''
    desc = 'deep embedding model for zero-shot video classification'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--mode', type=str, default='train', help='train or test')
    parser.add_argument('--dataset_name', type=str, default='cub', help='dataset name')
    parser.add_argument('--resume', type=str, default='', help='path of checkpoint file for restarting or testing')
    parser.add_argument('--n_iteration', type=int, default=10, help='how many iteration to run')
    parser.add_argument('--batch_size', type=int, default=64, help='batch size')
    parser.add_argument('--lr', type=float, default=0.001, help='learning rate of graphsage model')
    parser.add_argument('--weight_decay', type=float, default=1e-2, help='weight decay')
    parser.add_argument('--optimizer', type=str, default='adam', help='which optimizer to use')
    parser.add_argument('--labelIdxStart0or1', type=int, choices=[0, 1], default=1, help='self explained')
    parser.add_argument('--root_dir', type=str, default='./data', help='root dir of data')
    parser.add_argument('--graph_datadir', type=str, default='./data/preprocessed_data', help='root dir of the graph data')
    parser.add_argument('--save_dir', type=str, default='./checkpoints', help='root dir for saving model')
    parser.add_argument('--all_visualFea_label_file', type=str, default='res101.mat', help='file contains both visual feature and label of whole datset')
    parser.add_argument('--auxiliary_file', type=str, default='original_att_splits.mat', help='file contains splits and semantic feature')
    parser.add_argument('--gpuid', type=int, default=0, help='which gpu to use')
    parser.add_argument('--n_generation_perClass', type=int, default=50, help='how many samples will be generated by G for SVM training')
    parser.add_argument('--classifier_type', type=str, default='knn', help='what kind of classifier to use for the final unseen classification task. (e.g., knn, svm)')
    parser.add_argument('--n_epoch_sftcls', type=int, default=10, help='epochs for softmax classifier training')
    parser.add_argument('--use_pca', type=str, default='true', help='use PCA (for visual feature) or not')
    parser.add_argument('--reduced_dim_pca', type=int, default=1024, help='the dimension of visual feature after PCA')
    parser.add_argument('--weight_threshold', type=float, default=0.2, help='weight threshold for transforming dense adj matrix to sparser adj matrix')
    parser.add_argument('--gan_checkpoint_dir', type=str, default='./gan_checkpoint_dir', help='checkpoint dir of pretrained GAN model for unseen visual feature generation')
    parser.add_argument('--gan_checkpoint', type=str, default='', help='checkpoint name of pretrained GAN model for unsen visual feature generation')
    parser.add_argument('--use_z', type=str, default='true', help='use noise z or not during unseen visual feature generation')
    parser.add_argument('--z_dim', type=int, default=100, help='dimension of noise (only used when `use_z` is True)')
    parser.add_argument('--print_every', type=int, default=10, help='print loss info every `print_every` batches')
    parser.add_argument('--eval_every', type=int, default=100, help='evaluation every `eval_every` batches')
    parser.add_argument('--n_gene_perC', type=int, default=1, help='how many fake visual feature to be generated per unseen class')
    parser.add_argument('--top_k', type=int, default=20, help='top k large edge weights for graph construction')
    parser.add_argument('--lambda_lploss', type=float, default=1.0, help='the trade-off hyperparagram of label propagation loss (cross-entropy is 1)')
    parser.add_argument('--use_LP_eval', type=str, default='false', help='whether to use label propagation for evaluation')

    # argv=None makes argparse fall back to sys.argv[1:]
    args = parser.parse_args(argv)
    return args


def cls_acc(output, label):
    '''
        Classification accuracy.
        Args:
            output: array whose arg-max along axis 1 is taken as the predicted class
                of each row (e.g. one row of class scores per sample).
            label: array of ground-truth class indexes; its first dimension is used
                as the number of samples.
        Returns:
            number of rows whose predicted class equals the label, divided by
            `label.shape[0]`.
    '''
    predicted = np.argmax(output, axis=1)
    n_hits = (predicted == label).sum()
    n_samples = float(label.shape[0])
    return n_hits / n_samples


def get_fake_unseen_visual_feat(generator, dataset_name, gan_checkpoint_dir, gan_checkpoint_name,
                                use_z, z_dim, sem_fea_pro, unseen_classes, n_gene_perC=1):
    '''
        Generate the fake visual feature of unseen classes via pretrained GAN model.
        i.e., semantic feature (e.g., attribute or wordvec) --> fake visual feature
        Args:
            generator:
                generator of the GAN model. Its weights are overwritten with the ones
                stored in the checkpoint and it is moved to the selected device.
            dataset_name:
                which dataset to use (also the sub-directory of `gan_checkpoint_dir`
                that holds the checkpoint)
            gan_checkpoint_dir:
                the dir of the GAN checkpoints
            gan_checkpoint_name:
                file name of the checkpoint. Only the 'netG_state_dict' entry is read.
                The content of the checkpoint: (from `AUFS_ZSL` (see https://github.com/zcrwind/AUFS_ZSL))
                {
                    'iteration': int,
                    'netG_state_dict': netG.state_dict(),
                    'netD_state_dict': netD.state_dict(),
                    'netR_state_dict': netR.state_dict(),
                    'acc_unseen': best_acc_unseen,
                    'acc_seen': acc_seen,
                }
            use_z:
                use noise or not. Noise is used only when this is exactly the string 'true'.
            z_dim:
                the dimension of the noise vector z (ignored without noise).
            sem_fea_pro: np.ndarray
                semantic feature of all classes in dataset. Same with `all_prototype_semantic_feature` in data_utils.py.
                shape is (n_total_classes, semanticFea_dim)
            unseen_classes: np.ndarray of int
                Indexes of the unseen class whose visual feature will be generated.
            n_gene_perC:
                the number of the visual feature to be generated per class.
        Returns:
            defaultdict(list) mapping each entry of `unseen_classes` to a list of
            `n_gene_perC` numpy arrays, one generator output (for a batch of one
            semantic vector) per entry.
        Raises:
            AssertionError if the checkpoint file does not exist.
    '''
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    checkpoint_path = os.path.join(gan_checkpoint_dir, dataset_name, gan_checkpoint_name)
    assert os.path.isfile(checkpoint_path), 'GAN checkpoint not found: ' + checkpoint_path

    # map_location lets a checkpoint that was saved on a GPU be restored on a CPU-only host
    checkpoint_dict = torch.load(checkpoint_path, map_location=device)
    netG = generator
    netG.load_state_dict(checkpoint_dict['netG_state_dict'])
    netG = netG.to(device)
    print(netG)
    # NOTE(review): the generator is left in whatever train/eval mode the caller gave it;
    # call `generator.eval()` beforehand if it contains dropout / batch-norm layers.

    sem_fea_pro = torch.from_numpy(sem_fea_pro).to(device)
    generated_vis_fea_dict = defaultdict(list)
    with_noise = (use_z == 'true')

    # the outputs are converted to numpy right away, so no autograd graph is needed
    with torch.no_grad():
        for _ in range(n_gene_perC):
            # one noise vector per round, shared by every class of that round
            z = torch.randn(1, z_dim).to(device) if with_noise else None
            for _cls in unseen_classes:
                # batch of one semantic vector: (1, semanticFea_dim)
                sem_fea = sem_fea_pro[_cls].unsqueeze(0)
                if with_noise:
                    gen_vis_fea = netG(sem_fea, z)
                else:
                    gen_vis_fea = netG(sem_fea)
                generated_vis_fea_dict[_cls].append(gen_vis_fea.detach().cpu().numpy())

    return generated_vis_fea_dict


def _index_by_class(labels):
    '''Return (instance index -> class index, class index -> set of instance indexes) for `labels`.'''
    instance2class = dict()
    class2instances = defaultdict(set)
    for instance_idx, class_idx in enumerate(labels):
        instance2class[instance_idx] = class_idx
        class2instances[class_idx].add(instance_idx)
    return instance2class, class2instances


def _switch_mode(graphsage, enc1, agg1, mode, feature_func_getter):
    '''Select a data split: set the encoder mode and install the matching first-hop feature function.'''
    graphsage.encoder.mode = mode
    enc1.mode = mode
    agg1.features_func = feature_func_getter()


def _classifier_eval(graphsage, node_indices, labels):
    '''Score `node_indices` with the classification head (eval mode, no autograd); return (accuracy, embeddings).'''
    graphsage.eval()
    with torch.no_grad():
        output, embeddings, _ = graphsage.forward(node_indices)
    output = output.cpu()
    return cls_acc(output.data.numpy(), labels), embeddings


def _lp_eval(graphsage, support_embeds, support_onehot, query_embeds, query_labels,
             n_query, top_k, total_n_classes, device):
    '''Label-propagation accuracy of the queries, taken in chunks of `n_query` against one fixed support set.'''
    # the trailing `len(query_labels) % n_query` samples are not evaluated
    n_chunks = len(query_labels) // n_query
    if n_chunks == 0:
        # fewer queries than one chunk: nothing can be scored
        return float('nan')
    n_correct = 0
    n_total = 0
    with torch.no_grad():
        for k in range(n_chunks):
            lo, hi = k * n_query, (k + 1) * n_query
            _embeds = torch.cat((support_embeds, query_embeds[lo:hi]), 0)
            _sigma = graphsage.cal_sigma(_embeds)
            _, Fq = LP(_embeds, _sigma, top_k, support_onehot, total_n_classes)

            q_labels = torch.from_numpy(query_labels[lo:hi]).long()
            q_labels_onehot = torch.zeros(n_query, total_n_classes).scatter_(1, q_labels.view(-1, 1), 1).to(device)

            predq = torch.argmax(Fq, 1)
            gtq = torch.argmax(q_labels_onehot, 1)
            n_correct += (predq == gtq).sum()
            n_total += n_query
    return 1.0 * n_correct.float() / float(n_total)


def main(seed, args):
    '''
        Train the GraphSAGE classifier with an additional label-propagation loss and
        evaluate it on the seen / unseen test splits.
        Steps:
            1. load the dataset and the instance graph;
            2. generate fake visual features of the unseen classes with the pretrained
               GAN generator and append their labels to the training labels;
            3. build the two-layer encoder and the `SupervisedGraphSage` model;
            4. run `args.n_iteration` optimisation steps, printing every
               `args.print_every` steps and evaluating every `args.eval_every` steps
               (the checkpoint with the best unseen accuracy is saved to
               `<args.save_dir>/<args.dataset_name>/`);
            5. run a final evaluation and print the best accuracies.
        Args:
            seed: random seed handed to `setup_seed`.
            args: argparse.Namespace produced by `arg_parse`.
        Note:
            NOTE(review): `--weight_decay`, `--optimizer`, `--resume` and
            `--labelIdxStart0or1` are parsed but not consumed by this function.
    '''
    setup_seed(seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpuid)
    mode = args.mode
    dataset_name = args.dataset_name
    root_dir = args.root_dir
    graph_datadir = args.graph_datadir
    all_visualFea_label_file = args.all_visualFea_label_file
    auxiliary_file = args.auxiliary_file
    batch_size = args.batch_size
    use_pca = args.use_pca
    reduced_dim_pca = args.reduced_dim_pca

    zsl_dataset = ZSL_Dataset(root_dir, dataset_name, mode, all_visualFea_label_file, auxiliary_file, use_pca, reduced_dim_pca)
    # kept from the original; it is not iterated here, batches are drawn by node index below
    zsl_dataloader = data.DataLoader(zsl_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    print('data is ready!')

    vi_fea_dim = zsl_dataset.vis_fea_dim
    se_fea_dim = zsl_dataset.sem_fea_dim

    te_data_unseen, te_data_seen = zsl_dataset.get_testData()
    _, _, te_label_unseen, te_labelID_unseen, _ = te_data_unseen
    _, _, te_label_seen, _, _ = te_data_seen
    tr_vis_fea, _, all_tr_label, tr_labelID, _ = zsl_dataset.get_trainData()
    all_labels = zsl_dataset.all_labels

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    save_dir = os.path.join(args.save_dir, dataset_name)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    adj_matrix = load_graph(dataset_name, graph_datadir)
    adj_lists = adjMatrix2adjLists(adj_matrix, args.weight_threshold)

    use_z = args.use_z.lower()
    z_dim = args.z_dim
    if use_z == 'true':
        netG = _netG(se_fea_dim, vi_fea_dim, z_dim).to(device)
    else:
        netG = _netG2(se_fea_dim, vi_fea_dim).to(device)

    generated_vis_fea_dict = get_fake_unseen_visual_feat(netG, dataset_name, args.gan_checkpoint_dir, args.gan_checkpoint,
                                                         use_z, z_dim, zsl_dataset.all_prototype_semantic_feature,
                                                         te_labelID_unseen, n_gene_perC=args.n_gene_perC)

    # Append the generated instances to the training set. Pieces are collected first and
    # stacked once, instead of re-copying the whole training matrix for every unseen class.
    fake_feats = []
    fake_labels = []
    for class_idx, gen_vis_fea_list in generated_vis_fea_dict.items():
        gen_vis_fea = np.vstack(gen_vis_fea_list)
        n_fake_instances = len(gen_vis_fea_list)
        assert gen_vis_fea.shape == (n_fake_instances, vi_fea_dim)
        fake_feats.append(gen_vis_fea)
        fake_labels.append(np.array([class_idx] * n_fake_instances))
    tr_vis_fea = np.vstack([tr_vis_fea] + fake_feats)
    all_tr_label = np.hstack([all_tr_label] + fake_labels)

    assert len(tr_vis_fea) == len(all_tr_label)

    print('building dicts...')
    _, classIdx2instanceIdx = _index_by_class(all_labels)
    instanceIdx2classIdx_zsl_train, classIdx2instanceIdx_zsl_train = _index_by_class(all_tr_label)
    instanceIdx2classIdx_zsl_test_seen, classIdx2instanceIdx_zsl_test_seen = _index_by_class(te_label_seen)
    instanceIdx2classIdx_zsl_test_unseen, classIdx2instanceIdx_zsl_test_unseen = _index_by_class(te_label_unseen)
    print('build done!')

    # feature-function getters of the dataset, one per split
    get_train_features = zsl_dataset.get_firstHop_featureFunc_visual_zsl_train
    get_test_seen_features = zsl_dataset.get_firstHop_featureFunc_visual_zsl_test_seen

    # use visual feature as initial input
    firstHop_featureFunc = get_train_features()
    agg1 = MeanAggregator(firstHop_featureFunc).to(device)
    enc1 = Encoder(firstHop_featureFunc, vi_fea_dim, 128, adj_lists, agg1,
                   instanceIdx2classIdx_zsl_train, classIdx2instanceIdx_zsl_train,
                   instanceIdx2classIdx_zsl_test_seen, classIdx2instanceIdx_zsl_test_seen,
                   instanceIdx2classIdx_zsl_test_unseen, classIdx2instanceIdx_zsl_test_unseen,
                   classIdx2instanceIdx,
                   generated_vis_fea_dict,
                   mode='train', seen_labelID_set=tr_labelID,
                   gcn_style=True).to(device)
    # The second layer is built but the classifier below is attached to enc1 only.
    # It is kept so that the sequence of random draws stays the same as before.
    agg2 = MeanAggregator(lambda nodes : enc1(nodes).t()).to(device)
    enc2 = Encoder(lambda nodes : enc1(nodes).t(), enc1.embed_dim, 128, adj_lists, agg2,
                   instanceIdx2classIdx_zsl_train, classIdx2instanceIdx_zsl_train,
                   instanceIdx2classIdx_zsl_test_seen, classIdx2instanceIdx_zsl_test_seen,
                   instanceIdx2classIdx_zsl_test_unseen, classIdx2instanceIdx_zsl_test_unseen,
                   classIdx2instanceIdx,
                   generated_vis_fea_dict,
                   mode='train', seen_labelID_set=tr_labelID,
                   base_model=enc1, gcn_style=True).to(device)
    enc1.num_samples = 10
    enc2.num_samples = 10
    n_classes = zsl_dataset.n_classes
    total_n_classes = zsl_dataset.total_n_classes
    graphsage = SupervisedGraphSage(n_classes, enc1, enc1.embed_dim).to(device)
    nets_weights_init([graphsage])
    lr = args.lr
    optimizer = torch.optim.Adam(filter(lambda p : p.requires_grad, graphsage.parameters()), lr=lr)
    lr_maker  = lr_scheduler.StepLR(optimizer=optimizer, step_size=1000, gamma=0.9)

    print('start training...')
    best_acc_test_unseen = 0
    best_acc_test_seen = 0
    print_every = args.print_every
    eval_every = args.eval_every

    tr_rand_indices = np.random.permutation(len(all_tr_label))
    te_seen_indices = np.arange(len(te_label_seen))
    te_unseen_indices = np.arange(len(te_label_unseen))

    ce = nn.CrossEntropyLoss().to(device)
    support_ratio = 0.5
    use_LP_eval = (args.use_LP_eval.lower() == 'true')

    for batch in range(args.n_iteration):
        graphsage.train()
        # Shuffle, then take an explicit copy of the head of the permutation. The original
        # sliced first and shuffled afterwards, but the slice was a view, so the batch it
        # trained on was this very one.
        random.shuffle(tr_rand_indices)
        batch_nodes = tr_rand_indices[:batch_size].copy()
        batch_labels = all_tr_label[batch_nodes]

        # "cg" for "class-level graph"
        cg_loss = graphsage.loss(batch_nodes, torch.LongTensor(batch_labels).to(device))
        _, batch_embeddings, batch_sigma = graphsage(batch_nodes)
        batch_embeddings = torch.t(batch_embeddings)
        N = batch_embeddings.size(0)

        # first part of the batch acts as support set, the rest as query set
        n_support = int(N * support_ratio)
        n_query = N - n_support
        s_labels = torch.from_numpy(batch_labels[:n_support]).long()
        q_labels = torch.from_numpy(batch_labels[n_support:]).long()
        s_labels_onehot = torch.zeros(n_support, total_n_classes).scatter_(1, s_labels.view(-1, 1), 1).to(device)
        q_labels_onehot = torch.zeros(n_query,   total_n_classes).scatter_(1, q_labels.view(-1, 1), 1).to(device)

        F, Fq = LP(batch_embeddings, batch_sigma, args.top_k, s_labels_onehot, total_n_classes)

        gt = torch.argmax(torch.cat((s_labels_onehot, q_labels_onehot), 0), 1)
        meta_loss = ce(F, gt)    # dual LP: combine the loss of both support and query
        optimizer.zero_grad()
        total_loss = cg_loss + args.lambda_lploss * meta_loss
        total_loss.backward()
        optimizer.step()

        # calculate acc
        predq = torch.argmax(Fq, 1)
        gtq   = torch.argmax(q_labels_onehot, 1)
        correct = (predq == gtq).sum()
        total   = n_query
        acc = 1.0 * correct.float() / float(total)
        if batch % print_every == 0 and batch > 0:
            print('iter: {:4d}/{}  cg_loss: {:.6f}  meta_loss: {:.6f}  acc: {:.6f}  lr: {:.8f}'.format(batch, args.n_iteration, cg_loss, meta_loss, acc, optimizer.param_groups[0]['lr']))

        if batch % eval_every == 0 and batch > 0:
            ## test_seen
            _switch_mode(graphsage, enc1, agg1, 'test_seen', get_test_seen_features)
            te_seen_acc, test_seen_embeddings = _classifier_eval(graphsage, te_seen_indices, te_label_seen)
            cprint('te_seen_acc: {:.6f}'.format(te_seen_acc), 'yellow')
            if te_seen_acc > best_acc_test_seen:
                best_acc_test_seen = te_seen_acc

            ## test_unseen
            # NOTE(review): the unseen split uses the *train* feature function, exactly as
            # the original code did — confirm this is intended.
            _switch_mode(graphsage, enc1, agg1, 'test_unseen', get_train_features)
            te_unseen_acc, test_unseen_embeddings = _classifier_eval(graphsage, te_unseen_indices, te_label_unseen)
            cprint('te_unseen_acc: {:.6f}'.format(te_unseen_acc), 'yellow')
            if te_unseen_acc > best_acc_test_unseen:
                best_acc_test_unseen = te_unseen_acc
                save_dict = {
                    'iteration' : (batch + 1),
                    'state_dict': graphsage.state_dict(),
                    'acc_unseen': best_acc_test_unseen,
                    'acc_seen'  : te_seen_acc,
                }
                checkpoint_name = 'checkpoint_' + dataset_name + '_iter' + str(batch + 1) + '_accUnseen%.4lf_accSeen%.4lf.pkl' % (best_acc_test_unseen, te_seen_acc)
                checkpoint_path = os.path.join(save_dir, checkpoint_name)
                cprint('saving ' + checkpoint_name + ' in ' + save_dir + '...', 'green')
                torch.save(save_dict, checkpoint_path)

            if use_LP_eval:
                ## support
                _switch_mode(graphsage, enc1, agg1, 'train', get_train_features)

                n_expanded_tr = len(all_tr_label)
                # scatter_ needs int64 indexes
                expanded_tr_labels = torch.from_numpy(all_tr_label).long()
                expanded_tr_nodes = np.arange(n_expanded_tr)
                with torch.no_grad():
                    _, expanded_tr_embeddings, _ = graphsage(expanded_tr_nodes)
                expanded_tr_embeddings = torch.t(expanded_tr_embeddings)

                # the more the better where the GPU memory allows (upper bound: the number of expanded training instances)
                eval_n_support = min(batch_size * 4, n_expanded_tr)
                eval_n_query = max(1, batch_size // 2)
                shuffled_idxs = np.random.permutation(n_expanded_tr)[:eval_n_support]
                eval_s_labels = expanded_tr_labels[shuffled_idxs]
                eval_s_embeds = expanded_tr_embeddings[shuffled_idxs]
                eval_s_onehot = torch.zeros(eval_n_support, total_n_classes).scatter_(1, eval_s_labels.view(-1, 1), 1).to(device)

                ## test seen
                _switch_mode(graphsage, enc1, agg1, 'test_seen', get_test_seen_features)
                te_seen_acc_LP = _lp_eval(graphsage, eval_s_embeds, eval_s_onehot,
                                          torch.t(test_seen_embeddings), te_label_seen,
                                          eval_n_query, args.top_k, total_n_classes, device)
                cprint('[LP] te_seen_acc: {:.6f}'.format(te_seen_acc_LP), 'yellow')

                ## test unseen
                _switch_mode(graphsage, enc1, agg1, 'test_unseen', get_train_features)
                te_unseen_acc_LP = _lp_eval(graphsage, eval_s_embeds, eval_s_onehot,
                                            torch.t(test_unseen_embeddings), te_label_unseen,
                                            eval_n_query, args.top_k, total_n_classes, device)
                cprint('[LP] te_unseen_acc: {:.6f}'.format(te_unseen_acc_LP), 'red')

            # recover to the training mode
            _switch_mode(graphsage, enc1, agg1, 'train', get_train_features)

        # The scheduler is advanced after optimizer.step() (PyTorch >= 1.1 order). Doing it
        # at the end of the iteration also means the lr printed above is the one just used.
        lr_maker.step()

    print('Final:' + '%' * 50)
    _switch_mode(graphsage, enc1, agg1, 'test_seen', get_test_seen_features)
    te_seen_acc, _ = _classifier_eval(graphsage, te_seen_indices, te_label_seen)
    print('test_seen_acc: {:.6f}'.format(te_seen_acc))
    if te_seen_acc > best_acc_test_seen:
        best_acc_test_seen = te_seen_acc
    print('best acc of test_seen data: {:.6f}'.format(best_acc_test_seen))

    _switch_mode(graphsage, enc1, agg1, 'test_unseen', get_train_features)
    te_unseen_acc, _ = _classifier_eval(graphsage, te_unseen_indices, te_label_unseen)
    print('test_UNseen_acc: {:.6f}'.format(te_unseen_acc))
    if te_unseen_acc > best_acc_test_unseen:
        best_acc_test_unseen = te_unseen_acc
    print('best acc of test_UNseen data: {:.6f}'.format(best_acc_test_unseen))
    print('%' * 56)


if __name__ == '__main__':
    # Draw a random seed in [0, 1024). It is converted to a built-in int because
    # numpy integer scalars are not accepted by every seeding API (e.g. `random.seed`
    # on Python >= 3.11), and printed so that a run can be reproduced.
    seed = int(np.random.randint(1024))
    print('random seed: {}'.format(seed))
    args = arg_parse()
    print_args(args)
    main(seed, args)