# pegasos.py

import json
import numpy as np

def objective_function(X, y, w, lamb):
    """
    Evaluate the primal SVM objective: L2 regularizer plus mean hinge loss.

    Inputs:
    - X: A 2 dimensional numpy array of data (number of samples x number of features)
    - y: A numpy array of labels with one entry per sample
    - w: a numpy array of D elements, the weight vector (reshaped to a column here)
    - lamb: lambda, the regularization weight used in pegasos algorithm

    Return:
    - the objective value (lamb/2) * w^T w + (1/N) * sum(max(0, 1 - y * Xw)).
      The value is a 1 x 1 array converted with tolist(), i.e. a nested
      list of the form [[value]].
    """
    num_samples = X.shape[0]
    w_col = np.reshape(w, (len(w), 1))
    y_col = np.reshape(y, (len(y), 1))

    # Per-sample margin y_i * <x_i, w>; the hinge loss is max(0, 1 - margin).
    margins = np.multiply(y_col, X @ w_col)
    hinge_total = np.sum(np.maximum(0, 1 - margins))

    # w^T w is a 1 x 1 array, which is why the result keeps that shape.
    regularizer = (lamb / 2) * np.dot(w_col.transpose(), w_col)

    return (regularizer + (1 / num_samples) * hinge_total).tolist()


def pegasos_train(Xtrain, ytrain, w, lamb, k, max_iterations):
    """
    Train a linear SVM with the mini-batch Pegasos algorithm.

    Inputs:
    - Xtrain: A list of num_train elements, where each element is a list of D-dimensional features.
    - ytrain: A list of num_train labels
    - w: a numpy array of D elements (shape (D,) or (D, 1)). Only its size is
      used: the starting weights are drawn at random and scaled by 1/sqrt(lamb).
    - lamb: lambda used in pegasos algorithm
    - k: mini-batch size
    - max_iterations: the maximum number of iterations to update parameters

    Returns:
    - w: the learnt weights, a numpy array of shape (D + 1, 1). Entry 0 belongs
      to the constant-1 column that this function prepends to Xtrain.
    - train_obj: a list with one entry per iteration (length max_iterations),
      each being the value returned by objective_function after that update.
    """
    np.random.seed(0)  # fixed seed keeps the sampled mini-batches reproducible
    Xtrain = np.array(Xtrain)
    ytrain = np.array(ytrain)
    N = Xtrain.shape[0]

    # Accept both (D,) and (D, 1) inputs; the random draw is the same as for
    # an explicit (D, 1) shape.
    w = np.asarray(w).reshape(-1, 1)
    w = np.random.rand(w.shape[0], 1) / np.sqrt(lamb)

    ytrain = np.reshape(ytrain, (len(ytrain), 1))

    # Prepend a constant-1 feature and a matching weight initialised to 1.
    Xtrain = np.insert(Xtrain, 0, 1, axis=1)
    w = np.insert(w, 0, 1, axis=0)

    inv_sqrt_lamb = 1 / np.sqrt(lamb)  # radius of the ball w is projected onto
    train_obj = []

    for step in range(1, max_iterations + 1):
        # Sample k row indices uniformly, with replacement.
        batch = np.floor(np.random.rand(k) * N).astype(int)
        x_batch = Xtrain[batch]
        y_batch = ytrain[batch]

        # Keep only the samples that violate the margin (y * <x, w> < 1).
        violators = np.where(np.multiply(y_batch, np.matmul(x_batch, w)) < 1)[0]
        x_viol = x_batch[violators]
        y_viol = y_batch[violators]

        eta = 1 / (lamb * step)

        # Sub-gradient step on the regularised hinge loss.
        w_half = (1 - (eta * lamb)) * w + (eta / k) * (np.matmul(x_viol.transpose(), y_viol))

        # Projection step: shrink w_half when its norm exceeds 1/sqrt(lamb).
        norm = np.linalg.norm(w_half)
        scale = inv_sqrt_lamb / norm if norm != 0 else inv_sqrt_lamb
        w = min(1, scale) * w_half

        train_obj.append(objective_function(Xtrain, ytrain, w, lamb))

    return w, train_obj


def pegasos_test(Xtest, ytest, w, t = 0.):
    """
    Compute the accuracy of a linear classifier on a labelled test set.

    Inputs:
    - Xtest: A list of num_test elements, where each element is a list of D-dimensional features.
    - ytest: A list of num_test labels
    - w: a numpy array with D + 1 elements. A constant-1 column is prepended
      to Xtest here, so entry 0 of w is the weight of that column.
    - t: threshold; scores below t are predicted as -1 and scores greater
      than or equal to t as 1 (Binarize)

    Returns:
    - test_acc: testing accuracy, the fraction of samples whose prediction equals the label.
    """
    Xtest = np.array(Xtest)
    ytest = np.array(ytest)
    N = Xtest.shape[0]

    Xtest = np.insert(Xtest, 0, 1, axis=1)
    ytest = np.reshape(ytest, (len(ytest), 1))
    w = np.reshape(w, (len(w), 1))

    scores = np.matmul(Xtest, w)

    # Plain ndarray instead of np.matrix. Scores that satisfy neither
    # comparison (NaN) stay 0 and therefore never match a +/-1 label.
    prediction = np.zeros(ytest.shape)
    prediction[scores < t] = -1
    prediction[scores >= t] = 1

    correct_samples = np.sum(prediction == ytest)
    test_acc = correct_samples / N

    return test_acc
    

def data_loader_mnist(dataset):
    """
    Load the train/valid/test splits from a JSON file and prepare them for a binary SVM.

    Inputs:
    - dataset: path to a JSON file with keys 'train', 'valid' and 'test',
      each holding a pair [features, labels].

    Returns:
    - Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest: features as lists of
      rows with a leading 1 added to every sample, and labels as lists
      holding -1. for original labels below 5 and 1. otherwise.
    """
    with open(dataset, 'r', encoding='utf-8') as f:
        data_set = json.load(f)

    def add_bias(features):
        # Add 'one' to the feature of each sample, such that the bias term
        # is included into parameter w.
        return np.hstack((np.ones((len(features), 1)), np.array(features))).tolist()

    def binarize(labels):
        # Labels below 5 become the negative class, the rest the positive class.
        return [-1. if v < 5 else 1. for v in labels]

    # Every split gets the same treatment so that the validation labels are
    # binarized just like the train and test labels.
    prepared = []
    for split in ('train', 'valid', 'test'):
        features, labels = data_set[split][0], data_set[split][1]
        prepared.append(add_bias(features))
        prepared.append(binarize(labels))

    Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest = prepared
    return Xtrain, ytrain, Xvalid, yvalid, Xtest, ytest


def pegasos_mnist(dataset='mnist_subset.json'):
    """
    Run Pegasos over a fixed set of (k, lambda) settings and collect the results.

    Inputs:
    - dataset: path of the JSON dataset passed to data_loader_mnist
      (defaults to 'mnist_subset.json').

    Returns:
    - test_acc: dict mapping 'k=<k>_lambda=<lambda>' to the test accuracy of that setting.
    - train_obj: dict mapping the same keys to the per-iteration objective
      values returned by pegasos_train.
    """
    test_acc = {}
    train_obj = {}

    # The validation split is loaded but not used by these experiments.
    Xtrain, ytrain, _, _, Xtest, ytest = data_loader_mnist(dataset=dataset)

    max_iterations = 500

    # First vary lambda at k = 100, then vary k at lambda = 0.1.
    settings = [(100, lamb) for lamb in (0.01, 0.1, 1)]
    settings += [(k, 0.1) for k in (1, 10, 1000)]

    for k, lamb in settings:
        key = 'k=' + str(k) + '_lambda=' + str(lamb)
        w = np.zeros((len(Xtrain[0]), 1))
        w_l, train_obj[key] = pegasos_train(Xtrain, ytrain, w, lamb, k, max_iterations)
        test_acc[key] = pegasos_test(Xtest, ytest, w_l)

    return test_acc, train_obj


def main():
    """Run the MNIST experiments and print the test accuracy of every setting."""
    # The per-iteration objective values are returned as well but not printed.
    accuracies, _ = pegasos_mnist()  # results on mnist
    print('mnist test acc \n')
    for setting, accuracy in accuracies.items():
        print('%s: test acc = %.4f \n' % (setting, accuracy))
        

# Run the experiments only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()