Skip to content

Commit

Permalink
add test for multilabel quire
Browse files Browse the repository at this point in the history
  • Loading branch information
yangarbiter committed Mar 2, 2017
1 parent 65791a4 commit 60d5546
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 32 deletions.
83 changes: 51 additions & 32 deletions libact/query_strategies/multilabel/multilabel_quire.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,10 @@ def __init__(self, dataset, lamba=1.0, kernel='rbf', gamma=1., coef0=1.,
X, _ = zip(*dataset.get_entries())
self.kernel = kernel
if self.kernel == 'rbf':
self.K = rbf_kernel(X=X, Y=X, gamma=kwargs.pop('gamma', 1.))
self.K = rbf_kernel(X=X, Y=X, gamma=gamma)
elif self.kernel == 'poly':
self.K = polynomial_kernel(X=X,
Y=X,
coef0=kwargs.pop('coef0', 1),
degree=kwargs.pop('degree', 3),
gamma=kwargs.pop('gamma', 1.))
self.K = polynomial_kernel(X=X, Y=X, coef0=coef0, degree=degree,
gamma=gamma)
elif self.kernel == 'linear':
self.K = linear_kernel(X=X, Y=X)
elif hasattr(self.kernel, '__call__'):
Expand All @@ -99,48 +96,70 @@ def __init__(self, dataset, lamba=1.0, kernel='rbf', gamma=1., coef0=1.,


_, lbled_Y = zip(*dataset.get_labeled_entries())
self.n_labels = np.shape(lbled_Y)[1]
n = len(X)
m = np.shape(lbled_Y)[1]
m = self.n_labels
# label correlation matrix
R = np.corrcoef(np.array(lbled_Y).T)
R = np.nan_to_num(R)
self.RK = np.kron(R, self.K)

self.L = lamba * (np.linalg.pinv(self.RK + lamba * np.eye(n*m)))

@inherit_docstring_from(QueryStrategy)
def make_query(self):
dataset = self.dataset
X, Y = zip(*dataset.get_entries())
_, lbled_Y = zip(*dataset.get_labeled_entries())

X = np.array(X)
RK = self.RK
n_instance = len(X)
m = np.shape(lbled_Y)[1]
lamba = self.lamba

def _get_index(self):
_, Y = zip(*self.dataset.get_entries())
n_instance = len(Y)
m = self.n_labels
# index for labeled and unlabeled instance
l_id = []
a_id = []
for i in range(n_instance * m):
if Y[i%n_instance] is None:
if Y[i // m] is None:
a_id.append(i)
else:
l_id.append(i)
return a_id, l_id

#def update(self, entry_id, label):
# # calculate invLaa
# invLaa = self.invLaa
# # idx before update
# a_id, l_id = self.idxs
# m = len(label)
# # assert len(np.where(np.array(a_id) == entry_id*m)[0]) == 1
# idx = np.where(np.array(a_id) == entry_id*m)[0][0]
# for i in range(m):
# D = np.delete(np.delete(invLaa, idx, axis=0), idx, axis=1)
# b = np.delete(invLaa, idx, axis=0)[:, idx]
# # invLuu
# invLaa = D - 1./invLaa[idx, idx] * np.dot(b, b.T)
# self.invLaa = invLaa

@inherit_docstring_from(QueryStrategy)
def make_query(self):
dataset = self.dataset
X, Y = zip(*dataset.get_entries())
X = np.array(X)

n_instance = len(X)
m = self.n_labels
RK = self.RK
lamba = self.lamba
L = self.L
vecY = np.reshape(np.array([y for y in Y if y is not None]).T, (-1, 1))
detLaa = np.linalg.det(L[np.ix_(a_id, a_id)])
#invLaa = np.linalg.pinv(L[np.ix_(a_id, a_id)])
invLaa = (lamba * np.eye(len(a_id)) + RK[np.ix_(a_id, a_id)]) \

a_id, l_id = self._get_index()
# invLaa = np.linalg.pinv(L[np.ix_(a_id, a_id)])
invLaa = ((lamba * np.eye(len(a_id)) + RK[np.ix_(a_id, a_id)]) \
- np.dot(np.dot(RK[np.ix_(a_id, l_id)],
np.linalg.pinv(lamba * np.eye(len(l_id)) \
+ RK[np.ix_(l_id, l_id)])),
RK[np.ix_(l_id, a_id)])
RK[np.ix_(l_id, a_id)])) / lamba

vecY = np.reshape(np.array([y for y in Y if y is not None]).T, (-1, 1))
detLaa = np.linalg.det(L[np.ix_(a_id, a_id)])

score = np.zeros(len(a_id))
b = np.zeros((len(a_id)-1))
score = []
D = np.zeros((len(a_id)-1, len(a_id)-1))
D[...] = invLaa[1:, 1:]
for i, s in enumerate(a_id):
Expand All @@ -162,13 +181,13 @@ def make_query(self):
b[i:] = invLaa[i+1:, i]
invLuu = D - 1./invLaa[i, i] * np.dot(b, b.T)

score.append(L[s, s] - detLaa / L[s, s] \
+ 2 * np.abs(np.dot(L[s, l_id] \
- np.dot(np.dot(L[s, u_id], invLuu),
L[np.ix_(u_id, l_id)]), vecY)))
score[i] = L[s, s] - detLaa / L[s, s] \
+ 2 * np.abs(np.dot(L[s, l_id] \
- np.dot(np.dot(L[s, u_id], invLuu),
L[np.ix_(u_id, l_id)]), vecY))

score = np.sum(np.array(score).reshape(m, -1).T, axis=1)
score = np.sum(score.reshape(m, -1).T, axis=1)

ask_idx = self.random_state_.choice(np.where(score == np.min(score))[0])

return a_id[ask_idx]
return a_id[ask_idx] // m
26 changes: 26 additions & 0 deletions libact/query_strategies/multilabel/tests/test_multilabel_quire.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import unittest

from numpy.testing import assert_array_equal
import numpy as np

from libact.base.dataset import Dataset
from libact.query_strategies.multilabel import MultilabelQUIRE
from libact.utils import run_qs


class MultilabelQUIRETestCase(unittest.TestCase):
    """MultilabelQUIRE test case using a small artificial dataset."""

    def setUp(self):
        # Six 2-D points, each with a two-element binary label vector.
        self.X = [[-2, -1], [1, 1], [-1, -2], [-1, -1], [1, 2], [2, 1]]
        self.y = [[0, 1], [1, 0], [0, 1], [1, 0], [1, 0], [1, 1]]
        # Four queries: exactly the number of entries left unlabeled below.
        self.quota = 4

    def test_multilabel_quire(self):
        """Check the order in which MultilabelQUIRE queries unlabeled entries.

        Only the first two entries start out labeled; the remaining four are
        passed as ``None`` and are expected to be queried in the order
        2, 3, 4, 5.
        """
        # NOTE(review): no random_state is passed to MultilabelQUIRE; if the
        # strategy breaks score ties randomly this expectation could be
        # seed-dependent -- confirm.
        trn_ds = Dataset(self.X, (self.y[:2] + [None] * (len(self.y) - 2)))
        qs = MultilabelQUIRE(trn_ds)
        qseq = run_qs(trn_ds, qs, self.y, self.quota)
        assert_array_equal(qseq, np.array([2, 3, 4, 5]))


# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
30 changes: 30 additions & 0 deletions libact/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,33 @@ def calc_cost(y, yhat, cost_matrix):
ith class and prediction as jth class.
"""
return np.mean(cost_matrix[list(y), list(yhat)])

def run_qs(trn_ds, qs, truth, quota):
    """Run a query strategy on a dataset and return the querying sequence.

    On each iteration the strategy is asked for an entry id, and the dataset
    is updated in place with the corresponding element of ``truth``.

    Parameters
    ----------
    trn_ds : Dataset object
        The dataset to be run on. Its ``update(entry_id, label)`` method is
        called once per iteration.
    qs : QueryStrategy instance
        The active learning algorithm to be run. Its ``make_query()`` method
        is called once per iteration.
    truth : array-like
        The true labels, indexed by entry id.
    quota : int
        Number of iterations to run.

    Returns
    -------
    qseq : numpy array, shape (quota,)
        The entry ids in the order they were queried.
    """
    ret = []
    for _ in range(quota):
        ask_id = qs.make_query()
        # Reveal the true label so the next query sees the updated dataset.
        trn_ds.update(ask_id, truth[ask_id])

        ret.append(ask_id)
    return np.array(ret)

0 comments on commit 60d5546

Please sign in to comment.