Skip to content

Commit

Permalink
Merge pull request #30 from ntucllab/interface-documentation
Browse files Browse the repository at this point in the history
Update interfaces.py documentation
  • Loading branch information
yangarbiter committed Dec 31, 2015
2 parents 72ea947 + ed51c35 commit f1caff0
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 17 deletions.
102 changes: 92 additions & 10 deletions libact/base/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ class QueryStrategy(metaclass=ABCMeta):
A QueryStrategy advises on which unlabeled data should be queried next given
a pool of labeled and unlabeled data.
"""

def __init__(self, dataset, **kwargs):
self._dataset = dataset
dataset.on_update(self.update)
Expand All @@ -22,53 +21,136 @@ def dataset(self):
return self._dataset

def update(self, entry_id, label):
    """Update the internal states of the QueryStrategy.

    Intended to be called after each queried sample has been labeled. The
    base implementation does nothing.

    Parameters
    ----------
    entry_id : int
        The index of the newly labeled sample.

    label : float
        The label of the queried sample.
    """
    pass

@abstractmethod
def make_query(self):
    """Return the index of the sample to be queried and labeled.

    This is documented as a read-only operation: it should not modify the
    internal states of the QueryStrategy.

    Returns
    -------
    ask_id : int
        The index of the next unlabeled sample to be queried and labeled.
    """
    pass


class Labeler(metaclass=ABCMeta):
    """Label the queries made by QueryStrategies.

    A Labeler assigns labels to the samples (feature vectors) queried by
    QueryStrategies.
    """

    @abstractmethod
    def label(self, feature):
        """Return the class label for the input feature vector.

        Parameters
        ----------
        feature : array-like, shape (n_features,)
            The feature vector whose label is to be queried.

        Returns
        -------
        label : int
            The class label of the queried feature.
        """
        pass


class Model(metaclass=ABCMeta):
    """Classification Model.

    A Model is trained on a training dataset and produces a class-predicting
    function for future samples.
    """

    @abstractmethod
    def train(self, dataset, *args, **kwargs):
        """Train a model according to the given training dataset.

        Parameters
        ----------
        dataset : Dataset object
            The training dataset the model is to be trained on.

        Returns
        -------
        self : object
            Returns self.
        """
        pass

    @abstractmethod
    def predict(self, feature, *args, **kwargs):
        """Predict the class labels for the input samples.

        Parameters
        ----------
        feature : array-like, shape (n_samples, n_features)
            The unlabeled samples whose labels are to be predicted.

        Returns
        -------
        y_pred : array-like, shape (n_samples,)
            The class labels for samples in the feature array.
        """
        pass

    @abstractmethod
    def score(self, testing_dataset, *args, **kwargs):
        """Return the mean accuracy on the test dataset.

        Parameters
        ----------
        testing_dataset : Dataset object
            The testing dataset used to measure the performance of the
            trained model.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) wrt. y.
        """
        pass


class ContinuousModel(Model):
    """Classification Model with intermediate continuous output.

    A continuous classification model is able to output a real-valued vector
    for each feature vector provided. The output is of shape
    (n_samples, n_classes) for an input feature matrix X of shape
    (n_samples, n_features). The larger the k-th column value is, the more
    likely a sample x belongs to the class k.
    """

    @abstractmethod
    def predict_real(self, feature, *args, **kwargs):
        """Predict confidence scores for samples.

        Returns the confidence score for each (sample, class) combination.

        The larger the value for entry (sample=x, class=k) is, the more
        confident the model is about the sample x belonging to the class k.

        Taking Logistic Regression as an example, the return value is the
        signed distance of that sample to the hyperplane.

        Parameters
        ----------
        feature : array-like, shape (n_samples, n_features)
            The samples whose confidence scores are to be predicted.

        Returns
        -------
        X : array-like, shape (n_samples, n_classes)
            Each entry is the confidence score per (sample, class)
            combination.
        """
        pass
13 changes: 6 additions & 7 deletions libact/labelers/ideal_labeler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,19 @@
Ideal/Noiseless labeler that returns true label
"""
from libact.base.interfaces import Labeler
import numpy as np

from libact.base.interfaces import Labeler

class IdealLabeler(Labeler):
"""
Provide the errorless/noiseless label to any feature vectors being queried.
Attributes
Parameters
----------
features: numpy array
an array of features used as the search keys for labels
label: numpy array
an array of noiseless labels corresponding to the features
dataset: Dataset object
Dataset object with the ground-truth label for each sample.
"""

def __init__(self, dataset, **kwargs):
Expand Down

0 comments on commit f1caff0

Please sign in to comment.