diff --git a/libact/base/interfaces.py b/libact/base/interfaces.py index 4c06862..3ba1c62 100644 --- a/libact/base/interfaces.py +++ b/libact/base/interfaces.py @@ -12,7 +12,6 @@ class QueryStrategy(metaclass=ABCMeta): A QueryStrategy advices on which unlabeled data to be queried next given a pool of labeled and unlabeled data. """ - def __init__(self, dataset, **kwargs): self._dataset = dataset dataset.on_update(self.update) @@ -22,41 +21,107 @@ def dataset(self): return self._dataset def update(self, entry_id, label): + """Update the internal states of the QueryStrategy after each queried + sample is labeled. + + Parameters + ---------- + entry_id : int + The index of the newly labeled sample. + + label : float + The label of the queried sample. + """ pass @abstractmethod def make_query(self): + """Return the index of the sample to be queried and labeled. Read-only. + + No modification to the internal states. + + Returns + ------- + ask_id : int + The index of the next unlabeled sample to be queried and labeled. + """ pass class Labeler(metaclass=ABCMeta): """Label the queries made by QueryStrategies - A Labeler assigns labels to the features queried by QueryStrategies. + Assign labels to the samples queried by QueryStrategies. """ - @abstractmethod def label(self, feature): + """Return the class label for the input feature vector. + + Parameters + ---------- + feature : array-like, shape (n_features,) + The feature vector whose label is to be queried. + + Returns + ------- + label : int + The class label of the queried feature. + """ pass class Model(metaclass=ABCMeta): """Classification Model - A Model is trained on a training dataset and produces a class-predicting - function for future features. + A Model returns a class-predicting function for future samples after + being trained on a training dataset. """ - @abstractmethod def train(self, dataset, *args, **kwargs): + """Train a model according to the given training dataset.
+ + Parameters + ---------- + dataset : Dataset object + The training dataset the model is to be trained on. + + Returns + ------- + self : object + Returns self. + """ pass @abstractmethod def predict(self, feature, *args, **kwargs): + """Predict the class labels for the input samples + + Parameters + ---------- + feature : array-like, shape (n_samples, n_features) + The unlabeled samples whose labels are to be predicted. + + Returns + ------- + y_pred : array-like, shape (n_samples,) + The class labels for samples in the feature array. + """ pass @abstractmethod def score(self, testing_dataset, *args, **kwargs): + """Return the mean accuracy on the test dataset + + Parameters + ---------- + testing_dataset : Dataset object + The testing dataset used to measure the performance of the trained model. + + Returns + ------- + score : float + Mean accuracy of self.predict(X) wrt. y. + """ pass @@ -64,11 +129,28 @@ class ContinuousModel(Model): """Classification Model with intermediate continuous output A continuous classification model is able to output a real-valued vector - for each features provided. The output vector is of shape (n_samples, n_classs) - for an input feature matrix X of shape (n_samples, n_features). The larger the - kth-column value is, the more likely a feature x belongs the class k. + for each feature provided. """ - @abstractmethod def predict_real(self, feature, *args, **kwargs): + """Predict confidence scores for samples. + + Returns the confidence score for each (sample, class) combination. + + The larger the value for entry (sample=x, class=k) is, the more confident + the model is about the sample x belonging to the class k. + + Taking Logistic Regression as an example, the return value is the signed + distance of that sample to the hyperplane. + + Parameters + ---------- + feature : array-like, shape (n_samples, n_features) + The samples whose confidence scores are to be predicted.
+ + Returns + ------- + X : array-like, shape (n_samples, n_classes) + Each entry is the confidence score for one (sample, class) combination. + """ pass diff --git a/libact/labelers/ideal_labeler.py b/libact/labelers/ideal_labeler.py index dc89efc..9d280f4 100644 --- a/libact/labelers/ideal_labeler.py +++ b/libact/labelers/ideal_labeler.py @@ -2,20 +2,19 @@ Ideal/Noiseless labeler that returns true label """ -from libact.base.interfaces import Labeler import numpy as np +from libact.base.interfaces import Labeler + class IdealLabeler(Labeler): """ Provide the errorless/noiseless label to any feature vectors being queried. - Attributes + Parameters ---------- - features: numpy array - an array of features used as the search keys for labels - - label: numpy array - an array of noiesless labels corresponding to the features + dataset: Dataset object + Dataset object with the ground-truth label for each sample. + """ def __init__(self, dataset, **kwargs):