This package provides a simple pipeline for several classification/regression analyses:
- Temporal Cross Validation
- Generalized Temporal Cross Validation
- Compositional Cross Validation
- Generalized Compositional Cross Validation
.
├── notebooks
│   ├── classify.ipynb
│   └── classify.org
├── Readme.org
├── requirements.txt
└── src
    ├── classificationCV.py  # main class; a wrapper around scikit-learn
    ├── main.py              # example functions showing how to call the ClassificationCV class
    ├── multiscore_cv.py     # utilities for cross-temporal and compositional cross-validation
    ├── perceptron.py        # a PyTorch perceptron
    ├── skorch.py            # wrapper to use torch models with ClassificationCV
    └── utils.py             # other utilities
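Assuming a standard pip workflow, the dependencies listed in requirements.txt can be installed with:

pip install -r requirements.txt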
%load_ext autoreload
%autoreload 2
%reload_ext autoreload
%run /home/leon/dual_task/dual_data/notebooks/setup.py
%matplotlib inline
%config InlineBackend.figure_format = 'png'
import sys
sys.path.insert(0, '../')
import numpy as np
from src.classificationCV import ClassificationCV
from src.main import get_classification
from src.utils import safe_roc_auc_score
import mne
print(mne.__version__)
from sklearn.datasets import make_classification
# Generate a synthetic dataset
X, y = make_classification(n_samples=100,      # number of samples
                           n_features=20,      # number of features
                           n_informative=2,    # number of informative features
                           n_redundant=2,      # number of redundant features
                           n_classes=2,        # number of classes
                           random_state=42)    # for reproducibility
# MNE estimators expect a trailing time axis (one decoder per time point),
# so we add a singleton time dimension
X = X[..., np.newaxis]
print(X.shape)  # Output: (100, 20, 1)
print(y.shape)  # Output: (100,)
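The toy data above has a single (singleton) time bin. With real recordings, X would typically have shape (n_trials, n_channels, n_times), which is what MNE decoders expect. A quick sketch with random data of that shape (the names X_time and y_time are illustrative):

import numpy as np
# Illustrative shapes for real recordings: 100 trials, 20 channels, 50 time bins
n_trials, n_channels, n_times = 100, 20, 50
X_time = np.random.randn(n_trials, n_channels, n_times)
y_time = np.random.randint(0, 2, size=n_trials)
print(X_time.shape)  # (100, 20, 50)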
options = {
    'n_comp': None,        # number of PCA components (None: no PCA)
    'scaler': None,        # input standardization (None: no scaling)
    'n_boots': 1,          # number of bootstraps for the coefficients
    'n_splits': 3, 'n_repeats': 1,  # repeated stratified K-fold
    'scoring': 'roc_auc',  # scikit-learn scorer
    'mne_estimator': 'sliding',  # 'sliding' or 'generalizing'
    'verbose': 0,
    'n_jobs': 30,
}
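For intuition, here is a minimal sketch of what the 'sliding' and 'generalizing' choices refer to in MNE's decoding module. This illustrates the underlying MNE estimators, not necessarily how ClassificationCV wires them up internally:

from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from mne.decoding import SlidingEstimator, GeneralizingEstimator, cross_val_multiscore

clf = make_pipeline(StandardScaler(), LogisticRegression(class_weight='balanced'))
# 'sliding': fit one decoder per time bin and score it at that same time bin
sliding = SlidingEstimator(clf, scoring='roc_auc', n_jobs=-1)
scores_sliding = cross_val_multiscore(sliding, X, y, cv=3)            # (n_splits, n_times)
# 'generalizing': each decoder is also tested on every other time bin
generalizing = GeneralizingEstimator(clf, scoring='roc_auc', n_jobs=-1)
scores_generalizing = cross_val_multiscore(generalizing, X, y, cv=3)  # (n_splits, n_times, n_times)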
from sklearn.linear_model import LogisticRegression
# net = LogisticRegression(penalty='l1', solver='liblinear', class_weight='balanced', n_jobs=None)
model = LogisticRegression(penalty='elasticnet', solver='saga', class_weight='balanced',
                           n_jobs=None, l1_ratio=0.95, max_iter=100, tol=.001,
                           multi_class='multinomial')
params = {'model__C': np.logspace(-4, 4, 10)}
params = {}  # overriding with an empty dict disables the hyperparameter search
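To keep a hyperparameter search, pass a non-empty grid instead of the empty dict when constructing ClassificationCV. A hypothetical richer grid, assuming params is forwarded to an internal scikit-learn search over the pipeline step named 'model' (as the 'model__' prefix above suggests); params_search is an illustrative name:

# Hypothetical search grid (not used below)
params_search = {
    'model__C': np.logspace(-4, 4, 10),           # inverse regularization strength
    'model__l1_ratio': np.linspace(0.5, 1.0, 6),  # elastic-net mixing parameter
}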
model = ClassificationCV(model, params, **options)
options['verbose'] = 0
X_epoch = X[..., 0]  # data from a single time bin (drops the time axis)
coefs, bias = get_classification(model, X_epoch, y, RETURN='coefs', **options)
scores = get_classification(model, X, y, RETURN='scores', **options)
print(scores.shape)
print(scores)
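To look at temporal generalization instead, one could switch the estimator option and plot the resulting score matrix. A sketch, assuming that with 'generalizing' the returned scores form a (training time, testing time) matrix (trivially 1x1 here, since the toy data has a single time bin); model_gen and scores_gen are illustrative names:

import matplotlib.pyplot as plt
# Hypothetical: rebuild the wrapper with the temporal-generalization estimator
options['mne_estimator'] = 'generalizing'
model_gen = ClassificationCV(LogisticRegression(penalty='elasticnet', solver='saga',
                                                class_weight='balanced', l1_ratio=0.95,
                                                max_iter=100, tol=.001),
                             {}, **options)
scores_gen = get_classification(model_gen, X, y, RETURN='scores', **options)
# Assuming scores_gen is (n_times, n_times): rows = training time, columns = testing time
plt.imshow(np.atleast_2d(scores_gen), origin='lower', cmap='viridis')
plt.xlabel('testing time bin')
plt.ylabel('training time bin')
plt.colorbar(label=options['scoring'])
plt.show()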