Skip to content

Commit

Permalink
Merge pull request #7 from vecxoz/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
vecxoz authored Feb 23, 2018
2 parents 88dc474 + b53ec16 commit 86a4fd7
Show file tree
Hide file tree
Showing 6 changed files with 464 additions and 118 deletions.
11 changes: 10 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,22 @@
from setuptools import setup

setup(name='vecstack',
version='0.2.1',
version='0.2.2',
description='Python package for stacking (machine learning technique)',
long_description='Convenient way to automate OOF computation, prediction and bagging using any number of models',
classifiers=[
'License :: OSI Approved :: MIT License',
'Operating System :: MacOS',
'Operating System :: Microsoft :: Windows',
'Operating System :: POSIX',
'Operating System :: Unix',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Scientific/Engineering :: Information Analysis',
Expand All @@ -31,4 +38,6 @@
'scipy',
'scikit-learn>=0.18'
],
test_suite='nose.collector',
tests_require=['nose'],
zip_safe=False)
94 changes: 56 additions & 38 deletions tests/test_vecstack_classification_binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

n_classes = 2
n_folds = 5
temp_dir = 'tmpdw35lg54ms80eb42'

X, y = make_classification(n_samples = 500, n_features = 5, n_informative = 3, n_redundant = 1,
n_classes = n_classes, flip_y = 0, random_state = 0)
Expand All @@ -43,12 +44,29 @@

class TestClassificationMulticlass(unittest.TestCase):

@classmethod
def setUpClass(cls):
    """Create the temporary directory used by the tests in this class.

    Creation is best effort: if the directory cannot be created, a message
    is printed and no exception is raised from here.
    """
    try:
        os.mkdir(temp_dir)
    except OSError:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed; os.mkdir reports failures
        # (already exists, permission denied, ...) as OSError.
        print('Unable to create temp dir')

@classmethod
def tearDownClass(cls):
    """Remove the temporary directory once the tests in this class are done.

    Removal is best effort: if the directory cannot be removed, a message
    is printed and no exception is raised from here.
    """
    try:
        os.rmdir(temp_dir)
    except OSError:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed; os.rmdir reports failures
        # (missing directory, directory not empty, ...) as OSError.
        print('Unable to remove temp dir')

def tearDown(self):
# Remove files after each test
files = glob.glob('*.npy')
files.extend(glob.glob('*.txt'))
for file in files:
os.remove(file)
files = glob.glob(os.path.join(temp_dir, '*.npy'))
files.extend(glob.glob(os.path.join(temp_dir, '*.log.txt')))
try:
for file in files:
os.remove(file)
except:
print('Unable to remove temp file')

#---------------------------------------------------------------------------
# Test returned and saved arrays in each mode (parameter <mode>)
Expand All @@ -69,13 +87,13 @@ def test_oof_pred_mode(self):

models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred', random_state = 0, verbose = 0, stratified = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand All @@ -95,13 +113,13 @@ def test_oof_mode(self):

models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof', random_state = 0, verbose = 0, stratified = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand All @@ -121,13 +139,13 @@ def test_pred_mode(self):

models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'pred', random_state = 0, verbose = 0, stratified = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand Down Expand Up @@ -160,13 +178,13 @@ def test_oof_pred_bag_mode(self):

models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand Down Expand Up @@ -197,13 +215,13 @@ def test_pred_bag_mode(self):

models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'pred_bag', random_state = 0, verbose = 0, stratified = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand All @@ -229,12 +247,12 @@ def test_oof_pred_mode_proba(self):
models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
mode = 'oof_pred', random_state = 0, verbose = 0, needs_proba = True, save_dir = '.')
mode = 'oof_pred', random_state = 0, verbose = 0, needs_proba = True, save_dir=temp_dir)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand All @@ -255,12 +273,12 @@ def test_oof_mode_proba(self):
models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
mode = 'oof', random_state = 0, verbose = 0, needs_proba = True, save_dir = '.')
mode = 'oof', random_state = 0, verbose = 0, needs_proba = True, save_dir=temp_dir)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand All @@ -281,12 +299,12 @@ def test_pred_mode_proba(self):
models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
mode = 'pred', random_state = 0, verbose = 0, needs_proba = True, save_dir = '.')
mode = 'pred', random_state = 0, verbose = 0, needs_proba = True, save_dir=temp_dir)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand Down Expand Up @@ -322,13 +340,13 @@ def test_oof_pred_bag_mode_proba(self):

models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True, needs_proba = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand Down Expand Up @@ -371,13 +389,13 @@ def test_pred_bag_mode_proba(self):

models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'pred_bag', random_state = 0, verbose = 0, stratified = True, needs_proba = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand Down Expand Up @@ -415,13 +433,13 @@ def test_oof_pred_bag_mode_shuffle(self):

models = [LogisticRegression()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = True, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = True, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand Down Expand Up @@ -450,13 +468,13 @@ def test_oof_mode_metric(self):

models = [LogisticRegression()]
S_train, S_test = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, save_dir = '.',
regression = False, n_folds = n_folds, save_dir=temp_dir,
mode = 'oof', random_state = 0, verbose = 0, stratified = True)

# Load mean score and std from file
# Normally, if cleaning is performed, there is only one .log.txt file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.log.txt'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.log.txt')))[-1] # take the latest file
with open(file_name) as f:
for line in f:
if 'MEAN' in line:
Expand Down Expand Up @@ -487,14 +505,14 @@ def test_oof_mode_metric_proba(self):

models = [LogisticRegression()]
S_train, S_test = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, save_dir = '.',
regression = False, n_folds = n_folds, save_dir=temp_dir,
mode = 'oof', random_state = 0, verbose = 0, stratified = True,
needs_proba = True)

# Load mean score and std from file
# Normally, if cleaning is performed, there is only one .log.txt file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.log.txt'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.log.txt')))[-1] # take the latest file
with open(file_name) as f:
for line in f:
if 'MEAN' in line:
Expand Down Expand Up @@ -533,13 +551,13 @@ def test_oof_pred_mode_2_models(self):
models = [LogisticRegression(),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred', random_state = 0, verbose = 0, stratified = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand Down Expand Up @@ -596,13 +614,13 @@ def test_oof_pred_bag_mode_2_models(self):
models = [LogisticRegression(),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand Down Expand Up @@ -637,12 +655,12 @@ def test_oof_pred_mode_proba_2_models(self):
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, stratified = True,
mode = 'oof_pred', random_state = 0, verbose = 0, needs_proba = True, save_dir = '.')
mode = 'oof_pred', random_state = 0, verbose = 0, needs_proba = True, save_dir=temp_dir)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand Down Expand Up @@ -708,13 +726,13 @@ def test_oof_pred_bag_mode_proba_2_models(self):
models = [LogisticRegression(),
GaussianNB()]
S_train_2, S_test_2 = stacking(models, X_train, y_train, X_test,
regression = False, n_folds = n_folds, shuffle = False, save_dir = '.',
regression = False, n_folds = n_folds, shuffle = False, save_dir=temp_dir,
mode = 'oof_pred_bag', random_state = 0, verbose = 0, stratified = True, needs_proba = True)

# Load OOF from file
# Normally, if cleaning is performed, there is only one .npy file at any given moment.
# But if there is no cleaning, there may be more than one file, so we take the latest.
file_name = sorted(glob.glob('*.npy'))[-1] # take the latest file
file_name = sorted(glob.glob(os.path.join(temp_dir, '*.npy')))[-1] # take the latest file
S = np.load(file_name)
S_train_3 = S[0]
S_test_3 = S[1]
Expand Down
Loading

0 comments on commit 86a4fd7

Please sign in to comment.