diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..c309866 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,32 @@ +name: build + +on: [push, pull_request] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + - name: Install dependencies + run: | + pip install poetry + poetry install + - name: Check style + run: poetry run flake8 --exclude=docs* + - name: Test with pytest + run: poetry run pytest --cov=./ --cov-report=xml + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ./coverage.xml + flags: unittests + name: codecov-umbrella + fail_ci_if_error: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..ea411d1 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,71 @@ +name: Release + +on: + push: + branches: [main] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: 3.7 + - name: Install dependencies + run: | + pip install poetry + poetry install + - name: Check style + run: poetry run flake8 --exclude=docs* + - name: Test with pytest + run: | + poetry run pytest --cov=./ --cov-report=xml + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ./coverage.xml + flags: unittests + name: codecov-umbrella + yml: ./codecov.yml + fail_ci_if_error: true + - name: checkout + uses: actions/checkout@master + with: + ref: main + - name: Bump version, tag, and publish + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git pull origin main + poetry run semantic-release version + poetry version $(grep "version" */__init__.py | cut -d "'" -f 2 | cut -d '"' -f 2) + git commit -m "Bump versions" -a + - name: Push package version changes + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + - name: Get release tag version from package version + run: | + echo ::set-output name=release_tag::$(grep "version" */__init__.py | cut -d "'" -f 2 | cut -d '"' -f 2) + id: release + - name: Create Release with new version + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ steps.release.outputs.release_tag }} + release_name: ${{ steps.release.outputs.release_tag }} + draft: false + prerelease: false + - name: Build package and publish to test PyPI + env: + TEST_PYPI_USERNAME: ${{ secrets.TEST_PYPI_USERNAME }} + TEST_PYPI_PASSWORD: ${{ secrets.TEST_PYPI_PASSWORD }} + run: | + poetry config repositories.test-pypi https://test.pypi.org/legacy/ + poetry build + poetry publish -r test-pypi -u $TEST_PYPI_USERNAME -p $TEST_PYPI_PASSWORD diff --git a/README.md b/README.md index 462b7bc..2dd9821 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ DRY out your regression analysis!
-![](https://github.com/ansarusc/aridanalysis/workflows/build/badge.svg) [![codecov](https://codecov.io/gh/ansarusc/aridanalysis/branch/main/graph/badge.svg)](https://codecov.io/gh/ansarusc/aridanalysis) ![Release](https://github.com/ansarusc/aridanalysis/workflows/Release/badge.svg) [![Documentation Status](https://readthedocs.org/projects/aridanalysis/badge/?version=latest)](https://aridanalysis.readthedocs.io/en/latest/?badge=latest) +![](https://github.com/ansarusc/aridanalysis/workflows/build/badge.svg) [![codecov](https://codecov.io/gh/UBC-MDS/aridanalysis_py/branch/main/graph/badge.svg?token=JGT4Z519QD)](https://codecov.io/gh/UBC-MDS/aridanalysis_py) ![Release](https://github.com/ansarusc/aridanalysis/workflows/Release/badge.svg) [![Documentation Status](https://readthedocs.org/projects/aridanalysis/badge/?version=latest)](https://aridanalysis.readthedocs.io/en/latest/?badge=latest) ## Python Package for Inferential Regression and EDA Analysis! diff --git a/aridanalysis/aridanalysis.py b/aridanalysis/aridanalysis.py index dba5914..020e4ca 100644 --- a/aridanalysis/aridanalysis.py +++ b/aridanalysis/aridanalysis.py @@ -2,27 +2,35 @@ import pandas.api.types as ptypes import numpy as np import altair as alt -from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet, LogisticRegression +from sklearn.linear_model import ( + LinearRegression, + Lasso, + Ridge, + ElasticNet, + LogisticRegression, +) import statsmodels.api as sm import statsmodels.formula.api as smf + from sklearn.linear_model import PoissonRegressor from sklearn.compose import make_column_transformer from sklearn.preprocessing import OneHotEncoder from sklearn.pipeline import make_pipeline -import sys, os +import sys +import os myPath = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, myPath + '/../aridanalysis') -import error_strings as errors -import warnings +import error_strings as errors # noqa: E402 +import warnings # noqa: E402 + + def arid_eda(df, response, response_type, features=[]): """ - - Function to create summary statistics and basic EDA plots. Given a data frame, - this function outputs general exploratory analysis plots as well as basic - statistics summarizing trends in the features of the input data. - + Function to create summary statistics and basic EDA plots. Given a data + frame, this function outputs general exploratory analysis plots as well + as basic statistics summarizing trends in the features of the input data. + Parameters ---------- df : pandas.DataFrame The input dataframe to analyze response : str A column name of the response variable response_type : str Input either 'categorical' or 'continuous' to indicate response type features : list A list of the feature names to perform EDA on - + Returns ------- altair.Chart Plots relevant to the exploratory data analysis - + pandas.DataFrame - A dataframe containing summary statistics relevant to the + A dataframe containing summary statistics relevant to the selected feature and response variable.
- + Examples -------- >>> from aridanalysis import aridanalysis - >>> dataframe, plots = arid_eda(house_prices, 'price', 'continuous, ['rooms', 'age','garage']) - >>> dataframe, plots = arid_eda(iris_data, 'species', categorical, ['petalWidth', 'sepalWidth','petalLength']) - + >>> dataframe, plots = arid_eda(house_prices, + 'price', + 'continuous', + ['rooms', 'age','garage']) + + >>> dataframe, plots = arid_eda(iris_data, + 'species', + 'categorical', + ['petalWidth', 'sepalWidth','petalLength']) + """ - - - ############################ Exception Handling ##################################### + ######################################################################### if type(df) != pd.core.frame.DataFrame: raise KeyError('Input data must be a Pandas DataFrame') if response not in df.columns: raise KeyError('Response variable is not contained within dataframe') - + for feat in features: - if feat not in df.columns: + if feat not in df.columns: raise KeyError(f'{feat} is not contained within dataframe') - + if response in features: raise KeyError('Response variable must be distinct from features') - + if df[response].dtype == np.dtype('O') and response_type == 'continuous': raise KeyError('Current response variable is not continuous') - + if df[response].dtype != np.dtype('O') and response_type == 'categorical': raise KeyError('Current response variable is not categorical') - + if response_type not in ['categorical', 'continuous']: raise KeyError('Response must be categorical or continuous') - - ##################################################################################### - + + ########################################################################### + chartlist = [] corr_plot_width = 70*len(set(features)) corr_plot_height = 70*len(set(features)) - filter_df = df.loc[:,features] - - - if response_type == 'categorical': - for feat in features: # This function creates density plots for each feature - chart = alt.Chart(df, title=(feat + ' Distribution')).transform_density( # only works currently if response is categorical - feat, - as_=[feat, 'density'], - groupby=[response] - ).mark_area(interpolate='monotone', opacity=0.7).encode( - y = 'density:Q', - x = alt.X(feat), - color=response) + filter_df = df.loc[:, features] + + if response_type == "categorical": + for feat in features: # Creates density plots for each feature + chart = ( + alt.Chart(df, title=(feat + " Distribution")) + .transform_density( + feat, as_=[feat, "density"], groupby=[response] + ) + .mark_area(interpolate="monotone", opacity=0.7) + .encode(y="density:Q", x=alt.X(feat), color=response) + ) chartlist.append(chart) - + elif response_type == 'continuous': - - for feat in features: # This function creates histograms for each feature - chart = alt.Chart(df, title=(feat + ' Distribution')).mark_bar().encode( # only works currently if response is continuous - y = 'count()', - x = alt.X(feat, bin=alt.Bin(), title = feat) - ).properties(width=200, height=200) - chartlist.append(chart) - -# for i in range(len(chartlist)): + + for feat in features: # Creates histograms for each feature + chart = ( + alt.Chart(df, title=(feat + " Distribution")) + .mark_bar() + .encode( # only works currently if response is continuous + y="count()", x=alt.X(feat, bin=alt.Bin(), title=feat) + ) + .properties(width=200, height=200) + ) + chartlist.append(chart) + +# for i in range(len(chartlist)): # if i == 0: # dist_output = chartlist[i] # elif i == 1: @@ -112,10 +129,9 @@ def
arid_eda(df, response, response_type, features=[]): # elif i % 2 == 1: # dist_output = alt.vconcat(dist_output, chartlist[i]) - row_list = [] # This loop will output the feature distributions as a square + row_list = [] # output feature distributions as a square first_row = True - row_len = (len(set(features))**(1/2))//1 - for i in range(len(chartlist)): + for i in range(len(chartlist)): print(i) if i == 0: current_row = chartlist[i] @@ -124,24 +140,24 @@ def arid_eda(df, response, response_type, features=[]): elif i % 2 == 0: row_list.append(current_row) current_row = chartlist[i] - - row_list.append(current_row) - + + row_list.append(current_row) + for row in row_list: if first_row: dist_output = row first_row = False - else: + else: dist_output = alt.vconcat(dist_output, row) corr_df = filter_df.corr('spearman').stack().reset_index(name='corr') corr_df.loc[corr_df['corr'] == 1, 'corr'] = 0 corr_df['corr_label'] = corr_df['corr'].map('{:.2f}'.format) corr_df['abs'] = corr_df['corr'].abs() - + base = alt.Chart(corr_df, title='Feature Correlation').encode( x=alt.X('level_0', axis=alt.Axis(title='')), - y=alt.Y('level_1', axis=alt.Axis(title='')) + y=alt.Y('level_1', axis=alt.Axis(title='')) ).properties(width=corr_plot_width, height=corr_plot_height) text = base.mark_text().encode( @@ -150,20 +166,21 @@ def arid_eda(df, response, response_type, features=[]): ) cor_sq = base.mark_rect().encode( - color=alt.Color('corr', scale=alt.Scale(scheme='blueorange')) + color=alt.Color('corr', scale=alt.Scale(scheme='blueorange')) ) corr_plot = cor_sq + text return_df = pd.DataFrame(filter_df.describe()) return return_df, dist_output | corr_plot - + def arid_linreg(df, response, features=[], regularization=None, alpha=1): """ Function that performs a linear regression on continuous response data, - using both an sklearn and statsmodel model analogs. These models are optimized - for prediction and inference, respectively. + using both sklearn and statsmodels model analogs. These models are + optimized for prediction and inference, respectively.
+ Parameters ---------- data_frame : pandas.Dataframe The input dataframe to analyze. @@ -177,7 +194,7 @@ def arid_linreg(df, response, features=[], regularization=None, alpha=1): * L1 * L2 * L1L2 alpha : float The regularization weight strength - + Returns ------- sklearn.linear_model @@ -191,70 +208,78 @@ def arid_linreg(df, response, features=[], regularization=None, alpha=1): """ # Validate input arguments assert isinstance(df, pd.DataFrame), errors.INVALID_DATAFRAME - assert not df.empty , errors.EMPTY_DATAFRAME + assert not df.empty, errors.EMPTY_DATAFRAME assert response in df.columns.tolist(), errors.RESPONSE_NOT_FOUND - assert ptypes.is_numeric_dtype(df[response].dtype), errors.INVALID_RESPONSE_DATATYPE - assert regularization in [None, "L1", "L2", "L1L2"], errors.INVALID_REGULARIZATION_INPUT + assert ptypes.is_numeric_dtype(df[response].dtype), \ errors.INVALID_RESPONSE_DATATYPE + assert regularization in [None, "L1", "L2", "L1L2"], \ errors.INVALID_REGULARIZATION_INPUT assert ptypes.is_numeric_dtype(type(alpha)), errors.INVALID_ALPHA_INPUT - + # Isolate numeric features from dataframe feature_df = df.drop(response, axis=1) feature_list = feature_df.select_dtypes(['number']).columns - + # Report features that have been discarded to the user if len(feature_df.columns) != len(feature_list): - non_numeric_features = [feature for feature in feature_df.columns if not (feature in feature_list)] - warnings.warn(f"These features are non-numeric and will be discarded: {non_numeric_features}") - + non_numeric_features = [ + feature for feature in feature_df.columns if not (feature in feature_list) # noqa: E501 + ] + warnings.warn( + f"These features are non-numeric and will be discarded: {non_numeric_features}" # noqa: E501 + ) + # Create a subset of user selected features if supplied if len(features) > 0: feature_list = set(features).intersection(feature_list) # Report any user selected features that were not found if len(feature_list) != len(features): - missing_features = [feature for feature in features if not (feature in feature_list)] - warnings.warn(f"These user-selected features are not present in data: {missing_features}") + missing_features = [ + feature for feature in features if not (feature in feature_list) # noqa: E501 + ] + warnings.warn( + f"These user-selected features are not present in data: {missing_features}" # noqa: E501 + ) # Assert that there are still features available to perform regression - assert len(feature_list) > 0, errors.NO_VALID_FEATURES + assert len(feature_list) > 0, errors.NO_VALID_FEATURES print(f"Feature list: {feature_list}") - + # Formally define our features and response X = df[feature_list] y = df[response] - + # Create and fit analogous models in sklearn and statsmodels if regularization == "L1": - skl_model = Lasso(alpha, fit_intercept = False).fit(X, y) - sm_model = sm.OLS(y, X).fit_regularized(L1_wt = 1, - alpha = alpha) + skl_model = Lasso(alpha, fit_intercept=False).fit(X, y) + sm_model = sm.OLS(y, X).fit_regularized(L1_wt=1, alpha=alpha) elif regularization == "L2": - skl_model = Ridge(alpha, fit_intercept = False).fit(X, y) + skl_model = Ridge(alpha, fit_intercept=False).fit(X, y) # No idea why statsmodels L2 alpha requires the division by 3, but it # was tested empirically and coefficients/predictions match...
- sm_model = sm.OLS(y, X).fit_regularized(L1_wt = 0, - alpha = alpha/3) + sm_model = sm.OLS(y, X).fit_regularized(L1_wt=0, alpha=alpha/3) elif regularization == "L1L2": - skl_model = ElasticNet(alpha, fit_intercept = False).fit(X, y) - sm_model = sm.OLS(y, X).fit_regularized(L1_wt = 0.5, - alpha = alpha) + skl_model = ElasticNet(alpha, fit_intercept=False).fit(X, y) + sm_model = sm.OLS(y, X).fit_regularized(L1_wt=0.5, + alpha=alpha) else: - skl_model = LinearRegression(fit_intercept = False).fit(X, y) + skl_model = LinearRegression(fit_intercept=False).fit(X, y) sm_model = sm.OLS(y, X).fit() - + # Display model coefficients to user - print(pd.DataFrame({'statsmodel coefficients' : sm_model.params, - 'sklearn coefficients' : skl_model.coef_}, index = feature_list)) - - + print(pd.DataFrame({'statsmodels coefficients': sm_model.params, + 'sklearn coefficients': skl_model.coef_}, index=feature_list)) # noqa: E501 + return skl_model, sm_model - + + def arid_logreg(df, response, features=[], type="binomial"): - """Function to fit a logistic regression for a binomial or multinomial classification. - - Given a data frame, a response variable and explanatory variables (features), - this function fits a logistic regression and outputs the statistical summary - including the interpretation. - + """Function to fit a binomial or multinomial logistic regression. + + Given a data frame, a response variable and explanatory variables + (features), this function fits a logistic regression and outputs + the statistical summary including the interpretation. + Parameters ---------- df : pandas.DataFrame @@ -265,87 +290,95 @@ def arid_logreg(df, response, features=[], type="binomial"): A list of the column names as explanatory variables type : str Classification type. Either "binomial" or "multinomial" - + Returns ------- sklearn.linear_model - A fitted logistic regression sklearn model configured with the chosen input parameters + A fitted logistic regression sklearn model configured with + the chosen input parameters statsmodels.discrete.discrete_model A fitted Logit statsmodel configured with the chosen input parameters - + Examples -------- - >>> aridanalysis.arid_logreg(df, 'Target', ['feat1', 'feat2', 'feat3'], type="binomial") + >>> aridanalysis.arid_logreg(df, + 'Target', + ['feat1', 'feat2', 'feat3'], + type="binomial") """ # Validate input arguments assert isinstance(df, pd.DataFrame), errors.INVALID_DATAFRAME - assert not df.empty , errors.EMPTY_DATAFRAME + assert not df.empty, errors.EMPTY_DATAFRAME assert response in df.columns.tolist(), errors.RESPONSE_NOT_FOUND assert type in ["binomial", "multinomial"], errors.INVALID_TYPE_INPUT - + # Get features list from df feature_df = df.drop(response, axis=1) feature_list = feature_df.select_dtypes(['number']).columns - + # Report features that have been discarded to the user if len(feature_df.columns) != len(feature_list): - non_numeric_features = [feature for feature in feature_df.columns if not (feature in feature_list)] - warnings.warn(f"These features are non-numeric and will be discarded: {non_numeric_features}") - + non_numeric_features = [feature for feature in feature_df.columns if not (feature in feature_list)] # noqa: E501 + warnings.warn(f"These features are non-numeric and will be discarded: {non_numeric_features}") # noqa: E501 + # Create a subset of user selected features if supplied if len(features) > 0: feature_list = set(features).intersection(feature_list) # Report any user selected features that were not found if len(feature_list) !=
len(features): - missing_features = [feature for feature in features if not (feature in feature_list)] - warnings.warn(f"These user-selected features are not present in data: {missing_features}") + missing_features = [feature for feature in features if not (feature in feature_list)] # noqa: E501 + warnings.warn(f"These user-selected features are not present in data: {missing_features}") # noqa: E501 # Assert that there are still features available to perform classification - assert len(feature_list) > 0, errors.NO_VALID_FEATURES - + assert len(feature_list) > 0, errors.NO_VALID_FEATURES + # Formally define our features and response X = df[feature_list] y = df[response] - + # Create and fit analogous models in sklearn and statsmodels if type == "binomial": - skl_model = LogisticRegression(penalty='none', fit_intercept = False, multi_class='ovr').fit(X, y) + skl_model = LogisticRegression(penalty='none', fit_intercept=False, multi_class='ovr').fit(X, y) # noqa: E501 sm_model = sm.Logit(y, X).fit(method="bfgs") - + else: - skl_model = LogisticRegression(penalty='none', fit_intercept = False, multi_class='multinomial').fit(X, y) + skl_model = LogisticRegression(penalty='none', fit_intercept=False, multi_class='multinomial').fit(X, y) # noqa: E501 sm_model = sm.MNLogit(y, X).fit() - + # Display model coefficients to user print(pd.DataFrame(skl_model.coef_, columns=feature_list)) print(sm_model.summary()) - + return skl_model, sm_model -def arid_countreg(data_frame, response, con_features=[], cat_features=[], model="additive", alpha=1): + +def arid_countreg(data_frame, response, con_features=[], cat_features=[], model="additive", alpha=1): # noqa: E501 """ - Function that performs a count regression on a numerical discete response data, - using both an sklearn and statsmodel model analogs (prediction and inference). - The function will return both models,each one with their respective insights. + Function that performs a count regression on a numerical discrete response + data, using both sklearn and statsmodels model analogs (prediction and + inference). The function will return both models, each one with their + respective insights. Parameters ---------- data_frame : pandas.Dataframe The input dataframe to analyze. response : str - A column name of the response variable. Because the function manipulates count data, it must be of type int. + A column name of the response variable. Because the function manipulates + count data, it must be of type int. con_features : list - A list of the continuous explanatory variables to be used in the analysis. Default value is None, meaning - to use all the numerical columns in the data frame. + A list of the continuous explanatory variables to be used in the + analysis. Default value is None, meaning to use all the numerical + columns in the data frame. cat_features : list - A list of the categorical explanatory variables to be used in the analysis. Default value is None, meaning - to use all the categorical columns in the data frame. + A list of the categorical explanatory variables to be used in the + analysis. Default value is None, meaning to use all the categorical + columns in the data frame. model: str Model type.
Either "additive" or "interactive" alpha: float Constant that controls regularization strength in predictive model - - + Returns ------- sklearn.linear_model @@ -353,84 +386,122 @@ def arid_countreg(data_frame, response, con_features=[], cat_features=[], model= statsmodels.regression.linear_model A fitted statsmodel configured with the chosen input parameters - Examples -------- >>> from aridanalysis import aridanalysis - >>> aridanalysis.arid_countreg(df, income, features = [feat1, feat5] ,"additive") + >>> aridanalysis.arid_countreg(df, + income, + features=[feat1, feat5], + model="additive") """ assert isinstance(con_features, list), "ERROR: INVALID LIST INPUT PASSED" assert isinstance(cat_features, list), "ERROR: INVALID LIST INPUT PASSED" - - - #Deal with the features column + + # Deal with the features column if len(con_features) == 0: - con_features = data_frame.drop(columns=[response]).select_dtypes('number').columns.tolist() + con_features = ( + data_frame.drop(columns=[response]).select_dtypes("number") + .columns.tolist() + ) if len(cat_features) == 0: - cat_features = data_frame.drop(columns=[response]).select_dtypes(['category', 'object']).columns.tolist() - - + cat_features = ( + data_frame.drop(columns=[response]) + .select_dtypes(["category", "object"]) + .columns.tolist() + ) + assert isinstance(data_frame, pd.DataFrame), errors.INVALID_DATAFRAME - assert not data_frame.empty , errors.EMPTY_DATAFRAME + assert not data_frame.empty, errors.EMPTY_DATAFRAME assert response in data_frame.columns.tolist(), errors.RESPONSE_NOT_FOUND - assert all(item in data_frame.columns.tolist() for item in con_features), "ERROR: CONTINUOUS VARIABLE(S) NOT IN DATAFRAME" - assert all(item in data_frame.columns.tolist() for item in cat_features), "ERROR: CATEGORICAL VARIABLE(S) NOT IN DATAFRAME" - assert ptypes.is_integer_dtype(data_frame[response].dtype), "ERROR: INVALID RESPONSE DATATYPE FOR COUNT REGRESSION: MUST BE TYPE INT" + assert all(item in data_frame.columns.tolist() for item in con_features), \ "ERROR: CONTINUOUS VARIABLE(S) NOT IN DATAFRAME" + assert all(item in data_frame.columns.tolist() for item in cat_features), \ "ERROR: CATEGORICAL VARIABLE(S) NOT IN DATAFRAME" + assert ptypes.is_integer_dtype(data_frame[response].dtype), \ "ERROR: INVALID RESPONSE DATATYPE FOR COUNT REGRESSION: MUST BE TYPE INT" # noqa: E501 assert model in ["additive", "interactive"], "ERROR: INVALID MODEL PASSED" assert ptypes.is_numeric_dtype(type(alpha)), errors.INVALID_ALPHA_INPUT - - - - #Scikit Learn Model + + # Scikit Learn Model if len(cat_features) != 0: X_sk = data_frame[con_features + cat_features] y_sk = data_frame[response] - preprocessor = make_column_transformer((OneHotEncoder(handle_unknown="ignore"), cat_features)) - pipeline = make_pipeline(preprocessor, PoissonRegressor(alpha=alpha, fit_intercept=True,)) - sk_model = pipeline.fit(X_sk,y_sk) + preprocessor = make_column_transformer( + (OneHotEncoder(handle_unknown="ignore"), cat_features) + ) + pipeline = make_pipeline( + preprocessor, + PoissonRegressor( + alpha=alpha, + fit_intercept=True, + ), + ) + sk_model = pipeline.fit(X_sk, y_sk) else: X_sk = data_frame[con_features] y_sk = data_frame[response] - pipeline = make_pipeline(PoissonRegressor(alpha=0, fit_intercept=True, max_iter=100)) - sk_model = pipeline.fit(X_sk,y_sk) - #Aditive inferential model + pipeline = make_pipeline( + PoissonRegressor(alpha=0, fit_intercept=True, max_iter=100) + ) + sk_model = pipeline.fit(X_sk, y_sk) + + # Additive inferential model if model == "additive": -
cat_features =["C(" + i + ")" for i in cat_features] - con_list = "".join([f"{i}" if i is con_features[0] else f" + {i}"for i in con_features]) - cat_list = "".join([f"{i}" if i is cat_features[0] else f" + {i}"for i in cat_features]) + cat_features = ["C(" + i + ")" for i in cat_features] + con_list = "".join( + [f"{i}" if i is con_features[0] else f" + {i}" for i in con_features] # noqa: E501 + ) + cat_list = "".join( + [f"{i}" if i is cat_features[0] else f" + {i}" for i in cat_features] # noqa: E501 + ) if len(cat_list) > 0: - formula = f"{response} ~ {con_list} + {cat_list}" + formula = f"{response} ~ {con_list} + {cat_list}" else: - formula = f"{response} ~ {con_list}" - glm_count= smf.glm(formula=formula, data=data_frame, family=sm.families.Poisson()).fit() - print(glm_count.summary()) - else : - cat_features =["C(" + i + ")" for i in cat_features] - con_list = "".join([f"{i}" if i is con_features[0] else f" + {i}"for i in con_features]) - cat_list = "".join([f"{i}" if i is cat_features[0] else f" + {i}"for i in cat_features]) - interact_list = "".join([f"{i} * {j}" if j is cat_features[0] and i is con_features[0] - else f" + {i} * {j}" for i in con_features for j in cat_features]) + formula = f"{response} ~ {con_list}" + glm_count = smf.glm( + formula=formula, data=data_frame, family=sm.families.Poisson() + ).fit() + print(glm_count.summary()) + else: + cat_features = ["C(" + i + ")" for i in cat_features] + con_list = "".join( + [f"{i}" if i is con_features[0] else f" + {i}" for i in con_features] # noqa: E501 + ) + cat_list = "".join( + [f"{i}" if i is cat_features[0] else f" + {i}" for i in cat_features] # noqa: E501 + ) + interact_list = "".join( + [ + f"{i} * {j}" + if j is cat_features[0] and i is con_features[0] + else f" + {i} * {j}" + for i in con_features + for j in cat_features + ] + ) equal = set() cont_interaction = "" for i in con_features[0:]: for j in con_features[1:]: if i is con_features[0] and j is con_features[1]: - cont_interaction = f"{i} * {j}" - equal.update([(i,j)]) - if len(equal)>0: + cont_interaction = f"{i} * {j}" + equal.update([(i, j)]) + if len(equal) > 0: continue if i != j and (j, i) not in equal: - equal.update([(i,j)]) + equal.update([(i, j)]) cont_interaction += f" + {i} * {j}" if len(cat_features) > 0 and len(cont_interaction) > 0: - formula = f"{response} ~ {con_list} + {cat_list} + {interact_list} + {cont_interaction}" + formula = f"{response} ~ {con_list} + {cat_list} + {interact_list} + {cont_interaction}" # noqa: E501 elif len(cat_features) == 0 and len(cont_interaction) > 0: formula = f"{response} ~ {con_list} + {cont_interaction}" - elif len(cat_features) > 0 and len(cont_interaction) == 0: + elif len(cat_features) > 0 and len(cont_interaction) == 0: formula = f"{response} ~ {con_list} + {cat_list} + {interact_list}" else: formula = f"{response} ~ {con_list}" - glm_count= smf.glm(formula=formula, data=data_frame, family=sm.families.Poisson()).fit() - print(glm_count.summary()) - + glm_count = smf.glm(formula=formula, + data=data_frame, + family=sm.families.Poisson()).fit() + print(glm_count.summary()) + return (sk_model, glm_count) diff --git a/aridanalysis/error_strings.py b/aridanalysis/error_strings.py index 2010e34..bdcad06 100644 --- a/aridanalysis/error_strings.py +++ b/aridanalysis/error_strings.py @@ -1,3 +1,4 @@ +# flake8: noqa INVALID_DATAFRAME = "ERROR: INVALID DATAFRAME INPUT" EMPTY_DATAFRAME = "ERROR: EMPTY DATAFRAME INPUT" RESPONSE_NOT_FOUND = "ERROR: RESPONSE SELECTION NOT PRESENT IN DATAFRAME" @@ -6,4 +7,4 @@ INVALID_ALPHA_INPUT =
"ERROR: INVALID ALPHA INPUT DATATYPE" NO_VALID_FEATURES = "ERROR: NO VALID FEATURES AVAILABLE" INVALID_INPUT_LIST = "ERROR: INPUT FEATURE ARGUMENT NOT A LIST" -INVALID_TYPE_INPUT = "ERROR: INVALID MODEL TYPE SPECIFIED" \ No newline at end of file +INVALID_TYPE_INPUT = "ERROR: INVALID MODEL TYPE SPECIFIED" diff --git a/poetry.lock b/poetry.lock index c37c7d9..fd83199 100644 --- a/poetry.lock +++ b/poetry.lock @@ -58,6 +58,19 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" [package.dependencies] pytz = ">=2015.7" +[[package]] +name = "bleach" +version = "3.3.0" +description = "An easy safelist-based HTML-sanitizing tool." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[package.dependencies] +packaging = "*" +six = ">=1.9.0" +webencodings = "*" + [[package]] name = "certifi" version = "2020.12.5" @@ -66,6 +79,17 @@ category = "dev" optional = false python-versions = "*" +[[package]] +name = "cffi" +version = "1.14.5" +description = "Foreign Function Interface for Python calling C code." +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +pycparser = "*" + [[package]] name = "chardet" version = "4.0.0" @@ -74,6 +98,37 @@ category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "click" +version = "7.1.2" +description = "Composable command line interface toolkit" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "click-log" +version = "0.3.2" +description = "Logging integration for Click" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +click = "*" + +[[package]] +name = "codecov" +version = "2.1.11" +description = "Hosted coverage reports for GitHub, Bitbucket and Gitlab" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[package.dependencies] +coverage = "*" +requests = ">=2.7.9" + [[package]] name = "colorama" version = "0.4.4" @@ -93,6 +148,25 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" [package.extras] toml = ["toml"] +[[package]] +name = "cryptography" +version = "3.4.6" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +cffi = ">=1.12" + +[package.extras] +docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"] +docstest = ["doc8", "pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"] +pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"] +sdist = ["setuptools-rust (>=0.11.4)"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["pytest (>=6.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"] + [[package]] name = "cycler" version = "0.10.0" @@ -112,6 +186,17 @@ category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "dotty-dict" +version = "1.3.0" +description = "Dictionary wrapper for quick access to deeply nested keys." 
+category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +setuptools_scm = "*" + [[package]] name = "entrypoints" version = "0.3" @@ -120,6 +205,42 @@ category = "main" optional = false python-versions = ">=2.7" +[[package]] +name = "flake8" +version = "3.8.4" +description = "the modular source code checker: pep8 pyflakes and co" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + +[package.dependencies] +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +mccabe = ">=0.6.0,<0.7.0" +pycodestyle = ">=2.6.0a1,<2.7.0" +pyflakes = ">=2.2.0,<2.3.0" + +[[package]] +name = "gitdb" +version = "4.0.5" +description = "Git Object Database" +category = "dev" +optional = false +python-versions = ">=3.4" + +[package.dependencies] +smmap = ">=3.0.1,<4" + +[[package]] +name = "gitpython" +version = "3.1.14" +description = "Python Git Library" +category = "dev" +optional = false +python-versions = ">=3.4" + +[package.dependencies] +gitdb = ">=4.0.1,<5" + [[package]] name = "idna" version = "2.10" @@ -160,6 +281,25 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "invoke" +version = "1.5.0" +description = "Pythonic task execution" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "jeepney" +version = "0.6.0" +description = "Low-level, pure Python DBus protocol wrapper." +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.extras] +test = ["pytest", "pytest-trio", "pytest-asyncio", "testpath", "trio"] + [[package]] name = "jinja2" version = "2.11.3" @@ -200,6 +340,24 @@ six = ">=1.11.0" format = ["idna", "jsonpointer (>1.13)", "rfc3987", "strict-rfc3339", "webcolors"] format_nongpl = ["idna", "jsonpointer (>1.13)", "webcolors", "rfc3986-validator (>0.1.0)", "rfc3339-validator"] +[[package]] +name = "keyring" +version = "23.0.0" +description = "Store and access your passwords safely." +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +importlib-metadata = ">=3.6" +jeepney = {version = ">=0.4.2", markers = "sys_platform == \"linux\""} +pywin32-ctypes = {version = "<0.1.0 || >0.1.0,<0.1.1 || >0.1.1", markers = "sys_platform == \"win32\""} +SecretStorage = {version = ">=3.2", markers = "sys_platform == \"linux\""} + +[package.extras] +docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] +testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pytest-cov", "pytest-enabler", "pytest-black (>=0.3.7)", "pytest-mypy"] + [[package]] name = "kiwisolver" version = "1.3.1" @@ -232,6 +390,14 @@ pillow = ">=6.2.0" pyparsing = ">=2.0.3,<2.0.4 || >2.0.4,<2.1.2 || >2.1.2,<2.1.6 || >2.1.6" python-dateutil = ">=2.1" +[[package]] +name = "mccabe" +version = "0.6.1" +description = "McCabe checker, plugin for flake8" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "numpy" version = "1.20.1" @@ -287,6 +453,17 @@ category = "main" optional = false python-versions = ">=3.6" +[[package]] +name = "pkginfo" +version = "1.7.0" +description = "Query metadatdata from sdists / bdists / installed packages." 
+category = "dev" +optional = false +python-versions = "*" + +[package.extras] +testing = ["nose", "coverage"] + [[package]] name = "pluggy" version = "0.13.1" @@ -320,6 +497,30 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +[[package]] +name = "pycodestyle" +version = "2.6.0" +description = "Python style guide checker" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "pycparser" +version = "2.20" +description = "C parser in Python" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "pyflakes" +version = "2.2.0" +description = "passive checker of Python programs" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + [[package]] name = "pygments" version = "2.8.0" @@ -392,6 +593,43 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" [package.dependencies] six = ">=1.5" +[[package]] +name = "python-gitlab" +version = "1.15.0" +description = "Interact with GitLab API" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +requests = ">=2.4.2" +six = "*" + +[[package]] +name = "python-semantic-release" +version = "7.15.0" +description = "Automatic Semantic Versioning for Python projects" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +click = ">=7,<8" +click-log = ">=0.3,<1" +dotty-dict = ">=1.3.0,<2" +gitpython = ">=3.0.8,<4" +invoke = ">=1.4.1,<2" +python-gitlab = ">=1.10,<2" +requests = ">=2.25,<3" +semver = ">=2.10,<3" +tomlkit = ">=0.7.0,<1.0.0" +twine = ">=3,<4" + +[package.extras] +dev = ["mypy", "tox", "isort", "black"] +docs = ["Sphinx (==1.3.6)"] +test = ["coverage (>=5,<6)", "pytest (>=5,<6)", "pytest-xdist (>=1,<2)", "pytest-mock (>=2,<3)", "responses (==0.5.0)", "mock (==1.3.0)"] + [[package]] name = "pytz" version = "2021.1" @@ -400,6 +638,31 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "pywin32-ctypes" +version = "0.2.0" +description = "" +category = "dev" +optional = false +python-versions = "*" + +[[package]] +name = "readme-renderer" +version = "29.0" +description = "readme_renderer is a library for rendering \"readme\" descriptions for Warehouse" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +bleach = ">=2.1.0" +docutils = ">=0.13.1" +Pygments = ">=2.5.1" +six = "*" + +[package.extras] +md = ["cmarkgfm (>=0.5.0,<0.6.0)"] + [[package]] name = "requests" version = "2.25.1" @@ -418,6 +681,28 @@ urllib3 = ">=1.21.1,<1.27" security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"] socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] +[[package]] +name = "requests-toolbelt" +version = "0.9.1" +description = "A utility belt for advanced users of python-requests" +category = "dev" +optional = false +python-versions = "*" + +[package.dependencies] +requests = ">=2.0.1,<3.0.0" + +[[package]] +name = "rfc3986" +version = "1.4.0" +description = "Validating URI References per RFC 3986" +category = "dev" +optional = false +python-versions = "*" + +[package.extras] +idna2008 = ["idna"] + [[package]] name = "scikit-learn" version = "0.24.1" @@ -463,6 +748,37 @@ numpy = ">=1.15" pandas = ">=0.23" scipy = ">=1.0" +[[package]] +name = "secretstorage" +version = "3.3.1" +description = "Python bindings to FreeDesktop.org Secret Service API" +category = "dev" +optional = false +python-versions = ">=3.6" + 
+[package.dependencies] +cryptography = ">=2.0" +jeepney = ">=0.6" + +[[package]] +name = "semver" +version = "2.13.0" +description = "Python helper for Semantic Versioning (http://semver.org/)" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "setuptools-scm" +version = "5.0.2" +description = "the blessed package to manage your versions by scm tags" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" + +[package.extras] +toml = ["toml"] + [[package]] name = "six" version = "1.15.0" @@ -471,6 +787,14 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +[[package]] +name = "smmap" +version = "3.0.5" +description = "A pure Python implementation of a sliding window memory map manager" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + [[package]] name = "snowballstemmer" version = "2.1.0" @@ -628,6 +952,14 @@ category = "main" optional = false python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +[[package]] +name = "tomlkit" +version = "0.7.0" +description = "Style preserving TOML library" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + [[package]] name = "toolz" version = "0.11.1" @@ -636,6 +968,38 @@ category = "main" optional = false python-versions = ">=3.5" +[[package]] +name = "tqdm" +version = "4.59.0" +description = "Fast, Extensible Progress Meter" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +telegram = ["requests"] + +[[package]] +name = "twine" +version = "3.3.0" +description = "Collection of utilities for publishing packages on PyPI" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +colorama = ">=0.4.3" +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +keyring = ">=15.1" +pkginfo = ">=1.4.2" +readme-renderer = ">=21.0" +requests = ">=2.20" +requests-toolbelt = ">=0.8.0,<0.9.0 || >0.9.0" +rfc3986 = ">=1.4.0" +tqdm = ">=4.14" + [[package]] name = "typing-extensions" version = "3.7.4.3" @@ -668,6 +1032,14 @@ python-versions = ">=3.5" [package.dependencies] pandas = "*" +[[package]] +name = "webencodings" +version = "0.5.1" +description = "Character encoding aliases for legacy web content" +category = "dev" +optional = false +python-versions = "*" + [[package]] name = "zipp" version = "3.4.1" @@ -683,7 +1055,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pyt [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "64b6dbd8cf18211f3c18d809c4076a3d4cb31974aa3e1d2c65a36bcbea617080" +content-hash = "bc129cd200cdd81a2c2382fd0d4d1895bdbbb3c4df5e1a6bf03b1af57c333bd5" [metadata.files] alabaster = [ @@ -706,14 +1078,70 @@ babel = [ {file = "Babel-2.9.0-py2.py3-none-any.whl", hash = "sha256:9d35c22fcc79893c3ecc85ac4a56cde1ecf3f19c540bba0922308a6c06ca6fa5"}, {file = "Babel-2.9.0.tar.gz", hash = "sha256:da031ab54472314f210b0adcff1588ee5d1d1d0ba4dbd07b94dba82bde791e05"}, ] +bleach = [ + {file = "bleach-3.3.0-py2.py3-none-any.whl", hash = "sha256:6123ddc1052673e52bab52cdc955bcb57a015264a1c57d37bea2f6b817af0125"}, + {file = "bleach-3.3.0.tar.gz", hash = "sha256:98b3170739e5e83dd9dc19633f074727ad848cbedb6026708c8ac2d3b697a433"}, +] certifi = [ {file = 
"certifi-2020.12.5-py2.py3-none-any.whl", hash = "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"}, {file = "certifi-2020.12.5.tar.gz", hash = "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c"}, ] +cffi = [ + {file = "cffi-1.14.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:bb89f306e5da99f4d922728ddcd6f7fcebb3241fc40edebcb7284d7514741991"}, + {file = "cffi-1.14.5-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:34eff4b97f3d982fb93e2831e6750127d1355a923ebaeeb565407b3d2f8d41a1"}, + {file = "cffi-1.14.5-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:99cd03ae7988a93dd00bcd9d0b75e1f6c426063d6f03d2f90b89e29b25b82dfa"}, + {file = "cffi-1.14.5-cp27-cp27m-win32.whl", hash = "sha256:65fa59693c62cf06e45ddbb822165394a288edce9e276647f0046e1ec26920f3"}, + {file = "cffi-1.14.5-cp27-cp27m-win_amd64.whl", hash = "sha256:51182f8927c5af975fece87b1b369f722c570fe169f9880764b1ee3bca8347b5"}, + {file = "cffi-1.14.5-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:43e0b9d9e2c9e5d152946b9c5fe062c151614b262fda2e7b201204de0b99e482"}, + {file = "cffi-1.14.5-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:cbde590d4faaa07c72bf979734738f328d239913ba3e043b1e98fe9a39f8b2b6"}, + {file = "cffi-1.14.5-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:5de7970188bb46b7bf9858eb6890aad302577a5f6f75091fd7cdd3ef13ef3045"}, + {file = "cffi-1.14.5-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:a465da611f6fa124963b91bf432d960a555563efe4ed1cc403ba5077b15370aa"}, + {file = "cffi-1.14.5-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:d42b11d692e11b6634f7613ad8df5d6d5f8875f5d48939520d351007b3c13406"}, + {file = "cffi-1.14.5-cp35-cp35m-win32.whl", hash = "sha256:72d8d3ef52c208ee1c7b2e341f7d71c6fd3157138abf1a95166e6165dd5d4369"}, + {file = "cffi-1.14.5-cp35-cp35m-win_amd64.whl", hash = "sha256:29314480e958fd8aab22e4a58b355b629c59bf5f2ac2492b61e3dc06d8c7a315"}, + {file = "cffi-1.14.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:3d3dd4c9e559eb172ecf00a2a7517e97d1e96de2a5e610bd9b68cea3925b4892"}, + {file = "cffi-1.14.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058"}, + {file = "cffi-1.14.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5"}, + {file = "cffi-1.14.5-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132"}, + {file = "cffi-1.14.5-cp36-cp36m-win32.whl", hash = "sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53"}, + {file = "cffi-1.14.5-cp36-cp36m-win_amd64.whl", hash = "sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813"}, + {file = "cffi-1.14.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73"}, + {file = "cffi-1.14.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06"}, + {file = "cffi-1.14.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1"}, + {file = "cffi-1.14.5-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49"}, + {file = "cffi-1.14.5-cp37-cp37m-win32.whl", hash = "sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62"}, + {file = "cffi-1.14.5-cp37-cp37m-win_amd64.whl", hash = 
"sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4"}, + {file = "cffi-1.14.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053"}, + {file = "cffi-1.14.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0"}, + {file = "cffi-1.14.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e"}, + {file = "cffi-1.14.5-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827"}, + {file = "cffi-1.14.5-cp38-cp38-win32.whl", hash = "sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e"}, + {file = "cffi-1.14.5-cp38-cp38-win_amd64.whl", hash = "sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396"}, + {file = "cffi-1.14.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea"}, + {file = "cffi-1.14.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322"}, + {file = "cffi-1.14.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c"}, + {file = "cffi-1.14.5-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee"}, + {file = "cffi-1.14.5-cp39-cp39-win32.whl", hash = "sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396"}, + {file = "cffi-1.14.5-cp39-cp39-win_amd64.whl", hash = "sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d"}, + {file = "cffi-1.14.5.tar.gz", hash = "sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c"}, +] chardet = [ {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"}, ] +click = [ + {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, + {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, +] +click-log = [ + {file = "click-log-0.3.2.tar.gz", hash = "sha256:16fd1ca3fc6b16c98cea63acf1ab474ea8e676849dc669d86afafb0ed7003124"}, + {file = "click_log-0.3.2-py2.py3-none-any.whl", hash = "sha256:eee14dc37cdf3072158570f00406572f9e03e414accdccfccd4c538df9ae322c"}, +] +codecov = [ + {file = "codecov-2.1.11-py2.py3-none-any.whl", hash = "sha256:ba8553a82942ce37d4da92b70ffd6d54cf635fc1793ab0a7dc3fecd6ebfb3df8"}, + {file = "codecov-2.1.11-py3.8.egg", hash = "sha256:e95901d4350e99fc39c8353efa450050d2446c55bac91d90fcfd2354e19a6aef"}, + {file = "codecov-2.1.11.tar.gz", hash = "sha256:6cde272454009d27355f9434f4e49f238c0273b216beda8472a65dc4957f473b"}, +] colorama = [ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, @@ -772,6 +1200,20 @@ coverage = [ {file = "coverage-5.5-pp37-none-any.whl", hash = "sha256:2a3859cb82dcbda1cfd3e6f71c27081d18aa251d20a17d87d26d4cd216fb0af4"}, {file = "coverage-5.5.tar.gz", hash = "sha256:ebe78fe9a0e874362175b02371bdfbee64d8edc42a044253ddf4ee7d3c15212c"}, ] 
+cryptography = [ + {file = "cryptography-3.4.6-cp36-abi3-macosx_10_10_x86_64.whl", hash = "sha256:57ad77d32917bc55299b16d3b996ffa42a1c73c6cfa829b14043c561288d2799"}, + {file = "cryptography-3.4.6-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:4169a27b818de4a1860720108b55a2801f32b6ae79e7f99c00d79f2a2822eeb7"}, + {file = "cryptography-3.4.6-cp36-abi3-manylinux2010_x86_64.whl", hash = "sha256:93cfe5b7ff006de13e1e89830810ecbd014791b042cbe5eec253be11ac2b28f3"}, + {file = "cryptography-3.4.6-cp36-abi3-manylinux2014_aarch64.whl", hash = "sha256:5ecf2bcb34d17415e89b546dbb44e73080f747e504273e4d4987630493cded1b"}, + {file = "cryptography-3.4.6-cp36-abi3-manylinux2014_x86_64.whl", hash = "sha256:fec7fb46b10da10d9e1d078d1ff8ed9e05ae14f431fdbd11145edd0550b9a964"}, + {file = "cryptography-3.4.6-cp36-abi3-win32.whl", hash = "sha256:df186fcbf86dc1ce56305becb8434e4b6b7504bc724b71ad7a3239e0c9d14ef2"}, + {file = "cryptography-3.4.6-cp36-abi3-win_amd64.whl", hash = "sha256:66b57a9ca4b3221d51b237094b0303843b914b7d5afd4349970bb26518e350b0"}, + {file = "cryptography-3.4.6-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:066bc53f052dfeda2f2d7c195cf16fb3e5ff13e1b6b7415b468514b40b381a5b"}, + {file = "cryptography-3.4.6-pp36-pypy36_pp73-manylinux2014_x86_64.whl", hash = "sha256:600cf9bfe75e96d965509a4c0b2b183f74a4fa6f5331dcb40fb7b77b7c2484df"}, + {file = "cryptography-3.4.6-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:0923ba600d00718d63a3976f23cab19aef10c1765038945628cd9be047ad0336"}, + {file = "cryptography-3.4.6-pp37-pypy37_pp73-manylinux2014_x86_64.whl", hash = "sha256:9e98b452132963678e3ac6c73f7010fe53adf72209a32854d55690acac3f6724"}, + {file = "cryptography-3.4.6.tar.gz", hash = "sha256:2d32223e5b0ee02943f32b19245b61a62db83a882f0e76cc564e1cec60d48f87"}, +] cycler = [ {file = "cycler-0.10.0-py2.py3-none-any.whl", hash = "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d"}, {file = "cycler-0.10.0.tar.gz", hash = "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8"}, @@ -780,10 +1222,25 @@ docutils = [ {file = "docutils-0.16-py2.py3-none-any.whl", hash = "sha256:0c5b78adfbf7762415433f5515cd5c9e762339e23369dbe8000d84a4bf4ab3af"}, {file = "docutils-0.16.tar.gz", hash = "sha256:c2de3a60e9e7d07be26b7f2b00ca0309c207e06c100f9cc2a94931fc75a478fc"}, ] +dotty-dict = [ + {file = "dotty_dict-1.3.0.tar.gz", hash = "sha256:eb0035a3629ecd84397a68f1f42f1e94abd1c34577a19cd3eacad331ee7cbaf0"}, +] entrypoints = [ {file = "entrypoints-0.3-py2.py3-none-any.whl", hash = "sha256:589f874b313739ad35be6e0cd7efde2a4e9b6fea91edcc34e58ecbb8dbe56d19"}, {file = "entrypoints-0.3.tar.gz", hash = "sha256:c70dd71abe5a8c85e55e12c19bd91ccfeec11a6e99044204511f9ed547d48451"}, ] +flake8 = [ + {file = "flake8-3.8.4-py2.py3-none-any.whl", hash = "sha256:749dbbd6bfd0cf1318af27bf97a14e28e5ff548ef8e5b1566ccfb25a11e7c839"}, + {file = "flake8-3.8.4.tar.gz", hash = "sha256:aadae8761ec651813c24be05c6f7b4680857ef6afaae4651a4eccaef97ce6c3b"}, +] +gitdb = [ + {file = "gitdb-4.0.5-py3-none-any.whl", hash = "sha256:91f36bfb1ab7949b3b40e23736db18231bf7593edada2ba5c3a174a7b23657ac"}, + {file = "gitdb-4.0.5.tar.gz", hash = "sha256:c9e1f2d0db7ddb9a704c2a0217be31214e91a4fe1dea1efad19ae42ba0c285c9"}, +] +gitpython = [ + {file = "GitPython-3.1.14-py3-none-any.whl", hash = "sha256:3283ae2fba31c913d857e12e5ba5f9a7772bbc064ae2bb09efafa71b0dd4939b"}, + {file = "GitPython-3.1.14.tar.gz", hash = "sha256:be27633e7509e58391f10207cd32b2a6cf5b908f92d9cd30da2e514e1137af61"}, +] idna = [ {file = 
"idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, @@ -800,6 +1257,15 @@ iniconfig = [ {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, ] +invoke = [ + {file = "invoke-1.5.0-py2-none-any.whl", hash = "sha256:da7c2d0be71be83ffd6337e078ef9643f41240024d6b2659e7b46e0b251e339f"}, + {file = "invoke-1.5.0-py3-none-any.whl", hash = "sha256:7e44d98a7dc00c91c79bac9e3007276965d2c96884b3c22077a9f04042bd6d90"}, + {file = "invoke-1.5.0.tar.gz", hash = "sha256:f0c560075b5fb29ba14dad44a7185514e94970d1b9d57dcd3723bec5fed92650"}, +] +jeepney = [ + {file = "jeepney-0.6.0-py3-none-any.whl", hash = "sha256:aec56c0eb1691a841795111e184e13cad504f7703b9a64f63020816afa79a8ae"}, + {file = "jeepney-0.6.0.tar.gz", hash = "sha256:7d59b6622675ca9e993a6bd38de845051d315f8b0c72cca3aef733a20b648657"}, +] jinja2 = [ {file = "Jinja2-2.11.3-py2.py3-none-any.whl", hash = "sha256:03e47ad063331dd6a3f04a43eddca8a966a26ba0c5b7207a9a9e4e08f1b29419"}, {file = "Jinja2-2.11.3.tar.gz", hash = "sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6"}, @@ -812,6 +1278,10 @@ jsonschema = [ {file = "jsonschema-3.2.0-py2.py3-none-any.whl", hash = "sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163"}, {file = "jsonschema-3.2.0.tar.gz", hash = "sha256:c8a85b28d377cc7737e46e2d9f2b4f44ee3c0e1deac6bf46ddefc7187d30797a"}, ] +keyring = [ + {file = "keyring-23.0.0-py3-none-any.whl", hash = "sha256:29f407fd5509c014a6086f17338c70215c8d1ab42d5d49e0254273bc0a64bbfc"}, + {file = "keyring-23.0.0.tar.gz", hash = "sha256:237ff44888ba9b3918a7dcb55c8f1db909c95b6f071bfb46c6918f33f453a68a"}, +] kiwisolver = [ {file = "kiwisolver-1.3.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fd34fbbfbc40628200730bc1febe30631347103fc8d3d4fa012c21ab9c11eca9"}, {file = "kiwisolver-1.3.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:d3155d828dec1d43283bd24d3d3e0d9c7c350cdfcc0bd06c0ad1209c1bbc36d0"}, @@ -927,6 +1397,10 @@ matplotlib = [ {file = "matplotlib-3.3.4-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:cf3a7e54eff792f0815dbbe9b85df2f13d739289c93d346925554f71d484be78"}, {file = "matplotlib-3.3.4.tar.gz", hash = "sha256:3e477db76c22929e4c6876c44f88d790aacdf3c3f8f3a90cb1975c0bf37825b0"}, ] +mccabe = [ + {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, + {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, +] numpy = [ {file = "numpy-1.20.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ae61f02b84a0211abb56462a3b6cd1e7ec39d466d3160eb4e1da8bf6717cdbeb"}, {file = "numpy-1.20.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:65410c7f4398a0047eea5cca9b74009ea61178efd78d1be9847fac1d6716ec1e"}, @@ -1015,6 +1489,10 @@ pillow = [ {file = "Pillow-8.1.0-pp37-pypy37_pp73-win32.whl", hash = "sha256:b6f00ad5ebe846cc91763b1d0c6d30a8042e02b2316e27b05de04fa6ec831ec5"}, {file = "Pillow-8.1.0.tar.gz", hash = "sha256:887668e792b7edbfb1d3c9d8b5d8c859269a0f0eba4dda562adb95500f60dbba"}, ] +pkginfo = [ + {file = "pkginfo-1.7.0-py2.py3-none-any.whl", hash = "sha256:9fdbea6495622e022cc72c2e5e1b735218e4ffb2a2a69cde2694a6c1f16afb75"}, + 
{file = "pkginfo-1.7.0.tar.gz", hash = "sha256:029a70cb45c6171c329dfc890cde0879f8c52d6f3922794796e06f577bb03db4"}, +] pluggy = [ {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, @@ -1027,6 +1505,18 @@ py = [ {file = "py-1.10.0-py2.py3-none-any.whl", hash = "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"}, {file = "py-1.10.0.tar.gz", hash = "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3"}, ] +pycodestyle = [ + {file = "pycodestyle-2.6.0-py2.py3-none-any.whl", hash = "sha256:2295e7b2f6b5bd100585ebcb1f616591b652db8a741695b3d8f5d28bdc934367"}, + {file = "pycodestyle-2.6.0.tar.gz", hash = "sha256:c58a7d2815e0e8d7972bf1803331fb0152f867bd89adf8a01dfd55085434192e"}, +] +pycparser = [ + {file = "pycparser-2.20-py2.py3-none-any.whl", hash = "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"}, + {file = "pycparser-2.20.tar.gz", hash = "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0"}, +] +pyflakes = [ + {file = "pyflakes-2.2.0-py2.py3-none-any.whl", hash = "sha256:0d94e0e05a19e57a99444b6ddcf9a6eb2e5c68d3ca1e98e90707af8152c90a92"}, + {file = "pyflakes-2.2.0.tar.gz", hash = "sha256:35b2d75ee967ea93b55750aa9edbbf72813e06a66ba54438df2cfac9e3c27fc8"}, +] pygments = [ {file = "Pygments-2.8.0-py3-none-any.whl", hash = "sha256:b21b072d0ccdf29297a82a2363359d99623597b8a265b8081760e4d0f7153c88"}, {file = "Pygments-2.8.0.tar.gz", hash = "sha256:37a13ba168a02ac54cc5891a42b1caec333e59b66addb7fa633ea8a6d73445c0"}, @@ -1050,14 +1540,38 @@ python-dateutil = [ {file = "python-dateutil-2.8.1.tar.gz", hash = "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c"}, {file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"}, ] +python-gitlab = [ + {file = "python-gitlab-1.15.0.tar.gz", hash = "sha256:45125a0ed4d0027d4317bdbd71ca02fc52b0ac160b9d2c3c5be131b4d19f867e"}, + {file = "python_gitlab-1.15.0-py3-none-any.whl", hash = "sha256:51ad36873b5c20cd21d9373db0936fb99c521575668320025d0fb519398d2e17"}, +] +python-semantic-release = [ + {file = "python-semantic-release-7.15.0.tar.gz", hash = "sha256:a0ffd462bd42c5e0c41363b3c66c707ef4ad181bebc2f81145debbce9438e713"}, + {file = "python_semantic_release-7.15.0-py3-none-any.whl", hash = "sha256:04835c4e32db24387bcab3aac0a11892b274f00206e360abfa4d8152b082d0f1"}, +] pytz = [ {file = "pytz-2021.1-py2.py3-none-any.whl", hash = "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"}, {file = "pytz-2021.1.tar.gz", hash = "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da"}, ] +pywin32-ctypes = [ + {file = "pywin32-ctypes-0.2.0.tar.gz", hash = "sha256:24ffc3b341d457d48e8922352130cf2644024a4ff09762a2261fd34c36ee5942"}, + {file = "pywin32_ctypes-0.2.0-py2.py3-none-any.whl", hash = "sha256:9dc2d991b3479cc2df15930958b674a48a227d5361d413827a4cfd0b5876fc98"}, +] +readme-renderer = [ + {file = "readme_renderer-29.0-py2.py3-none-any.whl", hash = "sha256:63b4075c6698fcfa78e584930f07f39e05d46f3ec97f65006e430b595ca6348c"}, + {file = "readme_renderer-29.0.tar.gz", hash = "sha256:92fd5ac2bf8677f310f3303aa4bce5b9d5f9f2094ab98c29f13791d7b805a3db"}, +] requests = [ {file = "requests-2.25.1-py2.py3-none-any.whl", hash = 
"sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"}, {file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"}, ] +requests-toolbelt = [ + {file = "requests-toolbelt-0.9.1.tar.gz", hash = "sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0"}, + {file = "requests_toolbelt-0.9.1-py2.py3-none-any.whl", hash = "sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f"}, +] +rfc3986 = [ + {file = "rfc3986-1.4.0-py2.py3-none-any.whl", hash = "sha256:af9147e9aceda37c91a05f4deb128d4b4b49d6b199775fd2d2927768abdc8f50"}, + {file = "rfc3986-1.4.0.tar.gz", hash = "sha256:112398da31a3344dc25dbf477d8df6cb34f9278a94fee2625d89e4514be8bb9d"}, +] scikit-learn = [ {file = "scikit-learn-0.24.1.tar.gz", hash = "sha256:a0334a1802e64d656022c3bfab56a73fbd6bf4b1298343f3688af2151810bbdf"}, {file = "scikit_learn-0.24.1-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:9bed8a1ef133c8e2f13966a542cb8125eac7f4b67dcd234197c827ba9c7dd3e0"}, @@ -1114,10 +1628,32 @@ seaborn = [ {file = "seaborn-0.11.1-py3-none-any.whl", hash = "sha256:4e1cce9489449a1c6ff3c567f2113cdb41122f727e27a984950d004a88ef3c5c"}, {file = "seaborn-0.11.1.tar.gz", hash = "sha256:44e78eaed937c5a87fc7a892c329a7cc091060b67ebd1d0d306b446a74ba01ad"}, ] +secretstorage = [ + {file = "SecretStorage-3.3.1-py3-none-any.whl", hash = "sha256:422d82c36172d88d6a0ed5afdec956514b189ddbfb72fefab0c8a1cee4eaf71f"}, + {file = "SecretStorage-3.3.1.tar.gz", hash = "sha256:fd666c51a6bf200643495a04abb261f83229dcb6fd8472ec393df7ffc8b6f195"}, +] +semver = [ + {file = "semver-2.13.0-py2.py3-none-any.whl", hash = "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4"}, + {file = "semver-2.13.0.tar.gz", hash = "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f"}, +] +setuptools-scm = [ + {file = "setuptools_scm-5.0.2-py2.7.egg", hash = "sha256:35acc9a3be4fbd4f6f3480eecb3c637dfb5ca1812fe86baf5e6759a0133837cf"}, + {file = "setuptools_scm-5.0.2-py2.py3-none-any.whl", hash = "sha256:bd5c4e37f74c103e117549f89aeb3c244488c4a6422df786d1a7d03257f16b34"}, + {file = "setuptools_scm-5.0.2-py3.5.egg", hash = "sha256:0d4fa3743c7a453f31dae9b44fcc0c869125a323c166a6b39e20122f488addb2"}, + {file = "setuptools_scm-5.0.2-py3.6.egg", hash = "sha256:ce5497a8ff55e81cf88cb402a87dedc7663d2671d5f1303d978ba1afb33c4fb6"}, + {file = "setuptools_scm-5.0.2-py3.7.egg", hash = "sha256:90be2ecff71d92352f59c6371abe017c0859bc617a545ba6aaf4d96951ba7947"}, + {file = "setuptools_scm-5.0.2-py3.8.egg", hash = "sha256:2e8706b90910d66668b3f34aea9cceab3c08ba83fedd78de65ff323edc8a1414"}, + {file = "setuptools_scm-5.0.2-py3.9.egg", hash = "sha256:f23060f0f5b23f26b76e43fb0ef51e5294e4b575f69edab5cfe442635680f2ec"}, + {file = "setuptools_scm-5.0.2.tar.gz", hash = "sha256:83a0cedd3449e3946307811a4c7b9d89c4b5fd464a2fb5eeccd0a5bb158ae5c8"}, +] six = [ {file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"}, {file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"}, ] +smmap = [ + {file = "smmap-3.0.5-py2.py3-none-any.whl", hash = "sha256:7bfcf367828031dc893530a29cb35eb8c8f2d7c8f2d0989354d75d24c8573714"}, + {file = "smmap-3.0.5.tar.gz", hash = "sha256:84c2751ef3072d4f6b2785ec7ee40244c6f45eb934d9e543e2c51f1bd3d54c50"}, +] snowballstemmer = [ {file = "snowballstemmer-2.1.0-py2.py3-none-any.whl", hash = 
"sha256:b51b447bea85f9968c13b650126a888aabd4cb4463fca868ec596826325dedc2"}, {file = "snowballstemmer-2.1.0.tar.gz", hash = "sha256:e997baa4f2e9139951b6f4c631bad912dfd3c792467e2f03d7239464af90e914"}, @@ -1185,10 +1721,22 @@ toml = [ {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, ] +tomlkit = [ + {file = "tomlkit-0.7.0-py2.py3-none-any.whl", hash = "sha256:6babbd33b17d5c9691896b0e68159215a9387ebfa938aa3ac42f4a4beeb2b831"}, + {file = "tomlkit-0.7.0.tar.gz", hash = "sha256:ac57f29693fab3e309ea789252fcce3061e19110085aa31af5446ca749325618"}, +] toolz = [ {file = "toolz-0.11.1-py3-none-any.whl", hash = "sha256:1bc473acbf1a1db4e72a1ce587be347450e8f08324908b8a266b486f408f04d5"}, {file = "toolz-0.11.1.tar.gz", hash = "sha256:c7a47921f07822fe534fb1c01c9931ab335a4390c782bd28c6bcc7c2f71f3fbf"}, ] +tqdm = [ + {file = "tqdm-4.59.0-py2.py3-none-any.whl", hash = "sha256:9fdf349068d047d4cfbe24862c425883af1db29bcddf4b0eeb2524f6fbdb23c7"}, + {file = "tqdm-4.59.0.tar.gz", hash = "sha256:d666ae29164da3e517fcf125e41d4fe96e5bb375cd87ff9763f6b38b5592fe33"}, +] +twine = [ + {file = "twine-3.3.0-py3-none-any.whl", hash = "sha256:2f6942ec2a17417e19d2dd372fc4faa424c87ee9ce49b4e20c427eb00a0f3f41"}, + {file = "twine-3.3.0.tar.gz", hash = "sha256:fcffa8fc37e8083a5be0728371f299598870ee1eccc94e9a25cef7b1dcfa8297"}, +] typing-extensions = [ {file = "typing_extensions-3.7.4.3-py2-none-any.whl", hash = "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f"}, {file = "typing_extensions-3.7.4.3-py3-none-any.whl", hash = "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918"}, @@ -1202,6 +1750,10 @@ vega-datasets = [ {file = "vega_datasets-0.9.0-py3-none-any.whl", hash = "sha256:3d7c63917be6ca9b154b565f4779a31fedce57b01b5b9d99d8a34a7608062a1d"}, {file = "vega_datasets-0.9.0.tar.gz", hash = "sha256:9dbe9834208e8ec32ab44970df315de9102861e4cda13d8e143aab7a80d93fc0"}, ] +webencodings = [ + {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, + {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, +] zipp = [ {file = "zipp-3.4.1-py3-none-any.whl", hash = "sha256:51cb66cc54621609dd593d1787f286ee42a5c0adbb4b29abea5a63edc3e03098"}, {file = "zipp-3.4.1.tar.gz", hash = "sha256:3607921face881ba3e026887d8150cca609d517579abe052ac81fc5aeffdbd76"}, diff --git a/pyproject.toml b/pyproject.toml index 4a8b11c..223413c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,15 @@ pytest = "^6.2.2" Sphinx = "^3.5.1" sphinxcontrib-napoleon = "^0.7" pytest-cov = "^2.11.1" +codecov = "^2.1.11" +flake8 = "^3.8.4" +python-semantic-release = "^7.15.0" + +[tool.semantic_release] +version_variable = "aridanalysis/__init__.py:__version__" +version_source = "commit" +upload_to_pypi = "false" +patch_without_tag = "true" [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/tests/test_aridanalysis.py b/tests/test_aridanalysis.py index 2a0b1f4..8fc3030 100644 --- a/tests/test_aridanalysis.py +++ b/tests/test_aridanalysis.py @@ -1,4 +1,3 @@ -from aridanalysis import __version__ from aridanalysis import aridanalysis as aa import pytest import pandas as pd @@ -6,34 +5,41 @@ import sklearn from vega_datasets import data import altair as alt -import statsmodels -import warnings 
+import statsmodels
+# import warnings
+
+import sys
+import os
-import sys, os
myPath = os.path.dirname(os.path.abspath(__file__))
-sys.path.insert(0, myPath + '/../aridanalysis')
-import error_strings as errors
+sys.path.insert(0, myPath + "/../aridanalysis")
+import error_strings as errors  # noqa: E402
-def test_version():
-    assert __version__ == '0.2.0'


@pytest.fixture
-def simple_frame():
-    '''
+def simple_frame():
+    """
    Create a basic test dataframe for linear regression tests
-    '''
-    tdf = pd.DataFrame({'x1': [1, 0, 0],
-                        'x2': [0, 1.0, 0],
-                        'x3': [0, 0, 1],
-                        'x4': ['a', 'a', 'b'],
-                        'y': [1, 3, -1.0]})
+    """
+    tdf = pd.DataFrame(
+        {
+            "x1": [1, 0, 0],
+            "x2": [0, 1.0, 0],
+            "x3": [0, 0, 1],
+            "x4": ["a", "a", "b"],
+            "y": [1, 3, -1.0],
+        }
+    )
    return tdf
+

def test_arideda_return():
    """
    Test return data type
    """
-    _ , out = aa.arid_eda(data.iris(), 'species', 'categorical', ['sepalLength', 'sepalWidth'])
+    _, out = aa.arid_eda(
+        data.iris(), "species", "categorical", ["sepalLength", "sepalWidth"]
+    )
    assert isinstance(out, alt.HConcatChart)
@@ -41,7 +47,9 @@ def test_arideda_features():
    """
    Test calling with valid features list
    """
-    out, _ = aa.arid_eda(data.iris(), 'species', 'categorical', ['sepalLength', 'sepalWidth'])
+    out, _ = aa.arid_eda(
+        data.iris(), "species", "categorical", ["sepalLength", "sepalWidth"]
+    )
    assert isinstance(out, pd.core.frame.DataFrame)
@@ -49,15 +57,26 @@ def test_arideda_numfeature():
    """
    Ensure data frame is appropriate size according to features
    """
-    features = ['sepalLength', 'sepalWidth']
-    out, _ = aa.arid_eda(data.iris(), 'species', 'categorical', features)
-    assert out.shape == (8,len(features))
+    features = ["sepalLength", "sepalWidth"]
+    out, _ = aa.arid_eda(data.iris(), "species", "categorical", features)
+    assert out.shape == (8, len(features))
+

def test_arideda_returns_tuple():
    """
    Check that function returns two items
    """
-    assert len(aa.arid_eda(data.iris(), 'species', 'categorical', ['sepalLength', 'sepalWidth'])) == 2
+    assert (
+        len(
+            aa.arid_eda(
+                data.iris(),
+                "species",
+                "categorical",
+                ["sepalLength", "sepalWidth"]
+            )
+        )
+        == 2
+    )


def test_arideda_empty_df():
@@ -65,183 +84,474 @@
    Test if error occurs when response type is not categorical or continuous
    """
    with pytest.raises(KeyError):
-        aa.arid_eda(data.iris(), 'species', 'ORDINAL', ['sepalLength', 'sepalWidth'])
+        aa.arid_eda(
+            data.iris(),
+            "species",
+            "ORDINAL",
+            ["sepalLength", "sepalWidth"])
+

def test_response_type_incorrect():
    """
    Test if an error occurs when wrong response type is given
    """
    with pytest.raises(KeyError):
-        aa.arid_eda(data.iris(), 'petalLength', 'categorical', ['sepalLength', 'sepalWidth'])
+        aa.arid_eda(
+            data.iris(),
+            "petalLength",
+            "categorical",
+            ["sepalLength", "sepalWidth"]
+        )
+

def test_linreg_input_errors(simple_frame):
-    '''
+    """
    Test linear regression input argument validation
-    '''
+    """
    with pytest.raises(AssertionError, match=errors.INVALID_DATAFRAME):
-        aa.arid_linreg(6, 'y')
+        aa.arid_linreg(6, "y")
    with pytest.raises(AssertionError, match=errors.EMPTY_DATAFRAME):
-        aa.arid_linreg(pd.DataFrame(), 'y')
+        aa.arid_linreg(pd.DataFrame(), "y")
    with pytest.raises(AssertionError, match=errors.RESPONSE_NOT_FOUND):
-        aa.arid_linreg(simple_frame, 'z')
+        aa.arid_linreg(simple_frame, "z")
    with pytest.raises(AssertionError, match=errors.INVALID_RESPONSE_DATATYPE):
-        aa.arid_linreg(simple_frame, 'x4')
-    with pytest.raises(AssertionError, match=errors.INVALID_REGULARIZATION_INPUT):
-        aa.arid_linreg(simple_frame, 'y', regularization = "L3")
+        aa.arid_linreg(simple_frame, "x4")
+    with pytest.raises(AssertionError, match=errors.INVALID_REGULARIZATION_INPUT):  # noqa: E501
+        aa.arid_linreg(simple_frame, "y", regularization="L3")
    with pytest.raises(AssertionError, match=errors.INVALID_ALPHA_INPUT):
-        aa.arid_linreg(simple_frame, 'y', alpha = 'b')
+        aa.arid_linreg(simple_frame, "y", alpha="b")
+

def test_linreg_input_features(simple_frame):
-    '''
+    """
    Test linear regression input feature arguments
-    '''
+    """
    with pytest.raises(AssertionError, match=errors.NO_VALID_FEATURES):
-        aa.arid_linreg(simple_frame[['y']], 'y')
+        aa.arid_linreg(simple_frame[["y"]], "y")
    with pytest.raises(AssertionError, match=errors.NO_VALID_FEATURES):
-        aa.arid_linreg(simple_frame[['x4', 'y']], 'y')
+        aa.arid_linreg(simple_frame[["x4", "y"]], "y")
    with pytest.raises(AssertionError, match=errors.NO_VALID_FEATURES):
-        aa.arid_linreg(simple_frame, 'y', features=['b'])
+        aa.arid_linreg(simple_frame, "y", features=["b"])
    with pytest.raises(AssertionError, match=errors.NO_VALID_FEATURES):
-        aa.arid_linreg(simple_frame, 'y', features=['x4'])
+        aa.arid_linreg(simple_frame, "y", features=["x4"])
    with pytest.warns(UserWarning):
-        aa.arid_linreg(simple_frame, 'y', features=['x1','x2','x3','x4'])
+        aa.arid_linreg(simple_frame, "y", features=["x1", "x2", "x3", "x4"])
    with pytest.warns(UserWarning):
-        aa.arid_linreg(simple_frame, 'y', features=['x1','b'])
-    assert len((aa.arid_linreg(simple_frame, 'y'))[0].coef_) == 3
-    assert len((aa.arid_linreg(simple_frame, 'y', features=simple_frame.columns))[0].coef_) == 3
-    assert len((aa.arid_linreg(simple_frame, 'y', features=['x1','x2','x3']))[0].coef_) == 3
-    assert len((aa.arid_linreg(simple_frame, 'y', features=['x1','x2','x3','x4']))[0].coef_) == 3
-    assert len((aa.arid_linreg(simple_frame, 'y', features=['x1']))[0].coef_) == 1
-    assert len((aa.arid_linreg(simple_frame, 'y', features=['x1','x2']))[0].coef_) == 2
-    assert len((aa.arid_linreg(simple_frame, 'y'))[1].params) == 3
-    assert len((aa.arid_linreg(simple_frame, 'y', features=simple_frame.columns))[1].params) == 3
-    assert len((aa.arid_linreg(simple_frame, 'y', features=['x1','x2','x3']))[1].params) == 3
-    assert len((aa.arid_linreg(simple_frame, 'y', features=['x1','x2','x3','x4']))[1].params) == 3
-    assert len((aa.arid_linreg(simple_frame, 'y', features=['x1']))[1].params) == 1
-    assert len((aa.arid_linreg(simple_frame, 'y', features=['x1','x2']))[1].params) == 2
+        aa.arid_linreg(simple_frame, "y", features=["x1", "b"])
+    assert len((aa.arid_linreg(simple_frame, "y"))[0].coef_) == 3
+    assert (
+        len((aa.arid_linreg(simple_frame,
+                            "y",
+                            features=simple_frame.columns))[0].coef_)
+        == 3
+    )
+    assert (
+        len((aa.arid_linreg(simple_frame,
+                            "y",
+                            features=["x1", "x2", "x3"]))[0].coef_)
+        == 3
+    )
+    assert (
+        len(
+            (aa.arid_linreg(simple_frame,
+                            "y",
+                            features=["x1", "x2", "x3", "x4"]))[
+                0
+            ].coef_
+        )
+        == 3
+    )
+    assert len((aa.arid_linreg(
+        simple_frame,
+        "y",
+        features=["x1"]))[0].coef_) == 1
+    assert len((aa.arid_linreg(
+        simple_frame,
+        "y",
+        features=["x1", "x2"]))[0].coef_) == 2
+    assert len((aa.arid_linreg(simple_frame, "y"))[1].params) == 3
+    assert (
+        len(
+            (aa.arid_linreg(
+                simple_frame,
+                "y",
+                features=simple_frame.columns))[1].params
+        )
+        == 3
+    )
+    assert (
+        len((aa.arid_linreg(
+            simple_frame,
+            "y",
+            features=["x1", "x2", "x3"]))[1].params)
+        == 3
+    )
+    assert (
+        len(
+            (aa.arid_linreg(
+                simple_frame,
+                "y",
+                features=["x1", "x2", "x3", "x4"]))[
+                1
+            ].params
+        )
+        == 3
+    )
+    assert len((aa.arid_linreg(
+        simple_frame,
+        "y",
+        features=["x1"]))[1].params) == 1
+    assert (
+        len((aa.arid_linreg(
+            simple_frame,
+            "y",
+            features=["x1", "x2"]))[1].params) == 2
+    )
+

def test_linreg_model_types(simple_frame):
-    '''
+    """
    Test linear regression output model types
-    '''
-    assert type((aa.arid_linreg(simple_frame, 'y'))[0]) == \
-        sklearn.linear_model._base.LinearRegression
-    assert type((aa.arid_linreg(simple_frame, 'y', regularization = 'L1'))[0]) == \
-        sklearn.linear_model._coordinate_descent.Lasso
-    assert type((aa.arid_linreg(simple_frame, 'y', regularization = 'L2'))[0]) == \
-        sklearn.linear_model._ridge.Ridge
-    assert type((aa.arid_linreg(simple_frame, 'y', regularization = 'L1L2'))[0]) == \
-        sklearn.linear_model._coordinate_descent.ElasticNet
-    assert type((aa.arid_linreg(simple_frame, 'y'))[1]) == \
-        statsmodels.regression.linear_model.RegressionResultsWrapper
-    assert type((aa.arid_linreg(simple_frame, 'y', regularization = 'L1'))[1]) == \
-        statsmodels.base.elastic_net.RegularizedResultsWrapper
-    assert type((aa.arid_linreg(simple_frame, 'y', regularization = 'L2'))[1]) == \
-        statsmodels.base.elastic_net.RegularizedResults
-    assert type((aa.arid_linreg(simple_frame, 'y', regularization = 'L1L2'))[1]) == \
-        statsmodels.base.elastic_net.RegularizedResultsWrapper
+    """
+    assert (
+        type((aa.arid_linreg(simple_frame, "y"))[0])
+        == sklearn.linear_model._base.LinearRegression
+    )
+    assert (
+        type((aa.arid_linreg(simple_frame, "y", regularization="L1"))[0])
+        == sklearn.linear_model._coordinate_descent.Lasso
+    )
+    assert (
+        type((aa.arid_linreg(simple_frame, "y", regularization="L2"))[0])
+        == sklearn.linear_model._ridge.Ridge
+    )
+    assert (
+        type((aa.arid_linreg(simple_frame, "y", regularization="L1L2"))[0])
+        == sklearn.linear_model._coordinate_descent.ElasticNet
+    )
+    assert (
+        type((aa.arid_linreg(simple_frame, "y"))[1])
+        == statsmodels.regression.linear_model.RegressionResultsWrapper
+    )
+    assert (
+        type((aa.arid_linreg(simple_frame, "y", regularization="L1"))[1])
+        == statsmodels.base.elastic_net.RegularizedResultsWrapper
+    )
+    assert (
+        type((aa.arid_linreg(simple_frame, "y", regularization="L2"))[1])
+        == statsmodels.base.elastic_net.RegularizedResults
+    )
+    assert (
+        type((aa.arid_linreg(simple_frame, "y", regularization="L1L2"))[1])
+        == statsmodels.base.elastic_net.RegularizedResultsWrapper
+    )
+

def test_linreg_model_coefficients(simple_frame):
-    '''
-    Test linear regression output statsmodel and sklearn model coefficients match
-    '''
-    assert aa.arid_linreg(simple_frame, 'y')[0].coef_.all() == \
-        (aa.arid_linreg(simple_frame, 'y')[1].params).to_numpy().all()
-    assert aa.arid_linreg(simple_frame, 'y', regularization = 'L1')[0].coef_.all() == \
-        (aa.arid_linreg(simple_frame, 'y', regularization = 'L1')[1].params).to_numpy().all()
-    assert aa.arid_linreg(simple_frame, 'y', regularization = 'L2')[0].coef_.all() == \
-        (aa.arid_linreg(simple_frame, 'y', regularization = 'L2')[1].params).all()
-    assert aa.arid_linreg(simple_frame, 'y', regularization = 'L1L2')[0].coef_.all() == \
-        (aa.arid_linreg(simple_frame, 'y', regularization = 'L1L2')[1].params).to_numpy().all()
+    """
+    Test statsmodel & sklearn model coefficients match
+    """
+    assert (
+        aa.arid_linreg(simple_frame, "y")[0].coef_.all()
+        == (aa.arid_linreg(simple_frame, "y")[1].params).to_numpy().all()
+    )
+    assert (
+        aa.arid_linreg(simple_frame, "y", regularization="L1")[0].coef_.all()
+        == (aa.arid_linreg(simple_frame, "y", regularization="L1")[1].params)
+        .to_numpy()
+        .all()
+    )
+    assert (
+        aa.arid_linreg(simple_frame, "y", regularization="L2")[0].coef_.all()
+        == (aa.arid_linreg(simple_frame, "y", regularization="L2")[1].params).all()  # noqa: E501
+    )
+    assert (
+        aa.arid_linreg(simple_frame, "y", regularization="L1L2")[0].coef_.all()
+        == (aa.arid_linreg(simple_frame, "y", regularization="L1L2")[1].params)
+        .to_numpy()
+        .all()
+    )
+

def test_linreg_model_predictions(simple_frame):
-    '''
-    Test linear regression output statsmodel and sklearn model predictions match
-    '''
-    assert round(aa.arid_linreg(simple_frame, 'y')[0].predict(np.array([[1,4,3]]))[0], 3) == \
-        round((aa.arid_linreg(simple_frame, 'y')[1].predict(np.array([[1,4,3]])))[0], 3)
-    assert round(aa.arid_linreg(simple_frame, 'y', regularization = 'L1')[0].predict(np.array([[1,4,3]]))[0], 3) == \
-        round((aa.arid_linreg(simple_frame, 'y', regularization = 'L1')[1].predict(np.array([[1,4,3]])))[0], 3)
-    assert round(aa.arid_linreg(simple_frame, 'y', regularization = 'L2')[0].predict(np.array([[1,4,3]]))[0], 3) == \
-        round((aa.arid_linreg(simple_frame, 'y', regularization = 'L2')[1].predict(np.array([[1,4,3]])))[0], 3)
-    assert round(aa.arid_linreg(simple_frame, 'y', regularization = 'L1L2')[0].predict(np.array([[1,4,3]]))[0], 3) == \
-        round((aa.arid_linreg(simple_frame, 'y', regularization = 'L1L2')[1].predict(np.array([[1,4,3]])))[0], 3)
+    """
+    Test statsmodel and sklearn model predictions match
+    """
+    assert round(
+        aa.arid_linreg(simple_frame, "y")[0].predict(np.array([[1, 4, 3]]))[0], 3  # noqa: E501
+    ) == round(
+        (aa.arid_linreg(simple_frame, "y")[1].predict(np.array([[1, 4, 3]])))[0], 3  # noqa: E501
+    )
+    assert round(
+        aa.arid_linreg(simple_frame, "y", regularization="L1")[0].predict(
+            np.array([[1, 4, 3]])
+        )[0],
+        3,
+    ) == round(
+        (
+            aa.arid_linreg(simple_frame, "y", regularization="L1")[1].predict(
+                np.array([[1, 4, 3]])
+            )
+        )[0],
+        3,
+    )
+    assert round(
+        aa.arid_linreg(simple_frame, "y", regularization="L2")[0].predict(
+            np.array([[1, 4, 3]])
+        )[0],
+        3,
+    ) == round(
+        (
+            aa.arid_linreg(simple_frame, "y", regularization="L2")[1].predict(
+                np.array([[1, 4, 3]])
+            )
+        )[0],
+        3,
+    )
+    assert round(
+        aa.arid_linreg(simple_frame, "y", regularization="L1L2")[0].predict(
+            np.array([[1, 4, 3]])
+        )[0],
+        3,
+    ) == round(
+        aa.arid_linreg(simple_frame, "y", regularization="L1L2")[1].predict(
+            np.array([[1, 4, 3]])
+        )[0],
+        3,
+    )
+

@pytest.fixture
-def log_df():
-    '''
+def log_df():
+    """
    Create a basic test dataframe for logistic regression tests
-    '''
-    data = [[32, "male", 80, 0],
-            [26, "female", 65, 1],
-            [22, "female", 75, 1],
-            [36, "male", 85, 0],
-            [45, "male", 82, 1],
-            [18, "female", 57, 0],
-            [57, "male", 60, 1]]
-    log_df = pd.DataFrame(data, columns = ['Age', 'Sex', 'Weight', 'Target'])
+    """
+    data = [
+        [32, "male", 80, 0],
+        [26, "female", 65, 1],
+        [22, "female", 75, 1],
+        [36, "male", 85, 0],
+        [45, "male", 82, 1],
+        [18, "female", 57, 0],
+        [57, "male", 60, 1],
+    ]
+    log_df = pd.DataFrame(data, columns=["Age", "Sex", "Weight", "Target"])
    return log_df
+

def test_logreg_model_inputs(log_df):
    with pytest.raises(AssertionError, match=errors.NO_VALID_FEATURES):
-        aa.arid_logreg(log_df, response="Target", features="Age", type="binomial")
+        aa.arid_logreg(log_df, response="Target", features="Age", type="binomial")  # noqa: E501
    with pytest.raises(AssertionError, match=errors.INVALID_DATAFRAME):
-        aa.arid_logreg(17, response="Target", features=["Age", "Sex"], type="binomial")
+        aa.arid_logreg(17, response="Target", features=["Age", "Sex"], type="binomial")  # noqa: E501
    with pytest.raises(AssertionError, match=errors.EMPTY_DATAFRAME):
-        aa.arid_logreg(pd.DataFrame(), response="Target", features=["Age", "Sex"], type="binomial")
+        aa.arid_logreg(
+            pd.DataFrame(), response="Target", features=["Age", "Sex"], type="binomial"  # noqa: E501
+        )
    with pytest.raises(AssertionError, match=errors.RESPONSE_NOT_FOUND):
-        aa.arid_logreg(df=log_df, response="targ", features=["Age", "Sex"], type="binomial")
+        aa.arid_logreg(
+            df=log_df, response="targ", features=["Age", "Sex"], type="binomial"  # noqa: E501
+        )
    with pytest.raises(AssertionError, match=errors.INVALID_TYPE_INPUT):
-        aa.arid_logreg(log_df, response="Target", features=["Age", "Sex"], type="ordinal")
+        aa.arid_logreg(
+            log_df, response="Target", features=["Age", "Sex"], type="ordinal"
+        )
+

def test_logreg_model_outputs(log_df):
-    assert round((aa.arid_logreg(df=log_df, response="Target", features=[], type="binomial")[0].coef_)[0][0], 3) == 0.091
-    assert round((aa.arid_logreg(log_df, response="Target", features=["Age"], type="binomial")[0].coef_)[0][0], 3) == 0.015
-    assert round((aa.arid_logreg(log_df, response="Target", features=["Weight"], type="binomial")[0].coef_)[0][0], 3) == 0.003
-    assert type(aa.arid_logreg(df=log_df, response="Target", features=[], type="binomial")[1]) == statsmodels.discrete.discrete_model.BinaryResultsWrapper
+    assert (
+        round(
+            (
+                aa.arid_logreg(
+                    df=log_df, response="Target", features=[], type="binomial"
+                )[0].coef_
+            )[0][0],
+            3,
+        )
+        == 0.091
+    )
+    assert (
+        round(
+            (
+                aa.arid_logreg(
+                    log_df, response="Target", features=["Age"], type="binomial"  # noqa: E501
+                )[0].coef_
+            )[0][0],
+            3,
+        )
+        == 0.015
+    )
+    assert (
+        round(
+            (
+                aa.arid_logreg(
+                    log_df, response="Target", features=["Weight"], type="binomial"  # noqa: E501
+                )[0].coef_
+            )[0][0],
+            3,
+        )
+        == 0.003
+    )
+    assert (
+        type(
+            aa.arid_logreg(df=log_df, response="Target", features=[], type="binomial")[  # noqa: E501
+                1
+            ]
+        )
+        == statsmodels.discrete.discrete_model.BinaryResultsWrapper
+    )
+

@pytest.fixture
-def health_df():
-    '''
+def health_df():
+    """
    Create a basic test dataframe for count regression tests
-    '''
-    health_df = pd.read_csv("tests/toy_data/badhealth.csv").drop(columns=["Unnamed: 0"])
-    health_df["badh"] = health_df["badh"].astype('category')
-    health_df["badh"] = health_df.badh.replace({0: 'bad', 1 : 'good'})
+    """
+    health_df = pd.read_csv("tests/toy_data/badhealth.csv").drop(columns=["Unnamed: 0"])  # noqa: E501
+    health_df["badh"] = health_df["badh"].astype("category")
+    health_df["badh"] = health_df.badh.replace({0: "bad", 1: "good"})
    return health_df


def test_countreg_model_inputs(health_df):
-    with pytest.raises(AssertionError, match="ERROR: INVALID LIST INTPUT PASSED"):
-        aa.arid_countreg(health_df, response="numvisit", con_features="age", cat_features=["badh"], model="additive")
-    with pytest.raises(AssertionError, match="ERROR: INVALID LIST INTPUT PASSED"):
-        aa.arid_countreg(health_df, response="numvisit", con_features=["age"], cat_features="badh", model="additive")
+    with pytest.raises(AssertionError, match="ERROR: INVALID LIST INTPUT PASSED"):  # noqa: E501
+        aa.arid_countreg(
+            health_df,
+            response="numvisit",
+            con_features="age",
+            cat_features=["badh"],
+            model="additive",
+        )
+    with pytest.raises(AssertionError, match="ERROR: INVALID LIST INTPUT PASSED"):  # noqa: E501
+        aa.arid_countreg(
+            health_df,
+            response="numvisit",
+            con_features=["age"],
+            cat_features="badh",
+            model="additive",
+        )
    with pytest.raises(AssertionError, match=errors.INVALID_DATAFRAME):
-        aa.arid_countreg(17, response="numvisit", con_features=["age"], cat_features=["badh"], model="additive")
+        aa.arid_countreg(
+            17,
+            response="numvisit",
+            con_features=["age"],
+            cat_features=["badh"],
+            model="additive",
+        )
    with pytest.raises(AssertionError, match=errors.EMPTY_DATAFRAME):
-        aa.arid_countreg(pd.DataFrame(), response="numvisit", con_features=["age"], cat_features=["badh"], model="additive")
+        aa.arid_countreg(
+            pd.DataFrame(),
+            response="numvisit",
+            con_features=["age"],
+            cat_features=["badh"],
+            model="additive",
+        )
    with pytest.raises(AssertionError, match=errors.RESPONSE_NOT_FOUND):
-        aa.arid_countreg(data_frame=health_df, response="num", con_features=["age"], cat_features=["badh"], model="additive")
-    with pytest.raises(AssertionError, match="ERROR: INVALID RESPONSE DATATYPE FOR COUNT REGRESSION: MUST BE TYPE INT"):
-        aa.arid_countreg(data_frame=health_df, response="badh", con_features=["age"], cat_features=[], model="additive")
+        aa.arid_countreg(
+            data_frame=health_df,
+            response="num",
+            con_features=["age"],
+            cat_features=["badh"],
+            model="additive",
+        )
+    with pytest.raises(
+        AssertionError,
+        match="ERROR: INVALID RESPONSE DATATYPE FOR COUNT REGRESSION: MUST BE TYPE INT",  # noqa: E501
+    ):
+        aa.arid_countreg(
+            data_frame=health_df,
+            response="badh",
+            con_features=["age"],
+            cat_features=[],
+            model="additive",
+        )
    with pytest.raises(AssertionError, match="ERROR: INVALID MODEL PASSED"):
-        aa.arid_countreg(health_df, response="numvisit", con_features=["age"], cat_features=[], model="additives")
+        aa.arid_countreg(
+            health_df,
+            response="numvisit",
+            con_features=["age"],
+            cat_features=[],
+            model="additives",
+        )
    with pytest.raises(AssertionError, match=errors.INVALID_ALPHA_INPUT):
-        aa.arid_countreg(health_df, response="numvisit", con_features=["age"], cat_features=[], model="additive",alpha="san")
-
-def test_countreg_model_outputs(health_df):
-    assert len(aa.arid_countreg(data_frame = health_df, response="numvisit", con_features=["age"], cat_features=["badh"], model="additive")[0][1].coef_) == 2
-    assert len(aa.arid_countreg(health_df, response="numvisit", con_features=["age"], cat_features=["badh"], model="interactive")[0][1].coef_) == 2
-    #In scikit learn interactions do not change the number of coefficients, this a weighted depening con correlation with other features
-    assert len(aa.arid_countreg(health_df, response="numvisit", con_features=["age"], cat_features=["badh"], model="additive")[1].params) == 3
-    assert len(aa.arid_countreg(health_df, response="numvisit", con_features=["age"], cat_features=["badh"], model="interactive")[1].params) == 4
-    assert str(type(aa.arid_countreg(data_frame = health_df, response="numvisit",
-                                     model="additive")[0])) == "<class 'sklearn.pipeline.Pipeline'>"
-    assert str(type(aa.arid_countreg(data_frame = health_df, response="numvisit",
-                                     model="interactive")[1])) == "<class 'statsmodels.genmod.generalized_linear_model.GLMResultsWrapper'>"
-
+        aa.arid_countreg(
+            health_df,
+            response="numvisit",
+            con_features=["age"],
+            cat_features=[],
+            model="additive",
+            alpha="san",
+        )
-
-
-
+def test_countreg_model_outputs(health_df):
+    assert (
+        len(
+            aa.arid_countreg(
+                data_frame=health_df,
+                response="numvisit",
+                con_features=["age"],
+                cat_features=["badh"],
+                model="additive",
+            )[0][1].coef_
+        )
+        == 2
+    )
+    assert (
+        len(
+            aa.arid_countreg(
+                health_df,
+                response="numvisit",
+                con_features=["age"],
+                cat_features=["badh"],
+                model="interactive",
+            )[0][1].coef_
+        )
+        == 2
+    )
+    # In scikit-learn, interaction terms do not change the number of
+    # coefficients; these are weighted depending on correlation with the
+    # other features.
+    assert (
+        len(
+            aa.arid_countreg(
+                health_df,
+                response="numvisit",
+                con_features=["age"],
+                cat_features=["badh"],
+                model="additive",
+            )[1].params
+        )
+        == 3
+    )
+    assert (
+        len(
+            aa.arid_countreg(
+                health_df,
+                response="numvisit",
+                con_features=["age"],
+                cat_features=["badh"],
+                model="interactive",
+            )[1].params
+        )
+        == 4
+    )
+    assert (
+        str(
+            type(
+                aa.arid_countreg(
+                    data_frame=health_df, response="numvisit", model="additive"
+                )[0]
+            )
+        )
+        == "<class 'sklearn.pipeline.Pipeline'>"
+    )
+    assert (
+        str(
+            type(
+                aa.arid_countreg(
+                    data_frame=health_df, response="numvisit", model="interactive"  # noqa: E501
+                )[1]
+            )
+        )
+        == "<class 'statsmodels.genmod.generalized_linear_model.GLMResultsWrapper'>"  # noqa: E501
+    )
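
A note on the `[tool.semantic_release]` table added to pyproject.toml above: with `version_source = "commit"` and `patch_without_tag = "true"`, every release run bumps at least the patch version and rewrites the assignment named by `version_variable`. A minimal sketch of that target module, assuming the package keeps its version in `aridanalysis/__init__.py` as the config states (the concrete number is illustrative):

```python
# aridanalysis/__init__.py
# python-semantic-release rewrites this assignment on each release, following
# version_variable = "aridanalysis/__init__.py:__version__" in pyproject.toml.
__version__ = "0.2.0"  # illustrative; the deleted test_version pinned this value
```

Removing `test_version` is consistent with this setup: once the version is bumped automatically on every merge to main, a hard-coded equality assertion would fail on the next release.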
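For readers skimming the reflowed assertions: `arid_linreg` is exercised as returning a two-element tuple, a fitted scikit-learn estimator at index 0 (exposing `.coef_`) and a statsmodels fit at index 1 (exposing `.params`). A hedged usage sketch assembled only from calls and attributes the tests themselves touch, using the `simple_frame` fixture data:

```python
import pandas as pd
from aridanalysis import aridanalysis as aa

df = pd.DataFrame(
    {
        "x1": [1, 0, 0],
        "x2": [0, 1.0, 0],
        "x3": [0, 0, 1],
        "x4": ["a", "a", "b"],  # non-numeric; requesting it triggers a UserWarning
        "y": [1, 3, -1.0],
    }
)

# Index 0: the scikit-learn model (LinearRegression, or Lasso/Ridge/ElasticNet
# when regularization is "L1"/"L2"/"L1L2"); index 1: the matching statsmodels fit.
sk_model, sm_fit = aa.arid_linreg(df, "y")
print(len(sk_model.coef_), len(sm_fit.params))  # 3 and 3, per the assertions above

# A features list restricts the design matrix; alpha, when given, must be numeric.
lasso, lasso_sm = aa.arid_linreg(df, "y", regularization="L1")
```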
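Similarly for the logistic- and count-regression tests, whose call signatures are easy to lose in the wrapped diff. A sketch under the same caveat; the tiny `health_df` stand-in below is hypothetical, since the real fixture reads tests/toy_data/badhealth.csv:

```python
import pandas as pd
from aridanalysis import aridanalysis as aa

log_df = pd.DataFrame(
    [
        [32, "male", 80, 0],
        [26, "female", 65, 1],
        [22, "female", 75, 1],
        [36, "male", 85, 0],
        [45, "male", 82, 1],
        [18, "female", 57, 0],
        [57, "male", 60, 1],
    ],
    columns=["Age", "Sex", "Weight", "Target"],
)

# arid_logreg: features must be a list and type a supported family ("binomial"
# here); index 0 exposes sklearn-style .coef_, index 1 is a statsmodels
# BinaryResultsWrapper.
sk_log, sm_log = aa.arid_logreg(
    log_df, response="Target", features=["Age"], type="binomial"
)
print(round(sk_log.coef_[0][0], 3))  # 0.015, per the assertion above

# arid_countreg: con_features/cat_features are lists, model is "additive" or
# "interactive", and the response must be integer counts. Index 0 is a sklearn
# pipeline (hence [0][1].coef_ in the tests); index 1 is a statsmodels fit.
health_df = pd.DataFrame(  # hypothetical stand-in for badhealth.csv
    {
        "numvisit": [1, 0, 2, 3, 1, 4],
        "age": [30, 41, 52, 63, 24, 35],
        "badh": ["bad", "good", "bad", "good", "bad", "good"],
    }
)
pipeline, sm_count = aa.arid_countreg(
    data_frame=health_df,
    response="numvisit",
    con_features=["age"],
    cat_features=["badh"],
    model="additive",
)
```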