Skip to content

Commit

Permalink
Merge pull request #162 from PythonPredictions/develop
Browse files Browse the repository at this point in the history
Release v1.1.1: merging 2023-03 development branch to master for 2023-03 release.
  • Loading branch information
sandervh14 authored Apr 7, 2023
2 parents 16342ab + 2dfc309 commit e1f6f45
Show file tree
Hide file tree
Showing 23 changed files with 973 additions and 397 deletions.
4 changes: 2 additions & 2 deletions .github/ISSUE_TEMPLATE/issue.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
name: Task
about: A small issue t. It will usually be labeled as `good first issue` or `enhancement`.
about: A small issue. It will usually be labeled as `good first issue` or `enhancement`.
---

<!-- Issue title should mirror the Task Title. -->
Expand All @@ -11,4 +11,4 @@ Task: I am an Issue

## Task Description

This issue will...
This issue will...
5 changes: 2 additions & 3 deletions .github/workflows/development_CI.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Runs CI when pushing to develop branch
# runs pylint and pytest
## Runs CI when pushing to develop branch

name: CI_develop_action

Expand All @@ -26,7 +25,7 @@ jobs:
run: |
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
python -m pip install pylint pytest pytest-mock pytest-cov
python -m pip install -r requirements.dev.txt
- name: Test with pytest
run: |
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/master_CI.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Runs CI when pushing to master branch
# runs pylint and pytest
## Runs CI when pushing to master branch

name: CI_master_action

Expand All @@ -26,7 +25,7 @@ jobs:
run: |
python -m pip install --upgrade pip
python -m pip install -r requirements.txt
python -m pip install pylint pytest pytest-mock pytest-cov
python -m pip install -r requirements.dev.txt
- name: Test with pytest
run: |
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/master_publish_pypi.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Publishes code to pip when we publish a new release
# runs pylint and pytest
## Publishes code to pip when we publish a new release

name: publish_to_pip

Expand Down
41 changes: 41 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Makefile with some simple commands to make developer's life easier


# None of the targets below produce a file named after the target, so they
# are declared phony: otherwise a stray file or directory called e.g. `lint`
# would make the target look up to date and its recipe would be skipped.
.PHONY: install-requirements dev/install-requirements \
	install-build-essential update-setuptools test-unit lint lint-minimal \
	typecheck codestyle docstyle code-qa

# Install the runtime dependencies (system build tools are installed first).
install-requirements: install-build-essential
	pip install -r requirements.txt

# Install the runtime dependencies plus the development/QA tooling.
dev/install-requirements: install-requirements
	pip install -r requirements.dev.txt

# Install the system compiler toolchain (Debian/Ubuntu only, needs sudo).
# `-y` keeps the install non-interactive so the target also works when no
# TTY is attached (e.g. on a CI runner).
install-build-essential:
	sudo apt-get update
	sudo apt-get install -y build-essential

# Upgrade the packaging tools used to build the distribution.
update-setuptools:
	pip install --upgrade setuptools wheel

# Run the unit test suite.
test-unit:
	pytest tests
	@echo 'unit tests OK'

# Run the full pylint check on the package.
lint:
	pylint cobra
	@echo 'lint OK'

# Run pylint in errors-only mode. The switch is `-E`; a bare `E` would be
# interpreted as a (non-existent) module to lint and make the target fail.
lint-minimal:
	pylint -E cobra
	@echo 'lint minimal OK'

# Static type check of the package.
typecheck:
	mypy cobra
	@echo 'typecheck OK'

# PEP 8 code style check of the package.
codestyle:
	pycodestyle cobra
	@echo 'codestyle OK'

# Docstring style check of the package.
docstyle:
	pydocstyle cobra
	@echo 'docstyle OK'

# Aggregate target: run every code quality check in one go.
code-qa: typecheck codestyle docstyle lint-minimal
6 changes: 2 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ This package requires only the usual Python libraries for data science, being nu
pip install -r requirements.txt


**Note**: if you want to install Cobra with e.g. pip, you don't have to install all of these requirements as these are automatically installed with Cobra itself.
**Note**: if you want to install Cobra with e.g. pip, you don't have to install all these requirements as these are automatically installed with Cobra itself.

Installation
------------
Expand All @@ -61,9 +61,7 @@ Documentation and extra material

- HTML documentation of the `individual modules <https://pythonpredictions.github.io/cobra.io/docstring/modules.html>`_.

- A step-by-step `tutorial <https://pythonpredictions.github.io/cobra/tutorials/tutorial_Cobra_logistic_regression.ipynb>`_ for **logistic regression**.

- A step-by-step `tutorial <https://pythonpredictions.github.io/cobra/tutorials/tutorial_Cobra_linear_regression.ipynb>`__ for **linear regression**.
- Step-by-step `tutorials <https://github.com/PythonPredictions/cobra/blob/master/tutorials>`_ for a logistic and a linear regression use case.

- Check out the Data Science Leuven Meetup `talk <https://www.youtube.com/watch?v=w7ceZZqMEaA&feature=youtu.be>`_ by one of the core developers (second presentation). His `slides <https://github.com/PythonPredictions/Cobra-DS-meetup-Leuven/blob/main/DS_Leuven_meetup_20210209_cobra.pdf>`_ and `related material <https://github.com/PythonPredictions/Cobra-DS-meetup-Leuven>`_ are also available.

Expand Down
8 changes: 7 additions & 1 deletion cobra/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
from .version import __version__
from .version import __version__
from cobra.utils import log_tutorial
import logging

logging.basicConfig(level=logging.INFO, format="%(message)s")

log_tutorial()
26 changes: 16 additions & 10 deletions cobra/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,12 +187,14 @@ def plot_roc_curve(self, path: str=None, dim: tuple=(12, 8)):
label="ROC curve (area = {s:.3})".format(s=auc))

ax.plot([0, 1], [0, 1], color="darkorange", linewidth=3,
linestyle="--")
ax.set_xlabel("False Positive Rate", fontsize=15)
ax.set_ylabel("True Positive Rate", fontsize=15)
linestyle="--", label="random selection")
ax.set_xlabel("False positive rate", fontsize=15)
ax.set_ylabel("True positive rate", fontsize=15)
ax.legend(loc="lower right")
ax.set_title("ROC curve", fontsize=20)

ax.set_ylim([0, 1])

if path:
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")

Expand Down Expand Up @@ -224,6 +226,8 @@ def plot_confusion_matrix(self, path: str=None, dim: tuple=(12, 8),
fmt="s", cmap="Blues",
xticklabels=labels, yticklabels=labels)
ax.set_title("Confusion matrix", fontsize=20)
plt.ylabel('True labels', fontsize=15)
plt.xlabel('Predicted labels', fontsize=15)

if path:
plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
Expand Down Expand Up @@ -256,13 +260,13 @@ def plot_cumulative_response_curve(self, path: str=None, dim: tuple=(12, 8)):

plt.bar(x_labels[::-1], lifts, align="center",
color="cornflowerblue")
plt.ylabel("response (%)", fontsize=16)
plt.xlabel("decile", fontsize=16)
plt.ylabel("Response (%)", fontsize=15)
plt.xlabel("Decile", fontsize=15)
ax.set_xticks(x_labels)
ax.set_xticklabels(x_labels)

plt.axhline(y=inc_rate*100, color="darkorange", linestyle="--",
xmin=0.05, xmax=0.95, linewidth=3, label="Incidence")
xmin=0.05, xmax=0.95, linewidth=3, label="incidence")

# Legend
ax.legend(loc="upper right")
Expand Down Expand Up @@ -305,13 +309,13 @@ def plot_lift_curve(self, path: str=None, dim: tuple=(12, 8)):

plt.bar(x_labels[::-1], lifts, align="center",
color="cornflowerblue")
plt.ylabel("lift", fontsize=16)
plt.xlabel("decile", fontsize=16)
plt.ylabel("Lift", fontsize=15)
plt.xlabel("Decile", fontsize=15)
ax.set_xticks(x_labels)
ax.set_xticklabels(x_labels)

plt.axhline(y=1, color="darkorange", linestyle="--",
xmin=0.05, xmax=0.95, linewidth=3, label="Baseline")
xmin=0.05, xmax=0.95, linewidth=3, label="baseline")

# Legend
ax.legend(loc="upper right")
Expand Down Expand Up @@ -354,7 +358,9 @@ def plot_cumulative_gains(self, path: str=None, dim: tuple=(12, 8)):

# Format axes
ax.set_xlim([0, 100])
ax.set_ylim([0, 105])
ax.set_ylim([0, 100])
plt.ylabel("Gain", fontsize=15)
plt.xlabel("Percentage", fontsize=15)

# Format ticks
ticks_loc_y = ax.get_yticks().tolist()
Expand Down
61 changes: 36 additions & 25 deletions cobra/evaluation/pigs_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
import cobra.utils as utils

def generate_pig_tables(basetable: pd.DataFrame,
id_column_name: str,
target_column_name: str,
preprocessed_predictors: list) -> pd.DataFrame:
preprocessed_predictors: list,
id_column_name: str = None) -> pd.DataFrame:
"""Compute PIG tables for all predictors in preprocessed_predictors.
The output is a DataFrame with columns ``variable``, ``label``,
Expand All @@ -20,35 +20,41 @@ def generate_pig_tables(basetable: pd.DataFrame,
----------
basetable : pd.DataFrame
Basetable to compute PIG tables from.
id_column_name : str
Name of the basetable column containing the IDs of the basetable rows
(e.g. customernumber).
target_column_name : str
Name of the basetable column containing the target values to predict.
preprocessed_predictors: list
List of basetable column names containing preprocessed predictors.
id_column_name : str, default=None
Name of the basetable column containing the IDs of the basetable rows
(e.g. customernumber).
Returns
-------
pd.DataFrame
DataFrame containing a PIG table for all predictors.
"""

#check if there is a id-column and define no_predictor accordingly
if id_column_name == None:
no_predictor = [target_column_name]
else:
no_predictor = [id_column_name, target_column_name]


pigs = [
compute_pig_table(basetable,
column_name,
target_column_name,
id_column_name)
)
for column_name in sorted(preprocessed_predictors)
if column_name not in [id_column_name, target_column_name]
if column_name not in no_predictor
]
output = pd.concat(pigs)
output = pd.concat(pigs, ignore_index=True)
return output


def compute_pig_table(basetable: pd.DataFrame,
predictor_column_name: str,
target_column_name: str,
id_column_name: str) -> pd.DataFrame:
target_column_name: str) -> pd.DataFrame:
"""Compute the PIG table of a given predictor for a given target.
Parameters
Expand All @@ -59,8 +65,6 @@ def compute_pig_table(basetable: pd.DataFrame,
Predictor name of which to compute the pig table.
target_column_name : str
Name of the target variable.
id_column_name : str
Name of the id column (used to count population size).
Returns
-------
Expand All @@ -70,14 +74,20 @@ def compute_pig_table(basetable: pd.DataFrame,
global_avg_target = basetable[target_column_name].mean()

# group by the binned variable, compute the incidence
# (=mean of the target for the given bin) and compute the bin size
# (= mean of the target for the given bin) and compute the bin size
# (e.g. COUNT(id_column_name)). After that, rename the columns

res = (basetable.groupby(predictor_column_name)
.agg({target_column_name: "mean", id_column_name: "size"})
.agg(
avg_target = (target_column_name, "mean"),
pop_size = (target_column_name, "size")
)
.reset_index()
.rename(columns={predictor_column_name: "label",
target_column_name: "avg_target",
id_column_name: "pop_size"}))
.rename(
columns={predictor_column_name: "label"}
)
)


# add the column name to a variable column
# add the average incidence
Expand Down Expand Up @@ -165,9 +175,9 @@ def plot_incidence(pig_tables: pd.DataFrame,
ax.plot(np.nan, "#939598", linewidth=6, label='bin size')

# Set labels & ticks
ax.set_ylabel('incidence' if model_type == "classification" else "mean target value",
ax.set_ylabel('Incidence' if model_type == "classification" else "Mean target value",
fontsize=16)
ax.set_xlabel('{} bins' ''.format(variable), fontsize=16)
ax.set_xlabel("Bins", fontsize=15)
ax.xaxis.set_tick_params(labelsize=14)
plt.setp(ax.get_xticklabels(),
rotation=45, ha="right", rotation_mode="anchor")
Expand Down Expand Up @@ -210,13 +220,13 @@ def plot_incidence(pig_tables: pd.DataFrame,
align='center', color="#939598", zorder=1)

# Set labels & ticks
ax2.set_xlabel('{} bins' ''.format(variable), fontsize=16)
ax2.set_xlabel("Bins", fontsize=15)
ax2.xaxis.set_tick_params(rotation=45, labelsize=14)

ax2.yaxis.set_tick_params(labelsize=14)
ax2.yaxis.set_major_formatter(
FuncFormatter(lambda y, _: '{:.1%}'.format(y)))
ax2.set_ylabel('population size', fontsize=16)
ax2.set_ylabel('Population size', fontsize=15)
ax2.tick_params(axis='y', colors="#939598")
ax2.yaxis.label.set_color('#939598')

Expand All @@ -229,10 +239,11 @@ def plot_incidence(pig_tables: pd.DataFrame,

# Title & legend
if model_type == "classification":
title = "Incidence plot - " + variable
title = "Incidence plot"
else:
title = "Mean target plot - " + variable
fig.suptitle(title, fontsize=22)
title = "Mean target plot"
fig.suptitle(title, fontsize=20)
plt.title(variable, fontsize=17)
ax.legend(frameon=False, bbox_to_anchor=(0., 1.01, 1., .102),
loc=3, ncol=1, mode="expand", borderaxespad=0.,
prop={"size": 14})
Expand Down
Loading

0 comments on commit e1f6f45

Please sign in to comment.