Data science utilities in Python.
- Free software: MIT license
- Documentation: http://data-science-utilities-python.readthedocs.io
from data_science_utilities import data_science_utilities
# compute missing-data statistics
missing_data = data_science_utilities.missing_data_stats(df)
# display the statistics
missing_data
from data_science_utilities import data_science_utilities
train_path = '../data/raw/train.csv'
test_path = '../data/raw/test.csv'
X_train, X_test = data_science_utilities.read_csv_files(train_path, test_path)
from data_science_utilities import data_science_utilities
data_science_utilities.plot_dist_norm(dist, 'distribution normal')
from data_science_utilities import data_science_utilities
data_science_utilities.plot_corelation_matrix(data)
from data_science_utilities import data_science_utilities
data_science_utilities.plot_top_corelation_matrix(data, target, k=10, cmap='YlGnBu')
from data_science_utilities import data_science_utilities
data_science_utilities.plot_scatter(data, column_name, target)
from data_science_utilities import data_science_utilities
data_science_utilities.plot_box(data, column_name, target)
from data_science_utilities import data_science_utilities
data_science_utilities.plot_category_columns(data, limit_bars=10)
from data_science_utilities import data_science_utilities
data_science_utilities.plot_learning_curve(estimator, title, X, y, ylim=None,
cv=None, train_sizes=np.linspace(.1, 1.0, 5))
This package was created with Cookiecutter and the audreyr/cookiecutter-pypackage project template.