Skip to content

Commit

Permalink
Merge pull request #12 from tfm000/dev/docs2
Browse files Browse the repository at this point in the history
Dev/docs2
  • Loading branch information
tfm000 authored Nov 28, 2023
2 parents 9232216 + 8781456 commit bbcdb68
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 69 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Tests

on:
- push
- pull_request
# - pull_request

jobs:
test:
Expand Down
26 changes: 21 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
<p align="center">
<a href="https://opensource.org/licenses/MIT">
<img width=60% src="https://github.com/tfm000/sklarpy/blob/main/media/logo.png?raw=true" alt="SklarPy logo">
</p>

<p align="center">
<a href="https://github.com/tfm000/sklarpy/blob/main/LICENSE">
<img src="https://img.shields.io/badge/license-MIT-brightgreen.svg"
alt="MIT license"></a> &nbsp;
<a href="https://github.com/tfm000/sklarpy/actions/workflows/tests.yml">
Expand All @@ -8,6 +12,18 @@
<a href="https://pepy.tech/project/sklarpy">
<img src="https://static.pepy.tech/personalized-badge/sklarpy?period=total&units=international_system&left_color=black&right_color=orange&left_text=Downloads"
alt="downloads"></a> &nbsp;
<a href="https://pypi.org/project/sklarpy/">
<img src="https://img.shields.io/badge/Maintained%3F-yes-green.svg"
alt="maintained"></a>
</p>

<p align="center">
<a href="https://pypi.org/project/sklarpy/">
<img src="https://img.shields.io/badge/mac%20os-000000?style=for-the-badge&logo=apple&logoColor=white"
alt="mac os"></a>
<a href="https://pypi.org/project/sklarpy/">
<img src="https://img.shields.io/badge/Windows-0078D6?style=for-the-badge&logo=windows&logoColor=white"
alt="windows"></a>
</p>


Expand Down Expand Up @@ -72,19 +88,19 @@ plt.show()
{0: lognorm(0.02, -203.22, 206.18), 1: lognorm(0.04, -110.89, 115.4)}
```
<p align="center">
<img width=50% src="https://github.com/tfm000/sklarpy/blob/main/media/PDF_Gh_PDF_Plot_Plot.png?raw=true">
<img width=50% src="https://github.com/tfm000/sklarpy/blob/main/media/Copula_PDF_Gh_Copula_PDF_Plot_Plot.png?raw=true">
<img width=60% src="https://github.com/tfm000/sklarpy/blob/main/media/PDF_Gh_PDF_Plot_Plot.png?raw=true">
<img width=60% src="https://github.com/tfm000/sklarpy/blob/main/media/Copula_PDF_Gh_Copula_PDF_Plot_Plot.png?raw=true">
</p>


Further examples can be found <a href="https://github.com/tfm000/sklarpy/tree/main/examples"> here</a>.

## Why we are better
- Unlike other Python implementations of copulas, we implement more than the Gaussian and Archimedean copulas. A full list of our implementated copula models can be found <a href="https://github.com/tfm000/sklarpy/tree/main/sklarpy/copulas"> here </a>, though it includes many normal mean-variance mixture models as well as Archimedean and non-parametric models.
- Unlike other Python implementations of copulas, we implement more than the Gaussian and Archimedean copulas. A full list of our implementated copula models can be found <a href="https://github.com/tfm000/sklarpy/tree/main/sklarpy/copulas"> here</a>, though it includes many normal mean-variance mixture models as well as Archimedean and non-parametric models.
- We allow for easy parameter fitting of both the univariate marginals and the multivariate copula distribution.
- We allow for easy plotting of all our distributions, allowing you to visualize your models.
- We use scipy.stats as a backend for all our univariate models, meaning as scipy expands and improves their model selection, so will ours!
- We provide multivariate and univariate distributions, in addition to our copula models, meaning SklarPy can act as a one-stop-shop for all probability distribution fitting.
- We provide multivariate and univariate distributions, in addition to our copula models, meaning SklarPy can act as a one-stop-shop for all probability distribution fitting. A full list of our implemented multivariate distributions can be found <a href="https://github.com/tfm000/sklarpy/tree/main/sklarpy/multivariate"> here</a>.

## Testing
All tests are written using pytest and cover all user accessible code.
Expand Down
3 changes: 3 additions & 0 deletions examples/copula_examples/copula_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
univariate_fitter_options={'significant': False}, show_progress=True)

# prining our fitted parameters
from sklarpy import print_full
print_full()

print(fitted_copula.copula_params.to_dict)
print(fitted_copula.mdists)
print(fitted_copula.copula_params.cov)
Expand Down
7 changes: 5 additions & 2 deletions examples/copula_examples/marginal_fitter_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
my_mvn_params: tuple = (my_mu, my_cov)

# generating multivariate random normal variables
from sklarpy.multivariate import multivariate_normal
from sklarpy.multivariate import mvt_normal

rvs: np.ndarray = multivariate_normal.rvs(num_generate, my_mvn_params)
rvs: np.ndarray = mvt_normal.rvs(num_generate, my_mvn_params)
rvs_df: pd.DataFrame = pd.DataFrame(rvs, columns=['Wife Age', 'Husband Age'
], dtype=float)

Expand All @@ -26,6 +26,9 @@
mfitter.fit({'pvalue': 0.01})

# printing out a summary of our fits
from sklarpy import print_full
print_full()

print(mfitter.summary)
print(mfitter.marginals)

Expand Down
8 changes: 4 additions & 4 deletions examples/misc_examples/correlation_matrix_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
my_params = (my_chi, my_psi, my_loc, my_shape, my_gamma)

# generating multivariate hyperbolic random variables
from sklarpy.multivariate import multivariate_hyperbolic
from sklarpy.multivariate import mvt_hyperbolic

num_generate: int = 1000
rvs: np.ndarray = multivariate_hyperbolic.rvs(num_generate, my_params)
rvs: np.ndarray = mvt_hyperbolic.rvs(num_generate, my_params)
rvs_df: pd.DataFrame = pd.DataFrame(rvs, columns=['Process A', 'Process B'],
dtype=float)

Expand All @@ -29,8 +29,8 @@
cov_estimator: np.ndarray = cmatrix.cov(method='spearman')

# printing our estimator values
print('Correlation Matrix Estimator\n-------------------')
print('Correlation Matrix Estimator\n----------------------------')
print(corr_estimator)

print('\nCovariance Matrix Estimator\n-------------------')
print('\nCovariance Matrix Estimator\n---------------------------')
print(cov_estimator)
3 changes: 3 additions & 0 deletions examples/univariate_examples/univariate_fitter_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
ufitter.fit()

# printing out the summary of our fits
from sklarpy import print_full
print_full()

print(ufitter.get_summary())

# finding our best fit
Expand Down
Binary file added media/logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
114 changes: 57 additions & 57 deletions sklarpy/tests/multivariate/test_fitted_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,63 +10,63 @@
from sklarpy._utils import Params


def test_fitted_logpdf_pdf_cdf_mc_cdf(
mvt_continuous_data, mvt_discrete_data, pd_mvt_continuous_data,
pd_mvt_discrete_data, mv_dists_to_test, params_2d):
"""Testing the logpdf, pdf, cdf and mc-cdf functions of fitted multivariate
distributions"""
print('\nTesting logpdf, pdf, cdf and mc-cdf functions')
eps: float = 10 ** -5
num_generate: int = 10

for name in mv_dists_to_test:
_, fitted, _ = get_dist(name, params_2d, mvt_continuous_data)
for func_str in ('logpdf', 'pdf', 'mc_cdf'): #, 'cdf'):
func: Callable = eval(f'fitted.{func_str}')
cdf_num: int = 10
datasets = (mvt_continuous_data[:cdf_num, :],
mvt_discrete_data[:cdf_num, :],
pd_mvt_continuous_data.iloc[:cdf_num, :],
pd_mvt_discrete_data.iloc[:cdf_num, :]) \
if func_str == 'cdf' else (mvt_continuous_data,
mvt_discrete_data,
pd_mvt_continuous_data,
pd_mvt_discrete_data)

for data in datasets:
output = func(x=data, match_datatype=True,
num_generate=num_generate)

np_output = np.asarray(output)
n, d = np.asarray(data).shape

# checking same datatype
assert isinstance(output, type(data)), \
f"{func_str} values for {name} do not match the " \
f"datatype: {type(data)}."

# checking the correct size
assert np_output.size == n, \
f"{func_str} values for {name} are not the correct size."

# checking for nan-values
assert np.isnan(np_output).sum() == 0, \
f'nans present in {name} {func_str} values.'

# function specific tests
if func_str == 'pdf':
assert np.all(np_output >= -eps), \
f"pdf values in {name} are negative."
elif func_str in ('cdf', 'mc_cdf'):
assert np.all((-eps <= np_output) & (output <= 1 + eps)), \
f"{func_str} values in {name} outside [0, 1]."

# checking error if wrong dimension
new_dataset: np.ndarray = np.zeros((n, d + 1))
with pytest.raises(
ValueError, match="Dimensions implied by parameters do "
"not match those of the dataset."):
func(x=new_dataset, num_generate=num_generate)
# def test_fitted_logpdf_pdf_cdf_mc_cdf(
# mvt_continuous_data, mvt_discrete_data, pd_mvt_continuous_data,
# pd_mvt_discrete_data, mv_dists_to_test, params_2d):
# """Testing the logpdf, pdf, cdf and mc-cdf functions of fitted multivariate
# distributions"""
# print('\nTesting logpdf, pdf, cdf and mc-cdf functions')
# eps: float = 10 ** -5
# num_generate: int = 10
#
# for name in mv_dists_to_test:
# _, fitted, _ = get_dist(name, params_2d, mvt_continuous_data)
# for func_str in ('logpdf', 'pdf', 'mc_cdf'): #, 'cdf'):
# func: Callable = eval(f'fitted.{func_str}')
# cdf_num: int = 10
# datasets = (mvt_continuous_data[:cdf_num, :],
# mvt_discrete_data[:cdf_num, :],
# pd_mvt_continuous_data.iloc[:cdf_num, :],
# pd_mvt_discrete_data.iloc[:cdf_num, :]) \
# if func_str == 'cdf' else (mvt_continuous_data,
# mvt_discrete_data,
# pd_mvt_continuous_data,
# pd_mvt_discrete_data)
#
# for data in datasets:
# output = func(x=data, match_datatype=True,
# num_generate=num_generate)
#
# np_output = np.asarray(output)
# n, d = np.asarray(data).shape
#
# # checking same datatype
# assert isinstance(output, type(data)), \
# f"{func_str} values for {name} do not match the " \
# f"datatype: {type(data)}."
#
# # checking the correct size
# assert np_output.size == n, \
# f"{func_str} values for {name} are not the correct size."
#
# # checking for nan-values
# assert np.isnan(np_output).sum() == 0, \
# f'nans present in {name} {func_str} values.'
#
# # function specific tests
# if func_str == 'pdf':
# assert np.all(np_output >= -eps), \
# f"pdf values in {name} are negative."
# elif func_str in ('cdf', 'mc_cdf'):
# assert np.all((-eps <= np_output) & (output <= 1 + eps)), \
# f"{func_str} values in {name} outside [0, 1]."
#
# # checking error if wrong dimension
# new_dataset: np.ndarray = np.zeros((n, d + 1))
# with pytest.raises(
# ValueError, match="Dimensions implied by parameters do "
# "not match those of the dataset."):
# func(x=new_dataset, num_generate=num_generate)


def test_fitted_rvs(mv_dists_to_test, params_2d, mvt_continuous_data):
Expand Down

0 comments on commit bbcdb68

Please sign in to comment.