From b97c4a9d23094e5b8f51d118e3200030d9b18a3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 00:24:33 +0100 Subject: [PATCH 01/18] qpd b initial --- skpro/distributions/qpd.py | 250 ++++++++++++++++++++----------------- 1 file changed, 134 insertions(+), 116 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index dc724c524..73c21ce70 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -433,7 +433,7 @@ class QPD_B(BaseDistribution): Parameters ---------- - alpha : float + alpha : float or array_like[float] lower quantile of SPT (upper is ``1 - alpha``) qv_low : float or array_like[float] quantile function value of ``alpha`` @@ -441,11 +441,11 @@ class QPD_B(BaseDistribution): quantile function value of quantile 0.5 qv_high : float or array_like[float] quantile function value of quantile ``1 - alpha`` - lower : float + lower : float or array_like[float] lower bound of semi-bounded range. This is used when estimating QPD and calculating expectation and variance - upper : float + upper : float or array_like[float] upper bound of semi-bounded range. This is used when estimating QPD and calculating expectation and variance @@ -479,6 +479,10 @@ class QPD_B(BaseDistribution): "capabilities:approx": ["pdfnorm", "energy"], "capabilities:exact": ["mean", "var", "cdf", "ppf", "pdf"], "distr:measuretype": "continuous", + "broadcast_init": "on", + "broadcast_params": [ + "alpha", "qv_low", "qv_median", "qv_high", "lower", "upper" + ], } def __init__( @@ -503,95 +507,38 @@ def __init__( self.index = index self.columns = columns - from cyclic_boosting.quantile_matching import J_QPD_B - - qv_low, qv_median, qv_high = _prep_qpd_params(qv_low, qv_median, qv_high) - - if index is None: - index = pd.RangeIndex(qv_low.shape[0]) - self.index = index - - if columns is None: - columns = pd.RangeIndex(1) - self.columns = columns - - if version == "normal": - self.phi = norm() - elif version == "logistic": - self.phi = logistic() - else: - raise Exception("Invalid version.") - - if (np.any(qv_low > qv_median)) or np.any(qv_high < qv_median): - warnings.warn( - "The SPT values are not monotonically increasing, " - "each SPT is sorted by value", - stacklevel=2, - ) - idx = np.where((qv_low > qv_median), True, False) + np.where( - (qv_high < qv_median), True, False - ) - un_orderd_idx = np.argwhere(idx > 0).tolist() - warnings.warn(f"sorted index {un_orderd_idx}", stacklevel=2) - for idx in un_orderd_idx: - low, mid, high = sorted([qv_low[idx], qv_median[idx], qv_high[idx]]) - qv_low[idx] = low - qv_median[idx] = mid - qv_high[idx] = high - - self.qpd = J_QPD_B( - alpha=alpha, - qv_low=qv_low, - qv_median=qv_median, - qv_high=qv_high, - l=self.lower, - u=self.upper, - version=version, - ) super().__init__(index=index, columns=columns) - def _mean(self): - """Return expected value of the distribution. + # precompute parameters for methods + phi = _resolve_phi(version) + self.phi = phi - Please set the upper and lower limits of the random variable correctly. + alpha = self._bc_params["alpha"] + qv_low = self._bc_params["qv_low"] + qv_median = self._bc_params["qv_median"] + qv_high = self._bc_params["qv_high"] + lower = self._bc_params["lower"] + upper = self._bc_params["upper"] - Returns - ------- - pd.DataFrame with same rows, columns as `self` - expected value of distribution (entry-wise) - """ - params = self.get_params(deep=False) - lower = params["lower"] - upper = params["upper"] - index = params["index"] - x = np.linspace(lower, upper, num=int(1e3)) - cdf = self.qpd.cdf(x) - if cdf.ndim < 2: - cdf = cdf[:, np.newaxis] - loc = exp_func(x, cdf.T, index.shape[0]) - return loc + params = _prep_qpd_vars(alpha, qv_low, qv_median, qv_high, lower, upper, phi) + self.params = params - def _var(self): - """Return element/entry-wise variance of the distribution. + def _ppf(self, p: np.ndarray): + """Quantile function = percent point function = inverse cdf.""" + lower = self._bc_params["lower"] + upper = self._bc_params["upper"] + rnge = upper - lower + xi = self.params["xi"] + delta = self.params["delta"] + kappa = self.params["kappa"] + c = self.params["c"] + n = self.params["n"] - Please set the upper and lower limits of the random variable correctly. + phi = self.phi - Returns - ------- - pd.DataFrame with same rows, columns as `self` - variance of distribution (entry-wise) - """ - params = self.get_params(deep=False) - lower = params["lower"] - upper = params["upper"] - index = params["index"] - mean = self.mean().values - x = np.linspace(lower, upper, num=int(1e3)) - cdf = self.qpd.cdf(x) - if cdf.ndim < 2: - cdf = cdf[:, np.newaxis] - var = var_func(x, mean, cdf.T, index.shape[0]) - return var + in_cdf = xi + kappa * np.sinh(delta * (phi.ppf(p) + n * c)) + ppf_arr = lower + rnge * phi.cdf(in_cdf) + return ppf_arr def _pdf(self, x: np.ndarray): """Probability density function. @@ -599,41 +546,55 @@ def _pdf(self, x: np.ndarray): this fucntion transform cdf to pdf because j-qpd's pdf calculation is bit complex """ - return pdf_func(x, self.qpd) + lower = self._bc_params["lower"] + upper = self._bc_params["upper"] + rnge = upper - lower + xi = self.params["xi"] + delta = self.params["delta"] + kappa = self.params["kappa"] + c = self.params["c"] + n = self.params["n"] - def _ppf(self, p: np.ndarray): - """Quantile function = percent point function = inverse cdf.""" - params = self.get_params(deep=False) - index = params["index"] - columns = params["columns"] - qv_low = params["qv_low"] - p_unique = np.unique(p) # de-broadcast - ppf_all = ppf_func(p_unique, self.qpd) - ppf_map = np.tile(p_unique, (qv_low.size, 1)).T - ppf = np.zeros((index.shape[0], len(columns))) - for r in range(p.shape[0]): - for c in range(p.shape[1]): - t = np.where(ppf_map[:, c] == p[r][c]) - ppf_part = ppf_all[t][c] - ppf[r][c] = ppf_part - return ppf + phi = self.phi + + # we work through the chain rule for the entire nested expression in cdf + x_ = (x - lower) / rnge + x_der = 1 / rnge + + phi_ppf = phi.ppf(x) + # derivative of ppf at z is 1 / pdf(ppf(z)) + phi_ppf_der = 1 / phi.pdf(phi.ppf(x_)) * x_der + + in_arcsinh = (phi_ppf - xi) / kappa + in_arcsinh_der = phi_ppf_der / kappa + + in_cdf = np.arcsinh(in_arcsinh) / delta - n * c + in_cdf_der = arcsinh_der(in_arcsinh) * in_arcsinh_der / delta + + # cdf_arr = phi.cdf(in_cdf) + cdf_arr_dev = phi.pdf(in_cdf) * in_cdf_der + + pdf_arr = cdf_arr_dev + return pdf_arr def _cdf(self, x: np.ndarray): """Cumulative distribution function.""" - params = self.get_params(deep=False) - index = params["index"] - columns = params["columns"] - qv_low = params["qv_low"] - x_unique = np.unique(x) # de-broadcast - cdf_all = cdf_func(x_unique, self.qpd) - cdf_map = np.tile(x_unique, (qv_low.size, 1)).T - cdf = np.zeros((index.shape[0], len(columns))) - for r in range(x.shape[0]): - for c in range(x.shape[1]): - t = np.where(cdf_map[:, c] == x[r][c]) - cdf_part = cdf_all[t][c] - cdf[r][c] = cdf_part - return cdf + lower = self._bc_params["lower"] + upper = self._bc_params["upper"] + rnge = upper - lower + xi = self.params["xi"] + delta = self.params["delta"] + kappa = self.params["kappa"] + c = self.params["c"] + n = self.params["n"] + + phi = self.phi + + phi_ppf = phi.ppf((x - lower) / rnge) + in_cdf = np.arcsinh((phi_ppf - xi) / kappa) / delta - n * c + cdf_arr = phi.cdf(in_cdf) + + return cdf_arr @classmethod def get_test_params(cls, parameter_set="default"): @@ -999,3 +960,60 @@ def _prep_qpd_params(qv_low, qv_median, qv_high): qv_median = qv[1].flatten() qv_high = qv[2].flatten() return qv_low, qv_median, qv_high + + +def _resolve_phi(self, phi): + """Resolve base distribution.""" + if phi == "normal": + return norm() + elif phi == "logistic": + return logistic() + else: + return phi + + +def _prep_qpd_vars(alpha, qv_low, qv_high, qv_median, lower, upper, phi): + """Prepare parameters for Johnson Quantile-Parameterized Distributions. + + Parameters + ---------- + alpha : 2D np.array + lower quantile of SPT (upper is ``1 - alpha``) + qv_low : 2D np.array + quantile function value of ``alpha`` + qv_median : 2D np.array + quantile function value of quantile 0.5 + qv_high : 2D np.array + quantile function value of quantile ``1 - alpha`` + lower : 2D np.array + lower bound of range. + upper : 2D np.array + upper bound of range. + phi : scipy.stats.rv_continuous + base distribution + """ + c = phi.ppf(1 - alpha) + L = phi.ppf((qv_low - lower) / (upper - lower)) + H = phi.ppf((qv_high - lower) / (upper - lower)) + B = phi.ppf((qv_median - lower) / (upper - lower)) + HL = H - L + BL = B - L + HB = H - B + LH2B = L + H - 2 * B + + n = np.where(LH2B > 0, 1, -1) + xi = np.where(LH2B > 0, L, H) + + n = np.where(LH2B == 0, 0, n) + xi = np.where(LH2B == 0, B, xi) + + delta = np.arccosh(HL / (2 * np.where(BL < HB), BL, HB)) / c + kappa = HL / np.sinh(2 * delta * c) + + params = {"L": L, "H": H, "B": B, "n": n, "xi": xi, "delta": delta, "kappa": kappa} + return params + + +def arcsinh_der(x): + """Return derivative of arcsinh.""" + return 1 / np.sqrt(1 + x ** 2) \ No newline at end of file From 6e49d6dd7ccfb10605ab15e85746ca6973665c9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 01:02:34 +0100 Subject: [PATCH 02/18] fixes, test --- skpro/distributions/qpd.py | 50 ++++++++++++++++----------- skpro/distributions/tests/test_qpd.py | 17 +++++++++ 2 files changed, 46 insertions(+), 21 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 73c21ce70..e1abe7a1a 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -471,9 +471,8 @@ class QPD_B(BaseDistribution): _tags = { # packaging info # -------------- - "authors": ["setoguchi-naoki", "felix-wick"], + "authors": ["setoguchi-naoki", "felix-wick", "fkiraly"], "maintainers": ["setoguchi-naoki"], - "python_dependencies": ["cyclic_boosting>=1.4.0", "findiff"], # estimator tags # -------------- "capabilities:approx": ["pdfnorm", "energy"], @@ -526,8 +525,7 @@ def __init__( def _ppf(self, p: np.ndarray): """Quantile function = percent point function = inverse cdf.""" lower = self._bc_params["lower"] - upper = self._bc_params["upper"] - rnge = upper - lower + rnge = self.params["rnge"] xi = self.params["xi"] delta = self.params["delta"] kappa = self.params["kappa"] @@ -547,8 +545,7 @@ def _pdf(self, x: np.ndarray): because j-qpd's pdf calculation is bit complex """ lower = self._bc_params["lower"] - upper = self._bc_params["upper"] - rnge = upper - lower + rnge = self.params["rnge"] xi = self.params["xi"] delta = self.params["delta"] kappa = self.params["kappa"] @@ -561,9 +558,9 @@ def _pdf(self, x: np.ndarray): x_ = (x - lower) / rnge x_der = 1 / rnge - phi_ppf = phi.ppf(x) + phi_ppf = phi.ppf(x_) # derivative of ppf at z is 1 / pdf(ppf(z)) - phi_ppf_der = 1 / phi.pdf(phi.ppf(x_)) * x_der + phi_ppf_der = x_der / phi.pdf(phi.ppf(x_)) in_arcsinh = (phi_ppf - xi) / kappa in_arcsinh_der = phi_ppf_der / kappa @@ -580,8 +577,7 @@ def _pdf(self, x: np.ndarray): def _cdf(self, x: np.ndarray): """Cumulative distribution function.""" lower = self._bc_params["lower"] - upper = self._bc_params["upper"] - rnge = upper - lower + rnge = self.params["rnge"] xi = self.params["xi"] delta = self.params["delta"] kappa = self.params["kappa"] @@ -613,9 +609,9 @@ def get_test_params(cls, parameter_set="default"): params2 = { "alpha": 0.2, "version": "normal", - "qv_low": [-0.3, -0.3, -0.3], - "qv_median": [0.0, 0.0, 0.0], - "qv_high": [0.3, 0.3, 0.3], + "qv_low": [[-0.3], [-0.2], [-0.1]], + "qv_median": [[-0.1], [0.0], [0.1]], + "qv_high": [[0.2], [0.3], [0.4]], "lower": -0.5, "upper": 0.5, "index": pd.RangeIndex(3), @@ -962,7 +958,7 @@ def _prep_qpd_params(qv_low, qv_median, qv_high): return qv_low, qv_median, qv_high -def _resolve_phi(self, phi): +def _resolve_phi(phi): """Resolve base distribution.""" if phi == "normal": return norm() @@ -972,7 +968,7 @@ def _resolve_phi(self, phi): return phi -def _prep_qpd_vars(alpha, qv_low, qv_high, qv_median, lower, upper, phi): +def _prep_qpd_vars(alpha, qv_low, qv_median, qv_high, lower, upper, phi): """Prepare parameters for Johnson Quantile-Parameterized Distributions. Parameters @@ -993,9 +989,10 @@ def _prep_qpd_vars(alpha, qv_low, qv_high, qv_median, lower, upper, phi): base distribution """ c = phi.ppf(1 - alpha) - L = phi.ppf((qv_low - lower) / (upper - lower)) - H = phi.ppf((qv_high - lower) / (upper - lower)) - B = phi.ppf((qv_median - lower) / (upper - lower)) + rnge = upper - lower + L = phi.ppf((qv_low - lower) / rnge) + H = phi.ppf((qv_high - lower) / rnge) + B = phi.ppf((qv_median - lower) / rnge) HL = H - L BL = B - L HB = H - B @@ -1007,13 +1004,24 @@ def _prep_qpd_vars(alpha, qv_low, qv_high, qv_median, lower, upper, phi): n = np.where(LH2B == 0, 0, n) xi = np.where(LH2B == 0, B, xi) - delta = np.arccosh(HL / (2 * np.where(BL < HB), BL, HB)) / c + in_arccosh = HL / (2 * np.where(BL < HB, BL, HB)) + delta = np.arccosh(in_arccosh) / c kappa = HL / np.sinh(2 * delta * c) - params = {"L": L, "H": H, "B": B, "n": n, "xi": xi, "delta": delta, "kappa": kappa} + params = { + "c": c, + "rnge": rnge, + "L": L, + "H": H, + "B": B, + "n": n, + "xi": xi, + "delta": delta, + "kappa": kappa, + } return params def arcsinh_der(x): """Return derivative of arcsinh.""" - return 1 / np.sqrt(1 + x ** 2) \ No newline at end of file + return 1 / np.sqrt(1 + x ** 2) diff --git a/skpro/distributions/tests/test_qpd.py b/skpro/distributions/tests/test_qpd.py index 78c418c1a..f0868b052 100644 --- a/skpro/distributions/tests/test_qpd.py +++ b/skpro/distributions/tests/test_qpd.py @@ -1,5 +1,6 @@ """Tests for quantile-parameterized distributions.""" +import numpy as np import pytest from skpro.distributions.qpd import QPD_B, QPD_S, QPD_U @@ -24,6 +25,22 @@ def test_qpd_b_simple_use(): qpd.mean() +def test_qpd_b_pdf(): + """Test pdf of qpd with bounded mode.""" + # these parameters should produce a uniform on -0.5, 0.5 + qpd_linear = QPD_B( + alpha=0.2, + qv_low=-0.3, + qv_median=0, + qv_high=0.3, + lower=-0.5, + upper=0.5, + ) + x = np.linspace(-0.45, 0.45, 100) + pdf = [qpd_linear.pdf(x_) for x_ in x] + assert np.testing.assert_allclose(np.sum(pdf), 1.0, rtol=1e-5) + + @pytest.mark.skipif( not run_test_for_class(QPD_S), reason="run test only if softdeps are present and incrementally (if requested)", From 619196a7b023fedca41eb6883613a26d9def217e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 01:18:58 +0100 Subject: [PATCH 03/18] refactor --- skpro/distributions/qpd.py | 177 ++++++++++++++----------------------- 1 file changed, 68 insertions(+), 109 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index e1abe7a1a..8307d0b9a 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -239,6 +239,10 @@ class QPD_S(BaseDistribution): "capabilities:approx": ["pdfnorm", "energy"], "capabilities:exact": ["mean", "var", "cdf", "ppf", "pdf"], "distr:measuretype": "continuous", + "broadcast_init": "on", + "broadcast_params": [ + "alpha", "qv_low", "qv_median", "qv_high", "lower", "upper" + ], } def __init__( @@ -263,104 +267,23 @@ def __init__( self.index = index self.columns = columns - from cyclic_boosting.quantile_matching import J_QPD_S - - qv_low, qv_median, qv_high = _prep_qpd_params(qv_low, qv_median, qv_high) - - if index is None: - index = pd.RangeIndex(qv_low.shape[0]) - self.index = index - - if columns is None: - columns = pd.RangeIndex(1) - self.columns = columns - - self._shape = (qv_low.shape[0], 1) - - if version == "normal": - self.phi = norm() - elif version == "logistic": - self.phi = logistic() - else: - raise Exception("Invalid version.") - - if (np.any(qv_low > qv_median)) or np.any(qv_high < qv_median): - warnings.warn( - "The SPT values are not monotonically increasing, " - "each SPT is sorted by value", - stacklevel=2, - ) - idx = np.where((qv_low > qv_median), True, False) + np.where( - (qv_high < qv_median), True, False - ) - un_orderd_idx = np.argwhere(idx > 0).tolist() - warnings.warn(f"sorted index {un_orderd_idx}", stacklevel=2) - for idx in un_orderd_idx: - low, mid, high = sorted([qv_low[idx], qv_median[idx], qv_high[idx]]) - qv_low[idx] = low - qv_median[idx] = mid - qv_high[idx] = high - - self.qpd = J_QPD_S( - alpha=alpha, - qv_low=qv_low, - qv_median=qv_median, - qv_high=qv_high, - l=self.lower, - version=version, - ) super().__init__(index=index, columns=columns) - def _mean(self): - """Return expected value of the distribution. - - Please set the upper and lower limits of the random variable correctly. - - Returns - ------- - pd.DataFrame with same rows, columns as `self` - expected value of distribution (entry-wise) - """ - params = self.get_params(deep=False) - lower = params["lower"] - upper = params["upper"] - index = params["index"] - x = np.linspace(lower, upper, num=int(1e3)) - cdf = self.qpd.cdf(x) - if cdf.ndim < 2: - cdf = cdf[:, np.newaxis] - loc = exp_func(x, cdf.T, index.shape[0]) - return loc - - def _var(self): - """Return element/entry-wise variance of the distribution. - - Please set the upper and lower limits of the random variable correctly. - - Returns - ------- - pd.DataFrame with same rows, columns as `self` - variance of distribution (entry-wise) - """ - params = self.get_params(deep=False) - lower = params["lower"] - upper = params["upper"] - index = params["index"] - mean = self.mean().values - x = np.linspace(lower, upper, num=int(1e3)) - cdf = self.qpd.cdf(x) - if cdf.ndim < 2: - cdf = cdf[:, np.newaxis] - var = var_func(x, mean, cdf.T, index.shape[0]) - return var + # precompute parameters for methods + phi = _resolve_phi(version) + self.phi = phi - def _pdf(self, x: np.ndarray): - """Probability density function. + alpha = self._bc_params["alpha"] + qv_low = self._bc_params["qv_low"] + qv_median = self._bc_params["qv_median"] + qv_high = self._bc_params["qv_high"] + lower = self._bc_params["lower"] + upper = self._bc_params["upper"] - this fucntion transform cdf to pdf - because j-qpd's pdf calculation is bit complex - """ - return pdf_func(x, self.qpd) + params = _prep_qpd_vars( + alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="B", + ) + self.params = params def _ppf(self, p: np.ndarray): """Quantile function = percent point function = inverse cdf.""" @@ -379,6 +302,14 @@ def _ppf(self, p: np.ndarray): ppf[r][c] = ppf_part return ppf + def _pdf(self, x: np.ndarray): + """Probability density function. + + this fucntion transform cdf to pdf + because j-qpd's pdf calculation is bit complex + """ + return pdf_func(x, self.qpd) + def _cdf(self, x: np.ndarray): """Cumulative distribution function.""" params = self.get_params(deep=False) @@ -519,18 +450,20 @@ def __init__( lower = self._bc_params["lower"] upper = self._bc_params["upper"] - params = _prep_qpd_vars(alpha, qv_low, qv_median, qv_high, lower, upper, phi) + params = _prep_qpd_vars( + alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="B", + ) self.params = params def _ppf(self, p: np.ndarray): """Quantile function = percent point function = inverse cdf.""" lower = self._bc_params["lower"] rnge = self.params["rnge"] - xi = self.params["xi"] delta = self.params["delta"] kappa = self.params["kappa"] c = self.params["c"] n = self.params["n"] + xi = self.params["xi"] phi = self.phi @@ -546,11 +479,11 @@ def _pdf(self, x: np.ndarray): """ lower = self._bc_params["lower"] rnge = self.params["rnge"] - xi = self.params["xi"] delta = self.params["delta"] kappa = self.params["kappa"] c = self.params["c"] n = self.params["n"] + xi = self.params["xi"] phi = self.phi @@ -578,11 +511,11 @@ def _cdf(self, x: np.ndarray): """Cumulative distribution function.""" lower = self._bc_params["lower"] rnge = self.params["rnge"] - xi = self.params["xi"] delta = self.params["delta"] kappa = self.params["kappa"] c = self.params["c"] n = self.params["n"] + xi = self.params["xi"] phi = self.phi @@ -968,7 +901,7 @@ def _resolve_phi(phi): return phi -def _prep_qpd_vars(alpha, qv_low, qv_median, qv_high, lower, upper, phi): +def _prep_qpd_vars(alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="B"): """Prepare parameters for Johnson Quantile-Parameterized Distributions. Parameters @@ -987,26 +920,47 @@ def _prep_qpd_vars(alpha, qv_low, qv_median, qv_high, lower, upper, phi): upper bound of range. phi : scipy.stats.rv_continuous base distribution + mode : str + options are ``B`` (default) or ``S`` + B = bounded mode, S = lower semi-bounded mode """ c = phi.ppf(1 - alpha) rnge = upper - lower - L = phi.ppf((qv_low - lower) / rnge) - H = phi.ppf((qv_high - lower) / rnge) - B = phi.ppf((qv_median - lower) / rnge) + + qll = qv_low - lower + qml = qv_median - lower + qhl = qv_high - lower + + L = phi.ppf(qll / rnge) + H = phi.ppf(qhl / rnge) + B = phi.ppf(qml / rnge) HL = H - L BL = B - L HB = H - B LH2B = L + H - 2 * B - n = np.where(LH2B > 0, 1, -1) - xi = np.where(LH2B > 0, L, H) + HBL = np.where(BL < HB, BL, HB) + n = np.where(LH2B > 0, 1, -1) n = np.where(LH2B == 0, 0, n) - xi = np.where(LH2B == 0, B, xi) - in_arccosh = HL / (2 * np.where(BL < HB, BL, HB)) - delta = np.arccosh(in_arccosh) / c - kappa = HL / np.sinh(2 * delta * c) + if mode == "B": + xi = np.where(LH2B > 0, L, H) + xi = np.where(LH2B == 0, B, xi) + elif mode == "S": + theta = np.where(LH2B > 0, qll, qhl) + theta = np.where(LH2B == 0, qml, theta) + + in_arccosh = HL / (2 * HBL) + delta_unn = np.arccosh(in_arccosh) + if mode == "S": + delta = np.sinh(delta_unn) + delta = delta_unn / c + + if mode == "B": + kappa = HL / np.sinh(2 * delta * c) + elif mode == "S": + kappa = HBL / (delta * c) params = { "c": c, @@ -1015,10 +969,15 @@ def _prep_qpd_vars(alpha, qv_low, qv_median, qv_high, lower, upper, phi): "H": H, "B": B, "n": n, - "xi": xi, "delta": delta, "kappa": kappa, } + + if mode == "S": + params["theta"] = theta + elif mode == "B": + params["xi"] = xi + return params From 545d1fe0baa2c353284930aee5940e75a763ef4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 01:30:39 +0100 Subject: [PATCH 04/18] qpd_s --- skpro/distributions/qpd.py | 97 ++++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 8307d0b9a..c66460630 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -233,7 +233,6 @@ class QPD_S(BaseDistribution): # -------------- "authors": ["setoguchi-naoki", "felix-wick"], "maintainers": ["setoguchi-naoki"], - "python_dependencies": ["cyclic_boosting>=1.4.0", "findiff"], # estimator tags # -------------- "capabilities:approx": ["pdfnorm", "energy"], @@ -281,26 +280,26 @@ def __init__( upper = self._bc_params["upper"] params = _prep_qpd_vars( - alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="B", + alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="S", ) self.params = params def _ppf(self, p: np.ndarray): """Quantile function = percent point function = inverse cdf.""" - params = self.get_params(deep=False) - index = params["index"] - columns = params["columns"] - qv_low = params["qv_low"] - p_unique = np.unique(p) # de-broadcast - ppf_all = ppf_func(p_unique, self.qpd) - ppf_map = np.tile(p_unique, (qv_low.size, 1)).T - ppf = np.zeros((index.shape[0], len(columns))) - for r in range(p.shape[0]): - for c in range(p.shape[1]): - t = np.where(ppf_map[:, c] == p[r][c]) - ppf_part = ppf_all[t][c] - ppf[r][c] = ppf_part - return ppf + lower = self._bc_params["lower"] + delta = self.params["delta"] + kappa = self.params["kappa"] + c = self.params["c"] + n = self.params["n"] + theta = self.params["theta"] + + phi = self.phi + + in_sinh = np.arcsinh(phi.ppf(p) * delta) + in_exp = kappa * np.sinh(in_sinh) + np.arcsinh(n * c * delta) + ppf_arr = lower + theta * np.exp(in_exp) + + return ppf_arr def _pdf(self, x: np.ndarray): """Probability density function. @@ -308,24 +307,50 @@ def _pdf(self, x: np.ndarray): this fucntion transform cdf to pdf because j-qpd's pdf calculation is bit complex """ - return pdf_func(x, self.qpd) + lower = self._bc_params["lower"] + delta = self.params["delta"] + kappa = self.params["kappa"] + c = self.params["c"] + n = self.params["n"] + theta = self.params["theta"] + + phi = self.phi + + # we work through the chain rule for the entire nested expression in cdf + x_ = (x - lower) / theta + x_der = 1 / theta + + in_arcsinh = np.log(x_) / kappa + in_arcsinh_der = x_der / (kappa * x_) + + in_sinh = np.arcsinh(in_arcsinh) - np.arcsinh(n * c * delta) + in_sinh_der = arcsinh_der(in_arcsinh) * in_arcsinh_der + + in_cdf = np.sinh(in_sinh) / delta + in_cdf_der = np.cosh(in_sinh) * in_sinh_der / delta + + # cdf_arr = phi.cdf(in_cdf) + cdf_arr_der = phi.pdf(in_cdf) * in_cdf_der + + pdf_arr = cdf_arr_der + return pdf_arr def _cdf(self, x: np.ndarray): """Cumulative distribution function.""" - params = self.get_params(deep=False) - index = params["index"] - columns = params["columns"] - qv_low = params["qv_low"] - x_unique = np.unique(x) # de-broadcast - cdf_all = cdf_func(x_unique, self.qpd) - cdf_map = np.tile(x_unique, (qv_low.size, 1)).T - cdf = np.zeros((index.shape[0], len(columns))) - for r in range(x.shape[0]): - for c in range(x.shape[1]): - t = np.where(cdf_map[:, c] == x[r][c]) - cdf_part = cdf_all[t][c] - cdf[r][c] = cdf_part - return cdf + lower = self._bc_params["lower"] + delta = self.params["delta"] + kappa = self.params["kappa"] + c = self.params["c"] + n = self.params["n"] + theta = self.params["theta"] + + phi = self.phi + + in_arcsinh = np.log((x - lower) / theta) / kappa + in_sinh = np.arcsinh(in_arcsinh) - np.arcsinh(n * c * delta) + cdf_arr = phi.cdf(np.sinh(in_sinh) / delta) + + return cdf_arr @classmethod def get_test_params(cls, parameter_set="default"): @@ -343,9 +368,9 @@ def get_test_params(cls, parameter_set="default"): params2 = { "alpha": 0.2, "version": "normal", - "qv_low": [-0.3, -0.3, -0.3], - "qv_median": [0.0, 0.0, 0.0], - "qv_high": [0.3, 0.3, 0.3], + "qv_low": [[-0.3], [-0.2], [-0.1]], + "qv_median": [[-0.1], [0.0], [0.1]], + "qv_high": [[0.2], [0.3], [0.4]], "lower": -0.5, "index": pd.RangeIndex(3), "columns": pd.Index(["a"]), @@ -502,9 +527,9 @@ def _pdf(self, x: np.ndarray): in_cdf_der = arcsinh_der(in_arcsinh) * in_arcsinh_der / delta # cdf_arr = phi.cdf(in_cdf) - cdf_arr_dev = phi.pdf(in_cdf) * in_cdf_der + cdf_arr_der = phi.pdf(in_cdf) * in_cdf_der - pdf_arr = cdf_arr_dev + pdf_arr = cdf_arr_der return pdf_arr def _cdf(self, x: np.ndarray): From 6af9b5b1b0ad193718e9797f700e47392d8e03dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 01:31:52 +0100 Subject: [PATCH 05/18] Update qpd.py --- skpro/distributions/qpd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index c66460630..bf34d2fcb 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -295,8 +295,8 @@ def _ppf(self, p: np.ndarray): phi = self.phi - in_sinh = np.arcsinh(phi.ppf(p) * delta) - in_exp = kappa * np.sinh(in_sinh) + np.arcsinh(n * c * delta) + in_sinh = np.arcsinh(phi.ppf(p) * delta) + np.arcsinh(n * c * delta) + in_exp = kappa * np.sinh(in_sinh) ppf_arr = lower + theta * np.exp(in_exp) return ppf_arr From 4ca9d1542cb0dba97555f951587af5df413b2b1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 01:32:00 +0100 Subject: [PATCH 06/18] remove findiff dep --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e2d955e2d..dbaf23f15 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ dependencies = [ [project.optional-dependencies] all_extras = [ "attrs", + "cyclic-boosting>=1.4.0; python_version < '3.12'", "distfit", "lifelines<0.29.0", "mapie", @@ -63,8 +64,6 @@ all_extras = [ "statsmodels>=0.12.1", "tabulate", "uncertainties", - "cyclic-boosting>=1.4.0; python_version < '3.12'", - "findiff", ] dev = [ From 07c04e486d8002aacea4a9ad6a0fc6aed2551f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 02:02:27 +0100 Subject: [PATCH 07/18] qpd_u --- skpro/distributions/qpd.py | 269 +++++++++++-------------------------- 1 file changed, 82 insertions(+), 187 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index bf34d2fcb..33dc84df9 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -231,7 +231,7 @@ class QPD_S(BaseDistribution): _tags = { # packaging info # -------------- - "authors": ["setoguchi-naoki", "felix-wick"], + "authors": ["setoguchi-naoki", "felix-wick", "fkiraly"], "maintainers": ["setoguchi-naoki"], # estimator tags # -------------- @@ -272,16 +272,7 @@ def __init__( phi = _resolve_phi(version) self.phi = phi - alpha = self._bc_params["alpha"] - qv_low = self._bc_params["qv_low"] - qv_median = self._bc_params["qv_median"] - qv_high = self._bc_params["qv_high"] - lower = self._bc_params["lower"] - upper = self._bc_params["upper"] - - params = _prep_qpd_vars( - alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="S", - ) + params = _prep_qpd_vars(phi=phi, mode="S", **self._bc_params) self.params = params def _ppf(self, p: np.ndarray): @@ -302,11 +293,7 @@ def _ppf(self, p: np.ndarray): return ppf_arr def _pdf(self, x: np.ndarray): - """Probability density function. - - this fucntion transform cdf to pdf - because j-qpd's pdf calculation is bit complex - """ + """Probability density function.""" lower = self._bc_params["lower"] delta = self.params["delta"] kappa = self.params["kappa"] @@ -468,16 +455,7 @@ def __init__( phi = _resolve_phi(version) self.phi = phi - alpha = self._bc_params["alpha"] - qv_low = self._bc_params["qv_low"] - qv_median = self._bc_params["qv_median"] - qv_high = self._bc_params["qv_high"] - lower = self._bc_params["lower"] - upper = self._bc_params["upper"] - - params = _prep_qpd_vars( - alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="B", - ) + params = _prep_qpd_vars(phi=phi, mode="B", **self._bc_params) self.params = params def _ppf(self, p: np.ndarray): @@ -497,11 +475,7 @@ def _ppf(self, p: np.ndarray): return ppf_arr def _pdf(self, x: np.ndarray): - """Probability density function. - - this fucntion transform cdf to pdf - because j-qpd's pdf calculation is bit complex - """ + """Probability density function.""" lower = self._bc_params["lower"] rnge = self.params["rnge"] delta = self.params["delta"] @@ -627,14 +601,17 @@ class QPD_U(BaseDistribution): _tags = { # packaging info # -------------- - "authors": ["setoguchi-naoki", "felix-wick"], + "authors": ["setoguchi-naoki", "felix-wick", "fkiraly"], "maintainers": ["setoguchi-naoki"], - "python_dependencies": ["cyclic_boosting>=1.4.0", "findiff"], # estimator tags # -------------- "capabilities:approx": ["pdfnorm", "energy"], "capabilities:exact": ["mean", "var", "cdf", "ppf", "pdf"], "distr:measuretype": "continuous", + "broadcast_init": "on", + "broadcast_params": [ + "alpha", "qv_low", "qv_median", "qv_high", "lower", "upper" + ], } def __init__( @@ -662,146 +639,65 @@ def __init__( self.index = index self.columns = columns - from cyclic_boosting.quantile_matching import J_QPD_extended_U + super().__init__(index=index, columns=columns) - qv_low, qv_median, qv_high = _prep_qpd_params(qv_low, qv_median, qv_high) + # precompute parameters for methods + phi = _resolve_phi(version) + self.phi = phi - if index is None: - index = pd.RangeIndex(qv_low.shape[0]) - self.index = index + params = _prep_qpd_vars(phi=phi, mode="U", **self._bc_params) + self.params = params - if columns is None: - columns = pd.RangeIndex(1) - self.columns = columns + def _ppf(self, p: np.ndarray): + """Quantile function = percent point function = inverse cdf.""" + alpha = self._bc_params["alpha"] + xi = self.params["xi"] + gamma = self.params["gamma"] + delta = self.params["delta"] + kappa = self.params["kappa"] - if version == "normal": - self.phi = norm() - elif version == "logistic": - self.phi = logistic() - else: - raise Exception("Invalid version.") - - if (np.any(qv_low > qv_median)) or np.any(qv_high < qv_median): - warnings.warn( - "The SPT values are not monotonically increasing, " - "each SPT is sorted by value", - stacklevel=2, - ) - idx = np.where((qv_low > qv_median), True, False) + np.where( - (qv_high < qv_median), True, False - ) - un_orderd_idx = np.argwhere(idx > 0).tolist() - warnings.warn(f"sorted index {un_orderd_idx}", stacklevel=2) - for idx in un_orderd_idx: - low, mid, high = sorted([qv_low[idx], qv_median[idx], qv_high[idx]]) - qv_low[idx] = low - qv_median[idx] = mid - qv_high[idx] = high - - iter = np.nditer(qv_low, flags=["c_index"]) - for _i in iter: - jqpd = J_QPD_extended_U( - alpha=alpha, - qv_low=qv_low[iter.index], - qv_median=qv_median[iter.index], - qv_high=qv_high[iter.index], - version=version, - shape=dist_shape, - ) - self.qpd.append(jqpd) + phi = self.phi - super().__init__(index=index, columns=columns) + width = phi.ppf(1 - alpha) + qs = phi.ppf(p) / width - def _mean(self): - """Return expected value of the distribution. - - Please set the upper and lower limits of the random variable correctly. - - Returns - ------- - pd.DataFrame with same rows, columns as `self` - expected value of distribution (entry-wise) - """ - params = self.get_params(deep=False) - lower = params["lower"] - upper = params["upper"] - index = params["index"] - cdf_arr = [] - x = np.linspace(lower, upper, num=int(1e3)) - for qpd in self.qpd: - cdf_arr.append(qpd.cdf(x)) - cdf = np.asarray(cdf_arr) - if cdf.ndim < 2: - cdf = cdf[:, np.newaxis] - loc = exp_func(x, cdf, index.shape[0]) - return loc - - def _var(self): - """Return element/entry-wise variance of the distribution. - - Please set the upper and lower limits of the random variable correctly. - - Returns - ------- - pd.DataFrame with same rows, columns as `self` - variance of distribution (entry-wise) - """ - params = self.get_params(deep=False) - lower = params["lower"] - upper = params["upper"] - index = params["index"] - mean = self.mean().values - cdf_list = [] - x = np.linspace(lower, upper, num=int(1e3)) - for qpd in self.qpd: - cdf_list.append(qpd.cdf(x)) - cdf = np.asarray(cdf_list) - if cdf.ndim < 2: - cdf = cdf[:, np.newaxis] - var = var_func(x, mean, cdf, index.shape[0]) - return var + ppf_arr = xi + kappa * np.sinh((qs - gamma) / delta) + return ppf_arr def _pdf(self, x: np.ndarray): - """Probability density function. + """Probability density function.""" + alpha = self._bc_params["alpha"] + xi = self.params["xi"] + gamma = self.params["gamma"] + delta = self.params["delta"] + kappa = self.params["kappa"] - this fucntion transform cdf to pdf - because j-qpd's pdf calculation is bit complex - """ - return pdf_func(x, self.qpd) + phi = self.phi - def _ppf(self, p: np.ndarray): - """Quantile function = percent point function = inverse cdf.""" - params = self.get_params(deep=False) - index = params["index"] - columns = params["columns"] - qv_low = params["qv_low"] - p_unique = np.unique(p) # de-broadcast - ppf_all = ppf_func(p_unique, self.qpd) - ppf_map = np.tile(p_unique, (qv_low.size, 1)).T - ppf = np.zeros((index.shape[0], len(columns))) - for r in range(p.shape[0]): - for c in range(p.shape[1]): - t = np.where(ppf_map[:, c] == p[r][c]) - ppf_part = ppf_all[t][c] - ppf[r][c] = ppf_part - return ppf + width = phi.ppf(1 - alpha) + + qs = gamma + delta * np.arcsinh((x - xi) / kappa) + qs_der = delta * arcsinh_der((x - xi) / kappa) / kappa + + # cdf_arr = phi.cdf(qs * width) + pdf_arr = phi.pdf(qs * width) * qs_der + return pdf_arr def _cdf(self, x: np.ndarray): """Cumulative distribution function.""" - params = self.get_params(deep=False) - index = params["index"] - columns = params["columns"] - qv_low = params["qv_low"] - x_unique = np.unique(x) # de-broadcast - cdf_all = cdf_func(x_unique, self.qpd) - cdf_map = np.tile(x_unique, (qv_low.size, 1)).T - cdf = np.zeros((index.shape[0], len(columns))) - for r in range(x.shape[0]): - for c in range(x.shape[1]): - t = np.where(cdf_map[:, c] == x[r][c]) - cdf_part = cdf_all[t][c] - cdf[r][c] = cdf_part - return cdf + alpha = self._bc_params["alpha"] + xi = self.params["xi"] + gamma = self.params["gamma"] + delta = self.params["delta"] + kappa = self.params["kappa"] + + phi = self.phi + + width = phi.ppf(1 - alpha) + qs = gamma + delta * np.arcsinh((x - xi) / kappa) + + cdf_arr = phi.cdf(qs * width) + return cdf_arr @classmethod def get_test_params(cls, parameter_set="default"): @@ -818,9 +714,9 @@ def get_test_params(cls, parameter_set="default"): params2 = { "alpha": 0.2, "version": "normal", - "qv_low": [-0.3, -0.3, -0.3], - "qv_median": [0.0, 0.0, 0.0], - "qv_high": [0.3, 0.3, 0.3], + "qv_low": [[-0.3], [-0.2], [-0.1]], + "qv_median": [[-0.1], [0.0], [0.1]], + "qv_high": [[0.2], [0.3], [0.4]], "index": pd.RangeIndex(3), "columns": pd.Index(["a"]), } @@ -902,20 +798,6 @@ def cdf_func(x: np.ndarray, qpd: J_QPD_S | J_QPD_B | list): return cdf -def _prep_qpd_params(qv_low, qv_median, qv_high): - """Prepare parameters for Johnson Quantile-Parameterized Distributions.""" - qv = [qv_low, qv_median, qv_high] - for i, instance in enumerate(qv): - if isinstance(instance, float): - qv[i] = np.array([qv[i]]) - elif isinstance(instance, Sequence): - qv[i] = np.asarray(qv[i]) - qv_low = qv[0].flatten() - qv_median = qv[1].flatten() - qv_high = qv[2].flatten() - return qv_low, qv_median, qv_high - - def _resolve_phi(phi): """Resolve base distribution.""" if phi == "normal": @@ -926,7 +808,9 @@ def _resolve_phi(phi): return phi -def _prep_qpd_vars(alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="B"): +def _prep_qpd_vars( + alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="B", **kwargs, +): """Prepare parameters for Johnson Quantile-Parameterized Distributions. Parameters @@ -969,23 +853,32 @@ def _prep_qpd_vars(alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="B n = np.where(LH2B > 0, 1, -1) n = np.where(LH2B == 0, 0, n) - if mode == "B": + if mode in ["B", "U"]: xi = np.where(LH2B > 0, L, H) xi = np.where(LH2B == 0, B, xi) - elif mode == "S": + if mode == "S": theta = np.where(LH2B > 0, qll, qhl) theta = np.where(LH2B == 0, qml, theta) - - in_arccosh = HL / (2 * HBL) - delta_unn = np.arccosh(in_arccosh) - if mode == "S": - delta = np.sinh(delta_unn) - delta = delta_unn / c + if mode == "U": + theta = np.where(LH2B > 0, BL / HL, HB / HL) + + if mode in ["B", "S"]: + in_arccosh = HL / (2 * HBL) + delta_unn = np.arccosh(in_arccosh) + if mode == "S": + delta = np.sinh(delta_unn) + delta = delta_unn / c + elif mode == "U": + delta = 1.0 / np.arccosh(1 / (2.0 * theta)) + delta = np.where(LH2B == 0, 1, delta) if mode == "B": kappa = HL / np.sinh(2 * delta * c) elif mode == "S": kappa = HBL / (delta * c) + elif mode == "U": + kappa = HL / np.sinh(2.0 / delta) + kappa = np.where(LH2B == 0, HB, kappa) params = { "c": c, @@ -1000,8 +893,10 @@ def _prep_qpd_vars(alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="B if mode == "S": params["theta"] = theta - elif mode == "B": + if mode in ["B", "U"]: params["xi"] = xi + if mode == "U": + params["gamma"] = -np.sign(LH2B) return params From c585017bc48c0628b57327d71a9effe4e6a7efe9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 02:03:01 +0100 Subject: [PATCH 08/18] rename params --- skpro/distributions/qpd.py | 102 ++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 33dc84df9..6c378b6f5 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -272,17 +272,17 @@ def __init__( phi = _resolve_phi(version) self.phi = phi - params = _prep_qpd_vars(phi=phi, mode="S", **self._bc_params) - self.params = params + qpd_params = _prep_qpd_vars(phi=phi, mode="S", **self._bc_params) + self._qpd_params = qpd_params def _ppf(self, p: np.ndarray): """Quantile function = percent point function = inverse cdf.""" lower = self._bc_params["lower"] - delta = self.params["delta"] - kappa = self.params["kappa"] - c = self.params["c"] - n = self.params["n"] - theta = self.params["theta"] + delta = self._qpd_params["delta"] + kappa = self._qpd_params["kappa"] + c = self._qpd_params["c"] + n = self._qpd_params["n"] + theta = self._qpd_params["theta"] phi = self.phi @@ -295,11 +295,11 @@ def _ppf(self, p: np.ndarray): def _pdf(self, x: np.ndarray): """Probability density function.""" lower = self._bc_params["lower"] - delta = self.params["delta"] - kappa = self.params["kappa"] - c = self.params["c"] - n = self.params["n"] - theta = self.params["theta"] + delta = self._qpd_params["delta"] + kappa = self._qpd_params["kappa"] + c = self._qpd_params["c"] + n = self._qpd_params["n"] + theta = self._qpd_params["theta"] phi = self.phi @@ -325,11 +325,11 @@ def _pdf(self, x: np.ndarray): def _cdf(self, x: np.ndarray): """Cumulative distribution function.""" lower = self._bc_params["lower"] - delta = self.params["delta"] - kappa = self.params["kappa"] - c = self.params["c"] - n = self.params["n"] - theta = self.params["theta"] + delta = self._qpd_params["delta"] + kappa = self._qpd_params["kappa"] + c = self._qpd_params["c"] + n = self._qpd_params["n"] + theta = self._qpd_params["theta"] phi = self.phi @@ -455,18 +455,18 @@ def __init__( phi = _resolve_phi(version) self.phi = phi - params = _prep_qpd_vars(phi=phi, mode="B", **self._bc_params) - self.params = params + qpd_params = _prep_qpd_vars(phi=phi, mode="B", **self._bc_params) + self._qpd_params = qpd_params def _ppf(self, p: np.ndarray): """Quantile function = percent point function = inverse cdf.""" lower = self._bc_params["lower"] - rnge = self.params["rnge"] - delta = self.params["delta"] - kappa = self.params["kappa"] - c = self.params["c"] - n = self.params["n"] - xi = self.params["xi"] + rnge = self._qpd_params["rnge"] + delta = self._qpd_params["delta"] + kappa = self._qpd_params["kappa"] + c = self._qpd_params["c"] + n = self._qpd_params["n"] + xi = self._qpd_params["xi"] phi = self.phi @@ -477,12 +477,12 @@ def _ppf(self, p: np.ndarray): def _pdf(self, x: np.ndarray): """Probability density function.""" lower = self._bc_params["lower"] - rnge = self.params["rnge"] - delta = self.params["delta"] - kappa = self.params["kappa"] - c = self.params["c"] - n = self.params["n"] - xi = self.params["xi"] + rnge = self._qpd_params["rnge"] + delta = self._qpd_params["delta"] + kappa = self._qpd_params["kappa"] + c = self._qpd_params["c"] + n = self._qpd_params["n"] + xi = self._qpd_params["xi"] phi = self.phi @@ -509,12 +509,12 @@ def _pdf(self, x: np.ndarray): def _cdf(self, x: np.ndarray): """Cumulative distribution function.""" lower = self._bc_params["lower"] - rnge = self.params["rnge"] - delta = self.params["delta"] - kappa = self.params["kappa"] - c = self.params["c"] - n = self.params["n"] - xi = self.params["xi"] + rnge = self._qpd_params["rnge"] + delta = self._qpd_params["delta"] + kappa = self._qpd_params["kappa"] + c = self._qpd_params["c"] + n = self._qpd_params["n"] + xi = self._qpd_params["xi"] phi = self.phi @@ -645,16 +645,16 @@ def __init__( phi = _resolve_phi(version) self.phi = phi - params = _prep_qpd_vars(phi=phi, mode="U", **self._bc_params) - self.params = params + qpd_params = _prep_qpd_vars(phi=phi, mode="U", **self._bc_params) + self._qpd_params = qpd_params def _ppf(self, p: np.ndarray): """Quantile function = percent point function = inverse cdf.""" alpha = self._bc_params["alpha"] - xi = self.params["xi"] - gamma = self.params["gamma"] - delta = self.params["delta"] - kappa = self.params["kappa"] + xi = self._qpd_params["xi"] + gamma = self._qpd_params["gamma"] + delta = self._qpd_params["delta"] + kappa = self._qpd_params["kappa"] phi = self.phi @@ -667,10 +667,10 @@ def _ppf(self, p: np.ndarray): def _pdf(self, x: np.ndarray): """Probability density function.""" alpha = self._bc_params["alpha"] - xi = self.params["xi"] - gamma = self.params["gamma"] - delta = self.params["delta"] - kappa = self.params["kappa"] + xi = self._qpd_params["xi"] + gamma = self._qpd_params["gamma"] + delta = self._qpd_params["delta"] + kappa = self._qpd_params["kappa"] phi = self.phi @@ -686,10 +686,10 @@ def _pdf(self, x: np.ndarray): def _cdf(self, x: np.ndarray): """Cumulative distribution function.""" alpha = self._bc_params["alpha"] - xi = self.params["xi"] - gamma = self.params["gamma"] - delta = self.params["delta"] - kappa = self.params["kappa"] + xi = self._qpd_params["xi"] + gamma = self._qpd_params["gamma"] + delta = self._qpd_params["delta"] + kappa = self._qpd_params["kappa"] phi = self.phi From 8606c1b1178d268863383a55e03996b7b9fac1b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 02:03:20 +0100 Subject: [PATCH 09/18] remove deps --- skpro/distributions/qpd.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 6c378b6f5..5d0054b79 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -85,7 +85,6 @@ class QPD_Johnson(_DelegatedDistribution): # -------------- "authors": ["setoguchi-naoki", "felix-wick", "fkiraly"], "maintainers": ["setoguchi-naoki"], - "python_dependencies": ["cyclic_boosting>=1.4.0", "findiff"], # estimator tags # -------------- "capabilities:approx": ["pdfnorm", "energy"], From c4b4d6e3c02161f2b2a2ae777914a4f16ec07da4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 02:14:29 +0100 Subject: [PATCH 10/18] remove dead code --- skpro/distributions/qpd.py | 80 -------------------------------------- 1 file changed, 80 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 5d0054b79..7ed16d874 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -9,13 +9,8 @@ "setoguchi-naoki", ] # interface only. Cyclic boosting authors in cyclic_boosting package -import typing -import warnings from typing import Sequence -if typing.TYPE_CHECKING: - from cyclic_boosting.quantile_matching import J_QPD_S, J_QPD_B - import numpy as np import pandas as pd from scipy.stats import logistic, norm @@ -722,81 +717,6 @@ def get_test_params(cls, parameter_set="default"): return [params1, params2] -def calc_pdf(cdf: np.ndarray) -> np.ndarray: - """Return pdf value for all samples.""" - from findiff import FinDiff - - dx = 1e-6 - derivative = FinDiff(1, dx, 1) - pdf = np.asarray(derivative(cdf)) - return pdf - - -def exp_func(x: np.ndarray, cdf: np.ndarray, size: int): - """Return Expectation.""" - pdf = calc_pdf(cdf) - x = np.tile(x, (size, 1)) - loc = np.trapz(x * pdf, x, dx=1e-6, axis=1) - return loc - - -def var_func(x: np.ndarray, mu: np.ndarray, cdf: np.ndarray, size: int): - """Return Variance.""" - pdf = calc_pdf(cdf) - x = np.tile(x, (size, 1)) - var = np.trapz(((x - mu) ** 2) * pdf, x, dx=1e-6, axis=1) - return var - - -def pdf_func(x: np.ndarray, qpd: J_QPD_S | J_QPD_B | list): - """Return pdf value.""" - pdf = np.zeros_like(x) - for r in range(x.shape[0]): - for c in range(x.shape[1]): - element = x[r][c] - x0 = np.linspace(element, element + 1e-3, num=3) - if isinstance(qpd, list): - cdf = np.asarray([func.cdf(x0) for func in qpd]) - cdf = cdf.reshape(cdf.shape[0], -1) - else: - cdf = qpd.cdf(x0) - if cdf.ndim < 2: - for _ in range(2 - cdf.ndim): - cdf = cdf[:, np.newaxis] - cdf = cdf.T - pdf_part = calc_pdf(cdf) - pdf[r][c] = pdf_part[0][0] - return pdf - - -def ppf_func(x: np.ndarray, qpd: J_QPD_S | J_QPD_B | list): - """Return ppf value.""" - if isinstance(qpd, list): - ppf = np.asarray([func.ppf(x) for func in qpd]) - ppf = ppf.reshape(ppf.shape[0], -1) - else: - ppf = qpd.ppf(x) - if ppf.ndim < 2: - for _ in range(2 - ppf.ndim): - ppf = ppf[np.newaxis] - ppf = ppf.T - return ppf - - -def cdf_func(x: np.ndarray, qpd: J_QPD_S | J_QPD_B | list): - """Return cdf value.""" - if isinstance(qpd, list): - cdf = np.asarray([func.cdf(x) for func in qpd]) - cdf = cdf.reshape(cdf.shape[0], -1) - else: - cdf = qpd.cdf(x) - if cdf.ndim < 2: - for _ in range(2 - cdf.ndim): - cdf = cdf[np.newaxis] - cdf = cdf.T - return cdf - - def _resolve_phi(phi): """Resolve base distribution.""" if phi == "normal": From 40b9c2b0d1443009440a8151a015f7f7fe97991b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 02:22:40 +0100 Subject: [PATCH 11/18] Revert "remove dead code" This reverts commit c4b4d6e3c02161f2b2a2ae777914a4f16ec07da4. --- skpro/distributions/qpd.py | 80 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 7ed16d874..5d0054b79 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -9,8 +9,13 @@ "setoguchi-naoki", ] # interface only. Cyclic boosting authors in cyclic_boosting package +import typing +import warnings from typing import Sequence +if typing.TYPE_CHECKING: + from cyclic_boosting.quantile_matching import J_QPD_S, J_QPD_B + import numpy as np import pandas as pd from scipy.stats import logistic, norm @@ -717,6 +722,81 @@ def get_test_params(cls, parameter_set="default"): return [params1, params2] +def calc_pdf(cdf: np.ndarray) -> np.ndarray: + """Return pdf value for all samples.""" + from findiff import FinDiff + + dx = 1e-6 + derivative = FinDiff(1, dx, 1) + pdf = np.asarray(derivative(cdf)) + return pdf + + +def exp_func(x: np.ndarray, cdf: np.ndarray, size: int): + """Return Expectation.""" + pdf = calc_pdf(cdf) + x = np.tile(x, (size, 1)) + loc = np.trapz(x * pdf, x, dx=1e-6, axis=1) + return loc + + +def var_func(x: np.ndarray, mu: np.ndarray, cdf: np.ndarray, size: int): + """Return Variance.""" + pdf = calc_pdf(cdf) + x = np.tile(x, (size, 1)) + var = np.trapz(((x - mu) ** 2) * pdf, x, dx=1e-6, axis=1) + return var + + +def pdf_func(x: np.ndarray, qpd: J_QPD_S | J_QPD_B | list): + """Return pdf value.""" + pdf = np.zeros_like(x) + for r in range(x.shape[0]): + for c in range(x.shape[1]): + element = x[r][c] + x0 = np.linspace(element, element + 1e-3, num=3) + if isinstance(qpd, list): + cdf = np.asarray([func.cdf(x0) for func in qpd]) + cdf = cdf.reshape(cdf.shape[0], -1) + else: + cdf = qpd.cdf(x0) + if cdf.ndim < 2: + for _ in range(2 - cdf.ndim): + cdf = cdf[:, np.newaxis] + cdf = cdf.T + pdf_part = calc_pdf(cdf) + pdf[r][c] = pdf_part[0][0] + return pdf + + +def ppf_func(x: np.ndarray, qpd: J_QPD_S | J_QPD_B | list): + """Return ppf value.""" + if isinstance(qpd, list): + ppf = np.asarray([func.ppf(x) for func in qpd]) + ppf = ppf.reshape(ppf.shape[0], -1) + else: + ppf = qpd.ppf(x) + if ppf.ndim < 2: + for _ in range(2 - ppf.ndim): + ppf = ppf[np.newaxis] + ppf = ppf.T + return ppf + + +def cdf_func(x: np.ndarray, qpd: J_QPD_S | J_QPD_B | list): + """Return cdf value.""" + if isinstance(qpd, list): + cdf = np.asarray([func.cdf(x) for func in qpd]) + cdf = cdf.reshape(cdf.shape[0], -1) + else: + cdf = qpd.cdf(x) + if cdf.ndim < 2: + for _ in range(2 - cdf.ndim): + cdf = cdf[np.newaxis] + cdf = cdf.T + return cdf + + def _resolve_phi(phi): """Resolve base distribution.""" if phi == "normal": From 52e73b3f1e289d4f0db2283121a3cd448c76d89a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 02:22:43 +0100 Subject: [PATCH 12/18] Reapply "remove dead code" This reverts commit 40b9c2b0d1443009440a8151a015f7f7fe97991b. --- skpro/distributions/qpd.py | 80 -------------------------------------- 1 file changed, 80 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 5d0054b79..7ed16d874 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -9,13 +9,8 @@ "setoguchi-naoki", ] # interface only. Cyclic boosting authors in cyclic_boosting package -import typing -import warnings from typing import Sequence -if typing.TYPE_CHECKING: - from cyclic_boosting.quantile_matching import J_QPD_S, J_QPD_B - import numpy as np import pandas as pd from scipy.stats import logistic, norm @@ -722,81 +717,6 @@ def get_test_params(cls, parameter_set="default"): return [params1, params2] -def calc_pdf(cdf: np.ndarray) -> np.ndarray: - """Return pdf value for all samples.""" - from findiff import FinDiff - - dx = 1e-6 - derivative = FinDiff(1, dx, 1) - pdf = np.asarray(derivative(cdf)) - return pdf - - -def exp_func(x: np.ndarray, cdf: np.ndarray, size: int): - """Return Expectation.""" - pdf = calc_pdf(cdf) - x = np.tile(x, (size, 1)) - loc = np.trapz(x * pdf, x, dx=1e-6, axis=1) - return loc - - -def var_func(x: np.ndarray, mu: np.ndarray, cdf: np.ndarray, size: int): - """Return Variance.""" - pdf = calc_pdf(cdf) - x = np.tile(x, (size, 1)) - var = np.trapz(((x - mu) ** 2) * pdf, x, dx=1e-6, axis=1) - return var - - -def pdf_func(x: np.ndarray, qpd: J_QPD_S | J_QPD_B | list): - """Return pdf value.""" - pdf = np.zeros_like(x) - for r in range(x.shape[0]): - for c in range(x.shape[1]): - element = x[r][c] - x0 = np.linspace(element, element + 1e-3, num=3) - if isinstance(qpd, list): - cdf = np.asarray([func.cdf(x0) for func in qpd]) - cdf = cdf.reshape(cdf.shape[0], -1) - else: - cdf = qpd.cdf(x0) - if cdf.ndim < 2: - for _ in range(2 - cdf.ndim): - cdf = cdf[:, np.newaxis] - cdf = cdf.T - pdf_part = calc_pdf(cdf) - pdf[r][c] = pdf_part[0][0] - return pdf - - -def ppf_func(x: np.ndarray, qpd: J_QPD_S | J_QPD_B | list): - """Return ppf value.""" - if isinstance(qpd, list): - ppf = np.asarray([func.ppf(x) for func in qpd]) - ppf = ppf.reshape(ppf.shape[0], -1) - else: - ppf = qpd.ppf(x) - if ppf.ndim < 2: - for _ in range(2 - ppf.ndim): - ppf = ppf[np.newaxis] - ppf = ppf.T - return ppf - - -def cdf_func(x: np.ndarray, qpd: J_QPD_S | J_QPD_B | list): - """Return cdf value.""" - if isinstance(qpd, list): - cdf = np.asarray([func.cdf(x) for func in qpd]) - cdf = cdf.reshape(cdf.shape[0], -1) - else: - cdf = qpd.cdf(x) - if cdf.ndim < 2: - for _ in range(2 - cdf.ndim): - cdf = cdf[np.newaxis] - cdf = cdf.T - return cdf - - def _resolve_phi(phi): """Resolve base distribution.""" if phi == "normal": From fe66e0f2799687fb23a2ecd0bb137c5dd9c78723 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 02:25:10 +0100 Subject: [PATCH 13/18] Update qpd.py --- skpro/distributions/qpd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 7ed16d874..51cf0667f 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -785,7 +785,7 @@ def _prep_qpd_vars( in_arccosh = HL / (2 * HBL) delta_unn = np.arccosh(in_arccosh) if mode == "S": - delta = np.sinh(delta_unn) + delta_unn = np.sinh(delta_unn) delta = delta_unn / c elif mode == "U": delta = 1.0 / np.arccosh(1 / (2.0 * theta)) From 15284caaf7bfe1e0cb45d251c2de29bfecc1a5cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 02:29:12 +0100 Subject: [PATCH 14/18] Update qpd.py --- skpro/distributions/qpd.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 51cf0667f..0d88ca9c1 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -234,7 +234,12 @@ class QPD_S(BaseDistribution): "distr:measuretype": "continuous", "broadcast_init": "on", "broadcast_params": [ - "alpha", "qv_low", "qv_median", "qv_high", "lower", "upper" + "alpha", + "qv_low", + "qv_median", + "qv_high", + "lower", + "upper", ], } @@ -417,7 +422,12 @@ class QPD_B(BaseDistribution): "distr:measuretype": "continuous", "broadcast_init": "on", "broadcast_params": [ - "alpha", "qv_low", "qv_median", "qv_high", "lower", "upper" + "alpha", + "qv_low", + "qv_median", + "qv_high", + "lower", + "upper", ], } @@ -604,7 +614,12 @@ class QPD_U(BaseDistribution): "distr:measuretype": "continuous", "broadcast_init": "on", "broadcast_params": [ - "alpha", "qv_low", "qv_median", "qv_high", "lower", "upper" + "alpha", + "qv_low", + "qv_median", + "qv_high", + "lower", + "upper", ], } @@ -728,7 +743,15 @@ def _resolve_phi(phi): def _prep_qpd_vars( - alpha, qv_low, qv_median, qv_high, lower, upper, phi, mode="B", **kwargs, + alpha, + qv_low, + qv_median, + qv_high, + lower, + upper, + phi, + mode="B", + **kwargs, ): """Prepare parameters for Johnson Quantile-Parameterized Distributions. @@ -822,4 +845,4 @@ def _prep_qpd_vars( def arcsinh_der(x): """Return derivative of arcsinh.""" - return 1 / np.sqrt(1 + x ** 2) + return 1 / np.sqrt(1 + x**2) From 40ffcc891568562fe7eb59a82d8292c655c1500b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 02:37:02 +0100 Subject: [PATCH 15/18] fixes --- skpro/distributions/qpd.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 0d88ca9c1..9baa8ec60 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -776,15 +776,30 @@ def _prep_qpd_vars( B = bounded mode, S = lower semi-bounded mode """ c = phi.ppf(1 - alpha) - rnge = upper - lower + + if mode == "U": + lower = 0 qll = qv_low - lower qml = qv_median - lower qhl = qv_high - lower - L = phi.ppf(qll / rnge) - H = phi.ppf(qhl / rnge) - B = phi.ppf(qml / rnge) + if mode == "B": + rnge = upper - lower + + def tfun(x): + return phi.ppf(x / rnge) + + elif mode == "S": + tfun = np.log + elif mode == "U": + + def tfun(x): + return x + + L = tfun(qll) + H = tfun(qhl) + B = tfun(qml) HL = H - L BL = B - L HB = H - B @@ -824,7 +839,6 @@ def _prep_qpd_vars( params = { "c": c, - "rnge": rnge, "L": L, "H": H, "B": B, @@ -835,6 +849,8 @@ def _prep_qpd_vars( if mode == "S": params["theta"] = theta + if mode == "B": + params["rnge"] = rnge if mode in ["B", "U"]: params["xi"] = xi if mode == "U": From 0f668c06490aa5ea09804862b145e6d22028b14b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 02:39:06 +0100 Subject: [PATCH 16/18] linting --- skpro/distributions/qpd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 9baa8ec60..6697167ef 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -787,7 +787,7 @@ def _prep_qpd_vars( if mode == "B": rnge = upper - lower - def tfun(x): + def tfun(x): return phi.ppf(x / rnge) elif mode == "S": From a8aed1e7a0a64a52676124ced9be67acd411639c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 10:46:33 +0100 Subject: [PATCH 17/18] qpd johnson --- skpro/distributions/qpd.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skpro/distributions/qpd.py b/skpro/distributions/qpd.py index 6697167ef..4b7c7c4c0 100644 --- a/skpro/distributions/qpd.py +++ b/skpro/distributions/qpd.py @@ -152,9 +152,9 @@ def get_test_params(cls, parameter_set="default"): params2 = { "alpha": 0.1, "version": "normal", - "qv_low": [0.2, 0.2, 0.2], - "qv_median": [0.5, 0.5, 0.5], - "qv_high": [0.8, 0.8, 0.8], + "qv_low": [[-0.3], [-0.2], [-0.1]], + "qv_median": [[-0.1], [0.0], [0.1]], + "qv_high": [[0.2], [0.3], [0.4]], "index": pd.Index([1, 2, 5]), "columns": pd.Index(["a"]), } From 73eb853593bdc04b9fa480534915e9ca65cf738b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 15 May 2024 10:48:39 +0100 Subject: [PATCH 18/18] fix test --- skpro/distributions/tests/test_qpd.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skpro/distributions/tests/test_qpd.py b/skpro/distributions/tests/test_qpd.py index f0868b052..0bf070516 100644 --- a/skpro/distributions/tests/test_qpd.py +++ b/skpro/distributions/tests/test_qpd.py @@ -37,8 +37,8 @@ def test_qpd_b_pdf(): upper=0.5, ) x = np.linspace(-0.45, 0.45, 100) - pdf = [qpd_linear.pdf(x_) for x_ in x] - assert np.testing.assert_allclose(np.sum(pdf), 1.0, rtol=1e-5) + pdf_vals = [qpd_linear.pdf(x_) for x_ in x] + np.testing.assert_allclose(pdf_vals, 1.0, rtol=1e-5) @pytest.mark.skipif(