Skip to content

Commit

Permalink
Added HPD calculation to ensemble (#1431)
Browse files Browse the repository at this point in the history
* Added HPD calculation to ensemble

The ensemble from_sample method has now an additional rel_cutoff argument. rel_cutoff allwos to cut the posterior to the alpha % highest density (here non-normalized posterior probability).

* Update ensemble.py

fixes print statement that was only used for debugging

* Update pypesto/ensemble/ensemble.py

Adds default value for burn_in

Co-authored-by: Paul Jonas Jost <70631928+PaulJonasJost@users.noreply.github.com>

* Update pypesto/ensemble/ensemble.py

Fixes wrong default value for the ci_level

Co-authored-by: Paul Jonas Jost <70631928+PaulJonasJost@users.noreply.github.com>

* added test for the hpd calculation and renamed rel_cutoff

* Forgot name change in test

* Integrated comments

---------

Co-authored-by: Paul Jonas Jost <70631928+PaulJonasJost@users.noreply.github.com>
Co-authored-by: PaulJonasJost <jost.pauljonas@gmail.com>
  • Loading branch information
3 people authored Sep 30, 2024
1 parent 61c912d commit 951f0db
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 0 deletions.
88 changes: 88 additions & 0 deletions pypesto/ensemble/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,7 @@ def __init__(
def from_sample(
result: Result,
remove_burn_in: bool = True,
ci_level: float = None,
chain_slice: slice = None,
x_names: Sequence[str] = None,
lower_bound: np.ndarray = None,
Expand All @@ -571,6 +572,10 @@ def from_sample(
remove_burn_in:
Exclude parameter vectors from the ensemble if they are in the
"burn-in".
ci_level:
A form of relative cutoff. Exclude parameter vectors, for which the
(non-normalized) posterior value is not within the `ci_level` best
values.
chain_slice:
Subset the chain with a slice. Any "burn-in" removal occurs first.
x_names:
Expand All @@ -594,14 +599,23 @@ def from_sample(
lower_bound = result.problem.lb
if upper_bound is None:
upper_bound = result.problem.ub
burn_in = 0
if remove_burn_in:
if result.sample_result.burn_in is None:
geweke_test(result)
burn_in = result.sample_result.burn_in
x_vectors = x_vectors[burn_in:]

# added cutoff
if ci_level is not None:
x_vectors = calculate_hpd(
result=result, burn_in=burn_in, ci_level=ci_level
)

if chain_slice is not None:
x_vectors = x_vectors[chain_slice]
x_vectors = x_vectors.T

return Ensemble(
x_vectors=x_vectors,
x_names=x_names,
Expand Down Expand Up @@ -1253,3 +1267,77 @@ def calculate_cutoff(

range = chi2.ppf(q=percentile / 100, df=df)
return fval_opt + range


def calculate_hpd(
result: Result,
burn_in: int = 0,
ci_level: float = 0.95,
):
"""
Calculate Highest Posterior Density (HPD) samples.
The HPD is calculated for a user-defined credibility level (`ci_level`). The
HPD includes all parameter vectors with a (non-normalized) posterior
probability that is higher than the lowest `1-ci_level` %
posterior probability values.
Parameters
----------
result:
The sampling result from which to create the ensemble.
burn_in:
Burn in index that is cut off before HPD is calculated.
ci_level:
Credibility level of the resulting HPD. 0.95 corresponds to the 95% CI.
Only values between 0 and 1 are allowed.
Returns
-------
The HPD parameter vectors.
"""
if not 0 <= ci_level <= 1:
raise ValueError(
f"ci_level={ci_level} is not valid. Choose 0<=ci_level<=1."
)
# get names of chain parameters
param_names = result.problem.get_reduced_vector(result.problem.x_names)

# Get converged parameter samples as numpy arrays
chain = np.asarray(result.sample_result.trace_x[0, burn_in:, :])
neglogpost = result.sample_result.trace_neglogpost[0, burn_in:]
indices = np.arange(
burn_in, len(result.sample_result.trace_neglogpost[0, :])
)

# create df first, as we need to match neglogpost to the according parameter values
pd_params = pd.DataFrame(chain, columns=param_names)
pd_fval = pd.DataFrame(neglogpost, columns=["neglogPosterior"])
pd_iter = pd.DataFrame(indices, columns=["iteration"])

params_df = pd.concat(
[pd_params, pd_fval, pd_iter], axis=1, ignore_index=False
)

# get lower neglogpost bound for HPD
# sort neglogpost values of MCMC chain without burn in
neglogpost_sort = np.sort(neglogpost)

# Get converged chain length
chain_length = len(neglogpost)

# most negative ci percentage samples of the posterior are kept to get the according HPD
neglogpost_lower_bound = neglogpost_sort[int(chain_length * (ci_level))]

# cut posterior to hpd
hpd_params_df = params_df[
params_df["neglogPosterior"] <= neglogpost_lower_bound
]

# convert df to ensemble vector
hpd_params_df_vals_only = hpd_params_df.drop(
columns=["iteration", "neglogPosterior"]
)
hpd_ensemble_vector = hpd_params_df_vals_only.to_numpy()

return hpd_ensemble_vector
47 changes: 47 additions & 0 deletions test/base/test_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import pypesto
import pypesto.optimize as optimize
import pypesto.sample as sample
from pypesto.C import AMICI_STATUS, AMICI_T, AMICI_Y, MEAN, WEIGHTED_SIGMA
from pypesto.engine import MultiProcessEngine
from pypesto.ensemble import (
Expand Down Expand Up @@ -224,3 +225,49 @@ def post_processor(amici_outputs, output_type, output_ids):
progress_bar=False,
)
return ensemble_prediction


def test_hpd_calculation():
"""Test the calculation of Highest Posterior Density (HPD)."""
problem = create_petab_problem()

sampler = sample.AdaptiveMetropolisSampler(
options={"show_progress": False}
)

result = optimize.minimize(
problem=problem,
n_starts=3,
progress_bar=False,
)

result = sample.sample(
problem=problem,
sampler=sampler,
n_samples=100,
result=result,
)

# Manually set up sample (only for testing)
burn_in = 1
result.sample_result.burn_in = burn_in
result.sample_result.trace_neglogpost[0][1:] = np.random.permutation(
np.arange(len(result.sample_result.trace_neglogpost[0][1:]))
)

hpd_ensemble = Ensemble.from_sample(
result=result, remove_burn_in=True, ci_level=0.95
)

expected_length = (
int((result.sample_result.trace_x[0][burn_in:].shape[0]) * 0.95) + 1
)
# Check that the HPD parameters have the expected shape
assert hpd_ensemble.x_vectors.shape == (problem.dim, expected_length)
x_indices = np.where(result.sample_result.trace_neglogpost[0][1:] <= 95)[0]
assert np.all(
[
np.any(np.all(x[:, None] == hpd_ensemble.x_vectors, axis=0))
for x in result.sample_result.trace_x[0][burn_in:][x_indices]
]
)

0 comments on commit 951f0db

Please sign in to comment.