Skip to content

Commit

Permalink
fix: ratio plot rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
Ming-Yan committed Apr 4, 2024
1 parent 5ef07a1 commit b2288f7
Show file tree
Hide file tree
Showing 5 changed files with 423 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/mplhep/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
histplot,
make_square_add_cbar,
mpl_magic,
ratioplot,
rescale_to_axessize,
sort_legend,
ylow,
Expand Down Expand Up @@ -63,6 +64,7 @@
# Log plot functions
"histplot",
"hist2dplot",
"ratioplot",
"mpl_magic",
"yscale_legend",
"yscale_anchored_text",
Expand Down
56 changes: 56 additions & 0 deletions src/mplhep/error_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,59 @@ def poisson_interval(sumw, sumw2, coverage=_coverage1sd):
interval = np.array([lo, hi])
interval[interval == np.nan] = 0.0 # chi2.ppf produces nan for counts=0
return interval


def normal_interval(pw, tw, pw2, tw2, coverage=_coverage1sd):
"""Compute errors based on the expansion of pass/(pass + fail), possibly weighted
Parameters
----------
pw : np.ndarray
Numerator, or number of (weighted) successes, vectorized
tw : np.ndarray
Denominator or number of (weighted) trials, vectorized
pw2 : np.ndarray
Numerator sum of weights squared, vectorized
tw2 : np.ndarray
Denominator sum of weights squared, vectorized
coverage : float, optional
Central coverage interval, defaults to 68%
c.f. https://root.cern.ch/doc/master/TEfficiency_8cxx_source.html#l02515
"""

eff = pw / tw

variance = (pw2 * (1 - 2 * eff) + tw2 * eff**2) / (tw**2)
sigma = np.sqrt(variance)

prob = 0.5 * (1 - coverage)
delta = np.zeros_like(sigma)
delta[sigma != 0] = scipy.stats.norm.ppf(prob, scale=sigma[sigma != 0])

lo = eff - np.minimum(eff + delta, np.ones_like(eff))
hi = np.maximum(eff - delta, np.zeros_like(eff)) - eff

return np.array([lo, hi])


def clopper_pearson_interval(num, denom, coverage=_coverage1sd):
"""Compute Clopper-Pearson coverage interval for a binomial distribution
Parameters
----------
num : numpy.ndarray
Numerator, or number of successes, vectorized
denom : numpy.ndarray
Denominator or number of trials, vectorized
coverage : float, optional
Central coverage interval, defaults to 68%
c.f. http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
"""
if np.any(num > denom):
raise ValueError(
"Found numerator larger than denominator while calculating binomial uncertainty"
)
lo = scipy.stats.beta.ppf((1 - coverage) / 2, num, denom - num + 1)
hi = scipy.stats.beta.ppf((1 + coverage) / 2, num + 1, denom - num)
interval = np.array([lo, hi])
interval[:, num == 0.0] = 0.0
interval[1, num == denom] = 1.0
return interval
293 changes: 293 additions & 0 deletions src/mplhep/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,14 @@
from matplotlib.transforms import Bbox
from mpl_toolkits.axes_grid1 import axes_size, make_axes_locatable

from .error_estimation import (
clopper_pearson_interval,
normal_interval,
poisson_interval,
)
from .utils import (
Plottable,
compatible,
get_histogram_axes_title,
get_plottable_protocol_bins,
hist_object_handler,
Expand Down Expand Up @@ -920,6 +926,293 @@ def hist2dplot(
return ColormeshArtists(pc, cb_obj, text_artists)


# copy functions coffea.hist.plotratio https://github.com/CoffeaTeam/coffea/blob/master/coffea/hist/plot.py to boost-hist
def ratioplot(
num,
denom,
ax=None,
clear=True,
flow=None,
xerr=False,
error_opts=None,
denom_fill_opts=None,
guide_opts=None,
unc="num",
label=None,
ext_denom_error=None,
):
"""Create a ratio plot, dividing two compatible histograms
Parameters
----------
num : Hist
Numerator, a single-axis histogram
denom : Hist
Denominator, a single-axis histogram
ax : matplotlib.axes.Axes, optional
Axes object (if None, one is created)
clear : bool, optional
Whether to clear Axes before drawing (if passed); if False, this function will skip drawing the legend
flow : str, optional {None, "show", "sum"}
Whether plot the under/overflow bin. If "show", add additional under/overflow bin. If "sum", add the under/overflow bin content to first/last bin.
xerr: bool, optional
If true, then error bars are drawn for x-axis to indicate the size of the bin.
error_opts : dict, optional
A dictionary of options to pass to the matplotlib
`ax.errorbar <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.pyplot.errorbar.html>`_ call
internal to this function. Leave blank for defaults. Some special options are interpreted by
this function and not passed to matplotlib: 'emarker' (default: '') specifies the marker type
to place at cap of the errorbar.
denom_fill_opts : dict, optional
A dictionary of options to pass to the matplotlib
`ax.fill_between <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.fill_between.html>`_ call
internal to this function, filling the denominator uncertainty band. Leave blank for defaults.
guide_opts : dict, optional
A dictionary of options to pass to the matplotlib
`ax.axhline <https://matplotlib.org/3.1.1/api/_as_gen/matplotlib.axes.Axes.axhline.html>`_ call
internal to this function, to plot a horizontal guide line at ratio of 1. Leave blank for defaults.
unc : str, optional
Uncertainty calculation option: 'clopper-pearson' interval for efficiencies; 'poisson-ratio' interval
for ratio of poisson distributions; 'num' poisson interval of numerator scaled by denominator value
(common for data/mc, for better or worse).
label : str, optional
Associate a label to this entry (note: y axis label set by ``num.label``)
ext_denom_error: list of np.array[error_up,error_down], optional
External MC errors not stored in the original histogram
Returns
-------
ax : matplotlib.axes.Axes
A matplotlib `Axes <https://matplotlib.org/3.1.1/api/axes_api.html>`_ object
"""
if ax is None:
fig, ax = plt.subplots(1, 1)
else:
if not isinstance(ax, plt.Axes):
raise ValueError("ax must be a matplotlib Axes object")
if clear:
ax.clear()
if not compatible(num, denom):
raise ValueError(
"numerator and denominator histograms have incompatible axis definitions"
)
if len(num.axes) > 1:
raise ValueError("ratioplot() can only support one-dimensional histograms")
if error_opts is None and denom_fill_opts is None and guide_opts is None:
error_opts = {}
denom_fill_opts = {}
guide_opts = {}
axis = num.axes[0]

ax.set_xlabel(axis.label)
ax.set_ylabel(num.label)
edges = axis.edges
if flow == "show":
edges = np.array(
[
edges[0] - (edges[1] - edges[0]) * 3,
edges[0] - (edges[1] - edges[0]),
*edges,
edges[-1] + (edges[1] - edges[0]),
edges[-1] + (edges[1] - edges[0]) * 3,
]
)
centers = (edges[1:] + edges[:-1]) / 2
ranges = (edges[1:] - edges[:-1]) / 2 if xerr else None
sumw_num, sumw2_num = num.values(), num.variances()
sumw_denom, sumw2_denom = denom.values(), denom.variances()

if flow == "show":
sumw_num, sumw2_num = (
num.view(flow=True)["value"],
num.view(flow=True)["variance"],
)
sumw_num, sumw2_num = np.insert(sumw_num, -1, 0), np.insert(sumw2_num, -1, 0)
sumw_num, sumw2_num = np.insert(sumw_num, 1, 0), np.insert(sumw2_num, 1, 0)
sumw_denom, sumw2_denom = (
denom.view(flow=True)["value"],
denom.view(flow=True)["variance"],
)
sumw_denom, sumw2_denom = (
np.insert(sumw_denom, -1, 0),
np.insert(sumw2_denom, -1, 0),
)
sumw_denom, sumw2_denom = (
np.insert(sumw_denom, 1, 0),
np.insert(sumw2_denom, 1, 0),
)

elif flow == "sum":
sumw_num[0], sumw2_num[0] = (
sumw_num[0] + num.view(flow=True)["value"][0],
sumw2_num[0] + num.view(flow=True)["value"][0],
)
sumw_num[-1], sumw2_num[-1] = (
sumw_num[-1] + num.view(flow=True)["value"][-1],
sumw2_num[-1] + num.view(flow=True)["value"][-1],
)
sumw_denom[0], sumw2_denom[0] = (
sumw_denom[0] + denom.view(flow=True)["value"][0],
sumw2_denom[0] + denom.view(flow=True)["value"][0],
)
sumw_denom[-1], sumw2_denom[-1] = (
sumw_denom[-1] + denom.view(flow=True)["value"][-1],
sumw2_denom[-1] + denom.view(flow=True)["value"][-1],
)
else:
sumw_num, sumw2_num = num.values(), num.variances()
sumw_denom, sumw2_denom = denom.values(), denom.variances()
rsumw = sumw_num / sumw_denom
if unc == "clopper-pearson":
rsumw_err = np.abs(clopper_pearson_interval(sumw_num, sumw_denom) - rsumw)
elif unc == "poisson-ratio":
# poisson ratio n/m is equivalent to binomial n/(n+m)
rsumw_err = np.abs(
clopper_pearson_interval(sumw_num, sumw_num + sumw_denom) - rsumw
)
elif unc == "num":
rsumw_err = np.abs(poisson_interval(rsumw, sumw2_num / sumw_denom**2) - rsumw)
elif unc == "efficiency":
rsumw_err = np.abs(
normal_interval(sumw_num, sumw_denom, sumw2_num, sumw2_denom)
)
else:
raise ValueError("Unrecognized uncertainty option: %r" % unc)

# if additional uncertainties
if ext_denom_error is not None:
if denom_fill_opts is {}:
raise ValueError("suggest to use different style for additional error")
if np.shape(rsumw_err) != np.shape(ext_denom_error / sumw_denom):
raise ValueError("Incompatible error length")
rsumw_err = np.sqrt(rsumw_err**2 + (ext_denom_error / sumw_denom) ** 2)

if error_opts is not None:
opts = {
"label": label,
"linestyle": "none",
"lw": 1,
"marker": "o",
"color": "k",
}
opts.update(error_opts)
emarker = opts.pop("emarker", "")
errbar = ax.errorbar(x=centers, y=rsumw, xerr=ranges, yerr=rsumw_err, **opts)
plt.setp(errbar[1], "marker", emarker)
if denom_fill_opts is not None:
unity = np.ones_like(sumw_denom)
denom_unc = poisson_interval(unity, sumw2_denom / sumw_denom**2)
opts = {
"hatch": "////",
"facecolor": "none",
"lw": 0,
"color": "k",
"alpha": 0.4,
}
if ext_denom_error is not None:
denom_unc[0] = (
unity[0]
- np.sqrt(
(denom_unc - unity) ** 2 + (ext_denom_error / sumw_denom) ** 2
)[0]
)
denom_unc[1] = (
unity[1]
+ np.sqrt(
(denom_unc - unity) ** 2 + (ext_denom_error / sumw_denom) ** 2
)[1]
)
opts = denom_fill_opts
ax.stairs(denom_unc[0], edges=edges, baseline=denom_unc[1], **opts)
if guide_opts is not None:
opts = {"linestyle": "--", "color": (0, 0, 0, 0.5), "linewidth": 1}
opts.update(guide_opts)
if clear is not False:
ax.axhline(1.0, **opts)

if clear:
ax.autoscale(axis="x", tight=True)
ax.set_ylim(0, None)
if flow == "hint" or flow == "show":
d = 0.9 # proportion of vertical to horizontal extent of the slanted line
trans = mpl.transforms.blended_transform_factory(ax.transData, ax.transAxes)
kwargs = dict(
marker=[(-0.5, -d), (0.5, d)],
markersize=15,
linestyle="none",
color="k",
mec="k",
mew=1,
clip_on=False,
transform=trans,
)
xticks = ax.get_xticks().tolist()
if sumw_num[0] > 0.0 or sumw_denom[0] > 0.0:
if flow == "hint":
ax.plot(
[
edges[0] - (edges[1] - edges[0]) / 2.0,
edges[0],
],
[0, 0],
**kwargs,
)
ax.plot(
[
edges[0] - (edges[1] - edges[0]) / 2.0,
edges[0],
],
[1, 1],
**kwargs,
)
if flow == "show":
ax.plot(
[edges[1], edges[2]],
[0, 0],
**kwargs,
)
ax.plot(
[edges[1], edges[2]],
[1, 1],
**kwargs,
)
xticks[0] = ""
xticks[1] = f"<{edges[2]}"

ax.set_xticklabels(xticks)
if sumw_num[-1] > 0.0 or sumw_denom[-1] > 0.0:
if flow == "hint":
ax.plot(
[
edges[-1],
edges[-1] + (edges[1] - edges[0]) / 2.0,
],
[0, 0],
**kwargs,
)
ax.plot(
[
edges[-1],
edges[-1] + (edges[1] - edges[0]) / 2.0,
],
[1, 1],
**kwargs,
)
if flow == "show":
ax.plot(
[edges[-3], edges[-2]],
[0, 0],
**kwargs,
)
ax.plot(
[edges[-3], edges[-2]],
[1, 1],
**kwargs,
)
xticks[-1] = ""
xticks[-2] = f">{edges[-3]}"
ax.set_xticklabels(xticks)
return ax


#############################################
# Utils
def overlap(ax, bbox, get_vertices=False):
Expand Down
10 changes: 10 additions & 0 deletions src/mplhep/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,3 +403,13 @@ def to_padded2d(h, variances=False):
return padded, padded_varis
else:
return padded


def compatible(self, other):
"""Checks if this histogram is compatible with another, i.e. they have identical binning"""
if len(self.axes) != len(other.axes):
return False
if set(self.axes.name) != set(other.axes.name):
return False

return True
Loading

0 comments on commit b2288f7

Please sign in to comment.