Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Comparison update, poisson uncert. #20

Merged
merged 10 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
354 changes: 177 additions & 177 deletions docs/img/1d_comparison.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
106 changes: 53 additions & 53 deletions docs/img/1d_elt1.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
318 changes: 159 additions & 159 deletions docs/img/1d_elt2.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
320 changes: 160 additions & 160 deletions docs/img/1d_elt3.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
246 changes: 123 additions & 123 deletions docs/img/1d_hist_simple.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
15,296 changes: 7,657 additions & 7,639 deletions docs/img/2d_hist_correlations.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
308 changes: 157 additions & 151 deletions docs/img/2d_hist_simple.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
160 changes: 83 additions & 77 deletions docs/img/2d_hist_uneven.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
444 changes: 222 additions & 222 deletions docs/img/hep_examples_dataMC_flatten2D.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2,352 changes: 1,182 additions & 1,170 deletions docs/img/hep_examples_dataMC_pull.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1,157 changes: 572 additions & 585 deletions docs/img/hep_examples_dataMC_stacked.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2,699 changes: 1,343 additions & 1,356 deletions docs/img/hep_examples_dataMC_unstacked.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion plothist/default_style.mplstyle
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ ytick.major.size : 6
xtick.minor.size : 3
ytick.minor.size : 3

xtick.major.pad : 5.
xtick.major.pad : 7.
ytick.major.pad : 5.
axes.axisbelow : False
image.cmap : viridis
Expand Down
81 changes: 68 additions & 13 deletions plothist/hep_plotters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Collection of functions to plot histograms in the context of High Energy Physics
"""
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib as mpl
from plothist.plotters import (
Expand All @@ -11,6 +12,7 @@
compare_two_hist,
plot_comparison,
create_comparison_figure,
_hist_ratio_variances,
)


Expand All @@ -31,7 +33,7 @@ def compare_data_mc(
**comparison_kwargs,
):
"""
Compare data to MC simulations.
Compare data to MC simulations. The data uncertainties are computed using the Poisson confidence interval.
Parameters
----------
Expand Down Expand Up @@ -78,6 +80,10 @@ def compare_data_mc(
plot_comparison : Plot the comparison between data and MC simulations.
"""
comparison_kwargs.setdefault("h1_label", "Data")
comparison_kwargs.setdefault("h2_label", "Pred.")
comparison_kwargs.setdefault("comparison", "ratio")
comparison_kwargs.setdefault("ratio_uncertainty", "split")

if fig is None and ax_main is None and ax_comparison is None:
fig, (ax_main, ax_comparison) = create_comparison_figure()
Expand All @@ -92,6 +98,8 @@ def compare_data_mc(
if signal_hist:
signal_hist = _flatten_2d_hist(signal_hist)

mc_hist_total = sum(mc_hist_list)

plot_mc(
mc_hist_list,
signal_hist=signal_hist,
Expand All @@ -104,12 +112,56 @@ def compare_data_mc(
flatten_2d_hist=False, # Already done
)

plot_error_hist(data_hist, ax=ax_main, color="black", label="Data")
uncertainties_low, uncertainties_high = _get_poisson_uncertainties(data_hist)

if comparison_kwargs["comparison"] == "pull":
data_variances = np.where(
data_hist.values() >= mc_hist_total.values(),
uncertainties_low**2,
uncertainties_high**2,
)
data_hist[:] = np.stack([data_hist.values(), data_variances], axis=-1)
elif comparison_kwargs["comparison"] == "ratio":
if comparison_kwargs["ratio_uncertainty"] == "split":
np.seterr(divide="ignore", invalid="ignore")
# Compute asymmetrical uncertainties to plot_comparison()
comparison_kwargs.setdefault(
"yerr",
[
uncertainties_low / mc_hist_total.values(),
uncertainties_high / mc_hist_total.values(),
],
)
np.seterr(divide="warn", invalid="warn")
elif comparison_kwargs["ratio_uncertainty"] == "uncorrelated":
data_hist_high = data_hist.copy()
data_hist_high[:] = np.stack(
[data_hist_high.values(), uncertainties_high**2], axis=-1
)
data_hist_low = data_hist.copy()
data_hist_low[:] = np.stack(
[data_hist_low.values(), uncertainties_low**2], axis=-1
)
# Compute asymmetrical uncertainties to plot_comparison()
comparison_kwargs.setdefault(
"yerr",
[
np.sqrt(_hist_ratio_variances(data_hist_low, mc_hist_total)),
np.sqrt(_hist_ratio_variances(data_hist_high, mc_hist_total)),
],
)

plot_error_hist(
data_hist,
ax=ax_main,
yerr=[uncertainties_low, uncertainties_high],
color="black",
label="Data",
)

_ = ax_main.xaxis.set_ticklabels([])

# Plot MC statistical uncertainty
mc_hist_total = sum(mc_hist_list)
mc_uncertainty = np.sqrt(mc_hist_total.variances())
ax_main.bar(
x=mc_hist_total.axes[0].centers,
Expand All @@ -125,21 +177,12 @@ def compare_data_mc(

ax_main.legend()

# Update the default comparison settings
default_comparison_kwargs = {
"h1_label": "Data",
"h2_label": "Pred.",
"comparison": "ratio",
"ratio_uncertainty": "split",
}
default_comparison_kwargs.update(comparison_kwargs)

plot_comparison(
data_hist,
mc_hist_total,
ax=ax_comparison,
xlabel=xlabel,
**default_comparison_kwargs,
**comparison_kwargs,
)

if save_as is not None:
Expand All @@ -148,6 +191,18 @@ def compare_data_mc(
return fig, ax_main, ax_comparison


def _get_poisson_uncertainties(data_hist):
"""
Get Poisson asymmetrical uncertainties for a histogram.
"""
conf_level = 0.682689492
alpha = 1.0 - conf_level
n = data_hist.values()
uncertainties_low = n - stats.gamma.ppf(alpha / 2, n, scale=1)
uncertainties_high = stats.gamma.ppf(1 - alpha / 2, n + 1, scale=1) - n
return uncertainties_low, uncertainties_high


def plot_mc(
mc_hist_list,
signal_hist=None,
Expand Down
103 changes: 68 additions & 35 deletions plothist/plotters.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,15 @@ def plot_hist(hist, ax, **kwargs):
ax.hist(
x=hist.axes[0].centers,
bins=hist.axes[0].edges,
weights=hist.values(),
weights=np.nan_to_num(hist.values(), 0),
**kwargs,
)
else:
# Multiple histograms
ax.hist(
x=[h.axes[0].centers for h in hist],
bins=hist[0].axes[0].edges,
weights=[h.values() for h in hist],
weights=[np.nan_to_num(h.values(), 0) for h in hist],
**kwargs,
)

Expand Down Expand Up @@ -303,7 +303,6 @@ def plot_2d_hist(hist, ax, pcolormesh_kwargs={}, colorbar_kwargs={}):


def plot_error_hist(hist, ax, **kwargs):
# TODO: Allow the user to provide xerr, yerr, fmt themselves.
"""
Create an errorbar plot from a boost histogram.
Expand All @@ -316,12 +315,12 @@ def plot_error_hist(hist, ax, **kwargs):
**kwargs
Additional keyword arguments forwarded to ax.errorbar().
"""
kwargs.setdefault("yerr", np.sqrt(hist.variances()))
kwargs.setdefault("fmt", ".")

ax.errorbar(
x=hist.axes[0].centers,
xerr=None,
y=hist.values(),
yerr=np.sqrt(hist.variances()),
fmt=".",
**kwargs,
)

Expand Down Expand Up @@ -417,6 +416,42 @@ def compare_two_hist(
return fig, ax_main, ax_comparison


def _hist_ratio_variances(hist_1, hist_2):
"""
Calculate the variances of the ratio of two histograms (hist_1/hist_2).
Parameters
----------
hist_1 : boost_histogram.Histogram
The first histogram.
hist_2 : boost_histogram.Histogram
The second histogram.
Returns
-------
variances : np.ndarray
The variances of the ratio of the two histograms.
Raises
------
ValueError
If the bins of the histograms are not equal.
"""
if not np.all(hist_1.axes[0].edges == hist_2.axes[0].edges):
raise ValueError("The bins of the histograms must be equal.")

np.seterr(divide="ignore", invalid="ignore")
ratio_variances = np.where(
hist_2.values() != 0,
hist_1.variances() / hist_2.values() ** 2
+ hist_2.variances() * hist_1.values() ** 2 / hist_2.values() ** 4,
np.nan,
)
np.seterr(divide="warn", invalid="warn")

return ratio_variances


def plot_comparison(
hist_1,
hist_2,
Expand All @@ -428,6 +463,7 @@ def plot_comparison(
comparison_ylabel=None,
comparison_ylim=None,
ratio_uncertainty="uncorrelated",
**plot_hist_kwargs,
):
"""
Plot the comparison between two histograms.
Expand All @@ -453,7 +489,9 @@ def plot_comparison(
comparison_ylim : tuple or None, optional
The y-axis limits for the comparison plot. Default is None.
ratio_uncertainty : str, optional
How to treat the uncertainties of the histograms when comparison = "ratio" ("uncorrelated" for simple comparison, "split" for scaling and split hist_1 and hist_2 uncertainties). This argument has no effect if comparison != "ration". Default is "uncorrelated".
How to treat the uncertainties of the histograms when comparison = "ratio" ("uncorrelated" for simple comparison, "split" for scaling and split hist_1 and hist_2 uncertainties). This argument has no effect if comparison != "ratio". Default is "uncorrelated".
**plot_hist_kwargs : optional
Arguments to be passed to plot_hist() or plot_error_hist(), called in case the comparison is "pull" or "ratio", respectively. In case of pull, the default arguments are histtype="stepfilled" and color="darkgrey". In case of ratio, the default argument is color="black".
Returns
-------
Expand All @@ -474,52 +512,45 @@ def plot_comparison(
hist_2.values() != 0, hist_1.values() / hist_2.values(), np.nan
)
if ratio_uncertainty == "split":
h1_scaled_uncertainty = np.where(
h1_scaled_uncertainties = np.where(
hist_2.values() != 0,
np.sqrt(hist_1.variances()) / hist_2.values(),
np.nan,
)
elif ratio_uncertainty == "uncorrelated":
ratio_variance = np.where(
h2_scaled_uncertainties = np.where(
hist_2.values() != 0,
hist_1.variances() / hist_2.values() ** 2
+ hist_2.variances() * hist_1.values() ** 2 / hist_2.values() ** 4,
np.sqrt(hist_2.variances()) / hist_2.values(),
np.nan,
)
comparison_variances = h1_scaled_uncertainties**2
elif ratio_uncertainty == "uncorrelated":
comparison_variances = _hist_ratio_variances(hist_1, hist_2)
else:
raise ValueError("ratio_uncertainty not in ['uncorrelated', 'split'].")
if ratio_uncertainty == "split":
h2_scaled_uncertainty = np.sqrt(hist_2.variances()) / hist_2.values()

elif comparison == "pull":
comparison_values = np.where(
hist_2.values() != 0,
hist_1.variances() + hist_2.variances() != 0,
(hist_1.values() - hist_2.values())
/ np.sqrt(hist_1.variances() + hist_2.variances()),
np.nan,
)
ratio_variance = np.where(
hist_1.values() != 0,
1,
np.nan,
)
comparison_variances = np.ones_like(comparison_values)
else:
raise ValueError(
f"{comparison} not available as a comparison (use ratio or pull)."
)

np.seterr(divide="warn", invalid="warn")

ax.errorbar(
x=hist_2.axes[0].centers,
xerr=None,
y=comparison_values,
yerr=np.sqrt(ratio_variance)
if (ratio_uncertainty == "uncorrelated" or comparison == "pull")
else h1_scaled_uncertainty,
fmt=".",
color="black",
)
hist_comparison = bh.Histogram(hist_2.axes[0], storage=bh.storage.Weight())
hist_comparison[:] = np.stack([comparison_values, comparison_variances], axis=-1)

if comparison == "pull":
plot_hist_kwargs.setdefault("histtype", "stepfilled")
plot_hist_kwargs.setdefault("color", "darkgrey")
plot_hist(hist_comparison, ax=ax, **plot_hist_kwargs)
else:
plot_hist_kwargs.setdefault("color", "black")
plot_error_hist(hist_comparison, ax=ax, **plot_hist_kwargs)

if comparison == "ratio":
if comparison_ylim is None:
Expand All @@ -528,9 +559,11 @@ def plot_comparison(
if ratio_uncertainty == "split":
ax.bar(
x=hist_2.axes[0].centers,
bottom=np.nan_to_num(1 - h2_scaled_uncertainty, nan=comparison_ylim[0]),
bottom=np.nan_to_num(
1 - h2_scaled_uncertainties, nan=comparison_ylim[0]
),
height=np.nan_to_num(
2 * h2_scaled_uncertainty, nan=2 * comparison_ylim[-1]
2 * h2_scaled_uncertainties, nan=2 * comparison_ylim[-1]
),
width=hist_2.axes[0].widths,
edgecolor="dimgrey",
Expand Down Expand Up @@ -612,7 +645,7 @@ def f(x0, x1):
def color(lambda_):
# emphasise either low intensity values (gamma < 1),
# or high intensity values (γ > 1)
lambda_gamma = lambda_ ** gamma
lambda_gamma = lambda_**gamma

# Angle and amplitude for the deviation
# from the black to white diagonal
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ dependencies = [
"pyyaml>=5.3.1",
"pandas>=1.3.5",
"scikit-learn>=1.0.2",
"scipy>=1.6.0",
]

[project.urls]
Expand Down