Skip to content

Update binning notebook #651

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,064 changes: 540 additions & 524 deletions examples/case_studies/binning.ipynb

Large diffs are not rendered by default.

58 changes: 31 additions & 27 deletions examples/case_studies/binning.myst.md
Original file line number Diff line number Diff line change
@@ -5,9 +5,9 @@ jupytext:
format_name: myst
format_version: 0.13
kernelspec:
display_name: Python [conda env:pymc_env]
display_name: pymc_5.11
language: python
name: conda-env-pymc_env-py
name: python3
---

(awkward_binning)=
@@ -233,7 +233,7 @@ pm.model_to_graphviz(model1)

```{code-cell} ipython3
with model1:
trace1 = pm.sample()
trace1 = pm.sample(random_seed=rng)
```

### Checks on model
@@ -244,7 +244,7 @@ we should be able to generate observations that look close to what we observed.

```{code-cell} ipython3
with model1:
ppc = pm.sample_posterior_predictive(trace1)
ppc = pm.sample_posterior_predictive(trace1, random_seed=rng)
```

We can do this graphically.
@@ -254,7 +254,7 @@ fig, ax = plt.subplots(figsize=(12, 4))
# Plot observed bin counts
c1.plot(kind="bar", ax=ax, alpha=0.5)
# Plot posterior predictive
ppc.posterior_predictive.plot.scatter(x="counts1_dim_0", y="counts1", color="k", alpha=0.2)
ppc.posterior_predictive.plot.scatter(x="counts1_dim_2", y="counts1", color="k", alpha=0.2)
# Formatting
ax.set_xticklabels([f"bin {n}" for n in range(len(c1))])
ax.set_title("Six bin discretization of N(-2, 2)")
@@ -322,7 +322,7 @@ with pm.Model() as model2:

```{code-cell} ipython3
with model2:
trace2 = pm.sample()
trace2 = pm.sample(random_seed=rng)
```

```{code-cell} ipython3
@@ -335,7 +335,7 @@ Let's run a PPC check to ensure we are generating data that are similar to what

```{code-cell} ipython3
with model2:
ppc = pm.sample_posterior_predictive(trace2)
ppc = pm.sample_posterior_predictive(trace2, random_seed=rng)
```

We calculate the mean posterior predictive bin counts, averaged over samples.
@@ -355,7 +355,7 @@ fig, ax = plt.subplots(figsize=(12, 4))
# Plot observed bin counts
c2.plot(kind="bar", ax=ax, alpha=0.5)
# Plot posterior predictive
ppc.posterior_predictive.plot.scatter(x="counts2_dim_0", y="counts2", color="k", alpha=0.2)
ppc.posterior_predictive.plot.scatter(x="counts2_dim_2", y="counts2", color="k", alpha=0.2)
# Formatting
ax.set_xticklabels([f"bin {n}" for n in range(len(c2))])
ax.set_title("Seven bin discretization of N(-2, 2)")
@@ -412,7 +412,7 @@ pm.model_to_graphviz(model3)

```{code-cell} ipython3
with model3:
trace3 = pm.sample()
trace3 = pm.sample(random_seed=rng)
```

```{code-cell} ipython3
@@ -423,7 +423,7 @@ az.plot_pair(trace3, var_names=["mu", "sigma"], divergences=True);

```{code-cell} ipython3
with model3:
ppc = pm.sample_posterior_predictive(trace3)
ppc = pm.sample_posterior_predictive(trace3, random_seed=rng)
```

```{code-cell} ipython3
@@ -434,7 +434,7 @@ fig, ax = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
c1.plot(kind="bar", ax=ax[0], alpha=0.5)
# Plot posterior predictive
ppc.posterior_predictive.plot.scatter(
x="counts1_dim_0", y="counts1", color="k", alpha=0.2, ax=ax[0]
x="counts1_dim_2", y="counts1", color="k", alpha=0.2, ax=ax[0]
)
# Formatting
ax[0].set_xticklabels([f"bin {n}" for n in range(len(c1))])
@@ -445,7 +445,7 @@ ax[0].set_title("Six bin discretization of N(-2, 2)")
c2.plot(kind="bar", ax=ax[1], alpha=0.5)
# Plot posterior predictive
ppc.posterior_predictive.plot.scatter(
x="counts2_dim_0", y="counts2", color="k", alpha=0.2, ax=ax[1]
x="counts2_dim_2", y="counts2", color="k", alpha=0.2, ax=ax[1]
)
# Formatting
ax[1].set_xticklabels([f"bin {n}" for n in range(len(c2))])
@@ -493,14 +493,14 @@ pm.model_to_graphviz(model4)

```{code-cell} ipython3
with model4:
trace4 = pm.sample()
trace4 = pm.sample(random_seed=rng)
```

### Posterior predictive checks

```{code-cell} ipython3
with model4:
ppc = pm.sample_posterior_predictive(trace4)
ppc = pm.sample_posterior_predictive(trace4, random_seed=rng)
```

```{code-cell} ipython3
@@ -509,9 +509,9 @@ fig, ax = plt.subplots(1, 2, figsize=(12, 4))
# Study 1 ----------------------------------------------------------------
# Plot observed bin counts
c1.plot(kind="bar", ax=ax[0], alpha=0.5)
# Plot posterior predictive
ppc.posterior_predictive.plot.scatter(
x="counts1_dim_0", y="counts1", color="k", alpha=0.2, ax=ax[0]
# Plot posterior predictive (subsampling to speed up plotting)
ppc.posterior_predictive.sel(y_dim_2=slice(10)).plot.scatter(
x="counts1_dim_2", y="counts1", color="k", alpha=0.2, ax=ax[0]
)
# Formatting
ax[0].set_xticklabels([f"bin {n}" for n in range(len(c1))])
@@ -625,22 +625,26 @@ pm.model_to_graphviz(model5)

```{code-cell} ipython3
with model5:
trace5 = pm.sample(tune=2000, target_accept=0.99)
trace5 = pm.sample(tune=2000, target_accept=0.98, random_seed=rng)
```

We can see that despite our efforts, we still get some divergences. Plotting the samples and highlighting the divergences suggests (from the top left subplot) that our model is suffering from the funnel problem
We can see that despite our efforts, we still get one divergence. Plotting the samples suggests (top left subplot) that our model is suffering from the funnel problem.

```{code-cell} ipython3
trace5.posterior["log(mu_pop_variance)"] = np.log(trace5.posterior["mu_pop_variance"])
az.plot_pair(
trace5, var_names=["mu_pop_mean", "mu_pop_variance", "sigma"], coords=coords, divergences=True
trace5,
var_names=["mu_pop_mean", "log(mu_pop_variance)", "sigma"],
coords=coords,
divergences=True,
);
```

### Posterior predictive checks

```{code-cell} ipython3
with model5:
ppc = pm.sample_posterior_predictive(trace5)
ppc = pm.sample_posterior_predictive(trace5, random_seed=rng)
```

```{code-cell} ipython3
@@ -766,8 +770,8 @@ true_mu, true_beta = 20, 4
BMI = pm.Gumbel.dist(mu=true_mu, beta=true_beta)

# Generate two different sets of random samples from the same Gumbel distribution.
x1 = pm.draw(BMI, 800)
x2 = pm.draw(BMI, 1200)
x1 = pm.draw(BMI, 800, random_seed=rng)
x2 = pm.draw(BMI, 1200, random_seed=rng)

# Calculate bin counts
c1 = data_to_bincounts(x1, d1)
@@ -833,14 +837,14 @@ pm.model_to_graphviz(model6)

```{code-cell} ipython3
with model6:
trace6 = pm.sample()
trace6 = pm.sample(random_seed=rng)
```

### Posterior predictive checks

```{code-cell} ipython3
with model6:
ppc = pm.sample_posterior_predictive(trace6)
ppc = pm.sample_posterior_predictive(trace6, random_seed=rng)
```

```{code-cell} ipython3
@@ -851,7 +855,7 @@ fig, ax = plt.subplots(1, 2, figsize=(12, 4), sharey=True)
c1.plot(kind="bar", ax=ax[0], alpha=0.5)
# Plot posterior predictive
ppc.posterior_predictive.plot.scatter(
x="counts1_dim_0", y="counts1", color="k", alpha=0.2, ax=ax[0]
x="counts1_dim_2", y="counts1", color="k", alpha=0.2, ax=ax[0]
)
# Formatting
ax[0].set_xticklabels([f"bin {n}" for n in range(len(c1))])
@@ -862,7 +866,7 @@ ax[0].set_title("Study 1")
c2.plot(kind="bar", ax=ax[1], alpha=0.5)
# Plot posterior predictive
ppc.posterior_predictive.plot.scatter(
x="counts2_dim_0", y="counts2", color="k", alpha=0.2, ax=ax[1]
x="counts2_dim_2", y="counts2", color="k", alpha=0.2, ax=ax[1]
)
# Formatting
ax[1].set_xticklabels([f"bin {n}" for n in range(len(c2))])