From 0e1e44682a5b169ae8a24adb0475d1912ecdab46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Tue, 29 Apr 2025 15:49:32 +0300 Subject: [PATCH 1/5] .ppc_calibration_overlay_data, and ppc_calibration_overlay(_grouped) --- R/ppc-calibration.R | 96 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 R/ppc-calibration.R diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R new file mode 100644 index 00000000..9a8ca7a1 --- /dev/null +++ b/R/ppc-calibration.R @@ -0,0 +1,96 @@ +#' PPC calibration +#' +#' Assess the calibration of the predictive distributions `yrep` in relation to +#' the data `y'. +#' See the **Plot Descriptions** section, below, for details. +#' +#' @name PPC-calibration +#' @family PPCs +#' +#' @template args-y-rep +#' @template args-group +#' +#' @template return-ggplot-or-data +#' +#' @section Plot Descriptions: +#' \describe{ +#' \item{`ppc_calibration_overlay()`,`ppc_calibration_overlay_grouped()`}{ +#' +#' } +#' } +#' +NULL + +#' @rdname PPC-calibration +#' @export +.ppc_calibration_data <- function(y, prep, group = NULL) { + y <- validate_y(y) + n_obs <- length(y) + prep <- validate_predictions(prep, n_obs) + if (any(prep > 1 | prep < 0)) { + stop("Values of ´prep´ should be predictive probabilities between 0 and 1.") + } + if (!is.null(group)) { + group <- validate_group(group, n_obs) + } else { + group <- rep(1, n_obs * nrow(prep)) + } + + if (requireNamespace("monotone", quietly = TRUE)) { + monotone <- monotone::monotone + } else { + monotone <- function(y) { + stats::isoreg(y)$yf + } + } + .ppd_data(prep, group = group) %>% + group_by(group, rep_id) %>% + mutate( + ord = order(value), + value = value[ord], + cep = monotone(y[ord]) + ) |> + ungroup() +} + +#' @rdname PPC-calibration +#' @export +ppc_calibration_overlay <- function( + y, prep, ..., linewidth = 0.25, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line( + aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_calibration_overlay_grouped <- function( + y, prep, group, ..., linewidth = 0.25, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep, group) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line(aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + facet_wrap(vars(group)) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} From d784405a547a2787ab0a50e13b01edfe72eb5140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Mon, 19 May 2025 18:20:12 +0300 Subject: [PATCH 2/5] draft of ppc_calibration plots --- R/ppc-calibration.R | 170 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 138 insertions(+), 32 deletions(-) diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R index 9a8ca7a1..2786aa5c 100644 --- a/R/ppc-calibration.R +++ b/R/ppc-calibration.R @@ -1,4 +1,4 @@ -#' PPC calibration +# x' PPC calibration #' #' Assess the calibration of the predictive distributions `yrep` in relation to #' the data `y'. @@ -14,49 +14,119 @@ #' #' @section Plot Descriptions: #' \describe{ +#' \item{`ppc_calibration()`,`ppc_calibration_grouped()`}{ +#' +#' }, #' \item{`ppc_calibration_overlay()`,`ppc_calibration_overlay_grouped()`}{ #' +#' }, +#' \item{`ppc_loo_calibration()`,`ppc_loo_calibration_grouped()`}{ +#' #' } #' } #' NULL + #' @rdname PPC-calibration #' @export -.ppc_calibration_data <- function(y, prep, group = NULL) { - y <- validate_y(y) - n_obs <- length(y) - prep <- validate_predictions(prep, n_obs) - if (any(prep > 1 | prep < 0)) { - stop("Values of ´prep´ should be predictive probabilities between 0 and 1.") - } - if (!is.null(group)) { - group <- validate_group(group, n_obs) - } else { - group <- rep(1, n_obs * nrow(prep)) - } +ppc_calibration_overlay <- function( + y, prep, ..., linewidth = 0.25, alpha = 0.5) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line( + aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} - if (requireNamespace("monotone", quietly = TRUE)) { - monotone <- monotone::monotone - } else { - monotone <- function(y) { - stats::isoreg(y)$yf - } - } - .ppd_data(prep, group = group) %>% - group_by(group, rep_id) %>% - mutate( - ord = order(value), - value = value[ord], - cep = monotone(y[ord]) - ) |> - ungroup() +#' @rdname PPC-calibration +#' @export +ppc_calibration_overlay_grouped <- function( + y, prep, group, ..., linewidth = 0.25, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep, group) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line(aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + facet_wrap(vars(group)) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL } #' @rdname PPC-calibration #' @export -ppc_calibration_overlay <- function( - y, prep, ..., linewidth = 0.25, alpha = 0.7) { +ppc_calibration <- function( + y, prep, prob = .95, show_mean = TRUE, ..., linewidth = 0.25, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep) %>% + group_by(y_id) %>% + summarise( + value = median(value), + lb = quantile(cep, .5 - .5 * prob), + ub = quantile(cep, .5 + .5 * prob), + cep = median(cep) + ) + + ggplot(data) + + aes(value, cep) + + geom_abline(color = "black", linetype = 2) + + geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) + + geom_line( + aes(color = "y"), + linewidth = linewidth + ) + + scale_color_ppc() + + scale_fill_ppc() + + bayesplot_theme_get() + + # legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_calibration_grouped <- function( + y, prep, show_mean, ..., linewidth = 0.25, alpha = 0.7) { + check_ignored_arguments(...) + data <- .ppc_calibration_data(y, prep, group) + ggplot(data) + + geom_abline(color = "black", linetype = 2) + + geom_line(aes(value, cep, group = rep_id, color = "yrep"), + linewidth = linewidth, alpha = alpha + ) + + facet_wrap(vars(group)) + + scale_color_ppc() + + bayesplot_theme_get() + + legend_none() + + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + + xlab("Predicted probability") + + ylab("Conditional event probability") + + NULL +} + +#' @rdname PPC-calibration +#' @export +ppc_loo_calibration <- function( + y, prep, lw, ..., linewidth = 0.25, alpha = 0.7) { check_ignored_arguments(...) data <- .ppc_calibration_data(y, prep) ggplot(data) + @@ -76,8 +146,8 @@ ppc_calibration_overlay <- function( #' @rdname PPC-calibration #' @export -ppc_calibration_overlay_grouped <- function( - y, prep, group, ..., linewidth = 0.25, alpha = 0.7) { +ppc_loo_calibration_grouped <- function( + y, prep, lw, ..., linewidth = 0.25, alpha = 0.7) { check_ignored_arguments(...) data <- .ppc_calibration_data(y, prep, group) ggplot(data) + @@ -94,3 +164,39 @@ ppc_calibration_overlay_grouped <- function( ylab("Conditional event probability") + NULL } + +.ppc_calibration_data <- function(y, prep, group = NULL) { + y <- validate_y(y) + n_obs <- length(y) + prep <- validate_predictions(prep, n_obs) + if (any(prep > 1 | prep < 0)) { + stop("Values of ´prep´ should be predictive probabilities between 0 and 1.") + } + if (!is.null(group)) { + group <- validate_group(group, n_obs) + } else { + group <- rep(1, n_obs * nrow(prep)) + } + + if (requireNamespace("monotone", quietly = TRUE)) { + monotone <- monotone::monotone + } else { + monotone <- function(y) { + stats::isoreg(y)$yf + } + } + .ppd_data(prep, group = group) %>% + group_by(group, rep_id) %>% + mutate( + ord = order(value), + value = value[ord], + cep = monotone(y[ord]) + ) |> + ungroup() +} + +.loo_resample_data <- function(prep, lw, psis_object) { + lw <- .get_lw(lw, psis_object) + stopifnot(identical(dim(prep), dim(lw))) + # Work in progress here... +} From 5cf62f96847440ebad0bcff59a23a718da5353af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 22 May 2025 14:59:14 +0300 Subject: [PATCH 3/5] Add example for ppc_calibration_overlay() --- R/ppc-calibration.R | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R index 2786aa5c..e706b2f9 100644 --- a/R/ppc-calibration.R +++ b/R/ppc-calibration.R @@ -25,6 +25,16 @@ #' } #' } #' +#' @examples +#' color_scheme_set("brightblue") +#' +#' # Make an example dataset of binary observations +#' ymin <- range(example_y_data(), example_yrep_draws())[1] +#' ymax <- range(example_y_data(), example_yrep_draws())[2] +#' y <- rbinom(length(example_y_data()), 1, (example_y_data() - ymin) / (ymax - ymin)) +#' prep <- (example_yrep_draws() - ymin) / (ymax - ymin) +#' +#' ppc_calibration_overlay(y, prep[1:50, ]) NULL @@ -73,7 +83,7 @@ ppc_calibration_overlay_grouped <- function( #' @rdname PPC-calibration #' @export ppc_calibration <- function( - y, prep, prob = .95, show_mean = TRUE, ..., linewidth = 0.25, alpha = 0.7) { + y, prep, prob = .95, show_mean = TRUE, ..., linewidth = 0.5, alpha = 0.7) { check_ignored_arguments(...) data <- .ppc_calibration_data(y, prep) %>% group_by(y_id) %>% @@ -95,7 +105,7 @@ ppc_calibration <- function( scale_color_ppc() + scale_fill_ppc() + bayesplot_theme_get() + - # legend_none() + + legend_none() + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + xlab("Predicted probability") + ylab("Conditional event probability") + @@ -105,9 +115,17 @@ ppc_calibration <- function( #' @rdname PPC-calibration #' @export ppc_calibration_grouped <- function( - y, prep, show_mean, ..., linewidth = 0.25, alpha = 0.7) { + y, prep, group, show_mean, ..., linewidth = 0.25, alpha = 0.7) { check_ignored_arguments(...) - data <- .ppc_calibration_data(y, prep, group) + data <- .ppc_calibration_data(y, prep) %>% + group_by(y_id) %>% + summarise( + value = median(value), + lb = quantile(cep, .5 - .5 * prob), + ub = quantile(cep, .5 + .5 * prob), + cep = median(cep) + ) + ggplot(data) + geom_abline(color = "black", linetype = 2) + geom_line(aes(value, cep, group = rep_id, color = "yrep"), @@ -147,7 +165,7 @@ ppc_loo_calibration <- function( #' @rdname PPC-calibration #' @export ppc_loo_calibration_grouped <- function( - y, prep, lw, ..., linewidth = 0.25, alpha = 0.7) { + y, prep, group, lw, ..., linewidth = 0.25, alpha = 0.7) { check_ignored_arguments(...) data <- .ppc_calibration_data(y, prep, group) ggplot(data) + From 01ac82635561c7dc721a4315db89f76c39a826b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Teemu=20S=C3=A4ilynoja?= Date: Thu, 22 May 2025 15:07:14 +0300 Subject: [PATCH 4/5] Fix ppc_calibration_grouped() --- R/ppc-calibration.R | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R index e706b2f9..a3d4af42 100644 --- a/R/ppc-calibration.R +++ b/R/ppc-calibration.R @@ -115,10 +115,10 @@ ppc_calibration <- function( #' @rdname PPC-calibration #' @export ppc_calibration_grouped <- function( - y, prep, group, show_mean, ..., linewidth = 0.25, alpha = 0.7) { + y, prep, group, prob = .95, show_mean = TRUE, ..., linewidth = 0.5, alpha = 0.7) { check_ignored_arguments(...) - data <- .ppc_calibration_data(y, prep) %>% - group_by(y_id) %>% + data <- .ppc_calibration_data(y, prep, group) %>% + group_by(group, y_id) %>% summarise( value = median(value), lb = quantile(cep, .5 - .5 * prob), @@ -127,12 +127,16 @@ ppc_calibration_grouped <- function( ) ggplot(data) + + aes(value, cep) + geom_abline(color = "black", linetype = 2) + - geom_line(aes(value, cep, group = rep_id, color = "yrep"), - linewidth = linewidth, alpha = alpha + geom_ribbon(aes(ymin = lb, ymax = ub, fill = "yrep"), alpha = alpha) + + geom_line( + aes(color = "y"), + linewidth = linewidth ) + facet_wrap(vars(group)) + scale_color_ppc() + + scale_fill_ppc() + bayesplot_theme_get() + legend_none() + coord_equal(xlim = c(0, 1), ylim = c(0, 1), expand = FALSE) + From f9806eb8e2401dde2858ddec974374fb2684fd34 Mon Sep 17 00:00:00 2001 From: jgabry Date: Thu, 22 May 2025 16:16:21 -0600 Subject: [PATCH 5/5] fix typo preventing building doc --- R/ppc-calibration.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ppc-calibration.R b/R/ppc-calibration.R index a3d4af42..c26d1202 100644 --- a/R/ppc-calibration.R +++ b/R/ppc-calibration.R @@ -7,7 +7,7 @@ #' @name PPC-calibration #' @family PPCs #' -#' @template args-y-rep +#' @template args-y-yrep #' @template args-group #' #' @template return-ggplot-or-data