From 1715b38838a56e175d8753b4a020461dc786f639 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 5 Sep 2022 16:35:29 +0200 Subject: [PATCH 1/5] draft --- R/averaged_parameters.R | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 R/averaged_parameters.R diff --git a/R/averaged_parameters.R b/R/averaged_parameters.R new file mode 100644 index 000000000..491171431 --- /dev/null +++ b/R/averaged_parameters.R @@ -0,0 +1,54 @@ +library(insight) +library(parameters) +library(MATA) + +set.seed(0) +n <- 20 # 'n' is assumed to be even +x1 <- c(rep(0, n / 2), rep(1, n / 2)) # two groups: x1=0, and x1=1 +x2 <- rnorm(n, mean = 10, sd = 3) +y <- rnorm(n, mean = 3 * x1 + 0.1 * x2) # data generation + +x1 <- factor(x1) +m1 <- glm(y ~ x1) # using 'glm' provides AIC values. +m2 <- glm(y ~ x1 + x2) # using 'lm' doesn't. +aic <- c(m1$aic, m2$aic) +delta.aic <- aic - min(aic) +model.weights <- exp(-0.5 * delta.aic) / sum(exp(-0.5 * delta.aic)) + +# see also +# performance::compare_performance(m1, m2) + +residual.dfs <- c(insight::get_df(m1), insight::get_df(m2)) +# parameters::degrees_of_freedom(m1,method="residual") + +g1 <- insight::get_datagrid(m1) +g2 <- insight::get_datagrid(m2) + +nd1 <- as.data.frame(lapply(g1, function(i) { + if (is.factor(i)) { + as.factor(levels(i)[1]) + } else { + unique(i)[1] + } +})) + + +nd2 <- as.data.frame(lapply(g2, function(i) { + if (is.factor(i)) { + as.factor(levels(i)[1]) + } else { + unique(i)[1] + } +})) + +p1 <- get_predicted(m1, data = nd1, ci = .95) +p2 <- get_predicted(m2, data = nd2, , ci = .95) + +theta.hats <- c(p1, p2) +se.theta.hats <- c(attributes(p1)$ci_data$SE, attributes(p2)$ci_data$SE) + +# 95% MATA-Wald confidence interval for theta: +mata.wald(theta.hats, se.theta.hats, model.weights, + mata.t = TRUE, + residual.dfs = residual.dfs +) From ba161b637798b6895eccb1ad99c7c1c6f32a9a52 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 5 Sep 2022 16:55:00 +0200 Subject: [PATCH 2/5] draft --- R/averaged_parameters.R | 105 +++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 54 deletions(-) diff --git a/R/averaged_parameters.R b/R/averaged_parameters.R index 491171431..a82229fef 100644 --- a/R/averaged_parameters.R +++ b/R/averaged_parameters.R @@ -1,54 +1,51 @@ -library(insight) -library(parameters) -library(MATA) - -set.seed(0) -n <- 20 # 'n' is assumed to be even -x1 <- c(rep(0, n / 2), rep(1, n / 2)) # two groups: x1=0, and x1=1 -x2 <- rnorm(n, mean = 10, sd = 3) -y <- rnorm(n, mean = 3 * x1 + 0.1 * x2) # data generation - -x1 <- factor(x1) -m1 <- glm(y ~ x1) # using 'glm' provides AIC values. -m2 <- glm(y ~ x1 + x2) # using 'lm' doesn't. -aic <- c(m1$aic, m2$aic) -delta.aic <- aic - min(aic) -model.weights <- exp(-0.5 * delta.aic) / sum(exp(-0.5 * delta.aic)) - -# see also -# performance::compare_performance(m1, m2) - -residual.dfs <- c(insight::get_df(m1), insight::get_df(m2)) -# parameters::degrees_of_freedom(m1,method="residual") - -g1 <- insight::get_datagrid(m1) -g2 <- insight::get_datagrid(m2) - -nd1 <- as.data.frame(lapply(g1, function(i) { - if (is.factor(i)) { - as.factor(levels(i)[1]) - } else { - unique(i)[1] - } -})) - - -nd2 <- as.data.frame(lapply(g2, function(i) { - if (is.factor(i)) { - as.factor(levels(i)[1]) - } else { - unique(i)[1] - } -})) - -p1 <- get_predicted(m1, data = nd1, ci = .95) -p2 <- get_predicted(m2, data = nd2, , ci = .95) - -theta.hats <- c(p1, p2) -se.theta.hats <- c(attributes(p1)$ci_data$SE, attributes(p2)$ci_data$SE) - -# 95% MATA-Wald confidence interval for theta: -mata.wald(theta.hats, se.theta.hats, model.weights, - mata.t = TRUE, - residual.dfs = residual.dfs -) + +average_parameters <- function(..., ci = .95, verbose = TRUE) { + insight::check_if_installed("performance") + models <- list(...) + + # compute model weights + aic_values <- sapply(models, performance::performance_aic) + delta_aic <- aic_values - min(aic_values) + model_weights <- exp(-0.5 * delta_aic) / sum(exp(-0.5 * delta_aic)) + + # residual df's + residual_dfs <- sapply(models, degrees_of_freedom, method = "residual") + + # data grid for average predictions + predictions <- lapply(models, function(m) { + d <- insight::get_datagrid(m) + new_data <- as.data.frame(lapply(d, function(i) { + if (is.factor(i)) { + as.factor(levels(i)[1]) + } else { + unique(i)[1] + } + })) + insight::get_predicted(n, data = new_data, ci = .95) + }) + + theta_hats <- unlist(predictions) + se_theta_hats <- sapply(predictions, function(p) { + attributes(p)$ci_data$SE + }) + +alpha <- (1 - ci) / 2 + + CI_low <- stats::uniroot( + f = .tailarea, interval = c(-1e+10, 1e+10), theta.hats = theta_hats, + se.theta.hats = se_theta_hats, model.weights = model_weights, alpha = alpha, + residual.dfs = residual_dfs, tol = 1e-10 + )$root + + CI_high <- stats::uniroot( + f = .tailarea, interval = c(-1e+10, 1e+10), theta.hats = theta_hats, + se.theta.hats = se_theta_hats, model.weights = model_weights, alpha = 1 - alpha, + residual.dfs = residual_dfs, tol = 1e-10 + )$root +} + + +.tailarea <- function(theta, theta.hats, se.theta.hats, model.weights, alpha, residual.dfs) { + t.quantiles <- (theta - theta.hats) / se.theta.hats + sum(model.weights * stats::pt(t.quantiles, df = residual.dfs)) - alpha +} From f8244c5d727c7453b801dcdba0dc3aad7cf43a90 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 5 Sep 2022 16:55:29 +0200 Subject: [PATCH 3/5] draft --- R/averaged_parameters.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/averaged_parameters.R b/R/averaged_parameters.R index a82229fef..eb469adba 100644 --- a/R/averaged_parameters.R +++ b/R/averaged_parameters.R @@ -29,8 +29,6 @@ average_parameters <- function(..., ci = .95, verbose = TRUE) { attributes(p)$ci_data$SE }) -alpha <- (1 - ci) / 2 - CI_low <- stats::uniroot( f = .tailarea, interval = c(-1e+10, 1e+10), theta.hats = theta_hats, se.theta.hats = se_theta_hats, model.weights = model_weights, alpha = alpha, @@ -42,6 +40,8 @@ alpha <- (1 - ci) / 2 se.theta.hats = se_theta_hats, model.weights = model_weights, alpha = 1 - alpha, residual.dfs = residual_dfs, tol = 1e-10 )$root + + c(CI_low, CI_high) } From 191aaf97d8fcd2c0cb9bce9c97b067987c45569d Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 5 Sep 2022 16:59:10 +0200 Subject: [PATCH 4/5] export --- NAMESPACE | 1 + R/averaged_parameters.R | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 20f42c3e7..79306293c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -851,6 +851,7 @@ S3method(visualisation_recipe,n_clusters_dbscan) S3method(visualisation_recipe,n_clusters_elbow) S3method(visualisation_recipe,n_clusters_gap) S3method(visualisation_recipe,n_clusters_silhouette) +export(averaged_parameters) export(bootstrap_model) export(bootstrap_parameters) export(check_clusterstructure) diff --git a/R/averaged_parameters.R b/R/averaged_parameters.R index eb469adba..267d7093d 100644 --- a/R/averaged_parameters.R +++ b/R/averaged_parameters.R @@ -1,5 +1,5 @@ - -average_parameters <- function(..., ci = .95, verbose = TRUE) { +#' @export +averaged_parameters <- function(..., ci = .95, verbose = TRUE) { insight::check_if_installed("performance") models <- list(...) From d44c8c7af172b1297eb857602b9adc272a8799d0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 5 Sep 2022 17:10:40 +0200 Subject: [PATCH 5/5] fixes --- R/averaged_parameters.R | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/R/averaged_parameters.R b/R/averaged_parameters.R index 267d7093d..56d5b3094 100644 --- a/R/averaged_parameters.R +++ b/R/averaged_parameters.R @@ -17,11 +17,13 @@ averaged_parameters <- function(..., ci = .95, verbose = TRUE) { new_data <- as.data.frame(lapply(d, function(i) { if (is.factor(i)) { as.factor(levels(i)[1]) + } else if (is.numeric(i)) { + mean(i, na.rm = TRUE) } else { unique(i)[1] } })) - insight::get_predicted(n, data = new_data, ci = .95) + insight::get_predicted(m, data = new_data, ci = .95, predict = "link") }) theta_hats <- unlist(predictions) @@ -29,23 +31,25 @@ averaged_parameters <- function(..., ci = .95, verbose = TRUE) { attributes(p)$ci_data$SE }) + alpha <- (1 - ci) / 2 + CI_low <- stats::uniroot( - f = .tailarea, interval = c(-1e+10, 1e+10), theta.hats = theta_hats, - se.theta.hats = se_theta_hats, model.weights = model_weights, alpha = alpha, - residual.dfs = residual_dfs, tol = 1e-10 + f = .tailarea, interval = c(-1e+10, 1e+10), theta_hats = theta_hats, + se_theta_hats = se_theta_hats, model_weights = model_weights, alpha = alpha, + residual_dfs = residual_dfs, tol = 1e-10 )$root CI_high <- stats::uniroot( - f = .tailarea, interval = c(-1e+10, 1e+10), theta.hats = theta_hats, - se.theta.hats = se_theta_hats, model.weights = model_weights, alpha = 1 - alpha, - residual.dfs = residual_dfs, tol = 1e-10 + f = .tailarea, interval = c(-1e+10, 1e+10), theta_hats = theta_hats, + se_theta_hats = se_theta_hats, model_weights = model_weights, alpha = 1 - alpha, + residual_dfs = residual_dfs, tol = 1e-10 )$root c(CI_low, CI_high) } -.tailarea <- function(theta, theta.hats, se.theta.hats, model.weights, alpha, residual.dfs) { - t.quantiles <- (theta - theta.hats) / se.theta.hats - sum(model.weights * stats::pt(t.quantiles, df = residual.dfs)) - alpha +.tailarea <- function(theta, theta_hats, se_theta_hats, model_weights, alpha, residual_dfs) { + t_quantiles <- (theta - theta_hats) / se_theta_hats + sum(model_weights * stats::pt(t_quantiles, df = residual_dfs)) - alpha }