
Commit dfb781e

Updated R package to pass CRAN checks (had to manually remove some pragmas from json header)
1 parent adc64ab commit dfb781e

36 files changed: +361 / -299 lines

NAMESPACE

Lines changed: 1 addition & 1 deletion
@@ -71,7 +71,7 @@ export(saveBARTModelToJsonString)
 export(saveBCFModelToJsonFile)
 export(saveBCFModelToJsonString)
 importFrom(R6,R6Class)
-importFrom(stats, coef)
+importFrom(stats,coef)
 importFrom(stats,lm)
 importFrom(stats,model.matrix)
 importFrom(stats,qgamma)

R/bart.R

Lines changed: 1 addition & 1 deletion
@@ -575,7 +575,7 @@ bart <- function(X_train, y_train, W_train = NULL, group_ids_train = NULL,
 }
 if (has_rfx) {
   resetRandomEffectsModel(rfx_model, rfx_samples, forest_ind, sigma_alpha_init)
-  resetRandomEffectsTracker(rfx_tracker_train, rfx_model, rfx_dataset_train, outcome_train, rfx_samples, forest_ind)
+  resetRandomEffectsTracker(rfx_tracker_train, rfx_model, rfx_dataset_train, outcome_train, rfx_samples)
 }
 if (sample_sigma_global) current_sigma2 <- global_var_samples[forest_ind + 1]
 } else if (has_prev_model) {

R/bcf.R

Lines changed: 67 additions & 21 deletions
@@ -158,9 +158,11 @@
 #' tau_train <- tau_x[train_inds]
 #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train,
 #'                  X_test = X_test, Z_test = Z_test, pi_test = pi_test)
-#' # plot(rowMeans(bcf_model$mu_hat_test), mu_test, xlab = "predicted", ylab = "actual", main = "Prognostic function")
+#' # plot(rowMeans(bcf_model$mu_hat_test), mu_test, xlab = "predicted",
+#' #      ylab = "actual", main = "Prognostic function")
 #' # abline(0,1,col="red",lty=3,lwd=3)
-#' # plot(rowMeans(bcf_model$tau_hat_test), tau_test, xlab = "predicted", ylab = "actual", main = "Treatment effect")
+#' # plot(rowMeans(bcf_model$tau_hat_test), tau_test, xlab = "predicted",
+#' #      ylab = "actual", main = "Treatment effect")
 #' # abline(0,1,col="red",lty=3,lwd=3)
 bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NULL,
                 rfx_basis_train = NULL, X_test = NULL, Z_test = NULL, pi_test = NULL,
@@ -872,7 +874,7 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU
 }
 if (has_rfx) {
   resetRandomEffectsModel(rfx_model, rfx_samples, forest_ind, sigma_alpha_init)
-  resetRandomEffectsTracker(rfx_tracker_train, rfx_model, rfx_dataset_train, outcome_train, rfx_samples, forest_ind)
+  resetRandomEffectsTracker(rfx_tracker_train, rfx_model, rfx_dataset_train, outcome_train, rfx_samples)
 }
 if (adaptive_coding) {
   current_b_1 <- b_1_samples[forest_ind + 1]
@@ -1190,6 +1192,8 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU
   "num_gfr" = num_gfr,
   "num_burnin" = num_burnin,
   "num_mcmc" = num_mcmc,
+  "keep_every" = keep_every,
+  "num_chains" = num_chains,
   "has_rfx" = has_rfx,
   "has_rfx_basis" = has_basis_rfx,
   "num_rfx_basis" = num_basis_rfx,
@@ -1290,9 +1294,11 @@ bcf <- function(X_train, Z_train, y_train, pi_train = NULL, group_ids_train = NU
 #' tau_train <- tau_x[train_inds]
 #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train, pi_train = pi_train)
 #' preds <- predict(bcf_model, X_test, Z_test, pi_test)
-#' # plot(rowMeans(preds$mu_hat), mu_test, xlab = "predicted", ylab = "actual", main = "Prognostic function")
+#' # plot(rowMeans(preds$mu_hat), mu_test, xlab = "predicted",
+#' #      ylab = "actual", main = "Prognostic function")
 #' # abline(0,1,col="red",lty=3,lwd=3)
-#' # plot(rowMeans(preds$tau_hat), tau_test, xlab = "predicted", ylab = "actual", main = "Treatment effect")
+#' # plot(rowMeans(preds$tau_hat), tau_test, xlab = "predicted",
+#' #      ylab = "actual", main = "Treatment effect")
 #' # abline(0,1,col="red",lty=3,lwd=3)
 predict.bcf <- function(bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NULL, rfx_basis_test = NULL){
   # Preprocess covariates
@@ -1475,13 +1481,14 @@ predict.bcf <- function(bcf, X_test, Z_test, pi_test = NULL, group_ids_test = NU
 #' rfx_basis_train <- rfx_basis[train_inds,]
 #' rfx_term_test <- rfx_term[test_inds]
 #' rfx_term_train <- rfx_term[train_inds]
+#' bcf_params <- list(sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
 #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train,
 #'                  pi_train = pi_train, group_ids_train = group_ids_train,
 #'                  rfx_basis_train = rfx_basis_train, X_test = X_test,
 #'                  Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test,
 #'                  rfx_basis_test = rfx_basis_test,
 #'                  num_gfr = 100, num_burnin = 0, num_mcmc = 100,
-#'                  sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
+#'                  params = bcf_params)
 #' rfx_samples <- getRandomEffectSamples(bcf_model)
 getRandomEffectSamples.bcf <- function(object, ...){
   result = list()
@@ -1561,13 +1568,14 @@ getRandomEffectSamples.bcf <- function(object, ...){
 #' rfx_basis_train <- rfx_basis[train_inds,]
 #' rfx_term_test <- rfx_term[test_inds]
 #' rfx_term_train <- rfx_term[train_inds]
+#' bcf_params <- list(sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
 #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train,
 #'                  pi_train = pi_train, group_ids_train = group_ids_train,
 #'                  rfx_basis_train = rfx_basis_train, X_test = X_test,
 #'                  Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test,
 #'                  rfx_basis_test = rfx_basis_test,
 #'                  num_gfr = 100, num_burnin = 0, num_mcmc = 100,
-#'                  sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
+#'                  params = bcf_params)
 #' # bcf_json <- convertBCFModelToJson(bcf_model)
 convertBCFModelToJson <- function(object){
   jsonobj <- createCppJson()
@@ -1617,6 +1625,8 @@ convertBCFModelToJson <- function(object){
   jsonobj$add_scalar("num_burnin", object$model_params$num_burnin)
   jsonobj$add_scalar("num_mcmc", object$model_params$num_mcmc)
   jsonobj$add_scalar("num_samples", object$model_params$num_samples)
+  jsonobj$add_scalar("keep_every", object$model_params$keep_every)
+  jsonobj$add_scalar("num_chains", object$model_params$num_chains)
   jsonobj$add_scalar("num_covariates", object$model_params$num_covariates)
   if (object$model_params$sample_sigma_global) {
     jsonobj$add_vector("sigma2_samples", object$sigma2_samples, "parameters")
@@ -1700,13 +1710,14 @@ convertBCFModelToJson <- function(object){
 #' rfx_basis_train <- rfx_basis[train_inds,]
 #' rfx_term_test <- rfx_term[test_inds]
 #' rfx_term_train <- rfx_term[train_inds]
+#' bcf_params <- list(sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
 #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train,
 #'                  pi_train = pi_train, group_ids_train = group_ids_train,
 #'                  rfx_basis_train = rfx_basis_train, X_test = X_test,
 #'                  Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test,
 #'                  rfx_basis_test = rfx_basis_test,
 #'                  num_gfr = 100, num_burnin = 0, num_mcmc = 100,
-#'                  sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
+#'                  params = bcf_params)
 #' # saveBCFModelToJsonFile(bcf_model, "test.json")
 saveBCFModelToJsonFile <- function(object, filename){
   # Convert to Json
@@ -1773,13 +1784,14 @@ saveBCFModelToJsonFile <- function(object, filename){
 #' rfx_basis_train <- rfx_basis[train_inds,]
 #' rfx_term_test <- rfx_term[test_inds]
 #' rfx_term_train <- rfx_term[train_inds]
+#' bcf_params <- list(sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
 #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train,
 #'                  pi_train = pi_train, group_ids_train = group_ids_train,
 #'                  rfx_basis_train = rfx_basis_train, X_test = X_test,
 #'                  Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test,
 #'                  rfx_basis_test = rfx_basis_test,
 #'                  num_gfr = 100, num_burnin = 0, num_mcmc = 100,
-#'                  sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
+#'                  params = bcf_params)
 #' # saveBCFModelToJsonString(bcf_model)
 saveBCFModelToJsonString <- function(object){
   # Convert to Json
@@ -1848,13 +1860,14 @@ saveBCFModelToJsonString <- function(object){
 #' rfx_basis_train <- rfx_basis[train_inds,]
 #' rfx_term_test <- rfx_term[test_inds]
 #' rfx_term_train <- rfx_term[train_inds]
+#' bcf_params <- list(sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
 #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train,
 #'                  pi_train = pi_train, group_ids_train = group_ids_train,
 #'                  rfx_basis_train = rfx_basis_train, X_test = X_test,
 #'                  Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test,
 #'                  rfx_basis_test = rfx_basis_test,
 #'                  num_gfr = 100, num_burnin = 0, num_mcmc = 100,
-#'                  sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
+#'                  params = bcf_params)
 #' # bcf_json <- convertBCFModelToJson(bcf_model)
 #' # bcf_model_roundtrip <- createBCFModelFromJson(bcf_json)
 createBCFModelFromJson <- function(json_object){
@@ -1993,13 +2006,14 @@ createBCFModelFromJson <- function(json_object){
 #' rfx_basis_train <- rfx_basis[train_inds,]
 #' rfx_term_test <- rfx_term[test_inds]
 #' rfx_term_train <- rfx_term[train_inds]
+#' bcf_params <- list(sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
 #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train,
 #'                  pi_train = pi_train, group_ids_train = group_ids_train,
 #'                  rfx_basis_train = rfx_basis_train, X_test = X_test,
 #'                  Z_test = Z_test, pi_test = pi_test, group_ids_test = group_ids_test,
 #'                  rfx_basis_test = rfx_basis_test,
 #'                  num_gfr = 100, num_burnin = 0, num_mcmc = 100,
-#'                  sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
+#'                  params = bcf_params)
 #' # saveBCFModelToJsonFile(bcf_model, "test.json")
 #' # bcf_model_roundtrip <- createBCFModelFromJsonFile("test.json")
 createBCFModelFromJsonFile <- function(json_filename){
@@ -2100,24 +2114,55 @@ createBCFModelFromJsonString <- function(json_string){
 #' @examples
 #' n <- 100
 #' p <- 5
-#' X <- matrix(runif(n*p), ncol = p)
-#' f_XW <- (
-#'     ((0 <= X[,1]) & (0.25 > X[,1])) * (-7.5) +
-#'     ((0.25 <= X[,1]) & (0.5 > X[,1])) * (-2.5) +
-#'     ((0.5 <= X[,1]) & (0.75 > X[,1])) * (2.5) +
-#'     ((0.75 <= X[,1]) & (1 > X[,1])) * (7.5)
-#' )
-#' noise_sd <- 1
-#' y <- f_XW + rnorm(n, 0, noise_sd)
+#' x1 <- rnorm(n)
+#' x2 <- rnorm(n)
+#' x3 <- rnorm(n)
+#' x4 <- rnorm(n)
+#' x5 <- rnorm(n)
+#' X <- cbind(x1,x2,x3,x4,x5)
+#' p <- ncol(X)
+#' g <- function(x) {ifelse(x[,5] < -0.44,2,ifelse(x[,5] < 0.44,-1,4))}
+#' mu1 <- function(x) {1+g(x)+x[,1]*x[,3]}
+#' mu2 <- function(x) {1+g(x)+6*abs(x[,3]-1)}
+#' tau1 <- function(x) {rep(3,nrow(x))}
+#' tau2 <- function(x) {1+2*x[,2]*(x[,4] > 0)}
+#' mu_x <- mu1(X)
+#' tau_x <- tau2(X)
+#' pi_x <- 0.8*pnorm((3*mu_x/sd(mu_x)) - 0.5*X[,1]) + 0.05 + runif(n)/10
+#' Z <- rbinom(n,1,pi_x)
+#' E_XZ <- mu_x + Z*tau_x
+#' snr <- 3
+#' group_ids <- rep(c(1,2), n %/% 2)
+#' rfx_coefs <- matrix(c(-1, -1, 1, 1), nrow=2, byrow=TRUE)
+#' rfx_basis <- cbind(1, runif(n, -1, 1))
+#' rfx_term <- rowSums(rfx_coefs[group_ids,] * rfx_basis)
+#' y <- E_XZ + rfx_term + rnorm(n, 0, 1)*(sd(E_XZ)/snr)
+#' X <- as.data.frame(X)
+#' X$x4 <- factor(X$x4, ordered = TRUE)
+#' X$x5 <- factor(X$x5, ordered = TRUE)
 #' test_set_pct <- 0.2
 #' n_test <- round(test_set_pct*n)
 #' n_train <- n - n_test
 #' test_inds <- sort(sample(1:n, n_test, replace = FALSE))
 #' train_inds <- (1:n)[!((1:n) %in% test_inds)]
 #' X_test <- X[test_inds,]
 #' X_train <- X[train_inds,]
+#' pi_test <- pi_x[test_inds]
+#' pi_train <- pi_x[train_inds]
+#' Z_test <- Z[test_inds]
+#' Z_train <- Z[train_inds]
 #' y_test <- y[test_inds]
 #' y_train <- y[train_inds]
+#' mu_test <- mu_x[test_inds]
+#' mu_train <- mu_x[train_inds]
+#' tau_test <- tau_x[test_inds]
+#' tau_train <- tau_x[train_inds]
+#' group_ids_test <- group_ids[test_inds]
+#' group_ids_train <- group_ids[train_inds]
+#' rfx_basis_test <- rfx_basis[test_inds,]
+#' rfx_basis_train <- rfx_basis[train_inds,]
+#' rfx_term_test <- rfx_term[test_inds]
+#' rfx_term_train <- rfx_term[train_inds]
 #' bcf_model <- bcf(X_train = X_train, Z_train = Z_train, y_train = y_train,
 #'                  pi_train = pi_train, group_ids_train = group_ids_train,
 #'                  rfx_basis_train = rfx_basis_train, X_test = X_test,
@@ -2177,6 +2222,7 @@ createBCFModelFromCombinedJsonString <- function(json_string_list){
   model_params[["sample_sigma_leaf_mu"]] <- json_object_default$get_boolean("sample_sigma_leaf_mu")
   model_params[["sample_sigma_leaf_tau"]] <- json_object_default$get_boolean("sample_sigma_leaf_tau")
   model_params[["include_variance_forest"]] <- include_variance_forest
+  model_params[["propensity_covariate"]] <- json_object_default$get_string("propensity_covariate")
   model_params[["has_rfx"]] <- json_object_default$get_boolean("has_rfx")
   model_params[["has_rfx_basis"]] <- json_object_default$get_boolean("has_rfx_basis")
   model_params[["num_rfx_basis"]] <- json_object_default$get_scalar("num_rfx_basis")
@@ -2263,7 +2309,7 @@ createBCFModelFromCombinedJsonString <- function(json_string_list){
     output[["rfx_samples"]] <- loadRandomEffectSamplesCombinedJson(json_object_list, 0)
   }
 
-  class(output) <- "bartmodel"
+  class(output) <- "bcf"
   return(output)
 }
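
Taken together, the R/bcf.R changes above route sampler options through a `params` list in the documented examples, record `keep_every` and `num_chains` in `model_params` and the serialized JSON, and tag models rebuilt from combined JSON strings with class "bcf". A condensed sketch of the resulting workflow, adapted from the roxygen examples in this commit; the simulated data is a toy stand-in rather than the package's example setup, and it assumes the package providing `bcf()` is attached:

n <- 100
p <- 5
X <- matrix(runif(n * p), ncol = p)
pi_x <- 0.25 + 0.5 * X[, 1]                      # toy propensity, illustration only
Z <- rbinom(n, 1, pi_x)
y <- 2 * X[, 2] + 0.5 * Z + rnorm(n)             # toy outcome, illustration only
bcf_params <- list(sample_sigma_leaf_mu = TRUE, sample_sigma_leaf_tau = FALSE)
bcf_model <- bcf(X_train = X, Z_train = Z, y_train = y, pi_train = pi_x,
                 num_gfr = 10, num_burnin = 0, num_mcmc = 10,
                 params = bcf_params)
preds <- predict(bcf_model, X, Z, pi_x)
bcf_json <- saveBCFModelToJsonString(bcf_model)  # per this commit, also stores "keep_every" and "num_chains"
bcf_model_roundtrip <- createBCFModelFromJsonString(bcf_json)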

R/calibration.R

Lines changed: 5 additions & 5 deletions
@@ -1,13 +1,13 @@
-#' Calibrate the scale parameter on an inverse gamma prior for the global error variance as in Chipman et al (2022) [1]
+#' Calibrate the scale parameter on an inverse gamma prior for the global error variance as in Chipman et al (2022)
 #'
-#' [1] Chipman, H., George, E., Hahn, R., McCulloch, R., Pratola, M. and Sparapani, R. (2022). Bayesian Additive Regression Trees, Computational Approaches. In Wiley StatsRef: Statistics Reference Online (eds N. Balakrishnan, T. Colton, B. Everitt, W. Piegorsch, F. Ruggeri and J.L. Teugels). https://doi.org/10.1002/9781118445112.stat08288
+#' Chipman, H., George, E., Hahn, R., McCulloch, R., Pratola, M. and Sparapani, R. (2022). Bayesian Additive Regression Trees, Computational Approaches. In Wiley StatsRef: Statistics Reference Online (eds N. Balakrishnan, T. Colton, B. Everitt, W. Piegorsch, F. Ruggeri and J.L. Teugels). https://doi.org/10.1002/9781118445112.stat08288
 #'
 #' @param y Outcome to be modeled using BART, BCF or another nonparametric ensemble method.
 #' @param X Covariates to be used to partition trees in an ensemble or series of ensemble.
-#' @param W [Optional] Basis used to define a "leaf regression" model for each decision tree. The "classic" BART model assumes a constant leaf parameter, which is equivalent to a "leaf regression" on a basis of all ones, though it is not necessary to pass a vector of ones, here or to the BART function. Default: `NULL`.
+#' @param W (Optional) Basis used to define a "leaf regression" model for each decision tree. The "classic" BART model assumes a constant leaf parameter, which is equivalent to a "leaf regression" on a basis of all ones, though it is not necessary to pass a vector of ones, here or to the BART function. Default: `NULL`.
 #' @param nu The shape parameter for the global error variance's IG prior. The scale parameter in the Sparapani et al (2021) parameterization is defined as `nu*lambda` where `lambda` is the output of this function. Default: `3`.
-#' @param quant [Optional] Quantile of the inverse gamma prior distribution represented by a linear-regression-based overestimate of `sigma^2`. Default: `0.9`.
-#' @param standardize [Optional] Whether or not outcome should be standardized (`(y-mean(y))/sd(y)`) before calibration of `lambda`. Default: `TRUE`.
+#' @param quant (Optional) Quantile of the inverse gamma prior distribution represented by a linear-regression-based overestimate of `sigma^2`. Default: `0.9`.
+#' @param standardize (Optional) Whether or not outcome should be standardized (`(y-mean(y))/sd(y)`) before calibration of `lambda`. Default: `TRUE`.
 #'
 #' @return Value of `lambda` which determines the scale parameter of the global error variance prior (`sigma^2 ~ IG(nu,nu*lambda)`)
 #' @export
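
The roxygen block above documents how `lambda` is calibrated from a linear-regression-based overestimate of `sigma^2`. A minimal sketch of that calculation, assuming the `sigma^2 ~ IG(nu, nu*lambda)` parameterization stated in the return-value docs and a `qgamma`-based inversion; the helper name `calibrate_lambda_sketch` is hypothetical, not the package's exported function:

calibrate_lambda_sketch <- function(y, X, W = NULL, nu = 3, quant = 0.9, standardize = TRUE) {
  if (standardize) y <- (y - mean(y)) / sd(y)
  # Linear-regression-based overestimate of the error variance sigma^2
  basis <- if (is.null(W)) as.matrix(X) else cbind(as.matrix(X), as.matrix(W))
  sigma2hat <- summary(lm(y ~ basis))$sigma^2
  # Choose lambda so that sigma2hat sits at the `quant` quantile of IG(nu, nu*lambda):
  # P(sigma^2 <= sigma2hat) = quant  <=>  nu*lambda = sigma2hat * qgamma(1 - quant, shape = nu)
  (sigma2hat * qgamma(1 - quant, shape = nu)) / nu
}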

R/forest.R

Lines changed: 2 additions & 2 deletions
@@ -256,7 +256,7 @@ ForestSamples <- R6::R6Class(
 },
 
 #' @description
-#' Add a numeric (i.e. X[,i] <= c) split to a given tree in the ensemble
+#' Add a numeric (i.e. `X[,i] <= c`) split to a given tree in the ensemble
 #' @param forest_num Index of the forest which contains the tree to be split
 #' @param tree_num Index of the tree to be split
 #' @param leaf_num Leaf to be split
@@ -695,7 +695,7 @@ Forest <- R6::R6Class(
 },
 
 #' @description
-#' Add a numeric (i.e. X[,i] <= c) split to a given tree in the ensemble
+#' Add a numeric (i.e. `X[,i] <= c`) split to a given tree in the ensemble
 #' @param tree_num Index of the tree to be split
 #' @param leaf_num Leaf to be split
 #' @param feature_num Feature that defines the new split
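
The two docstrings above describe the same numeric split rule. As a plain-R illustration of that rule (not the package's R6 API; `numeric_split_indices` is a hypothetical helper):

# Rows with X[, feature_num] <= split_value go to the left child; the rest go right.
numeric_split_indices <- function(X, feature_num, split_value) {
  goes_left <- X[, feature_num] <= split_value
  list(left = which(goes_left), right = which(!goes_left))
}
# e.g. numeric_split_indices(matrix(runif(20), ncol = 2), feature_num = 1, split_value = 0.5)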

R/model.R

Lines changed: 1 addition & 0 deletions
@@ -153,6 +153,7 @@ createRNG <- function(random_seed = -1){
 #' @param alpha Root node split probability in tree prior
 #' @param beta Depth prior penalty in tree prior
 #' @param min_samples_leaf Minimum number of samples in a tree leaf
+#' @param max_depth Maximum depth of any tree in the ensemble in the mean model. Setting to ``-1`` does not enforce any depth limits on trees.
 #'
 #' @return `ForestModel` object
 #' @export
