Skip to content

Commit cdaaed2

Browse files
committed
First commit
1 parent 6bbb75d commit cdaaed2

17 files changed

+1343
-2
lines changed

DESCRIPTION

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Package: scaleBVS
2+
Type: Package
3+
Title: weighted Tempered Gibbs Sampling for Bayesian Variable Selection
4+
Version: 1.0
5+
Date: 2020-01-15
6+
Authors@R: c(
7+
person("Giacomo", "Zanella", email = "[email protected]", role = "aut"),
8+
person("Alberto", "Cabezas Gonzalez", email = "[email protected]", role = c("aut", "cre")))
9+
Maintainer: Alberto Cabezas Gonzalez <[email protected]>
10+
Description: Performs Bayesian variable selection in linear regression contexts using discrete spike and slab priors.
11+
Posterior sampling and calculation of marginal Posterior Inclusion Probabilities (PIPs)
12+
for a explanatory variables is done using the weighted Tempered Gibbs Sampling algorithm of Zanella and Roberts (2019).
13+
License: GPL (>= 2)
14+
Imports: Rcpp (>= 1.0.1), RcppEigen (>= 0.3.3.5.0)
15+
LinkingTo: Rcpp, RcppEigen
16+
RoxygenNote: 6.1.1

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
useDynLib(scaleBVS, .registration=TRUE)
2+
exportPattern("^[[:alpha:]]+")
3+
import(RcppEigen)
4+
importFrom(Rcpp, evalCpp)

R/RcppExports.R

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
2+
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3+
4+
#' auxiliary function that does weighted Tempered Gibbs Sampling from the posterior distribution of Gamma
5+
#'
6+
#' Performs weighted Tempered Gibbs Sampling
7+
#'
8+
#' @param X_ a matrix of p regressors (independent variables) with dimension (nxp).
9+
#' @param y_ a vector of n observations (dependent variable) with dimensions (nx1).
10+
#' @param n_ number of observations n.
11+
#' @param p_ number of regressors p.
12+
#' @param n_iter a positive integer specifying the number of iterations for the Markov Chain.
13+
#' @param burnin_ a positive integer specifying the number of burn in iterations for the Markov Chain.
14+
#' @param h1_ first parameter of the beta distribution defining h. If value of h fixed, make it h1.
15+
#' @param h2_ second parameter of the beta distribution defining h. If value of h fixed, make h2=0.
16+
#' @param c_ constant of proportionality to prior covariace matrix.
17+
#' @param k_ k_weighted parameter for weighted sampling.
18+
#' @param weighted boolean indicating if weighted sampling.
19+
#'
20+
#' @return List with values for PIP and required elements to reproduce the samples.
21+
wTGS <- function(X_, y_, n_, p_, n_iter, burnin_, h1_, h2_, c_, k_, weighted) {
22+
.Call(`_scaleBVS_wTGS`, X_, y_, n_, p_, n_iter, burnin_, h1_, h2_, c_, k_, weighted)
23+
}
24+

R/createSamples.r

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#' creates the sample matrix from the posterior distribution of a Bayesian Variable Selection model
2+
#'
3+
#'
4+
#' @param samples a list of states from the output of \code{samplingBVS} containing the elements necessary to reproduce the samples of
5+
#' the Markov Chain. These elements are: \cr
6+
#' "start" - starting value for \eqn{\gamma} after the burnin period. \cr
7+
#' "sample_weights" - a vector (n_iterx1) of weights for \eqn{\gamma} at each step of
8+
#' the Markov Chain. \cr
9+
#' "indices_sequence" - a vector (n_iterx1) of indices ranging \eqn{1,...,p} indicating the
10+
#' element of \eqn{\gamma} flipped at each step of the Markov Chain.
11+
#' @param thin an integer greater than or equal to 1 indicating the period when to save
12+
#' samples and sample weights from the Markov Chain, i.e. every how many steps of the
13+
#' Markov Chain the samples and sample weights should be recorded. The default is 1.
14+
#'
15+
#' @return A list with named objects:
16+
#' \item{samples }{a matrix (n_iterxp) of \eqn{\gamma} at each step of the Markov Chain.}
17+
#' \item{weights }{a vector (n_iterx1) of weights for \eqn{\gamma} at each step of
18+
#' the Markov Chain.}
19+
#'
20+
#' @export
21+
#'
22+
#' @seealso \code{\link{samplingBVS}} for running weighted Tempered Gibbs Sampling and caluculating Posterior Inclusion Probabiltiies.
23+
#'
24+
#' @examples
25+
#' #Samples of inclusion of characteristics of cars on describing mileage
26+
#' #load data
27+
#' data(mtcars)
28+
#'
29+
#' #create X matrix and y vector with zero mean for all regressors
30+
#' X <- t(t(mtcars[,-1]) - colMeans(mtcars[,-1]))
31+
#' y <- mtcars$mpg - mean(mtcars$mpg)
32+
#'
33+
#' mtcars.output <- samplingBVS(y, X)
34+
#' mtcars.samples <- createSamples(mtcars.output$states)
35+
#'
36+
#' #Samples
37+
#' head(mtcars.samples$samples)
38+
createSamples <- function(samples, #list outputted from the main function
39+
thin = 1) { #Thinning of the samples
40+
41+
if (thin < 1) stop("thin must be an integer greater than or equal to 1")
42+
43+
check <- names(samples)
44+
if (!all(check %in% c("start", "sample_weights", "indices_sequence")))
45+
if (!all(check %in% c("PIP", "states")))
46+
stop("samples must be from the output of main sampling function (samplingBVS).")
47+
else
48+
samples <- samples$states
49+
50+
n_iter <- length(samples$indices_sequence)
51+
p <- length(samples$start)
52+
if (n_iter%/%thin*p > 10e8) warning(paste("Samples will be a", paste(n_iter%/%thin, p, sep = "x"), "matrix, consider thinning."))
53+
54+
states <- matrix(NA, ncol = p, nrow = n_iter%/%thin)
55+
weights <- rep(NA, n_iter%/%thin)
56+
57+
gamma <- samples$start
58+
for (t in 1:n_iter) {
59+
60+
gamma[samples$indices_sequence[t]] <- 1 - gamma[samples$indices_sequence[t]]
61+
62+
if (t %% thin == 0) {
63+
states[(t/thin),] <- gamma
64+
weights[t/thin] <- samples$sample_weights[t]
65+
}
66+
}
67+
68+
return(list(samples = states,
69+
weights = weights))
70+
}

R/package.R

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#' Bayesian Variable Selection for the linear model via weighted Tempered Gibbs Sampling
2+
#'
3+
#' Perform Bayesian variable selection in linear regression contexts using discrete
4+
#' spike and slab priors. Posterior sampling and calculation of marginal Posterior
5+
#' Inclusion Probabilities (PIPs) for explanatory variables is done using the
6+
#' weighted Tempered Gibbs Sampling algorithm of Zanella and Roberts (2019).
7+
#'
8+
#' Bayesian Variable Selection models provide a natural and coherent framework
9+
#' to select a subset of explanatory variables in linear regression contexts.
10+
#' The binary inclusion variables for each regressor typically possess pairwise
11+
#' and/or negative dependence structures conjectured to be conductive to
12+
#' successful application of weighted Tempered Gibbs Sampling (Zanella and Roberts, 2019).
13+
#'
14+
#' The use of weighted Tempered Gibbs Sampling overcomes the challenges of
15+
#' high-dimensional Bayesian Variable selection models by an efficient computation
16+
#' of the full conditional distribution of the binary inclusion probabilities.
17+
#' These full conditionals allow for the calculation of Rao-Blackwellised
18+
#' estimators of the marginal Posterior Inclusion Probabilities for each regressor.
19+
#' These estimates quantify the uncertainties of the true underlying
20+
#' linear model.
21+
#'
22+
#' This package has been concieved as an implementation of the weighted
23+
#' Tempered Gibbs Sampling algorithm to Bayesian Variable Selection models in order
24+
#' to sample from the distribution of its binary inclusion variables and provide
25+
#' a formal Bayesian answer to variable selection problems.
26+
#'
27+
#' \tabular{ll}{ Package: \tab scaleBVS\cr Type: \tab Package\cr Version:
28+
#' \tab 1.0.0\cr Date: \tab 2020-01-20\cr License: \tab GPL-2\cr }
29+
#'
30+
#' @name scaleBVS-package
31+
#' @aliases scaleBVS-package scaleBVS
32+
#' @docType package
33+
#' @author Giacomo Zanella and Alberto Cabezas Gonzalez
34+
#'
35+
#' Maintainer: Alberto Cabezas Gonzalez \email{[email protected]}
36+
#'
37+
#' @seealso \code{\link{samplingBVS}}, \code{\link{createSamples}}
38+
#'
39+
#' @references
40+
#' Zanella, G. and Roberts, G. (2019). Scalable importance tempering and Bayesian variable selection. Journal of the Royal Statistical Society: Series B (Statistical Methodology): 489–517. Crossref. Web.
41+
#'
42+
#' Zellner, A. (1986). On Assessing Prior Distributions and Bayesian Regression Analysis with g-Prior Distributions. In: Goel, P. and Zellner, A., Eds., Bayesian Inference and Decision Techniques: Essays in Honor of Bruno de Finetti, Elsevier Science Publishers, Inc., New York, 233-243.
43+
#'
44+
#' @keywords package
45+
NULL

R/samplingBVS.r

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
#' samples from the posterior distribution of a Bayesian Variable Selection model using weighted Tempered Gibbs Sampling
2+
#'
3+
#' Perform Bayesian variable selection in linear regression contexts
4+
#' using discrete spike and slab priors. Posterior sampling and calculation
5+
#' of marginal Posterior Inclusion Probabilities (PIPs) for explanatory
6+
#' variables is done using the weighted Tempered Gibbs Sampling algorithm
7+
#' of Zanella and Roberts (2019).
8+
#'
9+
#' The evaluated linear regression model can be written as
10+
#' \deqn{Y|\beta_\gamma, \gamma, \sigma^2 ~ N(X_\gamma\beta_\gamma,\sigma^2(I_n))}
11+
#' \deqn{\beta_\gamma|\gamma, \sigma^2 ~ N(0,\sigma^2\Sigma_\gamma)}
12+
#' \deqn{p(\sigma^2) \propto 1/\sigma^2}
13+
#' \deqn{\gamma_i|h iid~ Bern(h) i = 1,...,p}
14+
#' where the posterior probability of interest is \eqn{p(\gamma|Y)}.
15+
#'
16+
#' The prior covariance matrix of the coefficients
17+
#' of the selected regressors is \eqn{\Sigma_\gamma = c(X_\gamma^TX_\gamma)}, i.e. the g-prior recommended by Zellner
18+
#' (1986).
19+
#'
20+
#' The Rao-Blackwellized estimators provide a vector with inclusion probabilities for
21+
#' each of the regressors, \eqn{{p(\gamma_i=1|Y)}_{i=1}^{p}}.
22+
#'
23+
#' \eqn{h} can be a fixed value or \eqn{h ~ Beta(a,b)}.
24+
#'
25+
#' The sampling algorithm flips one of p binary values of \eqn{\gamma} by sampling \eqn{i}
26+
#' from \eqn{1,...,p} proportionally to \eqn{p_i(\gamma)=p(\gamma_i|\gamma_{-i},Y)^{-1}}
27+
#' in the case of Tempered Gibbs Sampling and proportionally to
28+
#' \eqn{p_i(\gamma)=(p(\gamma_i=1|\gamma_{-i},Y)+k/p)/p(\gamma_i|\gamma_{-i},Y)}
29+
#' in the case of weighted Tempered Gibbs Sampling. Also, the weight of the new state of
30+
#' the Markov Chain is proportional to \eqn{(\sum_{i=1}^p p_i(\gamma))^{-1}}.
31+
#'
32+
#' For more information on weighted Tempered Gibbs Sampling, please refer to Zanella and
33+
#' Roberts (2019).
34+
#'
35+
#'
36+
#' @param y a vector of n observations (dependent variable) with dimensions (nx1).
37+
#' @param X a matrix of p regressors (independent variables) with dimension (nxp).
38+
#' @param c a real number greater than 0 which serves as a constant of proportionality
39+
#' to the specification of the prior covariance matrix of the coefficients of the
40+
#' selected regressors in the linear regression. The default is \code{NULL} which yields the recommended constant of proportionality for Zellner's g-prior
41+
#' , i.e. c = n.
42+
#' @param h either a real number greater than 0 and smaller than 1 or a vector of real
43+
#' values, both greater than 0. This parameter specifies the prior information of the
44+
#' inclusion probability of the regressors which is identical for all regressors.
45+
#' In the former case, the prior probability is set to a fixed value. In the latter
46+
#' case, the prior probability is a Beta distribution with the specified parameters.
47+
#' The default is the uniform distribution in terms of a Beta prior \code{c(1,1)}.
48+
#' @param n_iter a positive integer specifying the number of iterations for the Markov
49+
#' Chain. The default is 2000.
50+
#' @param burn_in either an integer greater than 1 or a real number greater than 0 and
51+
#' smaller than 1. Specifies the number of burn in iterations for the Markov Chain. In
52+
#' the former case the burn in iterations are set the fixed integer. In the latter case
53+
#' the number of iterations are the specified percentage of the number of iterations.
54+
#' The default is 0.2.
55+
#' @param k_weight a real number greater than 0 which, in the case of \code{weighted = TRUE},
56+
#' controls the tradeoff between exploration and exploitation in the choice of the variable
57+
#' to be flipped at each iteration. A larger \code{k_weight} favours exploration. The default is 0.
58+
#' @param weighted logical, with default \code{TRUE}, indicating whether to perform
59+
#' weighted Tempered Gibbs Sampling if \code{TRUE} or Tempered Gibbs Sampling if
60+
#' \code{FALSE}.
61+
#'
62+
#' @return A list with named objects:
63+
#' \item{PIP }{a vector (px1) containing Rao-Blackwellised estimators of
64+
#' the marginal PIPs for each of the p regressors in \code{X}.}
65+
#' \item{states }{a list containing the elements necessary to reproduce the samples of
66+
#' the Markov Chain. These elements are:\cr
67+
#' "start" - starting value for \eqn{\gamma} after the burnin period.\cr
68+
#' "sample_weights" - a vector (n_iterx1) of weights for \eqn{\gamma} at each step of
69+
#' the Markov Chain.\cr
70+
#' "indices_sequence" - a vector (n_iterx1) of indices ranging \eqn{1,...,p} indicating the
71+
#' element of \eqn{\gamma} flipped at each step of the Markov Chain.}
72+
#'
73+
#' @export
74+
#'
75+
#' @references
76+
#' Zanella, G. and Roberts, G. (2019). Scalable importance tempering and Bayesian variable selection. Journal of the Royal Statistical Society: Series B (Statistical Methodology): 489–517. Crossref. Web.
77+
#'
78+
#' Zellner, A. (1986). On Assessing Prior Distributions and Bayesian Regression Analysis with g-Prior Distributions. In: Goel, P. and Zellner, A., Eds., Bayesian Inference and Decision Techniques: Essays in Honor of Bruno de Finetti, Elsevier Science Publishers, Inc., New York, 233-243.
79+
#'
80+
#' @seealso \code{\link{createSamples}} for creating the samples of the Markov Chain and their weights used to calculate the PIPs.
81+
#'
82+
#' @examples
83+
#' #Posterior inclusion probabilities of characteristics of cars on describing mileage
84+
#'
85+
#' #load data
86+
#' data(mtcars)
87+
#'
88+
#' #create X matrix and y vector with zero mean for all regressors
89+
#' X <- t(t(mtcars[,-1]) - colMeans(mtcars[,-1]))
90+
#' y <- mtcars$mpg - mean(mtcars$mpg)
91+
#'
92+
#' mtcars.output <- samplingBVS(y, X)
93+
#'
94+
#' names(mtcars.output$PIP) <- names(mtcars[,-1])
95+
#' print(mtcars.output$PIP)
96+
samplingBVS <- function(y, #vector of observations
97+
X, #matrix of regressors
98+
c = NULL, #covariance matric and constant
99+
h = c(1,1), #if vector 2x1 then parameters of Beta
100+
n_iter = 2000, #number of effective iterations
101+
burn_in = 0.2, #percentage(>0,<1)/number(>1) of burnin iterations
102+
k_weight = 0, weighted = TRUE) { #weightedTGS and parameter
103+
104+
### wTGS algorithm for Bayesian variable selection problems
105+
106+
## Set options
107+
if (burn_in < 1) burn_in <- n_iter*burn_in
108+
if (any(h < 0)) stop("h must be a vector of two positive parameters of a Beta distribution or a real number between 0 and 1")
109+
110+
## throw errors for parameters
111+
if (burn_in < 0) stop("Burn in must be a real number between 0 and 1 or an integer larger than 1")
112+
if (!is.null(c))
113+
if (c <= 0) stop("c must be larger than 0")
114+
if (length(h) > 2 | length(h) == 0)
115+
stop("h must be a vector of two positive parameters of a Beta distribution or a real number between 0 and 1")
116+
else if (length(h) == 1)
117+
if (h > 1)
118+
stop("h must be a vector of two positive parameters of a Beta distribution or a real number between 0 and 1")
119+
120+
if (n_iter < 0) stop("n_iter must be an positive integer")
121+
if (k_weight < 0) stop("k_weight must be larger than 0")
122+
123+
## check dimensions of y, X and the initial gamma
124+
n <- length(y)
125+
if (nrow(X) != n) stop("y and X should have the same number of observations")
126+
if (is.null(c)) c <- n
127+
p <- ncol(X)
128+
129+
if (length(h) == 2) {
130+
h1 <- h[1]
131+
h2 <- h[2]
132+
} else {
133+
h1 <- h
134+
h2 <- 0
135+
}
136+
137+
output <- wTGS(as.matrix(X), as.vector(y), n, p, n_iter, burn_in, h1, h2, c, k_weight, weighted)
138+
139+
return(list(PIP = output[[1]],
140+
states = list(start = output[[2]],
141+
sample_weights = output[[3]],
142+
indices_sequence = output[[4]])))
143+
}

0 commit comments

Comments
 (0)