#' Sample from the posterior distribution of a Bayesian variable selection model using weighted Tempered Gibbs Sampling
#'
#' Performs Bayesian variable selection in linear regression
#' using discrete spike-and-slab priors. Posterior sampling and calculation
#' of marginal posterior inclusion probabilities (PIPs) for the explanatory
#' variables are done using the weighted Tempered Gibbs Sampling algorithm
#' of Zanella and Roberts (2019).
#'
#' The evaluated linear regression model can be written as
#' \deqn{Y|\beta_\gamma, \gamma, \sigma^2 \sim N(X_\gamma\beta_\gamma, \sigma^2 I_n)}
#' \deqn{\beta_\gamma|\gamma, \sigma^2 \sim N(0, \sigma^2\Sigma_\gamma)}
#' \deqn{p(\sigma^2) \propto 1/\sigma^2}
#' \deqn{\gamma_i|h \sim Bern(h) \textrm{ i.i.d. for } i = 1,...,p}
#' where the posterior probability of interest is \eqn{p(\gamma|Y)}.
#'
#' The prior covariance matrix of the coefficients
#' of the selected regressors is \eqn{\Sigma_\gamma = c(X_\gamma^TX_\gamma)^{-1}}, i.e. the g-prior
#' recommended by Zellner (1986).
#'
#' The Rao-Blackwellised estimators provide a vector of inclusion probabilities for
#' each of the regressors, \eqn{\{p(\gamma_i=1|Y)\}_{i=1}^{p}}.
#'
#' \eqn{h} can be a fixed value or \eqn{h \sim Beta(a,b)}.
#'
#' The sampling algorithm flips one of the p binary values of \eqn{\gamma} by sampling an index \eqn{i}
#' from \eqn{1,...,p} with probability proportional to \eqn{p_i(\gamma)=p(\gamma_i|\gamma_{-i},Y)^{-1}}
#' in the case of Tempered Gibbs Sampling and proportional to
#' \eqn{p_i(\gamma)=(p(\gamma_i=1|\gamma_{-i},Y)+k/p)/p(\gamma_i|\gamma_{-i},Y)}
#' in the case of weighted Tempered Gibbs Sampling. The importance weight of the new state of
#' the Markov chain is proportional to \eqn{(\sum_{i=1}^p p_i(\gamma))^{-1}}.
#'
#' For more information on weighted Tempered Gibbs Sampling, please refer to Zanella and
#' Roberts (2019).
#'
#' @param y a vector of n observations (dependent variable) with dimension (nx1).
#' @param X a matrix of p regressors (independent variables) with dimension (nxp).
#' @param c a real number greater than 0 which serves as the constant of proportionality
#' in the specification of the prior covariance matrix of the coefficients of the
#' selected regressors in the linear regression. The default is \code{NULL}, which
#' yields the constant of proportionality recommended for Zellner's g-prior, i.e. c = n.
#' @param h either a single real number greater than 0 and smaller than 1 or a vector
#' of two real values, both greater than 0. This parameter specifies the prior
#' inclusion probability of the regressors, which is identical for all regressors.
#' In the former case, the prior probability is set to a fixed value. In the latter
#' case, the prior probability follows a Beta distribution with the specified parameters.
#' The default is \code{c(1,1)}, i.e. a uniform Beta prior.
#' @param n_iter a positive integer specifying the number of iterations of the Markov
#' chain. The default is 2000.
#' @param burn_in either an integer greater than 1 or a real number greater than 0 and
#' smaller than 1, specifying the number of burn-in iterations of the Markov chain. In
#' the former case, the number of burn-in iterations is set to the given integer. In the
#' latter case, it is the specified fraction of \code{n_iter}.
#' The default is 0.2.
#' @param k_weight a nonnegative real number which, in the case of \code{weighted = TRUE},
#' controls the tradeoff between exploration and exploitation in the choice of the variable
#' to be flipped at each iteration. A larger \code{k_weight} favours exploration. The default is 0.
#' @param weighted logical, with default \code{TRUE}, indicating whether to perform
#' weighted Tempered Gibbs Sampling if \code{TRUE} or Tempered Gibbs Sampling if
#' \code{FALSE}.
#'
#' @return A list with named objects:
#' \item{PIP }{a vector (px1) containing Rao-Blackwellised estimators of
#' the marginal PIPs for each of the p regressors in \code{X}.}
#' \item{states }{a list containing the elements necessary to reproduce the samples of
#' the Markov chain. These elements are:\cr
#' "start" - the starting value of \eqn{\gamma} after the burn-in period.\cr
#' "sample_weights" - a vector (n_iterx1) of importance weights for \eqn{\gamma} at each step of
#' the Markov chain.\cr
#' "indices_sequence" - a vector (n_iterx1) of indices ranging over \eqn{1,...,p} indicating the
#' element of \eqn{\gamma} flipped at each step of the Markov chain.}
#'
#' @export
#'
#' @references
#' Zanella, G. and Roberts, G. (2019). Scalable importance tempering and Bayesian variable selection. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 489–517.
#'
#' Zellner, A. (1986). On Assessing Prior Distributions and Bayesian Regression Analysis with g-Prior Distributions. In: Goel, P. and Zellner, A., Eds., Bayesian Inference and Decision Techniques: Essays in Honor of Bruno de Finetti, Elsevier Science Publishers, Inc., New York, 233-243.
#'
#' @seealso \code{\link{createSamples}} for creating the samples of the Markov chain and their weights used to calculate the PIPs.
#'
#' @examples
#' # Posterior inclusion probabilities of car characteristics for describing mileage
#'
#' # load data
#' data(mtcars)
#'
#' # create the X matrix and y vector with zero mean for all variables
#' X <- t(t(mtcars[,-1]) - colMeans(mtcars[,-1]))
#' y <- mtcars$mpg - mean(mtcars$mpg)
#'
#' mtcars.output <- samplingBVS(y, X)
#'
#' names(mtcars.output$PIP) <- names(mtcars[,-1])
#' print(mtcars.output$PIP)
samplingBVS <- function(y,             # vector of observations
                        X,             # matrix of regressors
                        c = NULL,      # constant for the prior covariance matrix
                        h = c(1,1),    # if a vector (2x1), the parameters of a Beta prior
                        n_iter = 2000, # number of effective iterations
                        burn_in = 0.2, # fraction (>0, <1) or number (>1) of burn-in iterations
                        k_weight = 0, weighted = TRUE) { # weighted TGS flag and its parameter

  ### wTGS algorithm for Bayesian variable selection problems

  ## throw errors for invalid parameters
  if (burn_in < 0) stop("burn_in must be a real number between 0 and 1 or an integer larger than 1")
  if (any(h < 0)) stop("h must be a vector of two positive parameters of a Beta distribution or a real number between 0 and 1")
  if (!is.null(c))
    if (c <= 0) stop("c must be larger than 0")
  if (length(h) > 2 || length(h) == 0) {
    stop("h must be a vector of two positive parameters of a Beta distribution or a real number between 0 and 1")
  } else if (length(h) == 1) {
    if (h > 1)
      stop("h must be a vector of two positive parameters of a Beta distribution or a real number between 0 and 1")
  }
  if (n_iter <= 0) stop("n_iter must be a positive integer")
  if (k_weight < 0) stop("k_weight must not be negative")

  ## convert a fractional burn_in into a number of iterations
  if (burn_in < 1) burn_in <- n_iter * burn_in

  ## check dimensions of y and X
  n <- length(y)
  if (nrow(X) != n) stop("y and X must have the same number of observations")
  if (is.null(c)) c <- n
  p <- ncol(X)

  ## split h into Beta parameters (a fixed inclusion probability is passed on with h2 = 0)
  if (length(h) == 2) {
    h1 <- h[1]
    h2 <- h[2]
  } else {
    h1 <- h
    h2 <- 0
  }

  ## run the (weighted) Tempered Gibbs Sampler
  output <- wTGS(as.matrix(X), as.vector(y), n, p, n_iter, burn_in, h1, h2, c, k_weight, weighted)

  return(list(PIP = output[[1]],
              states = list(start = output[[2]],
                            sample_weights = output[[3]],
                            indices_sequence = output[[4]])))
}
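# ---------------------------------------------------------------------------
# Illustrative sketch (NOT part of the package API) of one step of weighted
# Tempered Gibbs Sampling as described in the documentation above. The names
# `wtgs_step` and `cond_prob` are hypothetical: `cond_prob(i, gamma)` is
# assumed to return p(gamma_i = 1 | gamma_{-i}, Y), a quantity the real
# sampler (wTGS) computes internally from the marginal likelihood.

```r
wtgs_step <- function(gamma, cond_prob, k_weight = 0) {
  p <- length(gamma)
  # conditional probability that each gamma_i equals 1 given the rest
  cond1 <- vapply(seq_len(p), function(i) cond_prob(i, gamma), numeric(1))
  # probability of the *current* value of each gamma_i
  cond_current <- ifelse(gamma == 1, cond1, 1 - cond1)
  # flip rates p_i(gamma) = (p(gamma_i = 1 | .) + k/p) / p(gamma_i | .)
  rates <- (cond1 + k_weight / p) / cond_current
  i <- sample.int(p, 1, prob = rates)  # pick the coordinate to flip
  gamma[i] <- 1 - gamma[i]             # flip it
  # importance weight of the new state is proportional to 1 / sum_i p_i(gamma)
  list(gamma = gamma, weight = 1 / sum(rates), flipped = i)
}
```

# With flat conditionals (all 0.5) and k_weight = 0, every flip rate equals 1,
# so each coordinate is equally likely to flip and the weight is 1/p.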
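# ---------------------------------------------------------------------------
# Illustrative sketch (NOT part of the package API) of how Rao-Blackwellised
# PIPs are formed from importance-weighted samples:
#   PIP_i = sum_t w_t * p(gamma_i = 1 | gamma_{-i}^(t), Y) / sum_t w_t.
# `rao_blackwell_pip` is a hypothetical name; `cond_probs` is assumed to be a
# T x p matrix whose (t, i) entry is p(gamma_i = 1 | gamma_{-i}^(t), Y), and
# `weights` the vector of importance weights of the T sampled states.

```r
rao_blackwell_pip <- function(cond_probs, weights) {
  # weights (length T) recycle down the columns of the T x p matrix,
  # so each row t is scaled by w_t before the weighted average is taken
  colSums(cond_probs * weights) / sum(weights)
}
```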