Skip to content

Commit f7ef841

Browse files
committed
merged
2 parents cea0654 + f6845a4 commit f7ef841

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+3550
-470
lines changed

.Rbuildignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,7 @@
11
^.*\.Rproj$
22
^\.Rproj\.user$
3+
^data-raw$
4+
.gitignore
5+
^\.travis\.yml$
6+
README.Rmd
7+
README.md

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
.Rhistory
33
.RData
44
.Ruserdata
5-
inst/doc
65
README.html
76
.DS_Store
7+
*.pdf
8+
*.tex
9+
vignettes/cache
10+
vignettes/gppm.bbl

.travis.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2+
3+
language: R
4+
sudo: false
5+
cache: packages

DESCRIPTION

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,28 @@
1-
Package: gppmr
2-
Title: Preliminary Implementation of GPPM
3-
Version: 0.1.0
4-
Authors@R: c(person(c("Julian", "D."), "Karch", email="[email protected]", role=c("aut", "cre", "cph")))
5-
Description: Preliminary Implementation of Gaussian Process Panel Modeling.
1+
Package: gppm
2+
Version: 0.2.0.9000
3+
Title: Gaussian Process Panel Modeling
4+
Description: Provides an implementation of Gaussian process panel modeling (GPPM), which enables: Gaussian process based modeling of longitudinal panel data (GPPM), regular Gaussian process regression with flexible model specification, and Gaussian process regression with multi-task learning.
5+
Authors@R: c(person(c("Julian", "D."), "Karch", email="[email protected]", role=c("aut", "cre", "cph")))
66
Depends:
7-
R (>= 3.3.1),
8-
OpenMx (>= 2.6.9),
7+
R (>= 3.1.0),
8+
Rcpp (>= 0.12.17)
99
Imports:
10-
MASS
11-
License: GPL-3
12-
Encoding: UTF-8
10+
rstan (>= 2.17.3),
11+
ggplot2 (>= 2.2.1),
12+
MASS (>= 7.3-49),
13+
ggthemes (>= 3.5.0),
14+
mvtnorm (>= 1.0-8),
15+
stats,
16+
methods
17+
Suggests:
18+
testthat (>= 2.0.0),
19+
knitr (>= 1.20),
20+
rmarkdown (>= 1.10),
21+
roxygen2 (>= 6.0.1)
22+
License: GPL-3 | file LICENSE
1323
LazyData: true
14-
URL: https://github.com/karchjd/gppmr
15-
BugReports: https://github.com/karchjd/gppmr/issues
24+
URL: https://github.com/karchjd/gppm
25+
BugReports: https://github.com/karchjd/gppm/issues
1626
RoxygenNote: 6.0.1
17-
Suggests: testthat (>= 1.0.2),
18-
knitr,
19-
rmarkdown,
20-
VignetteBuilder: knitr
27+
Roxygen: list(markdown = TRUE)
28+
Encoding: UTF-8

NAMESPACE

100755100644
Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,42 @@
11
# Generated by roxygen2: do not edit by hand
22

3-
export(generateAR)
4-
export(generateLGCM)
5-
export(gppFit)
6-
export(gppModel)
7-
export(simulateData)
8-
import(OpenMx)
3+
S3method(coef,GPPM)
4+
S3method(confint,GPPM)
5+
S3method(fit,GPPM)
6+
S3method(fitted,GPPM)
7+
S3method(logLik,GPPM)
8+
S3method(plot,GPPMPred)
9+
S3method(plot,LongData)
10+
S3method(predict,GPPM)
11+
S3method(print,summary.GPPM)
12+
S3method(simulate,GPPM)
13+
S3method(summary,GPPM)
14+
S3method(vcov,GPPM)
15+
export(SE)
16+
export(accuracy)
17+
export(covf)
18+
export(createLeavePersonsOutFolds)
19+
export(crossvalidate)
20+
export(datas)
21+
export(fit)
22+
export(getIntern)
23+
export(gppm)
24+
export(gppmControl)
25+
export(maxnObs)
26+
export(meanf)
27+
export(nObs)
28+
export(nPars)
29+
export(nPers)
30+
export(nPreds)
31+
export(parEsts)
32+
export(pars)
33+
export(preds)
34+
import(Rcpp)
35+
import(ggplot2)
36+
import(ggthemes)
37+
import(rstan)
38+
import(stats)
39+
importFrom(MASS,mvrnorm)
40+
importFrom(methods,is)
41+
importFrom(mvtnorm,dmvnorm)
42+
importFrom(utils,capture.output)

R/FitRes.R

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
new_FitRes <- function(paraEsts,vcov,LL,nPar,mu,Sigma){
2+
stopifnot(is.double(paraEsts)) #maximum likelihood estimates
3+
stopifnot(is.matrix(vcov)) #covariance of sampling distribution of maximum likelihood estimator
4+
stopifnot(is.double(LL) && length(LL)==1)
5+
stopifnot(is.integer(nPar) && length(nPar)==1)
6+
stopifnot(is.list(mu))
7+
stopifnot(is.list(Sigma))
8+
9+
structure(list(
10+
paraEsts=paraEsts,
11+
vcov=vcov,
12+
LL=LL,
13+
nPar=nPar,
14+
mu=mu,
15+
Sigma=Sigma
16+
),
17+
class='StanData'
18+
)
19+
}
20+
21+
extractMoments <- function(stanOutParas,dataStats){
22+
mu <- rep(list(numeric(dataStats$maxTime)),dataStats$nPer)
23+
Sigma <- rep(list(matrix(nrow=dataStats$maxTime,ncol = dataStats$maxTime)),dataStats$nPer)
24+
for (iPer in seq_len(dataStats$nPer)){
25+
mu[[iPer]] <- stanOutParas$mu[iPer,1:dataStats$nTime[iPer]]
26+
Sigma[[iPer]] <- stanOutParas$Sigma[iPer,1:dataStats$nTime[iPer],1:dataStats$nTime[iPer]]
27+
}
28+
res <- list(mu=mu,Sigma=Sigma,IDs=attr(dataStats,'IDs'))
29+
}
30+
31+
32+
extractLL <- function(meanCov,dataStats){
33+
Y <- dataStats$Y
34+
ll <- 0
35+
for (i in 1:length(Y)){
36+
ll <- ll + mvtnorm::dmvnorm(Y[[i]][1:dataStats$nTime[i]],mean=meanCov$mu[[i]],sigma=as.matrix(meanCov$Sigma[[i]]),log=TRUE)
37+
}
38+
ll
39+
}
40+
41+
extractFitRes <- function(stanOut,parsedModel,dataStats){
42+
paraEsts <- stanOut$par[parsedModel$params]
43+
theNames <- names(paraEsts)
44+
paraEsts <- as.numeric(paraEsts)
45+
names(paraEsts) <- theNames
46+
vcov <- as.matrix(NA)
47+
if (!is.null(stanOut$hessian)){
48+
tryCatch(vcov <- solve(-stanOut$hessian),error=function(e){})
49+
}
50+
51+
nPar <- length(parsedModel$params)
52+
meanCov <- extractMoments(stanOut$par,dataStats)
53+
LL <- extractLL(meanCov,dataStats)
54+
mu=meanCov$mu
55+
Sigma=meanCov$Sigma
56+
new_FitRes(paraEsts,vcov,LL,nPar,mu,Sigma)
57+
}

R/GPPM.R

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
new_GPPM <- function(mFormula,cFormula,myData,control){
2+
stopifnot(is.character(mFormula))
3+
stopifnot(is.character(cFormula))
4+
stopifnot(is.data.frame(myData))
5+
stopifnot(class(control)=='GPPMControl')
6+
7+
structure(list(
8+
mFormula=mFormula, #formula for the mean
9+
cFormula=cFormula, #formula for the covariance
10+
data=myData, #data must be a data frame
11+
control=control, #list of controls
12+
parsedModel=NA, #model in a parsed format
13+
dataForStan=NA, #data as used for stan
14+
stanModel=NA, #generated stan Model
15+
stanOut =NA, #stan output
16+
fitRes=NA #all the fitting results
17+
),class='GPPM')
18+
}
19+
20+
21+
#' Define a Gaussian process panel model
22+
#'
23+
#' This function is used to specify a Gaussian process panel model (GPPM),
24+
#' which can then be fit using \code{\link{fit.GPPM}}.
25+
#'
26+
#' @param mFormula character string. Contains the specification of the mean function. See details for more information.
27+
#'
28+
#' @param cFormula character string. Contains the specification of the covariance function. See details for more information.
29+
#'
30+
#' @param myData data frame. Contains the data to which the model is fitted. Must be in the long-format.
31+
#'
32+
#' @param ID character string. Contains the column label in myData which describes the subject ID.
33+
#'
34+
#' @param DV character string. Contains the column label in myData which contains the to be modeled variable.
35+
#'
36+
#' @param control object of class GPPMControl. Used for storing technical settings. Default should only be changed by advanced users. Generated via \code{\link{gppmControl}}.
37+
#'
38+
#' @return A (unfitted) Gaussian process panel model, which is an object of class 'GPPM'
39+
#' @details
40+
#' mFormula and cFormula contain the specification of the mean and the covariance function respectively.
41+
#' These formulas are defined using character strings. Within these strings there are four basic elements:
42+
#' \itemize{
43+
#' \item Parameters
44+
#' \item Functions and operators
45+
#' \item References to observed variables in the data frame myData
46+
#' \item Mathematical constants
47+
#' }
48+
#' The gppm function automatically recognizes which part of the string refers to which elements. To be able to do this certain relatively common rules need to be followed:
49+
#'
50+
#' Parameters: Parameters may not have the same name as any of the columns in myData to avoid confusing them with a reference to an observed variable.
51+
#' Furthermore, to avoid confusing them with functions, operators, or constants, parameter labels must always begin with a lower case letter and only contain letters and digits.
52+
#'
53+
#' Functions and operators: All functions and operators that are supported by stan can be used; see \url{http://mc-stan.org/users/documentation/} for a full list. In general, all basic operators and functions are supported.
54+
#'
55+
#' References: A reference must be the same as one of the elements of the output of \code{names(myData)}. For references, the same rules apply as for parameters. That is, the column names of myData may only contain letters and digits and must start with a letter.
56+
#'
57+
#' Constants: Again, all constants that are supported by stan can be used and in general the constants are available by their usual name.
58+
#' @seealso \code{\link{fit.GPPM}} for how to fit a GPPM
59+
#' @examples
60+
#' # Defintion of a latent growth curve model
61+
#' \dontrun{
62+
#' data("demoLGCM")
63+
#' lgcm <- gppm('muI+muS*t','varI+covIS*(t+t#)+varS*t*t#+(t==t#)*sigma',
64+
#' demoLGCM,'ID','y')
65+
#' }
66+
#' @import rstan
67+
#' @import ggplot2
68+
#' @import ggthemes
69+
#' @import Rcpp
70+
#' @importFrom mvtnorm dmvnorm
71+
#' @import stats
72+
#' @importFrom MASS mvrnorm
73+
#' @importFrom methods is
74+
#' @importFrom utils capture.output
75+
#' @export
76+
gppm <- function(mFormula,cFormula,myData,ID,DV,control=gppmControl()){
77+
myData <- as_LongData(myData,ID,DV)
78+
validate_gppm(mFormula,cFormula,myData,control)
79+
80+
theModel <- new_GPPM(mFormula,cFormula,myData,control)
81+
theModel$dataForStan <- as_StanData(myData)
82+
theModel$parsedModel <- parseModel(theModel$mFormula,theModel$cFormula,theModel$dataForStan)
83+
theModel$stanModel <- toStan(theModel$parsedModel,control)
84+
return(theModel)
85+
}
86+
87+
subsetData <- function(gpModel,rowIdxs){
88+
newModel <- gpModel
89+
newModel$data <- gpModel$data[rowIdxs,]
90+
newModel$dataForStan <- as_StanData(newModel$data)
91+
return(newModel)
92+
}
93+
94+
updateData <- function(gpModel,newData){
95+
newModel <- gpModel
96+
oldData <- datas(newModel)
97+
stopifnot(identical(names(oldData),names(newData)))
98+
newModel$data <- as_LongData(newData,getID(oldData),getDV(oldData))
99+
newModel$dataForStan <- as_StanData(newModel$data)
100+
return(newModel)
101+
}
102+
103+
validate_gppm <- function(mFormula,cFormula,myData,control){
104+
ID <- attr(myData,'ID')
105+
DV <- attr(myData,'DV')
106+
stopifnot(!is.null(ID))
107+
stopifnot(!is.null(DV))
108+
#type checks
109+
if (!is.character(mFormula)){
110+
stop('mFormula must contain a string')
111+
} else if(length(mFormula)!=1){
112+
113+
}
114+
115+
if (!(is.character(cFormula)&& length(cFormula)==1)){
116+
stop('cFormula must contain a string')
117+
}
118+
119+
if (!is.data.frame(myData)){
120+
stop('myData must be a data frame')
121+
}
122+
123+
if (!(is.character(ID))){
124+
browser()
125+
stop('ID must contain a string')
126+
}
127+
128+
if (!is.character(DV)){
129+
stop('DV must contain a string')
130+
}
131+
132+
if (!class(control)=='GPPMControl'){
133+
stop('control must be of class GPPMControl')
134+
}
135+
136+
varNames <- names(myData)
137+
allValid <- grepl("^[A-Za-z]+[0-9A-Za-z]*$",varNames)
138+
if (any(!allValid)){
139+
stop(sprintf('Invalid variable name %s in your data frame. See ?gppm for naming conventions\n',varNames[!allValid]))
140+
}
141+
142+
143+
if (!"longData" %in% class(myData)){
144+
if (!ID %in% names(myData)){
145+
stop(sprintf('ID variable %s not in data frame',ID))
146+
}
147+
148+
if (!DV %in% names(myData)){
149+
stop(sprintf('DV variable %s not in data frame',DV))
150+
}
151+
}
152+
}
153+
154+
155+

R/GPPMControl.R

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
new_GPPMControl <- function(stanModel){
2+
stopifnot(is.logical(stanModel))
3+
4+
5+
structure(list(
6+
stanModel=stanModel #generate stanModel Object or not
7+
),class='GPPMControl')
8+
}
9+
10+
#' Define settings for a Gaussian process panel model
11+
#'
12+
#' This function is used to specify the settings of a Gaussian process panel model generated by \code{\link{gppm}}.
13+
#'
14+
#' @param stanModel boolean. Should the corresponding stan model be created? Should only be set to FALSE for testing purposes.
15+
#' Not creating the stan model makes model fitting impossible but saves a lot of time.
16+
17+
#' @return Settings for a Gaussian process panel model in an object of class 'GPPMControl'
18+
#' @seealso \code{\link{gppm}}
19+
#' @export
20+
gppmControl <- function(stanModel=TRUE){
21+
new_GPPMControl(stanModel)
22+
}

R/LongData.R

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
new_LongData <- function(myData,ID,DV){
2+
stopifnot(is.character(ID) && length(ID)==1)
3+
stopifnot(is.character(DV) && length(DV)==1)
4+
stopifnot(is.data.frame(myData))
5+
6+
structure(myData,class=c('LongData',class(myData)),ID=ID,DV=DV)
7+
}
8+
9+
as_LongData <- function(myData,ID,DV){
10+
if (!"LongData" %in% class(myData)){
11+
myData <- new_LongData(myData,ID,DV)
12+
}else if (!missing(ID)){
13+
attr(myData,'ID') <- ID
14+
}else if (!missing(DV)){
15+
attr(myData,'DV') <- DV
16+
}
17+
myData
18+
}
19+
20+
21+
getID <- function(longData){
22+
stopifnot("LongData" %in% class(longData))
23+
return(attr(longData,'ID'))
24+
}
25+
26+
getDV <- function(longData){
27+
stopifnot("LongData" %in% class(longData))
28+
return(attr(longData,'DV'))
29+
}
30+

0 commit comments

Comments
 (0)