diff --git a/DESCRIPTION b/DESCRIPTION index ac23a1570..d8a86f73b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -57,13 +57,15 @@ Description: Toolkit to perform statistical analyses of correlation Imports: abind, boot, - dplyr, + dplyr (>= 1.0.0), R6, Rcpp, stringr LinkingTo: Rcpp Suggests: + grr, + pbmcapply, minpack.lm, parallel, rhdf5, @@ -76,12 +78,13 @@ Suggests: staplr, markdown, rmarkdown, - errors + errors, + reshape2 License: GPL-3 URL: https://github.com/HISKP-LQCD/hadron BugReports: https://github.com/HISKP-LQCD/hadron/issues LazyData: true Roxygen: list(markdown = TRUE, old_usage = TRUE, r6 = FALSE) -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.0 Encoding: UTF-8 VignetteBuilder: knitr diff --git a/R/UWerr.R b/R/UWerr.R index 758c97a30..2e62fd1d7 100644 --- a/R/UWerr.R +++ b/R/UWerr.R @@ -81,6 +81,22 @@ uwerr <- function(f, data, nrep, S=1.5, pl=FALSE, ...) { } } +#' @export +apply_uwerrprimary <- function(X, S=1.5){ + stopifnot(length(dim(X)) == 2) + tmp <- t(apply( + X = X, + MARGIN = 2L, + FUN = function(x){ + uw <- uwerrprimary(data = x, S = S, pl = FALSE) + return( c(uw$value, uw$dvalue, uw$ddvalue, uw$tauint, uw$dtauint) ) + } + )) + colnames(tmp) <- c("value", "dvalue", "ddvalue", "tauint", "dtauint") + return(invisible(as.data.frame(tmp))) +} + + #' @export uwerrprimary <- function(data, nrep, S=1.5, pl=FALSE) { diff --git a/R/analysis_gradient_flow.R b/R/analysis_gradient_flow.R index 9e7a5b181..d27f6baa4 100644 --- a/R/analysis_gradient_flow.R +++ b/R/analysis_gradient_flow.R @@ -1,46 +1,75 @@ -# these should also have references -ref_gf_scales <- list() -ref_gf_scales[["sqrt_t0_Eplaq"]] <- list(val=0.1416, dval=0.0008, - label="t_0^{\\mathrm{plaq}}/a^2", - obs="tsqEplaq", - obslabel="$\\langle t^2 E_{\\mathrm{plaq}}(t) \\rangle$", - physobslabel="\\langle t_0^2 E_{\\mathrm{plaq}}(t_0) \\rangle") - -ref_gf_scales[["sqrt_t0_Esym"]] <- list(val=0.1416, dval=0.0008, - label="t_0^{\\mathrm{sym}}/a^2", - obs="tsqEsym", - obslabel="$\\langle t^2 E_{\\mathrm{sym}}(t) \\rangle$", - physobslabel="\\langle t_0^2 E_{\\mathrm{sym}}(t_0) \\rangle") - -ref_gf_scales[["w0_Wsym"]] <- list(val=0.1755, dval=0.0019, - label="(w_0^{\\mathrm{sym}}/a)^2", - obs="Wsym", - obslabel="$\\langle W_{\\mathrm{sym}}(t) \\rangle$", - physobslabel="\\langle W_{\\mathrm{sym}}(w_0^2) \\rangle") - - #' analysis_gradient_flow #' #' @param path string. path to data files #' @param outputbasename string. basename of output files #' @param basename string. basename of input files, for example "gradflow" #' @param read.data boolean. Indicates whether to read data fresh from -#' data files or to use `basename.raw.gradflow.Rdata` instead +#' data files or to use `basename.raw.gradflow.Rdata` lying in the +#' present working directory instead #' @param pl boolean. If set to `TRUE` plots will be generated #' @param skip integer. number of measurements to skip -#' @param start integer. start value for time +#' @param start integer. start value for MD time, this affects the MD time +#' indicated in the plots #' @param scale numeric. scale factor for the MD time, should be set to -#' the stridelength (in units of trajectories or configurations) -#' which was used to produce the gradient flow files, such that -#' the distance between measurements can be interpreted -#' correctly and the reported autocorrelation times scaled appropriately. +#' the stridelength (in units of trajectories or configurations) +#' which was used to produce the gradient flow files, such that +#' the distance between measurements can be interpreted +#' correctly and the reported autocorrelation times scaled appropriately. #' @param plotsize numeric. Plot sidelength, this is passed to -#' \code{tikz.init}. +#' \code{tikz.init}. #' @param dbg boolean. If set to `TRUE` debugging output will be provided. #' #' @description #' function to analyse the gradient flow output files generated by #' the tmLQCD software, see references. +#' +#' The function generates a number of output files beyond plots, much like +#' \link{analysis_online}. In these, the following correspondences of variable +#' names to observables hold: +#' +#' \itemize{ +#' \item{traj: }{Configuration or trajectory number.} +#' \item{t: }{Gradient flow time \eqn{t}.} +#' \item{P: }{Average plaquette \eqn{

}.} +#' \item{Eplaq: }{Energy density \eqn{E(t)} in the plaquette definition.} +#' \item{Esym: }{Energy density \eqn{E(t)} in the clover definition.} +#' \item{tsqEplaq: }{The observable \eqn{t^2 E(t)} used to define the scale \eqn{t_0} in the plaquette definition.} +#' \item{tsqEsym: }{The observable \eqn{t^2 E(t)} used to define the scale \eqn{t_0} in the clover definition.} +#' \item{Wsym: }{The observable \eqn{t d/dt (t^2 E(t))} used to define the scale \eqn{w_0} in the clover definition.} +#' \item{Qsym: }{The topological charge \eqn{Q(t)} in the clover definition.} +#' } +#' +#' The observables are analysed using the Gamma method (\link{uwerr}) and +#' the gradient flow scales \eqn{t_0} and \eqn{w_0} are determined. +#' +#' The generated files are: +#' \itemize{ +#' \item{gradflow.summary.Rdata: }{Named list of data frames \code{gradflow_resultsum}. +#' The names of the elements in this list are based on \code{outputbasename}. +#' The function attempts to read this file from the present working directory, +#' so as to add to the list. If the file does not exist, it is created. +#' If a name already exists in the list, it is overwritten. Each element +#' contains the central value, statistical errors in positive and negative directions and +#' the integrated autocorrelation time and its error for the observables "tsqEplaq", "tsqEsym" +#' , "Wsym" and "Qsym_maxft", where the last corresponds to the topological +#' charge at maximal flow time.} +#' \item{%s.raw.gradflow.Rdata: }{The raw data read from the gradient flow files is written +#' to this file with the pattern replaced by \code{outputbasename}. All of the observables +#' listed above are written for all trajctories and flow times.} +#' \item{%s.result.gradflow.Rdata: }{The results of the Gamma method analysis are written +#' into this file with the pattern replaced by \code{outputbasename}. The saved object +#' , \code{gradflow}, is a data frame with a column "t" and columns of the form +#' "x.y", where "x" is the observable name as given in the list above and "y" is one +#' of "value", "dvalue", "ddvalue", "tauint" and "dttauint". See \link{uwerr} for the meaning +#' of each of these.} +#' \item{%s.gradflow_results_per_meas.Rdata: }{Rather than performing the Gamma method analysis directly +#' at each flow time, it is also possible to determine the gradient flow scales +#' for each measurement and to extract the topological charge at the maximum flow time. +#' The object \code{gradflow_results_per_meas} in this file, where the pattern is replaced +#' with \code{outputbasename}, contains the results of this procedure as a data frame +#' with the columns "traj", "tsqEplaq_per_meas", "tsqEsym_per_meas", "Wsym_per_meas" +#' and "Qsym_maxft_per_meas". Note: the object is a \link{tibble}.} +#' } #' #' @references #' K. Jansen and C. Urbach, Comput.Phys.Commun. 180 (2009) 2717-2738 @@ -54,10 +83,17 @@ analysis_gradient_flow <- function(path, outputbasename, basename="gradflow", skip=0, start=0, scale=1, dbg=FALSE) { + dplyr_version_required <- "1.0.0" + dplyr_avail <- requireNamespace("dplyr", versionCheck = list(op = ">=", version = dplyr_version_required)) + if( !dplyr_avail ){ + stop(sprintf("The 'dplyr' package in version >= %s is required to use this function!\n", + dplyr_version_required)) + } + # much like for the analysis of online measurements, we store summary information # in the list "gradflow_resultsum" with elements named by "outputbasename" # such that when the analysis is rerun, the entries are replaced - resultsfile <- "gradflow.summary.RData" + resultsfile <- "gradflow.summary.Rdata" gradflow_resultsum <- list() if(file.exists(resultsfile)){ message("Loading gradient flow results database from ", resultsfile, "\n") @@ -76,10 +112,13 @@ analysis_gradient_flow <- function(path, outputbasename, basename="gradflow", t_vec <- unique(raw.gradflow$t) Ncol <- ncol(raw.gradflow) Nrow <- length(t_vec) + cnames <- colnames(raw.gradflow[,3:Ncol]) + + have_Qsym <- "Qsym" %in% cnames + # allocate some memory, for each observable, have space for the value, the error and the autocorrelation time # create a list with NULL rownams and sensible column names for this purpose subnames <- c("value","dvalue","ddvalue","tauint","dtauint") - cnames <- colnames(raw.gradflow[,3:Ncol]) outer.cnames <- t(outer(cnames,subnames,FUN=function(x,y) { paste(x,y,sep=".") })) grad.dimnames <- list() grad.dimnames[[1]] <- NULL @@ -91,6 +130,8 @@ analysis_gradient_flow <- function(path, outputbasename, basename="gradflow", summaryvec <- c(t_vec[i_row]) for(i_col in 1:length(cnames)) { obs <- uwerr.gradflow[[cnames[i_col]]] + # we apply the 'scale' factor to the integrated autocorrelation time + # in order to obtain it in the right units summaryvec <- c(summaryvec, obs$value, obs$dvalue, obs$ddvalue, obs$tauint*scale, obs$dtauint*scale) } gradflow[i_row,] <- summaryvec @@ -113,7 +154,6 @@ analysis_gradient_flow <- function(path, outputbasename, basename="gradflow", approx( x=val, y=gradflow$t, xout=0.3 )$y, approx( x=val-dval, y=gradflow$t, xout=0.3)$y ) - ## determine which discrete value of t is closest to the scale in question for( tidx in 1:length(t_vec) ){ if( t_vec[tidx] >= gf_scales[[i]][2] ){ @@ -144,13 +184,86 @@ analysis_gradient_flow <- function(path, outputbasename, basename="gradflow", gf_latspacs[[i]] <- list() for( k in 1:length(sqrt_gf_scale) ){ gf_latspacs[[i]][[k]] <- compute_ratio(dividend=ref_gf_scales[[i]], + # note that we use zero error on our scale + # as the error on the reference scale is large enough divisor=list(val=sqrt_gf_scale[k], dval=0)) } } - save(gradflow_resultsum, - file=resultsfile) + #### GF SCALE RATIOS ### + + + Wsym_idx <- which(gradflow_resultsum[[outputbasename]]$obs == "Wsym") + for( t0type in c("plaq", "sym") ){ + tsqE <- sprintf("tsqE%s", t0type) + t0lab <- sprintf("t0%s", t0type) + ratiolab <- sprintf("%s_ov_w0", t0lab) + tsqE_idx <- which(gradflow_resultsum[[outputbasename]]$obs == tsqE) + + # we don't want to bootstrap absolutely everything so we limit ourselves to the most relevant region + # and only the observables of interest + trange <- range(gradflow_resultsum[[outputbasename]]$val[ which(gradflow_resultsum[[outputbasename]]$obs == tsqE) ], + gradflow_resultsum[[outputbasename]]$val[ which(gradflow_resultsum[[outputbasename]]$obs == "Wsym") ]) + trange <- c(trange[1]*0.5, trange[2]*1.5) + gf_tseries <- subset(tseries_orig(data = dplyr::filter(dplyr::rename(raw.gradflow, md_idx = traj), + t >= trange[1] & t <= trange[2]), + explanatory_vars = c("t")), + subset = c(tsqE, "Wsym")) + + boot.l <- 2*ceiling(max(gradflow_resultsum[[outputbasename]]$tauint[Wsym_idx], + gradflow_resultsum[[outputbasename]]$tauint[tsqE_idx])) + boot.R <- 3*length(unique(raw.gradflow$traj)) + + gf_tseries_boot <- bootstrap.tseries(gf_tseries, + boot.R = boot.R, + boot.l = boot.l, + sim = 'geom', + endcorr = TRUE, + seed = 12345, + serial = FALSE) + + plan <- list() + plan[["t0"]] <- parse(text = sprintf("approx(x = %s, y = t, xout = 0.3)$y", tsqE) ) + plan[["w0"]] <- expression( sqrt(approx(x = Wsym, y = t, xout = 0.3)$y) ) + + gf_scales <- apply_reduce_plan.tseries(ts = gf_tseries_boot, reduce_vars = c("t"), plan = plan) + + ## now actually compute the ratio + plan <- list() + plan[["t0_ov_w0"]] <- parse(text = sprintf("t0 / w0", t0lab) ) + gf_scale_ratio <- apply_transmute_plan.tseries(ts = gf_scales, plan = plan) + + + gradflow_resultsum[[outputbasename]] <- + rbind(gradflow_resultsum[[outputbasename]], + data.frame(obs = ratiolab, + val = gf_scale_ratio$central$t0_ov_w0, + pdval = gf_scale_ratio$se$t0_ov_w0, + mdval = gf_scale_ratio$se$t0_ov_w0, + # since we're working from bootstrap samples, we don't calculate + # the autocorrelation times here + tauint = NA, + dtauint = NA + ) + ) + } + + if( have_Qsym ){ + n <- length(gradflow$t) + gradflow_resultsum[[outputbasename]] <- + rbind(gradflow_resultsum[[outputbasename]], + data.frame(obs = "Qsym_maxft", + val = gradflow$Qsym.value[n], + pdval = gradflow$Qsym.dvalue[n], + mdval = gradflow$Qsym.dvalue[n], + # scale factor has already been applied above! + tauint = gradflow$Qsym.tauint[n], + dtauint = gradflow$Qsym.dtauint[n] + ) + ) + } + save(gradflow_resultsum, file=resultsfile) if(pl) { tikzfiles <- tikz.init(basename=sprintf("%s.gradflow",outputbasename),width=plotsize,height=plotsize) @@ -193,7 +306,7 @@ analysis_gradient_flow <- function(path, outputbasename, basename="gradflow", digits=2, with.dollar=FALSE, with.cdot=FALSE) ), - sprintf("$a=%s$\\,fm", + sprintf("$a \\sim %s$\\,fm ($N_f = 2+1$)", tex.catwitherror(x=gf_latspac[[2]]$val, dx=sqrt( (0.5* ( abs( gf_latspac[[3]]$val-gf_latspac[[2]]$val) + @@ -206,32 +319,30 @@ analysis_gradient_flow <- function(path, outputbasename, basename="gradflow", ), bty='n') - ### plot MD history of the scale observable at the value of t closest to the scale - approx_idx <- which(raw.gradflow$t==gf_approx_scales[[i]]$t) - tseries <- data.frame(y=raw.gradflow[approx_idx,scale_obs], - t=start + c( skip :(skip + - length(raw.gradflow[approx_idx,scale_obs]) - 1 ) )*scale ) - plot_timeseries(dat=tseries, - ylab=sprintf("%s$|_{t/a^2 = %.2f}$", - scale_obslabel, - gf_approx_scales[[i]]$t), - titletext="") + ### plot MD history of the scale observable + scale_obs_per_meas <- dplyr::pull(gradflow_results_per_meas, paste(scale_obs, "_per_meas", sep = "") ) + tseries <- data.frame(y = scale_obs_per_meas, + t = start + c( skip :(skip + + length(scale_obs_per_meas) - 1) )*scale ) + scale_obs_uwerr <- plot_timeseries(dat=tseries, + ylab=sprintf("$%s$", scale_label), + titletext="") # indicate integrated autocorrelation time in scaled units legend(x="topleft", bty='n', pch=NA, - legend=sprintf("$\\tau_{\\mathrm{int}}($ %s $) = %s$ traj.", - scale_obslabel, - tex.catwitherror(x=gf_approx_scales[[i]]$tauint, - dx=gf_approx_scales[[i]]$dtauint, - digits=2, + legend=sprintf("$\\tau_{\\mathrm{int}}( %s ) = %s$ traj.", + scale_label, + tex.catwitherror(x=scale_obs_uwerr["tauint",1] * scale, + dx=scale_obs_uwerr["dtauint",1] * scale, + digits=1, with.dollar=FALSE, with.cdot=FALSE) ) ) } - if( any(cnames == "Qsym") ){ + if( have_Qsym ){ ################ TOPOLOGICAL CHARGE #################### # set up plot plot(x=gradflow$t, @@ -271,9 +382,10 @@ analysis_gradient_flow <- function(path, outputbasename, basename="gradflow", with.dollar=FALSE, with.cdot=FALSE) ) ) - } + } # if( have_Qsym ) tikz.finalize(tikzfiles) - } + } # if(pl) } + diff --git a/R/analysis_online.R b/R/analysis_online.R index 85394e36f..82a198218 100644 --- a/R/analysis_online.R +++ b/R/analysis_online.R @@ -36,7 +36,8 @@ append_pdf_filename <- function(basename, pdf_filenames){ #' @param mudelta numeric. splitting 1+1 sea twisted quark mass #' @param muh numeric. "heavy" twisted mass in the case of a `n_f=2+2` run #' @param addon string. addon to output filenames -#' @param skip integer. number of initial measurements to skip in analysis +#' @param skip_output_data integer. number of lines to skip in reading of `output.data`, usually not necessary +#' @param traj_from integer. first trajectory id to be considered in the analysis #' @param rectangle boolean. If true, rectangle plaquettes are analysed #' @param plaquette boolean. If true, square plaquettes are analysed #' @param dH boolean. If true, delta H is analysed @@ -57,13 +58,22 @@ append_pdf_filename <- function(basename, pdf_filenames){ #' will include `stat_skip` measurements, #' but these will be skipped in the corresponding statistical analysis. #' This maybe useful, for example, to visualise thermalisation. +#' @param omeas.offset integer. Offset to be added to the trajectory counter for the online measurements. +#' This is useful, for example, when `output.data` contains thermalisation +#' but online measurements were only performed once thermalised +#' with a starting configuration filename of `conf.xxxx`, where `xxxx` +#' gets translated to the trajectory ID for the online measurement +#' and thus goes out of step with the trajectory ID of the data in +#' `output.data`. #' @param omeas.samples integer. number of stochastic samples per online measurement #' @param omeas.stride integer. stride length in the reading of online measurements #' @param omeas.avg integer. Block average over this many subsequent measurements. #' @param omeas.stepsize integer. Number of trajectories between online measurements. Autocorrelation #' times of online measurement data will be scaled by this factor. -#' @param evals.stepsize integer. Numer of trajectories between (strange-charm Dirac opertoar) eigenvalue measurements. +#' @param evals.stepsize integer. Number of trajectories between (strange-charm Dirac operator) eigenvalue measurements. #' Autocorrelation times of eigenvalues will be scaled by this factor. +#' @param trajectory_length numeric. Trajectory length. All autocorrelation times will be scaled by this factor +#' such that they are expressed in terms of unit length trajectories. #' @param boot.R integer. number of bootstrap samples to use in bootstrap-based parts of analysis. #' @param boot.l integer. bootstrap block size #' @param outname_suffix string. suffix for output files @@ -88,13 +98,15 @@ append_pdf_filename <- function(basename, pdf_filenames){ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, cg_col, evals_id, rundir, cg.ylim, type="", csw=0, musigma=0, mudelta=0, muh=0, addon="", - skip=0, rectangle=TRUE, + skip_output_data=0, traj_from = 0, rectangle=TRUE, plaquette=TRUE, dH=TRUE, acc=TRUE, trajtime=TRUE, omeas=TRUE, plotsize=5, debug=FALSE, trajlabel=FALSE, title=FALSE, pl=FALSE, method="uwerr", fit.routine="optim", oldnorm=FALSE, S=1.5, - stat_skip=0, omeas.samples=1, omeas.stride=1, omeas.avg=1, + stat_skip=0, + omeas.offset = 0, omeas.samples=1, omeas.stride=1, omeas.avg=1, omeas.stepsize=1, evals.stepsize=1, + trajectory_length=1.0, boot.R=1500, boot.l=2, outname_suffix="", verbose=FALSE) { @@ -120,7 +132,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, } # store analysis results in practical R format, replacing entries as new data is added - resultsfile <- "omeas.summary.RData" + resultsfile <- "omeas.summary.Rdata" resultsum <- list() if(file.exists(resultsfile)){ @@ -134,7 +146,10 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, # set up data structure for analysis results result <- list(params=data.frame(L=L,Time=Time,t1=t1,t2=t2,type=type,beta=beta,kappa=kappa,csw=csw, mul=mul,muh=muh,boot.l=boot.l,boot.R=boot.R, - musigma=musigma,mudelta=mudelta,N.online=0,N.plaq=0,skip=skip, + musigma=musigma,mudelta=mudelta,N.online=0,N.plaq=0, + skip_output_data=skip_output_data, traj_from=traj_from, + omeas.offset=omeas.offset, + omeas.samples=omeas.samples, omeas.avg=omeas.avg, stat_skip=stat_skip,stringsAsFactors=FALSE), obs=data.frame(mpcac_fit=navec, mpcac_mc=navec, @@ -152,6 +167,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, errorband_color <- rgb(0.6,0.0,0.0,0.6) errorband_color2 <- rgb(0.0,0.0,0.6,0.6) + errorband_color3 <- rgb(0.0,0.6,0.0,0.6) if(missing(rundir)){ rundir <- construct_onlinemeas_rundir(type=type,beta=beta,L=L,Time=Time,kappa=kappa,mul=mul, @@ -179,21 +195,22 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, # read online measurements # get all omeas files that exist (hopefully in a consistent stepping) omeas.files <- getorderedfilelist(path=rundir, basename="onlinemeas", last.digits=6) - # extract the trajectory numbers - omeas.cnums <- getorderedconfignumbers(path=rundir, basename="onlinemeas", last.digits=6) - # when the online measurements start later than traj 0 and we want to skip 'skip' - # trajectories, the following should correspond to the correact indexing - omeas.idx <- which(omeas.cnums > skip) - + # extract the trajectory numbers, taking into account a possible offset between + # online measurements and `output.data` + omeas.cnums <- getorderedconfignumbers(path=rundir, basename="onlinemeas", last.digits=6) + omeas.offset + # consider only the trajectory indices that we want to include in the analysis + omeas.idx <- which(omeas.cnums >= traj_from) + if( length(omeas.idx) < 1 ){ - stop(sprintf("After skipping %d trajectories, no online measurements are left!\n")) + stop(sprintf("Considering only trajectories with ids >= %d, no online measurements are left (omeas.offset was %d)!\n", + traj_from, omeas.offset)) } omeas.files <- omeas.files[omeas.idx] omeas.cnums <- omeas.cnums[omeas.idx] - pioncor <- readcmidatafiles( files=omeas.files, skip=0, - avg=omeas.avg, stride=omeas.stride, verbose=verbose ) + pioncor <- readcmidatafiles(files=omeas.files, skip=0, + avg=omeas.avg, stride=omeas.stride, verbose=verbose ) # when dealing with multi-sample online measurements, we need to thin out # the configuration numbers extracted above by stepping through them @@ -227,8 +244,8 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, if(trajlabel){ filelabel <- sprintf("%s_traj%06d-%06d",filelabel,min(omeas.cnums),max(omeas.cnums)) } - message("Writing online measurements RData to ", sprintf("onlineout.%s.RData",filelabel), "\n") - save(onlineout,file=sprintf("onlineout.%s.RData",filelabel)) + message("Writing online measurements Rdata to ", sprintf("onlineout.%s.Rdata",filelabel), "\n") + save(onlineout,file=sprintf("onlineout.%s.Rdata",filelabel)) plotcounter <- plotcounter+1 dpaopp_filename <- sprintf("%02d_dpaopp_%s",plotcounter,filelabel) @@ -237,18 +254,17 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, result$obs$mpcac_mc <- plot_timeseries(dat=data.frame(y=onlineout$MChist.dpaopp, t=omeas.cnums), stat_range=c( stat_skip+1, length(onlineout$MChist.dpaopp) ), + time_factor=omeas.stepsize*trajectory_length, pdf.filename=dpaopp_filename, ylab="$am_\\mathrm{PCAC}$", name="am_PCAC (MC history)", plotsize=plotsize, filelabel=filelabel, titletext=titletext, + uwerr.S = S, errorband_color=errorband_color, smooth_density = TRUE) - # adjust autocorrelation times to be in terms of trajectories - result$obs$mpcac_mc[3:5] <- result$obs$mpcac_mc[3:5]*omeas.stepsize - lengthdpaopp <- length(onlineout$MChist.dpaopp) mindpaopp <- min(onlineout$MChist.dpaopp) maxdpaopp <- max(onlineout$MChist.dpaopp) @@ -274,19 +290,23 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, ylab="$am_\\mathrm{PCAC}$", xlab="$t/a$", main=titletext) - rect(xleft=t1, - xright=t2, - ytop=mpcac_fit$val+mpcac_fit$dval, - ybottom=mpcac_fit$val-mpcac_fit$dval,border=FALSE,col=errorband_color) + if( mpcac_fit$dval < 0.5 ){ + rect(xleft=t1, + xright=t2, + ytop=mpcac_fit$val+mpcac_fit$dval, + ybottom=mpcac_fit$val-mpcac_fit$dval,border=FALSE,col=errorband_color) + } abline(h=mpcac_fit$val,col="red",lwd=2) - rect(xleft=t1, - xright=t2, - ytop=result$obs$mpcac_mc["val",]+result$obs$mpcac_mc["dval",], - ybottom=result$obs$mpcac_mc["val",]-result$obs$mpcac_mc["dval",],border=FALSE,col=errorband_color2) + if( result$obs$mpcac_mc["dval",] < 0.5 ){ + rect(xleft=t1, + xright=t2, + ytop=result$obs$mpcac_mc["val",]+result$obs$mpcac_mc["dval",], + ybottom=result$obs$mpcac_mc["val",]-result$obs$mpcac_mc["dval",],border=FALSE,col=errorband_color2) + } abline(h=result$obs$mpcac_mc["val",],lwd=2,col="blue") - legend(x="topright", bty='n', lty=1, lwd=4, col=c("red","blue"), + legend(x="topright", bty='n', lty=1, lwd=4, col=c("red","blue"), cex = 0.8, legend=c("$ M_\\mathrm{PS} |G_A| / 2|G_P| $ from 3-param ground-state fit", - sprintf("$ \\partial_0 \\langle A_0 P \\rangle / 2 \\langle P P \\rangle $ averaged from t=%d to t=%d",t1,t2) ) + sprintf("$ \\partial_0 \\langle A_0 P \\rangle / 2 \\langle P P \\rangle $ averaged from t=%d to t=%d",t1,t2)) ) tikz.finalize(tikzfiles) @@ -304,39 +324,54 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, # we deal with it here ploterror <- try(plotwitherror(x=onlineout$effmass$t, y=onlineout$effmass$m,dy=onlineout$effmass$dm,t='p', - ylab="$aM_\\mathrm{PS}$", + ylab="$aM_{\\mathrm{PS}}^{\\mathrm{eff}}$", xlab="$t/a$", main=titletext),silent=FALSE) # and plot without errors if required if(inherits(ploterror,"try-error")) { plot(x=onlineout$effmass$t,y=onlineout$effmass$m) } - rect(xleft=t1, - xright=t2, - ytop=onlineout$uwerrresultmps$value[1]+onlineout$uwerrresultmps$dvalue[1], - ybottom=onlineout$uwerrresultmps$value[1]-onlineout$uwerrresultmps$dvalue[1], - border=FALSE, - col=errorband_color) - abline(h=onlineout$uwerrresultmps$value[1],col="black") + if( abs(onlineout$uwerrresultmps$dvalue[1]) < 5*abs(onlineout$uwerrresultmps$value[1]) ){ + rect(xleft=t1, + xright=t2, + ytop=onlineout$uwerrresultmps$value[1]+onlineout$uwerrresultmps$dvalue[1], + ybottom=onlineout$uwerrresultmps$value[1]-onlineout$uwerrresultmps$dvalue[1], + border=FALSE, + col=errorband_color) + } + if( abs(onlineout$uwerrresultpp$dvalue[1]) < 5*abs(onlineout$uwerrresultpp$value[1]) ){ + rect(xleft=t1, + xright=t2, + ytop=onlineout$uwerrresultpp$value[1]+onlineout$uwerrresultpp$dvalue[1], + ybottom=onlineout$uwerrresultpp$value[1]-onlineout$uwerrresultpp$dvalue[1], + border=FALSE, + col=errorband_color2) + } + abline(h=onlineout$uwerrresultmps$value[1],col="red") + abline(h=onlineout$uwerrresultpp$value[1],col="blue") + legend(x="topright", bty='n', lty=1, lwd=4, col=c("red","blue"), cex = 0.8, + legend=c("$aM_{\\mathrm{PS}}$ from 3-param fit to PP and PA correls", + "$aM_{\\mathrm{PS}}$ from 2-param fit to PP correl only") + ) tikz.finalize(tikzfiles) - result$obs$mpi <- t(data.frame(val=abs(onlineout$fitresultpp$par[2]), - dval=onlineout$uwerrresultmps$dvalue[1], - tauint=onlineout$uwerrresultmps$tauint[1]*omeas.stepsize, - dtauint=onlineout$uwerrresultmps$dtauint[1]*omeas.stepsize, - Wopt=onlineout$uwerrresultmps$Wopt[[1]]*omeas.stepsize, stringsAsFactors=FALSE) ) + result$obs$mpi <- t(data.frame(val=onlineout$uwerrresultmps$value[1], + dval=onlineout$uwerrresultmps$dvalue[1], + tauint=onlineout$uwerrresultmps$tauint[1]*omeas.stepsize*trajectory_length, + dtauint=onlineout$uwerrresultmps$dtauint[1]*omeas.stepsize*trajectory_length, + Wopt=onlineout$uwerrresultmps$Wopt[[1]]*omeas.stepsize*trajectory_length, stringsAsFactors=FALSE) ) result$obs$fpi <- t(data.frame(val=2*kappa*2*mul/sqrt(2)*abs(onlineout$fitresultpp$par[1])/ (sqrt(onlineout$fitresultpp$par[2])*sinh(onlineout$fitresultpp$par[2])), - dval=2*kappa*2*mul/sqrt(2)*onlineout$uwerrresultfps$dvalue[1], - tauint=onlineout$uwerrresultfps$tauint[1]*omeas.stepsize, - dtauint=onlineout$uwerrresultfps$dtauint[1]*omeas.stepsize, - Wopt=onlineout$uwerrresultfps$Wopt[[1]]*omeas.stepsize, stringsAsFactors=FALSE) ) + dval=2*kappa*2*mul/sqrt(2)*onlineout$uwerrresultfps$dvalue[1], + tauint=onlineout$uwerrresultfps$tauint[1]*omeas.stepsize*trajectory_length, + dtauint=onlineout$uwerrresultfps$dtauint[1]*omeas.stepsize*trajectory_length, + Wopt=onlineout$uwerrresultfps$Wopt[[1]]*omeas.stepsize*trajectory_length, stringsAsFactors=FALSE) ) if(method=="boot" | method=="all"){ mpi_ov_fpi <- onlineout$tsboot$t[,1]/(2*kappa*2*mul/ sqrt(2)*(onlineout$tsboot$t[,3]/(sinh(onlineout$tsboot$t[,1])*sqrt(onlineout$tsboot$t[,1])))) - result$obs$mpi_ov_fpi <- t(data.frame(val=mean(mpi_ov_fpi), + result$obs$mpi_ov_fpi <- t(data.frame(val=result$obs$mpi[1,1]/result$obs$fpi[1,1], dval=sd(mpi_ov_fpi), tauint=NA, dtauint=NA, @@ -358,21 +393,15 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, } } # if(omeas) - # something in the skip computation is odd, let's just solve it like this - if(skip==0){ - shift <- 1 - } else { - shift <- 0 - } - outdat <- NULL tidx <- NULL if( plaquette || dH || trajtime || acc ) { # read output.data - # skip 'skip' lines. Hopefully the trajectory counter doesn't start at something much larger than 0 + # skip 'skip_output_data' lines in reading of output.data. + # Hopefully the trajectory counter doesn't start at something much larger than 0 # because otherwise we'll probably miss trajectories in the filtering below - outdat <- read.table(outfile, skip=skip, fill=TRUE) + outdat <- read.table(outfile, skip=skip_output_data, fill=TRUE) no_rows <- nrow(outdat) # count the number of columns in the penultimate line of output.data @@ -380,8 +409,8 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, # (when the mass preconditioning is changed, # the number of columns may change so we need to be able to deal with that) no_columns <- max(count.fields(outfile, - skip=skip+no_rows-1)) - + skip=skip_output_data+no_rows-1)) + # restrict any outliers in outdat. Inconsistent lines will almost certainly # be filtered out below outdat <- outdat[,1:no_columns] @@ -408,7 +437,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, # restrict to the lines which are definitely the trajectories we want to # analyse - tidx <- which(outdat$traj > skip) + tidx <- which(outdat$traj >= traj_from) if(debug) print(outdat) } @@ -422,12 +451,14 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, result$obs$P <- plot_timeseries(dat=data.frame(y=outdat$P[tidx], t=outdat$traj[tidx]), stat_range=c( 1+stat_skip, length( tidx ) ), + time_factor=trajectory_length, pdf.filename=plaquette_filename, ylab="$ \\langle P \\rangle$" , name="plaquette", plotsize=plotsize, filelabel=filelabel, titletext=titletext, + uwerr.S = S, errorband_color=errorband_color, smooth_density = TRUE) } @@ -440,6 +471,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, result$obs$dH <- plot_timeseries(dat=data.frame(y=outdat$dH[tidx], t=outdat$traj[tidx]), stat_range=c( 1+stat_skip, length(tidx) ), + time_factor=trajectory_length, pdf.filename=dH_filename, ylab="$ \\delta H $", name="dH", @@ -448,6 +480,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, titletext=titletext, errorband_color=errorband_color, smooth_density = TRUE, + uwerr.S = S, ylim=c(-2,3), hist.probs=c(0.01,0.99)) @@ -459,6 +492,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, result$obs$expdH <- plot_timeseries(dat=data.frame(y=outdat$expdH[tidx], t=outdat$traj[tidx]), stat_range=c( 1+stat_skip, length(tidx) ), + time_factor=trajectory_length, pdf.filename=expdH_filename, ylab="$ \\exp(-\\delta H) $", name="exp(-dH)", @@ -468,6 +502,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, errorband_color=errorband_color, hist.xlim=c(-2,4), smooth_density = TRUE, + uwerr.S = S, ylim=c(0,6), hist.probs=c(0.01,0.99)) } @@ -480,6 +515,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, result$obs$CG.iter <- plot_timeseries(dat=data.frame(y=outdat[tidx,cg_col], t=outdat$traj[tidx]), stat_range=c( 1+stat_skip, length(tidx) ), + time_factor=trajectory_length, pdf.filename=cg_filename, ylab="$N^\\mathrm{iter}_\\mathrm{CG}$", name="CG iterations", @@ -487,6 +523,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, filelabel=filelabel, titletext=titletext, errorband_color=errorband_color, + uwerr.S = S, smooth_density = TRUE ) } @@ -501,19 +538,17 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, col.names=c("traj","min_ev","max_ev","ev_range_min","ev_range_max") ), error=function(e){ stop(sprintf("Reading of %s failed!",ev_filename)) } ) - eval.tidx <- which(evaldata$traj > skip) + eval.tidx <- which(evaldata$traj >= traj_from) temp <- plot_eigenvalue_timeseries(dat=evaldata[eval.tidx,], stat_range=c( 1+stat_skip, length(eval.tidx) ), + time_factor=evals.stepsize*trajectory_length, pdf.filename = ev_pdf_filename, ylab = "eigenvalue", plotsize=plotsize, filelabel=filelabel, titletext=titletext, errorband_color=errorband_color ) - temp$obs$mineval[3:5] <- temp$mineval[3:5]*evals.stepsize - temp$obs$maxeval[3:5] <- temp$maxeval[3:5]*evals.stepsize - result$obs$mineval <- temp$mineval result$obs$maxeval <- temp$maxeval } @@ -527,6 +562,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, result$obs$accrate <- plot_timeseries(dat=data.frame(y=outdat$acc[tidx], t=outdat$traj[tidx]), stat_range=c( 1+stat_skip, length(tidx) ), + time_factor=trajectory_length, pdf.filename=accrate_filename, ylab="Accept / Reject" , name="accrate", @@ -534,6 +570,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, filelabel=filelabel, titletext=titletext, errorband_color=errorband_color, + uwerr.S = S, hist.by=0.5, smooth_density = FALSE) } @@ -547,6 +584,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, result$obs$trajtime <- plot_timeseries(data.frame(y=outdat$trajtime[tidx], t=outdat$traj[tidx]), stat_range=c( 1+stat_skip, length(tidx) ), + time_factor=trajectory_length, pdf.filename=trajtime_filename, ylab="Traj. time [s]" , name="trajtime", @@ -555,6 +593,7 @@ analysis_online <- function(L, Time, t1, t2, beta, kappa, mul, titletext=titletext, errorband_color=errorband_color, smooth_density = TRUE, + uwerr.S = S, hist.probs = c(0.01,0.99)) } diff --git a/R/analysis_tmlqcd_gradient_flow.R b/R/analysis_tmlqcd_gradient_flow.R new file mode 100644 index 000000000..fb4a7b301 --- /dev/null +++ b/R/analysis_tmlqcd_gradient_flow.R @@ -0,0 +1,751 @@ +#' analysis_tmlqcd_gradient_flow +#' +#' @param path string. path to data files +#' @param outputbasename string. basename of output files +#' @param basename string. basename of input files, for example "gradflow" +#' @param read.data boolean. Indicates whether to read data fresh from +#' data files or to use `basename.raw.gradflow.Rdata` lying in the +#' present working directory instead +#' @param make_plots boolean. If set to `TRUE` plots will be generated +#' @param skip integer. number of measurements to skip +#' @param start integer. start value for MD time, this affects the MD time +#' indicated in the plots +#' @param md_scalefac numeric. scale factor for the MD time, should be set to +#' the stridelength (in units of trajectories or configurations) +#' which was used to produce the gradient flow files, such that +#' the distance between measurements can be interpreted +#' correctly and the reported autocorrelation times scaled appropriately. +#' @param: md_idx_scalefac integer. scale factor for the measurement indices +#' when, for instance, configurations are saved every n'th trajectory +#' and subsequently the gradient flow measurement is performed on these +#' configurations, the measurements will be indexed by the configuration +#' index. `md_idx_scalefac` allows the measurements to be reindexed +#' on all plots. For a run where every n'th trajectory was stored, +#' `md_idx_scalefac = n` would thus be used. +#' @param do_bootstrap boolean. If set to `TRUE`, the error analysis will +#' be performed using the stationary bootstrap in addition to the Gamma method. +#' To correctly account for autocorrelations, the block length is chosen +#' automatically from the estimate of the integrated autocorrelation time +#' obtained through the Gamma method, choosing twice the longest observed +#' autocorrelation time amongst all GF observables. +#' The number of bootstrap samples is set to thrice the number of measurements. +#' @param plotsize numeric. Plot sidelength, this is passed to +#' \code{tikz.init}. +#' @param scale_definition numeric. Default \eqn{0.3}. This is the value which is used to define +#' the gluonic scales. For example, for \eqn{t_0}, \eqn{t^2 E = 0.3} is used and this parameter +#' can be used to set this to any other value. +#' +#' +#' @description +#' function to analyse the gradient flow output files generated by +#' the tmLQCD software, see references. +#' = +#' The function generates a number of output files beyond plots, much like +#' \link{analysis_online}. In these, the following correspondences of variable +#' names to observables hold: +#' +#' \itemize{ +#' \item{traj: }{Configuration or trajectory number.} +#' \item{t: }{Gradient flow time \eqn{t}.} +#' \item{P: }{Average plaquette \eqn{

}.} +#' \item{Eplaq: }{Energy density \eqn{E(t)} in the plaquette definition.} +#' \item{Esym: }{Energy density \eqn{E(t)} in the clover definition.} +#' \item{tsqEplaq: }{The observable \eqn{t^2 E(t)} used to define the scale \eqn{t_0} in the plaquette definition.} +#' \item{tsqEsym: }{The observable \eqn{t^2 E(t)} used to define the scale \eqn{t_0} in the clover definition.} +#' \item{Wsym: }{The observable \eqn{t d/dt (t^2 E(t))} used to define the scale \eqn{w_0} in the clover definition.} +#' \item{Qsym: }{The topological charge \eqn{Q(t)} in the clover definition.} +#' } +#' +#' The observables are analysed using the Gamma method (\link{uwerr}) and +#' the gradient flow scales \eqn{t_0} and \eqn{w_0} as well as various ratios +#' thereof are determined. +#' +#' The generated files are: +#' \itemize{ +#' \item{gradflow.summary.Rdata: }{Named list of data frames \code{gradflow_resultsum}. +#' The names of the elements in this list are based on \code{outputbasename}. +#' The function attempts to read this file from the present working directory, +#' so as to add to the list. If the file does not exist, it is created. +#' If a name already exists in the list, it is overwritten. Each element +#' contains the central value, statistical errors in positive and negative directions and +#' the integrated autocorrelation time and its error for the observables "tsqEplaq", "tsqEsym" +#' , "Wsym" and "Qsym_maxft", where the last corresponds to the topological +#' charge at maximal flow time.} +#' \item{%s.raw.gradflow.Rdata: }{The raw data read from the gradient flow files is written +#' to this file with the pattern replaced by \code{outputbasename}. All of the observables +#' listed above are written for all trajctories and flow times.} +#' \item{%s.result.gradflow.Rdata: }{The results of the Gamma method analysis are written +#' into this file with the pattern replaced by \code{outputbasename}. The saved object +#' , \code{gradflow}, is a data frame with a column "t" and columns of the form +#' "x.y", where "x" is the observable name as given in the list above and "y" is one +#' of "value", "dvalue", "ddvalue", "tauint" and "dttauint". See \link{uwerr} for the meaning +#' of each of these.} +#' \item{%s.gradflow_results_per_meas.Rdata: }{Rather than performing the Gamma method analysis directly +#' at each flow time, it is also possible to determine the gradient flow scales +#' for each measurement and to extract the topological charge at the maximum flow time. +#' The object \code{gradflow_results_per_meas} in this file, where the pattern is replaced +#' with \code{outputbasename}, contains the results of this procedure as a data frame +#' with the columns "traj", "tsqEplaq_per_meas", "tsqEsym_per_meas", "Wsym_per_meas" +#' and "Qsym_maxft_per_meas". Note: the object is a \link{tibble}.} +#' } +#' +#' @references +#' K. Jansen and C. Urbach, Comput.Phys.Commun. 180 (2009) 2717-2738 + +#' @return +#' Nothing is returned. +#' +#' @export +analysis_tmlqcd_gradient_flow <- function(path, outputbasename, basename="gradflow", + read_files = TRUE, make_plots = FALSE, plotsize = 4, + skip=0, + md_scalefac=1, md_idx_scalefac=1, + do_bootstrap = FALSE, + scale_definition = 0.3) { + + dplyr_version_required <- "1.0.0" + dplyr_avail <- requireNamespace("dplyr", versionCheck = list(op = ">=", version = dplyr_version_required)) + if( !dplyr_avail ){ + stop(sprintf("The 'dplyr' package in version >= %s is required to use this function!\n", + dplyr_version_required)) + } + + reshape2_avail <- requireNamespace("reshape2") + if( !reshape2_avail ){ + stop("The 'reshape2' package is required to use this function!\n") + } + + # much like for the analysis of online measurements, we store summary information # in the list "gradflow_resultsum" with elements named by "outputbasename" + # such that when the analysis is rerun, the entries are replaced + gf_summaries_db_file <- "gradflow.summary.Rdata" + gradflow_resultsum <- list() + if(file.exists(gf_summaries_db_file)){ + message("Loading gradient flow summary results database from ", gf_summaries_db_file, "\n") + load(gf_summaries_db_file) + } + + raw_gradflow <- read_tmlqcd_gf_data(path = path, skip = skip, + basename = basename, outputbasename = outputbasename, + read_files = read_files) + + have_Qsym <- "Qsym" %in% colnames(raw_gradflow) + + gf_scales <- lapply( + X = define_gf_scales_plan(), + FUN = function(gf_scale){ + # we need to reshape the tidy data frame into a 2D form for each observable + gf_obs_vars <- c('t', 'traj', gf_scale$obs) + gf_obs_arr <- reshape2::acast(data = dplyr::select(raw_gradflow, dplyr::one_of(gf_obs_vars)), + value.var = gf_obs_vars[3], + formula = "traj ~ t") + + gf_obs_uwerr <- apply_uwerrprimary(X = gf_obs_arr, S = 1.5) + gf_scale_uwerr <- uwerrderived(f = gf_find_intersect, + data = gf_obs_arr, + time = dimnames(gf_obs_arr)[[2]], + intersection = scale_definition) + + gf_scale_ratios_uwerr <- list() + if( !any(is.na(gf_scale$ratios)) ){ + for( ratio_i in 1:length(gf_scale$ratios) ){ + ratio_denom_vars <- c('t', 'traj', gf_scale$ratio_denom_obs[ratio_i]) + ratio_name <- gf_scale$ratios[ratio_i] + + gf_ratio_denom_obs_arr <- reshape2::acast(data = dplyr::select(raw_gradflow, dplyr::one_of(ratio_denom_vars)), + value.var = ratio_denom_vars[3], + formula = "traj ~ t") + gf_scale_ratios_uwerr[[ratio_name]] <- + uwerrderived(f = gf_scale$ratio_funs[[ratio_i]], + data = cbind(gf_obs_arr, gf_ratio_denom_obs_arr), + time = dimnames(gf_obs_arr)[[2]], + intersection1 = scale_definition, + intersection2 = scale_definition) + } + } else { + gf_scale_ratios_uwerr <- NA + } + + return( + list(obs = gf_obs_uwerr, + scale = gf_scale_uwerr, + ratios = gf_scale_ratios_uwerr) + ) + } + ) + + # find the maximum integrated autocorrelation time amongst the scales + max_scale_tauint <- 0.5 + for( i in 1:length(gf_scales) ){ + if( max_scale_tauint < gf_scales[[i]]$scale$tauint ){ + max_scale_tauint <- gf_scales[[i]]$scale$tauint + } + } + # and use it to set the average block length for bootstrapping the scale determination + # and the calculation of the scale ratios + boot.l <- 2*ceiling(max_scale_tauint) + boot.R <- 3*length(unique(raw_gradflow$traj)) + + if( do_bootstrap ){ + gf_scales_bs <- bootstrap_gf_scales_and_ratios( + raw_gradflow = raw_gradflow, + boot.R = boot.R, + boot.l = boot.l) + } else { + # build the minimum structure in case we forego the bootstrap + gf_scales_bs <- na_bs_gf_scales_and_ratios() + } + + res <- list(have_Qsym = have_Qsym, + raw_gradflow = raw_gradflow, + uw = gf_scales, + bs = gf_scales_bs) + res[["md_scalefac"]] <- md_scalefac + res[["md_idx_scalefac"]] <- md_idx_scalefac + + if( have_Qsym ){ + Qsym_arr <- reshape2::acast(data = dplyr::select(raw_gradflow, traj, t, Qsym), + value.var = "Qsym", + formula = "traj ~ t") + Qsym_uwerr <- apply_uwerrprimary(X = Qsym_arr, S = 1.5) + + res[["Qsym_uwerr"]] <- Qsym_uwerr + } + + + # now we interpolate all observables in the gradient flow to the flow times + # corresponding to the various gradient flow scales + target_ts <- list() + target_ts[["t0plaq"]] <- list(name = "t0plaq", val = (gf_scales$sqrt_t0plaq$scale$value)^2) + target_ts[["t0sym"]] <- list(name = "t0sym", val = (gf_scales$sqrt_t0sym$scale$value)^2) + target_ts[["w0"]] <- list(name = "w0", val = (gf_scales$w0$scale$value)^2 ) + target_ts[["tmax"]] <- list(name = "tmax", val = max(raw_gradflow$t) ) + res[["interpolations"]] <- gf_interpolate(raw_gradflow, target_ts) + res[["t"]] <- sort(unique(raw_gradflow$t)) + + res[["summary"]] <- summarise_gf_results(res, md_scalefac) + + if(make_plots) gf_make_plots(res, outputbasename, plotsize, scale_definition) + + gradflow_resultsum[[outputbasename]] <- res$summary + save(gradflow_resultsum, file=gf_summaries_db_file) + + gf_analysis <- res + save(gf_analysis, file = sprintf("%s_gf_analysis.Rdata", outputbasename)) + + return(invisible(res)) +} + +gf_plot_flow <- function(obs_uwerr, tlim, intersection, ...){ + stopifnot(length(tlim) == 2) + + x <- as.numeric(rownames(obs_uwerr)) + y <- obs_uwerr$value + ymin <- y - obs_uwerr$dvalue + ymax <- y + obs_uwerr$dvalue + + # empty plot type to set the limits and axis labels + plot(x = x, + y = y, + type = 'n', + xlab = "$t/a^2$", + las = 1, + xlim = tlim, + ...) + draw_errorband(x = x, ymin = ymin, ymax = ymax, col = "red", alpha = 0.6) + if( !missing(intersection) ){ + rect(xleft = intersection$tval - intersection$tse, + xright = intersection$tval + intersection$tse, + ybottom = -10, + ytop = intersection$obsval, + col = col2rgba(col = "blue", alpha = 0.6), + border = NA) + abline(h = intersection$obsval) + abline(v = intersection$tval) + legend(x = "topleft", + lty = 1, + col = "blue", + bty = 'n', + cex = 0.8, + legend = sprintf("$%s = %s$", + intersection$label, + tex.catwitherror(x = intersection$tval, + dx = intersection$tse, + digits = 2, + with.dollar = FALSE))) + } + # all lines on top of the error bands + lines(x = x, y = y) +} + +add_tauint_legend <- function(tauint, dtauint, obslabel){ + legend(x = "topleft", bty = 'n', pch = NA, lty = 1, col = "red", cex = 0.8, + legend = sprintf("$\\tau_{\\mathrm{int}}($ %s $) = %s$ traj.", + obslabel, + tex.catwitherror(x = tauint, + dx = dtauint, + digits = 2, + with.dollar = FALSE, with.cdot = FALSE) + ) + ) +} + +gf_make_plots <- function(gf_analysis_results, outputbasename, plotsize, scale_definition = 0.3){ + dplyr_avail <- requireNamespace("dplyr") + if( !dplyr_avail ){ + stop("The 'dplyr' package is required to use this function!\n") + } + + tikzfiles <- tikz.init(basename = sprintf("%s.gradflow", outputbasename), + width = plotsize, height = plotsize) + + # some bookkeeping to loop over the different scales + loop_plan <- list() + loop_plan[[length(loop_plan)+1]] <- list(name = "sqrt_t0plaq", + intersection_label = "t_0^{\\mathrm{plaq}}/a^2", + interp_name = "t0plaq", + interp_obs = "tsqEplaq_t0plaq", + interp_label_suffix = "$|_{t = t_0^{\\mathrm{plaq}}}$") + loop_plan[[length(loop_plan)+1]] <- list(name = "sqrt_t0sym", + intersection_label = "t_0^{\\mathrm{sym}}/a^2", + interp_name = "t0sym", + interp_obs = "tsqEsym_t0sym", + interp_label_suffix = "$|_{t = t_0^{\\mathrm{sym}}}$") + loop_plan[[length(loop_plan)+1]] <- list(name = "w0", + intersection_label = "(w_0/a)^2", + interp_name = "w0", + interp_obs = "Wsym_w0", + interp_label_suffix = "$|_{t = (w_0^{\\mathrm{sym}})^2}$") + + for( iter in loop_plan ){ + scale_sq <- gf_analysis_results$uw[[iter$name]]$scale$value^2 + scale_sq_se <- 2*gf_analysis_results$uw[[iter$name]]$scale$value * + gf_analysis_results$uw[[iter$name]]$scale$dvalue + + obslabel <- dplyr::filter(gf_analysis_results$summary, name == iter$name)$obslabel + + # plot the flow evolution of the observable for the scale in question + gf_plot_flow(obs_uwerr = gf_analysis_results$uw[[iter$name]]$obs, + tlim = c(0, 1.33 * scale_sq), + intersection = list(obsval = scale_definition, tval = scale_sq, + tse = scale_sq_se, label = iter$intersection_label), + ylim = c(0, 1.33 * scale_definition), + ylab = obslabel + ) + + # now we plot the observable at the flow time corresponding to the scale in question + # being careful about the relationship between the md time indexing and its relationship + # to actual trajectories + interp_dat <- data.frame(t = gf_analysis_results$interpolations[[iter$interp_name]]$md_idx * + gf_analysis_results$md_idx_scalefac, + y = gf_analysis_results$interpolations[[iter$interp_name]][[iter$interp_obs]]) + interp_label <- paste(obslabel, iter$interp_label_suffix) + + interp_uw <- plot_timeseries(dat = interp_dat, ylab = interp_label, titletext = "") + # the last plot is always the tauint estimator, so we can easily add a legend here + # we need to be careful because of the relationship of measurement indexing, their + # frequency and actual MD time + add_tauint_legend(tauint = gf_analysis_results$md_scalefac * interp_uw["tauint",1], + dtauint = gf_analysis_results$md_scalefac * interp_uw["dtauint",1], + obslabel = interp_label) + } + if( gf_analysis_results$have_Qsym ){ + obslabel <- dplyr::filter(gf_analysis_results$summary, name == "Qsym_tmax")$obslabel + + # plot the gradient flow evolution of the topological charge + gf_plot_flow(obs_uwerr = gf_analysis_results$Qsym_uwerr, + tlim = c(0.01, max(gf_analysis_results$t)), + ylim = range(c(gf_analysis_results$Qsym_uwerr$value - gf_analysis_results$Qsym_uwerr$dvalue, + gf_analysis_results$Qsym_uwerr$value + gf_analysis_results$Qsym_uwerr$dvalue)), + log = 'x', + ylab = obslabel) + + # and the MD history of the topological charge at flow time w_0^2 + md_label <- paste(obslabel, "$|_{t = (w_0^{\\mathrm{sym}}/a)^2}$") + Qsym_w0_uw <- plot_timeseries(dat = data.frame(y = gf_analysis_results$interpolations$w0$Qsym_w0, + t = gf_analysis_results$md_idx_scalefac * + unique(gf_analysis_results$raw_gradflow$traj)), + ylab = md_label, titletext = "") + add_tauint_legend(tauint = gf_analysis_results$md_scalefac * Qsym_w0_uw['tauint',1], + dtauint = gf_analysis_results$md_scalefac * Qsym_w0_uw['dtauint',1], + obslabel = md_label) + + # now extract and plot the MD history of the topoligical charge at maximum flow time + Qsym_tmax_dat <- dplyr::transmute( + dplyr::select( + dplyr::filter(gf_analysis_results$raw_gradflow, t == max(t)), + traj, Qsym + ), + t = gf_analysis_results$md_idx_scalefac * traj, + y = Qsym + ) + + md_label <- paste(obslabel, "$|_{t = t_{\\mathrm{max}}}$") + Qsym_tmax_uw <- plot_timeseries(dat = Qsym_tmax_dat, ylab = md_label, titletext = "") + add_tauint_legend(tauint = gf_analysis_results$md_scalefac * Qsym_tmax_uw["tauint",1], + dtauint = gf_analysis_results$md_scalefac * Qsym_tmax_uw["dtauint",1], + obslabel = md_label) + } + + tikz.finalize(tikzfiles) +} + +gf_interpolate <- function(raw_gradflow, target_ts){ + dplyr_avail <- requireNamespace("dplyr") + if( !dplyr_avail ){ + stop("The 'dplyr' package is required to use this function!\n") + } + + gf_tseries <- tseries_orig(data = dplyr::rename(raw_gradflow, md_idx = traj), + explanatory_vars = c("t")) + + all_obs <- dplyr::setdiff(colnames(gf_tseries$data), c("t", "md_idx")) + + res <- lapply( + X = target_ts, + FUN = function(target_t){ + reduce_plan <- lapply( + X = all_obs, + FUN = function(obs){ + exp_string <- sprintf("approx(x = t, y = %s, xout = %f)$y", + obs, target_t$val) + parse(text = exp_string) + } + ) + names(reduce_plan) <- sprintf("%s_%s", all_obs, rep(target_t$name, times = length(all_obs))) + + apply_reduce_plan.tseries(ts = gf_tseries, reduce_vars = c("t"), reduce_plan)$data + } + ) + return(res) +} + +summarise_gf_results <- function(gf_analysis_results, md_scalefac) { + sum_res <- rbind( + data.frame(name = "sqrt_t0plaq", + label = "$\\sqrt{t_0^\\mathrm{plaq}}/a$", + obslabel = "$\\langle t^2 E_{\\mathrm{plaq}}(t) \\rangle$", + val = gf_analysis_results$uw$sqrt_t0plaq$scale$value, + dval = gf_analysis_results$uw$sqrt_t0plaq$scale$dvalue, + dvalbs = gf_analysis_results$bs$scales$se$sqrt_t0plaq, + tauint = md_scalefac*gf_analysis_results$uw$sqrt_t0plaq$scale$tauint, + dtauint = md_scalefac*gf_analysis_results$uw$sqrt_t0plaq$scale$dtauint, + ddval = gf_analysis_results$uw$sqrt_t0plaq$scale$ddvalue), + data.frame(name = "sqrt_t0sym", + label = "$\\sqrt{t_0^\\mathrm{sym}}/a$", + obslabel = "$\\langle t^2 E_{\\mathrm{sym}}(t) \\rangle$", + val = gf_analysis_results$uw$sqrt_t0sym$scale$value, + dval = gf_analysis_results$uw$sqrt_t0sym$scale$dvalue, + dvalbs = gf_analysis_results$bs$scales$se$sqrt_t0sym, + tauint = md_scalefac*gf_analysis_results$uw$sqrt_t0sym$scale$tauint, + dtauint = md_scalefac*gf_analysis_results$uw$sqrt_t0sym$scale$dtauint, + ddval = gf_analysis_results$uw$sqrt_t0sym$scale$ddvalue), + data.frame(name = "w0", + label = "$w_0/a$", + obslabel = "$\\langle t \\frac{d}{dt} [ t^2 E_{\\mathrm{sym}}(t) ]\\rangle$", + val = gf_analysis_results$uw$w0$scale$value, + dval = gf_analysis_results$uw$w0$scale$dvalue, + dvalbs = gf_analysis_results$bs$scales$se$w0, + tauint = md_scalefac*gf_analysis_results$uw$w0$scale$tauint, + dtauint = md_scalefac*gf_analysis_results$uw$w0$scale$dtauint, + ddval = gf_analysis_results$uw$w0$scale$ddvalue), + data.frame(name = "t0plaq_ov_w0", + label = "$t_0^{\\mathrm{plaq}} / (a w_0)$", + obslabel = NA, + val = gf_analysis_results$uw$sqrt_t0plaq$ratios$t0plaq_ov_w0$value, + dval = gf_analysis_results$uw$sqrt_t0plaq$ratios$t0plaq_ov_w0$dvalue, + dvalbs = gf_analysis_results$bs$ratios$se$t0plaq_ov_w0, + tauint = md_scalefac*gf_analysis_results$uw$sqrt_t0plaq$ratios$t0plaq_ov_w0$tauint, + dtauint = md_scalefac*gf_analysis_results$uw$sqrt_t0plaq$ratio$t0plaq_ov_w0$dtauint, + ddval = gf_analysis_results$uw$sqrt_t0plaq$ratios$t0plaq_ov_w0$ddvalue), + data.frame(name = "t0sym_ov_w0", + label = "$t_0^{\\mathrm{sym}} / (a w_0)$", + obslabel = NA, + val = gf_analysis_results$uw$sqrt_t0sym$ratios$t0sym_ov_w0$value, + dval = gf_analysis_results$uw$sqrt_t0sym$ratios$t0sym_ov_w0$dvalue, + dvalbs = gf_analysis_results$bs$ratios$se$t0sym_ov_w0, + tauint = md_scalefac*gf_analysis_results$uw$sqrt_t0sym$ratios$t0sym_ov_w0$tauint, + dtauint = md_scalefac*gf_analysis_results$uw$sqrt_t0sym$ratio$t0sym_ov_w0$dtauint, + ddval = gf_analysis_results$uw$sqrt_t0sym$ratios$t0sym_ov_w0$ddvalue), + data.frame(name = "sqrt_t0plaq_ov_w0", + label = "$\\sqrt{t_0^{\\mathrm{plaq}}} / w_0$", + obslabel = NA, + val = gf_analysis_results$uw$sqrt_t0plaq$ratios$sqrt_t0plaq_ov_w0$value, + dval = gf_analysis_results$uw$sqrt_t0plaq$ratios$sqrt_t0plaq_ov_w0$dvalue, + dvalbs = gf_analysis_results$bs$ratios$se$sqrt_t0plaq_ov_w0, + tauint = md_scalefac*gf_analysis_results$uw$sqrt_t0plaq$ratios$sqrt_t0plaq_ov_w0$tauint, + dtauint = md_scalefac*gf_analysis_results$uw$sqrt_t0plaq$ratios$sqrt_t0plaq_ov_w0$dtauint, + ddval = gf_analysis_results$uw$sqrt_t0plaq$ratios$sqrt_t0plaq_ov_w0$ddvalue), + data.frame(name = "sqrt_t0sym_ov_w0", + label = "$\\sqrt{t_0^{\\mathrm{sym}}} / w_0$", + obslabel = NA, + val = gf_analysis_results$uw$sqrt_t0sym$ratios$sqrt_t0sym_ov_w0$value, + dval = gf_analysis_results$uw$sqrt_t0sym$ratios$sqrt_t0sym_ov_w0$dvalue, + dvalbs = gf_analysis_results$bs$ratios$se$sqrt_t0sym_ov_w0, + tauint = md_scalefac*gf_analysis_results$uw$sqrt_t0sym$ratios$sqrt_t0sym_ov_w0$tauint, + dtauint = md_scalefac*gf_analysis_results$uw$sqrt_t0sym$ratio$sqrt_t0sym_ov_w0$dtauint, + ddval = gf_analysis_results$uw$sqrt_t0sym$ratios$sqrt_t0sym_ov_w0$ddvalue), + data.frame(name = "sqrt_t0sym_ov_sqrt_t0plaq", + label = "$\\sqrt{t_0^{\\mathrm{sym}}} / \\sqrt{t_0^{\\mathrm{plaq}}}$", + obslabel = NA, + val = gf_analysis_results$uw$sqrt_t0sym$ratios$sqrt_t0sym_ov_sqrt_t0plaq$value, + dval = gf_analysis_results$uw$sqrt_t0sym$ratios$sqrt_t0sym_ov_sqrt_t0plaq$dvalue, + dvalbs = gf_analysis_results$bs$ratios$se$sqrt_t0sym_ov_sqrt_t0plaq, + tauint = md_scalefac*gf_analysis_results$uw$sqrt_t0sym$ratios$sqrt_t0sym_ov_sqrt_t0plaq$tauint, + dtauint = md_scalefac*gf_analysis_results$uw$sqrt_t0sym$ratio$sqrt_t0sym_ov_sqrt_t0plaq$dtauint, + ddval = gf_analysis_results$uw$sqrt_t0sym$ratios$sqrt_t0sym_ov_sqrt_t0plaq$ddvalue) + + ) + if( gf_analysis_results$have_Qsym ){ + Qsym_w0_uwerr <- uwerrprimary(data = gf_analysis_results$interpolations$w0$Qsym_w0, S = 1.5, pl = FALSE) + + tmax_idx <- nrow(gf_analysis_results$Qsym_uwerr) + sum_res <- rbind(sum_res, + data.frame(name = "Qsym_w0", + label = "$Q_{\\mathrm{sym}}(t=w_0^2)$", + obslabel = paste("$\\langle \\varepsilon_{\\mu \\nu \\rho \\gamma}^{\\mathrm{(eucl)}}", + "\\mathrm{Tr} [F_{\\mu \\nu} F_{\\rho \\gamma}](t) \\rangle$"), + val = Qsym_w0_uwerr$value, + dval = Qsym_w0_uwerr$dvalue, + dvalbs = NA, + tauint = md_scalefac*Qsym_w0_uwerr$tauint, + dtauint = md_scalefac*Qsym_w0_uwerr$dtauint, + ddval = Qsym_w0_uwerr$ddvalue), + data.frame(name = "Qsym_tmax", + label = "$Q_{\\mathrm{sym}}(t_{\\mathrm{max}})$", + obslabel = paste("$\\langle \\varepsilon_{\\mu \\nu \\rho \\gamma}^{\\mathrm{(eucl)}}", + "\\mathrm{Tr} [F_{\\mu \\nu} F_{\\rho \\gamma}](t) \\rangle$"), + val = gf_analysis_results$Qsym_uwerr$value[tmax_idx], + dval = gf_analysis_results$Qsym_uwerr$dvalue[tmax_idx], + dvalbs = NA, + tauint = md_scalefac*gf_analysis_results$Qsym_uwerr$tauint[tmax_idx], + dtauint = md_scalefac*gf_analysis_results$Qsym_uwerr$dtauint[tmax_idx], + ddval = gf_analysis_results$Qsym_uwerr$ddvalue[tmax_idx]) + ) + } + return(sum_res) +} + +#' return NAs for quantiies in `bootstrap_gf_scales_and_ratios` +na_bs_gf_scales_and_ratios <- function() { + ratios <- list() + scales <- list() + ratios$names <- c("t0plaq_ov_w0", "t0sym_ov_w0", "sqrt_t0plaq_ov_w0", "sqrt_t0sym_ov_w0", "sqrt_t0sym_ov_sqrt_t0plaq") + scales$names <- c("sqrt_t0plaq", "sqrt_t0sym", "w0") + ratios$central <- lapply(X = ratios$names, FUN = function(x) NA) + names(ratios$central) <- ratios$names + ratios$se <- ratios$central + scales$central <- lapply(X = scales$names, FUN = function(x) NA) + names(scales$central) <- scales$names + scales$se <- scales$central + + return( list(scales = scales, ratios = ratios) ) +} + +#' bootstrap gradient flow scale determinations +bootstrap_gf_scales_and_ratios <- function(raw_gradflow, boot.l, boot.R, seed = 12345, scale_definition = 0.3){ + dplyr_avail <- requireNamespace("dplyr") + if( !dplyr_avail ){ + stop("The 'dplyr' package is required to use this function!\n") + } + + # we use this common boot.l to bootstrap both the determination of the numerator + # and the denominator + gf_data <- dplyr::rename(dplyr::select(raw_gradflow, t, traj, Wsym, tsqEplaq, tsqEsym), + md_idx = traj) + + gf_obs_tseries <- bootstrap.tseries( + .tseries = tseries_orig(data = gf_data, explanatory_vars = c("t")), + boot.R = boot.R, + boot.l = boot.l, + seed = seed, + sim = 'geom', + endcorr = TRUE, + serial = FALSE, + progress = TRUE) + + # let's begin by determining the scales + plan <- list() + plan[["sqrt_t0plaq"]] <- expression( sqrt( approx(x = tsqEplaq, y = t, xout = scale_definiton)$y ) ) + plan[["sqrt_t0sym"]] <- expression( sqrt( approx(x = tsqEsym, y = t, xout = scale_definition)$y ) ) + plan[["w0"]] <- expression( sqrt( approx(x = Wsym, y = t, xout = scale_definition)$y ) ) + + gf_scales <- apply_reduce_plan.tseries(ts = gf_obs_tseries, + reduce_vars = c("t"), + plan = plan) + + print(gf_scales) + readline() + + # and then the ratios + plan <- list() + plan[["t0plaq_ov_w0"]] <- expression( sqrt_t0plaq^2 / w0 ) + plan[["t0sym_ov_w0"]] <- expression( sqrt_t0sym^2 / w0 ) + plan[["sqrt_t0plaq_ov_w0"]] <- expression( sqrt_t0plaq / w0 ) + plan[["sqrt_t0sym_ov_w0"]] <- expression( sqrt_t0sym / w0 ) + plan[["sqrt_t0sym_ov_sqrt_t0plaq"]] <- expression( sqrt_t0sym / sqrt_t0plaq ) + gf_scale_ratios <- apply_transmute_plan.tseries(ts = gf_scales, + plan = plan) + + return(invisible( + list(scales = gf_scales, ratios = gf_scale_ratios) + )) +} + + +#' define plan for gradient flow scales determination +define_gf_scales_plan <- function(){ + gf_scales_plan <- list() + gf_scales_plan[["sqrt_t0plaq"]] <- list(obs = "tsqEplaq", scale = "sqrt_t0plaq", + ratios = c("t0plaq_ov_w0", "sqrt_t0plaq_ov_w0"), + ratio_denom_obs = c("Wsym", "Wsym"), + ratio_funs = list(gf_scale_sq_ov_gf_scale, + gf_scale_ov_gf_scale)) + + gf_scales_plan[["sqrt_t0sym"]] <- list(obs = "tsqEsym", scale = "sqrt_t0sym", + ratios = c("t0sym_ov_w0", "sqrt_t0sym_ov_w0", "sqrt_t0sym_ov_sqrt_t0plaq"), + ratio_denom_obs = c("Wsym", "Wsym", "tsqEplaq"), + ratio_funs = list(gf_scale_sq_ov_gf_scale, + gf_scale_ov_gf_scale, + gf_scale_ov_gf_scale)) + + gf_scales_plan[["w0"]] <- list(obs = "Wsym", scale = "w0", + ratios = NA, ratio_num_obs = NA, ratio_denom_obs = NA, ratio_funs = NA) + + return(gf_scales_plan) +} + +#' determine intersection point of a gradient flow observable +#' +#' @param data Numeric vector of length corresponding to the number +#' of primary observables. This would be a single (possibly blocked) measurement +#' of a gradient flowed observable at all flow times. There's an implicit +#' assumption that `data` is monotonic in the relevant region. +#' @param time Numeric vector giving the values of \eqn{t/a^2} that the observable +#' in \code{data} has been measured at. +#' @param Numeric, intersection point to be determined. Usually this is \eqn{0.3}, +#' which is also the default value. +#' +#' @return +#' A single numeric corresponding to the point where `data` as a function of `time` +#' intersects `intersection`, we return the square root of the gradient flow scale. +gf_find_intersect <- function(data, time, intersection = 0.3){ + stopifnot(length(data) == length(time)) + sqrt( approx(x = data, y = time, xout = intersection)$y ) +} + +#' determine the ratio of the square of a gf scale and the value of another +#' +#' @param data Numeric vector of length corresponding to twice the number +#' of primary observables. This would be a single (possibly blocked) measurement +#' of two gradient flowed observables at all flow times. This might be +#' \eqn{ t^2 E(t) } and \eqn{ t \frac{d}{dt} ( t^2 E(t) ) }, for example. There's +#' an implicit assumption that both observables are monotonic in the relevant region. +#' The 'first' observable is in the first \code{length{time}} elements, while +#' the 'second' observable is expected to be in the last \code{length{time}} elements. +#' @param time Numeric vector giving the values of \eqn{t/a^2} that the observable +#' in \code{data} has been measured at. +#' @param intersection1, intersection2 Numeric, intersection points to be determined for +#' the first and second observables, respectively. +#' Usually this is \eqn{0.3}, which is also taken as the default value. +#' +#' @return +#' A single numeric value corresponding to the ratio of the flow time where the flow time of +#' the first observable intersects `intersection1` and the square root of the flow time at +#' which the second observable intersects `intersection2`. +gf_scale_sq_ov_gf_scale <- function(data, time, intersection1 = 0.3, intersection2 = 0.3){ + stopifnot(length(data) == 2*length(time)) + (gf_find_intersect(data = data[1:length(time)], + time = time, + intersection = intersection1))^2 / + gf_find_intersect(data = data[(length(time)+1):(2*length(time))], + time = time, + intersection = intersection2) +} + +#' determine the ratio of the square of a gf scale and the square of another +#' +#' @param data Numeric vector of length corresponding to twice the number +#' of primary observables. This would be a single (possibly blocked) measurement +#' of two gradient flowed observables at all flow times. This might be +#' \eqn{ t^2 E(t) } and \eqn{ t \frac{d}{dt} ( t^2 E(t) ) }, for example. There's +#' an implicit assumption that both observables are monotonic in the relevant region. +#' The 'first' observable is in the first \code{length{time}} elements, while +#' the 'second' observable is expected to be in the last \code{length{time}} elements. +#' @param time Numeric vector giving the values of \eqn{t/a^2} that the observable +#' in \code{data} has been measured at. +#' @param intersection1, intersection2 Numeric, intersection points to be determined for +#' the first and second observables, respectively. +#' Usually this is \eqn{0.3}, which is also taken as the +#' default value. +#' +#' @return +#' A single numeric value corresponding to the ratio of the flow time where +#' the first observable intersects `intersection1` and the flow time at +#' which the second observable intersects `intersection2`. +gf_scale_sq_ov_gf_scale_sq <- function(data, time, intersection1 = 0.3, intersection2 = 0.3){ + stopifnot(length(data) == 2*length(time)) + (gf_find_intersect(data = data[1:length(time)], + time = time, + intersection = intersection1))^2 / + gf_find_intersect(data = data[(length(time)+1):(2*length(time))], + time = time, + intersection = intersection2)^2 +} + +#' determine the ratio of one gf scale and another +#' +#' @param data Numeric vector of length corresponding to twice the number +#' of primary observables. This would be a single (possibly blocked) measurement +#' of two gradient flowed observables at all flow times. This might be +#' \eqn{ t^2 E(t) } and \eqn{ t \frac{d}{dt} ( t^2 E(t) ) }, for example. There's +#' an implicit assumption that both observables are monotonic in the relevant region. +#' The 'first' observable is in the first \code{length{time}} elements, while +#' the 'second' observable is expected to be in the last \code{length{time}} elements. +#' @param time Numeric vector giving the values of \eqn{t/a^2} that the observable +#' in \code{data} has been measured at. +#' @param intersection1, intersection2 Numeric, intersection points to be determined for +#' the first and second observables, respectively. +#' Usually this is \eqn{0.3}, which is also taken as the +#' default value. +#' +#' @return +#' A single numeric value corresponding to the ratio of the square root of flow time where +#' the first observable intersects `intersection1` and the square root of the flow time at +#' which the second observable intersects `intersection2`. +gf_scale_ov_gf_scale <- function(data, time, intersection1 = 0.3, intersection2 = 0.3){ + stopifnot(length(data) == 2*length(time)) + (gf_find_intersect(data = data[1:length(time)], + time = time, + intersection = intersection1)) / + gf_find_intersect(data = data[(length(time)+1):(2*length(time))], + time = time, + intersection = intersection2) +} + +#' helper function to read tmlqcd gradient flow data +#' +#' @param path directory name which contains gradient flow measurement files in the tmlqcd +#' format +#' +#' @param skip Integer, number of measurements to be skipped. +#' @param basename String, basename of the data, for example "gradflow". +#' @param outputbasename Stirng, this function will create an output file of the form "%s.raw_gradflow.Rdata". +#' in the present working directory. This parameter sets "%s". +#' @param read_files Boolean, whether to reread the raw data. +#' +#' @return Gradient flow data as produced by \code{\link{read_tmlqcd_gradflow}}. +#' +read_tmlqcd_gf_data <- function(path, skip, basename, outputbasename, read_files){ + save_filename <- sprintf("%s.raw_gradflow.Rdata", outputbasename) + if(read_files){ + raw_gradflow <- read_tmlqcd_gradflow(path = path, skip = skip, basename = basename) + save(raw_gradflow, file = save_filename, compress = FALSE) + } else { + warning(sprintf(paste("Warning, reading data from %s, if the number of measurements has changed,", + "set `read_files = TRUE` to reread the actualy output files\n"), + save_file) + ) + load(save_file) + } + return(raw_gradflow) +} diff --git a/R/bootstrap.nlsfit.R b/R/bootstrap.nlsfit.R index 7ca8c524b..2debac3a2 100644 --- a/R/bootstrap.nlsfit.R +++ b/R/bootstrap.nlsfit.R @@ -110,7 +110,7 @@ parametric.nlsfit <- function (fn, par.guess, boot.R, y, dy, x, dx, lower = rep(x = -Inf, times = length(par.guess)), upper = rep(x = +Inf, times = length(par.guess)), ..., bootstrap=TRUE) { - stopifnot(length(x) == length(y)) + stopifnot(length(x) %% length(y) == 0) stopifnot(missing(dx) || length(dx) == length(x)) stopifnot(missing(dy) || length(dy) == length(y)) stopifnot(length(lower) == length(par.guess)) @@ -156,7 +156,7 @@ parametric.nlsfit.cov <- function (fn, par.guess, boot.R, y, x, cov, lower = rep(x = -Inf, times = length(par.guess)), upper = rep(x = +Inf, times = length(par.guess)), ..., bootstrap=TRUE, na.rm = FALSE) { - stopifnot(length(x) == length(y)) + stopifnot(length(x) %% length(y) == 0) stopifnot(length(lower) == length(par.guess)) stopifnot(length(upper) == length(par.guess)) @@ -1280,20 +1280,6 @@ plot.bootstrapfit <- function(x, ..., col.line="black", col.band="gray", opacity } } -#' residual_plot -#' -#' generic residual_plot method -#' -#' @param x the object to plot -#' @param ... additional parameters to be passed on to specialised functions -#' -#' @return -#' No return value. -#' -#' @export -residual_plot <- function (x, ...) { - UseMethod("residual_plot", x) -} #' @export residual_plot.bootstrapfit <- function (x, ..., error_fn = x$error.function, operation = `/`) { diff --git a/R/cf.R b/R/cf.R index fd496e4ec..5e901e663 100644 --- a/R/cf.R +++ b/R/cf.R @@ -419,7 +419,7 @@ is_empty.cf <- function (.cf) { #' with elements `boot`, `boot.R`, `boot.l`, `sim`, `endcorr`, #' `resampling_method`, `boot_dim`, `icf` and, optionally #' `iboot_dim` (if both `cf1` and `cf2` contain imaginary parts). -resampling_is_compatible <- function(cf1, cf2){ +resampling_is_compatible.cf <- function(cf1, cf2){ res <- list() res$boot <- ( inherits(cf1, 'cf_boot') & inherits(cf2, 'cf_boot') ) @@ -453,7 +453,7 @@ resampling_is_compatible <- function(cf1, cf2){ #' with elements `boot`, `boot.R`, `boot.l`, `sim`, `endcorr`, #' `resampling_method`, `boot_nrow`, `icf` and, optionally #' `iboot_nrow` (if both `cf1` and `cf2` contain imaginary parts). -resampling_is_concatenable <- function(cf1, cf2){ +resampling_is_concatenable.cf <- function(cf1, cf2){ res <- list() res$boot <- ( inherits(cf1, 'cf_boot') & inherits(cf2, 'cf_boot') ) res$seed <- (cf1$seed == cf2$seed) diff --git a/R/cvc_readutils.R b/R/cvc_readutils.R index c1c8f2a8f..ee54ce421 100644 --- a/R/cvc_readutils.R +++ b/R/cvc_readutils.R @@ -22,7 +22,7 @@ cvc_local_loop_key <- function(loop_type, istoch, gamma, p) #' @title Generate key to identify a momentum and spin-projected loop #' @param loop_type String, loop type. #' @param istoch Integer, index of the stochastic sample. -#' @param gamma Integer, CVC convention gamma matrix identifier. +#' @param gamma Integer or string, gamma matrix identifier. #' @param p Integer vector of length 3, (x,y,z) components of the momentum #' vector in lattice units. #' @return @@ -40,33 +40,55 @@ cvc_local_loop_key <- function(loop_type, istoch, gamma, p) p[1], p[2], p[3]) } -#' @title Generate HDF5 key for CVC 'correlators' meson 2pt function +#' @title Generate HDF5 key for meson 2pt functions labelled with CVC-like pathnames. +#' +#' @details +#' The key for a meson two-point function has the form: +#' +#' /u+-g-d-g/s1/t10/gf5/pfx0pfy0pfz0/gi5/pix0piy0piz0 +#' +#' where, from left to right: +#' \itemize{ +#' \item 'u' is the flavour of the "backward" propagator +#' \item '+' indicates that 'u' is daggered +#' \item 'g' indicates a gamma insertion +#' \item 'd' is the flavour of the "forward" propagator +#' \item 'g' indicates a Dirac structure at the source +#' \item 'sN' is an optional stochastic sample identifier +#' \item 'tXX' is the source time slice +#' \item 'gfN' gamma structure at the sink +#' \item 'pfxXpfyYpfzZ' is the sink momentum in CVC convention (sink and source phases are both e^{ipx}) +#' \item 'giN' gamma structure at the souce in CVC indexing +#' \item 'pixXpiyYpizZ' at the source in CVC convention +#' } +#' #' @param fwd_flav String, "forward" quark flavour identifier. #' @param bwd_flav String, "backward" quark flavour identifier. #' @param src_ts Integer, source time slice. -#' @param snk_gamma Integer, CVC convention gamma matrix identifier at the source. -#' @param src_gamma Integer, CVC convention gamma matrix identified at the sink. +#' @param snk_gamma Integer or string, gamma matrix identifier at the source. +#' @param src_gamma Integer or string, gamma matrix identifier at the sink. #' @param src_p Integer vector of length 3. (x,y,z) components of the source momentum #' vector in lattice units. #' @param snk_p Integer vector of length 3. (x,y,z) components of the sink momentum #' vector in lattice units. +#' @param istoch Integer, optional stochastic sample identifier. #' @return -#' A character vector with the HDF5 key. +#' A character vector with the HDF5 pathname. #' #' @export -correlators_key_meson_2pt <- function(fwd_flav, bwd_flav, src_ts, snk_gamma, src_gamma, src_p, snk_p) +correlators_key_meson_2pt <- function(fwd_flav, bwd_flav, src_ts, snk_gamma, src_gamma, src_p, snk_p, istoch = NA) { stopifnot( length(snk_p) == 3 ) stopifnot( length(src_p) == 3 ) stopifnot( is.character(fwd_flav) ) stopifnot( is.character(bwd_flav) ) stopifnot( is.integer(src_ts) ) - stopifnot( is.integer(snk_gamma) ) - stopifnot( is.integer(src_gamma) ) + if( !is.na(istoch) ){ stopifnot( is.integer(istoch) ) } - sprintf("/%s+-g-%s-g/t%d/gf%d/pfx%dpfy%dpfz%d/gi%d/pix%dpiy%dpiz%d", + sprintf("/%s+-g-%s-g/%st%d/gf%s/pfx%dpfy%dpfz%d/gi%s/pix%dpiy%dpiz%d", bwd_flav, fwd_flav, + ifelse(is.na(istoch), "", sprintf("s%d/", istoch)), src_ts, snk_gamma, snk_p[1],snk_p[2],snk_p[3], @@ -74,17 +96,35 @@ correlators_key_meson_2pt <- function(fwd_flav, bwd_flav, src_ts, snk_gamma, src src_p[1],src_p[2],src_p[3]) } -#' @title Generate key string to identify a meson 2pt function +#' @title Generate identifier string to identify a meson 2pt function with a CVC-like pathname +#' +#' @details +#' The identifier string for a meson two-point function has the form: +#' +#' /u+-g-d-g/gf5/pfx0pfy0pfz0/gi5/pix0piy0piz0 +#' +#' where, from left to right: +#' \itemize{ +#' \item 'u' is the flavour of the "backward" propagator +#' \item '+' indicates that 'u' is daggered +#' \item 'g' indicates a gamma insertion +#' \item 'd' is the flavour of the "forward" propagator +#' \item 'g' indicates a Dirac structure at the source +#' \item 'gfN' gamma structure at the sink +#' \item 'pfxXpfyYpfzZ' is the sink momentum in CVC convention (sink and source phases are both e^{ipx}) +#' \item 'giN' gamma structure at the souce in CVC indexing +#' \item 'pixXpiyYpizZ' at the source in CVC convention +#' } #' @param fwd_flav String, "forward" quark flavour identifier. #' @param bwd_flav String, "backward" quark flavour identifier. -#' @param snk_gamma Integer, CVC convention gamma matrix identifier at the source. -#' @param src_gamma Integer, CVC convention gamma matrix identified at the sink. +#' @param snk_gamma Integer or string, gamma matrix identifier at the source. +#' @param src_gamma Integer or string, gamma matrix identifier at the sink. #' @param src_p Integer vector of length 3. (x,y,z) components of the source momentum #' vector in lattice units. #' @param snk_p Integer vector of length 3. (x,y,z) components of the sink momentum #' vector in lattice units. #' @return -#' A character vector with the HDF5 key. +#' A character vector with the identifier string. #' #' @export cf_key_meson_2pt <- function(fwd_flav, bwd_flav, snk_gamma, src_gamma, src_p, snk_p) @@ -93,10 +133,8 @@ cf_key_meson_2pt <- function(fwd_flav, bwd_flav, snk_gamma, src_gamma, src_p, sn stopifnot( length(src_p) == 3 ) stopifnot( is.character(fwd_flav) ) stopifnot( is.character(bwd_flav) ) - stopifnot( is.integer(snk_gamma) ) - stopifnot( is.integer(src_gamma) ) - sprintf("/%s+-g-%s-g/gf%d/pfx%dpfy%dpfz%d/gi%d/pix%dpiy%dpiz%d", + sprintf("/%s+-g-%s-g/gf%s/pfx%dpfy%dpfz%d/gi%s/pix%dpiy%dpiz%d", bwd_flav, fwd_flav, snk_gamma, @@ -105,11 +143,12 @@ cf_key_meson_2pt <- function(fwd_flav, bwd_flav, snk_gamma, src_gamma, src_p, sn src_p[1],src_p[2],src_p[3]) } -#' @title Generate HDF5 key for CVC 'correlators' meson 3pt function with a local or derivative insertion +#' @title Generate HDF5 key for meson 3pt function with a local or derivative insertion #' +#' @details #' The key for a meson three-point function has the form: #' -#' /sud+-g-u-g/t10/dt12/gf5/pfx0pfy0pfz0/gc0/Ddim0_dir0/Ddim1_dir1/D[...]/gi5/pix0piy0piz0 +#' /sud+-g-u-g/s1/t10/dt12/gf5/pfx0pfy0pfz0/gc0/Ddim0_dir0/Ddim1_dir1/D[...]/gi5/pix0piy0piz0 #' #' where, from left to right: #' \itemize{ @@ -119,11 +158,12 @@ cf_key_meson_2pt <- function(fwd_flav, bwd_flav, snk_gamma, src_gamma, src_p, sn #' \item 'g' indicates a gamma insertion #' \item 'u' is the flavour of the foward propagator #' \item 'g' indicates a Dirac structure at the source +#' \item 'sN' is an optional stochastic sample identifier #' \item 'tXX' is the source time slice #' \item 'dtYY' is the source-sink separation -#' \item 'gfN' gamma structure at the sink in CVC indexing +#' \item 'gfN' gamma structure at the sink #' \item 'pfxXpfyYpfzZ' is the sink momentum in CVC convention (sink and source phases are both e^{ipx}) -#' \item 'gcN' gamma structure at the current insertion point in CVC indexing +#' \item 'gcN' gamma structure at the current insertion point in #' \item 'DdimJ_dirK' covariant displacement applied in dimension 'J', direction 'K' #' where it should be noted that this is. in operator notation, i.e., the right-most #' displacement is the one applied first. @@ -137,8 +177,8 @@ cf_key_meson_2pt <- function(fwd_flav, bwd_flav, snk_gamma, src_gamma, src_p, sn #' @param seq_flav String, "sequential" quark flavour identifier. #' @param src_ts Integer, source time slice. #' @param dt Integer, source-sink separation. -#' @param snk_gamma Integer, CVC convention gamma matrix identifier at the source. -#' @param cur_gamma Integer, CVC convention gamma matrix identified at the insertion. +#' @param snk_gamma Integer or string, gamma matrix identifier at the sink. +#' @param cur_gamma Integer or string, gamma matrix identifier at the insertion. #' @param cur_displ_dim Integer vector of dimensions (0,1,2,3 <-> t,x,y,z) in which #' covariant displacements have been applied. This vector will be #' parsed in reverse order, such that the first element here @@ -150,11 +190,12 @@ cf_key_meson_2pt <- function(fwd_flav, bwd_flav, snk_gamma, src_gamma, src_p, sn #' which the covariant displacements have been applied. Parsing #' as for 'cur_displ_dim'. Length must be matched to 'cur_displ_dim'. #' Defaults to 'NA' for no displacements. -#' @param src_gamma Integer, CVC convention gamma matrix identified at the sink. +#' @param src_gamma Integer or string, gamma matrix identifier at the source. #' @param src_p Integer vector of length 3. (x,y,z) components of the source momentum #' vector in lattice units. #' @param snk_p Integer vector of length 3. (x,y,z) components of the sink momentum #' vector in lattice units. +#' @param istoch Integer, optional stochastic sample identifier. #' @return #' A character vector with the HDF5 key. #' @@ -164,7 +205,8 @@ correlators_key_meson_3pt <- function(fwd_flav, bwd_flav, seq_flav, snk_gamma, cur_gamma, cur_displ_dim = NA, cur_displ_dir = NA, src_gamma, - src_p, snk_p) + src_p, snk_p, + istoch = NA) { stopifnot( length(snk_p) == 3 ) stopifnot( length(src_p) == 3 ) @@ -172,9 +214,7 @@ correlators_key_meson_3pt <- function(fwd_flav, bwd_flav, seq_flav, stopifnot( is.character(bwd_flav) ) stopifnot( is.integer(src_ts) ) stopifnot( is.integer(dt) ) - stopifnot( is.integer(snk_gamma) ) - stopifnot( is.integer(src_gamma) ) - + if( !is.na(istoch) ){ stopifnot( is.integer(istoch) ) } if( !is.na(cur_displ_dim) | !is.na(cur_displ_dir) ){ stopifnot( all(cur_displ_dim %in% c(0:3)) ) stopifnot( all(cur_displ_dir %in% c(0,1)) ) @@ -208,11 +248,12 @@ correlators_key_meson_3pt <- function(fwd_flav, bwd_flav, seq_flav, return(key) } -#' @title Generate HDF5 key for CVC 'correlators' meson 3pt function with a local or derivative insertion +#' @title Generate an identifier string for a meson 3pt function labelled with a CVC-like pathname with a local or derivative insertion #' -#' The key for a meson three-point function has the form: +#' @details +#' The identifier string for a meson three-point function has the form: #' -#' /sud+-g-u-g/t10/dt12/gf5/pfx0pfy0pfz0/gc0/Ddim0_dir0/Ddim1_dir1/D[...]/gi5/pix0piy0piz0 +#' /sud+-g-u-g/dt12/gf5/pfx0pfy0pfz0/gc0/Ddim0_dir0/Ddim1_dir1/D[...]/gi5/pix0piy0piz0 #' #' where, from left to right: #' \itemize{ @@ -222,16 +263,15 @@ correlators_key_meson_3pt <- function(fwd_flav, bwd_flav, seq_flav, #' \item 'g' indicates a gamma insertion #' \item 'u' is the flavour of the foward propagator #' \item 'g' indicates a Dirac structure at the source -#' \item 'tXX' is the source time slice #' \item 'dtYY' is the source-sink separation -#' \item 'gfN' gamma structure at the sink in CVC indexing +#' \item 'gfN' gamma structure at the sink in CVC indexing ('N' can be numeric or a string) #' \item 'pfxXpfyYpfzZ' is the sink momentum in CVC convention (sink and source phases are both e^{ipx}) -#' \item 'gcN' gamma structure at the current insertion point in CVC indexing +#' \item 'gcN' gamma structure at the current insertion point ('N' can be numeric or a string) #' \item 'DdimJ_dirK' covariant displacement applied in dimension 'J', direction 'K' #' where it should be noted that this is. in operator notation, i.e., the right-most #' displacement is the one applied first. #' \item [...] -#' \item 'giN' gamma structure at the souce in CVC indexing +#' \item 'giN' gamma structure at the source ('N' can be numeric or a string) #' \item 'pixXpiyYpizZ' at the source in CVC convention #' } #' @@ -239,8 +279,8 @@ correlators_key_meson_3pt <- function(fwd_flav, bwd_flav, seq_flav, #' @param bwd_flav String, "backward" quark flavour identifier. #' @param seq_flav String, "sequential" quark flavour identifier. #' @param dt Integer, source-sink separation. -#' @param snk_gamma Integer, CVC convention gamma matrix identifier at the source. -#' @param cur_gamma Integer, CVC convention gamma matrix identified at the insertion. +#' @param snk_gamma Integer or string, gamma matrix identifier at the sink. +#' @param cur_gamma Integer or string, gamma matrix identifier at the insertion. #' @param cur_displ_dim Integer vector of dimensions (0,1,2,3 <-> t,x,y,z) in which #' covariant displacements have been applied. This vector will be #' parsed in reverse order, such that the first element here @@ -252,13 +292,13 @@ correlators_key_meson_3pt <- function(fwd_flav, bwd_flav, seq_flav, #' which the covariant displacements have been applied. Parsing #' as for 'cur_displ_dim'. Length must be matched to 'cur_displ_dim'. #' Defaults to 'NA' for no displacements. -#' @param src_gamma Integer, CVC convention gamma matrix identified at the sink. +#' @param src_gamma Integer or string, gamma matrix identifier at the source. #' @param src_p Integer vector of length 3. (x,y,z) components of the source momentum #' vector in lattice units. #' @param snk_p Integer vector of length 3. (x,y,z) components of the sink momentum #' vector in lattice units. #' @return -#' A character vector with the HDF5 key. +#' A character vector with the identifier string. #' #' @export cf_key_meson_3pt <- function(fwd_flav, bwd_flav, seq_flav, @@ -273,8 +313,6 @@ cf_key_meson_3pt <- function(fwd_flav, bwd_flav, seq_flav, stopifnot( is.character(fwd_flav) ) stopifnot( is.character(bwd_flav) ) stopifnot( is.integer(dt) ) - stopifnot( is.integer(snk_gamma) ) - stopifnot( is.integer(src_gamma) ) if( !is.na(cur_displ_dim) | !is.na(cur_displ_dir) ){ stopifnot( all(cur_displ_dim %in% c(0:3)) ) stopifnot( all(cur_displ_dir %in% c(0,1)) ) @@ -337,7 +375,7 @@ cvc_to_raw_cf <- function(cf_dat, dims = c(1,1)) # internal dimensions and time in the 'wrong' order cf_dat <- array(complex(real = cf_dat[ridcs], imaginary = cf_dat[iidcs]), - dim = c(1,dims,nts)) + dim = c(1,dims,nts)) cf_dims <- dim(cf_dat) # reshape the array, ordering 'measurement' (a single one), 'time', # internal dimensions diff --git a/R/functional.R b/R/functional.R index 884d73b7f..b0364353e 100644 --- a/R/functional.R +++ b/R/functional.R @@ -38,3 +38,10 @@ foldr1 <- function (f, xs) { else return (Reduce(f, xs[2:l], xs[[1]])) } + +#' @export +auto_full_join <- function(a, b){ + common_vars <- dplyr::intersect(colnames(a), colnames(b)) + dplyr::full_join(a, b, by = common_vars) +} + diff --git a/R/generics.R b/R/generics.R new file mode 100644 index 000000000..3d07b1b33 --- /dev/null +++ b/R/generics.R @@ -0,0 +1,114 @@ +#' residual_plot +#' +#' generic residual_plot method +#' +#' @param x the object to plot +#' @param ... additional parameters to be passed on to specialised functions +#' +#' @return +#' No return value. +#' +#' @export +residual_plot <- function (x, ...) { + UseMethod("residual_plot", x) +} + +#' generic function to extract a fitted mass +#' +#' @description +#' One of the main analysis tasks in \link{hadron} is the estimation +#' of energy levels or masses from correlation functions. The +#' corresponding analysis functions return objects, typically lists, +#' containing the masses or energy levels. `extract_mass` is a +#' generic function to extrac such fitted mass values. +#' +#' @param object Object to extract the mass from. +#' +#' @return Numeric. The mass value. +#' +#' @export +extract_mass <- function (object) { + UseMethod('extract_mass') +} + +#' generic function to check if resampling samples are compatible +#' +#' @description +#' When binary operations are performed on resampled data, it is +#' necessary to check if the samples are compatible, at the very +#' least they must have the same dimensions. +#' +#' @details +#' Note that since R's double dispatch doesn't really work with S3 +#' classes, the class of \code{x} decides which method is called. +#' +#' @param x lhs object +#' @param y rhs object +#' +#' @return +#' list of booleans which correspond to the results of checks +#' for equality of different properties of the resampling samples +#' +#' @export +resampling_is_compatible <- function(x,y){ + UseMethod('resampling_is_compatible', x) +} + +#' generic function to check if resampling samples are concatenable +#' +#' @description +#' When we want to combine resampled data along an axis orthogonal +#' to the axis of samples (for example, if we want to turn two +#' vectors with 'boot.R' samples into a matrix with 2 columns), +#' then we need to check if the number of samples for both +#' data are the same. +#' +#' @details +#' Note that since R's double dispatch doesn't really work with S3 +#' classes, the class of \code{x} decides which method is called. +#' +#' @param x lhs object +#' @param y rhs object +#' +#' @return +#' list of booleans which correspond to the results of checks +#' for equality of different properties of the resampling samples +#' +#' @export +resampling_is_concatenable <- function(x,y){ + UseMethod('resampling_is_concatenable', x) +} + +#' generic function to multiply two objects with each other +#' +#' @details +#' Note that since R's double dispatch doesn't really work with S3 +#' classes, the class of \code{x} decides which method is called. +#' +#' @description +#' +#' +#' @export +mul <- function(x, y, ...){ + UseMethod('mul', x) +} + +#' generic function to add two objects to each other +#' +#' This function provides +#' +#' @export +add <- function(x, y, ...){ + UseMethod('add', x) +} + +#' @export +subtract <- function(x, y, ...){ + UseMethod('subtract', x) +} + +#' generic function to divide objects by each other +#' @export +div <- function(x, y, ...){ + UseMethod('div', x) +} diff --git a/R/hadron-package.R b/R/hadron-package.R index aed1c9ff9..449b3a325 100644 --- a/R/hadron-package.R +++ b/R/hadron-package.R @@ -41,7 +41,8 @@ NULL #' #' These are not to be called by the user (or in some cases are just waiting #' for proper documentation to be written :). -#' +#' +#' @name internal_hadron_functions #' @aliases arrangeCor.vector arrangeCor.pion arrangeCor.b1 arrangeCor.a0 #' getNxNmatrix ChiSqr.1mass ChiSqr.2mass ChiSqr.3mass fitmasses.vector #' fitmasses.vector.boot fitmasses.pion fitmasses.pion.boot fitmasses.b1 @@ -110,7 +111,8 @@ NULL #' (N that_n - (N-m) that_n^(i)).} Here, \eqn{\hat t_n}{that_n} is the MBB #' estimate (in our case of standard deviation) and \eqn{\hat #' t_n^{(i)}}{that_n^(i)} is the i-th jackknife replication of it. -#' +#' +#' @name jackknifeafterboot #' @aliases jackknifeafterboot jab.cf jab.cf.derived jab.effectivemass #' jab.effectivemassfit jab.matrixfit #' @param cf An object of class \code{cf} generated by @@ -197,4 +199,21 @@ NULL NULL #' A three pion correlator with significant thermal states. -"cA2.09.48_3pi_I3_0_A1u_1_pc" +#' +#' Sample data for a correlation function on a 48 cubed times 96 +#' lattice QCD simulation representing a principal correlator +#' from a three-pion GEVP at maximal isospin. Includes bootstrap +#' samples. +#' +#' @name cA2.09.48_3pi_I3_0_A1u_1_pc +#' @docType data +#' @keywords datasets +#' @format `cf` class with `cf_meta`, `cf_boot`, `cf_principal_correlator` mixins +#' @examples +#' +#' data(cA2.09.48_3pi_I3_0_A1u_1_pc) +#' plot(cA2.09.48_3pi_I3_0_A1u_1_pc, log='y') +#' plot(bootstrap.effectivemass(cA2.09.48_3pi_I3_0_A1u_1_pc)) +#' +NULL + diff --git a/R/nyom_readutils.R b/R/nyom_readutils.R new file mode 100644 index 000000000..792c9db3e --- /dev/null +++ b/R/nyom_readutils.R @@ -0,0 +1,40 @@ +#' @title Convert correlation function read from nyom HDF5 format to 'raw_cf' +#' @description Given a data frame with two columns 'r' and 'i' of real and imaginary components +#' of correlation function, creates an object of class 'raw_cf' with +#' a single measurement, inferring \code{Time} from the passed numeric vector +#' while the shape of the internal dimensions has to be specified explicitly +#' if larger than `one by one` (\code{c(1,1)}). +#' @param cf_dat data frame with two columns 'r' and 'i' corresponding to the real and imaginary parts +#' of a single measurement of a correlation function. +#' Ordering of the input should internal dimensions, time (left to right, fastest to slowest). +#' @param dims Integer vector with the sizes of the internal dimensions. For example, +#' \code{c(4,4)} for spin correlators or \code{c(8,8)} for a spin-flavour correlator. +#' @param mult Numeric or complex vector of length 1 or \code{prod(dims)} to apply an element-wise +#' scaling or sign change to the correlator along the internal dimensions. +#' @return `raw_cf` object with a \code{data} member which contains the data (as complex numbers) +#' in the shape \code{c(1,nts,dims)}, where `nts` is the number of time slices +#' inferred from the length of \code{cfdat} and the product of the internal dimensions \code{dims}. +#' +#' @export +nyom_to_raw_cf <- function(cf_dat, dims = c(1,1), mult = 1.0) +{ + number_of_internal_dims <- prod(dims) + stopifnot( length(mult) == 1 | length(mult) == prod(dims) ) + + nts <- nrow(cf_dat)/number_of_internal_dims + + # turn it into a complex 4D array of with ordering of + # internal dimensions and time in the 'wrong' order + cf_dat <- array(complex(real = cf_dat$r, + imaginary = cf_dat$i)*mult, + dim = c(1,dims,nts)) + cf_dims <- dim(cf_dat) + # reshape the array, ordering 'measurement' (a single one), 'time', + # internal dimensions + cf_dat <- aperm(cf_dat, c(1,length(cf_dims),2:(length(cf_dims)-1))) + + cf <- raw_cf_data(cf = raw_cf_meta(Time=nts, dim=dims), + data = cf_dat) + return(cf) +} + diff --git a/R/onlinemeas.R b/R/onlinemeas.R index 4be213a07..8f749155e 100644 --- a/R/onlinemeas.R +++ b/R/onlinemeas.R @@ -78,6 +78,8 @@ #' result of the time series analysis for the lowest mass as carried out by #' \code{\link{uwerr}} } \item{uwerrresultmpcac}{ the result of the time series #' analysis for the PCAC mass carried out by \code{\link{uwerr}}, see details } +#' \item{uwerrresultpp}{ the results of the time series analysis for the PS mass +#' carried out via \code{\link{uwerr}}, see details } #' \item{effmass}{ effective masses in the pion channel } \item{matrix.size}{ #' size of the data matrix, copied from input } \item{boot}{ object returned by #' the call to \code{\link{boot}} if \code{method} was set correspodingly. @@ -288,6 +290,7 @@ onlinemeas <- function(data, t1, t2, res <- list(fitresult=pcacfit, fitresultpp=massfit, t1=t1, t2=t2, N=length(W[1,]), Time=Time, fitdata=data.frame(t=(jj-1), Fit=Fit[ii], Cor=Cor[ii], Err=E[ii], Chi=Chi[ii]), + uwerrresultpp=sfit.uwerrm, # central value and errors from fit to PP correl only uwerrresultmps=fit.uwerrm, uwerrresultmpcac=fit.uwerrpcac, uwerrresultfps=fit.uwerrfpi, diff --git a/R/plotutils.R b/R/plotutils.R index 06fb9560e..6dda3e86f 100644 --- a/R/plotutils.R +++ b/R/plotutils.R @@ -736,3 +736,58 @@ pointswithslantederror <- function(x, y, dx, dy, cor, col="black", bcol="black", x1=x+fac*dx, y1=y+dy, col=bcol) } + +#' make make a colour transparent +#' +#' @param col Single value of any of the three kinds of R color specifications, +#' i.e., either a color name (as listed by ‘colors()’), a +#' hexadecimal string of the form ‘"#rrggbb"’ or ‘"#rrggbbaa"’ +#' (see ‘rgb’), or a positive integer ‘i’ meaning +#' ‘palette()[i]’. +#' @param alpha Numeric in \code{range(0,1.0)} giving the level of transparency. +#' 1.0 corresponds to full opacity, while 0.0 corresponds to complete transparency. +#' @export +col2rgba <- function(col, alpha = 1.0){ + stopifnot( length(col) == 1 ) + stopifnot( length(alpha) == 1) + col_rgb <- as.vector(col2rgb(col)) + col_rgba <- rgb(red = col_rgb[1]/255.0, + green = col_rgb[2]/255.0, + blue = col_rgb[3]/255.0, + alpha = alpha) + return(col_rgba) +} + +#' draw an error band using `polygon` +#' +#' @param x Numeric vector of x values. +#' @param ymin Numeric vector of lower bounds. +#' @param ymax Numeric vector of upper bounds. +#' @param col Single value of any of the three kinds of R color specifications, +#' i.e., either a color name (as listed by ‘colors()’), a +#' hexadecimal string of the form ‘"#rrggbb"’ or ‘"#rrggbbaa"’ +#' (see ‘rgb’), or a positive integer ‘i’ meaning +#' ‘palette()[i]’. +#' @param alpha Numeric in \code{range(0,1.0)} giving the level of transparency. +#' 1.0 corresponds to full opacity, while 0.0 corresponds to complete transparency. +#' @param the colour to draw the border, see \link{polygon} for details. +#' @param ... Further parameters to be passed to \link{polygon} +#' +#' @details +#' \code{x}, \code{ymin}, \code{ymax} must all be of the same length. The +#' polygon is drawn to the current plotting device. +#' +#' @export +draw_errorband <- function(x, ymin, ymax, col, alpha, border = NA, ...){ + stopifnot( length(x) == length(ymin) ) + stopifnot( length(x) == length(ymax) ) + stopifnot( alpha >= 0.0 & alpha <= 1.0 ) + stopifnot( length(alpha) == 1 ) + stopifnot( length(col) == 1 ) + + poly_col <- col2rgba(col, alpha) + + poly_x <- c(x,rev(x)) + poly_y <- c(ymin,rev(ymax)) + polygon(x = poly_x, y = poly_y, col = poly_col, border = border, ...) +} diff --git a/R/readutils.R b/R/readutils.R index 305a30806..dc84466f0 100644 --- a/R/readutils.R +++ b/R/readutils.R @@ -211,6 +211,22 @@ getorderedconfignumbers <- function(path="./", basename="onlinemeas", last.digit #' @export readcmifiles readcmifiles <- function(files, excludelist=c(""), skip, verbose=FALSE, colClasses, obs=NULL, obs.index, avg=1, stride=1) { + + # use lapply as our default and switch to pbmclapply if available below + my_lapply <- lapply + + pbmcapply_avail <- requireNamespace('pbmcapply') + if(pbmcapply_avail){ + # when verbose output is desired, we disable the progress bar and run with a single thread + if(!verbose) { + my_lapply <- function(...){ pbmcapply::pbmclapply(ignore.interactive=TRUE, mc.preschedule=TRUE, ...) } + } else { + message("readcmifiles called with `verbose=TRUE`, I/O will be single-threaded!\n") + } + } else { + warning("pbmcapply package not found, I/O will be single-threaded!\n") + } + if(missing(files)) { stop("readcmifiles: filelist missing, aborting...\n") } @@ -227,73 +243,65 @@ readcmifiles <- function(files, excludelist=c(""), skip, verbose=FALSE, # when stride is > 1, we only read a subset of files nFilesToLoad <- as.integer(nFiles/stride) nCols <- length(tmpdata) - ## we generate the full size data.frame first - tmpdata[,] <- NA - ldata <- tmpdata - ldata[((nFilesToLoad-1)*fLength+1):(nFilesToLoad*fLength),] <- tmpdata - # create a progress bar - pb <- NULL - if(!verbose) { - pb <- txtProgressBar(min = 0, max = nFiles, style = 3) - } - for(i in c(1:nFiles)) { - # update progress bar - if(!verbose) { - setTxtProgressBar(pb, i) - } - if( !(files[i] %in% excludelist) && file.exists(files[i]) && (i-1) %% stride == 0) { - - if(verbose) { - message("Reading from file ", files[i], "\n") - } - ## read the data - tmpdata <- read.table(files[i], colClasses=colClasses, skip=skip) - if(reduce) { - tmpdata <- tmpdata[tmpdata[,obs.index] %in% obs,] - } - ## sanity checks - if(fLength < length(tmpdata$V1)) { - warning(paste("readcmifiles: file ", files[i], " does not have the same length as the other files. We will cut and hope...\n")) - } - else if(fLength > length(tmpdata$V1)) { - stop(paste("readcmifiles: file", files[i], "is too short. Aborting...\n")) + dat <- my_lapply( + X=1:nFiles, + FUN=function(i){ + if( !(files[i] %in% excludelist) && file.exists(files[i]) && (i-1) %% stride == 0) { + + if(verbose) { + message("Reading from file ", files[i], "\n") + } + ## read the data + tmpdata <- read.table(files[i], colClasses=colClasses, skip=skip) + if(reduce) { + tmpdata <- tmpdata[tmpdata[,obs.index] %in% obs,] + } + ## sanity checks + if(fLength < length(tmpdata$V1)) { + warning(paste("readcmifiles: file ", files[i], " does not have the same length as the other files. We will cut and hope...\n")) + } + else if(fLength > length(tmpdata$V1)) { + stop(paste("readcmifiles: file", files[i], "is too short. Aborting...\n")) + } + if(nCols != length(tmpdata)) { + stop(paste("readcmifiles: file", files[i], "does not have the same number of columns as the other files. Aborting...\n")) + } + + dat_idx_start <- ((i-1)/stride*fLength) + 1 + dat_idx_stop <- dat_idx_start+fLength-1 + return(tmpdata) } - if(nCols != length(tmpdata)) { - stop(paste("readcmifiles: file", files[i], "does not have the same number of columns as the other files. Aborting...\n")) + else if(!file.exists(files[i])) { + stop(paste("readcmifiles: dropped file", files[i], "because it's missing\n")) } - - dat_idx_start <- ((i-1)/stride*fLength) + 1 - dat_idx_stop <- dat_idx_start+fLength-1 - ldata[dat_idx_start:dat_idx_stop,] <- tmpdata - rm(tmpdata) - } - else if(!file.exists(files[i])) { - warning(paste("readcmifiles: dropped file", files[i], "because it's missing\n")) - } - } - if(!verbose) { - close(pb) - } + }) + + # bind the data together + ldata <- do.call(rbind, dat) # if we want to average over successive samples, we do this here if(avg > 1){ - for(i in seq(1,nFilesToLoad,by=avg)){ - out_idx_start <- (i-1)*fLength + 1 - out_idx_stop <- i*fLength - for(j in seq(i+1,i+avg-1)){ - print(j) - avg_idx_start <- (j-1)*fLength + 1 - avg_idx_stop <- j*fLength - # add the next sample to the sample that we use as an output base - ldata[out_idx_start:out_idx_stop,] <- ldata[out_idx_start:out_idx_stop,] + - ldata[avg_idx_start:avg_idx_stop,] - # invalidate the sample that we just added - ldata[avg_idx_start:avg_idx_stop,] <- NA + dat <- my_lapply( + X=seq(1,nFilesToLoad,by=avg), + FUN=function(i){ + out_idx_start <- (i-1)*fLength + 1 + out_idx_stop <- i*fLength + tmpdata <- ldata[out_idx_start:out_idx_stop,] + for(j in seq(i+1,i+avg-1)){ + avg_idx_start <- (j-1)*fLength + 1 + avg_idx_stop <- j*fLength + # add the next sample to the sample that we use as an output base + tmpdata <- tmpdata + + ldata[avg_idx_start:avg_idx_stop,] + } + # take the average over the samples + tmpdata <- tmpdata/avg + return(tmpdata) } - # take the average over the samples - ldata[out_idx_start:out_idx_stop,] <- ldata[out_idx_start:out_idx_stop,]/avg - } + ) + # bind the data together + ldata <- do.call(rbind,dat) } ## remove NAs from missing files ldata <- na.omit(ldata) @@ -1161,23 +1169,35 @@ readcmidisc <- function(files, obs=9, ind.vec=c(2,3,4,5,6,7,8), #' definition (at flow time t) \item tsqEplaq - flow time squared multiplied by #' plaquette energy density \item tsqEsym - flow time squared multiplied by #' clover energy density \item Wsym - BMW 'w(t)' observable }. -#' @author Bartosz Kostrzewa, \email{bartosz.kostrzewa@@desy.de} +#' @author Bartosz Kostrzewa, \email{kostrzewab@@informatik.uni-bonn.de} #' @keywords file #' @examples #' #' path <- system.file("extdata/", package="hadron") -#' raw.gf <- readgradflow(path) +#' raw.gf <- read_tmlqcd_gradflow(path) #' -#' @export readgradflow -readgradflow <- function(path, skip=0, basename="gradflow", col.names) { +#' @export +read_tmlqcd_gradflow <- function(path, skip=0, basename="gradflow", col.names) { files <- getorderedfilelist(path=path, basename=basename, last.digits=6) # the trajectory numbers deduced from the filename gaugeno <- getconfignumbers(files, basename=basename, last.digits=6) files <- files[(skip+1):length(files)] if(length(files)==0) stop(sprintf("readgradflow: no tmlqcd gradient flow files found in path %s",path)) + + missing_colnames <- missing(col.names) + + # use lapply as our default and switch to pbmclapply if available below + my_lapply <- lapply + + pbmcapply_avail <- requireNamespace('pbmcapply') + if(pbmcapply_avail){ + my_lapply <- function(...){ pbmcapply::pbmclapply(ignore.interactive=TRUE, mc.preschedule=TRUE, ...) } + } else { + warning("pbmcapply package not found, I/O will be single-threaded!\n") + } tmpdata <- data.frame() - if(missing(col.names)) { + if(missing_colnames) { tmpdata <- read.table(file=files[1],colClasses="numeric",header=TRUE,stringsAsFactors=FALSE) } else { @@ -1189,34 +1209,43 @@ readgradflow <- function(path, skip=0, basename="gradflow", col.names) { fLength <- length(tmpdata$t) nFiles <- length(files) nCols <- ncol(tmpdata) - ## we generate the full size data.frame first + + ## keep around tmpdata for reasons which will become apparent below tmpdata[,] <- NA - ldata <- tmpdata - ldata[((nFiles-1)*fLength+1):(nFiles*fLength),] <- tmpdata - rm(tmpdata) - pb <- NULL - pb <- txtProgressBar(min = 0, max = length(files), style = 3) - for( i in 1:length(files) ){ - setTxtProgressBar(pb, i) - tmp <- data.frame() - if(missing(col.names)) { - tmp <- read.table(file=files[i], colClasses="numeric", header=TRUE, stringsAsFactors=FALSE) - } - else { - tmp <- read.table(file=files[i], colClasses="numeric", col.names=col.names, stringsAsFactors=FALSE) - } - if(is.null(tmp$traj)) tmp$traj <- gaugeno[i] - # the tmlqcd gradient flow routine has the tendency to crash, so we check if the files - # are complete - if( dim( tmp )[1] != fLength ) { - warning(sprintf("For file %s, number of rows is not correct: %d instead of %d\n",files[i],dim(tmp)[1],fLength) ) - ldata[((i-1)*fLength+1):(i*fLength),] <- NA - } else { - ldata[((i-1)*fLength+1):(i*fLength),] <- tmp + dat <- my_lapply( + X=1:nFiles, + FUN=function(i){ + tmp <- data.frame() + if(missing_colnames) { + tmp <- read.table(file=files[i], colClasses="numeric", header=TRUE, stringsAsFactors=FALSE) + } + else { + tmp <- read.table(file=files[i], colClasses="numeric", col.names=col.names, stringsAsFactors=FALSE) + } + if(is.null(tmp$traj)) tmp$traj <- gaugeno[i] + # the tmlqcd gradient flow routine has the tendency to crash, so we check if the files + # are complete and return an array containing NAs if they are not + if( dim( tmp )[1] != fLength ) { + tmpdata$traj <- gaugeno[i] + return(tmpdata) + } else if ( dim(tmp)[2] != nCols ){ + tmpdata$traj <- gaugeno[i] + return(tmpdata) + } else { + return(tmp) + } + }) + ## issue post-fact warnings on dimensions to not mess up return value of lapply above + for( i in 1:nFiles ){ + if( any(is.na(dat[[i]])) ){ + warning(sprintf("For file %s, number of rows and columns did not match expectations (should be: %d rows, %d columns)! The file was skipped.", + files[i], fLength, nCols)) } } - close(pb) + + ## paste data together + ldata <- do.call(rbind,dat) # keep only rows which contain all data ldata <- ldata[complete.cases(ldata),] diff --git a/R/removeTemporal.cf.R b/R/removeTemporal.cf.R index 016fa335b..5a3ae40b4 100644 --- a/R/removeTemporal.cf.R +++ b/R/removeTemporal.cf.R @@ -291,24 +291,6 @@ dispersion_relation <- function (energy, momentum_d, extent_space, plus = TRUE, return (energy_out) } -#' generic function to extract a fitted mass -#' -#' @description -#' One of the main analysis tasks in \link{hadron} is the estimation -#' of energy levels or masses from correlation functions. The -#' corresponding analysis functions return objects, typically lists, -#' containing the masses or energy levels. `extract_mass` is a -#' generic function to extrac such fitted mass values. -#' -#' @param object Object to extract the mass from. -#' -#' @return Numeric. The mass value. -#' -#' @export -extract_mass <- function (object) { - UseMethod('extract_mass') -} - #' specialisation of \link{extract_mass} to objects of type #' `effectivemassfit` #' diff --git a/R/timeseries.R b/R/timeseries.R index 536fad8d9..c0cbb68b9 100644 --- a/R/timeseries.R +++ b/R/timeseries.R @@ -1,3 +1,741 @@ +#' Container for arbitrary timeseries +#' +#' @family tseries constructors +#' +#' @return +#' returns an object of S3 class `tseries` derived from a `list` +#' +#' @examples +#' new_tseries <- tseries() +#' +#' @export +tseries <- function() { + tseries <- list() + class(tseries) <- append(class(tseries), 'tseries') + return(tseries) +} + +#' Original data tseries mixin constructor +#' +#' @param .tseries `tseries` object to extend +#' @param data Tidy data frame representing an observable or set of observables +#' of arbitrary dimensionality indexed by a "simulation time" index `md_idx`. +#' At the minimum, `data` must have two columns, one of which is named +#' `md_idx`. +#' If, for example, we are dealing with two fields, one could store their values +#' in the columns `f1` and `f2`. If these fields are not scalar, their values +#' must be indexed by further variables, see `explanatory_vars`. +#' Note that it will be internally converted to a \link{tibble}. +#' +#' @param explanatory_vars Vector of strings, names of the explanatory variables +#' in `data`. If, for example, we are dealing with a 3D field which varies in time, +#' the `explanatory_vars` might by `x`, `y` and `z`. +#' +#' @family tseries constructors +#' +#' @return +#' returns the input object of class `tseries`, extended with the elements `data` +#' and `explanatory_vars`. +#' +#' @examples +#' xyz_md <- expand.grid(x = 0:9, y = 0:9, z = 0:9, md_idx = 1:100) +#' new_tseries <- tseries_orig(data = cbind(xyz_md, val = rnorm(nrow(xyz_md))), +#' explanatory_vars = c("x", "y", "z") ) +#' +#' @export +tseries_orig <- function(.tseries = tseries(), data, explanatory_vars = NULL) { + stopifnot(inherits(.tseries, 'tseries')) + stopifnot("md_idx" %in% colnames(data)) + stopifnot(ncol(data) >= 2) + + if( !is.null(explanatory_vars) ){ + stopifnot( all(explanatory_vars %in% colnames(data) ) ) + } + + .tseries$data <- tibble::as_tibble(data) + .tseries$explanatory_vars <- explanatory_vars + + class(.tseries) <- append(class(.tseries), 'tseries_orig') + return(.tseries) +} + +#' Bootstrap tseries mixin constructor +#' +#' @details +#' Mixin constructor to add bootstrap samples to a `tseries` object. +#' +#' @param .tseries `tseries` object +#' @param boot.R Integer, number of bootstrap samples used. +#' @param boot.l Integer, block length in the time-series bootstrap process. +#' @param seed Integer, random number generator seed used in bootstrap. +#' @param sim String, `sim` argument of \link[boot]{tsboot}. +#' @param endcorr Boolean, `endcorr` argument of \link[boot]{tsboot}. +#' @param central Numeric or complex, means of the data. +#' @param samples Numeric or complex vector, the resampling samples. +#' @param resampling_method String, either 'bootstrap' or 'jackknife' +#' @param sample_idcs Two-dimensional array which has the values of `md_idx` +#' for each bootstrap sample as its row. The number of rows should +#' correspond to boot.R, while the number of columns should correspond +#' to the number of measurements for the original data that the +#' bootstrap samples are based on. Can be \code{NULL} when dealing +#' with parametric bootstrap or quantities which arise when combining +#' data from different statistical ensembles. +#' +#' @family tseries constructors +#' +#' @return +#' return the input object of class `tseries` with the bootstrap mixin added +#' which supplies the members +#' \itemize{ +#' \item{boot.R: }{see params} +#' \item{boot.l: }{see params} +#' \item{seed: }{see params} +#' \item{sim: }{see params} +#' \item{endcorr: }{see params} +#' \item{resampling_method: }{see params} +#' \item{samples: }{see params} +#' \item{sample_idcs: }{see params} +#' \item{central: }{central values of the original data or parametric central values} +#' \item{se: }{estimates of the standard error of the means} +#' \item{error_fn: }{function appropriate to compute the statistical error depending on \code{resampling_method}} +#' \item{cov_fn: }{function appropriate to compute the covariance matrix depending on \code{resampling_method}} +#' } +#' +#' @export +tseries_boot <- function(.tseries = tseries(), boot.R, boot.l, seed, sim, endcorr, + samples, central, resampling_method, + explanatory_vars = NULL, + sample_idcs = NULL){ + stopifnot(inherits(.tseries, 'tseries')) + stopifnot(resampling_method %in% c("bootstrap", "jackknife")) + + .tseries$boot.R <- boot.R + .tseries$boot.l <- boot.l + .tseries$seed <- seed + .tseries$sim <- sim + .tseries$endcorr <- endcorr + .tseries$sample_idcs <- sample_idcs + + if( is.null(.tseries$explanatory_vars) ){ + .tseries$explanatory_vars <- explanatory_vars + } + + if ( resampling_method == "bootstrap" ) { + .tseries$error_fn <- sd + .tseries$cov_fn <- cov + } + else if ( reampling_method == "jackknife" ) { + .tseries$error_fn <- function(...) jackknife_error(..., boot.l = boot.l) + .tseries$cov_fn <- jackknife_cov + } + + .tseries$resampling_method <- resampling_method + + .tseries$samples <- samples + .tseries$central <- central + .tseries$se <- tseries_apply_reduction(data = .tseries$samples, + reduction = .tseries$error_fn, + explanatory_vars = .tseries$explanatory_vars) + + class(.tseries) <- append(class(.tseries), 'tseries_boot') + return(.tseries) +} + +tseries_apply_reduction <- function(data, reduction, explanatory_vars = NULL) { + + data_ref <- data + not_across_vars <- c("md_idx", "sample_idx") + # if the data frame represents multi-dimensional data, we group on these indices + if( !is.null(explanatory_vars) ) { + data_ref <- dplyr::group_by_at(data, explanatory_vars) + not_across_vars <- c(not_across_vars, explanatory_vars) + } + + # and now we compute the result of the function on these groups + return( + dplyr::summarise( + data_ref, + dplyr::across(.cols = dplyr::setdiff(colnames(data_ref), not_across_vars), + .fns = reduction), + .groups = "drop" + ) + ) + +} + + +#' bootstrap a `tseries` object containing original data +#' +#' @param .tseries `tseries` object with `tseries_orig` mixin +#' @param boot.R Intenger, number of bootstrap samples to use. +#' @param boot.l Integer, block length to use in bootstrap procedure. +#' @param seed Integer, random number generator seed to be used in bootstrap. +#' @param sim String, `sim` argument passed to \link[boot]{tsboot}. +#' @param endcorr Boolean, `endcorr` argument passed to \link[boot]{tsboot}. +#' @param serial Boolean, whether to disable the use of \link[parallel]{mclapply}. +#' +#' @return `tseries` object with the additional members +#' \itemize{ +#' } +#' +#' @examples +#' xyz_md <- expand.grid(x = 0:9, y = 0:9, z = 0:9, md_idx = 1:100) +#' new_tseries <- tseries_orig(data = cbind(xyz_md, val = rnorm(nrow(xyz_md))), +#' explanatory_vars = c("x", "y", "z") ) +#' +#' bs_tseries <- bootstrap.tseries(new_tseries, +#' boot.R = 500, boot.l = 5, seed = 12345, +#' sim = 'geom', endcorr = TRUE, serial = FALSE) +#' +#' @export +bootstrap.tseries <- function(.tseries, + boot.R, boot.l, seed, sim, endcorr, + serial = FALSE, progress = FALSE) { + + dplyr_version_required <- "1.0.0" + dplyr_avail <- requireNamespace("dplyr", versionCheck = list(op = ">=", version = dplyr_version_required)) + if( !dplyr_avail ){ + stop(sprintf("The 'dplyr' package in version >= %s is required to use this function!\n", + dplyr_version_required)) + } + + parallel_avail <- requireNamespace("parallel") + if( !parallel_avail & serial == FALSE ){ + stop(sprintf("The 'parallel' package is required to use this function with `serial = FALSE`!\n")) + } + + pbmcapply_avail <- requireNamespace("pbmcapply") + if( serial == FALSE & progress & !pbmcapply_avail ){ + message("'pbmcapply' package not found, there will be no progress information of the bootstrap.\n") + } + + grr_avail <- requireNamespace("grr") + if( !grr_avail ){ + stop("The 'grr' package is required to use this function!\n") + } + + if( serial ){ + my_lapply <- lapply + } else { + if( pbmcapply_avail & progress ){ + my_lapply <- pbmcapply::pbmclapply + } else { + my_lapply <- parallel::mclapply + } + } + + stopifnot(inherits(.tseries, 'tseries_orig')) + + boot.l <- ceiling(boot.l) + boot.R <- floor(boot.R) + + stopifnot(boot.l >= 1) + stopifnot(boot.l <= length(unique(.tseries$data$md_idx))) + stopifnot(boot.R >= 1) + + old_seed <- swap_seed(seed) + # unlike in other uses of tsboot in hadron, here we bootstrap the + # `md_idx` + sample_idcs <- boot::tsboot(unique(.tseries$data$md_idx), + statistic = function(x){ x }, + R = boot.R, + l = boot.l, + sim = sim, + endcorr = endcorr)$t + restore_seed(old_seed) + + ts_samples <- my_lapply(X = 1:nrow(sample_idcs), + FUN = function(row_idx) + { + idcs <- sample_idcs[row_idx,,drop=TRUE] + # compute the row indices corresponding to the bootstrap sample indices + # it is very surprising how slow this actually is, but considering the fact + # that no matter how we do this, we have some + # nrow(sample_idcs)*ncol(sample_idcs)*length(.tseries$data$md_idx) + # comparisons, the slowness is not surprising + # Seems like we've found a good reason to NOT use the long table format + # for this kind of timeseries data... + + # BaKo: this is slower ... + # df_sample_idcs <- unlist(lapply(idcs, function(x){ which(x == .tseries$data$md_idx) })) + + # BaKo: ... than this ... + # df_sample_idcs <- as.vector(apply(X = array(idcs, dim=c(1,length(idcs))), + # MARGIN = 2, + # FUN = function(x){ which( x[1] == .tseries$data$md_idx ) })) + + # BaKo: ... and this seems to be the fastest solution which actually makes it somewhat bearable + # for real datasets with hundreds of thousands of lines + df_sample_idcs <- grr::matches(x = idcs, + y = .tseries$data$md_idx, + all.y = FALSE, + all.x = FALSE, + indexes = TRUE)$y + + # we resample the data frame, this involves copies of potentially large + # data structures but I don't see a way around for now + return( + dplyr::mutate( + tseries_apply_reduction(data = dplyr::slice(.data = .tseries$data, df_sample_idcs), + reduction = mean, + explanatory_vars = .tseries$explanatory_vars), + sample_idx = row_idx + ) + ) + } + ) # lapply + ts_samples <- do.call(rbind, ts_samples) + + ts_central <- tseries_apply_reduction(data = .tseries$data, + reduction = mean, + explanatory_vars = .tseries$explanatory_vars) + + return( + tseries_boot(.tseries, + boot.R = boot.R, + boot.l = boot.l, + seed = seed, + sim = sim, + endcorr = endcorr, + central = ts_central, + samples = ts_samples, + resampling_method = "bootstrap", + sample_idcs = sample_idcs) + ) +} + +#' Check whether the resampling of two tseries objects is compatible +#' +#' @param x, y `tseries` objects with `tseries_boot` +#' +#' @details +#' Checks whether binary operations such as addition can be performed on the +#' resampling samples of `x` and `y`. +#' +#' @return +#' List of named booleans for each of the checked conditions. Since compatibility +#' may depend on the context (for example, if the samples are derived from the +#' same statistical ensembles or from different ones), it is up to the +#' calling code to finally decide if an operation should be performed or not. +#' The list contains the elements +#' \itemize{ +#' \item{boot: }{whether both objects had resampling samples} +#' \item{seed: }{whether the seeds were the same} +#' \item{boot.R: }{whether the \code{boot.R} parameter was the same} +#' \item{boot.l: }{whether the \code{boot.l} parameter was the same} +#' \item{sim: }{whether the same type of simulation was used} +#' \item{endcorr: }{whether the same end corrections were applied} +#' \item{resampling_method: }{whether the same resampling method was used} +#' \item{explanatory_vars: }{whether the names of the explanatory variables are the same (vector, one boolean per var!)} +#' \item{samples_dims: }{whether the `samples` members have the same dimensions (vector, one boolean per dim!)} +#' \item{sample_idcs: }{whether the `sample_idcs` members are the same (array-valued, unless either of them was \code{NULL})} +#' } +#' +#' @export +resampling_is_compatible.tseries <- function(x, y){ + res <- list() + res$boot <- ( inherits(x, 'tseries_boot') & inherits(y, 'tseries_boot') ) + res$seed <- ( x$seed == y$seed ) + res$boot.R <- ( x$boot.R == y$boot.R ) + res$boot.l <- ( x$boot.l == y$boot.l ) + res$sim <- ( x$sim == y$sim ) + res$endcorr <- ( x$endcorr == y$endcorr ) + res$resampling_method <- ( x$resampling_method == y$resampling_method ) + res$expanatory_vars <- ( x$explanatory_vars == y$explanatory_vars ) + res$samples_dims <- ( dim(x$samples) == dim(y$samples) ) + if( is.null(x$sample_idcs) | is.null(y$sample_idcs) ){ + res$sample_idcs <- FALSE + } else { + res$sample_idcs <- ( x$sample_idcs == y$sample_idcs ) + } + return(res) +} + +#' Arithmetically add two timeseries +#' +#' @param x, y `tseries` objects with `tseries_boot` +#' +#' @return +#' The value is +#' \deqn{x + y \,.} +#' +#' @export +'+.tseries' <- function(x, y) { + apply_binary_elementwise.tseries(x, y, `+`) +} + +#' Arithmetically subtract two timeseries +#' +#' @param x, y `tseries` objects with `tseries_boot` +#' +#' @return +#' The value is +#' \deqn{x - y \,.} +#' +#' @export +'-.tseries' <- function(x, y) { + apply_binary_elementwise.tseries(x, y, `-`) +} + +#' Arithmetically multiply two timeseries +#' +#' @param x, y `tseries` objects with `tseries_boot` +#' +#' @return +#' The value is +#' \deqn{x * y \,.} +#' +#' @export +'*.tseries' <- function(x, y) { + apply_binary_elementwise.tseries(x, y, `*`) +} + +#' Arithmetically divide two timeseries +#' +#' @param x, y `tseries` objects with `tseries_boot` +#' +#' @return +#' The value is +#' \deqn{x / y \,.} +#' +#' @export +'/.tseries' <- function(x, y) { + apply_binary_elementwise.tseries(x, y, `/`) +} + +#' apply a unary function element-wise to a `tseries` object +#' +#' @param ts `tseries` object with the `tseries_boot` mixin. +#' @param fn Function to be applied. +#' +#' @return +#' The `tseries` object with the function applied to `samples` and `central`. +#' `se` is updated and if the original \code{ts} had the `tseries_orig` mixin, +#' it is removed and the `data` field set to \code{NULL}. +#' +#' @export +apply_unary_elementwise.tseries <- function(ts, fn){ + + dplyr_version_required <- "1.0.0" + dplyr_avail <- requireNamespace("dplyr", versionCheck = list(op = ">=", version = dplyr_version_required)) + if( !dplyr_avail ){ + stop(sprintf("The 'dplyr' package in version >= %s is required to use this function!\n", + dplyr_version_required)) + } + + stopifnot(inherits(ts, 'tseries_boot')) + stopifnot('function' %in% class(fn)) + + obs_vars <- dplyr::setdiff(colnames(ts$samples), c(ts$explanatory_vars, "sample_idx")) + + samples <- dplyr::mutate(ts$samples, + dplyr::across(.cols = obs_vars, .fns = fn)) + central <- dplyr::mutate(ts$central, + dplyr::across(.cols = obs_vars, .fns = fn)) + + return( + tseries_boot(samples = samples, + central = central, + boot.R = ts$boot.R, + boot.l = ts$boot.l, + seed = ts$seed, + sim = ts$sim, + endcorr = ts$endcorr, + resampling_method = ts$resampling_method, + sample_idcs = ts$sample_idcs, + explanatory_vars = ts$explanatory_vars) + ) +} + +#' @export +apply_binary_elementwise.tseries <- function(x, y, `%op%`) { + stopifnot(inherits(x, 'tseries_boot')) + stopifnot(inherits(y, 'tseries_boot')) + + res_compat <- resampling_is_compatible(x, y) + stopifnot( res_compat$boot ) + stopifnot( res_compat$resampling_method ) + stopifnot( all(res_compat$explanatory_vars) ) + stopifnot( all(res_compat$samples_dims) ) + + # In most applications, the two timeseries that we're combining come + # from the same statistical ensemble. + # However, we may also be in the situation where we want to combine + # timeseries from different statistical ensembles. + # In those cases, it is only the dimensions of the resampling + # samples which are relevant. + # Other details such as `boot.l`, `sim`, `endcorr` get lost. + samples <- x$samples + + obs_vars <- dplyr::setdiff(colnames(samples), c(x$explanatory_vars, "sample_idx")) + + samples[,obs_vars] <- samples[,obs_vars] %op% y$samples[,obs_vars] + + central <- x$central + central[,obs_vars] <- central[,obs_vars] %op% y$central[,obs_vars] + + # we need this style because `ifelse` can't deal with NULL in either + # of its branches + if( all(res_compat$sample_idcs) ){ + res_sample_idcs <- x$sample_idcs + } else { + res_sample_idcs <- NULL + } + + return( + tseries_boot(boot.R = x$boot.R, + boot.l = ifelse(res_compat$boot.l, x$boot.l, NA), + sim = ifelse(res_compat$sim, x$sim, NA), + endcorr = ifelse(res_compat$endcorr, x$endcorr, NA), + seed = ifelse(res_compat$seed, x$seed, NA), + samples = samples, + central = central, + explanatory_vars = x$explanatory_vars, + sample_idcs = res_sample_idcs, + resampling_method = x$resampling_method) + ) +} + +#' @export +apply_reduce_plan.tseries <- function(ts, reduce_vars, plan){ + stopifnot( inherits(ts, "tseries_boot") | inherits(ts, "tseries_orig") ) + stopifnot( all( reduce_vars %in% ts$explanatory_vars ) ) + + remaining_explanatory_vars <- dplyr::setdiff(ts$explanatory_vars, reduce_vars) + + # this function can work either on the original data or the samples + # and central values + # on the original data, the reduction is applied *per measurement* + # on the samples, the reduction is applied *per sample* + # of course, it is the caller who has to decide if a *per measurement* + # evaluation is a valid thing to do + dat <- list() + if( inherits(ts, "tseries_boot") ){ + sample_vars <- c("sample_idx", remaining_explanatory_vars) + # group_by_at doesn't mind NULL or empty groupings + dat[["central"]] <- dplyr::group_by_at(ts$central, remaining_explanatory_vars) + dat[["samples"]] <- dplyr::group_by_at(ts$samples, sample_vars) + } + if( inherits(ts, "tseries_orig") ){ + md_vars <- c("md_idx", remaining_explanatory_vars) + dat[["orig"]] <- dplyr::group_by_at(ts$data, md_vars) + } + + out <- lapply( + X = dat, + FUN = function(x){ + # for each element of the reduction plan, we create a new summary variable + # accordining to the name of the element of the plan list + # and the expression for this element + res_lst <- lapply( + X = names(plan), + FUN = function(out_var){ + dplyr::summarise( + x, + !!as.character(out_var) := eval(plan[[out_var]]), + .groups = "drop" + ) + } + ) + Reduce(auto_full_join,res_lst) + } + ) + names(out) <- names(dat) + + # special treatment here because ifelse cannot deal with `NULL` in any of its branches + if( length(remaining_explanatory_vars) == 0 ){ + res_explanatory_vars <- NULL + } else { + res_explanatory_vars <- remaining_explanatory_vars + } + + if( !inherits(ts, "tseries_boot") ){ + return( tseries_orig(data = out$orig, + explanatory_vars = res_explanatory_vars) ) + } else { + if( inherits(ts, "tseries_orig") ){ + ts_base <- tseries_orig(data = out$orig, + explanatory_vars = res_explanatory_vars) + } else { + ts_base = tseries() + } + return( + tseries_boot(.tseries = ts_base, + boot.R = ts$boot.R, + boot.l = ts$boot.l, + seed = ts$seed, + sim = ts$sim, + endcorr = ts$endcorr, + samples = out$samples, + central = out$central, + resampling_method = ts$resampling_method, + explanatory_vars = res_explanatory_vars, + sample_idcs = ts$sample_idcs) + ) + } +} + +#' @export +apply_transmute_plan.tseries <- function(ts, plan){ + stopifnot( inherits(ts, "tseries_boot") ) + + dat <- list() + dat[["central"]] <- ts$central + dat[["samples"]] <- dplyr::group_by_at(ts$samples, "sample_idx") + + # we save some typing here by using a list for the central values and samples + # for each element of this list, we apply the mutation plan + out <- lapply( + X = dat, + FUN = function(x){ + # for each element of the reduction plan, we create a new variable + # accordining to the name of the element of the plan list + # and the expression for this element + res_lst <- lapply( + X = names(plan), + FUN = function(out_var){ + dplyr::ungroup( + dplyr::transmute( + x, + !!as.character(out_var) := eval(plan[[out_var]]) + ) + ) + } + ) + Reduce(auto_full_join,res_lst) + } + ) + names(out) <- names(dat) + + return( + tseries_boot(boot.R = ts$boot.R, + boot.l = ts$boot.l, + seed = ts$seed, + sim = ts$sim, + endcorr = ts$endcorr, + samples = out$samples, + central = out$central, + resampling_method = ts$resampling_method, + explanatory_vars = ts$explanatory_vars, + sample_idcs = ts$sample_idcs) + ) +} + +apply_binary_mutate_plan.tseries <- function(x, y, `%op%`, plan){ + stopifnot(inherits(x, 'tseries_boot')) + stopifnot(inherits(y, 'tseries_boot')) + stopifnot( all(c("vars_out", "x_vars", "y_vars") %in% names(plan)) ) + + scale1 <- rep(1.0, nrow(plan)) + if( "x_scale" %in% names(plan) ){ + scale1 <- plan$x_scale + } + scale2 <- rep(1.0, nrow(plan)) + if( "y_scale" %in% names(plan) ){ + scale2 <- plan$y_scale + } + + res_compat <- resampling_is_compatible(x, y) + stopifnot( res_compat$boot ) + stopifnot( res_compat$resampling_method ) + stopifnot( all(res_compat$explanatory_vars) ) + + stopifnot( all(plan$x_vars %in% names(x$samples)) ) + stopifnot( all(plan$y_vars %in% names(y$samples)) ) + + stopifnot( all(plan$x_vars %in% names(x$central)) ) + stopifnot( all(plan$y_vars %in% names(y$central)) ) + + sample_vars <- c("sample_idx", x$explanatory_vars) + samples <- x$samples[,sample_vars] + central <- x$central[,x$explanatory_vars] + + for( idx in 1:nrow(plan) ){ + samples <- + dplyr::mutate( + samples, + !!as.character(plan$vars_out[idx]) := (scale1[idx] * x$samples[[ plan$x_vars[idx] ]]) %op% + (scale2[idx] * y$samples[[ plan$y_vars[idx] ]]) + ) + + central <- + dplyr::mutate( + central, + !!as.character(plan$vars_out[idx]) := (scale1[idx] * x$central[[ plan$x_vars[idx] ]]) %op% + (scale2[idx] * y$central[[ plan$y_vars[idx] ]]) + ) + } + + # we need this style because `ifelse` can't deal with NULL in either + # of its branches + if( all(res_compat$sample_idcs) ){ + res_sample_idcs <- x$sample_idcs + } else { + res_sample_idcs <- NULL + } + return( + tseries_boot(boot.R = x$boot.R, + boot.l = ifelse(res_compat$boot.l, x$boot.l, NA), + sim = ifelse(res_compat$sim, x$sim, NA), + endcorr = ifelse(res_compat$endcorr, x$endcorr, NA), + seed = ifelse(res_compat$seed, x$seed, NA), + samples = samples, + central = central, + explanatory_vars = x$explanatory_vars, + sample_idcs = res_sample_idcs, + resampling_method = x$resampling_method) + ) +} + +#' @export +add.tseries <- function(x, y, plan){ + apply_binary_mutate_plan.tseries(x, y, `+`, plan) +} + +#' @export +subtract.tseries <- function(x, y, plan){ + apply_binary_mutate_plan.tseries(x, y, `-`, plan) +} + +#' @export +mul.tseries <- function(x, y, plan){ + apply_binary_mutate_plan.tseries(x, y, `*`, plan) +} + +#' @export +div.tseries <- function(x, y, plan){ + apply_binary_mutate_plan.tseries(x, y, `/`, plan) +} + +#' @export +subset.tseries <- function(x, subset) { + stopifnot( inherits(x, "tseries") ) + stopifnot( inherits(x, "tseries_orig") | inherits(x, "tseries_boot") ) + + x_new <- x + + if( inherits(x, "tseries_orig") ){ + cols_keep <- c("md_idx", x$explanatory_vars, subset) + stopifnot( all(subset %in% colnames(x$data) ) ) + x_new$data <- x$data[,cols_keep] + } + + if( inherits(x, "tseries_boot") ){ + stopifnot( all(subset %in% colnames(x$samples) ) ) + stopifnot( all(subset %in% colnames(x$central) ) ) + stopifnot( all(subset %in% colnames(x$se) ) ) + + cols_keep <- c(x$explanatory_vars, subset) + x_new$central <- x$central[,cols_keep] + x_new$se <- x$se[,cols_keep] + + cols_keep <- c("sample_idx", cols_keep) + x_new$samples <- x$samples[,cols_keep] + } + + return(x_new) +} + #' plot_timeseries #' #' @description @@ -22,6 +760,9 @@ #' @param errorband_color String. Colour of the error band. #' @param type String. Plot type, see \link{plot} for details. #' @param uwerr.S Numeric. `S` of the \link{uwerr} method to be used. +#' @param time_factor Numeric. Factor by which any auto-correlation times shoud be +#' multiplied. Used, for example, when running non-unit-length +#' trajectories or employing strided measurements. #' @param periodogram Boolean. Whether to show a periodogram. #' @param debug Boolean. Generate debug output. #' @param uw.summary Boolean. Generate an \link{uwerr} summary plot. @@ -34,22 +775,27 @@ #' @export plot_timeseries <- function(dat, ylab, plotsize, titletext, hist.by, - stat_range = c(1.0, length(dat$y)), + stat_range = c(1, length(dat$y)), pdf.filename, name="", xlab="$t_\\mathrm{MD}$", hist.probs=c(0.0,1.0), errorband_color=rgb(0.6,0.0,0.0,0.6), type='l', - uwerr.S=2, + uwerr.S=1.5, + time_factor=1.0, smooth_density=FALSE, periodogram=FALSE,debug=FALSE,uw.summary=TRUE,...) { - + + stopifnot( ! is.null(dat$y) ) + stopifnot( ! is.null(dat$t) ) stopifnot(length(stat_range) == 2) stopifnot(length(hist.probs) == 2) yrange <- range(dat$y) stat_y <- dat$y[ seq(stat_range[1], stat_range[2]) ] - + + dat$t <- dat$t*time_factor + uw.data <- uwerrprimary(stat_y, S=uwerr.S) if(debug) { print(paste("uw.",name,sep="")) @@ -74,11 +820,13 @@ plot_timeseries <- function(dat, xlab=xlab, main=titletext, ...) - rect(xleft=dat$t[ stat_range[1] ], - xright=dat$t[ stat_range[2] ], - ytop=uw.data$value+uw.data$dvalue, - ybottom=uw.data$value-uw.data$dvalue, - border=FALSE, col=errorband_color) + if( abs(uw.data$dvalue) < 5*abs(uw.data$value) ){ + rect(xleft=dat$t[ stat_range[1] ], + xright=dat$t[ stat_range[2] ], + ytop=uw.data$value+uw.data$dvalue, + ybottom=uw.data$value-uw.data$dvalue, + border=FALSE, col=errorband_color) + } abline(h=uw.data$value,col="black",lwd=2) legend(x="topright", @@ -86,14 +834,15 @@ plot_timeseries <- function(dat, ylab, tex.catwitherror(x=uw.data$value, dx=uw.data$dvalue, - digits=3, + digits=2, with.dollar=FALSE, - with.cdot=FALSE) + with.cdot=FALSE) ), lty=1, pch=NA, col="red", - bty='n') + bty='n', + cex = 0.8) # plot the corresponding histogram hist.data <- NULL @@ -131,11 +880,13 @@ plot_timeseries <- function(dat, ytop <- max(hist.data$counts) ybottom <- 0.0 } - rect(ytop=ytop, - ybottom=0, - xright=uw.data$value+uw.data$dvalue, - xleft=uw.data$value-uw.data$dvalue, - border=FALSE, col=errorband_color) + if( abs(uw.data$dvalue) < 5*abs(uw.data$value) ){ + rect(ytop=ytop, + ybottom=0, + xright=uw.data$value+uw.data$dvalue, + xleft=uw.data$value-uw.data$dvalue, + border=FALSE, col=errorband_color) + } abline(v=uw.data$value, col="black", lwd=2) # and a periodogram @@ -156,8 +907,8 @@ plot_timeseries <- function(dat, tikz.finalize(tikzfiles) } - return(t(data.frame(val=uw.data$value, dval=uw.data$dvalue, tauint=uw.data$tauint, - dtauint=uw.data$dtauint, Wopt=uw.data$Wopt, stringsAsFactors=FALSE))) + return(t(data.frame(val=uw.data$value, dval=uw.data$dvalue, tauint=uw.data$tauint*time_factor, + dtauint=uw.data$dtauint*time_factor, Wopt=uw.data$Wopt*time_factor, stringsAsFactors=FALSE))) } #' plot_eigenvalue_timeseries @@ -172,6 +923,9 @@ plot_timeseries <- function(dat, #' @param filelabel String. Label of the file. #' @param titletext Text in the plot title. #' @param stat_range range of statistics to use. +#' @param time_factor Numeric. Factor by which any auto-correlation times shoud be +#' multiplied. Used, for example, when running non-unit-length +#' trajectories or employing strided measurements. #' @param pdf.filename String. PDF filename. #' @param errorband_color String. Colour of the error band. #' @param debug Boolean. Generate debug output. @@ -183,6 +937,7 @@ plot_timeseries <- function(dat, #' @export plot_eigenvalue_timeseries <- function(dat, stat_range, + time_factor = 1.0, ylab, plotsize, filelabel,titletext, pdf.filename, errorband_color=rgb(0.6,0.0,0.0,0.6), @@ -190,6 +945,8 @@ plot_eigenvalue_timeseries <- function(dat, if( missing(stat_range) ) { stat_range <- c(1,nrow(dat)) } yrange <- range(dat[,2:5]) + dat$t <- dat$t*time_factor + stat_min_ev <- dat[ seq(stat_range[1], stat_range[2]), "min_ev" ] stat_max_ev <- dat[ seq(stat_range[1], stat_range[2]), "max_ev" ] @@ -226,27 +983,35 @@ plot_eigenvalue_timeseries <- function(dat, # plot the corresponding histograms with error bands and mean values hist.min_ev <- hist(stat_min_ev, main=paste("min. eval",titletext),xlab="min. eval",tcl=0.02) - rect(ytop=max(hist.min_ev$counts), - ybottom=0, - xright=uw.min_ev$value+uw.min_ev$dvalue, - xleft=uw.min_ev$value-uw.min_ev$dvalue,border=FALSE,col=errorband_color) + if( abs(uw.min_ev$dvalue) < 5*abs(uw.min_ev$value) ){ + rect(ytop=max(hist.min_ev$counts), + ybottom=0, + xright=uw.min_ev$value+uw.min_ev$dvalue, + xleft=uw.min_ev$value-uw.min_ev$dvalue,border=FALSE,col=errorband_color) + } abline(v=uw.min_ev$value,col="black") hist.max_ev <- hist(stat_max_ev, main=paste("max. eval",titletext), xlab="max. eval") - rect(ytop=max(hist.max_ev$counts), - ybottom=0, - xright=uw.max_ev$value+uw.max_ev$dvalue, - xleft=uw.max_ev$value-uw.max_ev$dvalue,border=FALSE,col=errorband_color) + if( abs(uw.max_ev$dvalue) < 5*abs(uw.max_ev$value) ){ + rect(ytop=max(hist.max_ev$counts), + ybottom=0, + xright=uw.max_ev$value+uw.max_ev$dvalue, + xleft=uw.max_ev$value-uw.max_ev$dvalue,border=FALSE,col=errorband_color) + } abline(v=uw.max_ev$value,col="black") if(!missing(pdf.filename)){ tikz.finalize(tikzfiles) } - return(list(mineval=t(data.frame(val=uw.min_ev$value, dval=uw.min_ev$dvalue, tauint=uw.min_ev$tauint, - dtauint=uw.min_ev$dtauint, Wopt=uw.min_ev$Wopt, stringsAsFactors=FALSE)), - maxeval=t(data.frame(val=uw.max_ev$value, dval=uw.max_ev$dvalue, tauint=uw.max_ev$tauint, - dtauint=uw.max_ev$dtauint, Wopt=uw.max_ev$Wopt, stringsAsFactors=FALSE)) ) ) + return(list(mineval=t(data.frame(val=uw.min_ev$value, dval=uw.min_ev$dvalue, + tauint=uw.min_ev$tauint*time_factor, + dtauint=uw.min_ev$dtauint*time_factor, + Wopt=uw.min_ev$Wopt*time_factor, stringsAsFactors=FALSE)), + maxeval=t(data.frame(val=uw.max_ev$value, dval=uw.max_ev$dvalue, + tauint=uw.max_ev$tauint*time_factor, + dtauint=uw.max_ev$dtauint*time_factor, + Wopt=uw.max_ev$Wopt*time_factor, stringsAsFactors=FALSE)) ) ) } diff --git a/data/datalist b/data/datalist index 49872c2aa..004ea4464 100644 --- a/data/datalist +++ b/data/datalist @@ -3,3 +3,5 @@ pscor.sample samplecf correlatormatrix loopdata +sample_raw.gradflow +ref_gf_scales diff --git a/data/make_ref_gf_scales.R b/data/make_ref_gf_scales.R new file mode 100644 index 000000000..9302eede5 --- /dev/null +++ b/data/make_ref_gf_scales.R @@ -0,0 +1,21 @@ +ref_gf_scales <- rbind( + data.frame(val=0.1465, dval=0.0024, + label="$(t_0^{\\mathrm{plaq}})^{1/2}/a$", + obs="tsqEplaq", + obslabel="$\\langle t^2 E_{\\mathrm{plaq}}(t) \\rangle$", + Nf="$Nf=2+1$ (Wilson)", + ref="Borsanyi et al., JHEP 09 (2012) 010, High-precision scale setting in lattice QCD"), + data.frame(val=0.1465, dval=0.0024, + label="$(t_0^{\\mathrm{plaq}})^{1/2}/a$", + obs="tsqEsym", + obslabel="$\\langle t^2 E_{\\mathrm{sym}}(t) \\rangle$", + Nf="$Nf=2+1$ (Wilson)", + ref="Borsanyi et al., JHEP 09 (2012) 010, High-precision scale setting in lattice QCD"), + data.frame(val=0.1755, dval=0.019, + label="(w_0^{\\mathrm{sym}}/a)^2", + obs="Wsym", + obslabel="$\\langle t \\frac{d}{dt} [ t^2 E(t) ] \\rangle$", + Nf="$Nf=2+1$ (Wilson)", + ref="Borsanyi et al., JHEP 09 (2012) 010, High-precision scale setting in lattice QCD") +) +save(ref_gf_scales, file = "ref_gf_scales.RData") diff --git a/data/ref_gf_scales.RData b/data/ref_gf_scales.RData new file mode 100644 index 000000000..a74f34386 Binary files /dev/null and b/data/ref_gf_scales.RData differ diff --git a/data/sample_raw_gradflow.RData b/data/sample_raw_gradflow.RData new file mode 100644 index 000000000..57235bc29 Binary files /dev/null and b/data/sample_raw_gradflow.RData differ diff --git a/vignettes/gradflow_tseries.Rmd b/vignettes/gradflow_tseries.Rmd new file mode 100644 index 000000000..79b88311f --- /dev/null +++ b/vignettes/gradflow_tseries.Rmd @@ -0,0 +1,262 @@ +--- +title: "Applying the `tseries` class to gradient flow observables" +author: "Bartosz Kostrzewa" +output: + rmarkdown::html_vignette + +vignette: > + %\VignetteIndexEntry{Applying the `tseries` class to gradient flow observables} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} +--- + +```{r echo=TRUE, results = "hide", warning = FALSE, message = FALSE} +require(hadron) +require(ggplot2) +require(latex2exp) +require(knitr) +require(parallel) +require(dplyr) +``` + +# Introduction + +```{r} +boot.R <- 3*length(unique(sample_raw_gradflow$traj)) +boot.l <- 42 + +gf_tseries <- tseries_orig(data = data.frame(md_idx = sample_raw_gradflow$traj, + t = sample_raw_gradflow$t, + P = sample_raw_gradflow$P, + Wsym = sample_raw_gradflow$Wsym, + tsqEplaq = sample_raw_gradflow$tsqEplaq, + tsqEsym = sample_raw_gradflow$tsqEsym, + Qsym = sample_raw_gradflow$Qsym), + explanatory_vars = c("t")) + +gf_tseries <- bootstrap.tseries(gf_tseries, + boot.R = boot.R, + boot.l = boot.l, + seed = 12345, + sim = 'geom', + endcorr = TRUE, + serial = FALSE) +``` + +```{r fig.width=7, fig.height=5, echo = FALSE, fig.cap = "Observables to compute $(w_0/a)^2$, error exaggerated by a factor 20"} +p <- ggplot2::ggplot(data.frame(t = gf_tseries$central$t, + se = gf_tseries$se$Wsym, + val = gf_tseries$central$Wsym), + aes(x = t, y = val)) + + ggplot2::geom_ribbon(aes(ymin = val - 20*se, ymax = val + 20*se), + colour = "red", + fill = "red", + alpha = 0.2) + + ggplot2::geom_line() + + ggplot2::coord_cartesian(ylim = c(0,0.4), + xlim = c(0,5.0)) + + ggplot2::geom_hline(yintercept = 0.3, colour = "blue") + + ggplot2::labs(y = TeX("$< t \\frac{d}{dt} (t^2 E(t)) >$"), + x = TeX("$t / a^2$")) + + ggplot2::theme_bw() +plot(p) +``` + +```{r fig.width=7, fig.height=5, echo = FALSE, fig.cap = "MD history of observable to compute $(w_0/a)^2$ at a discrete flow time close to $t = (w_0/a)^2$"} +t_val <- 3.35 +p <- ggplot2::ggplot(dplyr::filter(gf_tseries$data, t == t_val), + aes(x = 4*md_idx, y = Wsym)) + + ggplot2::geom_line() + + ggplot2::labs(y = TeX(sprintf("$t \\frac{d}{dt} (t^2 E(t=%.2f))$",t_val)), + x = TeX("$t_{MD}$")) + + ggplot2::theme_bw() +plot(p) +``` + +# Applying an element-wise transformation + +Element-wise transformations can be applied using `apply_unary_elementwise.tseries`, with the `tseries` class taking care of applying the transformation only to the central values and samples and not the `explanatory_vars`. +If a `data` member exists, it will be set to `NULL` and the `tseries_orig` mixin will be removed. + +```{r} +three_gf_tseries <- apply_unary_elementwise.tseries(gf_tseries, function(x){ pi*x }) +``` + +```{r fig.width=7, fig.height=5, echo = FALSE, fig.cap = "Observables to compute $(w_0/a)^2$, multiplied by $\\pi$, error exaggerated by a factor 20"} +p <- ggplot2::ggplot(data.frame(t = three_gf_tseries$central$t, + se = three_gf_tseries$se$Wsym, + val = three_gf_tseries$central$Wsym), + aes(x = t, y = val)) + + ggplot2::geom_ribbon(aes(ymin = val - 20*se, ymax = val + 20*se), + colour = "red", + fill = "red", + alpha = 0.2) + + ggplot2::geom_line() + + ggplot2::coord_cartesian(ylim = c(0,1.2), + xlim = c(0,5.0)) + + ggplot2::labs(y = TeX("$\\pi \\times < t \\frac{d}{dt} (t^2 E(t)) >$"), + x = TeX("$t / a^2$")) + + ggplot2::theme_bw() +plot(p) +``` + +# Extracting a subset of observables from a `tseries` + +```{r} +names(gf_tseries$data) +Wsym_tsqE_tseries <- subset(x = gf_tseries, subset = c("Wsym", "tsqEplaq", "tsqEsym")) +names(Wsym_tsqE_tseries$data) +``` + +# Ratios of defining observables of gradient flow scales as a function of flow time + +The ratios + +\begin{equation} + \frac{\langle t^2 E(t) \rangle}{\langle d \frac{d}{dt} ( t^2 e(t) ) \rangle} +\end{equation + +and its inverse + +\begin{equation} + \frac{\langle d \frac{d}{dt} ( t^2 e(t) ) \rangle}{\langle t^2 E(t) \rangle} +\end{equation} + +may serve as statistically and systematically *much* more precise defining relations for the gradient flow scale, if they behave reasonably in the continuum limit. + + +```{r} +gf_ratios <- div(x = Wsym_tsqE_tseries, + y = Wsym_tsqE_tseries, + plan = data.frame(vars_out = c("tsqEplaq_ov_Wsym", "tsqEsym_ov_Wsym", "Wsym_ov_tsqEplaq", "Wsym_ov_tsqEsym"), + x_vars = c("tsqEplaq", "tsqEsym", "Wsym", "Wsym"), + y_vars = c("Wsym", "Wsym", "tsqEplaq", "tsqEsym"))) +``` + +```{r fig.width=7, fig.height=5, echo = FALSE, fig.cap = "Ratio of defining observable for $t_0/a^2$ and $(w_0/a)^2$ ($E(t)$ for $t_0$ in the plaquette definition), error exaggerated by a factor 20"} +p <- ggplot2::ggplot(data.frame(t = gf_ratios$central$t, + se = gf_ratios$se$tsqEplaq_ov_Wsym, + val = gf_ratios$central$tsqEplaq_ov_Wsym), + aes(x = t, y = val)) + + ggplot2::geom_ribbon(aes(ymin = val - 20*se, ymax = val + 20*se), + colour = "red", + fill = "red", + alpha = 0.2) + + ggplot2::geom_line() + + ggplot2::labs(y = TeX("$ \\frac{< t^2 E(t) >}{< t \\frac{d}{dt} (t^2 E(t)) >}$"), + x = TeX("$t / a^2$")) + + ggplot2::theme_bw() +plot(p) +``` + +```{r fig.width=7, fig.height=5, echo = FALSE, fig.cap = "Ratio of defining observable for $t_0/a^2$ and $(w_0/a)^2$ ($E(t)$ for $t_0$ in the symmetric definition), error exaggerated by a factor 20"} +p <- ggplot2::ggplot(data.frame(t = gf_ratios$central$t, + se = gf_ratios$se$tsqEsym_ov_Wsym, + val = gf_ratios$central$tsqEsym_ov_Wsym), + aes(x = t, y = val)) + + ggplot2::geom_ribbon(aes(ymin = val - 20*se, ymax = val + 20*se), + colour = "red", + fill = "red", + alpha = 0.2) + + ggplot2::geom_line() + + ggplot2::labs(y = TeX("$ \\frac{< t^2 E(t) >}{< t \\frac{d}{dt} (t^2 E(t)) >}$"), + x = TeX("$t / a^2$")) + + ggplot2::theme_bw() +plot(p) +``` + +```{r fig.width=7, fig.height=5, echo = FALSE, fig.cap = "Ratio of defining observable for $t_0/a^2$ and $(w_0/a)^2$ ($E(t)$ for $t_0$ in the plaquette definition), error exaggerated by a factor 20"} +p <- ggplot2::ggplot(data.frame(t = gf_ratios$central$t, + se = gf_ratios$se$Wsym_ov_tsqEplaq, + val = gf_ratios$central$Wsym_ov_tsqEplaq), + aes(x = t, y = val)) + + ggplot2::geom_ribbon(aes(ymin = val - 20*se, ymax = val + 20*se), + colour = "red", + fill = "red", + alpha = 0.2) + + ggplot2::geom_line() + + ggplot2::labs(y = TeX("$ \\frac{< t \\frac{d}{dt} (t^2 E(t)) >}{< t^2 E(t) >}$"), + x = TeX("$t / a^2$")) + + ggplot2::theme_bw() +plot(p) +``` + +```{r fig.width=7, fig.height=5, echo = FALSE, fig.cap = "Ratio of defining observable for $t_0/a^2$ and $(w_0/a)^2$ ($E(t)$ for $t_0$ in the symmetric definition), error exaggerated by a factor 20"} +p <- ggplot2::ggplot(data.frame(t = gf_ratios$central$t, + se = gf_ratios$se$Wsym_ov_tsqEsym, + val = gf_ratios$central$Wsym_ov_tsqEsym), + aes(x = t, y = val)) + + ggplot2::geom_ribbon(aes(ymin = val - 20*se, ymax = val + 20*se), + colour = "red", + fill = "red", + alpha = 0.2) + + ggplot2::geom_line() + + ggplot2::labs(y = TeX("$ \\frac{< t \\frac{d}{dt} (t^2 E(t)) >}{< t^2 E(t) >}$"), + x = TeX("$t / a^2$")) + + ggplot2::theme_bw() +plot(p) +``` + +# Bootstrapping the determination of gradient flow scales + +Now that we have bootstrapped our gradient flow time series, bootstrapping the determination is basically trivial using the `apply_reduction_plan.tseries` function. + +```{r} +# we define a reduction plan +plan <- list() +plan[["t0plaq"]] <- expression( approx(x = tsqEplaq, y = t, xout = 0.3)$y ) +plan[["t0sym"]] <- expression( approx(x = tsqEsym, y = t, xout = 0.3)$y ) +plan[["w0sq"]] <- expression( approx(x = Wsym, y = t, xout = 0.3)$y ) +plan[["w0"]] <- expression( sqrt(approx(x = Wsym, y = t, xout = 0.3)$y) ) + +# and apply it to our gradient flow time series +gf_scales <- apply_reduce_plan.tseries( + ts = gf_tseries, + reduce_vars = c("t"), + plan = plan +) + +str(gf_scales) +``` + +```{r fig.width=7, fig.height=5, echo = FALSE, fig.cap = "The gradient flow scales on cA211b.12.48"} +require(reshape2) +plotdat <- cbind(reshape2::melt(gf_scales$central), + error = reshape2::melt(gf_scales$se)$value*20 ) + +p <- ggplot2::ggplot(plotdat, aes(x = factor(variable), y = value)) + + ggplot2::geom_point() + + ggplot2::geom_errorbar(aes(ymin = value - error, ymax = value + error), + width = 0) + + ggplot2::theme_bw() +plot(p) +``` + +# Gradient flow scale ratios + +```{r} +# define a mutation plan +plan <- list() +plan[["t0plaq_ov_w0"]] <- expression( t0plaq / w0 ) +plan[["t0sym_ov_w0"]] <- expression( t0sym / w0 ) + +gf_scale_ratios <- apply_transmute_plan.tseries( + ts = gf_scales, + plan = plan +) +``` + +```{r fig.width=7, fig.height=5, echo = FALSE, fig.cap = "Ratios of the gradient flow scales on cA211b.12.48"} +require(reshape2) + +central <- reshape2::melt(gf_scale_ratios$central, measure.vars = names(gf_scale_ratios$central)) +se <- reshape2::melt(gf_scale_ratios$se, measure.vars = names(gf_scale_ratios$se)) +plotdat <- cbind(central, error = se$value ) +p <- ggplot2::ggplot(plotdat, aes(x = factor(variable), y = value)) + + ggplot2::geom_point() + + ggplot2::geom_errorbar(aes(ymin = value - error, ymax = value + error), + width = 0) + + ggplot2::theme_bw() +plot(p) +``` + diff --git a/vignettes/tseries.Rmd b/vignettes/tseries.Rmd new file mode 100644 index 000000000..57bc174c5 --- /dev/null +++ b/vignettes/tseries.Rmd @@ -0,0 +1,112 @@ +--- +title: "The 'tseries' class" +author: "Bartosz Kostrzewa" +output: + rmarkdown::html_vignette + +vignette: > + %\VignetteIndexEntry{The 'tseries' class} + %\VignetteEngine{knitr::rmarkdown} + \usepackage[utf8]{inputenc} +--- + +```{r echo=FALSE} +require(hadron) +require(ggplot2) +``` + +# Introduction + +The `tseries` class is designed to work with arbitrary-dimensioned timeseries data following *tidy data* concepts. + +## The original data constructor + +Let's assume that we have a real-numbered 3D field on a $10 \times 10 \times 10$ grid observed at $100$ times. +The values of the field at these points in space at time will be stored in a column `val`, and we will pass to the `tseries_orig` constructor the names of the explanatory variables `x`, `y` and `z`. + +```{r} +md_idx <- 1:100 +xyz_md <- expand.grid(x=0:9, y=0:9, z=0:9, md_idx=md_idx) + + +val <- sin(xyz_md$x)*cos(xyz_md$y)*log(1 + xyz_md$z)*rnorm(prod(dim(xyz_md))) +threed_field_tseries <- + tseries_orig(data = cbind(xyz_md, val = val), + explanatory_vars = c('x','y','z')) +``` + +And let's assume that we have another 3D field, similarly observed at $100$ times. + +```{r} +val <- 2*abs(cos((xyz_md$x^2 + xyz_md$y^2)/6)^2*log(3 + xyz_md$z)*rnorm(prod(dim(xyz_md)))) +second_threed_field_tseries <- + tseries_orig(data = cbind(xyz_md, val = val), + explanatory_vars = c('x','y','z')) +``` + + +## Bootstrapping the timeseries + +```{r} +bs_threed_field_tseries <- + bootstrap.tseries(threed_field_tseries, + boot.R = 100, boot.l = 10, seed = 12345, + sim = 'geom', endcorr = TRUE, serial = FALSE) + +# here we assume that the two fields come from the same statistical ensemble, +# such that we produce the bootstrap samples with the same parameters to preserve +# the correlation +bs_second_threed_field_tseries <- + bootstrap.tseries(second_threed_field_tseries, + boot.R = 100, boot.l = 10, seed = 12345, + sim = 'geom', endcorr = TRUE, serial = FALSE) +``` + +```{r fig.width=7, fig.height=4, echo=FALSE} +p <- ggplot2::ggplot(bs_threed_field_tseries$central, aes(x = x, y = y, fill = val)) + + ggplot2::facet_wrap(z ~ ., ncol = 5) + + ggplot2::geom_tile() + + ggplot2::labs(title = "threed_field_tseries") + + ggplot2::theme_bw() +plot(p) +``` + +```{r fig.width=7, fig.height=4, echo=FALSE} +p <- ggplot2::ggplot(bs_second_threed_field_tseries$central, aes(x = x, y = y, fill = val)) + + ggplot2::facet_wrap(z ~ ., ncol = 5) + + ggplot2::geom_tile() + + ggplot2::labs(title = "second_threed_field_tseries") + + ggplot2::theme_bw() +plot(p) +``` + +## Arithmetic operations on timeseries + +We may want to perform arithmetic operations on the timeseries, applying the operation on both the central values, as well as the bootstrap samples. +With the `tseries` class, this happens automagically. + +```{r} +prod_threed_field_tseries <- bs_threed_field_tseries * bs_second_threed_field_tseries + +diff_threed_field_tseries <- bs_threed_field_tseries - bs_second_threed_field_tseries +``` + +```{r fig.width=7, fig.height=4, echo=FALSE} +p <- ggplot2::ggplot(prod_threed_field_tseries$central, aes(x = x, y = y, fill = val)) + + ggplot2::facet_wrap(z ~ ., ncol = 5) + + ggplot2::geom_tile() + + ggplot2::labs(title = "prod_threed_field_tseries") + + ggplot2::theme_bw() +plot(p) +``` + +```{r fig.width=7, fig.height=4, echo=FALSE} +p <- ggplot2::ggplot(diff_threed_field_tseries$central, aes(x = x, y = y, fill = val)) + + ggplot2::facet_wrap(z ~ ., ncol = 5) + + ggplot2::geom_tile() + + ggplot2::labs(title = "diff_threed_field_tseries") + + ggplot2::theme_bw() +plot(p) +``` + +