Skip to content
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: stoner
Title: Support for Building VIMC Montagu Touchstones, using Dettl
Version: 0.1.21
Version: 0.1.22
Authors@R:
c(person("Wes", "Hinsley",role = c("aut", "cre", "cst", "dnc", "elg", "itr", "sng", "ard"),
email = "w.hinsley@imperial.ac.uk"),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(stone_load)
export(stone_stochastic_central)
export(stone_stochastic_cert_verify)
export(stone_stochastic_graph)
export(stone_stochastic_make_meta)
export(stone_stochastic_process)
export(stone_stochastic_standardise)
export(stone_stochastic_upload)
Expand Down
76 changes: 76 additions & 0 deletions R/stochastic_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,19 @@ stone_stochastic_standardise <- function(
}
}

# Note that for MenA, we want to keep the _cwyx outcomes.

if (missing_run_id_fix) {
if ((!"run_id" %in% names(d)) && (length(index) == 200)) d$run_id <- j
}

# Remove columns "X" and "X.1" that have crept in with some of the
# inputs saved with row.names

d[["X"]] <- NULL
d[["X.1"]] <- NULL



# Round to integer, as per guidance. (Not using as.integer, as that
# has limits on how large numbers can be, so we are just truncating
Expand Down Expand Up @@ -257,3 +266,70 @@ stone_stochastic_central <- function(base, touchstone, disease, group,
outfile <- sprintf("%s_%s_central.pq", group, scenario)
arrow::write_parquet(central, file.path(path, outfile))
}



##' Create a `meta.csv` file in the root of the standardised
##' stochastics. The columns contain scalars of `touchstone`,
##' `disease`, `group`, `scenario` - and for each row, a
##' semi-colon-separated lists for `countries` and `outcomes`.
##' This is useful for making the stochastic explorer faster
##' on startup (otherwise it has to sample all of the files
##' each time you run it) - and also it is a good general
##' record of all the stochastic data we have.
##'
##' This does mean that we should re-create the meta data
##' each time we make changes to the standardised stochastic
##' data though.
##'
##' @export
##' @title Produce `meta.csv` summary of the structure and
##' content of a standardised stochastic data folder.
##' @importFrom data.table rbindlist
##' @importFrom utils write.csv
##' @param path The root folder of the stochastic data.
##' @return Nothing useful; called for its side effect of writing
##' `meta.csv` into `path`.
stone_stochastic_make_meta <- function(path) {

  # Summarise one <disease>_<group> folder within a touchstone.
  # File names in the folder follow <group>_<scenario>_<country>.pq;
  # one output row is produced per scenario.
  explore_files <- function(touchstone, folder, disease, group) {
    # List the folder once and reuse the result for both the schema
    # probe and the name-derived scenario/country extraction.
    files <- list.files(file.path(path, touchstone, folder))

    # Outcomes come from the parquet schema of the first file, which
    # is assumed representative of every file in the folder; the
    # id/demographic columns are not outcomes, so drop them.
    first <- file.path(path, touchstone, folder, files[1])
    ds <- arrow::open_dataset(first)
    outcomes <- ds$schema$names
    outcomes <- outcomes[!outcomes %in% c("run_id", "disease", "year", "age",
                                          "country", "cohort_size")]
    outcomes <- sort(unique(tolower(outcomes)))

    parts <- strsplit(files, "_")
    scenario_of <- vapply(parts, `[[`, character(1), 2)
    scenarios <- unique(scenario_of)

    # Build one small data.frame per scenario and bind once at the
    # end, rather than growing a data.frame with rbind in a loop.
    rows <- lapply(scenarios, function(scenario) {
      matches <- parts[scenario_of == scenario]
      countries <- unique(vapply(matches, `[[`, character(1), 3))
      # Strip only a literal trailing ".pq" extension; an unanchored
      # unescaped pattern would also mangle country tokens that
      # happen to contain "<any char>pq".
      countries <- sub("\\.pq$", "", countries)
      data.frame(
        touchstone = touchstone,
        disease = disease,
        group = group,
        scenario = scenario,
        countries = paste0(countries, collapse = ";"),
        outcomes = paste0(outcomes, collapse = ";")
      )
    })
    data.table::rbindlist(rows)
  }

  # Folders directly under a touchstone are named <disease>_<group>.
  touchstone_meta <- function(touchstone) {
    entries <- list.files(file.path(path, touchstone))
    data.table::rbindlist(lapply(entries, function(x) {
      xs <- strsplit(x, "_")[[1]]
      explore_files(touchstone, x, xs[1], xs[2])
    }))
  }

  # Each top-level directory under path is a touchstone.
  touchstones <- basename(list.dirs(path, recursive = FALSE))
  res <- data.table::rbindlist(lapply(touchstones, touchstone_meta))
  write.csv(res, file.path(path, "meta.csv"),
            row.names = FALSE, quote = FALSE)
}
Loading
Loading