Skip to content
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: stoner
Title: Support for Building VIMC Montagu Touchstones, using Dettl
Version: 0.1.21
Version: 0.1.22
Authors@R:
c(person("Wes", "Hinsley",role = c("aut", "cre", "cst", "dnc", "elg", "itr", "sng", "ard"),
email = "w.hinsley@imperial.ac.uk"),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export(stone_load)
export(stone_stochastic_central)
export(stone_stochastic_cert_verify)
export(stone_stochastic_graph)
export(stone_stochastic_make_meta)
export(stone_stochastic_process)
export(stone_stochastic_standardise)
export(stone_stochastic_upload)
Expand Down
76 changes: 76 additions & 0 deletions R/stochastic_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,19 @@ stone_stochastic_standardise <- function(
}
}

# Note that for MenA, we want to keep the _cwyx outcomes.

if (missing_run_id_fix) {
if ((!"run_id" %in% names(d)) && (length(index) == 200)) d$run_id <- j
}

# Remove columns "X" and "X.1" that have crept in with some of the
# inputs saved with row.names

d[["X"]] <- NULL
d[["X.1"]] <- NULL



# Round to integer, as per guidance. (Not using as.integer, as that
# has limits on how large numbers can be, so we are just truncating
Expand Down Expand Up @@ -257,3 +266,70 @@ stone_stochastic_central <- function(base, touchstone, disease, group,
outfile <- sprintf("%s_%s_central.pq", group, scenario)
arrow::write_parquet(central, file.path(path, outfile))
}



##' Create a `meta.csv` file in the root of the standardised
##' stochastics. The columns contain scalars of `touchstone`,
##' `disease`, `group`, `scenario` - and for each row, a
##' semi-colon-separated lists for `countries` and `outcomes`.
##' This is useful for making the stochastic explorer faster
##' on startup (otherwise it has to sample all of the files
##' each time you run it) - and also it is a good general
##' record of all the stochastic data we have.
##'
##' This does mean that we should re-create the meta data
##' each time we make changes to the standardised stochastic
##' data though.
##'
##' @export
##' @title Produce `meta.csv` summary of the structure and
##' content of a standardised stochastic data folder.
##' @importFrom data.table rbindlist
##' @importFrom utils write.csv
##' @param path The root folder of the stochastic data.
##' @return Nothing useful; called for its side effect of writing
##' `meta.csv` into `path`.
stone_stochastic_make_meta <- function(path) {

  # Summarise one <disease>_<group> folder within a touchstone.
  # File names in the folder follow <group>_<scenario>_<country>.pq;
  # one output row is produced per scenario.
  explore_files <- function(touchstone, folder, disease, group) {
    # List the folder once and reuse the result for both the schema
    # probe and the name-derived scenario/country extraction.
    files <- list.files(file.path(path, touchstone, folder))

    # Outcomes come from the parquet schema of the first file, which
    # is assumed representative of every file in the folder; the
    # id/demographic columns are not outcomes, so drop them.
    first <- file.path(path, touchstone, folder, files[1])
    ds <- arrow::open_dataset(first)
    outcomes <- ds$schema$names
    outcomes <- outcomes[!outcomes %in% c("run_id", "disease", "year", "age",
                                          "country", "cohort_size")]
    outcomes <- sort(unique(tolower(outcomes)))

    parts <- strsplit(files, "_")
    scenario_of <- vapply(parts, `[[`, character(1), 2)
    scenarios <- unique(scenario_of)

    # Build one small data.frame per scenario and bind once at the
    # end, rather than growing a data.frame with rbind in a loop.
    rows <- lapply(scenarios, function(scenario) {
      matches <- parts[scenario_of == scenario]
      countries <- unique(vapply(matches, `[[`, character(1), 3))
      # Strip only a literal trailing ".pq" extension; an unanchored
      # unescaped pattern would also mangle country tokens that
      # happen to contain "<any char>pq".
      countries <- sub("\\.pq$", "", countries)
      data.frame(
        touchstone = touchstone,
        disease = disease,
        group = group,
        scenario = scenario,
        countries = paste0(countries, collapse = ";"),
        outcomes = paste0(outcomes, collapse = ";")
      )
    })
    data.table::rbindlist(rows)
  }

  # Folders directly under a touchstone are named <disease>_<group>.
  touchstone_meta <- function(touchstone) {
    entries <- list.files(file.path(path, touchstone))
    data.table::rbindlist(lapply(entries, function(x) {
      xs <- strsplit(x, "_")[[1]]
      explore_files(touchstone, x, xs[1], xs[2])
    }))
  }

  # Each top-level directory under path is a touchstone.
  touchstones <- basename(list.dirs(path, recursive = FALSE))
  res <- data.table::rbindlist(lapply(touchstones, touchstone_meta))
  write.csv(res, file.path(path, "meta.csv"),
            row.names = FALSE, quote = FALSE)
}
Loading
Loading