diff --git a/.Rbuildignore b/.Rbuildignore index b5a39b8..42d9471 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -14,3 +14,4 @@ ^pkgdown$ ^tools$ ^scratch\.R$ +^scratch$ diff --git a/.gitignore b/.gitignore index d224dad..1ecff28 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,4 @@ rsconnect/ inst/doc docs scratch.R +scratch/ diff --git a/DESCRIPTION b/DESCRIPTION index 2f7f186..ce362e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,3 +31,12 @@ Encoding: UTF-8 Language: en-GB Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 +Imports: + cli, + dplyr, + fs, + ggplot2, + glue, + readr, + rlang, + tidyr diff --git a/NAMESPACE b/NAMESPACE index 6ae9268..2d4bceb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,18 @@ # Generated by roxygen2: do not edit by hand +export(basic_burden_sanity) +export(check_demography_alignment) +export(check_template_alignment) +export(const_data_colnames) +export(impact_check) +export(plot_age_patterns) +export(plot_compare_demography) +export(plot_coverage_set) +export(plot_fvp) +export(plot_global_burden) +export(plot_global_burden_decades) +export(theme_vimc) +export(transfrom_coverage_fvps) +export(validate_complete_incoming_files) +export(validate_file_dict_template) +importFrom(ggplot2,ggplot) diff --git a/R/burden_diagnositics.R b/R/burden_diagnositics.R new file mode 100644 index 0000000..eac40de --- /dev/null +++ b/R/burden_diagnositics.R @@ -0,0 +1,512 @@ +#' Validate file dictionary template +#' +#' @description +#' Function to create a `file_dictionary` template. +#' It maps to touchstone disease scenarios and you will see expected number of +#' scenarios i.e. the number of files that we expect from a model. +#' Users should populate the file column to match the scenario-file. +#' This function will run if a `file_dictionary.csv` file does not exist +#' +#' @param disease A disease identifier. +#' +#' @param path_burden A directory with burden estimate data. +#' +#' @return Nothing; called primarily for its side-effets. +#' If the file `path_burden/file_dictionary.csv` does not exist, a file +#' dictionary CSV file is written to the same location. +#' Prints a message to screen informing the user whether any action has been +#' taken. +#' +#' @keywords diagnostics +#' +#' @examples +#' +#' @export +validate_file_dict_template <- function( + disease, + path_burden = "incoming_burden_estimates" +) { + # TODO: check conditions on arg disease - what is the original source `pars`? + + assert_is_directory(path_burden) + template <- file.path(path_burden, "file_dictionary.csv") + + if (file.exists(template)) { + cli::cli_inform( + "File dictionary found at {.file {template}}, no action needed." + ) + } else { + # TODO: resolve magic strings + scenario_dir <- "model_inputs" + checkmate::assert_directory_exists(scenario_dir) + scenario_data <- file.path(scenario_dir, "scenario.csv") + checkmate::assert_file_exists(scenario_data) + + sce <- readr::read_csv(scenario_data, show_col_types = FALSE) + + # NOTE: consider wrapping into check function + checkmate::assert_data_frame( + sce, + any.missing = FALSE, + min.cols = length(const_data_colnames), + ) + checkmate::assert_names( + colnames(sce), + must.include = const_data_colnames + ) + + # get distinct scenario entries and add no-vax if needed + sce <- dplyr::select(sce, {{ const_data_colnames }}) + sce <- dplyr::distinct(sce) + + novax_scenario <- "novac" + + if (!checkmate::test_subset(novax_scenario, sce$scenario_type)) { + sce <- dplyr::bind_rows( + sce, + make_novax_scenario(disease) + ) + } + + sce$file <- NA_character_ # TODO: investigate this further + readr::write_csv(sce, template) + + cli::cli_inform( + "No file dictionary found at {.file {template}}; created a file \\ + dictionary and wrote it to {.file {template}}." + ) + } + + # no return +} + +#' Validate files in a burden estimate +#' +#' @description +#' Check that incoming data files in a burden estimate are complete, and that +#' no extra files have been included. +#' This function expects that incoming burden files are in the +#' directory given by `path_burden`, which holds a file dictionary which maps +#' each data file to a specific scenario. +#' +#' @inheritParams validate_file_dict_template +#' +#' @return A `` of the scenario file dictionary in `path_burden` if all +#' checks pass. Otherwise, exits with informative errors on failed checks. +#' +#' @examples +#' +#' @keywords diagnostics +#' +#' @export +validate_complete_incoming_files <- function( + path_burden = "incoming_burden_estimates" +) { + assert_is_directory(path_burden) + + files <- list.files(path_burden, full.names = TRUE) + file_dict <- file.path(path_burden, "file_dictionary.csv") + + # checks on the file dictionary + if (file.exists(file_dict)) { + df_dict <- readr::read_csv( + file_dict, + show_col_types = FALSE + ) + + col_filenames <- "file" # TODO: remove/explain magic colnm + scenario_filenames <- df_dict[[col_filenames]] + df_dict <- dplyr::select(df_dict, -{{ col_filenames }}) + + duplicate_rows <- anyDuplicated(df_dict) + duplicate_filenames <- anyDuplicated(scenario_filenames) + + if (duplicate_rows) { + cli::cli_abort( + "Expected file dictionary {.file {file_dict}} to have non-duplicate \\ + entries, but found {duplicate_rows} duplicated rows!" + ) + } + if (duplicate_filenames) { + cli::cli_abort( + "Expected file dictionary {.file {file_dict}} to have non-duplicate \\ + scenario filenames, but found {duplicate_filenames} duplicated \\ + filenames!" + ) + } + + # expect scenario files in path_burden are the same as scenario_filenames + sce_files <- files[files != file_dict] + scenario_filenames <- file.path(path_burden, scenario_filenames) + are_good_scefiles <- checkmate::test_permutation( + scenario_filenames, + sce_files + ) + + if (!are_good_scefiles) { + extra_files <- setdiff(sce_files, scenario_filenames) + n_extra_files <- length(extra_files) + missing_files <- setdiff(scenario_filenames, sce_files) + n_missing_files <- length(missing_files) + + cli::cli_abort( + c( + "Expected as many scenario data files as scenarios \\ + ({length(scenario_filenames)}), but found \\ + {.emph {length(sce_files)}} instead.", + i = "Found {cli::no(n_extra_files)} extra files{? /}\\ + {.file {basename(extra_files)}}, {cli::qty(n_missing_files)} + {?but could not find/and} \\ + {cli::no(n_missing_files)} missing files{? /} \\ + {.file {basename(missing_files)}}", + i = "Directory searched: {.file {path_burden}}" + ), + ) + } + } else { + cli::cli_abort( + "Expected a file dictionary {.file {file_dict}}, but it was not found!" + ) + } + + df_dict +} + +#' Check incoming burden set against template +#' +#' @description +#' +#' @param burden_set +#' +#' @param template +#' +#' @return +#' +#' @examples +#' +#' @keywords diagnostics +#' +#' @export +check_template_alignment <- function(burden_set, template) { + # TODO: figure out what the args are expected to be: dfs? lists, vecs? + expected <- names(template) + provided <- names(burden_set) + + missing_cols_in_burden <- setdiff(expected, provided) + extra_cols_in_burden <- setdiff(provided, expected) + + # explicitly check each length + burden_cols_matches_template <- length(missing_cols_in_burden) + + length(extra_cols_in_burden) == + 0L + + # TODO: make magic strings constants + key_cols <- c("disease", "country", "year", "age") + template_grid <- dplyr::distinct( + template, + dplyr::across({ + key_cols + }) + ) + burden_grid <- dplyr::distinct( + burden_set, + dplyr::across({ + key_cols + }) + ) + + # TODO: if these are data.frames, this might not be the best way to check + # for differences + missing_grid_in_burden <- setdiff(template_grid, burden_grid) + extra_grid_in_burden <- setdiff(burden_grid, template_grid) + burden_grid_matches_template <- all( + c( + nrow(missing_grid_in_burden), + nrow(extra_grid_in_burden) + ) == + 0L + ) + + list( + missing_cols_in_burden = missing_cols_in_burden, + extra_cols_in_burden = extra_cols_in_burden, + burden_cols_matches_template = burden_cols_matches_template, + missing_grid_in_burden = missing_grid_in_burden, + extra_grid_in_burden = extra_grid_in_burden, + burden_grid_matches_template = burden_grid_matches_template + ) +} + +#' Check incoming burden cohort size against interpolated population +#' +#' @description +#' +#' @param burden_set +#' +#' @param wpp +#' +#' @param gender +#' +#' @return +#' +#' @keywords diagnostics +#' +#' @export +check_demography_alignment <- function(burden_set, wpp, gender = "both") { + # TODO: input checks + + # TODO: check if these can be made constants + cols_to_select <- c("country", "year", "age", "cohort_size") + provided <- dplyr::select( + burden_set, + {{ cols_to_select }} + ) + provided <- dplyr::mutate( + provided = cohort_size # check if this can be made a string const + ) + + # TODO: explain what expected is + # TODO: replace with a right-join? + expected <- dplyr::filter( + wpp, + country %in% + provided$country & + year %in% provided$year & + age %in% provided$age, + gender == {{ gender }} + ) + + cols_to_select <- c("country", "year", "age", "value") + expected <- dplyr::select( + expected, + {{ cols_to_select }} + ) + expected <- dplyr::rename( + expected, + expected = value + ) # TODO: prefer not to use NSE + + # return left join + alignment <- dplyr::left_join( + provided, + expected, + by = c("country", "year", "age") + ) + alignment <- dplyr::mutate( + alignment, + difference = provided - expected, + abs_diff = abs(difference), + prop_diff = difference / expected + ) + + alignment +} + +#' Sanity checks on burden estimates +#' +#' @description Helper function for sanity checks on burden estimate values. +#' Checks whether any burden estimates are non-numeric, missing, or negative. +#' +#' @param burden A `` of disease burden estimates. Must have +#' at least a single column named `"value"` of numeric burden estimates. +#' +#' @return A character vector of messages generated by checks on burden +#' estimates, with the length of the vector depending on how many checks fail. +#' +#' @keywords diagnostics +#' +#' @export +basic_burden_sanity <- function(burden) { + # TODO: expectations on burden + mes <- "Basic sanity check for burden estimates:" + + value_col <- "value" + value <- burden[[value_col]] + + if (is.numeric(burden$value)) { + if (anyNA(burden$value)) { + mes_any_missing <- glue::glue( + "Warning: Burden estimates should not have missing values, but some \\ + values are missing. Fix missing values by converting to zeros!" + ) + + mes <- c(mes, mes_any_missing) + } + + if (any(burden$value < 0, na.rm = TRUE)) { + mes_any_negative <- glue::glue( + "Warning: Burden estimates should all be positive or zero, but found \\ + some negative estimates!" + ) + + mes <- c(mes, mes_any_negative) + } + } else { + mes_not_numeric <- glue::glue( + "Warning: Burden estimates should be of type `numeric`, but are not. \\ + Convert burden estimates to `numeric`." + ) + + mes <- c(mes, mes_not_numeric) + } + + if (length(mes) == 1L) { + mes <- c(mes, "PASS.") + } + + mes +} + +#' @title +#' +#' @description +#' A short description... +#' +#' @param coverage +#' +#' @param wpp +#' +#' @return +#' +#' @examples +#' # example code +#' +#' @keywords diagnostics +#' +#' @export +transfrom_coverage_fvps <- function(coverage, wpp) { + # TODO: checks on coverage + # TODO: checks on wpp + + cols_to_select <- c("age_from", "age_to", "gender") + todo_list <- dplyr::select( + coverage, + cols_to_select + ) + todo_list <- dplyr::distinct(todo_list) + todo_list <- dplyr::mutate( + todo_list, + job = seq_along(.data$gender) + ) + + # TODO: THIS NEEDS TO BE CLEANED UP + # TODO: clarify structure of `coverage` and mapping of gender to age + pop_all <- list() + for (i in seq_along(todo_list$age_from)) { + pop_all[[i]] <- wpp %>% + x <- dplyr::filter( + wpp, + .data$age >= todo_list$age_from[i], + .data$age <= todo_list$age_to[i], + .data$gender == todo_list$gender[i] + ) + x <- dplyr::group_by(x, .data$country, .data$year) + x <- dplyr::summarise( + x, + target_wpp = sum(.data$value), + .groups = "drop" + ) + x <- dplyr::mutate( + x, + job = todo_list$job[i] + ) + } + pop_all <- dplyr::bind_rows(pop_all) + + # TODO: add comments or explain in fn docs + d <- dplyr::left_join( + coverage, + pop_all, + by = c("country", "year") + ) + d <- dplyr::mutate( + d, + target = dplyr::coalesce( + .data$target, + .data$target_wpp # replace NAs in target with target_wpp + ), + fvps = .data$target * .data$coverage, + fvps_adjusted = dplyr::if_else( + .data$fvps > .data$target_wpp, + .data$target_wpp, + .data$fvps + ), + target_adjusted = dplyr::if_else( + .data$target > .data$target_wpp, + .data$target_wpp, + .data$target + ), + coverage_adjusted = .data$fvps_adjusted / .data$target_adjusted + ) + d[["target_wpp"]] <- NULL + + d +} + +# TODO: fill out fn docs +#' @title +#' +#' @description +#' +#' @param burden +#' +#' @param scenario_order +#' +#' @return +#' +#' @examples +#' +#' @keywords diagnostics +#' +#' @export +impact_check <- function(burden, scenario_order) { + # TODO: input checks + scenario_cols <- c("scenario", "scenario_order") + scenario_order <- dplyr::select(scenario_order, {{ scenario_cols }}) + + d <- dplyr::summarise( + millions = sum(.data$value) / 1e6, + .by = c("scenario", "burden_outcome"), + .groups = "drop" # probably unnecessary as grouping is temporary + ) + + d <- dplyr::left_join( + d, + scenario_order, + by = "scenario" + ) + + d <- dplyr::mutate( + d, + scenario_order = glue::glue("{.data$scenario_order}:{.data$scenario}") + ) + + d$scenario <- NULL + + d <- tidyr::pivot_wider( + d, + names_from = "scenario_order", + values_from = "million" + ) + + # TODO: CLEAN THIS UP + for (i in 2:nrow(scenario_order)) { + for (j in 1:(i - 1)) { + if (any(d[i + 1] > d[j + 1])) { + cat(sprintf( + "    **Warning**: provided less disease burden in lower coverage scenario (%s) compared to higher coverage scenario (%s).", + names(d)[j + 1], + names(d)[i + 1] + )) + cat("
") + } else { + cat(sprintf( + "    **PASS**: Provided higher disease burden in scenarios with fewer FVPs." + )) + cat("
") + } + } + } + + d +} diff --git a/R/constants.R b/R/constants.R new file mode 100644 index 0000000..a375556 --- /dev/null +++ b/R/constants.R @@ -0,0 +1,14 @@ +#' Package constants +#' +#' @name constants +#' @rdname constants +#' +#' @keywords constants +#' +#' @export +const_data_colnames <- c( + "scenario_type", + "scenario_type_description", + "scenario", + "scenario_description" +) diff --git a/R/helpers.R b/R/helpers.R new file mode 100644 index 0000000..eb06f52 --- /dev/null +++ b/R/helpers.R @@ -0,0 +1,36 @@ +#' Make data for a no-vaccination scenario +#' +#' @name helpers +#' @rdname helpers +#' +#' @description +#' Helper functions for burden diagnostics. +#' +#' @inheritParams validate_file_dict_template +#' +#' @keywords internal +#' +#' @return +#' +#' - `make_novax_scenario()` returns a tibble with the minimum required column +#' names, and entries corresponding to a 'no-vaccination' scenario for +#' `disease`. +make_novax_scenario <- function(disease) { + v <- c( + "novac", + "No Vaccination", + glue::glue("{disease}-no-vaccination"), + "No vaccination" + ) + + # internal function without input checking + df <- dplyr::tibble( + variable = need_colnames, + value = v + ) + + tidyr::pivot_wider( + df, + names_from = "variable" + ) +} diff --git a/R/import-standalone-utils-assert-path.R b/R/import-standalone-utils-assert-path.R new file mode 100644 index 0000000..673fb8e --- /dev/null +++ b/R/import-standalone-utils-assert-path.R @@ -0,0 +1,157 @@ +# Standalone file: do not edit by hand +# Source: https://github.com/reside-ic/reside.utils/blob/HEAD/R/standalone-utils-assert-path.R +# Generated by: usethis::use_standalone("reside-ic/reside.utils", "utils-assert-path") +# ---------------------------------------------------------------------- +# +# --- +# repo: reside/reside.utils +# file: standalone-utils-assert-path.R +# dependencies: standalone-utils-assert.R +# imports: [cli, fs] +# --- +assert_file_exists <- function(files, name = "File", call = parent.frame(), + arg = NULL) { + err <- !file.exists(files) + ## TODO: throughout this file it would be nice to use cli's '.file' + ## class and ector contraction, *but* it renders poorly on default + ## black backgfrounds (dark blue) and makes testing a bit harder + ## because the rendering depends on cli options. + ## + ## TODO: add a canonical case check, as for the relative path bit. + if (any(err)) { + ## Because we interpolate both 'name' and the file list, we need + ## to disambiguate the quantity. + n <- cli::qty(sum(err)) + cli::cli_abort( + "{name}{n}{?s} {?does/do} not exist: {format_file_list(files[err])}", + call = call, arg = arg) + } +} + + +assert_file_exists_relative <- function(files, workdir, name, + call = parent.frame(), + arg = NULL) { + assert_relative_path(files, name, workdir, call) + + assert_character(files, name, call = call) + err <- !file_exists(files, workdir = workdir) + if (any(err)) { + n <- cli::qty(sum(err)) + cli::cli_abort( + c("{name}{n}{?s} {?does/do} not exist: {format_file_list(files[err])}", + i = "Looked within directory '{workdir}'"), + call = call) + } + + files_canonical <- file_canonical_case(files, workdir) + err <- is.na(files_canonical) | fs::path(files) != files_canonical + if (any(err)) { + i <- err & !is.na(files_canonical) + hint_case <- sprintf("For '%s', did you mean '%s'?", + files[i], files_canonical[i]) + names(hint_case) <- rep("i", length(hint_case)) + n <- cli::qty(sum(err)) + cli::cli_abort( + c("{name}{n}{?s} {?does/do} not exist: {format_file_list(files[err])}", + hint_case, + i = paste("If you don't use the canonical case for a file, your code", + "is not portable across different platforms"), + i = "Looked within directory '{workdir}'"), + call = call) + } +} + + +assert_is_directory <- function(path, name = "Directory", call = parent.frame(), + arg = NULL) { + assert_scalar_character(path, arg = arg, call = call) + assert_file_exists(path, name = name, arg = arg, call = call) + if (!fs::is_dir(path)) { + cli::cli_abort("Path exists but is not a directory: {path}", + call = call, arg = arg) + } +} + + +assert_relative_path <- function(files, name, workdir, call = parent.frame(), + arg = NULL) { + err <- fs::is_absolute_path(files) + if (any(err)) { + n <- cli::qty(sum(err)) + files_err <- files[err] + names(files_err) <- rep("x", length(files_err)) + cli::cli_abort( + c("{name}{n}{?s} must be {?a/} relative path{?s}", + files_err, + i = "Path was relative to directory '{workdir}'"), + call = call, arg = arg) + } + + err <- vapply(fs::path_split(files), function(x) any(x == ".."), TRUE) + if (any(err)) { + n <- cli::qty(sum(err)) + files_err <- files[err] + names(files_err) <- rep("x", length(files_err)) + cli::cli_abort( + c("{name}{n}{?s} must not contain '..' (parent directory) components", + files_err, + i = "Path was relative to directory '{workdir}'"), + call = call, arg = arg) + } +} + + +assert_directory_does_not_exist <- function(x, name = "Directory", arg = NULL, + call = parent.frame()) { + ok <- !fs::dir_exists(x) + if (!all(ok)) { + cli::cli_abort("{name}{?s} already exists: {format_file_list(x[!ok])}", + call = call, arg = arg) + } + invisible(x) +} + + +file_canonical_case <- function(path, workdir) { + if (length(path) != 1) { + return(vapply(path, file_canonical_case, "", workdir, USE.NAMES = FALSE)) + } + stopifnot(!fs::is_absolute_path(path)) + path_split <- fs::path_split(path)[[1]] + base <- workdir + ret <- character(length(path_split)) + for (i in seq_along(path_split)) { + pos <- dir(base, all.files = TRUE, no.. = TRUE) + el <- path_split[[i]] + j <- which(tolower(el) == tolower(pos)) + if (length(j) == 1) { + el <- pos[[j]] + } else if (el %in% pos) { + # We might want to warn here? + # message("Multiple casings present; this is not portable") + } else { + return(NA_character_) + } + ret[[i]] <- el + base <- file.path(base, el) + } + paste(ret, collapse = "/") +} + + +file_exists <- function(..., workdir = NULL) { + files <- c(...) + if (!is.null(workdir)) { + assert_scalar_character(workdir) + owd <- setwd(workdir) # nolint + on.exit(setwd(owd)) # nolint + } + fs::file_exists(files) +} + + +format_file_list <- function(x) { + cli::cli_vec(sprintf("'%s'", x), + style = list("vec-sep2" = ", ", "vec-last" = ", ")) +} diff --git a/R/import-standalone-utils-assert.R b/R/import-standalone-utils-assert.R new file mode 100644 index 0000000..441503d --- /dev/null +++ b/R/import-standalone-utils-assert.R @@ -0,0 +1,251 @@ +# Standalone file: do not edit by hand +# Source: https://github.com/reside-ic/reside.utils/blob/HEAD/R/standalone-utils-assert.R +# Generated by: usethis::use_standalone("reside-ic/reside.utils", "utils-assert") +# ---------------------------------------------------------------------- +# +# --- +# repo: reside/reside.utils +# file: standalone-utils-assert.R +# imports: cli +# --- +assert_scalar <- function(x, name = deparse(substitute(x)), arg = name, + call = parent.frame()) { + if (length(x) != 1) { + cli::cli_abort( + c("'{name}' must be a scalar", + i = "{name} has length {length(x)}"), + call = call, arg = arg) + } + invisible(x) +} + + +assert_character <- function(x, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (!is.character(x)) { + cli::cli_abort("Expected '{name}' to be character", call = call, arg = arg) + } + invisible(x) +} + + +assert_numeric <- function(x, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (!is.numeric(x)) { + cli::cli_abort("Expected '{name}' to be numeric", call = call, arg = arg) + } + invisible(x) +} + + +assert_integer <- function(x, name = deparse(substitute(x)), + tolerance = NULL, arg = name, + call = parent.frame()) { + if (is.numeric(x)) { + rx <- round(x) + if (is.null(tolerance)) { + tolerance <- sqrt(.Machine$double.eps) + } + if (!isTRUE(all.equal(x, rx, tolerance = tolerance))) { + cli::cli_abort( + c("Expected '{name}' to be integer", + i = paste("{cli::qty(length(x))}The provided", + "{?value was/values were} numeric, but not very close", + "to integer values")), + arg = arg, call = call) + } + x <- as.integer(rx) + } else { + cli::cli_abort("Expected '{name}' to be integer", call = call, arg = arg) + } + invisible(x) +} + + +assert_logical <- function(x, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (!is.logical(x)) { + cli::cli_abort("Expected '{name}' to be logical", arg = arg, call = call) + } + invisible(x) +} + + +assert_nonmissing <- function(x, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (anyNA(x)) { + cli::cli_abort("Expected '{name}' to be non-NA", arg = arg, call = call) + } + invisible(x) +} + + +assert_scalar_character <- function(x, name = deparse(substitute(x)), + allow_null = FALSE, + arg = name, call = parent.frame()) { + if (allow_null && is.null(x)) { + return(invisible(x)) + } + assert_scalar(x, name, arg = arg, call = call) + assert_character(x, name, arg = arg, call = call) + assert_nonmissing(x, name, arg = arg, call = call) +} + + +assert_scalar_numeric <- function(x, name = deparse(substitute(x)), + allow_null = FALSE, + arg = name, call = parent.frame()) { + if (allow_null && is.null(x)) { + return(invisible(x)) + } + assert_scalar(x, name, arg = arg, call = call) + assert_numeric(x, name, arg = arg, call = call) + assert_nonmissing(x, name, arg = arg, call = call) +} + + +assert_scalar_integer <- function(x, name = deparse(substitute(x)), + tolerance = NULL, allow_null = FALSE, + arg = name, call = parent.frame()) { + if (allow_null && is.null(x)) { + return(invisible(x)) + } + assert_scalar(x, name, arg = arg, call = call) + assert_integer(x, name, tolerance = tolerance, arg = arg, call = call) + assert_nonmissing(x, name, arg = arg, call = call) +} + + +assert_scalar_logical <- function(x, name = deparse(substitute(x)), + allow_null = FALSE, + arg = name, call = parent.frame()) { + if (allow_null && is.null(x)) { + return(invisible(x)) + } + assert_scalar(x, name, arg = arg, call = call) + assert_logical(x, name, arg = arg, call = call) + assert_nonmissing(x, name, arg = arg, call = call) +} + + +assert_scalar_size <- function(x, allow_zero = TRUE, allow_null = FALSE, + name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (allow_null && is.null(x)) { + return(invisible(x)) + } + assert_scalar_integer(x, name = name, arg = arg, call = call) + assert_nonmissing(x, name, arg = arg, call = call) + min <- if (allow_zero) 0 else 1 + if (x < min) { + cli::cli_abort("'{name}' must be at least {min}", arg = arg, call = call) + } + invisible(x) +} + + +assert_length <- function(x, len, name = deparse(substitute(x)), arg = name, + call = parent.frame()) { + if (length(x) != len) { + cli::cli_abort( + "Expected '{name}' to have length {len}, but was length {length(x)}", + arg = arg, call = call) + } + invisible(x) +} + + +assert_is <- function(x, what, name = deparse(substitute(x)), arg = name, + call = parent.frame()) { + if (!inherits(x, what)) { + cli::cli_abort("Expected '{name}' to be a '{what}' object", + arg = arg, call = call) + } + invisible(x) +} + + +assert_list <- function(x, name = deparse(substitute(x)), arg = name, + call = parent.frame()) { + if (!is.list(x)) { + cli::cli_abort("Expected '{name}' to be a list", + arg = arg, call = call) + } + invisible(x) +} + + +assert_scalar_positive_numeric <- function(x, allow_zero = TRUE, + name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + assert_scalar_numeric(x, name = name, call = call) + if (allow_zero) { + if (x < 0) { + cli::cli_abort("'{name}' must be at least 0", arg = arg, call = call) + } + } else { + if (x <= 0) { + cli::cli_abort("'{name}' must be greater than 0", arg = arg, call = call) + } + } + invisible(x) +} + + +assert_scalar_positive_integer <- function(x, allow_zero = TRUE, + name = deparse(substitute(x)), + tolerance = NULL, arg = name, + call = parent.frame()) { + assert_scalar_integer(x, name, tolerance = tolerance, arg = arg, call = call) + min <- if (allow_zero) 0 else 1 + if (x < min) { + cli::cli_abort("'{name}' must be at least {min}", arg = arg, call = call) + } + invisible(x) +} + + +assert_raw <- function(x, len = NULL, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + if (!is.raw(x)) { + cli::cli_abort("'{name}' must be a raw vector", arg = arg, call = call) + } + if (!is.null(len)) { + assert_length(x, len, name = name, call = call) + } + invisible(x) +} + + +assert_named <- function(x, unique = FALSE, name = deparse(substitute(x)), + arg = name, call = parent.frame()) { + nms <- names(x) + if (is.null(nms)) { + cli::cli_abort("'{name}' must be named", call = call, arg = arg) + } + if (anyNA(nms) || any(nms == "")) { + cli::cli_abort("All elements of '{name}' must be named", + call = call, arg = arg) + } + if (unique && anyDuplicated(names(x))) { + dups <- sprintf("'%s'", unique(names(x)[duplicated(names(x))])) + cli::cli_abort( + c("'{name}' must have unique names", + i = "Found {length(dups)} duplicate{?s}: {dups}"), + call = call, arg = arg) + } + invisible(x) +} + + +match_value <- function(x, choices, name = deparse(substitute(x)), arg = name, + call = parent.frame()) { + assert_scalar_character(x, call = call, name = name, arg = arg) + if (!(x %in% choices)) { + choices_str <- paste(sprintf("'%s'", choices), collapse = ", ") + cli::cli_abort(c("'{name}' must be one of {choices_str}", + i = "Instead we were given '{x}'"), call = call, + arg = arg) + } + x +} diff --git a/R/plotting.R b/R/plotting.R new file mode 100644 index 0000000..80c9b43 --- /dev/null +++ b/R/plotting.R @@ -0,0 +1,315 @@ +#' Plotting theme for vimcheck +#' +#' @description +#' A simple plotting theme building on [ggplot2::theme_bw()]. +#' +#' +#' @name plotting_theme +#' @rdname plotting_theme +#' +#' @param x_text_angle +#' +#' @param y_text_angle +#' +#' @param ... <[`dynamic-dots`][rlang::dyn-dots]> Other arguments passed to +#' [ggplot2::theme()]. +#' +#' @return A `ggplot2` theme that can be added to `ggplot2` plots or objects. +#' +#' @export +theme_vimc <- function(x_text_angle = 45, y_text_angle = 0, ...) { + ggplot2::theme_bw() + + ggplot2::theme( + axis.text.x = ggplot2::element_text( + size = 10, + angle = x_text_angle + ), + strip.text.y = ggplot2::element_text( + angle = y_text_angle + ), + plot.margin = ggplot2::margin(1, 0, 1, 0, "cm"), + ... + ) +} + +#' Plot burden and impact diagostics +#' +#' @name plotting +#' @rdname plotting +#' +#' @importFrom ggplot2 ggplot +#' +#' @description +#' Plotting functions for burden and impact diagnostics. This documentation +#' holds all plotting functions for now. +#' +#' @param d +#' +#' @param fig_number +#' +#' @return A `` object that can be printed to screen in the plot frame +#' or saved to an output device (i.e., saved as an image file). +#' +#' @examples +#' +#' @export +plot_compare_demography <- function(d, fig_number) { + num_countries <- length(unique(d$country)) + names_melting_data <- c( + "scenario", + "age", + "year", + "expected", + "provided", + "difference" + ) + names_melting_by <- c("scenario", "age", "year") + + # TODO: use tidyr; BIG PICTURE: NO DATA PREP IN PLOTTING FUNCTIONS! + tot <- reshape2::melt(d[, names_melting_data], id.vars = names_melting_by) + dat <- tot %>% + dplyr::group_by(variable, scenario, year, age) %>% + dplyr::summarise(value = sum(value)) %>% + dplyr::mutate(millions = value / 1e6) %>% + dplyr::arrange(age) + + # TODO: use .data and .vars, import or namespace functions + g <- ggplot(data = dat, aes(x = year, y = millions, fill = age)) + + geom_bar(stat = "identity") + + facet_wrap(~ scenario + variable, ncol = 3) + + scale_fill_distiller(palette = "PuOr") + + geom_hline(yintercept = 0, color = 'red') + + labs( + x = "calendar year", + y = glue::glue("people (in millions"), + title = glue::glue( + "Fig. {fig_number}. Comparison between interpolated population and \\ + cohort size ({num_countries} countries)." + ) + ) + + theme_vimc() + + g +} + +#' @name plotting +#' +#' @param burden +#' +#' @export +plot_age_patterns <- function(burden, fig_number) { + # TODO: REMOVE DATA PREP FROM PLOTTING FNS + d <- burden %>% + group_by(scenario, burden_outcome, age) %>% + summarise(millions = sum(value) / 1e6) + + g <- ggplot(d, aes(x = age, y = millions)) + + geom_bar(stat = "identity") + + facet_grid( + burden_outcome ~ scenario, + scales = "free_y", + labeller = labeller(scenario = label_wrap_gen(10)) + ) + + coord_cartesian(xlim = c(0, max(d$age) + 1)) + + labs( + y = "people (in millions)", + title = glue::glue( + "Fig. {fig_number}. Burden age patterns across all countries and years" + ) + ) + + theme_vimc() + + g +} + +#' @name plotting +#' +#' @param year_max +#' +#' @export +plot_global_burden_decades <- function(burden, year_max, fig_number) { + # TODO: prefer moving these conditional checks elsewhere + # TODO: prefer moving data prep outside plotting fn + stopifnot(year_max %% 10 == 0) + d <- burden %>% + filter(year <= year_max) %>% + mutate(year2 = ifelse(year == year_max, year_max - 1, year)) %>% + mutate(decade = floor(year2 / 10) * 10) %>% + mutate( + decade_label = ifelse( + decade == year_max - 10, + paste0(decade, "-", decade + 10), + paste0(decade, "-", decade + 9) + ) + ) %>% + group_by(scenario, burden_outcome, decade_label) %>% + summarise(millions = sum(value) / 1e6) + + g <- ggplot(d, aes(x = scenario, y = millions, fill = scenario)) + + geom_bar(stat = "identity") + + facet_grid(burden_outcome ~ decade_label, scales = "free_y") + + labs( + y = "people (in millions)", + title = glue::glue( + "Fig. {fig_number}. Global disease burden trends" + ) + ) + + + # TODO: reconcile with theme_vimc() or make new theme + theme_bw() %+replace% + theme( + axis.title.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank() + ) + + g +} + +#' @name plotting +#' +#' @param d +#' +#' @param outcome +#' +#' @export +plot_global_burden <- function(d, outcome, fig_number) { + data_ <- dplyr::filter( + d, + .data$burden_outcome == outcome + ) + + g <- ggplot( + data, + aes(x = year, y = millions, fill = age) + ) + + geom_bar(stat = "identity", aes(fill = age)) + + facet_grid( + ~scenario, + scales = "free_y", + labeller = labeller(scenario = label_wrap_gen(10)) # TODO: avoid magic numbers + ) + + scale_fill_distiller(palette = "Spectral") + + labs( + x = "calendar year", + y = paste(outcomes_list[i], "(in millions)"), # TODO: where is outcomes_list!? + title = glue::glue( + "Fig. {fig_number}: Global trends of disease burden ({outcome})." + ) + ) + + theme_vimc(x_text_angle = 90) + + g +} + +#' @name plotting +#' +#' @param cov +#' +#' @export +plot_coverage_set <- function(cov, figure_number) { + # TODO: remove data prep + no_vacc <- expand_grid( + year = unique(cov$year), + country = unique(cov$country) + ) %>% + mutate( + coverage = 0, + delivery = "none", + scenario_description = "No vaccination" + ) + + cov1 <- cov %>% + mutate(delivery = paste(vaccine, activity_type, sep = "-")) %>% + select(scenario_description, delivery, country, year, coverage) %>% + bind_rows(no_vacc) + + g <- ggplot(cov1, aes(x = year, y = coverage, fill = delivery)) + + geom_bar( + stat = "identity", + position = position_dodge(width = 0.9), + alpha = 0.7 + ) + + facet_grid( + "country ~ scenario_description", + scales = "free_y", + labeller = labeller(scenario_description = label_wrap_gen(10)) + ) + + scale_x_continuous(breaks = pretty(unique(cov1$year))) + + labs( + x = "calendar year", + y = "Proportion of target population", + title = glue::glue( + "Fig. {figure_number}: Coverage sets for {cov$disease[1]}" + ) + ) + + theme_vimc(90) + + g +} + +#' @name plotting +#' +#' @param fvp +#' +#' @param year_min +#' +#' @param year_max +#' +#' @export +plot_fvp <- function(fvp, year_min, year_max, figure_number) { + # TODO: PREFER TO REMOVE DATA PREP CODE + no_vacc <- expand_grid( + year = unique(fvp$year), + country = unique(fvp$country) + ) %>% + mutate(fvps_adjusted = 0, scenario_description = "No vaccination") + + fvp_final <- bind_rows(fvp, no_vacc) %>% + filter(year >= year_min & year <= year_max) %>% + mutate(scenario = as.factor(scenario_description)) + + fvp_final$scenario <- relevel(fvp_final$scenario, "No vaccination") + fvp_final$scenario <- gsub(tolower(fvp$disease[1L]), "", fvp_final$scenario) + fvp_final$scenario <- gsub("-", " ", fvp_final$scenario) + + scenario_order <- c(names(sort(tapply( + fvp_final$fvps_adjusted, + fvp_final$scenario, + sum, + na.rm = TRUE + )))) + + fvp_final$scenario <- forcats::fct_relevel(fvp_final$scenario, scenario_order) + + fvp_agg <- + fvp_final %>% + dplyr::group_by(year, scenario, disease) %>% + dplyr::summarise(fvp = sum(fvps_adjusted, na.rm = TRUE)) + + # TODO: prefer to use a scale transform rather than touching data + g <- ggplot(fvp_agg, aes(x = year, y = fvp / 1e6, fill = scenario)) + + geom_bar( + stat = "identity", + colour = "midnightblue", + fill = "midnightblue" + ) + + facet_grid( + ~scenario, + scales = "free_y", + labeller = labeller(scenario = label_wrap_gen(10)) + ) + + scale_x_continuous(breaks = pretty(unique(fvp_agg$year))) + + ylab(paste("FVP (in millions)")) + + labs( + x = "calendar year", + y = "FVP (in millions)", + title = glue::glue( + "Fig. {figure_number}: Fully Vaccinated Persons at global level by \\ + scenario for {fvp$disease[1L]}" + ) + ) + + theme_vimc(90, legend.position = "none") + + g +} diff --git a/R/vimcheck.R b/R/vimcheck.R index a65cf64..9428bd0 100644 --- a/R/vimcheck.R +++ b/R/vimcheck.R @@ -1,6 +1,8 @@ -#' @keywords internal +#' @keywords package_doc "_PACKAGE" ## usethis namespace: start ## usethis namespace: end NULL + +#' @import rlang diff --git a/_pkgdown.yml b/_pkgdown.yml index ca4c87b..79b8dfd 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -3,3 +3,25 @@ url: ~ template: bootstrap: 5 + +reference: + - title: Package-level documentation + contents: + - has_keyword("package_doc") + - title: Diagnostic functions + desc: Package diagnostic functions. + contents: + - has_keyword("diagnostics") + - title: Plotting functions + desc: Package plotting functions. + contents: + - plotting + - plotting_theme + - title: Internal functions + desc: Internal helper functions. + contents: + - has_keyword("internal") + - title: Constants + desc: Package constants. + contents: + - has_keyword("constants") diff --git a/man/basic_burden_sanity.Rd b/man/basic_burden_sanity.Rd new file mode 100644 index 0000000..1abd7fe --- /dev/null +++ b/man/basic_burden_sanity.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/burden_diagnositics.R +\name{basic_burden_sanity} +\alias{basic_burden_sanity} +\title{Sanity checks on burden estimates} +\usage{ +basic_burden_sanity(burden) +} +\arguments{ +\item{burden}{A \verb{} of disease burden estimates. Must have +at least a single column named \code{"value"} of numeric burden estimates.} +} +\value{ +A character vector of messages generated by checks on burden +estimates, with the length of the vector depending on how many checks fail. +} +\description{ +Helper function for sanity checks on burden estimate values. +Checks whether any burden estimates are non-numeric, missing, or negative. +} +\keyword{diagnostics} diff --git a/man/check_demography_alignment.Rd b/man/check_demography_alignment.Rd new file mode 100644 index 0000000..31e7338 --- /dev/null +++ b/man/check_demography_alignment.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/burden_diagnositics.R +\name{check_demography_alignment} +\alias{check_demography_alignment} +\title{Check incoming burden cohort size against interpolated population} +\usage{ +check_demography_alignment(burden_set, wpp, gender = "both") +} +\arguments{ +\item{burden_set}{} + +\item{wpp}{} + +\item{gender}{} +} +\description{ +Check incoming burden cohort size against interpolated population +} +\keyword{diagnostics} diff --git a/man/check_template_alignment.Rd b/man/check_template_alignment.Rd new file mode 100644 index 0000000..b22480e --- /dev/null +++ b/man/check_template_alignment.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/burden_diagnositics.R +\name{check_template_alignment} +\alias{check_template_alignment} +\title{Check incoming burden set against template} +\usage{ +check_template_alignment(burden_set, template) +} +\arguments{ +\item{burden_set}{} + +\item{template}{} +} +\description{ +Check incoming burden set against template +} +\keyword{diagnostics} diff --git a/man/constants.Rd b/man/constants.Rd new file mode 100644 index 0000000..e883e95 --- /dev/null +++ b/man/constants.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constants.R +\docType{data} +\name{constants} +\alias{constants} +\alias{const_data_colnames} +\title{Package constants} +\format{ +An object of class \code{character} of length 4. +} +\usage{ +const_data_colnames +} +\description{ +Package constants +} +\keyword{constants} diff --git a/man/helpers.Rd b/man/helpers.Rd new file mode 100644 index 0000000..fb9c4df --- /dev/null +++ b/man/helpers.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers.R +\name{helpers} +\alias{helpers} +\alias{make_novax_scenario} +\title{Make data for a no-vaccination scenario} +\usage{ +make_novax_scenario(disease) +} +\arguments{ +\item{disease}{A disease identifier.} +} +\value{ +\itemize{ +\item \code{make_novax_scenario()} returns a tibble with the minimum required column +names, and entries corresponding to a 'no-vaccination' scenario for +\code{disease}. +} +} +\description{ +Helper functions for burden diagnostics. +} +\keyword{internal} diff --git a/man/plotting.Rd b/man/plotting.Rd new file mode 100644 index 0000000..213634f --- /dev/null +++ b/man/plotting.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plotting} +\alias{plotting} +\alias{plot_compare_demography} +\alias{plot_age_patterns} +\alias{plot_global_burden_decades} +\alias{plot_global_burden} +\alias{plot_coverage_set} +\alias{plot_fvp} +\title{Plot burden and impact diagostics} +\usage{ +plot_compare_demography(d, fig_number) + +plot_age_patterns(burden, fig_number) + +plot_global_burden_decades(burden, year_max, fig_number) + +plot_global_burden(d, outcome, fig_number) + +plot_coverage_set(cov, figure_number) + +plot_fvp(fvp, year_min, year_max, figure_number) +} +\arguments{ +\item{d}{} + +\item{fig_number}{} + +\item{burden}{} + +\item{year_max}{} + +\item{outcome}{} + +\item{cov}{} + +\item{fvp}{} + +\item{year_min}{} +} +\value{ +A \verb{} object that can be printed to screen in the plot frame +or saved to an output device (i.e., saved as an image file). +} +\description{ +Plotting functions for burden and impact diagnostics. This documentation +holds all plotting functions for now. +} diff --git a/man/plotting_theme.Rd b/man/plotting_theme.Rd new file mode 100644 index 0000000..80fcf2e --- /dev/null +++ b/man/plotting_theme.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plotting_theme} +\alias{plotting_theme} +\alias{theme_vimc} +\title{Plotting theme for vimcheck} +\usage{ +theme_vimc(x_text_angle = 45, y_text_angle = 0, ...) +} +\arguments{ +\item{x_text_angle}{} + +\item{y_text_angle}{} + +\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Other arguments passed to +\code{\link[ggplot2:theme]{ggplot2::theme()}}.} +} +\value{ +A \code{ggplot2} theme that can be added to \code{ggplot2} plots or objects. +} +\description{ +A simple plotting theme building on \code{\link[ggplot2:ggtheme]{ggplot2::theme_bw()}}. +} diff --git a/man/validate_complete_incoming_files.Rd b/man/validate_complete_incoming_files.Rd new file mode 100644 index 0000000..9a1ecd8 --- /dev/null +++ b/man/validate_complete_incoming_files.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/burden_diagnositics.R +\name{validate_complete_incoming_files} +\alias{validate_complete_incoming_files} +\title{Validate files in a burden estimate} +\usage{ +validate_complete_incoming_files(path_burden = "incoming_burden_estimates") +} +\arguments{ +\item{path_burden}{A directory with burden estimate data.} +} +\value{ +A \verb{} of the scenario file dictionary in \code{path_burden} if all +checks pass. Otherwise, exits with informative errors on failed checks. +} +\description{ +Check that incoming data files in a burden estimate are complete, and that +no extra files have been included. +This function expects that incoming burden files are in the +directory given by \code{path_burden}, which holds a file dictionary which maps +each data file to a specific scenario. +} +\keyword{diagnostics} diff --git a/man/validate_file_dict_template.Rd b/man/validate_file_dict_template.Rd new file mode 100644 index 0000000..525b4a5 --- /dev/null +++ b/man/validate_file_dict_template.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/burden_diagnositics.R +\name{validate_file_dict_template} +\alias{validate_file_dict_template} +\title{Validate file dictionary template} +\usage{ +validate_file_dict_template(disease, path_burden = "incoming_burden_estimates") +} +\arguments{ +\item{disease}{A disease identifier.} + +\item{path_burden}{A directory with burden estimate data.} +} +\value{ +Nothing; called primarily for its side-effets. +If the file \code{path_burden/file_dictionary.csv} does not exist, a file +dictionary CSV file is written to the same location. +Prints a message to screen informing the user whether any action has been +taken. +} +\description{ +Function to create a \code{file_dictionary} template. +It maps to touchstone disease scenarios and you will see expected number of +scenarios i.e. the number of files that we expect from a model. +Users should populate the file column to match the scenario-file. +This function will run if a \code{file_dictionary.csv} file does not exist +} +\keyword{diagnostics} diff --git a/man/vimcheck-package.Rd b/man/vimcheck-package.Rd index c26fbd9..c8369de 100644 --- a/man/vimcheck-package.Rd +++ b/man/vimcheck-package.Rd @@ -33,4 +33,4 @@ Other contributors: } } -\keyword{internal} +\keyword{package_doc}