diff --git a/R/get_fcast.R b/R/get_fcast.R index 2783d39..58670db 100644 --- a/R/get_fcast.R +++ b/R/get_fcast.R @@ -14,7 +14,7 @@ #' models using the full dataset. #' #' @param df Data frame of weekly observations containing -#' `target_end_date` (Date) and `observation` (numeric). +#' `target_end_date` (Date), `location` (character), `target` (character), and `observation` (numeric). #' @param eval_start_date Date or string coercible to Date. First date at #' which forecasts are evaluated. At least 52 weeks of data must precede #' this date. @@ -77,7 +77,9 @@ get_fcast <- function( eval_start_date <- as.Date(eval_start_date) stopifnot( is.data.frame(df), - all(c("target_end_date", "observation") %in% names(df)), + all(c("target_end_date", "observation", "target", "location") %in% names(df)), + length(unique(df$target)) == 1, + length(unique(df$location)) == 1, is.numeric(h), length(h) == 1, h > 0, diff --git a/R/globals.R b/R/globals.R index 7dbc396..3d31271 100644 --- a/R/globals.R +++ b/R/globals.R @@ -30,7 +30,6 @@ utils::globalVariables(c( # forecasts_key / to_respilens / metadata_key "target", # metadata_key (package dataset) - "loc_data", # get_fcast plot "q95", "q95_lower", diff --git a/R/loc_data.R b/R/loc_data.R deleted file mode 100644 index d4794d7..0000000 --- a/R/loc_data.R +++ /dev/null @@ -1,21 +0,0 @@ -#' Location Dataset -#' -#' US states census data. Includes state abbreviations, full state names, ???? -#' -#' @format A data frame with 53 rows and 12 variables: -#' \describe{ -#' \item{abbreviation}{State abbreviation code} -#' \item{location}{State abbreviation code 2} -#' \item{location_name}{Full state name} -#' \item{population}{State population} -#' \item{count_rate0p3}{??} -#' \item{count_rate0p5}{??} -#' \item{count_rate0p7}{??} -#' \item{count_rate1}{??} -#' \item{count_rate1p7}{??} -#' \item{count_rate3}{??} -#' \item{count_rate4}{??} -#' \item{count_rate5}{??} -#' } -#' @source Where the data came from ?? 
-"loc_data" diff --git a/R/to_respilens.R b/R/to_respilens.R index dc85eb9..42c6d13 100644 --- a/R/to_respilens.R +++ b/R/to_respilens.R @@ -1,25 +1,18 @@ #' Build RespiLens metadata key from `model_out_tbl` data #' #' @param model_out_tbl Forecast tibble from `get_fcast()`. +#' @param loc Location string. #' @return Named list for RespiLens metadata key -metadata_key <- function(model_out_tbl) { +metadata_key <- function(model_out_tbl, loc) { # remove peak targets df <- model_out_tbl |> dplyr::filter(!grepl("peak", target, ignore.case = TRUE)) - abbr <- df$location[[1]] - loc_row <- loc_data[loc_data$abbreviation == abbr, ] - - # safety check - if (nrow(loc_row) == 0 || any(is.na(loc_row$location_name))) { - stop("Location not found in loc_data.") - } - list( - location = loc_row$location, - abbreviation = loc_row$abbreviation, - location_name = loc_row$location_name, - population = loc_row$population, + location = loc, + abbreviation = loc, + location_name = loc, + population = 0, dataset = "ACCIDDA Suite", series_type = "projection", hubverse_keys = list( @@ -124,6 +117,7 @@ forecasts_key <- function(model_out_tbl) { #' Convert accida_cast to RespiLens format #' @param accida_cast An object of class `accida_cast`, the output of `get_fcast()`. +#' @param loc A character string that describes the location of the data provided. #' @return A named list with a single metadata JSON structure and one JSON structure per location. 
#' @noRd to_respilens <- function(accida_cast) { @@ -148,15 +142,16 @@ to_respilens <- function(accida_cast) { model_out_tbl <- model_out_tbl |> dplyr::filter(output_type != "sample") - loc <- unique(model_out_tbl$location) + model_loc <- unique(model_out_tbl$location) + gt_loc <- unique(oracle_output$location) - if (length(loc) != 1) { - stop("Expected exactly one location.") + if (length(model_loc) != 1 || length(gt_loc) != 1) { + stop("Expected exactly one location in input data.") } return( list( - metadata = metadata_key(model_out_tbl), + metadata = metadata_key(model_out_tbl, model_loc), ground_truth = ground_truth_key(oracle_output), forecasts = forecasts_key(model_out_tbl) ) diff --git a/TODO b/TODO index 5133311..3ce992b 100644 --- a/TODO +++ b/TODO @@ -6,6 +6,4 @@ Rscript external_to_projections.R \ --target-data-path \ --locations-data-path -https://github.com/ACCIDDA/RespiLens/blob/main/scripts/external_to_projections.R - -Describe the loc_data \ No newline at end of file +https://github.com/ACCIDDA/RespiLens/blob/main/scripts/external_to_projections.R \ No newline at end of file diff --git a/data/loc_data.rda b/data/loc_data.rda deleted file mode 100644 index 492bf68..0000000 Binary files a/data/loc_data.rda and /dev/null differ diff --git a/docs/articles/acciddasuite.html b/docs/articles/acciddasuite.html index 4768f26..3ddd507 100644 --- a/docs/articles/acciddasuite.html +++ b/docs/articles/acciddasuite.html @@ -39,7 +39,8 @@ @@ -95,8 +96,11 @@

Statistical Modelling get_data

-

Ideally, you would load your own data here.

-

For demonstration purposes, we will load data from the CDC +

If you would like to load your own ground truth data, you can +follow these steps for +formatting.

+

For demonstration purposes, we will load ground truth data from the +CDC National Health Safety Network. The data dictionary is available here.

 library(dplyr)
@@ -106,19 +110,19 @@ 

df <- get_data(pathogen = "covid", geo_values = "ny") summary(df) #> as_of location target -#> Min. :2026-01-25 Length:286 Length:286 -#> 1st Qu.:2026-01-25 Class :character Class :character -#> Median :2026-01-25 Mode :character Mode :character -#> Mean :2026-01-25 -#> 3rd Qu.:2026-01-25 -#> Max. :2026-01-25 +#> Min. :2026-03-01 Length:291 Length:291 +#> 1st Qu.:2026-03-01 Class :character Class :character +#> Median :2026-03-01 Mode :character Mode :character +#> Mean :2026-03-01 +#> 3rd Qu.:2026-03-01 +#> Max. :2026-03-01 #> target_end_date observation #> Min. :2020-08-08 Min. : 60.0 -#> 1st Qu.:2021-12-19 1st Qu.: 461.2 -#> Median :2023-05-02 Median : 995.0 -#> Mean :2023-05-02 Mean : 1644.0 -#> 3rd Qu.:2024-09-12 3rd Qu.: 2241.8 -#> Max. :2026-01-24 Max. :11833.0

+#> 1st Qu.:2021-12-28 1st Qu.: 442.5 +#> Median :2023-05-20 Median : 982.0 +#> Mean :2023-05-20 Mean : 1621.8 +#> 3rd Qu.:2024-10-08 3rd Qu.: 2213.0 +#> Max. :2026-02-28 Max. :11833.0

Time Series Cross-Validation @@ -174,15 +178,15 @@

Time Series Cross-Validation#> Models evaluated: #> model_id wis #> <char> <num> -#> ARIMA 39.46377 -#> ETS 39.50977 -#> THETA 43.41059 -#> ENSEMBLE 63.82600 -#> SNAIVE 272.68963 +#> THETA 27.20068 +#> ARIMA 35.22355 +#> ETS 41.93945 +#> ENSEMBLE 53.91791 +#> SNAIVE 262.42959 #> #> Forecast horizon: -#> From: 2025-12-27 -#> To: 2026-02-21 +#> From: 2026-01-31 +#> To: 2026-03-28 #> #> Contents: #> $hubcast hub forecast object @@ -196,21 +200,21 @@

Time Series Cross-Validation#> # A tibble: 200 × 9 #> model_id reference_date target horizon location target_end_date output_type #> <chr> <date> <chr> <int> <chr> <date> <chr> -#> 1 ARIMA 2025-12-20 wk inc … 1 NY 2025-12-27 quantile -#> 2 ARIMA 2025-12-20 wk inc … 1 NY 2025-12-27 quantile -#> 3 ARIMA 2025-12-20 wk inc … 1 NY 2025-12-27 quantile -#> 4 ARIMA 2025-12-20 wk inc … 1 NY 2025-12-27 quantile -#> 5 ARIMA 2025-12-20 wk inc … 1 NY 2025-12-27 quantile -#> 6 ARIMA 2025-12-20 wk inc … 2 NY 2026-01-03 quantile -#> 7 ARIMA 2025-12-20 wk inc … 2 NY 2026-01-03 quantile -#> 8 ARIMA 2025-12-20 wk inc … 2 NY 2026-01-03 quantile -#> 9 ARIMA 2025-12-20 wk inc … 2 NY 2026-01-03 quantile -#> 10 ARIMA 2025-12-20 wk inc … 2 NY 2026-01-03 quantile +#> 1 ARIMA 2026-01-24 wk inc … 1 NY 2026-01-31 quantile +#> 2 ARIMA 2026-01-24 wk inc … 1 NY 2026-01-31 quantile +#> 3 ARIMA 2026-01-24 wk inc … 1 NY 2026-01-31 quantile +#> 4 ARIMA 2026-01-24 wk inc … 1 NY 2026-01-31 quantile +#> 5 ARIMA 2026-01-24 wk inc … 1 NY 2026-01-31 quantile +#> 6 ARIMA 2026-01-24 wk inc … 2 NY 2026-02-07 quantile +#> 7 ARIMA 2026-01-24 wk inc … 2 NY 2026-02-07 quantile +#> 8 ARIMA 2026-01-24 wk inc … 2 NY 2026-02-07 quantile +#> 9 ARIMA 2026-01-24 wk inc … 2 NY 2026-02-07 quantile +#> 10 ARIMA 2026-01-24 wk inc … 2 NY 2026-02-07 quantile #> # ℹ 190 more rows #> # ℹ 2 more variables: output_type_id <chr>, value <dbl> #> #> $oracle_output -#> # A tibble: 286 × 6 +#> # A tibble: 291 × 6 #> location target_end_date target output_type output_type_id oracle_value #> <chr> <date> <chr> <chr> <lgl> <dbl> #> 1 NY 2020-08-08 wk inc covi… quantile NA 517 @@ -223,23 +227,23 @@

Time Series Cross-Validation#> 8 NY 2020-09-26 wk inc covi… quantile NA 656 #> 9 NY 2020-10-03 wk inc covi… quantile NA 851 #> 10 NY 2020-10-10 wk inc covi… quantile NA 840 -#> # ℹ 276 more rows +#> # ℹ 281 more rows fcast$score #> Key: <model_id> #> model_id wis interval_coverage_50 interval_coverage_95 #> <char> <num> <num> <num> -#> 1: ARIMA 39.46377 0.75 1 -#> 2: ETS 39.50977 1.00 1 -#> 3: THETA 43.41059 0.50 1 -#> 4: ENSEMBLE 63.82600 1.00 1 -#> 5: SNAIVE 272.68963 0.25 1 +#> 1: THETA 27.20068 1.00 1 +#> 2: ARIMA 35.22355 0.75 1 +#> 3: ETS 41.93945 0.75 1 +#> 4: ENSEMBLE 53.91791 1.00 1 +#> 5: SNAIVE 262.42959 0.25 1 #> wis_relative_skill #> <num> -#> 1: 0.6052929 -#> 2: 0.6059985 -#> 3: 0.6658289 -#> 4: 0.9789593 -#> 5: 4.1824967

+#> 1: 0.4826402 +#> 2: 0.6249954 +#> 3: 0.7441602 +#> 4: 0.9567021 +#> 5: 4.6564669

Adding extra_models @@ -272,20 +276,19 @@

Adding extra_models#> <accida_cast> #> #> Models evaluated: -#> model_id wis -#> <char> <num> -#> EPIESTIM 30.78158 -#> ETS 32.52275 -#> ENSEMBLE 35.58608 -#> ARIMA 37.37021 -#> THETA 47.36624 -#> CUSTOM_ARIMA 126.93577 -#> SNAIVE 268.02628 -#> PROPHET 1664.29545 +#> model_id wis +#> <char> <num> +#> THETA 21.70325 +#> ARIMA 26.70864 +#> ETS 29.52682 +#> ENSEMBLE 36.52850 +#> CUSTOM_ARIMA 140.41996 +#> SNAIVE 284.57923 +#> PROPHET 425.66068 #> #> Forecast horizon: -#> From: 2025-12-27 -#> To: 2026-02-14 +#> From: 2026-01-31 +#> To: 2026-03-21 #> #> Contents: #> $hubcast hub forecast object diff --git a/docs/articles/acciddasuite_files/figure-html/models-1.png b/docs/articles/acciddasuite_files/figure-html/models-1.png index 0289971..d791214 100644 Binary files a/docs/articles/acciddasuite_files/figure-html/models-1.png and b/docs/articles/acciddasuite_files/figure-html/models-1.png differ diff --git a/docs/articles/acciddasuite_files/figure-html/tscv-setup-1.png b/docs/articles/acciddasuite_files/figure-html/tscv-setup-1.png index 5538be7..05770a3 100644 Binary files a/docs/articles/acciddasuite_files/figure-html/tscv-setup-1.png and b/docs/articles/acciddasuite_files/figure-html/tscv-setup-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 87302d5..aca61ec 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -19,7 +19,8 @@