ACCIDDA · CyGei · Mar 10, 2026 · Mar 3, 2026 · Mar 4, 2026 · Mar 6, 2026
diff --git a/R/get_fcast.R b/R/get_fcast.R
@@ -14,7 +14,7 @@
 #' models using the full dataset.
 #'
 #' @param df Data frame of weekly observations containing
-#'   `target_end_date` (Date) and `observation` (numeric).
+#'   `target_end_date` (Date), `location` (character), `target` (character), and `observation` (numeric).
 #' @param eval_start_date Date or string coercible to Date. First date at
 #'   which forecasts are evaluated. At least 52 weeks of data must precede
 #'   this date.
@@ -77,7 +77,9 @@ get_fcast <- function(
   eval_start_date <- as.Date(eval_start_date)
   stopifnot(
     is.data.frame(df),
-    all(c("target_end_date", "observation") %in% names(df)),
+    all(c("target_end_date", "observation", "target", "location") %in% names(df)),
+    length(unique(df$target)) == 1,
+    length(unique(df$location)) == 1,
     is.numeric(h),
     length(h) == 1,
     h > 0,

diff --git a/R/globals.R b/R/globals.R
@@ -30,7 +30,6 @@ utils::globalVariables(c(
   # forecasts_key / to_respilens / metadata_key
   "target",
   # metadata_key (package dataset)
-  "loc_data",
   # get_fcast plot
   "q95",
   "q95_lower",

diff --git a/R/loc_data.R b/R/loc_data.R
diff --git a/R/to_respilens.R b/R/to_respilens.R
@@ -1,25 +1,18 @@
 #' Build RespiLens metadata key from `model_out_tbl` data
 #'
 #' @param model_out_tbl Forecast tibble from `get_fcast()`.
+#' @param loc Location string.
 #' @return Named list for RespiLens metadata key
-metadata_key <- function(model_out_tbl) {
+metadata_key <- function(model_out_tbl, loc) {
   # remove peak targets
   df <- model_out_tbl |>
     dplyr::filter(!grepl("peak", target, ignore.case = TRUE))
 
-  abbr <- df$location[[1]]
-  loc_row <- loc_data[loc_data$abbreviation == abbr, ]
-
-  # safety check
-  if (nrow(loc_row) == 0 || any(is.na(loc_row$location_name))) {
-    stop("Location not found in loc_data.")
-  }
-
   list(
-    location = loc_row$location,
-    abbreviation = loc_row$abbreviation,
-    location_name = loc_row$location_name,
-    population = loc_row$population,
+    location = loc,
+    abbreviation = loc,
+    location_name = loc,
+    population = 0,
     dataset = "ACCIDDA Suite",
     series_type = "projection",
     hubverse_keys = list(
@@ -124,6 +117,7 @@ forecasts_key <- function(model_out_tbl) {
 
 #' Convert accida_cast to RespiLens format
 #' @param accida_cast An object of class `accida_cast`, the output of `get_fcast()`.
+#' @param loc A character string that describes the location of the data provided.
 #' @return A named list with a single metadata JSON structure and one JSON structure per location.
 #' @noRd
 to_respilens <- function(accida_cast) {
@@ -148,15 +142,16 @@ to_respilens <- function(accida_cast) {
   model_out_tbl <- model_out_tbl |>
     dplyr::filter(output_type != "sample")
 
-  loc <- unique(model_out_tbl$location)
+  model_loc <- unique(model_out_tbl$location)
+  gt_loc <- unique(oracle_output$location)
 
-  if (length(loc) != 1) {
-    stop("Expected exactly one location.")
+  if (length(model_loc) != 1 || length(gt_loc) != 1) {
+    stop("Expected exactly one location in input data.")
   }
 
   return(
     list(
-      metadata = metadata_key(model_out_tbl),
+      metadata = metadata_key(model_out_tbl, model_loc),
       ground_truth = ground_truth_key(oracle_output),
       forecasts = forecasts_key(model_out_tbl)
     )

diff --git a/TODO b/TODO
@@ -6,6 +6,4 @@ Rscript external_to_projections.R \
   --target-data-path <path/to/target-data.csv> \
   --locations-data-path <path/to/locations.csv>
 
-https://github.com/ACCIDDA/RespiLens/blob/main/scripts/external_to_projections.R
-
-Describe the loc_data
+https://github.com/ACCIDDA/RespiLens/blob/main/scripts/external_to_projections.R
diff --git a/data/loc_data.rda b/data/loc_data.rda
diff --git a/docs/articles/acciddasuite.html b/docs/articles/acciddasuite.html
diff --git a/docs/articles/acciddasuite_files/figure-html/models-1.png b/docs/articles/acciddasuite_files/figure-html/models-1.png
diff --git a/docs/articles/acciddasuite_files/figure-html/tscv-setup-1.png b/docs/articles/acciddasuite_files/figure-html/tscv-setup-1.png
diff --git a/docs/articles/index.html b/docs/articles/index.html
diff --git a/man/loc_data.Rd b/man/loc_data.Rd
diff --git a/respi.json b/respi.json
diff --git a/tests/testthat/test-loc_data.R b/tests/testthat/test-loc_data.R
diff --git a/vignettes/acciddasuite.Rmd b/vignettes/acciddasuite.Rmd
@@ -32,9 +32,9 @@ We will aim to demonstrate the basic steps in a forecasting task, as defined by
 
 ##  `get_data`
 
-**Ideally, you would load your own data here**.
+**If you would like to load your own ground truth data, you can follow [these](external_data.html) steps for formatting.**
 
-For demonstration purposes, we will load data from the [CDC National Health Safety Network](https://data.cdc.gov/Public-Health-Surveillance/Weekly-Hospital-Respiratory-Data-HRD-Metrics-by-Ju/mpgq-jmmr/about_data). The data dictionary is available [here](https://dev.socrata.com/foundry/data.cdc.gov/mpgq-jmmr).
+For demonstration purposes, we will load ground truth data from the [CDC National Health Safety Network](https://data.cdc.gov/Public-Health-Surveillance/Weekly-Hospital-Respiratory-Data-HRD-Metrics-by-Ju/mpgq-jmmr/about_data). The data dictionary is available [here](https://dev.socrata.com/foundry/data.cdc.gov/mpgq-jmmr).
 
 ```{r, get_data}
 library(dplyr)

diff --git a/vignettes/external_data.Rmd b/vignettes/external_data.Rmd
@@ -0,0 +1,95 @@
+---
+title: "Preparing External Data"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{Preparing External Data}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+# Utilizing external ground truth data
+
+The `acciddasuite` package supports forecasting of hospitalization incidence due to COVID-19, RSV, or influenza based on given ground truth data. While you can quickly pull state-level `inc hosp` data using `get_data()`, you may want to use your own hospitalization ground truth data for forecasting (i.e., as the `df` parameter for modeling in `get_fcast()`). To do so, you must ensure your data adheres to the format described below. Please note that `get_fcast()` only processes one location at a time, so if you have ground truth data for multiple locations, you should split your dataset into one data frame per location and then run `get_fcast()` once for each.
+
+
+
+## Columns and data types
+
+Your ground truth data must contain 4 distinct columns:
+
+| column name | data type | description |
+|-------------|-----------|-------------|
+|`target_end_date`|Date|The date for which an observation is recorded|
+|`observation`|numeric|The observed value for a given date|
+|`location`|character|The location that the data describes. You may use any location identifier you like, as long as it is consistent and represented as a character string.|
+|`target`|character|The data stream being observed (e.g., "inc hosp influenza" or "inc hosp rsv")|
+
+
+
+## Data content restrictions
+
+Once your data has `target_end_date`, `observation`, `location` and `target` columns, you must ensure the following for successful processing in `get_fcast()`:
+
+* `target_end_date` values should be unique and have no NAs (i.e., there should be no duplicate or missing values in the `target_end_date` column)
+* `target_end_date` values should be of class `Date` (see the table above)
+* Your data must be on a **weekly** cadence (i.e., `target_end_date` values should be exactly one week apart, **and be continuous from week 1 to end**)
+* The `target` and `location` columns must each contain only one unique value (i.e., the same target and the same location are recorded for all of your data)
+* Your dataset should contain ≥ 52 entries (weeks of observations; rows) where the `target_end_date` is < the `eval_start_date` parameter you pass into `get_fcast()`. Please note that NA entries in the `observation` column will be filtered out silently, and may result in the reduction of number of observations in your dataset. It is optimal to ensure that NA values are resolved prior to use in `get_fcast()`. 
+
+After ensuring compliance with all of the stipulations above, you can read your dataset in (either as `data.frame` or `tibble`) and pass it as the `df` parameter in `get_fcast()`.
+
+## Final form data
+
+Once properly formatted, your `df` may resemble this:
+
+```{r, include=FALSE}
+df <- data.frame(
+  target_end_date = as.Date("2024-01-01") + seq(0, by = 7, length.out = 52),
+  observation = rpois(52, lambda = 20), 
+  location = "NY",
+  target = "inc hosp influenza"
+)
+```
+
+```{r, echo=FALSE}
+head(df)
+```
+
+Data type compliance:
+
+```{r}
+class(df$target_end_date)
+class(df$observation)
+class(df$location)
+class(df$target)
+```
+
+Which can now be used as the `df` parameter in `get_fcast()`:
+
+```{r, eval=FALSE}
+x <- get_fcast(
+  df,
+  eval_start_date = "2025-01-01",
+  h = 3,
+  top_n = 5
+)
+```
+
+where `eval_start_date` (`2025-01-01`) falls after 52 consecutive weeks of `target_end_date` entries:
+
+```{r, include=FALSE}
+eval_start_date <- "2025-01-01"
+```
+
+
+```{r}
+sum(df$target_end_date < eval_start_date, na.rm = TRUE)
+```
+
+You can have more than 52 entries preceding your `eval_start_date`, but 52 is the minimum.
+
+
+
+
+
+
diff --git a/vignettes/respi.json b/vignettes/respi.json