Merge pull request #252 from cmu-delphi/release/6.1.0

nmdefries · web-flow · commit de9232fcc791 · 2023-04-04T17:27:18.000-04:00
Release 6.1.0
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 6.0.0
+current_version = 6.1.0
 commit = False
 tag = False
 
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -4,13 +4,13 @@ on: push
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
       - uses: actions/checkout@v2
-      - uses: r-lib/actions/setup-r@v1
+      - uses: r-lib/actions/setup-r@v2
         with:
           use-public-rspm: true
-      - uses: r-lib/actions/setup-r-dependencies@v1
+      - uses: r-lib/actions/setup-r-dependencies@v2
       - name: Style / Format
         shell: Rscript {0}
         run: styler::style_dir(dry="fail")
diff --git a/.github/workflows/s3_upload_ec2.yml b/.github/workflows/s3_upload_ec2.yml
@@ -50,7 +50,10 @@ jobs:
 
       - name: Cleanup
         if: ${{ always() }}
-        run: docker ps -q | xargs -n 1 -P 8 -I {} docker stop {}
+        run: |
+          docker image prune -a -f
+          docker container prune -f
+          docker ps -q | xargs -n 1 -P 8 -I {} docker stop {}
         
         
         
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: forecasteval
 Title: Forecast Evaluation Dashboard
-Version: 6.0.0
+Version: 6.1.0
 Authors@R: c(person("Kate", "Harwood", email = "kharwood@andrew.cmu.edu", role = "cre"),
 	   person("Chris", "Scott", role = "ctb"),
 	   person("Jed", "Grabman", role = "ctb"))
diff --git a/Makefile b/Makefile
@@ -5,7 +5,7 @@ S3_BUCKET=s3://forecast-eval
 build: build_dashboard
 
 r_build:
-	docker build --no-cache --pull -t forecast-eval-build docker_build
+	docker build --no-cache --force-rm --pull -t forecast-eval-build docker_build
 
 %.rds: dist
 	test -f dist/$@ || curl -o dist/$@ $(S3_URL)/$@
diff --git a/Report/create_reports.R b/Report/create_reports.R
@@ -144,9 +144,41 @@ save_score_errors <- list()
 ## Score predictions
 print("Evaluating state forecasts")
 geo_type <- "state"
+offline_signal_dir <- "signal_cache"
+# Take advantage of `evalcast`'s caching feature. Suppress output since we
+# only care about generating the cache.
+#
+# Since cache files are named using only the provided as-of date, the first
+# COVIDcast call for a given as-of will be used for all subsequent calls with
+# the same as-of, whether or not the cache actually contains all the desired
+# `time_value`s.
+#
+# Since data used for scoring is fetched one day or week at a time as-of
+# "today", the first such call would create a cache covering a very narrow
+# date range. Later API calls would attempt to use the incomplete cache file.
+#
+# Circumvent this by explicitly pulling the full date range and initializing a
+# complete cache for each signal used.
+sources <- list(
+  list(data_source = "hhs", signal = "confirmed_admissions_covid_1d"),
+  list(data_source = "jhu-csse", signal = "confirmed_incidence_num"),
+  list(data_source = "jhu-csse", signal = "deaths_incidence_num")
+)
+invisible({
+  for (source in sources) {
+    download_signal(
+      data_source = source$data_source, signal = source$signal,
+      # "us" can also be included in `state_geos`. Drop it to avoid the "Data
+      # not fetched for some geographies" error.
+      geo_type = "state", geo_values = setdiff(state_geos, "us"), offline_signal_dir = offline_signal_dir
+    )
+  }
+})
+
 state_scores <- evaluate_covid_predictions(state_predictions,
   err_measures,
-  geo_type = geo_type
+  geo_type = geo_type,
+  offline_signal_dir = offline_signal_dir
 )
 
 for (signal_name in signals) {
diff --git a/Report/error_measures.R b/Report/error_measures.R
@@ -1,5 +1,3 @@
-library(assertthat)
-
 overprediction <- function(quantile, value, actual_value) {
   score_func_param_checker(quantile, value, actual_value, "overprediction")
   if (!is_symmetric(quantile)) {
@@ -12,7 +10,9 @@ overprediction <- function(quantile, value, actual_value) {
   if (all(is.na(actual_value))) {
     return(NA)
   }
-  actual_value <- unique(actual_value)
+
+  # Already checking that actual_value is unique in score_func_param_checker
+  actual_value <- actual_value[1]
 
   lower <- value[!is.na(quantile) & quantile < .5]
   med <- value[find_quantile_match(quantile, 0.5)]
@@ -45,7 +45,9 @@ underprediction <- function(quantile, value, actual_value) {
   if (all(is.na(actual_value))) {
     return(NA)
   }
-  actual_value <- unique(actual_value)
+
+  # Already checking that actual_value is unique in score_func_param_checker
+  actual_value <- actual_value[1]
 
   upper <- value[!is.na(quantile) & quantile > .5]
   med <- value[find_quantile_match(quantile, 0.5)]
@@ -65,6 +67,122 @@ underprediction <- function(quantile, value, actual_value) {
   return(ans)
 }
 
+#' Compute weighted interval score
+#'
+#' Computes weighted interval score (WIS), a well-known quantile-based
+#' approximation of the commonly-used continuous ranked probability score
+#' (CRPS). WIS is a proper score, and can be thought of as a distributional
+#' generalization of absolute error. For example, see [Bracher et
+#' al. (2020)](https://arxiv.org/abs/2005.12881) for discussion in the context
+#' of COVID-19 forecasting.
+#'
+#' @param quantile vector of forecasted quantiles
+#' @param value vector of forecasted values
+#' @param actual_value Actual value (scalar, or vector with one unique value).
+#' @return WIS; `NA` if `actual_value` is all `NA` or the median is ambiguous.
+#' @export
+weighted_interval_score <- function(quantile, value, actual_value) {
+  score_func_param_checker(quantile, value, actual_value, "weighted_interval_score")
+  if (all(is.na(actual_value))) {
+    return(NA)
+  }
+
+  # score_func_param_checker() already ensured a single unique actual_value
+  actual_value <- actual_value[1]
+
+  value <- value[!is.na(quantile)]
+  quantile <- quantile[!is.na(quantile)]
+
+  # Per Ryan: WIS is equivalent to quantile loss modulo an extra 0.5 AE term
+  # for the median forecast (counted twice).
+  #
+  # Update: WIS is now being redefined to match exactly. There is still some
+  # question about the correct denominator, but the formula seems to be
+  # 1 / (K + 0.5).
+  #
+  # Finally, the multiplication by 2 is because alpha_k = 2 * quantile_k.
+  med <- value[find_quantile_match(quantile, 0.5)]
+
+  if (length(med) > 1L) { # several entries matched the median: ambiguous
+    return(NA)
+  }
+
+  wis <- 2 * mean(pmax(
+    quantile * (actual_value - value),
+    (1 - quantile) * (value - actual_value),
+    na.rm = TRUE
+  ))
+
+  return(wis)
+}
+
+#' Compute absolute error
+#'
+#' Absolute error of a forecaster's point forecast.
+#'
+#' Intended to be used with `evaluate_predictions()`. Locates the point
+#' forecast (the entry whose quantile is `NA` or, failing that, the entry
+#' matching the median, 0.5) and returns its absolute error. Warns and
+#' returns `NA` when neither lookup yields exactly one entry.
+#'
+#' @param quantile vector of forecasted quantiles
+#' @param value vector of forecasted values
+#' @param actual_value vector of actual values of the same length as
+#'   `quantile`/`value` or a scalar
+#' @return Absolute error of the point forecast, or `NA` (with a warning).
+#' @export
+absolute_error <- function(quantile, value, actual_value) {
+  score_func_param_checker(quantile, value, actual_value, "absolute_error")
+  point_fcast <- which(is.na(quantile)) # explicit point forecast, if any
+  ae <- abs(actual_value - value)
+  if (length(point_fcast) == 1L) {
+    return(ae[point_fcast])
+  }
+  point_fcast <- which(find_quantile_match(quantile, 0.5)) # fall back to median
+  if (length(point_fcast) == 1L) {
+    return(ae[point_fcast])
+  }
+  warning(paste(
+    "Absolute error: Forecaster must return either a point forecast",
+    "with quantile == NA or a median with quantile == 0.5.",
+    "Returning NA."
+  ))
+  return(NA)
+}
+
+#' Generate interval coverage error measure function
+#'
+#' Returns an error measure function indicating whether a central interval
+#' covers the actual value. The interval is defined as the (alpha/2)-quantile
+#' to the (1 - alpha/2)-quantile, where alpha = 1 - coverage.
+#'
+#' @param coverage Nominal interval coverage (from 0 to 1).
+#' @return Function returning a logical coverage indicator, or `NA`.
+#' @export
+interval_coverage <- function(coverage) {
+  function(quantiles, value, actual_value) {
+    score_func_param_checker(quantiles, value, actual_value, "interval_coverage")
+    value <- value[!is.na(quantiles)] # keep only entries with a quantile level
+    quantiles <- quantiles[!is.na(quantiles)]
+    alpha <- 1 - coverage # probability mass outside the central interval
+    lower_interval <- alpha / 2
+    upper_interval <- 1 - (alpha / 2)
+    if (!any(find_quantile_match(quantiles, lower_interval)) ||
+      !any(find_quantile_match(quantiles, upper_interval))) {
+      warning(paste(
+        "Interval Coverage:",
+        "Quantiles must cover an interval of specified width",
+        "centered at 0.5. Returning NA."
+      ))
+      return(NA)
+    }
+
+    lower <- value[which(find_quantile_match(quantiles, lower_interval))]
+    upper <- value[which(find_quantile_match(quantiles, upper_interval))]
+    return(actual_value[1] >= lower & actual_value[1] <= upper)
+  }
+}
+
 sharpness <- function(quantile, value, actual_value) {
   weighted_interval_score(quantile, value, actual_value) -
     overprediction(quantile, value, actual_value) -
@@ -74,7 +192,14 @@ sharpness <- function(quantile, value, actual_value) {
 # Utility functions required from evalcast that are not exported
 
 is_symmetric <- function(x, tol = 1e-8) {
-  x <- sort(x)
+  # Checking if `x` is sorted is much faster than trying to sort it again
+  if (is.unsorted(x, na.rm = TRUE)) {
+    # Implicitly drops NA values
+    x <- sort(x)
+  } else {
+    # Match `sort` behavior
+    x <- x[!is.na(x)]
+  }
   all(abs(x + rev(x) - 1) < tol)
 }
 
@@ -106,31 +231,33 @@ get_quantile_prediction_factory <- function(val_to_match, tol = 1e-8) {
 score_func_param_checker <- function(quantiles, values, actual_value, id = "") {
   id_str <- paste0(id, ": ")
   if (length(actual_value) > 1) {
-    assert_that(length(actual_value) == length(values),
-      msg = paste0(
+    if (length(actual_value) != length(values)) {
+      stop(paste0(
         id_str,
         "actual_value must be a scalar or the same length",
         " as values"
-      )
-    )
+      ))
+    }
     actual_value <- unique(actual_value)
   }
-  assert_that(length(actual_value) == 1,
-    msg = paste0(
+
+  if (length(actual_value) != 1) {
+    stop(paste0(
       id_str,
       "actual_value must have exactly 1 unique value"
-    )
-  )
-  assert_that(length(quantiles) == length(values),
-    msg = paste0(
+    ))
+  }
+  if (length(quantiles) != length(values)) {
+    stop(paste0(
       id_str,
       "quantiles and values must be of the same length"
-    )
-  )
-  assert_that(!any(duplicated(quantiles)),
-    msg = paste0(
+    ))
+  }
+
+  if (anyDuplicated(quantiles)) {
+    stop(paste0(
       id_str,
       "quantiles must be unique."
-    )
-  )
+    ))
+  }
 }
diff --git a/app/R/data_manipulation.R b/app/R/data_manipulation.R
@@ -1,4 +1,3 @@
-
 renameScoreCol <- function(filteredScoreDf, scoreType, coverageInterval) {
   if (scoreType == "wis") {
     filteredScoreDf <- filteredScoreDf %>% rename(Score = wis)
diff --git a/app/R/delphiLayout.R b/app/R/delphiLayout.R
@@ -1,4 +1,3 @@
-
 # create a common delphi header
 delphiHeaderUI <- function(id = "delphi-header", title = "My App") {
   # Get css file
diff --git a/app/R/exportScores.R b/app/R/exportScores.R
@@ -1,4 +1,3 @@
-
 exportScoresUI <- function(id = "exportScores") {
   ns <- shiny::NS(id)
   div(
diff --git a/app/global.R b/app/global.R
@@ -8,7 +8,7 @@ library(viridis)
 library(tsibble)
 library(covidcast)
 
-appVersion <- "6.0.0"
+appVersion <- "6.1.0"
 
 COVERAGE_INTERVALS <- c("10", "20", "30", "40", "50", "60", "70", "80", "90", "95", "98")
 DEATH_FILTER <- "deaths_incidence_num"
diff --git a/app/server.R b/app/server.R
@@ -396,7 +396,7 @@ server <- function(input, output, session) {
           titlefont = list(size = 12)
         )
       ) %>%
-      config(displayModeBar = F)
+      config(displayModeBar = FALSE)
     return(finalPlot)
   }
 
@@ -463,7 +463,7 @@ server <- function(input, output, session) {
         hovermode = "x unified",
         legend = list(orientation = "h", y = -0.1, title = list(text = NULL))
       ) %>%
-      config(displayModeBar = F)
+      config(displayModeBar = FALSE)
     # Remove the extra grouping from the legend: "(___,1)"
     for (i in seq_along(finalPlot$x$data)) {
       if (!is.null(finalPlot$x$data[[i]]$name)) {
@@ -654,12 +654,10 @@ server <- function(input, output, session) {
 
   # When the target variable changes, update available forecasters, locations, and CIs to choose from
   observeEvent(input$targetVariable, {
-
     ## summaryPlot will try to use PREV_AS_OF_DATA()
     ## since it has wrong data information, it needs to be removed
     PREV_AS_OF_DATA(NULL)
     if (input$targetVariable == "Deaths") {
-
       ## Defining Filter
       FILTER <- DEATH_FILTER
     } else if (input$targetVariable == "Cases") {

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-`
`2`	`1`	`renameScoreCol <- function(filteredScoreDf, scoreType, coverageInterval) {`
`3`	`2`	`if (scoreType == "wis") {`
`4`	`3`	`filteredScoreDf <- filteredScoreDf %>% rename(Score = wis)`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-`
`2`	`1`	`# create a common delphi header`
`3`	`2`	`delphiHeaderUI <- function(id = "delphi-header", title = "My App") {`
`4`	`3`	`# Get css file`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-`
`2`	`1`	`exportScoresUI <- function(id = "exportScores") {`
`3`	`2`	`ns <- shiny::NS(id)`
`4`	`3`	`div(`