SAFEHR-data · milanmlft · Aug 27, 2024 · Aug 22, 2024 · Aug 22, 2024 · Aug 22, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,6 +16,7 @@ Imports:
     glue,
     tidyr,
     withr,
+    forcats,
     readr,
     lubridate,
     dplyr

diff --git a/NAMESPACE b/NAMESPACE
@@ -8,8 +8,10 @@ importFrom(duckdb,duckdb)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,geom_bar)
 importFrom(ggplot2,geom_boxplot)
+importFrom(ggplot2,geom_col)
 importFrom(ggplot2,ggplot)
 importFrom(ggplot2,ggtitle)
+importFrom(ggplot2,labs)
 importFrom(ggplot2,xlab)
 importFrom(ggplot2,ylab)
 importFrom(glue,glue)

diff --git a/R/app_server.R b/R/app_server.R
@@ -16,10 +16,10 @@ app_server <- function(input, output, session) {
   selected_row <- mod_datatable_server("totals", selected_data)
   selected_dates <- mod_date_range_server("date_range")
 
-  # TODO: refactor monthly_count and stat_numeric modules into a single module
+  # TODO: refactor monthly_count and summary_stat modules into a single module?n
   # https://github.com/UCLH-Foundry/omop-data-catalogue/issues/30
   mod_monthly_count_server("monthly_count", monthly_counts, selected_row, selected_dates)
-  mod_stat_numeric_server("stat_numeric", summary_stats, selected_row)
+  mod_summary_stat_server("summary_stat", summary_stats, selected_row)
 
   mod_export_tab_server("export_tab", selected_data)
 }
diff --git a/R/app_ui.R b/R/app_ui.R
@@ -22,7 +22,7 @@ app_ui <- function(request) {
         card(mod_datatable_ui("totals")),
         layout_columns(
           card(mod_monthly_count_ui("monthly_count")),
-          card(mod_stat_numeric_ui("stat_numeric"))
+          card(mod_summary_stat_ui("summary_stat"))
         )
       ),
       nav_panel(

diff --git a/R/fct_stat_numeric_plot.R b/R/fct_stat_numeric_plot.R
diff --git a/R/fct_summary_stat_plot.R b/R/fct_summary_stat_plot.R
@@ -0,0 +1,111 @@
+#' summary_stat_plot
+#'
+#' Wrapper function to generate a plot for a summary statistic depending on its type
+#' (categorical or numeric).
+#'
+#' @param summary_stats A `data.frame` containing the summary statistics.
+#' @param plot_title A `character`, to be used as title of the plot.
+#'
+#' @return A `ggplot2` object.
+#'
+#' @noRd
+summary_stat_plot <- function(summary_stats, plot_title) {
+  if (.is_categorical(summary_stats)) {
+    stat_categorical_plot(summary_stats, plot_title)
+  } else {
+    stat_numeric_plot(summary_stats, plot_title)
+  }
+}
+
+#' stat_numeric_plot
+#'
+#' Generates a boxplot of the summary statistics for a numeric concept.
+#' Uses pre-calculated `mean` and `sd` to generate the boxplot.
+#'
+#' Expects the input data to have the following columns:
+#' - `concept_id`: The concept ID.
+#' - `summary_attribute`: The type of the summary attribute, e.g. `mean` or `sd`.
+#' - `value_as_number`: The value of the summary attribute as a numeric value.
+#'
+#' @param summary_stats A `data.frame` containing the summary statistics.
+#' @param plot_title A `character`, to be used as title of the plot.
+#'
+#' @return A `ggplot2` object.
+#'
+#' @importFrom ggplot2 ggplot aes geom_boxplot
+#' @noRd
+stat_numeric_plot <- function(summary_stats, plot_title) {
+  processed_stats <- .process_numeric_stats(summary_stats)
+
+  mean <- sd <- concept_id <- NULL
+  ggplot(processed_stats, aes(x = factor(concept_id))) +
+    geom_boxplot(
+      aes(
+        lower = mean - sd,
+        upper = mean + sd,
+        middle = mean,
+        ymin = mean - 3 * sd,
+        ymax = mean + 3 * sd
+      ),
+      stat = "identity"
+    ) +
+    xlab(NULL) +
+    ggtitle(plot_title)
+}
+
+#' stat_categorical_plot
+#'
+#' Generates a bar plot of the category frequencies for a categorical concept.
+#' Uses pre-calculated frequencies to generate the plot.
+#'
+#' Expects the input data to have the following columns:
+#' - `concept_id`: The concept ID.
+#' - `summary_attribute`: The type of the summary attribute, should be 'frequency'.
+#' - `value_as_string`: The name of the category
+#' - `value_as_number`: The value of the summary attribute as a numeric value.
+#'
+#' @param summary_stats A `data.frame` containing the summary statistics.
+#' @param plot_title A `character`, to be used as title of the plot.
+#'
+#' @return A `ggplot2` object.
+#'
+#' @importFrom ggplot2 ggplot aes geom_col labs
+#' @noRd
+stat_categorical_plot <- function(summary_stats, plot_title) {
+  # We expect only single concept ID at this point
+  # NOTE: this might change when we support bundles of concepts, in which case we might want to
+  # display the entire batch in one plot
+  stopifnot("Expecting a single concept ID" = length(unique(summary_stats$concept_id)) == 1)
+  stopifnot(c("concept_id", "value_as_string", "value_as_number") %in% names(summary_stats))
+
+  summary_stats$value_as_string <- as.factor(summary_stats$value_as_string)
+  # Reorder factor levels by frequency
+  summary_stats$value_as_string <- forcats::fct_reorder(
+    summary_stats$value_as_string, summary_stats$value_as_number,
+    .desc = TRUE
+  )
+
+  value_as_string <- value_as_number <- NULL
+  ggplot(summary_stats, aes(value_as_string, value_as_number)) +
+    geom_col(aes(fill = value_as_string), show.legend = FALSE) +
+    labs(x = "Category", y = "Frequency") +
+    ggtitle(plot_title)
+}
+
+.process_numeric_stats <- function(summary_stats) {
+  # We expect only single concept ID at this point
+  # NOTE: this might change when we support bundles of concepts, in which case we might want to
+  # display the entire batch in one plot
+  stopifnot("Expecting a single concept ID" = length(unique(summary_stats$concept_id)) == 1)
+  stopifnot(c("concept_id", "summary_attribute", "value_as_number") %in% names(summary_stats))
+
+  tidyr::pivot_wider(summary_stats,
+    id_cols = "concept_id",
+    names_from = "summary_attribute",
+    values_from = "value_as_number"
+  )
+}
+
+.is_categorical <- function(summary_stats) {
+  "frequency" %in% summary_stats$summary_attribute
+}
diff --git a/R/mod_monthly_count.R b/R/mod_monthly_count.R
@@ -28,17 +28,17 @@ mod_monthly_count_server <- function(id, data, selected_concept, selected_dates)
   stopifnot(is.reactive(selected_concept))
   stopifnot(is.reactive(selected_dates))
 
-
   moduleServer(id, function(input, output, session) {
-    ## Filter data based on selected concept and datea range
+    ## Filter data based on selected concept and date range
     selected_concept_id <- reactive(selected_concept()$concept_id)
     selected_concept_name <- reactive(selected_concept()$concept_name)
     filtered_monthly_counts <- reactive({
       if (is.null(selected_concept_id())) {
         return(NULL)
       }
-      req(selected_dates()) # we expect always to have dates selected
       out <- data[data$concept_id == selected_concept_id(), ]
+
+      req(selected_dates()) # we expect always to have dates selected
       .filter_dates(out, selected_dates())
     })
 
@@ -47,21 +47,20 @@ mod_monthly_count_server <- function(id, data, selected_concept, selected_dates)
       if (is.null(filtered_monthly_counts()) || nrow(filtered_monthly_counts()) == 0) {
         return(NULL)
       }
+
       monthly_count_plot(filtered_monthly_counts(), selected_concept_name())
     })
   })
 }
 
 
-
 .filter_dates <- function(monthly_counts, date_range) {
-  range_years <- lubridate::year(date_range)
-  range_months <- lubridate::month(date_range)
+  date_range <- as.Date(date_range)
+  if (date_range[2] < date_range[1]) {
+    stop("Invalid date range, end date is before start date")
+  }
 
-  date_year <- date_month <- NULL
-  dplyr::filter(
-    monthly_counts,
-    date_year >= range_years[1] & date_year <= range_years[2],
-    date_month >= range_months[1] & date_month <= range_months[2]
-  )
+  dates <- lubridate::make_date(year = monthly_counts$date_year, month = monthly_counts$date_month)
+  keep_dates <- dplyr::between(dates, date_range[1], date_range[2])
+  dplyr::filter(monthly_counts, keep_dates)
 }
diff --git a/R/mod_stat_numeric.R → R/mod_summary_stat.R b/R/mod_stat_numeric.R → R/mod_summary_stat.R
@@ -1,4 +1,4 @@
-#' stat_numeric UI Function
+#' summary_stat UI Function
 #'
 #' Displays the boxplot of the summary statistics for a numeric concept.
 #'
@@ -7,14 +7,14 @@
 #' @noRd
 #'
 #' @importFrom shiny NS tagList
-mod_stat_numeric_ui <- function(id) {
+mod_summary_stat_ui <- function(id) {
   ns <- NS(id)
   tagList(
-    plotOutput(ns("stat_numeric_plot"), height = 250)
+    plotOutput(ns("summary_stat_plot"), height = 250)
   )
 }
 
-#' stat_numeric Server Functions
+#' summary_stat Server Functions
 #'
 #' Generates the boxplot of the summary statistics for a numeric concept.
 #' When no concept was selected, an empty plot is returned.
@@ -23,7 +23,7 @@ mod_stat_numeric_ui <- function(id) {
 #' @param selected_concept Reactive value containing the selected concept, used for filtering
 #'
 #' @noRd
-mod_stat_numeric_server <- function(id, data, selected_concept) {
+mod_summary_stat_server <- function(id, data, selected_concept) {
   stopifnot(is.data.frame(data))
   stopifnot(is.reactive(selected_concept))
 
@@ -40,10 +40,11 @@ mod_stat_numeric_server <- function(id, data, selected_concept) {
       data[data$concept_id == selected_concept_id(), ]
     })
 
-    output$stat_numeric_plot <- renderPlot({
-      ## Return empty plot if no data is selected
+    output$summary_stat_plot <- renderPlot({
+      ## Return empty plot if no data is selected or if no data is available for the selected concept
       if (is.null(filtered_summary_stats())) return(NULL)
-      stat_numeric_plot(filtered_summary_stats(), selected_concept_name())
+      if (nrow(filtered_summary_stats()) == 0) return(NULL)
+      summary_stat_plot(filtered_summary_stats(), selected_concept_name())
     })
   })
 }
diff --git a/tests/testthat/test-mod_monthly_count.R b/tests/testthat/test-mod_monthly_count.R
@@ -9,7 +9,7 @@ mock_monthly_counts <- data.frame(
 # Application-logic tests ---------------------------------------------------------------------
 
 mock_concept_row <- reactiveVal()
-mock_date_range <- reactiveVal(c("2000-01-01", "2200-12-31"))
+mock_date_range <- reactiveVal(c("2019-04-01", "2024-08-01"))
 
 test_that("mod_monthly_count_server reacts to changes in the selected concept", {
   testServer(
@@ -62,6 +62,31 @@ test_that("mod_monthly_count_server reacts to changes in the selected date range
   )
 })
 
+test_that("Date filtering works as expected", {
+  testServer(
+    mod_monthly_count_server,
+    args = list(data = mock_monthly_counts, selected_concept = mock_concept_row, selected_dates = mock_date_range),
+    {
+      # We have data for this concept from 2019-04 to 2020-05
+      mock_concept_row(list(concept_id = 40213251, concept_name = "test")) # update reactive value
+
+      # Test boundary dates, we only care up to the month level
+      selected_dates <- c("2019-04-01", "2020-05-01")
+      mock_date_range(selected_dates)
+      session$flushReact()
+      expect_equal(nrow(filtered_monthly_counts()), 3)
+
+      # This checks a previous bug where a row with date_month larger than the date range months
+      # would always get removed while it should be kept in case the year is within the range
+      # e.g. 2019-04 should be kept when the range is 2019-01 to 2020-01
+      selected_dates2 <- c("2019-01-01", "2020-01-01")
+      mock_date_range(selected_dates2)
+      session$flushReact()
+      expect_equal(nrow(filtered_monthly_counts()), 1)
+    }
+  )
+})
+
 test_that("mod_monthly_count_server generates an empty plot when no row is selected", {
   testServer(
     mod_monthly_count_server,
@@ -73,6 +98,17 @@ test_that("mod_monthly_count_server generates an empty plot when no row is selec
   )
 })
 
+test_that("mod_monthly_count_server generates an empty plot when no data is available for the selected concept", {
+  testServer(
+    mod_monthly_count_server,
+    args = list(data = mock_monthly_counts, selected_concept = mock_concept_row, selected_dates = mock_date_range),
+    {
+      mock_concept_row(list(concept_id = 9999999, concept_name = "idontexist"))
+      expect_length(output$monthly_count_plot$coordmap$panels[[1]]$mapping, 0)
+    }
+  )
+})
+
 test_that("module ui works", {
   ui <- mod_monthly_count_ui(id = "test")
   golem::expect_shinytaglist(ui)
@@ -99,3 +135,8 @@ test_that("monthly_count_plot correctly parses dates", {
   expect_false(is.null(p$mapping))
   expect_false(is.null(p$layers))
 })
+
+test_that("Date range filtering fails for invalid date range", {
+  selected_dates <- c("2020-01-01", "2019-01-01")
+  expect_error(.filter_dates(monthly_counts, selected_dates), "Invalid date range, end date is before start date")
+})
-Original file line number
+Diff line change
@@ Expand Up / @@ -16,6 +16,7 @@ Imports: @@
         glue,
         tidyr,
         withr,
+        forcats,
         readr,
         lubridate,
         dplyr
@@ Expand Down @@