Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add summary stat plot for categorical concepts #36

Merged
merged 20 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Imports:
glue,
tidyr,
withr,
forcats,
readr,
lubridate,
dplyr
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@ importFrom(duckdb,duckdb)
importFrom(ggplot2,aes)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_boxplot)
importFrom(ggplot2,geom_col)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,labs)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(glue,glue)
Expand Down
4 changes: 2 additions & 2 deletions R/app_server.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ app_server <- function(input, output, session) {
selected_row <- mod_datatable_server("totals", selected_data)
selected_dates <- mod_date_range_server("date_range")

# TODO: refactor monthly_count and stat_numeric modules into a single module
# TODO: refactor monthly_count and summary_stat modules into a single module?n
# https://github.com/UCLH-Foundry/omop-data-catalogue/issues/30
mod_monthly_count_server("monthly_count", monthly_counts, selected_row, selected_dates)
mod_stat_numeric_server("stat_numeric", summary_stats, selected_row)
mod_summary_stat_server("summary_stat", summary_stats, selected_row)

mod_export_tab_server("export_tab", selected_data)
}
2 changes: 1 addition & 1 deletion R/app_ui.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ app_ui <- function(request) {
card(mod_datatable_ui("totals")),
layout_columns(
card(mod_monthly_count_ui("monthly_count")),
card(mod_stat_numeric_ui("stat_numeric"))
card(mod_summary_stat_ui("summary_stat"))
)
),
nav_panel(
Expand Down
49 changes: 0 additions & 49 deletions R/fct_stat_numeric_plot.R

This file was deleted.

111 changes: 111 additions & 0 deletions R/fct_summary_stat_plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#' summary_stat_plot
#'
#' Wrapper function to generate a plot for a summary statistic depending on its type
#' (categorical or numeric).
#'
#' @param summary_stats A `data.frame` containing the summary statistics.
#' @param plot_title A `character`, to be used as title of the plot.
#'
#' @return A `ggplot2` object.
#'
#' @noRd
summary_stat_plot <- function(summary_stats, plot_title) {
if (.is_categorical(summary_stats)) {
stat_categorical_plot(summary_stats, plot_title)
} else {
stat_numeric_plot(summary_stats, plot_title)
}
}

#' stat_numeric_plot
#'
#' Generates a boxplot of the summary statistics for a numeric concept.
#' Uses pre-calculated `mean` and `sd` to generate the boxplot.
#'
#' Expects the input data to have the following columns:
#' - `concept_id`: The concept ID.
#' - `summary_attribute`: The type of the summary attribute, e.g. `mean` or `sd`.
#' - `value_as_number`: The value of the summary attribute as a numeric value.
#'
#' @param summary_stats A `data.frame` containing the summary statistics.
#' @param plot_title A `character`, to be used as title of the plot.
#'
#' @return A `ggplot2` object.
#'
#' @importFrom ggplot2 ggplot aes geom_boxplot
#' @noRd
stat_numeric_plot <- function(summary_stats, plot_title) {
processed_stats <- .process_numeric_stats(summary_stats)

mean <- sd <- concept_id <- NULL
ggplot(processed_stats, aes(x = factor(concept_id))) +
geom_boxplot(
aes(
lower = mean - sd,
upper = mean + sd,
middle = mean,
ymin = mean - 3 * sd,
ymax = mean + 3 * sd
),
stat = "identity"
) +
xlab(NULL) +
ggtitle(plot_title)
}

#' stat_categorical_plot
#'
#' Generates a bar plot of the category frequencies for a categorical concept.
#' Uses pre-calculated frequencies to generate the plot.
#'
#' Expects the input data to have the following columns:
#' - `concept_id`: The concept ID.
#' - `summary_attribute`: The type of the summary attribute, should be 'frequency'.
#' - `value_as_string`: The name of the category
#' - `value_as_number`: The value of the summary attribute as a numeric value.
#'
#' @param summary_stats A `data.frame` containing the summary statistics.
#' @param plot_title A `character`, to be used as title of the plot.
#'
#' @return A `ggplot2` object.
#'
#' @importFrom ggplot2 ggplot aes geom_col labs
#' @noRd
stat_categorical_plot <- function(summary_stats, plot_title) {
# We expect only single concept ID at this point
# NOTE: this might change when we support bundles of concepts, in which case we might want to
# display the entire batch in one plot
stopifnot("Expecting a single concept ID" = length(unique(summary_stats$concept_id)) == 1)
stopifnot(c("concept_id", "value_as_string", "value_as_number") %in% names(summary_stats))

summary_stats$value_as_string <- as.factor(summary_stats$value_as_string)
# Reorder factor levels by frequency
summary_stats$value_as_string <- forcats::fct_reorder(
summary_stats$value_as_string, summary_stats$value_as_number,
.desc = TRUE
)

value_as_string <- value_as_number <- NULL
ggplot(summary_stats, aes(value_as_string, value_as_number)) +
geom_col(aes(fill = value_as_string), show.legend = FALSE) +
labs(x = "Category", y = "Frequency") +
ggtitle(plot_title)
}

.process_numeric_stats <- function(summary_stats) {
# We expect only single concept ID at this point
# NOTE: this might change when we support bundles of concepts, in which case we might want to
# display the entire batch in one plot
stopifnot("Expecting a single concept ID" = length(unique(summary_stats$concept_id)) == 1)
stopifnot(c("concept_id", "summary_attribute", "value_as_number") %in% names(summary_stats))

tidyr::pivot_wider(summary_stats,
id_cols = "concept_id",
names_from = "summary_attribute",
values_from = "value_as_number"
)
}

.is_categorical <- function(summary_stats) {
"frequency" %in% summary_stats$summary_attribute
}
23 changes: 11 additions & 12 deletions R/mod_monthly_count.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,17 @@ mod_monthly_count_server <- function(id, data, selected_concept, selected_dates)
stopifnot(is.reactive(selected_concept))
stopifnot(is.reactive(selected_dates))


moduleServer(id, function(input, output, session) {
## Filter data based on selected concept and datea range
## Filter data based on selected concept and date range
selected_concept_id <- reactive(selected_concept()$concept_id)
selected_concept_name <- reactive(selected_concept()$concept_name)
filtered_monthly_counts <- reactive({
if (is.null(selected_concept_id())) {
return(NULL)
}
req(selected_dates()) # we expect always to have dates selected
out <- data[data$concept_id == selected_concept_id(), ]

req(selected_dates()) # we expect always to have dates selected
.filter_dates(out, selected_dates())
})

Expand All @@ -47,21 +47,20 @@ mod_monthly_count_server <- function(id, data, selected_concept, selected_dates)
if (is.null(filtered_monthly_counts()) || nrow(filtered_monthly_counts()) == 0) {
return(NULL)
}

monthly_count_plot(filtered_monthly_counts(), selected_concept_name())
})
})
}



.filter_dates <- function(monthly_counts, date_range) {
range_years <- lubridate::year(date_range)
range_months <- lubridate::month(date_range)
date_range <- as.Date(date_range)
if (date_range[2] < date_range[1]) {
stop("Invalid date range, end date is before start date")
}

date_year <- date_month <- NULL
dplyr::filter(
monthly_counts,
date_year >= range_years[1] & date_year <= range_years[2],
date_month >= range_months[1] & date_month <= range_months[2]
)
dates <- lubridate::make_date(year = monthly_counts$date_year, month = monthly_counts$date_month)
keep_dates <- dplyr::between(dates, date_range[1], date_range[2])
dplyr::filter(monthly_counts, keep_dates)
}
17 changes: 9 additions & 8 deletions R/mod_stat_numeric.R → R/mod_summary_stat.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' stat_numeric UI Function
#' summary_stat UI Function
#'
#' Displays the boxplot of the summary statistics for a numeric concept.
#'
Expand All @@ -7,14 +7,14 @@
#' @noRd
#'
#' @importFrom shiny NS tagList
mod_stat_numeric_ui <- function(id) {
mod_summary_stat_ui <- function(id) {
ns <- NS(id)
tagList(
plotOutput(ns("stat_numeric_plot"), height = 250)
plotOutput(ns("summary_stat_plot"), height = 250)
)
}

#' stat_numeric Server Functions
#' summary_stat Server Functions
#'
#' Generates the boxplot of the summary statistics for a numeric concept.
#' When no concept was selected, an empty plot is returned.
Expand All @@ -23,7 +23,7 @@ mod_stat_numeric_ui <- function(id) {
#' @param selected_concept Reactive value containing the selected concept, used for filtering
#'
#' @noRd
mod_stat_numeric_server <- function(id, data, selected_concept) {
mod_summary_stat_server <- function(id, data, selected_concept) {
stopifnot(is.data.frame(data))
stopifnot(is.reactive(selected_concept))

Expand All @@ -40,10 +40,11 @@ mod_stat_numeric_server <- function(id, data, selected_concept) {
data[data$concept_id == selected_concept_id(), ]
})

output$stat_numeric_plot <- renderPlot({
## Return empty plot if no data is selected
output$summary_stat_plot <- renderPlot({
## Return empty plot if no data is selected or if no data is available for the selected concept
if (is.null(filtered_summary_stats())) return(NULL)
stat_numeric_plot(filtered_summary_stats(), selected_concept_name())
if (nrow(filtered_summary_stats()) == 0) return(NULL)
summary_stat_plot(filtered_summary_stats(), selected_concept_name())
})
})
}
43 changes: 42 additions & 1 deletion tests/testthat/test-mod_monthly_count.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ mock_monthly_counts <- data.frame(
# Application-logic tests ---------------------------------------------------------------------

mock_concept_row <- reactiveVal()
mock_date_range <- reactiveVal(c("2000-01-01", "2200-12-31"))
mock_date_range <- reactiveVal(c("2019-04-01", "2024-08-01"))

test_that("mod_monthly_count_server reacts to changes in the selected concept", {
testServer(
Expand Down Expand Up @@ -62,6 +62,31 @@ test_that("mod_monthly_count_server reacts to changes in the selected date range
)
})

test_that("Date filtering works as expected", {
testServer(
mod_monthly_count_server,
args = list(data = mock_monthly_counts, selected_concept = mock_concept_row, selected_dates = mock_date_range),
{
# We have data for this concept from 2019-04 to 2020-05
mock_concept_row(list(concept_id = 40213251, concept_name = "test")) # update reactive value

# Test boundary dates, we only care up to the month level
selected_dates <- c("2019-04-01", "2020-05-01")
mock_date_range(selected_dates)
session$flushReact()
expect_equal(nrow(filtered_monthly_counts()), 3)

# This checks a previous bug where a row with date_month larger than the date range months
# would always get removed while it should be kept in case the year is within the range
# e.g. 2019-04 should be kept when the range is 2019-01 to 2020-01
selected_dates2 <- c("2019-01-01", "2020-01-01")
mock_date_range(selected_dates2)
session$flushReact()
expect_equal(nrow(filtered_monthly_counts()), 1)
}
)
})

test_that("mod_monthly_count_server generates an empty plot when no row is selected", {
testServer(
mod_monthly_count_server,
Expand All @@ -73,6 +98,17 @@ test_that("mod_monthly_count_server generates an empty plot when no row is selec
)
})

test_that("mod_monthly_count_server generates an empty plot when no data is available for the selected concept", {
testServer(
mod_monthly_count_server,
args = list(data = mock_monthly_counts, selected_concept = mock_concept_row, selected_dates = mock_date_range),
{
mock_concept_row(list(concept_id = 9999999, concept_name = "idontexist"))
expect_length(output$monthly_count_plot$coordmap$panels[[1]]$mapping, 0)
}
)
})

test_that("module ui works", {
ui <- mod_monthly_count_ui(id = "test")
golem::expect_shinytaglist(ui)
Expand All @@ -99,3 +135,8 @@ test_that("monthly_count_plot correctly parses dates", {
expect_false(is.null(p$mapping))
expect_false(is.null(p$layers))
})

test_that("Date range filtering fails for invalid date range", {
selected_dates <- c("2020-01-01", "2019-01-01")
expect_error(.filter_dates(monthly_counts, selected_dates), "Invalid date range, end date is before start date")
})
Loading