UI-Research · wcurrangroome · Mar 14, 2026 · Mar 14, 2026
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,6 +3,11 @@
 export("%>%")
 export(calculate_custom_geographies)
 export(compile_acs_data)
+export(define_across_percent)
+export(define_across_sum)
+export(define_metadata)
+export(define_one_minus)
+export(define_percent)
 export(filter_variables)
 export(get_acs_codebook)
 export(list_acs_variables)

diff --git a/R/calculate_cvs.R b/R/calculate_cvs.R
@@ -47,7 +47,7 @@ se_sum = function(...) {
       data_long %>% dplyr::filter(is.na(estimate)),
       data_long %>%
         dplyr::filter(estimate == 0) %>%
-        dplyr::slice_max(order_by = moe, by = "observation", n = 1)) %>%
+        dplyr::slice_max(order_by = moe, by = "observation", n = 1, with_ties = FALSE)) %>%
     dplyr::group_split(observation) %>%
     purrr::map(~ .x %>% dplyr::pull(moe)) %>%
     purrr::map(se_simple) %>%
@@ -104,6 +104,8 @@ se_proportion_ratio = function(
     ((1 / estimate_denominator) * sqrt( radical_term_one + radical_term_two )),
     ((1 / estimate_denominator) * sqrt( radical_term_one - radical_term_two )))
 
+  se = dplyr::if_else(estimate_denominator == 0, NA_real_, se)
+
   return(se)
 }
 
@@ -229,6 +231,21 @@ se_weighted_mean = function(
   return(se)
 }
 
+#' @title Calculate a coefficient of variation
+#' @details Return a coefficient of variation at the 90% level
+#' @param estimate The estimate
+#' @param se The standard error
+#' @returns A coefficient of variation at the 90% level
+cv = function(estimate, se) {
+  cv = se / estimate * 100
+
+  ## when the estimate is zero, this produces an infinite value
+  ## replacing this with an NA value
+  cv = dplyr::if_else(is.infinite(cv), NA, cv)
+
+  return(cv)
+}
+
 #' @title Calculate margins of error for derived variables
 #' @details Calculates margins of error for all derived ACS estimates. Standard
 #'   errors are computed internally as an intermediate step but are not included
@@ -269,7 +286,7 @@ calculate_moes = function(.df) {
 
   ## all variables for which to calculate CVs
   cv_variables = codebook1 %>%
-    dplyr::filter(!se_calculation_type %in% c("metadata", "unknown")) %>%
+    dplyr::filter(!se_calculation_type %in% c("metadata", "unknown", "weighted_average")) %>%
     dplyr::pull(calculated_variable)
 
   ## a named list of variables, grouped by SE calculation type
@@ -317,6 +334,7 @@ calculate_moes = function(.df) {
         .fns = function(x) {
 
           current_column = dplyr::cur_column()
+          original_column = current_column
 
           ## for "one minus" variables, use the underlying variable for error calculation
           if (current_column %in% se_types[["one_minus"]]) {
@@ -340,51 +358,95 @@ calculate_moes = function(.df) {
           denominator_moe_variables = denominator_estimate_variables %>%
             stringr::str_c("_M")
 
+          ## Get pre-parsed subtract variables from codebook
+          ## SE of a difference uses the same formula as SE of a sum:
+          ## SE(A - B) = sqrt(SE_A^2 + SE_B^2)
+          numerator_subtract_estimate_variables = codebook_row %>%
+            dplyr::pull(numerator_subtract_vars) %>%
+            unlist()
+          numerator_subtract_moe_variables = numerator_subtract_estimate_variables %>%
+            stringr::str_c("_M")
+          denominator_subtract_estimate_variables = codebook_row %>%
+            dplyr::pull(denominator_subtract_vars) %>%
+            unlist()
+          denominator_subtract_moe_variables = denominator_subtract_estimate_variables %>%
+            stringr::str_c("_M")
+
+          ## combine additive and subtractive variables for SE calculations
+          all_numerator_estimate_variables = c(numerator_estimate_variables, numerator_subtract_estimate_variables)
+          all_numerator_moe_variables = c(numerator_moe_variables, numerator_subtract_moe_variables)
+          all_denominator_estimate_variables = c(denominator_estimate_variables, denominator_subtract_estimate_variables)
+          all_denominator_moe_variables = c(denominator_moe_variables, denominator_subtract_moe_variables)
+
           ## for variables where we already have an MOE, this is simple
           if (current_column %in% c(se_types$raw, se_types$sum)) {
-            SE = se_simple(get(dplyr::cur_column() %>% paste0("_M")))}
+            SE = se_simple(get(current_column %>% paste0("_M")))
 
           ## for simple percent variables with one numerator, one denominator
-          if (current_column %in% se_types[["simple_percent"]]) {
+          } else if (current_column %in% se_types[["simple_percent"]]) {
             SE = se_proportion_ratio(
               estimate_numerator = get(numerator_estimate_variables),
               estimate_denominator = get(denominator_estimate_variables),
               moe_numerator = get(numerator_moe_variables),
-              moe_denominator = get(denominator_moe_variables))}
+              moe_denominator = get(denominator_moe_variables))
 
           ## for percents with summed/subtracted numerators, one denominator
-          if (current_column %in% se_types[["complex_numerator"]]) {
+          } else if (current_column %in% se_types[["complex_numerator"]]) {
+            numerator_estimate = rowSums(dplyr::select(., dplyr::all_of(numerator_estimate_variables)))
+            if (length(numerator_subtract_estimate_variables) > 0) {
+              numerator_estimate = numerator_estimate -
+                rowSums(dplyr::select(., dplyr::all_of(numerator_subtract_estimate_variables)))
+            }
             SE = se_proportion_ratio(
-              estimate_numerator = rowSums(dplyr::select(., dplyr::all_of(numerator_estimate_variables))),
+              estimate_numerator = numerator_estimate,
               estimate_denominator = get(denominator_estimate_variables),
               se_numerator = se_sum(
-                purrr::map(numerator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)),
-                purrr::map(numerator_estimate_variables, ~ df_with_sum_moes %>% dplyr::pull(.x))),
+                purrr::map(all_numerator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)),
+                purrr::map(all_numerator_estimate_variables, ~ df_with_sum_moes %>% dplyr::pull(.x))),
               se_denominator = se_simple(
-                purrr::map(denominator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)) %>% unlist()))}
+                purrr::map(denominator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)) %>% unlist()))
 
           ## for percents with one numerator, summed/subtracted denominators
-          if (current_column %in% se_types[["complex_denominator"]]) {
+          } else if (current_column %in% se_types[["complex_denominator"]]) {
+            denominator_estimate = rowSums(dplyr::select(., dplyr::all_of(denominator_estimate_variables)))
+            if (length(denominator_subtract_estimate_variables) > 0) {
+              denominator_estimate = denominator_estimate -
+                rowSums(dplyr::select(., dplyr::all_of(denominator_subtract_estimate_variables)))
+            }
             SE = se_proportion_ratio(
               estimate_numerator = get(numerator_estimate_variables),
-              estimate_denominator = rowSums(dplyr::select(., dplyr::all_of(denominator_estimate_variables))),
+              estimate_denominator = denominator_estimate,
               se_numerator = se_simple(
                 purrr::map(numerator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)) %>% unlist()),
               se_denominator = se_sum(
-                purrr::map(denominator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)),
-                purrr::map(denominator_estimate_variables, ~ df_with_sum_moes %>% dplyr::pull(.x))))}
+                purrr::map(all_denominator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)),
+                purrr::map(all_denominator_estimate_variables, ~ df_with_sum_moes %>% dplyr::pull(.x))))
 
           ## for percents with summed numerators and summed denominators
-          if (current_column %in% se_types[["complex_both"]]) {
+          } else if (current_column %in% se_types[["complex_both"]]) {
+            numerator_estimate = rowSums(dplyr::select(., dplyr::all_of(numerator_estimate_variables)))
+            if (length(numerator_subtract_estimate_variables) > 0) {
+              numerator_estimate = numerator_estimate -
+                rowSums(dplyr::select(., dplyr::all_of(numerator_subtract_estimate_variables)))
+            }
+            denominator_estimate = rowSums(dplyr::select(., dplyr::all_of(denominator_estimate_variables)))
+            if (length(denominator_subtract_estimate_variables) > 0) {
+              denominator_estimate = denominator_estimate -
+                rowSums(dplyr::select(., dplyr::all_of(denominator_subtract_estimate_variables)))
+            }
             SE = se_proportion_ratio(
-              estimate_numerator = rowSums(dplyr::select(., dplyr::all_of(numerator_estimate_variables))),
-              estimate_denominator = rowSums(dplyr::select(., dplyr::all_of(denominator_estimate_variables))),
+              estimate_numerator = numerator_estimate,
+              estimate_denominator = denominator_estimate,
               se_numerator = se_sum(
-                purrr::map(numerator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)),
-                purrr::map(numerator_estimate_variables, ~ df_with_sum_moes %>% dplyr::pull(.x))),
+                purrr::map(all_numerator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)),
+                purrr::map(all_numerator_estimate_variables, ~ df_with_sum_moes %>% dplyr::pull(.x))),
               se_denominator = se_sum(
-                purrr::map(denominator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)),
-                purrr::map(denominator_estimate_variables, ~ df_with_sum_moes %>% dplyr::pull(.x))))}
+                purrr::map(all_denominator_moe_variables, ~ df_with_sum_moes %>% dplyr::pull(.x)),
+                purrr::map(all_denominator_estimate_variables, ~ df_with_sum_moes %>% dplyr::pull(.x))))
+
+          } else {
+            stop(paste0("Unhandled SE calculation type for variable: ", original_column))
+          }
 
           return(SE)},
         .names = "{.col}_SE"))
@@ -417,4 +479,5 @@ calculate_moes = function(.df) {
 
 utils::globalVariables(c(
   "calculated_variable", "observation", "type", "estimate", "moe",
-  "se_calculation_type", "numerator_vars", "denominator_vars"))
+  "se_calculation_type", "numerator_vars", "numerator_subtract_vars",
+  "denominator_vars", "denominator_subtract_vars"))
diff --git a/R/compile_acs_data.R b/R/compile_acs_data.R
@@ -16,8 +16,9 @@ safe_divide = function(x, y) { dplyr::if_else(y == 0, 0, x / y) }
 #' @description Construct measures frequently used in social sciences
 #'    research, leveraging \code{tidycensus::get_acs()} to acquire raw estimates from
 #'    the Census Bureau API.
-#' @param tables A character vector of table names to include. Two formats are
-#'    accepted:
+#' @param tables A character vector, list, or NULL specifying which data to
+#'    include. Three kinds of elements are accepted and can be mixed freely
+#'    inside a \code{list()}:
 #'    \itemize{
 #'      \item \strong{Registered table names} (e.g., \code{"race"}, \code{"snap"}).
 #'        These are pre-built tables with curated variable definitions. Use
@@ -28,8 +29,16 @@ safe_divide = function(x, y) { dplyr::if_else(y == 0, 0, x / y) }
 #'        label hierarchy is parsed, and percentages are computed automatically.
 #'        Use the \code{denominator} parameter to control how percentages are
 #'        calculated for these tables.
+#'      \item \strong{DSL definition objects} created with \code{\link{define_percent}},
+#'        \code{\link{define_across_percent}}, \code{\link{define_across_sum}},
+#'        \code{\link{define_one_minus}}, or \code{\link{define_metadata}}.
+#'        These let you compute custom derived variables from the columns
+#'        produced by the tables you request. User definitions are executed
+#'        after all registered and auto-table definitions, and their results
+#'        appear in the codebook and have MOEs computed automatically.
 #'    }
-#'    Both formats can be mixed freely (e.g., \code{c("snap", "B25070")}).
+#'    When mixing strings and definitions, wrap everything in \code{list()}
+#'    (e.g., \code{list("snap", define_percent(...))}).
 #'    If an ACS code corresponds to an already-registered table, the registered
 #'    version is used automatically.
 #'    When NULL (default), all registered tables are included (unregistered ACS
@@ -78,6 +87,16 @@ safe_divide = function(x, y) { dplyr::if_else(y == 0, 0, x / y) }
 #' ## Use table total as denominator instead of parent subtotals
 #' df = compile_acs_data(tables = "B25070", denominator = "total",
 #'                       years = 2022, geography = "state", states = "DC")
+#'
+#' ## Add a custom derived variable alongside a registered table
+#' df = compile_acs_data(
+#'   tables = list(
+#'     "snap",
+#'     define_percent("snap_not_received_percent",
+#'                    numerator_variables = c("snap_universe", "snap_received"),
+#'                    numerator_subtract_variables = c("snap_received"),
+#'                    denominator_variables = c("snap_universe"))),
+#'   years = 2022, geography = "county", states = "DC")
 #'   }
 #' @export
 #' @importFrom magrittr %>%
@@ -132,10 +151,24 @@ compile_acs_data = function(
     stop(paste0("`denominator` must be \"parent\", \"total\", or a valid ACS variable code (e.g., \"B25070_001\"). Got: \"", denominator, "\"."))
   }
 
-  ####----Partition tables into registry vs auto (raw ACS codes)----####
+  ####----Partition tables into registry vs auto vs user definitions----####
   auto_table_entries = list()
   registry_tables = tables
   raw_acs_codes = character(0)
+  user_definitions = list()
+  has_explicit_tables = !is.null(tables)
+
+  if (!is.null(tables)) {
+    ## separate DSL definitions from string elements
+    if (is.list(tables) && !is.character(tables)) {
+      user_definitions = purrr::keep(tables, is_dsl_definition)
+      string_elements = purrr::keep(tables, function(x) is.character(x) && length(x) == 1)
+      tables = if (length(string_elements) > 0) as.character(string_elements) else NULL
+    }
+
+    ## validate user definitions structurally (fail fast)
+    purrr::walk(user_definitions, validate_definition)
+  }
 
   if (!is.null(tables)) {
     construct_map = build_construct_map()
@@ -196,9 +229,13 @@ compile_acs_data = function(
 
   ####----Resolve tables and variables via the registry----####
   ## resolve which tables to include
-  if (is.null(tables)) {
-    ## default: all internal table names
+  if (is.null(tables) && !has_explicit_tables) {
+    ## default: all internal table names (user passed tables = NULL)
     resolved_tables = names(.table_registry$tables)
+  } else if (is.null(tables) && has_explicit_tables) {
+    ## user passed only definitions, no string tables — include total_population only
+    resolved_tables = resolve_tables(tables = NULL)
+    resolved_tables = "total_population"
   } else {
     ## pass only registry tables (not raw ACS codes) to resolve_tables
     registry_tables_input = if (length(registry_tables) > 0) registry_tables else NULL
@@ -243,6 +280,11 @@ compile_acs_data = function(
     variables = c(variables, auto_variables)
   }
 
+  ## resolve ACS codes in user definitions to clean column names
+  if (length(user_definitions) > 0) {
+    user_definitions = resolve_definition_variables(user_definitions, variables)
+  }
+
   ## default values for the states argument
   if (length(states) == 0) {
     states = tigris::fips_codes %>%
@@ -434,10 +476,18 @@ this function returns.")}
     }, .init = df_calculated_estimates)
   }
 
+  ## apply user-supplied definitions
+  if (length(user_definitions) > 0) {
+    validate_definition_variables(user_definitions, colnames(df_calculated_estimates))
+    check_multi_table_variables(user_definitions, resolved_tables, auto_table_entries)
+    df_calculated_estimates = execute_definitions(df_calculated_estimates, user_definitions)
+  }
+
   ####----Generate codebook----####
   codebook = generate_codebook(.data = df_calculated_estimates,
                                resolved_tables = resolved_tables,
-                               auto_table_entries = auto_table_entries)
+                               auto_table_entries = auto_table_entries,
+                               user_definitions = user_definitions)
 
   df_calculated_estimates = df_calculated_estimates %>%
     ## ensure the vintage of the data and the GEOID for each observation are the first columns
@@ -478,4 +528,5 @@ utils::globalVariables(c(
   "ALAND", "AWATER", "area_land_sq_kilometer", "area_water_sq_kilometer", "total_population_universe",
   "state", "GEOID", "data_source_year", ".",
   "state_code", "county_code", "county_fips", "state_name", "county",
-  "needs_tigris", "resolved_tables", "auto_table_entries"))
+  "needs_tigris", "resolved_tables", "auto_table_entries", "user_definitions",
+  "has_explicit_tables"))
diff --git a/R/generate_codebook.R b/R/generate_codebook.R
@@ -8,6 +8,8 @@
 #'   table registry. When NULL (default), all registered tables are used.
 #' @param auto_table_entries A list of auto-generated table entries from
 #'   \code{build_auto_table_entry()}. Default is an empty list.
+#' @param user_definitions A list of user-supplied DSL definition objects
+#'   (e.g., from \code{define_percent()}). Default is an empty list.
 #' @returns A tibble containing the names and definitions of  variables returned from
 #' \code{urbnindicators::compile_acs_data()}.
 #' @examples
@@ -24,7 +26,7 @@
 #' @importFrom magrittr %>%
 #' @keywords internal
 
-generate_codebook = function(.data, resolved_tables = NULL, auto_table_entries = list()) {
+generate_codebook = function(.data, resolved_tables = NULL, auto_table_entries = list(), user_definitions = list()) {
 
   .data = .data %>%
     sf::st_drop_geometry()
@@ -140,6 +142,15 @@ generate_codebook = function(.data, resolved_tables = NULL, auto_table_entries =
     partial_documentation = dplyr::bind_rows(partial_documentation, auto_documentation)
   }
 
+  ## Expand user-supplied definitions into codebook rows
+  if (length(user_definitions) > 0) {
+    user_documentation = purrr::map(
+      user_definitions,
+      ~ expand_codebook_entry(entry = .x, .data = .data, crosswalk = variable_name_crosswalk)) %>% purrr::list_rbind()
+
+    partial_documentation = dplyr::bind_rows(partial_documentation, user_documentation)
+  }
+
   ####----Raw Variables----####
   ## collect all raw variable clean names from the resolved tables
   raw_variable_names = variable_name_crosswalk$clean_name %>%

diff --git a/R/list_acs_variables.R b/R/list_acs_variables.R
@@ -102,7 +102,7 @@ list_acs_variables = function(year = "2022", tables = NULL) {
 #' get_acs_codebook() %>% dplyr::filter(stringr::str_detect(variable_clean, "snap"))
 #' }
 #' @export
-get_acs_codebook = function(year = 2022, table = NULL) {
+get_acs_codebook = function(year = 2024, table = NULL) {
   suppressWarnings({suppressMessages({
     census_variables = tidycensus::load_variables(year = year, dataset = "acs5")
   })})