From 3abdd9be32640e6934cb2c582840118abb2ef4ec Mon Sep 17 00:00:00 2001
From: William Curran-groome <wcurrangroome@mac.mynetworksettings.com>
Date: Sat, 18 Apr 2026 23:04:45 -0400
Subject: [PATCH] refactoring dsl constructors

---
 CLAUDE.md                                     |  22 +-
 NAMESPACE                                     |   5 +-
 R/auto_percent.R                              |   3 +-
 R/compile_acs_data.R                          |  19 +-
 R/generate_codebook.R                         |   3 +-
 R/table_registry.R                            | 841 ++++++++++--------
 man/compile_acs_data.Rd                       |  19 +-
 man/define_across_percent.Rd                  |  41 -
 man/define_across_sum.Rd                      |  34 -
 ...fine_one_minus.Rd => define_complement.Rd} |  14 +-
 man/define_metadata.Rd                        |   4 +-
 man/define_percent.Rd                         | 113 ++-
 man/define_sum.Rd                             |  62 ++
 man/generate_codebook.Rd                      |   3 +-
 tests/testthat/test-user_definitions.R        | 301 +++++--
 15 files changed, 853 insertions(+), 631 deletions(-)
 delete mode 100644 man/define_across_percent.Rd
 delete mode 100644 man/define_across_sum.Rd
 rename man/{define_one_minus.Rd => define_complement.Rd} (57%)
 create mode 100644 man/define_sum.Rd

diff --git a/CLAUDE.md b/CLAUDE.md
index 4679c70..6cb6a09 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -117,24 +117,24 @@ To add a new ACS table to the package:
 
 1. **Add a `register_table()` call in `R/table_registry.R`** with:
    - `raw_variables` (manual) or `raw_variable_source` (select_variables) for raw ACS variables
-   - `compute_fn` that calculates derived indicators using `safe_divide()` and `dplyr::across()`
-   - `codebook_entries` with structured entries (types: `simple_percent`, `across_percent`, `across_sum`, `complex`, `one_minus`, `metadata`)
+   - `definitions` using the DSL functions: `define_percent()`, `define_sum()`, `define_complement()`, `define_metadata()`
 2. **Add any new global variables** to the `utils::globalVariables()` call at the bottom of `R/table_registry.R`
 3. **Verify**: `devtools::load_all()` then `list_tables()` shows your table
-4. **Verify codebook**: the codebook auto-generates from `codebook_entries` -- no changes to `R/generate_codebook.R` needed
+4. **Verify codebook**: the codebook auto-generates from `definitions` -- no changes to `R/generate_codebook.R` needed
 5. **Verify MOEs**: `R/calculate_cvs.R` parses codebook definition strings -- no changes needed if definitions follow standard patterns
 6. **Update pretty names** if needed (`R/make_pretty_names.R` -- rarely needed)
 
-### Codebook entry types
+### DSL functions for definitions
 
-| Type | Use case | Key fields |
+| Function | Use case | Key params |
 |---|---|---|
-| `simple_percent` | Single numerator / denominator | `output`, `numerator`, `denominator` |
-| `across_percent` | `dplyr::across()` percentages | `input_regex`, `exclude_regex`, `output_suffix`, `denominator` or `denominator_fn` |
-| `across_sum` | `dplyr::across()` sums (e.g., male + female) | `input_regex`, `addend_fn`, `output_naming_fn` |
-| `complex` | Multi-variable numerator/denominator | `output`, `numerator_regex` or `numerator_variables`, `denominator_variables`, optional `subtract_*` (denominator) or `numerator_subtract_*` (numerator) |
-| `one_minus` | Complement (1 - x) | `output`, `source_variable` |
-| `metadata` | Non-computed variables | `output`, `definition_text` |
+| `define_percent(numerator, denominator)` | Single percentage | `numerator`, `denominator`, `output` (inferred) |
+| `define_percent(..., each = TRUE)` | Batch percentages | `numerator` (regex), `denominator` or `denominator_replace`, `exclude` |
+| `define_percent(numerator, denominator, subtract_from_*)` | Complex percentage | `subtract_from_numerator`, `subtract_from_denominator`, `exclude` |
+| `define_sum(columns, output)` | Sum columns | `columns` (character vector) |
+| `define_sum(..., each = TRUE)` | Batch pairwise sums | `columns` (regex), `add_replace`, `output_replace` |
+| `define_complement(source, output)` | Complement (1 - x) | `source`, `output` |
+| `define_metadata(output, definition)` | Non-computed variables | `output`, `definition` |
 
 ### Quality checks for new variables
 
diff --git a/NAMESPACE b/NAMESPACE
index 3b576b7..ccfbe88 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,11 +2,10 @@
 
 export("%>%")
 export(compile_acs_data)
-export(define_across_percent)
-export(define_across_sum)
+export(define_complement)
 export(define_metadata)
-export(define_one_minus)
 export(define_percent)
+export(define_sum)
 export(filter_variables)
 export(get_acs_codebook)
 export(interpolate_acs)
diff --git a/R/auto_percent.R b/R/auto_percent.R
index b15b022..e9ecdbd 100644
--- a/R/auto_percent.R
+++ b/R/auto_percent.R
@@ -177,7 +177,8 @@ generate_auto_definitions = function(nodes, denominator_mode = "parent",
     } else {
       output = paste0(numerator, "_percent")
     }
-    define_percent(output = output, numerator = numerator, denominator = denominator)
+    list(type = "simple_percent", output = output,
+         numerator = numerator, denominator = denominator)
   }) %>% purrr::compact()
 }
 
diff --git a/R/compile_acs_data.R b/R/compile_acs_data.R
index 28d87bf..1cbf733 100644
--- a/R/compile_acs_data.R
+++ b/R/compile_acs_data.R
@@ -30,12 +30,12 @@ safe_divide = function(x, y) { dplyr::if_else(y == 0, 0, x / y) }
 #'        Use the \code{denominator} parameter to control how percentages are
 #'        calculated for these tables.
 #'      \item \strong{DSL definition objects} created with \code{\link{define_percent}},
-#'        \code{\link{define_across_percent}}, \code{\link{define_across_sum}},
-#'        \code{\link{define_one_minus}}, or \code{\link{define_metadata}}.
-#'        These let you compute custom derived variables from the columns
-#'        produced by the tables you request. User definitions are executed
-#'        after all registered and auto-table definitions, and their results
-#'        appear in the codebook and have MOEs computed automatically.
+#'        \code{\link{define_sum}}, \code{\link{define_complement}}, or
+#'        \code{\link{define_metadata}}. These let you compute custom derived
+#'        variables from the columns produced by the tables you request. User
+#'        definitions are executed after all registered and auto-table
+#'        definitions, and their results appear in the codebook and have MOEs
+#'        computed automatically.
 #'    }
 #'    When mixing strings and definitions, wrap everything in \code{list()}
 #'    (e.g., \code{list("snap", define_percent(...))}).
@@ -92,10 +92,9 @@ safe_divide = function(x, y) { dplyr::if_else(y == 0, 0, x / y) }
 #' df = compile_acs_data(
 #'   tables = list(
 #'     "snap",
-#'     define_percent("snap_not_received_percent",
-#'                    numerator_variables = c("snap_universe", "snap_received"),
-#'                    numerator_subtract_variables = c("snap_received"),
-#'                    denominator_variables = c("snap_universe"))),
+#'     define_percent("snap_universe", "snap_universe",
+#'                    subtract_from_numerator = "snap_received",
+#'                    output = "snap_not_received_percent")),
 #'   years = 2022, geography = "county", states = "DC")
 #'   }
 #' @export
diff --git a/R/generate_codebook.R b/R/generate_codebook.R
index 19f81b6..f96a1d8 100644
--- a/R/generate_codebook.R
+++ b/R/generate_codebook.R
@@ -9,7 +9,8 @@
 #' @param auto_table_entries A list of auto-generated table entries from
 #'   \code{build_auto_table_entry()}. Default is an empty list.
 #' @param user_definitions A list of user-supplied DSL definition objects
-#'   (e.g., from \code{define_percent()}). Default is an empty list.
+#'   (e.g., from \code{define_percent()}, \code{define_sum()}). Default is an
+#'   empty list.
 #' @returns A tibble containing the names and definitions of  variables returned from
 #' \code{urbnindicators::compile_acs_data()}.
 #' @examples
diff --git a/R/table_registry.R b/R/table_registry.R
index 7808b23..5a04db1 100644
--- a/R/table_registry.R
+++ b/R/table_registry.R
@@ -51,173 +51,230 @@ build_construct_map = function() {
 
 ####----DSL CONSTRUCTORS----####
 
-#' Define a percentage variable (simple or complex)
+#' Define a percentage variable
 #'
-#' Creates a definition object for a derived percentage variable. When both
-#' \code{numerator} and \code{denominator} are single strings and no other
-#' fields are set, a \code{simple_percent} definition is returned. Otherwise a
-#' \code{complex} definition is returned, allowing multi-variable numerators
-#' and denominators.
+#' Creates a definition for a derived percentage (proportion) variable. Handles
+#' both single-output and batch modes:
+#' \itemize{
+#'   \item \strong{Single output} (\code{each = FALSE}): computes one percentage
+#'     from the specified numerator and denominator columns.
+#'   \item \strong{Batch output} (\code{each = TRUE}): computes one percentage
+#'     per column matching the \code{numerator} pattern. Output columns are
+#'     named \code{{matched_column}_percent}.
+#' }
 #'
-#' @param output A string. The name of the output column to create.
-#' @param numerator A string. Single numerator column name (simple case).
-#' @param denominator A string. Single denominator column name (simple case).
-#' @param numerator_variables A character vector of column names to sum for the
-#'   numerator (complex case).
-#' @param numerator_regex A regex pattern to match numerator columns (complex case).
-#' @param numerator_exclude_regex A regex pattern to exclude from numerator matches.
-#' @param numerator_note An optional annotation (not used in computation).
-#' @param numerator_subtract_variables A character vector of column names to
-#'   subtract from the numerator sum.
-#' @param numerator_subtract_regex A regex pattern to match columns to subtract
-#'   from the numerator.
-#' @param denominator_variables A character vector of column names to sum for the
-#'   denominator (complex case).
-#' @param denominator_regex A regex pattern to match denominator columns (complex case).
-#' @param denominator_exclude_regex A regex pattern to exclude from denominator matches.
-#' @param subtract_variables A character vector of column names to subtract from
-#'   the denominator sum.
-#' @param subtract_regex A regex pattern to match columns to subtract from
-#'   the denominator.
-#' @returns A list with a \code{type} field (\code{"simple_percent"} or
-#'   \code{"complex"}) and the associated fields. Can be passed in the
-#'   \code{tables} parameter of \code{\link{compile_acs_data}}.
+#' @param numerator A column name (string), character vector of column names to
+#'   sum, or regex pattern (when \code{each = TRUE}). When a character vector
+#'   of length > 1, columns are summed. When a single string and
+#'   \code{each = FALSE}, treated as a column name if it contains no regex
+#'   metacharacters, otherwise as a regex whose matches are summed.
+#' @param denominator A column name (string) or character vector of column
+#'   names to sum. When a single string, treated as a column name if it
+#'   contains no regex metacharacters, otherwise as a regex whose matches are
+#'   summed. Not required when \code{denominator_replace} is provided.
+#' @param output A string. The output column name. Auto-inferred as
+#'   \code{paste0(numerator, "_percent")} when \code{numerator} is a single
+#'   non-regex string and \code{each = FALSE}. Required when \code{numerator}
+#'   is a vector or regex. Ignored when \code{each = TRUE} (outputs are named
+#'   \code{{matched_column}_percent}).
+#' @param each Logical. When \code{TRUE}, \code{numerator} is treated as a
+#'   regex pattern and one percentage is computed per matched column. Default
+#'   \code{FALSE}.
+#' @param denominator_replace A named character vector for deriving the
+#'   denominator column name from the matched numerator column name via string
+#'   replacement (e.g., \code{c("below" = "universe")}). Only used when
+#'   \code{each = TRUE}.
+#' @param subtract_from_numerator Column name(s) to subtract from the numerator
+#'   sum (string or character vector).
+#' @param subtract_from_denominator Column name(s) to subtract from the
+#'   denominator sum (string or character vector).
+#' @param exclude A regex pattern to exclude columns from pattern matching.
+#' @returns A list with a \code{type} field and associated fields, suitable for
+#'   passing in the \code{tables} parameter of \code{\link{compile_acs_data}}.
 #' @examples
-#' # Simple percentage
-#' define_percent("snap_received_percent",
-#'               numerator = "snap_received",
-#'               denominator = "snap_universe")
+#' # Simple percentage (output inferred as "snap_received_percent")
+#' define_percent("snap_received", "snap_universe")
 #'
-#' # Complex percentage with subtraction
-#' define_percent("snap_not_received_percent",
-#'               numerator_variables = c("snap_universe"),
-#'               numerator_subtract_variables = c("snap_received"),
-#'               denominator_variables = c("snap_universe"))
+#' # Sum of columns as numerator
+#' define_percent(c("age_under_5_years", "age_5_9_years"),
+#'               denominator = "sex_by_age_universe",
+#'               output = "age_under_10_percent")
+#'
+#' # Batch: one percent per matched column
+#' define_percent("^race_nonhispanic|^race_hispanic",
+#'               denominator = "race_universe",
+#'               each = TRUE)
 #' @export
-define_percent = function(output,
-                          numerator = NULL,
+define_percent = function(numerator,
                           denominator = NULL,
-                          numerator_variables = NULL,
-                          numerator_regex = NULL,
-                          numerator_exclude_regex = NULL,
-                          numerator_note = NULL,
-                          numerator_subtract_variables = NULL,
-                          numerator_subtract_regex = NULL,
-                          denominator_variables = NULL,
-                          denominator_regex = NULL,
-                          denominator_exclude_regex = NULL,
-                          subtract_variables = NULL,
-                          subtract_regex = NULL) {
-  ## simple case: single numerator and denominator strings
-  if (!is.null(numerator) && is.character(numerator) && length(numerator) == 1 &&
-      !is.null(denominator) && is.character(denominator) && length(denominator) == 1 &&
-      is.null(numerator_variables) && is.null(numerator_regex) &&
-      is.null(denominator_variables) && is.null(denominator_regex) &&
-      is.null(subtract_variables) && is.null(subtract_regex) &&
-      is.null(numerator_subtract_variables) && is.null(numerator_subtract_regex)) {
-    return(list(
-      type = "simple_percent",
-      output = output,
-      numerator = numerator,
-      denominator = denominator))
+                          output = NULL,
+                          each = FALSE,
+                          denominator_replace = NULL,
+                          subtract_from_numerator = NULL,
+                          subtract_from_denominator = NULL,
+                          exclude = NULL) {
+
+  if (isTRUE(each)) {
+    ## batch mode -> across_percent
+    result = list(
+      type = "across_percent",
+      input_regex = numerator,
+      output_suffix = "_percent")
+    if (!is.null(denominator)) result[["denominator"]] = denominator
+    if (!is.null(denominator_replace)) {
+      result[["denominator_function"]] = function(column) {
+        purrr::reduce2(names(denominator_replace), denominator_replace,
+                       function(col, pattern, replacement) {
+                         stringr::str_replace(col, pattern, replacement)
+                       }, .init = column)
+      }
+    }
+    if (!is.null(subtract_from_denominator)) result[["denominator_subtract"]] = subtract_from_denominator
+    if (!is.null(exclude)) result[["exclude_regex"]] = exclude
+    return(result)
+  }
+
+  ## single-output mode
+  ## infer output name when numerator is a single plain string
+  if (is.null(output) && is.character(numerator) && length(numerator) == 1 &&
+      !.has_regex_metacharacters(numerator)) {
+    output = paste0(numerator, "_percent")
   }
 
-  ## complex case
+  ## complex case — all single-output definitions use "complex" type
+  ## (numerator is always treated as regex at execution time)
   result = list(type = "complex", output = output)
-  if (!is.null(numerator_variables)) result[["numerator_variables"]] = numerator_variables
-  if (!is.null(numerator_regex)) result[["numerator_regex"]] = numerator_regex
-  if (!is.null(numerator_exclude_regex)) result[["numerator_exclude_regex"]] = numerator_exclude_regex
-  if (!is.null(numerator_note)) result[["numerator_note"]] = numerator_note
-  if (!is.null(numerator_subtract_variables)) result[["numerator_subtract_variables"]] = numerator_subtract_variables
-  if (!is.null(numerator_subtract_regex)) result[["numerator_subtract_regex"]] = numerator_subtract_regex
-  if (!is.null(denominator_variables)) result[["denominator_variables"]] = denominator_variables
-  if (!is.null(denominator_regex)) result[["denominator_regex"]] = denominator_regex
-  if (!is.null(denominator_exclude_regex)) result[["denominator_exclude_regex"]] = denominator_exclude_regex
-  if (!is.null(subtract_variables)) result[["subtract_variables"]] = subtract_variables
-  if (!is.null(subtract_regex)) result[["subtract_regex"]] = subtract_regex
-  return(result)
-}
 
-#' Define an across-percent variable
-#'
-#' Creates a definition that computes a percentage for every column matching a
-#' regex pattern. Each matched column becomes a numerator; the denominator is
-#' either a fixed column or computed by a function.
-#'
-#' @param input_regex A regex pattern to match input columns.
-#' @param output_suffix A string appended to each matched column name to form
-#'   the output column name (e.g., \code{"_percent"}).
-#' @param denominator A string. A fixed denominator column name.
-#' @param denominator_function A function that takes a matched column name and
-#'   returns the denominator column name for that match.
-#' @param denominator_subtract A string. A column to subtract from the
-#'   denominator value.
-#' @param exclude_regex A regex pattern to exclude from matched columns.
-#' @returns A list with \code{type = "across_percent"} and the associated
-#'   fields. Can be passed in the \code{tables} parameter of
-#'   \code{\link{compile_acs_data}}.
-#' @export
-define_across_percent = function(input_regex,
-                                 output_suffix,
-                                 denominator = NULL,
-                                 denominator_function = NULL,
-                                 denominator_subtract = NULL,
-                                 exclude_regex = NULL) {
-  result = list(
-    type = "across_percent",
-    input_regex = input_regex,
-    output_suffix = output_suffix)
-  if (!is.null(denominator)) result[["denominator"]] = denominator
-  if (!is.null(denominator_function)) result[["denominator_function"]] = denominator_function
-  if (!is.null(denominator_subtract)) result[["denominator_subtract"]] = denominator_subtract
-  if (!is.null(exclude_regex)) result[["exclude_regex"]] = exclude_regex
+  ## numerator: vector -> numerator_variables; single string -> numerator_regex
+  if (length(numerator) > 1) {
+    result[["numerator_variables"]] = numerator
+  } else {
+    result[["numerator_regex"]] = numerator
+  }
+
+  ## denominator: vector -> denominator_variables; single string -> denominator_regex
+  if (!is.null(denominator)) {
+    if (length(denominator) > 1) {
+      result[["denominator_variables"]] = denominator
+    } else {
+      result[["denominator_regex"]] = denominator
+    }
+  }
+
+  ## subtraction — single strings always treated as regex
+  if (!is.null(subtract_from_numerator)) {
+    if (length(subtract_from_numerator) > 1) {
+      result[["numerator_subtract_variables"]] = subtract_from_numerator
+    } else {
+      result[["numerator_subtract_regex"]] = subtract_from_numerator
+    }
+  }
+  if (!is.null(subtract_from_denominator)) {
+    if (length(subtract_from_denominator) > 1) {
+      result[["subtract_variables"]] = subtract_from_denominator
+    } else {
+      result[["subtract_regex"]] = subtract_from_denominator
+    }
+  }
+
+  ## exclude
+  if (!is.null(exclude)) result[["numerator_exclude_regex"]] = exclude
+
   return(result)
 }
 
-#' Define an across-sum variable
+#' Define a sum variable
+#'
+#' Creates a definition for a derived sum variable. Handles both single-output
+#' and batch modes:
+#' \itemize{
+#'   \item \strong{Single output} (\code{each = FALSE}): sums the specified
+#'     columns into one output column.
+#'   \item \strong{Batch output} (\code{each = TRUE}): for each column matching
+#'     the \code{columns} pattern, adds a corresponding column (derived via
+#'     \code{add_replace}) and names the output via \code{output_replace}.
+#' }
 #'
-#' Creates a definition that sums each matched column with a corresponding
-#' addend column. The addend and output names are determined by user-supplied
-#' functions.
+#' @param columns Column name(s) to sum (character vector), or a regex pattern
+#'   when \code{each = TRUE}.
+#' @param output A string. The output column name. Required when
+#'   \code{each = FALSE}.
+#' @param each Logical. When \code{TRUE}, \code{columns} is treated as a regex
+#'   pattern and one pairwise sum is computed per matched column. Default
+#'   \code{FALSE}.
+#' @param add_replace A named character vector for deriving the addend column
+#'   name from the matched column name via string replacement (e.g.,
+#'   \code{c("female" = "male")}). Only used when \code{each = TRUE}.
+#' @param output_replace A named character vector for deriving the output column
+#'   name from the matched column name via string replacement (e.g.,
+#'   \code{c("sex_by_age_female_" = "age_")}). Only used when
+#'   \code{each = TRUE}.
+#' @param exclude A regex pattern to exclude columns from pattern matching.
+#' @returns A list with \code{type} field and associated fields, suitable for
+#'   passing in the \code{tables} parameter of \code{\link{compile_acs_data}}.
+#' @examples
+#' # Simple sum
+#' define_sum(c("col_a", "col_b", "col_c"), output = "total_abc")
 #'
-#' @param input_regex A regex pattern to match input columns.
-#' @param addend_function A function that takes a matched column name and
-#'   returns the name of the column to add.
-#' @param output_naming_function A function that takes a matched column name
-#'   and returns the output column name.
-#' @param exclude_regex A regex pattern to exclude from matched columns.
-#' @returns A list with \code{type = "across_sum"} and the associated fields.
-#'   Can be passed in the \code{tables} parameter of
-#'   \code{\link{compile_acs_data}}.
+#' # Batch: pairwise sum per matched column
+#' define_sum("sex_by_age_female_.*years($|_over$)",
+#'            each = TRUE,
+#'            add_replace = c("female" = "male"),
+#'            output_replace = c("sex_by_age_female_" = "age_"))
 #' @export
-define_across_sum = function(input_regex,
-                             addend_function,
-                             output_naming_function,
-                             exclude_regex = NULL) {
-  result = list(
-    type = "across_sum",
-    input_regex = input_regex,
-    addend_function = addend_function,
-    output_naming_function = output_naming_function)
-  if (!is.null(exclude_regex)) result[["exclude_regex"]] = exclude_regex
-  return(result)
+define_sum = function(columns,
+                      output = NULL,
+                      each = FALSE,
+                      add_replace = NULL,
+                      output_replace = NULL,
+                      exclude = NULL) {
+
+  if (isTRUE(each)) {
+    ## batch mode -> across_sum
+    addend_function = function(column) {
+      purrr::reduce2(names(add_replace), add_replace,
+                     function(col, pattern, replacement) {
+                       stringr::str_replace(col, pattern, replacement)
+                     }, .init = column)
+    }
+    output_naming_function = function(column) {
+      purrr::reduce2(names(output_replace), output_replace,
+                     function(col, pattern, replacement) {
+                       stringr::str_replace_all(col, pattern, replacement)
+                     }, .init = column)
+    }
+
+    result = list(
+      type = "across_sum",
+      input_regex = columns,
+      addend_function = addend_function,
+      output_naming_function = output_naming_function)
+    if (!is.null(exclude)) result[["exclude_regex"]] = exclude
+    return(result)
+  }
+
+  ## single-output sum
+  list(
+    type = "sum",
+    output = output,
+    columns = columns)
 }
 
-#' Define a one-minus (complement) variable
+#' Define a complement (1 - x) variable
 #'
-#' Creates a definition that computes \code{1 - source_variable}.
+#' Creates a definition that computes \code{1 - source}.
 #'
+#' @param source A string. The column to subtract from 1.
 #' @param output A string. The name of the output column to create.
-#' @param source_variable A string. The column to subtract from 1.
 #' @returns A list with \code{type = "one_minus"} and the associated fields.
 #'   Can be passed in the \code{tables} parameter of
 #'   \code{\link{compile_acs_data}}.
 #' @export
-define_one_minus = function(output, source_variable) {
+define_complement = function(source, output) {
   list(
     type = "one_minus",
     output = output,
-    source_variable = source_variable)
+    source_variable = source)
 }
 
 #' Define a metadata variable
@@ -226,22 +283,27 @@ define_one_minus = function(output, source_variable) {
 #' codebook entry.
 #'
 #' @param output A string. The name of the metadata column.
-#' @param definition_text A string. Human-readable description for the codebook.
+#' @param definition A string. Human-readable description for the codebook.
 #' @returns A list with \code{type = "metadata"} and the associated fields.
 #'   Can be passed in the \code{tables} parameter of
 #'   \code{\link{compile_acs_data}}.
 #' @export
-define_metadata = function(output, definition_text) {
+define_metadata = function(output, definition) {
   list(
     type = "metadata",
     output = output,
-    definition_text = definition_text)
+    definition_text = definition)
+}
+
+## Detect regex metacharacters in a string
+.has_regex_metacharacters = function(x) {
+  grepl("[.*()|\\.\\[\\]^$+?\\\\]|\\{|\\}", x, perl = TRUE)
 }
 
 ####----DSL VALIDATION AND HELPERS----####
 
 .dsl_types = c("simple_percent", "complex", "across_percent", "across_sum",
-               "one_minus", "metadata")
+               "one_minus", "metadata", "sum")
 
 ## Check whether an object is a DSL definition
 is_dsl_definition = function(x) {
@@ -330,6 +392,11 @@ validate_definition = function(definition) {
   } else if (type == "metadata") {
     check_required_string(definition[["output"]], "output", label)
     check_required_string(definition[["definition_text"]], "definition_text", label)
+  } else if (type == "sum") {
+    check_required_string(definition[["output"]], "output", label)
+    if (is.null(definition[["columns"]]) || !is.character(definition[["columns"]]) || length(definition[["columns"]]) == 0) {
+      stop(paste0("Definition `", label, "`: sum type requires `columns` as a non-empty character vector."))
+    }
   }
 
   invisible(TRUE)
@@ -354,6 +421,9 @@ extract_explicit_variables = function(definition) {
   } else if (type == "one_minus") {
     vars = c(vars, definition[["source_variable"]])
   }
+  if (type == "sum") {
+    vars = c(vars, definition[["columns"]])
+  }
   ## across_sum and metadata have no explicit variable refs to check
   unique(vars)
 }
@@ -481,6 +551,11 @@ execute_definition = function(.data, definition) {
 
   if (type == "across_percent") {
     input_columns = resolve_regex_columns(.data, definition[["input_regex"]], definition[["exclude_regex"]])
+    if (length(input_columns) == 1) {
+      warning(paste0("`each = TRUE` matched only 1 column ('", input_columns,
+                     "'). This may indicate the pattern is too narrow ",
+                     "or `each = TRUE` is not needed."), call. = FALSE)
+    }
     .data = purrr::reduce(input_columns, function(.data, column) {
       output_column = paste0(column, definition[["output_suffix"]])
       ## determine denominator
@@ -566,6 +641,12 @@ execute_definition = function(.data, definition) {
     return(.data)
   }
 
+  if (type == "sum") {
+    output = definition[["output"]]
+    .data[[output]] = rowSums(dplyr::select(.data, dplyr::all_of(definition[["columns"]])))
+    return(.data)
+  }
+
   if (type == "metadata") {
     ## no-op: metadata definitions don't produce computed columns
     return(.data)
@@ -734,7 +815,7 @@ list_variables = function(year = 2022) {
         available = state$available
         computed = state$computed
 
-        if (entry_type %in% c("simple_percent", "complex", "one_minus", "metadata")) {
+        if (entry_type %in% c("simple_percent", "complex", "one_minus", "metadata", "sum")) {
           out = entry[["output"]]
           if (is.character(out)) {
             computed = c(computed, out)
@@ -985,6 +1066,19 @@ expand_codebook_entry = function(entry, .data, crosswalk) {
       denominator_subtract_vars = list(character(0))))
   }
 
+  else if (type == "sum") {
+    columns = entry[["columns"]]
+    columns_formatted = purrr::map_chr(columns, format_variable) %>% paste0(collapse = ", ")
+    return(tibble::tibble(
+      calculated_variable = entry[["output"]],
+      variable_type = "Sum",
+      definition = paste0("Sum of: ", columns_formatted, "."),
+      numerator_vars = list(columns),
+      numerator_subtract_vars = list(character(0)),
+      denominator_vars = list(character(0)),
+      denominator_subtract_vars = list(character(0))))
+  }
+
   else if (type == "metadata") {
     return(tibble::tibble(
       calculated_variable = entry[["output"]],
@@ -1032,9 +1126,7 @@ register_table(list(
     public_assistance_universe_ = "B19058_001",
     public_assistance_received_ = "B19058_002"),
   definitions = list(
-    define_percent("public_assistance_received_percent",
-                   numerator = "public_assistance_received",
-                   denominator = "public_assistance_universe"))
+    define_percent("public_assistance_received", "public_assistance_universe"))
 ))
 
 register_table(list(
@@ -1047,9 +1139,7 @@ register_table(list(
     snap_universe_ = "B22003_001",
     snap_received_ = "B22003_002"),
   definitions = list(
-    define_percent("snap_received_percent",
-                   numerator = "snap_received",
-                   denominator = "snap_universe"))
+    define_percent("snap_received", "snap_universe"))
 ))
 
 register_table(list(
@@ -1137,11 +1227,10 @@ register_table(list(
     federal_poverty_limit_below_twoormore_ = "B17020G_002",
     federal_poverty_limit_below_white_alone_nonhispanic_ = "B17020H_002"),
   definitions = list(
-    define_across_percent(
-      input_regex = "federal_poverty_limit.*below",
-      exclude_regex = "percent",
-      output_suffix = "_percent",
-      denominator_function = function(column) { column %>% stringr::str_replace("below", "universe") }))
+    define_percent("federal_poverty_limit.*below",
+                   denominator_replace = c("below" = "universe"),
+                   each = TRUE,
+                   exclude = "percent"))
 ))
 
 ####----TABLE REGISTRATIONS: RACE AND ETHNICITY----####
@@ -1175,13 +1264,11 @@ register_table(list(
     race_hispanic_twoormore_includingotherrace_ = "B03002_020",
     race_hispanic_twoormore_excludingotherrace_ = "B03002_021"),
   definitions = list(
-    define_across_percent(
-      input_regex = "^race_nonhispanic|^race_hispanic",
-      exclude_regex = NULL,
-      output_suffix = "_percent",
-      denominator = "race_universe"),
-    define_one_minus("race_personofcolor_percent",
-                     source_variable = "race_nonhispanic_white_alone_percent"))
+    define_percent("^race_nonhispanic|^race_hispanic",
+                   denominator = "race_universe",
+                   each = TRUE),
+    define_complement("race_nonhispanic_white_alone_percent",
+                      output = "race_personofcolor_percent"))
 ))
 
 ####----TABLE REGISTRATIONS: SEX AND AGE----####
@@ -1201,28 +1288,23 @@ register_table(list(
       list(pattern = "B01001_"))),
   raw_variables = NULL,
   definitions = list(
-    define_percent("sex_female_percent",
-                   numerator = "sex_by_age_female",
-                   denominator = "sex_by_age_universe"),
-    define_percent("sex_male_percent",
-                   numerator = "sex_by_age_male",
-                   denominator = "sex_by_age_universe"),
-    define_across_sum(
-      input_regex = "sex_by_age_female_.*years($|_over$)",
-      exclude_regex = NULL,
-      addend_function = function(column) { column %>% stringr::str_replace("female", "male") },
-      output_naming_function = function(column) { column %>% stringr::str_replace("sex_by_age_female_", "age_") }),
-    define_across_percent(
-      input_regex = "^age.*years($|_over$)",
-      exclude_regex = NULL,
-      output_suffix = "_percent",
-      denominator = "sex_by_age_universe"),
-    define_percent("age_under_18_percent",
-                   numerator_variables = c("age_under_5_years", "age_5_9_years", "age_10_14_years", "age_15_17_years"),
-                   denominator_variables = c("sex_by_age_universe")),
-    define_percent("age_over_64_percent",
-                   numerator_regex = "age_(6(5|7)|7|8).*_years($|_over$)",
-                   denominator_variables = c("sex_by_age_universe")))
+    define_percent("sex_by_age_female", "sex_by_age_universe",
+                   output = "sex_female_percent"),
+    define_percent("sex_by_age_male", "sex_by_age_universe",
+                   output = "sex_male_percent"),
+    define_sum("sex_by_age_female_.*years($|_over$)",
+               each = TRUE,
+               add_replace = c("female" = "male"),
+               output_replace = c("sex_by_age_female_" = "age_")),
+    define_percent("^age.*years($|_over$)",
+                   denominator = "sex_by_age_universe",
+                   each = TRUE),
+    define_percent(c("age_under_5_years", "age_5_9_years", "age_10_14_years", "age_15_17_years"),
+                   denominator = "sex_by_age_universe",
+                   output = "age_under_18_percent"),
+    define_percent("age_(6(5|7)|7|8).*_years($|_over$)",
+                   denominator = "sex_by_age_universe",
+                   output = "age_over_64_percent"))
 ))
 
 ####----TABLE REGISTRATIONS: DISABILITY----####
@@ -1238,9 +1320,9 @@ register_table(list(
       list(pattern = "B18101_"))),
   raw_variables = NULL,
   definitions = list(
-    define_percent("disability_percent",
-                   numerator_regex = "with_a_disability",
-                   denominator_variables = c("sex_by_age_by_disability_status_universe")))
+    define_percent("with_a_disability",
+                   denominator = "sex_by_age_by_disability_status_universe",
+                   output = "disability_percent"))
 ))
 
 ####----TABLE REGISTRATIONS: HOUSING----####
@@ -1256,26 +1338,23 @@ register_table(list(
       list(pattern = "B25003"))),
   raw_variables = NULL,
   definitions = list(
-    define_across_percent(
-      input_regex = "^tenure_renter_occupied|^tenure_owner_occupied",
-      exclude_regex = "percent",
-      output_suffix = "_percent",
-      denominator = "tenure_universe"),
-    define_across_sum(
-      input_regex = "tenure_.*_householder_renter_occupied",
-      exclude_regex = "percent",
-      addend_function = function(column) { column %>% stringr::str_replace("renter", "owner") },
-      output_naming_function = function(column) { column %>% stringr::str_replace_all("renter_occupied", "renter_owner_occupied") }),
-    define_across_percent(
-      input_regex = "tenure.*householder_renter_occupied",
-      exclude_regex = "percent",
-      output_suffix = "_percent",
-      denominator_function = function(column) { column %>% stringr::str_replace("renter", "renter_owner") }),
-    define_across_percent(
-      input_regex = "tenure.*householder_owner_occupied",
-      exclude_regex = "percent",
-      output_suffix = "_percent",
-      denominator_function = function(column) { column %>% stringr::str_replace("owner", "renter_owner") }))
+    define_percent("^tenure_renter_occupied|^tenure_owner_occupied",
+                   denominator = "tenure_universe",
+                   each = TRUE,
+                   exclude = "percent"),
+    define_sum("tenure_.*_householder_renter_occupied",
+               each = TRUE,
+               add_replace = c("renter" = "owner"),
+               output_replace = c("renter_occupied" = "renter_owner_occupied"),
+               exclude = "percent"),
+    define_percent("tenure.*householder_renter_occupied",
+                   denominator_replace = c("renter" = "renter_owner"),
+                   each = TRUE,
+                   exclude = "percent"),
+    define_percent("tenure.*householder_owner_occupied",
+                   denominator_replace = c("owner" = "renter_owner"),
+                   each = TRUE,
+                   exclude = "percent"))
 ))
 
 register_table(list(
@@ -1289,12 +1368,12 @@ register_table(list(
       list(pattern = "B25014"))),
   raw_variables = NULL,
   definitions = list(
-    define_percent("overcrowding_morethan1_ppr_alltenures_percent",
-                   numerator_regex = "tenure_by_occupants_per_room.*(1_01|1_51|2_01)",
-                   denominator_variables = c("tenure_by_occupants_per_room_universe")),
-    define_percent("overcrowding_morethan1_ppr_renteroccupied_percent",
-                   numerator_regex = "tenure_by_occupants_per_room_renter.*(1_01|1_51|2_01)",
-                   denominator_variables = c("tenure_by_occupants_per_room_renter_occupied")))
+    define_percent("tenure_by_occupants_per_room.*(1_01|1_51|2_01)",
+                   denominator = "tenure_by_occupants_per_room_universe",
+                   output = "overcrowding_morethan1_ppr_alltenures_percent"),
+    define_percent("tenure_by_occupants_per_room_renter.*(1_01|1_51|2_01)",
+                   denominator = "tenure_by_occupants_per_room_renter_occupied",
+                   output = "overcrowding_morethan1_ppr_renteroccupied_percent"))
 ))
 
 register_table(list(
@@ -1308,11 +1387,10 @@ register_table(list(
       list(pattern = "B25024"))),
   raw_variables = NULL,
   definitions = list(
-    define_across_percent(
-      input_regex = "^units_in_structure",
-      exclude_regex = "universe|householder|percent",
-      output_suffix = "_percent",
-      denominator = "units_in_structure_universe"))
+    define_percent("^units_in_structure",
+                   denominator = "units_in_structure_universe",
+                   each = TRUE,
+                   exclude = "universe|householder|percent"))
 ))
 
 register_table(list(
@@ -1326,26 +1404,23 @@ register_table(list(
       list(pattern = "B25032"))),
   raw_variables = NULL,
   definitions = list(
-    define_across_sum(
-      input_regex = "tenure_by_units.*renter_occupied_housing_units",
-      exclude_regex = "owner|percent",
-      addend_function = function(column) { column %>% stringr::str_replace("renter", "owner") },
-      output_naming_function = function(column) { column %>% stringr::str_replace_all("renter_occupied_housing_units", "renter_owner_occupied_housing_units") }),
-    define_across_percent(
-      input_regex = "tenure_by_units_in_structure_renter_owner_occupied_housing_units_",
-      exclude_regex = "percent",
-      output_suffix = "_percent",
-      denominator = "tenure_by_units_in_structure_renter_owner_occupied_housing_units"),
-    define_across_percent(
-      input_regex = "tenure_by_units_in_structure_renter_occupied_housing_units_",
-      exclude_regex = "percent",
-      output_suffix = "_percent",
-      denominator = "tenure_by_units_in_structure_renter_occupied_housing_units"),
-    define_across_percent(
-      input_regex = "tenure_by_units_in_structure_owner_occupied_housing_units_",
-      exclude_regex = "percent",
-      output_suffix = "_percent",
-      denominator = "tenure_by_units_in_structure_owner_occupied_housing_units"))
+    define_sum("tenure_by_units.*renter_occupied_housing_units",
+               each = TRUE,
+               add_replace = c("renter" = "owner"),
+               output_replace = c("renter_occupied_housing_units" = "renter_owner_occupied_housing_units"),
+               exclude = "owner|percent"),
+    define_percent("tenure_by_units_in_structure_renter_owner_occupied_housing_units_",
+                   denominator = "tenure_by_units_in_structure_renter_owner_occupied_housing_units",
+                   each = TRUE,
+                   exclude = "percent"),
+    define_percent("tenure_by_units_in_structure_renter_occupied_housing_units_",
+                   denominator = "tenure_by_units_in_structure_renter_occupied_housing_units",
+                   each = TRUE,
+                   exclude = "percent"),
+    define_percent("tenure_by_units_in_structure_owner_occupied_housing_units_",
+                   denominator = "tenure_by_units_in_structure_owner_occupied_housing_units",
+                   each = TRUE,
+                   exclude = "percent"))
 ))
 
 register_table(list(
@@ -1359,49 +1434,48 @@ register_table(list(
       list(pattern = "B25034"))),
   raw_variables = NULL,
   definitions = list(
-    define_across_percent(
-      input_regex = "year_structure_built_built_[0-9]",
-      exclude_regex = "percent",
-      output_suffix = "_percent",
-      denominator_function = function(column) { column %>% stringr::str_replace("[0-9].*", "universe") %>% stringr::str_replace("built_", "") }),
-    define_percent("year_structure_built_built_since_1940_percent",
-                   numerator_regex = "year_structure_built_built_(19[4-9]|2)",
-                   numerator_exclude_regex = "percent",
-                   denominator_variables = c("year_structure_built_universe")),
-    define_percent("year_structure_built_built_since_1950_percent",
-                   numerator_regex = "year_structure_built_built_(19[5-9]|2)",
-                   numerator_exclude_regex = "percent",
-                   denominator_variables = c("year_structure_built_universe")),
-    define_percent("year_structure_built_built_since_1960_percent",
-                   numerator_regex = "year_structure_built_built_(19[6-9]|2)",
-                   numerator_exclude_regex = "percent",
-                   denominator_variables = c("year_structure_built_universe")),
-    define_percent("year_structure_built_built_since_1970_percent",
-                   numerator_regex = "year_structure_built_built_(19[7-9]|2)",
-                   numerator_exclude_regex = "percent",
-                   denominator_variables = c("year_structure_built_universe")),
-    define_percent("year_structure_built_built_since_1980_percent",
-                   numerator_regex = "year_structure_built_built_(19[8-9]|2)",
-                   numerator_exclude_regex = "percent",
-                   denominator_variables = c("year_structure_built_universe")),
-    define_percent("year_structure_built_built_since_1990_percent",
-                   numerator_regex = "year_structure_built_built_(19[9]|2)",
-                   numerator_exclude_regex = "percent",
-                   denominator_variables = c("year_structure_built_universe")),
-    define_percent("year_structure_built_built_since_2000_percent",
-                   numerator_regex = "year_structure_built_built_(200|201|202)",
-                   numerator_exclude_regex = "percent",
-                   denominator_variables = c("year_structure_built_universe")),
-    define_percent("year_structure_built_built_since_2010_percent",
-                   numerator_regex = "year_structure_built_built_(201|202)",
-                   numerator_exclude_regex = "percent",
-                   denominator_variables = c("year_structure_built_universe")),
-    define_percent("year_structure_built_built_since_2020_percent",
-                   numerator_regex = "year_structure_built_built_202",
-                   numerator_exclude_regex = "percent",
-                   denominator_variables = c("year_structure_built_universe")),
-    define_one_minus("year_structure_built_built_before_1960_percent",
-                     source_variable = "year_structure_built_built_since_1960_percent"))
+    define_percent("year_structure_built_built_[0-9]",
+                   denominator_replace = c("[0-9].*" = "universe", "built_" = ""),
+                   each = TRUE,
+                   exclude = "percent"),
+    define_percent("year_structure_built_built_(19[4-9]|2)",
+                   denominator = "year_structure_built_universe",
+                   output = "year_structure_built_built_since_1940_percent",
+                   exclude = "percent"),
+    define_percent("year_structure_built_built_(19[5-9]|2)",
+                   denominator = "year_structure_built_universe",
+                   output = "year_structure_built_built_since_1950_percent",
+                   exclude = "percent"),
+    define_percent("year_structure_built_built_(19[6-9]|2)",
+                   denominator = "year_structure_built_universe",
+                   output = "year_structure_built_built_since_1960_percent",
+                   exclude = "percent"),
+    define_percent("year_structure_built_built_(19[7-9]|2)",
+                   denominator = "year_structure_built_universe",
+                   output = "year_structure_built_built_since_1970_percent",
+                   exclude = "percent"),
+    define_percent("year_structure_built_built_(19[8-9]|2)",
+                   denominator = "year_structure_built_universe",
+                   output = "year_structure_built_built_since_1980_percent",
+                   exclude = "percent"),
+    define_percent("year_structure_built_built_(19[9]|2)",
+                   denominator = "year_structure_built_universe",
+                   output = "year_structure_built_built_since_1990_percent",
+                   exclude = "percent"),
+    define_percent("year_structure_built_built_(200|201|202)",
+                   denominator = "year_structure_built_universe",
+                   output = "year_structure_built_built_since_2000_percent",
+                   exclude = "percent"),
+    define_percent("year_structure_built_built_(201|202)",
+                   denominator = "year_structure_built_universe",
+                   output = "year_structure_built_built_since_2010_percent",
+                   exclude = "percent"),
+    define_percent("year_structure_built_built_202",
+                   denominator = "year_structure_built_universe",
+                   output = "year_structure_built_built_since_2020_percent",
+                   exclude = "percent"),
+    define_complement("year_structure_built_built_since_1960_percent",
+                      output = "year_structure_built_built_before_1960_percent"))
 ))
 
 register_table(list(
@@ -1415,30 +1489,30 @@ register_table(list(
       list(pattern = "B25074"))),
   raw_variables = NULL,
   definitions = list(
-    define_percent("cost_burdened_30percentormore_allincomes_percent",
-                   numerator_regex = "household_income_by_gross_rent.*(30_0|35_0|40_0|50_0).*(pct)",
-                   denominator_regex = "household_income_by_gross_rent.*([0-9]$|100000_more$)",
-                   subtract_regex = "household_income.*not_computed"),
-    define_percent("cost_burdened_50percentormore_allincomes_percent",
-                   numerator_regex = "household_income_by_gross_rent.*50_0.*pct",
-                   denominator_regex = "household_income_by_gross_rent.*([0-9]$|100000_more$)",
-                   subtract_regex = "household_income.*not_computed"),
-    define_percent("cost_burdened_30percentormore_incomeslessthan35000_percent",
-                   numerator_regex = "household_income_by_gross_rent.*(10000_|19999|34999).*(30_0|35_0|40_0|50_0).*(pct)",
-                   denominator_regex = "household_income_by_gross_rent.*(10000|19999|34999)$",
-                   subtract_regex = "household_income.*(10000_|19999|34999).*not_computed"),
-    define_percent("cost_burdened_50percentormore_incomeslessthan35000_percent",
-                   numerator_regex = "household_income_by_gross_rent.*(10000_|19999|34999).*50_0.*(pct)",
-                   denominator_regex = "household_income_by_gross_rent.*(10000|19999|34999)$",
-                   subtract_regex = "household_income.*(10000_|19999|34999).*not_computed"),
-    define_percent("cost_burdened_30percentormore_incomeslessthan50000_percent",
-                   numerator_regex = "household_income_by_gross_rent.*(10000_|19999|34999|49999).*(30_0|35_0|40_0|50_0).*(pct)",
-                   denominator_regex = "household_income_by_gross_rent.*(10000|19999|34999|49999)$",
-                   subtract_regex = "household_income.*(10000_|19999|34999|49999).*not_computed"),
-    define_percent("cost_burdened_50percentormore_incomeslessthan50000_percent",
-                   numerator_regex = "household_income_by_gross_rent.*(10000_|19999|34999|49999).*50_0.*pct",
-                   denominator_regex = "household_income_by_gross_rent.*(10000|19999|34999|49999)$",
-                   subtract_regex = "household_income.*(10000_|19999|34999|49999).*not_computed"))
+    define_percent("household_income_by_gross_rent.*(30_0|35_0|40_0|50_0).*(pct)",
+                   denominator = "household_income_by_gross_rent.*([0-9]$|100000_more$)",
+                   output = "cost_burdened_30percentormore_allincomes_percent",
+                   subtract_from_denominator = "household_income.*not_computed"),
+    define_percent("household_income_by_gross_rent.*50_0.*pct",
+                   denominator = "household_income_by_gross_rent.*([0-9]$|100000_more$)",
+                   output = "cost_burdened_50percentormore_allincomes_percent",
+                   subtract_from_denominator = "household_income.*not_computed"),
+    define_percent("household_income_by_gross_rent.*(10000_|19999|34999).*(30_0|35_0|40_0|50_0).*(pct)",
+                   denominator = "household_income_by_gross_rent.*(10000|19999|34999)$",
+                   output = "cost_burdened_30percentormore_incomeslessthan35000_percent",
+                   subtract_from_denominator = "household_income.*(10000_|19999|34999).*not_computed"),
+    define_percent("household_income_by_gross_rent.*(10000_|19999|34999).*50_0.*(pct)",
+                   denominator = "household_income_by_gross_rent.*(10000|19999|34999)$",
+                   output = "cost_burdened_50percentormore_incomeslessthan35000_percent",
+                   subtract_from_denominator = "household_income.*(10000_|19999|34999).*not_computed"),
+    define_percent("household_income_by_gross_rent.*(10000_|19999|34999|49999).*(30_0|35_0|40_0|50_0).*(pct)",
+                   denominator = "household_income_by_gross_rent.*(10000|19999|34999|49999)$",
+                   output = "cost_burdened_30percentormore_incomeslessthan50000_percent",
+                   subtract_from_denominator = "household_income.*(10000_|19999|34999|49999).*not_computed"),
+    define_percent("household_income_by_gross_rent.*(10000_|19999|34999|49999).*50_0.*pct",
+                   denominator = "household_income_by_gross_rent.*(10000|19999|34999|49999)$",
+                   output = "cost_burdened_50percentormore_incomeslessthan50000_percent",
+                   subtract_from_denominator = "household_income.*(10000_|19999|34999|49999).*not_computed"))
 ))
 
 register_table(list(
@@ -1502,23 +1576,20 @@ register_table(list(
       list(pattern = "B08301_"))),
   raw_variables = NULL,
   definitions = list(
-    define_across_percent(
-      input_regex = "means_transportation",
-      exclude_regex = "universe|worked_from_home|percent",
-      output_suffix = "_percent",
-      denominator = "means_transportation_work_universe",
-      denominator_subtract = "means_transportation_work_worked_from_home"),
-    define_percent("means_transportation_work_worked_from_home_percent",
-                   numerator = "means_transportation_work_worked_from_home",
-                   denominator = "means_transportation_work_universe"),
-    define_percent("means_transportation_work_bicycle_walked_percent",
-                   numerator_regex = "means_transportation_work_(bicycle|walked)$",
-                   denominator_variables = c("means_transportation_work_universe"),
-                   subtract_variables = c("means_transportation_work_worked_from_home")),
-    define_percent("means_transportation_work_motor_vehicle_percent",
-                   numerator_regex = "means_transportation_work_(car_truck_van|taxicab|motorcycle)$",
-                   denominator_variables = c("means_transportation_work_universe"),
-                   subtract_variables = c("means_transportation_work_worked_from_home")))
+    define_percent("means_transportation",
+                   denominator = "means_transportation_work_universe",
+                   each = TRUE,
+                   subtract_from_denominator = "means_transportation_work_worked_from_home",
+                   exclude = "universe|worked_from_home|percent"),
+    define_percent("means_transportation_work_worked_from_home", "means_transportation_work_universe"),
+    define_percent("means_transportation_work_(bicycle|walked)$",
+                   denominator = "means_transportation_work_universe",
+                   output = "means_transportation_work_bicycle_walked_percent",
+                   subtract_from_denominator = "means_transportation_work_worked_from_home"),
+    define_percent("means_transportation_work_(car_truck_van|taxicab|motorcycle)$",
+                   denominator = "means_transportation_work_universe",
+                   output = "means_transportation_work_motor_vehicle_percent",
+                   subtract_from_denominator = "means_transportation_work_worked_from_home"))
 ))
 
 register_table(list(
@@ -1532,11 +1603,10 @@ register_table(list(
       list(pattern = "B08303_"))),
   raw_variables = NULL,
   definitions = list(
-    define_across_percent(
-      input_regex = "travel_time_work",
-      exclude_regex = "universe|percent",
-      output_suffix = "_percent",
-      denominator = "travel_time_work_universe"))
+    define_percent("travel_time_work",
+                   denominator = "travel_time_work_universe",
+                   each = TRUE,
+                   exclude = "universe|percent"))
 ))
 
 register_table(list(
@@ -1565,30 +1635,30 @@ register_table(list(
       list(pattern = "B15003"))),
   raw_variables = NULL,
   definitions = list(
-    define_percent("educational_attainment_highschool_none_percent",
-                   numerator_regex = "educational_attainment.*(no_schooling|nursery|kindergarten|_[0-8]th_grade|1st_grade|2nd_grade|3rd_grade)",
-                   denominator_variables = c("educational_attainment_population_25_years_over_universe")),
-    define_percent("educational_attainment_highschool_nodiploma_percent",
-                   numerator_regex = "educational_attainment.*(9th|10th|11th|12th)",
-                   denominator_variables = c("educational_attainment_population_25_years_over_universe")),
-    define_percent("educational_attainment_ged_percent",
-                   numerator = "educational_attainment_population_25_years_over_ged_alternative_credential",
-                   denominator = "educational_attainment_population_25_years_over_universe"),
-    define_percent("educational_attainment_highschool_diploma_percent",
-                   numerator = "educational_attainment_population_25_years_over_regular_high_school_diploma",
-                   denominator = "educational_attainment_population_25_years_over_universe"),
-    define_percent("educational_attainment_college_some_percent",
-                   numerator_regex = "educational_attainment.*some_college",
-                   denominator_variables = c("educational_attainment_population_25_years_over_universe")),
-    define_percent("educational_attainment_degree_associate_percent",
-                   numerator = "educational_attainment_population_25_years_over_associates_degree",
-                   denominator = "educational_attainment_population_25_years_over_universe"),
-    define_percent("educational_attainment_degree_bachelors_percent",
-                   numerator = "educational_attainment_population_25_years_over_bachelors_degree",
-                   denominator = "educational_attainment_population_25_years_over_universe"),
-    define_percent("educational_attainment_degree_morethanbachelors_percent",
-                   numerator_regex = "educational_attainment.*(masters|professional|doctorate)",
-                   denominator_variables = c("educational_attainment_population_25_years_over_universe")))
+    define_percent("educational_attainment.*(no_schooling|nursery|kindergarten|_[0-8]th_grade|1st_grade|2nd_grade|3rd_grade)",
+                   denominator = "educational_attainment_population_25_years_over_universe",
+                   output = "educational_attainment_highschool_none_percent"),
+    define_percent("educational_attainment.*(9th|10th|11th|12th)",
+                   denominator = "educational_attainment_population_25_years_over_universe",
+                   output = "educational_attainment_highschool_nodiploma_percent"),
+    define_percent("educational_attainment_population_25_years_over_ged_alternative_credential",
+                   "educational_attainment_population_25_years_over_universe",
+                   output = "educational_attainment_ged_percent"),
+    define_percent("educational_attainment_population_25_years_over_regular_high_school_diploma",
+                   "educational_attainment_population_25_years_over_universe",
+                   output = "educational_attainment_highschool_diploma_percent"),
+    define_percent("educational_attainment.*some_college",
+                   denominator = "educational_attainment_population_25_years_over_universe",
+                   output = "educational_attainment_college_some_percent"),
+    define_percent("educational_attainment_population_25_years_over_associates_degree",
+                   "educational_attainment_population_25_years_over_universe",
+                   output = "educational_attainment_degree_associate_percent"),
+    define_percent("educational_attainment_population_25_years_over_bachelors_degree",
+                   "educational_attainment_population_25_years_over_universe",
+                   output = "educational_attainment_degree_bachelors_percent"),
+    define_percent("educational_attainment.*(masters|professional|doctorate)",
+                   denominator = "educational_attainment_population_25_years_over_universe",
+                   output = "educational_attainment_degree_morethanbachelors_percent"))
 ))
 
 register_table(list(
@@ -1605,16 +1675,14 @@ register_table(list(
     school_enrollment_graduate_ = "B14007_018",
     school_enrollment_notenrolled_ = "B14007_019"),
   definitions = list(
-    define_percent("educational_enrollment_grades_1thru12_percent",
-                   numerator_variables = c("school_enrollment_universe"),
-                   numerator_note = "universe minus non-1-12 enrollment categories",
-                   denominator_variables = c("school_enrollment_universe"),
-                   numerator_subtract_regex = "school_enrollment.*[^(_universe)]"),
-    define_across_percent(
-      input_regex = "school_enrollment.*[^(_universe)]",
-      exclude_regex = "percent",
-      output_suffix = "_percent",
-      denominator = "school_enrollment_universe"))
+    define_percent(c("school_enrollment_universe"),
+                   denominator = "school_enrollment_universe",
+                   output = "educational_enrollment_grades_1thru12_percent",
+                   subtract_from_numerator = "school_enrollment.*[^(_universe)]"),
+    define_percent("school_enrollment.*[^(_universe)]",
+                   denominator = "school_enrollment_universe",
+                   each = TRUE,
+                   exclude = "percent"))
 ))
 
 ####----TABLE REGISTRATIONS: NATIVITY AND LANGUAGE----####
@@ -1637,17 +1705,17 @@ register_table(list(
              match_type = "positive")))),
   raw_variables = NULL,
   definitions = list(
-    define_percent("nativity_native_born_percent",
-                   numerator = "nativity_by_language_spoken_at_home_by_ability_speak_english_population_5_years_over_native",
-                   denominator = "nativity_by_language_spoken_at_home_by_ability_speak_english_population_5_years_over_universe"),
-    define_percent("nativity_foreign_born_percent",
-                   numerator = "nativity_by_language_spoken_at_home_by_ability_speak_english_population_5_years_over_foreign_born",
-                   denominator = "nativity_by_language_spoken_at_home_by_ability_speak_english_population_5_years_over_universe"),
-    define_percent("ability_speak_english_very_well_better_percent",
-                   numerator_regex = "nativity.*(only_english|english_very_well)",
-                   denominator_variables = c("nativity_by_language_spoken_at_home_by_ability_speak_english_population_5_years_over_universe")),
-    define_one_minus("ability_speak_english_less_than_very_well_percent",
-                     source_variable = "ability_speak_english_very_well_better_percent"))
+    define_percent("nativity_by_language_spoken_at_home_by_ability_speak_english_population_5_years_over_native",
+                   "nativity_by_language_spoken_at_home_by_ability_speak_english_population_5_years_over_universe",
+                   output = "nativity_native_born_percent"),
+    define_percent("nativity_by_language_spoken_at_home_by_ability_speak_english_population_5_years_over_foreign_born",
+                   "nativity_by_language_spoken_at_home_by_ability_speak_english_population_5_years_over_universe",
+                   output = "nativity_foreign_born_percent"),
+    define_percent("nativity.*(only_english|english_very_well)",
+                   denominator = "nativity_by_language_spoken_at_home_by_ability_speak_english_population_5_years_over_universe",
+                   output = "ability_speak_english_very_well_better_percent"),
+    define_complement("ability_speak_english_very_well_better_percent",
+                      output = "ability_speak_english_less_than_very_well_percent"))
 ))
 
 ####----TABLE REGISTRATIONS: EMPLOYMENT----####
@@ -1662,9 +1730,8 @@ register_table(list(
     employment_civilian_labor_force_universe_ = "B23025_003",
     employment_civilian_labor_force_employed_ = "B23025_004"),
   definitions = list(
-    define_percent("employment_civilian_labor_force_percent",
-                   numerator = "employment_civilian_labor_force_employed",
-                   denominator = "employment_civilian_labor_force_universe"))
+    define_percent("employment_civilian_labor_force_employed", "employment_civilian_labor_force_universe",
+                   output = "employment_civilian_labor_force_percent"))
 ))
 
 ####----TABLE REGISTRATIONS: HOUSEHOLD COMPOSITION----####
@@ -1695,17 +1762,17 @@ register_table(list(
       list(pattern = "B27011"))),
   raw_variables = NULL,
   definitions = list(
-    define_percent("health_insurance_coverage_status_covered_percent",
-                   numerator_regex = "health_insurance_coverage_status_type_by_employment_status.*with_health_insurance_coverage$",
-                   denominator_variables = c("health_insurance_coverage_status_type_by_employment_status_universe")),
-    define_one_minus("health_insurance_coverage_status_notcovered_percent",
-                     source_variable = "health_insurance_coverage_status_covered_percent"),
-    define_percent("health_insurance_coverage_status_covered_employed_percent",
-                   numerator_regex = "health_insurance_coverage_status_type_by_employment_status.*_employed.*with_health_insurance_coverage$",
-                   denominator_variables = c("health_insurance_coverage_status_type_by_employment_status_in_labor_force")),
-    define_percent("health_insurance_coverage_status_covered_unemployed_percent",
-                   numerator_regex = "health_insurance_coverage_status_type_by_employment_status.*_unemployed.*with_health_insurance_coverage$",
-                   denominator_variables = c("health_insurance_coverage_status_type_by_employment_status_in_labor_force")))
+    define_percent("health_insurance_coverage_status_type_by_employment_status.*with_health_insurance_coverage$",
+                   denominator = "health_insurance_coverage_status_type_by_employment_status_universe",
+                   output = "health_insurance_coverage_status_covered_percent"),
+    define_complement("health_insurance_coverage_status_covered_percent",
+                      output = "health_insurance_coverage_status_notcovered_percent"),
+    define_percent("health_insurance_coverage_status_type_by_employment_status.*_employed.*with_health_insurance_coverage$",
+                   denominator = "health_insurance_coverage_status_type_by_employment_status_in_labor_force",
+                   output = "health_insurance_coverage_status_covered_employed_percent"),
+    define_percent("health_insurance_coverage_status_type_by_employment_status.*_unemployed.*with_health_insurance_coverage$",
+                   denominator = "health_insurance_coverage_status_type_by_employment_status_in_labor_force",
+                   output = "health_insurance_coverage_status_covered_unemployed_percent"))
 ))
 
 ####----TABLE REGISTRATIONS: DIGITAL INFRASTRUCTURE----####
@@ -1748,7 +1815,7 @@ register_table(list(
   raw_variables = NULL,
   definitions = list(
     define_metadata("population_density_land_sq_kilometer",
-                    definition_text = "Rate. Numerator: total_population_universe (B01003_001). Denominator: area_land_sq_kilometer."))
+                    definition = "Rate. Numerator: total_population_universe (B01003_001). Denominator: area_land_sq_kilometer."))
 ))
 
 ####----GLOBAL VARIABLES----####
diff --git a/man/compile_acs_data.Rd b/man/compile_acs_data.Rd
index e8e9c13..3276b41 100644
--- a/man/compile_acs_data.Rd
+++ b/man/compile_acs_data.Rd
@@ -30,12 +30,12 @@ label hierarchy is parsed, and percentages are computed automatically.
 Use the \code{denominator} parameter to control how percentages are
 calculated for these tables.
 \item \strong{DSL definition objects} created with \code{\link{define_percent}},
-\code{\link{define_across_percent}}, \code{\link{define_across_sum}},
-\code{\link{define_one_minus}}, or \code{\link{define_metadata}}.
-These let you compute custom derived variables from the columns
-produced by the tables you request. User definitions are executed
-after all registered and auto-table definitions, and their results
-appear in the codebook and have MOEs computed automatically.
+\code{\link{define_sum}}, \code{\link{define_complement}}, or
+\code{\link{define_metadata}}. These let you compute custom derived
+variables from the columns produced by the tables you request. User
+definitions are executed after all registered and auto-table
+definitions, and their results appear in the codebook and have MOEs
+computed automatically.
 }
 When mixing strings and definitions, wrap everything in \code{list()}
 (e.g., \code{list("snap", define_percent(...))}).
@@ -106,10 +106,9 @@ df = compile_acs_data(tables = "B25070", denominator = "total",
 df = compile_acs_data(
   tables = list(
     "snap",
-    define_percent("snap_not_received_percent",
-                   numerator_variables = c("snap_universe", "snap_received"),
-                   numerator_subtract_variables = c("snap_received"),
-                   denominator_variables = c("snap_universe"))),
+    define_percent("snap_universe", "snap_universe",
+                   subtract_from_numerator = "snap_received",
+                   output = "snap_not_received_percent")),
   years = 2022, geography = "county", states = "DC")
   }
 }
diff --git a/man/define_across_percent.Rd b/man/define_across_percent.Rd
deleted file mode 100644
index 71cabb3..0000000
--- a/man/define_across_percent.Rd
+++ /dev/null
@@ -1,41 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/table_registry.R
-\name{define_across_percent}
-\alias{define_across_percent}
-\title{Define an across-percent variable}
-\usage{
-define_across_percent(
-  input_regex,
-  output_suffix,
-  denominator = NULL,
-  denominator_function = NULL,
-  denominator_subtract = NULL,
-  exclude_regex = NULL
-)
-}
-\arguments{
-\item{input_regex}{A regex pattern to match input columns.}
-
-\item{output_suffix}{A string appended to each matched column name to form
-the output column name (e.g., \code{"_percent"}).}
-
-\item{denominator}{A string. A fixed denominator column name.}
-
-\item{denominator_function}{A function that takes a matched column name and
-returns the denominator column name for that match.}
-
-\item{denominator_subtract}{A string. A column to subtract from the
-denominator value.}
-
-\item{exclude_regex}{A regex pattern to exclude from matched columns.}
-}
-\value{
-A list with \code{type = "across_percent"} and the associated
-fields. Can be passed in the \code{tables} parameter of
-\code{\link{compile_acs_data}}.
-}
-\description{
-Creates a definition that computes a percentage for every column matching a
-regex pattern. Each matched column becomes a numerator; the denominator is
-either a fixed column or computed by a function.
-}
diff --git a/man/define_across_sum.Rd b/man/define_across_sum.Rd
deleted file mode 100644
index cdfe4f8..0000000
--- a/man/define_across_sum.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/table_registry.R
-\name{define_across_sum}
-\alias{define_across_sum}
-\title{Define an across-sum variable}
-\usage{
-define_across_sum(
-  input_regex,
-  addend_function,
-  output_naming_function,
-  exclude_regex = NULL
-)
-}
-\arguments{
-\item{input_regex}{A regex pattern to match input columns.}
-
-\item{addend_function}{A function that takes a matched column name and
-returns the name of the column to add.}
-
-\item{output_naming_function}{A function that takes a matched column name
-and returns the output column name.}
-
-\item{exclude_regex}{A regex pattern to exclude from matched columns.}
-}
-\value{
-A list with \code{type = "across_sum"} and the associated fields.
-Can be passed in the \code{tables} parameter of
-\code{\link{compile_acs_data}}.
-}
-\description{
-Creates a definition that sums each matched column with a corresponding
-addend column. The addend and output names are determined by user-supplied
-functions.
-}
diff --git a/man/define_one_minus.Rd b/man/define_complement.Rd
similarity index 57%
rename from man/define_one_minus.Rd
rename to man/define_complement.Rd
index ff8ab52..cdc8556 100644
--- a/man/define_one_minus.Rd
+++ b/man/define_complement.Rd
@@ -1,15 +1,15 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/table_registry.R
-\name{define_one_minus}
-\alias{define_one_minus}
-\title{Define a one-minus (complement) variable}
+\name{define_complement}
+\alias{define_complement}
+\title{Define a complement (1 - x) variable}
 \usage{
-define_one_minus(output, source_variable)
+define_complement(source, output)
 }
 \arguments{
-\item{output}{A string. The name of the output column to create.}
+\item{source}{A string. The column to subtract from 1.}
 
-\item{source_variable}{A string. The column to subtract from 1.}
+\item{output}{A string. The name of the output column to create.}
 }
 \value{
 A list with \code{type = "one_minus"} and the associated fields.
@@ -17,5 +17,5 @@ Can be passed in the \code{tables} parameter of
 \code{\link{compile_acs_data}}.
 }
 \description{
-Creates a definition that computes \code{1 - source_variable}.
+Creates a definition that computes \code{1 - source}.
 }
diff --git a/man/define_metadata.Rd b/man/define_metadata.Rd
index ff638a8..f88f028 100644
--- a/man/define_metadata.Rd
+++ b/man/define_metadata.Rd
@@ -4,12 +4,12 @@
 \alias{define_metadata}
 \title{Define a metadata variable}
 \usage{
-define_metadata(output, definition_text)
+define_metadata(output, definition)
 }
 \arguments{
 \item{output}{A string. The name of the metadata column.}
 
-\item{definition_text}{A string. Human-readable description for the codebook.}
+\item{definition}{A string. Human-readable description for the codebook.}
 }
 \value{
 A list with \code{type = "metadata"} and the associated fields.
diff --git a/man/define_percent.Rd b/man/define_percent.Rd
index ef44048..61d1941 100644
--- a/man/define_percent.Rd
+++ b/man/define_percent.Rd
@@ -2,81 +2,80 @@
 % Please edit documentation in R/table_registry.R
 \name{define_percent}
 \alias{define_percent}
-\title{Define a percentage variable (simple or complex)}
+\title{Define a percentage variable}
 \usage{
 define_percent(
-  output,
-  numerator = NULL,
+  numerator,
   denominator = NULL,
-  numerator_variables = NULL,
-  numerator_regex = NULL,
-  numerator_exclude_regex = NULL,
-  numerator_note = NULL,
-  numerator_subtract_variables = NULL,
-  numerator_subtract_regex = NULL,
-  denominator_variables = NULL,
-  denominator_regex = NULL,
-  denominator_exclude_regex = NULL,
-  subtract_variables = NULL,
-  subtract_regex = NULL
+  output = NULL,
+  each = FALSE,
+  denominator_replace = NULL,
+  subtract_from_numerator = NULL,
+  subtract_from_denominator = NULL,
+  exclude = NULL
 )
 }
 \arguments{
-\item{output}{A string. The name of the output column to create.}
+\item{numerator}{A column name (string), character vector of column names to
+sum, or regex pattern (when \code{each = TRUE}). When a character vector
+of length > 1, columns are summed. When a single string and
+\code{each = FALSE}, treated as a column name if it contains no regex
+metacharacters, otherwise as a regex whose matches are summed.}
 
-\item{numerator}{A string. Single numerator column name (simple case).}
+\item{denominator}{A column name (string) or character vector of column
+names to sum. When a single string, treated as a column name if it
+contains no regex metacharacters, otherwise as a regex whose matches are
+summed. Not required when \code{denominator_replace} is provided.}
 
-\item{denominator}{A string. Single denominator column name (simple case).}
+\item{output}{A string. The output column name. Auto-inferred as
+\code{paste0(numerator, "_percent")} when \code{numerator} is a single
+non-regex string and \code{each = FALSE}. Required when \code{numerator}
+is a vector or regex. Ignored when \code{each = TRUE} (outputs are named
+\code{{matched_column}_percent}).}
 
-\item{numerator_variables}{A character vector of column names to sum for the
-numerator (complex case).}
+\item{each}{Logical. When \code{TRUE}, \code{numerator} is treated as a
+regex pattern and one percentage is computed per matched column. Default
+\code{FALSE}.}
 
-\item{numerator_regex}{A regex pattern to match numerator columns (complex case).}
+\item{denominator_replace}{A named character vector for deriving the
+denominator column name from the matched numerator column name via string
+replacement (e.g., \code{c("below" = "universe")}). Only used when
+\code{each = TRUE}.}
 
-\item{numerator_exclude_regex}{A regex pattern to exclude from numerator matches.}
+\item{subtract_from_numerator}{Column name(s) to subtract from the numerator
+sum (string or character vector).}
 
-\item{numerator_note}{An optional annotation (not used in computation).}
+\item{subtract_from_denominator}{Column name(s) to subtract from the
+denominator sum (string or character vector).}
 
-\item{numerator_subtract_variables}{A character vector of column names to
-subtract from the numerator sum.}
-
-\item{numerator_subtract_regex}{A regex pattern to match columns to subtract
-from the numerator.}
-
-\item{denominator_variables}{A character vector of column names to sum for the
-denominator (complex case).}
-
-\item{denominator_regex}{A regex pattern to match denominator columns (complex case).}
-
-\item{denominator_exclude_regex}{A regex pattern to exclude from denominator matches.}
-
-\item{subtract_variables}{A character vector of column names to subtract from
-the denominator sum.}
-
-\item{subtract_regex}{A regex pattern to match columns to subtract from
-the denominator.}
+\item{exclude}{A regex pattern to exclude columns from pattern matching.}
 }
 \value{
-A list with a \code{type} field (\code{"simple_percent"} or
-\code{"complex"}) and the associated fields. Can be passed in the
-\code{tables} parameter of \code{\link{compile_acs_data}}.
+A list with a \code{type} field and associated fields, suitable for
+passing in the \code{tables} parameter of \code{\link{compile_acs_data}}.
 }
 \description{
-Creates a definition object for a derived percentage variable. When both
-\code{numerator} and \code{denominator} are single strings and no other
-fields are set, a \code{simple_percent} definition is returned. Otherwise a
-\code{complex} definition is returned, allowing multi-variable numerators
-and denominators.
+Creates a definition for a derived percentage (proportion) variable. Handles
+both single-output and batch modes:
+\itemize{
+\item \strong{Single output} (\code{each = FALSE}): computes one percentage
+from the specified numerator and denominator columns.
+\item \strong{Batch output} (\code{each = TRUE}): computes one percentage
+per column matching the \code{numerator} pattern. Output columns are
+named \code{{matched_column}_percent}.
+}
 }
 \examples{
-# Simple percentage
-define_percent("snap_received_percent",
-              numerator = "snap_received",
-              denominator = "snap_universe")
+# Simple percentage (output inferred as "snap_received_percent")
+define_percent("snap_received", "snap_universe")
+
+# Sum of columns as numerator
+define_percent(c("age_under_5_years", "age_5_9_years"),
+              denominator = "sex_by_age_universe",
+              output = "age_under_10_percent")
 
-# Complex percentage with subtraction
-define_percent("snap_not_received_percent",
-              numerator_variables = c("snap_universe"),
-              numerator_subtract_variables = c("snap_received"),
-              denominator_variables = c("snap_universe"))
+# Batch: one percent per matched column
+define_percent("^race_nonhispanic|^race_hispanic",
+              denominator = "race_universe",
+              each = TRUE)
 }
diff --git a/man/define_sum.Rd b/man/define_sum.Rd
new file mode 100644
index 0000000..b1d0820
--- /dev/null
+++ b/man/define_sum.Rd
@@ -0,0 +1,62 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/table_registry.R
+\name{define_sum}
+\alias{define_sum}
+\title{Define a sum variable}
+\usage{
+define_sum(
+  columns,
+  output = NULL,
+  each = FALSE,
+  add_replace = NULL,
+  output_replace = NULL,
+  exclude = NULL
+)
+}
+\arguments{
+\item{columns}{Column name(s) to sum (character vector), or a regex pattern
+when \code{each = TRUE}.}
+
+\item{output}{A string. The output column name. Required when
+\code{each = FALSE}.}
+
+\item{each}{Logical. When \code{TRUE}, \code{columns} is treated as a regex
+pattern and one pairwise sum is computed per matched column. Default
+\code{FALSE}.}
+
+\item{add_replace}{A named character vector for deriving the addend column
+name from the matched column name via string replacement (e.g.,
+\code{c("female" = "male")}). Only used when \code{each = TRUE}.}
+
+\item{output_replace}{A named character vector for deriving the output column
+name from the matched column name via string replacement (e.g.,
+\code{c("sex_by_age_female_" = "age_")}). Only used when
+\code{each = TRUE}.}
+
+\item{exclude}{A regex pattern to exclude columns from pattern matching.}
+}
+\value{
+A list with \code{type} field and associated fields, suitable for
+passing in the \code{tables} parameter of \code{\link{compile_acs_data}}.
+}
+\description{
+Creates a definition for a derived sum variable. Handles both single-output
+and batch modes:
+\itemize{
+\item \strong{Single output} (\code{each = FALSE}): sums the specified
+columns into one output column.
+\item \strong{Batch output} (\code{each = TRUE}): for each column matching
+the \code{columns} pattern, adds a corresponding column (derived via
+\code{add_replace}) and names the output via \code{output_replace}.
+}
+}
+\examples{
+# Simple sum
+define_sum(c("col_a", "col_b", "col_c"), output = "total_abc")
+
+# Batch: pairwise sum per matched column
+define_sum("sex_by_age_female_.*years($|_over$)",
+           each = TRUE,
+           add_replace = c("female" = "male"),
+           output_replace = c("sex_by_age_female_" = "age_"))
+}
diff --git a/man/generate_codebook.Rd b/man/generate_codebook.Rd
index 37c083a..c3b7bc4 100644
--- a/man/generate_codebook.Rd
+++ b/man/generate_codebook.Rd
@@ -21,7 +21,8 @@ table registry. When NULL (default), all registered tables are used.}
 \code{build_auto_table_entry()}. Default is an empty list.}
 
 \item{user_definitions}{A list of user-supplied DSL definition objects
-(e.g., from \code{define_percent()}). Default is an empty list.}
+(e.g., from \code{define_percent()}, \code{define_sum()}). Default is an
+empty list.}
 }
 \value{
 A tibble containing the names and definitions of  variables returned from
diff --git a/tests/testthat/test-user_definitions.R b/tests/testthat/test-user_definitions.R
index 5327da4..887632b 100644
--- a/tests/testthat/test-user_definitions.R
+++ b/tests/testthat/test-user_definitions.R
@@ -1,13 +1,14 @@
 ####----DSL Validation Tests (no API calls)----####
 
 test_that("is_dsl_definition identifies valid definitions", {
-  expect_true(is_dsl_definition(define_percent("x", numerator = "a", denominator = "b")))
-  expect_true(is_dsl_definition(define_one_minus("x", source_variable = "y")))
-  expect_true(is_dsl_definition(define_metadata("x", definition_text = "desc")))
-  expect_true(is_dsl_definition(define_across_percent(
-    input_regex = "^snap", output_suffix = "_percent", denominator = "snap_universe")))
-  expect_true(is_dsl_definition(define_across_sum(
-    input_regex = "^snap", addend_function = identity, output_naming_function = identity)))
+  expect_true(is_dsl_definition(define_percent("a", "b")))
+  expect_true(is_dsl_definition(define_complement("y", output = "x")))
+  expect_true(is_dsl_definition(define_metadata("x", definition = "desc")))
+  expect_true(is_dsl_definition(define_percent(
+    "^snap", denominator = "snap_universe", each = TRUE)))
+  expect_true(is_dsl_definition(define_sum(
+    "^snap", each = TRUE,
+    add_replace = c("a" = "b"), output_replace = c("a" = "b"))))
 
   ## negative cases
   expect_false(is_dsl_definition("snap"))
@@ -20,40 +21,39 @@ test_that("is_dsl_definition identifies valid definitions", {
 ####----validate_definition() Tests----####
 
 test_that("validate_definition accepts valid simple_percent", {
-  def = define_percent("x_percent", numerator = "x", denominator = "y")
+  def = define_percent("x", "y")
   expect_true(validate_definition(def))
 })
 
 test_that("validate_definition accepts valid complex", {
-  def = define_percent("x_percent",
-                       numerator_variables = c("a", "b"),
-                       denominator_variables = c("c"))
+  def = define_percent(c("a", "b"), denominator = "c", output = "x_percent")
   expect_true(validate_definition(def))
 })
 
 test_that("validate_definition accepts valid across_percent", {
-  def = define_across_percent(
-    input_regex = "^race_",
-    output_suffix = "_percent",
-    denominator = "race_universe")
+  def = define_percent("^race_", denominator = "race_universe", each = TRUE)
   expect_true(validate_definition(def))
 })
 
 test_that("validate_definition accepts valid across_sum", {
-  def = define_across_sum(
-    input_regex = "^male_",
-    addend_function = function(x) sub("male", "female", x),
-    output_naming_function = function(x) sub("male_", "total_", x))
+  def = define_sum("^male_", each = TRUE,
+                   add_replace = c("male" = "female"),
+                   output_replace = c("male_" = "total_"))
   expect_true(validate_definition(def))
 })
 
 test_that("validate_definition accepts valid one_minus", {
-  def = define_one_minus("not_x_percent", source_variable = "x_percent")
+  def = define_complement("x_percent", output = "not_x_percent")
   expect_true(validate_definition(def))
 })
 
 test_that("validate_definition accepts valid metadata", {
-  def = define_metadata("my_var", definition_text = "A description.")
+  def = define_metadata("my_var", definition = "A description.")
+  expect_true(validate_definition(def))
+})
+
+test_that("validate_definition accepts valid sum", {
+  def = define_sum(c("a", "b", "c"), output = "total")
   expect_true(validate_definition(def))
 })
 
@@ -132,57 +132,224 @@ test_that("validate_definition rejects metadata missing definition_text", {
     "definition_text.*must be a non-empty string")
 })
 
+test_that("validate_definition rejects sum missing columns", {
+  expect_error(
+    validate_definition(list(type = "sum", output = "x")),
+    "requires `columns`")
+})
+
+####----define_percent() constructor tests----####
+
+test_that("define_percent infers output name for simple case", {
+  def = define_percent("snap_received", "snap_universe")
+  expect_equal(def[["output"]], "snap_received_percent")
+  expect_equal(def[["type"]], "complex")
+})
+
+test_that("define_percent produces complex type for vector numerator", {
+  def = define_percent(c("a", "b"), denominator = "c", output = "x_percent")
+  expect_equal(def[["type"]], "complex")
+  expect_equal(def[["numerator_variables"]], c("a", "b"))
+  expect_equal(def[["denominator_regex"]], "c")
+})
+
+test_that("define_percent produces complex type for regex numerator", {
+  def = define_percent("age_(6[5-9]).*_years",
+                       denominator = "universe",
+                       output = "over_65_percent")
+  expect_equal(def[["type"]], "complex")
+  expect_equal(def[["numerator_regex"]], "age_(6[5-9]).*_years")
+})
+
+test_that("define_percent produces across_percent for each=TRUE", {
+  def = define_percent("^race_", denominator = "race_universe", each = TRUE)
+  expect_equal(def[["type"]], "across_percent")
+  expect_equal(def[["input_regex"]], "^race_")
+  expect_equal(def[["denominator"]], "race_universe")
+  expect_equal(def[["output_suffix"]], "_percent")
+})
+
+test_that("define_percent handles denominator_replace with single replacement", {
+  def = define_percent("poverty.*below",
+                       denominator_replace = c("below" = "universe"),
+                       each = TRUE)
+  expect_equal(def[["type"]], "across_percent")
+  expect_true(is.function(def[["denominator_function"]]))
+  expect_equal(def[["denominator_function"]]("poverty_below_black"),
+               "poverty_universe_black")
+})
+
+test_that("define_percent handles denominator_replace with multiple replacements", {
+  def = define_percent("year_built_[0-9]",
+                       denominator_replace = c("[0-9].*" = "universe", "built_" = ""),
+                       each = TRUE)
+  expect_equal(def[["denominator_function"]]("year_built_1940_1949"),
+               "year_universe")
+})
+
+test_that("define_sum handles exclude parameter", {
+  def = define_sum("^col_", each = TRUE,
+                   add_replace = c("a" = "b"),
+                   output_replace = c("a" = "b"),
+                   exclude = "unwanted")
+  expect_equal(def[["exclude_regex"]], "unwanted")
+})
+
+test_that("define_percent handles subtract_from_numerator (single string -> regex)", {
+  def = define_percent(c("universe"), denominator = "universe",
+                       subtract_from_numerator = "excluded",
+                       output = "remainder_percent")
+  expect_equal(def[["type"]], "complex")
+  expect_equal(def[["numerator_subtract_regex"]], "excluded")
+})
+
+test_that("define_percent handles subtract_from_numerator (vector -> variables)", {
+  def = define_percent(c("universe"), denominator = "universe",
+                       subtract_from_numerator = c("a", "b"),
+                       output = "remainder_percent")
+  expect_equal(def[["type"]], "complex")
+  expect_equal(def[["numerator_subtract_variables"]], c("a", "b"))
+})
+
+test_that("define_percent handles subtract_from_denominator (single string -> regex)", {
+  def = define_percent("transport.*bike$",
+                       denominator = "transport_universe",
+                       subtract_from_denominator = "transport_wfh",
+                       output = "bike_percent")
+  expect_equal(def[["type"]], "complex")
+  expect_equal(def[["subtract_regex"]], "transport_wfh")
+})
+
+test_that("define_percent handles subtract_from_denominator (vector -> variables)", {
+  def = define_percent("some_col",
+                       denominator = "some_denom",
+                       subtract_from_denominator = c("a", "b"),
+                       output = "result_percent")
+  expect_equal(def[["type"]], "complex")
+  expect_equal(def[["subtract_variables"]], c("a", "b"))
+})
+
+test_that("define_percent handles exclude parameter", {
+  def = define_percent("^race_", denominator = "race_universe",
+                       each = TRUE, exclude = "universe|percent")
+  expect_equal(def[["exclude_regex"]], "universe|percent")
+})
+
+test_that("define_percent exclude in non-each mode goes to numerator_exclude_regex", {
+  def = define_percent("some_pattern.*",
+                       denominator = "denom",
+                       output = "result_percent",
+                       exclude = "unwanted")
+  expect_equal(def[["numerator_exclude_regex"]], "unwanted")
+})
+
+test_that("execute_definition warns when each=TRUE matches only 1 column", {
+  df = data.frame(race_white = 10, race_universe = 100, other_col = 50)
+  def = define_percent("^race_white$", denominator = "race_universe", each = TRUE)
+  expect_warning(
+    execute_definition(df, def),
+    "matched only 1 column")
+})
+
+####----define_sum() constructor tests----####
+
+test_that("define_sum produces sum type for simple case", {
+  def = define_sum(c("a", "b", "c"), output = "total")
+  expect_equal(def[["type"]], "sum")
+  expect_equal(def[["columns"]], c("a", "b", "c"))
+  expect_equal(def[["output"]], "total")
+})
+
+test_that("define_sum produces across_sum for each=TRUE", {
+  def = define_sum("female_.*years", each = TRUE,
+                   add_replace = c("female" = "male"),
+                   output_replace = c("female_" = "total_"))
+  expect_equal(def[["type"]], "across_sum")
+  expect_true(is.function(def[["addend_function"]]))
+  expect_true(is.function(def[["output_naming_function"]]))
+  expect_equal(def[["addend_function"]]("female_20_years"), "male_20_years")
+  expect_equal(def[["output_naming_function"]]("female_20_years"), "total_20_years")
+})
+
+####----define_complement() constructor tests----####
+
+test_that("define_complement produces one_minus type", {
+  def = define_complement("x_percent", output = "not_x_percent")
+  expect_equal(def[["type"]], "one_minus")
+  expect_equal(def[["source_variable"]], "x_percent")
+  expect_equal(def[["output"]], "not_x_percent")
+})
+
+####----define_metadata() constructor tests----####
+
+test_that("define_metadata produces metadata type", {
+  def = define_metadata("my_var", definition = "A description.")
+  expect_equal(def[["type"]], "metadata")
+  expect_equal(def[["definition_text"]], "A description.")
+})
+
 ####----extract_explicit_variables() Tests----####
 
 test_that("extract_explicit_variables for simple_percent", {
-  def = define_percent("x_pct", numerator = "a", denominator = "b")
+  ## auto_percent produces simple_percent with known column names
+  def = list(type = "simple_percent", output = "a_percent", numerator = "a", denominator = "b")
   expect_equal(sort(extract_explicit_variables(def)), c("a", "b"))
 })
 
-test_that("extract_explicit_variables for complex", {
-  def = define_percent("x_pct",
-                       numerator_variables = c("a", "b"),
-                       numerator_subtract_variables = c("b"),
-                       denominator_variables = c("c"),
-                       subtract_variables = c("d"))
-  expect_equal(sort(extract_explicit_variables(def)), c("a", "b", "c", "d"))
+test_that("extract_explicit_variables for complex with regex returns empty", {
+  ## define_percent("a", "b") produces complex with regex — no explicit vars
+  def = define_percent("a", "b")
+  expect_equal(extract_explicit_variables(def), character(0))
+})
+
+test_that("extract_explicit_variables for complex with vector inputs", {
+  def = define_percent(c("a", "b"), denominator = "c",
+                       subtract_from_numerator = c("b", "d"),
+                       subtract_from_denominator = c("e", "f"),
+                       output = "x_percent")
+  expect_equal(sort(extract_explicit_variables(def)), c("a", "b", "d", "e", "f"))
 })
 
 test_that("extract_explicit_variables for across_percent with fixed denominator", {
-  def = define_across_percent(
-    input_regex = "^race_", output_suffix = "_pct", denominator = "race_universe")
+  def = define_percent("^race_", denominator = "race_universe", each = TRUE)
   expect_equal(extract_explicit_variables(def), "race_universe")
 })
 
-test_that("extract_explicit_variables for across_percent with function denominator", {
-  def = define_across_percent(
-    input_regex = "^race_", output_suffix = "_pct",
-    denominator_function = function(x) "race_universe")
+test_that("extract_explicit_variables for across_percent with denominator_replace", {
+  def = define_percent("^race_",
+                       denominator_replace = c("x" = "y"),
+                       each = TRUE)
   ## function-based denominators can't be statically extracted
   expect_equal(extract_explicit_variables(def), character(0))
 })
 
 test_that("extract_explicit_variables for one_minus", {
-  def = define_one_minus("not_x", source_variable = "x")
+  def = define_complement("x", output = "not_x")
   expect_equal(extract_explicit_variables(def), "x")
 })
 
 test_that("extract_explicit_variables for metadata returns empty", {
-  def = define_metadata("my_var", definition_text = "desc")
+  def = define_metadata("my_var", definition = "desc")
   expect_equal(extract_explicit_variables(def), character(0))
 })
 
 test_that("extract_explicit_variables for across_sum returns empty", {
-  def = define_across_sum(
-    input_regex = "^x_", addend_function = identity, output_naming_function = identity)
+  def = define_sum("^x_", each = TRUE,
+                   add_replace = c("a" = "b"), output_replace = c("a" = "b"))
   expect_equal(extract_explicit_variables(def), character(0))
 })
 
+test_that("extract_explicit_variables for sum returns columns", {
+  def = define_sum(c("a", "b", "c"), output = "total")
+  expect_equal(sort(extract_explicit_variables(def)), c("a", "b", "c"))
+})
+
 ####----resolve_definition_variables() Tests----####
 
-test_that("resolve_definition_variables resolves ACS codes to clean names", {
-  defs = list(
-    define_percent("x_pct", numerator = "B22003_002", denominator = "B22003_001"))
+test_that("resolve_definition_variables resolves ACS codes in simple_percent", {
+  ## auto_percent produces simple_percent with ACS codes that need resolution
+  defs = list(list(type = "simple_percent", output = "x_pct",
+                   numerator = "B22003_002", denominator = "B22003_001"))
   raw_vars = c(snap_universe_ = "B22003_001", snap_received_ = "B22003_002")
 
   resolved = resolve_definition_variables(defs, raw_vars)
@@ -190,54 +357,46 @@ test_that("resolve_definition_variables resolves ACS codes to clean names", {
   expect_equal(resolved[[1]][["denominator"]], "snap_universe")
 })
 
-test_that("resolve_definition_variables leaves clean names unchanged", {
-  defs = list(
-    define_percent("x_pct", numerator = "snap_received", denominator = "snap_universe"))
+test_that("resolve_definition_variables resolves complex type numerator_variables", {
+  defs = list(define_percent(c("B22003_001", "B22003_002"),
+                             denominator = "B22003_001",
+                             output = "x_pct"))
   raw_vars = c(snap_universe_ = "B22003_001", snap_received_ = "B22003_002")
 
   resolved = resolve_definition_variables(defs, raw_vars)
-  expect_equal(resolved[[1]][["numerator"]], "snap_received")
-  expect_equal(resolved[[1]][["denominator"]], "snap_universe")
+  expect_equal(resolved[[1]][["numerator_variables"]], c("snap_universe", "snap_received"))
 })
 
 test_that("resolve_definition_variables handles empty variables vector", {
-  defs = list(define_percent("x_pct", numerator = "a", denominator = "b"))
+  defs = list(list(type = "simple_percent", output = "a_percent",
+                   numerator = "a", denominator = "b"))
   resolved = resolve_definition_variables(defs, character(0))
   expect_equal(resolved[[1]][["numerator"]], "a")
   expect_equal(resolved[[1]][["denominator"]], "b")
 })
 
-test_that("resolve_definition_variables resolves complex type variables", {
-  defs = list(define_percent("x_pct",
-                             numerator_variables = c("B22003_001", "B22003_002"),
-                             denominator_variables = c("B22003_001")))
-  raw_vars = c(snap_universe_ = "B22003_001", snap_received_ = "B22003_002")
-
-  resolved = resolve_definition_variables(defs, raw_vars)
-  expect_equal(resolved[[1]][["numerator_variables"]], c("snap_universe", "snap_received"))
-  expect_equal(resolved[[1]][["denominator_variables"]], c("snap_universe"))
-})
-
 ####----validate_definition_variables() Tests----####
 
 test_that("validate_definition_variables passes when all variables exist", {
-  defs = list(define_percent("x_pct", numerator = "a", denominator = "b"))
+  defs = list(define_percent(c("a", "b"), denominator = "c", output = "x_pct"))
   expect_true(validate_definition_variables(defs, c("a", "b", "c")))
 })
 
 test_that("validate_definition_variables errors on missing variables", {
-  defs = list(define_percent("x_pct", numerator = "a", denominator = "missing_var"))
+  defs = list(define_percent(c("a", "missing_var"),
+                             denominator = c("c", "d"),
+                             output = "x_pct"))
   expect_error(
-    validate_definition_variables(defs, c("a", "b", "c")),
+    validate_definition_variables(defs, c("a", "c", "d")),
     "missing_var")
 })
 
 ####----check_multi_table_variables() Tests----####
 
 test_that("check_multi_table_variables warns when variables span tables", {
-  defs = list(define_percent("x_pct",
-                             numerator_variables = c("snap_universe", "public_assistance_universe"),
-                             denominator_variables = c("snap_universe")))
+  defs = list(define_percent(c("snap_universe", "public_assistance_universe"),
+                             denominator = "snap_universe",
+                             output = "x_pct"))
   expect_warning(
     check_multi_table_variables(defs,
                                 resolved_tables = c("snap", "public_assistance"),
@@ -246,9 +405,19 @@ test_that("check_multi_table_variables warns when variables span tables", {
 })
 
 test_that("check_multi_table_variables does not warn for single-table variables", {
-  defs = list(define_percent("x_pct", numerator = "snap_received", denominator = "snap_universe"))
+  defs = list(list(type = "simple_percent", output = "x_pct",
+                   numerator = "snap_received", denominator = "snap_universe"))
   expect_silent(
     check_multi_table_variables(defs,
                                 resolved_tables = c("snap"),
                                 auto_table_entries = list()))
 })
+
+####----execute_definition() Tests----####
+
+test_that("execute_definition handles sum type", {
+  df = data.frame(a = c(1, 2, 3), b = c(4, 5, 6), c = c(7, 8, 9))
+  def = define_sum(c("a", "b", "c"), output = "total")
+  result = execute_definition(df, def)
+  expect_equal(result$total, c(12, 15, 18))
+})