From 024940df71ddbc93d1442856c8f560d32ac049a6 Mon Sep 17 00:00:00 2001
From: Will Curran-Groome <wcurrangroome@urban.org>
Date: Thu, 23 Apr 2026 10:35:26 -0400
Subject: [PATCH] supporting tribal geographies as a target

---
 AGENTS.md                                     |  40 ---
 R/crosswalk_data.R                            | 251 +++++++++++++++---
 R/get_crosswalk.R                             |  16 +-
 R/get_geocorr_crosswalk.R                     |  36 ++-
 R/get_nhgis_crosswalk.R                       |  14 +
 R/plan_crosswalk_chain.R                      |  24 ++
 man/crosswalk_data.Rd                         |  28 ++
 man/get_crosswalk.Rd                          |   7 +-
 .../testthat/test-get_available_crosswalks.R  |  46 ++++
 tests/testthat/test-get_geocorr_crosswalk.R   | 116 ++++++++
 tests/testthat/test-plan_crosswalk_chain.R    |  55 ++++
 11 files changed, 554 insertions(+), 79 deletions(-)
 delete mode 100644 AGENTS.md
diff --git a/AGENTS.md b/AGENTS.md
deleted file mode 100644
index e58b193..0000000
--- a/AGENTS.md
+++ /dev/null
@@ -1,40 +0,0 @@
-# Agent Instructions
-
-This project uses **bd** (beads) for issue tracking. Run `bd onboard` to get started.
-
-## Quick Reference
-
-```bash
-bd ready              # Find available work
-bd show <id>          # View issue details
-bd update <id> --status in_progress  # Claim work
-bd close <id>         # Complete work
-bd sync               # Sync with git
-```
-
-## Landing the Plane (Session Completion)
-
-**When ending a work session**, you MUST complete ALL steps below. Work is NOT complete until `git push` succeeds.
-
-**MANDATORY WORKFLOW:**
-
-1. **File issues for remaining work** - Create issues for anything that needs follow-up
-2. **Run quality gates** (if code changed) - Tests, linters, builds
-3. **Update issue status** - Close finished work, update in-progress items
-4. **PUSH TO REMOTE** - This is MANDATORY:
-   ```bash
-   git pull --rebase
-   bd sync
-   git push
-   git status  # MUST show "up to date with origin"
-   ```
-5. **Clean up** - Clear stashes, prune remote branches
-6. **Verify** - All changes committed AND pushed
-7. **Hand off** - Provide context for next session
-
-**CRITICAL RULES:**
-- Work is NOT complete until `git push` succeeds
-- NEVER stop before pushing - that leaves work stranded locally
-- NEVER say "ready to push when you are" - YOU (Claude) must push
-- If push fails, resolve and retry until it succeeds
-
diff --git a/R/crosswalk_data.R b/R/crosswalk_data.R
index 3996fcc..1e9d118 100644
--- a/R/crosswalk_data.R
+++ b/R/crosswalk_data.R
@@ -41,6 +41,17 @@
 #'    mean, median, percentage, and ratio variables. These will be calculated as weighted
 #'    means using the allocation factor as weights. If NULL (default), automatically
 #'    detects columns with prefixes "mean_", "median_", "percent_", or "ratio_".
+#' @param custom_interpolations A list of lists, each specifying a group of columns
+#'    and a custom interpolation function. Each element must have:
+#'    \describe{
+#'      \item{columns}{Character vector of column names}
+#'      \item{fn}{A function or formula for interpolation. Receives two arguments:
+#'        `.x` (column values) and `.w` (allocation factors). Formulas using `~`
+#'        syntax (e.g., `~sum(.x * .w, na.rm = TRUE)`) are converted to functions
+#'        via `rlang::as_function()`.}
+#'    }
+#'    Columns in `custom_interpolations` must not overlap with `count_columns` or
+#'    `non_count_columns`. Default is NULL (no custom interpolations).
 #' @param return_intermediate Logical. If TRUE and crosswalk has multiple steps,
 #'    returns a list containing both the final result and intermediate results
 #'    from each step. Default is FALSE, which returns only the final result.
@@ -151,6 +162,21 @@
 #' # Access intermediate and final
 #' result$intermediate$step_1  # After first crosswalk
 #' result$final                # Final result
+#'
+#' # Custom interpolation functions
+#' result <- crosswalk_data(
+#'   data = my_data,
+#'   crosswalk = crosswalk,
+#'   custom_interpolations = list(
+#'     list(
+#'       columns = c("count_population", "count_housing"),
+#'       fn = ~sum(.x * .w, na.rm = TRUE)
+#'     ),
+#'     list(
+#'       columns = c("pct_poverty"),
+#'       fn = ~weighted.mean(.x, .w, na.rm = TRUE)
+#'     )
+#'   ))
 #' }
 
 crosswalk_data <- function(
@@ -165,6 +191,7 @@ crosswalk_data <- function(
     geoid_column = "source_geoid",
     count_columns = NULL,
     non_count_columns = NULL,
+    custom_interpolations = NULL,
     return_intermediate = FALSE,
     show_join_quality = TRUE,
     silent = getOption("crosswalk.silent", FALSE)) {
@@ -222,14 +249,28 @@ crosswalk_data <- function(
       stringr::str_starts(data_columns, "ratio_")]
   }
 
-  if (length(count_columns) == 0 & length(non_count_columns) == 0) {
+  # Validate custom_interpolations if provided
+  if (!is.null(custom_interpolations)) {
+    custom_interpolations <- validate_custom_interpolations(
+      custom_interpolations, names(data), count_columns, non_count_columns)
+  }
+
+  has_custom <- !is.null(custom_interpolations) && length(custom_interpolations) > 0
+
+  if (length(count_columns) == 0 & length(non_count_columns) == 0 & !has_custom) {
     stop(
-      "No columns to crosswalk. Either specify `count_columns` or `non_count_columns`, ",
-      "or ensure your data has columns with prefixes: count_, mean_, median_, percent_, or ratio_.")
+      "No columns to crosswalk. Either specify `count_columns`, `non_count_columns`, ",
+      "or `custom_interpolations`, or ensure your data has columns with prefixes: ",
+      "count_, mean_, median_, percent_, or ratio_.")
   }
 
   # Check that specified columns exist in original data
-  all_value_columns <- c(count_columns, non_count_columns)
+  custom_cols <- if (has_custom) {
+    unlist(purrr::map(custom_interpolations, "columns"))
+  } else {
+    character(0)
+  }
+  all_value_columns <- c(count_columns, non_count_columns, custom_cols)
   missing_columns <- setdiff(all_value_columns, names(data))
   if (length(missing_columns) > 0) {
     stop(
@@ -261,6 +302,7 @@ crosswalk_data <- function(
       geoid_column = current_geoid_column,
       count_columns = count_columns,
       non_count_columns = non_count_columns,
+      custom_interpolations = custom_interpolations,
       step_number = i,
       total_steps = n_steps,
       show_join_quality = show_join_quality)
@@ -633,6 +675,79 @@ report_join_quality <- function(data, crosswalk, geoid_column, step_number = 1,
 }
 
 
+#' Validate Custom Interpolations Parameter
+#'
+#' Internal function that validates and normalizes the custom_interpolations parameter.
+#' Converts formula-style functions to proper functions with `.x` and `.w` parameters.
+#'
+#' @param custom_interpolations A list of lists with `columns` and `fn` elements.
+#' @param data_columns Character vector of column names in data.
+#' @param count_columns Character vector of count column names.
+#' @param non_count_columns Character vector of non-count column names.
+#' @return The validated and normalized custom_interpolations list.
+#' @keywords internal
+#' @noRd
+validate_custom_interpolations <- function(custom_interpolations, data_columns,
+                                           count_columns, non_count_columns) {
+  if (!is.list(custom_interpolations)) {
+    stop("`custom_interpolations` must be a list of lists, each with `columns` and `fn` elements.")
+  }
+
+  all_custom_cols <- character(0)
+
+  for (i in seq_along(custom_interpolations)) {
+    group <- custom_interpolations[[i]]
+
+    if (!is.list(group) || !all(c("columns", "fn") %in% names(group))) {
+      stop(
+        "`custom_interpolations[[", i, "]]` must be a list with `columns` and `fn` elements.")
+    }
+
+    if (!is.character(group$columns) || length(group$columns) == 0) {
+      stop(
+        "`custom_interpolations[[", i, "]]$columns` must be a non-empty character vector.")
+    }
+
+    # Convert formula to function with .x and .w parameters
+    if (rlang::is_formula(group$fn)) {
+      fn_body <- rlang::f_rhs(group$fn)
+      fn_env <- rlang::f_env(group$fn)
+      custom_interpolations[[i]]$fn <- rlang::new_function(
+        alist(.x = , .w = ), fn_body, fn_env)
+    } else if (!is.function(group$fn)) {
+      stop(
+        "`custom_interpolations[[", i, "]]$fn` must be a function or formula.")
+    }
+
+    # Check for overlap within custom groups
+    overlap_within <- intersect(group$columns, all_custom_cols)
+    if (length(overlap_within) > 0) {
+      stop(
+        "Column(s) appear in multiple `custom_interpolations` groups: ",
+        paste(overlap_within, collapse = ", "))
+    }
+    all_custom_cols <- c(all_custom_cols, group$columns)
+  }
+
+  # Check for overlap with count_columns and non_count_columns
+  overlap_count <- intersect(all_custom_cols, count_columns)
+  if (length(overlap_count) > 0) {
+    stop(
+      "Column(s) in `custom_interpolations` overlap with `count_columns`: ",
+      paste(overlap_count, collapse = ", "))
+  }
+
+  overlap_non_count <- intersect(all_custom_cols, non_count_columns)
+  if (length(overlap_non_count) > 0) {
+    stop(
+      "Column(s) in `custom_interpolations` overlap with `non_count_columns`: ",
+      paste(overlap_non_count, collapse = ", "))
+  }
+
+  return(custom_interpolations)
+}
+
+
 #' Apply a Single Crosswalk Step
 #'
 #' Internal function that applies one crosswalk tibble to data.
@@ -642,6 +757,7 @@ report_join_quality <- function(data, crosswalk, geoid_column, step_number = 1,
 #' @param geoid_column Column name for source geoid
 #' @param count_columns Count variable columns
 #' @param non_count_columns Non-count variable columns
+#' @param custom_interpolations Validated custom interpolation groups (or NULL)
 #' @param step_number Integer. Current step number for multi-step reporting.
 #' @param total_steps Integer. Total number of steps for multi-step reporting.
 #' @param show_join_quality Logical. Whether to report join quality diagnostics.
@@ -654,6 +770,7 @@ apply_single_crosswalk <- function(
     geoid_column,
     count_columns,
     non_count_columns,
+    custom_interpolations = NULL,
     step_number = 1,
     total_steps = 1,
     show_join_quality = TRUE) {
@@ -700,7 +817,22 @@ apply_single_crosswalk <- function(
   current_count_cols <- intersect(count_columns, names(data))
   current_non_count_cols <- intersect(non_count_columns, names(data))
 
-  # Identify "other" columns (not geoid, count, or non-count columns)
+  # Filter custom interpolation columns to those in current data
+  current_custom_interpolations <- if (!is.null(custom_interpolations)) {
+    purrr::map(custom_interpolations, function(group) {
+      list(
+        columns = intersect(group$columns, names(data)),
+        fn = group$fn)
+    }) |>
+      purrr::keep(~ length(.x$columns) > 0)
+  } else {
+    list()
+  }
+
+  current_custom_cols <- unlist(purrr::map(current_custom_interpolations, "columns"))
+  if (is.null(current_custom_cols)) current_custom_cols <- character(0)
+
+  # Identify "other" columns (not geoid, count, non-count, or custom columns)
   # These will be aggregated by taking the first non-missing value
   # Include both original crosswalk column names AND their renamed versions
   # (e.g., "geography_name" which comes from "target_geography_name" after renaming)
@@ -714,41 +846,96 @@ apply_single_crosswalk <- function(
                       "land_area_sqmi")
   other_cols <- setdiff(
     names(data),
-    c(geoid_column, current_count_cols, current_non_count_cols, crosswalk_cols)
+    c(geoid_column, current_count_cols, current_non_count_cols,
+      current_custom_cols, crosswalk_cols)
   )
 
+  # All interpolated columns (for NA validity tracking)
+  all_interpolated_cols <- c(current_count_cols, current_non_count_cols,
+                             current_custom_cols)
 
   # Join crosswalk to data
-  result <- data |>
+  joined_data <- data |>
     dplyr::mutate(
       dplyr::across(dplyr::all_of(geoid_column), as.character)) |>
     dplyr::left_join(
       crosswalk,
-      by = stats::setNames("source_geoid", geoid_column)) |>
-    tidytable::summarize(
-      .by = dplyr::all_of(group_cols),
-      ## count variables we take the sum of the weighted count variable
-      dplyr::across(
-        .cols = dplyr::all_of(current_count_cols),
-        .fns = ~ sum(.x * allocation_factor_source_to_target, na.rm = TRUE)),
-      ## non-count variables--means, medians, percentages, ratios, etc.--
-      ## we take the weighted mean of the variable, weighted by the allocation factor
-      tidytable::across(
-        .cols = tidytable::all_of(current_non_count_cols),
-        .fns = ~ stats::weighted.mean(.x, allocation_factor_source_to_target, na.rm = TRUE)),
-      ## other columns: take first non-missing value (or NA if all missing)
-      tidytable::across(
-        .cols = tidytable::all_of(other_cols),
-        .fns = ~ dplyr::first(.x, na_rm = TRUE)),
-      tidytable::across(
-        .cols = tidytable::all_of(c(current_count_cols, current_non_count_cols)),
-        .fns = ~ sum(!is.na(.x)),
-        .names = "{.col}_validx")) |>
-    tidytable::mutate(
-      tidytable::across(
-        .cols = tidytable::all_of(c(current_count_cols, current_non_count_cols)),
-        .fns = ~ tidytable::if_else(get(stringr::str_c(tidytable::cur_column(), "_validx")) > 0, .x, NA))) |>
-    dplyr::select(-dplyr::matches("_validx$")) |>
+      by = stats::setNames("source_geoid", geoid_column))
+
+  # Standard columns for main summarize (count, non-count)
+  standard_cols <- c(current_count_cols, current_non_count_cols)
+  has_standard <- length(standard_cols) > 0 || length(other_cols) > 0
+
+  # Main summarize for standard columns (count, non-count, other)
+  if (has_standard) {
+    result <- joined_data |>
+      tidytable::summarize(
+        .by = dplyr::all_of(group_cols),
+        ## count variables: sum of weighted values
+        dplyr::across(
+          .cols = dplyr::all_of(current_count_cols),
+          .fns = ~ sum(.x * allocation_factor_source_to_target, na.rm = TRUE)),
+        ## non-count variables: weighted mean
+        tidytable::across(
+          .cols = tidytable::all_of(current_non_count_cols),
+          .fns = ~ stats::weighted.mean(.x, allocation_factor_source_to_target, na.rm = TRUE)),
+        ## other columns: first non-missing value
+        tidytable::across(
+          .cols = tidytable::all_of(other_cols),
+          .fns = ~ dplyr::first(.x, na_rm = TRUE)),
+        ## validity tracking for NA handling
+        tidytable::across(
+          .cols = tidytable::all_of(standard_cols),
+          .fns = ~ sum(!is.na(.x)),
+          .names = "{.col}_validx"))
+
+    # Replace standard interpolated values with NA where all source values were NA
+    if (length(standard_cols) > 0) {
+      result <- result |>
+        tidytable::mutate(
+          tidytable::across(
+            .cols = tidytable::all_of(standard_cols),
+            .fns = ~ tidytable::if_else(
+              get(stringr::str_c(tidytable::cur_column(), "_validx")) > 0,
+              .x, NA)))
+    }
+
+    result <- result |>
+      dplyr::select(-dplyr::matches("_validx$"))
+  } else {
+    # No standard columns; start with unique target groups
+    result <- joined_data |>
+      dplyr::distinct(dplyr::across(dplyr::all_of(group_cols)))
+  }
+
+  # Compute custom interpolations in a separate pass and join back
+  if (length(current_custom_interpolations) > 0) {
+    for (group in current_custom_interpolations) {
+      group_fn <- group$fn
+      group_cols_i <- group$columns
+
+      # Compute each custom column individually and join to result
+      custom_results <- purrr::map(group_cols_i, function(col_name) {
+        joined_data |>
+          tidytable::summarize(
+            .by = dplyr::all_of(group_cols),
+            val = group_fn(!!rlang::sym(col_name),
+                           .w = allocation_factor_source_to_target),
+            validx = sum(!is.na(!!rlang::sym(col_name)))) |>
+          tidytable::mutate(
+            val = tidytable::if_else(validx > 0, val, NA)) |>
+          dplyr::select(-validx) |>
+          dplyr::rename(!!col_name := val)
+      })
+
+      for (custom_result in custom_results) {
+        result <- result |>
+          dplyr::left_join(custom_result, by = group_cols)
+      }
+    }
+  }
+
+  result <- result |>
     dplyr::rename_with(
       .cols = dplyr::everything(),
       .fn = ~ stringr::str_remove_all(.x, "target_")) |>
diff --git a/R/get_crosswalk.R b/R/get_crosswalk.R
index df8e55d..483700b 100644
--- a/R/get_crosswalk.R
+++ b/R/get_crosswalk.R
@@ -45,7 +45,8 @@
 #' @param source_geography Character. Source geography name. One of c("block",
 #'    "block group", "tract", "place", "county", "urban_area", "zcta", "puma",
 #'    "puma12", "puma22", "cd115", "cd116", "cd118", "cd119", "urban_area",
-#'    "core_based_statistical_area").
+#'    "core_based_statistical_area"). Note: "aiannh" (tribal areas) is currently
+#'    supported as a **target** geography only, not as a source.
 #' @param target_year Character or numeric. Year of the target geography, one of
 #'    c(1990, 2000, 2010, 2020) for decennial crosswalks, or c(2011, 2012, 2014,
 #'    2015, 2022) for non-census year crosswalks (limited to block groups, tracts,
@@ -53,7 +54,9 @@
 #' @param target_geography Character. Target geography name. One of c("block",
 #'    "block group", "tract", "place", "county", "urban_area", "zcta", "puma",
 #'    "puma12", "puma22", "cd115", "cd116", "cd118", "cd119", "urban_area",
-#'    "core_based_statistical_area").
+#'    "core_based_statistical_area", "aiannh"). Tribal areas ("aiannh", American
+#'    Indian / Alaska Native / Native Hawaiian areas) are available via
+#'    GeoCorr 2022 only (target_year >= 2020).
 #' @param weight Character. Weighting variable for Geocorr crosswalks. One of
 #'    c("population", "housing", "land").
 #' @param cache Directory path. Where to download the crosswalk to. If NULL (default),
@@ -470,6 +473,15 @@ get_available_crosswalks <- function() {
       source_year = 2022L,
       target_year = 2022L)
 
+  # aiannh (tribal areas) is supported as a target geography only, via GeoCorr 2022
+  geocorr_2022_aiannh <- tibble::tibble(
+    source_geography = geocorr_2022_geographies,
+    target_geography = "aiannh",
+    source_year = 2022L,
+    target_year = 2022L)
+
+  geocorr_2022 <- dplyr::bind_rows(geocorr_2022, geocorr_2022_aiannh)
+
   # 3. Geocorr 2018: all pairwise combinations of 9 canonical geographies
   geocorr_2018_geographies <- c(
     "block", "block_group", "tract", "county", "place",
diff --git a/R/get_geocorr_crosswalk.R b/R/get_geocorr_crosswalk.R
index 4db677c..bc760e0 100644
--- a/R/get_geocorr_crosswalk.R
+++ b/R/get_geocorr_crosswalk.R
@@ -32,7 +32,11 @@ get_geocorr_config <- function(version = "2022") {
         "puma" = "puma22",
         "puma22" = "puma22",
         "cd118" = "cd118",
-        "cd119" = "cd119"),
+        "cd119" = "cd119",
+        "aiannh" = "aiannh",
+        "tribal" = "aiannh",
+        "tribal_area" = "aiannh",
+        "american_indian_area" = "aiannh"),
       puma_col = "puma22",
       cd_pattern = "^cd11[89]",
       has_name_columns = TRUE,
@@ -117,10 +121,12 @@ resolve_geocorr_geography <- function(geography, geocorr_version = "2022") {
 #'    of the geographies supported by GeoCorr.
 #'
 #'    For GeoCorr 2022: c("place", "county", "tract", "blockgroup", "zcta",
-#'    "puma22", "cd119", "cd118").
+#'    "puma22", "cd119", "cd118", "aiannh"). Note that "aiannh" (tribal areas)
+#'    is supported as a **target** geography only, not as a source.
 #'
 #'    For GeoCorr 2018: c("placefp", "county", "tract", "bg", "zcta5",
-#'    "puma12", "cd115", "cd116").
+#'    "puma12", "cd115", "cd116"). Tribal areas (aiannh) are not available in
+#'    GeoCorr 2018.
 #'
 #'    User-facing names like "puma", "zcta", "place", "blockgroup" are
 #'    automatically resolved to the correct API code for the version.
@@ -151,6 +157,15 @@ get_geocorr_crosswalk <- function(
   source_api_code <- resolve_geocorr_geography(source_geography, geocorr_version)
   target_api_code <- resolve_geocorr_geography(target_geography, geocorr_version)
 
+  # Tribal areas (aiannh) are currently supported as a target geography only.
+  # Sourcing from aiannh would require re-normalizing allocation factors across
+  # per-state query chunks for tribal areas that span state boundaries.
+  if (source_api_code == "aiannh") {
+    stop(
+      "Tribal areas ('aiannh') are currently supported as a target geography ",
+      "only, not as a source. Please specify 'aiannh' via `target_geography`.")
+  }
+
   outpath = "no file exists here"
   ## identify the relevant file paths for potentially-cached crosswalks
   if (!is.null(cache)) {
@@ -329,6 +344,11 @@ get_geocorr_crosswalk <- function(
     dplyr::rename_with(
       .cols = dplyr::matches("zip_name"),
       .fn = ~ .x |> stringr::str_replace("zip", "zcta")) |>
+    ## defensive: ensure aiannh name column is snake_cased regardless of how
+    ## janitor::clean_names handled the original aiannhName field
+    dplyr::rename_with(
+      .cols = dplyr::matches("^aiannhname$"),
+      .fn = ~ stringr::str_replace(.x, "aiannhname", "aiannh_name")) |>
     dplyr::rename_with(
       .cols = dplyr::matches(stringr::str_c(config$puma_col, "name")),
       .fn = ~ .x |> stringr::str_replace(
@@ -482,6 +502,16 @@ get_geocorr_crosswalk <- function(
       weighting_factor = weight,
       dplyr::across(.cols = dplyr::matches("allocation"), .fns = as.numeric))
 
+  ## When target is aiannh, GeoCorr returns rows with NA target_geoid for the
+  ## portion of each source geography that falls outside any tribal area.
+  ## These rows carry no crosswalk information (there is no tribal area to
+  ## allocate to) so we drop them. After this filter, allocation factors per
+  ## source sum to <= 1 rather than exactly 1, which correctly reflects that
+  ## tribal areas do not cover all land.
+  if (target_api_code == "aiannh") {
+    df2 <- df2 |> dplyr::filter(!is.na(target_geoid))
+  }
+
   if (!is.null(cache)) {
     ## if the file does not already exist and cache is TRUE
     if (!file.exists(outpath) & !is.null(cache)) {
diff --git a/R/get_nhgis_crosswalk.R b/R/get_nhgis_crosswalk.R
index 2dbf6af..9d792b2 100644
--- a/R/get_nhgis_crosswalk.R
+++ b/R/get_nhgis_crosswalk.R
@@ -864,6 +864,20 @@ variable. Get your key at https://account.ipums.org/api_keys") }
       names_to = "weighting_factor",
       values_to = "allocation_factor_source_to_target")
 
+  # Pad 1990 tract GEOIDs to standard 11 chars. The 1990 Census used 4-digit
+  # tract codes (without the ".00" decimal suffix), producing 9-char GEOIDs in
+  # NHGIS data (state=2 + county=3 + tract=4). Standard Census format uses
+  # 6-digit tract codes (tract=4 + suffix=2), so we right-pad with "0" to
+  # restore the implicit ".00" suffix.
+  if (source_geography_standardized == "tr") {
+    crosswalk_df <- crosswalk_df |>
+      dplyr::mutate(source_geoid = stringr::str_pad(source_geoid, 11, "right", "0"))
+  }
+  if (target_geography_standardized == "tr") {
+    crosswalk_df <- crosswalk_df |>
+      dplyr::mutate(target_geoid = stringr::str_pad(target_geoid, 11, "right", "0"))
+  }
+
     ## if the file does not already exist and cache is not NULL
     if (!file.exists(csv_path) & !is.null(cache)) {
       if (!dir.exists(cache)) {
diff --git a/R/plan_crosswalk_chain.R b/R/plan_crosswalk_chain.R
index 5c4535a..f7b7e27 100644
--- a/R/plan_crosswalk_chain.R
+++ b/R/plan_crosswalk_chain.R
@@ -56,6 +56,28 @@ plan_crosswalk_chain <- function(
   source_geog_std <- standardize_geography_for_chain(source_geography)
   target_geog_std <- standardize_geography_for_chain(target_geography)
 
+  # Tribal areas (aiannh) are target-only in the current implementation.
+  if (isTRUE(source_geog_std == "aiannh")) {
+    result$error <- stringr::str_c(
+      "Tribal areas ('aiannh') are currently supported as a target geography ",
+      "only, not as a source. Please specify 'aiannh' via `target_geography`.")
+    return(result)
+  }
+
+  # Tribal areas are only available via GeoCorr 2022 (2020 Census geography).
+  # Reject requests that would route to GeoCorr 2018 (target_year 2010-2019).
+  if (isTRUE(target_geog_std == "aiannh") && !is.null(target_year)) {
+    target_year_num <- suppressWarnings(as.numeric(target_year))
+    if (!is.na(target_year_num) && target_year_num < 2020) {
+      result$error <- stringr::str_c(
+        "Tribal area ('aiannh') crosswalks are only available via GeoCorr 2022 ",
+        "(2020 Census geography). The requested target_year ", target_year,
+        " would route to GeoCorr 2018, which does not support aiannh. ",
+        "Use target_year >= 2020.")
+      return(result)
+    }
+  }
+
 
   # Convert years to character for consistent handling
   # Use NA_character_ instead of NULL for tibble compatibility
@@ -326,6 +348,8 @@ standardize_geography_for_chain <- function(geography) {
     geography %in% c("cbsa", "core based statistical area") ~ "cbsa",
     geography %in% c("ua", "urban area", "urban areas") ~ "urban_area",
     geography %in% c("cd115", "cd116", "cd118", "cd119", "congressional district") ~ geography,
+    geography %in% c("aiannh", "tribal", "tribal area", "american indian area",
+                     "reservation", "aian") ~ "aiannh",
     TRUE ~ geography)
 }
 
diff --git a/man/crosswalk_data.Rd b/man/crosswalk_data.Rd
index 97d9fcc..f823343 100644
--- a/man/crosswalk_data.Rd
+++ b/man/crosswalk_data.Rd
@@ -16,6 +16,7 @@ crosswalk_data(
   geoid_column = "source_geoid",
   count_columns = NULL,
   non_count_columns = NULL,
+  custom_interpolations = NULL,
   return_intermediate = FALSE,
   show_join_quality = TRUE,
   silent = getOption("crosswalk.silent", FALSE)
@@ -66,6 +67,18 @@ mean, median, percentage, and ratio variables. These will be calculated as weigh
 means using the allocation factor as weights. If NULL (default), automatically
 detects columns with prefixes "mean_", "median_", "percent_", or "ratio_".}
 
+\item{custom_interpolations}{A list of lists, each specifying a group of columns
+and a custom interpolation function. Each element must have:
+\describe{
+\item{columns}{Character vector of column names}
+\item{fn}{A function or formula for interpolation. Receives two arguments:
+\code{.x} (column values) and \code{.w} (allocation factors). Formulas using \code{~}
+syntax (e.g., \code{~sum(.x * .w, na.rm = TRUE)}) are converted to functions
+via \code{rlang::as_function()}.}
+}
+Columns in \code{custom_interpolations} must not overlap with \code{count_columns} or
+\code{non_count_columns}. Default is NULL (no custom interpolations).}
+
 \item{return_intermediate}{Logical. If TRUE and crosswalk has multiple steps,
 returns a list containing both the final result and intermediate results
 from each step. Default is FALSE, which returns only the final result.}
@@ -186,5 +199,20 @@ result <- crosswalk_data(
 # Access intermediate and final
 result$intermediate$step_1  # After first crosswalk
 result$final                # Final result
+
+# Custom interpolation functions
+result <- crosswalk_data(
+  data = my_data,
+  crosswalk = crosswalk,
+  custom_interpolations = list(
+    list(
+      columns = c("count_population", "count_housing"),
+      fn = ~sum(.x * .w, na.rm = TRUE)
+    ),
+    list(
+      columns = c("pct_poverty"),
+      fn = ~weighted.mean(.x, .w, na.rm = TRUE)
+    )
+  ))
 }
 }
diff --git a/man/get_crosswalk.Rd b/man/get_crosswalk.Rd
index 84d0728..3e20c7e 100644
--- a/man/get_crosswalk.Rd
+++ b/man/get_crosswalk.Rd
@@ -18,12 +18,15 @@ get_crosswalk(
 \item{source_geography}{Character. Source geography name. One of c("block",
 "block group", "tract", "place", "county", "urban_area", "zcta", "puma",
 "puma12", "puma22", "cd115", "cd116", "cd118", "cd119", "urban_area",
-"core_based_statistical_area").}
+"core_based_statistical_area"). Note: "aiannh" (tribal areas) is currently
+supported as a \strong{target} geography only, not as a source.}
 
 \item{target_geography}{Character. Target geography name. One of c("block",
 "block group", "tract", "place", "county", "urban_area", "zcta", "puma",
 "puma12", "puma22", "cd115", "cd116", "cd118", "cd119", "urban_area",
-"core_based_statistical_area").}
+"core_based_statistical_area", "aiannh"). Tribal areas ("aiannh", American
+Indian / Alaska Native / Native Hawaiian areas) are available via
+GeoCorr 2022 only (target_year >= 2020).}
 
 \item{source_year}{Character or numeric. Year of the source geography, one of
 c(1990, 2000, 2010, 2020).}
diff --git a/tests/testthat/test-get_available_crosswalks.R b/tests/testthat/test-get_available_crosswalks.R
index b409c54..60e0dc9 100644
--- a/tests/testthat/test-get_available_crosswalks.R
+++ b/tests/testthat/test-get_available_crosswalks.R
@@ -109,3 +109,49 @@ test_that("get_available_crosswalks contains CTData rows", {
 
   expect_equal(nrow(ctdata_match), 1)
 })
+
+# ==============================================================================
+# Content tests - aiannh (tribal area) rows
+# ==============================================================================
+
+test_that("get_available_crosswalks contains aiannh as target from GeoCorr 2022 sources", {
+  result <- get_available_crosswalks()
+
+  # tract -> aiannh (2022)
+  tract_aiannh <- result |>
+    dplyr::filter(
+      source_geography == "tract",
+      target_geography == "aiannh",
+      source_year == 2022L,
+      target_year == 2022L)
+  expect_equal(nrow(tract_aiannh), 1)
+
+  # county -> aiannh (2022)
+  county_aiannh <- result |>
+    dplyr::filter(
+      source_geography == "county",
+      target_geography == "aiannh",
+      source_year == 2022L,
+      target_year == 2022L)
+  expect_equal(nrow(county_aiannh), 1)
+})
+
+test_that("get_available_crosswalks does NOT list aiannh as a source geography", {
+  result <- get_available_crosswalks()
+
+  aiannh_sources <- result |>
+    dplyr::filter(source_geography == "aiannh")
+
+  expect_equal(nrow(aiannh_sources), 0)
+})
+
+test_that("get_available_crosswalks does NOT list aiannh under GeoCorr 2018", {
+  result <- get_available_crosswalks()
+
+  aiannh_2018 <- result |>
+    dplyr::filter(
+      target_geography == "aiannh",
+      target_year == 2018L)
+
+  expect_equal(nrow(aiannh_2018), 0)
+})
diff --git a/tests/testthat/test-get_geocorr_crosswalk.R b/tests/testthat/test-get_geocorr_crosswalk.R
index 197e2bc..0905f18 100644
--- a/tests/testthat/test-get_geocorr_crosswalk.R
+++ b/tests/testthat/test-get_geocorr_crosswalk.R
@@ -325,3 +325,119 @@ test_that("get_crosswalk routes to Geocorr when no years specified", {
   metadata <- attr(result$crosswalks$step_1, "crosswalk_metadata")
   expect_equal(metadata$data_source, "geocorr")
 })
+
+# ==============================================================================
+# Tribal area (aiannh) tests
+# ==============================================================================
+
+test_that("get_geocorr_crosswalk handles tract to aiannh target", {
+  skip_if_offline()
+
+  result <- crosswalk:::get_geocorr_crosswalk(
+    source_geography = "tract",
+    target_geography = "aiannh",
+    weight = "population")
+
+  expect_s3_class(result, "tbl_df")
+  expect_equal(unique(result$source_geography), "tract")
+  expect_equal(unique(result$target_geography), "aiannh")
+
+  # NA target rows (tracts outside any tribal area) are filtered out
+  expect_false(any(is.na(result$target_geoid)))
+
+  # aiannh GEOIDs are 4-character federal codes
+  expect_true(all(stringr::str_length(result$target_geoid) == 4))
+
+  # Tract GEOIDs are still 11 characters
+  expect_true(all(stringr::str_length(result$source_geoid) == 11))
+
+  # Standard columns present
+  expected_cols <- c(
+    "source_geoid", "target_geoid",
+    "source_geography_name", "target_geography_name",
+    "allocation_factor_source_to_target",
+    "weighting_factor")
+  expect_true(all(expected_cols %in% colnames(result)))
+
+  # Allocation factors in valid range
+  expect_true(all(result$allocation_factor_source_to_target >= 0))
+  expect_true(all(result$allocation_factor_source_to_target <= 1))
+
+  # Allocation factors per source sum to <= 1 (not exactly 1), because tribal
+  # areas do not cover all land — any fraction of a source tract that falls
+  # outside tribal areas is intentionally dropped.
+  source_sums <- result |>
+    dplyr::summarize(
+      total = sum(allocation_factor_source_to_target, na.rm = TRUE),
+      .by = "source_geoid")
+  expect_true(all(source_sums$total <= 1 + 0.01))
+})
+
+test_that("get_geocorr_crosswalk handles county to aiannh target", {
+  skip_if_offline()
+
+  result <- crosswalk:::get_geocorr_crosswalk(
+    source_geography = "county",
+    target_geography = "aiannh",
+    weight = "population")
+
+  expect_s3_class(result, "tbl_df")
+  expect_equal(unique(result$target_geography), "aiannh")
+  expect_true(all(stringr::str_length(result$target_geoid) == 4))
+})
+
+test_that("get_geocorr_crosswalk rejects aiannh as source geography", {
+  expect_error(
+    crosswalk:::get_geocorr_crosswalk(
+      source_geography = "aiannh",
+      target_geography = "tract",
+      weight = "population"),
+    regexp = "target geography only")
+})
+
+test_that("get_geocorr_crosswalk rejects aiannh on GeoCorr 2018", {
+  # GeoCorr 2018 does not support aiannh; should error from resolve_geocorr_geography
+  expect_error(
+    crosswalk:::get_geocorr_crosswalk(
+      source_geography = "tract",
+      target_geography = "aiannh",
+      weight = "population",
+      geocorr_version = "2018"),
+    regexp = "not supported by GeoCorr 2018")
+})
+
+test_that("get_crosswalk handles tract to aiannh via top-level entry point", {
+  skip_if_offline()
+
+  result <- get_crosswalk(
+    source_geography = "tract",
+    target_geography = "aiannh",
+    weight = "population")
+
+  expect_type(result, "list")
+  expect_true("step_1" %in% names(result$crosswalks))
+
+  metadata <- attr(result$crosswalks$step_1, "crosswalk_metadata")
+  expect_equal(metadata$data_source, "geocorr")
+  expect_equal(metadata$target_geography, "aiannh")
+})
+
+test_that("get_crosswalk rejects pre-2020 target_year for aiannh target", {
+  expect_error(
+    get_crosswalk(
+      source_geography = "tract",
+      target_geography = "aiannh",
+      source_year = 2015,
+      target_year = 2015,
+      weight = "population"),
+    regexp = "GeoCorr 2022")
+})
+
+test_that("get_crosswalk rejects aiannh as source via top-level entry point", {
+  expect_error(
+    get_crosswalk(
+      source_geography = "aiannh",
+      target_geography = "tract",
+      weight = "population"),
+    regexp = "target geography only")
+})
diff --git a/tests/testthat/test-plan_crosswalk_chain.R b/tests/testthat/test-plan_crosswalk_chain.R
index 9b06d68..0bdf966 100644
--- a/tests/testthat/test-plan_crosswalk_chain.R
+++ b/tests/testthat/test-plan_crosswalk_chain.R
@@ -255,6 +255,61 @@ test_that("standardize_geography_for_chain handles 2010-era geography aliases",
   expect_equal(crosswalk:::standardize_geography_for_chain("cd116"), "cd116")
 })
 
+test_that("standardize_geography_for_chain handles aiannh aliases", {
+  expect_equal(crosswalk:::standardize_geography_for_chain("aiannh"), "aiannh")
+  expect_equal(crosswalk:::standardize_geography_for_chain("tribal"), "aiannh")
+  expect_equal(crosswalk:::standardize_geography_for_chain("tribal area"), "aiannh")
+  expect_equal(crosswalk:::standardize_geography_for_chain("tribal_area"), "aiannh")
+  expect_equal(crosswalk:::standardize_geography_for_chain("american indian area"), "aiannh")
+  expect_equal(crosswalk:::standardize_geography_for_chain("reservation"), "aiannh")
+  expect_equal(crosswalk:::standardize_geography_for_chain("aian"), "aiannh")
+})
+
+test_that("plan_crosswalk_chain rejects aiannh as a source geography", {
+  plan <- plan_crosswalk_chain(
+    source_geography = "aiannh",
+    target_geography = "tract",
+    source_year = 2020,
+    target_year = 2020)
+
+  expect_false(is.null(plan$error))
+  expect_true(stringr::str_detect(plan$error, "target geography only"))
+})
+
+test_that("plan_crosswalk_chain rejects aiannh target with pre-2020 target_year", {
+  plan <- plan_crosswalk_chain(
+    source_geography = "tract",
+    target_geography = "aiannh",
+    source_year = 2015,
+    target_year = 2015)
+
+  expect_false(is.null(plan$error))
+  expect_true(stringr::str_detect(plan$error, "GeoCorr 2022"))
+})
+
+test_that("plan_crosswalk_chain accepts aiannh target with 2020+ target_year", {
+  # Same-year: direct GeoCorr 2022 step
+  plan <- plan_crosswalk_chain(
+    source_geography = "tract",
+    target_geography = "aiannh",
+    source_year = 2020,
+    target_year = 2020)
+  expect_true(is.null(plan$error))
+  expect_false(plan$is_multi_step)
+  expect_equal(plan$steps$crosswalk_source[1], "geocorr")
+
+  # Multi-step: NHGIS temporal + GeoCorr 2022 geography
+  plan_multi <- plan_crosswalk_chain(
+    source_geography = "tract",
+    target_geography = "aiannh",
+    source_year = 2010,
+    target_year = 2020)
+  expect_true(is.null(plan_multi$error))
+  expect_true(plan_multi$is_multi_step)
+  expect_equal(plan_multi$steps$crosswalk_source[nrow(plan_multi$steps)], "geocorr")
+  expect_equal(plan_multi$steps$target_geography[nrow(plan_multi$steps)], "aiannh")
+})
+
 test_that("determine_temporal_source returns correct source", {
   expect_equal(crosswalk:::determine_temporal_source("2010", "2020"), "nhgis")
   expect_equal(crosswalk:::determine_temporal_source("2020", "2022"), "ctdata_2020_2022")