Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion R/crosswalk_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
#' @param weight Character. Weighting variable for Geocorr crosswalks when fetching.
#' One of c("population", "housing", "land"). Default is "population".
#' @param cache Directory path or NULL. Where to cache fetched crosswalks. If NULL
#' (default), crosswalk is fetched but not saved to disk.
#' (default), crosswalk is fetched but not saved to disk. The directory must
#' already exist; a non-existent path raises an error rather than being
#' created silently.
#' @param geoid_column Character. The name of the column in `data` containing
#' the source geography identifiers (GEOIDs). Default is "source_geoid".
#' @param count_columns Character vector or NULL. Column names in `data` that represent
Expand Down Expand Up @@ -199,6 +201,8 @@ crosswalk_data <- function(
old_opts <- options(crosswalk.silent = silent)
on.exit(options(old_opts), add = TRUE)

validate_cache_dir(cache)

# When silent, suppress join quality regardless of show_join_quality
if (silent) show_join_quality <- FALSE

Expand Down
5 changes: 4 additions & 1 deletion R/get_crosswalk.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@
#' c("population", "housing", "land").
#' @param cache Directory path. Where to download the crosswalk to. If NULL (default),
#' crosswalk is returned but not saved to disk. Individual component crosswalks
#' are cached separately when provided.
#' are cached separately when provided. The directory must already exist;
#' a non-existent path raises an error rather than being created silently.
#' @param silent Logical. If `TRUE`, suppresses all informational messages and
#' warnings. Defaults to `getOption("crosswalk.silent", FALSE)`. Set
#' `options(crosswalk.silent = TRUE)` to silence all calls by default.
Expand Down Expand Up @@ -149,6 +150,8 @@ get_crosswalk <- function(
old_opts <- options(crosswalk.silent = silent)
on.exit(options(old_opts), add = TRUE)

validate_cache_dir(cache)

# Check for nested geographies (no crosswalk needed)
# Determine if years match (both NULL, or both non-NULL and equal)
years_match <- (is.null(source_year) && is.null(target_year)) ||
Expand Down
3 changes: 0 additions & 3 deletions R/get_ctdata_crosswalk.R
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,6 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio
# ===========================================================================

if (!is.null(cache)) {
if (!dir.exists(cache_path)) {
dir.create(cache_path, recursive = TRUE)
}
readr::write_csv(result, csv_path)
cw_message(stringr::str_c("Cached to: ", csv_path))
}
Expand Down
39 changes: 26 additions & 13 deletions R/get_geocorr_crosswalk.R
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,15 @@ get_geocorr_crosswalk <- function(
list(abbr = "Wv", fips = "54"),
list(abbr = "Wi", fips = "55"),
list(abbr = "Wy", fips = "56"),
list(abbr = "Pr", fips = "72")) |>
purrr::map_chr(~ paste0(.x$abbr, .x$fips))
list(abbr = "Pr", fips = "72"))

## stab (uppercase 2-letter abbreviation) → 2-char state FIPS lookup; used as
## a fallback when GeoCorr returns `stab` but neither `state` nor `county`
state_fips_lookup <- tibble::tibble(
stab = purrr::map_chr(states_data, ~ toupper(.x$abbr)),
state = purrr::map_chr(states_data, ~ .x$fips))

states_data <- purrr::map_chr(states_data, ~ paste0(.x$abbr, .x$fips))

# GeoCorr 2018 does not support Puerto Rico
if (!config$include_pr) {
Expand Down Expand Up @@ -358,11 +365,22 @@ get_geocorr_crosswalk <- function(
.cols = dplyr::matches(weight_rename_pattern),
.fn = ~ stringr::str_replace_all(.x, config$weight_rename))

## Ensure a 2-char `state` FIPS column exists. GeoCorr's response varies by
## query: sometimes `state` is returned directly; sometimes it's only
## available as the leading 2 chars of a 5-char `county` (SSCCC) field;
## sometimes (e.g., place/zcta/puma sources targeting aiannh) neither is
## present and we must derive it from `stab` (state abbreviation).
if (!"state" %in% colnames(df2)) {
df2 = df2 |>
dplyr::mutate(
state = stringr::str_sub(county, 1, 2),
county = stringr::str_sub(county, 3, 5)) }
if ("county" %in% colnames(df2)) {
df2 = df2 |>
dplyr::mutate(
state = stringr::str_sub(county, 1, 2),
county = stringr::str_sub(county, 3, 5))
} else if ("stab" %in% colnames(df2)) {
df2 = df2 |>
dplyr::left_join(state_fips_lookup, by = "stab")
}
}

# Build the blockgroup column name used by the API for this version
bg_api_col <- config$geography_api_codes[["blockgroup"]]
Expand Down Expand Up @@ -512,13 +530,8 @@ get_geocorr_crosswalk <- function(
df2 <- df2 |> dplyr::filter(!is.na(target_geoid))
}

if (!is.null(cache)) {
## if the file does not already exist and cache is TRUE
if (!file.exists(outpath) & !is.null(cache)) {
## if the specified cache directory doesn't yet exist, create it
if (!dir.exists(cache)) { dir.create(cache) }
readr::write_csv(df2, outpath)
}
if (!is.null(cache) && !file.exists(outpath)) {
readr::write_csv(df2, outpath)
}

# Attach metadata to result
Expand Down
6 changes: 1 addition & 5 deletions R/get_nhgis_crosswalk.R
Original file line number Diff line number Diff line change
Expand Up @@ -878,11 +878,7 @@ variable. Get your key at https://account.ipums.org/api_keys") }
dplyr::mutate(target_geoid = stringr::str_pad(target_geoid, 11, "right", "0"))
}

## if the file does not already exist and cache is not NULL
if (!file.exists(csv_path) & !is.null(cache)) {
if (!dir.exists(cache)) {
dir.create(cache, recursive = TRUE)
}
if (!is.null(cache) && !file.exists(csv_path)) {
readr::write_csv(crosswalk_df, csv_path)
}

Expand Down
25 changes: 25 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,28 @@ cw_message <- function(...) {
cw_warning <- function(..., call. = TRUE, immediate. = FALSE) {
if (!getOption("crosswalk.silent", FALSE)) warning(..., call. = call., immediate. = immediate.)
}

#' Validate a User-Provided Cache Directory
#'
#' Errors if `cache` is non-NULL and does not refer to an existing directory.
#' Users must create the directory themselves; the package will not create it
#' silently, so a typo in the path is surfaced rather than producing an
#' unexpected new directory.
#'
#' @param cache The user-supplied cache path (or NULL).
#' @keywords internal
#' @noRd
validate_cache_dir <- function(cache) {
if (is.null(cache)) return(invisible(NULL))
if (!is.character(cache) || length(cache) != 1L || is.na(cache) || !nzchar(cache)) {
stop("`cache` must be NULL or a single non-empty character path.", call. = FALSE)
}
if (!dir.exists(cache)) {
stop(
"The cache directory does not exist: '", cache, "'. ",
"Create it (e.g., `dir.create(\"", cache, "\", recursive = TRUE)`) ",
"or pass `cache = NULL` to skip caching.",
call. = FALSE)
}
invisible(NULL)
}
Loading