diff --git a/R/crosswalk_data.R b/R/crosswalk_data.R index 1e9d118..3685764 100644 --- a/R/crosswalk_data.R +++ b/R/crosswalk_data.R @@ -31,7 +31,9 @@ #' @param weight Character. Weighting variable for Geocorr crosswalks when fetching. #' One of c("population", "housing", "land"). Default is "population". #' @param cache Directory path or NULL. Where to cache fetched crosswalks. If NULL -#' (default), crosswalk is fetched but not saved to disk. +#' (default), crosswalk is fetched but not saved to disk. The directory must +#' already exist; a non-existent path raises an error rather than being +#' created silently. #' @param geoid_column Character. The name of the column in `data` containing #' the source geography identifiers (GEOIDs). Default is "source_geoid". #' @param count_columns Character vector or NULL. Column names in `data` that represent @@ -199,6 +201,8 @@ crosswalk_data <- function( old_opts <- options(crosswalk.silent = silent) on.exit(options(old_opts), add = TRUE) + validate_cache_dir(cache) + # When silent, suppress join quality regardless of show_join_quality if (silent) show_join_quality <- FALSE diff --git a/R/get_crosswalk.R b/R/get_crosswalk.R index 483700b..f94c662 100644 --- a/R/get_crosswalk.R +++ b/R/get_crosswalk.R @@ -61,7 +61,8 @@ #' c("population", "housing", "land"). #' @param cache Directory path. Where to download the crosswalk to. If NULL (default), #' crosswalk is returned but not saved to disk. Individual component crosswalks -#' are cached separately when provided. +#' are cached separately when provided. The directory must already exist; +#' a non-existent path raises an error rather than being created silently. #' @param silent Logical. If `TRUE`, suppresses all informational messages and #' warnings. Defaults to `getOption("crosswalk.silent", FALSE)`. Set #' `options(crosswalk.silent = TRUE)` to silence all calls by default. @@ -149,6 +150,8 @@ get_crosswalk <- function( old_opts <- options(crosswalk.silent = silent) on.exit(options(old_opts), add = TRUE) + validate_cache_dir(cache) + # Check for nested geographies (no crosswalk needed) # Determine if years match (both NULL, or both non-NULL and equal) years_match <- (is.null(source_year) && is.null(target_year)) || diff --git a/R/get_ctdata_crosswalk.R b/R/get_ctdata_crosswalk.R index b4ef83c..ccb7701 100644 --- a/R/get_ctdata_crosswalk.R +++ b/R/get_ctdata_crosswalk.R @@ -359,9 +359,6 @@ Only block, block_group, and tract geographies support the 2022 -> 2020 directio # =========================================================================== if (!is.null(cache)) { - if (!dir.exists(cache_path)) { - dir.create(cache_path, recursive = TRUE) - } readr::write_csv(result, csv_path) cw_message(stringr::str_c("Cached to: ", csv_path)) } diff --git a/R/get_geocorr_crosswalk.R b/R/get_geocorr_crosswalk.R index bc760e0..85bc8f5 100644 --- a/R/get_geocorr_crosswalk.R +++ b/R/get_geocorr_crosswalk.R @@ -267,8 +267,15 @@ get_geocorr_crosswalk <- function( list(abbr = "Wv", fips = "54"), list(abbr = "Wi", fips = "55"), list(abbr = "Wy", fips = "56"), - list(abbr = "Pr", fips = "72")) |> - purrr::map_chr(~ paste0(.x$abbr, .x$fips)) + list(abbr = "Pr", fips = "72")) + + ## stab (uppercase 2-letter abbreviation) → 2-char state FIPS lookup; used as + ## a fallback when GeoCorr returns `stab` but neither `state` nor `county` + state_fips_lookup <- tibble::tibble( + stab = purrr::map_chr(states_data, ~ toupper(.x$abbr)), + state = purrr::map_chr(states_data, ~ .x$fips)) + + states_data <- purrr::map_chr(states_data, ~ paste0(.x$abbr, .x$fips)) # GeoCorr 2018 does not support Puerto Rico if (!config$include_pr) { @@ -358,11 +365,22 @@ get_geocorr_crosswalk <- function( .cols = dplyr::matches(weight_rename_pattern), .fn = ~ stringr::str_replace_all(.x, config$weight_rename)) + ## Ensure a 2-char `state` FIPS column exists. GeoCorr's response varies by + ## query: sometimes `state` is returned directly; sometimes it's only + ## available as the leading 2 chars of a 5-char `county` (SSCCC) field; + ## sometimes (e.g., place/zcta/puma sources targeting aiannh) neither is + ## present and we must derive it from `stab` (state abbreviation). if (!"state" %in% colnames(df2)) { - df2 = df2 |> - dplyr::mutate( - state = stringr::str_sub(county, 1, 2), - county = stringr::str_sub(county, 3, 5)) } + if ("county" %in% colnames(df2)) { + df2 = df2 |> + dplyr::mutate( + state = stringr::str_sub(county, 1, 2), + county = stringr::str_sub(county, 3, 5)) + } else if ("stab" %in% colnames(df2)) { + df2 = df2 |> + dplyr::left_join(state_fips_lookup, by = "stab") + } + } # Build the blockgroup column name used by the API for this version bg_api_col <- config$geography_api_codes[["blockgroup"]] @@ -512,13 +530,8 @@ get_geocorr_crosswalk <- function( df2 <- df2 |> dplyr::filter(!is.na(target_geoid)) } - if (!is.null(cache)) { - ## if the file does not already exist and cache is TRUE - if (!file.exists(outpath) & !is.null(cache)) { - ## if the specified cache directory doesn't yet exist, create it - if (!dir.exists(cache)) { dir.create(cache) } - readr::write_csv(df2, outpath) - } + if (!is.null(cache) && !file.exists(outpath)) { + readr::write_csv(df2, outpath) } # Attach metadata to result diff --git a/R/get_nhgis_crosswalk.R b/R/get_nhgis_crosswalk.R index 9d792b2..e74c2dd 100644 --- a/R/get_nhgis_crosswalk.R +++ b/R/get_nhgis_crosswalk.R @@ -878,11 +878,7 @@ variable. Get your key at https://account.ipums.org/api_keys") } dplyr::mutate(target_geoid = stringr::str_pad(target_geoid, 11, "right", "0")) } - ## if the file does not already exist and cache is not NULL - if (!file.exists(csv_path) & !is.null(cache)) { - if (!dir.exists(cache)) { - dir.create(cache, recursive = TRUE) - } + if (!is.null(cache) && !file.exists(csv_path)) { readr::write_csv(crosswalk_df, csv_path) } diff --git a/R/utils.R b/R/utils.R index 8c7e710..384284d 100644 --- a/R/utils.R +++ b/R/utils.R @@ -23,3 +23,28 @@ cw_message <- function(...) { cw_warning <- function(..., call. = TRUE, immediate. = FALSE) { if (!getOption("crosswalk.silent", FALSE)) warning(..., call. = call., immediate. = immediate.) } + +#' Validate a User-Provided Cache Directory +#' +#' Errors if `cache` is non-NULL and does not refer to an existing directory. +#' Users must create the directory themselves; the package will not create it +#' silently, so a typo in the path is surfaced rather than producing an +#' unexpected new directory. +#' +#' @param cache The user-supplied cache path (or NULL). +#' @keywords internal +#' @noRd +validate_cache_dir <- function(cache) { + if (is.null(cache)) return(invisible(NULL)) + if (!is.character(cache) || length(cache) != 1L || is.na(cache) || !nzchar(cache)) { + stop("`cache` must be NULL or a single non-empty character path.", call. = FALSE) + } + if (!dir.exists(cache)) { + stop( + "The cache directory does not exist: '", cache, "'. ", + "Create it (e.g., `dir.create(\"", cache, "\", recursive = TRUE)`) ", + "or pass `cache = NULL` to skip caching.", + call. = FALSE) + } + invisible(NULL) +}