diff --git a/DESCRIPTION b/DESCRIPTION index 5f741891..072d0b06 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pipapi Title: API for the Poverty and Inequality Platform -Version: 1.3.20 +Version: 1.3.20.9000 Authors@R: c(person(given = "Tony", family = "Fujs", diff --git a/NEWS.md b/NEWS.md index a07af132..0b869884 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# pipapi (development version) + # pipapi 1.3.20 # pipapi 1.3.19 diff --git a/R/add_agg_stats.R b/R/add_agg_stats.R index 5b93067d..bbc81ed9 100644 --- a/R/add_agg_stats.R +++ b/R/add_agg_stats.R @@ -16,7 +16,8 @@ add_agg_stats <- function(df, aggregated_list <- split(aggregated, interaction( aggregated$country_code, - aggregated$reporting_year + aggregated$reporting_year, + aggregated$poverty_line ), drop = TRUE ) diff --git a/R/pip.R b/R/pip.R index c8d56e00..56d1e282 100644 --- a/R/pip.R +++ b/R/pip.R @@ -106,7 +106,6 @@ pip <- function(country = "ALL", aux_files = lkup$aux_files ) # lcv$est_ctrs has all the country_code that we are interested in - cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb") if (!file.exists(cache_file_path)) { # Create an empty duckdb file diff --git a/R/pip_grp_logic.R b/R/pip_grp_logic.R index 9cc9bc34..1dfb44c0 100644 --- a/R/pip_grp_logic.R +++ b/R/pip_grp_logic.R @@ -68,13 +68,13 @@ pip_grp_logic <- function(country = "ALL", censor = censor) return(res) - } else { + } else if (lcv$off_alt_agg == "both") { ## STEP 3.2: Compute fg_pip for ALL required countries ---- ## This will then be re-used in various part of the function ## This is to avoid re-computing and re-loading the same data over and over fg_pip_master <- fg_pip( - country = c(lcv$md_off_reg, lcv$user_off_reg), + country = c(lcv$md_off_reg, lcv$user_alt_agg, lcv$user_off_reg), year = year, povline = povline, popshare = NULL, @@ -90,19 +90,13 @@ pip_grp_logic <- function(country = "ALL", setDT(fg_pip_master) } - add_vars_out_of_pipeline(fg_pip_master, fill_gaps = TRUE, lkup = lkup) + add_vars_out_of_pipeline(fg_pip_master, + fill_gaps = TRUE, + lkup = lkup) + - if (lcv$off_alt_agg == "both") { ### STEP 3.2.1 Estimates for official aggregates ---- off_ret <- - # pip_grp(country = lcv$user_off_reg, - # year = year, - # povline = povline, - # group_by = "wb", - # welfare_type = welfare_type, - # reporting_level = reporting_level, - # lkup = lkup, - # censor = censor) pip_grp_helper(lcv_country = lcv$ctr_off_reg, country = country, year = year, @@ -111,12 +105,35 @@ pip_grp_logic <- function(country = "ALL", censor = FALSE, fg_pip = fg_pip_master, lkup = lkup) - } else { - ### STEP 3.2.2 Alternate aggregates only ---- - ### Prepare necessary variables - off_ret <- NULL - alt_agg <- country + } else { + ### STEP 3.2.2 Alternate aggregates only ---- + ### Prepare necessary variables + fg_pip_master <- fg_pip( + country = c(lcv$user_alt_agg, lcv$md_off_reg), + year = year, + povline = povline, + popshare = NULL, + welfare_type = welfare_type, + reporting_level = reporting_level, + ppp = NULL, + lkup = lkup + ) + # For now just rowbinding two dataframes, but we would need to use it more smartly in the future + fg_pip_master <- collapse::rowbind(fg_pip_master) + + if (!data.table::is.data.table(fg_pip_master)) { + setDT(fg_pip_master) } + + add_vars_out_of_pipeline(fg_pip_master, + fill_gaps = TRUE, + lkup = lkup) + + + + + off_ret <- NULL + alt_agg <- country } # ________________________________________________________ @@ -131,52 +148,58 @@ pip_grp_logic <- function(country = "ALL", ## calculation of off regions but we still have to input to missing data ## countries, we estimate official region estimates for such countries - if (lcv$grp_use %in% c("append", "not")) { + # Only if there is md countries. Otherwise, skip. + if (!is.null(lcv$grp_use)) { - grp <- pip_grp_helper(lcv_country = lcv$md_off_reg, - country = country, - year = lcv$md_year, - povline = povline, - reporting_level = reporting_level, - censor = FALSE, - fg_pip = fg_pip_master, - lkup = lkup) + if (lcv$grp_use %in% c("append", "not")) { - if (lcv$grp_use == "append") { - grp <- data.table::rbindlist(list(off_ret, grp)) - } + grp <- pip_grp_helper(lcv_country = lcv$md_off_reg, + country = country, + year = lcv$md_year, + povline = povline, + reporting_level = reporting_level, + censor = FALSE, + fg_pip = fg_pip_master, + lkup = lkup) - } else { - # If previous estimations are enough, we don't need to do any estimation. - grp <- data.table::copy(off_ret) - } + if (lcv$grp_use == "append") { + grp <- data.table::rbindlist(list(off_ret, grp)) + } + + } else { + # If previous estimations are enough, we don't need to do any estimation. + grp <- data.table::copy(off_ret) + } - names_grp <- names(grp) + names_grp <- names(grp) - ### Prepare grp to be merge with pop_md - grp[, - c("reporting_pop", "pop_in_poverty") := NULL] + ### Prepare grp to be merge with pop_md + grp[, + c("reporting_pop", "pop_in_poverty") := NULL] - ### Merge population with Missing data table --------- + ### Merge population with Missing data table --------- - ### Merge with pop_md ------ - pop_md <- lcv$md - data.table::setnames(pop_md, "year", "reporting_year") + ### Merge with pop_md ------ + pop_md <- lcv$md + data.table::setnames(pop_md, "year", "reporting_year") - # This merge will remove those countries for which there is no official - # aggregate because of lack of coverage in the region. Eg. There is not data - # for SAS in 2000, so for countries like AFG 2000 we can't input estimates - md_grp <- merge(pop_md, grp, - by = c("region_code", "reporting_year")) + # This merge will remove those countries for which there is no official + # aggregate because of lack of coverage in the region. Eg. There is not data + # for SAS in 2000, so for countries like AFG 2000 we can't input estimates + md_grp <- merge(pop_md, grp, + by = c("region_code", "reporting_year")) - ### Merge other region codes ----------- - md_grp[, - region_code := NULL] + ### Merge other region codes ----------- + md_grp[, + region_code := NULL] - md_grp <- merge(md_grp, cl, - by = "country_code", - all.x = TRUE) + md_grp <- merge(md_grp, cl, + by = "country_code", + all.x = TRUE) + } else { + md_grp <- pipapi::empty_response_fg_add + } ## Fill gaps estimates with countries with Survey ----- @@ -256,7 +279,7 @@ pip_grp_logic <- function(country = "ALL", # ret <- censor_rows(ret, lkup[["censored"]], type = "regions") # } - data.table::setcolorder(ret, names_grp) + data.table::setcolorder(ret, names(pipapi::empty_response_grp)) # Select columns if (additional_ind) { @@ -325,15 +348,13 @@ pip_grp_helper <- function(lcv_country, # out <- censor_rows(out, lkup[["censored"]], type = "regions") # } - out <- estimate_type_var(out,lkup) - } else { # Handle simple aggregation out <- pip_aggregate(df = out, return_cols = lkup$return_cols$pip_grp) - out <- estimate_type_var(out,lkup) } + out <- estimate_type_var(out,lkup) keep <- lkup$return_cols$pip_grp$cols out <- out[, .SD, .SDcols = keep] diff --git a/R/pipapi-package.R b/R/pipapi-package.R index 7a52927d..f4768769 100644 --- a/R/pipapi-package.R +++ b/R/pipapi-package.R @@ -59,6 +59,15 @@ NULL #' @format Data frame with 0 rows and 53 columns NULL +#' Dataframe for fill gaps empty response with additional variables +#' +#' @docType data +#' @keywords datasets +#' @name empty_response_fg_add +#' @usage data(empty_response_fg_add) +#' @format Data frame with 0 rows and 56 columns +NULL + utils::globalVariables( c( ".", diff --git a/R/rg_pip.R b/R/rg_pip.R index 5ff8552b..a491c8b3 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -17,7 +17,8 @@ rg_pip <- function(country, valid_regions <- lkup$query_controls$region$values svy_lkup <- lkup$svy_lkup data_dir <- lkup$data_root - + # povline is set to NULL if popshare is given + if (!is.null(popshare)) povline <- NULL cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb") metadata <- subset_lkup( @@ -36,8 +37,6 @@ rg_pip <- function(country, data_present_in_master <- metadata$data_present_in_master povline <- metadata$povline metadata <- metadata$lkup - - # Remove aggregate distribution if popshare is specified # TEMPORARY FIX UNTIL popshare is supported for aggregate distributions metadata <- filter_lkup(metadata = metadata, @@ -51,12 +50,21 @@ rg_pip <- function(country, # load data lt <- load_data_list(metadata) - + # Calculate and update poverty line if popshare is passed + if (!is.null(popshare)) { + povline <- lapply(lt, \(x) wbpip:::md_infer_poverty_line(x$welfare, x$weight, popshare)) + } # parallelization # res <- get_pov_estimates(lt, povline = povline) - - # Regular lapply - res <- lapply(lt, process_dt, povline = povline) + # When poverty line is passed explicitly by user + if (length(povline) == 1) { + # Regular lapply + # passing povline[[1]] to pass povline as vector + res <- lapply(lt, process_dt, povline = povline[[1]]) + # When poverty line is calculated i.e popshare is passed + } else if (length(povline) == length(lt)) { + res <- Map(process_dt, lt, povline) + } res <- rbindlist(res, fill = TRUE) diff --git a/R/utils.R b/R/utils.R index ae4fdc89..c3e94f1a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -18,7 +18,22 @@ subset_lkup <- function(country, cache_file_path, fill_gaps ) { + lkup <- lkup_filter(lkup, country, year, valid_regions, reporting_level, welfare_type, data_dir) + # If povline is NULL, this happens when popshare is passed i.e popshare is not NULL + if (is.null(povline)) { + return(list(data_present_in_master = NULL, + lkup = lkup, + povline = NULL)) + } + # Return with grace + return_if_exists(slkup = lkup, + povline = povline, + cache_file_path = cache_file_path, + fill_gaps = fill_gaps) +} +#' @keywords internal +lkup_filter <- function(lkup, country, year, valid_regions, reporting_level, welfare_type, data_dir) { # STEP 1 - Keep every row by default keep <- rep(TRUE, nrow(lkup)) # STEP 2 - Select countries @@ -31,9 +46,6 @@ subset_lkup <- function(country, data_dir = data_dir, valid_regions = valid_regions) - # # step 4. Select MRV - # keep <- select_MRV(lkup, keep, year, country, valid_regions, data_dir) - # STEP 4 - Select welfare_type if (welfare_type[1] != "all") { keep <- keep & lkup$welfare_type == welfare_type @@ -45,14 +57,8 @@ subset_lkup <- function(country, lkup <- lkup[keep, ] - - # Return with grace - return_if_exists(slkup = lkup, - povline = povline, - cache_file_path = cache_file_path, - fill_gaps = fill_gaps) + return(lkup) } - #' select_country #' Helper function for subset_lkup() #' @inheritParams subset_lkup @@ -65,7 +71,19 @@ select_country <- function(lkup, keep, country, valid_regions) { # Select regions if (any(country %in% valid_regions)) { selected_regions <- country[country %in% valid_regions] - keep_regions <- lkup$region_code %in% selected_regions + # Find all columns ending with _code + code_cols <- grep("_code$", names(lkup), value = TRUE) + code_cols <- code_cols[!code_cols %in% "wb_region_code"] # Temporary solution + # For each code column, check if any value matches selected_regions + keep_regions_list <- lapply(code_cols, \(col) { + lkup[[col]] %in% selected_regions + }) + # Combine with logical OR across all code columns + if (length(keep_regions_list) > 0) { + keep_regions <- Reduce(`|`, keep_regions_list) + } else { + keep_regions <- rep(FALSE, nrow(lkup)) + } } else { keep_regions <- rep(FALSE, length(lkup$country_code)) } diff --git a/data-raw/data.R b/data-raw/data.R index b6d075be..570fe500 100644 --- a/data-raw/data.R +++ b/data-raw/data.R @@ -1,5 +1,6 @@ # library(pipapi) # lkups <- pipapi::create_versioned_lkups(Sys.getenv('PIPAPI_DATA_ROOT_FOLDER')) +library(data.table) pkgload::load_all() data_dir <- @@ -34,62 +35,22 @@ empty_response_cp_poverty <- list(pov_trend = tmp1, pov_mrv = tmp2) empty_response_grp <- pip_grp("all", year, lkup = lkup, group_by = "wb") empty_response_grp <- empty_response_grp[-c(1:nrow(empty_response_grp))] +fg <- fg_pip( + ctr, + year = year, + povline = 3, + welfare_type = "all", + reporting_level = "all", + popshare = NULL, + lkup = lkup +) |> + rbindlist() +empty_response_fg <- fg[-1] -empty_response_fg <- data.table::data.table( - country_code = character(0), - survey_id = character(0), - cache_id = character(0), - wb_region_code = character(0), - reporting_year = numeric(0), - surveyid_year = character(0), - survey_year = numeric(0), - survey_time = character(0), - survey_acronym = character(0), - survey_coverage = character(0), - survey_comparability = numeric(0), - comparable_spell = character(0), - welfare_type = character(0), - reporting_level = character(0), - survey_mean_lcu = numeric(0), - survey_mean_ppp = numeric(0), - survey_median_ppp = numeric(0), - survey_median_lcu = numeric(0), - predicted_mean_ppp = numeric(0), - ppp = numeric(0), - cpi = numeric(0), - reporting_pop = numeric(0), - reporting_gdp = numeric(0), - reporting_pce = numeric(0), - pop_data_level = character(0), - gdp_data_level = character(0), - pce_data_level = character(0), - cpi_data_level = character(0), - ppp_data_level = character(0), - distribution_type = character(0), - gd_type = character(0), - is_interpolated = logical(0), - is_used_for_line_up = logical(0), - is_used_for_aggregation = logical(0), - estimation_type = character(0), - interpolation_id = character(0), - display_cp = numeric(0), - country_name = character(0), - africa_split = character(0), - africa_split_code = character(0), - region_name = character(0), - region_code = character(0), - world = character(0), - world_code = character(0), - path = character(0), - data_interpolation_id = character(0), - poverty_line = numeric(0), - mean = numeric(0), - median = numeric(0), - headcount = numeric(0), - poverty_gap = numeric(0), - poverty_severity = numeric(0), - watts = numeric(0) -) + +add_vars_out_of_pipeline(fg, fill_gaps = TRUE, lkup = lkup) + +empty_response_fg_add <- fg[-1] usethis::use_data( empty_response, @@ -97,5 +58,6 @@ usethis::use_data( reporting_level_list, empty_response_grp, empty_response_fg, + empty_response_fg_add, overwrite = TRUE ) diff --git a/data/empty_response_fg.rda b/data/empty_response_fg.rda index 6785534c..e9c29fcf 100644 Binary files a/data/empty_response_fg.rda and b/data/empty_response_fg.rda differ diff --git a/data/empty_response_fg_add.rda b/data/empty_response_fg_add.rda new file mode 100644 index 00000000..01040f43 Binary files /dev/null and b/data/empty_response_fg_add.rda differ diff --git a/inst/TMP/TMP_API_launcher.R b/inst/TMP/TMP_API_launcher.R index 72e28195..3919ada1 100644 --- a/inst/TMP/TMP_API_launcher.R +++ b/inst/TMP/TMP_API_launcher.R @@ -4,21 +4,18 @@ library(pipapi) # devtools::load_all(".") if (Sys.info()[["user"]] == "wb384996") { - force <- FALSE - if (!"lkups" %in% ls() || isTRUE(force)) { data_dir <- Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") |> fs::path() fs::dir_ls(data_dir, recurse = FALSE) - } - latest_version <- pipapi:::available_versions(data_dir) |> max() - latest_version <- NULL - lkups <- create_versioned_lkups(data_dir, - vintage_pattern = latest_version) + # latest_version <- NULL + # lkups <- create_versioned_lkups(data_dir, + # vintage_pattern = latest_version) + lkups <- create_versioned_lkups(data_dir) # lkup <- lkups$versions_paths[[lkups$latest_release]] start_api(port = 8080) diff --git a/man/empty_response_fg_add.Rd b/man/empty_response_fg_add.Rd new file mode 100644 index 00000000..7bb34bb1 --- /dev/null +++ b/man/empty_response_fg_add.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipapi-package.R +\docType{data} +\name{empty_response_fg_add} +\alias{empty_response_fg_add} +\title{Dataframe for fill gaps empty response with additional variables} +\format{ +Data frame with 0 rows and 56 columns +} +\usage{ +data(empty_response_fg_add) +} +\description{ +Dataframe for fill gaps empty response with additional variables +} +\keyword{datasets} diff --git a/man/ui_cp_poverty_charts.Rd b/man/ui_cp_poverty_charts.Rd index 3b211f31..e2356d1e 100644 --- a/man/ui_cp_poverty_charts.Rd +++ b/man/ui_cp_poverty_charts.Rd @@ -4,7 +4,7 @@ \alias{ui_cp_poverty_charts} \title{CP Poverty Charts} \usage{ -ui_cp_poverty_charts(country, povline, pop_units, lkup) +ui_cp_poverty_charts(country, povline, pop_units = 1e+06, lkup) } \arguments{ \item{country}{character: Country ISO 3 codes} diff --git a/tests/testthat/test-fg_pip-local.R b/tests/testthat/test-fg_pip-local.R index 13a56c84..5d13b764 100644 --- a/tests/testthat/test-fg_pip-local.R +++ b/tests/testthat/test-fg_pip-local.R @@ -13,7 +13,7 @@ lkups <- create_versioned_lkups(data_dir, vintage_pattern = latest_version) lkup <- lkups$versions_paths[[lkups$latest_release]] -con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = fs::path(lkup$data_root, "cache", ext = "duckdb")) +# con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = fs::path(lkup$data_root, "cache", ext = "duckdb")) local_mocked_bindings( get_caller_names = function() c("else") @@ -30,8 +30,7 @@ test_that("Imputation is working for extrapolated aggregated distribution", { welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) expect_equal(nrow(tmp$main_data), 0) @@ -44,8 +43,7 @@ test_that("Imputation is working for extrapolated aggregated distribution", { welfare_type = "all", reporting_level = "national", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) expect_equal(nrow(tmp$main_data), 0) @@ -61,8 +59,7 @@ test_that("Imputation is working for interpolated mixed distribution", { welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) expect_equal(nrow(tmp$main_data), 0) @@ -75,8 +72,7 @@ test_that("Imputation is working for interpolated mixed distribution", { welfare_type = "all", reporting_level = "national", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) expect_equal(nrow(tmp$main_data), 0) @@ -91,11 +87,11 @@ test_that("Imputation is working for interpolated aggregate distribution", { welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) - expect_equal(nrow(tmp$main_data), 2) + expect_equal(nrow(tmp$main_data), 0) + expect_equal(nrow(tmp$data_in_cache), 2) tmp <- fg_pip( country = "CHN", @@ -105,11 +101,11 @@ test_that("Imputation is working for interpolated aggregate distribution", { welfare_type = "all", reporting_level = "national", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) - expect_equal(nrow(tmp$main_data), 2) + expect_equal(nrow(tmp$main_data), 0) + expect_equal(nrow(tmp$data_in_cache), 2) }) @@ -158,8 +154,7 @@ tmp <- fg_pip( welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) tmp <- tmp$data_in_cache |> as.data.table() # dt <- pip(country = "ALL", diff --git a/tests/testthat/test-pip-local.R b/tests/testthat/test-pip-local.R index 59e8ac89..2dae5b28 100644 --- a/tests/testthat/test-pip-local.R +++ b/tests/testthat/test-pip-local.R @@ -543,7 +543,7 @@ test_that("pop_share option is returning consistent results for single grouped d expect_equal(round(pl$headcount, 3), round(ps$headcount, 3)) - expect_equal(round(povline, 6), round(pl$poverty_line, 6)) + expect_equal(round(povline, 2), round(pl$poverty_line, 2)) # High poverty line # Fails for higher poverty lines @@ -840,4 +840,12 @@ test_that("SPL is the same by reporting level", { ) }) +test_that("make sure popshare bug no which was reported no longer exists", { + out <- pip(country = "USA", year = 2022, + popshare = .5, lkup = lkup) + # Ensure poverty line is not the default one + expect_false(out$poverty_line %in% c(1.9, 3)) + # Ensure headcount is closer to 0.5 + expect_equal(out$headcount, 0.5, tolerance = .05) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 7b2b53a4..da735a34 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -66,7 +66,8 @@ test_that("subset_lkup correctly selects all countries", { reporting_level = "all", lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir) + data_dir = data_dir, + povline = NULL) expect_equal(nrow(tmp$lkup), nrow(ref_lkup)) }) @@ -79,7 +80,8 @@ test_that("subset_lkup correctly selects countries", { reporting_level = "all", lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir) + data_dir = data_dir, + povline = NULL) expect_equal(sort(unique(tmp$lkup$country_code)), sort(selection)) }) @@ -92,7 +94,8 @@ test_that("subset_lkup correctly selects single regions", { reporting_level = "all", lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir) + data_dir = data_dir, + povline = NULL) expect_equal(sort(unique(tmp$lkup$region_code)), sort(selection)) }) @@ -105,7 +108,8 @@ test_that("subset_lkup correctly selects multiple regions", { reporting_level = "all", lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir) + data_dir = data_dir, + povline = NULL) expect_equal(sort(unique(tmp$lkup$region_code)), sort(selection)) }) @@ -122,7 +126,8 @@ test_that("subset_lkup correctly selects countries and regions", { reporting_level = "all", lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir) + data_dir = data_dir, + povline = NULL) # Regions are selected expect_true(all(region_selection %in% (unique(tmp$lkup$region_code))))