From ccd65b0ced922543579f58ea594df1a00838084a Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Wed, 9 Jul 2025 17:13:30 -0400 Subject: [PATCH 01/14] remove unnecessary stuff --- R/pip_grp_logic.R | 12 +++--------- inst/TMP/TMP_API_launcher.R | 6 +----- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/R/pip_grp_logic.R b/R/pip_grp_logic.R index 9cc9bc34..c6e6e8cf 100644 --- a/R/pip_grp_logic.R +++ b/R/pip_grp_logic.R @@ -90,19 +90,13 @@ pip_grp_logic <- function(country = "ALL", setDT(fg_pip_master) } - add_vars_out_of_pipeline(fg_pip_master, fill_gaps = TRUE, lkup = lkup) + add_vars_out_of_pipeline(fg_pip_master, + fill_gaps = TRUE, + lkup = lkup) if (lcv$off_alt_agg == "both") { ### STEP 3.2.1 Estimates for official aggregates ---- off_ret <- - # pip_grp(country = lcv$user_off_reg, - # year = year, - # povline = povline, - # group_by = "wb", - # welfare_type = welfare_type, - # reporting_level = reporting_level, - # lkup = lkup, - # censor = censor) pip_grp_helper(lcv_country = lcv$ctr_off_reg, country = country, year = year, diff --git a/inst/TMP/TMP_API_launcher.R b/inst/TMP/TMP_API_launcher.R index 72e28195..fa2719b4 100644 --- a/inst/TMP/TMP_API_launcher.R +++ b/inst/TMP/TMP_API_launcher.R @@ -4,19 +4,15 @@ library(pipapi) # devtools::load_all(".") if (Sys.info()[["user"]] == "wb384996") { - force <- FALSE - if (!"lkups" %in% ls() || isTRUE(force)) { data_dir <- Sys.getenv("PIPAPI_DATA_ROOT_FOLDER_LOCAL") |> fs::path() fs::dir_ls(data_dir, recurse = FALSE) - } - latest_version <- pipapi:::available_versions(data_dir) |> max() - latest_version <- NULL + # latest_version <- NULL lkups <- create_versioned_lkups(data_dir, vintage_pattern = latest_version) # lkup <- lkups$versions_paths[[lkups$latest_release]] From d716f04c5a352bf80f3ac9fa5bf687042d6d5b57 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Fri, 11 Jul 2025 12:43:02 -0400 Subject: [PATCH 02/14] select obs based on all available regional variables --- R/utils.R | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index ae4fdc89..a3b02384 100644 --- a/R/utils.R +++ b/R/utils.R @@ -65,7 +65,18 @@ select_country <- function(lkup, keep, country, valid_regions) { # Select regions if (any(country %in% valid_regions)) { selected_regions <- country[country %in% valid_regions] - keep_regions <- lkup$region_code %in% selected_regions + # Find all columns ending with _code + code_cols <- grep("_code$", names(lkup), value = TRUE) + # For each code column, check if any value matches selected_regions + keep_regions_list <- lapply(code_cols, \(col) { + lkup[[col]] %in% selected_regions + }) + # Combine with logical OR across all code columns + if (length(keep_regions_list) > 0) { + keep_regions <- Reduce(`|`, keep_regions_list) + } else { + keep_regions <- rep(FALSE, nrow(lkup)) + } } else { keep_regions <- rep(FALSE, length(lkup$country_code)) } From 2216243ef9cebe2b6804ac48c50e841594de459f Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Fri, 11 Jul 2025 13:36:03 -0400 Subject: [PATCH 03/14] add empty_response_fg_add --- R/pip_grp_logic.R | 117 ++++++++++++++++++++------------- data-raw/data.R | 72 +++++--------------- data/empty_response_fg.rda | Bin 670 -> 677 bytes data/empty_response_fg_add.rda | Bin 0 -> 701 bytes 4 files changed, 89 insertions(+), 100 deletions(-) create mode 100644 data/empty_response_fg_add.rda diff --git a/R/pip_grp_logic.R b/R/pip_grp_logic.R index c6e6e8cf..24ae3bf8 100644 --- a/R/pip_grp_logic.R +++ b/R/pip_grp_logic.R @@ -68,7 +68,7 @@ pip_grp_logic <- function(country = "ALL", censor = censor) return(res) - } else { + } else if (lcv$off_alt_agg == "both") { ## STEP 3.2: Compute fg_pip for ALL required countries ---- ## This will then be re-used in various part of the function @@ -94,7 +94,7 @@ pip_grp_logic <- function(country = "ALL", fill_gaps = TRUE, lkup = lkup) - if (lcv$off_alt_agg == "both") { + ### STEP 3.2.1 Estimates for official aggregates ---- off_ret <- pip_grp_helper(lcv_country = lcv$ctr_off_reg, @@ -105,12 +105,35 @@ pip_grp_logic <- function(country = "ALL", censor = FALSE, fg_pip = fg_pip_master, lkup = lkup) - } else { - ### STEP 3.2.2 Alternate aggregates only ---- - ### Prepare necessary variables - off_ret <- NULL - alt_agg <- country + } else { + ### STEP 3.2.2 Alternate aggregates only ---- + ### Prepare necessary variables + fg_pip_master <- fg_pip( + country = c(lcv$user_alt_agg), + year = year, + povline = povline, + popshare = NULL, + welfare_type = welfare_type, + reporting_level = reporting_level, + ppp = NULL, + lkup = lkup + ) + # For now just rowbinding two dataframes, but we would need to use it more smartly in the future + fg_pip_master <- collapse::rowbind(fg_pip_master) + + if (!data.table::is.data.table(fg_pip_master)) { + setDT(fg_pip_master) } + + add_vars_out_of_pipeline(fg_pip_master, + fill_gaps = TRUE, + lkup = lkup) + + + + + off_ret <- NULL + alt_agg <- country } # ________________________________________________________ @@ -125,52 +148,58 @@ pip_grp_logic <- function(country = "ALL", ## calculation of off regions but we still have to input to missing data ## countries, we estimate official region estimates for such countries - if (lcv$grp_use %in% c("append", "not")) { + # Only if there is md countries. Otherwise, skip. + if (!is.null(lcv$grp_use)) { - grp <- pip_grp_helper(lcv_country = lcv$md_off_reg, - country = country, - year = lcv$md_year, - povline = povline, - reporting_level = reporting_level, - censor = FALSE, - fg_pip = fg_pip_master, - lkup = lkup) + if (lcv$grp_use %in% c("append", "not")) { - if (lcv$grp_use == "append") { - grp <- data.table::rbindlist(list(off_ret, grp)) - } + grp <- pip_grp_helper(lcv_country = lcv$md_off_reg, + country = country, + year = lcv$md_year, + povline = povline, + reporting_level = reporting_level, + censor = FALSE, + fg_pip = fg_pip_master, + lkup = lkup) - } else { - # If previous estimations are enough, we don't need to do any estimation. - grp <- data.table::copy(off_ret) - } + if (lcv$grp_use == "append") { + grp <- data.table::rbindlist(list(off_ret, grp)) + } + + } else { + # If previous estimations are enough, we don't need to do any estimation. + grp <- data.table::copy(off_ret) + } - names_grp <- names(grp) + names_grp <- names(grp) - ### Prepare grp to be merge with pop_md - grp[, - c("reporting_pop", "pop_in_poverty") := NULL] + ### Prepare grp to be merge with pop_md + grp[, + c("reporting_pop", "pop_in_poverty") := NULL] - ### Merge population with Missing data table --------- + ### Merge population with Missing data table --------- - ### Merge with pop_md ------ - pop_md <- lcv$md - data.table::setnames(pop_md, "year", "reporting_year") + ### Merge with pop_md ------ + pop_md <- lcv$md + data.table::setnames(pop_md, "year", "reporting_year") - # This merge will remove those countries for which there is no official - # aggregate because of lack of coverage in the region. Eg. There is not data - # for SAS in 2000, so for countries like AFG 2000 we can't input estimates - md_grp <- merge(pop_md, grp, - by = c("region_code", "reporting_year")) + # This merge will remove those countries for which there is no official + # aggregate because of lack of coverage in the region. Eg. There is not data + # for SAS in 2000, so for countries like AFG 2000 we can't input estimates + md_grp <- merge(pop_md, grp, + by = c("region_code", "reporting_year")) - ### Merge other region codes ----------- - md_grp[, - region_code := NULL] + ### Merge other region codes ----------- + md_grp[, + region_code := NULL] - md_grp <- merge(md_grp, cl, - by = "country_code", - all.x = TRUE) + md_grp <- merge(md_grp, cl, + by = "country_code", + all.x = TRUE) + } else { + md_grp <- pipapi::empty_response_fg + } ## Fill gaps estimates with countries with Survey ----- @@ -319,15 +348,13 @@ pip_grp_helper <- function(lcv_country, # out <- censor_rows(out, lkup[["censored"]], type = "regions") # } - out <- estimate_type_var(out,lkup) - } else { # Handle simple aggregation out <- pip_aggregate(df = out, return_cols = lkup$return_cols$pip_grp) - out <- estimate_type_var(out,lkup) } + out <- estimate_type_var(out,lkup) keep <- lkup$return_cols$pip_grp$cols out <- out[, .SD, .SDcols = keep] diff --git a/data-raw/data.R b/data-raw/data.R index b6d075be..570fe500 100644 --- a/data-raw/data.R +++ b/data-raw/data.R @@ -1,5 +1,6 @@ # library(pipapi) # lkups <- pipapi::create_versioned_lkups(Sys.getenv('PIPAPI_DATA_ROOT_FOLDER')) +library(data.table) pkgload::load_all() data_dir <- @@ -34,62 +35,22 @@ empty_response_cp_poverty <- list(pov_trend = tmp1, pov_mrv = tmp2) empty_response_grp <- pip_grp("all", year, lkup = lkup, group_by = "wb") empty_response_grp <- empty_response_grp[-c(1:nrow(empty_response_grp))] +fg <- fg_pip( + ctr, + year = year, + povline = 3, + welfare_type = "all", + reporting_level = "all", + popshare = NULL, + lkup = lkup +) |> + rbindlist() +empty_response_fg <- fg[-1] -empty_response_fg <- data.table::data.table( - country_code = character(0), - survey_id = character(0), - cache_id = character(0), - wb_region_code = character(0), - reporting_year = numeric(0), - surveyid_year = character(0), - survey_year = numeric(0), - survey_time = character(0), - survey_acronym = character(0), - survey_coverage = character(0), - survey_comparability = numeric(0), - comparable_spell = character(0), - welfare_type = character(0), - reporting_level = character(0), - survey_mean_lcu = numeric(0), - survey_mean_ppp = numeric(0), - survey_median_ppp = numeric(0), - survey_median_lcu = numeric(0), - predicted_mean_ppp = numeric(0), - ppp = numeric(0), - cpi = numeric(0), - reporting_pop = numeric(0), - reporting_gdp = numeric(0), - reporting_pce = numeric(0), - pop_data_level = character(0), - gdp_data_level = character(0), - pce_data_level = character(0), - cpi_data_level = character(0), - ppp_data_level = character(0), - distribution_type = character(0), - gd_type = character(0), - is_interpolated = logical(0), - is_used_for_line_up = logical(0), - is_used_for_aggregation = logical(0), - estimation_type = character(0), - interpolation_id = character(0), - display_cp = numeric(0), - country_name = character(0), - africa_split = character(0), - africa_split_code = character(0), - region_name = character(0), - region_code = character(0), - world = character(0), - world_code = character(0), - path = character(0), - data_interpolation_id = character(0), - poverty_line = numeric(0), - mean = numeric(0), - median = numeric(0), - headcount = numeric(0), - poverty_gap = numeric(0), - poverty_severity = numeric(0), - watts = numeric(0) -) + +add_vars_out_of_pipeline(fg, fill_gaps = TRUE, lkup = lkup) + +empty_response_fg_add <- fg[-1] usethis::use_data( empty_response, @@ -97,5 +58,6 @@ usethis::use_data( reporting_level_list, empty_response_grp, empty_response_fg, + empty_response_fg_add, overwrite = TRUE ) diff --git a/data/empty_response_fg.rda b/data/empty_response_fg.rda index 6785534c663fcb70b60f3c44a565efdf30383c72..e9c29fcf8515165c0369867ded6e3e7e60193896 100644 GIT binary patch literal 677 zcmV;W0$Tk-T4*^jL0KkKS%)*CRR98~f589$|9}7iIsgm+P(Z)u-=IJM00B?~UIhRE z14T6e(@hyP!eBv(h{W=mV88$X01XC!Gy_0j00006gFqSqpfCUc00Tjw4FJ#uNhXBQ zCLt4K)6z_u7>zSbO-y(2+OkCL96f@Gx_wXArq^KMC4l0;WKa;Yyw3ZIYn2?h}qNkkcd41ohML0+=rguQ<4 zg#`*sv#JpwfJhJ{3k89DU8j1M$h-GbBJbVmU{J+OTGY)+ikOy9CKFRK%d%Z%H5Xl* zVox(#)U(a0_jfT}vecNm6s0Lm6rBgr-MJ+&VzK`?9HvT zqLgr1NzlRA@o0?Sq-HUCnvR~gLCH>SPw?$;sBu%z0Rix(t$zE0-Ue3_?3f~x_DIOLRaw}d3{LHZ)Md(;H$5C1lzfgyCg!7BO|;^?mT+%& zBLv{3*qm52GCJv~Xmq8TBvJ2m66#W9kzMY$wP*C+X3a+S3n@bdrHyG$h6Bs)f8y>) LrwS4f=5$J+x*$8L literal 670 zcmV;P0%83^T4*^jL0KkKS+{~~%m4zVe}Mo0|9}7iIsgm+P(Z)u-=F{iPy=2C0009? z6GjtEj7&@klT1xC#zJ9$0B8UVng9R|0e}E#01TP{01W|v0B8UVng9R|0926#&?X@X z;XTwHV zWo=O;`O$x$#_qjuIy zT*;#+qX$Ips6S{VsSzroqNI@(#bJ7o6+Zt<$S)B@ltoYsK!J=1BF&mi@8Y_p540X? zkr0$ZB?*ZFK_F2N0nJNfVDD2S2YcMPLlrS=Q#B?kVp%;Gnwgeh$%4viFksEGC#kJ! zS?bjLJD9L+wI(iwDN0jCCqd%w+>)1KvEb&ioTEhlnU{Tclf|*bx$=D(wYHQ}jteO| z7j`__BRAz4j9%uWr>)R(Q+5j}?^$LElLu#uH|WuJq1v6S%3U&%>`vA@Tk%FFa#Ci$ zn`t#MNaJ@@dt0pixT6+NHJyx%6RpteejO*hIVClZbG@kGQaIw~rQ1p=M_bwE*l2B0 zz?^vZCl=!~gt0S}U~^r`W|2?3B>xm()@NeKqLh?t!0P@qNsp4!QD*dRN-zs*QQCY< zBN0i37G&hwz|n&R+|CW|*CEZYq}r8>7bJST9nGllW|_Y&{F8}B$5omw&AF{l%bJa> zJ}k{`g><{@mqjPRBO={ZXJUM@JGK}o%atzN_Hb@d;Bq-PH5$@wrxp@f!M)gvrv)%# zab2O2*G)r1r7Y1Rj`vY8Qj;W#_`2V!&+WX;nvLtMr3-Ug)FAEeb7I;bYNLGF@#8nW%X$ zRTrft%+x&AdbKRct4j4t*)Mu9xw1J*4sj5Om=K0xFqr@ewN%8xRgi>}9qDiqP6-qM zlq3W|0sulGA#&ZT4Rut~vY8iOSk|EzK&N`nYAB9RY2cMFEX6C!J`Tl67bSBvuu3jU z;)wKCFnN5bmSE6`^)pgbda+N7+__eYN-lLJDN0!-9Y@vf)QFW=D14mMOLQ_v z;el7jSszA*(x<`qV9lh$g%WI0DC$+*^Ja!I`a=P8v5CKfNx3anQ7>YOEQ{86eHc4g zD#UR|76ht`;vER$yGyR*E-;9cV6U4=7_LM&Rl*%9*lBXwh3p<`JD3(n2FC~Gp!Oqj zC7A4ZF*Xv0+nmy?NhGA;=<;YXG^MF6gl*f&7d03gB7|WQSmvv8jG-^W^nB7;nS}}= zB&8B(W3S_qBJcf3i73{Fq)A|=DL9?w5V%S$B^S3etYosvxtki?TZcD`NkOR-diL}^ zUneG{K1>mV7wOe|dN&ejaH7h0wX`Mb?@6g|1_o^e?|bNqp(yP{FDbTY;*X8TEe8e4 z7*)QDHM%ynr1_i8?QBfZ8l|~tiZ;5P$Wg{6yZWc>#*(!O@cKZ literal 0 HcmV?d00001 From aaace2e0a414d8c8338c2a3aec27ef3585835f3a Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Fri, 11 Jul 2025 14:04:34 -0400 Subject: [PATCH 04/14] working but not fully --- R/pip_grp_logic.R | 4 ++-- R/pipapi-package.R | 9 +++++++++ man/empty_response_fg_add.Rd | 16 ++++++++++++++++ man/ui_cp_poverty_charts.Rd | 2 +- 4 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 man/empty_response_fg_add.Rd diff --git a/R/pip_grp_logic.R b/R/pip_grp_logic.R index 24ae3bf8..b1218ea3 100644 --- a/R/pip_grp_logic.R +++ b/R/pip_grp_logic.R @@ -198,7 +198,7 @@ pip_grp_logic <- function(country = "ALL", by = "country_code", all.x = TRUE) } else { - md_grp <- pipapi::empty_response_fg + md_grp <- pipapi::empty_response_fg_add } @@ -279,7 +279,7 @@ pip_grp_logic <- function(country = "ALL", # ret <- censor_rows(ret, lkup[["censored"]], type = "regions") # } - data.table::setcolorder(ret, names_grp) + data.table::setcolorder(ret, names(pipapi::empty_response_grp)) # Select columns if (additional_ind) { diff --git a/R/pipapi-package.R b/R/pipapi-package.R index 7a52927d..f4768769 100644 --- a/R/pipapi-package.R +++ b/R/pipapi-package.R @@ -59,6 +59,15 @@ NULL #' @format Data frame with 0 rows and 53 columns NULL +#' Dataframe for fill gaps empty response with additional variables +#' +#' @docType data +#' @keywords datasets +#' @name empty_response_fg_add +#' @usage data(empty_response_fg_add) +#' @format Data frame with 0 rows and 56 columns +NULL + utils::globalVariables( c( ".", diff --git a/man/empty_response_fg_add.Rd b/man/empty_response_fg_add.Rd new file mode 100644 index 00000000..7bb34bb1 --- /dev/null +++ b/man/empty_response_fg_add.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipapi-package.R +\docType{data} +\name{empty_response_fg_add} +\alias{empty_response_fg_add} +\title{Dataframe for fill gaps empty response with additional variables} +\format{ +Data frame with 0 rows and 56 columns +} +\usage{ +data(empty_response_fg_add) +} +\description{ +Dataframe for fill gaps empty response with additional variables +} +\keyword{datasets} diff --git a/man/ui_cp_poverty_charts.Rd b/man/ui_cp_poverty_charts.Rd index 3b211f31..e2356d1e 100644 --- a/man/ui_cp_poverty_charts.Rd +++ b/man/ui_cp_poverty_charts.Rd @@ -4,7 +4,7 @@ \alias{ui_cp_poverty_charts} \title{CP Poverty Charts} \usage{ -ui_cp_poverty_charts(country, povline, pop_units, lkup) +ui_cp_poverty_charts(country, povline, pop_units = 1e+06, lkup) } \arguments{ \item{country}{character: Country ISO 3 codes} From d8b93a9b6eb76626dfa0bd7114b01d46a6a8b4a1 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Fri, 11 Jul 2025 17:03:39 -0400 Subject: [PATCH 05/14] working. waiting for tests --- R/pip_grp_logic.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pip_grp_logic.R b/R/pip_grp_logic.R index b1218ea3..1dfb44c0 100644 --- a/R/pip_grp_logic.R +++ b/R/pip_grp_logic.R @@ -74,7 +74,7 @@ pip_grp_logic <- function(country = "ALL", ## This will then be re-used in various part of the function ## This is to avoid re-computing and re-loading the same data over and over fg_pip_master <- fg_pip( - country = c(lcv$md_off_reg, lcv$user_off_reg), + country = c(lcv$md_off_reg, lcv$user_alt_agg, lcv$user_off_reg), year = year, povline = povline, popshare = NULL, @@ -109,7 +109,7 @@ pip_grp_logic <- function(country = "ALL", ### STEP 3.2.2 Alternate aggregates only ---- ### Prepare necessary variables fg_pip_master <- fg_pip( - country = c(lcv$user_alt_agg), + country = c(lcv$user_alt_agg, lcv$md_off_reg), year = year, povline = povline, popshare = NULL, From 94d0d0fc96441f6521338870842f3731da6069d5 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Mon, 7 Jul 2025 22:50:52 +0530 Subject: [PATCH 06/14] draft to calculate povline --- R/pip.R | 10 ++++++++++ R/utils.R | 20 ++++++++++---------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/R/pip.R b/R/pip.R index c8d56e00..6969923a 100644 --- a/R/pip.R +++ b/R/pip.R @@ -107,6 +107,16 @@ pip <- function(country = "ALL", ) # lcv$est_ctrs has all the country_code that we are interested in + # Calculate and update poverty line if popshare is passed + if (!is.null(popshare)) { + # Filter only relevant data + lkup_metadata <- lkup_filter(lkup$svy_lkup, country, year, lkup$query_controls$region$values, + welfare_type, reporting_level, lkup$data_root) + lt <- load_data_list(lkup_metadata) |> + data.table::rbindlist(fill = TRUE) + povline <- wbpip:::md_infer_poverty_line(lt$welfare, lt$weight, popshare) + } + cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb") if (!file.exists(cache_file_path)) { # Create an empty duckdb file diff --git a/R/utils.R b/R/utils.R index a3b02384..13ede395 100644 --- a/R/utils.R +++ b/R/utils.R @@ -18,7 +18,16 @@ subset_lkup <- function(country, cache_file_path, fill_gaps ) { + lkup <- lkup_filter(lkup, country, year, valid_regions, reporting_level, welfare_type, data_dir) + # Return with grace + return_if_exists(slkup = lkup, + povline = povline, + cache_file_path = cache_file_path, + fill_gaps = fill_gaps) +} +#' @keywords internal +lkup_filter <- function(lkup, country, year, valid_regions, reporting_level, welfare_type, data_dir) { # STEP 1 - Keep every row by default keep <- rep(TRUE, nrow(lkup)) # STEP 2 - Select countries @@ -31,9 +40,6 @@ subset_lkup <- function(country, data_dir = data_dir, valid_regions = valid_regions) - # # step 4. Select MRV - # keep <- select_MRV(lkup, keep, year, country, valid_regions, data_dir) - # STEP 4 - Select welfare_type if (welfare_type[1] != "all") { keep <- keep & lkup$welfare_type == welfare_type @@ -45,14 +51,8 @@ subset_lkup <- function(country, lkup <- lkup[keep, ] - - # Return with grace - return_if_exists(slkup = lkup, - povline = povline, - cache_file_path = cache_file_path, - fill_gaps = fill_gaps) + return(lkup) } - #' select_country #' Helper function for subset_lkup() #' @inheritParams subset_lkup From 736f35eefb1c8719e5a36c5f910267896e39e5f1 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 10 Jul 2025 22:20:17 +0530 Subject: [PATCH 07/14] handle popshare value --- R/pip.R | 11 ----------- R/rg_pip.R | 22 +++++++++++++++------- R/utils.R | 6 ++++++ 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/R/pip.R b/R/pip.R index 6969923a..56d1e282 100644 --- a/R/pip.R +++ b/R/pip.R @@ -106,17 +106,6 @@ pip <- function(country = "ALL", aux_files = lkup$aux_files ) # lcv$est_ctrs has all the country_code that we are interested in - - # Calculate and update poverty line if popshare is passed - if (!is.null(popshare)) { - # Filter only relevant data - lkup_metadata <- lkup_filter(lkup$svy_lkup, country, year, lkup$query_controls$region$values, - welfare_type, reporting_level, lkup$data_root) - lt <- load_data_list(lkup_metadata) |> - data.table::rbindlist(fill = TRUE) - povline <- wbpip:::md_infer_poverty_line(lt$welfare, lt$weight, popshare) - } - cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb") if (!file.exists(cache_file_path)) { # Create an empty duckdb file diff --git a/R/rg_pip.R b/R/rg_pip.R index 5ff8552b..1a9b1dfa 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -17,7 +17,8 @@ rg_pip <- function(country, valid_regions <- lkup$query_controls$region$values svy_lkup <- lkup$svy_lkup data_dir <- lkup$data_root - + # povline is set to NULL if popshare is given + if (!is.null(popshare)) povline <- NULL cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb") metadata <- subset_lkup( @@ -32,12 +33,11 @@ rg_pip <- function(country, cache_file_path = cache_file_path, fill_gaps = FALSE ) + povline <- list(povline) data_present_in_master <- metadata$data_present_in_master povline <- metadata$povline metadata <- metadata$lkup - - # Remove aggregate distribution if popshare is specified # TEMPORARY FIX UNTIL popshare is supported for aggregate distributions metadata <- filter_lkup(metadata = metadata, @@ -51,12 +51,20 @@ rg_pip <- function(country, # load data lt <- load_data_list(metadata) - + # Calculate and update poverty line if popshare is passed + if (!is.null(popshare)) { + povline <- lapply(lt, \(x) wbpip:::md_infer_poverty_line(x$welfare, x$weight, popshare)) + } # parallelization # res <- get_pov_estimates(lt, povline = povline) - - # Regular lapply - res <- lapply(lt, process_dt, povline = povline) + # When poverty line is passed explicitly by user + if (length(povline) == 1) { + # Regular lapply + res <- lapply(lt, process_dt, povline = povline) + # When poverty line is calculated i.e popshare is passed + } else if (length(povline) == length(lt)) { + res <- Map(process_dt, lt, povline) + } res <- rbindlist(res, fill = TRUE) diff --git a/R/utils.R b/R/utils.R index 13ede395..c92fb349 100644 --- a/R/utils.R +++ b/R/utils.R @@ -19,6 +19,12 @@ subset_lkup <- function(country, fill_gaps ) { lkup <- lkup_filter(lkup, country, year, valid_regions, reporting_level, welfare_type, data_dir) + # If povline is NULL, this happens when popshare is passed i.e popshare is not NULL + if (is.null(povline)) { + return(list(data_present_in_master = NULL, + lkup = lkup, + povline = NULL)) + } # Return with grace return_if_exists(slkup = lkup, povline = povline, From d09535ef4f91038b70a91e668243014819c2088c Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Thu, 10 Jul 2025 22:42:13 +0530 Subject: [PATCH 08/14] handle povline of length 1 --- R/rg_pip.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/rg_pip.R b/R/rg_pip.R index 1a9b1dfa..a491c8b3 100644 --- a/R/rg_pip.R +++ b/R/rg_pip.R @@ -33,7 +33,6 @@ rg_pip <- function(country, cache_file_path = cache_file_path, fill_gaps = FALSE ) - povline <- list(povline) data_present_in_master <- metadata$data_present_in_master povline <- metadata$povline @@ -60,7 +59,8 @@ rg_pip <- function(country, # When poverty line is passed explicitly by user if (length(povline) == 1) { # Regular lapply - res <- lapply(lt, process_dt, povline = povline) + # passing povline[[1]] to pass povline as vector + res <- lapply(lt, process_dt, povline = povline[[1]]) # When poverty line is calculated i.e popshare is passed } else if (length(povline) == length(lt)) { res <- Map(process_dt, lt, povline) From 5d41805f20ef77da19b983a48410e473f0b45238 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Fri, 11 Jul 2025 17:46:37 -0400 Subject: [PATCH 09/14] fix some tests --- tests/testthat/test-fg_pip-local.R | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/tests/testthat/test-fg_pip-local.R b/tests/testthat/test-fg_pip-local.R index 13a56c84..456966cb 100644 --- a/tests/testthat/test-fg_pip-local.R +++ b/tests/testthat/test-fg_pip-local.R @@ -13,7 +13,7 @@ lkups <- create_versioned_lkups(data_dir, vintage_pattern = latest_version) lkup <- lkups$versions_paths[[lkups$latest_release]] -con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = fs::path(lkup$data_root, "cache", ext = "duckdb")) +# con <- duckdb::dbConnect(duckdb::duckdb(), dbdir = fs::path(lkup$data_root, "cache", ext = "duckdb")) local_mocked_bindings( get_caller_names = function() c("else") @@ -30,8 +30,7 @@ test_that("Imputation is working for extrapolated aggregated distribution", { welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) expect_equal(nrow(tmp$main_data), 0) @@ -44,8 +43,7 @@ test_that("Imputation is working for extrapolated aggregated distribution", { welfare_type = "all", reporting_level = "national", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) expect_equal(nrow(tmp$main_data), 0) @@ -61,8 +59,7 @@ test_that("Imputation is working for interpolated mixed distribution", { welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) expect_equal(nrow(tmp$main_data), 0) @@ -75,8 +72,7 @@ test_that("Imputation is working for interpolated mixed distribution", { welfare_type = "all", reporting_level = "national", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) expect_equal(nrow(tmp$main_data), 0) @@ -91,8 +87,7 @@ test_that("Imputation is working for interpolated aggregate distribution", { welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) expect_equal(nrow(tmp$main_data), 2) @@ -105,8 +100,7 @@ test_that("Imputation is working for interpolated aggregate distribution", { welfare_type = "all", reporting_level = "national", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) expect_equal(nrow(tmp$main_data), 2) @@ -158,8 +152,7 @@ tmp <- fg_pip( welfare_type = "all", reporting_level = "all", ppp = NULL, - lkup = lkup, - con = con + lkup = lkup ) tmp <- tmp$data_in_cache |> as.data.table() # dt <- pip(country = "ALL", From 8321cebc7fef7a5dda5458a93064ba0f84377b3b Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Sat, 12 Jul 2025 10:55:46 -0400 Subject: [PATCH 10/14] fix problem with extra region.... it is a PATCH. This is needs to be fixed properly with the new methodology --- R/utils.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/utils.R b/R/utils.R index c92fb349..c3e94f1a 100644 --- a/R/utils.R +++ b/R/utils.R @@ -73,6 +73,7 @@ select_country <- function(lkup, keep, country, valid_regions) { selected_regions <- country[country %in% valid_regions] # Find all columns ending with _code code_cols <- grep("_code$", names(lkup), value = TRUE) + code_cols <- code_cols[!code_cols %in% "wb_region_code"] # Temporary solution # For each code column, check if any value matches selected_regions keep_regions_list <- lapply(code_cols, \(col) { lkup[[col]] %in% selected_regions From b07d136d921e469db0bf979d6232a137bb5ca884 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Sat, 12 Jul 2025 10:57:48 -0400 Subject: [PATCH 11/14] Increment version number to 1.3.20.9000 --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5f741891..072d0b06 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pipapi Title: API for the Poverty and Inequality Platform -Version: 1.3.20 +Version: 1.3.20.9000 Authors@R: c(person(given = "Tony", family = "Fujs", diff --git a/NEWS.md b/NEWS.md index a07af132..0b869884 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# pipapi (development version) + # pipapi 1.3.20 # pipapi 1.3.19 From 0a24aff97a445760210fe7af4d4315c23a11c8a5 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Sun, 13 Jul 2025 07:16:58 -0400 Subject: [PATCH 12/14] fix tests --- R/add_agg_stats.R | 3 ++- tests/testthat/test-fg_pip-local.R | 6 ++++-- tests/testthat/test-pip-local.R | 2 +- tests/testthat/test-utils.R | 15 ++++++++++----- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/R/add_agg_stats.R b/R/add_agg_stats.R index 5b93067d..bbc81ed9 100644 --- a/R/add_agg_stats.R +++ b/R/add_agg_stats.R @@ -16,7 +16,8 @@ add_agg_stats <- function(df, aggregated_list <- split(aggregated, interaction( aggregated$country_code, - aggregated$reporting_year + aggregated$reporting_year, + aggregated$poverty_line ), drop = TRUE ) diff --git a/tests/testthat/test-fg_pip-local.R b/tests/testthat/test-fg_pip-local.R index 456966cb..5d13b764 100644 --- a/tests/testthat/test-fg_pip-local.R +++ b/tests/testthat/test-fg_pip-local.R @@ -90,7 +90,8 @@ test_that("Imputation is working for interpolated aggregate distribution", { lkup = lkup ) - expect_equal(nrow(tmp$main_data), 2) + expect_equal(nrow(tmp$main_data), 0) + expect_equal(nrow(tmp$data_in_cache), 2) tmp <- fg_pip( country = "CHN", @@ -103,7 +104,8 @@ test_that("Imputation is working for interpolated aggregate distribution", { lkup = lkup ) - expect_equal(nrow(tmp$main_data), 2) + expect_equal(nrow(tmp$main_data), 0) + expect_equal(nrow(tmp$data_in_cache), 2) }) diff --git a/tests/testthat/test-pip-local.R b/tests/testthat/test-pip-local.R index 59e8ac89..334e02fe 100644 --- a/tests/testthat/test-pip-local.R +++ b/tests/testthat/test-pip-local.R @@ -543,7 +543,7 @@ test_that("pop_share option is returning consistent results for single grouped d expect_equal(round(pl$headcount, 3), round(ps$headcount, 3)) - expect_equal(round(povline, 6), round(pl$poverty_line, 6)) + expect_equal(round(povline, 2), round(pl$poverty_line, 2)) # High poverty line # Fails for higher poverty lines diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 7b2b53a4..da735a34 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -66,7 +66,8 @@ test_that("subset_lkup correctly selects all countries", { reporting_level = "all", lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir) + data_dir = data_dir, + povline = NULL) expect_equal(nrow(tmp$lkup), nrow(ref_lkup)) }) @@ -79,7 +80,8 @@ test_that("subset_lkup correctly selects countries", { reporting_level = "all", lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir) + data_dir = data_dir, + povline = NULL) expect_equal(sort(unique(tmp$lkup$country_code)), sort(selection)) }) @@ -92,7 +94,8 @@ test_that("subset_lkup correctly selects single regions", { reporting_level = "all", lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir) + data_dir = data_dir, + povline = NULL) expect_equal(sort(unique(tmp$lkup$region_code)), sort(selection)) }) @@ -105,7 +108,8 @@ test_that("subset_lkup correctly selects multiple regions", { reporting_level = "all", lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir) + data_dir = data_dir, + povline = NULL) expect_equal(sort(unique(tmp$lkup$region_code)), sort(selection)) }) @@ -122,7 +126,8 @@ test_that("subset_lkup correctly selects countries and regions", { reporting_level = "all", lkup = ref_lkup, valid_regions = valid_regions, - data_dir = data_dir) + data_dir = data_dir, + povline = NULL) # Regions are selected expect_true(all(region_selection %in% (unique(tmp$lkup$region_code)))) From e813cb2d6a8db9b7cf7d1937093660e4d9652fb7 Mon Sep 17 00:00:00 2001 From: shahronak47 Date: Sun, 13 Jul 2025 07:29:24 -0400 Subject: [PATCH 13/14] add additional test to ensure the bug doesn't happen again --- tests/testthat/test-pip-local.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-pip-local.R b/tests/testthat/test-pip-local.R index 334e02fe..2dae5b28 100644 --- a/tests/testthat/test-pip-local.R +++ b/tests/testthat/test-pip-local.R @@ -840,4 +840,12 @@ test_that("SPL is the same by reporting level", { ) }) +test_that("make sure popshare bug no which was reported no longer exists", { + out <- pip(country = "USA", year = 2022, + popshare = .5, lkup = lkup) + # Ensure poverty line is not the default one + expect_false(out$poverty_line %in% c(1.9, 3)) + # Ensure headcount is closer to 0.5 + expect_equal(out$headcount, 0.5, tolerance = .05) +}) From cd70446f763c5b6a4ca461736fadedf717a6df80 Mon Sep 17 00:00:00 2001 From: "R.Andres Castaneda" Date: Mon, 21 Jul 2025 17:57:14 -0400 Subject: [PATCH 14/14] launch API with all versions available in folder --- inst/TMP/TMP_API_launcher.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/inst/TMP/TMP_API_launcher.R b/inst/TMP/TMP_API_launcher.R index fa2719b4..3919ada1 100644 --- a/inst/TMP/TMP_API_launcher.R +++ b/inst/TMP/TMP_API_launcher.R @@ -13,8 +13,9 @@ if (Sys.info()[["user"]] == "wb384996") { max() # latest_version <- NULL - lkups <- create_versioned_lkups(data_dir, - vintage_pattern = latest_version) + # lkups <- create_versioned_lkups(data_dir, + # vintage_pattern = latest_version) + lkups <- create_versioned_lkups(data_dir) # lkup <- lkups$versions_paths[[lkups$latest_release]] start_api(port = 8080)