PIP-Technical-Team · randrescastaneda · Jul 21, 2025 · Jul 9, 2025 · Jul 11, 2025 · Jul 11, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: pipapi
 Title: API for the Poverty and Inequality Platform
-Version: 1.3.20
+Version: 1.3.20.9000
 Authors@R: 
     c(person(given = "Tony",
             family = "Fujs",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,5 @@
+# pipapi (development version)
+
 # pipapi 1.3.20
 
 # pipapi 1.3.19

diff --git a/R/add_agg_stats.R b/R/add_agg_stats.R
@@ -16,7 +16,8 @@ add_agg_stats <- function(df,
     aggregated_list <- split(aggregated,
       interaction(
         aggregated$country_code,
-        aggregated$reporting_year
+        aggregated$reporting_year,
+        aggregated$poverty_line
       ),
       drop = TRUE
     )

diff --git a/R/pip.R b/R/pip.R
@@ -106,7 +106,6 @@ pip <- function(country         = "ALL",
       aux_files       =  lkup$aux_files
     )
   # lcv$est_ctrs has all the country_code that we are interested in
-
   cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb")
   if (!file.exists(cache_file_path)) {
     # Create an empty duckdb file

diff --git a/R/pip_grp_logic.R b/R/pip_grp_logic.R
@@ -68,13 +68,13 @@ pip_grp_logic <- function(country         = "ALL",
                    censor          =  censor)
     return(res)
 
-  } else {
+  } else if (lcv$off_alt_agg == "both") {
 
     ## STEP 3.2: Compute fg_pip for ALL required countries ----
     ## This will then be re-used in various part of the function
     ## This is to avoid re-computing and re-loading the same data over and over
     fg_pip_master <- fg_pip(
-      country         = c(lcv$md_off_reg, lcv$user_off_reg),
+      country         = c(lcv$md_off_reg, lcv$user_alt_agg, lcv$user_off_reg),
       year            = year,
       povline         = povline,
       popshare        = NULL,
@@ -90,19 +90,13 @@ pip_grp_logic <- function(country         = "ALL",
       setDT(fg_pip_master)
     }
 
-    add_vars_out_of_pipeline(fg_pip_master, fill_gaps = TRUE, lkup = lkup)
+    add_vars_out_of_pipeline(fg_pip_master,
+                             fill_gaps = TRUE,
+                             lkup = lkup)
+
 
-    if (lcv$off_alt_agg == "both") {
       ### STEP 3.2.1 Estimates for official aggregates ----
       off_ret <-
-        # pip_grp(country         =  lcv$user_off_reg,
-        #         year            =  year,
-        #         povline         =  povline,
-        #         group_by        =  "wb",
-        #         welfare_type    =  welfare_type,
-        #         reporting_level =  reporting_level,
-        #         lkup            =  lkup,
-        #         censor          =  censor)
         pip_grp_helper(lcv_country     = lcv$ctr_off_reg,
                        country         = country,
                        year            = year,
@@ -111,12 +105,35 @@ pip_grp_logic <- function(country         = "ALL",
                        censor          = FALSE,
                        fg_pip          = fg_pip_master,
                        lkup            = lkup)
-    } else {
-      ### STEP 3.2.2 Alternate aggregates only ----
-      ### Prepare necessary variables
-      off_ret <- NULL
-      alt_agg <- country
+  } else {
+    ### STEP 3.2.2 Alternate aggregates only ----
+    ### Prepare necessary variables
+    fg_pip_master <- fg_pip(
+      country         = c(lcv$user_alt_agg, lcv$md_off_reg),
+      year            = year,
+      povline         = povline,
+      popshare        = NULL,
+      welfare_type    = welfare_type,
+      reporting_level = reporting_level,
+      ppp             = NULL,
+      lkup            = lkup
+    )
+    # For now just rowbinding two dataframes, but we would need to use it more smartly in the future
+    fg_pip_master <- collapse::rowbind(fg_pip_master)
+
+    if (!data.table::is.data.table(fg_pip_master)) {
+      setDT(fg_pip_master)
     }
+
+    add_vars_out_of_pipeline(fg_pip_master,
+                             fill_gaps = TRUE,
+                             lkup = lkup)
+
+
+
+
+    off_ret <- NULL
+    alt_agg <- country
   }
 
   #   ________________________________________________________
@@ -131,52 +148,58 @@ pip_grp_logic <- function(country         = "ALL",
   ## calculation of off regions but we still have to input to missing data
   ## countries, we estimate official region estimates for such countries
 
-  if (lcv$grp_use %in% c("append", "not")) {
+  # Only if there is md countries. Otherwise, skip.
+  if (!is.null(lcv$grp_use)) {
 
-    grp <- pip_grp_helper(lcv_country         = lcv$md_off_reg,
-                          country             = country,
-                          year                = lcv$md_year,
-                          povline             = povline,
-                          reporting_level     = reporting_level,
-                          censor              = FALSE,
-                          fg_pip              = fg_pip_master,
-                          lkup                = lkup)
+    if (lcv$grp_use %in% c("append", "not")) {
 
-    if (lcv$grp_use == "append") {
-      grp <- data.table::rbindlist(list(off_ret, grp))
-    }
+      grp <- pip_grp_helper(lcv_country         = lcv$md_off_reg,
+                            country             = country,
+                            year                = lcv$md_year,
+                            povline             = povline,
+                            reporting_level     = reporting_level,
+                            censor              = FALSE,
+                            fg_pip              = fg_pip_master,
+                            lkup                = lkup)
 
-  } else {
-    # If previous estimations are enough, we don't need to do any estimation.
-    grp <- data.table::copy(off_ret)
-  }
+      if (lcv$grp_use == "append") {
+        grp <- data.table::rbindlist(list(off_ret, grp))
+      }
+
+    } else {
+      # If previous estimations are enough, we don't need to do any estimation.
+      grp <- data.table::copy(off_ret)
+    }
 
-  names_grp <- names(grp)
+    names_grp <- names(grp)
 
-  ### Prepare grp to be merge with pop_md
-  grp[,
-      c("reporting_pop", "pop_in_poverty") := NULL]
+    ### Prepare grp to be merge with pop_md
+    grp[,
+        c("reporting_pop", "pop_in_poverty") := NULL]
 
 
-  ### Merge population with Missing data table ---------
+    ### Merge population with Missing data table ---------
 
-  ### Merge with pop_md ------
-  pop_md <- lcv$md
-  data.table::setnames(pop_md, "year", "reporting_year")
+    ### Merge with pop_md ------
+    pop_md <- lcv$md
+    data.table::setnames(pop_md, "year", "reporting_year")
 
-  # This merge will remove those countries for which there is no official
-  # aggregate because of lack of coverage in the region. Eg. There is not data
-  # for SAS in 2000, so for countries like AFG 2000 we can't input estimates
-  md_grp <- merge(pop_md, grp,
-                  by = c("region_code", "reporting_year"))
+    # This merge will remove those countries for which there is no official
+    # aggregate because of lack of coverage in the region. Eg. There is not data
+    # for SAS in 2000, so for countries like AFG 2000 we can't input estimates
+    md_grp <- merge(pop_md, grp,
+                    by = c("region_code", "reporting_year"))
 
-  ### Merge other region codes -----------
-  md_grp[,
-         region_code := NULL]
+    ### Merge other region codes -----------
+    md_grp[,
+           region_code := NULL]
 
-  md_grp <- merge(md_grp, cl,
-                  by = "country_code",
-                  all.x = TRUE)
+    md_grp <- merge(md_grp, cl,
+                    by = "country_code",
+                    all.x = TRUE)
+  } else {
+    md_grp <- pipapi::empty_response_fg_add
+  }
 
 
   ## Fill gaps estimates with countries with Survey  -----
@@ -256,7 +279,7 @@ pip_grp_logic <- function(country         = "ALL",
   #   ret <- censor_rows(ret, lkup[["censored"]], type = "regions")
   # }
 
-  data.table::setcolorder(ret, names_grp)
+  data.table::setcolorder(ret, names(pipapi::empty_response_grp))
 
   # Select columns
   if (additional_ind) {
@@ -325,15 +348,13 @@ pip_grp_helper <- function(lcv_country,
     #   out <- censor_rows(out, lkup[["censored"]], type = "regions")
     # }
 
-    out <- estimate_type_var(out,lkup)
-
 
   } else {
     # Handle simple aggregation
     out <- pip_aggregate(df = out,
                          return_cols = lkup$return_cols$pip_grp)
-    out <- estimate_type_var(out,lkup)
   }
+  out <- estimate_type_var(out,lkup)
 
   keep <- lkup$return_cols$pip_grp$cols
   out <- out[, .SD, .SDcols = keep]

diff --git a/R/pipapi-package.R b/R/pipapi-package.R
@@ -59,6 +59,15 @@ NULL
 #' @format Data frame with 0 rows and 53 columns
 NULL
 
+#' Dataframe for fill gaps empty response with additional variables
+#'
+#' @docType data
+#' @keywords datasets
+#' @name empty_response_fg_add
+#' @usage data(empty_response_fg_add)
+#' @format Data frame with 0 rows and 56 columns
+NULL
+
 utils::globalVariables(
   c(
     ".",

diff --git a/R/rg_pip.R b/R/rg_pip.R
@@ -17,7 +17,8 @@ rg_pip <- function(country,
   valid_regions <- lkup$query_controls$region$values
   svy_lkup      <- lkup$svy_lkup
   data_dir      <- lkup$data_root
-
+  # povline is set to NULL if popshare is given
+  if (!is.null(popshare)) povline <- NULL
   cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb")
 
   metadata <- subset_lkup(
@@ -36,8 +37,6 @@ rg_pip <- function(country,
   data_present_in_master <- metadata$data_present_in_master
   povline  <- metadata$povline
   metadata  <- metadata$lkup
-
-
   # Remove aggregate distribution if popshare is specified
   # TEMPORARY FIX UNTIL popshare is supported for aggregate distributions
   metadata <- filter_lkup(metadata = metadata,
@@ -51,12 +50,21 @@ rg_pip <- function(country,
 
   # load data
   lt <- load_data_list(metadata)
-
+  # Calculate and update poverty line if popshare is passed
+  if (!is.null(popshare)) {
+    povline <- lapply(lt, \(x) wbpip:::md_infer_poverty_line(x$welfare, x$weight, popshare))
+  }
   # parallelization
   # res <- get_pov_estimates(lt, povline = povline)
-
-  # Regular lapply
-  res <- lapply(lt, process_dt, povline = povline)
+  # When poverty line is passed explicitly by user
+  if (length(povline) == 1) {
+    # Regular lapply
+    # passing povline[[1]] to pass povline as vector
+    res <- lapply(lt, process_dt, povline = povline[[1]])
+  # When poverty line is calculated i.e popshare is passed
+  } else if (length(povline) == length(lt)) {
+    res <- Map(process_dt, lt, povline)
+  }
 
   res <- rbindlist(res, fill = TRUE)
 

diff --git a/R/utils.R b/R/utils.R
@@ -18,7 +18,22 @@ subset_lkup <- function(country,
                         cache_file_path,
                         fill_gaps
                         ) {
+  lkup <- lkup_filter(lkup, country, year, valid_regions, reporting_level, welfare_type, data_dir)
+  # If povline is NULL, this happens when popshare is passed i.e popshare is not NULL
+  if (is.null(povline)) {
+    return(list(data_present_in_master = NULL,
+                lkup = lkup,
+                povline = NULL))
+  }
+  # Return with grace
+  return_if_exists(slkup = lkup,
+                   povline = povline,
+                   cache_file_path = cache_file_path,
+                   fill_gaps = fill_gaps)
+}
 
+#' @keywords internal
+lkup_filter <- function(lkup, country, year, valid_regions, reporting_level, welfare_type, data_dir) {
   # STEP 1 - Keep every row by default
   keep <- rep(TRUE, nrow(lkup))
   # STEP 2 - Select countries
@@ -31,9 +46,6 @@ subset_lkup <- function(country,
                        data_dir =  data_dir,
                        valid_regions = valid_regions)
 
-  # # step 4. Select MRV
-  # keep <- select_MRV(lkup, keep, year, country, valid_regions, data_dir)
-
   # STEP 4 - Select welfare_type
   if (welfare_type[1] != "all") {
     keep <- keep & lkup$welfare_type == welfare_type
@@ -45,14 +57,8 @@ subset_lkup <- function(country,
 
 
   lkup <- lkup[keep, ]
-
-  # Return with grace
-  return_if_exists(slkup = lkup,
-                   povline = povline,
-                   cache_file_path = cache_file_path,
-                   fill_gaps = fill_gaps)
+  return(lkup)
 }
-
 #' select_country
 #' Helper function for subset_lkup()
 #' @inheritParams subset_lkup
@@ -65,7 +71,19 @@ select_country <- function(lkup, keep, country, valid_regions) {
     # Select regions
     if (any(country %in% valid_regions)) {
       selected_regions <- country[country %in% valid_regions]
-      keep_regions <- lkup$region_code %in% selected_regions
+      # Find all columns ending with _code
+      code_cols <- grep("_code$", names(lkup), value = TRUE)
+      code_cols <- code_cols[!code_cols %in% "wb_region_code"] # Temporary solution
+      # For each code column, check if any value matches selected_regions
+      keep_regions_list <- lapply(code_cols, \(col) {
+        lkup[[col]] %in% selected_regions
+    })
+      # Combine with logical OR across all code columns
+      if (length(keep_regions_list) > 0) {
+        keep_regions <- Reduce(`|`, keep_regions_list)
+      } else {
+        keep_regions <- rep(FALSE, nrow(lkup))
+      }
     } else {
       keep_regions <- rep(FALSE, length(lkup$country_code))
     }