Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: pipapi
Title: API for the Poverty and Inequality Platform
Version: 1.3.20
Version: 1.3.20.9000
Authors@R:
c(person(given = "Tony",
family = "Fujs",
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# pipapi (development version)

# pipapi 1.3.20

# pipapi 1.3.19
Expand Down
3 changes: 2 additions & 1 deletion R/add_agg_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ add_agg_stats <- function(df,
aggregated_list <- split(aggregated,
interaction(
aggregated$country_code,
aggregated$reporting_year
aggregated$reporting_year,
aggregated$poverty_line
),
drop = TRUE
)
Expand Down
1 change: 0 additions & 1 deletion R/pip.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ pip <- function(country = "ALL",
aux_files = lkup$aux_files
)
# lcv$est_ctrs has all the country_code that we are interested in

cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb")
if (!file.exists(cache_file_path)) {
# Create an empty duckdb file
Expand Down
133 changes: 77 additions & 56 deletions R/pip_grp_logic.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,13 @@ pip_grp_logic <- function(country = "ALL",
censor = censor)
return(res)

} else {
} else if (lcv$off_alt_agg == "both") {

## STEP 3.2: Compute fg_pip for ALL required countries ----
## This will then be re-used in various part of the function
## This is to avoid re-computing and re-loading the same data over and over
fg_pip_master <- fg_pip(
country = c(lcv$md_off_reg, lcv$user_off_reg),
country = c(lcv$md_off_reg, lcv$user_alt_agg, lcv$user_off_reg),
year = year,
povline = povline,
popshare = NULL,
Expand All @@ -90,19 +90,13 @@ pip_grp_logic <- function(country = "ALL",
setDT(fg_pip_master)
}

add_vars_out_of_pipeline(fg_pip_master, fill_gaps = TRUE, lkup = lkup)
add_vars_out_of_pipeline(fg_pip_master,
fill_gaps = TRUE,
lkup = lkup)


if (lcv$off_alt_agg == "both") {
### STEP 3.2.1 Estimates for official aggregates ----
off_ret <-
# pip_grp(country = lcv$user_off_reg,
# year = year,
# povline = povline,
# group_by = "wb",
# welfare_type = welfare_type,
# reporting_level = reporting_level,
# lkup = lkup,
# censor = censor)
pip_grp_helper(lcv_country = lcv$ctr_off_reg,
country = country,
year = year,
Expand All @@ -111,12 +105,35 @@ pip_grp_logic <- function(country = "ALL",
censor = FALSE,
fg_pip = fg_pip_master,
lkup = lkup)
} else {
### STEP 3.2.2 Alternate aggregates only ----
### Prepare necessary variables
off_ret <- NULL
alt_agg <- country
} else {
### STEP 3.2.2 Alternate aggregates only ----
### Prepare necessary variables
fg_pip_master <- fg_pip(
country = c(lcv$user_alt_agg, lcv$md_off_reg),
year = year,
povline = povline,
popshare = NULL,
welfare_type = welfare_type,
reporting_level = reporting_level,
ppp = NULL,
lkup = lkup
)
# For now just rowbinding two dataframes, but we would need to use it more smartly in the future
fg_pip_master <- collapse::rowbind(fg_pip_master)

if (!data.table::is.data.table(fg_pip_master)) {
setDT(fg_pip_master)
}

add_vars_out_of_pipeline(fg_pip_master,
fill_gaps = TRUE,
lkup = lkup)




off_ret <- NULL
alt_agg <- country
}

# ________________________________________________________
Expand All @@ -131,52 +148,58 @@ pip_grp_logic <- function(country = "ALL",
## calculation of off regions but we still have to input to missing data
## countries, we estimate official region estimates for such countries

if (lcv$grp_use %in% c("append", "not")) {
# Only if there is md countries. Otherwise, skip.
if (!is.null(lcv$grp_use)) {

grp <- pip_grp_helper(lcv_country = lcv$md_off_reg,
country = country,
year = lcv$md_year,
povline = povline,
reporting_level = reporting_level,
censor = FALSE,
fg_pip = fg_pip_master,
lkup = lkup)
if (lcv$grp_use %in% c("append", "not")) {

if (lcv$grp_use == "append") {
grp <- data.table::rbindlist(list(off_ret, grp))
}
grp <- pip_grp_helper(lcv_country = lcv$md_off_reg,
country = country,
year = lcv$md_year,
povline = povline,
reporting_level = reporting_level,
censor = FALSE,
fg_pip = fg_pip_master,
lkup = lkup)

} else {
# If previous estimations are enough, we don't need to do any estimation.
grp <- data.table::copy(off_ret)
}
if (lcv$grp_use == "append") {
grp <- data.table::rbindlist(list(off_ret, grp))
}

} else {
# If previous estimations are enough, we don't need to do any estimation.
grp <- data.table::copy(off_ret)
}

names_grp <- names(grp)
names_grp <- names(grp)

### Prepare grp to be merge with pop_md
grp[,
c("reporting_pop", "pop_in_poverty") := NULL]
### Prepare grp to be merge with pop_md
grp[,
c("reporting_pop", "pop_in_poverty") := NULL]


### Merge population with Missing data table ---------
### Merge population with Missing data table ---------

### Merge with pop_md ------
pop_md <- lcv$md
data.table::setnames(pop_md, "year", "reporting_year")
### Merge with pop_md ------
pop_md <- lcv$md
data.table::setnames(pop_md, "year", "reporting_year")

# This merge will remove those countries for which there is no official
# aggregate because of lack of coverage in the region. Eg. There is not data
# for SAS in 2000, so for countries like AFG 2000 we can't input estimates
md_grp <- merge(pop_md, grp,
by = c("region_code", "reporting_year"))
# This merge will remove those countries for which there is no official
# aggregate because of lack of coverage in the region. Eg. There is not data
# for SAS in 2000, so for countries like AFG 2000 we can't input estimates
md_grp <- merge(pop_md, grp,
by = c("region_code", "reporting_year"))

### Merge other region codes -----------
md_grp[,
region_code := NULL]
### Merge other region codes -----------
md_grp[,
region_code := NULL]

md_grp <- merge(md_grp, cl,
by = "country_code",
all.x = TRUE)
md_grp <- merge(md_grp, cl,
by = "country_code",
all.x = TRUE)
} else {
md_grp <- pipapi::empty_response_fg_add
}


## Fill gaps estimates with countries with Survey -----
Expand Down Expand Up @@ -256,7 +279,7 @@ pip_grp_logic <- function(country = "ALL",
# ret <- censor_rows(ret, lkup[["censored"]], type = "regions")
# }

data.table::setcolorder(ret, names_grp)
data.table::setcolorder(ret, names(pipapi::empty_response_grp))

# Select columns
if (additional_ind) {
Expand Down Expand Up @@ -325,15 +348,13 @@ pip_grp_helper <- function(lcv_country,
# out <- censor_rows(out, lkup[["censored"]], type = "regions")
# }

out <- estimate_type_var(out,lkup)


} else {
# Handle simple aggregation
out <- pip_aggregate(df = out,
return_cols = lkup$return_cols$pip_grp)
out <- estimate_type_var(out,lkup)
}
out <- estimate_type_var(out,lkup)

keep <- lkup$return_cols$pip_grp$cols
out <- out[, .SD, .SDcols = keep]
Expand Down
9 changes: 9 additions & 0 deletions R/pipapi-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ NULL
#' @format Data frame with 0 rows and 53 columns
NULL

#' Dataframe for fill gaps empty response with additional variables
#'
#' @docType data
#' @keywords datasets
#' @name empty_response_fg_add
#' @usage data(empty_response_fg_add)
#' @format Data frame with 0 rows and 56 columns
NULL

utils::globalVariables(
c(
".",
Expand Down
22 changes: 15 additions & 7 deletions R/rg_pip.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ rg_pip <- function(country,
valid_regions <- lkup$query_controls$region$values
svy_lkup <- lkup$svy_lkup
data_dir <- lkup$data_root

# povline is set to NULL if popshare is given
if (!is.null(popshare)) povline <- NULL
cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb")

metadata <- subset_lkup(
Expand All @@ -36,8 +37,6 @@ rg_pip <- function(country,
data_present_in_master <- metadata$data_present_in_master
povline <- metadata$povline
metadata <- metadata$lkup


# Remove aggregate distribution if popshare is specified
# TEMPORARY FIX UNTIL popshare is supported for aggregate distributions
metadata <- filter_lkup(metadata = metadata,
Expand All @@ -51,12 +50,21 @@ rg_pip <- function(country,

# load data
lt <- load_data_list(metadata)

# Calculate and update poverty line if popshare is passed
if (!is.null(popshare)) {
povline <- lapply(lt, \(x) wbpip:::md_infer_poverty_line(x$welfare, x$weight, popshare))
}
# parallelization
# res <- get_pov_estimates(lt, povline = povline)

# Regular lapply
res <- lapply(lt, process_dt, povline = povline)
# When poverty line is passed explicitly by user
if (length(povline) == 1) {
# Regular lapply
# passing povline[[1]] to pass povline as vector
res <- lapply(lt, process_dt, povline = povline[[1]])
# When poverty line is calculated i.e popshare is passed
} else if (length(povline) == length(lt)) {
res <- Map(process_dt, lt, povline)
}

res <- rbindlist(res, fill = TRUE)

Expand Down
40 changes: 29 additions & 11 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,22 @@ subset_lkup <- function(country,
cache_file_path,
fill_gaps
) {
lkup <- lkup_filter(lkup, country, year, valid_regions, reporting_level, welfare_type, data_dir)
# If povline is NULL, this happens when popshare is passed i.e popshare is not NULL
if (is.null(povline)) {
return(list(data_present_in_master = NULL,
lkup = lkup,
povline = NULL))
}
# Return with grace
return_if_exists(slkup = lkup,
povline = povline,
cache_file_path = cache_file_path,
fill_gaps = fill_gaps)
}

#' @keywords internal
lkup_filter <- function(lkup, country, year, valid_regions, reporting_level, welfare_type, data_dir) {
# STEP 1 - Keep every row by default
keep <- rep(TRUE, nrow(lkup))
# STEP 2 - Select countries
Expand All @@ -31,9 +46,6 @@ subset_lkup <- function(country,
data_dir = data_dir,
valid_regions = valid_regions)

# # step 4. Select MRV
# keep <- select_MRV(lkup, keep, year, country, valid_regions, data_dir)

# STEP 4 - Select welfare_type
if (welfare_type[1] != "all") {
keep <- keep & lkup$welfare_type == welfare_type
Expand All @@ -45,14 +57,8 @@ subset_lkup <- function(country,


lkup <- lkup[keep, ]

# Return with grace
return_if_exists(slkup = lkup,
povline = povline,
cache_file_path = cache_file_path,
fill_gaps = fill_gaps)
return(lkup)
}

#' select_country
#' Helper function for subset_lkup()
#' @inheritParams subset_lkup
Expand All @@ -65,7 +71,19 @@ select_country <- function(lkup, keep, country, valid_regions) {
# Select regions
if (any(country %in% valid_regions)) {
selected_regions <- country[country %in% valid_regions]
keep_regions <- lkup$region_code %in% selected_regions
# Find all columns ending with _code
code_cols <- grep("_code$", names(lkup), value = TRUE)
code_cols <- code_cols[!code_cols %in% "wb_region_code"] # Temporary solution
# For each code column, check if any value matches selected_regions
keep_regions_list <- lapply(code_cols, \(col) {
lkup[[col]] %in% selected_regions
})
# Combine with logical OR across all code columns
if (length(keep_regions_list) > 0) {
keep_regions <- Reduce(`|`, keep_regions_list)
} else {
keep_regions <- rep(FALSE, nrow(lkup))
}
} else {
keep_regions <- rep(FALSE, length(lkup$country_code))
}
Expand Down
Loading