diff --git a/R/pip_agg.R b/R/pip_agg.R
index 0d5485fb..87762307 100644
--- a/R/pip_agg.R
+++ b/R/pip_agg.R
@@ -7,43 +7,52 @@
 #' # Create lkups
 #' }
 #' @export
-pip_agg <- function(country         = "ALL",
-                    year            = "ALL",
-                    povline         = 1.9,
-                    group_by        = c("wb", "none"),
-                    welfare_type    = c("all", "consumption", "income"),
-                    reporting_level = c("all", "national"),
-                    lkup,
-                    censor          = FALSE,
-                    lkup_hash       = lkup$cache_data_id$hash_pip_grp,
-                    additional_ind  = FALSE) {
-
+pip_agg <- function(
+  country = "ALL",
+  year = "ALL",
+  povline = 1.9,
+  group_by = "wb",
+  welfare_type = c("all", "consumption", "income"),
+  reporting_level = c("all", "national"),
+  lkup,
+  censor = FALSE,
+  lkup_hash = lkup$cache_data_id$hash_pip_grp,
+  additional_ind = FALSE
+) {
   # Should pip_old or pip_new be used?
   #-------------------------------------
   use_new <- lkup$use_new_lineup_version
 
+  # check group_by
+  group_by <- .check_group_by(group_by = group_by, lkup = lkup)
+
   # Run correct function
   #-------------------------------------
   out <- if (use_new) {
-    pip_grp_new(country         = country,
-                year            = year,
-                povline         = povline,
-                welfare_type    = welfare_type,
-                reporting_level = reporting_level,
-                lkup            = lkup,
-                censor          = censor,
-                additional_ind  = additional_ind)
+    pip_grp_new(
+      country = country,
+      year = year,
+      povline = povline,
+      welfare_type = welfare_type,
+      reporting_level = reporting_level,
+      group_by = group_by,
+      lkup = lkup,
+      censor = censor,
+      additional_ind = additional_ind
+    )
   } else {
-    pip_grp_logic(country         = country,
-                  year            = year,
-                  povline         = povline,
-                  group_by        = group_by,
-                  welfare_type    = welfare_type,
-                  reporting_level = reporting_level,
-                  lkup            = lkup,
-                  censor          = censor,
-                  lkup_hash       = lkup_hash,
-                  additional_ind  = additional_ind)
+    pip_grp_logic(
+      country = country,
+      year = year,
+      povline = povline,
+      group_by = group_by,
+      welfare_type = welfare_type,
+      reporting_level = reporting_level,
+      lkup = lkup,
+      censor = censor,
+      lkup_hash = lkup_hash,
+      additional_ind = additional_ind
+    )
   }
 
   # Return
diff --git a/R/pip_grp.R b/R/pip_grp.R
index df0797c2..d9b88b51 100644
--- a/R/pip_grp.R
+++ b/R/pip_grp.R
@@ -15,29 +15,31 @@
 #'         lkup = lkups)
 #' }
 #' @export
-pip_grp <- function(country         = "ALL",
-                    year            = "ALL",
-                    povline         = 1.9,
-                    group_by        = c("wb", "none"),
-                    welfare_type    = c("all", "consumption", "income"),
-                    reporting_level = c("all", "national"),
-                    lkup,
-                    censor          = TRUE,
-                    lkup_hash       = lkup$cache_data_id$hash_pip_grp) {
-
-  welfare_type    <- match.arg(welfare_type)
+pip_grp <- function(
+  country = "ALL",
+  year = "ALL",
+  povline = 1.9,
+  group_by = "wb",
+  welfare_type = c("all", "consumption", "income"),
+  reporting_level = c("all", "national"),
+  lkup,
+  censor = TRUE,
+  lkup_hash = lkup$cache_data_id$hash_pip_grp
+) {
+  welfare_type <- match.arg(welfare_type)
   reporting_level <- match.arg(reporting_level)
-  group_by        <- match.arg(group_by)
 
   # TEMPORARY UNTIL SELECTION MECHANISM IS BEING IMPROVED
   country <- toupper(country)
   year <- toupper(year)
 
   # If ref_lkup is not part of lkup throw an error.
-  if (!all(c('ref_lkup') %in% names(lkup)))
-    stop("You are probably passing more than one dataset as lkup argument.
-  Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD")
-
+  if (!all(c('ref_lkup') %in% names(lkup))) {
+    stop(
+      "You are probably passing more than one dataset as lkup argument.
+  Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD"
+    )
+  }
 
   # Custom aggregations only supported at the national level
   # subgroups aggregations only supported for "all" countries
@@ -51,14 +53,15 @@ pip_grp <- function(country         = "ALL",
   }
 
   out <- fg_pip_old(
-    country         = country,
-    year            = year,
-    povline         = povline,
-    popshare        = NULL,
-    welfare_type    = welfare_type,
+    country = country,
+    year = year,
+    povline = povline,
+    popshare = NULL,
+    welfare_type = welfare_type,
     reporting_level = reporting_level,
-    ppp             = NULL,
-    lkup           = lkup)
+    ppp = NULL,
+    lkup = lkup
+  )
 
   # For now just rowbinding two dataframes, but we would need to use it more smartly in the future
   out <- collapse::rowbind(out, fill = TRUE)
@@ -69,8 +72,10 @@ pip_grp <- function(country         = "ALL",
 
   # Handles aggregated distributions (like CHN and IND)
   if (tolower(reporting_level) %in% c("national", "all")) {
-    out <- add_agg_stats(out,
-                         return_cols = lkup$return_cols$ag_average_poverty_stats)
+    out <- add_agg_stats(
+      out,
+      return_cols = lkup$return_cols$ag_average_poverty_stats
+    )
   }
 
   add_vars_out_of_pipeline(out, fill_gaps = TRUE, lkup = lkup)
@@ -83,25 +88,23 @@ pip_grp <- function(country         = "ALL",
 
   # Handle aggregations with sub-groups
   if (group_by != "none") {
-
     out <- pip_aggregate_by(
       df = out,
       country = country,
+      group_by = group_by,
       return_cols = lkup$return_cols$pip_grp
     )
 
-    out <- estimate_type_var(out,lkup)
+    out <- estimate_type_var(out, lkup)
 
     # Censor regional values
     # if (censor) {
     #   out <- censor_rows(out, lkup[["censored"]], type = "regions")
     # }
-
   } else {
     # Handle simple aggregation
-    out <- pip_aggregate(out,
-                         return_cols = lkup$return_cols$pip_grp)
-    out <- estimate_type_var(out,lkup)
+    out <- pip_aggregate(out, return_cols = lkup$return_cols$pip_grp)
+    out <- estimate_type_var(out, lkup)
   }
 
   keep <- lkup$return_cols$pip_grp$cols
@@ -123,44 +126,34 @@ pip_grp <- function(country         = "ALL",
 #'
 #' @return data.table
 pip_aggregate <- function(df, by = NULL, return_cols) {
-
   all_cols <- return_cols$cols
   weighted_cols <- return_cols$weighted_average_cols
 
   ## Assess by parameter ---------
 
   if (is.null(by)) {
-
     by_code <- "CUSTOM"
     by_name <- "CUSTOM"
 
-    to_keep <- all_cols[!all_cols %in%  c("pop_in_poverty", "estimate_type")]
-
+    to_keep <- all_cols[!all_cols %in% c("pop_in_poverty", "estimate_type")]
   } else {
-
     if (grepl("code$", by)) {
-
       by_code <- by
       by_name <- gsub("_code", "", by)
-
     } else {
-
       by_code <- paste0(by, "_code")
       by_name <- by
-
     }
 
-    to_keep <- all_cols[!all_cols %in% c("pop_in_poverty",
-                                         "region_code",
-                                         "region_name",
-                                         "estimate_type")]
+    to_keep <- all_cols[
+      !all_cols %in%
+        c("pop_in_poverty", "region_code", "region_name", "estimate_type")
+    ]
     to_keep <- c(by_name, by_code, to_keep)
 
     by <- c(by_name, by_code)
   }
 
-
-
   # Handle simple aggregation
 
   df <- df[, .SD, .SDcols = to_keep]
@@ -168,20 +161,17 @@ pip_aggregate <- function(df, by = NULL, return_cols) {
   byvar <- c(by, "reporting_year", "poverty_line")
 
   # Compute population totals
-  pop <- df[, lapply(.SD,
-                     base::sum,
-                     na.rm = TRUE),
-            by = byvar,
-            .SDcols = "reporting_pop"
+  pop <- df[,
+    lapply(.SD, base::sum, na.rm = TRUE),
+    by = byvar,
+    .SDcols = "reporting_pop"
   ]
 
   # Compute stats weighted average by groups
-  df <- df[, lapply(.SD,
-                      stats::weighted.mean,
-                      w = reporting_pop,
-                      na.rm = TRUE),
-             by = byvar,
-             .SDcols = weighted_cols
+  df <- df[,
+    lapply(.SD, stats::weighted.mean, w = reporting_pop, na.rm = TRUE),
+    by = byvar,
+    .SDcols = weighted_cols
   ]
 
   # Combine results
@@ -189,108 +179,118 @@ pip_aggregate <- function(df, by = NULL, return_cols) {
 
   ## Add region code and name -----
   if (is.null(by)) {
-
     df$region_code <- "CUSTOM"
     df$region_name <- "CUSTOM"
-
   } else {
-    data.table::setnames(df,
-                         c(by_name, by_code),
-                         c("region_name", "region_code"))
+    data.table::setnames(
+      df,
+      c(by_name, by_code),
+      c("region_name", "region_code")
+    )
   }
 
   # Compute population living in poverty
   df <- df[, pop_in_poverty := round(headcount * reporting_pop, 0)]
 
   return(df)
-
 }
 
 #' Aggregate by predefined groups
 #' @param df data.frame: Response from `fg_pip_old()` or `rg_pip()`.
 #' @param country character: Selected countries / regions
+#' @param group_by character: Grouping variable (default is "wb")
 #' @param return_cols list: lkup$return_cols$pip_grp object. Controls returned
 #' columns
 #' @keywords internal
-pip_aggregate_by <- function(df,
-                             country = "ALL",
-                             return_cols = NULL) {
-
+pip_aggregate_by <- function(
+  df,
+  country = "ALL",
+  group_by = "wb",
+  return_cols = NULL
+) {
   all_cols <- return_cols$cols
   weighted_cols <- return_cols$weighted_average_cols
 
   # Keep only rows necessary for regional aggregates
   df <- filter_for_aggregate_by(df)
 
-  to_keep <- all_cols[!all_cols %in% c("pop_in_poverty",
-                                       "estimate_type")]
+  to_keep <- all_cols[!all_cols %in% c("pop_in_poverty", "estimate_type")]
 
   # df <- df[, .SD, .SDcols = to_keep] # I think we can ommit this part
 
-  # Compute stats weighted average by groups
-  rgn <- df |>
-    fgroup_by(region_name,
-              region_code,
-              reporting_year,
-              poverty_line) |>
-    fselect(c(weighted_cols, "reporting_pop")) |>
-    fmean(w = reporting_pop, stub = FALSE)
-
-  # World aggregation
-  if (any(c("ALL", "WLD") %in% country)) {
+  # Determine grouping variables based on group_by parameter
+  if (group_by == "wb") {
+    # Default case: use region_code and region_name
+    group_code <- "region_code"
+    group_name <- "region_name"
+
+    # Compute stats weighted average by groups
+    rgn <- df |>
+      fgroup_by(region_name, region_code, reporting_year, poverty_line) |>
+      fselect(c(weighted_cols, "reporting_pop")) |>
+      fmean(w = reporting_pop, stub = FALSE)
+  } else {
+    # Custom case: construct grouping variables dynamically
+    group_code <- paste0(group_by, "_code")
+    group_name <- group_by
+
+
+    # Compute stats weighted average by groups
+    rgn <- df |>
+      fgroup_by(c(group_name, group_code, "reporting_year", "poverty_line")) |>
+      fselect(c(weighted_cols, "reporting_pop")) |>
+      fmean(w = reporting_pop, stub = FALSE)
+
+    setnames(
+      rgn,
+      old = c(group_name, group_code),
+      new = c("region_name", "region_code")
+    )
+  }
+
+  # World aggregation (only for default "wb" grouping)
+  if (group_by == "wb" && any(c("ALL", "WLD") %in% country)) {
     # Compute world aggregates
     wld <- compute_world_aggregates(rgn = rgn)
   } else {
     wld <- NULL
   }
 
-  # Africas aggregation
-  if (any(c("ALL", "AFE", "AFW") %in% country)) {
-  rgn <- df |>
-    fgroup_by(africa_split,
-              africa_split_code,
-              reporting_year,
-              poverty_line) |>
-    fselect(c(weighted_cols, "reporting_pop")) |>
-    fmean(w = reporting_pop, stub = FALSE) |>
-    fsubset(!is.na(africa_split_code)) |>
-    frename(africa_split_code = region_code,
-            africa_split      = region_name) |>
-    rowbind(rgn, fill = TRUE)
-  }
-  # Vintage aggregation
-  if ("ALL" %in% country & "regionpcn_code" %in% names(df)) {
-  rgn <- df |>
-    fgroup_by(regionpcn,
-              regionpcn_code,
-              reporting_year,
-              poverty_line) |>
-    fselect(c(weighted_cols, "reporting_pop")) |>
-    fmean(w = reporting_pop, stub = FALSE) |>
-    fsubset(!is.na(regionpcn_code)) |>
-    frename(regionpcn_code = region_code,
-            regionpcn      = region_name) |>
-    rowbind(rgn, fill = TRUE)
+  # Africa split aggregation (only for default "wb" grouping)
+  if (group_by == "wb" && any(c("ALL", "AFE", "AFW") %in% country)) {
+    rgn <- df |>
+      fgroup_by(
+        africa_split,
+        africa_split_code,
+        reporting_year,
+        poverty_line
+      ) |>
+      fselect(c(weighted_cols, "reporting_pop")) |>
+      fmean(w = reporting_pop, stub = FALSE) |>
+      fsubset(!is.na(africa_split_code)) |>
+      frename(africa_split_code = region_code, africa_split = region_name) |>
+      rowbind(rgn, fill = TRUE)
   }
 
+  # Remove regionpcn_code aggregation logic (as per requirements)
 
-    if (length(country) == 1) {
-      if (country == "WLD") {
-        # Return only world aggregate
-        wld[, pop_in_poverty := round(headcount * reporting_pop, 0)]
-        return(wld)
-      }
+  if (length(country) == 1) {
+    if (country == "WLD") {
+      # Return only world aggregate
+      wld[, pop_in_poverty := round(headcount * reporting_pop, 0)]
+      return(wld)
     }
+  }
 
   # Combine with other regional aggregates
-    out <- rowbind(rgn, wld, fill = TRUE)
-    out[, pop_in_poverty := round(headcount * reporting_pop, 0)]
+  out <- rowbind(rgn, wld, fill = TRUE)
+  out[, pop_in_poverty := round(headcount * reporting_pop, 0)]
 
-    if ("ALL" %in% country) {
-      return(out)
-    }
+  if ("ALL" %in% country) {
+    return(out)
+  }
 
-    out[region_code %in% country, ]
+  out[region_code %in% country, ]
 }
 
 
@@ -298,15 +298,12 @@ compute_world_aggregates <- function(rgn, cols = NULL) {
   # Compute stats
   # Grouping by poverty line as well since we now have vectorized poverty line values
   wld <- rgn |>
-    fgroup_by(reporting_year,
-              poverty_line) |>
+    fgroup_by(reporting_year, poverty_line) |>
     num_vars() |>
     fmean(w = reporting_pop, stub = FALSE) |>
-    ftransform(region_code = "WLD",
-               region_name = "World")
+    ftransform(region_code = "WLD", region_name = "World")
 
   return(wld)
-
 }
 
 
@@ -319,10 +316,10 @@ filter_for_aggregate_by <- function(df) {
   # If nationally representative survey is available, use it
   # Otherwise, use whatever is available
 
-  df[, check := length(reporting_level),
-      by = c("country_code", "reporting_year", "poverty_line")
-      ][
-        check == 1 | (check > 1 & reporting_level == "national"),
-        ]
-
+  df[,
+    check := length(reporting_level),
+    by = c("country_code", "reporting_year", "poverty_line")
+  ][
+    check == 1 | (check > 1 & reporting_level == "national"),
+  ]
 }
diff --git a/R/pip_grp_logic.R b/R/pip_grp_logic.R
index 5fadf427..c2ba322d 100644
--- a/R/pip_grp_logic.R
+++ b/R/pip_grp_logic.R
@@ -3,27 +3,28 @@
 #'
 #' @return data.table
 #' @export
-pip_grp_logic <- function(country         = "ALL",
-                          year            = "ALL",
-                          povline         = 1.9,
-                          group_by        = c("wb", "none"),
-                          welfare_type    = c("all", "consumption", "income"),
-                          reporting_level = c("all", "national"),
-                          lkup,
-                          censor          = TRUE,
-                          lkup_hash       = lkup$cache_data_id$hash_pip_grp,
-                          additional_ind  = FALSE) {
+pip_grp_logic <- function(
+  country = "ALL",
+  year = "ALL",
+  povline = 1.9,
+  group_by = "wb",
+  welfare_type = c("all", "consumption", "income"),
+  reporting_level = c("all", "national"),
+  lkup,
+  censor = TRUE,
+  lkup_hash = lkup$cache_data_id$hash_pip_grp,
+  additional_ind = FALSE
+) {
   #   ________________________________________________________________________
   #   STEP 1: Set up                                                      ####
 
-  welfare_type    <- match.arg(welfare_type)
+  welfare_type <- match.arg(welfare_type)
   reporting_level <- match.arg(reporting_level)
-  group_by        <- match.arg(group_by)
 
   # Custom aggregations only supported at the national level
   # subgroups aggregations only supported for "all" countries
   country <- toupper(country)
-  year    <- toupper(year)
+  year <- toupper(year)
   if (group_by != "none") {
     reporting_level <- "all"
     if (!all(country %in% c("ALL", lkup$query_controls$region$values))) {
@@ -38,54 +39,56 @@ pip_grp_logic <- function(country         = "ALL",
 
   lcv <- # List with countries vectors
     create_countries_vctr(
-      country         =  country,
-      year            =  year,
-      lkup            = lkup
+      country = country,
+      year = year,
+      lkup = lkup
     )
 
   # use the same names as before to avoid inconsistencies
   alt_agg <- lcv$user_alt_agg
   gt_code <- lcv$user_alt_gt_code
-  cl      <- lkup$aux_files$country_list
+  cl <- lkup$aux_files$country_list
 
   # STEP 3: Start pip_grp_logic algorithm ----
   ## STEP 3.1: Official regions only selection ----
   ## This will trigger an early return as no additional imputations are needed
-  if (all(lcv$off_alt_agg == "off")) { # Users only request the official regions
+  if (all(lcv$off_alt_agg == "off")) {
+    # Users only request the official regions
     ### Early return -----------
-    res <- pip_grp(country         =  country,
-                   year            =  year,
-                   povline         =  povline,
-                   group_by        =  "wb",
-                   welfare_type    =  welfare_type,
-                   reporting_level =  reporting_level,
-                   lkup            =  lkup,
-                   censor          =  censor)
+    res <- pip_grp(
+      country = country,
+      year = year,
+      povline = povline,
+      group_by = "wb",
+      welfare_type = welfare_type,
+      reporting_level = reporting_level,
+      lkup = lkup,
+      censor = censor
+    )
     return(res)
-
   } else {
-
     ## STEP 3.2: Compute fg_pip for ALL required countries ----
     ## This will then be re-used in various part of the function
     ## This is to avoid re-computing and re-loading the same data over and over
     fg_pip_master <- fg_pip_old(
-      country         = c(lcv$md_off_reg, lcv$user_alt_agg, lcv$user_off_reg),
-      year            = year,
-      povline         = povline,
-      popshare        = NULL,
-      welfare_type    = welfare_type,
+      country = c(lcv$md_off_reg, lcv$user_alt_agg, lcv$user_off_reg),
+      year = year,
+      povline = povline,
+      popshare = NULL,
+      welfare_type = welfare_type,
       reporting_level = reporting_level,
-      ppp             = NULL,
-      lkup            = lkup
-      )
+      ppp = NULL,
+      lkup = lkup
+    )
     # For now just rowbinding two dataframes, but we would need to use it more smartly in the future
-    fg_pip_master <- rowbind(fg_pip_master,
-                           # THis should not be necessary
-                          fill = TRUE) |>
+    fg_pip_master <- rowbind(
+      fg_pip_master,
+      # THis should not be necessary
+      fill = TRUE
+    ) |>
       setDT()
 
-    fg_pip_master[is.na(mean),
-                  mean := predicted_mean_ppp]
+    fg_pip_master[is.na(mean), mean := predicted_mean_ppp]
 
     add_vars_out_of_pipeline(fg_pip_master, fill_gaps = TRUE, lkup = lkup)
 
@@ -100,14 +103,16 @@ pip_grp_logic <- function(country         = "ALL",
         #         reporting_level =  reporting_level,
         #         lkup            =  lkup,
         #         censor          =  censor)
-        pip_grp_helper(lcv_country     = lcv$ctr_off_reg,
-                       country         = country,
-                       year            = year,
-                       povline         = povline,
-                       reporting_level = reporting_level,
-                       censor          = FALSE,
-                       fg_pip          = fg_pip_master,
-                       lkup            = lkup)
+        pip_grp_helper(
+          lcv_country = lcv$ctr_off_reg,
+          country = country,
+          year = year,
+          povline = povline,
+          reporting_level = reporting_level,
+          censor = FALSE,
+          fg_pip = fg_pip_master,
+          lkup = lkup
+        )
     } else {
       ### STEP 3.2.2 Alternate aggregates only ----
       ### Prepare necessary variables
@@ -129,20 +134,20 @@ pip_grp_logic <- function(country         = "ALL",
   ## countries, we estimate official region estimates for such countries
 
   if (lcv$grp_use %in% c("append", "not")) {
-
-    grp <- pip_grp_helper(lcv_country         = lcv$md_off_reg,
-                          country             = country,
-                          year                = lcv$md_year,
-                          povline             = povline,
-                          reporting_level     = reporting_level,
-                          censor              = FALSE,
-                          fg_pip              = fg_pip_master,
-                          lkup                = lkup)
+    grp <- pip_grp_helper(
+      lcv_country = lcv$md_off_reg,
+      country = country,
+      year = lcv$md_year,
+      povline = povline,
+      reporting_level = reporting_level,
+      censor = FALSE,
+      fg_pip = fg_pip_master,
+      lkup = lkup
+    )
 
     if (lcv$grp_use == "append") {
       grp <- data.table::rbindlist(list(off_ret, grp))
     }
-
   } else {
     # If previous estimations are enough, we don't need to do any estimation.
     grp <- data.table::copy(off_ret)
@@ -152,8 +157,8 @@ pip_grp_logic <- function(country         = "ALL",
 
   ### Prepare grp to be merge with pop_md
   grp[,
-      c("reporting_pop", "pop_in_poverty") := NULL]
-
+    c("reporting_pop", "pop_in_poverty") := NULL
+  ]
 
   ### Merge population with Missing data table ---------
 
@@ -164,17 +169,14 @@ pip_grp_logic <- function(country         = "ALL",
   # This merge will remove those countries for which there is no official
   # aggregate because of lack of coverage in the region. Eg. There is not data
   # for SAS in 2000, so for countries like AFG 2000 we can't input estimates
-  md_grp <- merge(pop_md, grp,
-                  by = c("region_code", "reporting_year"))
+  md_grp <- merge(pop_md, grp, by = c("region_code", "reporting_year"))
 
   ### Merge other region codes -----------
   md_grp[,
-         region_code := NULL]
-
-  md_grp <- merge(md_grp, cl,
-                  by = "country_code",
-                  all.x = TRUE)
+    region_code := NULL
+  ]
 
+  md_grp <- merge(md_grp, cl, by = "country_code", all.x = TRUE)
 
   ## Fill gaps estimates with countries with Survey  -----
   fg <- fg_pip_master[fg_pip_master$country_code %chin% lcv$est_ctrs, ]
@@ -183,29 +185,32 @@ pip_grp_logic <- function(country         = "ALL",
     fg <- fg[fg[["reporting_year"]] %in% as.numeric(year), ]
   }
 
-
   l_fg <- vector(mode = "list", length = length(gt_code))
 
   ### Split fg estimates by grouping type ============
   for (i in seq_along(gt_code)) {
-    gt_var    <- gt_code[i]
+    gt_var <- gt_code[i]
     filter_fg <-
       paste0(gt_var, " %in% alt_agg") |>
-      {\(.) parse(text = .) }()
+      {
+        \(.) parse(text = .)
+      }()
 
     # Filter both datasets
-    fdt        <- fg[eval(filter_fg)]
-    mdt        <- md_grp[eval(filter_fg)]
+    fdt <- fg[eval(filter_fg)]
+    mdt <- md_grp[eval(filter_fg)]
 
     # Find common variables
     common_vars <- intersect(names(fdt), names(mdt))
-    fdt         <- fdt[, .SD, .SDcols = common_vars]
-    mdt         <- mdt[, .SD, .SDcols = common_vars]
+    fdt <- fdt[, .SD, .SDcols = common_vars]
+    mdt <- mdt[, .SD, .SDcols = common_vars]
 
     ## Append with countries with missing data -----
-    l_fg[[i]] <- data.table::rbindlist(list(fdt, mdt),
-                                       use.names = TRUE,
-                                       fill = TRUE)
+    l_fg[[i]] <- data.table::rbindlist(
+      list(fdt, mdt),
+      use.names = TRUE,
+      fill = TRUE
+    )
   }
 
   # Estimate poverty for aggregates
@@ -218,10 +223,11 @@ pip_grp_logic <- function(country         = "ALL",
     x <- l_fg[[i]]
     y <- gt_code[i]
 
-    ld[[i]] <- pip_aggregate(df = x,
-                             by = y,
-                             return_cols = lkup$return_cols$pip_grp)
-
+    ld[[i]] <- pip_aggregate(
+      df = x,
+      by = y,
+      return_cols = lkup$return_cols$pip_grp
+    )
   }
   de <- data.table::rbindlist(ld, use.names = TRUE)
   rm(ld)
@@ -232,15 +238,19 @@ pip_grp_logic <- function(country         = "ALL",
   # Append official regions with Alt aggregates ---------
 
   if (!is.null(off_ret)) {
-    ret <- data.table::rbindlist(list(de, off_ret),
-                                 use.names = TRUE,
-                                 fill = TRUE)
-
+    ret <- data.table::rbindlist(
+      list(de, off_ret),
+      use.names = TRUE,
+      fill = TRUE
+    )
   } else {
     ret <- de
   }
   # add new estimate type
+
   ret <- estimate_type_var(ret,lkup)
+
+
   # Censor regional values ----------- We are not censoring at this stage
   # anymore because we need to show al the years in the homre page, including
   # nowcast. we are now filtering at the UI and wrappers levels
@@ -256,24 +266,28 @@ pip_grp_logic <- function(country         = "ALL",
 
   #Order rows by country code and reporting year
   setorder(ret, region_code , reporting_year)
+
+
   #   ____________________________________________________________________
   #   Return                                                         ####
   return(ret)
 }
 
 
-pip_grp_helper <- function(lcv_country,
-                           country,
-                           year,
-                           povline,
-                           reporting_level,
-                           censor,
-                           fg_pip,
-                           group_by = "wb",
-                           lkup){
-
+pip_grp_helper <- function(
+  lcv_country,
+  country,
+  year,
+  povline,
+  reporting_level,
+  censor,
+  fg_pip,
+  group_by = "wb",
+  lkup
+) {
   # Filter countries
-  keep_countries <- fg_pip[["country_code"]] %chin% lcv_country |
+  keep_countries <- fg_pip[["country_code"]] %chin%
+    lcv_country |
     fg_pip[["wb_region_code"]] %chin% lcv_country
   out <- fg_pip[keep_countries, ]
   # Filter years
@@ -287,8 +301,10 @@ pip_grp_helper <- function(lcv_country,
   }
   # Handles aggregated distributions
   if (reporting_level %in% c("national", "all")) {
-    out <- add_agg_stats(out,
-                         return_cols = lkup$return_cols$ag_average_poverty_stats)
+    out <- add_agg_stats(
+      out,
+      return_cols = lkup$return_cols$ag_average_poverty_stats
+    )
   }
 
   # Handle potential (insignificant) difference in poverty_line values that
@@ -301,10 +317,10 @@ pip_grp_helper <- function(lcv_country,
 
   # Handle aggregations with sub-groups
   if (group_by != "none") {
-
     out <- pip_aggregate_by(
       df = out,
       country = country,
+      group_by = group_by,
       return_cols = lkup$return_cols$pip_grp
     )
 
@@ -313,14 +329,11 @@ pip_grp_helper <- function(lcv_country,
     #   out <- censor_rows(out, lkup[["censored"]], type = "regions")
     # }
 
-    out <- estimate_type_var(out,lkup)
-
-
+    out <- estimate_type_var(out, lkup)
   } else {
     # Handle simple aggregation
-    out <- pip_aggregate(df = out,
-                         return_cols = lkup$return_cols$pip_grp)
-    out <- estimate_type_var(out,lkup)
+    out <- pip_aggregate(df = out, return_cols = lkup$return_cols$pip_grp)
+    out <- estimate_type_var(out, lkup)
   }
 
   keep <- lkup$return_cols$pip_grp$cols
diff --git a/R/pip_grp_new.R b/R/pip_grp_new.R
index bf45c3e0..2ca2f7c1 100644
--- a/R/pip_grp_new.R
+++ b/R/pip_grp_new.R
@@ -1,22 +1,24 @@
 #' New way to estimate Aggregate data
 #' @rdname pip_agg
-pip_grp_new <- \(country         = "ALL",
-                 year            = "ALL",
-                 povline         = 1.9,
-                 welfare_type    = c("all", "consumption", "income"),
-                 reporting_level = c("all", "national"),
-                 lkup,
-                 censor          = TRUE,
-                 additional_ind  = FALSE,
-                 lkup_hash       = lkup$cache_data_id$hash_pip_grp) {
-
-  welfare_type    <- match.arg(welfare_type)
+pip_grp_new <- \(
+  country = "ALL",
+  year = "ALL",
+  povline = 1.9,
+  welfare_type = c("all", "consumption", "income"),
+  reporting_level = c("all", "national"),
+  group_by = "wb",
+  lkup,
+  censor = TRUE,
+  additional_ind = FALSE,
+  lkup_hash = lkup$cache_data_id$hash_pip_grp
+) {
+  welfare_type <- match.arg(welfare_type)
   reporting_level <- match.arg(reporting_level)
 
   # Custom aggregations only supported at the national level
   # subgroups aggregations only supported for "all" countries
   country <- toupper(country)
-  year    <- toupper(year)
+  year <- toupper(year)
 
   if (!all(country %in% c("ALL", lkup$query_controls$region$values))) {
     country <- "ALL"
@@ -24,30 +26,34 @@ pip_grp_new <- \(country         = "ALL",
 
   # Select countries to estimate poverty
   cts <- copy(lkup$aux_files$country_list)
-  country_code  <-  if (!"ALL" %in% country) {
+  country_code <- if (!"ALL" %in% country) {
     get_country_code_subset(dt = cts, country = country)
   } else {
     "ALL"
   }
 
   out <- fg_pip(
-    country         = country_code,
-    year            = year,
-    povline         = povline,
-    popshare        = NULL,
-    welfare_type    = welfare_type,
+    country = country_code,
+    year = year,
+    povline = povline,
+    popshare = NULL,
+    welfare_type = welfare_type,
     reporting_level = reporting_level,
-    ppp             = NULL,
-    lkup           = lkup)
+    ppp = NULL,
+    lkup = lkup
+  )
 
   cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb")
   if (!file.exists(cache_file_path)) {
     # Create an empty duckdb file
     create_duckdb_file(cache_file_path)
   }
-  out <- treat_cache_and_main(out,
-                              cache_file_path = cache_file_path,
-                              lkup = lkup, fill_gaps = TRUE)
+  out <- treat_cache_and_main(
+    out,
+    cache_file_path = cache_file_path,
+    lkup = lkup,
+    fill_gaps = TRUE
+  )
 
   # return empty dataframe if no metadata is found
   if (nrow(out) == 0) {
@@ -56,8 +62,10 @@ pip_grp_new <- \(country         = "ALL",
 
   # Handles aggregated distributions (like CHN and IND)
   if (tolower(reporting_level) %in% c("national", "all")) {
-    out <- add_agg_stats(out,
-                         return_cols = lkup$return_cols$ag_average_poverty_stats)
+    out <- add_agg_stats(
+      out,
+      return_cols = lkup$return_cols$ag_average_poverty_stats
+    )
   }
 
   add_vars_out_of_pipeline(out, fill_gaps = TRUE, lkup = lkup)
@@ -70,20 +78,21 @@ pip_grp_new <- \(country         = "ALL",
 
   # Handle aggregations with sub-groups
 
-    out <- pip_aggregate_by(
-      df = out,
-      country = country,
-      return_cols = lkup$return_cols$pip_grp
-    )
+  out <- pip_aggregate_by(
+    df = out,
+    country = country,
+    group_by = group_by,
+    return_cols = lkup$return_cols$pip_grp
+  )
 
-    out <- estimate_type_var(out,lkup)
+  out <- estimate_type_var(out, lkup)
 
-    # Censor regional values
-    if (censor) {
-      out <- censor_rows(out, lkup[["censored"]], type = "regions")
-    }
+  # Censor regional values
+  if (censor) {
+    out <- censor_rows(out, lkup[["censored"]], type = "regions")
+  }
 
-    out
+  out
 }
 
 #' Subset country_code values based on matches in *_code columns and country_code
@@ -125,7 +134,8 @@ get_country_code_subset <- function(dt, country) {
   if (any(!matched)) {
     cli::cli_abort(
       "The following values in {.arg country} were not found in any *_code column or country_code:
-      {country[!matched]}")
+      {country[!matched]}"
+    )
   }
   funique(result)
 }
diff --git a/R/pip_new_lineups.R b/R/pip_new_lineups.R
index a7c51b3e..14edde52 100644
--- a/R/pip_new_lineups.R
+++ b/R/pip_new_lineups.R
@@ -1,4 +1,3 @@
-
 #' Compute PIP statistics
 #'
 #' Compute the main PIP poverty and inequality statistics.
@@ -54,27 +53,26 @@
 #'     lkup = lkups)
 #' }
 #' @export
-pip_new_lineups <- function(country         = "ALL",
-                            year            = "ALL",
-                            povline         = 1.9,
-                            popshare        = NULL,
-                            fill_gaps       = FALSE,
-                            group_by        = c("none", "wb"),
-                            welfare_type    = c("all", "consumption", "income"),
-                            reporting_level = c("all", "national", "rural", "urban"),
-                            ppp             = NULL,
-                            lkup,
-                            censor          = TRUE,
-                            lkup_hash       = lkup$cache_data_id$hash_pip,
-                            additional_ind  = FALSE) {
-
-
+pip_new_lineups <- function(
+  country = "ALL",
+  year = "ALL",
+  povline = 1.9,
+  popshare = NULL,
+  fill_gaps = FALSE,
+  group_by = c("none", "wb"),
+  welfare_type = c("all", "consumption", "income"),
+  reporting_level = c("all", "national", "rural", "urban"),
+  ppp = NULL,
+  lkup,
+  censor = TRUE,
+  lkup_hash = lkup$cache_data_id$hash_pip,
+  additional_ind = FALSE
+) {
   # set up -------------
-  welfare_type    <- match.arg(welfare_type)
+  welfare_type <- match.arg(welfare_type)
   reporting_level <- match.arg(reporting_level)
-  group_by        <- match.arg(group_by)
-  povline         <- round(povline, digits = 3)
-
+  group_by <- match.arg(group_by)
+  povline <- round(povline, digits = 3)
 
   # TEMPORARY UNTIL SELECTION MECHANISM IS BEING IMPROVED
   country <- toupper(country)
@@ -83,25 +81,27 @@ pip_new_lineups <- function(country         = "ALL",
   }
 
   # If svy_lkup is not part of lkup throw an error.
-  if (!all(c('svy_lkup') %in% names(lkup)))
-    stop("You are probably passing more than one dataset as lkup argument.
-  Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD")
-
+  if (!all(c('svy_lkup') %in% names(lkup))) {
+    stop(
+      "You are probably passing more than one dataset as lkup argument.
+  Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD"
+    )
+  }
 
   # **** TO BE REMOVED **** REMOVAL STARTS HERE
   # Once `pip-grp` has been integrated in ingestion pipeline
   # Forces fill_gaps to TRUE when using group_by option
   if (group_by != "none") {
     fill_gaps <- TRUE
-    message("Info: argument group_by in pip() is deprecated; please use pip_grp() instead.")
+    message(
+      "Info: argument group_by in pip() is deprecated; please use pip_grp() instead."
+    )
   }
   # **** TO BE REMOVED **** REMOVAL ENDS HERE
 
   # Countries vector ------------
   validate_country_codes(country = country, lkup = lkup)
 
-
-
   # lcv$est_ctrs has all the country_code that we are interested in
 
   cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb")
@@ -113,43 +113,49 @@ pip_new_lineups <- function(country         = "ALL",
   if (fill_gaps) {
     ## lineup years-----------------
     out <- fg_pip(
-      country            = country,
-      year               = year,
-      povline            = povline,
-      popshare           = popshare,
-      welfare_type       = welfare_type,
-      reporting_level    = reporting_level,
-      ppp                = ppp,
-      lkup               = lkup
+      country = country,
+      year = year,
+      povline = povline,
+      popshare = popshare,
+      welfare_type = welfare_type,
+      reporting_level = reporting_level,
+      ppp = ppp,
+      lkup = lkup
     )
   } else {
     ## survey years ------------------
     out <- rg_pip(
-      country         = country,
-      year            = year,
-      povline         = povline,
-      popshare        = popshare,
-      welfare_type    = welfare_type,
+      country = country,
+      year = year,
+      povline = povline,
+      popshare = popshare,
+      welfare_type = welfare_type,
       reporting_level = reporting_level,
-      ppp             = ppp,
-      lkup            = lkup
+      ppp = ppp,
+      lkup = lkup
     )
   }
 
   # Cache new data
   #---------------------------------------------
-  out <- treat_cache_and_main(out,
-                              cache_file_path = cache_file_path,
-                              lkup = lkup, fill_gaps = fill_gaps)
+  out <- treat_cache_and_main(
+    out,
+    cache_file_path = cache_file_path,
+    lkup = lkup,
+    fill_gaps = fill_gaps
+  )
 
   # Early return for empty table---------------
-  if (nrow(out) == 0) return(pipapi::empty_response)
+  if (nrow(out) == 0) {
+    return(pipapi::empty_response)
+  }
 
   # aggregate distributions ------------------
   if (reporting_level %in% c("national", "all")) {
     out <- add_agg_stats(
-      df          = out,
-      return_cols = lkup$return_cols$ag_average_poverty_stats)
+      df = out,
+      return_cols = lkup$return_cols$ag_average_poverty_stats
+    )
     if (reporting_level == "national") {
       out <- out[reporting_level == "national"]
     }
@@ -157,9 +163,7 @@ pip_new_lineups <- function(country         = "ALL",
 
   # Add out of pipeline variable
   #---------------------------------------------
-  add_vars_out_of_pipeline(out,
-                           fill_gaps = fill_gaps,
-                           lkup      = lkup)
+  add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup)
 
   # **** TO BE REMOVED **** REMOVAL STARTS HERE
   # Once `pip-grp` has been integrated in ingestion pipeline
@@ -170,63 +174,64 @@ pip_new_lineups <- function(country         = "ALL",
     out$poverty_line <- povline
 
     out <- pip_aggregate_by(
-      df          = out,
-      return_cols = lkup$return_cols$pip_grp)
+      df = out,
+      group_by = group_by,
+      return_cols = lkup$return_cols$pip_grp
+    )
 
     # Censor regional values
     if (censor) {
-      out <- censor_rows(out,
-                         lkup[["censored"]],
-                         type = "regions")
+      out <- censor_rows(out, lkup[["censored"]], type = "regions")
     }
 
-    out <- out[, c("region_name",
-                   "region_code",
-                   "reporting_year",
-                   "reporting_pop",
-                   "poverty_line",
-                   "headcount",
-                   "poverty_gap",
-                   "poverty_severity",
-                   "watts",
-                   "mean",
-                   "pop_in_poverty")]
+    out <- out[, c(
+      "region_name",
+      "region_code",
+      "reporting_year",
+      "reporting_pop",
+      "poverty_line",
+      "headcount",
+      "poverty_gap",
+      "poverty_severity",
+      "watts",
+      "mean",
+      "pop_in_poverty"
+    )]
 
     return(out)
   }
   # **** TO BE REMOVED **** REMOVAL ENDS HERE
 
-
   # pre-computed distributional stats ---------------
-  crr_names  <- names(out)    # current variables
+  crr_names <- names(out) # current variables
   names2keep <- lkup$return_cols$pip$cols # all variables
 
   out <- add_dist_stats(
-    df         = out,
-    lkup       = lkup,
-    fill_gaps  = fill_gaps)
+    df = out,
+    lkup = lkup,
+    fill_gaps = fill_gaps
+  )
 
   # Add aggregate medians ----------------
   out <- add_agg_medians(
-    df        = out,
+    df = out,
     fill_gaps = fill_gaps,
-    data_dir  = lkup$data_root
+    data_dir = lkup$data_root
   )
 
   # format ----------------
 
-
   if (fill_gaps) {
     # ZP temp NA lineups:
     #---------------------
     # ## Inequality indicators to NA for lineup years ----
-    dist_vars  <- names2keep[!(names2keep %in% crr_names)]
+    dist_vars <- names2keep[!(names2keep %in% crr_names)]
     out[,
-        (dist_vars) := NA_real_]
+      (dist_vars) := NA_real_
+    ]
 
     ## estimate_var -----
     out <- estimate_type_ctr_lnp(out, lkup)
-
   } else {
     out[, estimate_type := NA_character_]
   }
@@ -242,28 +247,30 @@ pip_new_lineups <- function(country         = "ALL",
     out <- censor_rows(out, lkup[["censored"]], type = "countries")
   }
 
-
   # Select columns
   if (additional_ind) {
     get_additional_indicators(out)
     added_names <- attr(out, "new_indicators_names")
-    names2keep  <- c(names2keep, added_names)
-
+    names2keep <- c(names2keep, added_names)
   }
   # Keep relevant variables
-  out  <- out[, .SD, .SDcols = names2keep]
-
+  out <- out[, .SD, .SDcols = names2keep]
 
   # make sure we always report the same precision in all numeric variables
   doub_vars <-
     names(out)[unlist(lapply(out, is.double))] |>
     data.table::copy()
 
-  out[, (doub_vars) := lapply(.SD, round, digits = 12),
-      .SDcols = doub_vars]
+  out[, (doub_vars) := lapply(.SD, round, digits = 12), .SDcols = doub_vars]
 
   # Order rows by country code and reporting year
-  data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type)
+  data.table::setorder(
+    out,
+    country_code,
+    reporting_year,
+    reporting_level,
+    welfare_type
+  )
   #}
 
   # Make sure no duplicate remains
@@ -273,39 +280,35 @@ pip_new_lineups <- function(country         = "ALL",
 }
 
 
-
-treat_cache_and_main <- \(out, cache_file_path,
-                          lkup, fill_gaps) {
-
-    # early return of cache data if not available.
+treat_cache_and_main <- \(out, cache_file_path, lkup, fill_gaps) {
+  # early return of cache data if not available.
   cached_data <-
     if (is.null(out$data_in_cache)) {
-    NULL
-  } else if (is.data.frame(out$data_in_cache)) {
-
-    if (fnrow(out$data_in_cache) == 0) {
       NULL
-    } else {
-      ft <- qDT(out$data_in_cache)
-      if (fill_gaps) {
-        ft <-
-          fg_remove_duplicates(ft,
-                               use_new_lineup_version = lkup$use_new_lineup_version)
-
+    } else if (is.data.frame(out$data_in_cache)) {
+      if (fnrow(out$data_in_cache) == 0) {
+        NULL
+      } else {
+        ft <- qDT(out$data_in_cache)
+        if (fill_gaps) {
+          ft <-
+            fg_remove_duplicates(
+              ft,
+              use_new_lineup_version = lkup$use_new_lineup_version
+            )
+        }
+
+        # Add just mean and median
+        get_mean_median(ft, lkup, fill_gaps = fill_gaps)
       }
-
-      # Add just mean and median
-      get_mean_median(ft, lkup, fill_gaps = fill_gaps)
-
-    }
-  } else {
-    cli::cli_abort(
-      "{.code out$data_in_cache} must be NULL or data.frame not
+    } else {
+      cli::cli_abort(
+        "{.code out$data_in_cache} must be NULL or data.frame not
       {.field {class(out$data_in_cache)}}"
-    )
-  }
+      )
+    }
 
-  main_data   <- qDT(out$main_data)
+  main_data <- qDT(out$main_data)
 
   if (nrow(main_data) > 0) {
     if (is.null(cached_data)) {
@@ -317,17 +320,14 @@ treat_cache_and_main <- \(out, cache_file_path,
 
     update_master_file(main_data, cache_file_path, fill_gaps)
     rm(main_data)
-
   } else {
     out <- cached_data
   }
 
-
   setDT(out)
 }
 
 
-
 validate_country_codes <- \(country, lkup) {
   cls <- lkup$aux_files$country_list$country_code |>
     unique() |>
@@ -335,7 +335,9 @@ validate_country_codes <- \(country, lkup) {
 
   if (any(!country %in% cls)) {
     wcls <- which(!country %in% cls)
-    cli::cli_abort("{.field {country[wcls]}} {?is/are} not {?a/} valid country code{?s}")
+    cli::cli_abort(
+      "{.field {country[wcls]}} {?is/are} not {?a/} valid country code{?s}"
+    )
   }
   invisible(TRUE)
 }
diff --git a/R/pip_old.R b/R/pip_old.R
index e0194c20..a36e239e 100644
--- a/R/pip_old.R
+++ b/R/pip_old.R
@@ -54,25 +54,25 @@
 #' }
 #' @export
 #'
-pip_old <- function(country         = "ALL",
-                    year            = "ALL",
-                    povline         = 1.9,
-                    popshare        = NULL,
-                    fill_gaps       = FALSE,
-                    group_by        = c("none", "wb"),
-                    welfare_type    = c("all", "consumption", "income"),
-                    reporting_level = c("all", "national", "rural", "urban"),
-                    ppp             = NULL,
-                    lkup,
-                    censor          = TRUE,
-                    lkup_hash       = lkup$cache_data_id$hash_pip,
-                    additional_ind  = FALSE) {
-
-
+pip_old <- function(
+  country = "ALL",
+  year = "ALL",
+  povline = 1.9,
+  popshare = NULL,
+  fill_gaps = FALSE,
+  group_by = c("none", "wb"),
+  welfare_type = c("all", "consumption", "income"),
+  reporting_level = c("all", "national", "rural", "urban"),
+  ppp = NULL,
+  lkup,
+  censor = TRUE,
+  lkup_hash = lkup$cache_data_id$hash_pip,
+  additional_ind = FALSE
+) {
   # set up -------------
-  welfare_type    <- match.arg(welfare_type)
+  welfare_type <- match.arg(welfare_type)
   reporting_level <- match.arg(reporting_level)
-  group_by        <- match.arg(group_by)
+  group_by <- match.arg(group_by)
 
   # TEMPORARY UNTIL SELECTION MECHANISM IS BEING IMPROVED
   country <- toupper(country)
@@ -81,26 +81,30 @@ pip_old <- function(country         = "ALL",
   }
 
   # If svy_lkup is not part of lkup throw an error.
-  if (!all(c('svy_lkup') %in% names(lkup)))
-    stop("You are probably passing more than one dataset as lkup argument.
-  Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD")
-
+  if (!all(c('svy_lkup') %in% names(lkup))) {
+    stop(
+      "You are probably passing more than one dataset as lkup argument.
+  Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD"
+    )
+  }
 
   # **** TO BE REMOVED **** REMOVAL STARTS HERE
   # Once `pip-grp` has been integrated in ingestion pipeline
   # Forces fill_gaps to TRUE when using group_by option
   if (group_by != "none") {
     fill_gaps <- TRUE
-    message("Info: argument group_by in pip() is deprecated; please use pip_grp() instead.")
+    message(
+      "Info: argument group_by in pip() is deprecated; please use pip_grp() instead."
+    )
   }
   # **** TO BE REMOVED **** REMOVAL ENDS HERE
 
   # Countries vector ------------
   lcv <- # List with countries vectors
     create_countries_vctr(
-      country         =  country,
-      year            =  year,
-      lkup            = lkup
+      country = country,
+      year = year,
+      lkup = lkup
     )
   # lcv$est_ctrs has all the country_code that we are interested in
 
@@ -113,26 +117,26 @@ pip_old <- function(country         = "ALL",
   if (fill_gaps) {
     ## lineup years-----------------
     out <- fg_pip_old(
-      country            = lcv$est_ctrs,
-      year               = year,
-      povline            = povline,
-      popshare           = popshare,
-      welfare_type       = welfare_type,
-      reporting_level    = reporting_level,
-      ppp                = ppp,
-      lkup               = lkup
+      country = lcv$est_ctrs,
+      year = year,
+      povline = povline,
+      popshare = popshare,
+      welfare_type = welfare_type,
+      reporting_level = reporting_level,
+      ppp = ppp,
+      lkup = lkup
     )
   } else {
     ## survey years ------------------
     out <- rg_pip_old(
-      country         = lcv$est_ctrs,
-      year            = year,
-      povline         = povline,
-      popshare        = popshare,
-      welfare_type    = welfare_type,
+      country = lcv$est_ctrs,
+      year = year,
+      povline = povline,
+      popshare = popshare,
+      welfare_type = welfare_type,
       reporting_level = reporting_level,
-      ppp             = ppp,
-      lkup            = lkup
+      ppp = ppp,
+      lkup = lkup
     )
   }
 
@@ -155,7 +159,9 @@ pip_old <- function(country         = "ALL",
     setDT(out)
   }
   # Early return for empty table---------------
-  if (nrow(out) == 0) return(pipapi::empty_response)
+  if (nrow(out) == 0) {
+    return(pipapi::empty_response)
+  }
 
   # aggregate distributions ------------------
   if (reporting_level %in% c("national", "all")) {
@@ -168,8 +174,6 @@ pip_old <- function(country         = "ALL",
     }
   }
 
-
-
   add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup)
 
   # **** TO BE REMOVED **** REMOVAL STARTS HERE
@@ -181,7 +185,8 @@ pip_old <- function(country         = "ALL",
     out$poverty_line <- povline
 
     out <- pip_aggregate_by(
-      df          = out,
+      df = out,
+      group_by = group_by,
       return_cols = lkup$return_cols$pip_grp
     )
     # Censor regional values
@@ -189,25 +194,26 @@ pip_old <- function(country         = "ALL",
       out <- censor_rows(out, lkup[["censored"]], type = "regions")
     }
 
-    out <- out[, c("region_name",
-                   "region_code",
-                   "reporting_year",
-                   "reporting_pop",
-                   "poverty_line",
-                   "headcount",
-                   "poverty_gap",
-                   "poverty_severity",
-                   "watts",
-                   "mean",
-                   "pop_in_poverty")]
+    out <- out[, c(
+      "region_name",
+      "region_code",
+      "reporting_year",
+      "reporting_pop",
+      "poverty_line",
+      "headcount",
+      "poverty_gap",
+      "poverty_severity",
+      "watts",
+      "mean",
+      "pop_in_poverty"
+    )]
 
     return(out)
   }
   # **** TO BE REMOVED **** REMOVAL ENDS HERE
 
-
   # pre-computed distributional stats ---------------
-  crr_names  <- names(out)    # current variables
+  crr_names <- names(out) # current variables
   names2keep <- lkup$return_cols$pip$cols # all variables
 
   out <- add_dist_stats_old(
@@ -217,24 +223,22 @@ pip_old <- function(country         = "ALL",
 
   # Add aggregate medians ----------------
   out <- add_agg_medians(
-    df        = out,
+    df = out,
     fill_gaps = fill_gaps,
-    data_dir  = lkup$data_root
+    data_dir = lkup$data_root
   )
 
   # format ----------------
 
-
   if (fill_gaps) {
-
     ## Inequality indicators to NA for lineup years ----
-    dist_vars  <- names2keep[!(names2keep %in% crr_names)]
+    dist_vars <- names2keep[!(names2keep %in% crr_names)]
     out[,
-        (dist_vars) := NA_real_]
+      (dist_vars) := NA_real_
+    ]
 
     ## estimate_var -----
     out <- estimate_type_ctr_lnp(out, lkup)
-
   } else {
     out[, estimate_type := NA_character_]
   }
@@ -249,28 +253,30 @@ pip_old <- function(country         = "ALL",
     out <- censor_rows(out, lkup[["censored"]], type = "countries")
   }
 
-
   # Select columns
   if (additional_ind) {
     get_additional_indicators(out)
     added_names <- attr(out, "new_indicators_names")
-    names2keep  <- c(names2keep, added_names)
-
+    names2keep <- c(names2keep, added_names)
   }
   # Keep relevant variables
-  out  <- out[, .SD, .SDcols = names2keep]
-
+  out <- out[, .SD, .SDcols = names2keep]
 
   # make sure we always report the same precision in all numeric variables
   doub_vars <-
     names(out)[unlist(lapply(out, is.double))] |>
     data.table::copy()
 
-  out[, (doub_vars) := lapply(.SD, round, digits = 12),
-      .SDcols = doub_vars]
+  out[, (doub_vars) := lapply(.SD, round, digits = 12), .SDcols = doub_vars]
 
   # Order rows by country code and reporting year
-  data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type)
+  data.table::setorder(
+    out,
+    country_code,
+    reporting_year,
+    reporting_level,
+    welfare_type
+  )
   #}
 
   # Make sure no duplicate remains
@@ -280,8 +286,6 @@ pip_old <- function(country         = "ALL",
 }
 
 
-
-
 #' Compute survey year stats
 #'
 #' Compute the main PIP poverty and inequality statistics for survey years.
@@ -289,50 +293,54 @@ pip_old <- function(country         = "ALL",
 #' @inheritParams pip
 #' @return data.frame
 #' @keywords internal
-rg_pip_old <- function(country,
-                       year,
-                       povline,
-                       popshare,
-                       welfare_type,
-                       reporting_level,
-                       ppp,
-                       lkup) {
+rg_pip_old <- function(
+  country,
+  year,
+  povline,
+  popshare,
+  welfare_type,
+  reporting_level,
+  ppp,
+  lkup
+) {
   # get values from lkup
   valid_regions <- lkup$query_controls$region$values
-  svy_lkup      <- lkup$svy_lkup
-  data_dir      <- lkup$data_root
+  svy_lkup <- lkup$svy_lkup
+  data_dir <- lkup$data_root
   # povline is set to NULL if popshare is given
-  if (!is.null(popshare)) povline <- NULL
-
+  if (!is.null(popshare)) {
+    povline <- NULL
+  }
 
   cache_file_path <- fs::path(lkup$data_root, 'cache', ext = "duckdb")
 
   metadata <- subset_lkup(
-    country         = country,
-    year            = year,
-    welfare_type    = welfare_type,
+    country = country,
+    year = year,
+    welfare_type = welfare_type,
     reporting_level = reporting_level,
-    lkup            = svy_lkup,
-    valid_regions   = valid_regions,
-    data_dir        = data_dir,
-    povline         = povline,
+    lkup = svy_lkup,
+    valid_regions = valid_regions,
+    data_dir = data_dir,
+    povline = povline,
     cache_file_path = cache_file_path,
-    fill_gaps       = FALSE
+    fill_gaps = FALSE
   )
 
   data_present_in_master <- metadata$data_present_in_master
   metadata <- metadata$lkup
-  povline  <- metadata$povline
+  povline <- metadata$povline
 
   # Remove aggregate distribution if popshare is specified
   # TEMPORARY FIX UNTIL popshare is supported for aggregate distributions
-  metadata <- filter_lkup(metadata = metadata,
-                          popshare = popshare)
+  metadata <- filter_lkup(metadata = metadata, popshare = popshare)
 
   # return empty dataframe if no metadata is found
   if (nrow(metadata) == 0) {
-    return(list(main_data = empty_response,
-                data_in_cache = data_present_in_master))
+    return(list(
+      main_data = empty_response,
+      data_in_cache = data_present_in_master
+    ))
   }
 
   out <- vector(mode = "list", length = nrow(metadata))
@@ -346,16 +354,16 @@ rg_pip_old <- function(country,
       path = tmp_metadata$path
     )
     tmp_stats <- wbpip:::prod_compute_pip_stats(
-      welfare           = svy_data$df0$welfare,
-      povline           = povline,
-      popshare          = popshare,
-      population        = svy_data$df0$weight,
-      requested_mean    = tmp_metadata$survey_mean_ppp,
-      svy_mean_lcu      = tmp_metadata$survey_mean_lcu,
-      svy_median_lcu    = tmp_metadata$survey_median_lcu,
-      svy_median_ppp    = tmp_metadata$survey_median_ppp,
-      default_ppp       = tmp_metadata$ppp,
-      ppp               = ppp,
+      welfare = svy_data$df0$welfare,
+      povline = povline,
+      popshare = popshare,
+      population = svy_data$df0$weight,
+      requested_mean = tmp_metadata$survey_mean_ppp,
+      svy_mean_lcu = tmp_metadata$survey_mean_lcu,
+      svy_median_lcu = tmp_metadata$survey_median_lcu,
+      svy_median_ppp = tmp_metadata$survey_median_ppp,
+      default_ppp = tmp_metadata$ppp,
+      ppp = ppp,
       distribution_type = tmp_metadata$distribution_type
     )
     # Add stats columns to data frame
diff --git a/R/pip_old_lineups.R b/R/pip_old_lineups.R
index 1bb86dea..d684f74c 100644
--- a/R/pip_old_lineups.R
+++ b/R/pip_old_lineups.R
@@ -53,28 +53,26 @@
 #'     lkup = lkups)
 #' }
 #' @export
-pip_old_lineups <- function(country         = "ALL",
-                year            = "ALL",
-                povline         = 1.9,
-                popshare        = NULL,
-                fill_gaps       = FALSE,
-                group_by        = c("none", "wb"),
-                welfare_type    = c("all", "consumption", "income"),
-                reporting_level = c("all", "national", "rural", "urban"),
-                ppp             = NULL,
-                lkup,
-                censor          = TRUE,
-                lkup_hash       = lkup$cache_data_id$hash_pip,
-                additional_ind  = FALSE) {
-
-
+pip_old_lineups <- function(
+  country = "ALL",
+  year = "ALL",
+  povline = 1.9,
+  popshare = NULL,
+  fill_gaps = FALSE,
+  group_by = c("none", "wb"),
+  welfare_type = c("all", "consumption", "income"),
+  reporting_level = c("all", "national", "rural", "urban"),
+  ppp = NULL,
+  lkup,
+  censor = TRUE,
+  lkup_hash = lkup$cache_data_id$hash_pip,
+  additional_ind = FALSE
+) {
   # set up -------------
-  welfare_type    <- match.arg(welfare_type)
+  welfare_type <- match.arg(welfare_type)
   reporting_level <- match.arg(reporting_level)
-  group_by        <- match.arg(group_by)
-  povline         <- round(povline, digits = 3)
-
-
+  group_by <- match.arg(group_by)
+  povline <- round(povline, digits = 3)
 
   # TEMPORARY UNTIL SELECTION MECHANISM IS BEING IMPROVED
   country <- toupper(country)
@@ -83,26 +81,30 @@ pip_old_lineups <- function(country         = "ALL",
   }
 
   # If svy_lkup is not part of lkup throw an error.
-  if (!all(c('svy_lkup') %in% names(lkup)))
-    stop("You are probably passing more than one dataset as lkup argument.
-  Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD")
-
+  if (!all(c('svy_lkup') %in% names(lkup))) {
+    stop(
+      "You are probably passing more than one dataset as lkup argument.
+  Try passing a single one by subsetting it lkup <- lkups$versions_paths$dataset_name_PROD"
+    )
+  }
 
   # **** TO BE REMOVED **** REMOVAL STARTS HERE
   # Once `pip-grp` has been integrated in ingestion pipeline
   # Forces fill_gaps to TRUE when using group_by option
   if (group_by != "none") {
     fill_gaps <- TRUE
-    message("Info: argument group_by in pip() is deprecated; please use pip_grp() instead.")
+    message(
+      "Info: argument group_by in pip() is deprecated; please use pip_grp() instead."
+    )
   }
   # **** TO BE REMOVED **** REMOVAL ENDS HERE
 
   # Countries vector ------------
   lcv <- # List with countries vectors
     create_countries_vctr(
-      country         =  country,
-      year            =  year,
-      lkup            = lkup
+      country = country,
+      year = year,
+      lkup = lkup
     )
   # lcv$est_ctrs has all the country_code that we are interested in
 
@@ -115,26 +117,26 @@ pip_old_lineups <- function(country         = "ALL",
   if (fill_gaps) {
     ## lineup years-----------------
     out <- fg_pip_old(
-      country            = lcv$est_ctrs,
-      year               = year,
-      povline            = povline,
-      popshare           = popshare,
-      welfare_type       = welfare_type,
-      reporting_level    = reporting_level,
-      ppp                = ppp,
-      lkup               = lkup
+      country = lcv$est_ctrs,
+      year = year,
+      povline = povline,
+      popshare = popshare,
+      welfare_type = welfare_type,
+      reporting_level = reporting_level,
+      ppp = ppp,
+      lkup = lkup
     )
   } else {
     ## survey years ------------------
     out <- rg_pip_old(
-      country         = lcv$est_ctrs,
-      year            = year,
-      povline         = povline,
-      popshare        = popshare,
-      welfare_type    = welfare_type,
+      country = lcv$est_ctrs,
+      year = year,
+      povline = povline,
+      popshare = popshare,
+      welfare_type = welfare_type,
       reporting_level = reporting_level,
-      ppp             = ppp,
-      lkup            = lkup
+      ppp = ppp,
+      lkup = lkup
     )
   }
 
@@ -145,8 +147,7 @@ pip_old_lineups <- function(country         = "ALL",
     out <- main_data |>
       rowbind(cached_data)
 
-      update_master_file(main_data, cache_file_path, fill_gaps)
-
+    update_master_file(main_data, cache_file_path, fill_gaps)
   } else {
     out <- cached_data
   }
@@ -154,7 +155,9 @@ pip_old_lineups <- function(country         = "ALL",
     setDT(out)
   }
   # Early return for empty table---------------
-  if (nrow(out) == 0) return(pipapi::empty_response)
+  if (nrow(out) == 0) {
+    return(pipapi::empty_response)
+  }
 
   # aggregate distributions ------------------
   if (reporting_level %in% c("national", "all")) {
@@ -167,8 +170,6 @@ pip_old_lineups <- function(country         = "ALL",
     }
   }
 
-
-
   add_vars_out_of_pipeline(out, fill_gaps = fill_gaps, lkup = lkup)
 
   # **** TO BE REMOVED **** REMOVAL STARTS HERE
@@ -180,7 +181,8 @@ pip_old_lineups <- function(country         = "ALL",
     out$poverty_line <- povline
 
     out <- pip_aggregate_by(
-      df          = out,
+      df = out,
+      group_by = group_by,
       return_cols = lkup$return_cols$pip_grp
     )
     # Censor regional values
@@ -188,25 +190,26 @@ pip_old_lineups <- function(country         = "ALL",
       out <- censor_rows(out, lkup[["censored"]], type = "regions")
     }
 
-    out <- out[, c("region_name",
-                   "region_code",
-                   "reporting_year",
-                   "reporting_pop",
-                   "poverty_line",
-                   "headcount",
-                   "poverty_gap",
-                   "poverty_severity",
-                   "watts",
-                   "mean",
-                   "pop_in_poverty")]
+    out <- out[, c(
+      "region_name",
+      "region_code",
+      "reporting_year",
+      "reporting_pop",
+      "poverty_line",
+      "headcount",
+      "poverty_gap",
+      "poverty_severity",
+      "watts",
+      "mean",
+      "pop_in_poverty"
+    )]
 
     return(out)
   }
   # **** TO BE REMOVED **** REMOVAL ENDS HERE
 
-
   # pre-computed distributional stats ---------------
-  crr_names  <- names(out)    # current variables
+  crr_names <- names(out) # current variables
   names2keep <- lkup$return_cols$pip$cols # all variables
 
   out <- add_dist_stats_old(
@@ -216,24 +219,22 @@ pip_old_lineups <- function(country         = "ALL",
 
   # Add aggregate medians ----------------
   out <- add_agg_medians(
-    df        = out,
+    df = out,
     fill_gaps = fill_gaps,
-    data_dir  = lkup$data_root
+    data_dir = lkup$data_root
   )
 
   # format ----------------
 
-
   if (fill_gaps) {
-
     ## Inequality indicators to NA for lineup years ----
-    dist_vars  <- names2keep[!(names2keep %in% crr_names)]
+    dist_vars <- names2keep[!(names2keep %in% crr_names)]
     out[,
-        (dist_vars) := NA_real_]
+      (dist_vars) := NA_real_
+    ]
 
     ## estimate_var -----
     out <- estimate_type_ctr_lnp(out, lkup)
-
   } else {
     out[, estimate_type := NA_character_]
   }
@@ -248,28 +249,30 @@ pip_old_lineups <- function(country         = "ALL",
     out <- censor_rows(out, lkup[["censored"]], type = "countries")
   }
 
-
   # Select columns
   if (additional_ind) {
     get_additional_indicators(out)
     added_names <- attr(out, "new_indicators_names")
-    names2keep  <- c(names2keep, added_names)
-
+    names2keep <- c(names2keep, added_names)
   }
   # Keep relevant variables
-  out  <- out[, .SD, .SDcols = names2keep]
-
+  out <- out[, .SD, .SDcols = names2keep]
 
   # make sure we always report the same precision in all numeric variables
   doub_vars <-
     names(out)[unlist(lapply(out, is.double))] |>
     data.table::copy()
 
-  out[, (doub_vars) := lapply(.SD, round, digits = 12),
-      .SDcols = doub_vars]
+  out[, (doub_vars) := lapply(.SD, round, digits = 12), .SDcols = doub_vars]
 
   # Order rows by country code and reporting year
-  data.table::setorder(out, country_code, reporting_year, reporting_level, welfare_type)
+  data.table::setorder(
+    out,
+    country_code,
+    reporting_year,
+    reporting_level,
+    welfare_type
+  )
   #}
 
   # Make sure no duplicate remains
diff --git a/R/utils.R b/R/utils.R
index f68ce3c8..18936972 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -7,27 +7,28 @@
 #' @param cache_file_path file path for cache
 #' @return data.frame
 #' @keywords internal
-subset_lkup <- function(country,
-                        year,
-                        welfare_type,
-                        reporting_level,
-                        lkup,
-                        valid_regions,
-                        data_dir = NULL,
-                        povline,
-                        cache_file_path,
-                        fill_gaps,
-                        popshare = NULL
-                        ) {
-
-
-  lkup <- lkup_filter(lkup,
-                      country,
-                      year,
-                      valid_regions,
-                      reporting_level,
-                      welfare_type,
-                      data_dir)
+subset_lkup <- function(
+  country,
+  year,
+  welfare_type,
+  reporting_level,
+  lkup,
+  valid_regions,
+  data_dir = NULL,
+  povline,
+  cache_file_path,
+  fill_gaps,
+  popshare = NULL
+) {
+  lkup <- lkup_filter(
+    lkup,
+    country,
+    year,
+    valid_regions,
+    reporting_level,
+    welfare_type,
+    data_dir
+  )
   # If povline is NULL, this happens when popshare is passed
   # i.e popshare is not NULL
   if (is.null(povline)) {
@@ -48,34 +49,39 @@ subset_lkup <- function(country,
 
 
 #' @keywords internal
-lkup_filter <- function(lkup,
-                        country,
-                        year,
-                        valid_regions,
-                        reporting_level,
-                        welfare_type,
-                        data_dir) {
+lkup_filter <- function(
+  lkup,
+  country,
+  year,
+  valid_regions,
+  reporting_level,
+  welfare_type,
+  data_dir
+) {
   # STEP 1 - Keep every row by default
   keep <- rep(TRUE, nrow(lkup))
   # STEP 2 - Select countries
   keep <- select_country(lkup, keep, country, valid_regions)
   # STEP 3 - Select years
-  keep <- select_years(lkup = lkup,
-                       keep = keep,
-                       year = year,
-                       country = country,
-                       data_dir =  data_dir,
-                       valid_regions = valid_regions)
+  keep <- select_years(
+    lkup = lkup,
+    keep = keep,
+    year = year,
+    country = country,
+    data_dir = data_dir,
+    valid_regions = valid_regions
+  )
 
   # STEP 4 - Select welfare_type
   if (welfare_type[1] != "all") {
     keep <- keep & lkup$welfare_type == welfare_type
   }
   # STEP 5 - Select reporting_level
-  keep <- select_reporting_level(lkup = lkup,
-                                 keep = keep,
-                                 reporting_level = reporting_level[1])
-
+  keep <- select_reporting_level(
+    lkup = lkup,
+    keep = keep,
+    reporting_level = reporting_level[1]
+  )
 
   lkup <- lkup[keep, ]
   return(lkup)
@@ -120,12 +126,14 @@ select_country <- function(lkup, keep, country, valid_regions) {
 #' @inheritParams subset_lkup
 #' @param keep logical vector
 #' @return logical vector
-select_years <- function(lkup,
-                         keep,
-                         year,
-                         country,
-                         data_dir,
-                         valid_regions = NULL) {
+select_years <- function(
+  lkup,
+  keep,
+  year,
+  country,
+  data_dir,
+  valid_regions = NULL
+) {
   # columns i is an ID that identifies if a country has more than one
   # observation for reporting year. That is the case of IND with URB/RUR and ZWE
   # with interporaltion and microdata info
@@ -134,19 +142,19 @@ select_years <- function(lkup,
   #                   by = .(country_code, reporting_year)]
 
   caller_names <- get_caller_names()
-  is_agg       <-
+  is_agg <-
     grepl("pip_grp", caller_names) |>
     any()
 
   dtmp <- lkup
 
-  year       <- toupper(year)
-  country    <- toupper(country)
+  year <- toupper(year)
+  country <- toupper(country)
   keep_years <- rep(TRUE, nrow(dtmp))
 
-  has_region  <- FALSE
+  has_region <- FALSE
   has_country <- TRUE
-  has_all     <- "ALL" %in% country
+  has_all <- "ALL" %in% country
 
   if (!is.null(valid_regions)) {
     if (any(country %in% valid_regions[!valid_regions %in% "ALL"])) {
@@ -159,44 +167,42 @@ select_years <- function(lkup,
 
   # STEP 1 - If Most Recent Value requested
   if ("MRV" %in% year) {
-
     # for MRV, countries and regions not allowed
     if (has_country && has_region) {
-      rlang::abort("country codes and region codes not allowed with MRV in year")
+      rlang::abort(
+        "country codes and region codes not allowed with MRV in year"
+      )
     }
     # STEP 1.1 - If all countries selected. Select MRV for each country
 
     if (has_region || is_agg) {
       mr <- get_metaregion_table(data_dir)
-      dtmp[mr,
-           on = "region_code",
-           max_year := reporting_year == i.lineup_year]
+      dtmp[mr, on = "region_code", max_year := reporting_year == i.lineup_year]
 
       if (isFALSE(has_all)) {
-        dtmp[!region_code %in% country,
-             max_year := FALSE]
+        dtmp[!region_code %in% country, max_year := FALSE]
       }
-
     } else {
       # STEP 1.2 - If only some countries selected. Select MRV for each selected
       # country
       if (has_all) {
         dtmp[,
-           max_year := reporting_year == max(reporting_year),
-           by = country_code]
+          max_year := reporting_year == max(reporting_year),
+          by = country_code
+        ]
       } else {
-        dtmp[country_code %in% country | region_code %in% country,
-             max_year := reporting_year == max(reporting_year),
-             by = country_code]
+        dtmp[
+          country_code %in% country | region_code %in% country,
+          max_year := reporting_year == max(reporting_year),
+          by = country_code
+        ]
       }
     }
 
     # dtmp <- unique(dtmp[, .(country_code, reporting_year, max_year)])
     dtmp[is.na(max_year), max_year := FALSE]
 
-
     keep_years <- keep_years & as.logical(dtmp[["max_year"]])
-
   }
   # STEP 2 - If specific years are specified. Filter for these years
   if (!any(c("ALL", "MRV") %in% year)) {
@@ -218,8 +224,7 @@ select_years <- function(lkup,
 #'
 #' @return data.frame
 
-filter_lkup <- function(metadata,
-                        popshare) {
+filter_lkup <- function(metadata, popshare) {
   # popshare option not supported for aggregate distributions
   if (!is.null(popshare)) {
     return(
@@ -228,7 +233,6 @@ filter_lkup <- function(metadata,
   } else {
     return(metadata)
   }
-
 }
 
 #' helper function to correctly filter look up table according to requested
@@ -241,23 +245,20 @@ filter_lkup <- function(metadata,
 #' @return data.table
 #' @export
 #'
-select_reporting_level <- function(lkup,
-                                   keep,
-                                   reporting_level) {
+select_reporting_level <- function(lkup, keep, reporting_level) {
   # To be updated: Fix the coverage variable names in aux data (reporting_coverage?)
   if (reporting_level == "all") {
     return(keep)
-
   } else if (reporting_level == "national") {
     # Subnational levels necessary to compute national stats for aggregate distributions
-    keep <- keep & (lkup$reporting_level == reporting_level | lkup$is_used_for_aggregation)
+    keep <- keep &
+      (lkup$reporting_level == reporting_level | lkup$is_used_for_aggregation)
     return(keep)
-
   } else {
     if ("survey_coverage" %in% names(lkup)) {
       keep <- keep &
         (lkup$survey_coverage == reporting_level |
-           lkup$reporting_level == reporting_level)
+          lkup$reporting_level == reporting_level)
     } else {
       # This condition is not triggered
       keep <- keep & lkup$reporting_level == reporting_level
@@ -275,29 +276,31 @@ select_reporting_level <- function(lkup,
 #'
 #' @return data.frame
 #' @keywords internal
-get_svy_data <- function(svy_id,
-                         reporting_level,
-                         path) {
+get_svy_data <- function(svy_id, reporting_level, path) {
   # Each call should be made at a unique reporting_level (equivalent to reporting_data_level: national, urban, rural)
   # This check should be conducted at the data validation stage
   reporting_level <- unique(reporting_level)
-  assertthat::assert_that(length(reporting_level) == 1,
-                          msg = "Problem with input data: Multiple reporting_levels"
+  assertthat::assert_that(
+    length(reporting_level) == 1,
+    msg = "Problem with input data: Multiple reporting_levels"
   )
   # tictoc::tic("read_single")
   out <- lapply(path, function(x) {
-
     # Not robust. Should not be hard coded here.
     if (reporting_level %in% c("urban", "rural")) {
-      tmp <- fst::read_fst(x,
-                         columns = c("area", "welfare", "weight"),
-                         as.data.table = TRUE)
+      tmp <- fst::read_fst(
+        x,
+        columns = c("area", "welfare", "weight"),
+        as.data.table = TRUE
+      )
       tmp <- tmp[area == reporting_level, ]
       tmp[, area := NULL]
     } else {
-      tmp <- fst::read_fst(x,
-                           columns = c("welfare", "weight"),
-                           as.data.table = TRUE)
+      tmp <- fst::read_fst(
+        x,
+        columns = c("welfare", "weight"),
+        as.data.table = TRUE
+      )
     }
 
     return(tmp)
@@ -324,7 +327,6 @@ get_svy_data <- function(svy_id,
 #' @return data.table
 #' @export
 add_dist_stats <- function(df, lkup, fill_gaps) {
-
   if (fill_gaps) {
     dist_stats <- lkup[["lineup_dist_stats"]]
   } else {
@@ -332,18 +334,16 @@ add_dist_stats <- function(df, lkup, fill_gaps) {
   }
 
   if (fill_gaps) {
-
     df <- df |>
-      joyn::joyn(y                = dist_stats,
-                 by               = c("country_code",
-                                      "reporting_level",
-                                      "reporting_year"),
-                 match_type       = "m:1", # multiple poverty lines
-                 keep_common_vars = FALSE,
-                 reportvar        = FALSE,
-                 verbose          = FALSE,
-                 keep             = "left")
-
+      joyn::joyn(
+        y = dist_stats,
+        by = c("country_code", "reporting_level", "reporting_year"),
+        match_type = "m:1", # multiple poverty lines
+        keep_common_vars = FALSE,
+        reportvar = FALSE,
+        verbose = FALSE,
+        keep = "left"
+      )
   } else {
     # Keep only relevant columns
     cols <- c(
@@ -362,9 +362,10 @@ add_dist_stats <- function(df, lkup, fill_gaps) {
     # merge dist stats with main table
     # data.table::setnames(dist_stats, "survey_median_ppp", "median")
 
-    df <- dist_stats[df,
-                     on = .(cache_id, reporting_level), #.(country_code, reporting_year, welfare_type, reporting_level),
-                     allow.cartesian = TRUE
+    df <- dist_stats[
+      df,
+      on = .(cache_id, reporting_level), #.(country_code, reporting_year, welfare_type, reporting_level),
+      allow.cartesian = TRUE
     ]
   }
   df
@@ -396,9 +397,10 @@ add_dist_stats_old <- function(df, dist_stats) {
   # merge dist stats with main table
   # data.table::setnames(dist_stats, "survey_median_ppp", "median")
 
-  df <- dist_stats[df,
-                   on = .(cache_id, reporting_level), #.(country_code, reporting_year, welfare_type, reporting_level),
-                   allow.cartesian = TRUE
+  df <- dist_stats[
+    df,
+    on = .(cache_id, reporting_level), #.(country_code, reporting_year, welfare_type, reporting_level),
+    allow.cartesian = TRUE
   ]
 
   return(df)
@@ -409,11 +411,13 @@ add_dist_stats_old <- function(df, dist_stats) {
 #' @return data.table
 #' @noRd
 collapse_rows <- function(df, vars, na_var = NULL) {
-  tmp_vars      <- lapply(df[, .SD, .SDcols = vars], unique, collapse = "|")
-  tmp_vars      <- lapply(tmp_vars, paste, collapse = "|")
+  tmp_vars <- lapply(df[, .SD, .SDcols = vars], unique, collapse = "|")
+  tmp_vars <- lapply(tmp_vars, paste, collapse = "|")
   tmp_var_names <- names(df[, .SD, .SDcols = vars])
 
-  if (!is.null(na_var)) df[[na_var]] <- NA_real_
+  if (!is.null(na_var)) {
+    df[[na_var]] <- NA_real_
+  }
 
   for (tmp_var in seq_along(tmp_vars)) {
     df[[tmp_var_names[tmp_var]]] <- tmp_vars[[tmp_var]]
@@ -431,7 +435,6 @@ collapse_rows <- function(df, vars, na_var = NULL) {
 #' @return data.table
 #' @noRd
 censor_rows <- function(df, censored, type = c("countries", "regions")) {
-
   type <- match.arg(type)
 
   # Return early if there are no censoring observations
@@ -444,15 +447,18 @@ censor_rows <- function(df, censored, type = c("countries", "regions")) {
     df$tmp_id <-
       sprintf(
         "%s_%s_%s_%s_%s",
-        df$country_code, df$reporting_year,
-        df$survey_acronym, df$welfare_type,
+        df$country_code,
+        df$reporting_year,
+        df$survey_acronym,
+        df$welfare_type,
         df$reporting_level
       )
   } else {
     df$tmp_id <-
       sprintf(
         "%s_%s",
-        df$region_code, df$reporting_year
+        df$region_code,
+        df$reporting_year
       )
   }
 
@@ -487,8 +493,11 @@ censor_stats <- function(df, censored_table) {
   if (nrow(censor_stats) > 0) {
     # Perform a non-equi join to mark relevant statistics
     # Commenting mult = "first" since with multiple povline values there are more than one rows
-    df[censor_stats, on = .(tmp_id = id), #mult = "first",
-       unique(censor_stats$statistic) := NA_real_]
+    df[
+      censor_stats,
+      on = .(tmp_id = id), #mult = "first",
+      unique(censor_stats$statistic) := NA_real_
+    ]
   }
 
   # Clean up the temporary column
@@ -505,12 +514,10 @@ censor_stats <- function(df, censored_table) {
 #' @param lkup lkup value
 #' @keywords internal
 estimate_type_var <- function(df, lkup) {
-
   censored_table <- lkup$censored$regions
-  data_dir       <- lkup$data_root
-
-  mr <-  get_metaregion_table(data_dir = data_dir)
+  data_dir <- lkup$data_root
 
+  mr <- get_metaregion_table(data_dir = data_dir)
 
   df[, tmp_id := paste(region_code, reporting_year, sep = "_")]
   # Create a binary column to mark what is projections based on
@@ -529,18 +536,21 @@ estimate_type_var <- function(df, lkup) {
   # Merge metaregion and label those obs with reporting year
   # higher than lineup year as "nowcast"
   df <- mr[df, on = "region_code"]
-  df[reporting_year > lineup_year,
-     estimate_type := "nowcast"]
+  df[reporting_year > lineup_year, estimate_type := "nowcast"]
 
   # This should be done in a different function...
   # Update specific statistics to NA where not 'all'
   censor_stats <- censored_table[statistic != "all"]
   if (nrow(censor_stats) > 0) {
     # Perform a non-equi join to mark relevant statistics
-    df[censor_stats, on = .(tmp_id = id), mult = "first",
-       (censor_stats$statistic) := NA_real_]
+    df[
+      censor_stats,
+      on = .(tmp_id = id),
+      mult = "first",
+      (censor_stats$statistic) := NA_real_
+    ]
   }
-  df[, c("tmp_id", "lineup_year")  := NULL]
+  df[, c("tmp_id", "lineup_year") := NULL]
 }
 
 
@@ -552,24 +562,25 @@ estimate_type_var <- function(df, lkup) {
 #' @return out database with `estimate_type` variable
 #' @keywords internal
 estimate_type_ctr_lnp <- function(out, lkup) {
-
-  out[, estimate_type := fifelse(estimation_type == "survey", "actual", "projection")]
-  mr     <- get_metaregion_table(lkup$data_root)
-  wld    <- mr[region_code == "WLD", lineup_year]
-  regs   <- out[, unique(region_code)]
-  mr     <- mr[region_code %in% regs]
-  mr[, lineup_year := max(lineup_year, wld),
-     by = region_code]
+  out[,
+    estimate_type := fifelse(
+      estimation_type == "survey",
+      "actual",
+      "projection"
+    )
+  ]
+  mr <- get_metaregion_table(lkup$data_root)
+  wld <- mr[region_code == "WLD", lineup_year]
+  regs <- out[, unique(region_code)]
+  mr <- mr[region_code %in% regs]
+  mr[, lineup_year := max(lineup_year, wld), by = region_code]
 
   # Merge metaregion and label those obs with reporting year
   # higher than lineup year as "nowcast"
   out <- mr[out, on = "region_code"]
-  out[reporting_year > lineup_year,
-      estimate_type := "nowcast"]
+  out[reporting_year > lineup_year, estimate_type := "nowcast"]
 
   out[, lineup_year := NULL]
-
-
 }
 
 #' Create query controls
@@ -580,16 +591,18 @@ estimate_type_ctr_lnp <- function(out, lkup) {
 #' @param versions character: List of available data versions
 #' @return list
 #' @noRd
-create_query_controls <- function(svy_lkup,
-                                  ref_lkup,
-                                  aux_files,
-                                  aux_tables,
-                                  versions) {
+create_query_controls <- function(
+  svy_lkup,
+  ref_lkup,
+  aux_files,
+  aux_tables,
+  versions
+) {
   # Countries and regions
   countries <- unique(c(
-      svy_lkup$country_code,
-      ref_lkup$country_code
-    ))
+    svy_lkup$country_code,
+    ref_lkup$country_code
+  ))
 
   regions <- unique(c(
     aux_files$regions$region_code
@@ -600,8 +613,8 @@ create_query_controls <- function(svy_lkup,
       "ALL",
       sort(c(
         countries,
-        regions)
-      )
+        regions
+      ))
     ),
     type = "character"
   )
@@ -613,7 +626,8 @@ create_query_controls <- function(svy_lkup,
   # Year
   year <- list(
     values = c(
-      "all", "MRV",
+      "all",
+      "MRV",
       sort(unique(c(
         svy_lkup$reporting_year,
         ref_lkup$reporting_year
@@ -633,26 +647,23 @@ create_query_controls <- function(svy_lkup,
   )
 
   # Boolean parameters
-  fill_gaps        <-
-    aggregate      <-
-    long_format    <-
-    additional_ind <-
-    exclude        <-
-    list(values = c(TRUE, FALSE),
-         type = "logical")
-
-  # Group by
-  group_by <- list(
-    values = c("none", "wb"),
-    type = "character"
-  )
+  fill_gaps <-
+    aggregate <-
+      long_format <-
+        additional_ind <-
+          exclude <-
+            list(values = c(TRUE, FALSE), type = "logical")
+
 
   # Welfare type
   welfare_type <- list(
-    values = c("all", sort(unique(c(
-      svy_lkup$welfare_type,
-      ref_lkup$welfare_type
-    )))),
+    values = c(
+      "all",
+      sort(unique(c(
+        svy_lkup$welfare_type,
+        ref_lkup$welfare_type
+      )))
+    ),
     type = "character"
   )
   # Reporting level
@@ -677,8 +688,7 @@ create_query_controls <- function(svy_lkup,
     type = "character"
   )
   # Formats
-  format <- list(values = c("json", "csv", "rds", "arrow"),
-                 type = "character")
+  format <- list(values = c("json", "csv", "rds", "arrow"), type = "character")
   # Tables
   table <- list(values = aux_tables, type = "character")
 
@@ -688,12 +698,28 @@ create_query_controls <- function(svy_lkup,
   pass <- list(values = Sys.getenv('PIP_CACHE_SERVER_KEY'), type = "character")
   # parameters
   parameter <-
-    list(values = c("country", "year", "povline",
-                    "popshare", "fill_gaps", "aggregate",
-                    "group_by", "welfare_type",
-                    "reporting_level", "ppp", "version",
-                    "format", "table", "long_format", "exclude", "type", "pass"),
-         type = "character")
+    list(
+      values = c(
+        "country",
+        "year",
+        "povline",
+        "popshare",
+        "fill_gaps",
+        "aggregate",
+        "group_by",
+        "welfare_type",
+        "reporting_level",
+        "ppp",
+        "version",
+        "format",
+        "table",
+        "long_format",
+        "exclude",
+        "type",
+        "pass"
+      ),
+      type = "character"
+    )
 
   # cum_welfare
   cum_welfare <- list(
@@ -724,7 +750,7 @@ create_query_controls <- function(svy_lkup,
   )
 
   # lorenz
-  lorenz <- list(values = c("lb", "lq"),type = "character")
+  lorenz <- list(values = c("lb", "lq"), type = "character")
 
   # n_bins
   n_bins <- list(
@@ -734,44 +760,54 @@ create_query_controls <- function(svy_lkup,
 
   # Endpoint
   endpoint <-
-    list(values = c("all",
-                    "aux",
-                    "pip",
-                    "pip-grp",
-                    "pip-info",
-                    "valid-params"),
-         type = "character")
-
-    # Create list of query controls
+    list(
+      values = c("all", "aux", "pip", "pip-grp", "pip-info", "valid-params"),
+      type = "character"
+    )
+
+  # group_by
+  regs <- aux_files$country_list |>
+    names() |>
+    grep("_code$|_name$", x = _, value = TRUE, invert = TRUE) |>
+    c("wb", "none", "vintage", "pcn") |>
+    sort()
+
+  group_by <- list(
+    values = regs,
+    type   = "character"
+  )
+
+
+  # Create list of query controls
   query_controls <- list(
-    country         = country,
-    region          = region,
-    year            = year,
-    povline         = povline,
-    popshare        = popshare,
-    fill_gaps       = fill_gaps,
-    aggregate       = aggregate,
-    long_format     = long_format,
-    exclude         = exclude,
-    additional_ind  = additional_ind,
-    group_by        = group_by,
-    welfare_type    = welfare_type,
+    country = country,
+    region = region,
+    year = year,
+    povline = povline,
+    popshare = popshare,
+    fill_gaps = fill_gaps,
+    aggregate = aggregate,
+    long_format = long_format,
+    exclude = exclude,
+    additional_ind = additional_ind,
+    group_by = group_by,
+    welfare_type = welfare_type,
     reporting_level = reporting_level,
-    ppp             = ppp,
-    version         = version,
-    format          = format,
-    table           = table,
-    parameter       = parameter,
-    cum_welfare     = cum_welfare,
-    cum_population  = cum_population,
-    requested_mean  = requested_mean,
-    mean            = mean,
-    times_mean      = times_mean,
-    lorenz          = lorenz,
-    n_bins          = n_bins,
-    endpoint        = endpoint,
-    type            = type,
-    pass            = pass
+    ppp = ppp,
+    version = version,
+    format = format,
+    table = table,
+    parameter = parameter,
+    cum_welfare = cum_welfare,
+    cum_population = cum_population,
+    requested_mean = requested_mean,
+    mean = mean,
+    times_mean = times_mean,
+    lorenz = lorenz,
+    n_bins = n_bins,
+    endpoint = endpoint,
+    type = type,
+    pass = pass
   )
 
   return(query_controls)
@@ -792,12 +828,7 @@ convert_empty <- function(string) {
 #' @inheritParams subset_lkup
 #' @return data.frame
 #' @keywords internal
-subset_ctry_years <- function(country,
-                              year,
-                              lkup,
-                              valid_regions,
-                              data_dir) {
-
+subset_ctry_years <- function(country, year, lkup, valid_regions, data_dir) {
   is_agg <- get_caller_names()
   is_agg <- grepl(pattern = "pip_grp", x = is_agg) |>
     any()
@@ -809,8 +840,8 @@ subset_ctry_years <- function(country,
   if (!any(c("ALL", "WLD") %in% country)) {
     # Select regions
     if (any(country %in% valid_regions)) {
-      selected_regions  <- country[country %in% valid_regions]
-      keep_regions      <- lkup$region_code %in% selected_regions
+      selected_regions <- country[country %in% valid_regions]
+      keep_regions <- lkup$region_code %in% selected_regions
       country_or_region <- "region_code"
     } else {
       keep_regions <- rep(FALSE, length(lkup$region_code))
@@ -824,20 +855,20 @@ subset_ctry_years <- function(country,
   # }
 
   # Select years
-  if (year[1] == "MRV")  {
+  if (year[1] == "MRV") {
     if (is_agg) {
       mr <- get_metaregion_table(data_dir)
-      lkup[mr,
-           on = "region_code",
-           lineup_year := i.lineup_year]
+      lkup[mr, on = "region_code", lineup_year := i.lineup_year]
     } else {
       lkup[, lineup_year := reporting_year]
     }
 
     if (country[1] != "ALL") {
       max_year <-
-        lkup[get(country_or_region) == country & reporting_year == lineup_year,
-             reporting_year] |>
+        lkup[
+          get(country_or_region) == country & reporting_year == lineup_year,
+          reporting_year
+        ] |>
         max()
     } else {
       max_year <-
@@ -863,23 +894,32 @@ subset_ctry_years <- function(country,
 #' @return list
 #' @keywords internal
 clear_cache <- function(cd) {
-  tryCatch({
-    if (cd$size() > 0) {
-      cd$reset()
-      n <- cd$size()
-      if (n == 0) {
-        out <- list(status = 'success', msg = 'Cache cleared.')
+  tryCatch(
+    {
+      if (cd$size() > 0) {
+        cd$reset()
+        n <- cd$size()
+        if (n == 0) {
+          out <- list(status = 'success', msg = 'Cache cleared.')
+        } else {
+          out <- list(
+            status = 'error',
+            msg = sprintf('Something went wrong. %n items remain in cache.', n)
+          )
+        }
       } else {
-        out <- list(status = 'error', msg = sprintf('Something went wrong. %n items remain in cache.', n))
+        out <- list(
+          status = 'success',
+          msg = 'Cache directory is empty. Nothing to clear.'
+        )
       }
-    } else {
-      out <- list(status = 'success', msg = 'Cache directory is empty. Nothing to clear.')
+      return(out)
+    },
+    error = function(e) {
+      out <- list(status = 'error', msg = 'Cache directory not found.')
+      return(out)
     }
-    return(out)
-  }, error = function(e){
-    out <- list(status = 'error', msg = 'Cache directory not found.')
-    return(out)
-  })
+  )
 }
 
 #' Test whether a vector is length zero and IS not NULL
@@ -926,16 +966,13 @@ is_empty <- function(x) {
 #' z <- TRUE
 #' fillin_list(l)
 #' l
-fillin_list <- function(l,
-                        assign = TRUE) {
-
+fillin_list <- function(l, assign = TRUE) {
   #   ____________________________________________________________
   #   Defenses                                    ####
-  stopifnot( exprs = {
+  stopifnot(exprs = {
     is.list(l)
     is.data.frame(l) == FALSE
-  }
-  )
+  })
 
   #   __________________________________________________________________
   #   Early returns                                               ####
@@ -956,16 +993,17 @@ fillin_list <- function(l,
 
   # make sure that all the objects in list are in parent frame
   if (!all(nm_obj %in% obj_in_parent)) {
+    non_in_parent <- nm_obj[!nm_obj %in% obj_in_parent]
 
-    non_in_parent <-nm_obj[!nm_obj %in% obj_in_parent]
-
-    stop_msg <- paste("The following objects are not in calling function: \n",
-                      paste(non_in_parent, collapse = ", "))
+    stop_msg <- paste(
+      "The following objects are not in calling function: \n",
+      paste(non_in_parent, collapse = ", ")
+    )
 
     stop(stop_msg)
   }
 
-  val_obj        <- lapply(nm_obj, get, envir = parent.frame())
+  val_obj <- lapply(nm_obj, get, envir = parent.frame())
   names(val_obj) <- nm_obj
 
   for (i in seq_along(nm_obj)) {
@@ -980,7 +1018,6 @@ fillin_list <- function(l,
   }
 
   return(invisible(l))
-
 }
 
 #' Returns all auxiliary tables that support the long_format=TRUE parameter
@@ -999,27 +1036,24 @@ get_valid_aux_long_format_tables <- function() {
 #'
 #' @return data.table
 #' @keywords internal
-get_spr_table <- function(data_dir,
-                          table = c("spr_svy", "spr_lnp")) {
-
+get_spr_table <- function(data_dir, table = c("spr_svy", "spr_lnp")) {
   table <- match.arg(table)
 
   spr <-
     tryCatch(
       expr = {
         # Your code...
-        get_aux_table(data_dir = data_dir,
-                      table    = table)
+        get_aux_table(data_dir = data_dir, table = table)
       }, # end of expr section
       error = function(e) {
         data.table::data.table(
-          country_code    = character(0),
-          reporting_year  = numeric(0),
-          welfare_type    = character(0),
+          country_code = character(0),
+          reporting_year = numeric(0),
+          welfare_type = character(0),
           reporting_level = character(0),
-          spl             = numeric(0),
-          spr             = numeric(0),
-          median          = numeric(0)
+          spl = numeric(0),
+          spr = numeric(0),
+          median = numeric(0)
         )
       }
     ) # End of trycatch
@@ -1035,18 +1069,16 @@ get_spr_table <- function(data_dir,
 #' @return data.table
 #' @keywords internal
 get_metaregion_table <- function(data_dir) {
-
   spr <-
     tryCatch(
       expr = {
         # Your code...
-        get_aux_table(data_dir = data_dir,
-                      table    = "metaregion")
+        get_aux_table(data_dir = data_dir, table = "metaregion")
       }, # end of expr section
       error = function(e) {
         data.table::data.table(
-          region_code     = character(0),
-          lineup_year     = numeric(0)
+          region_code = character(0),
+          lineup_year = numeric(0)
         )
       }
     ) # End of trycatch
@@ -1054,7 +1086,6 @@ get_metaregion_table <- function(data_dir) {
 }
 
 
-
 #' Load prosperity gap table from aux data
 #'
 #' If there is no data available, return an empty data.frame
@@ -1063,26 +1094,23 @@ get_metaregion_table <- function(data_dir) {
 #'
 #' @return data.table
 #' @keywords internal
-get_pg_table <- function(data_dir,
-                          table = c("pg_svy", "pg_lnp")) {
-
+get_pg_table <- function(data_dir, table = c("pg_svy", "pg_lnp")) {
   table <- match.arg(table)
 
   pg <-
     tryCatch(
       expr = {
         # Your code...
-        get_aux_table(data_dir = data_dir,
-                      table    = table)
+        get_aux_table(data_dir = data_dir, table = table)
       }, # end of expr section
       error = function(e) {
         data.table::data.table(
-            country_code    = character(0),
-            reporting_level = character(0),
-            pg              = numeric(0),
-            welfare_type    = character(0),
-            reporting_year  = integer(0)
-          )
+          country_code = character(0),
+          reporting_level = character(0),
+          pg = numeric(0),
+          welfare_type = character(0),
+          reporting_year = integer(0)
+        )
       }
     ) # End of trycatch
   return(pg)
@@ -1098,24 +1126,24 @@ get_pg_table <- function(data_dir,
 #' @return data.table
 #' @keywords internal
 add_pg <- function(df, fill_gaps, data_dir) {
-
-  if (fill_gaps)  {
+  if (fill_gaps) {
     table <- "pg_lnp"
   } else {
     table <- "pg_svy"
   }
 
-  pg <- get_pg_table(data_dir = data_dir,
-                     table    = table)
-
-  df[pg,
-     on = c(
-       "country_code",
-       "reporting_year",
-       "welfare_type",
-       "reporting_level"),
-      pg := i.pg
-       ]
+  pg <- get_pg_table(data_dir = data_dir, table = table)
+
+  df[
+    pg,
+    on = c(
+      "country_code",
+      "reporting_year",
+      "welfare_type",
+      "reporting_level"
+    ),
+    pg := i.pg
+  ]
 }
 
 #' Add Distribution type
@@ -1127,7 +1155,6 @@ add_pg <- function(df, fill_gaps, data_dir) {
 #' @return data.table
 #' @keywords internal
 add_distribution_type <- function(df, lkup, fill_gaps) {
-
   # merge reference table with framework table and get distribution type
   # from framework
   rf <- copy(lkup$ref_lkup) |>
@@ -1139,11 +1166,10 @@ add_distribution_type <- function(df, lkup, fill_gaps) {
       reporting_year,
       surveyid_year
     )][,
-       surveyid_year := as.numeric(surveyid_year)]
-
+      surveyid_year := as.numeric(surveyid_year)
+    ]
 
-  fw <- get_aux_table(data_dir = lkup$data_root,
-                      "framework") |>
+  fw <- get_aux_table(data_dir = lkup$data_root, "framework") |>
     copy() |>
     _[, .(
       country_code,
@@ -1155,29 +1181,27 @@ add_distribution_type <- function(df, lkup, fill_gaps) {
       use_groupdata
     )]
 
-
   dt <- collapse::join(
-    x        = rf,
-    y        = fw,
-    on       = c("country_code", "surveyid_year", "survey_acronym"),
-    how      = "left",
+    x = rf,
+    y = fw,
+    on = c("country_code", "surveyid_year", "survey_acronym"),
+    how = "left",
     validate = "m:1",
-    verbose  = 0
+    verbose = 0
   )
 
   if (fill_gaps) {
     # line up years ----------
 
-    by_vars <- c("country_code",
-                 "reporting_year",
-                 "welfare_type"
-                 )
+    by_vars <- c("country_code", "reporting_year", "welfare_type")
 
     dt[,
-       # distribution type by year
-       distribution_type := fcase(use_groupdata == 1, "group",
-                                  use_imputed == 1,   "imputed",
-                                  default = "micro")
+      # distribution type by year
+      distribution_type := fcase(
+        use_groupdata == 1 , "group"   ,
+        use_imputed == 1   , "imputed" ,
+        default = "micro"
+      )
     ][,
       # find interpolation with different distribution type and
       # replace by "mixed"
@@ -1189,9 +1213,9 @@ add_distribution_type <- function(df, lkup, fill_gaps) {
     ]
 
     dt <- dt[,
-             # collapse by reporting_year and keep relevant variables
-             .(distribution_type = unique(distribution_type)),
-             by = by_vars
+      # collapse by reporting_year and keep relevant variables
+      .(distribution_type = unique(distribution_type)),
+      by = by_vars
     ]
 
     # df[dt,
@@ -1201,10 +1225,8 @@ add_distribution_type <- function(df, lkup, fill_gaps) {
     #      # Calculate unique counts of reporting level and add new rows
     #      unique_replevel := uniqueN(reporting_level),
     #      by = c("country_code","reporting_year")]
-
-
   } else {
-  # survey years --------------
+    # survey years --------------
     by_vars <- c(
       "country_code",
       "surveyid_year",
@@ -1213,40 +1235,42 @@ add_distribution_type <- function(df, lkup, fill_gaps) {
     )
 
     dt[,
-       # distribution type by year
-       distribution_type := fcase(use_groupdata == 1, "group",
-                                  use_imputed   == 1, "imputed",
-                                  default        =    "micro")
+      # distribution type by year
+      distribution_type := fcase(
+        use_groupdata == 1 , "group"   ,
+        use_imputed == 1   , "imputed" ,
+        default = "micro"
+      )
     ]
 
-    dt <- dt[, # collapse by reporting_year and keep relevant variables
-             .(distribution_type = unique(distribution_type)),
-             by = by_vars]
-
+    dt <- dt[,
+      # collapse by reporting_year and keep relevant variables
+      .(distribution_type = unique(distribution_type)),
+      by = by_vars
+    ]
   }
 
   if (!fill_gaps) {
     df <- df[,
-       surveyid_year := as.numeric(surveyid_year)
+      surveyid_year := as.numeric(surveyid_year)
     ]
   }
-  df[dt,
-      on = by_vars,
-      distribution_type := i.distribution_type
-      ][,
-         # Calculate unique counts of reporting level and add new rows
-         unique_replevel := uniqueN(reporting_level),
-         by = by_vars]
+  df[dt, on = by_vars, distribution_type := i.distribution_type][,
+    # Calculate unique counts of reporting level and add new rows
+    unique_replevel := uniqueN(reporting_level),
+    by = by_vars
+  ]
 
   # distribution type for national cases when aggregate data
 
-
-  df[unique_replevel == 3 &
-       reporting_level == "national" &
-       distribution_type == "group",
-     distribution_type := "synthetic"
-     ][,
-       unique_replevel := NULL]
+  df[
+    unique_replevel == 3 &
+      reporting_level == "national" &
+      distribution_type == "group",
+    distribution_type := "synthetic"
+  ][,
+    unique_replevel := NULL
+  ]
 
   setorderv(df, by_vars)
   return(invisible(df))
@@ -1263,29 +1287,28 @@ add_distribution_type <- function(df, lkup, fill_gaps) {
 #' @return data.table
 #' @keywords internal
 add_spl <- function(df, fill_gaps, data_dir) {
-
   if (fill_gaps) {
     table <- "spr_lnp"
   } else {
-    table <-  "spr_svy"
+    table <- "spr_svy"
   }
 
   spl <-
-    get_spr_table(data_dir = data_dir,
-                  table = table)
-
-
-  out <- df[spl,
-            on = c(
-              "country_code",
-              "reporting_year",
-              "welfare_type",
-              "reporting_level"
-            ),
-            `:=`(
-              spl = i.spl,
-              spr = i.spr
-            )]
+    get_spr_table(data_dir = data_dir, table = table)
+
+  out <- df[
+    spl,
+    on = c(
+      "country_code",
+      "reporting_year",
+      "welfare_type",
+      "reporting_level"
+    ),
+    `:=`(
+      spl = i.spl,
+      spr = i.spr
+    )
+  ]
 
   return(invisible(out))
 }
@@ -1301,30 +1324,31 @@ add_spl <- function(df, fill_gaps, data_dir) {
 add_agg_medians <- function(df, fill_gaps, data_dir) {
 
   if (fill_gaps) {
-    table    = "spr_lnp"
+    table = "spr_lnp"
     # set all lines up medians to NA.
     df[, median := NA_real_]
   } else {
     # if survey data, we keep the ones already calculated and add those
     # that are missing
-    table    = "spr_svy"
+    table = "spr_svy"
   }
   med <-
-    get_spr_table(data_dir = data_dir,
-                  table    = table)
+    get_spr_table(data_dir = data_dir, table = table)
 
   # join medians to missing data ---------
 
-  df[med,
-      on = c(
-        "country_code",
-        "reporting_year",
-        "welfare_type",
-        "reporting_level"
-      ),
-     # prefer median in df over the one in med as long as the one in
-     # in df is not NA. If that is the case, select the one in med.
-      median := fcoalesce(median, i.median)]
+  df[
+    med,
+    on = c(
+      "country_code",
+      "reporting_year",
+      "welfare_type",
+      "reporting_level"
+    ),
+    # prefer median in df over the one in med as long as the one in
+    # in df is not NA. If that is the case, select the one in med.
+    median := fcoalesce(median, i.median)
+  ]
 
   return(invisible(df))
 }
@@ -1339,15 +1363,15 @@ get_caller_names <- function() {
   calls <- sys.calls()
 
   lcalls <- length(calls)
-  caller_names <- vector("character" , length = lcalls)
+  caller_names <- vector("character", length = lcalls)
 
   tryCatch(
     expr = {
       i <- 1
       while (i <= lcalls) {
         call <- calls[[i]]
-        call_class  <- class(call[[1]])
-        call_type   <- typeof(call[[1]])
+        call_class <- class(call[[1]])
+        call_type <- typeof(call[[1]])
         call_length <- length(call[[1]])
 
         call[[1]] <-
@@ -1375,20 +1399,24 @@ get_caller_names <- function() {
     }, # end of expr section
 
     error = function(err) {
-      msg <- c(paste("Error in call",i),
-               paste("class:", call_class),
-               paste("type:", call_type),
-               paste("length:", call_length),
-               paste("text:", call_text))
+      msg <- c(
+        paste("Error in call", i),
+        paste("class:", call_class),
+        paste("type:", call_type),
+        paste("length:", call_length),
+        paste("text:", call_text)
+      )
       rlang::abort(msg, parent = err)
     }, # end of error section
 
     warning = function(w) {
-      msg <- c(paste("Warning in call",i),
-               paste("class:", call_class),
-               paste("type:", call_type),
-               paste("length:", call_length),
-               paste("text:", call_text))
+      msg <- c(
+        paste("Warning in call", i),
+        paste("class:", call_class),
+        paste("type:", call_type),
+        paste("length:", call_length),
+        paste("text:", call_text)
+      )
       rlang::warn(msg, parent = w)
     }
   ) # End of trycatch
@@ -1406,23 +1434,16 @@ get_caller_names <- function() {
 #' @keywords internal
 #' @return data.table from pip or pip_grp functions.
 add_vars_out_of_pipeline <- function(out, fill_gaps, lkup) {
-
   ## Add SPL and SPR  ---------------
-  out <- add_spl(df        = out,
-                 fill_gaps = fill_gaps,
-                 data_dir  = lkup$data_root)
+  out <- add_spl(df = out, fill_gaps = fill_gaps, data_dir = lkup$data_root)
 
   ## Add prosperity Gap -----------
 
-  out <- add_pg(df        = out,
-                fill_gaps = fill_gaps,
-                data_dir  = lkup$data_root)
+  out <- add_pg(df = out, fill_gaps = fill_gaps, data_dir = lkup$data_root)
 
   ## add distribution type -------------
   # based on info in framework data, rather than welfare data
-  out <- add_distribution_type(df        = out,
-                               lkup      = lkup,
-                               fill_gaps = fill_gaps)
+  out <- add_distribution_type(df = out, lkup = lkup, fill_gaps = fill_gaps)
 
   invisible(out)
 }
@@ -1446,7 +1467,6 @@ add_vars_out_of_pipeline <- function(out, fill_gaps, lkup) {
 #'  unnest_dt_longer(df, grep("^list_column", names(df), value = TRUE))
 #' }
 unnest_dt_longer <- function(tbl, cols) {
-
   tbl <- data.table::as.data.table(tbl)
   clnms <- rlang::syms(setdiff(colnames(tbl), cols))
 
@@ -1468,30 +1488,43 @@ unnest_dt_longer <- function(tbl, cols) {
 #' @return data.table with with fgt, mean and median
 #' @keywords internal
 get_mean_median <- \(fgt, lkup, fill_gaps) {
-
-  if (isFALSE(lkup$use_new_lineup_version)) return(fgt)
+  if (isFALSE(lkup$use_new_lineup_version)) {
+    return(fgt)
+  }
 
   if (fill_gaps) {
-    dist <- get_vars(lkup$lineup_dist_stats,
-                     c("country_code", "reporting_year",
-                       "reporting_level", "mean", "median"))
+    dist <- get_vars(
+      lkup$lineup_dist_stats,
+      c("country_code", "reporting_year", "reporting_level", "mean", "median")
+    )
     by_var <- c('country_code', "reporting_year", "reporting_level")
   } else {
-    dist <- get_vars(lkup$dist_stats,
-                     c("country_code", "reporting_year",
-                       "reporting_level", "mean",
-                       "survey_median_ppp", "welfare_type"))
+    dist <- get_vars(
+      lkup$dist_stats,
+      c(
+        "country_code",
+        "reporting_year",
+        "reporting_level",
+        "mean",
+        "survey_median_ppp",
+        "welfare_type"
+      )
+    )
     setnames(dist, "survey_median_ppp", "median")
 
-    by_var <- c('country_code',
-                "reporting_year",
-                "reporting_level",
-                "welfare_type")
+    by_var <- c(
+      'country_code',
+      "reporting_year",
+      "reporting_level",
+      "welfare_type"
+    )
   }
-  join(x = fgt,
-      y = dist,
-      on = by_var,
-      how = "left",
-      validate = "m:1", # multiple povlines
-      verbose = 0L)
+  join(
+    x = fgt,
+    y = dist,
+    on = by_var,
+    how = "left",
+    validate = "m:1", # multiple povlines
+    verbose = 0L
+  )
 }
diff --git a/inst/TMP/TMP_API_launcher.R b/inst/TMP/TMP_API_launcher.R
index 7ce361b3..a5f5f5a9 100644
--- a/inst/TMP/TMP_API_launcher.R
+++ b/inst/TMP/TMP_API_launcher.R
@@ -29,6 +29,3 @@ if (Sys.info()[["user"]] == "wb384996") {
 
 }
 
-
-
-
diff --git a/man/pip_agg.Rd b/man/pip_agg.Rd
index 51417e2e..87a215a3 100644
--- a/man/pip_agg.Rd
+++ b/man/pip_agg.Rd
@@ -10,7 +10,7 @@ pip_agg(
   country = "ALL",
   year = "ALL",
   povline = 1.9,
-  group_by = c("wb", "none"),
+  group_by = "wb",
   welfare_type = c("all", "consumption", "income"),
   reporting_level = c("all", "national"),
   lkup,
@@ -23,7 +23,7 @@ pip_grp_logic(
   country = "ALL",
   year = "ALL",
   povline = 1.9,
-  group_by = c("wb", "none"),
+  group_by = "wb",
   welfare_type = c("all", "consumption", "income"),
   reporting_level = c("all", "national"),
   lkup,
@@ -38,6 +38,7 @@ pip_grp_new(
   povline = 1.9,
   welfare_type = c("all", "consumption", "income"),
   reporting_level = c("all", "national"),
+  group_by = "wb",
   lkup,
   censor = TRUE,
   additional_ind = FALSE,
diff --git a/man/pip_aggregate_by.Rd b/man/pip_aggregate_by.Rd
index 642567e7..ee318e3c 100644
--- a/man/pip_aggregate_by.Rd
+++ b/man/pip_aggregate_by.Rd
@@ -4,13 +4,15 @@
 \alias{pip_aggregate_by}
 \title{Aggregate by predefined groups}
 \usage{
-pip_aggregate_by(df, country = "ALL", return_cols = NULL)
+pip_aggregate_by(df, country = "ALL", group_by = "wb", return_cols = NULL)
 }
 \arguments{
 \item{df}{data.frame: Response from \code{fg_pip_old()} or \code{rg_pip()}.}
 
 \item{country}{character: Selected countries / regions}
 
+\item{group_by}{character: Grouping variable (default is "wb")}
+
 \item{return_cols}{list: lkup$return_cols$pip_grp object. Controls returned
 columns}
 }
diff --git a/man/pip_grp.Rd b/man/pip_grp.Rd
index ad4c2423..03895a7f 100644
--- a/man/pip_grp.Rd
+++ b/man/pip_grp.Rd
@@ -8,7 +8,7 @@ pip_grp(
   country = "ALL",
   year = "ALL",
   povline = 1.9,
-  group_by = c("wb", "none"),
+  group_by = "wb",
   welfare_type = c("all", "consumption", "income"),
   reporting_level = c("all", "national"),
   lkup,