diff --git a/.Rbuildignore b/.Rbuildignore index 73b1e48..c91f411 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,11 +1,14 @@ -^.*\.Rproj$ -^\.Rproj\.user$ +^src/.*\.(o|so|dll)$ +^\.github$ +^pkgdown/.*$ +^.*\.Rproj$ +^\.Rproj\.user$ ^README\.Rmd$ ^README\.md$ ^NEWS\.md$ -^_pkgdown\.yml$ +^_pkgdown\.yml$ ^LICENSE\.md$ ^\.github$ -^docs$ -^vignettes$ +^docs$ +^vignettes$ ^pkgdown$ \ No newline at end of file diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..749d25d --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,70 @@ +name: R-CMD-check + +on: + pull_request: + branches: [master] + workflow_dispatch: + +jobs: + R-CMD-check: + name: ${{ matrix.config.os }} (R ${{ matrix.config.r }}) + runs-on: ${{ matrix.config.os }} + strategy: + fail-fast: false + matrix: + config: + - { os: ubuntu-latest, r: "release" } + - { os: macos-latest, r: "release" } + - { os: windows-latest, r: "release" } + + env: + R_KEEP_PKG_SOURCE: yes + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false + RGL_USE_NULL: true + + steps: + - uses: actions/checkout@v4 + - name: Clean precompiled objects + shell: bash + run: | + rm -f src/*.o src/*.so src/*.dll || true + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::rcmdcheck + any::devtools + lzy318/HonestDiDFEct + local::. + needs: check + + - name: Install HonestDiDFEct via devtools + shell: Rscript {0} + run: | + if (!requireNamespace('remotes', quietly=TRUE)) install.packages('remotes', repos='https://cloud.r-project.org') + if (!requireNamespace('HonestDiDFEct', quietly=TRUE)) remotes::install_github('lzy318/HonestDiDFEct', upgrade='never') + + - name: Session info + shell: Rscript {0} + run: | + sessionInfo() + + - name: Check (CRAN-like) + shell: Rscript {0} + run: | + rcmdcheck::rcmdcheck(args = c('--no-manual','--as-cran'), error_on = 'warning', check_dir = 'check') + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-check + path: check diff --git a/.github/workflows/release-to-cran-and-docs.yaml b/.github/workflows/release-to-cran-and-docs.yaml new file mode 100644 index 0000000..97abe90 --- /dev/null +++ b/.github/workflows/release-to-cran-and-docs.yaml @@ -0,0 +1,97 @@ +name: release-to-cran-and-docs + +on: + push: + tags: + - "v*.*.*" + workflow_dispatch: + inputs: + submit_to_cran: + description: "Submit to CRAN (requires secrets.GH_PAT and CRAN_EMAIL set)" + required: true + default: "false" + tag: + description: "Git tag to create (e.g., v2.0.3)" + required: true + +jobs: + tag-release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Clean precompiled objects + shell: bash + run: | + rm -f src/*.o src/*.so src/*.dll || true + - name: Create tag + run: | + git config user.name github-actions + git config user.email github-actions@github.com + git tag ${{ github.event.inputs.tag }} + git push origin ${{ github.event.inputs.tag }} + + submit-cran: + if: ${{ github.event.inputs.submit_to_cran == 'true' }} + needs: tag-release + runs-on: ubuntu-latest + env: + R_KEEP_PKG_SOURCE: yes + _R_CHECK_CRAN_INCOMING_REMOTE_: false + RGL_USE_NULL: true + GITHUB_PAT: ${{ secrets.GH_PAT }} + CRAN_EMAIL: ${{ secrets.CRAN_EMAIL }} + steps: + - uses: actions/checkout@v4 + - 
uses: r-lib/actions/setup-r@v2 + with: + r-version: "release" + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::devtools + any::rcmdcheck + any::pkgdown + lzy318/HonestDiDFEct + local::. + needs: check + - name: Install HonestDiDFEct via devtools + shell: Rscript {0} + run: | + if (!requireNamespace('remotes', quietly=TRUE)) install.packages('remotes', repos='https://cloud.r-project.org') + if (!requireNamespace('HonestDiDFEct', quietly=TRUE)) remotes::install_github('lzy318/HonestDiDFEct', upgrade='never') + - name: CRAN checks (as-cran) + shell: Rscript {0} + run: | + rcmdcheck::rcmdcheck(args=c('--no-manual','--as-cran'), error_on='warning') + - name: Submit to CRAN (manual email) + shell: bash + run: | + Rscript -e "devtools::build('.')" + echo "Please email the built tar.gz to CRAN maintainers from $CRAN_EMAIL or use devtools::release() interactively." + + pkgdown: + needs: tag-release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, lzy318/HonestDiDFEct, local::. + - name: Install HonestDiDFEct via devtools + shell: Rscript {0} + run: | + if (!requireNamespace('remotes', quietly=TRUE)) install.packages('remotes', repos='https://cloud.r-project.org') + if (!requireNamespace('HonestDiDFEct', quietly=TRUE)) remotes::install_github('lzy318/HonestDiDFEct', upgrade='never') + - name: Build site + shell: Rscript {0} + run: | + pkgdown::build_site(preview = FALSE) + - name: Deploy to gh-pages + uses: peaceiris/actions-gh-pages@v4 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./docs diff --git a/.github/workflows/test-and-check.yaml b/.github/workflows/test-and-check.yaml new file mode 100644 index 0000000..0a582cc --- /dev/null +++ b/.github/workflows/test-and-check.yaml @@ -0,0 +1,83 @@ +name: test-and-check + +on: + workflow_dispatch: + +jobs: + test: + runs-on: ubuntu-latest + env: + R_KEEP_PKG_SOURCE: yes + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false + RGL_USE_NULL: true + steps: + - uses: actions/checkout@v4 + - name: Clean precompiled objects + shell: bash + run: | + rm -f src/*.o src/*.so src/*.dll || true + - uses: r-lib/actions/setup-r@v2 + with: + r-version: "release" + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::devtools + any::testthat + lzy318/HonestDiDFEct + local::. + needs: tests + cache-version: 2 + - name: Install HonestDiDFEct via devtools + shell: Rscript {0} + run: | + if (!requireNamespace('remotes', quietly=TRUE)) install.packages('remotes', repos='https://cloud.r-project.org') + if (!requireNamespace('HonestDiDFEct', quietly=TRUE)) remotes::install_github('lzy318/HonestDiDFEct', upgrade='never') + - name: Run tests + shell: Rscript {0} + run: | + devtools::test() + + cran-check: + needs: test + runs-on: ubuntu-latest + env: + R_KEEP_PKG_SOURCE: yes + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false + RGL_USE_NULL: true + steps: + - uses: actions/checkout@v4 + - name: Clean precompiled objects + shell: bash + run: | + rm -f src/*.o src/*.so src/*.dll || true + - uses: r-lib/actions/setup-r@v2 + with: + r-version: "release" + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::rcmdcheck + lzy318/HonestDiDFEct + local::. 
+ needs: check + cache-version: 2 + - name: Install HonestDiDFEct via devtools + shell: Rscript {0} + run: | + if (!requireNamespace('remotes', quietly=TRUE)) install.packages('remotes', repos='https://cloud.r-project.org') + if (!requireNamespace('HonestDiDFEct', quietly=TRUE)) remotes::install_github('lzy318/HonestDiDFEct', upgrade='never') + - name: CRAN incoming checks + shell: Rscript {0} + run: | + rcmdcheck::rcmdcheck(args = c('--no-manual','--as-cran'), error_on='warning', check_dir='check') + - name: Upload check results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cran-check-results + path: check diff --git a/DESCRIPTION b/DESCRIPTION index d25597a..84c3622 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,13 +5,14 @@ Version: 2.0.3 Date: 2025-06-20 Authors@R: c(person("Licheng", "Liu", , "lichengl@stanford.edu", role = c("aut")), - person("Ziyi", "Liu", , "zyliu2023@berkeley.edu", role = c("aut", "cre")), + person("Ziyi", "Liu", , "zyliu2023@berkeley.edu", role = c("aut")), person("Ye", "Wang", , "yezhehuzhi@gmail.com", role = c("aut")), person("Yiqing", "Xu", , "yiqingxu@stanford.edu", role = c("aut")), + person("Tianzhu", "Qin", , "tianzhu@stanford.edu", role = c("aut", "cre")), person("Shiyun", "Hu", , "hushiyun@pku.edu.cn", role = c("aut")), person("Rivka", "Lipkovitz", , "rivkal@mit.edu", role = c("aut"))) -Maintainer: Ziyi Liu -Description: Provides tools for estimating causal effects in panel data using counterfactual methods, as well as other modern DID estimators. It is designed for causal panel analysis with binary treatments under the parallel trends assumption. The package supports scenarios where treatments can switch on and off and allows for limited carryover effects. It includes several imputation estimators, such as Gsynth (Xu 2017), linear factor models, and the matrix completion method. Detailed methodology is described in Liu, Wang, and Xu (2024) and Chiu et al (2025). +Maintainer: Tianzhu Qin +Description: Provides tools for estimating causal effects in panel data using counterfactual methods, as well as other modern DID estimators. It is designed for causal panel analysis with binary treatments under the parallel trends assumption. The package supports scenarios where treatments can switch on and off and allows for limited carryover effects. It includes several imputation estimators, such as Gsynth (Xu 2017), linear factor models, and the matrix completion method. Detailed methodology is described in Liu, Wang, and Xu (2024) and Chiu et al. (2025) . 
URL: https://yiqingxu.org/packages/fect/ NeedsCompilation: yes License: MIT + file LICENSE @@ -30,18 +31,29 @@ Imports: ggplot2, future, mvtnorm, - dplyr + dplyr, + future.apply, + reshape2, + rlang, + scales Suggests: - panelView + panelView, + testthat (>= 3.0.0), + did, + DIDmultiplegtDYN, + HonestDiDFEct, + ggrepel SystemRequirements: A C++11 compiler Depends: R (>= 3.5.0) LinkingTo: Rcpp, RcppArmadillo RoxygenNote: 7.3.2 Packaged: 2024-01-26 03:25:56 UTC; ziyil Encoding: UTF-8 -Author: Shiyun Hu [aut], - Rivka Lipkovitz [aut], - Licheng Liu [aut], +Author: Licheng Liu [aut], Ziyi Liu [aut, cre], Ye Wang [aut], - Yiqing Xu [aut] () + Yiqing Xu [aut] (), + Tianzhu Qin [aut], + Shiyun Hu [aut], + Rivka Lipkovitz [aut] +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 0e0fadf..203357e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,6 +30,10 @@ importFrom("gridExtra", "grid.arrange", "arrangeGrob") importFrom("grid", "textGrob", "gpar") importFrom("fixest", "feols") importFrom(dplyr, "%>%", "group_by", "mutate") +importFrom("future.apply", "future_lapply") +importFrom("reshape2", "melt") +importFrom("rlang", "sym") +importFrom("scales", "pretty_breaks") export(esplot) export(get.cohort) diff --git a/R/boot.R b/R/boot.R index 9fa7f9b..7ab351a 100644 --- a/R/boot.R +++ b/R/boot.R @@ -998,7 +998,7 @@ fect.boot <- function(Y, group = boot.group, time.on.seq.group = group.time.on, time.off.seq.group = group.time.off - )) + ), silent = TRUE) } else if (method == "ife") { boot <- try(fect.fe( Y = Y[, boot.id], X = X.boot, D = D.boot, W = W.boot, diff --git a/R/default.R b/R/default.R index eaeaab9..595a6ef 100644 --- a/R/default.R +++ b/R/default.R @@ -982,7 +982,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) tempID <- unique(data[, id]) for (i in tempID) { subpos <- which(data[, id] == i) - subtime <- data[data$id == i,]$time + subtime <- data[subpos, time] subd <- data[subpos, Dname] if (sum(subd) >= 1) { tr.time <- subtime[which(subd == 1)] diff --git a/R/did_wrapper.R b/R/did_wrapper.R index 3797d43..7478927 100644 --- a/R/did_wrapper.R +++ b/R/did_wrapper.R @@ -17,19 +17,18 @@ get_event_counts <- function(data, time_to_treat_var, unit_id) { # Build event-study data frame make_es_df <- function(Period_vec, est_vec, se_vec = NULL, count_df = NULL, method = method) { - df <- data.frame(ATT = est_vec, check.names = FALSE) if (!is.null(se_vec)) { - df[["S.E."]] <- se_vec - df[["CI.lower"]] <- df$ATT - 1.96 * se_vec - df[["CI.upper"]] <- df$ATT + 1.96 * se_vec - df[["p.value"]] <- 2 * (1 - stats::pnorm(abs(df$ATT / se_vec))) + df[["S.E."]] <- se_vec + df[["CI.lower"]] <- df$ATT - 1.96 * se_vec + df[["CI.upper"]] <- df$ATT + 1.96 * se_vec + df[["p.value"]] <- 2 * (1 - stats::pnorm(abs(df$ATT / se_vec))) } else { - df[["S.E."]] <- NA - df[["CI.lower"]] <- NA - df[["CI.upper"]] <- NA - df[["p.value"]] <- NA + df[["S.E."]] <- NA + df[["CI.lower"]] <- NA + df[["CI.upper"]] <- NA + df[["p.value"]] <- NA } ## attach period counts if supplied @@ -44,17 +43,17 @@ make_es_df <- function(Period_vec, est_vec, se_vec = NULL, # If Period_vec can have NAs, this might cause issues with rownames # Ensure Period_vec used for rownames is clean or handle NAs appropriately valid_periods <- !is.na(df$Period) # Or use original Period_vec if df$Period might be modified by join - if(any(valid_periods)) { + if (any(valid_periods)) { rownames(df)[valid_periods] <- df$Period[valid_periods] } - df$Period <- NULL + df$Period <- NULL } else { # Ensure Period_vec used for rownames 
is clean valid_rownames <- Period_vec + as.numeric(method != "didm") - if(length(valid_rownames) == nrow(df) && !any(is.na(valid_rownames))) { + if (length(valid_rownames) == nrow(df) && !any(is.na(valid_rownames))) { rownames(df) <- valid_rownames } - df$count <- NA + df$count <- NA } df <- df[, c("ATT", "S.E.", "CI.lower", "CI.upper", "p.value", "count")] @@ -65,37 +64,34 @@ make_es_df <- function(Period_vec, est_vec, se_vec = NULL, did_wrapper <- function( data, Y, D, X = NULL, - index, # c("unit_id","time_id") - method = c("twfe","st","iw","cs_never","cs_notyet","didm"), - se = c("default","boot","bootstrap","jackknife"), + index, # c("unit_id","time_id") + method = c("twfe", "st", "iw", "cs_never", "cs_notyet", "didm"), + se = c("default", "boot", "bootstrap", "jackknife"), nboots = 200, parallel = TRUE, - core = NULL, - + core = NULL, ## TWFE / stacked-DID options time_to_treat_var = "Time_to_Treatment", - treat_indicator = "treat", - + treat_indicator = "treat", ## Callaway-Sant'Anna options csdid.base_period = "universal", - ## DIDmultiplegtDYN options didm.effects = NA, - didm.placebo = NA -) { - + didm.placebo = NA) { if (method == "bootstrap") method <- "boot" method <- match.arg(method) - se <- match.arg(se) - unit_id <- index[1]; time_id <- index[2] + se <- match.arg(se) + unit_id <- index[1] + time_id <- index[2] # Define symbols for unit_id and D here for use throughout the function, including run_estimator_once unit_sym <- rlang::sym(unit_id) - D_sym <- rlang::sym(D) + D_sym <- rlang::sym(D) # Y_sym <- rlang::sym(Y) # If Y is used with dplyr NSE - if (method == "didm" && is.na(didm.effects)) + if (method == "didm" && is.na(didm.effects)) { stop('For "didm", you must supply "didm.effects" and "didm.placebo".') + } ## ── Pre-processing ────────────────────────────────────────────────────── original_units <- length(unique(data[[unit_id]])) # Base R subsetting, fine @@ -105,8 +101,10 @@ did_wrapper <- function( dplyr::mutate(treatment_mean = mean(!!D_sym, na.rm = TRUE)) %>% # Uses pre-defined D_sym dplyr::ungroup() data <- data[data$treatment_mean < 1, ] # Base R subsetting, fine - message("Dropped ", original_units - length(unique(data[[unit_id]])), # Base R subsetting, fine - " always-treated units.") + message( + "Dropped ", original_units - length(unique(data[[unit_id]])), # Base R subsetting, fine + " always-treated units." + ) ## define cohorts & event time # Ensure fect::get.cohort doesn't have issues with column names if they contain special characters @@ -116,12 +114,11 @@ did_wrapper <- function( ## ── Core estimation routine (one sample) ─────────────────────────────── run_estimator_once <- function(dd) { # dd is the data for this run - out_ATT <- out_SE <- out_lower <- out_upper <- NA - es_df <- data.frame() + out_ATT <- out_SE <- out_lower <- out_upper <- NA + es_df <- data.frame() # ── 1) TWFE ─────────────────────────────────────────────────────────── if (method == "twfe") { - if (!treat_indicator %in% names(dd)) { dd <- dd |> dplyr::group_by(!!unit_sym) |> # MODIFIED: Use unit_sym from parent scope @@ -136,20 +133,21 @@ did_wrapper <- function( stats::as.formula( paste0(Y, " ~ ", D, " + ", X_part, " | ", unit_id, " + ", time_id) ), - data = dd, + data = dd, cluster = unit_id, # fixest can take string for cluster - notes = FALSE + notes = FALSE ) - co <- fixest::coeftable(fit_att) - est <- co[D, "Estimate"] - se_ <- co[D, "Std. Error"] - out_ATT <- est - out_SE <- se_ + co <- fixest::coeftable(fit_att) + est <- co[D, "Estimate"] + se_ <- co[D, "Std. 
Error"] + out_ATT <- est + out_SE <- se_ out_lower <- est - 1.96 * se_ out_upper <- est + 1.96 * se_ - if (!time_to_treat_var %in% names(dd)) + if (!time_to_treat_var %in% names(dd)) { dd <- fect::get.cohort(dd, D = D, index = c(unit_id, time_id), start0 = TRUE) + } # Ensure time_to_treat_var column exists and handle NAs before using in i() if (time_to_treat_var %in% names(dd)) { @@ -170,18 +168,20 @@ did_wrapper <- function( unit_id, " + ", time_id ) ), - data = dd, + data = dd, cluster = unit_id, # fixest can take string for cluster - notes = FALSE + notes = FALSE ) - ctab <- as.data.frame(fit_es$coeftable) + ctab <- as.data.frame(fit_es$coeftable) # Robustly extract period numbers, handle cases where parsing fails period_nums <- vapply( rownames(ctab), function(x) { sp <- strsplit(x, "::")[[1]] - if (length(sp) < 2) return(NA_real_) + if (length(sp) < 2) { + return(NA_real_) + } val_str <- strsplit(sp[2], ":")[[1]][1] suppressWarnings(as.numeric(val_str)) # Suppress warnings for non-numeric if any }, @@ -189,54 +189,60 @@ did_wrapper <- function( ) # Filter out NAs that might result from parsing non-coefficient rows or failed parsing valid_coeffs <- !is.na(period_nums) - es_df <- make_es_df(period_nums[valid_coeffs], ctab$Estimate[valid_coeffs], - ctab$`Std. Error`[valid_coeffs], count_df, method) + es_df <- make_es_df( + period_nums[valid_coeffs], ctab$Estimate[valid_coeffs], + ctab$`Std. Error`[valid_coeffs], count_df, method + ) # ── 2) Stacked DID ("st") ───────────────────────────────────────────── } else if (method == "st") { - - if (!"Cohort" %in% names(dd)) + if (!"Cohort" %in% names(dd)) { dd <- fect::get.cohort(dd, D = D, index = c(unit_id, time_id), start0 = TRUE) + } target.cohorts <- setdiff(unique(dd$Cohort), "Control") df.st_list <- vector("list", length(target.cohorts)) # Pre-allocate list for (i in seq_along(target.cohorts)) { c <- target.cohorts[i] # Use a temporary data frame to avoid modifying `dd` in a loop if it's large - temp_df <- dd[dd$Cohort %in% c(c,"Control"), ] + temp_df <- dd[dd$Cohort %in% c(c, "Control"), ] temp_df$stack_id <- i # Use loop index for stack_id df.st_list[[i]] <- temp_df } df.st <- do.call(rbind, df.st_list) - df.st$st_unit <- as.numeric(factor(paste0(df.st$stack_id,"-",df.st[[unit_id]]))) - df.st$st_time <- as.numeric(factor(paste0(df.st$stack_id,"-",df.st[[time_id]]))) + df.st$st_unit <- as.numeric(factor(paste0(df.st$stack_id, "-", df.st[[unit_id]]))) + df.st$st_time <- as.numeric(factor(paste0(df.st$stack_id, "-", df.st[[time_id]]))) fit_st <- fixest::feols( - stats::as.formula(paste0(Y," ~ ",D," | st_unit + st_time")), - data = df.st, + stats::as.formula(paste0(Y, " ~ ", D, " | st_unit + st_time")), + data = df.st, cluster = "st_unit", # fixest can take string for cluster notes = FALSE, warn = FALSE ) - co <- fixest::coeftable(fit_st) - est <- co[D,"Estimate"]; se_ <- co[D,"Std. Error"] - out_ATT <- est; out_SE <- se_ - out_lower <- est - 1.96 * se_; out_upper <- est + 1.96 * se_ + co <- fixest::coeftable(fit_st) + est <- co[D, "Estimate"] + se_ <- co[D, "Std. 
Error"] + out_ATT <- est + out_SE <- se_ + out_lower <- est - 1.96 * se_ + out_upper <- est + 1.96 * se_ ## event study (same routine as above) if (!treat_indicator %in% names(df.st)) { df.st <- df.st |> dplyr::group_by(!!unit_sym) |> # MODIFIED: Use unit_sym from parent scope - dplyr::mutate(tmp = as.numeric(mean(!!D_sym,na.rm=TRUE) > 0)) |> # MODIFIED: Use D_sym from parent scope + dplyr::mutate(tmp = as.numeric(mean(!!D_sym, na.rm = TRUE) > 0)) |> # MODIFIED: Use D_sym from parent scope dplyr::ungroup() names(df.st)[names(df.st) == "tmp"] <- treat_indicator } - if (!time_to_treat_var %in% names(df.st)) - df.st <- fect::get.cohort(df.st, D = D, index = c(unit_id,time_id), start0 = TRUE) + if (!time_to_treat_var %in% names(df.st)) { + df.st <- fect::get.cohort(df.st, D = D, index = c(unit_id, time_id), start0 = TRUE) + } # Ensure time_to_treat_var column exists and handle NAs if (time_to_treat_var %in% names(df.st)) { @@ -251,33 +257,39 @@ did_wrapper <- function( fit_es <- fixest::feols( stats::as.formula( - paste0(Y," ~ i(", time_to_treat_var, ", ", treat_indicator, - ", ref=-1) | st_unit + st_time") + paste0( + Y, " ~ i(", time_to_treat_var, ", ", treat_indicator, + ", ref=-1) | st_unit + st_time" + ) ), - data = df.st, + data = df.st, cluster = "st_unit" # fixest can take string for cluster ) ctab <- as.data.frame(fit_es$coeftable) - per <- vapply( + per <- vapply( rownames(ctab), - function(x){ - sp<-strsplit(x,"::")[[1]] - if(length(sp)<2) return(NA_real_) - val_str <- strsplit(sp[2],":")[[1]][1] + function(x) { + sp <- strsplit(x, "::")[[1]] + if (length(sp) < 2) { + return(NA_real_) + } + val_str <- strsplit(sp[2], ":")[[1]][1] suppressWarnings(as.numeric(val_str)) }, numeric(1) ) valid_coeffs <- !is.na(per) - es_df <- make_es_df(per[valid_coeffs], ctab$Estimate[valid_coeffs], - ctab$`Std. Error`[valid_coeffs], count_df, method) + es_df <- make_es_df( + per[valid_coeffs], ctab$Estimate[valid_coeffs], + ctab$`Std. Error`[valid_coeffs], count_df, method + ) # ── 3) IW (Sun-&-Abraham) ──────────────────────────────────────────── } else if (method == "iw") { - - if (!"FirstTreat" %in% names(dd)) # 'FirstTreat' is typically generated by get.cohort or similar + if (!"FirstTreat" %in% names(dd)) { # 'FirstTreat' is typically generated by get.cohort or similar stop("Method 'iw' requires a FirstTreat column. Ensure get.cohort() was run or the column exists.") + } dd$FirstTreat[is.na(dd$FirstTreat)] <- 1000 # Or other large value indicating never/not-yet treated for sunab X_part <- if (!is.null(X)) paste(X, collapse = " + ") else "1" @@ -285,109 +297,132 @@ did_wrapper <- function( fit_iw <- fixest::feols( stats::as.formula( # Ensure time_id is a valid column name for sunab - paste0(Y," ~ sunab(FirstTreat,",time_id,") + ", X_part, - " | ", unit_id," + ", time_id) + paste0( + Y, " ~ sunab(FirstTreat,", time_id, ") + ", X_part, + " | ", unit_id, " + ", time_id + ) ), - data = dd, + data = dd, cluster = unit_id # fixest can take string for cluster ) - att_sum <- summary(fit_iw, agg = "ATT") - out_ATT <- att_sum$coeftable["ATT","Estimate"] - out_SE <- att_sum$coeftable["ATT","Std. Error"] - out_lower<- out_ATT - 1.96*out_SE - out_upper<- out_ATT + 1.96*out_SE + att_sum <- summary(fit_iw, agg = "ATT") + out_ATT <- att_sum$coeftable["ATT", "Estimate"] + out_SE <- att_sum$coeftable["ATT", "Std. 
Error"] + out_lower <- out_ATT - 1.96 * out_SE + out_upper <- out_ATT + 1.96 * out_SE ctab <- as.data.frame(fixest::coeftable(fit_iw)) # Robustly extract offsets for sunab offsets <- suppressWarnings( - as.numeric(sub("^.*::","",sub(":cohort::.*","",rownames(ctab)))) + as.numeric(sub("^.*::", "", sub(":cohort::.*", "", rownames(ctab)))) + ) + valid <- !is.na(offsets) + es_df <- make_es_df( + offsets[valid], ctab$Estimate[valid], + ctab$`Std. Error`[valid], count_df, method ) - valid <- !is.na(offsets) - es_df <- make_es_df(offsets[valid], ctab$Estimate[valid], - ctab$`Std. Error`[valid], count_df, method) # ── 4) Callaway-Sant'Anna (never-treated) ───────────────────────────── } else if (method == "cs_never") { - - if (!"FirstTreat" %in% names(dd)) # 'FirstTreat' is gname for did::att_gt - dd <- fect::get.cohort(dd, D = D, index = c(unit_id,time_id), start0 = TRUE) + if (!"FirstTreat" %in% names(dd)) { # 'FirstTreat' is gname for did::att_gt + dd <- fect::get.cohort(dd, D = D, index = c(unit_id, time_id), start0 = TRUE) + } # For 'nevertreated', control units usually have FirstTreat = 0 or Inf # Check did package documentation for exact requirement for gname with nevertreated dd$FirstTreat[is.na(dd$FirstTreat)] <- 0 # Assuming NA means never treated and coded as 0 cs.out <- did::att_gt( - yname = Y, gname = "FirstTreat", # Ensure 'FirstTreat' is the correct group identifier - idname = unit_id, tname = time_id, + yname = Y, gname = "FirstTreat", # Ensure 'FirstTreat' is the correct group identifier + idname = unit_id, tname = time_id, xformla = if (!is.null(X)) stats::as.formula(paste("~", paste(X, collapse = "+"))) else ~1, control_group = "nevertreated", allow_unbalanced_panel = TRUE, # Consider implications - data = as.data.frame(dd), # did package often prefers data.frame + data = as.data.frame(dd), # did package often prefers data.frame est_method = "reg", # Default, consider "dr" for doubly robust base_period = csdid.base_period ) simple <- tryCatch(did::aggte(cs.out, type = "simple", na.rm = TRUE), - error = function(e) {warning("Error in did::aggte (simple, cs_never): ", e$message); NULL}) + error = function(e) { + warning("Error in did::aggte (simple, cs_never): ", e$message) + NULL + } + ) if (!is.null(simple)) { - out_ATT <- simple$overall.att - out_SE <- simple$overall.se - out_lower <- out_ATT - 1.96*out_SE - out_upper <- out_ATT + 1.96*out_SE + out_ATT <- simple$overall.att + out_SE <- simple$overall.se + out_lower <- out_ATT - 1.96 * out_SE + out_upper <- out_ATT + 1.96 * out_SE } dyn <- tryCatch( - did::aggte(cs.out, type="dynamic", na.rm = TRUE, cband = FALSE), # cband=FALSE for point CIs - error = function(e) {warning("Error in did::aggte (dynamic, cs_never): ", e$message); NULL}) - es_df <- if (is.null(dyn) || length(dyn$egt) == 0) # Check if dyn$egt is empty + did::aggte(cs.out, type = "dynamic", na.rm = TRUE, cband = FALSE), # cband=FALSE for point CIs + error = function(e) { + warning("Error in did::aggte (dynamic, cs_never): ", e$message) + NULL + } + ) + es_df <- if (is.null(dyn) || length(dyn$egt) == 0) { # Check if dyn$egt is empty make_es_df(numeric(0), numeric(0), numeric(0), count_df, method) - else + } else { make_es_df(dyn$egt, dyn$att.egt, dyn$se.egt, count_df, method) + } # ── 5) Callaway-Sant'Anna (not-yet-treated) ─────────────────────────── } else if (method == "cs_notyet") { - - if (!"FirstTreat" %in% names(dd)) - dd <- fect::get.cohort(dd, D = D, index = c(unit_id,time_id), start0 = TRUE) + if (!"FirstTreat" %in% names(dd)) { + dd <- 
fect::get.cohort(dd, D = D, index = c(unit_id, time_id), start0 = TRUE) + } # For 'notyettreated', FirstTreat should be year of treatment or Inf/0 for never treated # Ensure NA handling for FirstTreat is appropriate for did::att_gt # dd$FirstTreat[is.na(dd$FirstTreat)] <- Inf # Or appropriate coding for never treated + + dd$FirstTreat[is.na(dd$FirstTreat)] <- 0 cs.out <- did::att_gt( - yname = Y, gname = "FirstTreat", - idname = unit_id, tname = time_id, + yname = Y, gname = "FirstTreat", + idname = unit_id, tname = time_id, xformla = if (!is.null(X)) stats::as.formula(paste("~", paste(X, collapse = "+"))) else ~1, control_group = "notyettreated", allow_unbalanced_panel = TRUE, - data = as.data.frame(dd), # did package often prefers data.frame + data = as.data.frame(dd), # did package often prefers data.frame est_method = "reg", base_period = csdid.base_period ) - simple <- tryCatch(did::aggte(cs.out, type="simple", na.rm=TRUE), - error=function(e) {warning("Error in did::aggte (simple, cs_notyet): ", e$message); NULL}) + simple <- tryCatch(did::aggte(cs.out, type = "simple", na.rm = TRUE), + error = function(e) { + warning("Error in did::aggte (simple, cs_notyet): ", e$message) + NULL + } + ) if (!is.null(simple)) { out_ATT <- simple$overall.att - out_SE <- simple$overall.se - out_lower <- out_ATT - 1.96*out_SE - out_upper <- out_ATT + 1.96*out_SE + out_SE <- simple$overall.se + out_lower <- out_ATT - 1.96 * out_SE + out_upper <- out_ATT + 1.96 * out_SE } dyn <- tryCatch( - did::aggte(cs.out, type="dynamic", na.rm=TRUE, cband=FALSE), - error=function(e) {warning("Error in did::aggte (dynamic, cs_notyet): ", e$message); NULL}) - es_df <- if (is.null(dyn) || length(dyn$egt) == 0) + did::aggte(cs.out, type = "dynamic", na.rm = TRUE, cband = FALSE), + error = function(e) { + warning("Error in did::aggte (dynamic, cs_notyet): ", e$message) + NULL + } + ) + es_df <- if (is.null(dyn) || length(dyn$egt) == 0) { make_es_df(numeric(0), numeric(0), numeric(0), count_df, method) - else + } else { make_es_df(dyn$egt, dyn$att.egt, dyn$se.egt, count_df, method) + } # ── 6) DIDmultiplegtDYN Dyn ────────────────────────────────────────────── } else if (method == "didm") { - res <- DIDmultiplegtDYN::did_multiplegt_dyn( df = as.data.frame(dd), # DIDmultiplegtDYN prefers data.frame outcome = Y, @@ -404,13 +439,13 @@ did_wrapper <- function( # Check structure of res$results for safety if (!is.null(res$results) && "ATE" %in% names(res$results) && length(res$results$ATE) >= 2) { out_ATT <- res$results$ATE[1] - out_SE <- res$results$ATE[2] + out_SE <- res$results$ATE[2] if (length(res$results$ATE) >= 4) { # Check if CI is provided out_lower <- res$results$ATE[3] out_upper <- res$results$ATE[4] } else { - out_lower <- out_ATT - 1.96*out_SE - out_upper <- out_ATT + 1.96*out_SE + out_lower <- out_ATT - 1.96 * out_SE + out_upper <- out_ATT + 1.96 * out_SE } } else { warning("Results from did_multiplegt_dyn ATE not found or in unexpected format.") @@ -418,47 +453,52 @@ did_wrapper <- function( Placebos <- res$results$Placebos - Effects <- res$results$Effects - T.pre <- if (!is.null(Placebos) && inherits(Placebos, "matrix")) nrow(Placebos) else 0 + Effects <- res$results$Effects + T.pre <- if (!is.null(Placebos) && inherits(Placebos, "matrix")) nrow(Placebos) else 0 T.post <- if (!is.null(Effects) && inherits(Effects, "matrix")) nrow(Effects) else 0 if (T.pre + T.post == 0) { es_df <- make_es_df(numeric(0), numeric(0), numeric(0), count_df, method) } else { - est_placebo <- if (T.pre > 0 && "Estimate" %in% 
colnames(Placebos)) Placebos[,"Estimate"] else numeric(0) - est_effect <- if (T.post > 0 && "Estimate" %in% colnames(Effects)) Effects[,"Estimate"] else numeric(0) + est_placebo <- if (T.pre > 0 && "Estimate" %in% colnames(Placebos)) Placebos[, "Estimate"] else numeric(0) + est_effect <- if (T.post > 0 && "Estimate" %in% colnames(Effects)) Effects[, "Estimate"] else numeric(0) est <- c(est_placebo, est_effect) - se_placebo <- if (T.pre > 0 && "SE" %in% colnames(Placebos)) Placebos[,"SE"] else numeric(0) - se_effect <- if (T.post > 0 && "SE" %in% colnames(Effects)) Effects[,"SE"] else numeric(0) + se_placebo <- if (T.pre > 0 && "SE" %in% colnames(Placebos)) Placebos[, "SE"] else numeric(0) + se_effect <- if (T.post > 0 && "SE" %in% colnames(Effects)) Effects[, "SE"] else numeric(0) ses <- c(se_placebo, se_effect) # Ensure lengths match before creating data frame - if(length(est) != length(ses)) { + if (length(est) != length(ses)) { warning("Mismatch in length of estimates and standard errors from did_multiplegt_dyn.") # Fallback to empty or handle error appropriately es_df <- make_es_df(numeric(0), numeric(0), numeric(0), count_df, method) } else { - peri <- c(if (T.pre>0) seq(-T.pre,-1) else numeric(0), - if (T.post>0) seq(0, T.post-1) else numeric(0)) # didm effects often 0-indexed - - peri <- c(if (T.pre>0) seq(-T.pre,-1) else numeric(0), - if (T.post>0) seq(1,T.post) else numeric(0)) + peri <- c( + if (T.pre > 0) seq(-T.pre, -1) else numeric(0), + if (T.post > 0) seq(0, T.post - 1) else numeric(0) + ) # didm effects often 0-indexed + + peri <- c( + if (T.pre > 0) seq(-T.pre, -1) else numeric(0), + if (T.post > 0) seq(1, T.post) else numeric(0) + ) es_df <- make_es_df(peri, est, ses, count_df, method) } } } - list(ATT = out_ATT, ATT_se = out_SE, - CI_lower = out_lower, CI_upper = out_upper, - es = es_df) + list( + ATT = out_ATT, ATT_se = out_SE, + CI_lower = out_lower, CI_upper = out_upper, + es = es_df + ) } ## ── SE / bootstrap handling ──────────────────────────────────────────── if (se == "default") { - out_full <- run_estimator_once(data) # Ensure ATT_se is not zero or NA before calculating p-value overall_p <- if (!is.na(out_full$ATT_se) && out_full$ATT_se != 0) { @@ -466,12 +506,10 @@ did_wrapper <- function( } else { NA_real_ } - - } else { # bootstrap or jackknife cluster_ids <- unique(data[[unit_id]]) # Base R, fine - n_clusters <- length(cluster_ids) + n_clusters <- length(cluster_ids) # Ensure nboots is sensible for jackknife if (se == "jackknife" && nboots != n_clusters) { @@ -481,13 +519,13 @@ did_wrapper <- function( boot_fun <- function(b) { # b is the iteration number - if (se %in% c("boot","bootstrap")) { + if (se %in% c("boot", "bootstrap")) { # Stratified bootstrap if applicable, or simple cluster bootstrap samp_indices <- sample(seq_along(cluster_ids), n_clusters, replace = TRUE) samp_cluster_names <- cluster_ids[samp_indices] # Efficiently create bootstrap sample by selecting rows # This assumes unit_id column is a factor or character - d_b <- data[data[[unit_id]] %in% samp_cluster_names, ] + d_b <- data[data[[unit_id]] %in% samp_cluster_names, ] # To handle duplicated clusters correctly if units within clusters are distinct in d_b: # This requires more care if unit IDs are not unique across original clusters # A common way is to replicate data for chosen clusters and assign new unique IDs if needed by downstream. @@ -495,7 +533,7 @@ did_wrapper <- function( # The current approach data[data[[unit_id]] %in% samp, ] is standard for cluster bootstrap. 
} else { # jackknife omit_cluster_name <- cluster_ids[b] # b is 1 to n_clusters - d_b <- data[data[[unit_id]] != omit_cluster_name, ] + d_b <- data[data[[unit_id]] != omit_cluster_name, ] } run_estimator_once(d_b) } @@ -505,19 +543,20 @@ did_wrapper <- function( future::plan(future::multisession, workers = core) # Use future::multisession # Ensure seed is handled correctly for parallel processing rep_list <- future.apply::future_lapply(seq_len(nboots), boot_fun, - future.seed = TRUE) # future.seed=TRUE is good + future.seed = TRUE + ) # future.seed=TRUE is good future::plan(future::sequential) # Reset plan } else { rep_list <- lapply(seq_len(nboots), boot_fun) } - att_reps <- vapply(rep_list, `[[`, numeric(1), "ATT") + att_reps <- vapply(rep_list, `[[`, numeric(1), "ATT") # Remove NAs from bootstrap replications if any estimator failed att_reps_clean <- att_reps[!is.na(att_reps)] - if(length(att_reps_clean) < length(att_reps)){ + if (length(att_reps_clean) < length(att_reps)) { warning(paste(length(att_reps) - length(att_reps_clean), "bootstrap/jackknife replications resulted in NA ATT values and were dropped.")) } - if(length(att_reps_clean) < 2){ # Need at least 2 for sd() + if (length(att_reps_clean) < 2) { # Need at least 2 for sd() warning("Too few successful bootstrap/jackknife replications to calculate SE/CI.") out_full <- run_estimator_once(data) # Get point estimate out_full$ATT_se <- NA_real_ @@ -525,14 +564,17 @@ did_wrapper <- function( out_full$CI_upper <- NA_real_ overall_p <- NA_real_ } else { - out_full <- run_estimator_once(data) # Get point estimate from full sample + out_full <- run_estimator_once(data) # Get point estimate from full sample theta_hat <- out_full$ATT - out_full$CI_lower <- 2*theta_hat - stats::quantile(att_reps_clean, 0.975, na.rm=TRUE) - out_full$CI_upper <- 2*theta_hat - stats::quantile(att_reps_clean, 0.025, na.rm=TRUE) - out_full$ATT_se <- stats::sd(att_reps_clean, na.rm = TRUE) - p1 <- if (theta_hat > 0) mean(att_reps_clean <= 0, na.rm = TRUE) - else mean(att_reps_clean >= 0, na.rm = TRUE) - overall_p <- min(2*p1, 1) + out_full$CI_lower <- 2 * theta_hat - stats::quantile(att_reps_clean, 0.975, na.rm = TRUE) + out_full$CI_upper <- 2 * theta_hat - stats::quantile(att_reps_clean, 0.025, na.rm = TRUE) + out_full$ATT_se <- stats::sd(att_reps_clean, na.rm = TRUE) + p1 <- if (theta_hat > 0) { + mean(att_reps_clean <= 0, na.rm = TRUE) + } else { + mean(att_reps_clean >= 0, na.rm = TRUE) + } + overall_p <- min(2 * p1, 1) } if (nrow(out_full$es) > 0) { @@ -540,7 +582,9 @@ did_wrapper <- function( boot_mat_list <- lapply(rep_list, function(x) { - if (is.null(x$es) || nrow(x$es) == 0) return(rep(NA_real_, length(periods_from_es))) + if (is.null(x$es) || nrow(x$es) == 0) { + return(rep(NA_real_, length(periods_from_es))) + } # Match based on rownames (periods) es_rep_periods <- as.numeric(rownames(x$es)) matched_att <- x$es$ATT[match(periods_from_es, es_rep_periods)] @@ -555,8 +599,8 @@ did_wrapper <- function( periods_filtered <- periods_from_es[valid_cols] original_att_filtered <- out_full$es$ATT[valid_cols] - if(ncol(boot_mat_filtered) > 0){ - boot_se <- apply(boot_mat_filtered, 2, stats::sd, na.rm = TRUE) + if (ncol(boot_mat_filtered) > 0) { + boot_se <- apply(boot_mat_filtered, 2, stats::sd, na.rm = TRUE) ci_quantiles_upper <- apply(boot_mat_filtered, 2, stats::quantile, probs = 0.975, na.rm = TRUE) ci_quantiles_lower <- apply(boot_mat_filtered, 2, stats::quantile, probs = 0.025, na.rm = TRUE) @@ -564,12 +608,17 @@ did_wrapper <- function( 
seq_len(ncol(boot_mat_filtered)), function(j) { est_j <- original_att_filtered[j] - reps_j <- boot_mat_filtered[,j] + reps_j <- boot_mat_filtered[, j] reps_j_clean <- reps_j[!is.na(reps_j)] - if(length(reps_j_clean) < 2) return(NA_real_) - p1 <- if (est_j > 0) mean(reps_j_clean <= 0, na.rm = TRUE) - else mean(reps_j_clean >= 0, na.rm = TRUE) - min(2*p1, 1) + if (length(reps_j_clean) < 2) { + return(NA_real_) + } + p1 <- if (est_j > 0) { + mean(reps_j_clean <= 0, na.rm = TRUE) + } else { + mean(reps_j_clean >= 0, na.rm = TRUE) + } + min(2 * p1, 1) }, numeric(1) ) @@ -580,10 +629,10 @@ did_wrapper <- function( out_full$es$CI.upper <- NA_real_ out_full$es$p.value <- NA_real_ - out_full$es$`S.E.`[valid_cols] <- boot_se - out_full$es$CI.lower[valid_cols] <- 2*original_att_filtered - ci_quantiles_upper - out_full$es$CI.upper[valid_cols] <- 2*original_att_filtered - ci_quantiles_lower - out_full$es$p.value[valid_cols] <- pvals_es + out_full$es$`S.E.`[valid_cols] <- boot_se + out_full$es$CI.lower[valid_cols] <- 2 * original_att_filtered - ci_quantiles_upper + out_full$es$CI.upper[valid_cols] <- 2 * original_att_filtered - ci_quantiles_lower + out_full$es$p.value[valid_cols] <- pvals_es } else { warning("No valid data for bootstrapping event study standard errors.") } @@ -593,11 +642,11 @@ did_wrapper <- function( ## ── Return object ─────────────────────────────────────────────────────── res <- list( est.avg = data.frame( - ATT.avg = out_full$ATT, - S.E. = out_full$ATT_se, + ATT.avg = out_full$ATT, + S.E. = out_full$ATT_se, CI.lower = out_full$CI_lower, CI.upper = out_full$CI_upper, - p.value = overall_p, + p.value = overall_p, row.names = NULL # Ensure no rownames for this single-row data frame ), est.att = out_full$es # This should have periods as rownames from make_es_df diff --git a/R/fect_gsynth.R b/R/fect_gsynth.R index da0b90e..bda87b1 100644 --- a/R/fect_gsynth.R +++ b/R/fect_gsynth.R @@ -93,9 +93,10 @@ fect.gsynth <- function(Y, # Outcome variable, (T*N) matrix if (is.null(W)) { W.use <- as.matrix(0) - } else { - stop("Gsynth doesn't suppoort weighted outcome model in this version.\n") - } + } + # else { + # stop("Gsynth doesn't suppoort weighted outcome model in this version.\n") + # } if (!0 %in% I.tr) { ## a (TT*Ntr) matrix, time dimension: before treatment diff --git a/R/fect_sens.R b/R/fect_sens.R index 60aa6bf..df5779f 100644 --- a/R/fect_sens.R +++ b/R/fect_sens.R @@ -1,20 +1,26 @@ fect_sens <- function( fect.out, - post.periods = NA, # Post-treatment periods - l_vec = NA, # Optional custom weighting vector - Mbarvec = seq(0, 1, by=0.1), # Vector of Mbar for RM analysis - Mvec = seq(0, 0.25, 0.05),# Vector of M for Smoothness analysis - periodMbarvec = c(0,0.5), # Vector of Mbar for period-by-period RM analysis - periodMvec = c(0,0.1), # Vector of M for Smoothness analysis - parallel = TRUE -) { + post.periods = NA, # Post-treatment periods + l_vec = NA, # Optional custom weighting vector + Mbarvec = seq(0, 1, by = 0.1), # Vector of Mbar for RM analysis + Mvec = seq(0, 0.25, 0.05), # Vector of M for Smoothness analysis + periodMbarvec = c(0, 0.5), # Vector of Mbar for period-by-period RM analysis + periodMvec = c(0, 0.1), # Vector of M for Smoothness analysis + parallel = FALSE, + cores = NULL) { + if (parallel && is.null(cores)) { + cores <- parallel::detectCores() - 1 + cl <- parallel::makeCluster(cores) + doParallel::registerDoParallel(cl) + } + # ------------------------------------------------------------------- # 1) Identify relevant periods and extract fect estimates + vcov 
# ------------------------------------------------------------------- # We want event-time strings for pre and post - pre.periods = fect.out$placebo.period[1]:fect.out$placebo.period[2] - if (any(is.na(post.periods))){ - post.periods = 1:length(fect.out$time)-length(fect.out$pre.periods)-(fect.out$placebo.period[2]-fect.out$placebo.period[1]+1) + pre.periods <- fect.out$placebo.period[1]:fect.out$placebo.period[2] + if (any(is.na(post.periods))) { + post.periods <- 1:length(fect.out$time) - length(fect.out$pre.periods) - (fect.out$placebo.period[2] - fect.out$placebo.period[1] + 1) } all.periods.char <- as.character(c(pre.periods, post.periods)) @@ -27,14 +33,14 @@ fect_sens <- function( # Numeric indices corresponding to those event-time strings idx <- match(all.periods.char, rownames(fect.out$est.att)) - idx <- idx[!is.na(idx)] # remove anything not found + idx <- idx[!is.na(idx)] # remove anything not found # Extract DTE estimates (beta.hat) and var-cov (vcov.hat) beta.hat <- fect.out$est.att[idx, 1] vcov.hat <- fect.out$att.vcov[idx, idx] # Counts of pre and post periods - numPrePeriods <- length(pre.periods) + numPrePeriods <- length(pre.periods) numPostPeriods <- length(post.periods) # ------------------------------------------------------------------- @@ -59,8 +65,8 @@ fect_sens <- function( # (b) Period-by-period results # Initialize empty placeholders - fect.out$RM_Sensitivity <- NULL - fect.out$Smooth_Sensitivity <- NULL + fect.out$RM_Sensitivity <- NULL + fect.out$Smooth_Sensitivity <- NULL # ------------------------------------------------------------------- # 3) Relative Magnitude Analysis (RM), if Mbarvec is non-empty @@ -68,15 +74,16 @@ fect_sens <- function( if (!is.null(Mbarvec) && length(Mbarvec) > 0) { # 3a) Weighted-average, across the entire post-treatment window rm_sens_results <- HonestDiDFEct::createSensitivityResults_relativeMagnitudes( - betahat = beta.hat, - sigma = vcov.hat, - numPrePeriods = numPrePeriods, + betahat = beta.hat, + sigma = vcov.hat, + numPrePeriods = numPrePeriods, numPostPeriods = numPostPeriods, - l_vec = w.att, - Mbarvec = Mbarvec, + l_vec = w.att, + Mbarvec = Mbarvec, parallel = parallel ) + rm_original_cs <- HonestDiDFEct::constructOriginalCS( betahat = beta.hat, sigma = vcov.hat, @@ -88,7 +95,7 @@ fect_sens <- function( if (!is.null(periodMbarvec) && length(periodMbarvec) > 0) { # 3b) Period-by-period robust confidence sets # We'll loop over each post-treatment period and set a vector of 0s except 1 for that period - rm_period_output <- cbind.data.frame() # Will accumulate results + rm_period_output <- cbind.data.frame() # Will accumulate results for (t_i in seq_len(numPostPeriods)) { # l_vec for the "t_i-th" post period (in 1..numPostPeriods) @@ -106,7 +113,7 @@ fect_sens <- function( numPostPeriods = numPostPeriods, l_vec = dte_l, Mbarvec = periodMbarvec, - parallel = parallel + parallel = parallel ) # Convert to data.frame @@ -122,9 +129,9 @@ fect_sens <- function( # Store results in fect.out$sensitivity.rm fect.out$sensitivity.rm <- list( - results = rm_sens_results, - original = rm_original_cs, - periods = rm_period_output + results = rm_sens_results, + original = rm_original_cs, + periods = rm_period_output ) } @@ -133,14 +140,15 @@ fect_sens <- function( # ------------------------------------------------------------------- if (!is.null(Mvec) && length(Mvec) > 0) { # 4a) Weighted-average analysis + smooth_sens_results <- HonestDiDFEct::createSensitivityResults( - betahat = beta.hat, - sigma = vcov.hat, - numPrePeriods = numPrePeriods, 
+ betahat = beta.hat, + sigma = vcov.hat, + numPrePeriods = numPrePeriods, numPostPeriods = numPostPeriods, - method = "C-LF", - l_vec = w.att, - Mvec = Mvec, + method = "C-LF", + l_vec = w.att, + Mvec = Mvec, parallel = parallel ) @@ -162,13 +170,13 @@ fect_sens <- function( dte_l[t_i] <- 1 honest.dte <- HonestDiDFEct::createSensitivityResults( - betahat = beta.hat, - sigma = vcov.hat, - numPrePeriods = numPrePeriods, + betahat = beta.hat, + sigma = vcov.hat, + numPrePeriods = numPrePeriods, numPostPeriods = numPostPeriods, - method = "C-LF", - l_vec = dte_l, - Mvec = periodMvec, + method = "C-LF", + l_vec = dte_l, + Mvec = periodMvec, parallel = parallel ) @@ -180,12 +188,14 @@ fect_sens <- function( # Store results in fect.out$sensitivity.smooth fect.out$sensitivity.smooth <- list( - results = smooth_sens_results, - original = sm_original_cs, - periods = smooth_period_output + results = smooth_sens_results, + original = sm_original_cs, + periods = smooth_period_output ) } - + if (parallel) { + stopCluster(cl) + } # ------------------------------------------------------------------- # 5) Return the updated fect.out # ------------------------------------------------------------------- diff --git a/R/getcohort.R b/R/getcohort.R index d0f31f2..0ef5b6b 100644 --- a/R/getcohort.R +++ b/R/getcohort.R @@ -1,97 +1,96 @@ get.cohort <- function(data, D, index, - varname = NULL, #length of 4 c("FirstTreated","Cohort",'Time_to_Treatment',"Time_to_Exit") + varname = NULL, # length of 4 c("FirstTreated","Cohort",'Time_to_Treatment',"Time_to_Exit") start0 = FALSE, # default 0 as the last pre-treatment period, 1 as the first post-treatment period entry.time = NULL, - drop.always.treat = FALSE){ + drop.always.treat = FALSE) { data.raw <- data - if(length(D)>1){ + if (length(D) > 1) { stop("Treatment indicator \"D\" should have length 1.") } if (is.data.frame(data) == FALSE || length(class(data)) > 1) { data <- as.data.frame(data) } varnames <- colnames(data) - if(!D%in%varnames){ + if (!D %in% varnames) { stop("\"D\" misspecified.\n") } - if (is.logical(start0) == FALSE & !start0%in%c(0, 1)) { + if (is.logical(start0) == FALSE & !start0 %in% c(0, 1)) { stop("\"start0\" is not a logical flag.") - } + } - if (is.logical(drop.always.treat) == FALSE & !drop.always.treat%in%c(0, 1)) { + if (is.logical(drop.always.treat) == FALSE & !drop.always.treat %in% c(0, 1)) { stop("\"drop.always.treat\" is not a logical flag.") - } + } - if(is.null(varname)){ - varname1 <- 'FirstTreat' - if(varname1 %in% colnames(data)){ + if (is.null(varname)) { + varname1 <- "FirstTreat" + if (varname1 %in% colnames(data)) { warnings("The variable \"FirstTreat\" will be replaced.\n") - data[,varname1] <- NULL + data[, varname1] <- NULL } - varname2 <- 'Cohort' - if(varname2 %in% colnames(data)){ + varname2 <- "Cohort" + if (varname2 %in% colnames(data)) { warnings("The variable \"Cohort\" will be replaced.\n") - data[,varname2] <- NULL + data[, varname2] <- NULL } varname3 <- "Time_to_Treatment" - if(varname3 %in% colnames(data)){ + if (varname3 %in% colnames(data)) { warnings("The variable \"Time_to_Treatment\" will be replaced.\n") - data[,varname3] <- NULL + data[, varname3] <- NULL } varname4 <- "Time_to_Exit" - if(varname4 %in% colnames(data)){ + if (varname4 %in% colnames(data)) { warnings("The variable \"Time_to_Exit\" will be replaced.\n") - data[,varname4] <- NULL + data[, varname4] <- NULL } - } - else{ - if(length(varname)!=4){ + } else { + if (length(varname) != 4) { stop("\"varname\" should have three elements.") } - 
if(varname[1] %in% colnames(data)){ - warnings(paste0("Variable ",varname[1]," will be replaced.\n")) - data[,varname1] <- NULL + if (varname[1] %in% colnames(data)) { + warnings(paste0("Variable ", varname[1], " will be replaced.\n")) + data[, varname1] <- NULL + } + if (varname[2] %in% colnames(data)) { + warnings(paste0("Variable ", varname[2], " will be replaced.\n")) + data[, varname2] <- NULL } - if(varname[2] %in% colnames(data)){ - warnings(paste0("Variable ",varname[2]," will be replaced.\n")) - data[,varname2] <- NULL + if (varname[3] %in% colnames(data)) { + warnings(paste0("Variable ", varname[3], " will be replaced.\n")) + data[, varname3] <- NULL } - if(varname[3] %in% colnames(data)){ - warnings(paste0("Variable ",varname[3]," will be replaced.\n")) - data[,varname3] <- NULL + if (varname[4] %in% colnames(data)) { + warnings(paste0("Variable ", varname[4], " will be replaced.\n")) + data[, varname4] <- NULL } - if(varname[4] %in% colnames(data)){ - warnings(paste0("Variable ",varname[4]," will be replaced.\n")) - data[,varname4] <- NULL - } } - if(length(index)!=2){ + if (length(index) != 2) { stop("\"index\" should have length 2.\n") } - for(sub.index in index){ - if(!sub.index%in%index){ - stop(paste0(sub.index,"is not in the dataset.\n")) + for (sub.index in index) { + if (!sub.index %in% index) { + stop(paste0(sub.index, "is not in the dataset.\n")) } } n.obs <- dim(data)[1] - + ## D and index should not have NA - for(sub.var in c(D,index)){ - if(length(which(is.na(data[,sub.var])))>0){ - stop(paste0(sub.var,"contains missing values.\n")) + for (sub.var in c(D, index)) { + if (length(which(is.na(data[, sub.var]))) > 0) { + stop(paste0(sub.var, "contains missing values.\n")) } } ## check if uniquely identified - unique_label <- unique(paste(data[,index[1]],"_",data[,index[2]],sep="")) - if (length(unique_label)!= dim(data)[1]) { + unique_label <- unique(paste(data[, index[1]], "_", data[, index[2]], sep = "")) + if (length(unique_label) != dim(data)[1]) { stop("Observations are not uniquely defined by unit and time indicators.") } @@ -99,51 +98,50 @@ get.cohort <- function(data, stop("Treatment indicator should be a numeric value.") } - if (!(1%in%data[, D] & 0%in%data[,D] & length(unique(data[,D])) == 2)) { - stop(paste("Error values in variable \"", D,"\".", sep = "")) + if (!(1 %in% data[, D] & 0 %in% data[, D] & length(unique(data[, D])) == 2)) { + stop(paste("Error values in variable \"", D, "\".", sep = "")) } ## index names index.id <- index[1] index.time <- index[2] - data.raw[,index.id] <- as.character(data.raw[,index.id]) + data.raw[, index.id] <- as.character(data.raw[, index.id]) ## raw id and time - raw.id <- sort(unique(data[,index[1]])) - raw.time <- sort(unique(data[,index[2]])) + raw.id <- sort(unique(data[, index[1]])) + raw.time <- sort(unique(data[, index[2]])) N <- length(raw.id) TT <- length(raw.time) ## entry time - if(is.null(entry.time)){ + if (is.null(entry.time)) { staggered <- 1 - } - else{ + } else { staggered <- 0 - if(!is.list(entry.time)){ + if (!is.list(entry.time)) { stop("\"entry.time\" should be a list.\n") } all.entry.time <- c() - for(sub.time in entry.time){ - if(!is.numeric(sub.time)){ + for (sub.time in entry.time) { + if (!is.numeric(sub.time)) { stop("Elements in \"entry.time\" are misspecified.\n") } - if(length(sub.time)!=2){ + if (length(sub.time) != 2) { stop("Elements in \"entry.time\" are misspecified. 
Each element should have length 2.\n") } - if(sub.time[1]>sub.time[2]){ + if (sub.time[1] > sub.time[2]) { stop("Elements in \"entry.time\" are misspecified. For each element, the second should not be smaller than the first.\n") } - all.entry.time <- c(all.entry.time,sub.time) + all.entry.time <- c(all.entry.time, sub.time) } - if(length(all.entry.time)>length(unique(all.entry.time))){ + if (length(all.entry.time) > length(unique(all.entry.time))) { stop("\"entry.time\" has overlapped periods.\n") } } ## sort data - data <- data[order(data[,index.id], data[,index.time]), ] + data <- data[order(data[, index.id], data[, index.time]), ] data.raw <- data id.match <- as.numeric(as.factor(raw.id)) names(id.match) <- as.character(raw.id) @@ -154,109 +152,112 @@ get.cohort <- function(data, ## check balanced panel and fill unbalanced panel Dname <- D T.on <- I <- D <- NULL - if (dim(data)[1] != TT*N) { - data[,index.id] <- as.numeric(as.factor(data[,index.id])) - data[,index.time] <- as.numeric(as.factor(data[,index.time])) - ob.indicator <- data[,index.time] + if (dim(data)[1] != TT * N) { + data[, index.id] <- as.numeric(as.factor(data[, index.id])) + data[, index.time] <- as.numeric(as.factor(data[, index.time])) + ob.indicator <- data[, index.time] id.indicator <- table(data[, index.id]) sub.start <- 1 - for (i in 1:(N - 1)) { - sub.start <- sub.start + id.indicator[i] - sub.end <- sub.start + id.indicator[i+1] - 1 + for (i in 1:(N - 1)) { + sub.start <- sub.start + id.indicator[i] + sub.end <- sub.start + id.indicator[i + 1] - 1 ob.indicator[sub.start:sub.end] <- ob.indicator[sub.start:sub.end] + i * TT } variable <- c(Dname) data_I <- matrix(0, N * TT, 1) data_I[ob.indicator, 1] <- 1 - + data_D <- matrix(NaN, N * TT, 1) - data_D[ob.indicator, 1] <- as.matrix(data[,variable]) + data_D[ob.indicator, 1] <- as.matrix(data[, variable]) colnames(data_D) <- variable I <- matrix(1, TT, N) - D <- matrix(data_D[, Dname],TT,N) + D <- matrix(data_D[, Dname], TT, N) I[is.nan(D)] <- 0 D[is.nan(D)] <- 0 - } - else { - data[,index.id] <- as.numeric(as.factor(data[,index.id])) - data[,index.time] <- as.numeric(as.factor(data[,index.time])) + } else { + data[, index.id] <- as.numeric(as.factor(data[, index.id])) + data[, index.time] <- as.numeric(as.factor(data[, index.time])) I <- matrix(1, TT, N) - D <- matrix(data[,Dname], TT, N) + D <- matrix(data[, Dname], TT, N) } T.on <- matrix(NA, TT, N) for (i in 1:N) { - T.on[, i] <- get_term(D[, i], I[, i], type = "on") + T.on[, i] <- get_term(D[, i], I[, i], type = "on") } T.off <- matrix(NA, TT, N) for (i in 1:N) { - T.off[, i] <- get_term(D[, i], I[, i], type = "off") + T.off[, i] <- get_term(D[, i], I[, i], type = "off") } - + t.on <- c(T.on) t.off <- c(T.off) - use.index <- (data[,index.id]-1)*TT + data[,index.time] - if(start0 == TRUE){ - t.on <- t.on[use.index]-1 - t.off <- t.off[use.index]-1 - } - else{ + use.index <- (data[, index.id] - 1) * TT + data[, index.time] + if (start0 == TRUE) { + t.on <- t.on[use.index] - 1 + t.off <- t.off[use.index] - 1 + } else { t.on <- t.on[use.index] - t.off <- t.off[use.index] + t.off <- t.off[use.index] } tr.pos <- which(apply(D, 2, sum) > 0) co.pos <- which(apply(D, 2, sum) == 0) tr.name <- names(id.match)[tr.pos] co.name <- names(id.match)[co.pos] - - D.cum1 <- apply(D,2,cumsum) - D.cum2 <- apply(D.cum1,2,cumsum) - T0.tr <- apply(matrix(D.cum2[,tr.pos],nrow=dim(D.cum2)[1]),2,function(vec){which(vec==1)}) - T0.tr.origin <- as.numeric(sapply(T0.tr,function(x){names(time.match)[which(time.match==x)]})) - T0.co.origin <- 
rep(NA,length(co.name)) + + D.cum1 <- apply(D, 2, cumsum) + D.cum2 <- apply(D.cum1, 2, cumsum) + T0.tr <- apply(matrix(D.cum2[, tr.pos], nrow = dim(D.cum2)[1]), 2, function(vec) { + which(vec == 1) + }) + T0.tr.origin <- as.numeric(sapply(T0.tr, function(x) { + names(time.match)[which(time.match == x)] + })) + T0.co.origin <- rep(NA, length(co.name)) - first.treat <- cbind.data.frame(index.id = c(tr.name,co.name), - FirstTreat = c(T0.tr.origin,T0.co.origin)) + first.treat <- cbind.data.frame( + index.id = c(tr.name, co.name), + FirstTreat = c(T0.tr.origin, T0.co.origin) + ) - first.treat[,varname1] <- first.treat[,"FirstTreat"] - if(varname1!='FirstTreat'){ - first.treat[,"FirstTreat"] <- NULL + first.treat[, varname1] <- first.treat[, "FirstTreat"] + if (varname1 != "FirstTreat") { + first.treat[, "FirstTreat"] <- NULL } - data.raw <- merge(data.raw,first.treat,by.x = index.id, by.y = "index.id") - data.raw[,varname2] <- 'Control' - data.raw[,varname3] <- t.on - if(sum(!is.na(t.off))>0){ - data.raw[,varname4] <- t.off + data.raw <- merge(data.raw, first.treat, by.x = index.id, by.y = "index.id") + data.raw[, varname2] <- "Control" + data.raw[, varname3] <- t.on + if (sum(!is.na(t.off)) > 0) { + data.raw[, varname4] <- t.off } - if(staggered==1){ - all.first.treat <- sort(unique(data.raw[,varname1])) - for(sub.first in all.first.treat){ - cohort.name <- paste0("Cohort:",sub.first) - data.raw[which(data.raw[,varname1] == sub.first),varname2] <- cohort.name + if (staggered == 1) { + all.first.treat <- sort(unique(data.raw[, varname1])) + for (sub.first in all.first.treat) { + cohort.name <- paste0("Cohort:", sub.first) + data.raw[which(data.raw[, varname1] == sub.first), varname2] <- cohort.name } - } - else{ - for(sub.time in entry.time){ - cohort.name <- paste0("Cohort:",sub.time[1],'-',sub.time[2]) - data.raw[intersect(which(data.raw[,varname1]>=sub.time[1]),which(data.raw[,varname1]<=sub.time[2])),varname2] <- cohort.name + } else { + for (sub.time in entry.time) { + cohort.name <- paste0("Cohort:", sub.time[1], "-", sub.time[2]) + data.raw[intersect(which(data.raw[, varname1] >= sub.time[1]), which(data.raw[, varname1] <= sub.time[2])), varname2] <- cohort.name } cohort.name <- "Cohort:Other" - data.raw[which(!is.na(data.raw[,varname1]) & data.raw[,varname2] == 'Control'),varname2] <- cohort.name + data.raw[which(!is.na(data.raw[, varname1]) & data.raw[, varname2] == "Control"), varname2] <- cohort.name } - if(drop.always.treat){ + if (drop.always.treat) { size.0 <- dim(data.raw)[1] unit.index <- index[1] - data.raw <- as.data.frame(data.raw %>% group_by(get(unit.index)) %>% mutate(treatment_mean = mean(get(Dname),na.rm = TRUE))) - data.raw <- data.raw[which(data.raw$treatment_mean<1),] + data.raw <- as.data.frame(data.raw %>% group_by(get(unit.index)) %>% mutate(treatment_mean = mean(get(Dname), na.rm = TRUE))) + data.raw <- data.raw[which(data.raw$treatment_mean < 1), ] size.1 <- dim(data.raw)[1] diff.1 <- size.0 - size.1 - message(paste0("Number of Always Treated Observations Removed:",diff.1,".\n")) - data.raw[,'treatment_mean'] <- NULL + message(paste0("Number of Always Treated Observations Removed:", diff.1, ".\n")) + data.raw[, "treatment_mean"] <- NULL } return(data.raw) @@ -264,21 +265,21 @@ get.cohort <- function(data, get_term <- function(d, - ii, + ii, type = "on") { dd <- d iii <- ii first.pos <- min(which(iii == 1)) if (first.pos != 1) { - dd <- dd[-(1:(first.pos-1))] - iii <- iii[-(1:(first.pos-1))] - } - T <- length(dd) + dd <- dd[-(1:(first.pos - 1))] + iii <- 
iii[-(1:(first.pos - 1))] + } + T <- length(dd) if (0 %in% iii) { if (T > 1) { - for (i in 1:(T-1)) { - if (iii[i+1] == 0) { - dd[i+1] <- dd[i] + for (i in 1:(T - 1)) { + if (iii[i + 1] == 0) { + dd[i + 1] <- dd[i] } } } @@ -287,55 +288,47 @@ get_term <- function(d, if (type == "off") { dd <- abs(dd - 1) } - d1 <- dd[1:(T-1)] + d1 <- dd[1:(T - 1)] d2 <- dd[2:T] - - if(T==1){ + + if (T == 1) { term <- rep(NA, 1) - } - else if (sum(d1 == d2) == (T-1)) { + } else if (sum(d1 == d2) == (T - 1)) { term <- rep(NA, T) - } - else { + } else { change.pos <- which(d1 != d2) + 1 change.length <- length(change.pos) - term <- NULL - if (dd[1] == 0) { + term <- NULL + if (dd[1] == 0) { for (i in 1:(change.length)) { if (i == 1) { part.term <- (2 - change.pos[i]):0 - } - else { + } else { if (i %% 2 == 0) { - part.term <- 1:(change.pos[i] - change.pos[i-1]) - } - else { - part.term <- (change.pos[i-1] - change.pos[i] + 1):0 + part.term <- 1:(change.pos[i] - change.pos[i - 1]) + } else { + part.term <- (change.pos[i - 1] - change.pos[i] + 1):0 } } term <- c(term, part.term) } - } - else if (dd[1] == 1) { + } else if (dd[1] == 1) { for (i in 1:(change.length)) { if (i == 1) { part.term <- rep(NA, change.pos[i] - 1) - } - else { + } else { if (i %% 2 == 0) { - part.term <- (change.pos[i-1] - change.pos[i] + 1):0 - } - else { - part.term <- 1:(change.pos[i] - change.pos[i-1]) + part.term <- (change.pos[i - 1] - change.pos[i] + 1):0 + } else { + part.term <- 1:(change.pos[i] - change.pos[i - 1]) } } term <- c(term, part.term) } - } + } if (dd[change.pos[change.length]] == 0) { term <- c(term, rep(NA, (T - change.pos[change.length] + 1))) - } - else { + } else { term <- c(term, 1:(T - change.pos[change.length] + 1)) } } diff --git a/data/gs2020.rda b/data/gs2020.rda new file mode 100644 index 0000000..fd1ad69 Binary files /dev/null and b/data/gs2020.rda differ diff --git a/data/hh2019.rda b/data/hh2019.rda new file mode 100644 index 0000000..42a3c63 Binary files /dev/null and b/data/hh2019.rda differ diff --git a/data/simdata.rda b/data/simdata.rda new file mode 100644 index 0000000..b7e0b61 Binary files /dev/null and b/data/simdata.rda differ diff --git a/data/simgsynth.rda b/data/simgsynth.rda new file mode 100644 index 0000000..16db565 Binary files /dev/null and b/data/simgsynth.rda differ diff --git a/data/turnout.rda b/data/turnout.rda new file mode 100644 index 0000000..8745d5a Binary files /dev/null and b/data/turnout.rda differ diff --git a/man/att.cumu.rd b/man/att.cumu.rd deleted file mode 100644 index a30d4c1..0000000 --- a/man/att.cumu.rd +++ /dev/null @@ -1,41 +0,0 @@ -\encoding{UTF-8} -\name{att.cumu} -\alias{att.cumu} -\title{Calculate Cumulative Treatment Effects} -\description{ - Calculate cumulative treatment effects based on the results of a \code{\link{fect}} object. -} -\usage{ -att.cumu(x, period = NULL, weighted = TRUE, alpha = 0.05, type = "on", plot = FALSE) -} -\arguments{ - \item{x}{A \code{\link{fect}} object.} - \item{period}{A two-element numeric vector specifying the range of terms during which treatment effects are to be accumulated, e.g., \code{period = c(-1, 1)}.} - \item{weighted}{A logical flag specifying whether to calculate weighted cumulative treatment effects based on counts at each period. Default is \code{TRUE}.} - \item{alpha}{A numerical value specifying the significance level. Default is \code{0.05}.} - \item{type}{A string that specifies the type of effect to calculate. 
Must be one of the following: \code{"on"} (switch-on treatment effect) or \code{"off"} (switch-off treatment effect). Default is \code{"on"}.} - \item{plot}{A logical flag indicating whether to plot cumulative effects. Default is \code{FALSE}.} -} -\author{ - Licheng Liu, Ye Wang, and Yiqing Xu -} -\references{ - Athey, S., Bayati, M., Doudchenko, N., Imbens, G., and Khosravi, K. (2021). - Matrix completion methods for causal panel data models. - \emph{Journal of the American Statistical Association}, 116(536), 1716-1730. - - Bai, J. (2009). - Panel data models with interactive fixed effects. - \emph{Econometrica}, 77(4), 1229-1279. - - Liu, L., Wang, Y., and Xu, Y. (2022). - A Practical Guide to Counterfactual Estimators for Causal Inference with Time-Series Cross-Sectional Data. - \emph{American Journal of Political Science}, 68(1), 160-176. - - Xu, Y. (2017). - Generalized Synthetic Control Method: Causal Inference with Interactive Fixed Effects Models. - \emph{Political Analysis}, 25(1), 57-76. -} -\seealso{ - \code{\link{fect}}, \code{\link{plot.fect}} -} \ No newline at end of file diff --git a/man/did_wrapper.Rd b/man/did_wrapper.Rd index edf2181..e40fac6 100644 --- a/man/did_wrapper.Rd +++ b/man/did_wrapper.Rd @@ -11,8 +11,8 @@ did_wrapper( D, X = NULL, index, - method = c("twfe", "st", "iw", "cs_never", "cs_notyet", "pm", "didm"), - se = c("default", "boot", "jackknife"), + method = c("twfe", "st", "iw", "cs_never", "cs_notyet", "didm"), + se = c("default", "boot", "bootstrap", "jackknife"), nboots = 200, parallel = TRUE, core = NULL, @@ -30,7 +30,7 @@ did_wrapper( \item{X}{Optional covariate vector for adjustment.} \item{index}{Character vector of unit and time variable names, e.g., \code{c("id", "time")}.} \item{method}{DiD method: \code{"twfe"}, \code{"st"}, \code{"iw"}, \code{"cs_never"}, \code{"cs_notyet"}, or \code{"didm"}.} - \item{se}{Standard error method: \code{"default"}, \code{"boot"}, or \code{"jackknife"}.} + \item{se}{Standard error method: \code{"default"}, \code{"boot"}, \code{"bootstrap"}, or \code{"jackknife"}.} \item{nboots}{Number of bootstrap replications (if applicable).} \item{parallel}{Logical; use parallel computation for bootstrapping.} \item{core}{Number of CPU cores to use if \code{parallel = TRUE}.} diff --git a/man/effect.rd b/man/effect.rd index 2c1ee63..52df4a8 100644 --- a/man/effect.rd +++ b/man/effect.rd @@ -6,7 +6,7 @@ } \usage{ -effect(x, cumu = TRUE, id = NULL, period = NULL) +effect(x, cumu = TRUE, id = NULL, period = NULL, plot = FALSE, count = TRUE, xlab = NULL, ylab = NULL, main = NULL) } \arguments{ @@ -19,6 +19,14 @@ effect(x, cumu = TRUE, id = NULL, period = NULL) \item{period}{Numeric vector of length 2 specifying the time window \code{c(start, end)} for effect calculation. If \code{NULL}, uses the maximum possible window based on the data.} \item{plot}{Logical. If \code{TRUE}, creates a visualization of the cumulative treatment effects with confidence intervals and a bar chart showing the number of treated units at each time point. Default is \code{FALSE}.} + + \item{count}{Logical. If \code{TRUE}, shows the count bars in the plot.} + + \item{xlab}{Character. X-axis label for the plot.} + + \item{ylab}{Character. Y-axis label for the plot.} + + \item{main}{Character. 
Main title for the plot.} } \details{ diff --git a/man/esplot.Rd b/man/esplot.Rd index 44fddea..733666e 100644 --- a/man/esplot.Rd +++ b/man/esplot.Rd @@ -18,7 +18,7 @@ esplot(data, Period = NULL, Estimate = "ATT", SE = NULL, highlight.colors = NULL, lcolor = NULL, lwidth = NULL, ltype = c("solid", "solid"), connected = FALSE, ci.outline = FALSE, main = NULL, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, - gridOff = TRUE, stats.pos = NULL, theme.bw = TRUE, + gridOff = FALSE, stats.pos = NULL, theme.bw = TRUE, cex.main = NULL, cex.axis = NULL, cex.lab = NULL, cex.text = NULL, axis.adjust = FALSE, color = "#000000", count.color = "gray70", count.alpha = 0.4, diff --git a/man/fect_sens.Rd b/man/fect_sens.Rd index 048c137..b903b05 100644 --- a/man/fect_sens.Rd +++ b/man/fect_sens.Rd @@ -13,7 +13,8 @@ fect_sens( Mvec = seq(0, 0.25, 0.05), periodMbarvec = c(0, 0.5), periodMvec = c(0, 0.1), - parallel = TRUE + parallel = FALSE, + cores = NULL ) } \arguments{ @@ -24,7 +25,8 @@ fect_sens( \item{Mvec}{Values of \code{M} for overall smoothness-based sensitivity analysis.} \item{periodMbarvec}{Values of \code{Mbar} for period-specific RM sensitivity analysis.} \item{periodMvec}{Values of \code{M} for period-specific smoothness sensitivity analysis.} - \item{parallel}{Logical; if \code{TRUE} (default), uses parallel computation where supported.} + \item{parallel}{Logical; if \code{TRUE}, uses parallel computation where supported. Default is \code{FALSE}.} + \item{cores}{Optional integer; number of workers when \code{parallel = TRUE}.} } \details{ This function: diff --git a/man/gs2020.Rd b/man/gs2020.Rd new file mode 100644 index 0000000..0c3613f --- /dev/null +++ b/man/gs2020.Rd @@ -0,0 +1,7 @@ +\name{gs2020} +\alias{gs2020} +\docType{data} +\title{Simulated Gsynth-like Panel Data (No Reversal)} +\description{A simulated dataset for demonstration and testing.} +\format{data.frame} +\keyword{datasets} diff --git a/man/hh2019.Rd b/man/hh2019.Rd new file mode 100644 index 0000000..75e324d --- /dev/null +++ b/man/hh2019.Rd @@ -0,0 +1,7 @@ +\name{hh2019} +\alias{hh2019} +\docType{data} +\title{Simulated Panel Data Example} +\description{A simulated dataset used in package vignettes and examples.} +\format{data.frame} +\keyword{datasets} diff --git a/src/RcppExports.o b/src/RcppExports.o index 4253bdc..30a0688 100644 Binary files a/src/RcppExports.o and b/src/RcppExports.o differ diff --git a/src/auxiliary.o b/src/auxiliary.o index 5680634..6c71b98 100644 Binary files a/src/auxiliary.o and b/src/auxiliary.o differ diff --git a/src/binary_qr.o b/src/binary_qr.o index 4f49055..a52dcb7 100644 Binary files a/src/binary_qr.o and b/src/binary_qr.o differ diff --git a/src/binary_sub.o b/src/binary_sub.o index 6e95a54..f797489 100644 Binary files a/src/binary_sub.o and b/src/binary_sub.o differ diff --git a/src/binary_svd.o b/src/binary_svd.o index 5082b1c..5f66324 100644 Binary files a/src/binary_svd.o and b/src/binary_svd.o differ diff --git a/src/fe_sub.o b/src/fe_sub.o index c41f03d..e643a1d 100644 Binary files a/src/fe_sub.o and b/src/fe_sub.o differ diff --git a/src/fect.so b/src/fect.so index 929e453..9898acb 100755 Binary files a/src/fect.so and b/src/fect.so differ diff --git a/src/ife.o b/src/ife.o index dc01516..20a8bbe 100644 Binary files a/src/ife.o and b/src/ife.o differ diff --git a/src/ife_sub.o b/src/ife_sub.o index 5ce3b8e..8bf9242 100644 Binary files a/src/ife_sub.o and b/src/ife_sub.o differ diff --git a/src/mc.o b/src/mc.o index 0b1d10a..2813d10 100644 Binary files a/src/mc.o and 
b/src/mc.o differ
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 0000000..642f6e5
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,6 @@
+library(testthat)
+library(fect)
+
+test_check("fect")
+
+
diff --git a/tests/testthat/helper-data.R b/tests/testthat/helper-data.R
new file mode 100644
index 0000000..6703362
--- /dev/null
+++ b/tests/testthat/helper-data.R
@@ -0,0 +1,19 @@
+## Ensure datasets are available in tests across installed/build contexts
+if (!exists("simdata", inherits = TRUE)) {
+  suppressWarnings(try(utils::data("simdata", package = "fect"), silent = TRUE))
+}
+if (!exists("simdata", inherits = TRUE)) {
+  f <- system.file("data", "fect.RData", package = "fect")
+  if (nzchar(f) && file.exists(f)) {
+    try(load(f, envir = environment()), silent = TRUE)
+  }
+}
+if (!exists("simdata", inherits = TRUE)) {
+  f <- file.path("data", "fect.RData")
+  if (file.exists(f)) {
+    try(load(f, envir = environment()), silent = TRUE)
+  }
+}
+if (!exists("simgsynth", inherits = TRUE)) {
+  suppressWarnings(try(utils::data("simgsynth", package = "fect"), silent = TRUE))
+}
diff --git a/tests/testthat/test-cumu-effect-esplot.R b/tests/testthat/test-cumu-effect-esplot.R
new file mode 100644
index 0000000..1249b32
--- /dev/null
+++ b/tests/testthat/test-cumu-effect-esplot.R
@@ -0,0 +1,33 @@
+test_that("cumu/att.cumu/esplot run without error on fect output", {
+  suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE))
+  expect_true(exists("simdata"))
+  out <- fect::fect(
+    Y ~ D + X1 + X2,
+    data = simdata,
+    index = c("id", "time"),
+    method = "ife",
+    r = 1,
+    CV = FALSE,
+    se = FALSE,
+    parallel = FALSE
+  )
+  # cumu (att.cumu)
+  # att.cumu requires uncertainty objects from keep.sims=TRUE
+  out_keep <- fect::fect(
+    Y ~ D + X1 + X2,
+    data = simdata,
+    index = c("id", "time"),
+    method = "ife",
+    r = 1,
+    CV = FALSE,
+    se = TRUE,
+    nboots = 20,
+    keep.sims = TRUE,
+    parallel = FALSE
+  )
+  c <- att.cumu(out_keep, period = c(1, 3), plot = FALSE)
+  expect_true(is.matrix(c) || is.data.frame(c))
+  # esplot
+  p <- esplot(out_keep$est.att, Estimate = "ATT")
+  expect_s3_class(p, "ggplot")
+})
diff --git a/tests/testthat/test-did-wrapper.R b/tests/testthat/test-did-wrapper.R
new file mode 100644
index 0000000..5f4e9cb
--- /dev/null
+++ b/tests/testthat/test-did-wrapper.R
@@ -0,0 +1,25 @@
+test_that("did_wrapper twfe runs and returns structure", {
+  skip_if_not_installed("fixest")
+  suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE))
+  expect_true(exists("simdata"))
+
+  # create a binary ever-treated indicator for TWFE event study helper
+  sim <- simdata
+  sim$treat <- ave(sim$D, sim$id, FUN = function(z) as.numeric(mean(z, na.rm = TRUE) > 0))
+
+  res <- did_wrapper(
+    data = sim,
+    Y = "Y",
+    D = "D",
+    X = c("X1", "X2"),
+    index = c("id", "time"),
+    method = "twfe",
+    se = "default",
+    parallel = FALSE
+  )
+
+  expect_s3_class(res, "did_wrapper")
+  expect_true(is.data.frame(res$est.avg))
+  expect_true(is.data.frame(res$est.att))
+  expect_true(all(colnames(res$est.avg) %in% c("ATT.avg", "S.E.", "CI.lower", "CI.upper", "p.value")))
+})
diff --git a/tests/testthat/test-fect-basic.R b/tests/testthat/test-fect-basic.R
new file mode 100644
index 0000000..b2bf5ff
--- /dev/null
+++ b/tests/testthat/test-fect-basic.R
@@ -0,0 +1,43 @@
+test_that("fect formula basic runs on simdata and returns expected slots", {
+  suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE))
+  expect_true(exists("simdata"))
+
+  set.seed(1)
+  out <- fect::fect(
+    Y ~ D + X1 + X2,
+    data = simdata,
+    index = c("id", "time"),
+    method = "ife",
+    r = 2,
+    CV = FALSE,
+    se = FALSE,
+    parallel = FALSE
+  )
+
+  expect_s3_class(out, "fect")
+  expect_true(is.matrix(out$eff))
+  expect_true(is.numeric(out$att.avg))
+  expect_true(length(out$time) >= 1)
+})
+
+test_that("fect returns est.att and att.vcov when se=TRUE", {
+  suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE))
+  expect_true(exists("simdata"))
+
+  set.seed(2)
+  out <- fect::fect(
+    Y ~ D + X1 + X2,
+    data = simdata,
+    index = c("id", "time"),
+    method = "ife",
+    r = 1,
+    CV = FALSE,
+    se = TRUE,
+    nboots = 30,
+    parallel = FALSE
+  )
+
+  expect_true(!is.null(out$est.att))
+  expect_true(!is.null(out$att.vcov))
+  expect_true(all(colnames(out$est.att) %in% c("ATT", "S.E.", "CI.lower", "CI.upper", "p.value", "count")))
+})
diff --git a/tests/testthat/test-fect-sens.R b/tests/testthat/test-fect-sens.R
new file mode 100644
index 0000000..08a0938
--- /dev/null
+++ b/tests/testthat/test-fect-sens.R
@@ -0,0 +1,31 @@
+test_that("fect_sens attaches sensitivity results when inputs present", {
+  skip_if_not_installed("HonestDiDFEct")
+  suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE))
+  expect_true(exists("simdata"))
+
+  set.seed(3)
+  out <- fect::fect(
+    Y ~ D + X1 + X2,
+    data = simdata,
+    index = c("id", "time"),
+    method = "ife",
+    r = 1,
+    CV = FALSE,
+    se = TRUE,
+    nboots = 20,
+    parallel = FALSE
+  )
+
+  # ensure names and shapes needed by fect_sens exist
+  expect_true(!is.null(out$est.att))
+  expect_true(!is.null(out$att.vcov))
+
+  # Ensure placebo.period exists to avoid length-0 error inside fect_sens
+  if (is.null(out$placebo.period)) {
+    out$placebo.period <- c(-3, -1)
+  }
+  # restrict periods so numPre+numPost matches available row count
+  out$placebo.period <- c(-3, -1)
+  out2 <- fect_sens(out, post.periods = 1:10, parallel = FALSE)
+  expect_true(!is.null(out2$sensitivity.rm) || !is.null(out2$sensitivity.smooth))
+})
diff --git a/tests/testthat/test-getcohort.R b/tests/testthat/test-getcohort.R
new file mode 100644
index 0000000..813f649
--- /dev/null
+++ b/tests/testthat/test-getcohort.R
@@ -0,0 +1,16 @@
+test_that("get.cohort adds cohort columns and handles options", {
+  suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE))
+  expect_true(exists("simdata"))
+  df <- simdata
+
+  out <- get.cohort(
+    data = df,
+    D = "D",
+    index = c("id", "time"),
+    start0 = TRUE,
+    drop.always.treat = TRUE
+  )
+
+  expect_true(all(c("Cohort", "Time_to_Treatment") %in% names(out)))
+  expect_true(all(out$Cohort %in% c("Control", unique(out$Cohort))))
+})
diff --git a/tests/testthat/test-plot-fect.R b/tests/testthat/test-plot-fect.R
new file mode 100644
index 0000000..ea55ba1
--- /dev/null
+++ b/tests/testthat/test-plot-fect.R
@@ -0,0 +1,18 @@
+test_that("plot.fect executes for gap plot without error", {
+  suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE))
+  expect_true(exists("simdata"))
+  set.seed(4)
+  out <- fect::fect(
+    Y ~ D + X1 + X2,
+    data = simdata,
+    index = c("id", "time"),
+    method = "ife",
+    r = 1,
+    CV = FALSE,
+    se = FALSE,
+    parallel = FALSE
+  )
+  # Just ensure the plotting call produces a ggplot object
+  p <- plot.fect(out, type = "gap", show.count = FALSE)
+  expect_s3_class(p, "ggplot")
+})
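The reformatted get.cohort() logic above labels each unit with its first treated period, a cohort name, and a relative-time index. A minimal usage sketch, mirroring the call in tests/testthat/test-getcohort.R and assuming the bundled simdata panel with columns id, time, Y, D, X1, X2 (the column names FirstTreat, Cohort, and Time_to_Treatment appear to be the defaults; only the latter two are asserted in the test):

library(fect)
data(simdata, package = "fect")

# Label each unit by its first treated period and assign cohort names;
# drop.always.treat removes units that are treated in every observed period.
cohort.df <- get.cohort(
  data = simdata,
  D = "D",
  index = c("id", "time"),
  start0 = TRUE,              # relative period 0 = first treated period
  drop.always.treat = TRUE
)

# Columns added by get.cohort():
#   FirstTreat         first period with D == 1 (NA for never-treated units)
#   Cohort             "Cohort:<FirstTreat>" for treated units, "Control" otherwise
#   Time_to_Treatment  relative period built by the get_term() helper
table(cohort.df$Cohort)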
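get_term() is the helper that produces those relative-period indices. A small worked example, assuming the function remains unexported (hence the ::: access) and a fully observed unit whose treatment switches on and later off; the expected values come from tracing the dd[1] == 0 branch above, so treat them as an illustration rather than documented output:

# Toy treatment path: off, off, on, on, off; every period observed.
d  <- c(0, 0, 1, 1, 0)
ii <- rep(1, 5)

fect:::get_term(d, ii, type = "on")
# Expected from the logic above: -1  0  1  2  NA
# (pre-treatment clock ends at 0, treated periods count 1, 2, ...,
#  and the post-reversal period is NA for the switch-on term)

fect:::get_term(d, ii, type = "off")
# Expected from the logic above: NA NA -1  0  1
# (the analogous clock for switching off)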
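For the revised did_wrapper() interface above ("pm" removed, "bootstrap" accepted as an alias of "boot"), a hedged sketch that mirrors tests/testthat/test-did-wrapper.R; it assumes fixest is installed for the TWFE backend and keeps the ever-treated indicator the test constructs:

library(fect)
data(simdata, package = "fect")

sim <- simdata
# Ever-treated indicator, as built in the test for the TWFE event-study helper.
sim$treat <- ave(sim$D, sim$id, FUN = function(z) as.numeric(mean(z, na.rm = TRUE) > 0))

res <- did_wrapper(
  data     = sim,
  Y        = "Y",
  D        = "D",
  X        = c("X1", "X2"),
  index    = c("id", "time"),
  method   = "twfe",
  se       = "default",      # "boot"/"bootstrap" or "jackknife" are also accepted
  parallel = FALSE
)

res$est.avg        # ATT.avg, S.E., CI.lower, CI.upper, p.value
head(res$est.att)  # period-by-period estimates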
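The expanded effect() usage above adds plot, count, xlab, ylab, and main; the sketch below pairs it with a fit that keeps bootstrap draws, since the new cumulative-effect test notes that att.cumu() needs the uncertainty objects produced by keep.sims = TRUE. Argument values are illustrative only:

library(fect)
data(simdata, package = "fect")

fit <- fect(
  Y ~ D + X1 + X2,
  data      = simdata,
  index     = c("id", "time"),
  method    = "ife",
  r         = 1,
  CV        = FALSE,
  se        = TRUE,
  nboots    = 50,      # small value, for illustration only
  keep.sims = TRUE,    # retains bootstrap draws for cumulative effects
  parallel  = FALSE
)

# Cumulative effects over relative periods 1-3, using the new plot options.
eff <- effect(
  fit,
  cumu   = TRUE,
  period = c(1, 3),
  plot   = TRUE,
  count  = TRUE,
  xlab   = "Periods since treatment",
  ylab   = "Cumulative ATT",
  main   = "Cumulative treatment effects"
)

# Event-study plot from the stored period-by-period estimates.
p <- esplot(fit$est.att, Estimate = "ATT")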
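fect_sens() now defaults to parallel = FALSE and gains a cores argument. A sketch under two assumptions: HonestDiDFEct is installed (the new test skips otherwise), and the fitted object carries the pre-period information fect_sens() expects (the test manually sets placebo.period when it is missing):

library(fect)
data(simdata, package = "fect")

fit <- fect(
  Y ~ D + X1 + X2,
  data     = simdata,
  index    = c("id", "time"),
  method   = "ife",
  r        = 1,
  CV       = FALSE,
  se       = TRUE,
  nboots   = 50,
  parallel = FALSE
)

# Sequential by default; cores is only consulted when parallel = TRUE.
fit.sens <- fect_sens(fit, post.periods = 1:3, parallel = FALSE)

# Sensitivity results are attached to the returned object, e.g.:
names(fit.sens)[grepl("sensitivity", names(fit.sens))]
# typically sensitivity.rm and/or sensitivity.smooth, as checked in the test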