diff --git a/.Rbuildignore b/.Rbuildignore index 73b1e48..442a285 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,11 +1,15 @@ -^.*\.Rproj$ -^\.Rproj\.user$ +^src/.*\.(o|so|dll)$ +^\.github$ +^pkgdown/.*$ +^.*\.Rproj$ +^\.Rproj\.user$ ^README\.Rmd$ ^README\.md$ ^NEWS\.md$ -^_pkgdown\.yml$ +^NEWS\.html$ +^_pkgdown\.yml$ ^LICENSE\.md$ ^\.github$ -^docs$ -^vignettes$ +^docs$ +^vignettes$ ^pkgdown$ \ No newline at end of file diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 0000000..ed6859c --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,84 @@ +name: R-CMD-check + +on: + pull_request: + branches: [master] + workflow_dispatch: + +jobs: + R-CMD-check: + name: ${{ matrix.config.os }} (R ${{ matrix.config.r }}) + runs-on: ${{ matrix.config.os }} + strategy: + fail-fast: false + matrix: + config: + - { os: ubuntu-latest, r: "release" } + - { os: macos-latest, r: "release" } + - { os: windows-latest, r: "release" } + + env: + R_KEEP_PKG_SOURCE: yes + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false + RGL_USE_NULL: true + + steps: + - uses: actions/checkout@v4 + - name: Clean precompiled objects + shell: bash + run: | + rm -f src/*.o src/*.so src/*.dll || true + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::rcmdcheck + any::devtools + lzy318/HonestDiDFEct + local::. + needs: check + + - name: Install HonestDiDFEct via devtools + shell: Rscript {0} + run: | + if (!requireNamespace('remotes', quietly=TRUE)) install.packages('remotes', repos='https://cloud.r-project.org') + if (!requireNamespace('HonestDiDFEct', quietly=TRUE)) remotes::install_github('lzy318/HonestDiDFEct', upgrade='never') + + - name: Session info + shell: Rscript {0} + run: | + sessionInfo() + + - name: Check (CRAN-like) + shell: Rscript {0} + run: | + rcmdcheck::rcmdcheck(args = c('--no-manual','--as-cran'), error_on = 'note', check_dir = 'check') + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-check + path: check + + aggregate: + name: R-CMD-check + runs-on: ubuntu-latest + needs: R-CMD-check + if: ${{ always() }} + steps: + - name: Aggregate matrix result + run: | + if [ "${{ needs.R-CMD-check.result }}" != "success" ]; then + echo "One or more matrix jobs failed" >&2 + exit 1 + fi + echo "All matrix jobs passed" diff --git a/.github/workflows/release-to-cran-and-docs.yaml b/.github/workflows/release-to-cran-and-docs.yaml new file mode 100644 index 0000000..0f104fd --- /dev/null +++ b/.github/workflows/release-to-cran-and-docs.yaml @@ -0,0 +1,97 @@ +name: release-to-cran-and-docs + +on: + push: + tags: + - "v*.*.*" + workflow_dispatch: + inputs: + submit_to_cran: + description: "Submit to CRAN (requires secrets.GH_PAT and CRAN_EMAIL set)" + required: true + default: "false" + tag: + description: "Git tag to create (e.g., v2.0.3)" + required: true + +jobs: + tag-release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Clean precompiled objects + shell: bash + run: | + rm -f src/*.o src/*.so src/*.dll || true + - name: Create tag + run: | + git config user.name github-actions + git config user.email github-actions@github.com + git tag ${{ github.event.inputs.tag }} + git push origin ${{ github.event.inputs.tag }} + + submit-cran: 
+ if: ${{ github.event.inputs.submit_to_cran == 'true' }} + needs: tag-release + runs-on: ubuntu-latest + env: + R_KEEP_PKG_SOURCE: yes + _R_CHECK_CRAN_INCOMING_REMOTE_: false + RGL_USE_NULL: true + GITHUB_PAT: ${{ secrets.GH_PAT }} + CRAN_EMAIL: ${{ secrets.CRAN_EMAIL }} + steps: + - uses: actions/checkout@v4 + - uses: r-lib/actions/setup-r@v2 + with: + r-version: "release" + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::devtools + any::rcmdcheck + any::pkgdown + lzy318/HonestDiDFEct + local::. + needs: check + - name: Install HonestDiDFEct via devtools + shell: Rscript {0} + run: | + if (!requireNamespace('remotes', quietly=TRUE)) install.packages('remotes', repos='https://cloud.r-project.org') + if (!requireNamespace('HonestDiDFEct', quietly=TRUE)) remotes::install_github('lzy318/HonestDiDFEct', upgrade='never') + - name: CRAN checks (as-cran) + shell: Rscript {0} + run: | + rcmdcheck::rcmdcheck(args=c('--no-manual','--as-cran'), error_on='note') + - name: Submit to CRAN (manual email) + shell: bash + run: | + Rscript -e "devtools::build('.')" + echo "Please email the built tar.gz to CRAN maintainers from $CRAN_EMAIL or use devtools::release() interactively." + + pkgdown: + needs: tag-release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, lzy318/HonestDiDFEct, local::. + - name: Install HonestDiDFEct via devtools + shell: Rscript {0} + run: | + if (!requireNamespace('remotes', quietly=TRUE)) install.packages('remotes', repos='https://cloud.r-project.org') + if (!requireNamespace('HonestDiDFEct', quietly=TRUE)) remotes::install_github('lzy318/HonestDiDFEct', upgrade='never') + - name: Build site + shell: Rscript {0} + run: | + pkgdown::build_site(preview = FALSE) + - name: Deploy to gh-pages + uses: peaceiris/actions-gh-pages@v4 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./docs diff --git a/.github/workflows/test-and-check.yaml b/.github/workflows/test-and-check.yaml new file mode 100644 index 0000000..1d560c7 --- /dev/null +++ b/.github/workflows/test-and-check.yaml @@ -0,0 +1,83 @@ +name: test-and-check + +on: + workflow_dispatch: + +jobs: + test: + runs-on: ubuntu-latest + env: + R_KEEP_PKG_SOURCE: yes + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false + RGL_USE_NULL: true + steps: + - uses: actions/checkout@v4 + - name: Clean precompiled objects + shell: bash + run: | + rm -f src/*.o src/*.so src/*.dll || true + - uses: r-lib/actions/setup-r@v2 + with: + r-version: "release" + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::devtools + any::testthat + lzy318/HonestDiDFEct + local::. 
+ needs: tests + cache-version: 2 + - name: Install HonestDiDFEct via devtools + shell: Rscript {0} + run: | + if (!requireNamespace('remotes', quietly=TRUE)) install.packages('remotes', repos='https://cloud.r-project.org') + if (!requireNamespace('HonestDiDFEct', quietly=TRUE)) remotes::install_github('lzy318/HonestDiDFEct', upgrade='never') + - name: Run tests + shell: Rscript {0} + run: | + devtools::test() + + cran-check: + needs: test + runs-on: ubuntu-latest + env: + R_KEEP_PKG_SOURCE: yes + _R_CHECK_CRAN_INCOMING_REMOTE_: false + _R_CHECK_FORCE_SUGGESTS_: false + RGL_USE_NULL: true + steps: + - uses: actions/checkout@v4 + - name: Clean precompiled objects + shell: bash + run: | + rm -f src/*.o src/*.so src/*.dll || true + - uses: r-lib/actions/setup-r@v2 + with: + r-version: "release" + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + any::rcmdcheck + lzy318/HonestDiDFEct + local::. + needs: check + cache-version: 2 + - name: Install HonestDiDFEct via devtools + shell: Rscript {0} + run: | + if (!requireNamespace('remotes', quietly=TRUE)) install.packages('remotes', repos='https://cloud.r-project.org') + if (!requireNamespace('HonestDiDFEct', quietly=TRUE)) remotes::install_github('lzy318/HonestDiDFEct', upgrade='never') + - name: CRAN incoming checks + shell: Rscript {0} + run: | + rcmdcheck::rcmdcheck(args = c('--no-manual','--as-cran'), error_on='note', check_dir='check') + - name: Upload check results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: cran-check-results + path: check diff --git a/.gitignore b/.gitignore index d962651..5755cca 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,6 @@ man/.DS_Store .DS_Store .DS_Store .RData + +check/ +check/* \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index d25597a..f0b81ee 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,17 +1,18 @@ Package: fect Type: Package Title: Fixed Effects Counterfactual Estimators -Version: 2.0.3 -Date: 2025-06-20 +Version: 2.0.4 +Date: 2025-08-23 Authors@R: c(person("Licheng", "Liu", , "lichengl@stanford.edu", role = c("aut")), - person("Ziyi", "Liu", , "zyliu2023@berkeley.edu", role = c("aut", "cre")), + person("Ziyi", "Liu", , "zyliu2023@berkeley.edu", role = c("aut")), person("Ye", "Wang", , "yezhehuzhi@gmail.com", role = c("aut")), - person("Yiqing", "Xu", , "yiqingxu@stanford.edu", role = c("aut")), + person("Yiqing", "Xu", , "yiqingxu@stanford.edu", role = c("aut", "cre")), + person("Tianzhu", "Qin", , "tianzhu@stanford.edu", role = c("aut")), person("Shiyun", "Hu", , "hushiyun@pku.edu.cn", role = c("aut")), person("Rivka", "Lipkovitz", , "rivkal@mit.edu", role = c("aut"))) -Maintainer: Ziyi Liu -Description: Provides tools for estimating causal effects in panel data using counterfactual methods, as well as other modern DID estimators. It is designed for causal panel analysis with binary treatments under the parallel trends assumption. The package supports scenarios where treatments can switch on and off and allows for limited carryover effects. It includes several imputation estimators, such as Gsynth (Xu 2017), linear factor models, and the matrix completion method. Detailed methodology is described in Liu, Wang, and Xu (2024) and Chiu et al (2025). +Maintainer: Yiqing Xu +Description: Provides tools for estimating causal effects in panel data using counterfactual methods, as well as other modern DID estimators. 
It is designed for causal panel analysis with binary treatments under the parallel trends assumption. The package supports scenarios where treatments can switch on and off and allows for limited carryover effects. It includes several imputation estimators, such as Gsynth (Xu 2017), linear factor models, and the matrix completion method. Detailed methodology is described in Liu, Wang, and Xu (2024) and Chiu et al. (2025) . URL: https://yiqingxu.org/packages/fect/ NeedsCompilation: yes License: MIT + file LICENSE @@ -26,22 +27,24 @@ Imports: gridExtra, grid, fixest, - doRNG, - ggplot2, + doRNG, future, mvtnorm, - dplyr + dplyr, + future.apply, + reshape2, + rlang, + scales Suggests: - panelView -SystemRequirements: A C++11 compiler + panelView, + testthat (>= 3.0.0), + did, + DIDmultiplegtDYN, + HonestDiDFEct, + ggrepel Depends: R (>= 3.5.0) LinkingTo: Rcpp, RcppArmadillo RoxygenNote: 7.3.2 Packaged: 2024-01-26 03:25:56 UTC; ziyil Encoding: UTF-8 -Author: Shiyun Hu [aut], - Rivka Lipkovitz [aut], - Licheng Liu [aut], - Ziyi Liu [aut, cre], - Ye Wang [aut], - Yiqing Xu [aut] () +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 0e0fadf..f6b5c01 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,7 @@ useDynLib(fect, .registration=TRUE) importFrom(Rcpp, evalCpp) importFrom("stats", "na.omit", "quantile", "sd", "var", "cov", "pchisq", "lm", "as.formula", "median", "pnorm", "predict", "qnorm", "reshape", -"dnorm", "pf", "rbinom", "loess", "aggregate") +"dnorm", "pf", "rbinom", "loess", "aggregate", "pt", "qt", "setNames", "time") importFrom("foreach","foreach","%dopar%") importFrom("doParallel","registerDoParallel") importFrom("parallel", "detectCores", "stopCluster", "makeCluster") @@ -26,10 +26,16 @@ importFrom("abind", "abind") importFrom("mvtnorm", "rmvnorm") importFrom("GGally", "ggpairs") importFrom("graphics", "plot") +importFrom("graphics", "hist") importFrom("gridExtra", "grid.arrange", "arrangeGrob") importFrom("grid", "textGrob", "gpar") +importFrom("grDevices", "adjustcolor") importFrom("fixest", "feols") importFrom(dplyr, "%>%", "group_by", "mutate") +importFrom("future.apply", "future_lapply") +importFrom("reshape2", "melt") +importFrom("rlang", "sym", ".data") +importFrom("scales", "pretty_breaks") export(esplot) export(get.cohort) diff --git a/NEWS.html b/NEWS.html new file mode 100644 index 0000000..3ee1bac --- /dev/null +++ b/NEWS.html @@ -0,0 +1,437 @@ + + + + + + + + + + + + + +NEWS + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ [NEWS.html (437 added lines): a pandoc-rendered HTML copy of NEWS.md; the HTML
+ template boilerplate is omitted here. Rendered entries:
+   fect 2.0.4: Add new plot type = "hte"
+   fect 2.0.0: New syntax; Merged in gsynth
+   fect 1.0.0: First CRAN version; Fixed bugs
+   fect 0.6.5: Replace fastplm with fixest for fixed effects estimation;
+               Added plots for heterogeneous treatment effects; Fixed bugs
+   fect 0.4.1: Added a NEWS.md file to track changes to the package.]
+ + + + + + + + + + + + + + + diff --git a/NEWS.md b/NEWS.md index b0d3b9e..6801b2a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# fect 2.0.4 + +* Add new plot `type = "hte"` + +# fect 2.0.0 + +* New syntax +* Merged in **gsynth** + # fect 1.0.0 * First CRAN version diff --git a/R/boot.R b/R/boot.R index 9fa7f9b..dc06f67 100644 --- a/R/boot.R +++ b/R/boot.R @@ -26,7 +26,7 @@ basic_ci_alpha <- function(theta, boots, alpha) { ci_mat } -fect.boot <- function(Y, +fect_boot <- function(Y, X, D, W, @@ -45,26 +45,26 @@ fect.boot <- function(Y, ind.matrix = NULL, knots = NULL, criterion = "mspe", - CV, - k = 5, + CV = 0, + k = 10, cv.prop = 0.1, - cv.treat = 0, - cv.nobs = 1, + cv.treat = TRUE, + cv.nobs = 3, r = 0, - r.end, + r.end = 3, lambda = NULL, nlambda = 10, alpha = 0.05, - binary, - QR, - force, + binary = 0, + QR = 0, + force = 0, hasRevs = 1, - tol, + tol = 1e-3, max.iteration = 1000, - norm.para, + norm.para = NULL, placebo.period = NULL, - placeboTest = FALSE, - carryoverTest = FALSE, + placeboTest = 0, + carryoverTest = 0, carryover.period = NULL, vartype = "bootstrap", quantile.CI = FALSE, @@ -73,7 +73,7 @@ fect.boot <- function(Y, cores = NULL, group.level = NULL, group = NULL, - dis = TRUE, + dis = 0, keep.sims = FALSE) { na.pos <- NULL TT <- dim(Y)[1] @@ -121,7 +121,7 @@ fect.boot <- function(Y, ## estimation if (CV == 0) { if (method == "gsynth") { - out <- fect.gsynth( + out <- fect_gsynth( Y = Y, X = X, D = D, W = W, I = I, II = II, T.on = T.on, T.off = T.off, CV = 0, T.on.balance = T.on.balance, @@ -137,7 +137,7 @@ fect.boot <- function(Y, group.level = group.level, group = group ) } else if (method == "ife") { - out <- fect.fe( + out <- fect_fe( Y = Y, X = X, D = D, W = W, I = I, II = II, T.on = T.on, T.off = T.off, T.on.carry = T.on.carry, T.on.balance = T.on.balance, @@ -153,7 +153,7 @@ fect.boot <- function(Y, group.level = group.level, group = group ) } else if (method == "mc") { - out <- try(fect.mc( + out <- try(fect_mc( Y = Y, X = X, D = D, W = W, I = I, II = II, T.on = T.on, T.off = T.off, T.on.carry = T.on.carry, T.on.balance = T.on.balance, @@ -171,7 +171,7 @@ fect.boot <- function(Y, stop("\nCannot estimate using full data with MC algorithm.\n") } } else if (method %in% c("polynomial", "bspline", "cfe")) { - out <- try(fect.polynomial( + out <- try(fect_polynomial( Y = Y, D = D, X = X, W = W, I = I, II = II, T.on = T.on, T.on.carry = T.on.carry, T.on.balance = T.on.balance, @@ -200,7 +200,7 @@ fect.boot <- function(Y, } else { ## cross-valiadtion if (binary == 0) { - out <- fect.cv( + out <- fect_cv( Y = Y, X = X, D = D, W = W, I = I, II = II, T.on = T.on, T.off = T.off, T.on.carry = T.on.carry, T.on.balance = T.on.balance, @@ -217,7 +217,7 @@ fect.boot <- function(Y, # method <- out$method } else { - out <- fect.binary.cv( + out <- fect_binary_cv( Y = Y, X = X, D = D, I = I, II = II, T.on = T.on, T.off = T.off, @@ -449,7 +449,7 @@ fect.boot <- function(Y, carryover.period.boot <- carryover.period } - boot <- try(fect.fe( + boot <- try(fect_fe( Y = Y.boot, X = X, D = D, W = W, I = I, II = II, T.on = T.on, T.off = T.off, T.on.carry = T.on.carry, @@ -544,12 +544,12 @@ fect.boot <- function(Y, } ## output - # synth.out <- try(fect.gsynth(Y = Y.pseudo, X = X.pseudo, D = D.pseudo, + # synth.out <- try(fect_gsynth(Y = Y.pseudo, X = X.pseudo, D = D.pseudo, # I = I.id.pseudo, II = II.id.pseudo, # force = force, r = out$r.cv, CV = 0, # tol = tol, norm.para = norm.para, boot = 1), silent = TRUE) - synth.out <- try(fect.gsynth( + synth.out <- try(fect_gsynth( Y = 
Y.pseudo, X = X.pseudo, D = D.pseudo, W = NULL, I = I.id.pseudo, II = II.id.pseudo, T.on = T.on.pseudo, hasRevs = hasRevs, @@ -580,7 +580,7 @@ fect.boot <- function(Y, j = 1:nboots, .combine = function(...) abind(..., along = 3), .multicombine = TRUE, - .export = c("fect.gsynth", "initialFit"), + .export = c("fect_gsynth", "initialFit"), .packages = c("fect", "mvtnorm", "fixest"), .inorder = FALSE ) %dopar% { @@ -669,7 +669,7 @@ fect.boot <- function(Y, if (!is.null(W)) { W.boot <- NULL } - synth.out <- try(fect.gsynth( + synth.out <- try(fect_gsynth( Y = Y.boot, X = X.boot, D = D.boot, W = W.boot, I = I.boot, II = II.boot, T.on = T.on[, id.boot], T.on.balance = T.on.balance[, id.boot], @@ -749,7 +749,7 @@ fect.boot <- function(Y, } if (method == "ife") { - boot <- try(fect.fe( + boot <- try(fect_fe( Y = Y.boot, X = X, D = D, W = W, I = I, II = II, T.on = T.on, T.off = T.off, T.on.carry = T.on.carry, @@ -775,7 +775,7 @@ fect.boot <- function(Y, time.off.seq.group = group.time.off ), silent = TRUE) } else if (method == "mc") { - boot <- try(fect.mc( + boot <- try(fect_mc( Y = Y.boot, X = X, D = D, W = W, I = I, II = II, T.on = T.on, T.off = T.off, T.on.carry = T.on.carry, @@ -800,7 +800,7 @@ fect.boot <- function(Y, time.off.seq.group = group.time.off ), silent = TRUE) } else if (method %in% c("polynomial", "bspline", "cfe")) { - boot <- try(fect.polynomial( + boot <- try(fect_polynomial( Y = Y.boot, D = D, X = X, W = W, I = I, II = II, T.on = T.on, @@ -974,7 +974,7 @@ fect.boot <- function(Y, } if (method == "gsynth") { - boot <- try(fect.gsynth( + boot <- try(fect_gsynth( Y = Y[, boot.id], X = X.boot, D = D.boot, W = W.boot, I = I.boot, II = II[, boot.id], T.on = T.on[, boot.id], T.off = T.off.boot, CV = 0, @@ -998,9 +998,9 @@ fect.boot <- function(Y, group = boot.group, time.on.seq.group = group.time.on, time.off.seq.group = group.time.off - )) + ), silent = TRUE) } else if (method == "ife") { - boot <- try(fect.fe( + boot <- try(fect_fe( Y = Y[, boot.id], X = X.boot, D = D.boot, W = W.boot, I = I.boot, II = II[, boot.id], T.on = T.on[, boot.id], T.off = T.off.boot, @@ -1028,7 +1028,7 @@ fect.boot <- function(Y, time.off.seq.group = group.time.off ), silent = TRUE) } else if (method == "mc") { - boot <- try(fect.mc( + boot <- try(fect_mc( Y = Y[, boot.id], X = X.boot, D = D[, boot.id], W = W.boot, I = I[, boot.id], II = II[, boot.id], T.on = T.on[, boot.id], T.off = T.off.boot, @@ -1056,7 +1056,7 @@ fect.boot <- function(Y, time.off.seq.group = group.time.off ), silent = TRUE) } else if (method %in% c("polynomial", "bspline", "cfe")) { - boot <- try(fect.polynomial( + boot <- try(fect_polynomial( Y = Y[, boot.id], X = X.boot, W = W.boot, D = D[, boot.id], I = I[, boot.id], II = II[, boot.id], @@ -1126,7 +1126,7 @@ fect.boot <- function(Y, boot.out <- foreach( j = 1:nboots, .inorder = FALSE, - .export = c("fect.fe", "fect.mc", "fect.polynomial", "get_term", "fect.gsynth", "initialFit"), + .export = c("fect_fe", "fect_mc", "fect_polynomial", "get_term", "fect_gsynth", "initialFit"), .packages = c("fect", "mvtnorm", "fixest") ) %dopar% { return(one.nonpara(boot.seq[j])) @@ -1252,7 +1252,7 @@ fect.boot <- function(Y, I.boot[, , j] <- boot$I if (is.null(boot$boot.id)){ colnames.boot <- c(colnames.boot, list(1:N)) # Parametric bootstrap - assign("boot", boot, .GlobalEnv) + # assign("boot", boot, .GlobalEnv) } else { colnames.boot <- c(colnames.boot, list(boot$boot.id)) # Raw bootstrap and jackknife } diff --git a/R/cv.R b/R/cv.R index 73aa1e8..f9ad35e 100644 --- a/R/cv.R +++ b/R/cv.R @@ -1,7 
+1,7 @@ ################################################################### ## Cross-validation ################################################################### -fect.cv <- function(Y, # Outcome variable, (T*N) matrix +fect_cv <- function(Y, # Outcome variable, (T*N) matrix X, # Explanatory variables: (T*N*p) array D, # Indicator for treated unit (tr==1) W, @@ -172,10 +172,10 @@ fect.cv <- function(Y, # Outcome variable, (T*N) matrix message("Criterion: PC\n") } - ## for gsynth, use the cross-validation function in fect.gsynth + ## for gsynth, use the cross-validation function in fect_gsynth if (method == "gsynth") { message("Interactive fixed effects model...\n") - out <- fect.gsynth( + out <- fect_gsynth( Y = Y, D = D, X = X, W = W, I = I, II = II, T.on = T.on, T.off = T.off, T.on.balance = T.on.balance, diff --git a/R/cv_binary.R b/R/cv_binary.R index 7b3ebf0..87c51be 100644 --- a/R/cv_binary.R +++ b/R/cv_binary.R @@ -1,7 +1,7 @@ ################################################################### ## Cross-validation ################################################################### -fect.binary.cv <- function(Y, # Outcome variable, (T*N) matrix +fect_binary_cv <- function(Y, # Outcome variable, (T*N) matrix X, # Explanatory variables: (T*N*p) array D, # Indicator for treated unit (tr==1) I, diff --git a/R/default.R b/R/default.R index eaeaab9..f86adaa 100644 --- a/R/default.R +++ b/R/default.R @@ -6,12 +6,12 @@ ## fect.default() ## DEPENDENT FUNCTIONS -## fect.fe() ## interactive fixed effects model -## fect.mc() ## matrix completion -## fect.boot() ## bootstrap +## fect_fe() ## interactive fixed effects model +## fect_mc() ## matrix completion +## fect_boot() ## bootstrap ## fitness test -## fect.test ## wild bootstrap +## fect_test ## wild bootstrap ## METHODS ## print.fect() @@ -982,7 +982,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) tempID <- unique(data[, id]) for (i in tempID) { subpos <- which(data[, id] == i) - subtime <- data[data$id == i,]$time + subtime <- data[subpos, time] subd <- data[subpos, Dname] if (sum(subd) >= 1) { tr.time <- subtime[which(subd == 1)] @@ -1481,7 +1481,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) set.seed(seed) } if (is.null(cores) == TRUE) { - cores <- detectCores() + cores <- min(detectCores() - 2, 8) # default to 8 cores if not specified } para.clusters <- future::makeClusterPSOCK(cores) registerDoParallel(para.clusters) @@ -1498,7 +1498,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) if (CV == TRUE) { if (binary == FALSE) { - out <- fect.cv(Y = Y, D = D, X = X, W = W, + out <- fect_cv(Y = Y, D = D, X = X, W = W, I = I, II = II, T.on = T.on, T.off = T.off, T.on.carry = T.on.carry, T.on.balance = T.on.balance, @@ -1518,7 +1518,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) group.level = g.level, group = G) } else { - out <- fect.binary.cv(Y = Y, D = D, X = X, + out <- fect_binary_cv(Y = Y, D = D, X = X, I = I, II = II, T.on = T.on, T.off = T.off, k = k, cv.prop = cv.prop, @@ -1533,7 +1533,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) } else { ## non-binary case if (method == "ife") { - out <- fect.fe(Y = Y, D = D, X = X, + out <- fect_fe(Y = Y, D = D, X = X, W = W, I = I, II = II, T.on = T.on, T.off = T.off, r.cv = r, T.on.carry = T.on.carry, T.on.balance = T.on.balance, @@ -1549,7 +1549,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) group.level = 
g.level, group = G) } else if(method == "gsynth"){ - out <- fect.gsynth(Y = Y, D = D, X = X, + out <- fect_gsynth(Y = Y, D = D, X = X, W = W, I = I, II = II, T.on = T.on, T.off = T.off, r = r, CV = 0, T.on.balance = T.on.balance, @@ -1565,7 +1565,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) group.level = g.level, group = G) } else if (method == "mc") { - out <- fect.mc(Y = Y, D = D, X = X, + out <- fect_mc(Y = Y, D = D, X = X, W = W, I = I, II = II, T.on = T.on, T.off = T.off, T.on.carry = T.on.carry, T.on.balance = T.on.balance, @@ -1581,7 +1581,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) group.level = g.level, group = G) } else if (method %in% c("polynomial", "cfe")) { - out <- fect.polynomial(Y = Y, D = D, X = X, + out <- fect_polynomial(Y = Y, D = D, X = X, W = W, I = I, II = II, T.on = T.on, T.on.carry = T.on.carry, T.on.balance = T.on.balance, @@ -1615,7 +1615,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) } else { # SE == TRUE - out <- fect.boot(Y = Y, D = D, X = X, + out <- fect_boot(Y = Y, D = D, X = X, W = W, I = I, II = II, T.on = T.on, T.off = T.off, T.on.carry = T.on.carry, cl = cl, T.on.balance = T.on.balance, balance.period = balance.period, @@ -1820,7 +1820,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) } - p.out <- fect.boot(Y = pY, D = pD, X = pX, + p.out <- fect_boot(Y = pY, D = pD, X = pX, W = pW, I = pI, II = pII, T.on = pT.on, T.off = pT.off, cl = p.cl,T.on.carry = T.on.carry, method = method, degree = degree, @@ -1889,7 +1889,7 @@ fect.default <- function(formula = NULL, data, # a data frame (long-form) if (permute == TRUE) { message("Permuting under sharp null hypothesis ... ") - out.permute <- fect.permu(Y = Y, X = X, D = D, I = I, r.cv = out$r.cv, + out.permute <- fect_permu(Y = Y, X = X, D = D, I = I, r.cv = out$r.cv, lambda.cv = out$lambda.cv, m = m, permu.dimension = permu.dimension, method = out$method, degree = degree, diff --git a/R/did_wrapper.R b/R/did_wrapper.R index 3797d43..1c54d77 100644 --- a/R/did_wrapper.R +++ b/R/did_wrapper.R @@ -17,19 +17,18 @@ get_event_counts <- function(data, time_to_treat_var, unit_id) { # Build event-study data frame make_es_df <- function(Period_vec, est_vec, se_vec = NULL, count_df = NULL, method = method) { - df <- data.frame(ATT = est_vec, check.names = FALSE) if (!is.null(se_vec)) { - df[["S.E."]] <- se_vec - df[["CI.lower"]] <- df$ATT - 1.96 * se_vec - df[["CI.upper"]] <- df$ATT + 1.96 * se_vec - df[["p.value"]] <- 2 * (1 - stats::pnorm(abs(df$ATT / se_vec))) + df[["S.E."]] <- se_vec + df[["CI.lower"]] <- df$ATT - 1.96 * se_vec + df[["CI.upper"]] <- df$ATT + 1.96 * se_vec + df[["p.value"]] <- 2 * (1 - stats::pnorm(abs(df$ATT / se_vec))) } else { - df[["S.E."]] <- NA - df[["CI.lower"]] <- NA - df[["CI.upper"]] <- NA - df[["p.value"]] <- NA + df[["S.E."]] <- NA + df[["CI.lower"]] <- NA + df[["CI.upper"]] <- NA + df[["p.value"]] <- NA } ## attach period counts if supplied @@ -38,23 +37,23 @@ make_es_df <- function(Period_vec, est_vec, se_vec = NULL, df <- df |> dplyr::left_join( count_df |> - dplyr::mutate(Period = Period + 1), # Assuming Period in count_df is 0-indexed relative to event + dplyr::mutate(Period = .data$Period + 1), # Assuming Period in count_df is 0-indexed relative to event by = "Period" ) # If Period_vec can have NAs, this might cause issues with rownames # Ensure Period_vec used for rownames is clean or handle NAs appropriately valid_periods <- !is.na(df$Period) # Or use 
original Period_vec if df$Period might be modified by join - if(any(valid_periods)) { + if (any(valid_periods)) { rownames(df)[valid_periods] <- df$Period[valid_periods] } - df$Period <- NULL + df$Period <- NULL } else { # Ensure Period_vec used for rownames is clean valid_rownames <- Period_vec + as.numeric(method != "didm") - if(length(valid_rownames) == nrow(df) && !any(is.na(valid_rownames))) { + if (length(valid_rownames) == nrow(df) && !any(is.na(valid_rownames))) { rownames(df) <- valid_rownames } - df$count <- NA + df$count <- NA } df <- df[, c("ATT", "S.E.", "CI.lower", "CI.upper", "p.value", "count")] @@ -65,37 +64,34 @@ make_es_df <- function(Period_vec, est_vec, se_vec = NULL, did_wrapper <- function( data, Y, D, X = NULL, - index, # c("unit_id","time_id") - method = c("twfe","st","iw","cs_never","cs_notyet","didm"), - se = c("default","boot","bootstrap","jackknife"), + index, # c("unit_id","time_id") + method = c("twfe", "st", "iw", "cs_never", "cs_notyet", "didm"), + se = c("default", "boot", "bootstrap", "jackknife"), nboots = 200, parallel = TRUE, - core = NULL, - + core = NULL, ## TWFE / stacked-DID options time_to_treat_var = "Time_to_Treatment", - treat_indicator = "treat", - + treat_indicator = "treat", ## Callaway-Sant'Anna options csdid.base_period = "universal", - ## DIDmultiplegtDYN options didm.effects = NA, - didm.placebo = NA -) { - + didm.placebo = NA) { if (method == "bootstrap") method <- "boot" method <- match.arg(method) - se <- match.arg(se) - unit_id <- index[1]; time_id <- index[2] + se <- match.arg(se) + unit_id <- index[1] + time_id <- index[2] # Define symbols for unit_id and D here for use throughout the function, including run_estimator_once unit_sym <- rlang::sym(unit_id) - D_sym <- rlang::sym(D) + D_sym <- rlang::sym(D) # Y_sym <- rlang::sym(Y) # If Y is used with dplyr NSE - if (method == "didm" && is.na(didm.effects)) + if (method == "didm" && is.na(didm.effects)) { stop('For "didm", you must supply "didm.effects" and "didm.placebo".') + } ## ── Pre-processing ────────────────────────────────────────────────────── original_units <- length(unique(data[[unit_id]])) # Base R subsetting, fine @@ -105,8 +101,10 @@ did_wrapper <- function( dplyr::mutate(treatment_mean = mean(!!D_sym, na.rm = TRUE)) %>% # Uses pre-defined D_sym dplyr::ungroup() data <- data[data$treatment_mean < 1, ] # Base R subsetting, fine - message("Dropped ", original_units - length(unique(data[[unit_id]])), # Base R subsetting, fine - " always-treated units.") + message( + "Dropped ", original_units - length(unique(data[[unit_id]])), # Base R subsetting, fine + " always-treated units." 
+ ) ## define cohorts & event time # Ensure fect::get.cohort doesn't have issues with column names if they contain special characters @@ -116,12 +114,11 @@ did_wrapper <- function( ## ── Core estimation routine (one sample) ─────────────────────────────── run_estimator_once <- function(dd) { # dd is the data for this run - out_ATT <- out_SE <- out_lower <- out_upper <- NA - es_df <- data.frame() + out_ATT <- out_SE <- out_lower <- out_upper <- NA + es_df <- data.frame() # ── 1) TWFE ─────────────────────────────────────────────────────────── if (method == "twfe") { - if (!treat_indicator %in% names(dd)) { dd <- dd |> dplyr::group_by(!!unit_sym) |> # MODIFIED: Use unit_sym from parent scope @@ -136,20 +133,21 @@ did_wrapper <- function( stats::as.formula( paste0(Y, " ~ ", D, " + ", X_part, " | ", unit_id, " + ", time_id) ), - data = dd, + data = dd, cluster = unit_id, # fixest can take string for cluster - notes = FALSE + notes = FALSE ) - co <- fixest::coeftable(fit_att) - est <- co[D, "Estimate"] - se_ <- co[D, "Std. Error"] - out_ATT <- est - out_SE <- se_ + co <- fixest::coeftable(fit_att) + est <- co[D, "Estimate"] + se_ <- co[D, "Std. Error"] + out_ATT <- est + out_SE <- se_ out_lower <- est - 1.96 * se_ out_upper <- est + 1.96 * se_ - if (!time_to_treat_var %in% names(dd)) + if (!time_to_treat_var %in% names(dd)) { dd <- fect::get.cohort(dd, D = D, index = c(unit_id, time_id), start0 = TRUE) + } # Ensure time_to_treat_var column exists and handle NAs before using in i() if (time_to_treat_var %in% names(dd)) { @@ -170,18 +168,20 @@ did_wrapper <- function( unit_id, " + ", time_id ) ), - data = dd, + data = dd, cluster = unit_id, # fixest can take string for cluster - notes = FALSE + notes = FALSE ) - ctab <- as.data.frame(fit_es$coeftable) + ctab <- as.data.frame(fit_es$coeftable) # Robustly extract period numbers, handle cases where parsing fails period_nums <- vapply( rownames(ctab), function(x) { sp <- strsplit(x, "::")[[1]] - if (length(sp) < 2) return(NA_real_) + if (length(sp) < 2) { + return(NA_real_) + } val_str <- strsplit(sp[2], ":")[[1]][1] suppressWarnings(as.numeric(val_str)) # Suppress warnings for non-numeric if any }, @@ -189,54 +189,60 @@ did_wrapper <- function( ) # Filter out NAs that might result from parsing non-coefficient rows or failed parsing valid_coeffs <- !is.na(period_nums) - es_df <- make_es_df(period_nums[valid_coeffs], ctab$Estimate[valid_coeffs], - ctab$`Std. Error`[valid_coeffs], count_df, method) + es_df <- make_es_df( + period_nums[valid_coeffs], ctab$Estimate[valid_coeffs], + ctab$`Std. 
Error`[valid_coeffs], count_df, method + ) # ── 2) Stacked DID ("st") ───────────────────────────────────────────── } else if (method == "st") { - - if (!"Cohort" %in% names(dd)) + if (!"Cohort" %in% names(dd)) { dd <- fect::get.cohort(dd, D = D, index = c(unit_id, time_id), start0 = TRUE) + } target.cohorts <- setdiff(unique(dd$Cohort), "Control") df.st_list <- vector("list", length(target.cohorts)) # Pre-allocate list for (i in seq_along(target.cohorts)) { c <- target.cohorts[i] # Use a temporary data frame to avoid modifying `dd` in a loop if it's large - temp_df <- dd[dd$Cohort %in% c(c,"Control"), ] + temp_df <- dd[dd$Cohort %in% c(c, "Control"), ] temp_df$stack_id <- i # Use loop index for stack_id df.st_list[[i]] <- temp_df } df.st <- do.call(rbind, df.st_list) - df.st$st_unit <- as.numeric(factor(paste0(df.st$stack_id,"-",df.st[[unit_id]]))) - df.st$st_time <- as.numeric(factor(paste0(df.st$stack_id,"-",df.st[[time_id]]))) + df.st$st_unit <- as.numeric(factor(paste0(df.st$stack_id, "-", df.st[[unit_id]]))) + df.st$st_time <- as.numeric(factor(paste0(df.st$stack_id, "-", df.st[[time_id]]))) fit_st <- fixest::feols( - stats::as.formula(paste0(Y," ~ ",D," | st_unit + st_time")), - data = df.st, + stats::as.formula(paste0(Y, " ~ ", D, " | st_unit + st_time")), + data = df.st, cluster = "st_unit", # fixest can take string for cluster notes = FALSE, warn = FALSE ) - co <- fixest::coeftable(fit_st) - est <- co[D,"Estimate"]; se_ <- co[D,"Std. Error"] - out_ATT <- est; out_SE <- se_ - out_lower <- est - 1.96 * se_; out_upper <- est + 1.96 * se_ + co <- fixest::coeftable(fit_st) + est <- co[D, "Estimate"] + se_ <- co[D, "Std. Error"] + out_ATT <- est + out_SE <- se_ + out_lower <- est - 1.96 * se_ + out_upper <- est + 1.96 * se_ ## event study (same routine as above) if (!treat_indicator %in% names(df.st)) { df.st <- df.st |> dplyr::group_by(!!unit_sym) |> # MODIFIED: Use unit_sym from parent scope - dplyr::mutate(tmp = as.numeric(mean(!!D_sym,na.rm=TRUE) > 0)) |> # MODIFIED: Use D_sym from parent scope + dplyr::mutate(tmp = as.numeric(mean(!!D_sym, na.rm = TRUE) > 0)) |> # MODIFIED: Use D_sym from parent scope dplyr::ungroup() names(df.st)[names(df.st) == "tmp"] <- treat_indicator } - if (!time_to_treat_var %in% names(df.st)) - df.st <- fect::get.cohort(df.st, D = D, index = c(unit_id,time_id), start0 = TRUE) + if (!time_to_treat_var %in% names(df.st)) { + df.st <- fect::get.cohort(df.st, D = D, index = c(unit_id, time_id), start0 = TRUE) + } # Ensure time_to_treat_var column exists and handle NAs if (time_to_treat_var %in% names(df.st)) { @@ -251,33 +257,39 @@ did_wrapper <- function( fit_es <- fixest::feols( stats::as.formula( - paste0(Y," ~ i(", time_to_treat_var, ", ", treat_indicator, - ", ref=-1) | st_unit + st_time") + paste0( + Y, " ~ i(", time_to_treat_var, ", ", treat_indicator, + ", ref=-1) | st_unit + st_time" + ) ), - data = df.st, + data = df.st, cluster = "st_unit" # fixest can take string for cluster ) ctab <- as.data.frame(fit_es$coeftable) - per <- vapply( + per <- vapply( rownames(ctab), - function(x){ - sp<-strsplit(x,"::")[[1]] - if(length(sp)<2) return(NA_real_) - val_str <- strsplit(sp[2],":")[[1]][1] + function(x) { + sp <- strsplit(x, "::")[[1]] + if (length(sp) < 2) { + return(NA_real_) + } + val_str <- strsplit(sp[2], ":")[[1]][1] suppressWarnings(as.numeric(val_str)) }, numeric(1) ) valid_coeffs <- !is.na(per) - es_df <- make_es_df(per[valid_coeffs], ctab$Estimate[valid_coeffs], - ctab$`Std. 
Error`[valid_coeffs], count_df, method) + es_df <- make_es_df( + per[valid_coeffs], ctab$Estimate[valid_coeffs], + ctab$`Std. Error`[valid_coeffs], count_df, method + ) # ── 3) IW (Sun-&-Abraham) ──────────────────────────────────────────── } else if (method == "iw") { - - if (!"FirstTreat" %in% names(dd)) # 'FirstTreat' is typically generated by get.cohort or similar + if (!"FirstTreat" %in% names(dd)) { # 'FirstTreat' is typically generated by get.cohort or similar stop("Method 'iw' requires a FirstTreat column. Ensure get.cohort() was run or the column exists.") + } dd$FirstTreat[is.na(dd$FirstTreat)] <- 1000 # Or other large value indicating never/not-yet treated for sunab X_part <- if (!is.null(X)) paste(X, collapse = " + ") else "1" @@ -285,109 +297,132 @@ did_wrapper <- function( fit_iw <- fixest::feols( stats::as.formula( # Ensure time_id is a valid column name for sunab - paste0(Y," ~ sunab(FirstTreat,",time_id,") + ", X_part, - " | ", unit_id," + ", time_id) + paste0( + Y, " ~ sunab(FirstTreat,", time_id, ") + ", X_part, + " | ", unit_id, " + ", time_id + ) ), - data = dd, + data = dd, cluster = unit_id # fixest can take string for cluster ) - att_sum <- summary(fit_iw, agg = "ATT") - out_ATT <- att_sum$coeftable["ATT","Estimate"] - out_SE <- att_sum$coeftable["ATT","Std. Error"] - out_lower<- out_ATT - 1.96*out_SE - out_upper<- out_ATT + 1.96*out_SE + att_sum <- summary(fit_iw, agg = "ATT") + out_ATT <- att_sum$coeftable["ATT", "Estimate"] + out_SE <- att_sum$coeftable["ATT", "Std. Error"] + out_lower <- out_ATT - 1.96 * out_SE + out_upper <- out_ATT + 1.96 * out_SE ctab <- as.data.frame(fixest::coeftable(fit_iw)) # Robustly extract offsets for sunab offsets <- suppressWarnings( - as.numeric(sub("^.*::","",sub(":cohort::.*","",rownames(ctab)))) + as.numeric(sub("^.*::", "", sub(":cohort::.*", "", rownames(ctab)))) + ) + valid <- !is.na(offsets) + es_df <- make_es_df( + offsets[valid], ctab$Estimate[valid], + ctab$`Std. Error`[valid], count_df, method ) - valid <- !is.na(offsets) - es_df <- make_es_df(offsets[valid], ctab$Estimate[valid], - ctab$`Std. 
Error`[valid], count_df, method) # ── 4) Callaway-Sant'Anna (never-treated) ───────────────────────────── } else if (method == "cs_never") { - - if (!"FirstTreat" %in% names(dd)) # 'FirstTreat' is gname for did::att_gt - dd <- fect::get.cohort(dd, D = D, index = c(unit_id,time_id), start0 = TRUE) + if (!"FirstTreat" %in% names(dd)) { # 'FirstTreat' is gname for did::att_gt + dd <- fect::get.cohort(dd, D = D, index = c(unit_id, time_id), start0 = TRUE) + } # For 'nevertreated', control units usually have FirstTreat = 0 or Inf # Check did package documentation for exact requirement for gname with nevertreated dd$FirstTreat[is.na(dd$FirstTreat)] <- 0 # Assuming NA means never treated and coded as 0 cs.out <- did::att_gt( - yname = Y, gname = "FirstTreat", # Ensure 'FirstTreat' is the correct group identifier - idname = unit_id, tname = time_id, + yname = Y, gname = "FirstTreat", # Ensure 'FirstTreat' is the correct group identifier + idname = unit_id, tname = time_id, xformla = if (!is.null(X)) stats::as.formula(paste("~", paste(X, collapse = "+"))) else ~1, control_group = "nevertreated", allow_unbalanced_panel = TRUE, # Consider implications - data = as.data.frame(dd), # did package often prefers data.frame + data = as.data.frame(dd), # did package often prefers data.frame est_method = "reg", # Default, consider "dr" for doubly robust base_period = csdid.base_period ) simple <- tryCatch(did::aggte(cs.out, type = "simple", na.rm = TRUE), - error = function(e) {warning("Error in did::aggte (simple, cs_never): ", e$message); NULL}) + error = function(e) { + warning("Error in did::aggte (simple, cs_never): ", e$message) + NULL + } + ) if (!is.null(simple)) { - out_ATT <- simple$overall.att - out_SE <- simple$overall.se - out_lower <- out_ATT - 1.96*out_SE - out_upper <- out_ATT + 1.96*out_SE + out_ATT <- simple$overall.att + out_SE <- simple$overall.se + out_lower <- out_ATT - 1.96 * out_SE + out_upper <- out_ATT + 1.96 * out_SE } dyn <- tryCatch( - did::aggte(cs.out, type="dynamic", na.rm = TRUE, cband = FALSE), # cband=FALSE for point CIs - error = function(e) {warning("Error in did::aggte (dynamic, cs_never): ", e$message); NULL}) - es_df <- if (is.null(dyn) || length(dyn$egt) == 0) # Check if dyn$egt is empty + did::aggte(cs.out, type = "dynamic", na.rm = TRUE, cband = FALSE), # cband=FALSE for point CIs + error = function(e) { + warning("Error in did::aggte (dynamic, cs_never): ", e$message) + NULL + } + ) + es_df <- if (is.null(dyn) || length(dyn$egt) == 0) { # Check if dyn$egt is empty make_es_df(numeric(0), numeric(0), numeric(0), count_df, method) - else + } else { make_es_df(dyn$egt, dyn$att.egt, dyn$se.egt, count_df, method) + } # ── 5) Callaway-Sant'Anna (not-yet-treated) ─────────────────────────── } else if (method == "cs_notyet") { - - if (!"FirstTreat" %in% names(dd)) - dd <- fect::get.cohort(dd, D = D, index = c(unit_id,time_id), start0 = TRUE) + if (!"FirstTreat" %in% names(dd)) { + dd <- fect::get.cohort(dd, D = D, index = c(unit_id, time_id), start0 = TRUE) + } # For 'notyettreated', FirstTreat should be year of treatment or Inf/0 for never treated # Ensure NA handling for FirstTreat is appropriate for did::att_gt # dd$FirstTreat[is.na(dd$FirstTreat)] <- Inf # Or appropriate coding for never treated + + dd$FirstTreat[is.na(dd$FirstTreat)] <- 0 cs.out <- did::att_gt( - yname = Y, gname = "FirstTreat", - idname = unit_id, tname = time_id, + yname = Y, gname = "FirstTreat", + idname = unit_id, tname = time_id, xformla = if (!is.null(X)) stats::as.formula(paste("~", paste(X, 
collapse = "+"))) else ~1, control_group = "notyettreated", allow_unbalanced_panel = TRUE, - data = as.data.frame(dd), # did package often prefers data.frame + data = as.data.frame(dd), # did package often prefers data.frame est_method = "reg", base_period = csdid.base_period ) - simple <- tryCatch(did::aggte(cs.out, type="simple", na.rm=TRUE), - error=function(e) {warning("Error in did::aggte (simple, cs_notyet): ", e$message); NULL}) + simple <- tryCatch(did::aggte(cs.out, type = "simple", na.rm = TRUE), + error = function(e) { + warning("Error in did::aggte (simple, cs_notyet): ", e$message) + NULL + } + ) if (!is.null(simple)) { out_ATT <- simple$overall.att - out_SE <- simple$overall.se - out_lower <- out_ATT - 1.96*out_SE - out_upper <- out_ATT + 1.96*out_SE + out_SE <- simple$overall.se + out_lower <- out_ATT - 1.96 * out_SE + out_upper <- out_ATT + 1.96 * out_SE } dyn <- tryCatch( - did::aggte(cs.out, type="dynamic", na.rm=TRUE, cband=FALSE), - error=function(e) {warning("Error in did::aggte (dynamic, cs_notyet): ", e$message); NULL}) - es_df <- if (is.null(dyn) || length(dyn$egt) == 0) + did::aggte(cs.out, type = "dynamic", na.rm = TRUE, cband = FALSE), + error = function(e) { + warning("Error in did::aggte (dynamic, cs_notyet): ", e$message) + NULL + } + ) + es_df <- if (is.null(dyn) || length(dyn$egt) == 0) { make_es_df(numeric(0), numeric(0), numeric(0), count_df, method) - else + } else { make_es_df(dyn$egt, dyn$att.egt, dyn$se.egt, count_df, method) + } # ── 6) DIDmultiplegtDYN Dyn ────────────────────────────────────────────── } else if (method == "didm") { - res <- DIDmultiplegtDYN::did_multiplegt_dyn( df = as.data.frame(dd), # DIDmultiplegtDYN prefers data.frame outcome = Y, @@ -404,13 +439,13 @@ did_wrapper <- function( # Check structure of res$results for safety if (!is.null(res$results) && "ATE" %in% names(res$results) && length(res$results$ATE) >= 2) { out_ATT <- res$results$ATE[1] - out_SE <- res$results$ATE[2] + out_SE <- res$results$ATE[2] if (length(res$results$ATE) >= 4) { # Check if CI is provided out_lower <- res$results$ATE[3] out_upper <- res$results$ATE[4] } else { - out_lower <- out_ATT - 1.96*out_SE - out_upper <- out_ATT + 1.96*out_SE + out_lower <- out_ATT - 1.96 * out_SE + out_upper <- out_ATT + 1.96 * out_SE } } else { warning("Results from did_multiplegt_dyn ATE not found or in unexpected format.") @@ -418,47 +453,52 @@ did_wrapper <- function( Placebos <- res$results$Placebos - Effects <- res$results$Effects - T.pre <- if (!is.null(Placebos) && inherits(Placebos, "matrix")) nrow(Placebos) else 0 + Effects <- res$results$Effects + T.pre <- if (!is.null(Placebos) && inherits(Placebos, "matrix")) nrow(Placebos) else 0 T.post <- if (!is.null(Effects) && inherits(Effects, "matrix")) nrow(Effects) else 0 if (T.pre + T.post == 0) { es_df <- make_es_df(numeric(0), numeric(0), numeric(0), count_df, method) } else { - est_placebo <- if (T.pre > 0 && "Estimate" %in% colnames(Placebos)) Placebos[,"Estimate"] else numeric(0) - est_effect <- if (T.post > 0 && "Estimate" %in% colnames(Effects)) Effects[,"Estimate"] else numeric(0) + est_placebo <- if (T.pre > 0 && "Estimate" %in% colnames(Placebos)) Placebos[, "Estimate"] else numeric(0) + est_effect <- if (T.post > 0 && "Estimate" %in% colnames(Effects)) Effects[, "Estimate"] else numeric(0) est <- c(est_placebo, est_effect) - se_placebo <- if (T.pre > 0 && "SE" %in% colnames(Placebos)) Placebos[,"SE"] else numeric(0) - se_effect <- if (T.post > 0 && "SE" %in% colnames(Effects)) Effects[,"SE"] else numeric(0) + 
se_placebo <- if (T.pre > 0 && "SE" %in% colnames(Placebos)) Placebos[, "SE"] else numeric(0) + se_effect <- if (T.post > 0 && "SE" %in% colnames(Effects)) Effects[, "SE"] else numeric(0) ses <- c(se_placebo, se_effect) # Ensure lengths match before creating data frame - if(length(est) != length(ses)) { + if (length(est) != length(ses)) { warning("Mismatch in length of estimates and standard errors from did_multiplegt_dyn.") # Fallback to empty or handle error appropriately es_df <- make_es_df(numeric(0), numeric(0), numeric(0), count_df, method) } else { - peri <- c(if (T.pre>0) seq(-T.pre,-1) else numeric(0), - if (T.post>0) seq(0, T.post-1) else numeric(0)) # didm effects often 0-indexed - - peri <- c(if (T.pre>0) seq(-T.pre,-1) else numeric(0), - if (T.post>0) seq(1,T.post) else numeric(0)) + peri <- c( + if (T.pre > 0) seq(-T.pre, -1) else numeric(0), + if (T.post > 0) seq(0, T.post - 1) else numeric(0) + ) # didm effects often 0-indexed + + peri <- c( + if (T.pre > 0) seq(-T.pre, -1) else numeric(0), + if (T.post > 0) seq(1, T.post) else numeric(0) + ) es_df <- make_es_df(peri, est, ses, count_df, method) } } } - list(ATT = out_ATT, ATT_se = out_SE, - CI_lower = out_lower, CI_upper = out_upper, - es = es_df) + list( + ATT = out_ATT, ATT_se = out_SE, + CI_lower = out_lower, CI_upper = out_upper, + es = es_df + ) } ## ── SE / bootstrap handling ──────────────────────────────────────────── if (se == "default") { - out_full <- run_estimator_once(data) # Ensure ATT_se is not zero or NA before calculating p-value overall_p <- if (!is.na(out_full$ATT_se) && out_full$ATT_se != 0) { @@ -466,12 +506,10 @@ did_wrapper <- function( } else { NA_real_ } - - } else { # bootstrap or jackknife cluster_ids <- unique(data[[unit_id]]) # Base R, fine - n_clusters <- length(cluster_ids) + n_clusters <- length(cluster_ids) # Ensure nboots is sensible for jackknife if (se == "jackknife" && nboots != n_clusters) { @@ -481,13 +519,13 @@ did_wrapper <- function( boot_fun <- function(b) { # b is the iteration number - if (se %in% c("boot","bootstrap")) { + if (se %in% c("boot", "bootstrap")) { # Stratified bootstrap if applicable, or simple cluster bootstrap samp_indices <- sample(seq_along(cluster_ids), n_clusters, replace = TRUE) samp_cluster_names <- cluster_ids[samp_indices] # Efficiently create bootstrap sample by selecting rows # This assumes unit_id column is a factor or character - d_b <- data[data[[unit_id]] %in% samp_cluster_names, ] + d_b <- data[data[[unit_id]] %in% samp_cluster_names, ] # To handle duplicated clusters correctly if units within clusters are distinct in d_b: # This requires more care if unit IDs are not unique across original clusters # A common way is to replicate data for chosen clusters and assign new unique IDs if needed by downstream. @@ -495,7 +533,7 @@ did_wrapper <- function( # The current approach data[data[[unit_id]] %in% samp, ] is standard for cluster bootstrap. 
} else { # jackknife omit_cluster_name <- cluster_ids[b] # b is 1 to n_clusters - d_b <- data[data[[unit_id]] != omit_cluster_name, ] + d_b <- data[data[[unit_id]] != omit_cluster_name, ] } run_estimator_once(d_b) } @@ -505,19 +543,20 @@ did_wrapper <- function( future::plan(future::multisession, workers = core) # Use future::multisession # Ensure seed is handled correctly for parallel processing rep_list <- future.apply::future_lapply(seq_len(nboots), boot_fun, - future.seed = TRUE) # future.seed=TRUE is good + future.seed = TRUE + ) # future.seed=TRUE is good future::plan(future::sequential) # Reset plan } else { rep_list <- lapply(seq_len(nboots), boot_fun) } - att_reps <- vapply(rep_list, `[[`, numeric(1), "ATT") + att_reps <- vapply(rep_list, `[[`, numeric(1), "ATT") # Remove NAs from bootstrap replications if any estimator failed att_reps_clean <- att_reps[!is.na(att_reps)] - if(length(att_reps_clean) < length(att_reps)){ + if (length(att_reps_clean) < length(att_reps)) { warning(paste(length(att_reps) - length(att_reps_clean), "bootstrap/jackknife replications resulted in NA ATT values and were dropped.")) } - if(length(att_reps_clean) < 2){ # Need at least 2 for sd() + if (length(att_reps_clean) < 2) { # Need at least 2 for sd() warning("Too few successful bootstrap/jackknife replications to calculate SE/CI.") out_full <- run_estimator_once(data) # Get point estimate out_full$ATT_se <- NA_real_ @@ -525,14 +564,17 @@ did_wrapper <- function( out_full$CI_upper <- NA_real_ overall_p <- NA_real_ } else { - out_full <- run_estimator_once(data) # Get point estimate from full sample + out_full <- run_estimator_once(data) # Get point estimate from full sample theta_hat <- out_full$ATT - out_full$CI_lower <- 2*theta_hat - stats::quantile(att_reps_clean, 0.975, na.rm=TRUE) - out_full$CI_upper <- 2*theta_hat - stats::quantile(att_reps_clean, 0.025, na.rm=TRUE) - out_full$ATT_se <- stats::sd(att_reps_clean, na.rm = TRUE) - p1 <- if (theta_hat > 0) mean(att_reps_clean <= 0, na.rm = TRUE) - else mean(att_reps_clean >= 0, na.rm = TRUE) - overall_p <- min(2*p1, 1) + out_full$CI_lower <- 2 * theta_hat - stats::quantile(att_reps_clean, 0.975, na.rm = TRUE) + out_full$CI_upper <- 2 * theta_hat - stats::quantile(att_reps_clean, 0.025, na.rm = TRUE) + out_full$ATT_se <- stats::sd(att_reps_clean, na.rm = TRUE) + p1 <- if (theta_hat > 0) { + mean(att_reps_clean <= 0, na.rm = TRUE) + } else { + mean(att_reps_clean >= 0, na.rm = TRUE) + } + overall_p <- min(2 * p1, 1) } if (nrow(out_full$es) > 0) { @@ -540,7 +582,9 @@ did_wrapper <- function( boot_mat_list <- lapply(rep_list, function(x) { - if (is.null(x$es) || nrow(x$es) == 0) return(rep(NA_real_, length(periods_from_es))) + if (is.null(x$es) || nrow(x$es) == 0) { + return(rep(NA_real_, length(periods_from_es))) + } # Match based on rownames (periods) es_rep_periods <- as.numeric(rownames(x$es)) matched_att <- x$es$ATT[match(periods_from_es, es_rep_periods)] @@ -555,8 +599,8 @@ did_wrapper <- function( periods_filtered <- periods_from_es[valid_cols] original_att_filtered <- out_full$es$ATT[valid_cols] - if(ncol(boot_mat_filtered) > 0){ - boot_se <- apply(boot_mat_filtered, 2, stats::sd, na.rm = TRUE) + if (ncol(boot_mat_filtered) > 0) { + boot_se <- apply(boot_mat_filtered, 2, stats::sd, na.rm = TRUE) ci_quantiles_upper <- apply(boot_mat_filtered, 2, stats::quantile, probs = 0.975, na.rm = TRUE) ci_quantiles_lower <- apply(boot_mat_filtered, 2, stats::quantile, probs = 0.025, na.rm = TRUE) @@ -564,12 +608,17 @@ did_wrapper <- function( 
seq_len(ncol(boot_mat_filtered)), function(j) { est_j <- original_att_filtered[j] - reps_j <- boot_mat_filtered[,j] + reps_j <- boot_mat_filtered[, j] reps_j_clean <- reps_j[!is.na(reps_j)] - if(length(reps_j_clean) < 2) return(NA_real_) - p1 <- if (est_j > 0) mean(reps_j_clean <= 0, na.rm = TRUE) - else mean(reps_j_clean >= 0, na.rm = TRUE) - min(2*p1, 1) + if (length(reps_j_clean) < 2) { + return(NA_real_) + } + p1 <- if (est_j > 0) { + mean(reps_j_clean <= 0, na.rm = TRUE) + } else { + mean(reps_j_clean >= 0, na.rm = TRUE) + } + min(2 * p1, 1) }, numeric(1) ) @@ -580,10 +629,10 @@ did_wrapper <- function( out_full$es$CI.upper <- NA_real_ out_full$es$p.value <- NA_real_ - out_full$es$`S.E.`[valid_cols] <- boot_se - out_full$es$CI.lower[valid_cols] <- 2*original_att_filtered - ci_quantiles_upper - out_full$es$CI.upper[valid_cols] <- 2*original_att_filtered - ci_quantiles_lower - out_full$es$p.value[valid_cols] <- pvals_es + out_full$es$`S.E.`[valid_cols] <- boot_se + out_full$es$CI.lower[valid_cols] <- 2 * original_att_filtered - ci_quantiles_upper + out_full$es$CI.upper[valid_cols] <- 2 * original_att_filtered - ci_quantiles_lower + out_full$es$p.value[valid_cols] <- pvals_es } else { warning("No valid data for bootstrapping event study standard errors.") } @@ -593,11 +642,11 @@ did_wrapper <- function( ## ── Return object ─────────────────────────────────────────────────────── res <- list( est.avg = data.frame( - ATT.avg = out_full$ATT, - S.E. = out_full$ATT_se, + ATT.avg = out_full$ATT, + S.E. = out_full$ATT_se, CI.lower = out_full$CI_lower, CI.upper = out_full$CI_upper, - p.value = overall_p, + p.value = overall_p, row.names = NULL # Ensure no rownames for this single-row data frame ), est.att = out_full$es # This should have periods as rownames from make_es_df diff --git a/R/effect.R b/R/effect.R index 75c9496..83df468 100644 --- a/R/effect.R +++ b/R/effect.R @@ -155,7 +155,7 @@ effect <- function(x, ## a fect object if (is_jackknife || is_parametric) { # For jackknife, use t-distribution with N-1 degrees of freedom N_samples <- ncol(catt.boot) - t_critical <- qt(0.975, df = N_samples - 1) + t_critical <- stats::qt(0.975, df = N_samples - 1) CI.att <- t(apply(cbind(catt, se.att), 1, function(row) { c(row[1] - t_critical * row[2], row[1] + t_critical * row[2]) })) @@ -172,7 +172,7 @@ effect <- function(x, ## a fect object N_samples <- ncol(catt.boot) pvalue.att <- sapply(1:nrow(catt.boot), function(i) { t_stat <- catt[i] / se.att[i] - 2 * pt(-abs(t_stat), df = N_samples - 1) + 2 * stats::pt(-abs(t_stat), df = N_samples - 1) }) } else { # For bootstrap, use empirical distribution @@ -253,10 +253,10 @@ effect <- function(x, ## a fect object # Create the plot p <- ggplot() + - geom_point(data = plot_data, aes(x = time, y = catt), size = 2) + + geom_point(data = plot_data, aes(x = .data$time, y = .data$catt), size = 2) + geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") + geom_linerange(data = plot_data, - aes(x = time, ymin = ci_lower, ymax = ci_upper), + aes(x = .data$time, ymin = .data$ci_lower, ymax = .data$ci_upper), size = 0.5) + labs(x = xlab, y = ylab, title = main) + theme_bw() + @@ -274,7 +274,7 @@ effect <- function(x, ## a fect object if (count == TRUE) { p <- p + geom_rect(data = data.toplot, - aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), + aes(xmin = .data$xmin, xmax = .data$xmax, ymin = .data$ymin, ymax = .data$ymax), fill = "gray50", alpha = 0.3, size = 0.3, color = "black") + annotate("text", x = time_range[which.max(count_data$count)], diff 
--git a/R/esplot.R b/R/esplot.R index ddcc753..751bc51 100644 --- a/R/esplot.R +++ b/R/esplot.R @@ -773,7 +773,7 @@ esplot <- function(data, # time, ATT, CI.lower, CI.upper, count, ... rect_data[,"ymin"] <- rect.min_yval rect_data[,"ymax"] <- rect.min_yval + (rect_data[[Count]] / max_rect_count) * rect_bar_max_h p <- p + geom_rect( - data = rect_data, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), + data = rect_data, aes(xmin = .data$xmin, xmax = .data$xmax, ymin = .data$ymin, ymax = .data$ymax), fill = count.color, linewidth = 0.2, inherit.aes = FALSE, alpha = count.alpha, color = count.outline.color) max_count_val <- max_rect_count diff --git a/R/fe.R b/R/fe.R index 889ab08..5deb3ec 100644 --- a/R/fe.R +++ b/R/fe.R @@ -1,7 +1,7 @@ ################################################################### ## IFE Model Function ################################################################### -fect.fe <- function(Y, # Outcome variable, (T*N) matrix +fect_fe <- function(Y, # Outcome variable, (T*N) matrix X, # Explanatory variables: (T*N*p) array D, # Indicator for treated unit (tr==1) W, @@ -582,9 +582,6 @@ fect.fe <- function(Y, # Outcome variable, (T*N) matrix } - - - ## 8. cohort effects if (!is.null(group)) { cohort <- cbind(c(group), c(D), c(eff.v)) diff --git a/R/fect_gsynth.R b/R/fect_gsynth.R index da0b90e..be39670 100644 --- a/R/fect_gsynth.R +++ b/R/fect_gsynth.R @@ -1,4 +1,4 @@ -fect.gsynth <- function(Y, # Outcome variable, (T*N) matrix +fect_gsynth <- function(Y, # Outcome variable, (T*N) matrix X, # Explanatory variables: (T*N*p) array D, # Indicator for treated unit (tr==1) W, @@ -93,9 +93,10 @@ fect.gsynth <- function(Y, # Outcome variable, (T*N) matrix if (is.null(W)) { W.use <- as.matrix(0) - } else { - stop("Gsynth doesn't suppoort weighted outcome model in this version.\n") - } + } + # else { + # stop("Gsynth doesn't suppoort weighted outcome model in this version.\n") + # } if (!0 %in% I.tr) { ## a (TT*Ntr) matrix, time dimension: before treatment diff --git a/R/fect_sens.R b/R/fect_sens.R index 60aa6bf..a7bf433 100644 --- a/R/fect_sens.R +++ b/R/fect_sens.R @@ -1,20 +1,26 @@ fect_sens <- function( fect.out, - post.periods = NA, # Post-treatment periods - l_vec = NA, # Optional custom weighting vector - Mbarvec = seq(0, 1, by=0.1), # Vector of Mbar for RM analysis - Mvec = seq(0, 0.25, 0.05),# Vector of M for Smoothness analysis - periodMbarvec = c(0,0.5), # Vector of Mbar for period-by-period RM analysis - periodMvec = c(0,0.1), # Vector of M for Smoothness analysis - parallel = TRUE -) { + post.periods = NA, # Post-treatment periods + l_vec = NA, # Optional custom weighting vector + Mbarvec = seq(0, 1, by = 0.1), # Vector of Mbar for RM analysis + Mvec = seq(0, 0.25, 0.05), # Vector of M for Smoothness analysis + periodMbarvec = c(0, 0.5), # Vector of Mbar for period-by-period RM analysis + periodMvec = c(0, 0.1), # Vector of M for Smoothness analysis + parallel = FALSE, + cores = NULL) { + if (parallel && is.null(cores)) { + cores <- min(parallel::detectCores() - 2, 8) # default to 8 cores if not specified + cl <- parallel::makeCluster(cores) + doParallel::registerDoParallel(cl) + } + # ------------------------------------------------------------------- # 1) Identify relevant periods and extract fect estimates + vcov # ------------------------------------------------------------------- # We want event-time strings for pre and post - pre.periods = fect.out$placebo.period[1]:fect.out$placebo.period[2] - if (any(is.na(post.periods))){ - post.periods = 
1:length(fect.out$time)-length(fect.out$pre.periods)-(fect.out$placebo.period[2]-fect.out$placebo.period[1]+1)
+  pre.periods <- fect.out$placebo.period[1]:fect.out$placebo.period[2]
+  if (any(is.na(post.periods))) {
+    post.periods <- 1:(length(fect.out$time) - length(fect.out$pre.periods) - (fect.out$placebo.period[2] - fect.out$placebo.period[1] + 1))
   }
   all.periods.char <- as.character(c(pre.periods, post.periods))
@@ -27,14 +33,14 @@ fect_sens <- function(
   # Numeric indices corresponding to those event-time strings
   idx <- match(all.periods.char, rownames(fect.out$est.att))
-  idx <- idx[!is.na(idx)] # remove anything not found
+  idx <- idx[!is.na(idx)] # remove anything not found
   # Extract DTE estimates (beta.hat) and var-cov (vcov.hat)
   beta.hat <- fect.out$est.att[idx, 1]
   vcov.hat <- fect.out$att.vcov[idx, idx]
   # Counts of pre and post periods
-  numPrePeriods <- length(pre.periods)
+  numPrePeriods <- length(pre.periods)
   numPostPeriods <- length(post.periods)
   # -------------------------------------------------------------------
@@ -59,8 +65,8 @@ fect_sens <- function(
   # (b) Period-by-period results
   # Initialize empty placeholders
-  fect.out$RM_Sensitivity <- NULL
-  fect.out$Smooth_Sensitivity <- NULL
+  fect.out$RM_Sensitivity <- NULL
+  fect.out$Smooth_Sensitivity <- NULL
   # -------------------------------------------------------------------
   # 3) Relative Magnitude Analysis (RM), if Mbarvec is non-empty
@@ -68,15 +74,16 @@ fect_sens <- function(
   if (!is.null(Mbarvec) && length(Mbarvec) > 0) {
     # 3a) Weighted-average, across the entire post-treatment window
     rm_sens_results <- HonestDiDFEct::createSensitivityResults_relativeMagnitudes(
-      betahat = beta.hat,
-      sigma = vcov.hat,
-      numPrePeriods = numPrePeriods,
+      betahat = beta.hat,
+      sigma = vcov.hat,
+      numPrePeriods = numPrePeriods,
       numPostPeriods = numPostPeriods,
-      l_vec = w.att,
-      Mbarvec = Mbarvec,
+      l_vec = w.att,
+      Mbarvec = Mbarvec,
       parallel = parallel
     )
+
     rm_original_cs <- HonestDiDFEct::constructOriginalCS(
       betahat = beta.hat,
       sigma = vcov.hat,
@@ -88,7 +95,7 @@ fect_sens <- function(
     if (!is.null(periodMbarvec) && length(periodMbarvec) > 0) {
       # 3b) Period-by-period robust confidence sets
       # We'll loop over each post-treatment period and set a vector of 0s except 1 for that period
-      rm_period_output <- cbind.data.frame() # Will accumulate results
+      rm_period_output <- cbind.data.frame() # Will accumulate results
       for (t_i in seq_len(numPostPeriods)) {
         # l_vec for the "t_i-th" post period (in 1..numPostPeriods)
@@ -106,7 +113,7 @@ fect_sens <- function(
           numPostPeriods = numPostPeriods,
           l_vec = dte_l,
           Mbarvec = periodMbarvec,
-          parallel = parallel
+          parallel = parallel
         )
         # Convert to data.frame
@@ -122,9 +129,9 @@ fect_sens <- function(
     # Store results in fect.out$sensitivity.rm
     fect.out$sensitivity.rm <- list(
-      results = rm_sens_results,
-      original = rm_original_cs,
-      periods = rm_period_output
+      results = rm_sens_results,
+      original = rm_original_cs,
+      periods = rm_period_output
     )
   }
@@ -133,14 +140,15 @@ fect_sens <- function(
   if (!is.null(Mvec) && length(Mvec) > 0) {
     # 4a) Weighted-average analysis
+
     smooth_sens_results <- HonestDiDFEct::createSensitivityResults(
-      betahat = beta.hat,
-      sigma = vcov.hat,
-      numPrePeriods = numPrePeriods,
+      betahat = beta.hat,
+      sigma = vcov.hat,
+      numPrePeriods = numPrePeriods,
       numPostPeriods = numPostPeriods,
-      method = "C-LF",
-      l_vec = w.att,
-      Mvec = Mvec,
+      method = "C-LF",
+      l_vec = w.att,
+      Mvec = Mvec,
       parallel = parallel
     )
@@
-162,13 +170,13 @@ fect_sens <- function( dte_l[t_i] <- 1 honest.dte <- HonestDiDFEct::createSensitivityResults( - betahat = beta.hat, - sigma = vcov.hat, - numPrePeriods = numPrePeriods, + betahat = beta.hat, + sigma = vcov.hat, + numPrePeriods = numPrePeriods, numPostPeriods = numPostPeriods, - method = "C-LF", - l_vec = dte_l, - Mvec = periodMvec, + method = "C-LF", + l_vec = dte_l, + Mvec = periodMvec, parallel = parallel ) @@ -180,12 +188,14 @@ fect_sens <- function( # Store results in fect.out$sensitivity.smooth fect.out$sensitivity.smooth <- list( - results = smooth_sens_results, - original = sm_original_cs, - periods = smooth_period_output + results = smooth_sens_results, + original = sm_original_cs, + periods = smooth_period_output ) } - + if (parallel) { + stopCluster(cl) + } # ------------------------------------------------------------------- # 5) Return the updated fect.out # ------------------------------------------------------------------- diff --git a/R/fittest.R b/R/fittest.R index e2c4ad0..e8f1d2b 100644 --- a/R/fittest.R +++ b/R/fittest.R @@ -2,7 +2,7 @@ ## goodness of fit test: wild ##################################### -fect.test <- function( +fect_test <- function( out, # from fect Y, X, @@ -102,7 +102,7 @@ fect.test <- function( Y.boot <- Y.f + res.p * eff Y.boot[which(I == 0)] <- 0 if (method %in% c("ife", "fe")) { - boot <- try(fect.fe(Y = Y.boot, X = X, D = D, I = I, II = II, + boot <- try(fect_fe(Y = Y.boot, X = X, D = D, I = I, II = II, T.on = T.on, T.off = NULL, r.cv = r, force = force, hasRevs = 0, tol = tol, boot = 1, @@ -110,7 +110,7 @@ fect.test <- function( placebo.period = NULL, placeboTest = 0), silent = TRUE) } else if (method == "mc") { - boot <- try(fect.mc(Y = Y.boot, X = X, D = D, + boot <- try(fect_mc(Y = Y.boot, X = X, D = D, I = I, II = II, hasF = out$validF, T.on = T.on, T.off = NULL, lambda.cv = lambda, force = force, hasRevs = 0, @@ -119,7 +119,7 @@ fect.test <- function( placebo.period = NULL, placeboTest = 0), silent = TRUE) } else if (method %in% c("polynomial", "bspline")) { - boot <- try(fect.polynomial(Y = Y.boot, X = X, D = D, I = I, II = II, + boot <- try(fect_polynomial(Y = Y.boot, X = X, D = D, I = I, II = II, T.on = T.on, T.off = NULL, method = method, degree = degree, knots = knots, force = force, hasRevs = 0, tol = tol, boot = 1, @@ -221,7 +221,7 @@ fect.test <- function( if (parallel == TRUE) { boot.out <- foreach(j=1:nboots, .inorder = FALSE, - .export = c("fect.fe", "fect.mc", "fect.polynomial", "get_term"), + .export = c("fect_fe", "fect_mc", "fect_polynomial", "get_term"), .packages = c("fect") ) %dopar% { return(one.nonpara()) diff --git a/R/getcohort.R b/R/getcohort.R index d0f31f2..0ef5b6b 100644 --- a/R/getcohort.R +++ b/R/getcohort.R @@ -1,97 +1,96 @@ get.cohort <- function(data, D, index, - varname = NULL, #length of 4 c("FirstTreated","Cohort",'Time_to_Treatment',"Time_to_Exit") + varname = NULL, # length of 4 c("FirstTreated","Cohort",'Time_to_Treatment',"Time_to_Exit") start0 = FALSE, # default 0 as the last pre-treatment period, 1 as the first post-treatment period entry.time = NULL, - drop.always.treat = FALSE){ + drop.always.treat = FALSE) { data.raw <- data - if(length(D)>1){ + if (length(D) > 1) { stop("Treatment indicator \"D\" should have length 1.") } if (is.data.frame(data) == FALSE || length(class(data)) > 1) { data <- as.data.frame(data) } varnames <- colnames(data) - if(!D%in%varnames){ + if (!D %in% varnames) { stop("\"D\" misspecified.\n") } - if (is.logical(start0) == FALSE & !start0%in%c(0, 1)) { + 
+  if (is.logical(start0) == FALSE & !start0 %in% c(0, 1)) {
    stop("\"start0\" is not a logical flag.")
-    }
+  }
-    if (is.logical(drop.always.treat) == FALSE & !drop.always.treat%in%c(0, 1)) {
+  if (is.logical(drop.always.treat) == FALSE & !drop.always.treat %in% c(0, 1)) {
    stop("\"drop.always.treat\" is not a logical flag.")
-    }
+  }
-  if(is.null(varname)){
-    varname1 <- 'FirstTreat'
-    if(varname1 %in% colnames(data)){
+  if (is.null(varname)) {
+    varname1 <- "FirstTreat"
+    if (varname1 %in% colnames(data)) {
      warning("The variable \"FirstTreat\" will be replaced.\n")
-      data[,varname1] <- NULL
+      data[, varname1] <- NULL
    }
-    varname2 <- 'Cohort'
-    if(varname2 %in% colnames(data)){
+    varname2 <- "Cohort"
+    if (varname2 %in% colnames(data)) {
      warning("The variable \"Cohort\" will be replaced.\n")
-      data[,varname2] <- NULL
+      data[, varname2] <- NULL
    }
    varname3 <- "Time_to_Treatment"
-    if(varname3 %in% colnames(data)){
+    if (varname3 %in% colnames(data)) {
      warning("The variable \"Time_to_Treatment\" will be replaced.\n")
-      data[,varname3] <- NULL
+      data[, varname3] <- NULL
    }
    varname4 <- "Time_to_Exit"
-    if(varname4 %in% colnames(data)){
+    if (varname4 %in% colnames(data)) {
      warning("The variable \"Time_to_Exit\" will be replaced.\n")
-      data[,varname4] <- NULL
+      data[, varname4] <- NULL
    }
-  }
-  else{
-    if(length(varname)!=4){
+  } else {
+    if (length(varname) != 4) {
      stop("\"varname\" should have four elements.")
    }
-    if(varname[1] %in% colnames(data)){
-      warnings(paste0("Variable ",varname[1]," will be replaced.\n"))
-      data[,varname1] <- NULL
+    if (varname[1] %in% colnames(data)) {
+      warning(paste0("Variable ", varname[1], " will be replaced.\n"))
+      data[, varname[1]] <- NULL
+    }
+    if (varname[2] %in% colnames(data)) {
+      warning(paste0("Variable ", varname[2], " will be replaced.\n"))
+      data[, varname[2]] <- NULL
    }
-    if(varname[2] %in% colnames(data)){
-      warnings(paste0("Variable ",varname[2]," will be replaced.\n"))
-      data[,varname2] <- NULL
+    if (varname[3] %in% colnames(data)) {
+      warning(paste0("Variable ", varname[3], " will be replaced.\n"))
+      data[, varname[3]] <- NULL
    }
-    if(varname[3] %in% colnames(data)){
-      warnings(paste0("Variable ",varname[3]," will be replaced.\n"))
-      data[,varname3] <- NULL
+    if (varname[4] %in% colnames(data)) {
+      warning(paste0("Variable ", varname[4], " will be replaced.\n"))
+      data[, varname[4]] <- NULL
    }
-    if(varname[4] %in% colnames(data)){
-      warnings(paste0("Variable ",varname[4]," will be replaced.\n"))
-      data[,varname4] <- NULL
-    }
  }
-  if(length(index)!=2){
+  if (length(index) != 2) {
    stop("\"index\" should have length 2.\n")
  }
-  for(sub.index in index){
-    if(!sub.index%in%index){
-      stop(paste0(sub.index,"is not in the dataset.\n"))
+  for (sub.index in index) {
+    if (!sub.index %in% varnames) {
+      stop(paste0(sub.index, " is not in the dataset.\n"))
    }
  }
  n.obs <- dim(data)[1]
-
+
  ## D and index should not have NA
-  for(sub.var in c(D,index)){
-    if(length(which(is.na(data[,sub.var])))>0){
-      stop(paste0(sub.var,"contains missing values.\n"))
+  for (sub.var in c(D, index)) {
+    if (length(which(is.na(data[, sub.var]))) > 0) {
+      stop(paste0(sub.var, " contains missing values.\n"))
    }
  }
  ## check if uniquely identified
-  unique_label <- unique(paste(data[,index[1]],"_",data[,index[2]],sep=""))
-  if (length(unique_label)!= dim(data)[1]) {
+  unique_label <- unique(paste(data[, index[1]], "_", data[, index[2]], sep = ""))
+  if (length(unique_label) != dim(data)[1]) {
    stop("Observations are not uniquely defined by unit and time indicators.")
  }
@@ -99,51 +98,50 @@ get.cohort <-
function(data, stop("Treatment indicator should be a numeric value.") } - if (!(1%in%data[, D] & 0%in%data[,D] & length(unique(data[,D])) == 2)) { - stop(paste("Error values in variable \"", D,"\".", sep = "")) + if (!(1 %in% data[, D] & 0 %in% data[, D] & length(unique(data[, D])) == 2)) { + stop(paste("Error values in variable \"", D, "\".", sep = "")) } ## index names index.id <- index[1] index.time <- index[2] - data.raw[,index.id] <- as.character(data.raw[,index.id]) + data.raw[, index.id] <- as.character(data.raw[, index.id]) ## raw id and time - raw.id <- sort(unique(data[,index[1]])) - raw.time <- sort(unique(data[,index[2]])) + raw.id <- sort(unique(data[, index[1]])) + raw.time <- sort(unique(data[, index[2]])) N <- length(raw.id) TT <- length(raw.time) ## entry time - if(is.null(entry.time)){ + if (is.null(entry.time)) { staggered <- 1 - } - else{ + } else { staggered <- 0 - if(!is.list(entry.time)){ + if (!is.list(entry.time)) { stop("\"entry.time\" should be a list.\n") } all.entry.time <- c() - for(sub.time in entry.time){ - if(!is.numeric(sub.time)){ + for (sub.time in entry.time) { + if (!is.numeric(sub.time)) { stop("Elements in \"entry.time\" are misspecified.\n") } - if(length(sub.time)!=2){ + if (length(sub.time) != 2) { stop("Elements in \"entry.time\" are misspecified. Each element should have length 2.\n") } - if(sub.time[1]>sub.time[2]){ + if (sub.time[1] > sub.time[2]) { stop("Elements in \"entry.time\" are misspecified. For each element, the second should not be smaller than the first.\n") } - all.entry.time <- c(all.entry.time,sub.time) + all.entry.time <- c(all.entry.time, sub.time) } - if(length(all.entry.time)>length(unique(all.entry.time))){ + if (length(all.entry.time) > length(unique(all.entry.time))) { stop("\"entry.time\" has overlapped periods.\n") } } ## sort data - data <- data[order(data[,index.id], data[,index.time]), ] + data <- data[order(data[, index.id], data[, index.time]), ] data.raw <- data id.match <- as.numeric(as.factor(raw.id)) names(id.match) <- as.character(raw.id) @@ -154,109 +152,112 @@ get.cohort <- function(data, ## check balanced panel and fill unbalanced panel Dname <- D T.on <- I <- D <- NULL - if (dim(data)[1] != TT*N) { - data[,index.id] <- as.numeric(as.factor(data[,index.id])) - data[,index.time] <- as.numeric(as.factor(data[,index.time])) - ob.indicator <- data[,index.time] + if (dim(data)[1] != TT * N) { + data[, index.id] <- as.numeric(as.factor(data[, index.id])) + data[, index.time] <- as.numeric(as.factor(data[, index.time])) + ob.indicator <- data[, index.time] id.indicator <- table(data[, index.id]) sub.start <- 1 - for (i in 1:(N - 1)) { - sub.start <- sub.start + id.indicator[i] - sub.end <- sub.start + id.indicator[i+1] - 1 + for (i in 1:(N - 1)) { + sub.start <- sub.start + id.indicator[i] + sub.end <- sub.start + id.indicator[i + 1] - 1 ob.indicator[sub.start:sub.end] <- ob.indicator[sub.start:sub.end] + i * TT } variable <- c(Dname) data_I <- matrix(0, N * TT, 1) data_I[ob.indicator, 1] <- 1 - + data_D <- matrix(NaN, N * TT, 1) - data_D[ob.indicator, 1] <- as.matrix(data[,variable]) + data_D[ob.indicator, 1] <- as.matrix(data[, variable]) colnames(data_D) <- variable I <- matrix(1, TT, N) - D <- matrix(data_D[, Dname],TT,N) + D <- matrix(data_D[, Dname], TT, N) I[is.nan(D)] <- 0 D[is.nan(D)] <- 0 - } - else { - data[,index.id] <- as.numeric(as.factor(data[,index.id])) - data[,index.time] <- as.numeric(as.factor(data[,index.time])) + } else { + data[, index.id] <- as.numeric(as.factor(data[, index.id])) + data[, 
index.time] <- as.numeric(as.factor(data[, index.time])) I <- matrix(1, TT, N) - D <- matrix(data[,Dname], TT, N) + D <- matrix(data[, Dname], TT, N) } T.on <- matrix(NA, TT, N) for (i in 1:N) { - T.on[, i] <- get_term(D[, i], I[, i], type = "on") + T.on[, i] <- get_term(D[, i], I[, i], type = "on") } T.off <- matrix(NA, TT, N) for (i in 1:N) { - T.off[, i] <- get_term(D[, i], I[, i], type = "off") + T.off[, i] <- get_term(D[, i], I[, i], type = "off") } - + t.on <- c(T.on) t.off <- c(T.off) - use.index <- (data[,index.id]-1)*TT + data[,index.time] - if(start0 == TRUE){ - t.on <- t.on[use.index]-1 - t.off <- t.off[use.index]-1 - } - else{ + use.index <- (data[, index.id] - 1) * TT + data[, index.time] + if (start0 == TRUE) { + t.on <- t.on[use.index] - 1 + t.off <- t.off[use.index] - 1 + } else { t.on <- t.on[use.index] - t.off <- t.off[use.index] + t.off <- t.off[use.index] } tr.pos <- which(apply(D, 2, sum) > 0) co.pos <- which(apply(D, 2, sum) == 0) tr.name <- names(id.match)[tr.pos] co.name <- names(id.match)[co.pos] - - D.cum1 <- apply(D,2,cumsum) - D.cum2 <- apply(D.cum1,2,cumsum) - T0.tr <- apply(matrix(D.cum2[,tr.pos],nrow=dim(D.cum2)[1]),2,function(vec){which(vec==1)}) - T0.tr.origin <- as.numeric(sapply(T0.tr,function(x){names(time.match)[which(time.match==x)]})) - T0.co.origin <- rep(NA,length(co.name)) + + D.cum1 <- apply(D, 2, cumsum) + D.cum2 <- apply(D.cum1, 2, cumsum) + T0.tr <- apply(matrix(D.cum2[, tr.pos], nrow = dim(D.cum2)[1]), 2, function(vec) { + which(vec == 1) + }) + T0.tr.origin <- as.numeric(sapply(T0.tr, function(x) { + names(time.match)[which(time.match == x)] + })) + T0.co.origin <- rep(NA, length(co.name)) - first.treat <- cbind.data.frame(index.id = c(tr.name,co.name), - FirstTreat = c(T0.tr.origin,T0.co.origin)) + first.treat <- cbind.data.frame( + index.id = c(tr.name, co.name), + FirstTreat = c(T0.tr.origin, T0.co.origin) + ) - first.treat[,varname1] <- first.treat[,"FirstTreat"] - if(varname1!='FirstTreat'){ - first.treat[,"FirstTreat"] <- NULL + first.treat[, varname1] <- first.treat[, "FirstTreat"] + if (varname1 != "FirstTreat") { + first.treat[, "FirstTreat"] <- NULL } - data.raw <- merge(data.raw,first.treat,by.x = index.id, by.y = "index.id") - data.raw[,varname2] <- 'Control' - data.raw[,varname3] <- t.on - if(sum(!is.na(t.off))>0){ - data.raw[,varname4] <- t.off + data.raw <- merge(data.raw, first.treat, by.x = index.id, by.y = "index.id") + data.raw[, varname2] <- "Control" + data.raw[, varname3] <- t.on + if (sum(!is.na(t.off)) > 0) { + data.raw[, varname4] <- t.off } - if(staggered==1){ - all.first.treat <- sort(unique(data.raw[,varname1])) - for(sub.first in all.first.treat){ - cohort.name <- paste0("Cohort:",sub.first) - data.raw[which(data.raw[,varname1] == sub.first),varname2] <- cohort.name + if (staggered == 1) { + all.first.treat <- sort(unique(data.raw[, varname1])) + for (sub.first in all.first.treat) { + cohort.name <- paste0("Cohort:", sub.first) + data.raw[which(data.raw[, varname1] == sub.first), varname2] <- cohort.name } - } - else{ - for(sub.time in entry.time){ - cohort.name <- paste0("Cohort:",sub.time[1],'-',sub.time[2]) - data.raw[intersect(which(data.raw[,varname1]>=sub.time[1]),which(data.raw[,varname1]<=sub.time[2])),varname2] <- cohort.name + } else { + for (sub.time in entry.time) { + cohort.name <- paste0("Cohort:", sub.time[1], "-", sub.time[2]) + data.raw[intersect(which(data.raw[, varname1] >= sub.time[1]), which(data.raw[, varname1] <= sub.time[2])), varname2] <- cohort.name } cohort.name <- "Cohort:Other" - 
data.raw[which(!is.na(data.raw[,varname1]) & data.raw[,varname2] == 'Control'),varname2] <- cohort.name + data.raw[which(!is.na(data.raw[, varname1]) & data.raw[, varname2] == "Control"), varname2] <- cohort.name } - if(drop.always.treat){ + if (drop.always.treat) { size.0 <- dim(data.raw)[1] unit.index <- index[1] - data.raw <- as.data.frame(data.raw %>% group_by(get(unit.index)) %>% mutate(treatment_mean = mean(get(Dname),na.rm = TRUE))) - data.raw <- data.raw[which(data.raw$treatment_mean<1),] + data.raw <- as.data.frame(data.raw %>% group_by(get(unit.index)) %>% mutate(treatment_mean = mean(get(Dname), na.rm = TRUE))) + data.raw <- data.raw[which(data.raw$treatment_mean < 1), ] size.1 <- dim(data.raw)[1] diff.1 <- size.0 - size.1 - message(paste0("Number of Always Treated Observations Removed:",diff.1,".\n")) - data.raw[,'treatment_mean'] <- NULL + message(paste0("Number of Always Treated Observations Removed:", diff.1, ".\n")) + data.raw[, "treatment_mean"] <- NULL } return(data.raw) @@ -264,21 +265,21 @@ get.cohort <- function(data, get_term <- function(d, - ii, + ii, type = "on") { dd <- d iii <- ii first.pos <- min(which(iii == 1)) if (first.pos != 1) { - dd <- dd[-(1:(first.pos-1))] - iii <- iii[-(1:(first.pos-1))] - } - T <- length(dd) + dd <- dd[-(1:(first.pos - 1))] + iii <- iii[-(1:(first.pos - 1))] + } + T <- length(dd) if (0 %in% iii) { if (T > 1) { - for (i in 1:(T-1)) { - if (iii[i+1] == 0) { - dd[i+1] <- dd[i] + for (i in 1:(T - 1)) { + if (iii[i + 1] == 0) { + dd[i + 1] <- dd[i] } } } @@ -287,55 +288,47 @@ get_term <- function(d, if (type == "off") { dd <- abs(dd - 1) } - d1 <- dd[1:(T-1)] + d1 <- dd[1:(T - 1)] d2 <- dd[2:T] - - if(T==1){ + + if (T == 1) { term <- rep(NA, 1) - } - else if (sum(d1 == d2) == (T-1)) { + } else if (sum(d1 == d2) == (T - 1)) { term <- rep(NA, T) - } - else { + } else { change.pos <- which(d1 != d2) + 1 change.length <- length(change.pos) - term <- NULL - if (dd[1] == 0) { + term <- NULL + if (dd[1] == 0) { for (i in 1:(change.length)) { if (i == 1) { part.term <- (2 - change.pos[i]):0 - } - else { + } else { if (i %% 2 == 0) { - part.term <- 1:(change.pos[i] - change.pos[i-1]) - } - else { - part.term <- (change.pos[i-1] - change.pos[i] + 1):0 + part.term <- 1:(change.pos[i] - change.pos[i - 1]) + } else { + part.term <- (change.pos[i - 1] - change.pos[i] + 1):0 } } term <- c(term, part.term) } - } - else if (dd[1] == 1) { + } else if (dd[1] == 1) { for (i in 1:(change.length)) { if (i == 1) { part.term <- rep(NA, change.pos[i] - 1) - } - else { + } else { if (i %% 2 == 0) { - part.term <- (change.pos[i-1] - change.pos[i] + 1):0 - } - else { - part.term <- 1:(change.pos[i] - change.pos[i-1]) + part.term <- (change.pos[i - 1] - change.pos[i] + 1):0 + } else { + part.term <- 1:(change.pos[i] - change.pos[i - 1]) } } term <- c(term, part.term) } - } + } if (dd[change.pos[change.length]] == 0) { term <- c(term, rep(NA, (T - change.pos[change.length] + 1))) - } - else { + } else { term <- c(term, 1:(T - change.pos[change.length] + 1)) } } diff --git a/R/mc.R b/R/mc.R index d7c65ad..5fdb54f 100644 --- a/R/mc.R +++ b/R/mc.R @@ -1,7 +1,7 @@ ################################################################### ## Matrix Completion Function ################################################################### -fect.mc <- function(Y, # Outcome variable, (T*N) matrix +fect_mc <- function(Y, # Outcome variable, (T*N) matrix X, # Explanatory variables: (T*N*p) array D, W, diff --git a/R/permutation.R b/R/permutation.R index a1a2e01..7d8fd56 100644 --- 
a/R/permutation.R +++ b/R/permutation.R @@ -1,7 +1,7 @@ ################################################################### ## permutation ################################################################### -fect.permu <- function(Y, +fect_permu <- function(Y, X, D, I, diff --git a/R/plot.R b/R/plot.R index 992cff6..07fe5d5 100644 --- a/R/plot.R +++ b/R/plot.R @@ -6,7 +6,7 @@ plot.fect <- function( x, - type = NULL, # gap, equiv, status, exit, factors, loadings, calendar, counterfactual + type = NULL, # gap, equiv, status, exit, factors, loadings, calendar, counterfactual, heterogeneous restrict = "rm", loo = FALSE, highlight = NULL, ## for carryover test and placebo test @@ -69,38 +69,43 @@ plot.fect <- function( preset = NULL, connected = NULL, ci.outline = FALSE, - color = NULL, + color = NULL, est.lwidth = NULL, est.pointsize = NULL, - count.color = NULL, + count.color = NULL, count.alpha = NULL, count.outline.color = NULL, - placebo.color = NULL, - carryover.color = NULL, + placebo.color = NULL, + carryover.color = NULL, carryover.rm.color = NULL, sens.original.color = NULL, - sens.colors = NULL, - counterfactual.color = NULL, - counterfactual.raw.controls.color = NULL, - counterfactual.raw.treated.color = NULL, - counterfactual.linetype = NULL, - box.control = NULL, - box.treat = NULL, - calendar.color = NULL, - calendar.lcolor = NULL, - equiv.color = NULL, - status.treat.color = NULL, + sens.colors = NULL, + counterfactual.color = NULL, + counterfactual.raw.controls.color = NULL, + counterfactual.raw.treated.color = NULL, + counterfactual.linetype = NULL, + box.control = NULL, + box.treat = NULL, + calendar.color = NULL, + calendar.lcolor = NULL, + calendar.cicolor = NULL, + heterogeneous.color = NULL, + heterogeneous.lcolor = NULL, + heterogeneous.cicolor = NULL, + equiv.color = NULL, + status.treat.color = NULL, status.control.color = NULL, - status.missing.color = NULL, - status.removed.color = NULL, + status.missing.color = NULL, + status.removed.color = NULL, status.placebo.color = NULL, - status.carryover.color = NULL, - status.carryover.rm.color = NULL, + status.carryover.color = NULL, + status.carryover.rm.color = NULL, status.balanced.post.color = NULL, status.balanced.pre.color = NULL, status.background.color = NULL, + covariate = NULL, + covariate.labels = NULL, ...) { - group <- ATT5 <- ATT6 <- CI.lower.90 <- CI.lower6 <- CI.upper.90 <- CI.upper6 <- L1 <- eff <- NULL if (!missing(vis)) { warning("'vis' is deprecated and will be removed in future versions.", call. 
= FALSE) @@ -116,110 +121,111 @@ plot.fect <- function( if (is.logical(count) && missing(show.count)) show.count <- count } if (is.null(preset)) { - if (is.null(connected)) connected <- FALSE - if (is.null(ltype)) ltype <- c("solid", "solid") - if (is.null(gridOff)) gridOff <- FALSE - if (is.null(color)) color <- "black" - if (is.null(count.color)) count.color <- "grey70" - if (is.null(count.alpha)) count.alpha <- 0.4 - if (is.null(count.outline.color)) count.outline.color <- "grey69" - if (is.null(placebo.color)) placebo.color <- "blue" - if (is.null(carryover.color)) carryover.color <- "red" - if (is.null(carryover.rm.color)) carryover.rm.color <- "blue" - if (is.null(sens.original.color)) sens.original.color <- "darkblue" - if (is.null(sens.colors)) sens.colors <- c("#218C23","#FF34B4","#FF521B","#2B59C3") - - if (is.null(counterfactual.color)) counterfactual.color <- "steelblue" + if (is.null(connected)) connected <- FALSE + if (is.null(ltype)) ltype <- c("solid", "solid") + if (is.null(gridOff)) gridOff <- FALSE + if (is.null(color)) color <- "black" + if (is.null(count.color)) count.color <- "grey70" + if (is.null(count.alpha)) count.alpha <- 0.4 + if (is.null(count.outline.color)) count.outline.color <- "grey69" + if (is.null(placebo.color)) placebo.color <- "blue" + if (is.null(carryover.color)) carryover.color <- "red" + if (is.null(carryover.rm.color)) carryover.rm.color <- "blue" + if (is.null(sens.original.color)) sens.original.color <- "darkblue" + if (is.null(sens.colors)) sens.colors <- c("#218C23", "#FF34B4", "#FF521B", "#2B59C3") + + if (is.null(counterfactual.color)) counterfactual.color <- "steelblue" if (is.null(counterfactual.raw.controls.color)) counterfactual.raw.controls.color <- "#4682B420" - if (is.null(counterfactual.raw.treated.color)) counterfactual.raw.treated.color <- "#77777750" + if (is.null(counterfactual.raw.treated.color)) counterfactual.raw.treated.color <- "#77777750" if (is.null(counterfactual.linetype)) counterfactual.linetype <- "longdash" - if (is.null(box.control)) box.control <- "skyblue" - if (is.null(box.treat)) box.treat <- "pink" + if (is.null(box.control)) box.control <- "skyblue" + if (is.null(box.treat)) box.treat <- "pink" - if (is.null(calendar.color)) calendar.color <- "skyblue" - if (is.null(calendar.lcolor)) calendar.lcolor <- "red" + if (is.null(calendar.color)) calendar.color <- "#2C7FB8" + if (is.null(calendar.cicolor)) calendar.cicolor <- "skyblue" + if (is.null(calendar.lcolor)) calendar.lcolor <- "red" - if (is.null(equiv.color)) equiv.color <- "red" + if (is.null(heterogeneous.color)) heterogeneous.color <- "#2C7FB8" + if (is.null(heterogeneous.cicolor)) heterogeneous.cicolor <- "skyblue" + if (is.null(heterogeneous.lcolor)) heterogeneous.lcolor <- "red" - if (is.null(status.treat.color)) status.treat.color <- "#06266F" - if (is.null(status.control.color)) status.control.color <- "#B0C4DE" - if (is.null(status.missing.color)) status.missing.color <- "#FFFFFF" - if (is.null(status.removed.color)) status.removed.color <- "#A9A9A9" - if (is.null(status.placebo.color)) status.placebo.color <- "#66C2A5" + if (is.null(equiv.color)) equiv.color <- "red" + + if (is.null(status.treat.color)) status.treat.color <- "#06266F" + if (is.null(status.control.color)) status.control.color <- "#B0C4DE" + if (is.null(status.missing.color)) status.missing.color <- "#FFFFFF" + if (is.null(status.removed.color)) status.removed.color <- "#A9A9A9" + if (is.null(status.placebo.color)) status.placebo.color <- "#66C2A5" if 
(is.null(status.carryover.color)) status.carryover.color <- "#E78AC3" if (is.null(status.carryover.rm.color)) status.carryover.rm.color <- "#ffc425" if (is.null(status.balanced.post.color)) status.balanced.post.color <- "#00852B" - if (is.null(status.balanced.pre.color)) status.balanced.pre.color <- "#A5CA18" + if (is.null(status.balanced.pre.color)) status.balanced.pre.color <- "#A5CA18" if (is.null(status.background.color)) status.background.color <- "gray90" - } - else if (preset == "vibrant") { - if (is.null(connected)) connected <- TRUE - if (is.null(color)) color <- "#054A91" - if (is.null(count.color)) count.color <- "#E6AF2E" - if (is.null(count.alpha)) count.alpha <- 1 - if (is.null(count.outline.color)) count.outline.color <- "#E6AF2E" - if (is.null(lwidth)) lwidth <- c(0.8, 1) - if (is.null(ltype)) ltype <- c("solid", "dashed") - if (is.null(placebo.color)) placebo.color <- "#386641" - if (is.null(carryover.color)) carryover.color <- "#A40E4C" - if (is.null(carryover.rm.color)) carryover.rm.color <- "#FF5400" - if (is.null(sens.original.color)) sens.original.color <- "#054A91" - if (is.null(sens.colors)) sens.colors <- c("#A40E4C", "#FF5400", "#E6AF2E", "#386641", "#ACC3DA") - if (is.null(counterfactual.color)) counterfactual.color <- "#777777" + } else if (preset == "vibrant") { + if (is.null(connected)) connected <- TRUE + if (is.null(color)) color <- "#054A91" + if (is.null(count.color)) count.color <- "#E6AF2E" + if (is.null(count.alpha)) count.alpha <- 1 + if (is.null(count.outline.color)) count.outline.color <- "#E6AF2E" + if (is.null(lwidth)) lwidth <- c(0.8, 1) + if (is.null(ltype)) ltype <- c("solid", "dashed") + if (is.null(placebo.color)) placebo.color <- "#386641" + if (is.null(carryover.color)) carryover.color <- "#A40E4C" + if (is.null(carryover.rm.color)) carryover.rm.color <- "#FF5400" + if (is.null(sens.original.color)) sens.original.color <- "#054A91" + if (is.null(sens.colors)) sens.colors <- c("#A40E4C", "#FF5400", "#E6AF2E", "#386641", "#ACC3DA") + if (is.null(counterfactual.color)) counterfactual.color <- "#777777" if (is.null(counterfactual.raw.controls.color)) counterfactual.raw.controls.color <- "#D5E1ED" - if (is.null(counterfactual.raw.treated.color)) counterfactual.raw.treated.color <- "#77777750" + if (is.null(counterfactual.raw.treated.color)) counterfactual.raw.treated.color <- "#77777750" if (is.null(counterfactual.linetype)) counterfactual.linetype <- "dashed" - if (is.null(box.control)) box.control <- "#ACC3DA" - if (is.null(box.treat)) box.treat <- "#E1AFC3" - if (is.null(calendar.color)) calendar.color <- "#ACC3DA" - if (is.null(calendar.lcolor)) calendar.lcolor <- "#054A91" - if (is.null(equiv.color)) equiv.color <- "#A40E4C" - if (is.null(status.treat.color)) status.treat.color <- "#054A91" - if (is.null(status.control.color)) status.control.color <- "#ACC3DA" - if (is.null(status.missing.color)) status.missing.color <- "#dddddd" - if (is.null(status.removed.color)) status.removed.color <- "#D7E8E0" - if (is.null(status.placebo.color)) status.placebo.color <- "#386641" - if (is.null(status.carryover.color)) status.carryover.color <- "#A40E4C" + if (is.null(box.control)) box.control <- "#ACC3DA" + if (is.null(box.treat)) box.treat <- "#E1AFC3" + if (is.null(calendar.color)) calendar.color <- "#ACC3DA" + if (is.null(calendar.lcolor)) calendar.lcolor <- "#054A91" + if (is.null(equiv.color)) equiv.color <- "#A40E4C" + if (is.null(status.treat.color)) status.treat.color <- "#054A91" + if (is.null(status.control.color)) status.control.color <- 
"#ACC3DA" + if (is.null(status.missing.color)) status.missing.color <- "#dddddd" + if (is.null(status.removed.color)) status.removed.color <- "#D7E8E0" + if (is.null(status.placebo.color)) status.placebo.color <- "#386641" + if (is.null(status.carryover.color)) status.carryover.color <- "#A40E4C" if (is.null(status.carryover.rm.color)) status.carryover.rm.color <- "#FF5400" if (is.null(status.balanced.post.color)) status.balanced.post.color <- "#E6AF2E" - if (is.null(status.balanced.pre.color)) status.balanced.pre.color <- "#777777" - if (is.null(status.background.color)) status.background.color <- "#FFFFFF" - - } - else if (preset %in% c("grayscale","greyscale")) { - if (is.null(connected)) connected <- FALSE - if (is.null(color)) color <- "black" - if (is.null(count.color)) count.color <- "gray80" - if (is.null(count.alpha)) count.alpha <- 0.5 - if (is.null(count.outline.color)) count.outline.color <- "black" - if (is.null(lwidth)) lwidth <- c(1, 1) - if (is.null(ltype)) ltype <- c("solid", "dashed") - if (is.null(placebo.color)) placebo.color <- "gray40" - if (is.null(carryover.color)) carryover.color <- "gray70" - if (is.null(carryover.rm.color)) carryover.rm.color <- "gray40" - if (is.null(sens.original.color)) sens.original.color <- "gray20" - if (is.null(sens.colors)) sens.colors <- c("gray80", "gray50", "black") - if (is.null(counterfactual.color)) counterfactual.color <- "#777777" + if (is.null(status.balanced.pre.color)) status.balanced.pre.color <- "#777777" + if (is.null(status.background.color)) status.background.color <- "#FFFFFF" + } else if (preset %in% c("grayscale", "greyscale")) { + if (is.null(connected)) connected <- FALSE + if (is.null(color)) color <- "black" + if (is.null(count.color)) count.color <- "gray80" + if (is.null(count.alpha)) count.alpha <- 0.5 + if (is.null(count.outline.color)) count.outline.color <- "black" + if (is.null(lwidth)) lwidth <- c(1, 1) + if (is.null(ltype)) ltype <- c("solid", "dashed") + if (is.null(placebo.color)) placebo.color <- "gray40" + if (is.null(carryover.color)) carryover.color <- "gray70" + if (is.null(carryover.rm.color)) carryover.rm.color <- "gray40" + if (is.null(sens.original.color)) sens.original.color <- "gray20" + if (is.null(sens.colors)) sens.colors <- c("gray80", "gray50", "black") + if (is.null(counterfactual.color)) counterfactual.color <- "#777777" if (is.null(counterfactual.raw.controls.color)) counterfactual.raw.controls.color <- "#eeeeee" - if (is.null(counterfactual.raw.treated.color)) counterfactual.raw.treated.color <- "#666666" + if (is.null(counterfactual.raw.treated.color)) counterfactual.raw.treated.color <- "#666666" if (is.null(counterfactual.linetype)) counterfactual.linetype <- "dashed" - if (is.null(box.control)) box.control <- "gray90" - if (is.null(box.treat)) box.treat <- "gray50" - if (is.null(calendar.color)) calendar.color <- "gray80" - if (is.null(calendar.lcolor)) calendar.lcolor <- "black" - if (is.null(equiv.color)) equiv.color <- "gray80" - if (is.null(status.treat.color)) status.treat.color <- "black" - if (is.null(status.control.color)) status.control.color <- "gray90" - if (is.null(status.missing.color)) status.missing.color <- "white" - if (is.null(status.removed.color)) status.removed.color <- "white" - if (is.null(status.placebo.color)) status.placebo.color <- "gray50" - if (is.null(status.carryover.color)) status.carryover.color <- "gray50" + if (is.null(box.control)) box.control <- "gray90" + if (is.null(box.treat)) box.treat <- "gray50" + if (is.null(calendar.color)) 
calendar.color <- "gray80" + if (is.null(calendar.lcolor)) calendar.lcolor <- "black" + if (is.null(equiv.color)) equiv.color <- "gray80" + if (is.null(status.treat.color)) status.treat.color <- "black" + if (is.null(status.control.color)) status.control.color <- "gray90" + if (is.null(status.missing.color)) status.missing.color <- "white" + if (is.null(status.removed.color)) status.removed.color <- "white" + if (is.null(status.placebo.color)) status.placebo.color <- "gray50" + if (is.null(status.carryover.color)) status.carryover.color <- "gray50" if (is.null(status.carryover.rm.color)) status.carryover.rm.color <- "#ffffff" if (is.null(status.balanced.post.color)) status.balanced.post.color <- "#aaaaaa" - if (is.null(status.balanced.pre.color)) status.balanced.pre.color <- "#eeeeee" - if (is.null(status.background.color)) status.background.color <- "#FFFFFF" - + if (is.null(status.balanced.pre.color)) status.balanced.pre.color <- "#eeeeee" + if (is.null(status.background.color)) status.background.color <- "#FFFFFF" } if (is.null(est.lwidth) || is.null(est.pointsize)) { default_est_lwidth <- .8 @@ -237,8 +243,8 @@ plot.fect <- function( default_est_pointsize <- 3 } } - if(is.null(est.lwidth)) est.lwidth <- default_est_lwidth - if(is.null(est.pointsize)) est.pointsize <- default_est_pointsize + if (is.null(est.lwidth)) est.lwidth <- default_est_lwidth + if (is.null(est.pointsize)) est.pointsize <- default_est_pointsize } @@ -329,7 +335,7 @@ plot.fect <- function( return(0) } } - if (!(restrict %in% c("rm","sm"))) { + if (!(restrict %in% c("rm", "sm"))) { stop("\"restrict\" option misspecified. Must be either \"rm\" or \"sm\".") } @@ -339,10 +345,13 @@ plot.fect <- function( type <- "counterfactual" } if (type == "es") { - type = "gap" + type <- "gap" } - if (!type %in% c("status", "gap", "equiv", "exit", "factors", "loadings", "calendar", "box", "counterfactual", "sens","sens_es", "cumul")) { - stop("\"type\" option misspecified. Must be one of the following:\"status\",\"gap\",\"equiv\",\"exit\",\"calendar\",\"box\",\"counterfactual\",\"equiv\",\"sens\",\"sens_es\",\"cumul\".") + if (type == "hte") { + type <- "heterogeneous" + } + if (!type %in% c("status", "gap", "equiv", "exit", "factors", "loadings", "calendar", "box", "counterfactual", "sens", "sens_es", "cumul", "heterogeneous")) { + stop("\"type\" option misspecified. 
Must be one of the following:\"status\",\"gap\",\"equiv\",\"exit\",\"calendar\",\"box\",\"counterfactual\",\"equiv\",\"sens\",\"sens_es\",\"cumul\",\"heterogeneous\".") } if (type == "exit" && is.null(x$att.off)) { stop("No exiting treatment effect to be plotted.") @@ -358,24 +367,24 @@ plot.fect <- function( type <- "gap" } } - if (!is.null(x$effect.est.att)){ - type = "cumul" + if (!is.null(x$effect.est.att)) { + type <- "cumul" } type.old <- type - if (is.null(gridOff)){ - if (type == "status"){ + if (is.null(gridOff)) { + if (type == "status") { gridOff <- FALSE - } else{ + } else { gridOff <- TRUE } } provided_xlim <- xlim # add spacing - if (!is.null(xlim) & !(type %in% c("gap", "equiv", "exit", "sens", "sens_es"))){ + if (!is.null(xlim) && !(type %in% c("gap", "equiv", "exit", "sens", "sens_es"))) { if (length(xlim) == 2) { - xlim = c(xlim[1]-0.2,xlim[2]+0.2) + xlim <- c(xlim[1] - 0.2, xlim[2] + 0.2) } } @@ -420,7 +429,7 @@ plot.fect <- function( colnames(L.hat) <- Lname rownames(L.hat) <- c() - if (x$force %in% c(1, 3) & include.FE == TRUE) { + if (x$force %in% c(1, 3) && include.FE == TRUE) { L.hat <- cbind(c(x$alpha.tr, x$alpha.co), L.hat) colnames(L.hat) <- c(paste("Factor", 0), Lname) rownames(L.hat) <- c() @@ -429,15 +438,15 @@ plot.fect <- function( } data <- cbind.data.frame(L.hat, - "id" = c(x$tr, x$co), - "group" = as.factor(c( - rep("Treated", x$Ntr), - rep("Control", x$Nco) - )) + "id" = c(x$tr, x$co), + "group" = as.factor(c( + rep("Treated", x$Ntr), + rep("Control", x$Nco) + )) ) if (nfactors == 1) { - p <- ggplot(data, aes(x = group, y = L1, fill = group)) + + p <- ggplot(data, aes(x = .data$group, y = .data$L1, fill = .data$group)) + geom_boxplot(alpha = 0.7) + coord_flip() + guides(fill = FALSE) + @@ -450,11 +459,11 @@ plot.fect <- function( geom_density(..., alpha = 0.7, color = NA) } p <- GGally::ggpairs(data, - mapping = aes(color = group, fill = group), - columns = 1:nfactors, - columnLabels = Llabel[1:nfactors], - diag = list(continuous = my_dens), - title = main + mapping = aes(color = .data$group, fill = .data$group), + columns = 1:nfactors, + columnLabels = Llabel[1:nfactors], + diag = list(continuous = my_dens), + title = main ) + theme(plot.title = element_text(hjust = 0.5)) } else if (x$Ntr > 1) { @@ -463,11 +472,11 @@ plot.fect <- function( geom_density(..., fill = "gray", alpha = 0.7, color = "gray50") } p <- GGally::ggpairs(data, - mapping = aes(color = group), - columns = 1:nfactors, - columnLabels = Llabel[1:nfactors], - diag = list(continuous = my_dens), - title = main + mapping = aes(color = .data$group), + columns = 1:nfactors, + columnLabels = Llabel[1:nfactors], + diag = list(continuous = my_dens), + title = main ) } else { my_dens <- function(data, mapping, ...) 
{ @@ -475,11 +484,11 @@ plot.fect <- function( geom_density(..., fill = "gray", alpha = 0.7, color = "gray50") } p <- GGally::ggpairs(data, - mapping = aes(color = group), - columns = 1:nfactors, upper = "blank", - columnLabels = Llabel[1:nfactors], - diag = list(continuous = my_dens), - title = main + mapping = aes(color = .data$group), + columns = 1:nfactors, upper = "blank", + columnLabels = Llabel[1:nfactors], + diag = list(continuous = my_dens), + title = main ) } } @@ -519,7 +528,7 @@ plot.fect <- function( } if (length(provided_xlim) != 0) { show <- which(time >= provided_xlim[1] & time <= provided_xlim[2]) - proportion = 0 + proportion <- 0 } else { show <- 1:length(time) } @@ -576,7 +585,7 @@ plot.fect <- function( p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) } p <- p + xlab(xlab) + ylab(ylab) + ggtitle(main) + - geom_hline(yintercept = 0, colour = lcolor[1], size = lwidth[1], linetype = ltype[1]) + + geom_hline(yintercept = 0, colour = lcolor[1], size = lwidth[1], linetype = ltype[1]) + theme( legend.position = legend.pos, axis.text.x = element_text(angle = angle, hjust = x.h, vjust = x.v), @@ -589,8 +598,8 @@ plot.fect <- function( ) ## main plot p <- p + geom_line(aes(time, factor, - colour = group, - group = group + colour = .data$group, + group = .data$group ), size = 1.2) @@ -858,7 +867,10 @@ plot.fect <- function( maintext <- "Carryover Effects" } } else if (type == "calendar") { - maintext <- "ATT by Calendar Time" + maintext <- "CATT by Calendar Time" + ytitle <- paste("Effect on", x$Y) + } else if (type == "heterogeneous") { + maintext <- paste("CATT by", covariate) ytitle <- paste("Effect on", x$Y) } else if (type == "box") { maintext <- "Individual Treatment Effects" @@ -886,7 +898,7 @@ plot.fect <- function( } else { angle <- 0 x.v <- 0 - if (type == "status") { + if (type == "status" || type == "heterogeneous" || type == "calendar") { x.h <- 0.5 } else { x.h <- 0 @@ -1076,7 +1088,7 @@ plot.fect <- function( align_time_series <- function(Y, T0_counts, D_full, tr_idx_logical_full) { # Y: T × N matrix; T0_counts: length-N vector T_rows <- nrow(Y) - N_tr <- ncol(Y) + N_tr <- ncol(Y) # subset out just the treated columns of D_full D_tr <- D_full[, tr_idx_logical_full, drop = FALSE] if (ncol(D_tr) != N_tr) { @@ -1091,23 +1103,23 @@ plot.fect <- function( Y_aug = matrix(NA_real_, 0, N_tr) )) } - min_rel <- 1 - max(T0_valid) - max_rel <- T_rows - min(T0_valid) + min_rel <- 1 - max(T0_valid) + max_rel <- T_rows - min(T0_valid) timeline <- if (min_rel > max_rel) integer(0) else seq(min_rel, max_rel) # build aligned matrix Y_aug <- matrix(NA_real_, nrow = length(timeline), ncol = N_tr) - if (length(timeline)>0) rownames(Y_aug) <- as.character(timeline) + if (length(timeline) > 0) rownames(Y_aug) <- as.character(timeline) for (i in seq_len(N_tr)) { t0 <- T0_counts[i] if (!is.finite(t0)) next for (t_abs in seq_len(T_rows)) { rel_time <- t_abs - t0 - row_idx <- match(as.character(rel_time), rownames(Y_aug)) + row_idx <- match(as.character(rel_time), rownames(Y_aug)) if (is.na(row_idx)) next include <- (rel_time <= 0) || - (t_abs <= nrow(D_tr) && D_tr[t_abs,i]==1 && !is.na(D_tr[t_abs,i])) + (t_abs <= nrow(D_tr) && D_tr[t_abs, i] == 1 && !is.na(D_tr[t_abs, i])) if (include) Y_aug[row_idx, i] <- Y[t_abs, i] } } @@ -1130,10 +1142,11 @@ plot.fect <- function( Y.tr.bar[is.nan(Y.tr.bar)] <- NA_real_ Y.ct.bar[is.nan(Y.ct.bar)] <- NA_real_ Yb <- cbind(Y.tr.bar, Y.ct.bar) - colnames(Yb) <- c("Y.tr.bar","Y.ct.bar") + colnames(Yb) <- c("Y.tr.bar", 
"Y.ct.bar") } else { Yb <- matrix(NA_real_, 0, 2, - dimnames = list(NULL, c("Y.tr.bar","Y.ct.bar"))) + dimnames = list(NULL, c("Y.tr.bar", "Y.ct.bar")) + ) } list( @@ -1153,9 +1166,13 @@ plot.fect <- function( raw <- "none" } if (axis.adjust == TRUE) { - angle <- 45; x.v <- 1; x.h <- 1 + angle <- 45 + x.v <- 1 + x.h <- 1 } else { - angle <- 0; x.v <- 0; x.h <- 0.5 + angle <- 0 + x.v <- 0 + x.h <- 0.5 } plot_xlim <- xlim @@ -1169,47 +1186,74 @@ plot.fect <- function( # --- Data Extraction and Cleaning --- - subset = 1:x$N + subset <- 1:x$N subset.tr <- x$tr if (!is.null(id)) { - subset.tr = which(x$id %in% as.character(id)) - if (!all(subset.tr %in% x$tr) || length(subset.tr) < 1){ + subset.tr <- which(x$id %in% as.character(id)) + if (!all(subset.tr %in% x$tr) || length(subset.tr) < 1) { stop("One or more of the units given in \"id\" is not treated.\n") } - x$Y.avg = NULL - subset = sort(c(subset.tr,x$co)) - } - tr <- x$tr; co <- x$co; I_orig <- x$I; II_orig <- x$II; Y_orig <- x$Y.dat - Y.ct_orig <- x$Y.ct; D_orig <- x$D.dat; rawid_orig <- x$id - time_orig_vec <- x$rawtime; TT_dim <- dim(Y_orig)[1] - if (!is.numeric(time_orig_vec[1])) { time_orig_vec <- 1:TT_dim } - I_mat <- as.matrix(I_orig)[,subset]; colnames(I_mat) <- rawid_orig[subset]; rownames(I_mat) <- time_orig_vec - II_mat <- as.matrix(II_orig)[,subset]; colnames(II_mat) <- rawid_orig[subset]; rownames(II_mat) <- time_orig_vec - Y_mat <- as.matrix(Y_orig)[,subset]; colnames(Y_mat) <- rawid_orig[subset]; rownames(Y_mat) <- time_orig_vec - Y.ct_mat <- as.matrix(Y.ct_orig)[,subset]; colnames(Y.ct_mat) <- rawid_orig[subset]; rownames(Y.ct_mat) <- time_orig_vec - D_mat <- as.matrix(D_orig)[,subset]; colnames(D_mat) <- rawid_orig[subset]; rownames(D_mat) <- time_orig_vec - if (!is.null(id)) + x$Y.avg <- NULL + subset <- sort(c(subset.tr, x$co)) + } + tr <- x$tr + co <- x$co + I_orig <- x$I + II_orig <- x$II + Y_orig <- x$Y.dat + Y.ct_orig <- x$Y.ct + D_orig <- x$D.dat + rawid_orig <- x$id + time_orig_vec <- x$rawtime + TT_dim <- dim(Y_orig)[1] + if (!is.numeric(time_orig_vec[1])) { + time_orig_vec <- 1:TT_dim + } + I_mat <- as.matrix(I_orig)[, subset] + colnames(I_mat) <- rawid_orig[subset] + rownames(I_mat) <- time_orig_vec + II_mat <- as.matrix(II_orig)[, subset] + colnames(II_mat) <- rawid_orig[subset] + rownames(II_mat) <- time_orig_vec + Y_mat <- as.matrix(Y_orig)[, subset] + colnames(Y_mat) <- rawid_orig[subset] + rownames(Y_mat) <- time_orig_vec + Y.ct_mat <- as.matrix(Y.ct_orig)[, subset] + colnames(Y.ct_mat) <- rawid_orig[subset] + rownames(Y.ct_mat) <- time_orig_vec + D_mat <- as.matrix(D_orig)[, subset] + colnames(D_mat) <- rawid_orig[subset] + rownames(D_mat) <- time_orig_vec + if (!is.null(id)) { tr_idx_logical <- (1:ncol(Y_mat)) %in% which(x$id[subset] %in% as.character(id)) - else{ + } else { tr_idx_logical <- (1:ncol(Y_mat)) %in% tr } - I.tr <- I_mat[, tr_idx_logical, drop = FALSE]; II.tr <- II_mat[, tr_idx_logical, drop = FALSE] - D.tr <- D_mat[, tr_idx_logical, drop = FALSE]; Y.tr <- Y_mat[, tr_idx_logical, drop = FALSE] + I.tr <- I_mat[, tr_idx_logical, drop = FALSE] + II.tr <- II_mat[, tr_idx_logical, drop = FALSE] + D.tr <- D_mat[, tr_idx_logical, drop = FALSE] + Y.tr <- Y_mat[, tr_idx_logical, drop = FALSE] Y.ct <- Y.ct_mat[, tr_idx_logical, drop = FALSE] co_idx_logical <- (1:ncol(Y_mat)) %in% co Y.co <- Y_mat[, co_idx_logical, drop = FALSE] - if (!0 %in% I.tr) { pre <- as.matrix(D.tr == 0 & II.tr == 1) } else { pre <- as.matrix(D.tr == 0 & I.tr == 1 & II.tr == 1) } + if (!0 %in% I.tr) { + pre <- as.matrix(D.tr == 
0 & II.tr == 1) + } else { + pre <- as.matrix(D.tr == 0 & I.tr == 1 & II.tr == 1) + } T0 <- apply(x$D.dat[, subset.tr, drop = FALSE], 2, function(col) { first_one <- which(col == 1)[1] if (is.na(first_one)) length(col) else first_one - 1 }) - id.tr_names <- colnames(Y.tr); id.co_names <- colnames(Y.co) + id.tr_names <- colnames(Y.tr) + id.co_names <- colnames(Y.co) sameT0 <- length(unique(T0)) == 1 is_case1_scenario <- length(id.tr_names) == 1 || sameT0 == TRUE if (is_case1_scenario) { - num_treated_units_for_plot <- ncol(D.tr); num_time_periods_for_plot <- nrow(D.tr) + num_treated_units_for_plot <- ncol(D.tr) + num_time_periods_for_plot <- nrow(D.tr) if (num_treated_units_for_plot > 0 && num_time_periods_for_plot > 0) { for (j_unit_idx in 1:num_treated_units_for_plot) { first_treated_period_indices_for_unit <- which(D.tr[, j_unit_idx] == 1) @@ -1217,7 +1261,8 @@ plot.fect <- function( first_ever_treated_period_row_idx <- min(first_treated_period_indices_for_unit) for (t_row_idx in first_ever_treated_period_row_idx:num_time_periods_for_plot) { if (is.na(D.tr[t_row_idx, j_unit_idx]) || D.tr[t_row_idx, j_unit_idx] == 0) { - Y.tr[t_row_idx, j_unit_idx] <- NA; Y.ct[t_row_idx, j_unit_idx] <- NA + Y.tr[t_row_idx, j_unit_idx] <- NA + Y.ct[t_row_idx, j_unit_idx] <- NA } } } @@ -1228,8 +1273,11 @@ plot.fect <- function( Yb <- cbind(apply(Y.tr, 1, mean, na.rm = TRUE), apply(Y.ct, 1, mean, na.rm = TRUE)) colnames(Yb) <- c("Tr_Avg", "Ct_Avg") - if (is.null(shade.post)) { shade.post <- TRUE } - else if (!is.logical(shade.post)) { stop("Option \"shade.post\" must be logical (TRUE/FALSE).") } + if (is.null(shade.post)) { + shade.post <- TRUE + } else if (!is.logical(shade.post)) { + stop("Option \"shade.post\" must be logical (TRUE/FALSE).") + } if (legendOff == TRUE) { legend.pos <- "none" } else { @@ -1241,13 +1289,17 @@ plot.fect <- function( Y.tr[is.na(Y.ct)] <- NA_real_ # Case 1: Single Treated Unit or All Treated Units have Same T0 (Absolute Time) if (is_case1_scenario) { - plot_time_abs <- time_orig_vec; time_bf_abs_val <- NA - time_step_abs <- if(length(plot_time_abs) > 1) min(diff(sort(unique(plot_time_abs))), na.rm=TRUE) else 1 - if(!is.finite(time_step_abs) || time_step_abs <= 0) time_step_abs <- 1 - if (sameT0){ + plot_time_abs <- time_orig_vec + time_bf_abs_val <- NA + time_step_abs <- if (length(plot_time_abs) > 1) min(diff(sort(unique(plot_time_abs))), na.rm = TRUE) else 1 + if (!is.finite(time_step_abs) || time_step_abs <= 0) time_step_abs <- 1 + if (sameT0) { unique_t0_val_count <- unique(T0)[1] - if(!is.na(unique_t0_val_count) && unique_t0_val_count >= 0 && (unique_t0_val_count + 1) <= length(plot_time_abs)) { time_bf_abs_val <- plot_time_abs[unique_t0_val_count + 1] } - else if (!is.na(unique_t0_val_count) && unique_t0_val_count == length(plot_time_abs)) { time_bf_abs_val <- plot_time_abs[length(plot_time_abs)] + time_step_abs } + if (!is.na(unique_t0_val_count) && unique_t0_val_count >= 0 && (unique_t0_val_count + 1) <= length(plot_time_abs)) { + time_bf_abs_val <- plot_time_abs[unique_t0_val_count + 1] + } else if (!is.na(unique_t0_val_count) && unique_t0_val_count == length(plot_time_abs)) { + time_bf_abs_val <- plot_time_abs[length(plot_time_abs)] + time_step_abs + } } vline_pos_abs <- if (!is.na(time_bf_abs_val)) time_bf_abs_val - (time_step_abs / 2) else NA @@ -1255,7 +1307,11 @@ plot.fect <- function( show_abs <- 1:length(plot_time_abs) if (!is.null(provided_xlim)) { show_abs_check <- which(plot_time_abs >= provided_xlim[1] & plot_time_abs <= provided_xlim[2]) - if 
(length(show_abs_check) == 0) { warning("No data points in xlim range.") } else { show_abs <- show_abs_check } + if (length(show_abs_check) == 0) { + warning("No data points in xlim range.") + } else { + show_abs <- show_abs_check + } } counts_for_filtering_abs <- apply(Y.tr, 1, function(row) sum(!is.na(row))) @@ -1272,91 +1328,103 @@ plot.fect <- function( stop("Cannot determine time range for absolute time plot.") } - nT_abs <- length(show_abs); time_label_abs <- plot_time_abs[show_abs]; T.b_abs <- integer(0) + nT_abs <- length(show_abs) + time_label_abs <- plot_time_abs[show_abs] + T.b_abs <- integer(0) if (nT_abs > 0) { if (is.numeric(time_label_abs) && length(time_label_abs) > 1) { - T.b_breaks_abs <- pretty(time_label_abs); T.b_breaks_abs <- T.b_breaks_abs[T.b_breaks_abs >= min(time_label_abs) & T.b_breaks_abs <= max(time_label_abs)] - if (length(T.b_breaks_abs) > 0) { T.b_abs <- sapply(T.b_breaks_abs, function(br) which.min(abs(time_label_abs - br))); T.b_abs <- unique(T.b_abs) } + T.b_breaks_abs <- pretty(time_label_abs) + T.b_breaks_abs <- T.b_breaks_abs[T.b_breaks_abs >= min(time_label_abs) & T.b_breaks_abs <= max(time_label_abs)] + if (length(T.b_breaks_abs) > 0) { + T.b_abs <- sapply(T.b_breaks_abs, function(br) which.min(abs(time_label_abs - br))) + T.b_abs <- unique(T.b_abs) + } + } + if (length(T.b_abs) == 0) { + max_labels <- 10 + step <- max(1, floor(length(time_label_abs) / max_labels)) + T.b_abs <- seq(1, length(time_label_abs), by = step) } - if (length(T.b_abs) == 0) { max_labels <- 10; step <- max(1, floor(length(time_label_abs) / max_labels)); T.b_abs <- seq(1, length(time_label_abs), by = step) } } xlab_final <- if (is.null(xlab)) x$index[2] else if (xlab == "") NULL else xlab ylab_final <- if (is.null(ylab)) x$Yname else if (ylab == "") NULL else ylab - plot_single_unit_flag <- FALSE; unit_to_plot_name <- NULL + plot_single_unit_flag <- FALSE + unit_to_plot_name <- NULL plot_single_unit_flag <- length(id.tr_names) == 1 unit_to_plot_name <- id.tr_names[1] if (plot_single_unit_flag) { maintext <- paste("Treated and Estimated Counterfactual (", unit_to_plot_name, ")", sep = "") unit_col_idx_in_Y.tr <- which(colnames(Y.tr) == unit_to_plot_name) - if(length(unit_col_idx_in_Y.tr) != 1) stop(paste("Could not find unique column for unit", unit_to_plot_name, "in Y.tr.")) - tr.info_unit <- Y.tr[show_abs, unit_col_idx_in_Y.tr]; ct.info_unit <- Y.ct[show_abs, unit_col_idx_in_Y.tr] # Subset by show_abs here + if (length(unit_col_idx_in_Y.tr) != 1) stop(paste("Could not find unique column for unit", unit_to_plot_name, "in Y.tr.")) + tr.info_unit <- Y.tr[show_abs, unit_col_idx_in_Y.tr] + ct.info_unit <- Y.ct[show_abs, unit_col_idx_in_Y.tr] # Subset by show_abs here y_data_for_range_calc <- c(tr.info_unit, ct.info_unit) # Already subsetted if (raw == "none") { if (x$vartype == "parametric" & !is.null(id) & !is.null(x$eff.boot)) { - if (plot.ci == "95"){ + if (plot.ci == "95") { para.ci <- basic_ci_alpha( - rowMeans(x$eff.boot[,which(tr %in% subset.tr),]), - x$eff.boot[,which(tr %in% subset.tr),], + rowMeans(x$eff.boot[, which(tr %in% subset.tr), ]), + x$eff.boot[, which(tr %in% subset.tr), ], alpha = 0.05 ) x$Y.avg <- data.frame( period = x$rawtime, - lower.tr = Yb[,"Tr_Avg"], - upper.tr = Yb[,"Tr_Avg"], - lower.ct = Yb[,"Ct_Avg"] + para.ci[ , "upper"], - upper.ct = Yb[,"Ct_Avg"] + para.ci[ , "lower"] + lower.tr = Yb[, "Tr_Avg"], + upper.tr = Yb[, "Tr_Avg"], + lower.ct = Yb[, "Ct_Avg"] + para.ci[, "upper"], + upper.ct = Yb[, "Ct_Avg"] + para.ci[, "lower"] ) - } else if (plot.ci == 
"90"){ + } else if (plot.ci == "90") { para.ci <- basic_ci_alpha( - rowMeans(x$eff.boot[,which(tr %in% subset.tr),]), - x$eff.boot[,which(tr %in% subset.tr),], + rowMeans(x$eff.boot[, which(tr %in% subset.tr), ]), + x$eff.boot[, which(tr %in% subset.tr), ], alpha = 0.1 ) x$Y.avg <- data.frame( - period = x$rawtime, - lower90.tr = Yb[,"Tr_Avg"], - upper90.tr = Yb[,"Tr_Avg"], - lower90.ct = Yb[,"Ct_Avg"] + para.ci[ , "upper"], - upper90.ct = Yb[,"Ct_Avg"] + para.ci[ , "lower"] + period = x$rawtime, + lower90.tr = Yb[, "Tr_Avg"], + upper90.tr = Yb[, "Tr_Avg"], + lower90.ct = Yb[, "Ct_Avg"] + para.ci[, "upper"], + upper90.ct = Yb[, "Ct_Avg"] + para.ci[, "lower"] ) - } else{ + } else { warning("Invalid plot.ci provided.") } } else if (x$vartype == "parametric" & raw == "none" & is.null(id)) { - if (plot.ci == "95"){ + if (plot.ci == "95") { x$Y.avg <- data.frame( period = x$rawtime, - lower.tr = Yb[,"Tr_Avg"], - upper.tr = Yb[,"Tr_Avg"], - lower.ct = Yb[,"Ct_Avg"] - (x$est.att[, "CI.upper"]-x$est.att[,"ATT"]), - upper.ct = Yb[,"Ct_Avg"] - (x$est.att[, "CI.lower"]-x$est.att[,"ATT"]) + lower.tr = Yb[, "Tr_Avg"], + upper.tr = Yb[, "Tr_Avg"], + lower.ct = Yb[, "Ct_Avg"] - (x$est.att[, "CI.upper"] - x$est.att[, "ATT"]), + upper.ct = Yb[, "Ct_Avg"] - (x$est.att[, "CI.lower"] - x$est.att[, "ATT"]) ) - } else if (plot.ci == "90"){ + } else if (plot.ci == "90") { x$Y.avg <- data.frame( - period = x$rawtime, - lower90.tr = Yb[,"Tr_Avg"], - upper90.tr = Yb[,"Tr_Avg"], - lower90.ct = Yb[,"Ct_Avg"] - (x$est.att90[, "CI.upper"]-x$est.att90[,"ATT"]), - upper90.ct = Yb[,"Ct_Avg"] - (x$est.att90[, "CI.lower"]-x$est.att90[,"ATT"]) + period = x$rawtime, + lower90.tr = Yb[, "Tr_Avg"], + upper90.tr = Yb[, "Tr_Avg"], + lower90.ct = Yb[, "Ct_Avg"] - (x$est.att90[, "CI.upper"] - x$est.att90[, "ATT"]), + upper90.ct = Yb[, "Ct_Avg"] - (x$est.att90[, "CI.lower"] - x$est.att90[, "ATT"]) ) - } else{ + } else { warning("Invalid plot.ci provided.") } } data_plot_abs <- data.frame( - time = rep(plot_time_abs[show_abs], 2), + time = rep(plot_time_abs[show_abs], 2), outcome = c(tr.info_unit, ct.info_unit), - type = factor( + type = factor( rep(c("tr", "ct"), each = nT_abs), levels = c("tr", "ct") ) ) p <- ggplot() - if (plot.ci %in% c("90","95") && !is.null(x$Y.avg)) { - tr_lo_col <- if (plot.ci=="95") "lower.tr" else "lower90.tr" - tr_hi_col <- if (plot.ci=="95") "upper.tr" else "upper90.tr" - cf_lo_col <- if (plot.ci=="95") "lower.ct" else "lower90.ct" - cf_hi_col <- if (plot.ci=="95") "upper.ct" else "upper90.ct" + if (plot.ci %in% c("90", "95") && !is.null(x$Y.avg)) { + tr_lo_col <- if (plot.ci == "95") "lower.tr" else "lower90.tr" + tr_hi_col <- if (plot.ci == "95") "upper.tr" else "upper90.tr" + cf_lo_col <- if (plot.ci == "95") "lower.ct" else "lower90.ct" + cf_hi_col <- if (plot.ci == "95") "upper.ct" else "upper90.ct" ci_data <- x$Y.avg[x$Y.avg$period %in% plot_time_abs[show_abs], ] if (nrow(ci_data) > 0) { @@ -1371,40 +1439,47 @@ plot.fect <- function( p <- p + geom_line( data = data_plot_abs, - aes(x = time, y = outcome, - colour = type, - linetype = type, - linewidth = type) + aes( + x = time, y = outcome, + colour = type, + linetype = type, + linewidth = type + ) ) - set.limits <- c("tr", "ct") - set.labels <- c( - paste0("Treated (", unit_to_plot_name, ")"), - paste0("Est. Y(0) (", unit_to_plot_name, ")") + set.limits <- c("tr", "ct") + set.labels <- c( + paste0("Treated (", unit_to_plot_name, ")"), + paste0("Est. 
Y(0) (", unit_to_plot_name, ")") ) - set.colors <- c(color, counterfactual.color) + set.colors <- c(color, counterfactual.color) set.linetypes <- c("solid", counterfactual.linetype) set.linewidth <- c(est.lwidth, est.lwidth) - set.fill <- c(color, counterfactual.color) # for the ribbon legend + set.fill <- c(color, counterfactual.color) # for the ribbon legend } else if (raw == "band") { - Y.co.quantiles <- t(apply(Y.co[show_abs, , drop=FALSE], 1, quantile, prob = c(0.05, 0.95), na.rm = TRUE)) + Y.co.quantiles <- t(apply(Y.co[show_abs, , drop = FALSE], 1, quantile, prob = c(0.05, 0.95), na.rm = TRUE)) main_lines_data_abs <- data.frame( time = rep(plot_time_abs[show_abs], 2), outcome = c(tr.info_unit, ct.info_unit), # Use already subsetted data type = factor(c(rep("tr", nT_abs), rep("ct", nT_abs)), levels = c("tr", "ct", "co.band")) ) - data.band_abs <- data.frame(time = plot_time_abs[show_abs], co5 = Y.co.quantiles[, 1], co95 = Y.co.quantiles[, 2], type="co.band") # Add type for legend + data.band_abs <- data.frame(time = plot_time_abs[show_abs], co5 = Y.co.quantiles[, 1], co95 = Y.co.quantiles[, 2], type = "co.band") # Add type for legend y_data_for_range_calc <- c(y_data_for_range_calc, data.band_abs$co5, data.band_abs$co95) p <- ggplot() + - geom_ribbon(data = data.band_abs, aes(x = time, ymin = co5, ymax = co95, fill = type), alpha = 0.15, color = if (ci.outline) adjustcolor(counterfactual.color, offset = c(0.3, 0.3, 0.3, 0)) else NA) + + geom_ribbon(data = data.band_abs, aes(x = time, ymin = .data$co5, ymax = .data$co95, fill = type), alpha = 0.15, color = if (ci.outline) adjustcolor(counterfactual.color, offset = c(0.3, 0.3, 0.3, 0)) else NA) + geom_line(data = main_lines_data_abs, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) - set.limits <- c("tr", "ct", "co.band"); set.labels <- c(paste0("Treated (",unit_to_plot_name,")"), paste0("Est. Y(0) (",unit_to_plot_name,")"), "Controls (5-95% Quantiles)") - set.colors <- c(color, counterfactual.color, NA); set.linetypes <- c("solid", counterfactual.linetype, "blank"); set.linewidth <- c(est.lwidth, est.lwidth, 0); set.fill <- c(NA, NA, counterfactual.color) + set.limits <- c("tr", "ct", "co.band") + set.labels <- c(paste0("Treated (", unit_to_plot_name, ")"), paste0("Est. 
Y(0) (", unit_to_plot_name, ")"), "Controls (5-95% Quantiles)") + set.colors <- c(color, counterfactual.color, NA) + set.linetypes <- c("solid", counterfactual.linetype, "blank") + set.linewidth <- c(est.lwidth, est.lwidth, 0) + set.fill <- c(NA, NA, counterfactual.color) } else if (raw == "all") { # Data for Raw Control Lines - Y.co.subset <- Y.co[show_abs, , drop = FALSE]; raw_co_data_abs <- NULL + Y.co.subset <- Y.co[show_abs, , drop = FALSE] + raw_co_data_abs <- NULL if (ncol(Y.co.subset) > 0 && nrow(Y.co.subset) > 0) { melt_temp_co <- reshape2::melt(Y.co.subset, varnames = c("time_idx_abs", "id_co_idx_abs"), value.name = "outcome") - if(nrow(melt_temp_co) > 0) { + if (nrow(melt_temp_co) > 0) { raw_co_data_abs <- data.frame(time = as.numeric(melt_temp_co$time_idx_abs), id = as.character(melt_temp_co$id_co_idx_abs), outcome = melt_temp_co$outcome, type = "raw.co") } } @@ -1412,25 +1487,31 @@ plot.fect <- function( main_tr_line_data_abs <- data.frame(time = plot_time_abs[show_abs], outcome = tr.info_unit, type = "tr", id = "_MAIN_TREATED_LINE_") # Data for Main Counterfactual Line main_ct_line_data_abs <- data.frame(time = plot_time_abs[show_abs], outcome = ct.info_unit, type = "ct", id = "_MAIN_COUNTERFACTUAL_LINE_") - y_data_for_range_calc <- c(y_data_for_range_calc, if(!is.null(raw_co_data_abs)) raw_co_data_abs$outcome else NULL) + y_data_for_range_calc <- c(y_data_for_range_calc, if (!is.null(raw_co_data_abs)) raw_co_data_abs$outcome else NULL) p <- ggplot() - if (!is.null(raw_co_data_abs)) { p <- p + geom_line(data = raw_co_data_abs, aes(x = time, y = outcome, group = id, colour = type, linetype = type, linewidth = type)) } + if (!is.null(raw_co_data_abs)) { + p <- p + geom_line(data = raw_co_data_abs, aes(x = time, y = outcome, group = id, colour = type, linetype = type, linewidth = type)) + } p <- p + geom_line(data = main_tr_line_data_abs, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) p <- p + geom_line(data = main_ct_line_data_abs, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) - set.limits <- c("tr", "ct", "raw.co"); set.labels <- c(paste0("Treated (",unit_to_plot_name,")"), paste0("Est. Y(0) (",unit_to_plot_name,")"), "Controls") - set.colors <- c(color, counterfactual.color, counterfactual.raw.controls.color); set.linetypes <- c("solid", counterfactual.linetype, "solid") - lw <- c(est.lwidth,est.lwidth/2); set.linewidth <- c(lw[1], lw[1], lw[2]) + set.limits <- c("tr", "ct", "raw.co") + set.labels <- c(paste0("Treated (", unit_to_plot_name, ")"), paste0("Est. 
Y(0) (", unit_to_plot_name, ")"), "Controls") + set.colors <- c(color, counterfactual.color, counterfactual.raw.controls.color) + set.linetypes <- c("solid", counterfactual.linetype, "solid") + lw <- c(est.lwidth, est.lwidth / 2) + set.linewidth <- c(lw[1], lw[1], lw[2]) } } else { # Multiple treated units, all with the same T0 - maintext <- "Treated vs Estimated Counterfactuals"; Yb_show_abs <- Yb[show_abs, , drop = FALSE] - y_data_for_range_calc <- c(Yb_show_abs[,1], Yb_show_abs[,2]) + maintext <- "Treated vs Estimated Counterfactuals" + Yb_show_abs <- Yb[show_abs, , drop = FALSE] + y_data_for_range_calc <- c(Yb_show_abs[, 1], Yb_show_abs[, 2]) if (raw == "none") { if (x$vartype == "parametric" & !is.null(id) & !is.null(x$eff.boot)) { subset.eff.boot <- sapply(seq_len(dim(x$eff.boot)[3]), function(j) { - rowMeans(x$eff.boot[,which(tr %in% subset.tr),j], na.rm = TRUE) + rowMeans(x$eff.boot[, which(tr %in% subset.tr), j], na.rm = TRUE) }) - if (plot.ci == "95"){ + if (plot.ci == "95") { para.ci <- basic_ci_alpha( rowMeans(subset.eff.boot), subset.eff.boot, @@ -1438,99 +1519,133 @@ plot.fect <- function( ) x$Y.avg <- data.frame( period = x$rawtime, - lower.tr = Yb[,"Tr_Avg"], - upper.tr = Yb[,"Tr_Avg"], - lower.ct = Yb[,"Ct_Avg"] + para.ci[ , "upper"], - upper.ct = Yb[,"Ct_Avg"] + para.ci[ , "lower"] + lower.tr = Yb[, "Tr_Avg"], + upper.tr = Yb[, "Tr_Avg"], + lower.ct = Yb[, "Ct_Avg"] + para.ci[, "upper"], + upper.ct = Yb[, "Ct_Avg"] + para.ci[, "lower"] ) - } else if (plot.ci == "90"){ + } else if (plot.ci == "90") { para.ci <- basic_ci_alpha( rowMeans(subset.eff.boot), subset.eff.boot, alpha = 0.1 ) x$Y.avg <- data.frame( - period = x$rawtime, - lower90.tr = Yb[,"Tr_Avg"], - upper90.tr = Yb[,"Tr_Avg"], - lower90.ct = Yb[,"Ct_Avg"] + para.ci[ , "upper"], - upper90.ct = Yb[,"Ct_Avg"] + para.ci[ , "lower"] + period = x$rawtime, + lower90.tr = Yb[, "Tr_Avg"], + upper90.tr = Yb[, "Tr_Avg"], + lower90.ct = Yb[, "Ct_Avg"] + para.ci[, "upper"], + upper90.ct = Yb[, "Ct_Avg"] + para.ci[, "lower"] ) - } else{ + } else { warning("Invalid plot.ci provided.") } - } else if (x$vartype == "parametric" & raw == "none" & is.null(id)){ - if (plot.ci == "95"){ + } else if (x$vartype == "parametric" & raw == "none" & is.null(id)) { + if (plot.ci == "95") { x$Y.avg <- data.frame( period = x$rawtime, - lower.tr = Yb[,"Tr_Avg"], - upper.tr = Yb[,"Tr_Avg"], - lower.ct = Yb[,"Ct_Avg"] - (x$est.att[, "CI.upper"]-x$est.att[,"ATT"]), - upper.ct = Yb[,"Ct_Avg"] - (x$est.att[, "CI.lower"]-x$est.att[,"ATT"]) + lower.tr = Yb[, "Tr_Avg"], + upper.tr = Yb[, "Tr_Avg"], + lower.ct = Yb[, "Ct_Avg"] - (x$est.att[, "CI.upper"] - x$est.att[, "ATT"]), + upper.ct = Yb[, "Ct_Avg"] - (x$est.att[, "CI.lower"] - x$est.att[, "ATT"]) ) - } else if (plot.ci == "90"){ + } else if (plot.ci == "90") { x$Y.avg <- data.frame( - period = x$rawtime, - lower90.tr = Yb[,"Tr_Avg"], - upper90.tr = Yb[,"Tr_Avg"], - lower90.ct = Yb[,"Ct_Avg"] - (x$est.att90[, "CI.upper"]-x$est.att90[,"ATT"]), - upper90.ct = Yb[,"Ct_Avg"] - (x$est.att90[, "CI.lower"]-x$est.att90[,"ATT"]) + period = x$rawtime, + lower90.tr = Yb[, "Tr_Avg"], + upper90.tr = Yb[, "Tr_Avg"], + lower90.ct = Yb[, "Ct_Avg"] - (x$est.att90[, "CI.upper"] - x$est.att90[, "ATT"]), + upper90.ct = Yb[, "Ct_Avg"] - (x$est.att90[, "CI.lower"] - x$est.att90[, "ATT"]) ) - } else{ + } else { warning("Invalid plot.ci provided.") } } data_plot_abs <- data.frame(time = rep(plot_time_abs[show_abs], 2), outcome = c(Yb_show_abs[, 1], Yb_show_abs[, 2]), type = factor(c(rep("tr", nT_abs), rep("co", nT_abs)), 
levels = c("tr", "co"))) p <- ggplot() # Start with empty ggplot for CI if (plot.ci %in% c("90", "95") && !is.null(x$Y.avg)) { - tr_lo_col <- if (plot.ci == "95") "lower.tr" else "lower90.tr"; tr_hi_col <- if (plot.ci == "95") "upper.tr" else "upper90.tr"; cf_lo_col <- if (plot.ci == "95") "lower.ct" else "lower90.ct"; cf_hi_col <- if (plot.ci == "95") "upper.ct" else "upper90.ct" + tr_lo_col <- if (plot.ci == "95") "lower.tr" else "lower90.tr" + tr_hi_col <- if (plot.ci == "95") "upper.tr" else "upper90.tr" + cf_lo_col <- if (plot.ci == "95") "lower.ct" else "lower90.ct" + cf_hi_col <- if (plot.ci == "95") "upper.ct" else "upper90.ct" required_cols_ci <- c("period", tr_lo_col, tr_hi_col, cf_lo_col, cf_hi_col) if (all(required_cols_ci %in% names(x$Y.avg))) { - ci_data_abs <- x$Y.avg; ci_data_filtered_abs <- NULL + ci_data_abs <- x$Y.avg + ci_data_filtered_abs <- NULL # Filter CI data based on the points being shown - ci_data_filtered_abs <- ci_data_abs[show_abs,] + ci_data_filtered_abs <- ci_data_abs[show_abs, ] if (!is.null(ci_data_filtered_abs) && nrow(ci_data_filtered_abs) > 0) { p <- p + geom_ribbon(data = ci_data_filtered_abs, aes(x = plot_time_abs[show_abs], ymin = .data[[tr_lo_col]], ymax = .data[[tr_hi_col]], fill = "tr"), alpha = 0.2, color = if (ci.outline) adjustcolor(color, offset = c(0.3, 0.3, 0.3, 0)) else NA, inherit.aes = FALSE) + geom_ribbon(data = ci_data_filtered_abs, aes(x = plot_time_abs[show_abs], ymin = .data[[cf_lo_col]], ymax = .data[[cf_hi_col]], fill = "co"), alpha = 0.2, color = if (ci.outline) adjustcolor(counterfactual.color, offset = c(0.3, 0.3, 0.3, 0)) else NA, inherit.aes = FALSE) y_data_for_range_calc <- c(y_data_for_range_calc, ci_data_filtered_abs[[tr_lo_col]], ci_data_filtered_abs[[tr_hi_col]], ci_data_filtered_abs[[cf_lo_col]], ci_data_filtered_abs[[cf_hi_col]]) } else { - warning("No CI data for treated/counterfactual averages.") } - } else {warning("CI columns not found for treated/counterfactual averages.") } + warning("No CI data for treated/counterfactual averages.") + } + } else { + warning("CI columns not found for treated/counterfactual averages.") + } } p <- p + geom_line(data = data_plot_abs, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) - set.limits <- c("tr", "co"); set.labels <- c("Treated Average", "Estimated Y(0) Average"); set.colors <- c(color, counterfactual.color); set.linetypes <- c("solid", counterfactual.linetype); set.linewidth <- c(est.lwidth, est.lwidth); set.fill <- c(color, counterfactual.color) # fill for CI ribbons + set.limits <- c("tr", "co") + set.labels <- c("Treated Average", "Estimated Y(0) Average") + set.colors <- c(color, counterfactual.color) + set.linetypes <- c("solid", counterfactual.linetype) + set.linewidth <- c(est.lwidth, est.lwidth) + set.fill <- c(color, counterfactual.color) # fill for CI ribbons } else if (raw == "band") { - Y.tr.quantiles <- t(apply(Y.tr[show_abs, , drop=FALSE], 1, quantile, prob = c(0.05, 0.95), na.rm = TRUE)) - Y.co.quantiles <- t(apply(Y.co[show_abs, , drop=FALSE], 1, quantile, prob = c(0.05, 0.95), na.rm = TRUE)) + Y.tr.quantiles <- t(apply(Y.tr[show_abs, , drop = FALSE], 1, quantile, prob = c(0.05, 0.95), na.rm = TRUE)) + Y.co.quantiles <- t(apply(Y.co[show_abs, , drop = FALSE], 1, quantile, prob = c(0.05, 0.95), na.rm = TRUE)) main_lines_data_abs <- data.frame(time = rep(plot_time_abs[show_abs], 2), outcome = c(Yb_show_abs[, 1], Yb_show_abs[, 2]), type = factor(c(rep("tr", nT_abs), rep("co", nT_abs)), levels = c("tr", "co", "tr_band", "co_band"))) 
- data.band_tr_abs <- data.frame(time = plot_time_abs[show_abs], tr5 = Y.tr.quantiles[, 1], tr95 = Y.tr.quantiles[, 2], type="tr_band") - data.band_co_abs <- data.frame(time = plot_time_abs[show_abs], co5 = Y.co.quantiles[, 1], co95 = Y.co.quantiles[, 2], type="co_band") + data.band_tr_abs <- data.frame(time = plot_time_abs[show_abs], tr5 = Y.tr.quantiles[, 1], tr95 = Y.tr.quantiles[, 2], type = "tr_band") + data.band_co_abs <- data.frame(time = plot_time_abs[show_abs], co5 = Y.co.quantiles[, 1], co95 = Y.co.quantiles[, 2], type = "co_band") y_data_for_range_calc <- c(y_data_for_range_calc, data.band_tr_abs$tr5, data.band_tr_abs$tr95, data.band_co_abs$co5, data.band_co_abs$co95) p <- ggplot() + - geom_ribbon(data = data.band_tr_abs, aes(x = time, ymin = tr5, ymax = tr95, fill = type), alpha = 0.15, color = if (ci.outline) adjustcolor(color, offset = c(0.3, 0.3, 0.3, 0)) else NA) + - geom_ribbon(data = data.band_co_abs, aes(x = time, ymin = co5, ymax = co95, fill = type), alpha = 0.15, color = if (ci.outline) adjustcolor(counterfactual.color, offset = c(0.3, 0.3, 0.3, 0)) else NA) + + geom_ribbon(data = data.band_tr_abs, aes(x = time, ymin = .data$tr5, ymax = .data$tr95, fill = type), alpha = 0.15, color = if (ci.outline) adjustcolor(color, offset = c(0.3, 0.3, 0.3, 0)) else NA) + + geom_ribbon(data = data.band_co_abs, aes(x = time, ymin = .data$co5, ymax = .data$co95, fill = type), alpha = 0.15, color = if (ci.outline) adjustcolor(counterfactual.color, offset = c(0.3, 0.3, 0.3, 0)) else NA) + geom_line(data = main_lines_data_abs, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) - set.limits <- c("tr", "co", "tr_band", "co_band"); set.labels <- c("Treated Average", "Estimated Y(0) Average", "Treated (5-95% Quantiles)", "Controls (5-95% Quantiles)") - set.colors <- c(color, counterfactual.color, NA, NA); set.linetypes <- c("solid", counterfactual.linetype, "blank", "blank"); set.linewidth <- c(est.lwidth, est.lwidth, 0, 0); set.fill <- c(NA, NA, color, counterfactual.color) + set.limits <- c("tr", "co", "tr_band", "co_band") + set.labels <- c("Treated Average", "Estimated Y(0) Average", "Treated (5-95% Quantiles)", "Controls (5-95% Quantiles)") + set.colors <- c(color, counterfactual.color, NA, NA) + set.linetypes <- c("solid", counterfactual.linetype, "blank", "blank") + set.linewidth <- c(est.lwidth, est.lwidth, 0, 0) + set.fill <- c(NA, NA, color, counterfactual.color) } else if (raw == "all") { - Y.tr.subset <- Y.tr[show_abs, , drop = FALSE]; Y.co.subset <- Y.co[show_abs, , drop = FALSE] - raw_tr_data_abs <- NULL; if (ncol(Y.tr.subset) > 0 && nrow(Y.tr.subset) > 0) { melt_temp_tr <- reshape2::melt(Y.tr.subset, varnames = c("t", "id"), value.name = "o"); if(nrow(melt_temp_tr)>0) raw_tr_data_abs <- data.frame(time=as.numeric(melt_temp_tr$t), id=as.character(melt_temp_tr$id), outcome=melt_temp_tr$o, type="raw.tr")} - raw_co_data_abs <- NULL; if (ncol(Y.co.subset) > 0 && nrow(Y.co.subset) > 0) { melt_temp_co <- reshape2::melt(Y.co.subset, varnames = c("t", "id"), value.name = "o"); if(nrow(melt_temp_co)>0) raw_co_data_abs <- data.frame(time=as.numeric(melt_temp_co$t), id=as.character(melt_temp_co$id), outcome=melt_temp_co$o, type="raw.co")} - avg_tr_data_abs <- data.frame(time=plot_time_abs[show_abs], outcome=Yb_show_abs[,1], type="tr", id="tr_avg_line") - avg_co_data_abs <- data.frame(time=plot_time_abs[show_abs], outcome=Yb_show_abs[,2], type="co", id="co_avg_line") - y_data_for_range_calc <- c(y_data_for_range_calc, if(!is.null(raw_tr_data_abs)) 
raw_tr_data_abs$outcome else NULL, if(!is.null(raw_co_data_abs)) raw_co_data_abs$outcome else NULL) + Y.tr.subset <- Y.tr[show_abs, , drop = FALSE] + Y.co.subset <- Y.co[show_abs, , drop = FALSE] + raw_tr_data_abs <- NULL + if (ncol(Y.tr.subset) > 0 && nrow(Y.tr.subset) > 0) { + melt_temp_tr <- reshape2::melt(Y.tr.subset, varnames = c("t", "id"), value.name = "o") + if (nrow(melt_temp_tr) > 0) raw_tr_data_abs <- data.frame(time = as.numeric(melt_temp_tr$t), id = as.character(melt_temp_tr$id), outcome = melt_temp_tr$o, type = "raw.tr") + } + raw_co_data_abs <- NULL + if (ncol(Y.co.subset) > 0 && nrow(Y.co.subset) > 0) { + melt_temp_co <- reshape2::melt(Y.co.subset, varnames = c("t", "id"), value.name = "o") + if (nrow(melt_temp_co) > 0) raw_co_data_abs <- data.frame(time = as.numeric(melt_temp_co$t), id = as.character(melt_temp_co$id), outcome = melt_temp_co$o, type = "raw.co") + } + avg_tr_data_abs <- data.frame(time = plot_time_abs[show_abs], outcome = Yb_show_abs[, 1], type = "tr", id = "tr_avg_line") + avg_co_data_abs <- data.frame(time = plot_time_abs[show_abs], outcome = Yb_show_abs[, 2], type = "co", id = "co_avg_line") + y_data_for_range_calc <- c(y_data_for_range_calc, if (!is.null(raw_tr_data_abs)) raw_tr_data_abs$outcome else NULL, if (!is.null(raw_co_data_abs)) raw_co_data_abs$outcome else NULL) p <- ggplot() - if(!is.null(raw_tr_data_abs)) { p <- p + geom_line(data=raw_tr_data_abs, aes(x=time, y=outcome, group=id, colour=type, linetype=type, linewidth=type)) } - if(!is.null(raw_co_data_abs)) { p <- p + geom_line(data=raw_co_data_abs, aes(x=time, y=outcome, group=id, colour=type, linetype=type, linewidth=type)) } - p <- p + geom_line(data=avg_tr_data_abs, aes(x=time, y=outcome, colour=type, linetype=type, linewidth=type)) - p <- p + geom_line(data=avg_co_data_abs, aes(x=time, y=outcome, colour=type, linetype=type, linewidth=type)) - set.limits <- c("tr", "co", "raw.tr", "raw.co"); set.labels <- c("Treated Average", "Estimated Y(0) Average", "Treated Raw Data", "Controls Raw Data") - set.colors <- c(color, counterfactual.color, counterfactual.raw.treated.color, counterfactual.raw.controls.color); set.linetypes <- c("solid", counterfactual.linetype, "solid", "solid") - lw <- c(est.lwidth,est.lwidth/2); set.linewidth <- c(lw[1], lw[1], lw[2], lw[2]) + if (!is.null(raw_tr_data_abs)) { + p <- p + geom_line(data = raw_tr_data_abs, aes(x = time, y = outcome, group = id, colour = type, linetype = type, linewidth = type)) + } + if (!is.null(raw_co_data_abs)) { + p <- p + geom_line(data = raw_co_data_abs, aes(x = time, y = outcome, group = id, colour = type, linetype = type, linewidth = type)) + } + p <- p + geom_line(data = avg_tr_data_abs, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) + p <- p + geom_line(data = avg_co_data_abs, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) + set.limits <- c("tr", "co", "raw.tr", "raw.co") + set.labels <- c("Treated Average", "Estimated Y(0) Average", "Treated Raw Data", "Controls Raw Data") + set.colors <- c(color, counterfactual.color, counterfactual.raw.treated.color, counterfactual.raw.controls.color) + set.linetypes <- c("solid", counterfactual.linetype, "solid", "solid") + lw <- c(est.lwidth, est.lwidth / 2) + set.linewidth <- c(lw[1], lw[1], lw[2], lw[2]) } } p <- p + xlab(xlab_final) + ylab(ylab_final) + theme(legend.position = legend.pos, axis.text.x = element_text(angle = angle, hjust = x.h, vjust = x.v), axis.text = element_text(size = cex.axis), axis.title = element_text(size = 
cex.lab), plot.title = element_text(size = cex.main, hjust = 0.5, face = "bold", margin = margin(10, 0, 10, 0))) - if (theme.bw == TRUE) { p <- p + theme_bw() } + if (theme.bw == TRUE) { + p <- p + theme_bw() + } p <- p + theme( legend.position = legend.pos, axis.text.x = element_text(angle = angle, hjust = x.h, vjust = x.v), @@ -1540,12 +1655,14 @@ plot.fect <- function( ) if (!is.na(vline_pos_abs)) { p <- p + geom_vline(xintercept = vline_pos_abs, colour = lcolor[2], linewidth = lwidth[2], linetype = ltype[2]) - if (shade.post == TRUE) { p <- p + annotate("rect", xmin = vline_pos_abs, xmax = Inf, ymin = -Inf, ymax = Inf, fill = "grey80", alpha = .3) } + if (shade.post == TRUE) { + p <- p + annotate("rect", xmin = vline_pos_abs, xmax = Inf, ymin = -Inf, ymax = Inf, fill = "grey80", alpha = .3) + } } p <- p + scale_colour_manual(name = NULL, limits = set.limits, values = set.colors, labels = set.labels, na.value = NA, drop = FALSE) + scale_linetype_manual(name = NULL, limits = set.limits, values = set.linetypes, labels = set.labels, na.value = "blank", drop = FALSE) + scale_linewidth_manual(name = NULL, limits = set.limits, values = set.linewidth, labels = set.labels, na.value = 0, drop = FALSE) - guide_obj_abs <- guide_legend(title = NULL, ncol = if(length(set.limits) > 2) ceiling(length(set.limits)/2) else length(set.limits)) + guide_obj_abs <- guide_legend(title = NULL, ncol = if (length(set.limits) > 2) ceiling(length(set.limits) / 2) else length(set.limits)) if (exists("set.fill") && !is.null(set.fill) && any(!is.na(set.fill))) { # Check if set.fill has actual values p <- p + scale_fill_manual(name = NULL, limits = set.limits, values = set.fill, labels = set.labels, na.value = NA, drop = FALSE) p <- p + guides(colour = guide_obj_abs, linetype = guide_obj_abs, linewidth = guide_obj_abs, fill = guide_obj_abs) @@ -1564,32 +1681,61 @@ plot.fect <- function( final_main_text <- if (is.null(main)) maintext else if (main == "") NULL else main if (!is.null(final_main_text)) p <- p + ggtitle(final_main_text) if (show.count == TRUE) { - counts_values_abs <- NULL; current_times_abs <- plot_time_abs[show_abs] - if (plot_single_unit_flag) { unit_col_idx_for_count <- which(colnames(Y.tr) == unit_to_plot_name); counts_values_abs <- ifelse(!is.na(Y.tr[show_abs, unit_col_idx_for_count, drop = FALSE]), 1, 0) } - else { counts_values_abs <- apply(Y.tr[show_abs, , drop=FALSE], 1, function(row) sum(!is.na(row))) } - counts_for_plot_df_abs <- data.frame(time = current_times_abs, count = counts_values_abs); counts_for_plot_df_abs <- counts_for_plot_df_abs[counts_for_plot_df_abs$count > 0 & !is.na(counts_for_plot_df_abs$count), ] + counts_values_abs <- NULL + current_times_abs <- plot_time_abs[show_abs] + if (plot_single_unit_flag) { + unit_col_idx_for_count <- which(colnames(Y.tr) == unit_to_plot_name) + counts_values_abs <- ifelse(!is.na(Y.tr[show_abs, unit_col_idx_for_count, drop = FALSE]), 1, 0) + } else { + counts_values_abs <- apply(Y.tr[show_abs, , drop = FALSE], 1, function(row) sum(!is.na(row))) + } + counts_for_plot_df_abs <- data.frame(time = current_times_abs, count = counts_values_abs) + counts_for_plot_df_abs <- counts_for_plot_df_abs[counts_for_plot_df_abs$count > 0 & !is.na(counts_for_plot_df_abs$count), ] if (nrow(counts_for_plot_df_abs) > 0) { max_count_val_abs <- max(counts_for_plot_df_abs$count, na.rm = TRUE) if (max_count_val_abs > 0 && !is.na(max_count_val_abs)) { current_plot_yrange <- NULL - if (!is.null(ylim)) { current_plot_yrange <- ylim } else { if (length(y_data_for_range_calc) == 
0 || all(is.na(y_data_for_range_calc))) { gb <- ggplot_build(p); current_plot_yrange <- gb$layout$panel_scales_y[[1]]$range$range } else { current_plot_yrange <- range(y_data_for_range_calc, na.rm = TRUE) } } - count_bar_space_prop <- 0.20; count_bar_space_height_abs <- (current_plot_yrange[2] - current_plot_yrange[1]) * count_bar_space_prop; actual_rect_length_abs <- count_bar_space_height_abs * 0.8 + if (!is.null(ylim)) { + current_plot_yrange <- ylim + } else { + if (length(y_data_for_range_calc) == 0 || all(is.na(y_data_for_range_calc))) { + gb <- ggplot_build(p) + current_plot_yrange <- gb$layout$panel_scales_y[[1]]$range$range + } else { + current_plot_yrange <- range(y_data_for_range_calc, na.rm = TRUE) + } + } + count_bar_space_prop <- 0.20 + count_bar_space_height_abs <- (current_plot_yrange[2] - current_plot_yrange[1]) * count_bar_space_prop + actual_rect_length_abs <- count_bar_space_height_abs * 0.8 rect_min_val_abs <- if (!is.null(ylim)) ylim[1] else current_plot_yrange[1] - count_bar_space_height_abs - counts_for_plot_df_abs$ymin <- rect_min_val_abs; counts_for_plot_df_abs$ymax <- rect_min_val_abs + (counts_for_plot_df_abs$count / max_count_val_abs) * actual_rect_length_abs - time_step_for_bars_abs <- if(length(unique(counts_for_plot_df_abs$time)) > 1) min(diff(sort(unique(counts_for_plot_df_abs$time))),na.rm=TRUE) else 1; if(!is.finite(time_step_for_bars_abs) || time_step_for_bars_abs <=0) time_step_for_bars_abs <- 1 - bar_width_half_abs <- time_step_for_bars_abs * 0.20; counts_for_plot_df_abs$xmin <- counts_for_plot_df_abs$time - bar_width_half_abs; counts_for_plot_df_abs$xmax <- counts_for_plot_df_abs$time + bar_width_half_abs - max_count_time_pos_abs <- counts_for_plot_df_abs$time[which.max(counts_for_plot_df_abs$count)[1]]; text_y_pos_abs <- rect_min_val_abs + actual_rect_length_abs + (count_bar_space_height_abs - actual_rect_length_abs) * 0.5 - p <- p + geom_rect(data = counts_for_plot_df_abs, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), fill = count.color, color = count.outline.color, inherit.aes = FALSE,alpha = count.alpha,linewidth = 0.2) + annotate("text", x = max_count_time_pos_abs, y = text_y_pos_abs, label = max_count_val_abs, size = cex.text * 0.8, hjust = 0.5, vjust = 0.5) + counts_for_plot_df_abs$ymin <- rect_min_val_abs + counts_for_plot_df_abs$ymax <- rect_min_val_abs + (counts_for_plot_df_abs$count / max_count_val_abs) * actual_rect_length_abs + time_step_for_bars_abs <- if (length(unique(counts_for_plot_df_abs$time)) > 1) min(diff(sort(unique(counts_for_plot_df_abs$time))), na.rm = TRUE) else 1 + if (!is.finite(time_step_for_bars_abs) || time_step_for_bars_abs <= 0) time_step_for_bars_abs <- 1 + bar_width_half_abs <- time_step_for_bars_abs * 0.20 + counts_for_plot_df_abs$xmin <- counts_for_plot_df_abs$time - bar_width_half_abs + counts_for_plot_df_abs$xmax <- counts_for_plot_df_abs$time + bar_width_half_abs + max_count_time_pos_abs <- counts_for_plot_df_abs$time[which.max(counts_for_plot_df_abs$count)[1]] + text_y_pos_abs <- rect_min_val_abs + actual_rect_length_abs + (count_bar_space_height_abs - actual_rect_length_abs) * 0.5 + p <- p + geom_rect(data = counts_for_plot_df_abs, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), fill = count.color, color = count.outline.color, inherit.aes = FALSE, alpha = count.alpha, linewidth = 0.2) + annotate("text", x = max_count_time_pos_abs, y = text_y_pos_abs, label = max_count_val_abs, size = cex.text * 0.8, hjust = 0.5, vjust = 0.5) } } } coord_args_abs <- list(clip = "on", expand = TRUE) - if 
(!is.null(ylim)) { coord_args_abs$ylim <- ylim } else if (show.count == TRUE && exists("rect_min_val_abs") && exists("counts_for_plot_df_abs") && nrow(counts_for_plot_df_abs) > 0 && exists("current_plot_yrange") && !is.null(current_plot_yrange)) { final_yrange_min <- min(current_plot_yrange[1], rect_min_val_abs, na.rm=TRUE); final_yrange_max <- current_plot_yrange[2]; coord_args_abs$ylim <- c(final_yrange_min, final_yrange_max) } + if (!is.null(ylim)) { + coord_args_abs$ylim <- ylim + } else if (show.count == TRUE && exists("rect_min_val_abs") && exists("counts_for_plot_df_abs") && nrow(counts_for_plot_df_abs) > 0 && exists("current_plot_yrange") && !is.null(current_plot_yrange)) { + final_yrange_min <- min(current_plot_yrange[1], rect_min_val_abs, na.rm = TRUE) + final_yrange_max <- current_plot_yrange[2] + coord_args_abs$ylim <- c(final_yrange_min, final_yrange_max) + } # Use the padded plot_xlim for the final axis range - if (!is.null(plot_xlim)) { coord_args_abs$xlim <- plot_xlim } + if (!is.null(plot_xlim)) { + coord_args_abs$xlim <- plot_xlim + } p <- p + do.call(coord_cartesian, coord_args_abs) p <- p + theme(legend.position = legend.pos) - } else { # Case 2: Staggered Adoption (Different T0) -> Relative Time Plot xx <- ct.adjust(Y.tr, Y.ct, T0, D_mat, tr_idx_logical) event_time_full_series <- xx$timeline @@ -1644,29 +1790,31 @@ plot.fect <- function( if (nT_on_axis > 0) { if (is.numeric(time_for_plot_axis) && length(time_for_plot_axis) > 1) { pretty_breaks_axis <- pretty(time_for_plot_axis) - pretty_breaks_axis <- pretty_breaks_axis[pretty_breaks_axis >= min(time_for_plot_axis, na.rm=TRUE) & pretty_breaks_axis <= max(time_for_plot_axis, na.rm=TRUE)] + pretty_breaks_axis <- pretty_breaks_axis[pretty_breaks_axis >= min(time_for_plot_axis, na.rm = TRUE) & pretty_breaks_axis <= max(time_for_plot_axis, na.rm = TRUE)] if (length(pretty_breaks_axis) > 0) { T_b_axis_ticks_indices <- sapply(pretty_breaks_axis, function(br) which.min(abs(time_for_plot_axis - br))) T_b_axis_ticks_indices <- unique(T_b_axis_ticks_indices) } } if (length(T_b_axis_ticks_indices) == 0) { - max_labels <- 10; step <- max(1, floor(length(time_for_plot_axis) / max_labels)) + max_labels <- 10 + step <- max(1, floor(length(time_for_plot_axis) / max_labels)) T_b_axis_ticks_indices <- seq(1, length(time_for_plot_axis), by = step) } } - y_data_for_range_calc <- c(Yb_data_subset[,1], Yb_data_subset[,2]) + y_data_for_range_calc <- c(Yb_data_subset[, 1], Yb_data_subset[, 2]) if (raw == "none") { if (x$vartype == "parametric" & !is.null(id) & !is.null(x$eff.boot)) { subset.eff.boot <- sapply(seq_len(dim(x$eff.boot)[3]), function(j) { rowMeans(align_time_series( - x$eff.boot[,which(tr %in% subset.tr),j], + x$eff.boot[, which(tr %in% subset.tr), j], T0, - D_mat, tr_idx_logical)$Y_aug, na.rm = TRUE) + D_mat, tr_idx_logical + )$Y_aug, na.rm = TRUE) }) - if (plot.ci == "95"){ + if (plot.ci == "95") { para.ci <- basic_ci_alpha( rowMeans(subset.eff.boot), subset.eff.boot, @@ -1674,45 +1822,45 @@ plot.fect <- function( ) x$Y.avg <- data.frame( period = xx$timeline, - lower.tr = xx$Yb[,"Y.tr.bar"], - upper.tr = xx$Yb[,"Y.tr.bar"], - lower.ct = xx$Yb[,"Y.ct.bar"] + para.ci[ , "upper"], - upper.ct = xx$Yb[,"Y.ct.bar"] + para.ci[ , "lower"] + lower.tr = xx$Yb[, "Y.tr.bar"], + upper.tr = xx$Yb[, "Y.tr.bar"], + lower.ct = xx$Yb[, "Y.ct.bar"] + para.ci[, "upper"], + upper.ct = xx$Yb[, "Y.ct.bar"] + para.ci[, "lower"] ) - } else if (plot.ci == "90"){ + } else if (plot.ci == "90") { para.ci <- basic_ci_alpha( rowMeans(subset.eff.boot), 
subset.eff.boot, alpha = 0.1 ) x$Y.avg <- data.frame( - period = xx$timeline, - lower90.tr = xx$Yb[,"Y.tr.bar"], - upper90.tr = xx$Yb[,"Y.tr.bar"], - lower90.ct = xx$Yb[,"Y.ct.bar"] + para.ci[ , "upper"], - upper90.ct = xx$Yb[,"Y.ct.bar"] + para.ci[ , "lower"] + period = xx$timeline, + lower90.tr = xx$Yb[, "Y.tr.bar"], + upper90.tr = xx$Yb[, "Y.tr.bar"], + lower90.ct = xx$Yb[, "Y.ct.bar"] + para.ci[, "upper"], + upper90.ct = xx$Yb[, "Y.ct.bar"] + para.ci[, "lower"] ) - } else{ + } else { warning("Invalid plot.ci provided.") } - } else if (x$vartype == "parametric" & raw == "none" & is.null(id)){ - if (plot.ci == "95"){ + } else if (x$vartype == "parametric" & raw == "none" & is.null(id)) { + if (plot.ci == "95") { x$Y.avg <- data.frame( period = xx$timeline, - lower.tr = xx$Yb[,"Y.tr.bar"], - upper.tr = xx$Yb[,"Y.tr.bar"], - lower.ct = xx$Yb[,"Y.ct.bar"] - (x$est.att[, "CI.upper"]-x$est.att[,"ATT"]), - upper.ct = xx$Yb[,"Y.ct.bar"] - (x$est.att[, "CI.lower"]-x$est.att[,"ATT"]) + lower.tr = xx$Yb[, "Y.tr.bar"], + upper.tr = xx$Yb[, "Y.tr.bar"], + lower.ct = xx$Yb[, "Y.ct.bar"] - (x$est.att[, "CI.upper"] - x$est.att[, "ATT"]), + upper.ct = xx$Yb[, "Y.ct.bar"] - (x$est.att[, "CI.lower"] - x$est.att[, "ATT"]) ) - } else if (plot.ci == "90"){ + } else if (plot.ci == "90") { x$Y.avg <- data.frame( - period = xx$timeline, - lower90.tr = xx$Yb[,"Y.tr.bar"], - upper90.tr = xx$Yb[,"Y.tr.bar"], - lower90.ct = xx$Yb[,"Y.ct.bar"] - (x$est.att90[, "CI.upper"]-x$est.att90[,"ATT"]), - upper90.ct = xx$Yb[,"Y.ct.bar"] - (x$est.att90[, "CI.lower"]-x$est.att90[,"ATT"]) + period = xx$timeline, + lower90.tr = xx$Yb[, "Y.tr.bar"], + upper90.tr = xx$Yb[, "Y.tr.bar"], + lower90.ct = xx$Yb[, "Y.ct.bar"] - (x$est.att90[, "CI.upper"] - x$est.att90[, "ATT"]), + upper90.ct = xx$Yb[, "Y.ct.bar"] - (x$est.att90[, "CI.lower"] - x$est.att90[, "ATT"]) ) - } else{ + } else { warning("Invalid plot.ci provided.") } } @@ -1723,8 +1871,10 @@ plot.fect <- function( ) p <- ggplot() if (plot.ci %in% c("90", "95") && !is.null(x$Y.avg)) { - tr_lo_col <- if (plot.ci == "95") "lower.tr" else "lower90.tr"; tr_hi_col <- if (plot.ci == "95") "upper.tr" else "upper90.tr" - cf_lo_col <- if (plot.ci == "95") "lower.ct" else "lower90.ct"; cf_hi_col <- if (plot.ci == "95") "upper.ct" else "upper90.ct" + tr_lo_col <- if (plot.ci == "95") "lower.tr" else "lower90.tr" + tr_hi_col <- if (plot.ci == "95") "upper.tr" else "upper90.tr" + cf_lo_col <- if (plot.ci == "95") "lower.ct" else "lower90.ct" + cf_hi_col <- if (plot.ci == "95") "upper.ct" else "upper90.ct" required_cols_ci <- c("period", tr_lo_col, tr_hi_col, cf_lo_col, cf_hi_col) if (all(required_cols_ci %in% names(x$Y.avg))) { @@ -1736,15 +1886,23 @@ plot.fect <- function( ci_data_filtered <- ci_data_for_plot[ci_data_for_plot$period %in% event_times_for_data_subset, ] if (!is.null(ci_data_filtered) && nrow(ci_data_filtered) > 0) { - p <- p + geom_ribbon(data = ci_data_filtered, aes(x = time_axis_period, ymin = .data[[tr_lo_col]], ymax = .data[[tr_hi_col]], fill = "tr"), alpha = 0.2, color = if (ci.outline) adjustcolor(color, offset = c(0.3, 0.3, 0.3, 0)) else NA, inherit.aes = FALSE) + - geom_ribbon(data = ci_data_filtered, aes(x = time_axis_period, ymin = .data[[cf_lo_col]], ymax = .data[[cf_hi_col]], fill = "co"), alpha = 0.2, color = if (ci.outline) adjustcolor(counterfactual.color, offset = c(0.3, 0.3, 0.3, 0)) else NA, inherit.aes = FALSE) + p <- p + geom_ribbon(data = ci_data_filtered, aes(x = .data$time_axis_period, ymin = .data[[tr_lo_col]], ymax = .data[[tr_hi_col]], fill = "tr"), 
alpha = 0.2, color = if (ci.outline) adjustcolor(color, offset = c(0.3, 0.3, 0.3, 0)) else NA, inherit.aes = FALSE) + + geom_ribbon(data = ci_data_filtered, aes(x = .data$time_axis_period, ymin = .data[[cf_lo_col]], ymax = .data[[cf_hi_col]], fill = "co"), alpha = 0.2, color = if (ci.outline) adjustcolor(counterfactual.color, offset = c(0.3, 0.3, 0.3, 0)) else NA, inherit.aes = FALSE) y_data_for_range_calc <- c(y_data_for_range_calc, ci_data_filtered[[tr_lo_col]], ci_data_filtered[[tr_hi_col]], ci_data_filtered[[cf_lo_col]], ci_data_filtered[[cf_hi_col]]) - } else { warning("No CI data for treated/counterfactual averages.") } - } else { warning("CI columns not found for treated/counterfactual averages.") } + } else { + warning("No CI data for treated/counterfactual averages.") + } + } else { + warning("CI columns not found for treated/counterfactual averages.") + } } p <- p + geom_line(data = data_plot_main, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) - set.limits <- c("tr", "co"); set.labels <- c("Treated Average", "Estimated Y(0) Average"); set.colors <- c(color, counterfactual.color); set.linetypes <- c("solid", counterfactual.linetype); set.linewidth <- c(est.lwidth, est.lwidth); set.fill <- c(color, counterfactual.color) - + set.limits <- c("tr", "co") + set.labels <- c("Treated Average", "Estimated Y(0) Average") + set.colors <- c(color, counterfactual.color) + set.linetypes <- c("solid", counterfactual.linetype) + set.linewidth <- c(est.lwidth, est.lwidth) + set.fill <- c(color, counterfactual.color) } else if (raw == "band") { Ytr_aug_quantiles <- t(apply(Ytr_aug_data_subset, 1, quantile, prob = c(0.05, 0.95), na.rm = TRUE)) main_lines_data_plot <- data.frame( @@ -1754,19 +1912,23 @@ plot.fect <- function( ) data_band_plot <- data.frame( time = time_for_plot_axis, - tr5 = Ytr_aug_quantiles[, 1], tr95 = Ytr_aug_quantiles[, 2], type="tr_band" + tr5 = Ytr_aug_quantiles[, 1], tr95 = Ytr_aug_quantiles[, 2], type = "tr_band" ) y_data_for_range_calc <- c(y_data_for_range_calc, data_band_plot$tr5, data_band_plot$tr95) p <- ggplot() + - geom_ribbon(data = data_band_plot, aes(x = time, ymin = tr5, ymax = tr95, fill = type), alpha = 0.15, color = if (ci.outline) adjustcolor(color, offset = c(0.3, 0.3, 0.3, 0)) else NA) + + geom_ribbon(data = data_band_plot, aes(x = time, ymin = .data$tr5, ymax = .data$tr95, fill = type), alpha = 0.15, color = if (ci.outline) adjustcolor(color, offset = c(0.3, 0.3, 0.3, 0)) else NA) + geom_line(data = main_lines_data_plot, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) - set.limits <- c("tr", "co", "tr_band"); set.labels <- c("Treated Average", "Estimated Y(0) Average", "Treated (5-95% Quantiles)"); set.colors <- c(color, counterfactual.color, NA); set.linetypes <- c("solid", counterfactual.linetype, "blank"); set.linewidth <- c(est.lwidth, est.lwidth, 0); set.fill <- c(NA, NA, color) - + set.limits <- c("tr", "co", "tr_band") + set.labels <- c("Treated Average", "Estimated Y(0) Average", "Treated (5-95% Quantiles)") + set.colors <- c(color, counterfactual.color, NA) + set.linetypes <- c("solid", counterfactual.linetype, "blank") + set.linewidth <- c(est.lwidth, est.lwidth, 0) + set.fill <- c(NA, NA, color) } else if (raw == "all") { raw_tr_data_plot <- NULL if (ncol(Ytr_aug_data_subset) > 0 && nrow(Ytr_aug_data_subset) > 0) { melt_temp_tr <- reshape2::melt(Ytr_aug_data_subset, varnames = c("event_t_char", "id"), value.name = "o") - if(nrow(melt_temp_tr)>0) { + if (nrow(melt_temp_tr) > 0) { 
plot_time_for_melted_data <- as.numeric(as.character(melt_temp_tr$event_t_char)) if (apply_start0_shift) { plot_time_for_melted_data <- plot_time_for_melted_data - 1 @@ -1775,29 +1937,39 @@ plot.fect <- function( time = plot_time_for_melted_data, id = as.character(melt_temp_tr$id), outcome = melt_temp_tr$o, - type="raw.tr" + type = "raw.tr" ) } } - avg_tr_data_plot <- data.frame(time=time_for_plot_axis, outcome=Yb_data_subset[,1], type="tr", id="tr_avg_line") - avg_co_data_plot <- data.frame(time=time_for_plot_axis, outcome=Yb_data_subset[,2], type="co", id="co_avg_line") - y_data_for_range_calc <- c(y_data_for_range_calc, if(!is.null(raw_tr_data_plot)) raw_tr_data_plot$outcome else NULL) + avg_tr_data_plot <- data.frame(time = time_for_plot_axis, outcome = Yb_data_subset[, 1], type = "tr", id = "tr_avg_line") + avg_co_data_plot <- data.frame(time = time_for_plot_axis, outcome = Yb_data_subset[, 2], type = "co", id = "co_avg_line") + y_data_for_range_calc <- c(y_data_for_range_calc, if (!is.null(raw_tr_data_plot)) raw_tr_data_plot$outcome else NULL) p <- ggplot() - if(!is.null(raw_tr_data_plot)) { p <- p + geom_line(data=raw_tr_data_plot, aes(x=time, y=outcome, group=id, colour=type, linetype=type, linewidth=type)) } - p <- p + geom_line(data=avg_tr_data_plot, aes(x=time, y=outcome, colour=type, linetype=type, linewidth=type)) - p <- p + geom_line(data=avg_co_data_plot, aes(x=time, y=outcome, colour=type, linetype=type, linewidth=type)) - set.limits <- c("tr", "co", "raw.tr"); set.labels <- c("Treated Average", "Estimated Y(0) Average", "Treated Raw Data"); set.colors <- c(color, counterfactual.color, counterfactual.raw.treated.color); set.linetypes <- c("solid", counterfactual.linetype, "solid") - lw <- c(est.lwidth,est.lwidth/2); set.linewidth <- c(lw[1], lw[1], lw[2]) + if (!is.null(raw_tr_data_plot)) { + p <- p + geom_line(data = raw_tr_data_plot, aes(x = time, y = outcome, group = id, colour = type, linetype = type, linewidth = type)) + } + p <- p + geom_line(data = avg_tr_data_plot, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) + p <- p + geom_line(data = avg_co_data_plot, aes(x = time, y = outcome, colour = type, linetype = type, linewidth = type)) + set.limits <- c("tr", "co", "raw.tr") + set.labels <- c("Treated Average", "Estimated Y(0) Average", "Treated Raw Data") + set.colors <- c(color, counterfactual.color, counterfactual.raw.treated.color) + set.linetypes <- c("solid", counterfactual.linetype, "solid") + lw <- c(est.lwidth, est.lwidth / 2) + set.linewidth <- c(lw[1], lw[1], lw[2]) } p <- p + xlab(xlab_final) + ylab(ylab_final) + - geom_vline(xintercept = vline_pos_for_plot_axis, colour = lcolor[2], linewidth = lwidth[2], linetype = ltype[2]) + - theme(legend.position = legend.pos, - axis.text.x = element_text(angle = angle, hjust = x.h, vjust = x.v), - axis.text = element_text(size = cex.axis), - axis.title = element_text(size = cex.lab), - plot.title = element_text(size = cex.main, hjust = 0.5, face = "bold", margin = margin(10, 0, 10, 0))) - if (theme.bw == TRUE) { p <- p + theme_bw() } + geom_vline(xintercept = vline_pos_for_plot_axis, colour = lcolor[2], linewidth = lwidth[2], linetype = ltype[2]) + + theme( + legend.position = legend.pos, + axis.text.x = element_text(angle = angle, hjust = x.h, vjust = x.v), + axis.text = element_text(size = cex.axis), + axis.title = element_text(size = cex.lab), + plot.title = element_text(size = cex.main, hjust = 0.5, face = "bold", margin = margin(10, 0, 10, 0)) + ) + if (theme.bw == TRUE) { + p <- p 
+ theme_bw() + } p <- p + theme( legend.position = legend.pos, axis.text.x = element_text(angle = angle, hjust = x.h, vjust = x.v), @@ -1808,13 +1980,13 @@ plot.fect <- function( if (shade.post == TRUE) { p <- p + annotate("rect", xmin = vline_pos_for_plot_axis, xmax = Inf, ymin = -Inf, ymax = Inf, fill = "grey80", alpha = .3) } - p <- p + scale_colour_manual(name = NULL, limits = set.limits, values = set.colors, labels = set.labels, na.value = NA, drop=FALSE) + - scale_linetype_manual(name = NULL, limits = set.limits, values = set.linetypes, labels = set.labels, na.value = "blank", drop=FALSE) + - scale_linewidth_manual(name = NULL, limits = set.limits, values = set.linewidth, labels = set.labels, na.value = 0, drop=FALSE) + p <- p + scale_colour_manual(name = NULL, limits = set.limits, values = set.colors, labels = set.labels, na.value = NA, drop = FALSE) + + scale_linetype_manual(name = NULL, limits = set.limits, values = set.linetypes, labels = set.labels, na.value = "blank", drop = FALSE) + + scale_linewidth_manual(name = NULL, limits = set.limits, values = set.linewidth, labels = set.labels, na.value = 0, drop = FALSE) - guide_obj_staggered <- guide_legend(title = NULL, ncol = if(length(set.limits) > 2) ceiling(length(set.limits)/2) else length(set.limits)) + guide_obj_staggered <- guide_legend(title = NULL, ncol = if (length(set.limits) > 2) ceiling(length(set.limits) / 2) else length(set.limits)) if (exists("set.fill") && !is.null(set.fill) && any(!is.na(set.fill))) { - p <- p + scale_fill_manual(name = NULL, limits = set.limits, values = set.fill, labels = set.labels, na.value = NA, drop=FALSE) + p <- p + scale_fill_manual(name = NULL, limits = set.limits, values = set.fill, labels = set.labels, na.value = NA, drop = FALSE) p <- p + guides(colour = guide_obj_staggered, linetype = guide_obj_staggered, linewidth = guide_obj_staggered, fill = guide_obj_staggered) } else { p <- p + guides(colour = guide_obj_staggered, linetype = guide_obj_staggered, linewidth = guide_obj_staggered) @@ -1849,7 +2021,8 @@ plot.fect <- function( current_plot_yrange <- ylim } else { if (length(y_data_for_range_calc) == 0 || all(is.na(y_data_for_range_calc))) { - gb <- ggplot_build(p); current_plot_yrange <- gb$layout$panel_scales_y[[1]]$range$range + gb <- ggplot_build(p) + current_plot_yrange <- gb$layout$panel_scales_y[[1]]$range$range } else { current_plot_yrange <- range(y_data_for_range_calc, na.rm = TRUE) } @@ -1862,8 +2035,8 @@ plot.fect <- function( counts_for_plot_df$ymin <- rect_min_val counts_for_plot_df$ymax <- rect_min_val + (counts_for_plot_df$count / max_count_val) * actual_rect_length - time_step_for_bars <- if(length(unique(counts_for_plot_df$time)) > 1) min(diff(sort(unique(counts_for_plot_df$time))), na.rm=TRUE) else 1 - if(!is.finite(time_step_for_bars) || time_step_for_bars <=0) time_step_for_bars <- 1 + time_step_for_bars <- if (length(unique(counts_for_plot_df$time)) > 1) min(diff(sort(unique(counts_for_plot_df$time))), na.rm = TRUE) else 1 + if (!is.finite(time_step_for_bars) || time_step_for_bars <= 0) time_step_for_bars <- 1 bar_width_half <- time_step_for_bars * 0.20 counts_for_plot_df$xmin <- counts_for_plot_df$time - bar_width_half counts_for_plot_df$xmax <- counts_for_plot_df$time + bar_width_half @@ -1871,7 +2044,7 @@ plot.fect <- function( max_count_time_pos <- counts_for_plot_df$time[which.max(counts_for_plot_df$count)[1]] text_y_pos <- rect_min_val + actual_rect_length + (count_bar_space_height - actual_rect_length) * 0.5 - p <- p + geom_rect(data = counts_for_plot_df, 
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), fill = count.color, inherit.aes = FALSE, alpha = count.alpha, color= count.outline.color, linewidth = 0.2) +
+            p <- p + geom_rect(data = counts_for_plot_df, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), fill = count.color, inherit.aes = FALSE, alpha = count.alpha, color = count.outline.color, linewidth = 0.2) +
                 annotate("text", x = max_count_time_pos, y = text_y_pos, label = max_count_val, size = cex.text * 0.8, hjust = 0.5, vjust = 0.5)
             }
         }
@@ -1881,12 +2054,14 @@ plot.fect <- function(
         if (!is.null(ylim)) {
             coord_args_staggered$ylim <- ylim
         } else if (show.count == TRUE && exists("rect_min_val") && exists("counts_for_plot_df") && nrow(counts_for_plot_df) > 0 && exists("current_plot_yrange") && !is.null(current_plot_yrange)) {
-            final_yrange_min <- min(current_plot_yrange[1], rect_min_val, na.rm=TRUE)
+            final_yrange_min <- min(current_plot_yrange[1], rect_min_val, na.rm = TRUE)
             final_yrange_max <- current_plot_yrange[2]
             coord_args_staggered$ylim <- c(final_yrange_min, final_yrange_max)
         }
         # Use the padded plot_xlim for the final axis range
-        if (!is.null(plot_xlim)) { coord_args_staggered$xlim <- plot_xlim }
+        if (!is.null(plot_xlim)) {
+            coord_args_staggered$xlim <- plot_xlim
+        }
         p <- p + do.call(coord_cartesian, coord_args_staggered)
         p <- p + theme(legend.position = legend.pos)
     }
@@ -1944,22 +2119,21 @@ plot.fect <- function(

     ## type of plots
     if (type == "gap" | type == "equiv") {
-        if(loo==TRUE & x$loo==TRUE){
+        if (loo == TRUE & x$loo == TRUE) {
             time.loo <- as.numeric(rownames(x$pre.est.att))
             time.post <- as.numeric(rownames(x$est.att))
-            time.post <- time.post[which(time.post>0)]
-            time <- sort(c(time.loo,time.post))
-            count.num <- c(x$pre.est.att[,'count.on'],x$est.att[which(as.numeric(rownames(x$est.att))>0),'count'])
+            time.post <- time.post[which(time.post > 0)]
+            time <- sort(c(time.loo, time.post))
+            count.num <- c(x$pre.est.att[, "count.on"], x$est.att[which(as.numeric(rownames(x$est.att)) > 0), "count"])
             best.pos <- 1
             max.count <- max(count.num)
-        }else{
+        } else {
             time <- x$time
             count.num <- x$count
             best.pos <- 1
             max.count <- max(count.num)
         }
-    }
-    else if (type == "exit") {
+    } else if (type == "exit") {
         time <- x$time.off
         count.num <- x$count.off
         best.pos <- 0
@@ -1975,7 +2149,7 @@ plot.fect <- function(
     show.time <- 1:time.end
     if (length(xlim) != 0) {
         show.time <- which(time >= xlim[1] & time <= xlim[2])
-        proportion = 0
+        proportion <- 0
     }
     if (type %in% c("gap", "equiv", "exit")) {
         if (is.null(proportion) == TRUE) {
@@ -2238,9 +2412,9 @@ plot.fect <- function(
         if (CI == FALSE) {
             data <- cbind.data.frame(time, ATT = x$att, count = count.num)[show, ]
         } else {
-            tb <- est.att
+            tb <- est.att
             data <- cbind.data.frame(time, tb)[show, ]
-            colnames(data)[2] <- "ATT" # rename 2nd column to "ATT"
+            colnames(data)[2] <- "ATT" # rename 2nd column to "ATT"
             if (plot.ci %in% c("90", "95")) {
                 # Optionally set the names of the CI columns:

@@ -2256,7 +2430,7 @@ plot.fect <- function(
         if (CI == FALSE) {
             data <- cbind.data.frame(time, ATT = x$att.off, count = count.num)[show, ]
         } else {
-            tb <- est.att.off
+            tb <- est.att.off
             data <- cbind.data.frame(time, tb)[show, ]

             # second column is the estimated ATT, rename it
@@ -2344,11 +2518,11 @@ plot.fect <- function(
    # and also in a LOO (Leave-One-Out) context.

    # For standard (non-LOO like) calls
-   cached_test_out_f <- NULL # Results when f.threshold is primary
+   cached_test_out_f <- NULL # Results when f.threshold is primary
    cached_test_out_tost <- NULL # Results when tost.threshold is primary

    # For LOO calls
-   cached_loo_test_out_f <- NULL # LOO results when f.threshold is primary
+   cached_loo_test_out_f <- NULL # LOO results when f.threshold is primary
    cached_loo_test_out_tost <- NULL # LOO results when tost.threshold is primary

    # Determine the effective LOO setting for the current context
@@ -2364,16 +2538,17 @@ plot.fect <- function(
    # --- Path 1: Non-LOO calculations or types other than 'equiv'/'gap' LOO ---
    if (!is_current_scenario_loo) {
        original_x_loo_state <- x$loo # Preserve original x$loo
-       x$loo <- FALSE # Ensure diagtest runs in non-LOO mode
+       x$loo <- FALSE # Ensure diagtest runs in non-LOO mode

        # Category 1.1: F-statistics (F.p, F.stat, F.equiv.p)
        if (stat_name_from_user %in% c("F.p", "F.stat")) {
            # Recalculate if basic parameters changed or if not cached
            if (change.proportion || change.pre.periods || !is.null(show.group) || use.balance || is.null(cached_test_out_f)) {
                cached_test_out_f <- diagtest(x,
-                   proportion = proportion,
-                   pre.periods = pre.periods,
-                   f.threshold = f.threshold)
+                   proportion = proportion,
+                   pre.periods = pre.periods,
+                   f.threshold = f.threshold
+               )
            }
            if (stat_name_from_user == "F.p") value_to_store <- cached_test_out_f$f.p
            if (stat_name_from_user == "F.stat") value_to_store <- cached_test_out_f$f.stat
@@ -2381,9 +2556,10 @@ plot.fect <- function(
            # Recalculate also if f.threshold changed
            if (change.f.threshold || change.proportion || change.pre.periods || !is.null(show.group) || use.balance || is.null(cached_test_out_f)) {
                cached_test_out_f <- diagtest(x,
-                   proportion = proportion,
-                   pre.periods = pre.periods,
-                   f.threshold = f.threshold)
+                   proportion = proportion,
+                   pre.periods = pre.periods,
+                   f.threshold = f.threshold
+               )
            }
            value_to_store <- cached_test_out_f$f.equiv.p
        }
@@ -2425,7 +2601,7 @@ plot.fect <- function(
    # --- Path 2: LOO calculations (only for type 'equiv' or 'gap') ---
    else { # This means is_current_scenario_loo is TRUE
        original_x_loo_state <- x$loo # Preserve original x$loo
-       x$loo <- TRUE # Ensure diagtest runs in LOO mode
+       x$loo <- TRUE # Ensure diagtest runs in LOO mode

        # Category 2.1: LOO F-statistics
        if (stat_name_from_user %in% c("F.p", "F.stat")) {
            # Currently, this recalculates based on general parameter changes or if no cache exists.
            # This version consistently uses a LOO-mode diagtest call.
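# [Editor's note - illustrative aside, not part of the patch] The cached_* objects above implement
# a small memoization scheme: diagtest() is re-run only when an input that feeds the cached result
# has changed (or when nothing is cached yet); otherwise the stored result is reused. The same idea
# as a stand-alone sketch - the wrapper recompute_stats() and its inputs_changed flag are
# hypothetical, only diagtest() and its arguments come from the surrounding code:
#   stats_cache <- NULL
#   recompute_stats <- function(x, proportion, pre.periods, f.threshold, inputs_changed) {
#       if (inputs_changed || is.null(stats_cache)) {
#           stats_cache <<- diagtest(x,
#               proportion = proportion,
#               pre.periods = pre.periods,
#               f.threshold = f.threshold
#           )
#       }
#       stats_cache
#   }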
if (change.proportion || change.pre.periods || !is.null(show.group) || is.null(cached_loo_test_out_f)) { cached_loo_test_out_f <- diagtest(x, - proportion = proportion, - pre.periods = pre.periods, - f.threshold = f.threshold) + proportion = proportion, + pre.periods = pre.periods, + f.threshold = f.threshold + ) } # Assuming diagtest output fields (f.p, f.stat) are the LOO versions when x$loo = TRUE if (stat_name_from_user == "F.p") value_to_store <- cached_loo_test_out_f$f.p @@ -2444,9 +2621,10 @@ plot.fect <- function( } else if (stat_name_from_user == "F.equiv.p") { # LOO F-equivalence p-value if (change.f.threshold || change.proportion || change.pre.periods || !is.null(show.group) || is.null(cached_loo_test_out_f)) { cached_loo_test_out_f <- diagtest(x, - proportion = proportion, - pre.periods = pre.periods, - f.threshold = f.threshold) + proportion = proportion, + pre.periods = pre.periods, + f.threshold = f.threshold + ) } # Assuming diagtest output field f.equiv.p is the LOO version when x$loo = TRUE value_to_store <- cached_loo_test_out_f$f.equiv.p @@ -2456,9 +2634,10 @@ plot.fect <- function( else if (stat_name_from_user == "equiv.p") { if (change.tost.threshold || change.proportion || change.pre.periods || !is.null(show.group) || is.null(cached_loo_test_out_tost)) { cached_loo_test_out_tost <- diagtest(x, - proportion = proportion, - pre.periods = pre.periods, - tost.threshold = tost.threshold) + proportion = proportion, + pre.periods = pre.periods, + tost.threshold = tost.threshold + ) } # Assuming diagtest output field tost.equiv.p is the LOO version when x$loo = TRUE value_to_store <- cached_loo_test_out_tost$tost.equiv.p @@ -2478,212 +2657,223 @@ plot.fect <- function( p.label <- if (length(p.label.lines) > 0) paste(p.label.lines, collapse = "\n") else NULL if (type == "equiv" && is.null(ylim)) { - ylim = c(-max(abs(data2$bound))*1.4,max(abs(data2$bound))*1.4) + ylim <- c(-max(abs(data2$bound)) * 1.4, max(abs(data2$bound)) * 1.4) } ## point estimates if (classic == 1) { - # # --- REGULAR EVENT-STUDY PLOT (no highlights) --- # # 1) Build a data frame that esplot() expects data_es <- data.frame( - Period = data$time, - ATT = data$ATT, - `S.E.` = if (CI) data$S.E. else NA, # if you have standard errors - CI.lower = if (CI) { + Period = data$time, + ATT = data$ATT, + `S.E.` = if (CI) data$S.E. 
else NA, # if you have standard errors + CI.lower = if (CI) { if (plot.ci == "95") data$CI.lower else data$CI.lower.90 - } else NA, - CI.upper = if (CI) { + } else { + NA + }, + CI.upper = if (CI) { if (plot.ci == "95") data$CI.upper else data$CI.upper.90 - } else NA, - count = if (!is.null(data$count)) data$count else NA + } else { + NA + }, + count = if (!is.null(data$count)) data$count else NA ) # 2) Call esplot() p <- esplot( data_es, - Period = "Period", - Estimate = "ATT", - SE = "S.E.", - CI.lower = "CI.lower", - CI.upper = "CI.upper", - Count = "count", - show.count = show.count, + Period = "Period", + Estimate = "ATT", + SE = "S.E.", + CI.lower = "CI.lower", + CI.upper = "CI.upper", + Count = "count", + show.count = show.count, show.points = show.points, ci.outline = ci.outline, - connected = connected, - color = color, - count.color = count.color, + connected = connected, + color = color, + count.color = count.color, count.alpha = count.alpha, count.outline.color = count.outline.color, - xlab = xlab, - ylab = ylab, - main = main, - xlim = xlim, - ylim = ylim, - gridOff = gridOff, - start0 = start0, - cex.main = cex.main/16, - cex.axis = cex.axis/15, - cex.lab = cex.lab/15, - cex.text = cex.text/5, + xlab = xlab, + ylab = ylab, + main = main, + xlim = xlim, + ylim = ylim, + gridOff = gridOff, + start0 = start0, + cex.main = cex.main / 16, + cex.axis = cex.axis / 15, + cex.lab = cex.lab / 15, + cex.text = cex.text / 5, proportion = proportion, est.lwidth = est.lwidth, - est.pointsize = est.pointsize, + est.pointsize = est.pointsize, fill.gap = FALSE, lcolor = lcolor, lwidth = lwidth, ltype = ltype, axis.adjust = axis.adjust, - stats = if(exists("stats_values")) stats_values else NULL, - stats.labs = if(exists("stats_labels")) stats_labels else NULL, - stats.pos = stats.pos, + stats = if (exists("stats_values")) stats_values else NULL, + stats.labs = if (exists("stats_labels")) stats_labels else NULL, + stats.pos = stats.pos, theme.bw = theme.bw, - only.pre = type == "equiv") - + only.pre = type == "equiv" + ) } else if (classic == 0 && switch.on == TRUE) { - # # --- PLACEBO TEST PLOT --- # data_es <- data.frame( - Period = data$time, - ATT = data$ATT, - `S.E.` = if (CI) data$S.E. else NA, - CI.lower = if (CI) { + Period = data$time, + ATT = data$ATT, + `S.E.` = if (CI) data$S.E. 
else NA, + CI.lower = if (CI) { if (plot.ci == "95") data$CI.lower else data$CI.lower.90 - } else NA, - CI.upper = if (CI) { + } else { + NA + }, + CI.upper = if (CI) { if (plot.ci == "95") data$CI.upper else data$CI.upper.90 - } else NA, - count = if (!is.null(data$count)) data$count else NA + } else { + NA + }, + count = if (!is.null(data$count)) data$count else NA ) # highlight the placebo interval [placebo.period[1], placebo.period[2]] placebo_seq <- seq(placebo.period[1], placebo.period[2]) - n_placebo <- length(placebo_seq) + n_placebo <- length(placebo_seq) p <- esplot( data_es, - Period = "Period", - Estimate = "ATT", - SE = "S.E.", - CI.lower = "CI.lower", - CI.upper = "CI.upper", - Count = "count", - show.count = show.count, - connected = connected, - color = color, - count.color = count.color, + Period = "Period", + Estimate = "ATT", + SE = "S.E.", + CI.lower = "CI.lower", + CI.upper = "CI.upper", + Count = "count", + show.count = show.count, + connected = connected, + color = color, + count.color = count.color, count.alpha = count.alpha, count.outline.color = count.outline.color, show.points = show.points, ci.outline = ci.outline, highlight.periods = placebo_seq, - highlight.colors = rep(placebo.color, n_placebo), - xlab = xlab, - ylab = ylab, - main = main, - xlim = xlim, - ylim = ylim, - gridOff = gridOff, - start0 = start0, + highlight.colors = rep(placebo.color, n_placebo), + xlab = xlab, + ylab = ylab, + main = main, + xlim = xlim, + ylim = ylim, + gridOff = gridOff, + start0 = start0, proportion = proportion, fill.gap = FALSE, lcolor = lcolor, lwidth = lwidth, ltype = ltype, - cex.main = cex.main/16, - cex.axis = cex.axis/15, - cex.lab = cex.lab/15, - cex.text = cex.text/5, + cex.main = cex.main / 16, + cex.axis = cex.axis / 15, + cex.lab = cex.lab / 15, + cex.text = cex.text / 5, axis.adjust = axis.adjust, est.lwidth = est.lwidth, - est.pointsize = est.pointsize, - stats = if(exists("stats_values")) stats_values else NULL, - stats.labs = if(exists("stats_labels")) stats_labels else NULL, + est.pointsize = est.pointsize, + stats = if (exists("stats_values")) stats_values else NULL, + stats.labs = if (exists("stats_labels")) stats_labels else NULL, theme.bw = theme.bw, - stats.pos = stats.pos) - + stats.pos = stats.pos + ) } else if (classic == 0 && switch.on == FALSE) { # # --- CARRYOVER TEST OR EXITING TREATMENT --- # if (is.null(x$est.carry.att)) { placebo_seq <- c() - n_placebo <- 0 - shift = 0 - } else{ - placebo_seq <- seq(carryover.period[1], carryover.period[1]-1+dim(x$est.carry.att)[1]) - n_placebo <- length(placebo_seq) - shift = dim(x$est.carry.att)[1] + n_placebo <- 0 + shift <- 0 + } else { + placebo_seq <- seq(carryover.period[1], carryover.period[1] - 1 + dim(x$est.carry.att)[1]) + n_placebo <- length(placebo_seq) + shift <- dim(x$est.carry.att)[1] } data_es <- data.frame( - Period = data$time + shift, - ATT = data$ATT, - `S.E.` = if (CI) data$S.E. else NA, - CI.lower = if (CI) { + Period = data$time + shift, + ATT = data$ATT, + `S.E.` = if (CI) data$S.E. 
else NA, + CI.lower = if (CI) { if (plot.ci == "95") data$CI.lower else data$CI.lower.90 - } else NA, - CI.upper = if (CI) { + } else { + NA + }, + CI.upper = if (CI) { if (plot.ci == "95") data$CI.upper else data$CI.upper.90 - } else NA, - count = if (!is.null(data$count)) data$count else NA + } else { + NA + }, + count = if (!is.null(data$count)) data$count else NA ) carry_seq <- seq(carryover.period[1] + shift, carryover.period[2] + shift) - n_carry <- length(carry_seq) + n_carry <- length(carry_seq) p <- esplot( data_es, - Period = "Period", - Estimate = "ATT", - SE = "S.E.", - CI.lower = "CI.lower", - CI.upper = "CI.upper", - Count = "count", - show.count = show.count, + Period = "Period", + Estimate = "ATT", + SE = "S.E.", + CI.lower = "CI.lower", + CI.upper = "CI.upper", + Count = "count", + show.count = show.count, show.points = show.points, ci.outline = ci.outline, - connected = connected, - color = color, - count.color = count.color, + connected = connected, + color = color, + count.color = count.color, count.alpha = count.alpha, count.outline.color = count.outline.color, - highlight.periods = c(placebo_seq,carry_seq), - highlight.colors = c(rep(carryover.rm.color, n_placebo),rep(carryover.color, n_carry)), - xlab = xlab, - ylab = ylab, - main = main, - xlim = xlim, - ylim = ylim, - gridOff = gridOff, + highlight.periods = c(placebo_seq, carry_seq), + highlight.colors = c(rep(carryover.rm.color, n_placebo), rep(carryover.color, n_carry)), + xlab = xlab, + ylab = ylab, + main = main, + xlim = xlim, + ylim = ylim, + gridOff = gridOff, fill.gap = FALSE, lcolor = lcolor, lwidth = lwidth, ltype = ltype, - cex.main = cex.main/16, - cex.axis = cex.axis/15, - cex.lab = cex.lab/15, - cex.text = cex.text/5, + cex.main = cex.main / 16, + cex.axis = cex.axis / 15, + cex.lab = cex.lab / 15, + cex.text = cex.text / 5, axis.adjust = axis.adjust, - start0 = start0, + start0 = start0, proportion = proportion, ## newly added options: est.lwidth = est.lwidth, - est.pointsize = est.pointsize, - stats = if(exists("stats_values")) stats_values else NULL, - stats.labs = if(exists("stats_labels")) stats_labels else NULL, + est.pointsize = est.pointsize, + stats = if (exists("stats_values")) stats_values else NULL, + stats.labs = if (exists("stats_labels")) stats_labels else NULL, theme.bw = theme.bw, - stats.pos = stats.pos) + stats.pos = stats.pos + ) } # plot bound - if (bound.old != "none") { ## with bounds - p <- p + geom_line(data= data2,aes(time, bound, colour = type, linetype = type, size = type, group = id)) + if (bound.old != "none") { ## with bounds + p <- p + geom_line(data = data2, aes(time, bound, colour = type, linetype = type, size = type, group = id)) ## legends for bounds if (is.null(legend.nrow) == TRUE) { legend.nrow <- ifelse(length(set.limits) <= 3, 1, 2) @@ -2697,9 +2887,7 @@ plot.fect <- function( ) p <- p + theme(legend.position = "bottom") - } - } if (type == "calendar") { @@ -2710,8 +2898,8 @@ plot.fect <- function( CI <- TRUE } if (!is.null(provided_xlim)) { - x$est.eff.calendar <- x$est.eff.calendar[which(rownames(x$est.eff.calendar) >= min(provided_xlim) & rownames(x$est.eff.calendar) <= max(provided_xlim)), ] - x$est.eff.calendar.fit <- x$est.eff.calendar.fit[which(rownames(x$est.eff.calendar.fit) >= min(provided_xlim) & rownames(x$est.eff.calendar.fit) <= max(provided_xlim)), ] + x$est.eff.calendar <- x$est.eff.calendar[which(rownames(x$est.eff.calendar) >= min(provided_xlim) & rownames(x$est.eff.calendar) <= max(provided_xlim)), ] + x$est.eff.calendar.fit <- 
x$est.eff.calendar.fit[which(rownames(x$est.eff.calendar.fit) >= min(provided_xlim) & rownames(x$est.eff.calendar.fit) <= max(provided_xlim)), ] } if (plot.ci == "none") { CI <- FALSE @@ -2796,12 +2984,12 @@ plot.fect <- function( if (CI == FALSE) { p <- p + geom_hline(yintercept = x$att.avg, color = calendar.lcolor, size = 0.8, linetype = "dashed") - p <- p + geom_point(aes(x = TTT, y = d1[, 1]), color = "gray50", fill = "gray50", alpha = 1, size = 1.2) p <- p + geom_line(aes(x = TTT.2, y = d2[, 1]), color = calendar.color, size = 1.1) + p <- p + geom_point(aes(x = TTT, y = d1[, 1]), color = "gray50", fill = "gray50", alpha = 1, size = 1.2) } else { - p <- p + geom_line(aes(x = TTT.2, y = d2[, 1]), color = calendar.color, size = 1.1) - p <- p + geom_ribbon(aes(x = TTT.2, ymin = d2[, 3], ymax = d2[, 4]), color = calendar.color, fill = calendar.color, alpha = 0.5, size = 0) + p <- p + geom_ribbon(aes(x = TTT.2, ymin = d2[, 3], ymax = d2[, 4]), color = calendar.cicolor, fill = calendar.cicolor, alpha = 0.5, size = 0) p <- p + geom_hline(yintercept = x$att.avg, color = calendar.lcolor, size = 0.8, linetype = "dashed") + p <- p + geom_line(aes(x = TTT.2, y = d2[, 1]), color = calendar.color, size = 1.1) p <- p + geom_pointrange(aes(x = TTT, y = d1[, 1], ymin = d1[, 3], ymax = d1[, 4]), color = "gray50", fill = "gray50", alpha = 1, size = 0.6) } @@ -2824,11 +3012,11 @@ plot.fect <- function( ymax = ymax ) max.count.pos <- mean(TTT[which.max(d1[, "count"])]) - p <- p + geom_rect(aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), data = data.toplot, fill = count.color, size = 0.3, alpha = count.alpha, color = count.outline.color,linewidth =0.2) + p <- p + geom_rect(aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), data = data.toplot, fill = count.color, size = 0.3, alpha = count.alpha, color = count.outline.color, linewidth = 0.2) p <- p + annotate("text", - x = max.count.pos - 0.02 * T.gap, - y = max(data.toplot$ymax) + 0.2 * rect.length, - label = max(x$N.calendar), size = cex.text * 0.8, hjust = 0.5 + x = max.count.pos - 0.02 * T.gap, + y = max(data.toplot$ymax) + 0.2 * rect.length, + label = max(x$N.calendar), size = cex.text * 0.8, hjust = 0.5 ) } @@ -2882,6 +3070,251 @@ plot.fect <- function( ) } + if (type == "heterogeneous") { + Xs <- x[names(x) == "X"][[2]] + + if ((is.null(covariate) == TRUE) && ((dim(Xs) > 2) && (dim(Xs)[3] > 1))) { + stop("Please provide a covariate to plot heterogeneous effects.\n") + } + + if ((is.null(covariate) == FALSE) && (!covariate %in% x$X)) { + stop("Please provide a valid covariate to plot heterogeneous effects.\n") + } + + if (is.null(xlab) == TRUE) { + xlab <- covariate + } else if (xlab == "") { + xlab <- NULL + } + + if (is.null(ylab) == TRUE) { + ylab <- ytitle + } else if (ylab == "") { + ylab <- NULL + } + + D.missing <- x$D.dat + D.missing[which(D == 0)] <- NA + D.missing.vec <- as.vector(D.missing) + + eff.vec <- as.vector(x$eff) + X.vec <- as.vector(x[names(x) == "X"][2]$X[, , which(x$X == covariate)]) + + eff.vec <- eff.vec[which(!is.na(D.missing.vec))] + X.vec <- X.vec[which(!is.na(D.missing.vec))] + + j <- order(X.vec) + eff.vec <- eff.vec[j] + X.vec <- X.vec[j] + + if (length(unique(X.vec)) <= 4) { + p <- ggplot() + ## xlab and ylab + p <- p + xlab(xlab) + ylab(ylab) + + ## theme + if (theme.bw == TRUE) { + p <- p + theme_bw() + } + + ## grid + if (gridOff == TRUE) { + p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + } + + ## title + if (is.null(main) == TRUE) { + p <- p + ggtitle(maintext) 
+ } else if (main != "") { + p <- p + ggtitle(main) + } + + ## Build discrete stats at evenly spaced factor positions + x_levels_sorted <- sort(unique(X.vec)) + x_factor <- factor(X.vec, levels = x_levels_sorted, ordered = TRUE) + # stats per level + level_means <- tapply(eff.vec, x_factor, function(v) mean(v, na.rm = TRUE)) + level_ns <- tapply(eff.vec, x_factor, function(v) sum(!is.na(v))) + level_sds <- tapply(eff.vec, x_factor, function(v) sd(v, na.rm = TRUE)) + level_se <- level_sds / sqrt(pmax(level_ns, 1)) + level_qt <- stats::qt(0.975, pmax(level_ns - 1, 1)) + level_ci <- level_qt * level_se + level_lower <- as.numeric(level_means) - level_ci + level_upper <- as.numeric(level_means) + level_ci + data_disc <- cbind.data.frame( + x = factor(names(level_means), levels = names(level_means), ordered = TRUE), + mean = as.numeric(level_means), + lower = level_lower, + upper = level_upper, + count = as.numeric(level_ns) + ) + + ## core geoms (even spacing because x is factor) + p <- p + geom_hline(yintercept = 0, colour = lcolor[1], size = lwidth[1], linetype = ltype[1]) + p <- p + geom_hline(yintercept = x$att.avg, color = heterogeneous.lcolor, size = 0.8, linetype = "dashed") + # nicer CI + mean: thick error bars + solid point + p <- p + geom_linerange( + aes(x = x, ymin = .data$lower, ymax = .data$upper), + data = data_disc, color = heterogeneous.color, linewidth = 0.9 + ) + p <- p + geom_point( + aes(x = x, y = mean), data = data_disc, + shape = 21, fill = "white", color = heterogeneous.color, stroke = 1, size = 2.6 + ) + + ## bottom rectangles for counts under each discrete level (stick to very bottom) + if (show.count == TRUE && any(!is.na(data_disc$count))) { + # obtain current plot y-range from built plot if ylim not set + current_plot_yrange <- NULL + if (!is.null(ylim)) { + current_plot_yrange <- ylim + } else { + gb <- ggplot_build(p) + current_plot_yrange <- gb$layout$panel_scales_y[[1]]$range$range + } + count_bar_space_prop <- 0.20 + count_bar_space_height <- (current_plot_yrange[2] - current_plot_yrange[1]) * count_bar_space_prop + actual_rect_length <- count_bar_space_height * 0.8 + rect_min_val <- if (!is.null(ylim)) ylim[1] else current_plot_yrange[1] - count_bar_space_height + + # Even spacing: rectangles centered on factor positions with fixed half-width + x_idx <- as.numeric(data_disc$x) + bar_half <- 0.1 + counts <- data_disc$count + ymax_scaled <- rect_min_val + actual_rect_length * counts / max(counts, na.rm = TRUE) + data_counts <- cbind.data.frame( + xmin = x_idx - bar_half, + xmax = x_idx + bar_half, + ymin = rep(rect_min_val, length(counts)), + ymax = ymax_scaled, + xcenter = x_idx, + count = counts + ) + p <- p + geom_rect(aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), + data = data_counts, fill = count.color, color = count.outline.color, alpha = count.alpha, linewidth = 0.2 + ) + p <- p + geom_text(aes(x = .data$xcenter, y = .data$ymax + 0.12 * count_bar_space_height, label = .data$count), + data = data_counts, size = cex.text * 0.85, hjust = 0.5, vjust = 0.5, color = "#444444") + if (!is.null(covariate.labels)) { + p <- p + scale_x_discrete(labels = covariate.labels) + } else { + p <- p + scale_x_discrete(labels = as.character(x_levels_sorted)) + } + } + + if (is.null(ylim) == FALSE) { + p <- p + coord_cartesian(ylim = ylim) + } + + if (is.null(xlim) == FALSE) { + p <- p + coord_cartesian(xlim = xlim) + } + + } else { + plx <- predict(loess(eff.vec ~ X.vec), se = T) + se <- stats::qt(0.975, plx$df) * plx$se + y_hat <- plx$fit + y_hat_lower <- y_hat 
- se + y_hat_upper <- y_hat + se + + p <- ggplot() + ## xlab and ylab + p <- p + xlab(xlab) + ylab(ylab) + + ## theme + if (theme.bw == TRUE) { + p <- p + theme_bw() + } + + ## grid + if (gridOff == TRUE) { + p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + } + + p <- p + geom_hline(yintercept = 0, colour = lcolor[1], size = lwidth[1], linetype = ltype[1]) + p <- p + geom_ribbon(aes(x = X.vec, ymin = y_hat_lower, ymax = y_hat_upper), color = heterogeneous.cicolor, fill = heterogeneous.cicolor, alpha = 0.5, size = 0) + p <- p + geom_hline(yintercept = x$att.avg, color = heterogeneous.lcolor, size = 0.8, linetype = "dashed") + p <- p + geom_line(aes(x = X.vec, y = y_hat), color = heterogeneous.color, size = 1.1) + + ## title + if (is.null(main) == TRUE) { + p <- p + ggtitle(maintext) + } else if (main != "") { + p <- p + ggtitle(main) + } + + ## ylim + if (is.null(ylim) == FALSE) { + p <- p + coord_cartesian(ylim = ylim) + } + + if (is.null(xlim) == FALSE) { + p <- p + coord_cartesian(xlim = xlim) + } + + if (show.count == TRUE) { + current_plot_yrange <- NULL + if (!is.null(ylim)) { + current_plot_yrange <- ylim + } else { + gb <- ggplot_build(p) + current_plot_yrange <- gb$layout$panel_scales_y[[1]]$range$range + } + count_bar_space_prop <- 0.20 + count_bar_space_height <- (current_plot_yrange[2] - current_plot_yrange[1]) * count_bar_space_prop + actual_rect_length <- count_bar_space_height * 0.8 + rect_min_val <- if (!is.null(ylim)) ylim[1] else current_plot_yrange[1] - count_bar_space_height + + X.vec.for.hist <- X.vec + if (!is.null(xlim)) { + X.vec.for.hist <- X.vec.for.hist[which(X.vec.for.hist >= min(xlim) & X.vec.for.hist <= max(xlim))] + } + if (length(na.omit(X.vec.for.hist)) > 0) { + breaks <- pretty(range(X.vec.for.hist, na.rm = TRUE), n = 50) + h <- hist(X.vec.for.hist, breaks = breaks, plot = FALSE) + bin_xmin <- h$breaks[-length(h$breaks)] + bin_xmax <- h$breaks[-1] + counts <- h$counts + ymax_scaled <- rect_min_val + actual_rect_length * counts / max(counts, na.rm = TRUE) + data.toplot <- cbind.data.frame( + xmin = bin_xmin, + xmax = bin_xmax, + ymin = rep(rect_min_val, length(counts)), + ymax = ymax_scaled + ) + max_idx <- which.max(counts) + max_count_pos <- (bin_xmin[max_idx] + bin_xmax[max_idx]) / 2 + p <- p + geom_rect(aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), + data = data.toplot, fill = count.color, size = 0.3, alpha = count.alpha, color = count.outline.color, linewidth = 0.2 + ) + p <- p + annotate("text", + x = max_count_pos, + y = max(data.toplot$ymax) + 0.12 * count_bar_space_height, + label = max(counts, na.rm = TRUE), size = cex.text * 0.8, hjust = 0.5 + ) + if (is.null(ylim)) { + final_yrange_min <- min(current_plot_yrange[1], rect_min_val) + final_yrange_max <- current_plot_yrange[2] + p <- p + coord_cartesian(ylim = c(final_yrange_min, final_yrange_max)) + } + } + } + } + + p <- p + theme( + legend.text = element_text(margin = margin(r = 10, unit = "pt"), size = cex.legend), + legend.position = legend.pos, + legend.background = element_rect(fill = "transparent", colour = NA), + axis.title = element_text(size = cex.lab), + axis.title.x = element_text(margin = margin(t = 8, r = 0, b = 0, l = 0)), + axis.title.y = element_text(margin = margin(t = 0, r = 0, b = 0, l = 0)), + axis.text = element_text(color = "black", size = cex.axis), + axis.text.x = element_text(size = cex.axis, angle = angle, hjust = x.h, vjust = x.v), + axis.text.y = element_text(size = cex.axis), + plot.title = element_text(size = 
cex.main, hjust = 0.5, face = "bold", margin = margin(10, 0, 10, 0)) + ) + } + if (type == "box") { if (is.null(xlab) == TRUE) { xlab <- index[2] @@ -2919,7 +3352,7 @@ plot.fect <- function( } # horizontal 0 line - p <- p + geom_hline(yintercept = 0, colour = lcolor[1], size = lwidth[1], linetype = ltype[1]) + p <- p + geom_hline(yintercept = 0, colour = lcolor[1], size = lwidth[1], linetype = ltype[1]) complete.index.eff <- which(!is.na(x$eff)) complete.index.time <- which(!is.na(x$T.on)) @@ -2976,25 +3409,25 @@ plot.fect <- function( data.post.1$time <- factor(data.post.1$time, levels = levels) data.post.2$time <- factor(data.post.2$time, levels = levels) - p <- p + geom_boxplot(aes(x = time, y = eff), - position = "dodge", alpha = 0.5, - data = data.pre.1, fill =box.control, - outlier.fill = box.control, outlier.size = 1.25, - outlier.color = box.control, + p <- p + geom_boxplot(aes(x = .data$time, y = .data$eff), + position = "dodge", alpha = 0.5, + data = data.pre.1, fill = box.control, + outlier.fill = box.control, outlier.size = 1.25, + outlier.color = box.control, ) - p <- p + geom_boxplot(aes(x = time, y = eff), - position = "dodge", alpha = 0.5, - data = data.post.1, fill = box.treat, outlier.fill =box.treat, - outlier.size = 1.25, outlier.color = box.treat, + p <- p + geom_boxplot(aes(x = .data$time, y = .data$eff), + position = "dodge", alpha = 0.5, + data = data.post.1, fill = box.treat, outlier.fill = box.treat, + outlier.size = 1.25, outlier.color = box.treat, ) - p <- p + geom_point(aes(x = time, y = eff), - data = data.post.2, - color = box.treat, size = 1.25, alpha = 0.8 + p <- p + geom_point(aes(x = .data$time, y = .data$eff), + data = data.post.2, + color = box.treat, size = 1.25, alpha = 0.8 ) - p <- p + geom_point(aes(x = time, y = eff), - data = data.pre.2, - color = box.control, size = 1.25, alpha = 0.8 + p <- p + geom_point(aes(x = .data$time, y = .data$eff), + data = data.pre.2, + color = box.control, size = 1.25, alpha = 0.8 ) if (show.count == TRUE & !(type == "gap" | type == "equiv")) { @@ -3016,11 +3449,11 @@ plot.fect <- function( ymax = ymax ) max.count.pos <- data.count[which.max(data.count[, 2]), 1][1] - min(data.count[, 1]) + 1 - p <- p + geom_rect(aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), data = data.toplot, fill = count.color, size = 0.3 , alpha = count.alpha, color = count.outline.color, linewidth =0.2) + p <- p + geom_rect(aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax), data = data.toplot, fill = count.color, size = 0.3, alpha = count.alpha, color = count.outline.color, linewidth = 0.2) p <- p + annotate("text", - x = max.count.pos - 0.02 * T.gap, - y = max(data.toplot$ymax) + 0.1 * rect.length, - label = max(data.count[, 2]), size = cex.text * 0.7, hjust = 0.5 + x = max.count.pos - 0.02 * T.gap, + y = max(data.toplot$ymax) + 0.1 * rect.length, + label = max(data.count[, 2]), size = cex.text * 0.7, hjust = 0.5 ) } @@ -3212,7 +3645,7 @@ plot.fect <- function( axis.text.x = element_text(size = cex.axis, angle = angle, hjust = x.h, vjust = x.v), axis.text.y = element_text(size = cex.axis), plot.background = element_rect(fill = status.background.color), - legend.background = element_rect(fill =status.background.color), + legend.background = element_rect(fill = status.background.color), legend.position = legend.pos, legend.margin = margin(c(0, 5, 5, 0)), legend.text = element_text(margin = margin(r = 10, unit = "pt"), size = cex.legend), @@ -3255,8 +3688,7 @@ plot.fect <- function( if (length(all) >= 3) { p <- p + guides(fill = 
guide_legend(nrow = 2, byrow = TRUE)) } - } - else if (type == "sens") { + } else if (type == "sens") { # Determine plot-specific variables based on the 'restrict' option sens_data_obj <- NULL x_var_name <- NULL @@ -3280,7 +3712,7 @@ plot.fect <- function( # Prepare data for plotting data_original <- sens_data_obj$original - data_results <- sens_data_obj$results + data_results <- sens_data_obj$results # Assign a default x-axis position for the original estimate to plot it left of zero if (!(x_var_name %in% colnames(data_original))) { @@ -3297,7 +3729,7 @@ plot.fect <- function( # Assign color groups and combine into a single data frame data_original$color_group <- "Original" - data_results$color_group <- "Robust Confidence Set" + data_results$color_group <- "Robust Confidence Set" data_combined <- rbind(data_original, data_results) # --- Handle Plot Labels and Title --- @@ -3322,8 +3754,8 @@ plot.fect <- function( p <- p + geom_hline(yintercept = 0, color = lcolor[1], size = lwidth[1], linetype = ltype[1]) + geom_errorbar( - aes(ymin = lb, ymax = ub, color = color_group), - width = 0.02, # Width of error bar caps + aes(ymin = .data$lb, ymax = .data$ub, color = .data$color_group), + width = 0.02, # Width of error bar caps linewidth = 1 ) + scale_color_manual(values = c("Original" = sens.original.color, "Robust Confidence Set" = sens.colors[1])) + @@ -3353,11 +3785,8 @@ plot.fect <- function( face = "bold", ) ) - } - - else if (type == "sens_es") { + } else if (type == "sens_es") { if (restrict == "rm") { - if (is.null(x$sensitivity.rm) || is.null(x$sensitivity.rm$periods)) { stop("No period-by-period Smoothness data found in x$sensitivity.rm$periods.") } @@ -3375,54 +3804,57 @@ plot.fect <- function( fect.output.p <- as.data.frame(x$est.att) fect.output.p$Time <- as.numeric(rownames(fect.output.p)) if (is.null(ylim)) { - ylim <- c(min(c(fect.output.p$CI.lower, dte_output$lb)) * 1.3, - max(c(fect.output.p$CI.upper, dte_output$ub)) * 1.3) + ylim <- c( + min(c(fect.output.p$CI.lower, dte_output$lb)) * 1.3, + max(c(fect.output.p$CI.upper, dte_output$ub)) * 1.3 + ) } p <- esplot( fect.output.p, - Period = "Time", - Estimate = "ATT", - SE = "S.E.", - CI.lower = "CI.lower", - CI.upper = "CI.upper", - Count = "count", - show.count = show.count, + Period = "Time", + Estimate = "ATT", + SE = "S.E.", + CI.lower = "CI.lower", + CI.upper = "CI.upper", + Count = "count", + show.count = show.count, proportion = proportion, show.points = show.points, ci.outline = ci.outline, - connected = connected, - color = color, - count.color = count.color, + connected = connected, + color = color, + count.color = count.color, count.alpha = count.alpha, count.outline.color = count.outline.color, highlight.periods = x$placebo.period[1]:x$placebo.period[2], - highlight.colors = rep(placebo.color,x$placebo.period[2]-x$placebo.period[1]+1), - xlab = xlab, - ylab = ylab, - main = main, + highlight.colors = rep(placebo.color, x$placebo.period[2] - x$placebo.period[1] + 1), + xlab = xlab, + ylab = ylab, + main = main, fill.gap = FALSE, lcolor = lcolor, lwidth = lwidth, ltype = ltype, - cex.main = cex.main/16, - cex.axis = cex.axis/15, - cex.lab = cex.lab/15, - cex.text = cex.text/5, + cex.main = cex.main / 16, + cex.axis = cex.axis / 15, + cex.lab = cex.lab / 15, + cex.text = cex.text / 5, axis.adjust = axis.adjust, - xlim = xlim, - ylim = ylim, - gridOff = gridOff, - start0 = start0, + xlim = xlim, + ylim = ylim, + gridOff = gridOff, + start0 = start0, est.lwidth = est.lwidth, theme.bw = theme.bw, - est.pointsize = 
est.pointsize) + est.pointsize = est.pointsize + ) mbar_levels <- sort(unique(dte_output$Mbar)) n_colors <- length(mbar_levels) final_palette <- sens.colors[1:min(n_colors, length(sens.colors))] p <- p + geom_linerange( - aes(x = postPeriod + 0.2, ymin = lb, ymax = ub, color = factor(Mbar, levels = mbar_levels)), + aes(x = .data$postPeriod + 0.2, ymin = .data$lb, ymax = .data$ub, color = factor(.data$Mbar, levels = mbar_levels)), data = dte_output, linewidth = 1, inherit.aes = FALSE ) + @@ -3433,11 +3865,7 @@ plot.fect <- function( legend.position.inside = c(0.02, 0.98), legend.justification = c("left", "top") ) - } - - - else if (restrict == "sm") { - + } else if (restrict == "sm") { if (is.null(x$sensitivity.smooth) || is.null(x$sensitivity.smooth$periods)) { stop("No period-by-period Smoothness data found in x$sensitivity.smooth$periods.") } @@ -3455,54 +3883,57 @@ plot.fect <- function( fect.output.p <- as.data.frame(x$est.att) fect.output.p$Time <- as.numeric(rownames(fect.output.p)) if (is.null(ylim)) { - ylim <- c(min(c(fect.output.p$CI.lower, dte_output$lb)) * 1.3, - max(c(fect.output.p$CI.upper, dte_output$ub)) * 1.3) + ylim <- c( + min(c(fect.output.p$CI.lower, dte_output$lb)) * 1.3, + max(c(fect.output.p$CI.upper, dte_output$ub)) * 1.3 + ) } p <- esplot( fect.output.p, - Period = "Time", - Estimate = "ATT", - SE = "S.E.", - CI.lower = "CI.lower", - CI.upper = "CI.upper", - Count = "count", - show.count = show.count, + Period = "Time", + Estimate = "ATT", + SE = "S.E.", + CI.lower = "CI.lower", + CI.upper = "CI.upper", + Count = "count", + show.count = show.count, proportion = proportion, show.points = show.points, ci.outline = ci.outline, - connected = connected, - color = color, - count.color = count.color, + connected = connected, + color = color, + count.color = count.color, count.alpha = count.alpha, count.outline.color = count.outline.color, highlight.periods = x$placebo.period[1]:x$placebo.period[2], - highlight.colors = rep(placebo.color,x$placebo.period[2]-x$placebo.period[1]+1), - xlab = xlab, - ylab = ylab, - main = main, - xlim = xlim, - ylim = ylim, - gridOff = gridOff, + highlight.colors = rep(placebo.color, x$placebo.period[2] - x$placebo.period[1] + 1), + xlab = xlab, + ylab = ylab, + main = main, + xlim = xlim, + ylim = ylim, + gridOff = gridOff, fill.gap = FALSE, lcolor = lcolor, lwidth = lwidth, ltype = ltype, - cex.main = cex.main/16, - cex.axis = cex.axis/15, - cex.lab = cex.lab/15, - cex.text = cex.text/5, + cex.main = cex.main / 16, + cex.axis = cex.axis / 15, + cex.lab = cex.lab / 15, + cex.text = cex.text / 5, axis.adjust = axis.adjust, - start0 = start0, + start0 = start0, theme.bw = theme.bw, est.lwidth = est.lwidth, - est.pointsize = est.pointsize) + est.pointsize = est.pointsize + ) m_levels <- sort(unique(dte_output$M)) n_colors <- length(m_levels) final_palette <- sens.colors[1:min(n_colors, length(sens.colors))] p <- p + geom_linerange( - aes(x = postPeriod + 0.2, ymin = lb, ymax = ub, color = factor(M, levels = m_levels)), + aes(x = .data$postPeriod + 0.2, ymin = .data$lb, ymax = .data$ub, color = factor(.data$M, levels = m_levels)), data = dte_output, linewidth = 1, inherit.aes = FALSE ) + @@ -3513,39 +3944,37 @@ plot.fect <- function( legend.position.inside = c(0.02, 0.98), legend.justification = c("left", "top") ) - } - } - else if (type == "cumul") { + } else if (type == "cumul") { if (is.null(x$effect.est.att)) { stop("No period-by-period cumulative ATT data found in x$est.eff.") } - if (is.null(main)){ - main = "Estimated Cumulative 
Treatment Effects" + if (is.null(main)) { + main <- "Estimated Cumulative Treatment Effects" } p <- esplot( x$effect.est.att, - Estimate = "ATT", - SE = "S.E.", - CI.lower = "CI.lower", - CI.upper = "CI.upper", - main = main, - xlab = xlab, - ylab = ylab, - xlim = xlim, - ylim = ylim, + Estimate = "ATT", + SE = "S.E.", + CI.lower = "CI.lower", + CI.upper = "CI.upper", + main = main, + xlab = xlab, + ylab = ylab, + xlim = xlim, + ylim = ylim, fill.gap = FALSE, lcolor = lcolor, lwidth = lwidth, ltype = ltype, - cex.main = cex.main/16, - cex.axis = cex.axis/15, - cex.lab = cex.lab/15, - cex.text = cex.text/5, + cex.main = cex.main / 16, + cex.axis = cex.axis / 15, + cex.lab = cex.lab / 15, + cex.text = cex.text / 5, show.points = show.points, ci.outline = ci.outline, axis.adjust = axis.adjust, - Count = "count", + Count = "count", show.count = FALSE, proportion = proportion, est.pointsize = est.pointsize, diff --git a/R/polynomial.R b/R/polynomial.R index b8520e0..32fe461 100644 --- a/R/polynomial.R +++ b/R/polynomial.R @@ -1,7 +1,7 @@ ################################################################### ## IFE Model Function ################################################################### -fect.polynomial <- function(Y, # Outcome variable, (T*N) matrix +fect_polynomial <- function(Y, # Outcome variable, (T*N) matrix X, # Explanatory variables: (T*N*p) array D, # Indicator for treated unit (tr==1) W, diff --git a/data/gs2020.rda b/data/gs2020.rda new file mode 100644 index 0000000..fd1ad69 Binary files /dev/null and b/data/gs2020.rda differ diff --git a/data/hh2019.rda b/data/hh2019.rda new file mode 100644 index 0000000..42a3c63 Binary files /dev/null and b/data/hh2019.rda differ diff --git a/data/simdata.rda b/data/simdata.rda new file mode 100644 index 0000000..b7e0b61 Binary files /dev/null and b/data/simdata.rda differ diff --git a/data/simgsynth.rda b/data/simgsynth.rda new file mode 100644 index 0000000..16db565 Binary files /dev/null and b/data/simgsynth.rda differ diff --git a/data/turnout.rda b/data/turnout.rda new file mode 100644 index 0000000..8745d5a Binary files /dev/null and b/data/turnout.rda differ diff --git a/man/att.cumu.rd b/man/att.cumu.rd deleted file mode 100644 index a30d4c1..0000000 --- a/man/att.cumu.rd +++ /dev/null @@ -1,41 +0,0 @@ -\encoding{UTF-8} -\name{att.cumu} -\alias{att.cumu} -\title{Calculate Cumulative Treatment Effects} -\description{ - Calculate cumulative treatment effects based on the results of a \code{\link{fect}} object. -} -\usage{ -att.cumu(x, period = NULL, weighted = TRUE, alpha = 0.05, type = "on", plot = FALSE) -} -\arguments{ - \item{x}{A \code{\link{fect}} object.} - \item{period}{A two-element numeric vector specifying the range of terms during which treatment effects are to be accumulated, e.g., \code{period = c(-1, 1)}.} - \item{weighted}{A logical flag specifying whether to calculate weighted cumulative treatment effects based on counts at each period. Default is \code{TRUE}.} - \item{alpha}{A numerical value specifying the significance level. Default is \code{0.05}.} - \item{type}{A string that specifies the type of effect to calculate. Must be one of the following: \code{"on"} (switch-on treatment effect) or \code{"off"} (switch-off treatment effect). Default is \code{"on"}.} - \item{plot}{A logical flag indicating whether to plot cumulative effects. Default is \code{FALSE}.} -} -\author{ - Licheng Liu, Ye Wang, and Yiqing Xu -} -\references{ - Athey, S., Bayati, M., Doudchenko, N., Imbens, G., and Khosravi, K. (2021). 
- Matrix completion methods for causal panel data models. - \emph{Journal of the American Statistical Association}, 116(536), 1716-1730. - - Bai, J. (2009). - Panel data models with interactive fixed effects. - \emph{Econometrica}, 77(4), 1229-1279. - - Liu, L., Wang, Y., and Xu, Y. (2022). - A Practical Guide to Counterfactual Estimators for Causal Inference with Time-Series Cross-Sectional Data. - \emph{American Journal of Political Science}, 68(1), 160-176. - - Xu, Y. (2017). - Generalized Synthetic Control Method: Causal Inference with Interactive Fixed Effects Models. - \emph{Political Analysis}, 25(1), 57-76. -} -\seealso{ - \code{\link{fect}}, \code{\link{plot.fect}} -} \ No newline at end of file diff --git a/man/did_wrapper.Rd b/man/did_wrapper.Rd index edf2181..e40fac6 100644 --- a/man/did_wrapper.Rd +++ b/man/did_wrapper.Rd @@ -11,8 +11,8 @@ did_wrapper( D, X = NULL, index, - method = c("twfe", "st", "iw", "cs_never", "cs_notyet", "pm", "didm"), - se = c("default", "boot", "jackknife"), + method = c("twfe", "st", "iw", "cs_never", "cs_notyet", "didm"), + se = c("default", "boot", "bootstrap", "jackknife"), nboots = 200, parallel = TRUE, core = NULL, @@ -30,7 +30,7 @@ did_wrapper( \item{X}{Optional covariate vector for adjustment.} \item{index}{Character vector of unit and time variable names, e.g., \code{c("id", "time")}.} \item{method}{DiD method: \code{"twfe"}, \code{"st"}, \code{"iw"}, \code{"cs_never"}, \code{"cs_notyet"}, or \code{"didm"}.} - \item{se}{Standard error method: \code{"default"}, \code{"boot"}, or \code{"jackknife"}.} + \item{se}{Standard error method: \code{"default"}, \code{"boot"}, \code{"bootstrap"}, or \code{"jackknife"}.} \item{nboots}{Number of bootstrap replications (if applicable).} \item{parallel}{Logical; use parallel computation for bootstrapping.} \item{core}{Number of CPU cores to use if \code{parallel = TRUE}.} diff --git a/man/effect.rd b/man/effect.rd index 2c1ee63..9a7812f 100644 --- a/man/effect.rd +++ b/man/effect.rd @@ -6,7 +6,17 @@ } \usage{ -effect(x, cumu = TRUE, id = NULL, period = NULL) +effect( + x, + cumu = TRUE, + id = NULL, + period = NULL, + plot = FALSE, + count = TRUE, + xlab = NULL, + ylab = NULL, + main = NULL +) } \arguments{ @@ -19,6 +29,14 @@ effect(x, cumu = TRUE, id = NULL, period = NULL) \item{period}{Numeric vector of length 2 specifying the time window \code{c(start, end)} for effect calculation. If \code{NULL}, uses the maximum possible window based on the data.} \item{plot}{Logical. If \code{TRUE}, creates a visualization of the cumulative treatment effects with confidence intervals and a bar chart showing the number of treated units at each time point. Default is \code{FALSE}.} + + \item{count}{Logical. If \code{TRUE}, shows the count bars in the plot.} + + \item{xlab}{Character. X-axis label for the plot.} + + \item{ylab}{Character. Y-axis label for the plot.} + + \item{main}{Character. 
Main title for the plot.} } \details{ diff --git a/man/esplot.Rd b/man/esplot.Rd index 44fddea..733666e 100644 --- a/man/esplot.Rd +++ b/man/esplot.Rd @@ -18,7 +18,7 @@ esplot(data, Period = NULL, Estimate = "ATT", SE = NULL, highlight.colors = NULL, lcolor = NULL, lwidth = NULL, ltype = c("solid", "solid"), connected = FALSE, ci.outline = FALSE, main = NULL, xlim = NULL, ylim = NULL, xlab = NULL, ylab = NULL, - gridOff = TRUE, stats.pos = NULL, theme.bw = TRUE, + gridOff = FALSE, stats.pos = NULL, theme.bw = TRUE, cex.main = NULL, cex.axis = NULL, cex.lab = NULL, cex.text = NULL, axis.adjust = FALSE, color = "#000000", count.color = "gray70", count.alpha = 0.4, diff --git a/man/fect-internal.Rd b/man/fect-internal.Rd index d51fa30..c15da8f 100644 --- a/man/fect-internal.Rd +++ b/man/fect-internal.Rd @@ -42,8 +42,8 @@ \alias{panel_fe_ub} \alias{fect.default} \alias{fect.formula} -\alias{fect.fe} -\alias{fect.mc} +\alias{fect_fe} +\alias{fect_mc} \alias{interFE.default} \alias{interFE.formula} \alias{cv.sample} diff --git a/man/fect_sens.Rd b/man/fect_sens.Rd index 048c137..b903b05 100644 --- a/man/fect_sens.Rd +++ b/man/fect_sens.Rd @@ -13,7 +13,8 @@ fect_sens( Mvec = seq(0, 0.25, 0.05), periodMbarvec = c(0, 0.5), periodMvec = c(0, 0.1), - parallel = TRUE + parallel = FALSE, + cores = NULL ) } \arguments{ @@ -24,7 +25,8 @@ fect_sens( \item{Mvec}{Values of \code{M} for overall smoothness-based sensitivity analysis.} \item{periodMbarvec}{Values of \code{Mbar} for period-specific RM sensitivity analysis.} \item{periodMvec}{Values of \code{M} for period-specific smoothness sensitivity analysis.} - \item{parallel}{Logical; if \code{TRUE} (default), uses parallel computation where supported.} + \item{parallel}{Logical; if \code{TRUE}, uses parallel computation where supported. Default is \code{FALSE}.} + \item{cores}{Optional integer; number of workers when \code{parallel = TRUE}.} } \details{ This function: diff --git a/man/gs2020.Rd b/man/gs2020.Rd new file mode 100644 index 0000000..0c3613f --- /dev/null +++ b/man/gs2020.Rd @@ -0,0 +1,7 @@ +\name{gs2020} +\alias{gs2020} +\docType{data} +\title{Simulated Gsynth-like Panel Data (No Reversal)} +\description{A simulated dataset for demonstration and testing.} +\format{data.frame} +\keyword{datasets} diff --git a/man/hh2019.Rd b/man/hh2019.Rd new file mode 100644 index 0000000..75e324d --- /dev/null +++ b/man/hh2019.Rd @@ -0,0 +1,7 @@ +\name{hh2019} +\alias{hh2019} +\docType{data} +\title{Simulated Panel Data Example} +\description{A simulated dataset used in package vignettes and examples.} +\format{data.frame} +\keyword{datasets} diff --git a/man/plot.fect.Rd b/man/plot.fect.Rd index ed277fc..74761fe 100644 --- a/man/plot.fect.Rd +++ b/man/plot.fect.Rd @@ -88,8 +88,12 @@ counterfactual.linetype = NULL, box.control = NULL, box.treat = NULL, - calendar.color = NULL, - calendar.lcolor = NULL, + calendar.color = NULL, + calendar.lcolor = NULL, + calendar.cicolor = NULL, + heterogeneous.color = NULL, + heterogeneous.lcolor = NULL, + heterogeneous.cicolor = NULL, equiv.color = NULL, status.treat.color = NULL, status.control.color = NULL, @@ -101,6 +105,8 @@ status.balanced.post.color = NULL, status.balanced.pre.color = NULL, status.background.color = NULL, + covariate = NULL, + covariate.labels = NULL, ... ) } @@ -193,6 +199,10 @@ \item{box.treat}{Character string or \code{NULL} (default). If \code{NULL}, uses a default fill color for treated boxplots (e.g., \code{"pink"} for the base style). The \code{preset} argument may define a different color. 
Specifying this argument directly overrides any \code{preset} or base style setting.} \item{calendar.color}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for fitted lines in calendar plots (e.g., \code{"skyblue"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} \item{calendar.lcolor}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for average ATT lines in calendar plots (e.g., \code{"red"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} + \item{calendar.cicolor}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for CI in calendar plots (e.g., \code{"red"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} + \item{heterogeneous.color}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for fitted lines in heterogeneous plots (e.g., \code{"red"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} + \item{heterogeneous.lcolor}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for average ATT lines in heterogeneous plots (e.g., \code{"red"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} + \item{heterogeneous.cicolor}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for CI in heterogeneous plots (e.g., \code{"red"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} \item{equiv.color}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for equivalence bounds (e.g., \code{"red"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} \item{status.treat.color}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for treated status (e.g., \code{"#06266F"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} \item{status.control.color}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for control status (e.g., \code{"#B0C4DE"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} @@ -204,6 +214,8 @@ \item{status.balanced.post.color}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for balanced post-treatment status (e.g., \code{"#00852B"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} \item{status.balanced.pre.color}{Character string or \code{NULL} (default). If \code{NULL}, uses a default color for balanced pre-treatment status (e.g., \code{"#A5CA18"} for the base style). 
The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} \item{status.background.color}{Character string or \code{NULL} (default). If \code{NULL}, uses a default background color for status plots (e.g., \code{"gray90"} for the base style). The \code{preset} argument may define a different color. Specifying this argument directly overrides any \code{preset} or base style setting.} + \item{covariate}{Character string or \code{NULL} (default). If \code{NULL}, uses a default covariate for heterogeneous plots (e.g., \code{"X"} for the base style). The \code{preset} argument may define a different covariate. Specifying this argument directly overrides any \code{preset} or base style setting.} + \item{covariate.labels}{Character vector or \code{NULL} (default). If \code{NULL}, uses a default covariate labels for heterogeneous plots (e.g., \code{"X1"} for the base style). The \code{preset} argument may define a different covariate labels. Specifying this argument directly overrides any \code{preset} or base style setting.} \item{...}{Additional graphical parameters passed to internal plotting routines, primarily those accepted by \code{esplot} for event study style plots (gap, equiv, exit, sens_es, cumul).} } \details{ @@ -242,8 +254,16 @@ library(fect) if(requireNamespace("ggplot2") && requireNamespace("ggrepel")) { data(simdata) # Estimate with fixed effects method - out.fect <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id","time"), - method = "fe", force = "two-way", se = TRUE, parallel = FALSE, nboots = 5) # nboots low for example + out.fect <- fect( + Y ~ D + X1 + X2, + data = simdata, + index = c("id","time"), + method = "fe", + force = "two-way", + se = TRUE, + parallel = FALSE, + nboots = 5 # nboots low for example + ) # Default gap plot plot(out.fect, main = "Estimated ATT (FEct)", ylab = "Effect of D on Y") diff --git a/src/Makevars b/src/Makevars index fe3a2d2..8f347d7 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,6 +1,5 @@ ## optional -CXX_STD = CXX11 PKG_CPPFLAGS = -DARMA_64BIT_WORD=1 PKG_CXXFLAGS = $(SHLIB_CXXFLAGS) diff --git a/src/Makevars.win b/src/Makevars.win index fe3a2d2..8f347d7 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,6 +1,5 @@ ## optional -CXX_STD = CXX11 PKG_CPPFLAGS = -DARMA_64BIT_WORD=1 PKG_CXXFLAGS = $(SHLIB_CXXFLAGS) diff --git a/src/RcppExports.o b/src/RcppExports.o index 4253bdc..f5675ea 100644 Binary files a/src/RcppExports.o and b/src/RcppExports.o differ diff --git a/src/auxiliary.o b/src/auxiliary.o index 5680634..3b782da 100644 Binary files a/src/auxiliary.o and b/src/auxiliary.o differ diff --git a/src/binary_qr.o b/src/binary_qr.o index 4f49055..8830418 100644 Binary files a/src/binary_qr.o and b/src/binary_qr.o differ diff --git a/src/binary_sub.o b/src/binary_sub.o index 6e95a54..22ade05 100644 Binary files a/src/binary_sub.o and b/src/binary_sub.o differ diff --git a/src/binary_svd.o b/src/binary_svd.o index 5082b1c..a5d58db 100644 Binary files a/src/binary_svd.o and b/src/binary_svd.o differ diff --git a/src/fe_sub.o b/src/fe_sub.o index c41f03d..9a11267 100644 Binary files a/src/fe_sub.o and b/src/fe_sub.o differ diff --git a/src/fect.so b/src/fect.so index 929e453..e5087ca 100755 Binary files a/src/fect.so and b/src/fect.so differ diff --git a/src/ife.o b/src/ife.o index dc01516..a0e6a97 100644 Binary files a/src/ife.o and b/src/ife.o differ diff --git a/src/ife_sub.o b/src/ife_sub.o index 5ce3b8e..0222dbc 100644 Binary files 
a/src/ife_sub.o and b/src/ife_sub.o differ diff --git a/src/mc.o b/src/mc.o index 0b1d10a..5e19140 100644 Binary files a/src/mc.o and b/src/mc.o differ diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..37b9a09 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(fect) + +test_check("fect") diff --git a/tests/testthat/helper-data.R b/tests/testthat/helper-data.R new file mode 100644 index 0000000..6703362 --- /dev/null +++ b/tests/testthat/helper-data.R @@ -0,0 +1,19 @@ +## Ensure datasets are available in tests across installed/build contexts +if (!exists("simdata", inherits = TRUE)) { + suppressWarnings(try(utils::data("simdata", package = "fect"), silent = TRUE)) +} +if (!exists("simdata", inherits = TRUE)) { + f <- system.file("data", "fect.RData", package = "fect") + if (nzchar(f) && file.exists(f)) { + try(load(f, envir = environment()), silent = TRUE) + } +} +if (!exists("simdata", inherits = TRUE)) { + f <- file.path("data", "fect.RData") + if (file.exists(f)) { + try(load(f, envir = environment()), silent = TRUE) + } +} +if (!exists("simgsynth", inherits = TRUE)) { + suppressWarnings(try(utils::data("simgsynth", package = "fect"), silent = TRUE)) +} diff --git a/tests/testthat/test-cumu-effect-esplot.R b/tests/testthat/test-cumu-effect-esplot.R new file mode 100644 index 0000000..1249b32 --- /dev/null +++ b/tests/testthat/test-cumu-effect-esplot.R @@ -0,0 +1,33 @@ +test_that("cumu/att.cumu/esplot run without error on fect output", { + suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE)) + expect_true(exists("simdata")) + out <- fect::fect( + Y ~ D + X1 + X2, + data = simdata, + index = c("id", "time"), + method = "ife", + r = 1, + CV = FALSE, + se = FALSE, + parallel = FALSE + ) + # cumu (att.cumu) + # att.cumu requires uncertainty objects from keep.sims=TRUE + out_keep <- fect::fect( + Y ~ D + X1 + X2, + data = simdata, + index = c("id", "time"), + method = "ife", + r = 1, + CV = FALSE, + se = TRUE, + nboots = 20, + keep.sims = TRUE, + parallel = FALSE + ) + c <- att.cumu(out_keep, period = c(1, 3), plot = FALSE) + expect_true(is.matrix(c) || is.data.frame(c)) + # esplot + p <- esplot(out_keep$est.att, Estimate = "ATT") + expect_s3_class(p, "ggplot") +}) diff --git a/tests/testthat/test-did-wrapper.R b/tests/testthat/test-did-wrapper.R new file mode 100644 index 0000000..5f4e9cb --- /dev/null +++ b/tests/testthat/test-did-wrapper.R @@ -0,0 +1,25 @@ +test_that("did_wrapper twfe runs and returns structure", { + skip_if_not_installed("fixest") + suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE)) + expect_true(exists("simdata")) + + # create a binary ever-treated indicator for TWFE event study helper + sim <- simdata + sim$treat <- ave(sim$D, sim$id, FUN = function(z) as.numeric(mean(z, na.rm = TRUE) > 0)) + + res <- did_wrapper( + data = sim, + Y = "Y", + D = "D", + X = c("X1", "X2"), + index = c("id", "time"), + method = "twfe", + se = "default", + parallel = FALSE + ) + + expect_s3_class(res, "did_wrapper") + expect_true(is.data.frame(res$est.avg)) + expect_true(is.data.frame(res$est.att)) + expect_true(all(colnames(res$est.avg) %in% c("ATT.avg", "S.E.", "CI.lower", "CI.upper", "p.value"))) +}) diff --git a/tests/testthat/test-fect-basic.R b/tests/testthat/test-fect-basic.R new file mode 100644 index 0000000..b2bf5ff --- /dev/null +++ b/tests/testthat/test-fect-basic.R @@ -0,0 +1,43 @@ +test_that("fect formula basic runs on simdata and returns expected slots", { + 
suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE)) + expect_true(exists("simdata")) + + set.seed(1) + out <- fect::fect( + Y ~ D + X1 + X2, + data = simdata, + index = c("id", "time"), + method = "ife", + r = 2, + CV = FALSE, + se = FALSE, + parallel = FALSE + ) + + expect_s3_class(out, "fect") + expect_true(is.matrix(out$eff)) + expect_true(is.numeric(out$att.avg)) + expect_true(length(out$time) >= 1) +}) + +test_that("fect returns est.att and att.vcov when se=TRUE", { + suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE)) + expect_true(exists("simdata")) + + set.seed(2) + out <- fect::fect( + Y ~ D + X1 + X2, + data = simdata, + index = c("id", "time"), + method = "ife", + r = 1, + CV = FALSE, + se = TRUE, + nboots = 30, + parallel = FALSE + ) + + expect_true(!is.null(out$est.att)) + expect_true(!is.null(out$att.vcov)) + expect_true(all(colnames(out$est.att) %in% c("ATT", "S.E.", "CI.lower", "CI.upper", "p.value", "count"))) +}) diff --git a/tests/testthat/test-fect-sens.R b/tests/testthat/test-fect-sens.R new file mode 100644 index 0000000..08a0938 --- /dev/null +++ b/tests/testthat/test-fect-sens.R @@ -0,0 +1,31 @@ +test_that("fect_sens attaches sensitivity results when inputs present", { + skip_if_not_installed("HonestDiDFEct") + suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE)) + expect_true(exists("simdata")) + + set.seed(3) + out <- fect::fect( + Y ~ D + X1 + X2, + data = simdata, + index = c("id", "time"), + method = "ife", + r = 1, + CV = FALSE, + se = TRUE, + nboots = 20, + parallel = FALSE + ) + + # ensure names and shapes needed by fect_sens exist + expect_true(!is.null(out$est.att)) + expect_true(!is.null(out$att.vcov)) + + # Ensure placebo.period exists to avoid length-0 error inside fect_sens + if (is.null(out$placebo.period)) { + out$placebo.period <- c(-3, -1) + } + # restrict periods so numPre+numPost matches available row count + out$placebo.period <- c(-3, -1) + out2 <- fect_sens(out, post.periods = 1:10, parallel = FALSE) + expect_true(!is.null(out2$sensitivity.rm) || !is.null(out2$sensitivity.smooth)) +}) diff --git a/tests/testthat/test-getcohort.R b/tests/testthat/test-getcohort.R new file mode 100644 index 0000000..813f649 --- /dev/null +++ b/tests/testthat/test-getcohort.R @@ -0,0 +1,16 @@ +test_that("get.cohort adds cohort columns and handles options", { + suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE)) + expect_true(exists("simdata")) + df <- simdata + + out <- get.cohort( + data = df, + D = "D", + index = c("id", "time"), + start0 = TRUE, + drop.always.treat = TRUE + ) + + expect_true(all(c("Cohort", "Time_to_Treatment") %in% names(out))) + expect_true(all(out$Cohort %in% c("Control", unique(out$Cohort)))) +}) diff --git a/tests/testthat/test-plot-fect.R b/tests/testthat/test-plot-fect.R new file mode 100644 index 0000000..ea55ba1 --- /dev/null +++ b/tests/testthat/test-plot-fect.R @@ -0,0 +1,18 @@ +test_that("plot.fect executes for gap plot without error", { + suppressWarnings(try(data("simdata", package = "fect"), silent = TRUE)) + expect_true(exists("simdata")) + set.seed(4) + out <- fect::fect( + Y ~ D + X1 + X2, + data = simdata, + index = c("id", "time"), + method = "ife", + r = 1, + CV = FALSE, + se = FALSE, + parallel = FALSE + ) + # Just ensure the plotting call produces a ggplot object + p <- plot.fect(out, type = "gap", show.count = FALSE) + expect_s3_class(p, "ggplot") +}) diff --git a/vignettes/02-fect.Rmd b/vignettes/02-fect.Rmd index 760a6cc..fd77c81 100644 --- 
a/vignettes/02-fect.Rmd +++ b/vignettes/02-fect.Rmd @@ -1,6 +1,6 @@ # Fect Main Program {#sec-fect} -In this chapter, we illustrate how to use the **fect** package to implement counterfactual estimators (or imputation estimators) and conduct diagnostic tests proposed by @LWX2022--[Paper]. Download the R code used in this chapter [here](rscript/02-fect.R). +In this chapter, we illustrate how to use the **fect** package to implement counterfactual estimators (or imputation estimators) and conduct diagnostic tests proposed by @LWX2022--\[Paper\]. Download the R code used in this chapter [here](rscript/02-fect.R). ## Simulated data @@ -38,15 +38,14 @@ panelview(Y ~ D, data = simdata, index = c("id","time"), ------------------------------------------------------------------------ -## Counterfactual estimators +## The imputation estimator (FEct) Using the current version of **fect**, we can apply several different methods to make counterfactual predictions and estimate treatment effects by specifying the `method` option: `"fe"` (two-way fixed effects, default), `"ife"` (interactive fixed effects), `"mc"` (matrix completion method), `"cfe"` (complex fixed effects), and `"polynomial"` (fixed effects with group-specific time trends). First, we illustrate the main syntax of **fect** using the `"fe"` method. -### FEct +The two-way fixed effects counterfactual estimator (FEct) is also independently proposed by @BJS2024 and @gardnertwo, who refer to it as the "imputation method" and "two-stage DID," respectively. -The two-way fixed effects coutnerfacutal estimator (FEct) is also independently proposed by Borusyak, Jaravel & Spiess (2024) and John Gardner (2021), who refer to it as the "imputation method" and "two-stage DID," respectively. - -**Estimation.** We estimate the average treatment effect on the treated (ATT) using the following information: the outcome variable $Y$, binary treatment variable $D$, two observed covariates $X_{1}$ and $X_{2}$, and the unit and time indicators $id$ and $time$, respectively. +### Estimation +We estimate the average treatment effect on the treated (ATT) using the following information: the outcome variable $Y$, binary treatment variable $D$, two observed covariates $X_{1}$ and $X_{2}$, and the unit and time indicators $id$ and $time$, respectively. The first variable on the right hand side of the formula is the treatment indicator $D$; the rest of the right-hand-side variables serve as controls. The `index` option specifies the unit and time indicators. The `force` option ("none", "unit", "time", and "two-way") specifies the additive component(s) of the fixed effects included in the model. The default option is "two-way" (including both unit and time fixed effects). @@ -55,7 +54,8 @@ out.fect <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id","time"), method = "fe", force = "two-way") ``` -**Visualization.** We can use the `plot` function to visualize the estimation results. By default, the `plot` function produces a "gap" plot -- as if we type `plot(out.fect, type = "gap")` --- which visualizes the estimated period-wise ATT (dynamic treatment effects). For your reference, the true population average effects in `simdata` go from 1 to 3 from the 1st to the 10th post-treatment period. +### Visualization +We can use the `plot` function to visualize the estimation results. By default, the `plot` function produces a "gap" plot -- as if we type `plot(out.fect, type = "gap")` --- which visualizes the estimated period-wise ATT (dynamic treatment effects). 
For your reference, the true population average effects in `simdata` go from 1 to 3 from the 1st to the 10th post-treatment period. The bar plot at the bottom of the plot shows the number of treated units for each time period. The options `cex.main`, `cex.lab`, `cex.axis`, and `cex.text` adjust the font sizes of the title, axis labels, axis numbers, and in-graph text, respectively. @@ -68,17 +68,22 @@ plot(out.fect, main = "Estimated ATT (FEct)", ylab = "Effect of D on Y", The uncertainty estimates are unavailable in the plot above because, by default, `se = FALSE` to save computational power. -The graph is a **ggplot2** object; user can conveniently use the `ggsave` (preferred) function to export the resulting plot. +The graph is a **ggplot2** object; user can conveniently use the `ggsave` (preferred) function to export the resulting plot. See [Chapter @sec-plots] for more visualization options. + +### Uncertainty estimates -**Uncertainty estimates.** The package can produce uncertainty estimates when `se = TRUE`. One can use the non-parametric bootstrap procedure by setting `vartype = "bootstrap"`. Note that it only works well when the number of units is relatively large and many units in the data set experience the treatment condition. The number of bootstrap runs can be set by `nboots`. +The package can produce uncertainty estimates when `se = TRUE`. One can use the non-parametric bootstrap procedure by setting `vartype = "bootstrap"`. Note that it only works well when the number of units is relatively large and many units in the data set experience the treatment condition. The number of bootstrap runs can be set by `nboots`. Alternatively, users can obtain uncertainty estimates using the jackknife method by specifying `vartype = "jackknife"`. The algorithm obtains standard errors by iteratively dropping one unit (the entire time-series) from the dataset. -Parallel computing will speed up both cross-validation and uncertainty estimation significantly. When `parallel = TRUE` (default) and `cores` options are omitted, the algorithm will detect the number of available cores on your computer automatically. (Warning: it may consume most of your computer's computational power if all cores are being used.) +::: {.callout-note appearance="simple"} +Parallel computing will speed up both cross-validation and uncertainty estimation significantly. We recommend that users manually set the number of cores using the `cores` option. If this is not supplied or is `NULL`, we will automatically select the smaller of `8` and the number of usable system cores minus `2`, to prevent excessive use of system resources. +::: ```{r simdata_fect, eval=TRUE, cache = TRUE, message = FALSE, results = 'hide'} out.fect <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id","time"), - method = "fe", force = "two-way", se = TRUE, parallel = TRUE, nboots = 200) + method = "fe", force = "two-way", se = TRUE, + cores = 8, parallel = TRUE, nboots = 1000) ``` The `plot()` function can visualize the estimated period-wise ATTs as well as their uncertainty estimates. `stats = "F.p"` shows the p-value for the F test of no-pretrend (more details below). @@ -88,7 +93,8 @@ plot(out.fect, main = "Estimated ATT (FEct)", ylab = "Effect of D on Y", cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8, stats = "F.p") ``` -**Result summary.** Users can use the `print` function to take a look at a summary of the estimation results or retrieve relevant statistics by directly accessing the fect object. 
Specifically, `est.avg` and `est.avg.unit` show the ATT averaged over all periods -- the former weights each treated observation equally while the latter weights each treated unit equally. `est.beta` reports the coefficients of the time-varying covariates. `est.att` reports the average treatment effect on the treated (ATT) by period. Treatment effect estimates from each bootstrap run is stored in `eff.boot`, an array whose dimension = (#time periods \* #treated \* #bootstrap runs).
+### Save estimates
+Users can use the `print` function to take a look at a summary of the estimation results or retrieve relevant statistics by directly accessing the fect object. Specifically, `est.avg` and `est.avg.unit` show the ATT averaged over all periods -- the former weights each treated observation equally while the latter weights each treated unit equally. `est.beta` reports the coefficients of the time-varying covariates. `est.att` reports the average treatment effect on the treated (ATT) by period. Treatment effect estimates from each bootstrap run are stored in `eff.boot`, an array whose dimension = (#time periods \* #treated \* #bootstrap runs).
```{r}
print(out.fect)
@@ -102,9 +108,34 @@ out.fect$est.avg
out.fect$est.beta
```
-### IFEct
+### Leave-one-out approach
+
+@li2025benchmarking show that in some applications, pre-trend estimates based on in-sample model fit can lead to the mistaken belief that no pre-trend exists, even when a non-parallel pre-trend is present. A simple fix is to use a leave-one-out method by setting `loo = TRUE` to obtain these estimates, although it is significantly more time-consuming.
+
+::: {.callout-note appearance="simple"}
+We recommend setting `loo = TRUE` when (i) the event-study plot is intended as a critical piece of evidence to support the parallel trends assumption, which is often the case, or (ii) when implementing an equivalence test for the pre-trend estimates. For more discussion on the LOO pre-trend test, see [here](#loo-pre-trend-test).
+
+Our most preferred test, however, is the sensitivity analysis discussed in [Chapter @sec-panel-sens], which combines out-of-sample placebo estimates with post-treatment ATT estimates.
+:::
+
+We can implement the leave-one-out pre-trend test by setting `loo = TRUE`.
+```{r fect_loo, eval=TRUE, cache = TRUE, message = FALSE}
+out.fect.loo <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id","time"),
+                     method = "fe", force = "two-way", se = TRUE, loo = TRUE,
+                     cores = 8, parallel = TRUE, nboots = 200)
+```
+
+The event study plot that uses leave-one-out estimates for the pretreatment periods is shown below. It is fairly similar to the plot we presented earlier without leave-one-out, although this need not always be the case.
+```{r, fig.width = 6, fig.height = 4.5}
+plot(out.fect.loo, main = "Estimated ATT (FEct) -- LOO",
+     cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8)
+```
-In addition to FEct, **fect** also supports the interactive fixed effects counterfactual (IFEct) method proposed by Laurent Gobillon & Thierry Magnac (2016) and Yiqing Xu (2017) and the matrix completion (MC) method proposed by Athey et al. (2021)---`method = "ife"` and `method = "mc"`, respectively. We use EM algorithm to impute the counterfactuals of treated observations.
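A minimal sketch of retrieving the quantities named in the new "Save estimates" section above; the slot names follow the vignette text, and `out.fect` is assumed to have been fit with `se = TRUE` so that the bootstrap array is populated:

```r
# Sketch: inspecting a fitted fect object (assumes `out.fect` from the chunks above)
print(out.fect)          # summary of the estimation results
out.fect$est.avg         # ATT averaged over all periods, weighting each treated observation equally
out.fect$est.avg.unit    # ATT averaged over all periods, weighting each treated unit equally
out.fect$est.beta        # coefficients of the time-varying covariates
out.fect$est.att         # period-wise ATT estimates
dim(out.fect$eff.boot)   # time periods x treated units x bootstrap runs (when bootstrapping is used)
```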
+## Interactive fixed effects & matrix completion
+
+In addition to FEct, **fect** also supports the interactive fixed effects counterfactual (IFEct) method proposed by @Gobillon2016 and @Xu2017, and the matrix completion (MC) method proposed by @Athey2021---`method = "ife"` and `method = "mc"`, respectively. We use an EM algorithm to impute the counterfactuals of treated observations.
+
+### IFE
 
 For the IFE approach, we need to specify the number of factors using option `r`. For the MC method, we need to specify the tuning parameter in the penalty term using option `lambda`. By default, the algorithm will select an optimal hyper-parameter via a built-in cross-validation procedure.
@@ -123,7 +154,8 @@ For the IFE method, we need to specify an interval of candidate number of unobse
 ```{r simdata_ife, eval=TRUE, cache = TRUE}
 out.ife <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id","time"),
                 force = "two-way", method = "ife", CV = TRUE, r = c(0, 5),
-                se = TRUE, nboots = 200, parallel = TRUE)
+                se = TRUE, cores = 8, nboots = 1000, parallel = TRUE)
+print(out.ife)
 ```
 
 The figure below shows the estimated ATT using the IFE method. The cross-validation procedure selects the correct number of factors ($r=2$).
@@ -139,13 +171,19 @@ For the MC method, we also need to specify a sequence of candidate tuning parame
 ```{r simdata_mc, eval=TRUE, cache = TRUE}
 out.mc <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id","time"),
                force = "two-way", method = "mc", CV = TRUE,
-               se = TRUE, nboots = 200, parallel = TRUE)
+               se = TRUE, cores = 8, nboots = 1000, parallel = TRUE)
+
+print(out.mc)
 ```
 
 ```{r fig.width = 6, fig.height = 4.5}
 plot(out.mc, main = "Estimated ATT (MC)")
 ```
 
+## Weighting treatment effects
+
+After obtaining the individual treatment effects using one of the counterfactual estimators, we can weight these estimates using a constructed balanced treated sample or other user-supplied weighting schemes.
+
 ### Balanced treated sample
 
 **fect** also provides the option `balance.period`, which allows the calculation of the average treatment effects only for *treated* units that exhibit complete data in specified pre- and post-treatment periods. For instance, if the option is set to `balance.period = c(-3,4)`, the algorithm will calculate the average treatment effects for units that have at least four consecutive non-missing observations in the pre-treatment periods `(-3, -2, -1, 0)` and at least four consecutive non-missing observations in the post-treatment periods `(1, 2, 3, 4)`. Note that this option does not affect whether a never-treated unit enters estimation.
@@ -169,7 +207,7 @@ plot(out.bal, main = "Estimated ATT (Balanced Sample)",
      color = "red",
      count.color = "blue")
 ```
 
-### Weighted Treatment Effect
+### Weighted treatment effect
 
 The package offers the option `W` to calculate the weighted average treatment effects. The weighting variable does not affect the estimation of fixed effects or factors. Only the weighted average treatment effects or weighted dynamic treatment effects are obtained by aggregating the treatment effects using the weight `W`.
@@ -186,7 +224,7 @@ We can then visualize the weighted dynamic treatment effects using the inbuilt f
 plot(out.w, main = "Estimated Weighted ATT")
 ```
 
-## Heterogeneous treatment effects
+## Effect heterogeneity
 
 We provide several methods for researchers to explore heterogeneous treatment effects (HTE).
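Editor's aside (not part of the diff): the `balance.period` option documented in the 02-fect.Rmd hunk above takes a two-element window of relative periods. A minimal sketch of a call producing the `out.bal` object plotted in the context lines above is given below; the diff does not show the vignette's actual arguments, so everything besides `balance.period = c(-3, 4)` is an assumption modeled on the `out.ife` chunk.

```r
# Sketch (assumption): restrict the averaged effects to treated units observed in
# all of periods -3..0 (pre) and 1..4 (post); never-treated units are unaffected.
out.bal <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id", "time"),
                force = "two-way", method = "ife", r = 2, se = TRUE,
                balance.period = c(-3, 4),
                cores = 8, parallel = TRUE, nboots = 200)
```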
@@ -198,7 +236,7 @@ One way to understand HTE is to use a series of box plots to visualize the estim
 plot(out.ife, type = "box", xlim = c(-15, 10))
 ```
 
-### By calendar time
+### CATT by calendar time
 
 Another way to explore HTE is to investigate how the treatment effect evolves over time. In the plot below, the point estimates represents the ATTs by calendar time; the blue curve and band represent a lowess fit of the estimates and its 95% confidence interval, respectively; and the red horizontal dashed line represents the ATT (averaged over all time periods).
@@ -206,6 +244,32 @@ Another way to explore HTE is to investigate how the treatment effect evolves ov
 plot(out.ife, type = "calendar", xlim = c(1, 35))
 ```
 
+### CATT by a covariate
+
+By setting `type = "hte"` or `type = "heterogeneous"`, we can also plot the HTE by arbitrary covariates that are unaffected by the treatment. As before, the blue curve and band represent a lowess fit of the estimates and its 95% confidence interval, respectively. The red dashed line represents the ATT. The histogram at the bottom of the figure illustrates the distribution of the covariate and can be turned off using `show.count = FALSE`. In our simulated case, the effect size is unrelated to the values of covariate `X1`.
+```{r hte_X1, eval = TRUE, cache = TRUE, warning = FALSE, fig.width = 6, fig.height = 4.5}
+plot(out.ife, type = "hte", covariate = "X1")
+```
+
+We can also plot the CATT when a covariate is discrete. To demonstrate this, we artificially create a moderating variable `X3`, which must be included in the outcome model and then specified in the heterogeneous treatment effect plot.
+
+```{r hte_discrete, eval = TRUE, cache = TRUE, warning = FALSE, fig.width = 6, fig.height = 4.5}
+simdata$X3 <- sample(1:3, size = nrow(simdata), replace = TRUE)
+out.ife.X3 <- fect(Y ~ D + X1 + X2 + X3, data = simdata, index = c("id","time"),
+                   method = "ife", r = 2, se = TRUE, seed = 123,
+                   cores = 8, nboots = 1000, parallel = TRUE)
+```
+
+As expected, there is not much effect heterogeneity along `X3`. In the resulting figure, we can also assign labels to the discrete values of the moderator.
+```{r, fig.width = 6, fig.height = 4.5}
+plot(out.ife.X3, type = "hte", covariate = "X3",
+     xlab = "", ylab = "Effect of D on Y",
+     covariate.labels = c("USA", "China", "UK"),
+     ylim = c(-2, 6))
+```
+
+Our next update will accommodate time-invariant covariates and allow users to explore effect heterogeneity along them.
+
 ------------------------------------------------------------------------
 
 ## Diagnostic tests
@@ -286,9 +350,9 @@ plot(out.mc, type = "equiv", ylim = c(-4,4),
 
 From the above plots, we see that FEct fails both tests; IFEct passes both tests using a conventional test size (5%); and MC fails the F tests, but passes the TOST (equivalence) test. Hence, we may conclude that IFEct is a more suitable model.
 
-#### Leave-one-out pre-trend test
+### LOO pre-trend test
 
-Instead of using estimated ATTs for periods prior to the treatment to test for pre-trends, we can use a leave-one-out (LOO) approach (`loo = TRUE`) to consecutively hide one pretreatment period (relative to the timing of the treatment) and repeatedly estimate the pseudo treatment effects for that pretreatment period. The LOO approach can be understood as an extension of the placebo test. It has the benefit of providing users with a more holistic view of whether the identifying assumptions likely hold. However, as the program needs to conduct uncertainty estimates for each turn, it is much more time-consuming than the original one.
+Instead of using estimated ATTs for periods prior to the treatment to test for pre-trends, we recommend that users employ a leave-one-out (LOO) approach (`loo = TRUE`) to consecutively hide one pretreatment period (relative to the timing of the treatment) and repeatedly estimate the pseudo treatment effects for that pretreatment period. The LOO approach can be understood as an extension of the placebo test. It has the benefit of providing users with a more holistic view of whether the identifying assumptions likely hold. However, because the program needs to conduct uncertainty estimation in each iteration, it is much more time-consuming than the original procedure.
 
 ```{r simdata_fect_loo, eval=FALSE, cache = TRUE, message = FALSE, results = 'hide'}
 out.fect.loo <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id","time"),
@@ -386,7 +450,7 @@ In the above plot, the three periods in blue are droppred from the first-stage e
 
 ------------------------------------------------------------------------
 
-## Cumulative Effects
+## Cumulative effects
 
 Users can use `effect()` to calculate cumulative treatment effects. The behavior of `effect()` is similar to the function of the same name in `gsynth`.
 
 - Calculation of cumulative effects will need unit-time level bootstrap results. Choose the option `keep.sims=TRUE` to record them.
@@ -399,6 +463,7 @@ cumu.out <- effect(out)
 ```
 
 Print and plot cumulative effects
+
 ```{r effect.plot, cache = TRUE}
 print(cumu.out)
 plot(cumu.out)
@@ -560,10 +625,9 @@ plot(out.ife.g, show.group = "Cohort:22",
      xlim = c(-15, 10), ylim = c(-10, 10))
 ```
-
 ------------------------------------------------------------------------
 
-## Additional Notes
+## Additional notes
 
 1. By default, the program will drop the units that have no larger than 5 observations under control, which is the reason why sometimes there are less available units in the placebo test or carryover test than in the original estimation. We can specify a preferred criteria in the option `min.T0` (default to 5). As a rule of thumb for the IFE estimator, the minimum number of observations under control for a unit should be larger than the specified number of factor `r`.
diff --git a/vignettes/03-plots.Rmd b/vignettes/03-plots.Rmd
index 92a5700..faa361d 100644
--- a/vignettes/03-plots.Rmd
+++ b/vignettes/03-plots.Rmd
@@ -30,7 +30,7 @@ While these customization options are demonstrated using the default `gap` plot,
 
 We will be using two datasets in this chapter. As explained in @sec-panel, @GS2020 examines the mobilizing effect of minority candidates on coethnic support in U.S. congressional elections. The treatment variable indicates the presence of an Asian candidate, and the outcome variable represents the proportion of general election contributions from Asian donors. @HH2019 study the effects of indirect democracy versus direct democracy (treatment) on naturalization rates (outcome) in Switzerland using municipality-year panel data from 1991 to 2009.
 
-First, we load the required packages. The datasets, `hh2019` and `gs2020`, are included with the **fect** package and can be loaded using `data(fect)`.
+First, we load the required packages. The datasets, `hh2019` and `gs2020`, are included with the **fect** package and can be loaded using `data(fect)`. 
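Editor's aside (not part of the diff), referring back to the cumulative-effects hunks in 02-fect.Rmd above: `effect()` needs the unit-time bootstrap draws, which are kept only when `keep.sims = TRUE`. A minimal sketch is given below; the vignette's actual `out` call is not shown in this diff, so the estimator arguments are assumptions.

```r
# Sketch (assumption): fit with keep.sims = TRUE so that effect() can aggregate
# the unit-time bootstrap draws into cumulative treatment effects.
out <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id", "time"),
            force = "two-way", method = "fe", se = TRUE,
            nboots = 200, parallel = TRUE, keep.sims = TRUE)

cumu.out <- effect(out)   # cumulative treatment effects, as in the hunk above
print(cumu.out)
plot(cumu.out)
```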
 ```{r load, message=FALSE}
 # load libraries and data
@@ -119,6 +119,7 @@ plot(out,
      cex.text = 1.2,       # Annotation text size
      main = "Text and Theme Customization")
 ```
 
+
 ### Presets
 
 For convenience, we can use the `preset` argument to apply preset colors. The default is `"default"`, which is mostly black and white with a bit of color. Other options include `"vibrant"` and `"grayscale"`, which can be used to create more colorful or monochromatic plots, respectively.
@@ -132,7 +133,8 @@ plot(out.hh,
      main = "Vibrant Preset Colors: Hainmueller and Hangartner (2019)")
 ```
 
-We can change the color of the estimates (and their confidence intervals) using the `color` option.
+We can change the color of the estimates (and their confidence intervals) using the `color` option. 
+
 ```{r preset-vibrant2}
 plot(out.hh,
      preset = "vibrant",       # Use vibrant colors
@@ -159,7 +161,7 @@ plot(out,
 )
 ```
 
-Moreover, in any plot that uses a shaded band to represent the CIs, we can set `ci.outline` to `TRUE` to draw an outline around the shaded band to improve visibility.
+Moreover, in any plot that uses a shaded band to represent the CIs, we can set `ci.outline` to `TRUE` to draw an outline around the shaded band to improve visibility. 
 
 ```{r ci-outline}
 plot(out,
@@ -382,7 +384,9 @@ plot(out_fe_carryover)
 ```
 
 ## Status Plot
-The status plot (`type = "status"`) displays the treatment status by period for all units in a similar fashion to `panelView`. Each of the indicator colors can be customized using the `status.*.color` options.
+
+The status plot (`type = "status"`) displays the treatment status by period for all units in a similar fashion to `panelView`. Each of the indicator colors can be customized using the `status.*.color` options. 
+
 ```{r status}
 plot(out_fe_carryover, type = "status",
      status.treat.color = "#D55E00",     # Color for treated units
diff --git a/vignettes/05-panel.Rmd b/vignettes/05-panel.Rmd
index c2877f0..e40e5ee 100644
--- a/vignettes/05-panel.Rmd
+++ b/vignettes/05-panel.Rmd
@@ -6,7 +6,9 @@ editor:
 
 # New DID Methods {#sec-panel}
 
-This chapter, authored by Ziyi Liu and Yiqing Xu, complements @CLLX2025 ([paper](https://yiqingxu.org/papers/english/2023_panel/CLLX.pdf), [slides](https://yiqingxu.org/papers/english/2023_panel/CLLX_slides.pdf)). Rivka Lipkovitz also contributes to this tutorial. Download the R code used in this chapter [here](rscript/05-panel.R).
+This chapter, authored by Ziyi Liu and Yiqing Xu, complements @CLLX2025 ([paper](https://yiqingxu.org/papers/english/2023_panel/CLLX.pdf), [slides](https://yiqingxu.org/papers/english/2023_panel/CLLX_slides.pdf)).
+Rivka Lipkovitz also contributes to this tutorial.
+Download the R code used in this chapter [here](rscript/05-panel.R).
 
 ------------------------------------------------------------------------
 
@@ -656,9 +658,6 @@ p.fect.balance <- esplot(fect.balance.output,Period = 'Time',
 p.fect.balance
 ```
 
-
-
-
 ## With Treatment Reversals
 
 We use data from @GS2020 to demonstrate methods that can accommodate treatment reversals, where treatment can switch on and off.
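Editor's aside (not part of the diff): the 03-plots.Rmd hunks above touch several customization options -- `preset`, `color`, `ci.outline`, and the `status.*.color` family. A minimal sketch combining them is shown below; the object names follow the vignette (`out.hh`, `out_fe_carryover`), and the hex value passed to `color` is an arbitrary illustrative choice.

```r
# Sketch: plot customization options referenced in the 03-plots.Rmd hunks above.
plot(out.hh,
     preset = "vibrant",            # preset palettes: "default", "vibrant", "grayscale"
     color = "#0072B2",             # color of the estimates and their CIs
     ci.outline = TRUE,             # outline the shaded CI band for visibility
     main = "Customized Event-Study Plot")

plot(out_fe_carryover, type = "status",
     status.treat.color = "#D55E00")   # indicator color for treated observations
```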
diff --git a/vignettes/06-sens.Rmd b/vignettes/06-sens.Rmd
index 025b419..8ff2aba 100644
--- a/vignettes/06-sens.Rmd
+++ b/vignettes/06-sens.Rmd
@@ -85,7 +85,7 @@ out.fect.placebo <- fect_sens(
   periodMbarvec = Mbar_vec_period_rm,
   Mvec = M_vec_avg_smooth,
   periodMvec = M_vec_period_smooth,
-  parallel = FALSE # Set to TRUE for parallel processing if desired
+  parallel = TRUE # Parallel processing; set to FALSE to disable
 )
 ```
 
diff --git a/vignettes/07-cheetsheet.Rmd b/vignettes/07-cheetsheet.Rmd
index 9bcf688..425236b 100644
--- a/vignettes/07-cheetsheet.Rmd
+++ b/vignettes/07-cheetsheet.Rmd
@@ -22,17 +22,19 @@ In this chapter, we provide an overview of the main functionalities of the **fec
 
 | **Type** | **Description** | **Applicable Methods** |
 |-------------------|------------------------------|-----------------------|
 | **box** | Box plot of ATT by period. | fe, ife, mc, gsynth, polynomial, cfe |
-| **calendar** | ATT by calendar time. | fe, ife, mc, gsynth, polynomial, cfe |
-| **counterfactual** | Observed vs. imputed outcome for treated units. | fe, ife, mc, gsynth |
+| **calendar** | CATT by calendar time. | fe, ife, mc, gsynth, polynomial, cfe |
+| **heterogeneous** or **hte** | CATT by a covariate. | fe, ife, mc, gsynth, polynomial, cfe |
+| **counterfactual** or **ct** | Observed vs. imputed outcome for treated units. | fe, ife, mc, gsynth |
 | **equiv** | Pretreatment residuals with equivalence intervals. | fe, ife, mc, gsynth, polynomial, cfe |
 | **exit** | Period-wise ATT relative to treatment exit. | fe, ife, mc, polynomial, cfe |
 | **factors** | Estimated factors (factor-based methods). | ife, gsynth |
-| **gap** | ATT by pre- and post-treatment periods. | fe, ife, mc, gsynth, polynomial, cfe |
+| **gap** or **es** | Event-study plot: ATT by pre- and post-treatment periods. | fe, ife, mc, gsynth, polynomial, cfe |
 | **loadings** | Estimated factor loadings (factor-based methods). | ife, gsynth |
 | **status** | Treatment status by period for all units. | fe, ife, mc, gsynth, polynomial, cfe |
 | **sens** | Rambachan & Roth (2023) sensitivity analysis for treatment effects. | `fect_sens()` function applied to fe, ife, mc, gsynth, polynomial, cfe |
 | **sens_es** | Event-study sensitivity analysis for treatment effects. | `fect_sens()` function applied to fe, ife, mc, gsynth, polynomial, cfe |
 | **cumul** | Cumulative treatment effects over time. | `effect()` function applied to fe, ife, mc, gsynth, polynomial, cfe |
+
 
 ## Tests as Options
 
 *Inputs for* `fect(...)`
diff --git a/vignettes/_book/rscript/02-fect.R b/vignettes/_book/rscript/02-fect.R
index 8db9066..9909541 100644
--- a/vignettes/_book/rscript/02-fect.R
+++ b/vignettes/_book/rscript/02-fect.R
@@ -62,9 +62,13 @@ out.fect$est.att
 out.fect$est.avg
 out.fect$est.beta
 
+out.fect.loo <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id","time"),
+                     method = "fe", force = "two-way", se = TRUE, loo = TRUE,
+                     cores = 8, parallel = TRUE, nboots = 200)
+
 out.ife <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id","time"),
                 force = "two-way", method = "ife", CV = TRUE, r = c(0, 5),
-                se = TRUE, nboots = 200, parallel = TRUE)
+                se = TRUE, nboots = 1000, cores = 8, parallel = TRUE)
 
 plot(out.ife, main = "Estimated ATT (IFEct)")
 
@@ -90,10 +94,29 @@ out.w <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id","time"),
 
 plot(out.w, main = "Estimated Weighted ATT")
 
+## HTE: by period (box plot) and by calendar time
 plot(out.ife, type = "box", xlim = c(-15, 10))
 
 plot(out.ife, type = "calendar", xlim = c(1, 35))
 
+## HTE by a covariate
+plot(out.ife, type = "hte", covariate = "X1")
+
+plot(out.ife, type = "heterogeneous", covariate = "X1", show.count = FALSE, xlab = "X1")
+
+
+simdata$X3 <- sample(1:3, size = nrow(simdata), replace = TRUE)
+out.ife.X3 <- fect(Y ~ D + X1 + X2 + X3, data = simdata, index = c("id","time"),
+                   method = "ife", r = 2, se = TRUE, seed = 123,
+                   cores = 8, nboots = 1000, parallel = TRUE)
+
+plot(out.ife.X3, type = "hte", covariate = "X3",
+     xlab = "", ylab = "Effect of D on Y",
+     covariate.labels = c("USA", "China", "UK"),
+     ylim = c(-2, 6))
+
+
+# Diagnostic tests
 out.fect.p <- fect(Y ~ D + X1 + X2, data = simdata, index = c("id", "time"),
                    force = "two-way", parallel = TRUE, se = TRUE, CV = 0,
                    nboots = 200, placeboTest = TRUE, placebo.period = c(-2, 0))
diff --git a/vignettes/_book/rscript/06-sens.R b/vignettes/_book/rscript/06-sens.R
index b43e431..1ce6ead 100644
--- a/vignettes/_book/rscript/06-sens.R
+++ b/vignettes/_book/rscript/06-sens.R
@@ -77,7 +77,7 @@ out.fect.placebo <- fect_sens(
   periodMbarvec = Mbar_vec_period_rm,
   Mvec = M_vec_avg_smooth,
   periodMvec = M_vec_period_smooth,
-  parallel = FALSE # Set to TRUE for parallel processing if desired
+  parallel = TRUE # Parallel processing; set to FALSE to disable
 )
 
 plot(out.fect.placebo,
diff --git a/vignettes/index.qmd b/vignettes/index.qmd
index deaa92c..8855691 100644
--- a/vignettes/index.qmd
+++ b/vignettes/index.qmd
@@ -4,11 +4,11 @@ This Quarto book serves as a user manual for the **fect** package in R, which im
 
 **fect** covers a series of counterfactual estimators, including the five estimators from the last version and integrating the latest version of the **gsynth** package for the generalized synthetic control method (Gsynth). This Quarto book also facilitates the application of various new difference-in-differences (DID) estimators. For details of these methods, see
 
-- @Xu2017 for Gsynth [Paper]
+- @Xu2017 for Gsynth \[Paper\]
 
-- @LWX2022 for counterfactual estimators [Paper]
+- @LWX2022 for counterfactual estimators \[Paper\]
 
-- @CLLX2025 for a survey of the new DID estimators [Paper]
+- @CLLX2025 for a survey of the new DID estimators \[Paper\]
 
 ::: {.callout-note appearance="simple"}
 ### Source Code
@@ -65,9 +65,9 @@ The user guide is structured into the following chapters:
 
 - [Chapter @sec-panel]\
     This chapter facilitates the application of various new DID estimators.
-    
+
 - [Chapter @sec-panel-sens]\
-    This chapter introduces the sensitivity analysis for the counterfactual estimators. 
+    This chapter introduces the sensitivity analysis for the counterfactual estimators.
 
 - [Chapter @sec-cheatsheet]\
     The final chapter summarizes the core inputs required for implementing the six methods, along with options for plotting and diagnostics.
diff --git a/vignettes/references.bib b/vignettes/references.bib
index 5657647..160d043 100644
--- a/vignettes/references.bib
+++ b/vignettes/references.bib
@@ -6,6 +6,13 @@ @article{CLLX2025
   year={2025}
 }
 
+@article{li2025benchmarking,
+  title={Benchmarking parallel trends violations in regression imputation difference-in-differences},
+  author={Li, Zikai and Strezhnev, Anton},
+  year={2025},
+  publisher={OSF}
+}
+
 @ARTICLE{LWX2022,
   title = "A Practical Guide to Counterfactual Estimators for Causal Inference with Time-Series Cross-Sectional Data",
   author = "Liu, Licheng and Wang, Ye and Xu, Yiqing",