|
| 1 | +#!/usr/bin/env Rscript |
| 2 | +# Generate a fixture pinning R's `synthdid::vcov(method="placebo")` SE plus |
| 3 | +# the per-replication permutations R consumed, so the Python R-parity test |
| 4 | +# can feed those exact permutations through `_placebo_variance_se` and |
| 5 | +# assert SE match at machine precision. |
| 6 | +# |
| 7 | +# Usage: |
| 8 | +# Rscript benchmarks/R/generate_sdid_placebo_parity_fixture.R |
| 9 | +# |
| 10 | +# Output: |
| 11 | +# tests/data/sdid_placebo_indices_r.json |
| 12 | +# |
| 13 | +# Symmetric with the existing jackknife R-parity test |
| 14 | +# (TestJackknifeSERParity in tests/test_methodology_sdid.py:1410). Reuses |
| 15 | +# the same Y matrix and (N0, N1, T0, T1) shape so the placebo + jackknife |
| 16 | +# parity tests share an anchor panel. |
| 17 | +# |
| 18 | +# R version: 4.5.2; synthdid version: 0.0.9. |
| 19 | + |
| 20 | +library(synthdid) |
| 21 | +library(jsonlite) |
| 22 | + |
| 23 | +# Reconstruct R's panel exactly as TestJackknifeSERParity does (set.seed(42), |
| 24 | +# 23 units × 8 periods, treated = i > N0 with effect 5 in t > T0). |
| 25 | +set.seed(42) |
| 26 | +N0 <- 20 |
| 27 | +N1 <- 3 |
| 28 | +T0 <- 5 |
| 29 | +T1 <- 3 |
| 30 | +N <- N0 + N1 |
| 31 | +T <- T0 + T1 |
| 32 | +Y <- matrix(0, nrow = N, ncol = T) |
| 33 | +for (i in 1:N) { |
| 34 | + unit_fe <- rnorm(1, sd = 2) |
| 35 | + for (t in 1:T) { |
| 36 | + Y[i, t] <- 10 + unit_fe + (t - 1) * 0.3 + rnorm(1, sd = 0.5) |
| 37 | + if (i > N0 && t > T0) Y[i, t] <- Y[i, t] + 5.0 |
| 38 | + } |
| 39 | +} |
| 40 | + |
| 41 | +# Fit-time ATT (sanity check — must match TestJackknifeSERParity.R_ATT). |
| 42 | +tau_hat <- synthdid_estimate(Y, N0, T0) |
| 43 | +r_att <- as.numeric(tau_hat) |
| 44 | + |
| 45 | +# Reproduce R's placebo_se loop exactly so we can record permutations and |
| 46 | +# the per-rep tau alongside the resulting SE. Mirrors `synthdid:::placebo_se` |
| 47 | +# (R/vcov.R), including the warm-start weights pass-through: |
| 48 | +# |
| 49 | +# theta = function(ind) { |
| 50 | +# N0 = length(ind) - N1 |
| 51 | +# weights.boot = weights |
| 52 | +# weights.boot$omega = sum_normalize(weights$omega[ind[1:N0]]) |
| 53 | +# do.call(synthdid_estimate, c(list(Y = setup$Y[ind, ], |
| 54 | +# N0 = N0, T0 = setup$T0, X = setup$X[ind, , ], |
| 55 | +# weights = weights.boot), opts)) |
| 56 | +# } |
| 57 | +# |
| 58 | +# The warm-start `weights.boot$omega` differs from a fresh uniform init |
| 59 | +# at finite FW iterations and is what `vcov(method="placebo")` actually |
| 60 | +# consumes — so reproducing it here is required for bit-identical SE. |
| 61 | +opts_used <- attr(tau_hat, "opts") |
| 62 | +fit_weights <- attr(tau_hat, "weights") |
| 63 | +fit_setup <- attr(tau_hat, "setup") |
| 64 | +replications <- 200 |
| 65 | + |
| 66 | +# Use a fresh seed for the placebo loop so the recorded permutations are |
| 67 | +# independent of the fit-time RNG state. Python consumes the recorded |
| 68 | +# permutations directly (no RNG-state matching needed). |
| 69 | +set.seed(42) |
| 70 | +perms <- vector("list", replications) |
| 71 | +taus <- numeric(replications) |
| 72 | + |
| 73 | +for (r in 1:replications) { |
| 74 | + ind <- sample(1:N0, N0) |
| 75 | + perms[[r]] <- ind |
| 76 | + N0_placebo <- N0 - N1 |
| 77 | + weights_boot <- fit_weights |
| 78 | + weights_boot$omega <- synthdid:::sum_normalize(fit_weights$omega[ind[1:N0_placebo]]) |
| 79 | + # IMPORTANT: R's `placebo_se` uses ONLY the N0 controls (subdivided into |
| 80 | + # N0-N1 pseudo-controls + N1 pseudo-treated). Real treated rows are NOT |
| 81 | + # included in the placebo Y matrix — that's what makes the placebo a |
| 82 | + # null-distribution test. ``Y = setup$Y[ind, ]`` is N0 rows; appending |
| 83 | + # the real treated rows (i.e., ``setup$Y[c(ind, (N0+1):N), ]``) would |
| 84 | + # change the test entirely (and produces SE ~0.132 instead of R's 0.226 |
| 85 | + # — a 2× drift on this fixture). |
| 86 | + est_placebo <- do.call( |
| 87 | + synthdid_estimate, |
| 88 | + c(list( |
| 89 | + Y = fit_setup$Y[ind, ], |
| 90 | + N0 = N0_placebo, |
| 91 | + T0 = T0, |
| 92 | + X = fit_setup$X[ind, , ], |
| 93 | + weights = weights_boot |
| 94 | + ), opts_used) |
| 95 | + ) |
| 96 | + taus[r] <- as.numeric(est_placebo) |
| 97 | +} |
| 98 | + |
| 99 | +r_placebo_se <- sqrt((replications - 1) / replications) * sd(taus) |
| 100 | + |
| 101 | +# Sanity check against R's vcov() entry point. With the warm-start pattern |
| 102 | +# applied explicitly above, the manual loop and `vcov()` should produce |
| 103 | +# the same SE up to MC noise on the seed sequence. Match isn't required |
| 104 | +# for the parity test (we use `r_placebo_se` from our recorded |
| 105 | +# permutations); both values are kept for transparency. |
| 106 | +set.seed(42) |
| 107 | +r_placebo_se_via_vcov <- sqrt(vcov(tau_hat, method = "placebo", replications = replications)[1, 1]) |
| 108 | + |
| 109 | +cat(sprintf("R ATT: %.15f\n", r_att)) |
| 110 | +cat(sprintf("R placebo SE (manual loop): %.15f\n", r_placebo_se)) |
| 111 | +cat(sprintf("R placebo SE (via vcov): %.15f\n", r_placebo_se_via_vcov)) |
| 112 | +cat(sprintf("Replications: %d\n", replications)) |
| 113 | + |
| 114 | +# Convert permutations to 0-indexed for Python (R uses 1-indexed). |
| 115 | +perms_0indexed <- lapply(perms, function(p) as.integer(p - 1L)) |
| 116 | + |
| 117 | +payload <- list( |
| 118 | + metadata = list( |
| 119 | + R_version = paste(R.version$major, R.version$minor, sep = "."), |
| 120 | + synthdid_version = as.character(packageVersion("synthdid")), |
| 121 | + seed = 42L, |
| 122 | + replications = as.integer(replications), |
| 123 | + note = paste( |
| 124 | + "Permutations are 0-indexed for direct numpy consumption.", |
| 125 | + "R ATT, R placebo SE (manual loop), and per-rep taus are pinned", |
| 126 | + "for downstream Python parity assertion." |
| 127 | + ) |
| 128 | + ), |
| 129 | + N0 = as.integer(N0), |
| 130 | + N1 = as.integer(N1), |
| 131 | + T0 = as.integer(T0), |
| 132 | + T1 = as.integer(T1), |
| 133 | + R_ATT = r_att, |
| 134 | + R_PLACEBO_SE = r_placebo_se, |
| 135 | + R_PLACEBO_SE_VIA_VCOV = r_placebo_se_via_vcov, |
| 136 | + R_PLACEBO_TAUS = as.numeric(taus), |
| 137 | + R_PERMUTATIONS = perms_0indexed |
| 138 | +) |
| 139 | + |
| 140 | +out_path <- "tests/data/sdid_placebo_indices_r.json" |
| 141 | +write_json(payload, out_path, auto_unbox = TRUE, digits = 17) |
| 142 | +cat(sprintf("\nWrote %s\n", out_path)) |
0 commit comments