From d598feddf5738c447a7d78145017291cc451c4eb Mon Sep 17 00:00:00 2001 From: igerber Date: Tue, 7 Apr 2026 09:11:39 -0400 Subject: [PATCH] v3.0.0: Remove deprecated params, version bump, docs refresh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaking changes: - Remove CallawaySantAnna bootstrap_weight_type param (use bootstrap_weights) - Remove TROP method="twostep" alias (use method="local") - Remove TROP method="joint" alias (use method="global") Also: - Rename internal bootstrap_weight_type attr to bootstrap_weights in mixin - Add TROP set_params() method validation - Schedule SyntheticDiD lambda_reg/zeta removal for v3.1 - Bump version 2.9.1 → 3.0.0 across all 5 locations - Update ROADMAP Phase 10 items as shipped - Remove stale WooldridgeDiD survey gap notes from all docs - Add survey_aggregate() future item to ROADMAP Co-Authored-By: Claude Opus 4.6 (1M context) --- CHANGELOG.md | 24 + ROADMAP.md | 31 +- TODO.md | 6 +- diff_diff/__init__.py | 2 +- diff_diff/staggered.py | 21 +- diff_diff/staggered_bootstrap.py | 8 +- diff_diff/staggered_triple_diff.py | 3 +- diff_diff/synthetic_did.py | 7 +- diff_diff/trop.py | 41 +- .../diff_diff.CallawaySantAnna.rst | 2 +- docs/api/trop.rst | 4 - docs/llms-full.txt | 10 +- docs/llms.txt | 4 +- docs/methodology/REGISTRY.md | 5 +- docs/survey-roadmap.md | 49 +- docs/troubleshooting.rst | 43 - docs/tutorials/10_trop.ipynb | 997 +++++++++++++++++- pyproject.toml | 2 +- rust/Cargo.toml | 2 +- tests/test_methodology_callaway.py | 33 - tests/test_staggered.py | 7 +- tests/test_staggered_triple_diff.py | 4 +- tests/test_trop.py | 30 +- 23 files changed, 1065 insertions(+), 270 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a959ba01..d7b76b9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [3.0.0] - 2026-04-07 + +v3.0 completes the survey support roadmap: all 
16 estimators (15 inference-level + +BaconDecomposition diagnostic) now accept `survey_design`. See v2.8.0–v2.9.1 entries +for the full feature history leading to this release. + +### Breaking Changes +- **Remove `bootstrap_weight_type` parameter** from CallawaySantAnna — use `bootstrap_weights` instead (deprecated since v1.0.1) +- **Remove TROP `method="twostep"` alias** — use `method="local"` (deprecated since v2.7.2) +- **Remove TROP `method="joint"` alias** — use `method="global"` (deprecated since v2.7.2) + +### Upgrading from v2.x +- `CallawaySantAnna(bootstrap_weight_type="mammen")` → `CallawaySantAnna(bootstrap_weights="mammen")` +- `TROP(method="twostep")` → `TROP(method="local")` +- `TROP(method="joint")` → `TROP(method="global")` + +### Deprecated +- SyntheticDiD `lambda_reg` and `zeta` parameters formally scheduled for removal in v3.1 — use `zeta_omega`/`zeta_lambda` instead + +### Changed +- Internal attribute `bootstrap_weight_type` renamed to `bootstrap_weights` in bootstrap mixin and StaggeredTripleDifference for consistency +- TROP `set_params()` now validates `method` against `("local", "global")` — previously only validated in `__init__` +- Documentation updated: all survey gap notes for WooldridgeDiD removed, ROADMAP Phase 10 items marked shipped + ## [2.9.1] - 2026-04-06 ### Added diff --git a/ROADMAP.md b/ROADMAP.md index 495160df..39f14e3a 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -6,7 +6,7 @@ For past changes and release history, see [CHANGELOG.md](CHANGELOG.md). --- -## Current Status (v2.9.0) +## Current Status (v3.0) diff-diff is a **production-ready** DiD library with feature parity with R's `did` + `HonestDiD` + `synthdid` ecosystem for core DiD analysis, plus **unique survey support** that no R or Python package matches. 
@@ -28,10 +28,10 @@ diff-diff is a **production-ready** DiD library with feature parity with R's `di ### Survey Support -`SurveyDesign` with strata, PSU, FPC, weight types (pweight/fweight/aweight), lonely PSU handling. 15 of 16 estimators accept `survey_design` (WooldridgeDiD support planned for Phase 10f); design-based variance estimation varies by estimator: +`SurveyDesign` with strata, PSU, FPC, weight types (pweight/fweight/aweight), lonely PSU handling. All 16 estimators accept `survey_design` (15 inference-level + BaconDecomposition diagnostic); design-based variance estimation varies by estimator: - **TSL variance** (Taylor Series Linearization) with strata + PSU + FPC -- **Replicate weights**: BRR, Fay's BRR, JK1, JKn, SDR — 12 of 16 estimators (not SyntheticDiD, TROP, BaconDecomposition, or WooldridgeDiD) +- **Replicate weights**: BRR, Fay's BRR, JK1, JKn, SDR — 12 of 16 estimators (not SyntheticDiD, TROP, BaconDecomposition, or WooldridgeDiD) - **Survey-aware bootstrap**: multiplier at PSU (IF-based) and Rao-Wu rescaled (resampling-based) - **DEFF diagnostics**, **subpopulation analysis**, **weight trimming**, **CV on estimates** - **Repeated cross-sections**: `CallawaySantAnna(panel=False)` for BRFSS, ACS, CPS @@ -39,8 +39,6 @@ diff-diff is a **production-ready** DiD library with feature parity with R's `di See [Survey Design Support](docs/choosing_estimator.rst#survey-design-support) for the full compatibility matrix, and [survey-roadmap.md](docs/survey-roadmap.md) for implementation details. -**Gap**: WooldridgeDiD does not yet accept `survey_design`. Planned for Phase 10f. 
- ### Infrastructure - Optional Rust backend for accelerated computation @@ -50,24 +48,29 @@ See [Survey Design Support](docs/choosing_estimator.rst#survey-design-support) f --- -## Active Work: Survey Academic Credibility (Phase 10) +## Survey Academic Credibility (Phase 10) -Before broadly announcing survey capability, we are establishing the theoretical -and empirical foundation needed for credibility with practitioners and -methodologists. See [survey-roadmap.md](docs/survey-roadmap.md) for detailed specs. +Phase 10 established the theoretical and empirical foundation for survey support +credibility. See [survey-roadmap.md](docs/survey-roadmap.md) for detailed specs. | Item | Priority | Status | |------|----------|--------| -| **10a.** Theory document (`survey-theory.md`) | HIGH | Not started | -| **10b.** Research-grade survey DGP (enhance `generate_survey_did_data`) | HIGH | Not started | -| **10c.** Expand R validation (ImputationDiD, StackedDiD, SunAbraham, TripleDifference) | HIGH | Not started | -| **10d.** Tutorial: flat-weight vs design-based comparison | HIGH | Not started — depends on 10b | +| **10a.** Theory document (`survey-theory.md`) | HIGH | ✅ Shipped (v2.9.1) | +| **10b.** Research-grade survey DGP (enhance `generate_survey_did_data`) | HIGH | ✅ Shipped (v2.9.1) | +| **10c.** Expand R validation (ImputationDiD, StackedDiD, SunAbraham, TripleDifference) | HIGH | ✅ Shipped (v2.9.1) | +| **10d.** Tutorial: flat-weight vs design-based comparison | HIGH | ✅ Shipped (v2.9.1) | | **10e.** Position paper / arXiv preprint | MEDIUM | Not started — depends on 10b | -| **10f.** WooldridgeDiD survey support (OLS + logit + Poisson) | MEDIUM | Not started | +| **10f.** WooldridgeDiD survey support (OLS + logit + Poisson) | MEDIUM | ✅ Shipped (v2.9.0) | | **10g.** Practitioner guidance: when does survey design matter? | LOW | Not started | --- +## Future: Survey Aggregation Helper + +**`survey_aggregate()` helper function** for the microdata-to-panel workflow. 
Bridges individual-level survey data (BRFSS, ACS, CPS) collected as repeated cross-sections to geographic-level (state, city) panel DiD. Computes design-based cell means and precision weights that estimators can consume directly. + +--- + ## Future Estimators ### de Chaisemartin-D'Haultfouille Estimator diff --git a/TODO.md b/TODO.md index 5afc6274..f1c649a6 100644 --- a/TODO.md +++ b/TODO.md @@ -116,9 +116,9 @@ Mypy reports 0 errors. All mixin `attr-defined` errors resolved via Deprecated parameters still present for backward compatibility: -- `bootstrap_weight_type` in `CallawaySantAnna` (`staggered.py`) - - Deprecated in favor of `bootstrap_weights` parameter - - Remove in next major version (v3.0) +- `lambda_reg` and `zeta` in `SyntheticDiD` (`synthetic_did.py`) + - Deprecated in favor of `zeta_omega`/`zeta_lambda` parameters + - Remove in v3.1 --- diff --git a/diff_diff/__init__.py b/diff_diff/__init__.py index f20be546..148aa805 100644 --- a/diff_diff/__init__.py +++ b/diff_diff/__init__.py @@ -214,7 +214,7 @@ EDiD = EfficientDiD ETWFE = WooldridgeDiD -__version__ = "2.9.1" +__version__ = "3.0.0" __all__ = [ # Estimators "DifferenceInDifferences", diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py index f4e63cae..2004f2e7 100644 --- a/diff_diff/staggered.py +++ b/diff_diff/staggered.py @@ -153,9 +153,6 @@ class CallawaySantAnna( - "rademacher": +1/-1 with equal probability (standard choice) - "mammen": Two-point distribution (asymptotically valid, matches skewness) - "webb": Six-point distribution (recommended when n_clusters < 20) - bootstrap_weight_type : str, optional - .. deprecated:: 1.0.1 - Use ``bootstrap_weights`` instead. Will be removed in v3.0. seed : int, optional Random seed for reproducibility. 
rank_deficient_action : str, default="warn" @@ -293,7 +290,6 @@ def __init__( cluster: Optional[str] = None, n_bootstrap: int = 0, bootstrap_weights: Optional[str] = None, - bootstrap_weight_type: Optional[str] = None, seed: Optional[int] = None, rank_deficient_action: str = "warn", base_period: str = "varying", @@ -323,18 +319,7 @@ def __init__( f"pscore_fallback must be 'error' or 'unconditional', " f"got '{pscore_fallback}'" ) - # Handle bootstrap_weight_type deprecation - if bootstrap_weight_type is not None: - warnings.warn( - "bootstrap_weight_type is deprecated and will be removed in v3.0. " - "Use bootstrap_weights instead.", - DeprecationWarning, - stacklevel=2, - ) - if bootstrap_weights is None: - bootstrap_weights = bootstrap_weight_type - - # Default to rademacher if neither specified + # Default to rademacher if not specified if bootstrap_weights is None: bootstrap_weights = "rademacher" @@ -362,8 +347,6 @@ def __init__( self.cluster = cluster self.n_bootstrap = n_bootstrap self.bootstrap_weights = bootstrap_weights - # Keep bootstrap_weight_type for backward compatibility - self.bootstrap_weight_type = bootstrap_weights self.seed = seed self.rank_deficient_action = rank_deficient_action self.base_period = base_period @@ -3881,8 +3864,6 @@ def get_params(self) -> Dict[str, Any]: "cluster": self.cluster, "n_bootstrap": self.n_bootstrap, "bootstrap_weights": self.bootstrap_weights, - # Deprecated but kept for backward compatibility - "bootstrap_weight_type": self.bootstrap_weight_type, "seed": self.seed, "rank_deficient_action": self.rank_deficient_action, "base_period": self.base_period, diff --git a/diff_diff/staggered_bootstrap.py b/diff_diff/staggered_bootstrap.py index 2b9095ff..10f9549f 100644 --- a/diff_diff/staggered_bootstrap.py +++ b/diff_diff/staggered_bootstrap.py @@ -118,7 +118,7 @@ class CallawaySantAnnaBootstrapMixin: # Type hints for attributes accessed from the main class n_bootstrap: int - bootstrap_weight_type: str + 
bootstrap_weights: str alpha: float seed: Optional[int] anticipation: int @@ -329,7 +329,7 @@ def _run_multiplier_bootstrap( if _use_survey_bootstrap: # PSU-level multiplier weights psu_weights, psu_ids = _generate_survey_multiplier_weights_batch( - self.n_bootstrap, resolved_survey_unit, self.bootstrap_weight_type, rng + self.n_bootstrap, resolved_survey_unit, self.bootstrap_weights, rng ) # Build unit → PSU column map if resolved_survey_unit.psu is not None: @@ -348,7 +348,7 @@ def _run_multiplier_bootstrap( else: # Standard unit-level weights (no survey or weights-only) all_bootstrap_weights = _generate_bootstrap_weights_batch( - self.n_bootstrap, n_units, self.bootstrap_weight_type, rng + self.n_bootstrap, n_units, self.bootstrap_weights, rng ) # Vectorized bootstrap ATT(g,t) computation @@ -534,7 +534,7 @@ def _run_multiplier_bootstrap( return CSBootstrapResults( n_bootstrap=self.n_bootstrap, - weight_type=self.bootstrap_weight_type, + weight_type=self.bootstrap_weights, alpha=self.alpha, overall_att_se=overall_se, overall_att_ci=overall_ci, diff --git a/diff_diff/staggered_triple_diff.py b/diff_diff/staggered_triple_diff.py index fa04185c..758d518b 100644 --- a/diff_diff/staggered_triple_diff.py +++ b/diff_diff/staggered_triple_diff.py @@ -147,7 +147,6 @@ def __init__( self.base_period = base_period self.n_bootstrap = n_bootstrap self.bootstrap_weights = bootstrap_weights - self.bootstrap_weight_type = bootstrap_weights self.seed = seed self.cband = cband self.pscore_trim = pscore_trim @@ -186,7 +185,7 @@ def set_params(self, **params) -> "StaggeredTripleDifference": raise ValueError(f"Unknown parameter: {key}") setattr(self, key, value) if "bootstrap_weights" in params: - self.bootstrap_weight_type = params["bootstrap_weights"] + self.bootstrap_weights = params["bootstrap_weights"] return self # ------------------------------------------------------------------ diff --git a/diff_diff/synthetic_did.py b/diff_diff/synthetic_did.py index 43f10460..56d5f029 
100644 --- a/diff_diff/synthetic_did.py +++ b/diff_diff/synthetic_did.py @@ -144,14 +144,14 @@ def __init__( warnings.warn( "lambda_reg is deprecated and ignored. Regularization is now " "auto-computed from data. Use zeta_omega to override unit weight " - "regularization.", + "regularization. Will be removed in v3.1.", DeprecationWarning, stacklevel=2, ) if zeta is not None: warnings.warn( "zeta is deprecated and ignored. Use zeta_lambda to override " - "time weight regularization.", + "time weight regularization. Will be removed in v3.1.", DeprecationWarning, stacklevel=2, ) @@ -1124,7 +1124,8 @@ def set_params(self, **params) -> "SyntheticDiD": for key, value in params.items(): if key in _deprecated: warnings.warn( - f"{key} is deprecated and ignored. Use zeta_omega/zeta_lambda " f"instead.", + f"{key} is deprecated and ignored. Use zeta_omega/zeta_lambda " + f"instead. Will be removed in v3.1.", DeprecationWarning, stacklevel=2, ) diff --git a/diff_diff/trop.py b/diff_diff/trop.py index cbe56f77..d06ec96c 100644 --- a/diff_diff/trop.py +++ b/diff_diff/trop.py @@ -77,10 +77,6 @@ class TROP(TROPLocalMixin, TROPGlobalMixin): ATT is the mean of these effects. For the paper's full per-treated-cell estimator, use ``method='local'``. - - 'twostep': Deprecated alias for 'local'. Will be removed in v3.0. - - - 'joint': Deprecated alias for 'global'. Will be removed in v3.0. - lambda_time_grid : list, optional Grid of time weight decay parameters. 0.0 = uniform weights (disabled). Must not contain inf. Default: [0, 0.1, 0.5, 1, 2, 5]. 
@@ -140,26 +136,9 @@ def __init__( seed: Optional[int] = None, ): # Validate method parameter - # 'local'/'global' are preferred; 'twostep'/'joint' are deprecated aliases - valid_methods = ("local", "twostep", "joint", "global") + valid_methods = ("local", "global") if method not in valid_methods: raise ValueError(f"method must be one of {valid_methods}, got '{method}'") - if method == "twostep": - warnings.warn( - "method='twostep' is deprecated and will be removed in v3.0. " - "Use method='local' instead.", - FutureWarning, - stacklevel=2, - ) - method = "local" - if method == "joint": - warnings.warn( - "method='joint' is deprecated and will be removed in v3.0. " - "Use method='global' instead.", - FutureWarning, - stacklevel=2, - ) - method = "global" self.method = method # Default grids from paper @@ -913,22 +892,10 @@ def get_params(self) -> Dict[str, Any]: def set_params(self, **params) -> "TROP": """Set estimator parameters.""" for key, value in params.items(): - if key == "method" and value == "twostep": - warnings.warn( - "method='twostep' is deprecated and will be removed in " - "v3.0. Use method='local' instead.", - FutureWarning, - stacklevel=2, - ) - value = "local" - if key == "method" and value == "joint": - warnings.warn( - "method='joint' is deprecated and will be removed in " - "v3.0. Use method='global' instead.", - FutureWarning, - stacklevel=2, + if key == "method" and value not in ("local", "global"): + raise ValueError( + f"method must be one of ('local', 'global'), got '{value}'" ) - value = "global" if hasattr(self, key): setattr(self, key, value) else: diff --git a/docs/api/_autosummary/diff_diff.CallawaySantAnna.rst b/docs/api/_autosummary/diff_diff.CallawaySantAnna.rst index cf2dc47e..65aeee39 100644 --- a/docs/api/_autosummary/diff_diff.CallawaySantAnna.rst +++ b/docs/api/_autosummary/diff_diff.CallawaySantAnna.rst @@ -29,7 +29,7 @@ .. 
autosummary:: ~CallawaySantAnna.n_bootstrap - ~CallawaySantAnna.bootstrap_weight_type + ~CallawaySantAnna.bootstrap_weights ~CallawaySantAnna.alpha ~CallawaySantAnna.seed ~CallawaySantAnna.anticipation diff --git a/docs/api/trop.rst b/docs/api/trop.rst index 26106244..f167e295 100644 --- a/docs/api/trop.rst +++ b/docs/api/trop.rst @@ -145,10 +145,6 @@ For the paper's full per-treated-cell estimator (Algorithm 2), use The global method is **faster** (single optimization vs N_treated optimizations). Treatment effects are **heterogeneous** per-observation residuals; ATT is their mean. -``method='twostep'`` is a deprecated alias for ``method='local'`` and will be -removed in v3.0. ``method='joint'`` is a deprecated alias for ``method='global'`` -and will be removed in v3.0. - .. list-table:: :header-rows: 1 :widths: 20 40 40 diff --git a/docs/llms-full.txt b/docs/llms-full.txt index 446f3c67..9f127858 100644 --- a/docs/llms-full.txt +++ b/docs/llms-full.txt @@ -2,7 +2,7 @@ > A Python library for Difference-in-Differences causal inference analysis. Provides sklearn-like estimators with statsmodels-style output for econometric analysis. -- Version: 2.9.1 +- Version: 3.0.0 - Repository: https://github.com/igerber/diff-diff - License: MIT - Dependencies: numpy, pandas, scipy (no statsmodels dependency) @@ -589,7 +589,7 @@ Triply Robust Panel estimator (Athey, Imbens, Qu & Viviano 2025). 
Combines nucle ```python TROP( - method: str = "twostep", # "twostep" or "global" (or deprecated "joint") + method: str = "local", # "local" or "global" lambda_time_grid: list[float] = None, # Time weight decay grid [0, 0.1, 0.5, 1, 2, 5] lambda_unit_grid: list[float] = None, # Unit weight decay grid [0, 0.1, 0.5, 1, 2, 5] lambda_nn_grid: list[float] = None, # Nuclear norm grid [0, 0.01, 0.1, 1, 10] @@ -618,7 +618,7 @@ trop.fit( ```python from diff_diff import TROP -trop = TROP(method='twostep', seed=42) +trop = TROP(method='local', seed=42) results = trop.fit(data, outcome='outcome', treatment='treated', unit='unit', time='period') results.print_summary() @@ -1525,7 +1525,7 @@ clear_cache() ## Survey Support -All estimators except `WooldridgeDiD` accept an optional `survey_design` parameter in `fit()`. Pass a `SurveyDesign` object to get design-based variance estimation. (WooldridgeDiD survey support is planned for Phase 10f.) +All estimators accept an optional `survey_design` parameter in `fit()`. Pass a `SurveyDesign` object to get design-based variance estimation. ```python from diff_diff import SurveyDesign, CallawaySantAnna @@ -1576,7 +1576,7 @@ sd_female, data_female = sd.subpopulation(data, mask=lambda df: df['sex'] == 'F' - Repeated cross-sections: `CallawaySantAnna(panel=False)` - Compatibility matrix: see `docs/choosing_estimator.rst` Survey Design Support section -No R or Python package offers design-based variance estimation for modern heterogeneity-robust DiD estimators. (`WooldridgeDiD` does not yet accept `survey_design` — planned for Phase 10f.) +No R or Python package offers design-based variance estimation for modern heterogeneity-robust DiD estimators. 
## Linear Algebra Helpers diff --git a/docs/llms.txt b/docs/llms.txt index 45cb2a75..6e196286 100644 --- a/docs/llms.txt +++ b/docs/llms.txt @@ -97,7 +97,7 @@ Full practitioner guide: docs/llms-practitioner.txt ## Survey Support -All estimators except `WooldridgeDiD` accept an optional `survey_design` parameter. Pass a `SurveyDesign` object to get design-based variance estimation: +All estimators accept an optional `survey_design` parameter. Pass a `SurveyDesign` object to get design-based variance estimation: - **Design elements**: strata, PSU, FPC, weight types (pweight/fweight/aweight), lonely PSU handling, nest - **Variance methods**: Taylor Series Linearization (TSL), replicate weights (BRR/Fay/JK1/JKn/SDR), survey-aware bootstrap @@ -105,7 +105,7 @@ All estimators except `WooldridgeDiD` accept an optional `survey_design` paramet - **Repeated cross-sections**: `CallawaySantAnna(panel=False)` for BRFSS, ACS, CPS - **Compatibility matrix**: [Survey Design Support](https://diff-diff.readthedocs.io/en/stable/choosing_estimator.html#survey-design-support) -No R or Python package offers design-based variance estimation for modern heterogeneity-robust DiD estimators. R's `did`, `fixest`, `synthdid`, and `didimputation` accept flat weight vectors only. (Note: `WooldridgeDiD` does not yet accept `survey_design` — planned for Phase 10f.) +No R or Python package offers design-based variance estimation for modern heterogeneity-robust DiD estimators. R's `did`, `fixest`, `synthdid`, and `didimputation` accept flat weight vectors only. 
## Optional diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 42f0fa5f..61b7ef68 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -1753,8 +1753,7 @@ Q(λ) = Σ_{j,s: D_js=0} [τ̂_js^loocv(λ)]² ### TROP Global Estimation Method -**Method**: `method="global"` in TROP estimator (`method="joint"` is a deprecated alias; -`method="twostep"` is a deprecated alias for `method="local"`) +**Method**: `method="global"` in TROP estimator **Approach**: Computationally efficient adaptation using the (1-W) masking principle from Eq. 2. Fits a single global model on control data, then @@ -1864,8 +1863,6 @@ For global method, LOOCV works as follows: - [x] Returns ATT = mean of per-observation post-hoc τ̂_{it} - [x] Rust acceleration for LOOCV and bootstrap -- **Note:** `method="twostep"` renamed to `method="local"` and `method="joint"` renamed to `method="global"` to form a natural local/global pair. Both old names are deprecated aliases, removal planned for v3.0. - --- # Diagnostics & Sensitivity diff --git a/docs/survey-roadmap.md b/docs/survey-roadmap.md index 10a3de48..537a84b6 100644 --- a/docs/survey-roadmap.md +++ b/docs/survey-roadmap.md @@ -104,12 +104,19 @@ Files: `benchmarks/R/benchmark_realdata_*.R`, `tests/test_survey_real_data.py`, calibration is out of scope for diff-diff today, though building this capability is a future possibility. 
+### Phase 10: Survey Completeness (v2.9.0–v3.0) + +- **10a.** Survey theory document (`survey-theory.md`) — formal justification for design-based variance with modern DiD influence functions +- **10b.** Research-grade survey DGP — 8 new parameters on `generate_survey_did_data()` +- **10c.** R validation expansion — 8 of 16 estimators cross-validated against R's `survey::svyglm()` +- **10d.** Tutorial rewrite — flat-weight vs design-based comparison with known ground truth +- **10f.** WooldridgeDiD survey support — OLS, logit, Poisson paths with `pweight` + strata/PSU/FPC + TSL variance + --- -## Phase 10: Academic Credibility and Announcement Readiness +## Phase 10: Item Details (✅ = shipped) -Before broadly announcing survey capability, these items establish the -theoretical and empirical foundation needed for credibility with +The items below establish further credibility with practitioners and methodologists. ### 10a. Theory Document (HIGH priority) ✅ @@ -150,35 +157,17 @@ unconditional PT by construction. A `conditional_pt` parameter is needed before the simulation study so that unconditional PT fails but conditional PT holds after covariate adjustment (DR/IPW recovers truth). -### 10c. Expand R Validation Coverage (HIGH priority) - -Current R-validated estimators: DifferenceInDifferences, TWFE, -CallawaySantAnna, SyntheticDiD (4 of 15). We can validate the OLS -regression path against R's `survey::svyglm()` for estimators that -reduce to WLS: - -| Estimator | Validation approach | Status | -|-----------|-------------------|--------| -| ImputationDiD | Compare WLS step against `svyglm()` | Not started | -| StackedDiD | Compare stacked WLS against `svyglm()` | Not started | -| SunAbraham | Compare interaction-weighted WLS against `svyglm()` | Not started | -| TripleDifference | Compare DDD regression against `svyglm()` | Not started | -| EfficientDiD | No R reference exists | Deferred | -| TROP | No R reference exists | Deferred | - -### 10d. 
Tutorial: Show the Pain (HIGH priority) +### 10c. Expand R Validation Coverage (HIGH priority) ✅ -Expand the survey tutorial with a side-by-side comparison using the DGP -from 10b: +8 of 16 estimators now cross-validated against R's `survey::svyglm()`: +DifferenceInDifferences, TWFE, CallawaySantAnna, SyntheticDiD, +ImputationDiD, StackedDiD, SunAbraham, TripleDifference. -- ATT with flat weights (what R's `did` package gives you) -- ATT with full survey design (what diff-diff gives you) -- DEFF showing how much SEs were underestimated -- An example where inference conclusions change +### 10d. Tutorial: Show the Pain (HIGH priority) ✅ -Because the DGP has known parameters, the tutorial can show not just that -the results differ, but which one is *right*. This is the content that -practitioners share and that converts skeptics. +Survey tutorial rewritten with side-by-side flat-weight vs design-based +comparison using the research-grade DGP from 10b, showing known ground +truth, coverage simulation, and false pre-trend detection rates. ### 10e. Position Paper / arXiv Preprint (MEDIUM priority, long-term) @@ -231,8 +220,6 @@ the limitation and suggested alternative. | Estimator | Limitation | Alternative | |-----------|-----------|-------------| -| WooldridgeDiD | Replicate weights | Use strata/PSU/FPC design with TSL variance | -| WooldridgeDiD | Bootstrap + survey | Use analytical survey SEs (set `n_bootstrap=0`) | | SyntheticDiD | Replicate weights | Use strata/PSU/FPC design with Rao-Wu rescaled bootstrap | | TROP | Replicate weights | Use strata/PSU/FPC design with Rao-Wu rescaled bootstrap | | BaconDecomposition | Replicate weights | Diagnostic only, no inference | diff --git a/docs/troubleshooting.rst b/docs/troubleshooting.rst index fbbdc74d..cdb57f12 100644 --- a/docs/troubleshooting.rst +++ b/docs/troubleshooting.rst @@ -583,49 +583,6 @@ inaccurate with missing observations. 
results = bacon.fit(balanced, outcome='y', unit='unit_id', time='period', first_treat='first_treat') -Deprecation Warnings --------------------- - -"method='twostep' is deprecated" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Problem:** TROP emits a ``FutureWarning`` that ``method='twostep'`` is -deprecated. - -**Causes:** - -1. Code uses the old ``method='twostep'`` parameter name - -**Solutions:** - -.. code-block:: python - - # Old (deprecated) - trop = TROP(method='twostep') - - # New (use 'local' instead) - trop = TROP(method='local') - -"method='joint' is deprecated" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Problem:** TROP emits a ``FutureWarning`` that ``method='joint'`` is -deprecated. - -**Causes:** - -1. Code uses the old ``method='joint'`` parameter name - -**Solutions:** - -.. code-block:: python - - # Old (deprecated) - trop = TROP(method='joint') - - # New (use 'global' instead) - trop = TROP(method='global') - Getting Help ------------ diff --git a/docs/tutorials/10_trop.ipynb b/docs/tutorials/10_trop.ipynb index 5c8d9d5e..873e9cd8 100644 --- a/docs/tutorials/10_trop.ipynb +++ b/docs/tutorials/10_trop.ipynb @@ -3,7 +3,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": "# Triply Robust Panel (TROP) Estimator\n\nThis notebook demonstrates the **Triply Robust Panel (TROP)** estimator (Athey, Imbens, Qu & Viviano, 2025), which combines three robustness components:\n\n1. **Nuclear Norm Regularized Factor Model**: Estimates interactive fixed effects via matrix completion with nuclear norm penalty\n2. **Exponential Distance-Based Unit Weights**: ω_j = exp(-λ_unit × dist(j,i)) where dist(j,i) is the root mean squared difference in outcomes between units j and i, computed only on periods where both units are untreated and excluding the target period t (Equation 3 in the paper)\n3. 
**Exponential Time Decay Weights**: θ_s = exp(-λ_time × |s-t|) weighting by proximity to treatment\n\n**Weights**: The observation-specific weights ω and θ are importance weights that control the relative contribution of each observation to counterfactual estimation. Higher weights indicate more relevant observations for the target counterfactual.\n\nTROP is particularly useful when:\n- There may be unobserved time-varying confounders with factor structure\n- Standard DiD or SDID may be biased due to latent factors\n- You want robust inference under factor confounding\n\nWe'll cover:\n1. When to use TROP\n2. Basic estimation with LOOCV tuning\n3. Understanding tuning parameters\n4. Examining factor structure\n5. Comparing TROP vs SDID" + "source": "# Triply Robust Panel (TROP) Estimator\n\nThis notebook demonstrates the **Triply Robust Panel (TROP)** estimator (Athey, Imbens, Qu & Viviano, 2025), which combines three robustness components:\n\n1. **Nuclear Norm Regularized Factor Model**: Estimates interactive fixed effects via matrix completion with nuclear norm penalty\n2. **Exponential Distance-Based Unit Weights**: \u03c9_j = exp(-\u03bb_unit \u00d7 dist(j,i)) where dist(j,i) is the root mean squared difference in outcomes between units j and i, computed only on periods where both units are untreated and excluding the target period t (Equation 3 in the paper)\n3. **Exponential Time Decay Weights**: \u03b8_s = exp(-\u03bb_time \u00d7 |s-t|) weighting by proximity to treatment\n\n**Weights**: The observation-specific weights \u03c9 and \u03b8 are importance weights that control the relative contribution of each observation to counterfactual estimation. 
Higher weights indicate more relevant observations for the target counterfactual.\n\nTROP is particularly useful when:\n- There may be unobserved time-varying confounders with factor structure\n- Standard DiD or SDID may be biased due to latent factors\n- You want robust inference under factor confounding\n\nWe'll cover:\n1. When to use TROP\n2. Basic estimation with LOOCV tuning\n3. Understanding tuning parameters\n4. Examining factor structure\n5. Comparing TROP vs SDID" }, { "cell_type": "code", @@ -127,9 +127,9 @@ "## 2. Basic TROP Estimation\n", "\n", "TROP uses leave-one-out cross-validation (LOOCV) to select three tuning parameters:\n", - "- **λ_time**: Time weight decay (higher = focus on periods near treatment)\n", - "- **λ_unit**: Unit weight decay (higher = focus on similar units)\n", - "- **λ_nn**: Nuclear norm regularization (higher = lower rank factor model)\n", + "- **\u03bb_time**: Time weight decay (higher = focus on periods near treatment)\n", + "- **\u03bb_unit**: Unit weight decay (higher = focus on similar units)\n", + "- **\u03bb_nn**: Nuclear norm regularization (higher = lower rank factor model)\n", "\n", "By default, TROP searches over a grid of values for each parameter." 
] @@ -179,9 +179,9 @@ "print(f\"Bias: {results.att - true_att:.4f}\")\n", "print()\n", "print(f\"Selected tuning parameters:\")\n", - "print(f\" λ_time: {results.lambda_time:.2f}\")\n", - "print(f\" λ_unit: {results.lambda_unit:.2f}\")\n", - "print(f\" λ_nn: {results.lambda_nn:.2f}\")\n", + "print(f\" \u03bb_time: {results.lambda_time:.2f}\")\n", + "print(f\" \u03bb_unit: {results.lambda_unit:.2f}\")\n", + "print(f\" \u03bb_nn: {results.lambda_nn:.2f}\")\n", "print(f\"\\nEffective rank of factor matrix: {results.effective_rank:.2f}\")\n", "print(f\"True rank: {n_factors}\")" ] @@ -194,26 +194,26 @@ "\n", "The three tuning parameters control different aspects of the estimation:\n", "\n", - "### λ_time (Time Decay)\n", + "### \u03bb_time (Time Decay)\n", "Controls how much weight to place on periods close to treatment:\n", - "- **λ_time = 0**: Equal weight to all pre-treatment periods\n", - "- **λ_time > 0**: More weight on recent pre-treatment periods\n", + "- **\u03bb_time = 0**: Equal weight to all pre-treatment periods\n", + "- **\u03bb_time > 0**: More weight on recent pre-treatment periods\n", "\n", - "### λ_unit (Unit Distance)\n", + "### \u03bb_unit (Unit Distance)\n", "Controls how much weight to place on similar control units:\n", - "- **λ_unit = 0**: Equal weight to all control units\n", - "- **λ_unit > 0**: More weight on control units with similar pre-treatment trajectories\n", + "- **\u03bb_unit = 0**: Equal weight to all control units\n", + "- **\u03bb_unit > 0**: More weight on control units with similar pre-treatment trajectories\n", "\n", "The distance between units j and i for target observation (i, t) is computed as the root mean squared difference in outcomes, using only periods where:\n", "1. Both units are untreated (D_js = D_is = 0)\n", - "2. The target period t is **excluded** (following Equation 3 in the paper: 1{u ≠ t})\n", + "2. 
The target period t is **excluded** (following Equation 3 in the paper: 1{u \u2260 t})\n", "\n", "This ensures the distance measure is based purely on pre-treatment comparability, not contaminated by the treatment period itself.\n", "\n", - "### λ_nn (Nuclear Norm)\n", + "### \u03bb_nn (Nuclear Norm)\n", "Controls the rank of the factor model:\n", - "- **λ_nn = 0**: No regularization (full rank)\n", - "- **λ_nn > 0**: Encourages low-rank factor structure" + "- **\u03bb_nn = 0**: No regularization (full rank)\n", + "- **\u03bb_nn > 0**: Encourages low-rank factor structure" ] }, { @@ -223,9 +223,9 @@ "outputs": [], "source": [ "# Effect of different nuclear norm regularization levels\n", - "print(\"Effect of nuclear norm regularization (λ_nn):\")\n", + "print(\"Effect of nuclear norm regularization (\u03bb_nn):\")\n", "print(\"=\"*65)\n", - "print(f\"{'λ_nn':>10} {'ATT':>12} {'Bias':>12} {'Eff. Rank':>15}\")\n", + "print(f\"{'\u03bb_nn':>10} {'ATT':>12} {'Bias':>12} {'Eff. Rank':>15}\")\n", "print(\"-\"*65)\n", "\n", "for lambda_nn in [0.0, 0.1, 1.0]: # Reduced grid\n", @@ -311,7 +311,7 @@ "source": [ "## 5. Examining Unit and Time Effects\n", "\n", - "TROP also estimates traditional unit and time fixed effects (α_i and β_t)." + "TROP also estimates traditional unit and time fixed effects (\u03b1_i and \u03b2_t)." 
] }, { @@ -353,7 +353,7 @@ " ax1.axvline(x=9.5, color='red', linestyle='--', label='Treated/Control boundary')\n", " ax1.set_xlabel('Unit')\n", " ax1.set_ylabel('Effect')\n", - " ax1.set_title('Unit Fixed Effects (α_i)')\n", + " ax1.set_title('Unit Fixed Effects (\u03b1_i)')\n", " ax1.legend()\n", " \n", " # Time effects\n", @@ -362,7 +362,7 @@ " ax2.axvline(x=n_pre - 0.5, color='black', linestyle='--', label='Treatment')\n", " ax2.set_xlabel('Period')\n", " ax2.set_ylabel('Effect')\n", - " ax2.set_title('Time Fixed Effects (β_t)')\n", + " ax2.set_title('Time Fixed Effects (\u03b2_t)')\n", " ax2.legend()\n", " \n", " plt.tight_layout()\n", @@ -578,9 +578,9 @@ "print(f\"Quick estimation:\")\n", "print(f\" ATT: {quick_results.att:.4f}\")\n", "print(f\" SE: {quick_results.se:.4f}\")\n", - "print(f\" λ_time: {quick_results.lambda_time:.2f}\")\n", - "print(f\" λ_unit: {quick_results.lambda_unit:.2f}\")\n", - "print(f\" λ_nn: {quick_results.lambda_nn:.2f}\")\n", + "print(f\" \u03bb_time: {quick_results.lambda_time:.2f}\")\n", + "print(f\" \u03bb_unit: {quick_results.lambda_unit:.2f}\")\n", + "print(f\" \u03bb_nn: {quick_results.lambda_nn:.2f}\")\n", "print(f\" Effective rank: {quick_results.effective_rank:.2f}\")" ] }, @@ -605,7 +605,948 @@ }, { "cell_type": "markdown", - "source": "## 10. Estimation Methods: Local vs Global\n\nTROP supports two estimation methods via the `method` parameter:\n\n**Local Method** (`method='local'`, default):\n- Follows Algorithm 2 from the paper\n- Computes observation-specific weights for each treated observation\n- Fits a model per treated observation, then averages the individual effects\n- More flexible, allows for heterogeneous treatment effects\n- Computationally intensive (N_treated optimizations)\n\n**Global Method** (`method='global'`):\n- Fits a single model on control data using (1-W) masked weights (per paper Eq. 
2)\n- Extracts per-observation treatment effects as post-hoc residuals: τ_it = Y_it - μ - α_i - β_t - L_it\n- ATT = mean(τ_it) over treated observations\n- Faster (single optimization) with global weights\n\nNote: `method='twostep'` is a deprecated alias for `method='local'`, and `method='joint'` is a deprecated alias for `method='global'`. Both will be removed in v3.0.", + "source": [ + "#", + "#", + " ", + "1", + "0", + ".", + " ", + "E", + "s", + "t", + "i", + "m", + "a", + "t", + "i", + "o", + "n", + " ", + "M", + "e", + "t", + "h", + "o", + "d", + "s", + ":", + " ", + "L", + "o", + "c", + "a", + "l", + " ", + "v", + "s", + " ", + "G", + "l", + "o", + "b", + "a", + "l", + "\n", + "\n", + "T", + "R", + "O", + "P", + " ", + "s", + "u", + "p", + "p", + "o", + "r", + "t", + "s", + " ", + "t", + "w", + "o", + " ", + "e", + "s", + "t", + "i", + "m", + "a", + "t", + "i", + "o", + "n", + " ", + "m", + "e", + "t", + "h", + "o", + "d", + "s", + " ", + "v", + "i", + "a", + " ", + "t", + "h", + "e", + " ", + "`", + "m", + "e", + "t", + "h", + "o", + "d", + "`", + " ", + "p", + "a", + "r", + "a", + "m", + "e", + "t", + "e", + "r", + ":", + "\n", + "\n", + "*", + "*", + "L", + "o", + "c", + "a", + "l", + " ", + "M", + "e", + "t", + "h", + "o", + "d", + "*", + "*", + " ", + "(", + "`", + "m", + "e", + "t", + "h", + "o", + "d", + "=", + "'", + "l", + "o", + "c", + "a", + "l", + "'", + "`", + ",", + " ", + "d", + "e", + "f", + "a", + "u", + "l", + "t", + ")", + ":", + "\n", + "-", + " ", + "F", + "o", + "l", + "l", + "o", + "w", + "s", + " ", + "A", + "l", + "g", + "o", + "r", + "i", + "t", + "h", + "m", + " ", + "2", + " ", + "f", + "r", + "o", + "m", + " ", + "t", + "h", + "e", + " ", + "p", + "a", + "p", + "e", + "r", + "\n", + "-", + " ", + "C", + "o", + "m", + "p", + "u", + "t", + "e", + "s", + " ", + "o", + "b", + "s", + "e", + "r", + "v", + "a", + "t", + "i", + "o", + "n", + "-", + "s", + "p", + "e", + "c", + "i", + "f", + "i", + "c", + " ", + "w", + "e", + "i", + "g", + 
"h", + "t", + "s", + " ", + "f", + "o", + "r", + " ", + "e", + "a", + "c", + "h", + " ", + "t", + "r", + "e", + "a", + "t", + "e", + "d", + " ", + "o", + "b", + "s", + "e", + "r", + "v", + "a", + "t", + "i", + "o", + "n", + "\n", + "-", + " ", + "F", + "i", + "t", + "s", + " ", + "a", + " ", + "m", + "o", + "d", + "e", + "l", + " ", + "p", + "e", + "r", + " ", + "t", + "r", + "e", + "a", + "t", + "e", + "d", + " ", + "o", + "b", + "s", + "e", + "r", + "v", + "a", + "t", + "i", + "o", + "n", + ",", + " ", + "t", + "h", + "e", + "n", + " ", + "a", + "v", + "e", + "r", + "a", + "g", + "e", + "s", + " ", + "t", + "h", + "e", + " ", + "i", + "n", + "d", + "i", + "v", + "i", + "d", + "u", + "a", + "l", + " ", + "e", + "f", + "f", + "e", + "c", + "t", + "s", + "\n", + "-", + " ", + "M", + "o", + "r", + "e", + " ", + "f", + "l", + "e", + "x", + "i", + "b", + "l", + "e", + ",", + " ", + "a", + "l", + "l", + "o", + "w", + "s", + " ", + "f", + "o", + "r", + " ", + "h", + "e", + "t", + "e", + "r", + "o", + "g", + "e", + "n", + "e", + "o", + "u", + "s", + " ", + "t", + "r", + "e", + "a", + "t", + "m", + "e", + "n", + "t", + " ", + "e", + "f", + "f", + "e", + "c", + "t", + "s", + "\n", + "-", + " ", + "C", + "o", + "m", + "p", + "u", + "t", + "a", + "t", + "i", + "o", + "n", + "a", + "l", + "l", + "y", + " ", + "i", + "n", + "t", + "e", + "n", + "s", + "i", + "v", + "e", + " ", + "(", + "N", + "_", + "t", + "r", + "e", + "a", + "t", + "e", + "d", + " ", + "o", + "p", + "t", + "i", + "m", + "i", + "z", + "a", + "t", + "i", + "o", + "n", + "s", + ")", + "\n", + "\n", + "*", + "*", + "G", + "l", + "o", + "b", + "a", + "l", + " ", + "M", + "e", + "t", + "h", + "o", + "d", + "*", + "*", + " ", + "(", + "`", + "m", + "e", + "t", + "h", + "o", + "d", + "=", + "'", + "g", + "l", + "o", + "b", + "a", + "l", + "'", + "`", + ")", + ":", + "\n", + "-", + " ", + "F", + "i", + "t", + "s", + " ", + "a", + " ", + "s", + "i", + "n", + "g", + "l", + "e", + " ", + "m", + "o", + "d", + "e", + "l", 
+ " ", + "o", + "n", + " ", + "c", + "o", + "n", + "t", + "r", + "o", + "l", + " ", + "d", + "a", + "t", + "a", + " ", + "u", + "s", + "i", + "n", + "g", + " ", + "(", + "1", + "-", + "W", + ")", + " ", + "m", + "a", + "s", + "k", + "e", + "d", + " ", + "w", + "e", + "i", + "g", + "h", + "t", + "s", + " ", + "(", + "p", + "e", + "r", + " ", + "p", + "a", + "p", + "e", + "r", + " ", + "E", + "q", + ".", + " ", + "2", + ")", + "\n", + "-", + " ", + "E", + "x", + "t", + "r", + "a", + "c", + "t", + "s", + " ", + "p", + "e", + "r", + "-", + "o", + "b", + "s", + "e", + "r", + "v", + "a", + "t", + "i", + "o", + "n", + " ", + "t", + "r", + "e", + "a", + "t", + "m", + "e", + "n", + "t", + " ", + "e", + "f", + "f", + "e", + "c", + "t", + "s", + " ", + "a", + "s", + " ", + "p", + "o", + "s", + "t", + "-", + "h", + "o", + "c", + " ", + "r", + "e", + "s", + "i", + "d", + "u", + "a", + "l", + "s", + ":", + " ", + "\u03c4", + "_", + "i", + "t", + " ", + "=", + " ", + "Y", + "_", + "i", + "t", + " ", + "-", + " ", + "\u03bc", + " ", + "-", + " ", + "\u03b1", + "_", + "i", + " ", + "-", + " ", + "\u03b2", + "_", + "t", + " ", + "-", + " ", + "L", + "_", + "i", + "t", + "\n", + "-", + " ", + "A", + "T", + "T", + " ", + "=", + " ", + "m", + "e", + "a", + "n", + "(", + "\u03c4", + "_", + "i", + "t", + ")", + " ", + "o", + "v", + "e", + "r", + " ", + "t", + "r", + "e", + "a", + "t", + "e", + "d", + " ", + "o", + "b", + "s", + "e", + "r", + "v", + "a", + "t", + "i", + "o", + "n", + "s", + "\n", + "-", + " ", + "F", + "a", + "s", + "t", + "e", + "r", + " ", + "(", + "s", + "i", + "n", + "g", + "l", + "e", + " ", + "o", + "p", + "t", + "i", + "m", + "i", + "z", + "a", + "t", + "i", + "o", + "n", + ")", + " ", + "w", + "i", + "t", + "h", + " ", + "g", + "l", + "o", + "b", + "a", + "l", + " ", + "w", + "e", + "i", + "g", + "h", + "t", + "s", + "\n", + "\n", + "N", + "o", + "t", + "e", + ":", + " ", + "`", + "m", + "e", + "t", + "h", + "o", + "d", + "=", + "'", + "t", + "w", + "o", + "s", + 
"t", + "e", + "p", + "'", + "`", + " ", + "i", + "s", + " ", + "a", + " ", + "d", + "e", + "p", + "r", + "e", + "c", + "a", + "t", + "e", + "d", + " ", + "a", + "l", + "i", + "a", + "s", + " ", + "f", + "o", + "r", + " ", + "`", + "m", + "e", + "t", + "h", + "o", + "d", + "=", + "'", + "l", + "o", + "c", + "a", + "l", + "'", + "`", + ",", + " ", + "a", + "n", + "d", + " ", + "`", + "m", + "e", + "t", + "h", + "o", + "d", + "=", + "'", + "j", + "o", + "i", + "n", + "t", + "'", + "`", + " ", + "i", + "s", + " ", + "a", + " ", + "d", + "e", + "p", + "r", + "e", + "c", + "a", + "t", + "e", + "d", + " ", + "a", + "l", + "i", + "a", + "s", + " ", + "f", + "o", + "r", + " ", + "`", + "m", + "e", + "t", + "h", + "o", + "d", + "=", + "'", + "g", + "l", + "o", + "b", + "a", + "l", + "'", + "`", + ".", + " ", + "B", + "o", + "t", + "h", + " ", + "w", + "i", + "l", + "l", + " ", + "b", + "e", + " ", + "r", + "e", + "m", + "o", + "v", + "e", + "d", + " ", + "i", + "n", + " ", + "v", + "3", + ".", + "0", + "." + ], "metadata": {} }, { @@ -638,7 +1579,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "## Summary\n\nKey takeaways for TROP:\n\n1. **Best use cases**: Factor confounding, unobserved time-varying confounders with interactive effects\n2. **Factor estimation**: Nuclear norm regularization with LOOCV for tuning\n3. **Three tuning parameters**: λ_time, λ_unit, λ_nn selected automatically via LOOCV\n4. **Unit weights**: Exponential distance-based weighting of control units, where distance is computed as RMS outcome difference on control periods excluding the target period\n5. **Time weights**: Exponential decay weighting of pre-treatment periods\n6. **Weights**: Importance weights controlling relative contribution of observations (higher = more relevant)\n7. 
**Estimation methods**:\n - `method='local'` (default): Per-observation estimation, allows heterogeneous effects\n - `method='global'`: Single model with (1-W) masking, post-hoc heterogeneous effects, faster\n\n**When to use TROP vs SDID**:\n- Use **SDID** when parallel trends is plausible and factors are not a concern\n- Use **TROP** when you suspect factor confounding (regional shocks, economic cycles, latent factors)\n- Running both provides a useful robustness check\n\n**When to use local vs global method**:\n- Use **local** (default) for maximum flexibility with per-observation weights\n- Use **global** for faster estimation with global weights\n\n**Reference**:\n- Athey, S., Imbens, G. W., Qu, Z., & Viviano, D. (2025). Triply Robust Panel Estimators. *Working Paper*. https://arxiv.org/abs/2508.21536" + "source": "## Summary\n\nKey takeaways for TROP:\n\n1. **Best use cases**: Factor confounding, unobserved time-varying confounders with interactive effects\n2. **Factor estimation**: Nuclear norm regularization with LOOCV for tuning\n3. **Three tuning parameters**: \u03bb_time, \u03bb_unit, \u03bb_nn selected automatically via LOOCV\n4. **Unit weights**: Exponential distance-based weighting of control units, where distance is computed as RMS outcome difference on control periods excluding the target period\n5. **Time weights**: Exponential decay weighting of pre-treatment periods\n6. **Weights**: Importance weights controlling relative contribution of observations (higher = more relevant)\n7. 
**Estimation methods**:\n - `method='local'` (default): Per-observation estimation, allows heterogeneous effects\n - `method='global'`: Single model with (1-W) masking, post-hoc heterogeneous effects, faster\n\n**When to use TROP vs SDID**:\n- Use **SDID** when parallel trends is plausible and factors are not a concern\n- Use **TROP** when you suspect factor confounding (regional shocks, economic cycles, latent factors)\n- Running both provides a useful robustness check\n\n**When to use local vs global method**:\n- Use **local** (default) for maximum flexibility with per-observation weights\n- Use **global** for faster estimation with global weights\n\n**Reference**:\n- Athey, S., Imbens, G. W., Qu, Z., & Viviano, D. (2025). Triply Robust Panel Estimators. *Working Paper*. https://arxiv.org/abs/2508.21536" }, { "cell_type": "code", @@ -655,7 +1596,7 @@ { "cell_type": "markdown", "metadata": {}, - "source": "## Summary\n\nKey takeaways for TROP:\n\n1. **Best use cases**: Factor confounding, unobserved time-varying confounders with interactive effects\n2. **Factor estimation**: Nuclear norm regularization with LOOCV for tuning\n3. **Three tuning parameters**: λ_time, λ_unit, λ_nn selected automatically via LOOCV\n4. **Unit weights**: Exponential distance-based weighting of control units, where distance is computed as RMS outcome difference on control periods excluding the target period\n5. **Time weights**: Exponential decay weighting of pre-treatment periods\n6. **Weights**: Importance weights controlling relative contribution of observations (higher = more relevant)\n\n**When to use TROP vs SDID**:\n- Use **SDID** when parallel trends is plausible and factors are not a concern\n- Use **TROP** when you suspect factor confounding (regional shocks, economic cycles, latent factors)\n- Running both provides a useful robustness check\n\n**Reference**:\n- Athey, S., Imbens, G. W., Qu, Z., & Viviano, D. (2025). Triply Robust Panel Estimators. *Working Paper*. 
https://arxiv.org/abs/2508.21536" + "source": "## Summary\n\nKey takeaways for TROP:\n\n1. **Best use cases**: Factor confounding, unobserved time-varying confounders with interactive effects\n2. **Factor estimation**: Nuclear norm regularization with LOOCV for tuning\n3. **Three tuning parameters**: \u03bb_time, \u03bb_unit, \u03bb_nn selected automatically via LOOCV\n4. **Unit weights**: Exponential distance-based weighting of control units, where distance is computed as RMS outcome difference on control periods excluding the target period\n5. **Time weights**: Exponential decay weighting of pre-treatment periods\n6. **Weights**: Importance weights controlling relative contribution of observations (higher = more relevant)\n\n**When to use TROP vs SDID**:\n- Use **SDID** when parallel trends is plausible and factors are not a concern\n- Use **TROP** when you suspect factor confounding (regional shocks, economic cycles, latent factors)\n- Running both provides a useful robustness check\n\n**Reference**:\n- Athey, S., Imbens, G. W., Qu, Z., & Viviano, D. (2025). Triply Robust Panel Estimators. *Working Paper*. https://arxiv.org/abs/2508.21536" } ], "metadata": { diff --git a/pyproject.toml b/pyproject.toml index b52a8ff6..192d0a96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "diff-diff" -version = "2.9.1" +version = "3.0.0" description = "Difference-in-Differences causal inference with sklearn-like API. Callaway-Sant'Anna, Synthetic DiD, Honest DiD, event studies, parallel trends." 
readme = "README.md" license = "MIT" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index c81fb180..26ee6ace 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "diff_diff_rust" -version = "2.9.1" +version = "3.0.0" edition = "2021" description = "Rust backend for diff-diff DiD library" license = "MIT" diff --git a/tests/test_methodology_callaway.py b/tests/test_methodology_callaway.py index a072f04e..ee882039 100644 --- a/tests/test_methodology_callaway.py +++ b/tests/test_methodology_callaway.py @@ -1184,39 +1184,6 @@ def test_results_significance_properties(self): # ============================================================================= -class TestDeprecationWarnings: - """Tests for deprecated parameter handling.""" - - def test_bootstrap_weight_type_deprecated(self): - """Test that bootstrap_weight_type emits deprecation warning.""" - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - cs = CallawaySantAnna(bootstrap_weight_type="mammen") - - # Check deprecation warning was emitted - deprecation_warnings = [ - warning for warning in w - if issubclass(warning.category, DeprecationWarning) - ] - assert len(deprecation_warnings) >= 1 - assert "bootstrap_weight_type" in str(deprecation_warnings[0].message) - - # Should still work (backward compatibility) - assert cs.bootstrap_weights == "mammen" - - def test_bootstrap_weights_takes_precedence(self): - """Test that bootstrap_weights takes precedence over deprecated param.""" - with warnings.catch_warnings(record=True): - warnings.simplefilter("always") - cs = CallawaySantAnna( - bootstrap_weights="rademacher", - bootstrap_weight_type="mammen" - ) - - # bootstrap_weights should take precedence - assert cs.bootstrap_weights == "rademacher" - - # ============================================================================= # MPDTA-based Strict R Comparison Tests # ============================================================================= diff 
--git a/tests/test_staggered.py b/tests/test_staggered.py index 4eb0e512..99770751 100644 --- a/tests/test_staggered.py +++ b/tests/test_staggered.py @@ -1384,7 +1384,7 @@ def test_bootstrap_weight_types(self, ci_params): weight_types = ["rademacher", "mammen", "webb"] for wt in weight_types: - cs = CallawaySantAnna(n_bootstrap=n_boot, bootstrap_weight_type=wt, seed=42) + cs = CallawaySantAnna(n_bootstrap=n_boot, bootstrap_weights=wt, seed=42) results = cs.fit( data, outcome="outcome", unit="unit", time="time", first_treat="first_treat" ) @@ -1589,9 +1589,6 @@ def test_bootstrap_invalid_weight_type(self): # Test with new parameter name with pytest.raises(ValueError, match="bootstrap_weights"): CallawaySantAnna(bootstrap_weights="invalid") - # Test deprecated parameter still validates - with pytest.raises(ValueError, match="bootstrap_weights"): - CallawaySantAnna(bootstrap_weight_type="invalid") def test_bootstrap_get_params(self): """Test that get_params includes bootstrap_weights.""" @@ -1600,8 +1597,6 @@ def test_bootstrap_get_params(self): assert params["n_bootstrap"] == 99 assert params["bootstrap_weights"] == "mammen" - # Deprecated attribute still accessible for backward compat - assert params["bootstrap_weight_type"] == "mammen" assert params["seed"] == 42 def test_bootstrap_with_not_yet_treated(self, ci_params): diff --git a/tests/test_staggered_triple_diff.py b/tests/test_staggered_triple_diff.py index e7a4496f..6954b255 100644 --- a/tests/test_staggered_triple_diff.py +++ b/tests/test_staggered_triple_diff.py @@ -72,10 +72,10 @@ def test_set_params(self): assert est.estimation_method == "ipw" assert est.alpha == 0.10 - def test_set_params_updates_bootstrap_weight_type(self): + def test_set_params_updates_bootstrap_weights(self): est = StaggeredTripleDifference() est.set_params(bootstrap_weights="mammen") - assert est.bootstrap_weight_type == "mammen" + assert est.bootstrap_weights == "mammen" def test_invalid_estimation_method(self): with 
pytest.raises(ValueError, match="estimation_method"): diff --git a/tests/test_trop.py b/tests/test_trop.py index b498d4d7..eb28d407 100644 --- a/tests/test_trop.py +++ b/tests/test_trop.py @@ -2907,13 +2907,6 @@ def test_method_in_get_params(self): assert "method" in params assert params["method"] == "global" - def test_method_in_get_params_joint_deprecated(self): - """'joint' alias maps to 'global' in get_params().""" - with pytest.warns(FutureWarning, match="deprecated"): - trop_est = TROP(method="joint") - params = trop_est.get_params() - assert params["method"] == "global" - def test_method_in_set_params(self): """method parameter can be set via set_params().""" trop_est = TROP(method="local") @@ -2922,26 +2915,13 @@ def test_method_in_set_params(self): trop_est.set_params(method="global") assert trop_est.method == "global" - def test_method_set_params_joint_deprecated(self): - """'joint' alias maps to 'global' via set_params().""" + def test_method_set_params_invalid_rejected(self): + """Invalid method values are rejected by set_params().""" trop_est = TROP(method="local") - with pytest.warns(FutureWarning, match="deprecated"): - trop_est.set_params(method="joint") - assert trop_est.method == "global" - - def test_method_in_get_params_twostep_deprecated(self): - """'twostep' alias maps to 'local' in get_params().""" - with pytest.warns(FutureWarning, match="deprecated"): - trop_est = TROP(method="twostep") - params = trop_est.get_params() - assert params["method"] == "local" - - def test_method_set_params_twostep_deprecated(self): - """'twostep' alias maps to 'local' via set_params().""" - trop_est = TROP(method="global") - with pytest.warns(FutureWarning, match="deprecated"): + with pytest.raises(ValueError, match="method must be one of"): trop_est.set_params(method="twostep") - assert trop_est.method == "local" + with pytest.raises(ValueError, match="method must be one of"): + trop_est.set_params(method="joint") def test_global_bootstrap_variance(self, 
simple_panel_data, ci_params): """Global method bootstrap variance estimation works."""