Add base_period parameter to CallawaySantAnna for pre-treatment effects

igerber · claude · igerber · commit 673a83050145 · 2026-01-21T14:06:38.000-05:00
Implement the base_period parameter matching R's did::att_gt() API to enable
computation of pre-treatment ATT(g,t) values for parallel trends assessment.

Two modes are supported:
- "varying" (default): Pre-treatment uses t-1 as base (consecutive comparisons)
- "universal": All comparisons (pre- and post-treatment) use the fixed period g - anticipation - 1 as base

Both modes produce identical post-treatment ATT(g,t) values. They differ only
in how pre-treatment effects are computed. The overall ATT aggregation only
includes post-treatment effects, matching R's behavior.

Changes:
- Add base_period parameter to CallawaySantAnna.__init__ with validation
- Modify _compute_att_gt_fast to select base period based on mode
- Update fit() to compute pre-treatment ATT(g,t) where t < g - anticipation
- Filter _aggregate_simple and bootstrap to only aggregate post-treatment effects
- Add base_period to CallawaySantAnnaResults and display in summary()
- Update methodology registry with base_period edge case documentation
- Add 11 new tests for pre-treatment effects

Validated against R's did package v2.3.0 with max numerical difference of 4.91e-05.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/diff_diff/staggered.py b/diff_diff/staggered.py
@@ -292,6 +292,7 @@ def __init__(
         bootstrap_weight_type: Optional[str] = None,
         seed: Optional[int] = None,
         rank_deficient_action: str = "warn",
+        base_period: str = "varying",
     ):
         import warnings
 
@@ -333,6 +334,12 @@ def __init__(
                 f"got '{rank_deficient_action}'"
             )
 
+        if base_period not in ["varying", "universal"]:
+            raise ValueError(
+                f"base_period must be 'varying' or 'universal', "
+                f"got '{base_period}'"
+            )
+
         self.control_group = control_group
         self.anticipation = anticipation
         self.estimation_method = estimation_method
@@ -344,6 +351,7 @@ def __init__(
         self.bootstrap_weight_type = bootstrap_weights
         self.seed = seed
         self.rank_deficient_action = rank_deficient_action
+        self.base_period = base_period
 
         self.is_fitted_ = False
         self.results_: Optional[CallawaySantAnnaResults] = None
@@ -441,20 +449,30 @@ def _compute_att_gt_fast(
         all_units = precomputed['all_units']
         covariate_by_period = precomputed['covariate_by_period']
 
-        # Base period for comparison
-        base_period = g - 1 - self.anticipation
-        if base_period not in period_to_col:
+        # Base period selection based on mode
+        if self.base_period == "universal":
+            # Universal: always use g - 1 - anticipation
+            base_period_val = g - 1 - self.anticipation
+        else:  # varying
+            if t < g - self.anticipation:
+                # Pre-treatment: use t - 1 (consecutive comparison)
+                base_period_val = t - 1
+            else:
+                # Post-treatment: use g - 1 - anticipation
+                base_period_val = g - 1 - self.anticipation
+
+        if base_period_val not in period_to_col:
             # Find closest earlier period
-            earlier = [p for p in time_periods if p < g - self.anticipation]
+            earlier = [p for p in time_periods if p < base_period_val]
             if not earlier:
                 return None, 0.0, 0, 0, None
-            base_period = max(earlier)
+            base_period_val = max(earlier)
 
         # Check if periods exist in the data
-        if base_period not in period_to_col or t not in period_to_col:
+        if base_period_val not in period_to_col or t not in period_to_col:
             return None, 0.0, 0, 0, None
 
-        base_col = period_to_col[base_period]
+        base_col = period_to_col[base_period_val]
         post_col = period_to_col[t]
 
         # Get treated units mask (cohort g)
@@ -499,7 +517,7 @@ def _compute_att_gt_fast(
         X_treated = None
         X_control = None
         if covariates and covariate_by_period is not None:
-            cov_matrix = covariate_by_period[base_period]
+            cov_matrix = covariate_by_period[base_period_val]
             X_treated = cov_matrix[treated_valid]
             X_control = cov_matrix[control_valid]
 
@@ -640,9 +658,21 @@ def fit(
         group_time_effects = {}
         influence_func_info = {}  # Store influence functions for bootstrap
 
+        # Get minimum period for determining valid pre-treatment periods
+        min_period = min(time_periods)
+
         for g in treatment_groups:
-            # Periods for which we compute effects (t >= g - anticipation)
-            valid_periods = [t for t in time_periods if t >= g - self.anticipation]
+            # Compute valid periods including pre-treatment
+            if self.base_period == "universal":
+                # Universal: all periods except the base period (which is normalized to 0)
+                universal_base = g - 1 - self.anticipation
+                valid_periods = [t for t in time_periods if t != universal_base]
+            else:
+                # Varying: post-treatment + pre-treatment where t-1 exists
+                valid_periods = [
+                    t for t in time_periods
+                    if t >= g - self.anticipation or t > min_period
+                ]
 
             for t in valid_periods:
                 att_gt, se_gt, n_treat, n_ctrl, inf_info = self._compute_att_gt_fast(
@@ -768,6 +798,7 @@ def fit(
             n_control_units=n_control_units,
             alpha=self.alpha,
             control_group=self.control_group,
+            base_period=self.base_period,
             event_study_effects=event_study_effects,
             group_effects=group_effects,
             bootstrap_results=bootstrap_results,
@@ -1043,6 +1074,7 @@ def get_params(self) -> Dict[str, Any]:
             "bootstrap_weight_type": self.bootstrap_weight_type,
             "seed": self.seed,
             "rank_deficient_action": self.rank_deficient_action,
+            "base_period": self.base_period,
         }
 
     def set_params(self, **params) -> "CallawaySantAnna":
diff --git a/diff_diff/staggered_aggregation.py b/diff_diff/staggered_aggregation.py
@@ -31,6 +31,9 @@ class CallawaySantAnnaAggregationMixin:
     # Type hints for attributes accessed from the main class
     alpha: float
 
+    # Type hint for anticipation attribute accessed from main class
+    anticipation: int
+
     def _aggregate_simple(
         self,
         group_time_effects: Dict,
@@ -49,13 +52,21 @@ def _aggregate_simple(
         shared control units. This includes the wif (weight influence function)
         adjustment from R's `did` package that accounts for uncertainty in
         estimating the group-size weights.
+
+        Note: Only post-treatment effects (t >= g - anticipation) are included
+        in the overall ATT. Pre-treatment effects are computed for parallel
+        trends assessment but are not aggregated into the overall ATT.
         """
         effects = []
         weights_list = []
         gt_pairs = []
         groups_for_gt = []
 
         for (g, t), data in group_time_effects.items():
+            # Only include post-treatment effects (t >= g - anticipation)
+            # Pre-treatment effects are for parallel trends, not overall ATT
+            if t < g - self.anticipation:
+                continue
             effects.append(data['effect'])
             weights_list.append(data['n_treated'])
             gt_pairs.append((g, t))
diff --git a/diff_diff/staggered_bootstrap.py b/diff_diff/staggered_bootstrap.py
@@ -248,6 +248,7 @@ class CallawaySantAnnaBootstrapMixin:
     bootstrap_weight_type: str
     alpha: float
     seed: Optional[int]
+    anticipation: int
 
     def _run_multiplier_bootstrap(
         self,
@@ -310,15 +311,23 @@ def _run_multiplier_bootstrap(
         gt_pairs = list(group_time_effects.keys())
         n_gt = len(gt_pairs)
 
-        # Compute aggregation weights for overall ATT
-        overall_weights = np.array([
+        # Identify post-treatment (g,t) pairs for overall ATT
+        # Pre-treatment effects are for parallel trends assessment, not aggregated
+        post_treatment_mask = np.array([
+            t >= g - self.anticipation for (g, t) in gt_pairs
+        ])
+        post_treatment_indices = np.where(post_treatment_mask)[0]
+
+        # Compute aggregation weights for overall ATT (post-treatment only)
+        all_n_treated = np.array([
             group_time_effects[gt]['n_treated'] for gt in gt_pairs
         ], dtype=float)
-        overall_weights = overall_weights / np.sum(overall_weights)
+        post_n_treated = all_n_treated[post_treatment_mask]
+        overall_weights_post = post_n_treated / np.sum(post_n_treated)
 
         # Original point estimates
         original_atts = np.array([group_time_effects[gt]['effect'] for gt in gt_pairs])
-        original_overall = np.sum(overall_weights * original_atts)
+        original_overall = np.sum(overall_weights_post * original_atts[post_treatment_mask])
 
         # Prepare event study and group aggregation info if needed
         event_study_info = None
@@ -382,11 +391,11 @@ def _run_multiplier_bootstrap(
             # Let non-finite values propagate - they will be handled at statistics computation
             bootstrap_atts_gt[:, j] = original_atts[j] + perturbations
 
-        # Vectorized overall ATT: matrix-vector multiply
+        # Vectorized overall ATT: matrix-vector multiply (post-treatment only)
         # Shape: (n_bootstrap,)
         # Suppress RuntimeWarnings for edge cases - non-finite values handled at statistics computation
         with np.errstate(divide='ignore', invalid='ignore', over='ignore'):
-            bootstrap_overall = bootstrap_atts_gt @ overall_weights
+            bootstrap_overall = bootstrap_atts_gt[:, post_treatment_indices] @ overall_weights_post
 
         # Vectorized event study aggregation
         # Non-finite values handled at statistics computation stage
diff --git a/diff_diff/staggered_results.py b/diff_diff/staggered_results.py
@@ -106,6 +106,7 @@ class CallawaySantAnnaResults:
     n_control_units: int
     alpha: float = 0.05
     control_group: str = "never_treated"
+    base_period: str = "varying"
     event_study_effects: Optional[Dict[int, Dict[str, Any]]] = field(default=None)
     group_effects: Optional[Dict[Any, Dict[str, Any]]] = field(default=None)
     influence_functions: Optional["np.ndarray"] = field(default=None, repr=False)
@@ -149,6 +150,7 @@ def summary(self, alpha: Optional[float] = None) -> str:
             f"{'Treatment cohorts:':<30} {len(self.groups):>10}",
             f"{'Time periods:':<30} {len(self.time_periods):>10}",
             f"{'Control group:':<30} {self.control_group:>10}",
+            f"{'Base period:':<30} {self.base_period:>10}",
             "",
         ]
 
diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md
@@ -209,6 +209,11 @@ Aggregations:
   - Bootstrap: Drops non-finite samples, warns, and adjusts p-value floor accordingly
   - Threshold: Returns NaN if <50% of bootstrap samples are valid
   - **Note**: This is a defensive enhancement over reference implementations (R's `did::att_gt`, Stata's `csdid`) which may error or produce unhandled inf/nan in edge cases without informative warnings
+- Base period selection (`base_period` parameter):
+  - "varying" (default): Pre-treatment uses t-1 as base (consecutive comparisons)
+  - "universal": All comparisons use g-anticipation-1 as base
+  - Both produce identical post-treatment ATT(g,t); differ only pre-treatment
+  - Matches R `did::att_gt()` base_period parameter
 
 **Reference implementation(s):**
 - R: `did::att_gt()` (Callaway & Sant'Anna's official package)
diff --git a/tests/test_staggered.py b/tests/test_staggered.py