Address PR #346 CI review round 3: P1 period inference + P2 summary label

igerber · claude · igerber · commit ddc09e4341f5 · 2026-04-20T19:11:40.000-04:00
**P1 (Methodology): _validate_had_panel inferred pre/post by lexicographic sort**

Previously the validator sorted the two period labels alphabetically
and assigned `t_pre=periods[0]`, `t_post=periods[1]`. On supported
string-labelled panels like `("pre", "post")` the alphabetic order is
["post", "pre"], so the code flipped pre and post and then raised on
the treated-period D>0 check for a valid design. Same bug for
`("before", "after")` and any label pair whose alphabetical order
differs from its chronological order.

Fix: identify `t_pre` as the unique period where dose == 0 for ALL
units (HAD paper Section 2 no-unit-untreated convention); `t_post` is
the other period. This is a DGP-consistent invariant, not a string
ordering. If neither period has all-zero dose, raise with the
contract message and per-period nonzero-count diagnostics. If both
periods have all-zero dose, raise (no treatment variation to estimate).

The existing pre-period D=0 check is now tautological and has been
removed since the inference itself enforces the invariant. Behavior
on valid numeric panels (e.g., 2020/2021) is unchanged.

**P2 (Code Quality): summary() hardcoded 'WAS' row label**

`HeterogeneousAdoptionDiDResults.summary()` printed "WAS" as the
parameter label regardless of the resolved design. For Design 1
paths (continuous_near_d_lower, mass_point) the stored
`target_parameter` is "WAS_d_lower" per paper Sections 3.2.2-3.2.4,
so the user-facing output misrepresented the estimand.

Fix: render `self.target_parameter` in the summary row. Design 1'
(continuous_at_zero) now prints "WAS" and Design 1 prints
"WAS_d_lower", matching the stored result metadata.

**Tests (+7 regression):**
- TestValidateHadPanel.test_semantic_pre_post_labels_not_lexicographic
- TestValidateHadPanel.test_semantic_pre_post_with_first_treat_col
- TestValidateHadPanel.test_semantic_pre_post_fit_end_to_end
- TestValidateHadPanel.test_before_after_labels
- TestValidateHadPanel.test_no_all_zero_period_raises
- TestValidateHadPanel.test_both_all_zero_periods_raises
- TestResultMethods.test_summary_uses_target_parameter_for_row_label

Targeted regression: 133 HAD tests + 512 total across Phase 1 and
adjacent surfaces, all green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/diff_diff/had.py b/diff_diff/had.py
@@ -289,6 +289,7 @@ def summary(self) -> str:
             bc = self.bias_corrected_fit
             lines.append(f"{'Bandwidth h used:':<30} {bc.h:>20.6g}")
             lines.append(f"{'Obs in window (n_used):':<30} {bc.n_used:>20}")
+        param_label = self.target_parameter
         lines.extend(
             [
                 "",
@@ -299,7 +300,7 @@ def summary(self) -> str:
                 ),
                 "-" * width,
                 (
-                    f"{'WAS':<15} {self.att:>12.4f} {self.se:>12.4f} "
+                    f"{param_label:<15} {self.att:>12.4f} {self.se:>12.4f} "
                     f"{self.t_stat:>10.3f} {self.p_value:>10.4f}"
                 ),
                 "-" * width,
@@ -395,30 +396,27 @@ def _validate_had_panel(
     if missing:
         raise ValueError(f"Missing column(s) in data: {missing}. Required: {required}.")
 
-    periods = np.sort(np.asarray(data[time_col].unique()))
-    if len(periods) < 2:
+    periods_list = list(data[time_col].unique())
+    if len(periods_list) < 2:
         raise ValueError(
-            f"HAD requires a two-period panel; got {len(periods)} distinct "
+            f"HAD requires a two-period panel; got {len(periods_list)} distinct "
             f"period(s) in column {time_col!r}."
         )
-    if len(periods) > 2:
+    if len(periods_list) > 2:
         if first_treat_col is None:
             raise ValueError(
                 f"HAD Phase 2a requires exactly two time periods "
-                f"(got {len(periods)} in {time_col!r}) when "
+                f"(got {len(periods_list)} in {time_col!r}) when "
                 f"first_treat_col=None. Multi-period / staggered adoption "
                 f"support is queued for Phase 2b (Appendix B.2 event-study)."
             )
         raise ValueError(
             f"HAD Phase 2a requires exactly two time periods "
-            f"(got {len(periods)} in {time_col!r}). Staggered adoption "
+            f"(got {len(periods_list)} in {time_col!r}). Staggered adoption "
             f"reduction (first_treat_col supplied with >2 periods) is "
             f"queued for Phase 2b (Appendix B.2 event-study)."
         )
 
-    t_pre = int(periods[0]) if np.issubdtype(periods.dtype, np.integer) else periods[0]
-    t_post = int(periods[1]) if np.issubdtype(periods.dtype, np.integer) else periods[1]
-
     # Balanced-panel check: every unit appears exactly once per period.
     counts = data.groupby([unit_col, time_col]).size()
     if (counts != 1).any():
@@ -446,17 +444,35 @@ def _validate_had_panel(
                 f"calling fit()."
             )
 
-    # Pre-period no-unit-untreated check.
-    pre_mask = data[time_col] == t_pre
-    pre_doses = np.asarray(data.loc[pre_mask, dose_col], dtype=np.float64)
-    nonzero_pre = pre_doses != 0
-    if nonzero_pre.any():
-        n_bad = int(nonzero_pre.sum())
+    # Identify t_pre and t_post by the HAD invariant rather than by
+    # lexicographic sort on the time labels: D_{g, t_pre} = 0 for all
+    # units (paper Section 2 no-unit-untreated pre-period convention).
+    # Sorting labels alphabetically reverses valid chronologies like
+    # ("pre", "post") where ordering is semantic, not alphabetic.
+    per_period_nonzero: Dict[Any, int] = {}
+    for p in periods_list:
+        p_doses = np.asarray(data.loc[data[time_col] == p, dose_col], dtype=np.float64)
+        per_period_nonzero[p] = int((p_doses != 0).sum())
+    all_zero_periods = [p for p, nz in per_period_nonzero.items() if nz == 0]
+    if len(all_zero_periods) == 0:
+        # Neither period has all-zero dose: HAD pre-period contract violated.
+        stats_str = ", ".join(f"{p!r}: {nz} nonzero" for p, nz in per_period_nonzero.items())
         raise ValueError(
             f"HAD requires D_{{g,1}} = 0 for all units (pre-period "
-            f"untreated). {n_bad} unit(s) have nonzero dose at "
-            f"t_pre={t_pre}. Drop these units or verify the dose column."
+            f"untreated). Neither period in column {time_col!r} has "
+            f"all-zero dose ({stats_str}). Exactly one period must be "
+            f"the pre-treatment period with D_{{g,1}} = 0 for every unit; "
+            f"drop rows with nonzero pre-period dose or verify the dose "
+            f"column."
+        )
+    if len(all_zero_periods) == 2:
+        raise ValueError(
+            f"HAD requires variation in D_{{g,2}} for estimation. Both "
+            f"periods in column {time_col!r} have all-zero dose, so "
+            f"there is no treatment assignment to estimate."
         )
+    t_pre = all_zero_periods[0]
+    t_post = [p for p in periods_list if p != t_pre][0]
 
     # Post-period nonnegative-dose check on the ORIGINAL (unshifted) dose
     # scale. Front-door rejection per paper Assumption (dose definition
diff --git a/tests/test_had.py b/tests/test_had.py
@@ -899,6 +899,42 @@ def test_summary_returns_string(self):
         assert "WAS" in s
         assert "Confidence Interval" in s
 
+    def test_summary_uses_target_parameter_for_row_label(self):
+        """Review P2: the estimate row must render target_parameter (WAS or
+        WAS_d_lower), not hardcoded 'WAS'.
+        """
+        # Design 1' -> target_parameter = "WAS"
+        d, dy = _dgp_continuous_at_zero(400, seed=0)
+        panel = _make_panel(d, dy)
+        r_d1p = HeterogeneousAdoptionDiD(design="continuous_at_zero").fit(
+            panel, "outcome", "dose", "period", "unit"
+        )
+        s_d1p = r_d1p.summary()
+        assert r_d1p.target_parameter == "WAS"
+        assert "WAS" in s_d1p
+
+        # Design 1 continuous-near-d_lower -> target_parameter = "WAS_d_lower"
+        d, dy = _dgp_continuous_near_d_lower(400, seed=0)
+        panel = _make_panel(d, dy)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", UserWarning)
+            r_d1 = HeterogeneousAdoptionDiD(design="continuous_near_d_lower").fit(
+                panel, "outcome", "dose", "period", "unit"
+            )
+        assert r_d1.target_parameter == "WAS_d_lower"
+        assert "WAS_d_lower" in r_d1.summary()
+
+        # Design 1 mass-point -> target_parameter = "WAS_d_lower"
+        d, dy = _dgp_mass_point(400, seed=0)
+        panel = _make_panel(d, dy)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", UserWarning)
+            r_mp = HeterogeneousAdoptionDiD(design="mass_point").fit(
+                panel, "outcome", "dose", "period", "unit"
+            )
+        assert r_mp.target_parameter == "WAS_d_lower"
+        assert "WAS_d_lower" in r_mp.summary()
+
     def test_print_summary_executes(self, capsys):
         r = self._result()
         r.print_summary()
@@ -1430,6 +1466,70 @@ def test_first_treat_col_dtype_agnostic_rejects_invalid_string(self):
         with pytest.raises(ValueError, match="first_treat_col"):
             _validate_had_panel(panel, "outcome", "dose", "period", "unit", "ft")
 
+    def test_semantic_pre_post_labels_not_lexicographic(self):
+        """Review P1 round 3: pre/post inference must be dose-based.
+
+        ("pre", "post") sorts alphabetically to ["post", "pre"], which
+        previously flipped the pre/post labels and raised on a valid
+        panel. The validator now infers pre from the all-zero-dose
+        period.
+        """
+        d, dy = _dgp_continuous_at_zero(100, seed=0)
+        panel = _make_panel(d, dy, periods=("pre", "post"))
+        t_pre, t_post = _validate_had_panel(panel, "outcome", "dose", "period", "unit", None)
+        assert t_pre == "pre"
+        assert t_post == "post"
+
+    def test_semantic_pre_post_with_first_treat_col(self):
+        """Combined: string periods + first_treat_col in {0, 'post'}."""
+        d, dy = _dgp_continuous_at_zero(100, seed=0)
+        panel = _make_panel(d, dy, periods=("pre", "post"))
+        ft_unit = np.array([0 if i % 2 == 0 else "post" for i in range(100)], dtype=object)
+        panel["ft"] = np.repeat(ft_unit, 2)
+        t_pre, t_post = _validate_had_panel(panel, "outcome", "dose", "period", "unit", "ft")
+        assert t_pre == "pre"
+        assert t_post == "post"
+
+    def test_semantic_pre_post_fit_end_to_end(self):
+        """End-to-end: fit() runs on ("pre","post")-labelled panel."""
+        d, dy = _dgp_continuous_at_zero(500, seed=0)
+        panel = _make_panel(d, dy, periods=("pre", "post"))
+        r = HeterogeneousAdoptionDiD(design="continuous_at_zero").fit(
+            panel, "outcome", "dose", "period", "unit"
+        )
+        assert np.isfinite(r.att)
+
+    def test_before_after_labels(self):
+        """("before","after") is also reversed alphabetically; must not fail."""
+        d, dy = _dgp_continuous_at_zero(100, seed=0)
+        panel = _make_panel(d, dy, periods=("before", "after"))
+        t_pre, t_post = _validate_had_panel(panel, "outcome", "dose", "period", "unit", None)
+        assert t_pre == "before"
+        assert t_post == "after"
+
+    def test_no_all_zero_period_raises(self):
+        """If neither period has all-zero dose, HAD's D_{g,1}=0 contract fails."""
+        d, dy = _dgp_continuous_at_zero(100, seed=0)
+        panel = _make_panel(d, dy)
+        # Inject nonzero dose into the pre period so neither period is all-zero.
+        panel.loc[panel["period"] == 1, "dose"] = 0.5
+        with pytest.raises(ValueError, match=r"D_\{g,1\}|pre-treatment"):
+            _validate_had_panel(panel, "outcome", "dose", "period", "unit", None)
+
+    def test_both_all_zero_periods_raises(self):
+        """If both periods have all-zero dose, no treatment to estimate."""
+        G = 100
+        panel = pd.DataFrame(
+            {
+                "unit": np.repeat(np.arange(G), 2),
+                "period": np.tile([1, 2], G),
+                "dose": np.zeros(2 * G),
+                "outcome": np.random.default_rng(0).standard_normal(2 * G),
+            }
+        )
+        with pytest.raises(ValueError, match="variation"):
+            _validate_had_panel(panel, "outcome", "dose", "period", "unit", None)
+
 
 # =============================================================================
 # Review P1: continuous_near_d_lower on a true mass-point sample rejects